vfs_syscalls.c revision 1.390 1 /* $NetBSD: vfs_syscalls.c,v 1.390 2009/02/23 20:33:30 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.390 2009/02/23 20:33:30 ad Exp $");
70
71 #ifdef _KERNEL_OPT
72 #include "opt_fileassoc.h"
73 #include "veriexec.h"
74 #endif
75
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/namei.h>
79 #include <sys/filedesc.h>
80 #include <sys/kernel.h>
81 #include <sys/file.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/proc.h>
86 #include <sys/uio.h>
87 #include <sys/kmem.h>
88 #include <sys/dirent.h>
89 #include <sys/sysctl.h>
90 #include <sys/syscallargs.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/ktrace.h>
93 #ifdef FILEASSOC
94 #include <sys/fileassoc.h>
95 #endif /* FILEASSOC */
96 #include <sys/verified_exec.h>
97 #include <sys/kauth.h>
98 #include <sys/atomic.h>
99 #include <sys/module.h>
100 #include <sys/buf.h>
101
102 #include <miscfs/genfs/genfs.h>
103 #include <miscfs/syncfs/syncfs.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <nfs/rpcv2.h>
107 #include <nfs/nfsproto.h>
108 #include <nfs/nfs.h>
109 #include <nfs/nfs_var.h>
110
/* malloc(9) type used to account for "vfs mount struct" allocations. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");

/*
 * Helpers private to this file; their definitions are not part of the
 * portion of the file visible here.
 */
static int change_dir(struct nameidata *, struct lwp *);
static int change_flags(struct vnode *, u_long, struct lwp *);
static int change_mode(struct vnode *, int, struct lwp *l);
static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);

/* Called by mount_domount() once a new file system has been attached. */
void checkdirs(struct vnode *);

/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * a tunable that permits mounts by unprivileged users -- confirm.
 */
int dovfsusermount = 0;
121
122 /*
123 * Virtual File System System Calls
124 */
125
126 /*
127 * Mount a file system.
128 */
129
/*
 * This table is used to maintain compatibility with 4.3BSD
 * and NetBSD 0.9 mount syscalls - and possibly other systems.
 * Note, the order is important!  mount_get_vfsops() indexes this
 * table with the historic integer file system type when the type
 * argument cannot be copied in as a string.  NULL slots denote
 * types that have no corresponding file system name.
 *
 * Do not modify this table. It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};
/* Number of slots in mountcompatnames[]; used as the bound for lookups. */
const int nmountcompatnames = sizeof(mountcompatnames) /
    sizeof(mountcompatnames[0]);
152
153 static int
154 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
155 void *data, size_t *data_len)
156 {
157 struct mount *mp;
158 int error = 0, saved_flags;
159
160 mp = vp->v_mount;
161 saved_flags = mp->mnt_flag;
162
163 /* We can operate only on VV_ROOT nodes. */
164 if ((vp->v_vflag & VV_ROOT) == 0) {
165 error = EINVAL;
166 goto out;
167 }
168
169 /*
170 * We only allow the filesystem to be reloaded if it
171 * is currently mounted read-only. Additionally, we
172 * prevent read-write to read-only downgrades.
173 */
174 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
175 (mp->mnt_flag & MNT_RDONLY) == 0) {
176 error = EOPNOTSUPP; /* Needs translation */
177 goto out;
178 }
179
180 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
181 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
182 if (error)
183 goto out;
184
185 if (vfs_busy(mp, NULL)) {
186 error = EPERM;
187 goto out;
188 }
189
190 mutex_enter(&mp->mnt_updating);
191
192 mp->mnt_flag &= ~MNT_OP_FLAGS;
193 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
194
195 /*
196 * Set the mount level flags.
197 */
198 if (flags & MNT_RDONLY)
199 mp->mnt_flag |= MNT_RDONLY;
200 else if (mp->mnt_flag & MNT_RDONLY)
201 mp->mnt_iflag |= IMNT_WANTRDWR;
202 mp->mnt_flag &=
203 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
204 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
205 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
206 MNT_LOG);
207 mp->mnt_flag |= flags &
208 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
209 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
210 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
211 MNT_LOG | MNT_IGNORE);
212
213 error = VFS_MOUNT(mp, path, data, data_len);
214
215 if (error && data != NULL) {
216 int error2;
217
218 /*
219 * Update failed; let's try and see if it was an
220 * export request. For compat with 3.0 and earlier.
221 */
222 error2 = vfs_hooks_reexport(mp, path, data);
223
224 /*
225 * Only update error code if the export request was
226 * understood but some problem occurred while
227 * processing it.
228 */
229 if (error2 != EJUSTRETURN)
230 error = error2;
231 }
232
233 if (mp->mnt_iflag & IMNT_WANTRDWR)
234 mp->mnt_flag &= ~MNT_RDONLY;
235 if (error)
236 mp->mnt_flag = saved_flags;
237 mp->mnt_flag &= ~MNT_OP_FLAGS;
238 mp->mnt_iflag &= ~IMNT_WANTRDWR;
239 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
240 if (mp->mnt_syncer == NULL)
241 error = vfs_allocate_syncvnode(mp);
242 } else {
243 if (mp->mnt_syncer != NULL)
244 vfs_deallocate_syncvnode(mp);
245 }
246 mutex_exit(&mp->mnt_updating);
247 vfs_unbusy(mp, false, NULL);
248
249 out:
250 return (error);
251 }
252
253 static int
254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
255 {
256 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
257 int error;
258
259 /* Copy file-system type from userspace. */
260 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
261 if (error) {
262 /*
263 * Historically, filesystem types were identified by numbers.
264 * If we get an integer for the filesystem type instead of a
265 * string, we check to see if it matches one of the historic
266 * filesystem types.
267 */
268 u_long fsindex = (u_long)fstype;
269 if (fsindex >= nmountcompatnames ||
270 mountcompatnames[fsindex] == NULL)
271 return ENODEV;
272 strlcpy(fstypename, mountcompatnames[fsindex],
273 sizeof(fstypename));
274 }
275
276 /* Accept `ufs' as an alias for `ffs', for compatibility. */
277 if (strcmp(fstypename, "ufs") == 0)
278 fstypename[0] = 'f';
279
280 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
281 return 0;
282
283 /* If we can autoload a vfs module, try again */
284 mutex_enter(&module_lock);
285 (void)module_autoload(fstype, MODULE_CLASS_VFS);
286 mutex_exit(&module_lock);
287
288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
289 return 0;
290
291 return ENODEV;
292 }
293
/*
 * Create a new mount of type vfsops on top of the directory *vpp.
 *
 * recurse is the value the caller obtained from vn_setrecurse() on *vpp;
 * it is handed back to vn_restorerecurse() once the mount is in place.
 *
 * Once VFS_MOUNT has succeeded this function unlocks the covered vnode
 * itself and sets *vpp to NULL so that the caller does not release it
 * again (this also happens when VFS_START subsequently fails).  On every
 * earlier failure *vpp is left untouched and cleanup of the vnode is left
 * to the caller.
 *
 * Returns 0 or an errno value.
 */
static int
mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len, u_int recurse)
{
	struct mount *mp;
	struct vnode *vp = *vpp;
	struct vattr va;
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error)
		return error;

	/* Can't make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR)
		return ENOTDIR;

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
	    (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
	    (error = kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
		return error;
	}

	/* MNT_EXPORTED is not accepted when creating a new mount. */
	if (flags & MNT_EXPORTED)
		return EINVAL;

	if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
		return error;

	/*
	 * Check if a file-system is not already mounted on this vnode.
	 */
	if (vp->v_mountedhere != NULL)
		return EBUSY;

	/*
	 * NOTE(review): the allocation uses KM_SLEEP, so the NULL check
	 * below is presumably never true -- confirm against kmem(9).
	 */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return ENOMEM;

	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;

	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_unmounting);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
	/* The mount is brand new, so busying it is asserted to succeed. */
	error = vfs_busy(mp, NULL);
	KASSERT(error == 0);
	mutex_enter(&mp->mnt_updating);

	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
	mount_initspecific(mp);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags &
	    (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_LOG | MNT_IGNORE | MNT_RDONLY);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	cache_purge(vp);
	if (error != 0) {
		/* The file system refused: undo everything set up above. */
		vp->v_mountedhere = NULL;
		mutex_exit(&mp->mnt_updating);
		vfs_unbusy(mp, false, NULL);
		vfs_destroy(mp);
		return error;
	}

	/*
	 * Put the new filesystem at the tail of the mount list and
	 * attach it to the covered vnode.
	 */
	mp->mnt_iflag &= ~IMNT_WANTRDWR;
	mutex_enter(&mountlist_lock);
	vp->v_mountedhere = mp;
	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mutex_exit(&mountlist_lock);
	vn_restorerecurse(vp, recurse);
	VOP_UNLOCK(vp, 0);
	checkdirs(vp);
	/*
	 * Only writable, non-async mounts get a syncer vnode.  Note that
	 * the result stored in error here is overwritten by VFS_START()
	 * below.
	 */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		error = vfs_allocate_syncvnode(mp);
	/* Hold an additional reference to the mount across VFS_START(). */
	mutex_exit(&mp->mnt_updating);
	vfs_unbusy(mp, true, NULL);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error)
		vrele(vp);
	/* Drop reference held for VFS_START(). */
	vfs_destroy(mp);
	/* Tell the caller that the covered vnode has been dealt with. */
	*vpp = NULL;
	return error;
}
403
404 static int
405 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
406 void *data, size_t *data_len)
407 {
408 struct mount *mp;
409 int error;
410
411 /* If MNT_GETARGS is specified, it should be the only flag. */
412 if (flags & ~MNT_GETARGS)
413 return EINVAL;
414
415 mp = vp->v_mount;
416
417 /* XXX: probably some notion of "can see" here if we want isolation. */
418 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
419 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
420 if (error)
421 return error;
422
423 if ((vp->v_vflag & VV_ROOT) == 0)
424 return EINVAL;
425
426 if (vfs_busy(mp, NULL))
427 return EPERM;
428
429 mutex_enter(&mp->mnt_updating);
430 mp->mnt_flag &= ~MNT_OP_FLAGS;
431 mp->mnt_flag |= MNT_GETARGS;
432 error = VFS_MOUNT(mp, path, data, data_len);
433 mp->mnt_flag &= ~MNT_OP_FLAGS;
434 mutex_exit(&mp->mnt_updating);
435
436 vfs_unbusy(mp, false, NULL);
437 return (error);
438 }
439
440 int
441 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
442 {
443 /* {
444 syscallarg(const char *) type;
445 syscallarg(const char *) path;
446 syscallarg(int) flags;
447 syscallarg(void *) data;
448 syscallarg(size_t) data_len;
449 } */
450
451 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
452 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
453 SCARG(uap, data_len), retval);
454 }
455
456 int
457 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
458 const char *path, int flags, void *data, enum uio_seg data_seg,
459 size_t data_len, register_t *retval)
460 {
461 struct vnode *vp;
462 struct nameidata nd;
463 void *data_buf = data;
464 u_int recurse;
465 int error;
466
467 /*
468 * Get vnode to be covered
469 */
470 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
471 if ((error = namei(&nd)) != 0)
472 return (error);
473 vp = nd.ni_vp;
474
475 /*
476 * A lookup in VFS_MOUNT might result in an attempt to
477 * lock this vnode again, so make the lock recursive.
478 */
479 if (vfsops == NULL) {
480 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
481 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
482 recurse = vn_setrecurse(vp);
483 vfsops = vp->v_mount->mnt_op;
484 } else {
485 /* 'type' is userspace */
486 error = mount_get_vfsops(type, &vfsops);
487 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
488 recurse = vn_setrecurse(vp);
489 if (error != 0)
490 goto done;
491 }
492 } else {
493 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
494 recurse = vn_setrecurse(vp);
495 }
496
497 if (data != NULL && data_seg == UIO_USERSPACE) {
498 if (data_len == 0) {
499 /* No length supplied, use default for filesystem */
500 data_len = vfsops->vfs_min_mount_data;
501 if (data_len > VFS_MAX_MOUNT_DATA) {
502 error = EINVAL;
503 goto done;
504 }
505 /*
506 * Hopefully a longer buffer won't make copyin() fail.
507 * For compatibility with 3.0 and earlier.
508 */
509 if (flags & MNT_UPDATE
510 && data_len < sizeof (struct mnt_export_args30))
511 data_len = sizeof (struct mnt_export_args30);
512 }
513 data_buf = kmem_alloc(data_len, KM_SLEEP);
514
515 /* NFS needs the buffer even for mnt_getargs .... */
516 error = copyin(data, data_buf, data_len);
517 if (error != 0)
518 goto done;
519 }
520
521 if (flags & MNT_GETARGS) {
522 if (data_len == 0) {
523 error = EINVAL;
524 goto done;
525 }
526 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
527 if (error != 0)
528 goto done;
529 if (data_seg == UIO_USERSPACE)
530 error = copyout(data_buf, data, data_len);
531 *retval = data_len;
532 } else if (flags & MNT_UPDATE) {
533 error = mount_update(l, vp, path, flags, data_buf, &data_len);
534 } else {
535 /* Locking is handled internally in mount_domount(). */
536 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
537 &data_len, recurse);
538 }
539
540 done:
541 if (vp != NULL) {
542 vn_restorerecurse(vp, recurse);
543 vput(vp);
544 }
545 if (data_buf != data)
546 kmem_free(data_buf, data_len);
547 return (error);
548 }
549
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * olddp is the vnode that has just been covered; the root vnode of
 * the file system now mounted on it is obtained with VFS_ROOT().
 * The global rootvnode is switched over in the same way.
 */
void
checkdirs(struct vnode *olddp)
{
	struct cwdinfo *cwdi;
	struct vnode *newdp, *rele1, *rele2;
	struct proc *p;
	bool retry;

	/* A use count of one is taken to mean nobody else has it open. */
	if (olddp->v_usecount == 1)
		return;
	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
		panic("mount: lost mount");

	/*
	 * proc_lock is dropped while a process's cwdinfo is updated, so
	 * after every update the process list is scanned again from the
	 * start.  The scan ends once a full pass finds nothing to change.
	 */
	do {
		retry = false;
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((p->p_flag & PK_MARKER) != 0)
				continue;
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Can't change to the old directory any more,
			 * so even if we see a stale value it's not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Keep the cwdinfo alive while proc_lock is dropped. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				VREF(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				VREF(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Drop the old references outside of cwdi_lock. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			/* Retake proc_lock for the mutex_exit() below. */
			mutex_enter(proc_lock);
			break;
		}
		mutex_exit(proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		VREF(newdp);
		rootvnode = newdp;
	}
	/* Release the vnode obtained from VFS_ROOT() above. */
	vput(newdp);
}
619
620 /*
621 * Unmount a file system.
622 *
623 * Note: unmount takes a path to the vnode mounted on as argument,
624 * not special file (as before).
625 */
626 /* ARGSUSED */
627 int
628 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
629 {
630 /* {
631 syscallarg(const char *) path;
632 syscallarg(int) flags;
633 } */
634 struct vnode *vp;
635 struct mount *mp;
636 int error;
637 struct nameidata nd;
638
639 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
640 SCARG(uap, path));
641 if ((error = namei(&nd)) != 0)
642 return (error);
643 vp = nd.ni_vp;
644 mp = vp->v_mount;
645 atomic_inc_uint(&mp->mnt_refcnt);
646 VOP_UNLOCK(vp, 0);
647
648 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
649 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
650 if (error) {
651 vrele(vp);
652 vfs_destroy(mp);
653 return (error);
654 }
655
656 /*
657 * Don't allow unmounting the root file system.
658 */
659 if (mp->mnt_flag & MNT_ROOTFS) {
660 vrele(vp);
661 vfs_destroy(mp);
662 return (EINVAL);
663 }
664
665 /*
666 * Must be the root of the filesystem
667 */
668 if ((vp->v_vflag & VV_ROOT) == 0) {
669 vrele(vp);
670 vfs_destroy(mp);
671 return (EINVAL);
672 }
673
674 vrele(vp);
675 error = dounmount(mp, SCARG(uap, flags), l);
676 vfs_destroy(mp);
677 return error;
678 }
679
/*
 * Do the actual file system unmount.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
 * => syncer_mutex and mp->mnt_unmounting are acquired and released
 *    here, not by the caller.
 *
 * Returns 0 on success, ENOENT if the mount has already gone away, or
 * the error reported by veriexec, VFS_SYNC or VFS_UNMOUNT.  On failure
 * the mount is put back into service.
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	struct vnode *coveredvp;
	int error;
	int async;
	int used_syncer;

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	/*
	 * XXX Freeze syncer.  Must do this before locking the
	 * mount point.  See the comment on lock order below.
	 */
	mutex_enter(&syncer_mutex);
	rw_enter(&mp->mnt_unmounting, RW_WRITER);
	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		/* Somebody else unmounted it first. */
		rw_exit(&mp->mnt_unmounting);
		mutex_exit(&syncer_mutex);
		return ENOENT;
	}

	used_syncer = (mp->mnt_syncer != NULL);

	/*
	 * XXX Syncer must be frozen when we get here.  This should really
	 * be done on a per-mountpoint basis, but the syncer doesn't work
	 * like that.
	 *
	 * syncer_mutex is taken before the mount is locked because
	 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
	 * order, and we must preserve that order to avoid deadlock.
	 *
	 * So, if the file system did not use the syncer, now is
	 * the time to release the syncer_mutex.
	 */
	if (used_syncer == 0)
		mutex_exit(&syncer_mutex);

	mp->mnt_iflag |= IMNT_UNMOUNT;
	/* Remember MNT_ASYNC so that it can be restored on failure. */
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vfs_deallocate_syncvnode(mp);
	error = 0;
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	vfs_scrubvnlist(mp);
	/* A failed sync only stops the unmount if it is not forced. */
	if (error == 0 || (flags & MNT_FORCE))
		error = VFS_UNMOUNT(mp, flags);
	if (error) {
		/*
		 * Unmount failed: put the mount back into service.  Note
		 * that MNT_ASYNC is still clear when the syncer test
		 * below is made; it is restored afterwards.
		 */
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			(void) vfs_allocate_syncvnode(mp);
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		mp->mnt_flag |= async;
		rw_exit(&mp->mnt_unmounting);
		if (used_syncer)
			mutex_exit(&syncer_mutex);
		return (error);
	}
	vfs_scrubvnlist(mp);
	/* Detach from the covered vnode and from the mount list. */
	mutex_enter(&mountlist_lock);
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
		coveredvp->v_mountedhere = NULL;
	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
	mp->mnt_iflag |= IMNT_GONE;
	mutex_exit(&mountlist_lock);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	if (used_syncer)
		mutex_exit(&syncer_mutex);
	vfs_hooks_unmount(mp);
	rw_exit(&mp->mnt_unmounting);
	vfs_destroy(mp);	/* reference from mount() */
	if (coveredvp != NULLVP)
		vrele(coveredvp);
	return (0);
}
770
771 /*
772 * Sync each mounted filesystem.
773 */
774 #ifdef DEBUG
775 int syncprt = 0;
776 struct ctldebug debug0 = { "syncprt", &syncprt };
777 #endif
778
779 /* ARGSUSED */
780 int
781 sys_sync(struct lwp *l, const void *v, register_t *retval)
782 {
783 struct mount *mp, *nmp;
784 int asyncflag;
785
786 if (l == NULL)
787 l = &lwp0;
788
789 mutex_enter(&mountlist_lock);
790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
791 mp = nmp) {
792 if (vfs_busy(mp, &nmp)) {
793 continue;
794 }
795 mutex_enter(&mp->mnt_updating);
796 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
797 asyncflag = mp->mnt_flag & MNT_ASYNC;
798 mp->mnt_flag &= ~MNT_ASYNC;
799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
800 if (asyncflag)
801 mp->mnt_flag |= MNT_ASYNC;
802 }
803 mutex_exit(&mp->mnt_updating);
804 vfs_unbusy(mp, false, &nmp);
805 }
806 mutex_exit(&mountlist_lock);
807 #ifdef DEBUG
808 if (syncprt)
809 vfs_bufstats();
810 #endif /* DEBUG */
811 return (0);
812 }
813
814 /*
815 * Change filesystem quotas.
816 */
817 /* ARGSUSED */
818 int
819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
820 {
821 /* {
822 syscallarg(const char *) path;
823 syscallarg(int) cmd;
824 syscallarg(int) uid;
825 syscallarg(void *) arg;
826 } */
827 struct mount *mp;
828 int error;
829 struct nameidata nd;
830
831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
832 SCARG(uap, path));
833 if ((error = namei(&nd)) != 0)
834 return (error);
835 mp = nd.ni_vp->v_mount;
836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
837 SCARG(uap, arg));
838 vrele(nd.ni_vp);
839 return (error);
840 }
841
842 int
843 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
844 int root)
845 {
846 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
847 int error = 0;
848
849 /*
850 * If MNT_NOWAIT or MNT_LAZY is specified, do not
851 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
852 * overrides MNT_NOWAIT.
853 */
854 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
855 (flags != MNT_WAIT && flags != 0)) {
856 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
857 goto done;
858 }
859
860 /* Get the filesystem stats now */
861 memset(sp, 0, sizeof(*sp));
862 if ((error = VFS_STATVFS(mp, sp)) != 0) {
863 return error;
864 }
865
866 if (cwdi->cwdi_rdir == NULL)
867 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
868 done:
869 if (cwdi->cwdi_rdir != NULL) {
870 size_t len;
871 char *bp;
872 char c;
873 char *path = PNBUF_GET();
874
875 bp = path + MAXPATHLEN;
876 *--bp = '\0';
877 rw_enter(&cwdi->cwdi_lock, RW_READER);
878 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
879 MAXPATHLEN / 2, 0, l);
880 rw_exit(&cwdi->cwdi_lock);
881 if (error) {
882 PNBUF_PUT(path);
883 return error;
884 }
885 len = strlen(bp);
886 if (len != 1) {
887 /*
888 * for mount points that are below our root, we can see
889 * them, so we fix up the pathname and return them. The
890 * rest we cannot see, so we don't allow viewing the
891 * data.
892 */
893 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
894 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
895 (void)strlcpy(sp->f_mntonname,
896 c == '\0' ? "/" : &sp->f_mntonname[len],
897 sizeof(sp->f_mntonname));
898 } else {
899 if (root)
900 (void)strlcpy(sp->f_mntonname, "/",
901 sizeof(sp->f_mntonname));
902 else
903 error = EPERM;
904 }
905 }
906 PNBUF_PUT(path);
907 }
908 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
909 return error;
910 }
911
912 /*
913 * Get filesystem statistics by path.
914 */
915 int
916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
917 {
918 struct mount *mp;
919 int error;
920 struct nameidata nd;
921
922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
923 if ((error = namei(&nd)) != 0)
924 return error;
925 mp = nd.ni_vp->v_mount;
926 error = dostatvfs(mp, sb, l, flags, 1);
927 vrele(nd.ni_vp);
928 return error;
929 }
930
931 /* ARGSUSED */
932 int
933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
934 {
935 /* {
936 syscallarg(const char *) path;
937 syscallarg(struct statvfs *) buf;
938 syscallarg(int) flags;
939 } */
940 struct statvfs *sb;
941 int error;
942
943 sb = STATVFSBUF_GET();
944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
945 if (error == 0)
946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
947 STATVFSBUF_PUT(sb);
948 return error;
949 }
950
951 /*
952 * Get filesystem statistics by fd.
953 */
954 int
955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
956 {
957 file_t *fp;
958 struct mount *mp;
959 int error;
960
961 /* fd_getvnode() will use the descriptor for us */
962 if ((error = fd_getvnode(fd, &fp)) != 0)
963 return (error);
964 mp = ((struct vnode *)fp->f_data)->v_mount;
965 error = dostatvfs(mp, sb, curlwp, flags, 1);
966 fd_putfile(fd);
967 return error;
968 }
969
970 /* ARGSUSED */
971 int
972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
973 {
974 /* {
975 syscallarg(int) fd;
976 syscallarg(struct statvfs *) buf;
977 syscallarg(int) flags;
978 } */
979 struct statvfs *sb;
980 int error;
981
982 sb = STATVFSBUF_GET();
983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
984 if (error == 0)
985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
986 STATVFSBUF_PUT(sb);
987 return error;
988 }
989
990
991 /*
992 * Get statistics on all filesystems.
993 */
994 int
995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
997 register_t *retval)
998 {
999 int root = 0;
1000 struct proc *p = l->l_proc;
1001 struct mount *mp, *nmp;
1002 struct statvfs *sb;
1003 size_t count, maxcount;
1004 int error = 0;
1005
1006 sb = STATVFSBUF_GET();
1007 maxcount = bufsize / entry_sz;
1008 mutex_enter(&mountlist_lock);
1009 count = 0;
1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1011 mp = nmp) {
1012 if (vfs_busy(mp, &nmp)) {
1013 continue;
1014 }
1015 if (sfsp && count < maxcount) {
1016 error = dostatvfs(mp, sb, l, flags, 0);
1017 if (error) {
1018 vfs_unbusy(mp, false, &nmp);
1019 error = 0;
1020 continue;
1021 }
1022 error = copyfn(sb, sfsp, entry_sz);
1023 if (error) {
1024 vfs_unbusy(mp, false, NULL);
1025 goto out;
1026 }
1027 sfsp = (char *)sfsp + entry_sz;
1028 root |= strcmp(sb->f_mntonname, "/") == 0;
1029 }
1030 count++;
1031 vfs_unbusy(mp, false, &nmp);
1032 }
1033 mutex_exit(&mountlist_lock);
1034
1035 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1036 /*
1037 * fake a root entry
1038 */
1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1040 sb, l, flags, 1);
1041 if (error != 0)
1042 goto out;
1043 if (sfsp) {
1044 error = copyfn(sb, sfsp, entry_sz);
1045 if (error != 0)
1046 goto out;
1047 }
1048 count++;
1049 }
1050 if (sfsp && count > maxcount)
1051 *retval = maxcount;
1052 else
1053 *retval = count;
1054 out:
1055 STATVFSBUF_PUT(sb);
1056 return error;
1057 }
1058
1059 int
1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1061 {
1062 /* {
1063 syscallarg(struct statvfs *) buf;
1064 syscallarg(size_t) bufsize;
1065 syscallarg(int) flags;
1066 } */
1067
1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1070 }
1071
1072 /*
1073 * Change current working directory to a given file descriptor.
1074 */
1075 /* ARGSUSED */
1076 int
1077 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1078 {
1079 /* {
1080 syscallarg(int) fd;
1081 } */
1082 struct proc *p = l->l_proc;
1083 struct cwdinfo *cwdi;
1084 struct vnode *vp, *tdp;
1085 struct mount *mp;
1086 file_t *fp;
1087 int error, fd;
1088
1089 /* fd_getvnode() will use the descriptor for us */
1090 fd = SCARG(uap, fd);
1091 if ((error = fd_getvnode(fd, &fp)) != 0)
1092 return (error);
1093 vp = fp->f_data;
1094
1095 VREF(vp);
1096 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1097 if (vp->v_type != VDIR)
1098 error = ENOTDIR;
1099 else
1100 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1101 if (error) {
1102 vput(vp);
1103 goto out;
1104 }
1105 while ((mp = vp->v_mountedhere) != NULL) {
1106 error = vfs_busy(mp, NULL);
1107 vput(vp);
1108 if (error != 0)
1109 goto out;
1110 error = VFS_ROOT(mp, &tdp);
1111 vfs_unbusy(mp, false, NULL);
1112 if (error)
1113 goto out;
1114 vp = tdp;
1115 }
1116 VOP_UNLOCK(vp, 0);
1117
1118 /*
1119 * Disallow changing to a directory not under the process's
1120 * current root directory (if there is one).
1121 */
1122 cwdi = p->p_cwdi;
1123 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1124 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1125 vrele(vp);
1126 error = EPERM; /* operation not permitted */
1127 } else {
1128 vrele(cwdi->cwdi_cdir);
1129 cwdi->cwdi_cdir = vp;
1130 }
1131 rw_exit(&cwdi->cwdi_lock);
1132
1133 out:
1134 fd_putfile(fd);
1135 return (error);
1136 }
1137
1138 /*
1139 * Change this process's notion of the root directory to a given file
1140 * descriptor.
1141 */
1142 int
1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1144 {
1145 struct proc *p = l->l_proc;
1146 struct cwdinfo *cwdi;
1147 struct vnode *vp;
1148 file_t *fp;
1149 int error, fd = SCARG(uap, fd);
1150
1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1153 return error;
1154 /* fd_getvnode() will use the descriptor for us */
1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1156 return error;
1157 vp = fp->f_data;
1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1159 if (vp->v_type != VDIR)
1160 error = ENOTDIR;
1161 else
1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1163 VOP_UNLOCK(vp, 0);
1164 if (error)
1165 goto out;
1166 VREF(vp);
1167
1168 /*
1169 * Prevent escaping from chroot by putting the root under
1170 * the working directory. Silently chdir to / if we aren't
1171 * already there.
1172 */
1173 cwdi = p->p_cwdi;
1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1176 /*
1177 * XXX would be more failsafe to change directory to a
1178 * deadfs node here instead
1179 */
1180 vrele(cwdi->cwdi_cdir);
1181 VREF(vp);
1182 cwdi->cwdi_cdir = vp;
1183 }
1184
1185 if (cwdi->cwdi_rdir != NULL)
1186 vrele(cwdi->cwdi_rdir);
1187 cwdi->cwdi_rdir = vp;
1188 rw_exit(&cwdi->cwdi_lock);
1189
1190 out:
1191 fd_putfile(fd);
1192 return (error);
1193 }
1194
1195 /*
1196 * Change current working directory (``.'').
1197 */
1198 /* ARGSUSED */
1199 int
1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1201 {
1202 /* {
1203 syscallarg(const char *) path;
1204 } */
1205 struct proc *p = l->l_proc;
1206 struct cwdinfo *cwdi;
1207 int error;
1208 struct nameidata nd;
1209
1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1211 SCARG(uap, path));
1212 if ((error = change_dir(&nd, l)) != 0)
1213 return (error);
1214 cwdi = p->p_cwdi;
1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1216 vrele(cwdi->cwdi_cdir);
1217 cwdi->cwdi_cdir = nd.ni_vp;
1218 rw_exit(&cwdi->cwdi_lock);
1219 return (0);
1220 }
1221
1222 /*
1223 * Change notion of root (``/'') directory.
1224 */
1225 /* ARGSUSED */
1226 int
1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1228 {
1229 /* {
1230 syscallarg(const char *) path;
1231 } */
1232 struct proc *p = l->l_proc;
1233 struct cwdinfo *cwdi;
1234 struct vnode *vp;
1235 int error;
1236 struct nameidata nd;
1237
1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1240 return (error);
1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1242 SCARG(uap, path));
1243 if ((error = change_dir(&nd, l)) != 0)
1244 return (error);
1245
1246 cwdi = p->p_cwdi;
1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1248 if (cwdi->cwdi_rdir != NULL)
1249 vrele(cwdi->cwdi_rdir);
1250 vp = nd.ni_vp;
1251 cwdi->cwdi_rdir = vp;
1252
1253 /*
1254 * Prevent escaping from chroot by putting the root under
1255 * the working directory. Silently chdir to / if we aren't
1256 * already there.
1257 */
1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1259 /*
1260 * XXX would be more failsafe to change directory to a
1261 * deadfs node here instead
1262 */
1263 vrele(cwdi->cwdi_cdir);
1264 VREF(vp);
1265 cwdi->cwdi_cdir = vp;
1266 }
1267 rw_exit(&cwdi->cwdi_lock);
1268
1269 return (0);
1270 }
1271
1272 /*
1273 * Common routine for chroot and chdir.
1274 */
1275 static int
1276 change_dir(struct nameidata *ndp, struct lwp *l)
1277 {
1278 struct vnode *vp;
1279 int error;
1280
1281 if ((error = namei(ndp)) != 0)
1282 return (error);
1283 vp = ndp->ni_vp;
1284 if (vp->v_type != VDIR)
1285 error = ENOTDIR;
1286 else
1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1288
1289 if (error)
1290 vput(vp);
1291 else
1292 VOP_UNLOCK(vp, 0);
1293 return (error);
1294 }
1295
1296 /*
1297 * Check permissions, allocate an open file structure,
1298 * and call the device open routine if any.
1299 */
1300 int
1301 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1302 {
1303 /* {
1304 syscallarg(const char *) path;
1305 syscallarg(int) flags;
1306 syscallarg(int) mode;
1307 } */
1308 struct proc *p = l->l_proc;
1309 struct cwdinfo *cwdi = p->p_cwdi;
1310 file_t *fp;
1311 struct vnode *vp;
1312 int flags, cmode;
1313 int type, indx, error;
1314 struct flock lf;
1315 struct nameidata nd;
1316
1317 flags = FFLAGS(SCARG(uap, flags));
1318 if ((flags & (FREAD | FWRITE)) == 0)
1319 return (EINVAL);
1320 if ((error = fd_allocfile(&fp, &indx)) != 0)
1321 return (error);
1322 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1323 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1324 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1325 SCARG(uap, path));
1326 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1327 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1328 fd_abort(p, fp, indx);
1329 if ((error == EDUPFD || error == EMOVEFD) &&
1330 l->l_dupfd >= 0 && /* XXX from fdopen */
1331 (error =
1332 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1333 *retval = indx;
1334 return (0);
1335 }
1336 if (error == ERESTART)
1337 error = EINTR;
1338 return (error);
1339 }
1340
1341 l->l_dupfd = 0;
1342 vp = nd.ni_vp;
1343 fp->f_flag = flags & FMASK;
1344 fp->f_type = DTYPE_VNODE;
1345 fp->f_ops = &vnops;
1346 fp->f_data = vp;
1347 if (flags & (O_EXLOCK | O_SHLOCK)) {
1348 lf.l_whence = SEEK_SET;
1349 lf.l_start = 0;
1350 lf.l_len = 0;
1351 if (flags & O_EXLOCK)
1352 lf.l_type = F_WRLCK;
1353 else
1354 lf.l_type = F_RDLCK;
1355 type = F_FLOCK;
1356 if ((flags & FNONBLOCK) == 0)
1357 type |= F_WAIT;
1358 VOP_UNLOCK(vp, 0);
1359 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1360 if (error) {
1361 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1362 fd_abort(p, fp, indx);
1363 return (error);
1364 }
1365 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1366 atomic_or_uint(&fp->f_flag, FHASLOCK);
1367 }
1368 VOP_UNLOCK(vp, 0);
1369 *retval = indx;
1370 fd_affix(p, fp, indx);
1371 return (0);
1372 }
1373
1374 static void
1375 vfs__fhfree(fhandle_t *fhp)
1376 {
1377 size_t fhsize;
1378
1379 if (fhp == NULL) {
1380 return;
1381 }
1382 fhsize = FHANDLE_SIZE(fhp);
1383 kmem_free(fhp, fhsize);
1384 }
1385
1386 /*
1387 * vfs_composefh: compose a filehandle.
1388 */
1389
1390 int
1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1392 {
1393 struct mount *mp;
1394 struct fid *fidp;
1395 int error;
1396 size_t needfhsize;
1397 size_t fidsize;
1398
1399 mp = vp->v_mount;
1400 fidp = NULL;
1401 if (*fh_size < FHANDLE_SIZE_MIN) {
1402 fidsize = 0;
1403 } else {
1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1405 if (fhp != NULL) {
1406 memset(fhp, 0, *fh_size);
1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1408 fidp = &fhp->fh_fid;
1409 }
1410 }
1411 error = VFS_VPTOFH(vp, fidp, &fidsize);
1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1413 if (error == 0 && *fh_size < needfhsize) {
1414 error = E2BIG;
1415 }
1416 *fh_size = needfhsize;
1417 return error;
1418 }
1419
1420 int
1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1422 {
1423 struct mount *mp;
1424 fhandle_t *fhp;
1425 size_t fhsize;
1426 size_t fidsize;
1427 int error;
1428
1429 *fhpp = NULL;
1430 mp = vp->v_mount;
1431 fidsize = 0;
1432 error = VFS_VPTOFH(vp, NULL, &fidsize);
1433 KASSERT(error != 0);
1434 if (error != E2BIG) {
1435 goto out;
1436 }
1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1438 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1439 if (fhp == NULL) {
1440 error = ENOMEM;
1441 goto out;
1442 }
1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1445 if (error == 0) {
1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1447 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1448 *fhpp = fhp;
1449 } else {
1450 kmem_free(fhp, fhsize);
1451 }
1452 out:
1453 return error;
1454 }
1455
1456 void
1457 vfs_composefh_free(fhandle_t *fhp)
1458 {
1459
1460 vfs__fhfree(fhp);
1461 }
1462
1463 /*
1464 * vfs_fhtovp: lookup a vnode by a filehandle.
1465 */
1466
1467 int
1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1469 {
1470 struct mount *mp;
1471 int error;
1472
1473 *vpp = NULL;
1474 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1475 if (mp == NULL) {
1476 error = ESTALE;
1477 goto out;
1478 }
1479 if (mp->mnt_op->vfs_fhtovp == NULL) {
1480 error = EOPNOTSUPP;
1481 goto out;
1482 }
1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1484 out:
1485 return error;
1486 }
1487
1488 /*
1489 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1490 * the needed size.
1491 */
1492
1493 int
1494 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1495 {
1496 fhandle_t *fhp;
1497 int error;
1498
1499 *fhpp = NULL;
1500 if (fhsize > FHANDLE_SIZE_MAX) {
1501 return EINVAL;
1502 }
1503 if (fhsize < FHANDLE_SIZE_MIN) {
1504 return EINVAL;
1505 }
1506 again:
1507 fhp = kmem_alloc(fhsize, KM_SLEEP);
1508 if (fhp == NULL) {
1509 return ENOMEM;
1510 }
1511 error = copyin(ufhp, fhp, fhsize);
1512 if (error == 0) {
1513 /* XXX this check shouldn't be here */
1514 if (FHANDLE_SIZE(fhp) == fhsize) {
1515 *fhpp = fhp;
1516 return 0;
1517 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1518 /*
1519 * a kludge for nfsv2 padded handles.
1520 */
1521 size_t sz;
1522
1523 sz = FHANDLE_SIZE(fhp);
1524 kmem_free(fhp, fhsize);
1525 fhsize = sz;
1526 goto again;
1527 } else {
1528 /*
1529 * userland told us wrong size.
1530 */
1531 error = EINVAL;
1532 }
1533 }
1534 kmem_free(fhp, fhsize);
1535 return error;
1536 }
1537
1538 void
1539 vfs_copyinfh_free(fhandle_t *fhp)
1540 {
1541
1542 vfs__fhfree(fhp);
1543 }
1544
1545 /*
1546 * Get file handle system call
1547 */
1548 int
1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1550 {
1551 /* {
1552 syscallarg(char *) fname;
1553 syscallarg(fhandle_t *) fhp;
1554 syscallarg(size_t *) fh_size;
1555 } */
1556 struct vnode *vp;
1557 fhandle_t *fh;
1558 int error;
1559 struct nameidata nd;
1560 size_t sz;
1561 size_t usz;
1562
1563 /*
1564 * Must be super user
1565 */
1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1567 0, NULL, NULL, NULL);
1568 if (error)
1569 return (error);
1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1571 SCARG(uap, fname));
1572 error = namei(&nd);
1573 if (error)
1574 return (error);
1575 vp = nd.ni_vp;
1576 error = vfs_composefh_alloc(vp, &fh);
1577 vput(vp);
1578 if (error != 0) {
1579 goto out;
1580 }
1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1582 if (error != 0) {
1583 goto out;
1584 }
1585 sz = FHANDLE_SIZE(fh);
1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1587 if (error != 0) {
1588 goto out;
1589 }
1590 if (usz >= sz) {
1591 error = copyout(fh, SCARG(uap, fhp), sz);
1592 } else {
1593 error = E2BIG;
1594 }
1595 out:
1596 vfs_composefh_free(fh);
1597 return (error);
1598 }
1599
1600 /*
1601 * Open a file given a file handle.
1602 *
1603 * Check permissions, allocate an open file structure,
1604 * and call the device open routine if any.
1605 */
1606
1607 int
1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1609 register_t *retval)
1610 {
1611 file_t *fp;
1612 struct vnode *vp = NULL;
1613 kauth_cred_t cred = l->l_cred;
1614 file_t *nfp;
1615 int type, indx, error=0;
1616 struct flock lf;
1617 struct vattr va;
1618 fhandle_t *fh;
1619 int flags;
1620 proc_t *p;
1621
1622 p = curproc;
1623
1624 /*
1625 * Must be super user
1626 */
1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1628 0, NULL, NULL, NULL)))
1629 return (error);
1630
1631 flags = FFLAGS(oflags);
1632 if ((flags & (FREAD | FWRITE)) == 0)
1633 return (EINVAL);
1634 if ((flags & O_CREAT))
1635 return (EINVAL);
1636 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1637 return (error);
1638 fp = nfp;
1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1640 if (error != 0) {
1641 goto bad;
1642 }
1643 error = vfs_fhtovp(fh, &vp);
1644 if (error != 0) {
1645 goto bad;
1646 }
1647
1648 /* Now do an effective vn_open */
1649
1650 if (vp->v_type == VSOCK) {
1651 error = EOPNOTSUPP;
1652 goto bad;
1653 }
1654 error = vn_openchk(vp, cred, flags);
1655 if (error != 0)
1656 goto bad;
1657 if (flags & O_TRUNC) {
1658 VOP_UNLOCK(vp, 0); /* XXX */
1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1660 VATTR_NULL(&va);
1661 va.va_size = 0;
1662 error = VOP_SETATTR(vp, &va, cred);
1663 if (error)
1664 goto bad;
1665 }
1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1667 goto bad;
1668 if (flags & FWRITE) {
1669 mutex_enter(&vp->v_interlock);
1670 vp->v_writecount++;
1671 mutex_exit(&vp->v_interlock);
1672 }
1673
1674 /* done with modified vn_open, now finish what sys_open does. */
1675
1676 fp->f_flag = flags & FMASK;
1677 fp->f_type = DTYPE_VNODE;
1678 fp->f_ops = &vnops;
1679 fp->f_data = vp;
1680 if (flags & (O_EXLOCK | O_SHLOCK)) {
1681 lf.l_whence = SEEK_SET;
1682 lf.l_start = 0;
1683 lf.l_len = 0;
1684 if (flags & O_EXLOCK)
1685 lf.l_type = F_WRLCK;
1686 else
1687 lf.l_type = F_RDLCK;
1688 type = F_FLOCK;
1689 if ((flags & FNONBLOCK) == 0)
1690 type |= F_WAIT;
1691 VOP_UNLOCK(vp, 0);
1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1693 if (error) {
1694 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1695 fd_abort(p, fp, indx);
1696 return (error);
1697 }
1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1699 atomic_or_uint(&fp->f_flag, FHASLOCK);
1700 }
1701 VOP_UNLOCK(vp, 0);
1702 *retval = indx;
1703 fd_affix(p, fp, indx);
1704 vfs_copyinfh_free(fh);
1705 return (0);
1706
1707 bad:
1708 fd_abort(p, fp, indx);
1709 if (vp != NULL)
1710 vput(vp);
1711 vfs_copyinfh_free(fh);
1712 return (error);
1713 }
1714
1715 int
1716 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1717 {
1718 /* {
1719 syscallarg(const void *) fhp;
1720 syscallarg(size_t) fh_size;
1721 syscallarg(int) flags;
1722 } */
1723
1724 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1725 SCARG(uap, flags), retval);
1726 }
1727
1728 int
1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1730 {
1731 int error;
1732 fhandle_t *fh;
1733 struct vnode *vp;
1734
1735 /*
1736 * Must be super user
1737 */
1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1739 0, NULL, NULL, NULL)))
1740 return (error);
1741
1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1743 if (error != 0)
1744 return error;
1745
1746 error = vfs_fhtovp(fh, &vp);
1747 vfs_copyinfh_free(fh);
1748 if (error != 0)
1749 return error;
1750
1751 error = vn_stat(vp, sb);
1752 vput(vp);
1753 return error;
1754 }
1755
1756
1757 /* ARGSUSED */
1758 int
1759 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
1760 {
1761 /* {
1762 syscallarg(const void *) fhp;
1763 syscallarg(size_t) fh_size;
1764 syscallarg(struct stat *) sb;
1765 } */
1766 struct stat sb;
1767 int error;
1768
1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1770 if (error)
1771 return error;
1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1773 }
1774
1775 int
1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1777 int flags)
1778 {
1779 fhandle_t *fh;
1780 struct mount *mp;
1781 struct vnode *vp;
1782 int error;
1783
1784 /*
1785 * Must be super user
1786 */
1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1788 0, NULL, NULL, NULL)))
1789 return error;
1790
1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1792 if (error != 0)
1793 return error;
1794
1795 error = vfs_fhtovp(fh, &vp);
1796 vfs_copyinfh_free(fh);
1797 if (error != 0)
1798 return error;
1799
1800 mp = vp->v_mount;
1801 error = dostatvfs(mp, sb, l, flags, 1);
1802 vput(vp);
1803 return error;
1804 }
1805
1806 /* ARGSUSED */
1807 int
1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1809 {
1810 /* {
1811 syscallarg(const void *) fhp;
1812 syscallarg(size_t) fh_size;
1813 syscallarg(struct statvfs *) buf;
1814 syscallarg(int) flags;
1815 } */
1816 struct statvfs *sb = STATVFSBUF_GET();
1817 int error;
1818
1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1820 SCARG(uap, flags));
1821 if (error == 0)
1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1823 STATVFSBUF_PUT(sb);
1824 return error;
1825 }
1826
1827 /*
1828 * Create a special file.
1829 */
1830 /* ARGSUSED */
1831 int
1832 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
1833 register_t *retval)
1834 {
1835 /* {
1836 syscallarg(const char *) path;
1837 syscallarg(mode_t) mode;
1838 syscallarg(dev_t) dev;
1839 } */
1840 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
1841 SCARG(uap, dev), retval);
1842 }
1843
1844 int
1845 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
1846 register_t *retval)
1847 {
1848 struct proc *p = l->l_proc;
1849 struct vnode *vp;
1850 struct vattr vattr;
1851 int error, optype;
1852 struct nameidata nd;
1853 char *path;
1854 const char *cpath;
1855 enum uio_seg seg = UIO_USERSPACE;
1856
1857 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1858 0, NULL, NULL, NULL)) != 0)
1859 return (error);
1860
1861 optype = VOP_MKNOD_DESCOFFSET;
1862
1863 VERIEXEC_PATH_GET(pathname, seg, cpath, path);
1864 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1865
1866 if ((error = namei(&nd)) != 0)
1867 goto out;
1868 vp = nd.ni_vp;
1869 if (vp != NULL)
1870 error = EEXIST;
1871 else {
1872 VATTR_NULL(&vattr);
1873 /* We will read cwdi->cwdi_cmask unlocked. */
1874 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1875 vattr.va_rdev = dev;
1876
1877 switch (mode & S_IFMT) {
1878 case S_IFMT: /* used by badsect to flag bad sectors */
1879 vattr.va_type = VBAD;
1880 break;
1881 case S_IFCHR:
1882 vattr.va_type = VCHR;
1883 break;
1884 case S_IFBLK:
1885 vattr.va_type = VBLK;
1886 break;
1887 case S_IFWHT:
1888 optype = VOP_WHITEOUT_DESCOFFSET;
1889 break;
1890 case S_IFREG:
1891 #if NVERIEXEC > 0
1892 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1893 O_CREAT);
1894 #endif /* NVERIEXEC > 0 */
1895 vattr.va_type = VREG;
1896 vattr.va_rdev = VNOVAL;
1897 optype = VOP_CREATE_DESCOFFSET;
1898 break;
1899 default:
1900 error = EINVAL;
1901 break;
1902 }
1903 }
1904 if (!error) {
1905 switch (optype) {
1906 case VOP_WHITEOUT_DESCOFFSET:
1907 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1908 if (error)
1909 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1910 vput(nd.ni_dvp);
1911 break;
1912
1913 case VOP_MKNOD_DESCOFFSET:
1914 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1915 &nd.ni_cnd, &vattr);
1916 if (error == 0)
1917 vput(nd.ni_vp);
1918 break;
1919
1920 case VOP_CREATE_DESCOFFSET:
1921 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1922 &nd.ni_cnd, &vattr);
1923 if (error == 0)
1924 vput(nd.ni_vp);
1925 break;
1926 }
1927 } else {
1928 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1929 if (nd.ni_dvp == vp)
1930 vrele(nd.ni_dvp);
1931 else
1932 vput(nd.ni_dvp);
1933 if (vp)
1934 vrele(vp);
1935 }
1936 out:
1937 VERIEXEC_PATH_PUT(path);
1938 return (error);
1939 }
1940
1941 /*
1942 * Create a named pipe.
1943 */
1944 /* ARGSUSED */
1945 int
1946 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1947 {
1948 /* {
1949 syscallarg(const char *) path;
1950 syscallarg(int) mode;
1951 } */
1952 struct proc *p = l->l_proc;
1953 struct vattr vattr;
1954 int error;
1955 struct nameidata nd;
1956
1957 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1958 SCARG(uap, path));
1959 if ((error = namei(&nd)) != 0)
1960 return (error);
1961 if (nd.ni_vp != NULL) {
1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1963 if (nd.ni_dvp == nd.ni_vp)
1964 vrele(nd.ni_dvp);
1965 else
1966 vput(nd.ni_dvp);
1967 vrele(nd.ni_vp);
1968 return (EEXIST);
1969 }
1970 VATTR_NULL(&vattr);
1971 vattr.va_type = VFIFO;
1972 /* We will read cwdi->cwdi_cmask unlocked. */
1973 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1975 if (error == 0)
1976 vput(nd.ni_vp);
1977 return (error);
1978 }
1979
1980 /*
1981 * Make a hard file link.
1982 */
1983 /* ARGSUSED */
1984 int
1985 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
1986 {
1987 /* {
1988 syscallarg(const char *) path;
1989 syscallarg(const char *) link;
1990 } */
1991 struct vnode *vp;
1992 struct nameidata nd;
1993 int error;
1994
1995 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1996 SCARG(uap, path));
1997 if ((error = namei(&nd)) != 0)
1998 return (error);
1999 vp = nd.ni_vp;
2000 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2001 SCARG(uap, link));
2002 if ((error = namei(&nd)) != 0)
2003 goto out;
2004 if (nd.ni_vp) {
2005 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2006 if (nd.ni_dvp == nd.ni_vp)
2007 vrele(nd.ni_dvp);
2008 else
2009 vput(nd.ni_dvp);
2010 vrele(nd.ni_vp);
2011 error = EEXIST;
2012 goto out;
2013 }
2014 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2015 out:
2016 vrele(vp);
2017 return (error);
2018 }
2019
2020 /*
2021 * Make a symbolic link.
2022 */
2023 /* ARGSUSED */
2024 int
2025 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2026 {
2027 /* {
2028 syscallarg(const char *) path;
2029 syscallarg(const char *) link;
2030 } */
2031 struct proc *p = l->l_proc;
2032 struct vattr vattr;
2033 char *path;
2034 int error;
2035 struct nameidata nd;
2036
2037 path = PNBUF_GET();
2038 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2039 if (error)
2040 goto out;
2041 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2042 SCARG(uap, link));
2043 if ((error = namei(&nd)) != 0)
2044 goto out;
2045 if (nd.ni_vp) {
2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2047 if (nd.ni_dvp == nd.ni_vp)
2048 vrele(nd.ni_dvp);
2049 else
2050 vput(nd.ni_dvp);
2051 vrele(nd.ni_vp);
2052 error = EEXIST;
2053 goto out;
2054 }
2055 VATTR_NULL(&vattr);
2056 vattr.va_type = VLNK;
2057 /* We will read cwdi->cwdi_cmask unlocked. */
2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2060 if (error == 0)
2061 vput(nd.ni_vp);
2062 out:
2063 PNBUF_PUT(path);
2064 return (error);
2065 }
2066
2067 /*
2068 * Delete a whiteout from the filesystem.
2069 */
2070 /* ARGSUSED */
2071 int
2072 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2073 {
2074 /* {
2075 syscallarg(const char *) path;
2076 } */
2077 int error;
2078 struct nameidata nd;
2079
2080 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2081 UIO_USERSPACE, SCARG(uap, path));
2082 error = namei(&nd);
2083 if (error)
2084 return (error);
2085
2086 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2087 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2088 if (nd.ni_dvp == nd.ni_vp)
2089 vrele(nd.ni_dvp);
2090 else
2091 vput(nd.ni_dvp);
2092 if (nd.ni_vp)
2093 vrele(nd.ni_vp);
2094 return (EEXIST);
2095 }
2096 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2097 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2098 vput(nd.ni_dvp);
2099 return (error);
2100 }
2101
2102 /*
2103 * Delete a name from the filesystem.
2104 */
2105 /* ARGSUSED */
2106 int
2107 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2108 {
2109 /* {
2110 syscallarg(const char *) path;
2111 } */
2112
2113 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2114 }
2115
2116 int
2117 do_sys_unlink(const char *arg, enum uio_seg seg)
2118 {
2119 struct vnode *vp;
2120 int error;
2121 struct nameidata nd;
2122 kauth_cred_t cred;
2123 char *path;
2124 const char *cpath;
2125
2126 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2127 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2128
2129 if ((error = namei(&nd)) != 0)
2130 goto out;
2131 vp = nd.ni_vp;
2132
2133 /*
2134 * The root of a mounted filesystem cannot be deleted.
2135 */
2136 if (vp->v_vflag & VV_ROOT) {
2137 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2138 if (nd.ni_dvp == vp)
2139 vrele(nd.ni_dvp);
2140 else
2141 vput(nd.ni_dvp);
2142 vput(vp);
2143 error = EBUSY;
2144 goto out;
2145 }
2146
2147 #if NVERIEXEC > 0
2148 /* Handle remove requests for veriexec entries. */
2149 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2150 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2151 if (nd.ni_dvp == vp)
2152 vrele(nd.ni_dvp);
2153 else
2154 vput(nd.ni_dvp);
2155 vput(vp);
2156 goto out;
2157 }
2158 #endif /* NVERIEXEC > 0 */
2159
2160 cred = kauth_cred_get();
2161 #ifdef FILEASSOC
2162 (void)fileassoc_file_delete(vp);
2163 #endif /* FILEASSOC */
2164 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2165 out:
2166 VERIEXEC_PATH_PUT(path);
2167 return (error);
2168 }
2169
2170 /*
2171 * Reposition read/write file offset.
2172 */
2173 int
2174 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2175 {
2176 /* {
2177 syscallarg(int) fd;
2178 syscallarg(int) pad;
2179 syscallarg(off_t) offset;
2180 syscallarg(int) whence;
2181 } */
2182 kauth_cred_t cred = l->l_cred;
2183 file_t *fp;
2184 struct vnode *vp;
2185 struct vattr vattr;
2186 off_t newoff;
2187 int error, fd;
2188
2189 fd = SCARG(uap, fd);
2190
2191 if ((fp = fd_getfile(fd)) == NULL)
2192 return (EBADF);
2193
2194 vp = fp->f_data;
2195 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2196 error = ESPIPE;
2197 goto out;
2198 }
2199
2200 switch (SCARG(uap, whence)) {
2201 case SEEK_CUR:
2202 newoff = fp->f_offset + SCARG(uap, offset);
2203 break;
2204 case SEEK_END:
2205 error = VOP_GETATTR(vp, &vattr, cred);
2206 if (error) {
2207 goto out;
2208 }
2209 newoff = SCARG(uap, offset) + vattr.va_size;
2210 break;
2211 case SEEK_SET:
2212 newoff = SCARG(uap, offset);
2213 break;
2214 default:
2215 error = EINVAL;
2216 goto out;
2217 }
2218 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2219 *(off_t *)retval = fp->f_offset = newoff;
2220 }
2221 out:
2222 fd_putfile(fd);
2223 return (error);
2224 }
2225
2226 /*
2227 * Positional read system call.
2228 */
2229 int
2230 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2231 {
2232 /* {
2233 syscallarg(int) fd;
2234 syscallarg(void *) buf;
2235 syscallarg(size_t) nbyte;
2236 syscallarg(off_t) offset;
2237 } */
2238 file_t *fp;
2239 struct vnode *vp;
2240 off_t offset;
2241 int error, fd = SCARG(uap, fd);
2242
2243 if ((fp = fd_getfile(fd)) == NULL)
2244 return (EBADF);
2245
2246 if ((fp->f_flag & FREAD) == 0) {
2247 fd_putfile(fd);
2248 return (EBADF);
2249 }
2250
2251 vp = fp->f_data;
2252 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2253 error = ESPIPE;
2254 goto out;
2255 }
2256
2257 offset = SCARG(uap, offset);
2258
2259 /*
2260 * XXX This works because no file systems actually
2261 * XXX take any action on the seek operation.
2262 */
2263 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2264 goto out;
2265
2266 /* dofileread() will unuse the descriptor for us */
2267 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2268 &offset, 0, retval));
2269
2270 out:
2271 fd_putfile(fd);
2272 return (error);
2273 }
2274
2275 /*
2276 * Positional scatter read system call.
2277 */
2278 int
2279 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2280 {
2281 /* {
2282 syscallarg(int) fd;
2283 syscallarg(const struct iovec *) iovp;
2284 syscallarg(int) iovcnt;
2285 syscallarg(off_t) offset;
2286 } */
2287 off_t offset = SCARG(uap, offset);
2288
2289 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2290 SCARG(uap, iovcnt), &offset, 0, retval);
2291 }
2292
2293 /*
2294 * Positional write system call.
2295 */
2296 int
2297 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2298 {
2299 /* {
2300 syscallarg(int) fd;
2301 syscallarg(const void *) buf;
2302 syscallarg(size_t) nbyte;
2303 syscallarg(off_t) offset;
2304 } */
2305 file_t *fp;
2306 struct vnode *vp;
2307 off_t offset;
2308 int error, fd = SCARG(uap, fd);
2309
2310 if ((fp = fd_getfile(fd)) == NULL)
2311 return (EBADF);
2312
2313 if ((fp->f_flag & FWRITE) == 0) {
2314 fd_putfile(fd);
2315 return (EBADF);
2316 }
2317
2318 vp = fp->f_data;
2319 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2320 error = ESPIPE;
2321 goto out;
2322 }
2323
2324 offset = SCARG(uap, offset);
2325
2326 /*
2327 * XXX This works because no file systems actually
2328 * XXX take any action on the seek operation.
2329 */
2330 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2331 goto out;
2332
2333 /* dofilewrite() will unuse the descriptor for us */
2334 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2335 &offset, 0, retval));
2336
2337 out:
2338 fd_putfile(fd);
2339 return (error);
2340 }
2341
2342 /*
2343 * Positional gather write system call.
2344 */
2345 int
2346 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2347 {
2348 /* {
2349 syscallarg(int) fd;
2350 syscallarg(const struct iovec *) iovp;
2351 syscallarg(int) iovcnt;
2352 syscallarg(off_t) offset;
2353 } */
2354 off_t offset = SCARG(uap, offset);
2355
2356 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2357 SCARG(uap, iovcnt), &offset, 0, retval);
2358 }
2359
2360 /*
2361 * Check access permissions.
2362 */
2363 int
2364 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2365 {
2366 /* {
2367 syscallarg(const char *) path;
2368 syscallarg(int) flags;
2369 } */
2370 kauth_cred_t cred;
2371 struct vnode *vp;
2372 int error, flags;
2373 struct nameidata nd;
2374
2375 cred = kauth_cred_dup(l->l_cred);
2376 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2377 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2378 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2379 SCARG(uap, path));
2380 /* Override default credentials */
2381 nd.ni_cnd.cn_cred = cred;
2382 if ((error = namei(&nd)) != 0)
2383 goto out;
2384 vp = nd.ni_vp;
2385
2386 /* Flags == 0 means only check for existence. */
2387 if (SCARG(uap, flags)) {
2388 flags = 0;
2389 if (SCARG(uap, flags) & R_OK)
2390 flags |= VREAD;
2391 if (SCARG(uap, flags) & W_OK)
2392 flags |= VWRITE;
2393 if (SCARG(uap, flags) & X_OK)
2394 flags |= VEXEC;
2395
2396 error = VOP_ACCESS(vp, flags, cred);
2397 if (!error && (flags & VWRITE))
2398 error = vn_writechk(vp);
2399 }
2400 vput(vp);
2401 out:
2402 kauth_cred_free(cred);
2403 return (error);
2404 }
2405
2406 /*
2407 * Common code for all sys_stat functions, including compat versions.
2408 */
2409 int
2410 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2411 {
2412 int error;
2413 struct nameidata nd;
2414
2415 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2416 UIO_USERSPACE, path);
2417 error = namei(&nd);
2418 if (error != 0)
2419 return error;
2420 error = vn_stat(nd.ni_vp, sb);
2421 vput(nd.ni_vp);
2422 return error;
2423 }
2424
2425 /*
2426 * Get file status; this version follows links.
2427 */
2428 /* ARGSUSED */
2429 int
2430 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
2431 {
2432 /* {
2433 syscallarg(const char *) path;
2434 syscallarg(struct stat *) ub;
2435 } */
2436 struct stat sb;
2437 int error;
2438
2439 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2440 if (error)
2441 return error;
2442 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2443 }
2444
2445 /*
2446 * Get file status; this version does not follow links.
2447 */
2448 /* ARGSUSED */
2449 int
2450 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
2451 {
2452 /* {
2453 syscallarg(const char *) path;
2454 syscallarg(struct stat *) ub;
2455 } */
2456 struct stat sb;
2457 int error;
2458
2459 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2460 if (error)
2461 return error;
2462 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2463 }
2464
2465 /*
2466 * Get configurable pathname variables.
2467 */
2468 /* ARGSUSED */
2469 int
2470 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2471 {
2472 /* {
2473 syscallarg(const char *) path;
2474 syscallarg(int) name;
2475 } */
2476 int error;
2477 struct nameidata nd;
2478
2479 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2480 SCARG(uap, path));
2481 if ((error = namei(&nd)) != 0)
2482 return (error);
2483 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2484 vput(nd.ni_vp);
2485 return (error);
2486 }
2487
2488 /*
2489 * Return target name of a symbolic link.
2490 */
2491 /* ARGSUSED */
2492 int
2493 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2494 {
2495 /* {
2496 syscallarg(const char *) path;
2497 syscallarg(char *) buf;
2498 syscallarg(size_t) count;
2499 } */
2500 struct vnode *vp;
2501 struct iovec aiov;
2502 struct uio auio;
2503 int error;
2504 struct nameidata nd;
2505
2506 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2507 SCARG(uap, path));
2508 if ((error = namei(&nd)) != 0)
2509 return (error);
2510 vp = nd.ni_vp;
2511 if (vp->v_type != VLNK)
2512 error = EINVAL;
2513 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2514 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2515 aiov.iov_base = SCARG(uap, buf);
2516 aiov.iov_len = SCARG(uap, count);
2517 auio.uio_iov = &aiov;
2518 auio.uio_iovcnt = 1;
2519 auio.uio_offset = 0;
2520 auio.uio_rw = UIO_READ;
2521 KASSERT(l == curlwp);
2522 auio.uio_vmspace = l->l_proc->p_vmspace;
2523 auio.uio_resid = SCARG(uap, count);
2524 error = VOP_READLINK(vp, &auio, l->l_cred);
2525 }
2526 vput(vp);
2527 *retval = SCARG(uap, count) - auio.uio_resid;
2528 return (error);
2529 }
2530
2531 /*
2532 * Change flags of a file given a path name.
2533 */
2534 /* ARGSUSED */
2535 int
2536 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2537 {
2538 /* {
2539 syscallarg(const char *) path;
2540 syscallarg(u_long) flags;
2541 } */
2542 struct vnode *vp;
2543 int error;
2544 struct nameidata nd;
2545
2546 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2547 SCARG(uap, path));
2548 if ((error = namei(&nd)) != 0)
2549 return (error);
2550 vp = nd.ni_vp;
2551 error = change_flags(vp, SCARG(uap, flags), l);
2552 vput(vp);
2553 return (error);
2554 }
2555
2556 /*
2557 * Change flags of a file given a file descriptor.
2558 */
2559 /* ARGSUSED */
2560 int
2561 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2562 {
2563 /* {
2564 syscallarg(int) fd;
2565 syscallarg(u_long) flags;
2566 } */
2567 struct vnode *vp;
2568 file_t *fp;
2569 int error;
2570
2571 /* fd_getvnode() will use the descriptor for us */
2572 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2573 return (error);
2574 vp = fp->f_data;
2575 error = change_flags(vp, SCARG(uap, flags), l);
2576 VOP_UNLOCK(vp, 0);
2577 fd_putfile(SCARG(uap, fd));
2578 return (error);
2579 }
2580
2581 /*
2582 * Change flags of a file given a path name; this version does
2583 * not follow links.
2584 */
2585 int
2586 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2587 {
2588 /* {
2589 syscallarg(const char *) path;
2590 syscallarg(u_long) flags;
2591 } */
2592 struct vnode *vp;
2593 int error;
2594 struct nameidata nd;
2595
2596 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2597 SCARG(uap, path));
2598 if ((error = namei(&nd)) != 0)
2599 return (error);
2600 vp = nd.ni_vp;
2601 error = change_flags(vp, SCARG(uap, flags), l);
2602 vput(vp);
2603 return (error);
2604 }
2605
2606 /*
2607 * Common routine to change flags of a file.
2608 */
2609 int
2610 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2611 {
2612 struct vattr vattr;
2613 int error;
2614
2615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2616 /*
2617 * Non-superusers cannot change the flags on devices, even if they
2618 * own them.
2619 */
2620 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2621 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2622 goto out;
2623 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2624 error = EINVAL;
2625 goto out;
2626 }
2627 }
2628 VATTR_NULL(&vattr);
2629 vattr.va_flags = flags;
2630 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2631 out:
2632 return (error);
2633 }
2634
2635 /*
2636 * Change mode of a file given path name; this version follows links.
2637 */
2638 /* ARGSUSED */
2639 int
2640 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2641 {
2642 /* {
2643 syscallarg(const char *) path;
2644 syscallarg(int) mode;
2645 } */
2646 int error;
2647 struct nameidata nd;
2648
2649 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2650 SCARG(uap, path));
2651 if ((error = namei(&nd)) != 0)
2652 return (error);
2653
2654 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2655
2656 vrele(nd.ni_vp);
2657 return (error);
2658 }
2659
2660 /*
2661 * Change mode of a file given a file descriptor.
2662 */
2663 /* ARGSUSED */
2664 int
2665 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2666 {
2667 /* {
2668 syscallarg(int) fd;
2669 syscallarg(int) mode;
2670 } */
2671 file_t *fp;
2672 int error;
2673
2674 /* fd_getvnode() will use the descriptor for us */
2675 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2676 return (error);
2677 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2678 fd_putfile(SCARG(uap, fd));
2679 return (error);
2680 }
2681
2682 /*
2683 * Change mode of a file given path name; this version does not follow links.
2684 */
2685 /* ARGSUSED */
2686 int
2687 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2688 {
2689 /* {
2690 syscallarg(const char *) path;
2691 syscallarg(int) mode;
2692 } */
2693 int error;
2694 struct nameidata nd;
2695
2696 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2697 SCARG(uap, path));
2698 if ((error = namei(&nd)) != 0)
2699 return (error);
2700
2701 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2702
2703 vrele(nd.ni_vp);
2704 return (error);
2705 }
2706
2707 /*
2708 * Common routine to set mode given a vnode.
2709 */
2710 static int
2711 change_mode(struct vnode *vp, int mode, struct lwp *l)
2712 {
2713 struct vattr vattr;
2714 int error;
2715
2716 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2717 VATTR_NULL(&vattr);
2718 vattr.va_mode = mode & ALLPERMS;
2719 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2720 VOP_UNLOCK(vp, 0);
2721 return (error);
2722 }
2723
2724 /*
2725 * Set ownership given a path name; this version follows links.
2726 */
2727 /* ARGSUSED */
2728 int
2729 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2730 {
2731 /* {
2732 syscallarg(const char *) path;
2733 syscallarg(uid_t) uid;
2734 syscallarg(gid_t) gid;
2735 } */
2736 int error;
2737 struct nameidata nd;
2738
2739 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2740 SCARG(uap, path));
2741 if ((error = namei(&nd)) != 0)
2742 return (error);
2743
2744 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2745
2746 vrele(nd.ni_vp);
2747 return (error);
2748 }
2749
2750 /*
2751 * Set ownership given a path name; this version follows links.
2752 * Provides POSIX semantics.
2753 */
2754 /* ARGSUSED */
2755 int
2756 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2757 {
2758 /* {
2759 syscallarg(const char *) path;
2760 syscallarg(uid_t) uid;
2761 syscallarg(gid_t) gid;
2762 } */
2763 int error;
2764 struct nameidata nd;
2765
2766 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2767 SCARG(uap, path));
2768 if ((error = namei(&nd)) != 0)
2769 return (error);
2770
2771 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2772
2773 vrele(nd.ni_vp);
2774 return (error);
2775 }
2776
2777 /*
2778 * Set ownership given a file descriptor.
2779 */
2780 /* ARGSUSED */
2781 int
2782 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2783 {
2784 /* {
2785 syscallarg(int) fd;
2786 syscallarg(uid_t) uid;
2787 syscallarg(gid_t) gid;
2788 } */
2789 int error;
2790 file_t *fp;
2791
2792 /* fd_getvnode() will use the descriptor for us */
2793 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2794 return (error);
2795 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2796 l, 0);
2797 fd_putfile(SCARG(uap, fd));
2798 return (error);
2799 }
2800
2801 /*
2802 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2803 */
2804 /* ARGSUSED */
2805 int
2806 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2807 {
2808 /* {
2809 syscallarg(int) fd;
2810 syscallarg(uid_t) uid;
2811 syscallarg(gid_t) gid;
2812 } */
2813 int error;
2814 file_t *fp;
2815
2816 /* fd_getvnode() will use the descriptor for us */
2817 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2818 return (error);
2819 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2820 l, 1);
2821 fd_putfile(SCARG(uap, fd));
2822 return (error);
2823 }
2824
2825 /*
2826 * Set ownership given a path name; this version does not follow links.
2827 */
2828 /* ARGSUSED */
2829 int
2830 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2831 {
2832 /* {
2833 syscallarg(const char *) path;
2834 syscallarg(uid_t) uid;
2835 syscallarg(gid_t) gid;
2836 } */
2837 int error;
2838 struct nameidata nd;
2839
2840 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2841 SCARG(uap, path));
2842 if ((error = namei(&nd)) != 0)
2843 return (error);
2844
2845 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2846
2847 vrele(nd.ni_vp);
2848 return (error);
2849 }
2850
2851 /*
2852 * Set ownership given a path name; this version does not follow links.
2853 * Provides POSIX/XPG semantics.
2854 */
2855 /* ARGSUSED */
2856 int
2857 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2858 {
2859 /* {
2860 syscallarg(const char *) path;
2861 syscallarg(uid_t) uid;
2862 syscallarg(gid_t) gid;
2863 } */
2864 int error;
2865 struct nameidata nd;
2866
2867 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2868 SCARG(uap, path));
2869 if ((error = namei(&nd)) != 0)
2870 return (error);
2871
2872 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2873
2874 vrele(nd.ni_vp);
2875 return (error);
2876 }
2877
2878 /*
2879 * Common routine to set ownership given a vnode.
2880 */
2881 static int
2882 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2883 int posix_semantics)
2884 {
2885 struct vattr vattr;
2886 mode_t newmode;
2887 int error;
2888
2889 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2890 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2891 goto out;
2892
2893 #define CHANGED(x) ((int)(x) != -1)
2894 newmode = vattr.va_mode;
2895 if (posix_semantics) {
2896 /*
2897 * POSIX/XPG semantics: if the caller is not the super-user,
2898 * clear set-user-id and set-group-id bits. Both POSIX and
2899 * the XPG consider the behaviour for calls by the super-user
2900 * implementation-defined; we leave the set-user-id and set-
2901 * group-id settings intact in that case.
2902 */
2903 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2904 NULL) != 0)
2905 newmode &= ~(S_ISUID | S_ISGID);
2906 } else {
2907 /*
2908 * NetBSD semantics: when changing owner and/or group,
2909 * clear the respective bit(s).
2910 */
2911 if (CHANGED(uid))
2912 newmode &= ~S_ISUID;
2913 if (CHANGED(gid))
2914 newmode &= ~S_ISGID;
2915 }
2916 /* Update va_mode iff altered. */
2917 if (vattr.va_mode == newmode)
2918 newmode = VNOVAL;
2919
2920 VATTR_NULL(&vattr);
2921 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2922 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2923 vattr.va_mode = newmode;
2924 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2925 #undef CHANGED
2926
2927 out:
2928 VOP_UNLOCK(vp, 0);
2929 return (error);
2930 }
2931
2932 /*
2933 * Set the access and modification times given a path name; this
2934 * version follows links.
2935 */
2936 /* ARGSUSED */
2937 int
2938 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
2939 register_t *retval)
2940 {
2941 /* {
2942 syscallarg(const char *) path;
2943 syscallarg(const struct timeval *) tptr;
2944 } */
2945
2946 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2947 SCARG(uap, tptr), UIO_USERSPACE);
2948 }
2949
2950 /*
2951 * Set the access and modification times given a file descriptor.
2952 */
2953 /* ARGSUSED */
2954 int
2955 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
2956 register_t *retval)
2957 {
2958 /* {
2959 syscallarg(int) fd;
2960 syscallarg(const struct timeval *) tptr;
2961 } */
2962 int error;
2963 file_t *fp;
2964
2965 /* fd_getvnode() will use the descriptor for us */
2966 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2967 return (error);
2968 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2969 UIO_USERSPACE);
2970 fd_putfile(SCARG(uap, fd));
2971 return (error);
2972 }
2973
2974 /*
2975 * Set the access and modification times given a path name; this
2976 * version does not follow links.
2977 */
2978 int
2979 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
2980 register_t *retval)
2981 {
2982 /* {
2983 syscallarg(const char *) path;
2984 syscallarg(const struct timeval *) tptr;
2985 } */
2986
2987 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2988 SCARG(uap, tptr), UIO_USERSPACE);
2989 }
2990
2991 /*
2992 * Common routine to set access and modification times given a vnode.
2993 */
2994 int
2995 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2996 const struct timeval *tptr, enum uio_seg seg)
2997 {
2998 struct vattr vattr;
2999 struct nameidata nd;
3000 int error;
3001 bool vanull, setbirthtime;
3002 struct timespec ts[2];
3003
3004 if (tptr == NULL) {
3005 vanull = true;
3006 nanotime(&ts[0]);
3007 ts[1] = ts[0];
3008 } else {
3009 struct timeval tv[2];
3010
3011 vanull = false;
3012 if (seg != UIO_SYSSPACE) {
3013 error = copyin(tptr, tv, sizeof (tv));
3014 if (error != 0)
3015 return error;
3016 tptr = tv;
3017 }
3018 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3019 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3020 }
3021
3022 if (vp == NULL) {
3023 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3024 if ((error = namei(&nd)) != 0)
3025 return error;
3026 vp = nd.ni_vp;
3027 } else
3028 nd.ni_vp = NULL;
3029
3030 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3031 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3032 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3033 VATTR_NULL(&vattr);
3034 vattr.va_atime = ts[0];
3035 vattr.va_mtime = ts[1];
3036 if (setbirthtime)
3037 vattr.va_birthtime = ts[1];
3038 if (vanull)
3039 vattr.va_flags |= VA_UTIMES_NULL;
3040 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3041 VOP_UNLOCK(vp, 0);
3042
3043 if (nd.ni_vp != NULL)
3044 vrele(nd.ni_vp);
3045
3046 return error;
3047 }
3048
3049 /*
3050 * Truncate a file given its path name.
3051 */
3052 /* ARGSUSED */
3053 int
3054 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3055 {
3056 /* {
3057 syscallarg(const char *) path;
3058 syscallarg(int) pad;
3059 syscallarg(off_t) length;
3060 } */
3061 struct vnode *vp;
3062 struct vattr vattr;
3063 int error;
3064 struct nameidata nd;
3065
3066 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3067 SCARG(uap, path));
3068 if ((error = namei(&nd)) != 0)
3069 return (error);
3070 vp = nd.ni_vp;
3071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3072 if (vp->v_type == VDIR)
3073 error = EISDIR;
3074 else if ((error = vn_writechk(vp)) == 0 &&
3075 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3076 VATTR_NULL(&vattr);
3077 vattr.va_size = SCARG(uap, length);
3078 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3079 }
3080 vput(vp);
3081 return (error);
3082 }
3083
3084 /*
3085 * Truncate a file given a file descriptor.
3086 */
3087 /* ARGSUSED */
3088 int
3089 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3090 {
3091 /* {
3092 syscallarg(int) fd;
3093 syscallarg(int) pad;
3094 syscallarg(off_t) length;
3095 } */
3096 struct vattr vattr;
3097 struct vnode *vp;
3098 file_t *fp;
3099 int error;
3100
3101 /* fd_getvnode() will use the descriptor for us */
3102 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3103 return (error);
3104 if ((fp->f_flag & FWRITE) == 0) {
3105 error = EINVAL;
3106 goto out;
3107 }
3108 vp = fp->f_data;
3109 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3110 if (vp->v_type == VDIR)
3111 error = EISDIR;
3112 else if ((error = vn_writechk(vp)) == 0) {
3113 VATTR_NULL(&vattr);
3114 vattr.va_size = SCARG(uap, length);
3115 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3116 }
3117 VOP_UNLOCK(vp, 0);
3118 out:
3119 fd_putfile(SCARG(uap, fd));
3120 return (error);
3121 }
3122
3123 /*
3124 * Sync an open file.
3125 */
3126 /* ARGSUSED */
3127 int
3128 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3129 {
3130 /* {
3131 syscallarg(int) fd;
3132 } */
3133 struct vnode *vp;
3134 file_t *fp;
3135 int error;
3136
3137 /* fd_getvnode() will use the descriptor for us */
3138 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3139 return (error);
3140 vp = fp->f_data;
3141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3142 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3143 VOP_UNLOCK(vp, 0);
3144 fd_putfile(SCARG(uap, fd));
3145 return (error);
3146 }
3147
3148 /*
3149 * Sync a range of file data. API modeled after that found in AIX.
3150 *
3151 * FDATASYNC indicates that we need only save enough metadata to be able
3152 * to re-read the written data. Note we duplicate AIX's requirement that
3153 * the file be open for writing.
3154 */
3155 /* ARGSUSED */
3156 int
3157 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3158 {
3159 /* {
3160 syscallarg(int) fd;
3161 syscallarg(int) flags;
3162 syscallarg(off_t) start;
3163 syscallarg(off_t) length;
3164 } */
3165 struct vnode *vp;
3166 file_t *fp;
3167 int flags, nflags;
3168 off_t s, e, len;
3169 int error;
3170
3171 /* fd_getvnode() will use the descriptor for us */
3172 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3173 return (error);
3174
3175 if ((fp->f_flag & FWRITE) == 0) {
3176 error = EBADF;
3177 goto out;
3178 }
3179
3180 flags = SCARG(uap, flags);
3181 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3182 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3183 error = EINVAL;
3184 goto out;
3185 }
3186 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3187 if (flags & FDATASYNC)
3188 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3189 else
3190 nflags = FSYNC_WAIT;
3191 if (flags & FDISKSYNC)
3192 nflags |= FSYNC_CACHE;
3193
3194 len = SCARG(uap, length);
3195 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3196 if (len) {
3197 s = SCARG(uap, start);
3198 e = s + len;
3199 if (e < s) {
3200 error = EINVAL;
3201 goto out;
3202 }
3203 } else {
3204 e = 0;
3205 s = 0;
3206 }
3207
3208 vp = fp->f_data;
3209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3210 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3211 VOP_UNLOCK(vp, 0);
3212 out:
3213 fd_putfile(SCARG(uap, fd));
3214 return (error);
3215 }
3216
3217 /*
3218 * Sync the data of an open file.
3219 */
3220 /* ARGSUSED */
3221 int
3222 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3223 {
3224 /* {
3225 syscallarg(int) fd;
3226 } */
3227 struct vnode *vp;
3228 file_t *fp;
3229 int error;
3230
3231 /* fd_getvnode() will use the descriptor for us */
3232 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3233 return (error);
3234 if ((fp->f_flag & FWRITE) == 0) {
3235 fd_putfile(SCARG(uap, fd));
3236 return (EBADF);
3237 }
3238 vp = fp->f_data;
3239 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3240 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3241 VOP_UNLOCK(vp, 0);
3242 fd_putfile(SCARG(uap, fd));
3243 return (error);
3244 }
3245
3246 /*
3247 * Rename files, (standard) BSD semantics frontend.
3248 */
3249 /* ARGSUSED */
3250 int
3251 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3252 {
3253 /* {
3254 syscallarg(const char *) from;
3255 syscallarg(const char *) to;
3256 } */
3257
3258 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3259 }
3260
3261 /*
3262 * Rename files, POSIX semantics frontend.
3263 */
3264 /* ARGSUSED */
3265 int
3266 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3267 {
3268 /* {
3269 syscallarg(const char *) from;
3270 syscallarg(const char *) to;
3271 } */
3272
3273 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3274 }
3275
3276 /*
3277 * Rename files. Source and destination must either both be directories,
3278 * or both not be directories. If target is a directory, it must be empty.
3279 * If `from' and `to' refer to the same object, the value of the `retain'
3280 * argument is used to determine whether `from' will be
3281 *
3282 * (retain == 0) deleted unless `from' and `to' refer to the same
3283 * object in the file system's name space (BSD).
3284 * (retain == 1) always retained (POSIX).
3285 */
3286 int
3287 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3288 {
3289 struct vnode *tvp, *fvp, *tdvp;
3290 struct nameidata fromnd, tond;
3291 struct mount *fs;
3292 struct lwp *l = curlwp;
3293 struct proc *p;
3294 uint32_t saveflag;
3295 int error;
3296
3297 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3298 seg, from);
3299 if ((error = namei(&fromnd)) != 0)
3300 return (error);
3301 if (fromnd.ni_dvp != fromnd.ni_vp)
3302 VOP_UNLOCK(fromnd.ni_dvp, 0);
3303 fvp = fromnd.ni_vp;
3304
3305 fs = fvp->v_mount;
3306 error = VFS_RENAMELOCK_ENTER(fs);
3307 if (error) {
3308 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3309 vrele(fromnd.ni_dvp);
3310 vrele(fvp);
3311 goto out1;
3312 }
3313
3314 /*
3315 * close, partially, yet another race - ideally we should only
3316 * go as far as getting fromnd.ni_dvp before getting the per-fs
3317 * lock, and then continue to get fromnd.ni_vp, but we can't do
3318 * that with namei as it stands.
3319 *
3320 * This still won't prevent rmdir from nuking fromnd.ni_vp
3321 * under us. The real fix is to get the locks in the right
3322 * order and do the lookups in the right places, but that's a
3323 * major rototill.
3324 *
3325 * Preserve the SAVESTART in cn_flags, because who knows what
3326 * might happen if we don't.
3327 *
3328 * Note: this logic (as well as this whole function) is cloned
3329 * in nfs_serv.c. Proceed accordingly.
3330 */
3331 vrele(fvp);
3332 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3333 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3334 (fromnd.ni_cnd.cn_namelen == 2 &&
3335 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3336 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3337 error = EINVAL;
3338 VFS_RENAMELOCK_EXIT(fs);
3339 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3340 vrele(fromnd.ni_dvp);
3341 goto out1;
3342 }
3343 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3344 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3345 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3346 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3347 fromnd.ni_cnd.cn_flags |= saveflag;
3348 if (error) {
3349 VOP_UNLOCK(fromnd.ni_dvp, 0);
3350 VFS_RENAMELOCK_EXIT(fs);
3351 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3352 vrele(fromnd.ni_dvp);
3353 goto out1;
3354 }
3355 VOP_UNLOCK(fromnd.ni_vp, 0);
3356 if (fromnd.ni_dvp != fromnd.ni_vp)
3357 VOP_UNLOCK(fromnd.ni_dvp, 0);
3358 fvp = fromnd.ni_vp;
3359
3360 NDINIT(&tond, RENAME,
3361 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3362 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3363 seg, to);
3364 if ((error = namei(&tond)) != 0) {
3365 VFS_RENAMELOCK_EXIT(fs);
3366 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3367 vrele(fromnd.ni_dvp);
3368 vrele(fvp);
3369 goto out1;
3370 }
3371 tdvp = tond.ni_dvp;
3372 tvp = tond.ni_vp;
3373
3374 if (tvp != NULL) {
3375 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3376 error = ENOTDIR;
3377 goto out;
3378 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3379 error = EISDIR;
3380 goto out;
3381 }
3382 }
3383
3384 if (fvp == tdvp)
3385 error = EINVAL;
3386
3387 /*
3388 * Source and destination refer to the same object.
3389 */
3390 if (fvp == tvp) {
3391 if (retain)
3392 error = -1;
3393 else if (fromnd.ni_dvp == tdvp &&
3394 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3395 !memcmp(fromnd.ni_cnd.cn_nameptr,
3396 tond.ni_cnd.cn_nameptr,
3397 fromnd.ni_cnd.cn_namelen))
3398 error = -1;
3399 }
3400
3401 #if NVERIEXEC > 0
3402 if (!error) {
3403 char *f1, *f2;
3404 size_t f1_len;
3405 size_t f2_len;
3406
3407 f1_len = fromnd.ni_cnd.cn_namelen + 1;
3408 f1 = kmem_alloc(f1_len, KM_SLEEP);
3409 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);
3410
3411 f2_len = tond.ni_cnd.cn_namelen + 1;
3412 f2 = kmem_alloc(f2_len, KM_SLEEP);
3413 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);
3414
3415 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3416
3417 kmem_free(f1, f1_len);
3418 kmem_free(f2, f2_len);
3419 }
3420 #endif /* NVERIEXEC > 0 */
3421
3422 out:
3423 p = l->l_proc;
3424 if (!error) {
3425 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3426 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3427 VFS_RENAMELOCK_EXIT(fs);
3428 } else {
3429 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3430 if (tdvp == tvp)
3431 vrele(tdvp);
3432 else
3433 vput(tdvp);
3434 if (tvp)
3435 vput(tvp);
3436 VFS_RENAMELOCK_EXIT(fs);
3437 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3438 vrele(fromnd.ni_dvp);
3439 vrele(fvp);
3440 }
3441 vrele(tond.ni_startdir);
3442 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3443 out1:
3444 if (fromnd.ni_startdir)
3445 vrele(fromnd.ni_startdir);
3446 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3447 return (error == -1 ? 0 : error);
3448 }
3449
3450 /*
3451 * Make a directory file.
3452 */
3453 /* ARGSUSED */
3454 int
3455 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3456 {
3457 /* {
3458 syscallarg(const char *) path;
3459 syscallarg(int) mode;
3460 } */
3461 struct proc *p = l->l_proc;
3462 struct vnode *vp;
3463 struct vattr vattr;
3464 int error;
3465 struct nameidata nd;
3466
3467 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3468 SCARG(uap, path));
3469 if ((error = namei(&nd)) != 0)
3470 return (error);
3471 vp = nd.ni_vp;
3472 if (vp != NULL) {
3473 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3474 if (nd.ni_dvp == vp)
3475 vrele(nd.ni_dvp);
3476 else
3477 vput(nd.ni_dvp);
3478 vrele(vp);
3479 return (EEXIST);
3480 }
3481 VATTR_NULL(&vattr);
3482 vattr.va_type = VDIR;
3483 /* We will read cwdi->cwdi_cmask unlocked. */
3484 vattr.va_mode =
3485 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3486 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3487 if (!error)
3488 vput(nd.ni_vp);
3489 return (error);
3490 }
3491
3492 /*
3493 * Remove a directory file.
3494 */
3495 /* ARGSUSED */
3496 int
3497 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3498 {
3499 /* {
3500 syscallarg(const char *) path;
3501 } */
3502 struct vnode *vp;
3503 int error;
3504 struct nameidata nd;
3505
3506 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3507 SCARG(uap, path));
3508 if ((error = namei(&nd)) != 0)
3509 return (error);
3510 vp = nd.ni_vp;
3511 if (vp->v_type != VDIR) {
3512 error = ENOTDIR;
3513 goto out;
3514 }
3515 /*
3516 * No rmdir "." please.
3517 */
3518 if (nd.ni_dvp == vp) {
3519 error = EINVAL;
3520 goto out;
3521 }
3522 /*
3523 * The root of a mounted filesystem cannot be deleted.
3524 */
3525 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3526 error = EBUSY;
3527 goto out;
3528 }
3529 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3530 return (error);
3531
3532 out:
3533 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3534 if (nd.ni_dvp == vp)
3535 vrele(nd.ni_dvp);
3536 else
3537 vput(nd.ni_dvp);
3538 vput(vp);
3539 return (error);
3540 }
3541
3542 /*
3543 * Read a block of directory entries in a file system independent format.
3544 */
3545 int
3546 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3547 {
3548 /* {
3549 syscallarg(int) fd;
3550 syscallarg(char *) buf;
3551 syscallarg(size_t) count;
3552 } */
3553 file_t *fp;
3554 int error, done;
3555
3556 /* fd_getvnode() will use the descriptor for us */
3557 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3558 return (error);
3559 if ((fp->f_flag & FREAD) == 0) {
3560 error = EBADF;
3561 goto out;
3562 }
3563 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3564 SCARG(uap, count), &done, l, 0, 0);
3565 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3566 *retval = done;
3567 out:
3568 fd_putfile(SCARG(uap, fd));
3569 return (error);
3570 }
3571
3572 /*
3573 * Set the mode mask for creation of filesystem nodes.
3574 */
3575 int
3576 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3577 {
3578 /* {
3579 syscallarg(mode_t) newmask;
3580 } */
3581 struct proc *p = l->l_proc;
3582 struct cwdinfo *cwdi;
3583
3584 /*
3585 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3586 * important is that we serialize changes to the mask. The
3587 * rw_exit() will issue a write memory barrier on our behalf,
3588 * and force the changes out to other CPUs (as it must use an
3589 * atomic operation, draining the local CPU's store buffers).
3590 */
3591 cwdi = p->p_cwdi;
3592 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3593 *retval = cwdi->cwdi_cmask;
3594 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3595 rw_exit(&cwdi->cwdi_lock);
3596
3597 return (0);
3598 }
3599
3600 int
3601 dorevoke(struct vnode *vp, kauth_cred_t cred)
3602 {
3603 struct vattr vattr;
3604 int error;
3605
3606 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3607 return error;
3608 if (kauth_cred_geteuid(cred) == vattr.va_uid ||
3609 (error = kauth_authorize_generic(cred,
3610 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3611 VOP_REVOKE(vp, REVOKEALL);
3612 return (error);
3613 }
3614
3615 /*
3616 * Void all references to file by ripping underlying filesystem
3617 * away from vnode.
3618 */
3619 /* ARGSUSED */
3620 int
3621 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3622 {
3623 /* {
3624 syscallarg(const char *) path;
3625 } */
3626 struct vnode *vp;
3627 int error;
3628 struct nameidata nd;
3629
3630 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3631 SCARG(uap, path));
3632 if ((error = namei(&nd)) != 0)
3633 return (error);
3634 vp = nd.ni_vp;
3635 error = dorevoke(vp, l->l_cred);
3636 vrele(vp);
3637 return (error);
3638 }
3639