vfs_syscalls.c revision 1.306.2.4 1 /* $NetBSD: vfs_syscalls.c,v 1.306.2.4 2007/04/13 15:49:48 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.306.2.4 2007/04/13 15:49:48 ad Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/syscallargs.h>
65 #include <sys/vfs_syscalls.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #if NVERIEXEC > 0
73 #include <sys/verified_exec.h>
74 #include <sys/syslog.h>
75 #endif /* NVERIEXEC > 0 */
76 #include <sys/kauth.h>
77
78 #include <miscfs/genfs/genfs.h>
79 #include <miscfs/syncfs/syncfs.h>
80
81 #ifdef COMPAT_30
82 #include "opt_nfsserver.h"
83 #include <nfs/rpcv2.h>
84 #endif
85 #include <nfs/nfsproto.h>
86 #ifdef COMPAT_30
87 #include <nfs/nfs.h>
88 #include <nfs/nfs_var.h>
89 #endif
90
91 #if NFSS > 0
92 #include <dev/fssvar.h>
93 #endif
94
95 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
96
97 static int change_dir(struct nameidata *, struct lwp *);
98 static int change_flags(struct vnode *, u_long, struct lwp *);
99 static int change_mode(struct vnode *, int, struct lwp *l);
100 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
101 static int change_utimes(struct vnode *vp, const struct timeval *,
102 struct lwp *l);
103 static int rename_files(const char *, const char *, struct lwp *, int);
104
105 void checkdirs(struct vnode *);
106
107 static int mount_update(struct lwp *, struct vnode *, const char *, int,
108 void *, struct nameidata *);
109 static int mount_domount(struct lwp *, struct vnode *, const char *,
110 const char *, int, void *, struct nameidata *);
111 static int mount_getargs(struct lwp *, struct vnode *, const char *, int,
112 void *, struct nameidata *);
113
114 int dovfsusermount = 0;
115
116 /*
117 * Virtual File System System Calls
118 */
119
120 /*
121 * Mount a file system.
122 */
123
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Translation table from the historic numeric file system type codes
 * used by the 4.3BSD and NetBSD 0.9 mount syscalls to file system
 * names.  The array index is the numeric code, so the order of the
 * entries is significant; NULL marks a code with no usable name.
 *
 * Do not modify this table.  It should only contain file systems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	/* 0 = MOUNT_NONE  */	NULL,
	/* 1 = MOUNT_UFS   */	MOUNT_FFS,
	/* 2               */	MOUNT_NFS,
	/* 3               */	MOUNT_MFS,
	/* 4               */	MOUNT_MSDOS,
	/* 5 = MOUNT_ISOFS */	MOUNT_CD9660,
	/* 6               */	MOUNT_FDESC,
	/* 7               */	MOUNT_KERNFS,
	/* 8 = MOUNT_DEVFS */	NULL,
	/* 9               */	MOUNT_AFS,
};

/* Number of entries in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(*mountcompatnames);
#endif /* COMPAT_09 || COMPAT_43 */
147
148 static int
149 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
150 void *data, struct nameidata *ndp)
151 {
152 struct mount *mp;
153 int error = 0, saved_flags;
154
155 mp = vp->v_mount;
156 saved_flags = mp->mnt_flag;
157
158 /* We can operate only on VROOT nodes. */
159 if ((vp->v_flag & VROOT) == 0) {
160 error = EINVAL;
161 goto out;
162 }
163
164 /*
165 * We only allow the filesystem to be reloaded if it
166 * is currently mounted read-only.
167 */
168 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
169 error = EOPNOTSUPP; /* Needs translation */
170 goto out;
171 }
172
173 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
174 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
175 if (error)
176 goto out;
177
178 if (vfs_busy(mp, LK_NOWAIT, 0)) {
179 error = EPERM;
180 goto out;
181 }
182
183 mp->mnt_flag &= ~MNT_OP_FLAGS;
184 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
185
186 /*
187 * Set the mount level flags.
188 */
189 if (flags & MNT_RDONLY)
190 mp->mnt_flag |= MNT_RDONLY;
191 else if (mp->mnt_flag & MNT_RDONLY)
192 mp->mnt_iflag |= IMNT_WANTRDWR;
193 mp->mnt_flag &=
194 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
195 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
196 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
197 mp->mnt_flag |= flags &
198 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
199 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
200 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
201 MNT_IGNORE);
202
203 error = VFS_MOUNT(mp, path, data, ndp, l);
204
205 #if defined(COMPAT_30) && defined(NFSSERVER)
206 if (error) {
207 int error2;
208
209 /* Update failed; let's try and see if it was an
210 * export request. */
211 error2 = nfs_update_exports_30(mp, path, data, l);
212
213 /* Only update error code if the export request was
214 * understood but some problem occurred while
215 * processing it. */
216 if (error2 != EJUSTRETURN)
217 error = error2;
218 }
219 #endif
220 if (mp->mnt_iflag & IMNT_WANTRDWR)
221 mp->mnt_flag &= ~MNT_RDONLY;
222 if (error)
223 mp->mnt_flag = saved_flags;
224 mp->mnt_flag &= ~MNT_OP_FLAGS;
225 mp->mnt_iflag &= ~IMNT_WANTRDWR;
226 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
227 if (mp->mnt_syncer == NULL)
228 error = vfs_allocate_syncvnode(mp);
229 } else {
230 if (mp->mnt_syncer != NULL)
231 vfs_deallocate_syncvnode(mp);
232 }
233 vfs_unbusy(mp);
234
235 out:
236 return (error);
237 }
238
239 static int
240 mount_domount(struct lwp *l, struct vnode *vp, const char *fstype,
241 const char *path, int flags, void *data, struct nameidata *ndp)
242 {
243 struct mount *mp = NULL;
244 struct vattr va;
245 char fstypename[MFSNAMELEN];
246 int error;
247
248 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
249 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
250 if (error) {
251 vput(vp);
252 goto out;
253 }
254
255 /* Can't make a non-dir a mount-point (from here anyway). */
256 if (vp->v_type != VDIR) {
257 error = ENOTDIR;
258 vput(vp);
259 goto out;
260 }
261
262 /*
263 * If the user is not root, ensure that they own the directory
264 * onto which we are attempting to mount.
265 */
266 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
267 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
268 (error = kauth_authorize_generic(l->l_cred,
269 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
270 vput(vp);
271 goto out;
272 }
273
274 if (flags & MNT_EXPORTED) {
275 error = EINVAL;
276 vput(vp);
277 goto out;
278 }
279
280 /*
281 * Copy file-system type from userspace.
282 */
283 error = copyinstr(fstype, fstypename, MFSNAMELEN, NULL);
284 if (error) {
285 #if defined(COMPAT_09) || defined(COMPAT_43)
286 /*
287 * Historically, filesystem types were identified by numbers.
288 * If we get an integer for the filesystem type instead of a
289 * string, we check to see if it matches one of the historic
290 * filesystem types.
291 */
292 u_long fsindex = (u_long)fstype;
293 if (fsindex >= nmountcompatnames ||
294 mountcompatnames[fsindex] == NULL) {
295 error = ENODEV;
296 vput(vp);
297 goto out;
298 }
299 strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
300 #else
301 vput(vp);
302 goto out;
303 #endif
304 }
305
306 #ifdef COMPAT_10
307 /* Accept `ufs' as an alias for `ffs'. */
308 if (strncmp(fstypename, "ufs", MFSNAMELEN) == 0)
309 strlcpy(fstypename, "ffs", sizeof(fstypename));
310 #endif
311
312 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
313 vput(vp);
314 goto out;
315 }
316
317 /*
318 * Check if a file-system is not already mounted on this vnode.
319 */
320 if (vp->v_mountedhere != NULL) {
321 error = EBUSY;
322 vput(vp);
323 goto out;
324 }
325
326 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
327
328 if ((mp->mnt_op = vfs_getopsbyname(fstypename)) == NULL) {
329 free(mp, M_MOUNT);
330 error = ENODEV;
331 vput(vp);
332 goto out;
333 }
334
335 TAILQ_INIT(&mp->mnt_vnodelist);
336 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
337 mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE);
338 (void)vfs_busy(mp, LK_NOWAIT, 0);
339
340 mp->mnt_op->vfs_refcount++;
341 mp->mnt_vnodecovered = vp;
342 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
343 mp->mnt_unmounter = NULL;
344 mount_initspecific(mp);
345
346 /*
347 * The underlying file system may refuse the mount for
348 * various reasons. Allow the user to force it to happen.
349 *
350 * Set the mount level flags.
351 */
352 mp->mnt_flag = flags &
353 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
354 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
355 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
356 MNT_IGNORE | MNT_RDONLY);
357
358 error = VFS_MOUNT(mp, path, data, ndp, l);
359 mp->mnt_flag &= ~MNT_OP_FLAGS;
360
361 /*
362 * Put the new filesystem on the mount list after root.
363 */
364 cache_purge(vp);
365 if (!error) {
366 mp->mnt_iflag &= ~IMNT_WANTRDWR;
367 vp->v_mountedhere = mp;
368 mutex_enter(&mountlist_lock);
369 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
370 mutex_exit(&mountlist_lock);
371 VOP_UNLOCK(vp, 0);
372 checkdirs(vp);
373 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
374 error = vfs_allocate_syncvnode(mp);
375 vfs_unbusy(mp);
376 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
377 error = VFS_START(mp, 0, l);
378 if (error)
379 vrele(vp);
380 } else {
381 vp->v_mountedhere = NULL;
382 mp->mnt_op->vfs_refcount--;
383 vfs_unbusy(mp);
384 free(mp, M_MOUNT);
385 vput(vp);
386 }
387
388 out:
389 return (error);
390 }
391
392 static int
393 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
394 void *data, struct nameidata *ndp)
395 {
396 struct mount *mp;
397 int error;
398
399 /* If MNT_GETARGS is specified, it should be the only flag. */
400 if (flags & ~MNT_GETARGS) {
401 error = EINVAL;
402 goto out;
403 }
404
405 mp = vp->v_mount;
406
407 /* XXX: probably some notion of "can see" here if we want isolation. */
408 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
409 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
410 if (error)
411 goto out;
412
413 if ((vp->v_flag & VROOT) == 0) {
414 error = EINVAL;
415 goto out;
416 }
417
418 if (vfs_busy(mp, LK_NOWAIT, 0)) {
419 error = EPERM;
420 goto out;
421 }
422
423 mp->mnt_flag &= ~MNT_OP_FLAGS;
424 mp->mnt_flag |= MNT_GETARGS;
425 error = VFS_MOUNT(mp, path, data, ndp, l);
426 mp->mnt_flag &= ~MNT_OP_FLAGS;
427
428 vfs_unbusy(mp);
429 out:
430 return (error);
431 }
432
433 /* ARGSUSED */
434 int
435 sys_mount(struct lwp *l, void *v, register_t *retval)
436 {
437 struct sys_mount_args /* {
438 syscallarg(const char *) type;
439 syscallarg(const char *) path;
440 syscallarg(int) flags;
441 syscallarg(void *) data;
442 } */ *uap = v;
443 struct vnode *vp;
444 struct nameidata nd;
445 int error;
446
447 /*
448 * Get vnode to be covered
449 */
450 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
451 SCARG(uap, path), l);
452 if ((error = namei(&nd)) != 0)
453 return (error);
454 vp = nd.ni_vp;
455
456 /*
457 * A lookup in VFS_MOUNT might result in an attempt to
458 * lock this vnode again, so make the lock recursive.
459 */
460 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
461
462 if (SCARG(uap, flags) & MNT_GETARGS) {
463 error = mount_getargs(l, vp, SCARG(uap, path),
464 SCARG(uap, flags), SCARG(uap, data), &nd);
465 vput(vp);
466 } else if (SCARG(uap, flags) & MNT_UPDATE) {
467 error = mount_update(l, vp, SCARG(uap, path),
468 SCARG(uap, flags), SCARG(uap, data), &nd);
469 vput(vp);
470 } else {
471 /* Locking is handled internally in mount_domount(). */
472 error = mount_domount(l, vp, SCARG(uap, type),
473 SCARG(uap, path), SCARG(uap, flags), SCARG(uap, data), &nd);
474 }
475
476 return (error);
477 }
478
479 /*
480 * Scan all active processes to see if any of them have a current
481 * or root directory onto which the new filesystem has just been
482 * mounted. If so, replace them with the new mount point.
483 */
484 void
485 checkdirs(struct vnode *olddp)
486 {
487 struct cwdinfo *cwdi;
488 struct vnode *newdp;
489 struct proc *p;
490
491 if (olddp->v_usecount == 1)
492 return;
493 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
494 panic("mount: lost mount");
495 mutex_enter(&proclist_lock);
496 PROCLIST_FOREACH(p, &allproc) {
497 cwdi = p->p_cwdi;
498 if (!cwdi)
499 continue;
500 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
501 if (cwdi->cwdi_cdir == olddp) {
502 vrele(cwdi->cwdi_cdir);
503 VREF(newdp);
504 cwdi->cwdi_cdir = newdp;
505 }
506 if (cwdi->cwdi_rdir == olddp) {
507 vrele(cwdi->cwdi_rdir);
508 VREF(newdp);
509 cwdi->cwdi_rdir = newdp;
510 }
511 rw_exit(&cwdi->cwdi_lock);
512 }
513 mutex_exit(&proclist_lock);
514 if (rootvnode == olddp) {
515 vrele(rootvnode);
516 VREF(newdp);
517 rootvnode = newdp;
518 }
519 vput(newdp);
520 }
521
522 /*
523 * Unmount a file system.
524 *
525 * Note: unmount takes a path to the vnode mounted on as argument,
526 * not special file (as before).
527 */
528 /* ARGSUSED */
529 int
530 sys_unmount(struct lwp *l, void *v, register_t *retval)
531 {
532 struct sys_unmount_args /* {
533 syscallarg(const char *) path;
534 syscallarg(int) flags;
535 } */ *uap = v;
536 struct vnode *vp;
537 struct mount *mp;
538 int error;
539 struct nameidata nd;
540
541 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
542 SCARG(uap, path), l);
543 if ((error = namei(&nd)) != 0)
544 return (error);
545 vp = nd.ni_vp;
546 mp = vp->v_mount;
547
548 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
549 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
550 if (error) {
551 vput(vp);
552 return (error);
553 }
554
555 /*
556 * Don't allow unmounting the root file system.
557 */
558 if (mp->mnt_flag & MNT_ROOTFS) {
559 vput(vp);
560 return (EINVAL);
561 }
562
563 /*
564 * Must be the root of the filesystem
565 */
566 if ((vp->v_flag & VROOT) == 0) {
567 vput(vp);
568 return (EINVAL);
569 }
570 vput(vp);
571
572 /*
573 * XXX Freeze syncer. Must do this before locking the
574 * mount point. See dounmount() for details.
575 */
576 mutex_enter(&syncer_mutex);
577
578 if (vfs_busy(mp, 0, 0)) {
579 mutex_exit(&syncer_mutex);
580 return (EBUSY);
581 }
582
583 return (dounmount(mp, SCARG(uap, flags), l));
584 }
585
586 /*
587 * Do the actual file system unmount. File system is assumed to have been
588 * marked busy by the caller.
589 */
590 int
591 dounmount(struct mount *mp, int flags, struct lwp *l)
592 {
593 struct vnode *coveredvp;
594 int error;
595 int async;
596 int used_syncer;
597
598 #if NVERIEXEC > 0
599 error = veriexec_unmountchk(mp);
600 if (error)
601 return (error);
602 #endif /* NVERIEXEC > 0 */
603
604 mutex_enter(&mountlist_lock);
605 vfs_unbusy(mp);
606 used_syncer = (mp->mnt_syncer != NULL);
607
608 /*
609 * XXX Syncer must be frozen when we get here. This should really
610 * be done on a per-mountpoint basis, but especially the softdep
611 * code possibly called from the syncer doesn't exactly work on a
612 * per-mountpoint basis, so the softdep code would become a maze
613 * of vfs_busy() calls.
614 *
615 * The caller of dounmount() must acquire syncer_mutex because
616 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
617 * order, and we must preserve that order to avoid deadlock.
618 *
619 * So, if the file system did not use the syncer, now is
620 * the time to release the syncer_mutex.
621 */
622 if (used_syncer == 0)
623 mutex_exit(&syncer_mutex);
624
625 mp->mnt_iflag |= IMNT_UNMOUNT;
626 mp->mnt_unmounter = l;
627 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_lock);
628
629 async = mp->mnt_flag & MNT_ASYNC;
630 mp->mnt_flag &= ~MNT_ASYNC;
631 cache_purgevfs(mp); /* remove cache entries for this file sys */
632 if (mp->mnt_syncer != NULL)
633 vfs_deallocate_syncvnode(mp);
634 error = 0;
635 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
636 #if NFSS > 0
637 error = fss_umount_hook(mp, (flags & MNT_FORCE));
638 #endif
639 if (error == 0)
640 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
641 }
642 if (error == 0 || (flags & MNT_FORCE))
643 error = VFS_UNMOUNT(mp, flags, l);
644 mutex_enter(&mountlist_lock);
645 if (error) {
646 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
647 (void) vfs_allocate_syncvnode(mp);
648 mp->mnt_iflag &= ~IMNT_UNMOUNT;
649 mp->mnt_unmounter = NULL;
650 mp->mnt_flag |= async;
651 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
652 &mountlist_lock);
653 if (used_syncer)
654 mutex_exit(&syncer_mutex);
655 mutex_enter(&mp->mnt_mutex);
656 while (mp->mnt_wcnt > 0) {
657 wakeup(mp);
658 mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
659 0, &mp->mnt_mutex);
660 }
661 mutex_exit(&mp->mnt_mutex);
662 return (error);
663 }
664 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
665 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
666 coveredvp->v_mountedhere = NULL;
667 mp->mnt_op->vfs_refcount--;
668 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
669 panic("unmount: dangling vnode");
670 mp->mnt_iflag |= IMNT_GONE;
671 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_lock);
672 if (coveredvp != NULLVP)
673 vrele(coveredvp);
674 mount_finispecific(mp);
675 if (used_syncer)
676 mutex_exit(&syncer_mutex);
677 mutex_enter(&mp->mnt_mutex);
678 while (mp->mnt_wcnt > 0) {
679 wakeup(mp);
680 mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_mutex);
681 }
682 mutex_exit(&mp->mnt_mutex);
683 vfs_hooks_unmount(mp);
684 mutex_destroy(&mp->mnt_mutex);
685 free(mp, M_MOUNT);
686 return (0);
687 }
688
689 /*
690 * Sync each mounted filesystem.
691 */
692 #ifdef DEBUG
693 int syncprt = 0;
694 struct ctldebug debug0 = { "syncprt", &syncprt };
695 #endif
696
697 /* ARGSUSED */
698 int
699 sys_sync(struct lwp *l, void *v, register_t *retval)
700 {
701 struct mount *mp, *nmp;
702 int asyncflag;
703
704 if (l == NULL)
705 l = &lwp0;
706
707 mutex_enter(&mountlist_lock);
708 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
709 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) {
710 nmp = mp->mnt_list.cqe_prev;
711 continue;
712 }
713 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
714 asyncflag = mp->mnt_flag & MNT_ASYNC;
715 mp->mnt_flag &= ~MNT_ASYNC;
716 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
717 if (asyncflag)
718 mp->mnt_flag |= MNT_ASYNC;
719 }
720 mutex_enter(&mountlist_lock);
721 nmp = mp->mnt_list.cqe_prev;
722 vfs_unbusy(mp);
723
724 }
725 mutex_exit(&mountlist_lock);
726 #ifdef DEBUG
727 if (syncprt)
728 vfs_bufstats();
729 #endif /* DEBUG */
730 return (0);
731 }
732
733 /*
734 * Change filesystem quotas.
735 */
736 /* ARGSUSED */
737 int
738 sys_quotactl(struct lwp *l, void *v, register_t *retval)
739 {
740 struct sys_quotactl_args /* {
741 syscallarg(const char *) path;
742 syscallarg(int) cmd;
743 syscallarg(int) uid;
744 syscallarg(void *) arg;
745 } */ *uap = v;
746 struct mount *mp;
747 int error;
748 struct nameidata nd;
749
750 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
751 if ((error = namei(&nd)) != 0)
752 return (error);
753 mp = nd.ni_vp->v_mount;
754 vrele(nd.ni_vp);
755 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
756 SCARG(uap, arg), l);
757 return (error);
758 }
759
760 int
761 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
762 int root)
763 {
764 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
765 int error = 0;
766
767 /*
768 * If MNT_NOWAIT or MNT_LAZY is specified, do not
769 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
770 * overrides MNT_NOWAIT.
771 */
772 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
773 (flags != MNT_WAIT && flags != 0)) {
774 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
775 goto done;
776 }
777
778 /* Get the filesystem stats now */
779 memset(sp, 0, sizeof(*sp));
780 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
781 return error;
782 }
783
784 if (cwdi->cwdi_rdir == NULL)
785 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
786 done:
787 if (cwdi->cwdi_rdir != NULL) {
788 size_t len;
789 char *bp;
790 char *path = PNBUF_GET();
791 if (!path)
792 return ENOMEM;
793
794 bp = path + MAXPATHLEN;
795 *--bp = '\0';
796 rw_enter(&cwdi->cwdi_lock, RW_READER);
797 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
798 MAXPATHLEN / 2, 0, l);
799 rw_exit(&cwdi->cwdi_lock);
800 if (error) {
801 PNBUF_PUT(path);
802 return error;
803 }
804 len = strlen(bp);
805 /*
806 * for mount points that are below our root, we can see
807 * them, so we fix up the pathname and return them. The
808 * rest we cannot see, so we don't allow viewing the
809 * data.
810 */
811 if (strncmp(bp, sp->f_mntonname, len) == 0) {
812 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
813 sizeof(sp->f_mntonname));
814 if (sp->f_mntonname[0] == '\0')
815 (void)strlcpy(sp->f_mntonname, "/",
816 sizeof(sp->f_mntonname));
817 } else {
818 if (root)
819 (void)strlcpy(sp->f_mntonname, "/",
820 sizeof(sp->f_mntonname));
821 else
822 error = EPERM;
823 }
824 PNBUF_PUT(path);
825 }
826 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
827 return error;
828 }
829
830 /*
831 * Get filesystem statistics.
832 */
833 /* ARGSUSED */
834 int
835 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
836 {
837 struct sys_statvfs1_args /* {
838 syscallarg(const char *) path;
839 syscallarg(struct statvfs *) buf;
840 syscallarg(int) flags;
841 } */ *uap = v;
842 struct mount *mp;
843 struct statvfs *sb;
844 int error;
845 struct nameidata nd;
846
847 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
848 if ((error = namei(&nd)) != 0)
849 return error;
850 mp = nd.ni_vp->v_mount;
851 vrele(nd.ni_vp);
852 sb = STATVFSBUF_GET();
853 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
854 if (error == 0) {
855 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
856 }
857 STATVFSBUF_PUT(sb);
858 return error;
859 }
860
861 /*
862 * Get filesystem statistics.
863 */
864 /* ARGSUSED */
865 int
866 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
867 {
868 struct sys_fstatvfs1_args /* {
869 syscallarg(int) fd;
870 syscallarg(struct statvfs *) buf;
871 syscallarg(int) flags;
872 } */ *uap = v;
873 struct proc *p = l->l_proc;
874 struct file *fp;
875 struct mount *mp;
876 struct statvfs *sb;
877 int error;
878
879 /* getvnode() will use the descriptor for us */
880 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
881 return (error);
882 mp = ((struct vnode *)fp->f_data)->v_mount;
883 sb = STATVFSBUF_GET();
884 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
885 goto out;
886 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
887 out:
888 FILE_UNUSE(fp, l);
889 STATVFSBUF_PUT(sb);
890 return error;
891 }
892
893
894 /*
895 * Get statistics on all filesystems.
896 */
897 int
898 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
899 {
900 struct sys_getvfsstat_args /* {
901 syscallarg(struct statvfs *) buf;
902 syscallarg(size_t) bufsize;
903 syscallarg(int) flags;
904 } */ *uap = v;
905 int root = 0;
906 struct proc *p = l->l_proc;
907 struct mount *mp, *nmp;
908 struct statvfs *sb;
909 struct statvfs *sfsp;
910 size_t count, maxcount;
911 int error = 0;
912
913 sb = STATVFSBUF_GET();
914 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
915 sfsp = SCARG(uap, buf);
916 mutex_enter(&mountlist_lock);
917 count = 0;
918 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
919 mp = nmp) {
920 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) {
921 nmp = CIRCLEQ_NEXT(mp, mnt_list);
922 continue;
923 }
924 if (sfsp && count < maxcount) {
925 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
926 if (error) {
927 mutex_enter(&mountlist_lock);
928 nmp = CIRCLEQ_NEXT(mp, mnt_list);
929 vfs_unbusy(mp);
930 continue;
931 }
932 error = copyout(sb, sfsp, sizeof(*sfsp));
933 if (error) {
934 vfs_unbusy(mp);
935 goto out;
936 }
937 sfsp++;
938 root |= strcmp(sb->f_mntonname, "/") == 0;
939 }
940 count++;
941 mutex_enter(&mountlist_lock);
942 nmp = CIRCLEQ_NEXT(mp, mnt_list);
943 vfs_unbusy(mp);
944 }
945 mutex_exit(&mountlist_lock);
946 if (root == 0 && p->p_cwdi->cwdi_rdir) {
947 /*
948 * fake a root entry
949 */
950 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
951 SCARG(uap, flags), 1)) != 0)
952 goto out;
953 if (sfsp)
954 error = copyout(sb, sfsp, sizeof(*sfsp));
955 count++;
956 }
957 if (sfsp && count > maxcount)
958 *retval = maxcount;
959 else
960 *retval = count;
961 out:
962 STATVFSBUF_PUT(sb);
963 return error;
964 }
965
966 /*
967 * Change current working directory to a given file descriptor.
968 */
969 /* ARGSUSED */
970 int
971 sys_fchdir(struct lwp *l, void *v, register_t *retval)
972 {
973 struct sys_fchdir_args /* {
974 syscallarg(int) fd;
975 } */ *uap = v;
976 struct proc *p = l->l_proc;
977 struct filedesc *fdp = p->p_fd;
978 struct cwdinfo *cwdi;
979 struct vnode *vp, *tdp;
980 struct mount *mp;
981 struct file *fp;
982 int error;
983
984 /* getvnode() will use the descriptor for us */
985 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
986 return (error);
987 vp = (struct vnode *)fp->f_data;
988
989 VREF(vp);
990 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
991 if (vp->v_type != VDIR)
992 error = ENOTDIR;
993 else
994 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
995 if (error) {
996 vput(vp);
997 goto out;
998 }
999 while ((mp = vp->v_mountedhere) != NULL) {
1000 if (vfs_busy(mp, 0, 0))
1001 continue;
1002
1003 vput(vp);
1004 error = VFS_ROOT(mp, &tdp);
1005 vfs_unbusy(mp);
1006 if (error)
1007 goto out;
1008 vp = tdp;
1009 }
1010 VOP_UNLOCK(vp, 0);
1011
1012 /*
1013 * Disallow changing to a directory not under the process's
1014 * current root directory (if there is one).
1015 */
1016 cwdi = p->p_cwdi;
1017 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1018 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1019 vrele(vp);
1020 error = EPERM; /* operation not permitted */
1021 } else {
1022 vrele(cwdi->cwdi_cdir);
1023 cwdi->cwdi_cdir = vp;
1024 }
1025 rw_exit(&cwdi->cwdi_lock);
1026
1027 out:
1028 FILE_UNUSE(fp, l);
1029 return (error);
1030 }
1031
1032 /*
1033 * Change this process's notion of the root directory to a given file
1034 * descriptor.
1035 */
1036 int
1037 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1038 {
1039 struct sys_fchroot_args *uap = v;
1040 struct proc *p = l->l_proc;
1041 struct filedesc *fdp = p->p_fd;
1042 struct cwdinfo *cwdi;
1043 struct vnode *vp;
1044 struct file *fp;
1045 int error;
1046
1047 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1048 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1049 return error;
1050 /* getvnode() will use the descriptor for us */
1051 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1052 return error;
1053 vp = (struct vnode *) fp->f_data;
1054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1055 if (vp->v_type != VDIR)
1056 error = ENOTDIR;
1057 else
1058 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1059 VOP_UNLOCK(vp, 0);
1060 if (error)
1061 goto out;
1062 VREF(vp);
1063
1064 /*
1065 * Prevent escaping from chroot by putting the root under
1066 * the working directory. Silently chdir to / if we aren't
1067 * already there.
1068 */
1069 cwdi = p->p_cwdi;
1070 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1071 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1072 /*
1073 * XXX would be more failsafe to change directory to a
1074 * deadfs node here instead
1075 */
1076 vrele(cwdi->cwdi_cdir);
1077 VREF(vp);
1078 cwdi->cwdi_cdir = vp;
1079 }
1080
1081 if (cwdi->cwdi_rdir != NULL)
1082 vrele(cwdi->cwdi_rdir);
1083 cwdi->cwdi_rdir = vp;
1084 rw_exit(&cwdi->cwdi_lock);
1085
1086 out:
1087 FILE_UNUSE(fp, l);
1088 return (error);
1089 }
1090
1091 /*
1092 * Change current working directory (``.'').
1093 */
1094 /* ARGSUSED */
1095 int
1096 sys_chdir(struct lwp *l, void *v, register_t *retval)
1097 {
1098 struct sys_chdir_args /* {
1099 syscallarg(const char *) path;
1100 } */ *uap = v;
1101 struct proc *p = l->l_proc;
1102 struct cwdinfo *cwdi;
1103 int error;
1104 struct nameidata nd;
1105
1106 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1107 SCARG(uap, path), l);
1108 if ((error = change_dir(&nd, l)) != 0)
1109 return (error);
1110 cwdi = p->p_cwdi;
1111 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1112 vrele(cwdi->cwdi_cdir);
1113 cwdi->cwdi_cdir = nd.ni_vp;
1114 rw_exit(&cwdi->cwdi_lock);
1115 return (0);
1116 }
1117
1118 /*
1119 * Change notion of root (``/'') directory.
1120 */
1121 /* ARGSUSED */
1122 int
1123 sys_chroot(struct lwp *l, void *v, register_t *retval)
1124 {
1125 struct sys_chroot_args /* {
1126 syscallarg(const char *) path;
1127 } */ *uap = v;
1128 struct proc *p = l->l_proc;
1129 struct cwdinfo *cwdi;
1130 struct vnode *vp;
1131 int error;
1132 struct nameidata nd;
1133
1134 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1135 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1136 return (error);
1137 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1138 SCARG(uap, path), l);
1139 if ((error = change_dir(&nd, l)) != 0)
1140 return (error);
1141
1142 cwdi = p->p_cwdi;
1143 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1144 if (cwdi->cwdi_rdir != NULL)
1145 vrele(cwdi->cwdi_rdir);
1146 vp = nd.ni_vp;
1147 cwdi->cwdi_rdir = vp;
1148
1149 /*
1150 * Prevent escaping from chroot by putting the root under
1151 * the working directory. Silently chdir to / if we aren't
1152 * already there.
1153 */
1154 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1155 /*
1156 * XXX would be more failsafe to change directory to a
1157 * deadfs node here instead
1158 */
1159 vrele(cwdi->cwdi_cdir);
1160 VREF(vp);
1161 cwdi->cwdi_cdir = vp;
1162 }
1163 rw_exit(&cwdi->cwdi_lock);
1164
1165 return (0);
1166 }
1167
1168 /*
1169 * Common routine for chroot and chdir.
1170 */
1171 static int
1172 change_dir(struct nameidata *ndp, struct lwp *l)
1173 {
1174 struct vnode *vp;
1175 int error;
1176
1177 if ((error = namei(ndp)) != 0)
1178 return (error);
1179 vp = ndp->ni_vp;
1180 if (vp->v_type != VDIR)
1181 error = ENOTDIR;
1182 else
1183 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1184
1185 if (error)
1186 vput(vp);
1187 else
1188 VOP_UNLOCK(vp, 0);
1189 return (error);
1190 }
1191
1192 /*
1193 * Check permissions, allocate an open file structure,
1194 * and call the device open routine if any.
1195 */
1196 int
1197 sys_open(struct lwp *l, void *v, register_t *retval)
1198 {
1199 struct sys_open_args /* {
1200 syscallarg(const char *) path;
1201 syscallarg(int) flags;
1202 syscallarg(int) mode;
1203 } */ *uap = v;
1204 struct proc *p = l->l_proc;
1205 struct cwdinfo *cwdi = p->p_cwdi;
1206 struct filedesc *fdp = p->p_fd;
1207 struct file *fp;
1208 struct vnode *vp;
1209 int flags, cmode;
1210 int type, indx, error;
1211 struct flock lf;
1212 struct nameidata nd;
1213
1214 flags = FFLAGS(SCARG(uap, flags));
1215 if ((flags & (FREAD | FWRITE)) == 0)
1216 return (EINVAL);
1217 /* falloc() will use the file descriptor for us */
1218 if ((error = falloc(l, &fp, &indx)) != 0)
1219 return (error);
1220 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1221 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1222 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1223 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1224 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1225 rw_enter(&fdp->fd_lock, RW_WRITER);
1226 FILE_UNUSE(fp, l);
1227 fdp->fd_ofiles[indx] = NULL;
1228 rw_exit(&fdp->fd_lock);
1229 ffree(fp);
1230 if ((error == EDUPFD || error == EMOVEFD) &&
1231 l->l_dupfd >= 0 && /* XXX from fdopen */
1232 (error =
1233 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1234 *retval = indx;
1235 return (0);
1236 }
1237 if (error == ERESTART)
1238 error = EINTR;
1239 fdremove(fdp, indx);
1240 return (error);
1241 }
1242 l->l_dupfd = 0;
1243 vp = nd.ni_vp;
1244 fp->f_flag = flags & FMASK;
1245 fp->f_type = DTYPE_VNODE;
1246 fp->f_ops = &vnops;
1247 fp->f_data = vp;
1248 if (flags & (O_EXLOCK | O_SHLOCK)) {
1249 lf.l_whence = SEEK_SET;
1250 lf.l_start = 0;
1251 lf.l_len = 0;
1252 if (flags & O_EXLOCK)
1253 lf.l_type = F_WRLCK;
1254 else
1255 lf.l_type = F_RDLCK;
1256 type = F_FLOCK;
1257 if ((flags & FNONBLOCK) == 0)
1258 type |= F_WAIT;
1259 VOP_UNLOCK(vp, 0);
1260 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1261 if (error) {
1262 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1263 FILE_UNUSE(fp, l);
1264 ffree(fp);
1265 fdremove(fdp, indx);
1266 return (error);
1267 }
1268 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1269 fp->f_flag |= FHASLOCK;
1270 }
1271 VOP_UNLOCK(vp, 0);
1272 *retval = indx;
1273 FILE_SET_MATURE(fp);
1274 FILE_UNUSE(fp, l);
1275 return (0);
1276 }
1277
1278 static void
1279 vfs__fhfree(fhandle_t *fhp)
1280 {
1281 size_t fhsize;
1282
1283 if (fhp == NULL) {
1284 return;
1285 }
1286 fhsize = FHANDLE_SIZE(fhp);
1287 kmem_free(fhp, fhsize);
1288 }
1289
1290 /*
1291 * vfs_composefh: compose a filehandle.
1292 */
1293
1294 int
1295 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1296 {
1297 struct mount *mp;
1298 struct fid *fidp;
1299 int error;
1300 size_t needfhsize;
1301 size_t fidsize;
1302
1303 mp = vp->v_mount;
1304 fidp = NULL;
1305 if (*fh_size < FHANDLE_SIZE_MIN) {
1306 fidsize = 0;
1307 } else {
1308 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1309 if (fhp != NULL) {
1310 memset(fhp, 0, *fh_size);
1311 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1312 fidp = &fhp->fh_fid;
1313 }
1314 }
1315 error = VFS_VPTOFH(vp, fidp, &fidsize);
1316 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1317 if (error == 0 && *fh_size < needfhsize) {
1318 error = E2BIG;
1319 }
1320 *fh_size = needfhsize;
1321 return error;
1322 }
1323
1324 int
1325 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1326 {
1327 struct mount *mp;
1328 fhandle_t *fhp;
1329 size_t fhsize;
1330 size_t fidsize;
1331 int error;
1332
1333 *fhpp = NULL;
1334 mp = vp->v_mount;
1335 fidsize = 0;
1336 error = VFS_VPTOFH(vp, NULL, &fidsize);
1337 KASSERT(error != 0);
1338 if (error != E2BIG) {
1339 goto out;
1340 }
1341 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1342 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1343 if (fhp == NULL) {
1344 error = ENOMEM;
1345 goto out;
1346 }
1347 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1348 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1349 if (error == 0) {
1350 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1351 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1352 *fhpp = fhp;
1353 } else {
1354 kmem_free(fhp, fhsize);
1355 }
1356 out:
1357 return error;
1358 }
1359
1360 void
1361 vfs_composefh_free(fhandle_t *fhp)
1362 {
1363
1364 vfs__fhfree(fhp);
1365 }
1366
1367 /*
1368 * vfs_fhtovp: lookup a vnode by a filehandle.
1369 */
1370
1371 int
1372 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1373 {
1374 struct mount *mp;
1375 int error;
1376
1377 *vpp = NULL;
1378 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1379 if (mp == NULL) {
1380 error = ESTALE;
1381 goto out;
1382 }
1383 if (mp->mnt_op->vfs_fhtovp == NULL) {
1384 error = EOPNOTSUPP;
1385 goto out;
1386 }
1387 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1388 out:
1389 return error;
1390 }
1391
1392 /*
1393 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1394 * the needed size.
1395 */
1396
1397 int
1398 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1399 {
1400 fhandle_t *fhp;
1401 int error;
1402
1403 *fhpp = NULL;
1404 if (fhsize > FHANDLE_SIZE_MAX) {
1405 return EINVAL;
1406 }
1407 if (fhsize < FHANDLE_SIZE_MIN) {
1408 return EINVAL;
1409 }
1410 again:
1411 fhp = kmem_alloc(fhsize, KM_SLEEP);
1412 if (fhp == NULL) {
1413 return ENOMEM;
1414 }
1415 error = copyin(ufhp, fhp, fhsize);
1416 if (error == 0) {
1417 /* XXX this check shouldn't be here */
1418 if (FHANDLE_SIZE(fhp) == fhsize) {
1419 *fhpp = fhp;
1420 return 0;
1421 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1422 /*
1423 * a kludge for nfsv2 padded handles.
1424 */
1425 size_t sz;
1426
1427 sz = FHANDLE_SIZE(fhp);
1428 kmem_free(fhp, fhsize);
1429 fhsize = sz;
1430 goto again;
1431 } else {
1432 /*
1433 * userland told us wrong size.
1434 */
1435 error = EINVAL;
1436 }
1437 }
1438 kmem_free(fhp, fhsize);
1439 return error;
1440 }
1441
1442 void
1443 vfs_copyinfh_free(fhandle_t *fhp)
1444 {
1445
1446 vfs__fhfree(fhp);
1447 }
1448
1449 /*
1450 * Get file handle system call
1451 */
1452 int
1453 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1454 {
1455 struct sys___getfh30_args /* {
1456 syscallarg(char *) fname;
1457 syscallarg(fhandle_t *) fhp;
1458 syscallarg(size_t *) fh_size;
1459 } */ *uap = v;
1460 struct vnode *vp;
1461 fhandle_t *fh;
1462 int error;
1463 struct nameidata nd;
1464 size_t sz;
1465 size_t usz;
1466
1467 /*
1468 * Must be super user
1469 */
1470 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1471 0, NULL, NULL, NULL);
1472 if (error)
1473 return (error);
1474 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1475 SCARG(uap, fname), l);
1476 error = namei(&nd);
1477 if (error)
1478 return (error);
1479 vp = nd.ni_vp;
1480 error = vfs_composefh_alloc(vp, &fh);
1481 vput(vp);
1482 if (error != 0) {
1483 goto out;
1484 }
1485 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1486 if (error != 0) {
1487 goto out;
1488 }
1489 sz = FHANDLE_SIZE(fh);
1490 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1491 if (error != 0) {
1492 goto out;
1493 }
1494 if (usz >= sz) {
1495 error = copyout(fh, SCARG(uap, fhp), sz);
1496 } else {
1497 error = E2BIG;
1498 }
1499 out:
1500 vfs_composefh_free(fh);
1501 return (error);
1502 }
1503
1504 /*
1505 * Open a file given a file handle.
1506 *
1507 * Check permissions, allocate an open file structure,
1508 * and call the device open routine if any.
1509 */
1510
1511 int
1512 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1513 register_t *retval)
1514 {
1515 struct filedesc *fdp = l->l_proc->p_fd;
1516 struct file *fp;
1517 struct vnode *vp = NULL;
1518 kauth_cred_t cred = l->l_cred;
1519 struct file *nfp;
1520 int type, indx, error=0;
1521 struct flock lf;
1522 struct vattr va;
1523 fhandle_t *fh;
1524 int flags;
1525
1526 /*
1527 * Must be super user
1528 */
1529 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1530 0, NULL, NULL, NULL)))
1531 return (error);
1532
1533 flags = FFLAGS(oflags);
1534 if ((flags & (FREAD | FWRITE)) == 0)
1535 return (EINVAL);
1536 if ((flags & O_CREAT))
1537 return (EINVAL);
1538 /* falloc() will use the file descriptor for us */
1539 if ((error = falloc(l, &nfp, &indx)) != 0)
1540 return (error);
1541 fp = nfp;
1542 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1543 if (error != 0) {
1544 goto bad;
1545 }
1546 error = vfs_fhtovp(fh, &vp);
1547 if (error != 0) {
1548 goto bad;
1549 }
1550
1551 /* Now do an effective vn_open */
1552
1553 if (vp->v_type == VSOCK) {
1554 error = EOPNOTSUPP;
1555 goto bad;
1556 }
1557 if (flags & FREAD) {
1558 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1559 goto bad;
1560 }
1561 if (flags & (FWRITE | O_TRUNC)) {
1562 if (vp->v_type == VDIR) {
1563 error = EISDIR;
1564 goto bad;
1565 }
1566 if ((error = vn_writechk(vp)) != 0 ||
1567 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1568 goto bad;
1569 }
1570 if (flags & O_TRUNC) {
1571 VOP_UNLOCK(vp, 0); /* XXX */
1572 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1573 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1574 VATTR_NULL(&va);
1575 va.va_size = 0;
1576 error = VOP_SETATTR(vp, &va, cred, l);
1577 if (error)
1578 goto bad;
1579 }
1580 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1581 goto bad;
1582 if (vp->v_type == VREG &&
1583 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1584 error = EIO;
1585 goto bad;
1586 }
1587 if (flags & FWRITE)
1588 vp->v_writecount++;
1589
1590 /* done with modified vn_open, now finish what sys_open does. */
1591
1592 fp->f_flag = flags & FMASK;
1593 fp->f_type = DTYPE_VNODE;
1594 fp->f_ops = &vnops;
1595 fp->f_data = vp;
1596 if (flags & (O_EXLOCK | O_SHLOCK)) {
1597 lf.l_whence = SEEK_SET;
1598 lf.l_start = 0;
1599 lf.l_len = 0;
1600 if (flags & O_EXLOCK)
1601 lf.l_type = F_WRLCK;
1602 else
1603 lf.l_type = F_RDLCK;
1604 type = F_FLOCK;
1605 if ((flags & FNONBLOCK) == 0)
1606 type |= F_WAIT;
1607 VOP_UNLOCK(vp, 0);
1608 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1609 if (error) {
1610 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1611 FILE_UNUSE(fp, l);
1612 ffree(fp);
1613 fdremove(fdp, indx);
1614 return (error);
1615 }
1616 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1617 fp->f_flag |= FHASLOCK;
1618 }
1619 VOP_UNLOCK(vp, 0);
1620 *retval = indx;
1621 FILE_SET_MATURE(fp);
1622 FILE_UNUSE(fp, l);
1623 vfs_copyinfh_free(fh);
1624 return (0);
1625
1626 bad:
1627 FILE_UNUSE(fp, l);
1628 ffree(fp);
1629 fdremove(fdp, indx);
1630 if (vp != NULL)
1631 vput(vp);
1632 vfs_copyinfh_free(fh);
1633 return (error);
1634 }
1635
1636 int
1637 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1638 {
1639 struct sys___fhopen40_args /* {
1640 syscallarg(const void *) fhp;
1641 syscallarg(size_t) fh_size;
1642 syscallarg(int) flags;
1643 } */ *uap = v;
1644
1645 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1646 SCARG(uap, flags), retval);
1647 }
1648
1649 /* XXX: temp (mar '07) for LKM compat */
1650 int
1651 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
1652 register_t *retval)
1653 {
1654 int error;
1655 struct stat sb;
1656
1657 error = do_fhstat(l, ufhp, fhsize, &sb);
1658 if (error == 0)
1659 error = copyout(&sb, sbp, sizeof(sb));
1660 return error;
1661 }
1662
1663 int
1664 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1665 {
1666 int error;
1667 fhandle_t *fh;
1668 struct vnode *vp;
1669
1670 /*
1671 * Must be super user
1672 */
1673 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1674 0, NULL, NULL, NULL)))
1675 return (error);
1676
1677 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1678 if (error != 0)
1679 return error;
1680
1681 error = vfs_fhtovp(fh, &vp);
1682 vfs_copyinfh_free(fh);
1683 if (error != 0)
1684 return error;
1685
1686 error = vn_stat(vp, sb, l);
1687 vput(vp);
1688 return error;
1689 }
1690
1691
1692 /* ARGSUSED */
1693 int
1694 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1695 {
1696 struct sys___fhstat40_args /* {
1697 syscallarg(const void *) fhp;
1698 syscallarg(size_t) fh_size;
1699 syscallarg(struct stat *) sb;
1700 } */ *uap = v;
1701 struct stat sb;
1702 int error;
1703
1704 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1705 if (error)
1706 return error;
1707 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1708 }
1709
1710 /* XXX: temp (mar '07) for LKM compat */
1711 int
1712 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
1713 int flags, register_t *retval)
1714 {
1715 struct statvfs *sb = STATVFSBUF_GET();
1716 int error;
1717
1718 error = do_fhstatvfs(l, ufhp, fhsize, sb, flags);
1719 if (error == 0)
1720 error = copyout(sb, buf, sizeof(*sb));
1721 STATVFSBUF_PUT(sb);
1722 return error;
1723 }
1724
1725 int
1726 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1727 int flags)
1728 {
1729 fhandle_t *fh;
1730 struct mount *mp;
1731 struct vnode *vp;
1732 int error;
1733
1734 /*
1735 * Must be super user
1736 */
1737 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1738 0, NULL, NULL, NULL)))
1739 return error;
1740
1741 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1742 if (error != 0)
1743 return error;
1744
1745 error = vfs_fhtovp(fh, &vp);
1746 vfs_copyinfh_free(fh);
1747 if (error != 0)
1748 return error;
1749
1750 mp = vp->v_mount;
1751 error = dostatvfs(mp, sb, l, flags, 1);
1752 vput(vp);
1753 return error;
1754 }
1755
1756 /* ARGSUSED */
1757 int
1758 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1759 {
1760 struct sys___fhstatvfs140_args /* {
1761 syscallarg(const void *) fhp;
1762 syscallarg(size_t) fh_size;
1763 syscallarg(struct statvfs *) buf;
1764 syscallarg(int) flags;
1765 } */ *uap = v;
1766 struct statvfs *sb = STATVFSBUF_GET();
1767 int error;
1768
1769 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1770 SCARG(uap, flags));
1771 if (error == 0)
1772 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1773 STATVFSBUF_PUT(sb);
1774 return error;
1775 }
1776
1777 /*
1778 * Create a special file.
1779 */
1780 /* ARGSUSED */
1781 int
1782 sys_mknod(struct lwp *l, void *v, register_t *retval)
1783 {
1784 struct sys_mknod_args /* {
1785 syscallarg(const char *) path;
1786 syscallarg(int) mode;
1787 syscallarg(int) dev;
1788 } */ *uap = v;
1789 struct proc *p = l->l_proc;
1790 struct vnode *vp;
1791 struct vattr vattr;
1792 int error, optype;
1793 struct nameidata nd;
1794
1795 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1796 0, NULL, NULL, NULL)) != 0)
1797 return (error);
1798
1799 optype = VOP_MKNOD_DESCOFFSET;
1800 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1801 if ((error = namei(&nd)) != 0)
1802 return (error);
1803 vp = nd.ni_vp;
1804 if (vp != NULL)
1805 error = EEXIST;
1806 else {
1807 VATTR_NULL(&vattr);
1808 /* We will read cwdi->cwdi_cmask unlocked. */
1809 vattr.va_mode =
1810 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1811 vattr.va_rdev = SCARG(uap, dev);
1812
1813 switch (SCARG(uap, mode) & S_IFMT) {
1814 case S_IFMT: /* used by badsect to flag bad sectors */
1815 vattr.va_type = VBAD;
1816 break;
1817 case S_IFCHR:
1818 vattr.va_type = VCHR;
1819 break;
1820 case S_IFBLK:
1821 vattr.va_type = VBLK;
1822 break;
1823 case S_IFWHT:
1824 optype = VOP_WHITEOUT_DESCOFFSET;
1825 break;
1826 case S_IFREG:
1827 vattr.va_type = VREG;
1828 vattr.va_rdev = VNOVAL;
1829 optype = VOP_CREATE_DESCOFFSET;
1830 break;
1831 default:
1832 error = EINVAL;
1833 break;
1834 }
1835 }
1836 if (!error) {
1837 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1838 switch (optype) {
1839 case VOP_WHITEOUT_DESCOFFSET:
1840 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1841 if (error)
1842 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1843 vput(nd.ni_dvp);
1844 break;
1845
1846 case VOP_MKNOD_DESCOFFSET:
1847 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1848 &nd.ni_cnd, &vattr);
1849 if (error == 0)
1850 vput(nd.ni_vp);
1851 break;
1852
1853 case VOP_CREATE_DESCOFFSET:
1854 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1855 &nd.ni_cnd, &vattr);
1856 if (error == 0)
1857 vput(nd.ni_vp);
1858 break;
1859 }
1860 } else {
1861 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1862 if (nd.ni_dvp == vp)
1863 vrele(nd.ni_dvp);
1864 else
1865 vput(nd.ni_dvp);
1866 if (vp)
1867 vrele(vp);
1868 }
1869 return (error);
1870 }
1871
1872 /*
1873 * Create a named pipe.
1874 */
1875 /* ARGSUSED */
1876 int
1877 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1878 {
1879 struct sys_mkfifo_args /* {
1880 syscallarg(const char *) path;
1881 syscallarg(int) mode;
1882 } */ *uap = v;
1883 struct proc *p = l->l_proc;
1884 struct vattr vattr;
1885 int error;
1886 struct nameidata nd;
1887
1888 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1889 if ((error = namei(&nd)) != 0)
1890 return (error);
1891 if (nd.ni_vp != NULL) {
1892 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1893 if (nd.ni_dvp == nd.ni_vp)
1894 vrele(nd.ni_dvp);
1895 else
1896 vput(nd.ni_dvp);
1897 vrele(nd.ni_vp);
1898 return (EEXIST);
1899 }
1900 VATTR_NULL(&vattr);
1901 vattr.va_type = VFIFO;
1902 /* We will read cwdi->cwdi_cmask unlocked. */
1903 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1904 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1905 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1906 if (error == 0)
1907 vput(nd.ni_vp);
1908 return (error);
1909 }
1910
1911 /*
1912 * Make a hard file link.
1913 */
1914 /* ARGSUSED */
1915 int
1916 sys_link(struct lwp *l, void *v, register_t *retval)
1917 {
1918 struct sys_link_args /* {
1919 syscallarg(const char *) path;
1920 syscallarg(const char *) link;
1921 } */ *uap = v;
1922 struct vnode *vp;
1923 struct nameidata nd;
1924 int error;
1925
1926 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1927 if ((error = namei(&nd)) != 0)
1928 return (error);
1929 vp = nd.ni_vp;
1930 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1931 if ((error = namei(&nd)) != 0)
1932 goto out;
1933 if (nd.ni_vp) {
1934 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1935 if (nd.ni_dvp == nd.ni_vp)
1936 vrele(nd.ni_dvp);
1937 else
1938 vput(nd.ni_dvp);
1939 vrele(nd.ni_vp);
1940 error = EEXIST;
1941 goto out;
1942 }
1943 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1944 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1945 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1946 out:
1947 vrele(vp);
1948 return (error);
1949 }
1950
1951 /*
1952 * Make a symbolic link.
1953 */
1954 /* ARGSUSED */
1955 int
1956 sys_symlink(struct lwp *l, void *v, register_t *retval)
1957 {
1958 struct sys_symlink_args /* {
1959 syscallarg(const char *) path;
1960 syscallarg(const char *) link;
1961 } */ *uap = v;
1962 struct proc *p = l->l_proc;
1963 struct vattr vattr;
1964 char *path;
1965 int error;
1966 struct nameidata nd;
1967
1968 path = PNBUF_GET();
1969 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1970 if (error)
1971 goto out;
1972 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1973 if ((error = namei(&nd)) != 0)
1974 goto out;
1975 if (nd.ni_vp) {
1976 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1977 if (nd.ni_dvp == nd.ni_vp)
1978 vrele(nd.ni_dvp);
1979 else
1980 vput(nd.ni_dvp);
1981 vrele(nd.ni_vp);
1982 error = EEXIST;
1983 goto out;
1984 }
1985 VATTR_NULL(&vattr);
1986 vattr.va_type = VLNK;
1987 /* We will read cwdi->cwdi_cmask unlocked. */
1988 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1989 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1990 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1991 if (error == 0)
1992 vput(nd.ni_vp);
1993 out:
1994 PNBUF_PUT(path);
1995 return (error);
1996 }
1997
1998 /*
1999 * Delete a whiteout from the filesystem.
2000 */
2001 /* ARGSUSED */
2002 int
2003 sys_undelete(struct lwp *l, void *v, register_t *retval)
2004 {
2005 struct sys_undelete_args /* {
2006 syscallarg(const char *) path;
2007 } */ *uap = v;
2008 int error;
2009 struct nameidata nd;
2010
2011 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2012 SCARG(uap, path), l);
2013 error = namei(&nd);
2014 if (error)
2015 return (error);
2016
2017 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2018 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2019 if (nd.ni_dvp == nd.ni_vp)
2020 vrele(nd.ni_dvp);
2021 else
2022 vput(nd.ni_dvp);
2023 if (nd.ni_vp)
2024 vrele(nd.ni_vp);
2025 return (EEXIST);
2026 }
2027 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2028 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2029 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2030 vput(nd.ni_dvp);
2031 return (error);
2032 }
2033
2034 /*
2035 * Delete a name from the filesystem.
2036 */
2037 /* ARGSUSED */
2038 int
2039 sys_unlink(struct lwp *l, void *v, register_t *retval)
2040 {
2041 struct sys_unlink_args /* {
2042 syscallarg(const char *) path;
2043 } */ *uap = v;
2044 struct vnode *vp;
2045 int error;
2046 struct nameidata nd;
2047 #if NVERIEXEC > 0
2048 pathname_t pathbuf = NULL;
2049 #endif /* NVERIEXEC > 0 */
2050
2051 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2052 SCARG(uap, path), l);
2053 if ((error = namei(&nd)) != 0)
2054 return (error);
2055 vp = nd.ni_vp;
2056
2057 /*
2058 * The root of a mounted filesystem cannot be deleted.
2059 */
2060 if (vp->v_flag & VROOT) {
2061 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2062 if (nd.ni_dvp == vp)
2063 vrele(nd.ni_dvp);
2064 else
2065 vput(nd.ni_dvp);
2066 vput(vp);
2067 error = EBUSY;
2068 goto out;
2069 }
2070
2071 #if NVERIEXEC > 0
2072 error = pathname_get(nd.ni_dirp, nd.ni_segflg, &pathbuf);
2073
2074 /* Handle remove requests for veriexec entries. */
2075 if (!error) {
2076 error = veriexec_removechk(vp, pathname_path(pathbuf), l);
2077 pathname_put(pathbuf);
2078 }
2079
2080 if (error) {
2081 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2082 if (nd.ni_dvp == vp)
2083 vrele(nd.ni_dvp);
2084 else
2085 vput(nd.ni_dvp);
2086 vput(vp);
2087 goto out;
2088 }
2089 #endif /* NVERIEXEC > 0 */
2090
2091 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2092 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2093 #ifdef FILEASSOC
2094 (void)fileassoc_file_delete(vp);
2095 #endif /* FILEASSOC */
2096 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2097 out:
2098 return (error);
2099 }
2100
2101 /*
2102 * Reposition read/write file offset.
2103 */
2104 int
2105 sys_lseek(struct lwp *l, void *v, register_t *retval)
2106 {
2107 struct sys_lseek_args /* {
2108 syscallarg(int) fd;
2109 syscallarg(int) pad;
2110 syscallarg(off_t) offset;
2111 syscallarg(int) whence;
2112 } */ *uap = v;
2113 struct proc *p = l->l_proc;
2114 kauth_cred_t cred = l->l_cred;
2115 struct filedesc *fdp = p->p_fd;
2116 struct file *fp;
2117 struct vnode *vp;
2118 struct vattr vattr;
2119 off_t newoff;
2120 int error;
2121
2122 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2123 return (EBADF);
2124
2125 vp = (struct vnode *)fp->f_data;
2126 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2127 error = ESPIPE;
2128 mutex_exit(&fp->f_lock);
2129 goto out;
2130 }
2131
2132 switch (SCARG(uap, whence)) {
2133 case SEEK_CUR:
2134 newoff = fp->f_offset + SCARG(uap, offset);
2135 FILE_USE(fp);
2136 break;
2137 case SEEK_END:
2138 FILE_USE(fp);
2139 error = VOP_GETATTR(vp, &vattr, cred, l);
2140 if (error) {
2141 FILE_UNUSE(fp, l);
2142 goto out;
2143 }
2144 newoff = SCARG(uap, offset) + vattr.va_size;
2145 break;
2146 case SEEK_SET:
2147 FILE_USE(fp);
2148 newoff = SCARG(uap, offset);
2149 break;
2150 default:
2151 mutex_exit(&fp->f_lock);
2152 error = EINVAL;
2153 goto out;
2154 }
2155 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2156 mutex_enter(&fp->f_lock);
2157 *(off_t *)retval = fp->f_offset = newoff;
2158 mutex_exit(&fp->f_lock);
2159 }
2160 FILE_UNUSE(fp, l);
2161 out:
2162 return (error);
2163 }
2164
2165 /*
2166 * Positional read system call.
2167 */
2168 int
2169 sys_pread(struct lwp *l, void *v, register_t *retval)
2170 {
2171 struct sys_pread_args /* {
2172 syscallarg(int) fd;
2173 syscallarg(void *) buf;
2174 syscallarg(size_t) nbyte;
2175 syscallarg(off_t) offset;
2176 } */ *uap = v;
2177 struct proc *p = l->l_proc;
2178 struct filedesc *fdp = p->p_fd;
2179 struct file *fp;
2180 struct vnode *vp;
2181 off_t offset;
2182 int error, fd = SCARG(uap, fd);
2183
2184 if ((fp = fd_getfile(fdp, fd)) == NULL)
2185 return (EBADF);
2186
2187 if ((fp->f_flag & FREAD) == 0) {
2188 mutex_exit(&fp->f_lock);
2189 return (EBADF);
2190 }
2191
2192 FILE_USE(fp);
2193
2194 vp = (struct vnode *)fp->f_data;
2195 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2196 error = ESPIPE;
2197 goto out;
2198 }
2199
2200 offset = SCARG(uap, offset);
2201
2202 /*
2203 * XXX This works because no file systems actually
2204 * XXX take any action on the seek operation.
2205 */
2206 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2207 goto out;
2208
2209 /* dofileread() will unuse the descriptor for us */
2210 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2211 &offset, 0, retval));
2212
2213 out:
2214 FILE_UNUSE(fp, l);
2215 return (error);
2216 }
2217
2218 /*
2219 * Positional scatter read system call.
2220 */
2221 int
2222 sys_preadv(struct lwp *l, void *v, register_t *retval)
2223 {
2224 struct sys_preadv_args /* {
2225 syscallarg(int) fd;
2226 syscallarg(const struct iovec *) iovp;
2227 syscallarg(int) iovcnt;
2228 syscallarg(off_t) offset;
2229 } */ *uap = v;
2230 struct proc *p = l->l_proc;
2231 struct filedesc *fdp = p->p_fd;
2232 struct file *fp;
2233 struct vnode *vp;
2234 off_t offset;
2235 int error, fd = SCARG(uap, fd);
2236
2237 if ((fp = fd_getfile(fdp, fd)) == NULL)
2238 return (EBADF);
2239
2240 if ((fp->f_flag & FREAD) == 0) {
2241 mutex_exit(&fp->f_lock);
2242 return (EBADF);
2243 }
2244
2245 FILE_USE(fp);
2246
2247 vp = (struct vnode *)fp->f_data;
2248 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2249 error = ESPIPE;
2250 goto out;
2251 }
2252
2253 offset = SCARG(uap, offset);
2254
2255 /*
2256 * XXX This works because no file systems actually
2257 * XXX take any action on the seek operation.
2258 */
2259 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2260 goto out;
2261
2262 /* dofilereadv() will unuse the descriptor for us */
2263 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2264 &offset, 0, retval));
2265
2266 out:
2267 FILE_UNUSE(fp, l);
2268 return (error);
2269 }
2270
2271 /*
2272 * Positional write system call.
2273 */
2274 int
2275 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2276 {
2277 struct sys_pwrite_args /* {
2278 syscallarg(int) fd;
2279 syscallarg(const void *) buf;
2280 syscallarg(size_t) nbyte;
2281 syscallarg(off_t) offset;
2282 } */ *uap = v;
2283 struct proc *p = l->l_proc;
2284 struct filedesc *fdp = p->p_fd;
2285 struct file *fp;
2286 struct vnode *vp;
2287 off_t offset;
2288 int error, fd = SCARG(uap, fd);
2289
2290 if ((fp = fd_getfile(fdp, fd)) == NULL)
2291 return (EBADF);
2292
2293 if ((fp->f_flag & FWRITE) == 0) {
2294 mutex_exit(&fp->f_lock);
2295 return (EBADF);
2296 }
2297
2298 FILE_USE(fp);
2299
2300 vp = (struct vnode *)fp->f_data;
2301 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2302 error = ESPIPE;
2303 goto out;
2304 }
2305
2306 offset = SCARG(uap, offset);
2307
2308 /*
2309 * XXX This works because no file systems actually
2310 * XXX take any action on the seek operation.
2311 */
2312 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2313 goto out;
2314
2315 /* dofilewrite() will unuse the descriptor for us */
2316 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2317 &offset, 0, retval));
2318
2319 out:
2320 FILE_UNUSE(fp, l);
2321 return (error);
2322 }
2323
2324 /*
2325 * Positional gather write system call.
2326 */
2327 int
2328 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2329 {
2330 struct sys_pwritev_args /* {
2331 syscallarg(int) fd;
2332 syscallarg(const struct iovec *) iovp;
2333 syscallarg(int) iovcnt;
2334 syscallarg(off_t) offset;
2335 } */ *uap = v;
2336 struct proc *p = l->l_proc;
2337 struct filedesc *fdp = p->p_fd;
2338 struct file *fp;
2339 struct vnode *vp;
2340 off_t offset;
2341 int error, fd = SCARG(uap, fd);
2342
2343 if ((fp = fd_getfile(fdp, fd)) == NULL)
2344 return (EBADF);
2345
2346 if ((fp->f_flag & FWRITE) == 0) {
2347 mutex_exit(&fp->f_lock);
2348 return (EBADF);
2349 }
2350
2351 FILE_USE(fp);
2352
2353 vp = (struct vnode *)fp->f_data;
2354 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2355 error = ESPIPE;
2356 goto out;
2357 }
2358
2359 offset = SCARG(uap, offset);
2360
2361 /*
2362 * XXX This works because no file systems actually
2363 * XXX take any action on the seek operation.
2364 */
2365 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2366 goto out;
2367
2368 /* dofilewritev() will unuse the descriptor for us */
2369 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2370 &offset, 0, retval));
2371
2372 out:
2373 FILE_UNUSE(fp, l);
2374 return (error);
2375 }
2376
2377 /*
2378 * Check access permissions.
2379 */
2380 int
2381 sys_access(struct lwp *l, void *v, register_t *retval)
2382 {
2383 struct sys_access_args /* {
2384 syscallarg(const char *) path;
2385 syscallarg(int) flags;
2386 } */ *uap = v;
2387 kauth_cred_t cred;
2388 struct vnode *vp;
2389 int error, flags;
2390 struct nameidata nd;
2391
2392 cred = kauth_cred_dup(l->l_cred);
2393 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2394 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2395 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2396 SCARG(uap, path), l);
2397 /* Override default credentials */
2398 nd.ni_cnd.cn_cred = cred;
2399 if ((error = namei(&nd)) != 0)
2400 goto out;
2401 vp = nd.ni_vp;
2402
2403 /* Flags == 0 means only check for existence. */
2404 if (SCARG(uap, flags)) {
2405 flags = 0;
2406 if (SCARG(uap, flags) & R_OK)
2407 flags |= VREAD;
2408 if (SCARG(uap, flags) & W_OK)
2409 flags |= VWRITE;
2410 if (SCARG(uap, flags) & X_OK)
2411 flags |= VEXEC;
2412
2413 error = VOP_ACCESS(vp, flags, cred, l);
2414 if (!error && (flags & VWRITE))
2415 error = vn_writechk(vp);
2416 }
2417 vput(vp);
2418 out:
2419 kauth_cred_free(cred);
2420 return (error);
2421 }
2422
2423 /*
2424 * Common code for all sys_stat functions, including compat versions.
2425 */
2426 int
2427 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags,
2428 struct stat *sb)
2429 {
2430 int error;
2431 struct nameidata nd;
2432
2433 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF, UIO_USERSPACE, path, l);
2434 error = namei(&nd);
2435 if (error != 0)
2436 return error;
2437 error = vn_stat(nd.ni_vp, sb, l);
2438 vput(nd.ni_vp);
2439 return error;
2440 }
2441
2442 /*
2443 * Get file status; this version follows links.
2444 */
2445 /* ARGSUSED */
2446 int
2447 sys___stat30(struct lwp *l, void *v, register_t *retval)
2448 {
2449 struct sys___stat30_args /* {
2450 syscallarg(const char *) path;
2451 syscallarg(struct stat *) ub;
2452 } */ *uap = v;
2453 struct stat sb;
2454 int error;
2455
2456 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb);
2457 if (error)
2458 return error;
2459 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2460 }
2461
2462 /*
2463 * Get file status; this version does not follow links.
2464 */
2465 /* ARGSUSED */
2466 int
2467 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2468 {
2469 struct sys___lstat30_args /* {
2470 syscallarg(const char *) path;
2471 syscallarg(struct stat *) ub;
2472 } */ *uap = v;
2473 struct stat sb;
2474 int error;
2475
2476 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb);
2477 if (error)
2478 return error;
2479 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2480 }
2481
2482 /*
2483 * Get configurable pathname variables.
2484 */
2485 /* ARGSUSED */
2486 int
2487 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2488 {
2489 struct sys_pathconf_args /* {
2490 syscallarg(const char *) path;
2491 syscallarg(int) name;
2492 } */ *uap = v;
2493 int error;
2494 struct nameidata nd;
2495
2496 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2497 SCARG(uap, path), l);
2498 if ((error = namei(&nd)) != 0)
2499 return (error);
2500 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2501 vput(nd.ni_vp);
2502 return (error);
2503 }
2504
2505 /*
2506 * Return target name of a symbolic link.
2507 */
2508 /* ARGSUSED */
2509 int
2510 sys_readlink(struct lwp *l, void *v, register_t *retval)
2511 {
2512 struct sys_readlink_args /* {
2513 syscallarg(const char *) path;
2514 syscallarg(char *) buf;
2515 syscallarg(size_t) count;
2516 } */ *uap = v;
2517 struct vnode *vp;
2518 struct iovec aiov;
2519 struct uio auio;
2520 int error;
2521 struct nameidata nd;
2522
2523 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2524 SCARG(uap, path), l);
2525 if ((error = namei(&nd)) != 0)
2526 return (error);
2527 vp = nd.ni_vp;
2528 if (vp->v_type != VLNK)
2529 error = EINVAL;
2530 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2531 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2532 aiov.iov_base = SCARG(uap, buf);
2533 aiov.iov_len = SCARG(uap, count);
2534 auio.uio_iov = &aiov;
2535 auio.uio_iovcnt = 1;
2536 auio.uio_offset = 0;
2537 auio.uio_rw = UIO_READ;
2538 KASSERT(l == curlwp);
2539 auio.uio_vmspace = l->l_proc->p_vmspace;
2540 auio.uio_resid = SCARG(uap, count);
2541 error = VOP_READLINK(vp, &auio, l->l_cred);
2542 }
2543 vput(vp);
2544 *retval = SCARG(uap, count) - auio.uio_resid;
2545 return (error);
2546 }
2547
2548 /*
2549 * Change flags of a file given a path name.
2550 */
2551 /* ARGSUSED */
2552 int
2553 sys_chflags(struct lwp *l, void *v, register_t *retval)
2554 {
2555 struct sys_chflags_args /* {
2556 syscallarg(const char *) path;
2557 syscallarg(u_long) flags;
2558 } */ *uap = v;
2559 struct vnode *vp;
2560 int error;
2561 struct nameidata nd;
2562
2563 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2564 if ((error = namei(&nd)) != 0)
2565 return (error);
2566 vp = nd.ni_vp;
2567 error = change_flags(vp, SCARG(uap, flags), l);
2568 vput(vp);
2569 return (error);
2570 }
2571
2572 /*
2573 * Change flags of a file given a file descriptor.
2574 */
2575 /* ARGSUSED */
2576 int
2577 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2578 {
2579 struct sys_fchflags_args /* {
2580 syscallarg(int) fd;
2581 syscallarg(u_long) flags;
2582 } */ *uap = v;
2583 struct proc *p = l->l_proc;
2584 struct vnode *vp;
2585 struct file *fp;
2586 int error;
2587
2588 /* getvnode() will use the descriptor for us */
2589 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2590 return (error);
2591 vp = (struct vnode *)fp->f_data;
2592 error = change_flags(vp, SCARG(uap, flags), l);
2593 VOP_UNLOCK(vp, 0);
2594 FILE_UNUSE(fp, l);
2595 return (error);
2596 }
2597
2598 /*
2599 * Change flags of a file given a path name; this version does
2600 * not follow links.
2601 */
2602 int
2603 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2604 {
2605 struct sys_lchflags_args /* {
2606 syscallarg(const char *) path;
2607 syscallarg(u_long) flags;
2608 } */ *uap = v;
2609 struct vnode *vp;
2610 int error;
2611 struct nameidata nd;
2612
2613 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2614 if ((error = namei(&nd)) != 0)
2615 return (error);
2616 vp = nd.ni_vp;
2617 error = change_flags(vp, SCARG(uap, flags), l);
2618 vput(vp);
2619 return (error);
2620 }
2621
2622 /*
2623 * Common routine to change flags of a file.
2624 */
2625 int
2626 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2627 {
2628 struct vattr vattr;
2629 int error;
2630
2631 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2632 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2633 /*
2634 * Non-superusers cannot change the flags on devices, even if they
2635 * own them.
2636 */
2637 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2638 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2639 goto out;
2640 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2641 error = EINVAL;
2642 goto out;
2643 }
2644 }
2645 VATTR_NULL(&vattr);
2646 vattr.va_flags = flags;
2647 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2648 out:
2649 return (error);
2650 }
2651
2652 /*
2653 * Change mode of a file given path name; this version follows links.
2654 */
2655 /* ARGSUSED */
2656 int
2657 sys_chmod(struct lwp *l, void *v, register_t *retval)
2658 {
2659 struct sys_chmod_args /* {
2660 syscallarg(const char *) path;
2661 syscallarg(int) mode;
2662 } */ *uap = v;
2663 int error;
2664 struct nameidata nd;
2665
2666 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2667 if ((error = namei(&nd)) != 0)
2668 return (error);
2669
2670 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2671
2672 vrele(nd.ni_vp);
2673 return (error);
2674 }
2675
2676 /*
2677 * Change mode of a file given a file descriptor.
2678 */
2679 /* ARGSUSED */
2680 int
2681 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2682 {
2683 struct sys_fchmod_args /* {
2684 syscallarg(int) fd;
2685 syscallarg(int) mode;
2686 } */ *uap = v;
2687 struct proc *p = l->l_proc;
2688 struct file *fp;
2689 int error;
2690
2691 /* getvnode() will use the descriptor for us */
2692 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2693 return (error);
2694
2695 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2696 FILE_UNUSE(fp, l);
2697 return (error);
2698 }
2699
2700 /*
2701 * Change mode of a file given path name; this version does not follow links.
2702 */
2703 /* ARGSUSED */
2704 int
2705 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2706 {
2707 struct sys_lchmod_args /* {
2708 syscallarg(const char *) path;
2709 syscallarg(int) mode;
2710 } */ *uap = v;
2711 int error;
2712 struct nameidata nd;
2713
2714 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2715 if ((error = namei(&nd)) != 0)
2716 return (error);
2717
2718 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2719
2720 vrele(nd.ni_vp);
2721 return (error);
2722 }
2723
2724 /*
2725 * Common routine to set mode given a vnode.
2726 */
2727 static int
2728 change_mode(struct vnode *vp, int mode, struct lwp *l)
2729 {
2730 struct vattr vattr;
2731 int error;
2732
2733 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2734 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2735 VATTR_NULL(&vattr);
2736 vattr.va_mode = mode & ALLPERMS;
2737 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2738 VOP_UNLOCK(vp, 0);
2739 return (error);
2740 }
2741
2742 /*
2743 * Set ownership given a path name; this version follows links.
2744 */
2745 /* ARGSUSED */
2746 int
2747 sys_chown(struct lwp *l, void *v, register_t *retval)
2748 {
2749 struct sys_chown_args /* {
2750 syscallarg(const char *) path;
2751 syscallarg(uid_t) uid;
2752 syscallarg(gid_t) gid;
2753 } */ *uap = v;
2754 int error;
2755 struct nameidata nd;
2756
2757 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2758 if ((error = namei(&nd)) != 0)
2759 return (error);
2760
2761 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2762
2763 vrele(nd.ni_vp);
2764 return (error);
2765 }
2766
2767 /*
2768 * Set ownership given a path name; this version follows links.
2769 * Provides POSIX semantics.
2770 */
2771 /* ARGSUSED */
2772 int
2773 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2774 {
2775 struct sys_chown_args /* {
2776 syscallarg(const char *) path;
2777 syscallarg(uid_t) uid;
2778 syscallarg(gid_t) gid;
2779 } */ *uap = v;
2780 int error;
2781 struct nameidata nd;
2782
2783 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2784 if ((error = namei(&nd)) != 0)
2785 return (error);
2786
2787 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2788
2789 vrele(nd.ni_vp);
2790 return (error);
2791 }
2792
2793 /*
2794 * Set ownership given a file descriptor.
2795 */
2796 /* ARGSUSED */
2797 int
2798 sys_fchown(struct lwp *l, void *v, register_t *retval)
2799 {
2800 struct sys_fchown_args /* {
2801 syscallarg(int) fd;
2802 syscallarg(uid_t) uid;
2803 syscallarg(gid_t) gid;
2804 } */ *uap = v;
2805 struct proc *p = l->l_proc;
2806 int error;
2807 struct file *fp;
2808
2809 /* getvnode() will use the descriptor for us */
2810 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2811 return (error);
2812
2813 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2814 SCARG(uap, gid), l, 0);
2815 FILE_UNUSE(fp, l);
2816 return (error);
2817 }
2818
2819 /*
2820 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2821 */
2822 /* ARGSUSED */
2823 int
2824 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2825 {
2826 struct sys_fchown_args /* {
2827 syscallarg(int) fd;
2828 syscallarg(uid_t) uid;
2829 syscallarg(gid_t) gid;
2830 } */ *uap = v;
2831 struct proc *p = l->l_proc;
2832 int error;
2833 struct file *fp;
2834
2835 /* getvnode() will use the descriptor for us */
2836 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2837 return (error);
2838
2839 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2840 SCARG(uap, gid), l, 1);
2841 FILE_UNUSE(fp, l);
2842 return (error);
2843 }
2844
2845 /*
2846 * Set ownership given a path name; this version does not follow links.
2847 */
2848 /* ARGSUSED */
2849 int
2850 sys_lchown(struct lwp *l, void *v, register_t *retval)
2851 {
2852 struct sys_lchown_args /* {
2853 syscallarg(const char *) path;
2854 syscallarg(uid_t) uid;
2855 syscallarg(gid_t) gid;
2856 } */ *uap = v;
2857 int error;
2858 struct nameidata nd;
2859
2860 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2861 if ((error = namei(&nd)) != 0)
2862 return (error);
2863
2864 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2865
2866 vrele(nd.ni_vp);
2867 return (error);
2868 }
2869
2870 /*
2871 * Set ownership given a path name; this version does not follow links.
2872 * Provides POSIX/XPG semantics.
2873 */
2874 /* ARGSUSED */
2875 int
2876 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2877 {
2878 struct sys_lchown_args /* {
2879 syscallarg(const char *) path;
2880 syscallarg(uid_t) uid;
2881 syscallarg(gid_t) gid;
2882 } */ *uap = v;
2883 int error;
2884 struct nameidata nd;
2885
2886 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2887 if ((error = namei(&nd)) != 0)
2888 return (error);
2889
2890 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2891
2892 vrele(nd.ni_vp);
2893 return (error);
2894 }
2895
2896 /*
2897 * Common routine to set ownership given a vnode.
2898 */
2899 static int
2900 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2901 int posix_semantics)
2902 {
2903 struct vattr vattr;
2904 mode_t newmode;
2905 int error;
2906
2907 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2908 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2909 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2910 goto out;
2911
2912 #define CHANGED(x) ((int)(x) != -1)
2913 newmode = vattr.va_mode;
2914 if (posix_semantics) {
2915 /*
2916 * POSIX/XPG semantics: if the caller is not the super-user,
2917 * clear set-user-id and set-group-id bits. Both POSIX and
2918 * the XPG consider the behaviour for calls by the super-user
2919 * implementation-defined; we leave the set-user-id and set-
2920 * group-id settings intact in that case.
2921 */
2922 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2923 NULL) != 0)
2924 newmode &= ~(S_ISUID | S_ISGID);
2925 } else {
2926 /*
2927 * NetBSD semantics: when changing owner and/or group,
2928 * clear the respective bit(s).
2929 */
2930 if (CHANGED(uid))
2931 newmode &= ~S_ISUID;
2932 if (CHANGED(gid))
2933 newmode &= ~S_ISGID;
2934 }
2935 /* Update va_mode iff altered. */
2936 if (vattr.va_mode == newmode)
2937 newmode = VNOVAL;
2938
2939 VATTR_NULL(&vattr);
2940 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2941 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2942 vattr.va_mode = newmode;
2943 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2944 #undef CHANGED
2945
2946 out:
2947 VOP_UNLOCK(vp, 0);
2948 return (error);
2949 }
2950
2951 /*
2952 * Set the access and modification times given a path name; this
2953 * version follows links.
2954 */
2955 /* ARGSUSED */
2956 int
2957 sys_utimes(struct lwp *l, void *v, register_t *retval)
2958 {
2959 struct sys_utimes_args /* {
2960 syscallarg(const char *) path;
2961 syscallarg(const struct timeval *) tptr;
2962 } */ *uap = v;
2963 int error;
2964 struct nameidata nd;
2965
2966 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2967 if ((error = namei(&nd)) != 0)
2968 return (error);
2969
2970 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2971
2972 vrele(nd.ni_vp);
2973 return (error);
2974 }
2975
2976 /*
2977 * Set the access and modification times given a file descriptor.
2978 */
2979 /* ARGSUSED */
2980 int
2981 sys_futimes(struct lwp *l, void *v, register_t *retval)
2982 {
2983 struct sys_futimes_args /* {
2984 syscallarg(int) fd;
2985 syscallarg(const struct timeval *) tptr;
2986 } */ *uap = v;
2987 struct proc *p = l->l_proc;
2988 int error;
2989 struct file *fp;
2990
2991 /* getvnode() will use the descriptor for us */
2992 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2993 return (error);
2994
2995 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
2996 FILE_UNUSE(fp, l);
2997 return (error);
2998 }
2999
3000 /*
3001 * Set the access and modification times given a path name; this
3002 * version does not follow links.
3003 */
3004 /* ARGSUSED */
3005 int
3006 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3007 {
3008 struct sys_lutimes_args /* {
3009 syscallarg(const char *) path;
3010 syscallarg(const struct timeval *) tptr;
3011 } */ *uap = v;
3012 int error;
3013 struct nameidata nd;
3014
3015 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3016 if ((error = namei(&nd)) != 0)
3017 return (error);
3018
3019 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3020
3021 vrele(nd.ni_vp);
3022 return (error);
3023 }
3024
3025 /*
3026 * Common routine to set access and modification times given a vnode.
3027 */
3028 static int
3029 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
3030 {
3031 struct vattr vattr;
3032 int error;
3033
3034 VATTR_NULL(&vattr);
3035 if (tptr == NULL) {
3036 nanotime(&vattr.va_atime);
3037 vattr.va_mtime = vattr.va_atime;
3038 vattr.va_vaflags |= VA_UTIMES_NULL;
3039 } else {
3040 struct timeval tv[2];
3041
3042 error = copyin(tptr, tv, sizeof(tv));
3043 if (error)
3044 goto out;
3045 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3046 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3047 }
3048 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3049 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3050 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3051 VOP_UNLOCK(vp, 0);
3052 out:
3053 return (error);
3054 }
3055
3056 /*
3057 * Truncate a file given its path name.
3058 */
3059 /* ARGSUSED */
3060 int
3061 sys_truncate(struct lwp *l, void *v, register_t *retval)
3062 {
3063 struct sys_truncate_args /* {
3064 syscallarg(const char *) path;
3065 syscallarg(int) pad;
3066 syscallarg(off_t) length;
3067 } */ *uap = v;
3068 struct vnode *vp;
3069 struct vattr vattr;
3070 int error;
3071 struct nameidata nd;
3072
3073 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3074 if ((error = namei(&nd)) != 0)
3075 return (error);
3076 vp = nd.ni_vp;
3077 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3078 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3079 if (vp->v_type == VDIR)
3080 error = EISDIR;
3081 else if ((error = vn_writechk(vp)) == 0 &&
3082 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3083 VATTR_NULL(&vattr);
3084 vattr.va_size = SCARG(uap, length);
3085 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3086 }
3087 vput(vp);
3088 return (error);
3089 }
3090
3091 /*
3092 * Truncate a file given a file descriptor.
3093 */
3094 /* ARGSUSED */
3095 int
3096 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3097 {
3098 struct sys_ftruncate_args /* {
3099 syscallarg(int) fd;
3100 syscallarg(int) pad;
3101 syscallarg(off_t) length;
3102 } */ *uap = v;
3103 struct proc *p = l->l_proc;
3104 struct vattr vattr;
3105 struct vnode *vp;
3106 struct file *fp;
3107 int error;
3108
3109 /* getvnode() will use the descriptor for us */
3110 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3111 return (error);
3112 if ((fp->f_flag & FWRITE) == 0) {
3113 error = EINVAL;
3114 goto out;
3115 }
3116 vp = (struct vnode *)fp->f_data;
3117 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3119 if (vp->v_type == VDIR)
3120 error = EISDIR;
3121 else if ((error = vn_writechk(vp)) == 0) {
3122 VATTR_NULL(&vattr);
3123 vattr.va_size = SCARG(uap, length);
3124 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3125 }
3126 VOP_UNLOCK(vp, 0);
3127 out:
3128 FILE_UNUSE(fp, l);
3129 return (error);
3130 }
3131
3132 /*
3133 * Sync an open file.
3134 */
3135 /* ARGSUSED */
3136 int
3137 sys_fsync(struct lwp *l, void *v, register_t *retval)
3138 {
3139 struct sys_fsync_args /* {
3140 syscallarg(int) fd;
3141 } */ *uap = v;
3142 struct proc *p = l->l_proc;
3143 struct vnode *vp;
3144 struct file *fp;
3145 int error;
3146
3147 /* getvnode() will use the descriptor for us */
3148 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3149 return (error);
3150 vp = (struct vnode *)fp->f_data;
3151 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3152 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3153 if (error == 0 && bioops.io_fsync != NULL &&
3154 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) {
3155 KERNEL_LOCK(1, l);
3156 (*bioops.io_fsync)(vp, 0);
3157 KERNEL_UNLOCK_ONE(l);
3158 }
3159 VOP_UNLOCK(vp, 0);
3160 FILE_UNUSE(fp, l);
3161 return (error);
3162 }
3163
3164 /*
3165 * Sync a range of file data. API modeled after that found in AIX.
3166 *
3167 * FDATASYNC indicates that we need only save enough metadata to be able
3168 * to re-read the written data. Note we duplicate AIX's requirement that
3169 * the file be open for writing.
3170 */
3171 /* ARGSUSED */
3172 int
3173 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3174 {
3175 struct sys_fsync_range_args /* {
3176 syscallarg(int) fd;
3177 syscallarg(int) flags;
3178 syscallarg(off_t) start;
3179 syscallarg(off_t) length;
3180 } */ *uap = v;
3181 struct proc *p = l->l_proc;
3182 struct vnode *vp;
3183 struct file *fp;
3184 int flags, nflags;
3185 off_t s, e, len;
3186 int error;
3187
3188 /* getvnode() will use the descriptor for us */
3189 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3190 return (error);
3191
3192 if ((fp->f_flag & FWRITE) == 0) {
3193 error = EBADF;
3194 goto out;
3195 }
3196
3197 flags = SCARG(uap, flags);
3198 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3199 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3200 error = EINVAL;
3201 goto out;
3202 }
3203 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3204 if (flags & FDATASYNC)
3205 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3206 else
3207 nflags = FSYNC_WAIT;
3208 if (flags & FDISKSYNC)
3209 nflags |= FSYNC_CACHE;
3210
3211 len = SCARG(uap, length);
3212 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3213 if (len) {
3214 s = SCARG(uap, start);
3215 e = s + len;
3216 if (e < s) {
3217 error = EINVAL;
3218 goto out;
3219 }
3220 } else {
3221 e = 0;
3222 s = 0;
3223 }
3224
3225 vp = (struct vnode *)fp->f_data;
3226 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3227 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3228
3229 if (error == 0 && bioops.io_fsync != NULL &&
3230 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) {
3231 KERNEL_LOCK(1, l);
3232 (*bioops.io_fsync)(vp, nflags);
3233 KERNEL_UNLOCK_ONE(l);
3234 }
3235
3236 VOP_UNLOCK(vp, 0);
3237 out:
3238 FILE_UNUSE(fp, l);
3239 return (error);
3240 }
3241
3242 /*
3243 * Sync the data of an open file.
3244 */
3245 /* ARGSUSED */
3246 int
3247 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3248 {
3249 struct sys_fdatasync_args /* {
3250 syscallarg(int) fd;
3251 } */ *uap = v;
3252 struct proc *p = l->l_proc;
3253 struct vnode *vp;
3254 struct file *fp;
3255 int error;
3256
3257 /* getvnode() will use the descriptor for us */
3258 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3259 return (error);
3260 if ((fp->f_flag & FWRITE) == 0) {
3261 FILE_UNUSE(fp, l);
3262 return (EBADF);
3263 }
3264 vp = (struct vnode *)fp->f_data;
3265 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3266 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3267 VOP_UNLOCK(vp, 0);
3268 FILE_UNUSE(fp, l);
3269 return (error);
3270 }
3271
3272 /*
3273 * Rename files, (standard) BSD semantics frontend.
3274 */
3275 /* ARGSUSED */
3276 int
3277 sys_rename(struct lwp *l, void *v, register_t *retval)
3278 {
3279 struct sys_rename_args /* {
3280 syscallarg(const char *) from;
3281 syscallarg(const char *) to;
3282 } */ *uap = v;
3283
3284 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3285 }
3286
3287 /*
3288 * Rename files, POSIX semantics frontend.
3289 */
3290 /* ARGSUSED */
3291 int
3292 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3293 {
3294 struct sys___posix_rename_args /* {
3295 syscallarg(const char *) from;
3296 syscallarg(const char *) to;
3297 } */ *uap = v;
3298
3299 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3300 }
3301
3302 /*
3303 * Rename files. Source and destination must either both be directories,
3304 * or both not be directories. If target is a directory, it must be empty.
3305 * If `from' and `to' refer to the same object, the value of the `retain'
3306 * argument is used to determine whether `from' will be
3307 *
3308 * (retain == 0) deleted unless `from' and `to' refer to the same
3309 * object in the file system's name space (BSD).
3310 * (retain == 1) always retained (POSIX).
3311 */
3312 static int
3313 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3314 {
3315 struct vnode *tvp, *fvp, *tdvp;
3316 struct nameidata fromnd, tond;
3317 struct proc *p;
3318 int error;
3319
3320 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART, UIO_USERSPACE,
3321 from, l);
3322 if ((error = namei(&fromnd)) != 0)
3323 return (error);
3324 if (fromnd.ni_dvp != fromnd.ni_vp)
3325 VOP_UNLOCK(fromnd.ni_dvp, 0);
3326 fvp = fromnd.ni_vp;
3327 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3328 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3329 if ((error = namei(&tond)) != 0) {
3330 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3331 vrele(fromnd.ni_dvp);
3332 vrele(fvp);
3333 goto out1;
3334 }
3335 tdvp = tond.ni_dvp;
3336 tvp = tond.ni_vp;
3337
3338 if (tvp != NULL) {
3339 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3340 error = ENOTDIR;
3341 goto out;
3342 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3343 error = EISDIR;
3344 goto out;
3345 }
3346 }
3347
3348 if (fvp == tdvp)
3349 error = EINVAL;
3350
3351 /*
3352 * Source and destination refer to the same object.
3353 */
3354 if (fvp == tvp) {
3355 if (retain)
3356 error = -1;
3357 else if (fromnd.ni_dvp == tdvp &&
3358 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3359 !memcmp(fromnd.ni_cnd.cn_nameptr,
3360 tond.ni_cnd.cn_nameptr,
3361 fromnd.ni_cnd.cn_namelen))
3362 error = -1;
3363 }
3364
3365 #if NVERIEXEC > 0
3366 if (!error) {
3367 pathname_t frompath = NULL, topath = NULL;
3368
3369 error = pathname_get(fromnd.ni_dirp, fromnd.ni_segflg,
3370 &frompath);
3371 if (!error)
3372 error = pathname_get(tond.ni_dirp, tond.ni_segflg,
3373 &topath);
3374 if (!error)
3375 error = veriexec_renamechk(fvp, pathname_path(frompath),
3376 tvp, pathname_path(topath), l);
3377
3378 pathname_put(frompath);
3379 pathname_put(topath);
3380 }
3381 #endif /* NVERIEXEC > 0 */
3382
3383 out:
3384 p = l->l_proc;
3385 if (!error) {
3386 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3387 if (fromnd.ni_dvp != tdvp)
3388 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3389 if (tvp) {
3390 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3391 }
3392 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3393 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3394 } else {
3395 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3396 if (tdvp == tvp)
3397 vrele(tdvp);
3398 else
3399 vput(tdvp);
3400 if (tvp)
3401 vput(tvp);
3402 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3403 vrele(fromnd.ni_dvp);
3404 vrele(fvp);
3405 }
3406 vrele(tond.ni_startdir);
3407 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3408 out1:
3409 if (fromnd.ni_startdir)
3410 vrele(fromnd.ni_startdir);
3411 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3412 return (error == -1 ? 0 : error);
3413 }
3414
3415 /*
3416 * Make a directory file.
3417 */
3418 /* ARGSUSED */
3419 int
3420 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3421 {
3422 struct sys_mkdir_args /* {
3423 syscallarg(const char *) path;
3424 syscallarg(int) mode;
3425 } */ *uap = v;
3426 struct proc *p = l->l_proc;
3427 struct vnode *vp;
3428 struct vattr vattr;
3429 int error;
3430 struct nameidata nd;
3431
3432 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3433 SCARG(uap, path), l);
3434 if ((error = namei(&nd)) != 0)
3435 return (error);
3436 vp = nd.ni_vp;
3437 if (vp != NULL) {
3438 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3439 if (nd.ni_dvp == vp)
3440 vrele(nd.ni_dvp);
3441 else
3442 vput(nd.ni_dvp);
3443 vrele(vp);
3444 return (EEXIST);
3445 }
3446 VATTR_NULL(&vattr);
3447 vattr.va_type = VDIR;
3448 /* We will read cwdi->cwdi_cmask unlocked. */
3449 vattr.va_mode =
3450 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3451 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3452 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3453 if (!error)
3454 vput(nd.ni_vp);
3455 return (error);
3456 }
3457
3458 /*
3459 * Remove a directory file.
3460 */
3461 /* ARGSUSED */
3462 int
3463 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3464 {
3465 struct sys_rmdir_args /* {
3466 syscallarg(const char *) path;
3467 } */ *uap = v;
3468 struct vnode *vp;
3469 int error;
3470 struct nameidata nd;
3471
3472 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3473 SCARG(uap, path), l);
3474 if ((error = namei(&nd)) != 0)
3475 return (error);
3476 vp = nd.ni_vp;
3477 if (vp->v_type != VDIR) {
3478 error = ENOTDIR;
3479 goto out;
3480 }
3481 /*
3482 * No rmdir "." please.
3483 */
3484 if (nd.ni_dvp == vp) {
3485 error = EINVAL;
3486 goto out;
3487 }
3488 /*
3489 * The root of a mounted filesystem cannot be deleted.
3490 */
3491 if (vp->v_flag & VROOT) {
3492 error = EBUSY;
3493 goto out;
3494 }
3495 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3496 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3497 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3498 return (error);
3499
3500 out:
3501 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3502 if (nd.ni_dvp == vp)
3503 vrele(nd.ni_dvp);
3504 else
3505 vput(nd.ni_dvp);
3506 vput(vp);
3507 return (error);
3508 }
3509
3510 /*
3511 * Read a block of directory entries in a file system independent format.
3512 */
3513 int
3514 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3515 {
3516 struct sys___getdents30_args /* {
3517 syscallarg(int) fd;
3518 syscallarg(char *) buf;
3519 syscallarg(size_t) count;
3520 } */ *uap = v;
3521 struct proc *p = l->l_proc;
3522 struct file *fp;
3523 int error, done;
3524
3525 /* getvnode() will use the descriptor for us */
3526 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3527 return (error);
3528 if ((fp->f_flag & FREAD) == 0) {
3529 error = EBADF;
3530 goto out;
3531 }
3532 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3533 SCARG(uap, count), &done, l, 0, 0);
3534 #ifdef KTRACE
3535 if (!error && KTRPOINT(p, KTR_GENIO)) {
3536 struct iovec iov;
3537 iov.iov_base = SCARG(uap, buf);
3538 iov.iov_len = done;
3539 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3540 }
3541 #endif
3542 *retval = done;
3543 out:
3544 FILE_UNUSE(fp, l);
3545 return (error);
3546 }
3547
3548 /*
3549 * Set the mode mask for creation of filesystem nodes.
3550 */
3551 int
3552 sys_umask(struct lwp *l, void *v, register_t *retval)
3553 {
3554 struct sys_umask_args /* {
3555 syscallarg(mode_t) newmask;
3556 } */ *uap = v;
3557 struct proc *p = l->l_proc;
3558 struct cwdinfo *cwdi;
3559
3560 /*
3561 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3562 * important is that we serialize changes to the mask. The
3563 * rw_exit() will issue a write memory barrier on our behalf,
3564 * and force the changes out to other CPUs (as it must use an
3565 * atomic operation, draining the local CPU's store buffers).
3566 */
3567 cwdi = p->p_cwdi;
3568 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3569 *retval = cwdi->cwdi_cmask;
3570 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3571 rw_exit(&cwdi->cwdi_lock);
3572
3573 return (0);
3574 }
3575
3576 /*
3577 * Void all references to file by ripping underlying filesystem
3578 * away from vnode.
3579 */
3580 /* ARGSUSED */
3581 int
3582 sys_revoke(struct lwp *l, void *v, register_t *retval)
3583 {
3584 struct sys_revoke_args /* {
3585 syscallarg(const char *) path;
3586 } */ *uap = v;
3587 struct vnode *vp;
3588 struct vattr vattr;
3589 int error;
3590 struct nameidata nd;
3591
3592 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3593 if ((error = namei(&nd)) != 0)
3594 return (error);
3595 vp = nd.ni_vp;
3596 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3597 goto out;
3598 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3599 (error = kauth_authorize_generic(l->l_cred,
3600 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3601 goto out;
3602 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3603 VOP_REVOKE(vp, REVOKEALL);
3604 out:
3605 vrele(vp);
3606 return (error);
3607 }
3608
3609 /*
3610 * Convert a user file descriptor to a kernel file entry.
3611 */
3612 int
3613 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3614 {
3615 struct vnode *vp;
3616 struct file *fp;
3617
3618 if ((fp = fd_getfile(fdp, fd)) == NULL)
3619 return (EBADF);
3620
3621 FILE_USE(fp);
3622
3623 if (fp->f_type != DTYPE_VNODE) {
3624 FILE_UNUSE(fp, NULL);
3625 return (EINVAL);
3626 }
3627
3628 vp = (struct vnode *)fp->f_data;
3629 if (vp->v_type == VBAD) {
3630 FILE_UNUSE(fp, NULL);
3631 return (EBADF);
3632 }
3633
3634 *fpp = fp;
3635 return (0);
3636 }
3637