vfs_syscalls.c revision 1.297 1 /* $NetBSD: vfs_syscalls.c,v 1.297 2007/01/19 14:49:10 hannken Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.297 2007/01/19 14:49:10 hannken Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/sa.h>
65 #include <sys/syscallargs.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #if NVERIEXEC > 0
73 #include <sys/verified_exec.h>
74 #include <sys/syslog.h>
75 #endif /* NVERIEXEC > 0 */
76 #include <sys/kauth.h>
77
78 #include <miscfs/genfs/genfs.h>
79 #include <miscfs/syncfs/syncfs.h>
80
81 #ifdef COMPAT_30
82 #include "opt_nfsserver.h"
83 #include <nfs/rpcv2.h>
84 #endif
85 #include <nfs/nfsproto.h>
86 #ifdef COMPAT_30
87 #include <nfs/nfs.h>
88 #include <nfs/nfs_var.h>
89 #endif
90
91 #if NFSS > 0
92 #include <dev/fssvar.h>
93 #endif
94
95 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
96
97 static int change_dir(struct nameidata *, struct lwp *);
98 static int change_flags(struct vnode *, u_long, struct lwp *);
99 static int change_mode(struct vnode *, int, struct lwp *l);
100 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
101 static int change_utimes(struct vnode *vp, const struct timeval *,
102 struct lwp *l);
103 static int rename_files(const char *, const char *, struct lwp *, int);
104
105 void checkdirs(struct vnode *);
106
107 static int mount_update(struct lwp *, struct vnode *, const char *, int,
108 void *, struct nameidata *);
109 static int mount_domount(struct lwp *, struct vnode *, const char *,
110 const char *, int, void *, struct nameidata *);
111 static int mount_getargs(struct lwp *, struct vnode *, const char *, int,
112 void *, struct nameidata *);
113
114 int dovfsusermount = 0;
115
116 /*
117 * Virtual File System System Calls
118 */
119
120 /*
121 * Mount a file system.
122 */
123
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Translation table from the historic numeric file-system type codes
 * (4.3BSD and NetBSD 0.9 mount syscalls) to file-system names.  The
 * array index IS the numeric code, so entries must never be reordered,
 * removed or appended to; a NULL slot marks a code with no file system
 * behind it.
 */
const char * const mountcompatnames[] = {
	NULL,			/* 0 = MOUNT_NONE */
	MOUNT_FFS,		/* 1 = MOUNT_UFS */
	MOUNT_NFS,		/* 2 */
	MOUNT_MFS,		/* 3 */
	MOUNT_MSDOS,		/* 4 */
	MOUNT_CD9660,		/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,		/* 6 */
	MOUNT_KERNFS,		/* 7 */
	NULL,			/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,		/* 9 */
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
147
148 static int
149 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
150 void *data, struct nameidata *ndp)
151 {
152 struct mount *mp;
153 int error = 0, saved_flags;
154
155 mp = vp->v_mount;
156 saved_flags = mp->mnt_flag;
157
158 /* We can't operate on VROOT here. */
159 if ((vp->v_flag & VROOT) == 0) {
160 vput(vp);
161 error = EINVAL;
162 goto out;
163 }
164
165 /*
166 * We only allow the filesystem to be reloaded if it
167 * is currently mounted read-only.
168 */
169 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
170 error = EOPNOTSUPP; /* Needs translation */
171 goto out;
172 }
173
174 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
175 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
176 if (error)
177 goto out;
178
179 if (vfs_busy(mp, LK_NOWAIT, 0)) {
180 error = EPERM;
181 goto out;
182 }
183
184 mp->mnt_flag &= ~MNT_OP_FLAGS;
185 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
186
187 /*
188 * Set the mount level flags.
189 */
190 if (flags & MNT_RDONLY)
191 mp->mnt_flag |= MNT_RDONLY;
192 else if (mp->mnt_flag & MNT_RDONLY)
193 mp->mnt_iflag |= IMNT_WANTRDWR;
194 mp->mnt_flag &=
195 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
196 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
197 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
198 mp->mnt_flag |= flags &
199 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
200 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
201 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
202 MNT_IGNORE);
203
204 error = VFS_MOUNT(mp, path, data, ndp, l);
205
206 #if defined(COMPAT_30) && defined(NFSSERVER)
207 if (error) {
208 int error2;
209
210 /* Update failed; let's try and see if it was an
211 * export request. */
212 error2 = nfs_update_exports_30(mp, path, data, l);
213
214 /* Only update error code if the export request was
215 * understood but some problem occurred while
216 * processing it. */
217 if (error2 != EJUSTRETURN)
218 error = error2;
219 }
220 #endif
221 if (mp->mnt_iflag & IMNT_WANTRDWR)
222 mp->mnt_flag &= ~MNT_RDONLY;
223 if (error)
224 mp->mnt_flag = saved_flags;
225 mp->mnt_flag &= ~MNT_OP_FLAGS;
226 mp->mnt_iflag &= ~IMNT_WANTRDWR;
227 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
228 if (mp->mnt_syncer == NULL)
229 error = vfs_allocate_syncvnode(mp);
230 } else {
231 if (mp->mnt_syncer != NULL)
232 vfs_deallocate_syncvnode(mp);
233 }
234 vfs_unbusy(mp);
235
236 out:
237 return (error);
238 }
239
240 static int
241 mount_domount(struct lwp *l, struct vnode *vp, const char *fstype,
242 const char *path, int flags, void *data, struct nameidata *ndp)
243 {
244 struct mount *mp = NULL;
245 struct vattr va;
246 char fstypename[MFSNAMELEN];
247 int error;
248
249 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
250 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
251 if (error) {
252 vput(vp);
253 goto out;
254 }
255
256 /* Can't make a non-dir a mount-point (from here anyway). */
257 if (vp->v_type != VDIR) {
258 error = ENOTDIR;
259 vput(vp);
260 goto out;
261 }
262
263 /*
264 * If the user is not root, ensure that they own the directory
265 * onto which we are attempting to mount.
266 */
267 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
268 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
269 (error = kauth_authorize_generic(l->l_cred,
270 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
271 vput(vp);
272 goto out;
273 }
274
275 if (flags & MNT_EXPORTED) {
276 error = EINVAL;
277 vput(vp);
278 goto out;
279 }
280
281 /*
282 * Copy file-system type from userspace.
283 */
284 error = copyinstr(fstype, fstypename, MFSNAMELEN, NULL);
285 if (error) {
286 #if defined(COMPAT_09) || defined(COMPAT_43)
287 /*
288 * Historically, filesystem types were identified by numbers.
289 * If we get an integer for the filesystem type instead of a
290 * string, we check to see if it matches one of the historic
291 * filesystem types.
292 */
293 u_long fsindex = (u_long)fstype;
294 if (fsindex >= nmountcompatnames ||
295 mountcompatnames[fsindex] == NULL) {
296 error = ENODEV;
297 vput(vp);
298 goto out;
299 }
300 strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
301 #else
302 vput(vp);
303 goto out;
304 #endif
305 }
306
307 #ifdef COMPAT_10
308 /* Accept `ufs' as an alias for `ffs'. */
309 if (strncmp(fstypename, "ufs", MFSNAMELEN) == 0)
310 strlcpy(fstypename, "ffs", sizeof(fstypename));
311 #endif
312
313 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
314 vput(vp);
315 goto out;
316 }
317
318 /*
319 * Check if a file-system is not already mounted on this vnode.
320 */
321 if (vp->v_mountedhere != NULL) {
322 error = EBUSY;
323 vput(vp);
324 goto out;
325 }
326
327 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
328
329 if ((mp->mnt_op = vfs_getopsbyname(fstypename)) == NULL) {
330 free(mp, M_MOUNT);
331 error = ENODEV;
332 vput(vp);
333 goto out;
334 }
335
336 TAILQ_INIT(&mp->mnt_vnodelist);
337 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
338 simple_lock_init(&mp->mnt_slock);
339 (void)vfs_busy(mp, LK_NOWAIT, 0);
340
341 mp->mnt_op->vfs_refcount++;
342 mp->mnt_vnodecovered = vp;
343 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
344 mp->mnt_unmounter = NULL;
345 mp->mnt_leaf = mp;
346 mount_initspecific(mp);
347
348 /*
349 * The underlying file system may refuse the mount for
350 * various reasons. Allow the user to force it to happen.
351 *
352 * Set the mount level flags.
353 */
354 mp->mnt_flag = flags &
355 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
356 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
357 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
358 MNT_IGNORE | MNT_RDONLY);
359
360 error = VFS_MOUNT(mp, path, data, ndp, l);
361 mp->mnt_flag &= ~MNT_OP_FLAGS;
362
363 /*
364 * Put the new filesystem on the mount list after root.
365 */
366 cache_purge(vp);
367 if (!error) {
368 mp->mnt_iflag &= ~IMNT_WANTRDWR;
369 vp->v_mountedhere = mp;
370 simple_lock(&mountlist_slock);
371 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
372 simple_unlock(&mountlist_slock);
373 checkdirs(vp);
374 VOP_UNLOCK(vp, 0);
375 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
376 error = vfs_allocate_syncvnode(mp);
377 vfs_unbusy(mp);
378 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
379 error = VFS_START(mp, 0, l);
380 if (error)
381 vrele(vp);
382 } else {
383 vp->v_mountedhere = NULL;
384 mp->mnt_op->vfs_refcount--;
385 vfs_unbusy(mp);
386 free(mp, M_MOUNT);
387 vput(vp);
388 }
389
390 out:
391 return (error);
392 }
393
394 static int
395 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
396 void *data, struct nameidata *ndp)
397 {
398 struct mount *mp;
399 int error;
400
401 /* If MNT_GETARGS is specified, it should be the only flag. */
402 if (flags & ~MNT_GETARGS) {
403 error = EINVAL;
404 goto out;
405 }
406
407 mp = vp->v_mount;
408
409 /* XXX: probably some notion of "can see" here if we want isolation. */
410 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
411 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
412 if (error)
413 goto out;
414
415 if ((vp->v_flag & VROOT) == 0) {
416 error = EINVAL;
417 goto out;
418 }
419
420 if (vfs_busy(mp, LK_NOWAIT, 0)) {
421 error = EPERM;
422 goto out;
423 }
424
425 mp->mnt_flag &= ~MNT_OP_FLAGS;
426 mp->mnt_flag |= MNT_GETARGS;
427 error = VFS_MOUNT(mp, path, data, ndp, l);
428 mp->mnt_flag &= ~MNT_OP_FLAGS;
429
430 vfs_unbusy(mp);
431 out:
432 return (error);
433 }
434
435 /* ARGSUSED */
436 int
437 sys_mount(struct lwp *l, void *v, register_t *retval)
438 {
439 struct sys_mount_args /* {
440 syscallarg(const char *) type;
441 syscallarg(const char *) path;
442 syscallarg(int) flags;
443 syscallarg(void *) data;
444 } */ *uap = v;
445 struct vnode *vp;
446 struct nameidata nd;
447 int error;
448
449 /*
450 * Get vnode to be covered
451 */
452 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
453 SCARG(uap, path), l);
454 if ((error = namei(&nd)) != 0)
455 return (error);
456 vp = nd.ni_vp;
457
458 /*
459 * A lookup in VFS_MOUNT might result in an attempt to
460 * lock this vnode again, so make the lock recursive.
461 */
462 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
463
464 if (SCARG(uap, flags) & MNT_GETARGS) {
465 error = mount_getargs(l, vp, SCARG(uap, path),
466 SCARG(uap, flags), SCARG(uap, data), &nd);
467 vput(vp);
468 } else if (SCARG(uap, flags) & MNT_UPDATE) {
469 error = mount_update(l, vp, SCARG(uap, path),
470 SCARG(uap, flags), SCARG(uap, data), &nd);
471 vput(vp);
472 } else {
473 /* Locking is handled internally in mount_domount(). */
474 error = mount_domount(l, vp, SCARG(uap, type),
475 SCARG(uap, path), SCARG(uap, flags), SCARG(uap, data), &nd);
476 }
477
478 return (error);
479 }
480
481 /*
482 * Scan all active processes to see if any of them have a current
483 * or root directory onto which the new filesystem has just been
484 * mounted. If so, replace them with the new mount point.
485 */
486 void
487 checkdirs(struct vnode *olddp)
488 {
489 struct cwdinfo *cwdi;
490 struct vnode *newdp;
491 struct proc *p;
492
493 if (olddp->v_usecount == 1)
494 return;
495 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
496 panic("mount: lost mount");
497 proclist_lock_read();
498 PROCLIST_FOREACH(p, &allproc) {
499 cwdi = p->p_cwdi;
500 if (!cwdi)
501 continue;
502 if (cwdi->cwdi_cdir == olddp) {
503 vrele(cwdi->cwdi_cdir);
504 VREF(newdp);
505 cwdi->cwdi_cdir = newdp;
506 }
507 if (cwdi->cwdi_rdir == olddp) {
508 vrele(cwdi->cwdi_rdir);
509 VREF(newdp);
510 cwdi->cwdi_rdir = newdp;
511 }
512 }
513 proclist_unlock_read();
514 if (rootvnode == olddp) {
515 vrele(rootvnode);
516 VREF(newdp);
517 rootvnode = newdp;
518 }
519 vput(newdp);
520 }
521
522 /*
523 * Unmount a file system.
524 *
525 * Note: unmount takes a path to the vnode mounted on as argument,
526 * not special file (as before).
527 */
528 /* ARGSUSED */
529 int
530 sys_unmount(struct lwp *l, void *v, register_t *retval)
531 {
532 struct sys_unmount_args /* {
533 syscallarg(const char *) path;
534 syscallarg(int) flags;
535 } */ *uap = v;
536 struct vnode *vp;
537 struct mount *mp;
538 int error;
539 struct nameidata nd;
540
541 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
542 SCARG(uap, path), l);
543 if ((error = namei(&nd)) != 0)
544 return (error);
545 vp = nd.ni_vp;
546 mp = vp->v_mount;
547
548 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
549 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
550 if (error) {
551 vput(vp);
552 return (error);
553 }
554
555 /*
556 * Don't allow unmounting the root file system.
557 */
558 if (mp->mnt_flag & MNT_ROOTFS) {
559 vput(vp);
560 return (EINVAL);
561 }
562
563 /*
564 * Must be the root of the filesystem
565 */
566 if ((vp->v_flag & VROOT) == 0) {
567 vput(vp);
568 return (EINVAL);
569 }
570 vput(vp);
571
572 /*
573 * XXX Freeze syncer. Must do this before locking the
574 * mount point. See dounmount() for details.
575 */
576 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
577
578 if (vfs_busy(mp, 0, 0)) {
579 lockmgr(&syncer_lock, LK_RELEASE, NULL);
580 return (EBUSY);
581 }
582
583 return (dounmount(mp, SCARG(uap, flags), l));
584 }
585
586 /*
587 * Do the actual file system unmount. File system is assumed to have been
588 * marked busy by the caller.
589 */
590 int
591 dounmount(struct mount *mp, int flags, struct lwp *l)
592 {
593 struct vnode *coveredvp;
594 int error;
595 int async;
596 int used_syncer;
597
598 #if NVERIEXEC > 0
599 error = veriexec_unmountchk(mp);
600 if (error)
601 return (error);
602 #endif /* NVERIEXEC > 0 */
603
604 simple_lock(&mountlist_slock);
605 vfs_unbusy(mp);
606 used_syncer = (mp->mnt_syncer != NULL);
607
608 /*
609 * XXX Syncer must be frozen when we get here. This should really
610 * be done on a per-mountpoint basis, but especially the softdep
611 * code possibly called from the syncer doesn't exactly work on a
612 * per-mountpoint basis, so the softdep code would become a maze
613 * of vfs_busy() calls.
614 *
615 * The caller of dounmount() must acquire syncer_lock because
616 * the syncer itself acquires locks in syncer_lock -> vfs_busy
617 * order, and we must preserve that order to avoid deadlock.
618 *
619 * So, if the file system did not use the syncer, now is
620 * the time to release the syncer_lock.
621 */
622 if (used_syncer == 0)
623 lockmgr(&syncer_lock, LK_RELEASE, NULL);
624
625 mp->mnt_iflag |= IMNT_UNMOUNT;
626 mp->mnt_unmounter = l;
627 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
628 vn_start_write(NULL, &mp, V_WAIT);
629
630 async = mp->mnt_flag & MNT_ASYNC;
631 mp->mnt_flag &= ~MNT_ASYNC;
632 cache_purgevfs(mp); /* remove cache entries for this file sys */
633 if (mp->mnt_syncer != NULL)
634 vfs_deallocate_syncvnode(mp);
635 error = 0;
636 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
637 #if NFSS > 0
638 error = fss_umount_hook(mp, (flags & MNT_FORCE));
639 #endif
640 if (error == 0)
641 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
642 }
643 if (error == 0 || (flags & MNT_FORCE))
644 error = VFS_UNMOUNT(mp, flags, l);
645 vn_finished_write(mp, 0);
646 simple_lock(&mountlist_slock);
647 if (error) {
648 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
649 (void) vfs_allocate_syncvnode(mp);
650 mp->mnt_iflag &= ~IMNT_UNMOUNT;
651 mp->mnt_unmounter = NULL;
652 mp->mnt_flag |= async;
653 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
654 &mountlist_slock);
655 if (used_syncer)
656 lockmgr(&syncer_lock, LK_RELEASE, NULL);
657 simple_lock(&mp->mnt_slock);
658 while (mp->mnt_wcnt > 0) {
659 wakeup(mp);
660 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
661 0, &mp->mnt_slock);
662 }
663 simple_unlock(&mp->mnt_slock);
664 return (error);
665 }
666 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
667 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
668 coveredvp->v_mountedhere = NULL;
669 mp->mnt_op->vfs_refcount--;
670 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
671 panic("unmount: dangling vnode");
672 mp->mnt_iflag |= IMNT_GONE;
673 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
674 if (coveredvp != NULLVP)
675 vrele(coveredvp);
676 mount_finispecific(mp);
677 if (used_syncer)
678 lockmgr(&syncer_lock, LK_RELEASE, NULL);
679 simple_lock(&mp->mnt_slock);
680 while (mp->mnt_wcnt > 0) {
681 wakeup(mp);
682 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
683 }
684 simple_unlock(&mp->mnt_slock);
685 vfs_hooks_unmount(mp);
686 free(mp, M_MOUNT);
687 return (0);
688 }
689
690 /*
691 * Sync each mounted filesystem.
692 */
693 #ifdef DEBUG
694 int syncprt = 0;
695 struct ctldebug debug0 = { "syncprt", &syncprt };
696 #endif
697
698 /* ARGSUSED */
699 int
700 sys_sync(struct lwp *l, void *v, register_t *retval)
701 {
702 struct mount *mp, *nmp;
703 int asyncflag;
704
705 if (l == NULL)
706 l = &lwp0;
707
708 simple_lock(&mountlist_slock);
709 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
710 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
711 nmp = mp->mnt_list.cqe_prev;
712 continue;
713 }
714 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
715 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
716 asyncflag = mp->mnt_flag & MNT_ASYNC;
717 mp->mnt_flag &= ~MNT_ASYNC;
718 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
719 if (asyncflag)
720 mp->mnt_flag |= MNT_ASYNC;
721 vn_finished_write(mp, 0);
722 }
723 simple_lock(&mountlist_slock);
724 nmp = mp->mnt_list.cqe_prev;
725 vfs_unbusy(mp);
726
727 }
728 simple_unlock(&mountlist_slock);
729 #ifdef DEBUG
730 if (syncprt)
731 vfs_bufstats();
732 #endif /* DEBUG */
733 return (0);
734 }
735
736 /*
737 * Change filesystem quotas.
738 */
739 /* ARGSUSED */
740 int
741 sys_quotactl(struct lwp *l, void *v, register_t *retval)
742 {
743 struct sys_quotactl_args /* {
744 syscallarg(const char *) path;
745 syscallarg(int) cmd;
746 syscallarg(int) uid;
747 syscallarg(void *) arg;
748 } */ *uap = v;
749 struct mount *mp;
750 int error;
751 struct nameidata nd;
752
753 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
754 if ((error = namei(&nd)) != 0)
755 return (error);
756 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
757 vrele(nd.ni_vp);
758 if (error)
759 return (error);
760 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
761 SCARG(uap, arg), l);
762 vn_finished_write(mp, 0);
763 return (error);
764 }
765
766 int
767 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
768 int root)
769 {
770 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
771 int error = 0;
772
773 /*
774 * If MNT_NOWAIT or MNT_LAZY is specified, do not
775 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
776 * overrides MNT_NOWAIT.
777 */
778 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
779 (flags != MNT_WAIT && flags != 0)) {
780 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
781 goto done;
782 }
783
784 /* Get the filesystem stats now */
785 memset(sp, 0, sizeof(*sp));
786 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
787 return error;
788 }
789
790 if (cwdi->cwdi_rdir == NULL)
791 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
792 done:
793 if (cwdi->cwdi_rdir != NULL) {
794 size_t len;
795 char *bp;
796 char *path = PNBUF_GET();
797 if (!path)
798 return ENOMEM;
799
800 bp = path + MAXPATHLEN;
801 *--bp = '\0';
802 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
803 MAXPATHLEN / 2, 0, l);
804 if (error) {
805 PNBUF_PUT(path);
806 return error;
807 }
808 len = strlen(bp);
809 /*
810 * for mount points that are below our root, we can see
811 * them, so we fix up the pathname and return them. The
812 * rest we cannot see, so we don't allow viewing the
813 * data.
814 */
815 if (strncmp(bp, sp->f_mntonname, len) == 0) {
816 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
817 sizeof(sp->f_mntonname));
818 if (sp->f_mntonname[0] == '\0')
819 (void)strlcpy(sp->f_mntonname, "/",
820 sizeof(sp->f_mntonname));
821 } else {
822 if (root)
823 (void)strlcpy(sp->f_mntonname, "/",
824 sizeof(sp->f_mntonname));
825 else
826 error = EPERM;
827 }
828 PNBUF_PUT(path);
829 }
830 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
831 return error;
832 }
833
834 /*
835 * Get filesystem statistics.
836 */
837 /* ARGSUSED */
838 int
839 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
840 {
841 struct sys_statvfs1_args /* {
842 syscallarg(const char *) path;
843 syscallarg(struct statvfs *) buf;
844 syscallarg(int) flags;
845 } */ *uap = v;
846 struct mount *mp;
847 struct statvfs *sb;
848 int error;
849 struct nameidata nd;
850
851 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
852 if ((error = namei(&nd)) != 0)
853 return error;
854 mp = nd.ni_vp->v_mount;
855 vrele(nd.ni_vp);
856 sb = STATVFSBUF_GET();
857 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
858 if (error == 0) {
859 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
860 }
861 STATVFSBUF_PUT(sb);
862 return error;
863 }
864
865 /*
866 * Get filesystem statistics.
867 */
868 /* ARGSUSED */
869 int
870 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
871 {
872 struct sys_fstatvfs1_args /* {
873 syscallarg(int) fd;
874 syscallarg(struct statvfs *) buf;
875 syscallarg(int) flags;
876 } */ *uap = v;
877 struct proc *p = l->l_proc;
878 struct file *fp;
879 struct mount *mp;
880 struct statvfs *sb;
881 int error;
882
883 /* getvnode() will use the descriptor for us */
884 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
885 return (error);
886 mp = ((struct vnode *)fp->f_data)->v_mount;
887 sb = STATVFSBUF_GET();
888 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
889 goto out;
890 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
891 out:
892 FILE_UNUSE(fp, l);
893 STATVFSBUF_PUT(sb);
894 return error;
895 }
896
897
898 /*
899 * Get statistics on all filesystems.
900 */
901 int
902 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
903 {
904 struct sys_getvfsstat_args /* {
905 syscallarg(struct statvfs *) buf;
906 syscallarg(size_t) bufsize;
907 syscallarg(int) flags;
908 } */ *uap = v;
909 int root = 0;
910 struct proc *p = l->l_proc;
911 struct mount *mp, *nmp;
912 struct statvfs *sb;
913 struct statvfs *sfsp;
914 size_t count, maxcount;
915 int error = 0;
916
917 sb = STATVFSBUF_GET();
918 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
919 sfsp = SCARG(uap, buf);
920 simple_lock(&mountlist_slock);
921 count = 0;
922 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
923 mp = nmp) {
924 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
925 nmp = CIRCLEQ_NEXT(mp, mnt_list);
926 continue;
927 }
928 if (sfsp && count < maxcount) {
929 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
930 if (error) {
931 simple_lock(&mountlist_slock);
932 nmp = CIRCLEQ_NEXT(mp, mnt_list);
933 vfs_unbusy(mp);
934 continue;
935 }
936 error = copyout(sb, sfsp, sizeof(*sfsp));
937 if (error) {
938 vfs_unbusy(mp);
939 goto out;
940 }
941 sfsp++;
942 root |= strcmp(sb->f_mntonname, "/") == 0;
943 }
944 count++;
945 simple_lock(&mountlist_slock);
946 nmp = CIRCLEQ_NEXT(mp, mnt_list);
947 vfs_unbusy(mp);
948 }
949 simple_unlock(&mountlist_slock);
950 if (root == 0 && p->p_cwdi->cwdi_rdir) {
951 /*
952 * fake a root entry
953 */
954 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
955 SCARG(uap, flags), 1)) != 0)
956 goto out;
957 if (sfsp)
958 error = copyout(sb, sfsp, sizeof(*sfsp));
959 count++;
960 }
961 if (sfsp && count > maxcount)
962 *retval = maxcount;
963 else
964 *retval = count;
965 out:
966 STATVFSBUF_PUT(sb);
967 return error;
968 }
969
970 /*
971 * Change current working directory to a given file descriptor.
972 */
973 /* ARGSUSED */
974 int
975 sys_fchdir(struct lwp *l, void *v, register_t *retval)
976 {
977 struct sys_fchdir_args /* {
978 syscallarg(int) fd;
979 } */ *uap = v;
980 struct proc *p = l->l_proc;
981 struct filedesc *fdp = p->p_fd;
982 struct cwdinfo *cwdi = p->p_cwdi;
983 struct vnode *vp, *tdp;
984 struct mount *mp;
985 struct file *fp;
986 int error;
987
988 /* getvnode() will use the descriptor for us */
989 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
990 return (error);
991 vp = (struct vnode *)fp->f_data;
992
993 VREF(vp);
994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
995 if (vp->v_type != VDIR)
996 error = ENOTDIR;
997 else
998 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
999 while (!error && (mp = vp->v_mountedhere) != NULL) {
1000 if (vfs_busy(mp, 0, 0))
1001 continue;
1002 error = VFS_ROOT(mp, &tdp);
1003 vfs_unbusy(mp);
1004 if (error)
1005 break;
1006 vput(vp);
1007 vp = tdp;
1008 }
1009 if (error) {
1010 vput(vp);
1011 goto out;
1012 }
1013 VOP_UNLOCK(vp, 0);
1014
1015 /*
1016 * Disallow changing to a directory not under the process's
1017 * current root directory (if there is one).
1018 */
1019 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1020 vrele(vp);
1021 error = EPERM; /* operation not permitted */
1022 goto out;
1023 }
1024
1025 vrele(cwdi->cwdi_cdir);
1026 cwdi->cwdi_cdir = vp;
1027 out:
1028 FILE_UNUSE(fp, l);
1029 return (error);
1030 }
1031
1032 /*
1033 * Change this process's notion of the root directory to a given file
1034 * descriptor.
1035 */
1036 int
1037 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1038 {
1039 struct sys_fchroot_args *uap = v;
1040 struct proc *p = l->l_proc;
1041 struct filedesc *fdp = p->p_fd;
1042 struct cwdinfo *cwdi = p->p_cwdi;
1043 struct vnode *vp;
1044 struct file *fp;
1045 int error;
1046
1047 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1048 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1049 return error;
1050 /* getvnode() will use the descriptor for us */
1051 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1052 return error;
1053 vp = (struct vnode *) fp->f_data;
1054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1055 if (vp->v_type != VDIR)
1056 error = ENOTDIR;
1057 else
1058 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1059 VOP_UNLOCK(vp, 0);
1060 if (error)
1061 goto out;
1062 VREF(vp);
1063
1064 /*
1065 * Prevent escaping from chroot by putting the root under
1066 * the working directory. Silently chdir to / if we aren't
1067 * already there.
1068 */
1069 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1070 /*
1071 * XXX would be more failsafe to change directory to a
1072 * deadfs node here instead
1073 */
1074 vrele(cwdi->cwdi_cdir);
1075 VREF(vp);
1076 cwdi->cwdi_cdir = vp;
1077 }
1078
1079 if (cwdi->cwdi_rdir != NULL)
1080 vrele(cwdi->cwdi_rdir);
1081 cwdi->cwdi_rdir = vp;
1082 out:
1083 FILE_UNUSE(fp, l);
1084 return (error);
1085 }
1086
1087 /*
1088 * Change current working directory (``.'').
1089 */
1090 /* ARGSUSED */
1091 int
1092 sys_chdir(struct lwp *l, void *v, register_t *retval)
1093 {
1094 struct sys_chdir_args /* {
1095 syscallarg(const char *) path;
1096 } */ *uap = v;
1097 struct proc *p = l->l_proc;
1098 struct cwdinfo *cwdi = p->p_cwdi;
1099 int error;
1100 struct nameidata nd;
1101
1102 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1103 SCARG(uap, path), l);
1104 if ((error = change_dir(&nd, l)) != 0)
1105 return (error);
1106 vrele(cwdi->cwdi_cdir);
1107 cwdi->cwdi_cdir = nd.ni_vp;
1108 return (0);
1109 }
1110
1111 /*
1112 * Change notion of root (``/'') directory.
1113 */
1114 /* ARGSUSED */
1115 int
1116 sys_chroot(struct lwp *l, void *v, register_t *retval)
1117 {
1118 struct sys_chroot_args /* {
1119 syscallarg(const char *) path;
1120 } */ *uap = v;
1121 struct proc *p = l->l_proc;
1122 struct cwdinfo *cwdi = p->p_cwdi;
1123 struct vnode *vp;
1124 int error;
1125 struct nameidata nd;
1126
1127 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1128 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1129 return (error);
1130 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1131 SCARG(uap, path), l);
1132 if ((error = change_dir(&nd, l)) != 0)
1133 return (error);
1134 if (cwdi->cwdi_rdir != NULL)
1135 vrele(cwdi->cwdi_rdir);
1136 vp = nd.ni_vp;
1137 cwdi->cwdi_rdir = vp;
1138
1139 /*
1140 * Prevent escaping from chroot by putting the root under
1141 * the working directory. Silently chdir to / if we aren't
1142 * already there.
1143 */
1144 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1145 /*
1146 * XXX would be more failsafe to change directory to a
1147 * deadfs node here instead
1148 */
1149 vrele(cwdi->cwdi_cdir);
1150 VREF(vp);
1151 cwdi->cwdi_cdir = vp;
1152 }
1153
1154 return (0);
1155 }
1156
1157 /*
1158 * Common routine for chroot and chdir.
1159 */
1160 static int
1161 change_dir(struct nameidata *ndp, struct lwp *l)
1162 {
1163 struct vnode *vp;
1164 int error;
1165
1166 if ((error = namei(ndp)) != 0)
1167 return (error);
1168 vp = ndp->ni_vp;
1169 if (vp->v_type != VDIR)
1170 error = ENOTDIR;
1171 else
1172 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1173
1174 if (error)
1175 vput(vp);
1176 else
1177 VOP_UNLOCK(vp, 0);
1178 return (error);
1179 }
1180
1181 /*
1182 * Check permissions, allocate an open file structure,
1183 * and call the device open routine if any.
1184 */
1185 int
1186 sys_open(struct lwp *l, void *v, register_t *retval)
1187 {
1188 struct sys_open_args /* {
1189 syscallarg(const char *) path;
1190 syscallarg(int) flags;
1191 syscallarg(int) mode;
1192 } */ *uap = v;
1193 struct proc *p = l->l_proc;
1194 struct cwdinfo *cwdi = p->p_cwdi;
1195 struct filedesc *fdp = p->p_fd;
1196 struct file *fp;
1197 struct vnode *vp;
1198 int flags, cmode;
1199 int type, indx, error;
1200 struct flock lf;
1201 struct nameidata nd;
1202
1203 flags = FFLAGS(SCARG(uap, flags));
1204 if ((flags & (FREAD | FWRITE)) == 0)
1205 return (EINVAL);
1206 /* falloc() will use the file descriptor for us */
1207 if ((error = falloc(l, &fp, &indx)) != 0)
1208 return (error);
1209 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1210 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1211 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1212 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1213 FILE_UNUSE(fp, l);
1214 fdp->fd_ofiles[indx] = NULL;
1215 ffree(fp);
1216 if ((error == EDUPFD || error == EMOVEFD) &&
1217 l->l_dupfd >= 0 && /* XXX from fdopen */
1218 (error =
1219 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1220 *retval = indx;
1221 return (0);
1222 }
1223 if (error == ERESTART)
1224 error = EINTR;
1225 fdremove(fdp, indx);
1226 return (error);
1227 }
1228 l->l_dupfd = 0;
1229 vp = nd.ni_vp;
1230 fp->f_flag = flags & FMASK;
1231 fp->f_type = DTYPE_VNODE;
1232 fp->f_ops = &vnops;
1233 fp->f_data = vp;
1234 if (flags & (O_EXLOCK | O_SHLOCK)) {
1235 lf.l_whence = SEEK_SET;
1236 lf.l_start = 0;
1237 lf.l_len = 0;
1238 if (flags & O_EXLOCK)
1239 lf.l_type = F_WRLCK;
1240 else
1241 lf.l_type = F_RDLCK;
1242 type = F_FLOCK;
1243 if ((flags & FNONBLOCK) == 0)
1244 type |= F_WAIT;
1245 VOP_UNLOCK(vp, 0);
1246 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1247 if (error) {
1248 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1249 FILE_UNUSE(fp, l);
1250 ffree(fp);
1251 fdremove(fdp, indx);
1252 return (error);
1253 }
1254 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1255 fp->f_flag |= FHASLOCK;
1256 }
1257 VOP_UNLOCK(vp, 0);
1258 *retval = indx;
1259 FILE_SET_MATURE(fp);
1260 FILE_UNUSE(fp, l);
1261 return (0);
1262 }
1263
1264 static void
1265 vfs__fhfree(fhandle_t *fhp)
1266 {
1267 size_t fhsize;
1268
1269 if (fhp == NULL) {
1270 return;
1271 }
1272 fhsize = FHANDLE_SIZE(fhp);
1273 kmem_free(fhp, fhsize);
1274 }
1275
1276 /*
1277 * vfs_composefh: compose a filehandle.
1278 */
1279
1280 int
1281 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1282 {
1283 struct mount *mp;
1284 struct fid *fidp;
1285 int error;
1286 size_t needfhsize;
1287 size_t fidsize;
1288
1289 mp = vp->v_mount;
1290 fidp = NULL;
1291 if (*fh_size < FHANDLE_SIZE_MIN) {
1292 fidsize = 0;
1293 } else {
1294 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1295 if (fhp != NULL) {
1296 memset(fhp, 0, *fh_size);
1297 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1298 fidp = &fhp->fh_fid;
1299 }
1300 }
1301 error = VFS_VPTOFH(vp, fidp, &fidsize);
1302 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1303 if (error == 0 && *fh_size < needfhsize) {
1304 error = E2BIG;
1305 }
1306 *fh_size = needfhsize;
1307 return error;
1308 }
1309
1310 int
1311 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1312 {
1313 struct mount *mp;
1314 fhandle_t *fhp;
1315 size_t fhsize;
1316 size_t fidsize;
1317 int error;
1318
1319 *fhpp = NULL;
1320 mp = vp->v_mount;
1321 fidsize = 0;
1322 error = VFS_VPTOFH(vp, NULL, &fidsize);
1323 KASSERT(error != 0);
1324 if (error != E2BIG) {
1325 goto out;
1326 }
1327 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1328 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1329 if (fhp == NULL) {
1330 error = ENOMEM;
1331 goto out;
1332 }
1333 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1334 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1335 if (error == 0) {
1336 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1337 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1338 *fhpp = fhp;
1339 } else {
1340 kmem_free(fhp, fhsize);
1341 }
1342 out:
1343 return error;
1344 }
1345
1346 void
1347 vfs_composefh_free(fhandle_t *fhp)
1348 {
1349
1350 vfs__fhfree(fhp);
1351 }
1352
1353 /*
1354 * vfs_fhtovp: lookup a vnode by a filehandle.
1355 */
1356
1357 int
1358 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1359 {
1360 struct mount *mp;
1361 int error;
1362
1363 *vpp = NULL;
1364 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1365 if (mp == NULL) {
1366 error = ESTALE;
1367 goto out;
1368 }
1369 if (mp->mnt_op->vfs_fhtovp == NULL) {
1370 error = EOPNOTSUPP;
1371 goto out;
1372 }
1373 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1374 out:
1375 return error;
1376 }
1377
1378 /*
1379 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1380 * the needed size.
1381 */
1382
1383 int
1384 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1385 {
1386 fhandle_t *fhp;
1387 int error;
1388
1389 *fhpp = NULL;
1390 if (fhsize > FHANDLE_SIZE_MAX) {
1391 return EINVAL;
1392 }
1393 if (fhsize < FHANDLE_SIZE_MIN) {
1394 return EINVAL;
1395 }
1396 again:
1397 fhp = kmem_alloc(fhsize, KM_SLEEP);
1398 if (fhp == NULL) {
1399 return ENOMEM;
1400 }
1401 error = copyin(ufhp, fhp, fhsize);
1402 if (error == 0) {
1403 /* XXX this check shouldn't be here */
1404 if (FHANDLE_SIZE(fhp) == fhsize) {
1405 *fhpp = fhp;
1406 return 0;
1407 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1408 /*
1409 * a kludge for nfsv2 padded handles.
1410 */
1411 size_t sz;
1412
1413 sz = FHANDLE_SIZE(fhp);
1414 kmem_free(fhp, fhsize);
1415 fhsize = sz;
1416 goto again;
1417 } else {
1418 /*
1419 * userland told us wrong size.
1420 */
1421 error = EINVAL;
1422 }
1423 }
1424 kmem_free(fhp, fhsize);
1425 return error;
1426 }
1427
1428 void
1429 vfs_copyinfh_free(fhandle_t *fhp)
1430 {
1431
1432 vfs__fhfree(fhp);
1433 }
1434
1435 /*
1436 * Get file handle system call
1437 */
1438 int
1439 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1440 {
1441 struct sys___getfh30_args /* {
1442 syscallarg(char *) fname;
1443 syscallarg(fhandle_t *) fhp;
1444 syscallarg(size_t *) fh_size;
1445 } */ *uap = v;
1446 struct vnode *vp;
1447 fhandle_t *fh;
1448 int error;
1449 struct nameidata nd;
1450 size_t sz;
1451 size_t usz;
1452
1453 /*
1454 * Must be super user
1455 */
1456 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1457 0, NULL, NULL, NULL);
1458 if (error)
1459 return (error);
1460 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1461 SCARG(uap, fname), l);
1462 error = namei(&nd);
1463 if (error)
1464 return (error);
1465 vp = nd.ni_vp;
1466 error = vfs_composefh_alloc(vp, &fh);
1467 vput(vp);
1468 if (error != 0) {
1469 goto out;
1470 }
1471 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1472 if (error != 0) {
1473 goto out;
1474 }
1475 sz = FHANDLE_SIZE(fh);
1476 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1477 if (error != 0) {
1478 goto out;
1479 }
1480 if (usz >= sz) {
1481 error = copyout(fh, SCARG(uap, fhp), sz);
1482 } else {
1483 error = E2BIG;
1484 }
1485 out:
1486 vfs_composefh_free(fh);
1487 return (error);
1488 }
1489
1490 /*
1491 * Open a file given a file handle.
1492 *
1493 * Check permissions, allocate an open file structure,
1494 * and call the device open routine if any.
1495 */
1496
1497 int
1498 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1499 register_t *retval)
1500 {
1501 struct filedesc *fdp = l->l_proc->p_fd;
1502 struct file *fp;
1503 struct vnode *vp = NULL;
1504 struct mount *mp;
1505 kauth_cred_t cred = l->l_cred;
1506 struct file *nfp;
1507 int type, indx, error=0;
1508 struct flock lf;
1509 struct vattr va;
1510 fhandle_t *fh;
1511 int flags;
1512
1513 /*
1514 * Must be super user
1515 */
1516 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1517 0, NULL, NULL, NULL)))
1518 return (error);
1519
1520 flags = FFLAGS(oflags);
1521 if ((flags & (FREAD | FWRITE)) == 0)
1522 return (EINVAL);
1523 if ((flags & O_CREAT))
1524 return (EINVAL);
1525 /* falloc() will use the file descriptor for us */
1526 if ((error = falloc(l, &nfp, &indx)) != 0)
1527 return (error);
1528 fp = nfp;
1529 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1530 if (error != 0) {
1531 goto bad;
1532 }
1533 error = vfs_fhtovp(fh, &vp);
1534 if (error != 0) {
1535 goto bad;
1536 }
1537
1538 /* Now do an effective vn_open */
1539
1540 if (vp->v_type == VSOCK) {
1541 error = EOPNOTSUPP;
1542 goto bad;
1543 }
1544 if (flags & FREAD) {
1545 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1546 goto bad;
1547 }
1548 if (flags & (FWRITE | O_TRUNC)) {
1549 if (vp->v_type == VDIR) {
1550 error = EISDIR;
1551 goto bad;
1552 }
1553 if ((error = vn_writechk(vp)) != 0 ||
1554 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1555 goto bad;
1556 }
1557 if (flags & O_TRUNC) {
1558 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1559 goto bad;
1560 VOP_UNLOCK(vp, 0); /* XXX */
1561 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1562 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1563 VATTR_NULL(&va);
1564 va.va_size = 0;
1565 error = VOP_SETATTR(vp, &va, cred, l);
1566 vn_finished_write(mp, 0);
1567 if (error)
1568 goto bad;
1569 }
1570 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1571 goto bad;
1572 if (vp->v_type == VREG &&
1573 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1574 error = EIO;
1575 goto bad;
1576 }
1577 if (flags & FWRITE)
1578 vp->v_writecount++;
1579
1580 /* done with modified vn_open, now finish what sys_open does. */
1581
1582 fp->f_flag = flags & FMASK;
1583 fp->f_type = DTYPE_VNODE;
1584 fp->f_ops = &vnops;
1585 fp->f_data = vp;
1586 if (flags & (O_EXLOCK | O_SHLOCK)) {
1587 lf.l_whence = SEEK_SET;
1588 lf.l_start = 0;
1589 lf.l_len = 0;
1590 if (flags & O_EXLOCK)
1591 lf.l_type = F_WRLCK;
1592 else
1593 lf.l_type = F_RDLCK;
1594 type = F_FLOCK;
1595 if ((flags & FNONBLOCK) == 0)
1596 type |= F_WAIT;
1597 VOP_UNLOCK(vp, 0);
1598 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1599 if (error) {
1600 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1601 FILE_UNUSE(fp, l);
1602 ffree(fp);
1603 fdremove(fdp, indx);
1604 return (error);
1605 }
1606 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1607 fp->f_flag |= FHASLOCK;
1608 }
1609 VOP_UNLOCK(vp, 0);
1610 *retval = indx;
1611 FILE_SET_MATURE(fp);
1612 FILE_UNUSE(fp, l);
1613 vfs_copyinfh_free(fh);
1614 return (0);
1615
1616 bad:
1617 FILE_UNUSE(fp, l);
1618 ffree(fp);
1619 fdremove(fdp, indx);
1620 if (vp != NULL)
1621 vput(vp);
1622 vfs_copyinfh_free(fh);
1623 return (error);
1624 }
1625
1626 int
1627 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1628 {
1629 struct sys___fhopen40_args /* {
1630 syscallarg(const void *) fhp;
1631 syscallarg(size_t) fh_size;
1632 syscallarg(int) flags;
1633 } */ *uap = v;
1634
1635 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1636 SCARG(uap, flags), retval);
1637 }
1638
1639 int
1640 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
1641 register_t *retval)
1642 {
1643 struct stat sb;
1644 int error;
1645 fhandle_t *fh;
1646 struct vnode *vp;
1647
1648 /*
1649 * Must be super user
1650 */
1651 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1652 0, NULL, NULL, NULL)))
1653 return (error);
1654
1655 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1656 if (error != 0) {
1657 goto bad;
1658 }
1659 error = vfs_fhtovp(fh, &vp);
1660 if (error != 0) {
1661 goto bad;
1662 }
1663 error = vn_stat(vp, &sb, l);
1664 vput(vp);
1665 if (error) {
1666 goto bad;
1667 }
1668 error = copyout(&sb, sbp, sizeof(sb));
1669 bad:
1670 vfs_copyinfh_free(fh);
1671 return error;
1672 }
1673
1674
1675 /* ARGSUSED */
1676 int
1677 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1678 {
1679 struct sys___fhstat40_args /* {
1680 syscallarg(const void *) fhp;
1681 syscallarg(size_t) fh_size;
1682 syscallarg(struct stat *) sb;
1683 } */ *uap = v;
1684
1685 return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb),
1686 retval);
1687 }
1688
1689 int
1690 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
1691 int flags, register_t *retval)
1692 {
1693 struct statvfs *sb = NULL;
1694 fhandle_t *fh;
1695 struct mount *mp;
1696 struct vnode *vp;
1697 int error;
1698
1699 /*
1700 * Must be super user
1701 */
1702 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1703 0, NULL, NULL, NULL)))
1704 return error;
1705
1706 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1707 if (error != 0) {
1708 goto out;
1709 }
1710 error = vfs_fhtovp(fh, &vp);
1711 if (error != 0) {
1712 goto out;
1713 }
1714 mp = vp->v_mount;
1715 sb = STATVFSBUF_GET();
1716 if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) {
1717 vput(vp);
1718 goto out;
1719 }
1720 vput(vp);
1721 error = copyout(sb, buf, sizeof(*sb));
1722 out:
1723 if (sb != NULL) {
1724 STATVFSBUF_PUT(sb);
1725 }
1726 vfs_copyinfh_free(fh);
1727 return error;
1728 }
1729
1730 /* ARGSUSED */
1731 int
1732 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1733 {
1734 struct sys___fhstatvfs140_args /* {
1735 syscallarg(const void *) fhp;
1736 syscallarg(size_t) fh_size;
1737 syscallarg(struct statvfs *) buf;
1738 syscallarg(int) flags;
1739 } */ *uap = v;
1740
1741 return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1742 SCARG(uap, buf), SCARG(uap, flags), retval);
1743 }
1744
1745 /*
1746 * Create a special file.
1747 */
1748 /* ARGSUSED */
1749 int
1750 sys_mknod(struct lwp *l, void *v, register_t *retval)
1751 {
1752 struct sys_mknod_args /* {
1753 syscallarg(const char *) path;
1754 syscallarg(int) mode;
1755 syscallarg(int) dev;
1756 } */ *uap = v;
1757 struct proc *p = l->l_proc;
1758 struct vnode *vp;
1759 struct mount *mp;
1760 struct vattr vattr;
1761 int error;
1762 int whiteout = 0;
1763 struct nameidata nd;
1764
1765 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1766 0, NULL, NULL, NULL)) != 0)
1767 return (error);
1768 restart:
1769 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1770 if ((error = namei(&nd)) != 0)
1771 return (error);
1772 vp = nd.ni_vp;
1773 if (vp != NULL)
1774 error = EEXIST;
1775 else {
1776 VATTR_NULL(&vattr);
1777 vattr.va_mode =
1778 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1779 vattr.va_rdev = SCARG(uap, dev);
1780 whiteout = 0;
1781
1782 switch (SCARG(uap, mode) & S_IFMT) {
1783 case S_IFMT: /* used by badsect to flag bad sectors */
1784 vattr.va_type = VBAD;
1785 break;
1786 case S_IFCHR:
1787 vattr.va_type = VCHR;
1788 break;
1789 case S_IFBLK:
1790 vattr.va_type = VBLK;
1791 break;
1792 case S_IFWHT:
1793 whiteout = 1;
1794 break;
1795 default:
1796 error = EINVAL;
1797 break;
1798 }
1799 }
1800 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1801 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1802 if (nd.ni_dvp == vp)
1803 vrele(nd.ni_dvp);
1804 else
1805 vput(nd.ni_dvp);
1806 if (vp)
1807 vrele(vp);
1808 if ((error = vn_start_write(NULL, &mp,
1809 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1810 return (error);
1811 goto restart;
1812 }
1813 if (!error) {
1814 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1815 if (whiteout) {
1816 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1817 if (error)
1818 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1819 vput(nd.ni_dvp);
1820 } else {
1821 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1822 &nd.ni_cnd, &vattr);
1823 if (error == 0)
1824 vput(nd.ni_vp);
1825 }
1826 } else {
1827 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1828 if (nd.ni_dvp == vp)
1829 vrele(nd.ni_dvp);
1830 else
1831 vput(nd.ni_dvp);
1832 if (vp)
1833 vrele(vp);
1834 }
1835 vn_finished_write(mp, 0);
1836 return (error);
1837 }
1838
1839 /*
1840 * Create a named pipe.
1841 */
1842 /* ARGSUSED */
1843 int
1844 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1845 {
1846 struct sys_mkfifo_args /* {
1847 syscallarg(const char *) path;
1848 syscallarg(int) mode;
1849 } */ *uap = v;
1850 struct proc *p = l->l_proc;
1851 struct mount *mp;
1852 struct vattr vattr;
1853 int error;
1854 struct nameidata nd;
1855
1856 restart:
1857 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1858 if ((error = namei(&nd)) != 0)
1859 return (error);
1860 if (nd.ni_vp != NULL) {
1861 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1862 if (nd.ni_dvp == nd.ni_vp)
1863 vrele(nd.ni_dvp);
1864 else
1865 vput(nd.ni_dvp);
1866 vrele(nd.ni_vp);
1867 return (EEXIST);
1868 }
1869 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1870 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1871 if (nd.ni_dvp == nd.ni_vp)
1872 vrele(nd.ni_dvp);
1873 else
1874 vput(nd.ni_dvp);
1875 if (nd.ni_vp)
1876 vrele(nd.ni_vp);
1877 if ((error = vn_start_write(NULL, &mp,
1878 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1879 return (error);
1880 goto restart;
1881 }
1882 VATTR_NULL(&vattr);
1883 vattr.va_type = VFIFO;
1884 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1885 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1886 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1887 if (error == 0)
1888 vput(nd.ni_vp);
1889 vn_finished_write(mp, 0);
1890 return (error);
1891 }
1892
1893 /*
1894 * Make a hard file link.
1895 */
1896 /* ARGSUSED */
1897 int
1898 sys_link(struct lwp *l, void *v, register_t *retval)
1899 {
1900 struct sys_link_args /* {
1901 syscallarg(const char *) path;
1902 syscallarg(const char *) link;
1903 } */ *uap = v;
1904 struct vnode *vp;
1905 struct mount *mp;
1906 struct nameidata nd;
1907 int error;
1908
1909 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1910 if ((error = namei(&nd)) != 0)
1911 return (error);
1912 vp = nd.ni_vp;
1913 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1914 vrele(vp);
1915 return (error);
1916 }
1917 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1918 if ((error = namei(&nd)) != 0)
1919 goto out;
1920 if (nd.ni_vp) {
1921 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1922 if (nd.ni_dvp == nd.ni_vp)
1923 vrele(nd.ni_dvp);
1924 else
1925 vput(nd.ni_dvp);
1926 vrele(nd.ni_vp);
1927 error = EEXIST;
1928 goto out;
1929 }
1930 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1931 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1932 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1933 out:
1934 vrele(vp);
1935 vn_finished_write(mp, 0);
1936 return (error);
1937 }
1938
1939 /*
1940 * Make a symbolic link.
1941 */
1942 /* ARGSUSED */
1943 int
1944 sys_symlink(struct lwp *l, void *v, register_t *retval)
1945 {
1946 struct sys_symlink_args /* {
1947 syscallarg(const char *) path;
1948 syscallarg(const char *) link;
1949 } */ *uap = v;
1950 struct proc *p = l->l_proc;
1951 struct mount *mp;
1952 struct vattr vattr;
1953 char *path;
1954 int error;
1955 struct nameidata nd;
1956
1957 path = PNBUF_GET();
1958 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1959 if (error)
1960 goto out;
1961 restart:
1962 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1963 if ((error = namei(&nd)) != 0)
1964 goto out;
1965 if (nd.ni_vp) {
1966 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1967 if (nd.ni_dvp == nd.ni_vp)
1968 vrele(nd.ni_dvp);
1969 else
1970 vput(nd.ni_dvp);
1971 vrele(nd.ni_vp);
1972 error = EEXIST;
1973 goto out;
1974 }
1975 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1976 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1977 if (nd.ni_dvp == nd.ni_vp)
1978 vrele(nd.ni_dvp);
1979 else
1980 vput(nd.ni_dvp);
1981 if ((error = vn_start_write(NULL, &mp,
1982 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1983 return (error);
1984 goto restart;
1985 }
1986 VATTR_NULL(&vattr);
1987 vattr.va_type = VLNK;
1988 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1989 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1990 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1991 if (error == 0)
1992 vput(nd.ni_vp);
1993 vn_finished_write(mp, 0);
1994 out:
1995 PNBUF_PUT(path);
1996 return (error);
1997 }
1998
1999 /*
2000 * Delete a whiteout from the filesystem.
2001 */
2002 /* ARGSUSED */
2003 int
2004 sys_undelete(struct lwp *l, void *v, register_t *retval)
2005 {
2006 struct sys_undelete_args /* {
2007 syscallarg(const char *) path;
2008 } */ *uap = v;
2009 int error;
2010 struct mount *mp;
2011 struct nameidata nd;
2012
2013 restart:
2014 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2015 SCARG(uap, path), l);
2016 error = namei(&nd);
2017 if (error)
2018 return (error);
2019
2020 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2021 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2022 if (nd.ni_dvp == nd.ni_vp)
2023 vrele(nd.ni_dvp);
2024 else
2025 vput(nd.ni_dvp);
2026 if (nd.ni_vp)
2027 vrele(nd.ni_vp);
2028 return (EEXIST);
2029 }
2030 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2031 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2032 if (nd.ni_dvp == nd.ni_vp)
2033 vrele(nd.ni_dvp);
2034 else
2035 vput(nd.ni_dvp);
2036 if ((error = vn_start_write(NULL, &mp,
2037 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2038 return (error);
2039 goto restart;
2040 }
2041 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2042 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2043 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2044 vput(nd.ni_dvp);
2045 vn_finished_write(mp, 0);
2046 return (error);
2047 }
2048
2049 /*
2050 * Delete a name from the filesystem.
2051 */
2052 /* ARGSUSED */
2053 int
2054 sys_unlink(struct lwp *l, void *v, register_t *retval)
2055 {
2056 struct sys_unlink_args /* {
2057 syscallarg(const char *) path;
2058 } */ *uap = v;
2059 struct mount *mp;
2060 struct vnode *vp;
2061 int error;
2062 struct nameidata nd;
2063 #if NVERIEXEC > 0
2064 pathname_t pathbuf;
2065 #endif /* NVERIEXEC > 0 */
2066
2067 restart:
2068 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2069 SCARG(uap, path), l);
2070 if ((error = namei(&nd)) != 0)
2071 return (error);
2072 vp = nd.ni_vp;
2073
2074 /*
2075 * The root of a mounted filesystem cannot be deleted.
2076 */
2077 if (vp->v_flag & VROOT) {
2078 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2079 if (nd.ni_dvp == vp)
2080 vrele(nd.ni_dvp);
2081 else
2082 vput(nd.ni_dvp);
2083 vput(vp);
2084 error = EBUSY;
2085 goto out;
2086 }
2087
2088 #if NVERIEXEC > 0
2089 error = pathname_get(nd.ni_dirp, nd.ni_segflg, &pathbuf);
2090
2091 /* Handle remove requests for veriexec entries. */
2092 if (!error) {
2093 error = veriexec_removechk(vp, pathname_path(pathbuf), l);
2094 pathname_put(pathbuf);
2095 }
2096
2097 if (error) {
2098 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2099 if (nd.ni_dvp == vp)
2100 vrele(nd.ni_dvp);
2101 else
2102 vput(nd.ni_dvp);
2103 vput(vp);
2104 goto out;
2105 }
2106 #endif /* NVERIEXEC > 0 */
2107
2108 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2109 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2110 if (nd.ni_dvp == vp)
2111 vrele(nd.ni_dvp);
2112 else
2113 vput(nd.ni_dvp);
2114 vput(vp);
2115 if ((error = vn_start_write(NULL, &mp,
2116 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2117 return (error);
2118 goto restart;
2119 }
2120 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2121 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2122 #ifdef FILEASSOC
2123 (void)fileassoc_file_delete(vp);
2124 #endif /* FILEASSOC */
2125 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2126 vn_finished_write(mp, 0);
2127 out:
2128 return (error);
2129 }
2130
2131 /*
2132 * Reposition read/write file offset.
2133 */
2134 int
2135 sys_lseek(struct lwp *l, void *v, register_t *retval)
2136 {
2137 struct sys_lseek_args /* {
2138 syscallarg(int) fd;
2139 syscallarg(int) pad;
2140 syscallarg(off_t) offset;
2141 syscallarg(int) whence;
2142 } */ *uap = v;
2143 struct proc *p = l->l_proc;
2144 kauth_cred_t cred = l->l_cred;
2145 struct filedesc *fdp = p->p_fd;
2146 struct file *fp;
2147 struct vnode *vp;
2148 struct vattr vattr;
2149 off_t newoff;
2150 int error;
2151
2152 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2153 return (EBADF);
2154
2155 FILE_USE(fp);
2156
2157 vp = (struct vnode *)fp->f_data;
2158 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2159 error = ESPIPE;
2160 goto out;
2161 }
2162
2163 switch (SCARG(uap, whence)) {
2164 case SEEK_CUR:
2165 newoff = fp->f_offset + SCARG(uap, offset);
2166 break;
2167 case SEEK_END:
2168 error = VOP_GETATTR(vp, &vattr, cred, l);
2169 if (error)
2170 goto out;
2171 newoff = SCARG(uap, offset) + vattr.va_size;
2172 break;
2173 case SEEK_SET:
2174 newoff = SCARG(uap, offset);
2175 break;
2176 default:
2177 error = EINVAL;
2178 goto out;
2179 }
2180 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2181 goto out;
2182
2183 *(off_t *)retval = fp->f_offset = newoff;
2184 out:
2185 FILE_UNUSE(fp, l);
2186 return (error);
2187 }
2188
2189 /*
2190 * Positional read system call.
2191 */
2192 int
2193 sys_pread(struct lwp *l, void *v, register_t *retval)
2194 {
2195 struct sys_pread_args /* {
2196 syscallarg(int) fd;
2197 syscallarg(void *) buf;
2198 syscallarg(size_t) nbyte;
2199 syscallarg(off_t) offset;
2200 } */ *uap = v;
2201 struct proc *p = l->l_proc;
2202 struct filedesc *fdp = p->p_fd;
2203 struct file *fp;
2204 struct vnode *vp;
2205 off_t offset;
2206 int error, fd = SCARG(uap, fd);
2207
2208 if ((fp = fd_getfile(fdp, fd)) == NULL)
2209 return (EBADF);
2210
2211 if ((fp->f_flag & FREAD) == 0) {
2212 simple_unlock(&fp->f_slock);
2213 return (EBADF);
2214 }
2215
2216 FILE_USE(fp);
2217
2218 vp = (struct vnode *)fp->f_data;
2219 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2220 error = ESPIPE;
2221 goto out;
2222 }
2223
2224 offset = SCARG(uap, offset);
2225
2226 /*
2227 * XXX This works because no file systems actually
2228 * XXX take any action on the seek operation.
2229 */
2230 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2231 goto out;
2232
2233 /* dofileread() will unuse the descriptor for us */
2234 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2235 &offset, 0, retval));
2236
2237 out:
2238 FILE_UNUSE(fp, l);
2239 return (error);
2240 }
2241
2242 /*
2243 * Positional scatter read system call.
2244 */
2245 int
2246 sys_preadv(struct lwp *l, void *v, register_t *retval)
2247 {
2248 struct sys_preadv_args /* {
2249 syscallarg(int) fd;
2250 syscallarg(const struct iovec *) iovp;
2251 syscallarg(int) iovcnt;
2252 syscallarg(off_t) offset;
2253 } */ *uap = v;
2254 struct proc *p = l->l_proc;
2255 struct filedesc *fdp = p->p_fd;
2256 struct file *fp;
2257 struct vnode *vp;
2258 off_t offset;
2259 int error, fd = SCARG(uap, fd);
2260
2261 if ((fp = fd_getfile(fdp, fd)) == NULL)
2262 return (EBADF);
2263
2264 if ((fp->f_flag & FREAD) == 0) {
2265 simple_unlock(&fp->f_slock);
2266 return (EBADF);
2267 }
2268
2269 FILE_USE(fp);
2270
2271 vp = (struct vnode *)fp->f_data;
2272 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2273 error = ESPIPE;
2274 goto out;
2275 }
2276
2277 offset = SCARG(uap, offset);
2278
2279 /*
2280 * XXX This works because no file systems actually
2281 * XXX take any action on the seek operation.
2282 */
2283 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2284 goto out;
2285
2286 /* dofilereadv() will unuse the descriptor for us */
2287 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2288 &offset, 0, retval));
2289
2290 out:
2291 FILE_UNUSE(fp, l);
2292 return (error);
2293 }
2294
2295 /*
2296 * Positional write system call.
2297 */
2298 int
2299 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2300 {
2301 struct sys_pwrite_args /* {
2302 syscallarg(int) fd;
2303 syscallarg(const void *) buf;
2304 syscallarg(size_t) nbyte;
2305 syscallarg(off_t) offset;
2306 } */ *uap = v;
2307 struct proc *p = l->l_proc;
2308 struct filedesc *fdp = p->p_fd;
2309 struct file *fp;
2310 struct vnode *vp;
2311 off_t offset;
2312 int error, fd = SCARG(uap, fd);
2313
2314 if ((fp = fd_getfile(fdp, fd)) == NULL)
2315 return (EBADF);
2316
2317 if ((fp->f_flag & FWRITE) == 0) {
2318 simple_unlock(&fp->f_slock);
2319 return (EBADF);
2320 }
2321
2322 FILE_USE(fp);
2323
2324 vp = (struct vnode *)fp->f_data;
2325 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2326 error = ESPIPE;
2327 goto out;
2328 }
2329
2330 offset = SCARG(uap, offset);
2331
2332 /*
2333 * XXX This works because no file systems actually
2334 * XXX take any action on the seek operation.
2335 */
2336 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2337 goto out;
2338
2339 /* dofilewrite() will unuse the descriptor for us */
2340 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2341 &offset, 0, retval));
2342
2343 out:
2344 FILE_UNUSE(fp, l);
2345 return (error);
2346 }
2347
2348 /*
2349 * Positional gather write system call.
2350 */
2351 int
2352 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2353 {
2354 struct sys_pwritev_args /* {
2355 syscallarg(int) fd;
2356 syscallarg(const struct iovec *) iovp;
2357 syscallarg(int) iovcnt;
2358 syscallarg(off_t) offset;
2359 } */ *uap = v;
2360 struct proc *p = l->l_proc;
2361 struct filedesc *fdp = p->p_fd;
2362 struct file *fp;
2363 struct vnode *vp;
2364 off_t offset;
2365 int error, fd = SCARG(uap, fd);
2366
2367 if ((fp = fd_getfile(fdp, fd)) == NULL)
2368 return (EBADF);
2369
2370 if ((fp->f_flag & FWRITE) == 0) {
2371 simple_unlock(&fp->f_slock);
2372 return (EBADF);
2373 }
2374
2375 FILE_USE(fp);
2376
2377 vp = (struct vnode *)fp->f_data;
2378 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2379 error = ESPIPE;
2380 goto out;
2381 }
2382
2383 offset = SCARG(uap, offset);
2384
2385 /*
2386 * XXX This works because no file systems actually
2387 * XXX take any action on the seek operation.
2388 */
2389 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2390 goto out;
2391
2392 /* dofilewritev() will unuse the descriptor for us */
2393 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2394 &offset, 0, retval));
2395
2396 out:
2397 FILE_UNUSE(fp, l);
2398 return (error);
2399 }
2400
2401 /*
2402 * Check access permissions.
2403 */
2404 int
2405 sys_access(struct lwp *l, void *v, register_t *retval)
2406 {
2407 struct sys_access_args /* {
2408 syscallarg(const char *) path;
2409 syscallarg(int) flags;
2410 } */ *uap = v;
2411 kauth_cred_t cred;
2412 struct vnode *vp;
2413 int error, flags;
2414 struct nameidata nd;
2415
2416 cred = kauth_cred_dup(l->l_cred);
2417 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2418 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2419 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2420 SCARG(uap, path), l);
2421 /* Override default credentials */
2422 nd.ni_cnd.cn_cred = cred;
2423 if ((error = namei(&nd)) != 0)
2424 goto out;
2425 vp = nd.ni_vp;
2426
2427 /* Flags == 0 means only check for existence. */
2428 if (SCARG(uap, flags)) {
2429 flags = 0;
2430 if (SCARG(uap, flags) & R_OK)
2431 flags |= VREAD;
2432 if (SCARG(uap, flags) & W_OK)
2433 flags |= VWRITE;
2434 if (SCARG(uap, flags) & X_OK)
2435 flags |= VEXEC;
2436
2437 error = VOP_ACCESS(vp, flags, cred, l);
2438 if (!error && (flags & VWRITE))
2439 error = vn_writechk(vp);
2440 }
2441 vput(vp);
2442 out:
2443 kauth_cred_free(cred);
2444 return (error);
2445 }
2446
2447 /*
2448 * Get file status; this version follows links.
2449 */
2450 /* ARGSUSED */
2451 int
2452 sys___stat30(struct lwp *l, void *v, register_t *retval)
2453 {
2454 struct sys___stat30_args /* {
2455 syscallarg(const char *) path;
2456 syscallarg(struct stat *) ub;
2457 } */ *uap = v;
2458 struct stat sb;
2459 int error;
2460 struct nameidata nd;
2461
2462 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2463 SCARG(uap, path), l);
2464 if ((error = namei(&nd)) != 0)
2465 return (error);
2466 error = vn_stat(nd.ni_vp, &sb, l);
2467 vput(nd.ni_vp);
2468 if (error)
2469 return (error);
2470 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2471 return (error);
2472 }
2473
2474 /*
2475 * Get file status; this version does not follow links.
2476 */
2477 /* ARGSUSED */
2478 int
2479 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2480 {
2481 struct sys___lstat30_args /* {
2482 syscallarg(const char *) path;
2483 syscallarg(struct stat *) ub;
2484 } */ *uap = v;
2485 struct stat sb;
2486 int error;
2487 struct nameidata nd;
2488
2489 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2490 SCARG(uap, path), l);
2491 if ((error = namei(&nd)) != 0)
2492 return (error);
2493 error = vn_stat(nd.ni_vp, &sb, l);
2494 vput(nd.ni_vp);
2495 if (error)
2496 return (error);
2497 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2498 return (error);
2499 }
2500
2501 /*
2502 * Get configurable pathname variables.
2503 */
2504 /* ARGSUSED */
2505 int
2506 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2507 {
2508 struct sys_pathconf_args /* {
2509 syscallarg(const char *) path;
2510 syscallarg(int) name;
2511 } */ *uap = v;
2512 int error;
2513 struct nameidata nd;
2514
2515 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2516 SCARG(uap, path), l);
2517 if ((error = namei(&nd)) != 0)
2518 return (error);
2519 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2520 vput(nd.ni_vp);
2521 return (error);
2522 }
2523
2524 /*
2525 * Return target name of a symbolic link.
2526 */
2527 /* ARGSUSED */
2528 int
2529 sys_readlink(struct lwp *l, void *v, register_t *retval)
2530 {
2531 struct sys_readlink_args /* {
2532 syscallarg(const char *) path;
2533 syscallarg(char *) buf;
2534 syscallarg(size_t) count;
2535 } */ *uap = v;
2536 struct vnode *vp;
2537 struct iovec aiov;
2538 struct uio auio;
2539 int error;
2540 struct nameidata nd;
2541
2542 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2543 SCARG(uap, path), l);
2544 if ((error = namei(&nd)) != 0)
2545 return (error);
2546 vp = nd.ni_vp;
2547 if (vp->v_type != VLNK)
2548 error = EINVAL;
2549 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2550 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2551 aiov.iov_base = SCARG(uap, buf);
2552 aiov.iov_len = SCARG(uap, count);
2553 auio.uio_iov = &aiov;
2554 auio.uio_iovcnt = 1;
2555 auio.uio_offset = 0;
2556 auio.uio_rw = UIO_READ;
2557 KASSERT(l == curlwp);
2558 auio.uio_vmspace = l->l_proc->p_vmspace;
2559 auio.uio_resid = SCARG(uap, count);
2560 error = VOP_READLINK(vp, &auio, l->l_cred);
2561 }
2562 vput(vp);
2563 *retval = SCARG(uap, count) - auio.uio_resid;
2564 return (error);
2565 }
2566
2567 /*
2568 * Change flags of a file given a path name.
2569 */
2570 /* ARGSUSED */
2571 int
2572 sys_chflags(struct lwp *l, void *v, register_t *retval)
2573 {
2574 struct sys_chflags_args /* {
2575 syscallarg(const char *) path;
2576 syscallarg(u_long) flags;
2577 } */ *uap = v;
2578 struct vnode *vp;
2579 int error;
2580 struct nameidata nd;
2581
2582 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2583 if ((error = namei(&nd)) != 0)
2584 return (error);
2585 vp = nd.ni_vp;
2586 error = change_flags(vp, SCARG(uap, flags), l);
2587 vput(vp);
2588 return (error);
2589 }
2590
2591 /*
2592 * Change flags of a file given a file descriptor.
2593 */
2594 /* ARGSUSED */
2595 int
2596 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2597 {
2598 struct sys_fchflags_args /* {
2599 syscallarg(int) fd;
2600 syscallarg(u_long) flags;
2601 } */ *uap = v;
2602 struct proc *p = l->l_proc;
2603 struct vnode *vp;
2604 struct file *fp;
2605 int error;
2606
2607 /* getvnode() will use the descriptor for us */
2608 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2609 return (error);
2610 vp = (struct vnode *)fp->f_data;
2611 error = change_flags(vp, SCARG(uap, flags), l);
2612 VOP_UNLOCK(vp, 0);
2613 FILE_UNUSE(fp, l);
2614 return (error);
2615 }
2616
2617 /*
2618 * Change flags of a file given a path name; this version does
2619 * not follow links.
2620 */
2621 int
2622 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2623 {
2624 struct sys_lchflags_args /* {
2625 syscallarg(const char *) path;
2626 syscallarg(u_long) flags;
2627 } */ *uap = v;
2628 struct vnode *vp;
2629 int error;
2630 struct nameidata nd;
2631
2632 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2633 if ((error = namei(&nd)) != 0)
2634 return (error);
2635 vp = nd.ni_vp;
2636 error = change_flags(vp, SCARG(uap, flags), l);
2637 vput(vp);
2638 return (error);
2639 }
2640
2641 /*
2642 * Common routine to change flags of a file.
2643 */
2644 int
2645 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2646 {
2647 struct mount *mp;
2648 struct vattr vattr;
2649 int error;
2650
2651 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2652 return (error);
2653 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2654 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2655 /*
2656 * Non-superusers cannot change the flags on devices, even if they
2657 * own them.
2658 */
2659 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2660 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2661 goto out;
2662 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2663 error = EINVAL;
2664 goto out;
2665 }
2666 }
2667 VATTR_NULL(&vattr);
2668 vattr.va_flags = flags;
2669 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2670 out:
2671 vn_finished_write(mp, 0);
2672 return (error);
2673 }
2674
2675 /*
2676 * Change mode of a file given path name; this version follows links.
2677 */
2678 /* ARGSUSED */
2679 int
2680 sys_chmod(struct lwp *l, void *v, register_t *retval)
2681 {
2682 struct sys_chmod_args /* {
2683 syscallarg(const char *) path;
2684 syscallarg(int) mode;
2685 } */ *uap = v;
2686 int error;
2687 struct nameidata nd;
2688
2689 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2690 if ((error = namei(&nd)) != 0)
2691 return (error);
2692
2693 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2694
2695 vrele(nd.ni_vp);
2696 return (error);
2697 }
2698
2699 /*
2700 * Change mode of a file given a file descriptor.
2701 */
2702 /* ARGSUSED */
2703 int
2704 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2705 {
2706 struct sys_fchmod_args /* {
2707 syscallarg(int) fd;
2708 syscallarg(int) mode;
2709 } */ *uap = v;
2710 struct proc *p = l->l_proc;
2711 struct file *fp;
2712 int error;
2713
2714 /* getvnode() will use the descriptor for us */
2715 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2716 return (error);
2717
2718 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2719 FILE_UNUSE(fp, l);
2720 return (error);
2721 }
2722
2723 /*
2724 * Change mode of a file given path name; this version does not follow links.
2725 */
2726 /* ARGSUSED */
2727 int
2728 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2729 {
2730 struct sys_lchmod_args /* {
2731 syscallarg(const char *) path;
2732 syscallarg(int) mode;
2733 } */ *uap = v;
2734 int error;
2735 struct nameidata nd;
2736
2737 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2738 if ((error = namei(&nd)) != 0)
2739 return (error);
2740
2741 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2742
2743 vrele(nd.ni_vp);
2744 return (error);
2745 }
2746
2747 /*
2748 * Common routine to set mode given a vnode.
2749 */
2750 static int
2751 change_mode(struct vnode *vp, int mode, struct lwp *l)
2752 {
2753 struct mount *mp;
2754 struct vattr vattr;
2755 int error;
2756
2757 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2758 return (error);
2759 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2760 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2761 VATTR_NULL(&vattr);
2762 vattr.va_mode = mode & ALLPERMS;
2763 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2764 VOP_UNLOCK(vp, 0);
2765 vn_finished_write(mp, 0);
2766 return (error);
2767 }
2768
2769 /*
2770 * Set ownership given a path name; this version follows links.
2771 */
2772 /* ARGSUSED */
2773 int
2774 sys_chown(struct lwp *l, void *v, register_t *retval)
2775 {
2776 struct sys_chown_args /* {
2777 syscallarg(const char *) path;
2778 syscallarg(uid_t) uid;
2779 syscallarg(gid_t) gid;
2780 } */ *uap = v;
2781 int error;
2782 struct nameidata nd;
2783
2784 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2785 if ((error = namei(&nd)) != 0)
2786 return (error);
2787
2788 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2789
2790 vrele(nd.ni_vp);
2791 return (error);
2792 }
2793
2794 /*
2795 * Set ownership given a path name; this version follows links.
2796 * Provides POSIX semantics.
2797 */
2798 /* ARGSUSED */
2799 int
2800 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2801 {
2802 struct sys_chown_args /* {
2803 syscallarg(const char *) path;
2804 syscallarg(uid_t) uid;
2805 syscallarg(gid_t) gid;
2806 } */ *uap = v;
2807 int error;
2808 struct nameidata nd;
2809
2810 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2811 if ((error = namei(&nd)) != 0)
2812 return (error);
2813
2814 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2815
2816 vrele(nd.ni_vp);
2817 return (error);
2818 }
2819
2820 /*
2821 * Set ownership given a file descriptor.
2822 */
2823 /* ARGSUSED */
2824 int
2825 sys_fchown(struct lwp *l, void *v, register_t *retval)
2826 {
2827 struct sys_fchown_args /* {
2828 syscallarg(int) fd;
2829 syscallarg(uid_t) uid;
2830 syscallarg(gid_t) gid;
2831 } */ *uap = v;
2832 struct proc *p = l->l_proc;
2833 int error;
2834 struct file *fp;
2835
2836 /* getvnode() will use the descriptor for us */
2837 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2838 return (error);
2839
2840 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2841 SCARG(uap, gid), l, 0);
2842 FILE_UNUSE(fp, l);
2843 return (error);
2844 }
2845
2846 /*
2847 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2848 */
2849 /* ARGSUSED */
2850 int
2851 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2852 {
2853 struct sys_fchown_args /* {
2854 syscallarg(int) fd;
2855 syscallarg(uid_t) uid;
2856 syscallarg(gid_t) gid;
2857 } */ *uap = v;
2858 struct proc *p = l->l_proc;
2859 int error;
2860 struct file *fp;
2861
2862 /* getvnode() will use the descriptor for us */
2863 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2864 return (error);
2865
2866 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2867 SCARG(uap, gid), l, 1);
2868 FILE_UNUSE(fp, l);
2869 return (error);
2870 }
2871
2872 /*
2873 * Set ownership given a path name; this version does not follow links.
2874 */
2875 /* ARGSUSED */
2876 int
2877 sys_lchown(struct lwp *l, void *v, register_t *retval)
2878 {
2879 struct sys_lchown_args /* {
2880 syscallarg(const char *) path;
2881 syscallarg(uid_t) uid;
2882 syscallarg(gid_t) gid;
2883 } */ *uap = v;
2884 int error;
2885 struct nameidata nd;
2886
2887 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2888 if ((error = namei(&nd)) != 0)
2889 return (error);
2890
2891 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2892
2893 vrele(nd.ni_vp);
2894 return (error);
2895 }
2896
2897 /*
2898 * Set ownership given a path name; this version does not follow links.
2899 * Provides POSIX/XPG semantics.
2900 */
2901 /* ARGSUSED */
2902 int
2903 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2904 {
2905 struct sys_lchown_args /* {
2906 syscallarg(const char *) path;
2907 syscallarg(uid_t) uid;
2908 syscallarg(gid_t) gid;
2909 } */ *uap = v;
2910 int error;
2911 struct nameidata nd;
2912
2913 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2914 if ((error = namei(&nd)) != 0)
2915 return (error);
2916
2917 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2918
2919 vrele(nd.ni_vp);
2920 return (error);
2921 }
2922
2923 /*
2924 * Common routine to set ownership given a vnode.
2925 */
2926 static int
2927 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2928 int posix_semantics)
2929 {
2930 struct mount *mp;
2931 struct vattr vattr;
2932 mode_t newmode;
2933 int error;
2934
2935 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2936 return (error);
2937 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2938 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2939 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2940 goto out;
2941
2942 #define CHANGED(x) ((int)(x) != -1)
2943 newmode = vattr.va_mode;
2944 if (posix_semantics) {
2945 /*
2946 * POSIX/XPG semantics: if the caller is not the super-user,
2947 * clear set-user-id and set-group-id bits. Both POSIX and
2948 * the XPG consider the behaviour for calls by the super-user
2949 * implementation-defined; we leave the set-user-id and set-
2950 * group-id settings intact in that case.
2951 */
2952 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2953 NULL) != 0)
2954 newmode &= ~(S_ISUID | S_ISGID);
2955 } else {
2956 /*
2957 * NetBSD semantics: when changing owner and/or group,
2958 * clear the respective bit(s).
2959 */
2960 if (CHANGED(uid))
2961 newmode &= ~S_ISUID;
2962 if (CHANGED(gid))
2963 newmode &= ~S_ISGID;
2964 }
2965 /* Update va_mode iff altered. */
2966 if (vattr.va_mode == newmode)
2967 newmode = VNOVAL;
2968
2969 VATTR_NULL(&vattr);
2970 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2971 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2972 vattr.va_mode = newmode;
2973 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2974 #undef CHANGED
2975
2976 out:
2977 VOP_UNLOCK(vp, 0);
2978 vn_finished_write(mp, 0);
2979 return (error);
2980 }
2981
2982 /*
2983 * Set the access and modification times given a path name; this
2984 * version follows links.
2985 */
2986 /* ARGSUSED */
2987 int
2988 sys_utimes(struct lwp *l, void *v, register_t *retval)
2989 {
2990 struct sys_utimes_args /* {
2991 syscallarg(const char *) path;
2992 syscallarg(const struct timeval *) tptr;
2993 } */ *uap = v;
2994 int error;
2995 struct nameidata nd;
2996
2997 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2998 if ((error = namei(&nd)) != 0)
2999 return (error);
3000
3001 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3002
3003 vrele(nd.ni_vp);
3004 return (error);
3005 }
3006
3007 /*
3008 * Set the access and modification times given a file descriptor.
3009 */
3010 /* ARGSUSED */
3011 int
3012 sys_futimes(struct lwp *l, void *v, register_t *retval)
3013 {
3014 struct sys_futimes_args /* {
3015 syscallarg(int) fd;
3016 syscallarg(const struct timeval *) tptr;
3017 } */ *uap = v;
3018 struct proc *p = l->l_proc;
3019 int error;
3020 struct file *fp;
3021
3022 /* getvnode() will use the descriptor for us */
3023 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3024 return (error);
3025
3026 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
3027 FILE_UNUSE(fp, l);
3028 return (error);
3029 }
3030
3031 /*
3032 * Set the access and modification times given a path name; this
3033 * version does not follow links.
3034 */
3035 /* ARGSUSED */
3036 int
3037 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3038 {
3039 struct sys_lutimes_args /* {
3040 syscallarg(const char *) path;
3041 syscallarg(const struct timeval *) tptr;
3042 } */ *uap = v;
3043 int error;
3044 struct nameidata nd;
3045
3046 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3047 if ((error = namei(&nd)) != 0)
3048 return (error);
3049
3050 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3051
3052 vrele(nd.ni_vp);
3053 return (error);
3054 }
3055
3056 /*
3057 * Common routine to set access and modification times given a vnode.
3058 */
3059 static int
3060 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
3061 {
3062 struct mount *mp;
3063 struct vattr vattr;
3064 int error;
3065
3066 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3067 return (error);
3068 VATTR_NULL(&vattr);
3069 if (tptr == NULL) {
3070 nanotime(&vattr.va_atime);
3071 vattr.va_mtime = vattr.va_atime;
3072 vattr.va_vaflags |= VA_UTIMES_NULL;
3073 } else {
3074 struct timeval tv[2];
3075
3076 error = copyin(tptr, tv, sizeof(tv));
3077 if (error)
3078 goto out;
3079 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3080 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3081 }
3082 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3083 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3084 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3085 VOP_UNLOCK(vp, 0);
3086 out:
3087 vn_finished_write(mp, 0);
3088 return (error);
3089 }
3090
3091 /*
3092 * Truncate a file given its path name.
3093 */
3094 /* ARGSUSED */
3095 int
3096 sys_truncate(struct lwp *l, void *v, register_t *retval)
3097 {
3098 struct sys_truncate_args /* {
3099 syscallarg(const char *) path;
3100 syscallarg(int) pad;
3101 syscallarg(off_t) length;
3102 } */ *uap = v;
3103 struct vnode *vp;
3104 struct mount *mp;
3105 struct vattr vattr;
3106 int error;
3107 struct nameidata nd;
3108
3109 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3110 if ((error = namei(&nd)) != 0)
3111 return (error);
3112 vp = nd.ni_vp;
3113 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3114 vrele(vp);
3115 return (error);
3116 }
3117 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3119 if (vp->v_type == VDIR)
3120 error = EISDIR;
3121 else if ((error = vn_writechk(vp)) == 0 &&
3122 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3123 VATTR_NULL(&vattr);
3124 vattr.va_size = SCARG(uap, length);
3125 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3126 }
3127 vput(vp);
3128 vn_finished_write(mp, 0);
3129 return (error);
3130 }
3131
3132 /*
3133 * Truncate a file given a file descriptor.
3134 */
3135 /* ARGSUSED */
3136 int
3137 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3138 {
3139 struct sys_ftruncate_args /* {
3140 syscallarg(int) fd;
3141 syscallarg(int) pad;
3142 syscallarg(off_t) length;
3143 } */ *uap = v;
3144 struct proc *p = l->l_proc;
3145 struct mount *mp;
3146 struct vattr vattr;
3147 struct vnode *vp;
3148 struct file *fp;
3149 int error;
3150
3151 /* getvnode() will use the descriptor for us */
3152 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3153 return (error);
3154 if ((fp->f_flag & FWRITE) == 0) {
3155 error = EINVAL;
3156 goto out;
3157 }
3158 vp = (struct vnode *)fp->f_data;
3159 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3160 FILE_UNUSE(fp, l);
3161 return (error);
3162 }
3163 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3164 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3165 if (vp->v_type == VDIR)
3166 error = EISDIR;
3167 else if ((error = vn_writechk(vp)) == 0) {
3168 VATTR_NULL(&vattr);
3169 vattr.va_size = SCARG(uap, length);
3170 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3171 }
3172 VOP_UNLOCK(vp, 0);
3173 vn_finished_write(mp, 0);
3174 out:
3175 FILE_UNUSE(fp, l);
3176 return (error);
3177 }
3178
3179 /*
3180 * Sync an open file.
3181 */
3182 /* ARGSUSED */
3183 int
3184 sys_fsync(struct lwp *l, void *v, register_t *retval)
3185 {
3186 struct sys_fsync_args /* {
3187 syscallarg(int) fd;
3188 } */ *uap = v;
3189 struct proc *p = l->l_proc;
3190 struct vnode *vp;
3191 struct mount *mp;
3192 struct file *fp;
3193 int error;
3194
3195 /* getvnode() will use the descriptor for us */
3196 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3197 return (error);
3198 vp = (struct vnode *)fp->f_data;
3199 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3200 FILE_UNUSE(fp, l);
3201 return (error);
3202 }
3203 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3204 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3205 if (error == 0 && bioops.io_fsync != NULL &&
3206 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3207 (*bioops.io_fsync)(vp, 0);
3208 VOP_UNLOCK(vp, 0);
3209 vn_finished_write(mp, 0);
3210 FILE_UNUSE(fp, l);
3211 return (error);
3212 }
3213
3214 /*
3215 * Sync a range of file data. API modeled after that found in AIX.
3216 *
3217 * FDATASYNC indicates that we need only save enough metadata to be able
3218 * to re-read the written data. Note we duplicate AIX's requirement that
3219 * the file be open for writing.
3220 */
3221 /* ARGSUSED */
3222 int
3223 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3224 {
3225 struct sys_fsync_range_args /* {
3226 syscallarg(int) fd;
3227 syscallarg(int) flags;
3228 syscallarg(off_t) start;
3229 syscallarg(off_t) length;
3230 } */ *uap = v;
3231 struct proc *p = l->l_proc;
3232 struct vnode *vp;
3233 struct file *fp;
3234 int flags, nflags;
3235 off_t s, e, len;
3236 int error;
3237
3238 /* getvnode() will use the descriptor for us */
3239 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3240 return (error);
3241
3242 if ((fp->f_flag & FWRITE) == 0) {
3243 error = EBADF;
3244 goto out;
3245 }
3246
3247 flags = SCARG(uap, flags);
3248 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3249 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3250 error = EINVAL;
3251 goto out;
3252 }
3253 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3254 if (flags & FDATASYNC)
3255 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3256 else
3257 nflags = FSYNC_WAIT;
3258 if (flags & FDISKSYNC)
3259 nflags |= FSYNC_CACHE;
3260
3261 len = SCARG(uap, length);
3262 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3263 if (len) {
3264 s = SCARG(uap, start);
3265 e = s + len;
3266 if (e < s) {
3267 error = EINVAL;
3268 goto out;
3269 }
3270 } else {
3271 e = 0;
3272 s = 0;
3273 }
3274
3275 vp = (struct vnode *)fp->f_data;
3276 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3277 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3278
3279 if (error == 0 && bioops.io_fsync != NULL &&
3280 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3281 (*bioops.io_fsync)(vp, nflags);
3282
3283 VOP_UNLOCK(vp, 0);
3284 out:
3285 FILE_UNUSE(fp, l);
3286 return (error);
3287 }
3288
3289 /*
3290 * Sync the data of an open file.
3291 */
3292 /* ARGSUSED */
3293 int
3294 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3295 {
3296 struct sys_fdatasync_args /* {
3297 syscallarg(int) fd;
3298 } */ *uap = v;
3299 struct proc *p = l->l_proc;
3300 struct vnode *vp;
3301 struct file *fp;
3302 int error;
3303
3304 /* getvnode() will use the descriptor for us */
3305 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3306 return (error);
3307 if ((fp->f_flag & FWRITE) == 0) {
3308 FILE_UNUSE(fp, l);
3309 return (EBADF);
3310 }
3311 vp = (struct vnode *)fp->f_data;
3312 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3313 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3314 VOP_UNLOCK(vp, 0);
3315 FILE_UNUSE(fp, l);
3316 return (error);
3317 }
3318
3319 /*
3320 * Rename files, (standard) BSD semantics frontend.
3321 */
3322 /* ARGSUSED */
3323 int
3324 sys_rename(struct lwp *l, void *v, register_t *retval)
3325 {
3326 struct sys_rename_args /* {
3327 syscallarg(const char *) from;
3328 syscallarg(const char *) to;
3329 } */ *uap = v;
3330
3331 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3332 }
3333
3334 /*
3335 * Rename files, POSIX semantics frontend.
3336 */
3337 /* ARGSUSED */
3338 int
3339 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3340 {
3341 struct sys___posix_rename_args /* {
3342 syscallarg(const char *) from;
3343 syscallarg(const char *) to;
3344 } */ *uap = v;
3345
3346 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3347 }
3348
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 * If `from' and `to' refer to the same object, the value of the `retain'
 * argument is used to determine whether `from' will be
 *
 * (retain == 0)	deleted unless `from' and `to' refer to the same
 *			object in the file system's name space (BSD).
 * (retain == 1)	always retained (POSIX).
 *
 * `from' and `to' are user-space path names.  Returns 0 on success or an
 * errno value.  Internally, error == -1 is used as a marker meaning
 * "nothing to do"; it takes the abort path below and is converted to 0
 * on return.
 */
static int
rename_files(const char *from, const char *to, struct lwp *l, int retain)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	struct proc *p;
	int error;

	/* Look up the source; its parent is unlocked again right away. */
	NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART, UIO_USERSPACE,
	    from, l);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp, 0);
	fvp = fromnd.ni_vp;
	error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
	if (error != 0) {
		/* Undo the source lookup completely. */
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		if (fromnd.ni_startdir)
			vrele(fromnd.ni_startdir);
		PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
		return (error);
	}
	/*
	 * Look up the destination.  CREATEDIR is added when the source is
	 * a directory.
	 */
	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
	    (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
	if ((error = namei(&tond)) != 0) {
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}

	/* The source may not be the directory that will hold the target. */
	if (fvp == tdvp)
		error = EINVAL;

	/*
	 * Source and destination refer to the same object.
	 * With `retain' set nothing is done at all; otherwise nothing is
	 * done only if parent directory and final name component match too.
	 */
	if (fvp == tvp) {
		if (retain)
			error = -1;
		else if (fromnd.ni_dvp == tdvp &&
		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
		    !memcmp(fromnd.ni_cnd.cn_nameptr,
		          tond.ni_cnd.cn_nameptr,
		          fromnd.ni_cnd.cn_namelen))
			error = -1;
	}

#if NVERIEXEC > 0
	if (!error) {
		pathname_t frompath, topath;

		/*
		 * NOTE(review): if the first pathname_get() fails, topath
		 * is never assigned but is still handed to pathname_put()
		 * below.  Confirm that pathname_get()/pathname_put() make
		 * this safe, or initialise both variables.
		 */
		error = pathname_get(fromnd.ni_dirp, fromnd.ni_segflg,
		    &frompath);
		if (!error)
			error = pathname_get(tond.ni_dirp, tond.ni_segflg,
			    &topath);
		if (!error)
			error = veriexec_renamechk(fvp, pathname_path(frompath),
			    tvp, pathname_path(topath), l);

		pathname_put(frompath);
		pathname_put(topath);
	}
#endif /* NVERIEXEC > 0 */

out:
	/* NOTE(review): p is assigned here but not used in this function. */
	p = l->l_proc;
	if (!error) {
		VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
		if (fromnd.ni_dvp != tdvp)
			VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
		if (tvp) {
			VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
		}
		/*
		 * No vnode is released here after the call; presumably
		 * VOP_RENAME() disposes of all four vnodes itself -- TODO
		 * confirm against the VOP_RENAME() contract.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
	} else {
		/* Abort both lookups and drop everything they returned. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
out1:
	vn_finished_write(mp, 0);
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
	/* -1 is the internal "nothing to do" marker, reported as success. */
	return (error == -1 ? 0 : error);
}
3473
3474 /*
3475 * Make a directory file.
3476 */
3477 /* ARGSUSED */
3478 int
3479 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3480 {
3481 struct sys_mkdir_args /* {
3482 syscallarg(const char *) path;
3483 syscallarg(int) mode;
3484 } */ *uap = v;
3485 struct proc *p = l->l_proc;
3486 struct mount *mp;
3487 struct vnode *vp;
3488 struct vattr vattr;
3489 int error;
3490 struct nameidata nd;
3491
3492 restart:
3493 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3494 SCARG(uap, path), l);
3495 if ((error = namei(&nd)) != 0)
3496 return (error);
3497 vp = nd.ni_vp;
3498 if (vp != NULL) {
3499 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3500 if (nd.ni_dvp == vp)
3501 vrele(nd.ni_dvp);
3502 else
3503 vput(nd.ni_dvp);
3504 vrele(vp);
3505 return (EEXIST);
3506 }
3507 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3508 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3509 if (nd.ni_dvp == vp)
3510 vrele(nd.ni_dvp);
3511 else
3512 vput(nd.ni_dvp);
3513 if ((error = vn_start_write(NULL, &mp,
3514 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3515 return (error);
3516 goto restart;
3517 }
3518 VATTR_NULL(&vattr);
3519 vattr.va_type = VDIR;
3520 vattr.va_mode =
3521 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3522 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3523 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3524 if (!error)
3525 vput(nd.ni_vp);
3526 vn_finished_write(mp, 0);
3527 return (error);
3528 }
3529
3530 /*
3531 * Remove a directory file.
3532 */
3533 /* ARGSUSED */
3534 int
3535 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3536 {
3537 struct sys_rmdir_args /* {
3538 syscallarg(const char *) path;
3539 } */ *uap = v;
3540 struct mount *mp;
3541 struct vnode *vp;
3542 int error;
3543 struct nameidata nd;
3544
3545 restart:
3546 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3547 SCARG(uap, path), l);
3548 if ((error = namei(&nd)) != 0)
3549 return (error);
3550 vp = nd.ni_vp;
3551 if (vp->v_type != VDIR) {
3552 error = ENOTDIR;
3553 goto out;
3554 }
3555 /*
3556 * No rmdir "." please.
3557 */
3558 if (nd.ni_dvp == vp) {
3559 error = EINVAL;
3560 goto out;
3561 }
3562 /*
3563 * The root of a mounted filesystem cannot be deleted.
3564 */
3565 if (vp->v_flag & VROOT) {
3566 error = EBUSY;
3567 goto out;
3568 }
3569 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3570 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3571 if (nd.ni_dvp == vp)
3572 vrele(nd.ni_dvp);
3573 else
3574 vput(nd.ni_dvp);
3575 vput(vp);
3576 if ((error = vn_start_write(NULL, &mp,
3577 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3578 return (error);
3579 goto restart;
3580 }
3581 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3582 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3583 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3584 vn_finished_write(mp, 0);
3585 return (error);
3586
3587 out:
3588 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3589 if (nd.ni_dvp == vp)
3590 vrele(nd.ni_dvp);
3591 else
3592 vput(nd.ni_dvp);
3593 vput(vp);
3594 return (error);
3595 }
3596
3597 /*
3598 * Read a block of directory entries in a file system independent format.
3599 */
3600 int
3601 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3602 {
3603 struct sys___getdents30_args /* {
3604 syscallarg(int) fd;
3605 syscallarg(char *) buf;
3606 syscallarg(size_t) count;
3607 } */ *uap = v;
3608 struct proc *p = l->l_proc;
3609 struct file *fp;
3610 int error, done;
3611
3612 /* getvnode() will use the descriptor for us */
3613 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3614 return (error);
3615 if ((fp->f_flag & FREAD) == 0) {
3616 error = EBADF;
3617 goto out;
3618 }
3619 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3620 SCARG(uap, count), &done, l, 0, 0);
3621 #ifdef KTRACE
3622 if (!error && KTRPOINT(p, KTR_GENIO)) {
3623 struct iovec iov;
3624 iov.iov_base = SCARG(uap, buf);
3625 iov.iov_len = done;
3626 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3627 }
3628 #endif
3629 *retval = done;
3630 out:
3631 FILE_UNUSE(fp, l);
3632 return (error);
3633 }
3634
3635 /*
3636 * Set the mode mask for creation of filesystem nodes.
3637 */
3638 int
3639 sys_umask(struct lwp *l, void *v, register_t *retval)
3640 {
3641 struct sys_umask_args /* {
3642 syscallarg(mode_t) newmask;
3643 } */ *uap = v;
3644 struct proc *p = l->l_proc;
3645 struct cwdinfo *cwdi;
3646
3647 cwdi = p->p_cwdi;
3648 *retval = cwdi->cwdi_cmask;
3649 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3650 return (0);
3651 }
3652
3653 /*
3654 * Void all references to file by ripping underlying filesystem
3655 * away from vnode.
3656 */
3657 /* ARGSUSED */
3658 int
3659 sys_revoke(struct lwp *l, void *v, register_t *retval)
3660 {
3661 struct sys_revoke_args /* {
3662 syscallarg(const char *) path;
3663 } */ *uap = v;
3664 struct mount *mp;
3665 struct vnode *vp;
3666 struct vattr vattr;
3667 int error;
3668 struct nameidata nd;
3669
3670 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3671 if ((error = namei(&nd)) != 0)
3672 return (error);
3673 vp = nd.ni_vp;
3674 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3675 goto out;
3676 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3677 (error = kauth_authorize_generic(l->l_cred,
3678 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3679 goto out;
3680 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3681 goto out;
3682 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3683 VOP_REVOKE(vp, REVOKEALL);
3684 vn_finished_write(mp, 0);
3685 out:
3686 vrele(vp);
3687 return (error);
3688 }
3689
3690 /*
3691 * Convert a user file descriptor to a kernel file entry.
3692 */
3693 int
3694 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3695 {
3696 struct vnode *vp;
3697 struct file *fp;
3698
3699 if ((fp = fd_getfile(fdp, fd)) == NULL)
3700 return (EBADF);
3701
3702 FILE_USE(fp);
3703
3704 if (fp->f_type != DTYPE_VNODE) {
3705 FILE_UNUSE(fp, NULL);
3706 return (EINVAL);
3707 }
3708
3709 vp = (struct vnode *)fp->f_data;
3710 if (vp->v_type == VBAD) {
3711 FILE_UNUSE(fp, NULL);
3712 return (EBADF);
3713 }
3714
3715 *fpp = fp;
3716 return (0);
3717 }
3718