vfs_syscalls.c revision 1.303 1 /* $NetBSD: vfs_syscalls.c,v 1.303 2007/02/28 20:39:06 pooka Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.303 2007/02/28 20:39:06 pooka Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/syscallargs.h>
65 #ifdef KTRACE
66 #include <sys/ktrace.h>
67 #endif
68 #ifdef FILEASSOC
69 #include <sys/fileassoc.h>
70 #endif /* FILEASSOC */
71 #if NVERIEXEC > 0
72 #include <sys/verified_exec.h>
73 #include <sys/syslog.h>
74 #endif /* NVERIEXEC > 0 */
75 #include <sys/kauth.h>
76
77 #include <miscfs/genfs/genfs.h>
78 #include <miscfs/syncfs/syncfs.h>
79
80 #ifdef COMPAT_30
81 #include "opt_nfsserver.h"
82 #include <nfs/rpcv2.h>
83 #endif
84 #include <nfs/nfsproto.h>
85 #ifdef COMPAT_30
86 #include <nfs/nfs.h>
87 #include <nfs/nfs_var.h>
88 #endif
89
90 #if NFSS > 0
91 #include <dev/fssvar.h>
92 #endif
93
/* Malloc type tag used for the struct mount allocations made in this file. */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");

/* Forward declarations of file-local helpers. */
static int change_dir(struct nameidata *, struct lwp *);
static int change_flags(struct vnode *, u_long, struct lwp *);
static int change_mode(struct vnode *, int, struct lwp *l);
static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
static int change_utimes(struct vnode *vp, const struct timeval *,
    struct lwp *l);
static int rename_files(const char *, const char *, struct lwp *, int);

/* Not static: has external linkage. */
void checkdirs(struct vnode *);

/* Back ends of sys_mount(), one per kind of mount request. */
static int mount_update(struct lwp *, struct vnode *, const char *, int,
    void *, struct nameidata *);
static int mount_domount(struct lwp *, struct vnode *, const char *,
    const char *, int, void *, struct nameidata *);
static int mount_getargs(struct lwp *, struct vnode *, const char *, int,
    void *, struct nameidata *);

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably a knob for mounts by unprivileged users -- confirm.
 */
int dovfsusermount = 0;
114
115 /*
116 * Virtual File System System Calls
117 */
118
119 /*
120 * Mount a file system.
121 */
122
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * This table is used to maintain compatibility with 4.3BSD
 * and NetBSD 0.9 mount syscalls.  Note, the order is important!
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 *
 * The array index is the historic numeric file system type; a NULL
 * slot marks a number with no corresponding file system name.
 * mount_domount() consults the table when the type argument cannot be
 * copied in as a string.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};
/* Number of slots in mountcompatnames[], used for bounds checking. */
const int nmountcompatnames = sizeof(mountcompatnames) /
    sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
146
147 static int
148 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
149 void *data, struct nameidata *ndp)
150 {
151 struct mount *mp;
152 int error = 0, saved_flags;
153
154 mp = vp->v_mount;
155 saved_flags = mp->mnt_flag;
156
157 /* We can't operate on VROOT here. */
158 if ((vp->v_flag & VROOT) == 0) {
159 vput(vp);
160 error = EINVAL;
161 goto out;
162 }
163
164 /*
165 * We only allow the filesystem to be reloaded if it
166 * is currently mounted read-only.
167 */
168 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
169 error = EOPNOTSUPP; /* Needs translation */
170 goto out;
171 }
172
173 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
174 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
175 if (error)
176 goto out;
177
178 if (vfs_busy(mp, LK_NOWAIT, 0)) {
179 error = EPERM;
180 goto out;
181 }
182
183 mp->mnt_flag &= ~MNT_OP_FLAGS;
184 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
185
186 /*
187 * Set the mount level flags.
188 */
189 if (flags & MNT_RDONLY)
190 mp->mnt_flag |= MNT_RDONLY;
191 else if (mp->mnt_flag & MNT_RDONLY)
192 mp->mnt_iflag |= IMNT_WANTRDWR;
193 mp->mnt_flag &=
194 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
195 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
196 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
197 mp->mnt_flag |= flags &
198 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
199 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
200 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
201 MNT_IGNORE);
202
203 error = VFS_MOUNT(mp, path, data, ndp, l);
204
205 #if defined(COMPAT_30) && defined(NFSSERVER)
206 if (error) {
207 int error2;
208
209 /* Update failed; let's try and see if it was an
210 * export request. */
211 error2 = nfs_update_exports_30(mp, path, data, l);
212
213 /* Only update error code if the export request was
214 * understood but some problem occurred while
215 * processing it. */
216 if (error2 != EJUSTRETURN)
217 error = error2;
218 }
219 #endif
220 if (mp->mnt_iflag & IMNT_WANTRDWR)
221 mp->mnt_flag &= ~MNT_RDONLY;
222 if (error)
223 mp->mnt_flag = saved_flags;
224 mp->mnt_flag &= ~MNT_OP_FLAGS;
225 mp->mnt_iflag &= ~IMNT_WANTRDWR;
226 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
227 if (mp->mnt_syncer == NULL)
228 error = vfs_allocate_syncvnode(mp);
229 } else {
230 if (mp->mnt_syncer != NULL)
231 vfs_deallocate_syncvnode(mp);
232 }
233 vfs_unbusy(mp);
234
235 out:
236 return (error);
237 }
238
/*
 * Mount a new file system of type fstype on the directory vnode vp.
 *
 * l	  calling lwp; its credentials are used for authorization
 * vp	  locked vnode to be covered; released by this function on
 *	  every failure path shown below
 * fstype user-space pointer to the file system type name (or, with
 *	  COMPAT_09/COMPAT_43, a historic numeric type)
 * path	  user-space mount point path, passed to VFS_MOUNT()
 * flags  mount flags supplied by the caller
 * data	  file-system specific argument block, passed to VFS_MOUNT()
 * ndp	  nameidata of the lookup, passed to VFS_MOUNT()
 *
 * Returns 0 on success or an errno value.
 */
static int
mount_domount(struct lwp *l, struct vnode *vp, const char *fstype,
    const char *path, int flags, void *data, struct nameidata *ndp)
{
	struct mount *mp = NULL;
	struct vattr va;
	char fstypename[MFSNAMELEN];
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vput(vp);
		goto out;
	}

	/* Can't make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		vput(vp);
		goto out;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
	    (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
	    (error = kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
		vput(vp);
		goto out;
	}

	/* MNT_EXPORTED is rejected for a new mount. */
	if (flags & MNT_EXPORTED) {
		error = EINVAL;
		vput(vp);
		goto out;
	}

	/*
	 * Copy file-system type from userspace.
	 */
	error = copyinstr(fstype, fstypename, MFSNAMELEN, NULL);
	if (error) {
#if defined(COMPAT_09) || defined(COMPAT_43)
		/*
		 * Historically, filesystem types were identified by numbers.
		 * If we get an integer for the filesystem type instead of a
		 * string, we check to see if it matches one of the historic
		 * filesystem types.
		 */
		u_long fsindex = (u_long)fstype;
		if (fsindex >= nmountcompatnames ||
		    mountcompatnames[fsindex] == NULL) {
			error = ENODEV;
			vput(vp);
			goto out;
		}
		strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
#else
		vput(vp);
		goto out;
#endif
	}

#ifdef COMPAT_10
	/* Accept `ufs' as an alias for `ffs'. */
	if (strncmp(fstypename, "ufs", MFSNAMELEN) == 0)
		strlcpy(fstypename, "ffs", sizeof(fstypename));
#endif

	if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
		vput(vp);
		goto out;
	}

	/*
	 * Check if a file-system is not already mounted on this vnode.
	 */
	if (vp->v_mountedhere != NULL) {
		error = EBUSY;
		vput(vp);
		goto out;
	}

	mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);

	/* Look up the operations vector for the requested type. */
	if ((mp->mnt_op = vfs_getopsbyname(fstypename)) == NULL) {
		free(mp, M_MOUNT);
		error = ENODEV;
		vput(vp);
		goto out;
	}

	TAILQ_INIT(&mp->mnt_vnodelist);
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	/* NOTE(review): the vfs_busy() result is deliberately ignored. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);

	mp->mnt_op->vfs_refcount++;
	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
	mp->mnt_unmounter = NULL;
	mp->mnt_leaf = mp;
	mount_initspecific(mp);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags &
	    (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_IGNORE | MNT_RDONLY);

	error = VFS_MOUNT(mp, path, data, ndp, l);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	if (!error) {
		mp->mnt_iflag &= ~IMNT_WANTRDWR;
		vp->v_mountedhere = mp;
		simple_lock(&mountlist_slock);
		CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
		simple_unlock(&mountlist_slock);
		/* The covered vnode stays referenced but is unlocked. */
		VOP_UNLOCK(vp, 0);
		checkdirs(vp);
		/*
		 * NOTE(review): this result is overwritten by the
		 * VFS_START() call below before it is ever examined.
		 */
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		(void) VFS_STATVFS(mp, &mp->mnt_stat, l);
		error = VFS_START(mp, 0, l);
		/*
		 * NOTE(review): on VFS_START() failure only the vnode
		 * reference is dropped; mp stays on the mount list.
		 */
		if (error)
			vrele(vp);
	} else {
		/* Mount refused: undo the setup done above. */
		vp->v_mountedhere = NULL;
		mp->mnt_op->vfs_refcount--;
		vfs_unbusy(mp);
		free(mp, M_MOUNT);
		vput(vp);
	}

out:
	return (error);
}
392
393 static int
394 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
395 void *data, struct nameidata *ndp)
396 {
397 struct mount *mp;
398 int error;
399
400 /* If MNT_GETARGS is specified, it should be the only flag. */
401 if (flags & ~MNT_GETARGS) {
402 error = EINVAL;
403 goto out;
404 }
405
406 mp = vp->v_mount;
407
408 /* XXX: probably some notion of "can see" here if we want isolation. */
409 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
410 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
411 if (error)
412 goto out;
413
414 if ((vp->v_flag & VROOT) == 0) {
415 error = EINVAL;
416 goto out;
417 }
418
419 if (vfs_busy(mp, LK_NOWAIT, 0)) {
420 error = EPERM;
421 goto out;
422 }
423
424 mp->mnt_flag &= ~MNT_OP_FLAGS;
425 mp->mnt_flag |= MNT_GETARGS;
426 error = VFS_MOUNT(mp, path, data, ndp, l);
427 mp->mnt_flag &= ~MNT_OP_FLAGS;
428
429 vfs_unbusy(mp);
430 out:
431 return (error);
432 }
433
434 /* ARGSUSED */
435 int
436 sys_mount(struct lwp *l, void *v, register_t *retval)
437 {
438 struct sys_mount_args /* {
439 syscallarg(const char *) type;
440 syscallarg(const char *) path;
441 syscallarg(int) flags;
442 syscallarg(void *) data;
443 } */ *uap = v;
444 struct vnode *vp;
445 struct nameidata nd;
446 int error;
447
448 /*
449 * Get vnode to be covered
450 */
451 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
452 SCARG(uap, path), l);
453 if ((error = namei(&nd)) != 0)
454 return (error);
455 vp = nd.ni_vp;
456
457 /*
458 * A lookup in VFS_MOUNT might result in an attempt to
459 * lock this vnode again, so make the lock recursive.
460 */
461 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
462
463 if (SCARG(uap, flags) & MNT_GETARGS) {
464 error = mount_getargs(l, vp, SCARG(uap, path),
465 SCARG(uap, flags), SCARG(uap, data), &nd);
466 vput(vp);
467 } else if (SCARG(uap, flags) & MNT_UPDATE) {
468 error = mount_update(l, vp, SCARG(uap, path),
469 SCARG(uap, flags), SCARG(uap, data), &nd);
470 vput(vp);
471 } else {
472 /* Locking is handled internally in mount_domount(). */
473 error = mount_domount(l, vp, SCARG(uap, type),
474 SCARG(uap, path), SCARG(uap, flags), SCARG(uap, data), &nd);
475 }
476
477 return (error);
478 }
479
480 /*
481 * Scan all active processes to see if any of them have a current
482 * or root directory onto which the new filesystem has just been
483 * mounted. If so, replace them with the new mount point.
484 */
485 void
486 checkdirs(struct vnode *olddp)
487 {
488 struct cwdinfo *cwdi;
489 struct vnode *newdp;
490 struct proc *p;
491
492 if (olddp->v_usecount == 1)
493 return;
494 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
495 panic("mount: lost mount");
496 rw_enter(&proclist_lock, RW_READER);
497 PROCLIST_FOREACH(p, &allproc) {
498 cwdi = p->p_cwdi;
499 if (!cwdi)
500 continue;
501 if (cwdi->cwdi_cdir == olddp) {
502 vrele(cwdi->cwdi_cdir);
503 VREF(newdp);
504 cwdi->cwdi_cdir = newdp;
505 }
506 if (cwdi->cwdi_rdir == olddp) {
507 vrele(cwdi->cwdi_rdir);
508 VREF(newdp);
509 cwdi->cwdi_rdir = newdp;
510 }
511 }
512 rw_exit(&proclist_lock);
513 if (rootvnode == olddp) {
514 vrele(rootvnode);
515 VREF(newdp);
516 rootvnode = newdp;
517 }
518 vput(newdp);
519 }
520
521 /*
522 * Unmount a file system.
523 *
524 * Note: unmount takes a path to the vnode mounted on as argument,
525 * not special file (as before).
526 */
527 /* ARGSUSED */
528 int
529 sys_unmount(struct lwp *l, void *v, register_t *retval)
530 {
531 struct sys_unmount_args /* {
532 syscallarg(const char *) path;
533 syscallarg(int) flags;
534 } */ *uap = v;
535 struct vnode *vp;
536 struct mount *mp;
537 int error;
538 struct nameidata nd;
539
540 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
541 SCARG(uap, path), l);
542 if ((error = namei(&nd)) != 0)
543 return (error);
544 vp = nd.ni_vp;
545 mp = vp->v_mount;
546
547 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
548 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
549 if (error) {
550 vput(vp);
551 return (error);
552 }
553
554 /*
555 * Don't allow unmounting the root file system.
556 */
557 if (mp->mnt_flag & MNT_ROOTFS) {
558 vput(vp);
559 return (EINVAL);
560 }
561
562 /*
563 * Must be the root of the filesystem
564 */
565 if ((vp->v_flag & VROOT) == 0) {
566 vput(vp);
567 return (EINVAL);
568 }
569 vput(vp);
570
571 /*
572 * XXX Freeze syncer. Must do this before locking the
573 * mount point. See dounmount() for details.
574 */
575 mutex_enter(&syncer_mutex);
576
577 if (vfs_busy(mp, 0, 0)) {
578 mutex_exit(&syncer_mutex);
579 return (EBUSY);
580 }
581
582 return (dounmount(mp, SCARG(uap, flags), l));
583 }
584
585 /*
586 * Do the actual file system unmount. File system is assumed to have been
587 * marked busy by the caller.
588 */
589 int
590 dounmount(struct mount *mp, int flags, struct lwp *l)
591 {
592 struct vnode *coveredvp;
593 int error;
594 int async;
595 int used_syncer;
596
597 #if NVERIEXEC > 0
598 error = veriexec_unmountchk(mp);
599 if (error)
600 return (error);
601 #endif /* NVERIEXEC > 0 */
602
603 simple_lock(&mountlist_slock);
604 vfs_unbusy(mp);
605 used_syncer = (mp->mnt_syncer != NULL);
606
607 /*
608 * XXX Syncer must be frozen when we get here. This should really
609 * be done on a per-mountpoint basis, but especially the softdep
610 * code possibly called from the syncer doesn't exactly work on a
611 * per-mountpoint basis, so the softdep code would become a maze
612 * of vfs_busy() calls.
613 *
614 * The caller of dounmount() must acquire syncer_mutex because
615 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
616 * order, and we must preserve that order to avoid deadlock.
617 *
618 * So, if the file system did not use the syncer, now is
619 * the time to release the syncer_mutex.
620 */
621 if (used_syncer == 0)
622 mutex_exit(&syncer_mutex);
623
624 mp->mnt_iflag |= IMNT_UNMOUNT;
625 mp->mnt_unmounter = l;
626 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
627 vn_start_write(NULL, &mp, V_WAIT);
628
629 async = mp->mnt_flag & MNT_ASYNC;
630 mp->mnt_flag &= ~MNT_ASYNC;
631 cache_purgevfs(mp); /* remove cache entries for this file sys */
632 if (mp->mnt_syncer != NULL)
633 vfs_deallocate_syncvnode(mp);
634 error = 0;
635 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
636 #if NFSS > 0
637 error = fss_umount_hook(mp, (flags & MNT_FORCE));
638 #endif
639 if (error == 0)
640 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
641 }
642 if (error == 0 || (flags & MNT_FORCE))
643 error = VFS_UNMOUNT(mp, flags, l);
644 vn_finished_write(mp, 0);
645 simple_lock(&mountlist_slock);
646 if (error) {
647 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
648 (void) vfs_allocate_syncvnode(mp);
649 mp->mnt_iflag &= ~IMNT_UNMOUNT;
650 mp->mnt_unmounter = NULL;
651 mp->mnt_flag |= async;
652 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
653 &mountlist_slock);
654 if (used_syncer)
655 mutex_exit(&syncer_mutex);
656 simple_lock(&mp->mnt_slock);
657 while (mp->mnt_wcnt > 0) {
658 wakeup(mp);
659 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
660 0, &mp->mnt_slock);
661 }
662 simple_unlock(&mp->mnt_slock);
663 return (error);
664 }
665 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
666 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
667 coveredvp->v_mountedhere = NULL;
668 mp->mnt_op->vfs_refcount--;
669 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
670 panic("unmount: dangling vnode");
671 mp->mnt_iflag |= IMNT_GONE;
672 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
673 if (coveredvp != NULLVP)
674 vrele(coveredvp);
675 mount_finispecific(mp);
676 if (used_syncer)
677 mutex_exit(&syncer_mutex);
678 simple_lock(&mp->mnt_slock);
679 while (mp->mnt_wcnt > 0) {
680 wakeup(mp);
681 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
682 }
683 simple_unlock(&mp->mnt_slock);
684 vfs_hooks_unmount(mp);
685 free(mp, M_MOUNT);
686 return (0);
687 }
688
689 /*
690 * Sync each mounted filesystem.
691 */
#ifdef DEBUG
/* When nonzero, sys_sync() calls vfs_bufstats() after syncing. */
int syncprt = 0;
/* Debug-control record pairing the name "syncprt" with the flag above. */
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif
696
/*
 * sync(2): start a non-waiting sync of every writable mounted file
 * system, walking the mount list from its tail towards its head.
 * Mounts that cannot be marked busy without waiting are skipped.
 * Always returns 0.
 */
/* ARGSUSED */
int
sys_sync(struct lwp *l, void *v, register_t *retval)
{
	struct mount *mp, *nmp;
	int asyncflag;

	/* Callers may pass a NULL lwp; fall back to lwp0. */
	if (l == NULL)
		l = &lwp0;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/* mountlist_slock is passed to vfs_busy() as the interlock. */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_prev;
			continue;
		}
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			/* MNT_ASYNC is cleared for the duration of the sync. */
			asyncflag = mp->mnt_flag & MNT_ASYNC;
			mp->mnt_flag &= ~MNT_ASYNC;
			VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
			if (asyncflag)
				mp->mnt_flag |= MNT_ASYNC;
			vn_finished_write(mp, 0);
		}
		/* Retake the list lock before reading the list linkage. */
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_prev;
		vfs_unbusy(mp);

	}
	simple_unlock(&mountlist_slock);
#ifdef DEBUG
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}
734
735 /*
736 * Change filesystem quotas.
737 */
738 /* ARGSUSED */
739 int
740 sys_quotactl(struct lwp *l, void *v, register_t *retval)
741 {
742 struct sys_quotactl_args /* {
743 syscallarg(const char *) path;
744 syscallarg(int) cmd;
745 syscallarg(int) uid;
746 syscallarg(void *) arg;
747 } */ *uap = v;
748 struct mount *mp;
749 int error;
750 struct nameidata nd;
751
752 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
753 if ((error = namei(&nd)) != 0)
754 return (error);
755 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
756 vrele(nd.ni_vp);
757 if (error)
758 return (error);
759 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
760 SCARG(uap, arg), l);
761 vn_finished_write(mp, 0);
762 return (error);
763 }
764
765 int
766 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
767 int root)
768 {
769 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
770 int error = 0;
771
772 /*
773 * If MNT_NOWAIT or MNT_LAZY is specified, do not
774 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
775 * overrides MNT_NOWAIT.
776 */
777 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
778 (flags != MNT_WAIT && flags != 0)) {
779 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
780 goto done;
781 }
782
783 /* Get the filesystem stats now */
784 memset(sp, 0, sizeof(*sp));
785 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
786 return error;
787 }
788
789 if (cwdi->cwdi_rdir == NULL)
790 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
791 done:
792 if (cwdi->cwdi_rdir != NULL) {
793 size_t len;
794 char *bp;
795 char *path = PNBUF_GET();
796 if (!path)
797 return ENOMEM;
798
799 bp = path + MAXPATHLEN;
800 *--bp = '\0';
801 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
802 MAXPATHLEN / 2, 0, l);
803 if (error) {
804 PNBUF_PUT(path);
805 return error;
806 }
807 len = strlen(bp);
808 /*
809 * for mount points that are below our root, we can see
810 * them, so we fix up the pathname and return them. The
811 * rest we cannot see, so we don't allow viewing the
812 * data.
813 */
814 if (strncmp(bp, sp->f_mntonname, len) == 0) {
815 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
816 sizeof(sp->f_mntonname));
817 if (sp->f_mntonname[0] == '\0')
818 (void)strlcpy(sp->f_mntonname, "/",
819 sizeof(sp->f_mntonname));
820 } else {
821 if (root)
822 (void)strlcpy(sp->f_mntonname, "/",
823 sizeof(sp->f_mntonname));
824 else
825 error = EPERM;
826 }
827 PNBUF_PUT(path);
828 }
829 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
830 return error;
831 }
832
833 /*
834 * Get filesystem statistics.
835 */
836 /* ARGSUSED */
837 int
838 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
839 {
840 struct sys_statvfs1_args /* {
841 syscallarg(const char *) path;
842 syscallarg(struct statvfs *) buf;
843 syscallarg(int) flags;
844 } */ *uap = v;
845 struct mount *mp;
846 struct statvfs *sb;
847 int error;
848 struct nameidata nd;
849
850 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
851 if ((error = namei(&nd)) != 0)
852 return error;
853 mp = nd.ni_vp->v_mount;
854 vrele(nd.ni_vp);
855 sb = STATVFSBUF_GET();
856 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
857 if (error == 0) {
858 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
859 }
860 STATVFSBUF_PUT(sb);
861 return error;
862 }
863
864 /*
865 * Get filesystem statistics.
866 */
867 /* ARGSUSED */
868 int
869 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
870 {
871 struct sys_fstatvfs1_args /* {
872 syscallarg(int) fd;
873 syscallarg(struct statvfs *) buf;
874 syscallarg(int) flags;
875 } */ *uap = v;
876 struct proc *p = l->l_proc;
877 struct file *fp;
878 struct mount *mp;
879 struct statvfs *sb;
880 int error;
881
882 /* getvnode() will use the descriptor for us */
883 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
884 return (error);
885 mp = ((struct vnode *)fp->f_data)->v_mount;
886 sb = STATVFSBUF_GET();
887 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
888 goto out;
889 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
890 out:
891 FILE_UNUSE(fp, l);
892 STATVFSBUF_PUT(sb);
893 return error;
894 }
895
896
897 /*
898 * Get statistics on all filesystems.
899 */
900 int
901 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
902 {
903 struct sys_getvfsstat_args /* {
904 syscallarg(struct statvfs *) buf;
905 syscallarg(size_t) bufsize;
906 syscallarg(int) flags;
907 } */ *uap = v;
908 int root = 0;
909 struct proc *p = l->l_proc;
910 struct mount *mp, *nmp;
911 struct statvfs *sb;
912 struct statvfs *sfsp;
913 size_t count, maxcount;
914 int error = 0;
915
916 sb = STATVFSBUF_GET();
917 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
918 sfsp = SCARG(uap, buf);
919 simple_lock(&mountlist_slock);
920 count = 0;
921 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
922 mp = nmp) {
923 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
924 nmp = CIRCLEQ_NEXT(mp, mnt_list);
925 continue;
926 }
927 if (sfsp && count < maxcount) {
928 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
929 if (error) {
930 simple_lock(&mountlist_slock);
931 nmp = CIRCLEQ_NEXT(mp, mnt_list);
932 vfs_unbusy(mp);
933 continue;
934 }
935 error = copyout(sb, sfsp, sizeof(*sfsp));
936 if (error) {
937 vfs_unbusy(mp);
938 goto out;
939 }
940 sfsp++;
941 root |= strcmp(sb->f_mntonname, "/") == 0;
942 }
943 count++;
944 simple_lock(&mountlist_slock);
945 nmp = CIRCLEQ_NEXT(mp, mnt_list);
946 vfs_unbusy(mp);
947 }
948 simple_unlock(&mountlist_slock);
949 if (root == 0 && p->p_cwdi->cwdi_rdir) {
950 /*
951 * fake a root entry
952 */
953 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
954 SCARG(uap, flags), 1)) != 0)
955 goto out;
956 if (sfsp)
957 error = copyout(sb, sfsp, sizeof(*sfsp));
958 count++;
959 }
960 if (sfsp && count > maxcount)
961 *retval = maxcount;
962 else
963 *retval = count;
964 out:
965 STATVFSBUF_PUT(sb);
966 return error;
967 }
968
969 /*
970 * Change current working directory to a given file descriptor.
971 */
972 /* ARGSUSED */
973 int
974 sys_fchdir(struct lwp *l, void *v, register_t *retval)
975 {
976 struct sys_fchdir_args /* {
977 syscallarg(int) fd;
978 } */ *uap = v;
979 struct proc *p = l->l_proc;
980 struct filedesc *fdp = p->p_fd;
981 struct cwdinfo *cwdi = p->p_cwdi;
982 struct vnode *vp, *tdp;
983 struct mount *mp;
984 struct file *fp;
985 int error;
986
987 /* getvnode() will use the descriptor for us */
988 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
989 return (error);
990 vp = (struct vnode *)fp->f_data;
991
992 VREF(vp);
993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
994 if (vp->v_type != VDIR)
995 error = ENOTDIR;
996 else
997 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
998 if (error) {
999 vput(vp);
1000 goto out;
1001 }
1002 while (!error && (mp = vp->v_mountedhere) != NULL) {
1003 if (vfs_busy(mp, 0, 0))
1004 continue;
1005
1006 vput(vp);
1007 error = VFS_ROOT(mp, &tdp);
1008 vfs_unbusy(mp);
1009 if (error)
1010 goto out;
1011 vp = tdp;
1012 }
1013 VOP_UNLOCK(vp, 0);
1014
1015 /*
1016 * Disallow changing to a directory not under the process's
1017 * current root directory (if there is one).
1018 */
1019 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1020 vrele(vp);
1021 error = EPERM; /* operation not permitted */
1022 goto out;
1023 }
1024
1025 vrele(cwdi->cwdi_cdir);
1026 cwdi->cwdi_cdir = vp;
1027 out:
1028 FILE_UNUSE(fp, l);
1029 return (error);
1030 }
1031
1032 /*
1033 * Change this process's notion of the root directory to a given file
1034 * descriptor.
1035 */
1036 int
1037 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1038 {
1039 struct sys_fchroot_args *uap = v;
1040 struct proc *p = l->l_proc;
1041 struct filedesc *fdp = p->p_fd;
1042 struct cwdinfo *cwdi = p->p_cwdi;
1043 struct vnode *vp;
1044 struct file *fp;
1045 int error;
1046
1047 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1048 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1049 return error;
1050 /* getvnode() will use the descriptor for us */
1051 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1052 return error;
1053 vp = (struct vnode *) fp->f_data;
1054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1055 if (vp->v_type != VDIR)
1056 error = ENOTDIR;
1057 else
1058 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1059 VOP_UNLOCK(vp, 0);
1060 if (error)
1061 goto out;
1062 VREF(vp);
1063
1064 /*
1065 * Prevent escaping from chroot by putting the root under
1066 * the working directory. Silently chdir to / if we aren't
1067 * already there.
1068 */
1069 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1070 /*
1071 * XXX would be more failsafe to change directory to a
1072 * deadfs node here instead
1073 */
1074 vrele(cwdi->cwdi_cdir);
1075 VREF(vp);
1076 cwdi->cwdi_cdir = vp;
1077 }
1078
1079 if (cwdi->cwdi_rdir != NULL)
1080 vrele(cwdi->cwdi_rdir);
1081 cwdi->cwdi_rdir = vp;
1082 out:
1083 FILE_UNUSE(fp, l);
1084 return (error);
1085 }
1086
1087 /*
1088 * Change current working directory (``.'').
1089 */
1090 /* ARGSUSED */
1091 int
1092 sys_chdir(struct lwp *l, void *v, register_t *retval)
1093 {
1094 struct sys_chdir_args /* {
1095 syscallarg(const char *) path;
1096 } */ *uap = v;
1097 struct proc *p = l->l_proc;
1098 struct cwdinfo *cwdi = p->p_cwdi;
1099 int error;
1100 struct nameidata nd;
1101
1102 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1103 SCARG(uap, path), l);
1104 if ((error = change_dir(&nd, l)) != 0)
1105 return (error);
1106 vrele(cwdi->cwdi_cdir);
1107 cwdi->cwdi_cdir = nd.ni_vp;
1108 return (0);
1109 }
1110
1111 /*
1112 * Change notion of root (``/'') directory.
1113 */
1114 /* ARGSUSED */
1115 int
1116 sys_chroot(struct lwp *l, void *v, register_t *retval)
1117 {
1118 struct sys_chroot_args /* {
1119 syscallarg(const char *) path;
1120 } */ *uap = v;
1121 struct proc *p = l->l_proc;
1122 struct cwdinfo *cwdi = p->p_cwdi;
1123 struct vnode *vp;
1124 int error;
1125 struct nameidata nd;
1126
1127 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1128 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1129 return (error);
1130 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1131 SCARG(uap, path), l);
1132 if ((error = change_dir(&nd, l)) != 0)
1133 return (error);
1134 if (cwdi->cwdi_rdir != NULL)
1135 vrele(cwdi->cwdi_rdir);
1136 vp = nd.ni_vp;
1137 cwdi->cwdi_rdir = vp;
1138
1139 /*
1140 * Prevent escaping from chroot by putting the root under
1141 * the working directory. Silently chdir to / if we aren't
1142 * already there.
1143 */
1144 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1145 /*
1146 * XXX would be more failsafe to change directory to a
1147 * deadfs node here instead
1148 */
1149 vrele(cwdi->cwdi_cdir);
1150 VREF(vp);
1151 cwdi->cwdi_cdir = vp;
1152 }
1153
1154 return (0);
1155 }
1156
1157 /*
1158 * Common routine for chroot and chdir.
1159 */
1160 static int
1161 change_dir(struct nameidata *ndp, struct lwp *l)
1162 {
1163 struct vnode *vp;
1164 int error;
1165
1166 if ((error = namei(ndp)) != 0)
1167 return (error);
1168 vp = ndp->ni_vp;
1169 if (vp->v_type != VDIR)
1170 error = ENOTDIR;
1171 else
1172 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1173
1174 if (error)
1175 vput(vp);
1176 else
1177 VOP_UNLOCK(vp, 0);
1178 return (error);
1179 }
1180
1181 /*
1182 * Check permissions, allocate an open file structure,
1183 * and call the device open routine if any.
1184 */
1185 int
1186 sys_open(struct lwp *l, void *v, register_t *retval)
1187 {
1188 struct sys_open_args /* {
1189 syscallarg(const char *) path;
1190 syscallarg(int) flags;
1191 syscallarg(int) mode;
1192 } */ *uap = v;
1193 struct proc *p = l->l_proc;
1194 struct cwdinfo *cwdi = p->p_cwdi;
1195 struct filedesc *fdp = p->p_fd;
1196 struct file *fp;
1197 struct vnode *vp;
1198 int flags, cmode;
1199 int type, indx, error;
1200 struct flock lf;
1201 struct nameidata nd;
1202
1203 flags = FFLAGS(SCARG(uap, flags));
1204 if ((flags & (FREAD | FWRITE)) == 0)
1205 return (EINVAL);
1206 /* falloc() will use the file descriptor for us */
1207 if ((error = falloc(l, &fp, &indx)) != 0)
1208 return (error);
1209 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1210 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1211 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1212 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1213 FILE_UNUSE(fp, l);
1214 fdp->fd_ofiles[indx] = NULL;
1215 ffree(fp);
1216 if ((error == EDUPFD || error == EMOVEFD) &&
1217 l->l_dupfd >= 0 && /* XXX from fdopen */
1218 (error =
1219 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1220 *retval = indx;
1221 return (0);
1222 }
1223 if (error == ERESTART)
1224 error = EINTR;
1225 fdremove(fdp, indx);
1226 return (error);
1227 }
1228 l->l_dupfd = 0;
1229 vp = nd.ni_vp;
1230 fp->f_flag = flags & FMASK;
1231 fp->f_type = DTYPE_VNODE;
1232 fp->f_ops = &vnops;
1233 fp->f_data = vp;
1234 if (flags & (O_EXLOCK | O_SHLOCK)) {
1235 lf.l_whence = SEEK_SET;
1236 lf.l_start = 0;
1237 lf.l_len = 0;
1238 if (flags & O_EXLOCK)
1239 lf.l_type = F_WRLCK;
1240 else
1241 lf.l_type = F_RDLCK;
1242 type = F_FLOCK;
1243 if ((flags & FNONBLOCK) == 0)
1244 type |= F_WAIT;
1245 VOP_UNLOCK(vp, 0);
1246 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1247 if (error) {
1248 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1249 FILE_UNUSE(fp, l);
1250 ffree(fp);
1251 fdremove(fdp, indx);
1252 return (error);
1253 }
1254 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1255 fp->f_flag |= FHASLOCK;
1256 }
1257 VOP_UNLOCK(vp, 0);
1258 *retval = indx;
1259 FILE_SET_MATURE(fp);
1260 FILE_UNUSE(fp, l);
1261 return (0);
1262 }
1263
1264 static void
1265 vfs__fhfree(fhandle_t *fhp)
1266 {
1267 size_t fhsize;
1268
1269 if (fhp == NULL) {
1270 return;
1271 }
1272 fhsize = FHANDLE_SIZE(fhp);
1273 kmem_free(fhp, fhsize);
1274 }
1275
1276 /*
1277 * vfs_composefh: compose a filehandle.
1278 */
1279
1280 int
1281 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1282 {
1283 struct mount *mp;
1284 struct fid *fidp;
1285 int error;
1286 size_t needfhsize;
1287 size_t fidsize;
1288
1289 mp = vp->v_mount;
1290 fidp = NULL;
1291 if (*fh_size < FHANDLE_SIZE_MIN) {
1292 fidsize = 0;
1293 } else {
1294 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1295 if (fhp != NULL) {
1296 memset(fhp, 0, *fh_size);
1297 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1298 fidp = &fhp->fh_fid;
1299 }
1300 }
1301 error = VFS_VPTOFH(vp, fidp, &fidsize);
1302 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1303 if (error == 0 && *fh_size < needfhsize) {
1304 error = E2BIG;
1305 }
1306 *fh_size = needfhsize;
1307 return error;
1308 }
1309
1310 int
1311 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1312 {
1313 struct mount *mp;
1314 fhandle_t *fhp;
1315 size_t fhsize;
1316 size_t fidsize;
1317 int error;
1318
1319 *fhpp = NULL;
1320 mp = vp->v_mount;
1321 fidsize = 0;
1322 error = VFS_VPTOFH(vp, NULL, &fidsize);
1323 KASSERT(error != 0);
1324 if (error != E2BIG) {
1325 goto out;
1326 }
1327 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1328 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1329 if (fhp == NULL) {
1330 error = ENOMEM;
1331 goto out;
1332 }
1333 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1334 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1335 if (error == 0) {
1336 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1337 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1338 *fhpp = fhp;
1339 } else {
1340 kmem_free(fhp, fhsize);
1341 }
1342 out:
1343 return error;
1344 }
1345
1346 void
1347 vfs_composefh_free(fhandle_t *fhp)
1348 {
1349
1350 vfs__fhfree(fhp);
1351 }
1352
1353 /*
1354 * vfs_fhtovp: lookup a vnode by a filehandle.
1355 */
1356
1357 int
1358 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1359 {
1360 struct mount *mp;
1361 int error;
1362
1363 *vpp = NULL;
1364 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1365 if (mp == NULL) {
1366 error = ESTALE;
1367 goto out;
1368 }
1369 if (mp->mnt_op->vfs_fhtovp == NULL) {
1370 error = EOPNOTSUPP;
1371 goto out;
1372 }
1373 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1374 out:
1375 return error;
1376 }
1377
1378 /*
1379 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1380 * the needed size.
1381 */
1382
1383 int
1384 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1385 {
1386 fhandle_t *fhp;
1387 int error;
1388
1389 *fhpp = NULL;
1390 if (fhsize > FHANDLE_SIZE_MAX) {
1391 return EINVAL;
1392 }
1393 if (fhsize < FHANDLE_SIZE_MIN) {
1394 return EINVAL;
1395 }
1396 again:
1397 fhp = kmem_alloc(fhsize, KM_SLEEP);
1398 if (fhp == NULL) {
1399 return ENOMEM;
1400 }
1401 error = copyin(ufhp, fhp, fhsize);
1402 if (error == 0) {
1403 /* XXX this check shouldn't be here */
1404 if (FHANDLE_SIZE(fhp) == fhsize) {
1405 *fhpp = fhp;
1406 return 0;
1407 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1408 /*
1409 * a kludge for nfsv2 padded handles.
1410 */
1411 size_t sz;
1412
1413 sz = FHANDLE_SIZE(fhp);
1414 kmem_free(fhp, fhsize);
1415 fhsize = sz;
1416 goto again;
1417 } else {
1418 /*
1419 * userland told us wrong size.
1420 */
1421 error = EINVAL;
1422 }
1423 }
1424 kmem_free(fhp, fhsize);
1425 return error;
1426 }
1427
1428 void
1429 vfs_copyinfh_free(fhandle_t *fhp)
1430 {
1431
1432 vfs__fhfree(fhp);
1433 }
1434
1435 /*
1436 * Get file handle system call
1437 */
1438 int
1439 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1440 {
1441 struct sys___getfh30_args /* {
1442 syscallarg(char *) fname;
1443 syscallarg(fhandle_t *) fhp;
1444 syscallarg(size_t *) fh_size;
1445 } */ *uap = v;
1446 struct vnode *vp;
1447 fhandle_t *fh;
1448 int error;
1449 struct nameidata nd;
1450 size_t sz;
1451 size_t usz;
1452
1453 /*
1454 * Must be super user
1455 */
1456 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1457 0, NULL, NULL, NULL);
1458 if (error)
1459 return (error);
1460 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1461 SCARG(uap, fname), l);
1462 error = namei(&nd);
1463 if (error)
1464 return (error);
1465 vp = nd.ni_vp;
1466 error = vfs_composefh_alloc(vp, &fh);
1467 vput(vp);
1468 if (error != 0) {
1469 goto out;
1470 }
1471 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1472 if (error != 0) {
1473 goto out;
1474 }
1475 sz = FHANDLE_SIZE(fh);
1476 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1477 if (error != 0) {
1478 goto out;
1479 }
1480 if (usz >= sz) {
1481 error = copyout(fh, SCARG(uap, fhp), sz);
1482 } else {
1483 error = E2BIG;
1484 }
1485 out:
1486 vfs_composefh_free(fh);
1487 return (error);
1488 }
1489
1490 /*
1491 * Open a file given a file handle.
1492 *
1493 * Check permissions, allocate an open file structure,
1494 * and call the device open routine if any.
1495 */
1496
1497 int
1498 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1499 register_t *retval)
1500 {
1501 struct filedesc *fdp = l->l_proc->p_fd;
1502 struct file *fp;
1503 struct vnode *vp = NULL;
1504 struct mount *mp;
1505 kauth_cred_t cred = l->l_cred;
1506 struct file *nfp;
1507 int type, indx, error=0;
1508 struct flock lf;
1509 struct vattr va;
1510 fhandle_t *fh;
1511 int flags;
1512
1513 /*
1514 * Must be super user
1515 */
1516 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1517 0, NULL, NULL, NULL)))
1518 return (error);
1519
1520 flags = FFLAGS(oflags);
1521 if ((flags & (FREAD | FWRITE)) == 0)
1522 return (EINVAL);
1523 if ((flags & O_CREAT))
1524 return (EINVAL);
1525 /* falloc() will use the file descriptor for us */
1526 if ((error = falloc(l, &nfp, &indx)) != 0)
1527 return (error);
1528 fp = nfp;
1529 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1530 if (error != 0) {
1531 goto bad;
1532 }
1533 error = vfs_fhtovp(fh, &vp);
1534 if (error != 0) {
1535 goto bad;
1536 }
1537
1538 /* Now do an effective vn_open */
1539
1540 if (vp->v_type == VSOCK) {
1541 error = EOPNOTSUPP;
1542 goto bad;
1543 }
1544 if (flags & FREAD) {
1545 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1546 goto bad;
1547 }
1548 if (flags & (FWRITE | O_TRUNC)) {
1549 if (vp->v_type == VDIR) {
1550 error = EISDIR;
1551 goto bad;
1552 }
1553 if ((error = vn_writechk(vp)) != 0 ||
1554 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1555 goto bad;
1556 }
1557 if (flags & O_TRUNC) {
1558 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1559 goto bad;
1560 VOP_UNLOCK(vp, 0); /* XXX */
1561 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1562 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1563 VATTR_NULL(&va);
1564 va.va_size = 0;
1565 error = VOP_SETATTR(vp, &va, cred, l);
1566 vn_finished_write(mp, 0);
1567 if (error)
1568 goto bad;
1569 }
1570 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1571 goto bad;
1572 if (vp->v_type == VREG &&
1573 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1574 error = EIO;
1575 goto bad;
1576 }
1577 if (flags & FWRITE)
1578 vp->v_writecount++;
1579
1580 /* done with modified vn_open, now finish what sys_open does. */
1581
1582 fp->f_flag = flags & FMASK;
1583 fp->f_type = DTYPE_VNODE;
1584 fp->f_ops = &vnops;
1585 fp->f_data = vp;
1586 if (flags & (O_EXLOCK | O_SHLOCK)) {
1587 lf.l_whence = SEEK_SET;
1588 lf.l_start = 0;
1589 lf.l_len = 0;
1590 if (flags & O_EXLOCK)
1591 lf.l_type = F_WRLCK;
1592 else
1593 lf.l_type = F_RDLCK;
1594 type = F_FLOCK;
1595 if ((flags & FNONBLOCK) == 0)
1596 type |= F_WAIT;
1597 VOP_UNLOCK(vp, 0);
1598 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1599 if (error) {
1600 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1601 FILE_UNUSE(fp, l);
1602 ffree(fp);
1603 fdremove(fdp, indx);
1604 return (error);
1605 }
1606 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1607 fp->f_flag |= FHASLOCK;
1608 }
1609 VOP_UNLOCK(vp, 0);
1610 *retval = indx;
1611 FILE_SET_MATURE(fp);
1612 FILE_UNUSE(fp, l);
1613 vfs_copyinfh_free(fh);
1614 return (0);
1615
1616 bad:
1617 FILE_UNUSE(fp, l);
1618 ffree(fp);
1619 fdremove(fdp, indx);
1620 if (vp != NULL)
1621 vput(vp);
1622 vfs_copyinfh_free(fh);
1623 return (error);
1624 }
1625
1626 int
1627 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1628 {
1629 struct sys___fhopen40_args /* {
1630 syscallarg(const void *) fhp;
1631 syscallarg(size_t) fh_size;
1632 syscallarg(int) flags;
1633 } */ *uap = v;
1634
1635 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1636 SCARG(uap, flags), retval);
1637 }
1638
1639 int
1640 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
1641 register_t *retval)
1642 {
1643 struct stat sb;
1644 int error;
1645 fhandle_t *fh;
1646 struct vnode *vp;
1647
1648 /*
1649 * Must be super user
1650 */
1651 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1652 0, NULL, NULL, NULL)))
1653 return (error);
1654
1655 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1656 if (error != 0) {
1657 goto bad;
1658 }
1659 error = vfs_fhtovp(fh, &vp);
1660 if (error != 0) {
1661 goto bad;
1662 }
1663 error = vn_stat(vp, &sb, l);
1664 vput(vp);
1665 if (error) {
1666 goto bad;
1667 }
1668 error = copyout(&sb, sbp, sizeof(sb));
1669 bad:
1670 vfs_copyinfh_free(fh);
1671 return error;
1672 }
1673
1674
1675 /* ARGSUSED */
1676 int
1677 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1678 {
1679 struct sys___fhstat40_args /* {
1680 syscallarg(const void *) fhp;
1681 syscallarg(size_t) fh_size;
1682 syscallarg(struct stat *) sb;
1683 } */ *uap = v;
1684
1685 return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb),
1686 retval);
1687 }
1688
1689 int
1690 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
1691 int flags, register_t *retval)
1692 {
1693 struct statvfs *sb = NULL;
1694 fhandle_t *fh;
1695 struct mount *mp;
1696 struct vnode *vp;
1697 int error;
1698
1699 /*
1700 * Must be super user
1701 */
1702 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1703 0, NULL, NULL, NULL)))
1704 return error;
1705
1706 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1707 if (error != 0) {
1708 goto out;
1709 }
1710 error = vfs_fhtovp(fh, &vp);
1711 if (error != 0) {
1712 goto out;
1713 }
1714 mp = vp->v_mount;
1715 sb = STATVFSBUF_GET();
1716 if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) {
1717 vput(vp);
1718 goto out;
1719 }
1720 vput(vp);
1721 error = copyout(sb, buf, sizeof(*sb));
1722 out:
1723 if (sb != NULL) {
1724 STATVFSBUF_PUT(sb);
1725 }
1726 vfs_copyinfh_free(fh);
1727 return error;
1728 }
1729
1730 /* ARGSUSED */
1731 int
1732 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1733 {
1734 struct sys___fhstatvfs140_args /* {
1735 syscallarg(const void *) fhp;
1736 syscallarg(size_t) fh_size;
1737 syscallarg(struct statvfs *) buf;
1738 syscallarg(int) flags;
1739 } */ *uap = v;
1740
1741 return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1742 SCARG(uap, buf), SCARG(uap, flags), retval);
1743 }
1744
1745 /*
1746 * Create a special file.
1747 */
1748 /* ARGSUSED */
1749 int
1750 sys_mknod(struct lwp *l, void *v, register_t *retval)
1751 {
1752 struct sys_mknod_args /* {
1753 syscallarg(const char *) path;
1754 syscallarg(int) mode;
1755 syscallarg(int) dev;
1756 } */ *uap = v;
1757 struct proc *p = l->l_proc;
1758 struct vnode *vp;
1759 struct mount *mp;
1760 struct vattr vattr;
1761 int error, optype;
1762 struct nameidata nd;
1763
1764 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1765 0, NULL, NULL, NULL)) != 0)
1766 return (error);
1767
1768 optype = VOP_MKNOD_DESCOFFSET;
1769 restart:
1770 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1771 if ((error = namei(&nd)) != 0)
1772 return (error);
1773 vp = nd.ni_vp;
1774 if (vp != NULL)
1775 error = EEXIST;
1776 else {
1777 VATTR_NULL(&vattr);
1778 vattr.va_mode =
1779 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1780 vattr.va_rdev = SCARG(uap, dev);
1781
1782 switch (SCARG(uap, mode) & S_IFMT) {
1783 case S_IFMT: /* used by badsect to flag bad sectors */
1784 vattr.va_type = VBAD;
1785 break;
1786 case S_IFCHR:
1787 vattr.va_type = VCHR;
1788 break;
1789 case S_IFBLK:
1790 vattr.va_type = VBLK;
1791 break;
1792 case S_IFWHT:
1793 optype = VOP_WHITEOUT_DESCOFFSET;
1794 break;
1795 case S_IFREG:
1796 vattr.va_type = VREG;
1797 vattr.va_rdev = VNOVAL;
1798 optype = VOP_CREATE_DESCOFFSET;
1799 break;
1800 default:
1801 error = EINVAL;
1802 break;
1803 }
1804 }
1805 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1806 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1807 if (nd.ni_dvp == vp)
1808 vrele(nd.ni_dvp);
1809 else
1810 vput(nd.ni_dvp);
1811 if (vp)
1812 vrele(vp);
1813 if ((error = vn_start_write(NULL, &mp,
1814 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1815 return (error);
1816 goto restart;
1817 }
1818 if (!error) {
1819 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1820 switch (optype) {
1821 case VOP_WHITEOUT_DESCOFFSET:
1822 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1823 if (error)
1824 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1825 vput(nd.ni_dvp);
1826 break;
1827
1828 case VOP_MKNOD_DESCOFFSET:
1829 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1830 &nd.ni_cnd, &vattr);
1831 if (error == 0)
1832 vput(nd.ni_vp);
1833 break;
1834
1835 case VOP_CREATE_DESCOFFSET:
1836 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1837 &nd.ni_cnd, &vattr);
1838 if (error == 0)
1839 vput(nd.ni_vp);
1840 break;
1841 }
1842 } else {
1843 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1844 if (nd.ni_dvp == vp)
1845 vrele(nd.ni_dvp);
1846 else
1847 vput(nd.ni_dvp);
1848 if (vp)
1849 vrele(vp);
1850 }
1851 vn_finished_write(mp, 0);
1852 return (error);
1853 }
1854
1855 /*
1856 * Create a named pipe.
1857 */
1858 /* ARGSUSED */
1859 int
1860 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1861 {
1862 struct sys_mkfifo_args /* {
1863 syscallarg(const char *) path;
1864 syscallarg(int) mode;
1865 } */ *uap = v;
1866 struct proc *p = l->l_proc;
1867 struct mount *mp;
1868 struct vattr vattr;
1869 int error;
1870 struct nameidata nd;
1871
1872 restart:
1873 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1874 if ((error = namei(&nd)) != 0)
1875 return (error);
1876 if (nd.ni_vp != NULL) {
1877 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1878 if (nd.ni_dvp == nd.ni_vp)
1879 vrele(nd.ni_dvp);
1880 else
1881 vput(nd.ni_dvp);
1882 vrele(nd.ni_vp);
1883 return (EEXIST);
1884 }
1885 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1886 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1887 if (nd.ni_dvp == nd.ni_vp)
1888 vrele(nd.ni_dvp);
1889 else
1890 vput(nd.ni_dvp);
1891 if (nd.ni_vp)
1892 vrele(nd.ni_vp);
1893 if ((error = vn_start_write(NULL, &mp,
1894 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1895 return (error);
1896 goto restart;
1897 }
1898 VATTR_NULL(&vattr);
1899 vattr.va_type = VFIFO;
1900 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1901 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1902 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1903 if (error == 0)
1904 vput(nd.ni_vp);
1905 vn_finished_write(mp, 0);
1906 return (error);
1907 }
1908
1909 /*
1910 * Make a hard file link.
1911 */
1912 /* ARGSUSED */
1913 int
1914 sys_link(struct lwp *l, void *v, register_t *retval)
1915 {
1916 struct sys_link_args /* {
1917 syscallarg(const char *) path;
1918 syscallarg(const char *) link;
1919 } */ *uap = v;
1920 struct vnode *vp;
1921 struct mount *mp;
1922 struct nameidata nd;
1923 int error;
1924
1925 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1926 if ((error = namei(&nd)) != 0)
1927 return (error);
1928 vp = nd.ni_vp;
1929 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1930 vrele(vp);
1931 return (error);
1932 }
1933 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1934 if ((error = namei(&nd)) != 0)
1935 goto out;
1936 if (nd.ni_vp) {
1937 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1938 if (nd.ni_dvp == nd.ni_vp)
1939 vrele(nd.ni_dvp);
1940 else
1941 vput(nd.ni_dvp);
1942 vrele(nd.ni_vp);
1943 error = EEXIST;
1944 goto out;
1945 }
1946 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1947 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1948 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1949 out:
1950 vrele(vp);
1951 vn_finished_write(mp, 0);
1952 return (error);
1953 }
1954
1955 /*
1956 * Make a symbolic link.
1957 */
1958 /* ARGSUSED */
1959 int
1960 sys_symlink(struct lwp *l, void *v, register_t *retval)
1961 {
1962 struct sys_symlink_args /* {
1963 syscallarg(const char *) path;
1964 syscallarg(const char *) link;
1965 } */ *uap = v;
1966 struct proc *p = l->l_proc;
1967 struct mount *mp;
1968 struct vattr vattr;
1969 char *path;
1970 int error;
1971 struct nameidata nd;
1972
1973 path = PNBUF_GET();
1974 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1975 if (error)
1976 goto out;
1977 restart:
1978 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1979 if ((error = namei(&nd)) != 0)
1980 goto out;
1981 if (nd.ni_vp) {
1982 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1983 if (nd.ni_dvp == nd.ni_vp)
1984 vrele(nd.ni_dvp);
1985 else
1986 vput(nd.ni_dvp);
1987 vrele(nd.ni_vp);
1988 error = EEXIST;
1989 goto out;
1990 }
1991 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1992 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1993 if (nd.ni_dvp == nd.ni_vp)
1994 vrele(nd.ni_dvp);
1995 else
1996 vput(nd.ni_dvp);
1997 if ((error = vn_start_write(NULL, &mp,
1998 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1999 return (error);
2000 goto restart;
2001 }
2002 VATTR_NULL(&vattr);
2003 vattr.va_type = VLNK;
2004 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2005 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2006 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2007 if (error == 0)
2008 vput(nd.ni_vp);
2009 vn_finished_write(mp, 0);
2010 out:
2011 PNBUF_PUT(path);
2012 return (error);
2013 }
2014
2015 /*
2016 * Delete a whiteout from the filesystem.
2017 */
2018 /* ARGSUSED */
2019 int
2020 sys_undelete(struct lwp *l, void *v, register_t *retval)
2021 {
2022 struct sys_undelete_args /* {
2023 syscallarg(const char *) path;
2024 } */ *uap = v;
2025 int error;
2026 struct mount *mp;
2027 struct nameidata nd;
2028
2029 restart:
2030 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2031 SCARG(uap, path), l);
2032 error = namei(&nd);
2033 if (error)
2034 return (error);
2035
2036 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2037 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2038 if (nd.ni_dvp == nd.ni_vp)
2039 vrele(nd.ni_dvp);
2040 else
2041 vput(nd.ni_dvp);
2042 if (nd.ni_vp)
2043 vrele(nd.ni_vp);
2044 return (EEXIST);
2045 }
2046 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2047 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2048 if (nd.ni_dvp == nd.ni_vp)
2049 vrele(nd.ni_dvp);
2050 else
2051 vput(nd.ni_dvp);
2052 if ((error = vn_start_write(NULL, &mp,
2053 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2054 return (error);
2055 goto restart;
2056 }
2057 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2058 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2059 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2060 vput(nd.ni_dvp);
2061 vn_finished_write(mp, 0);
2062 return (error);
2063 }
2064
2065 /*
2066 * Delete a name from the filesystem.
2067 */
2068 /* ARGSUSED */
2069 int
2070 sys_unlink(struct lwp *l, void *v, register_t *retval)
2071 {
2072 struct sys_unlink_args /* {
2073 syscallarg(const char *) path;
2074 } */ *uap = v;
2075 struct mount *mp;
2076 struct vnode *vp;
2077 int error;
2078 struct nameidata nd;
2079 #if NVERIEXEC > 0
2080 pathname_t pathbuf = NULL;
2081 #endif /* NVERIEXEC > 0 */
2082
2083 restart:
2084 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2085 SCARG(uap, path), l);
2086 if ((error = namei(&nd)) != 0)
2087 return (error);
2088 vp = nd.ni_vp;
2089
2090 /*
2091 * The root of a mounted filesystem cannot be deleted.
2092 */
2093 if (vp->v_flag & VROOT) {
2094 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2095 if (nd.ni_dvp == vp)
2096 vrele(nd.ni_dvp);
2097 else
2098 vput(nd.ni_dvp);
2099 vput(vp);
2100 error = EBUSY;
2101 goto out;
2102 }
2103
2104 #if NVERIEXEC > 0
2105 error = pathname_get(nd.ni_dirp, nd.ni_segflg, &pathbuf);
2106
2107 /* Handle remove requests for veriexec entries. */
2108 if (!error) {
2109 error = veriexec_removechk(vp, pathname_path(pathbuf), l);
2110 pathname_put(pathbuf);
2111 }
2112
2113 if (error) {
2114 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2115 if (nd.ni_dvp == vp)
2116 vrele(nd.ni_dvp);
2117 else
2118 vput(nd.ni_dvp);
2119 vput(vp);
2120 goto out;
2121 }
2122 #endif /* NVERIEXEC > 0 */
2123
2124 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2125 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2126 if (nd.ni_dvp == vp)
2127 vrele(nd.ni_dvp);
2128 else
2129 vput(nd.ni_dvp);
2130 vput(vp);
2131 if ((error = vn_start_write(NULL, &mp,
2132 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2133 return (error);
2134 goto restart;
2135 }
2136 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2137 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2138 #ifdef FILEASSOC
2139 (void)fileassoc_file_delete(vp);
2140 #endif /* FILEASSOC */
2141 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2142 vn_finished_write(mp, 0);
2143 out:
2144 return (error);
2145 }
2146
2147 /*
2148 * Reposition read/write file offset.
2149 */
2150 int
2151 sys_lseek(struct lwp *l, void *v, register_t *retval)
2152 {
2153 struct sys_lseek_args /* {
2154 syscallarg(int) fd;
2155 syscallarg(int) pad;
2156 syscallarg(off_t) offset;
2157 syscallarg(int) whence;
2158 } */ *uap = v;
2159 struct proc *p = l->l_proc;
2160 kauth_cred_t cred = l->l_cred;
2161 struct filedesc *fdp = p->p_fd;
2162 struct file *fp;
2163 struct vnode *vp;
2164 struct vattr vattr;
2165 off_t newoff;
2166 int error;
2167
2168 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2169 return (EBADF);
2170
2171 FILE_USE(fp);
2172
2173 vp = (struct vnode *)fp->f_data;
2174 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2175 error = ESPIPE;
2176 goto out;
2177 }
2178
2179 switch (SCARG(uap, whence)) {
2180 case SEEK_CUR:
2181 newoff = fp->f_offset + SCARG(uap, offset);
2182 break;
2183 case SEEK_END:
2184 error = VOP_GETATTR(vp, &vattr, cred, l);
2185 if (error)
2186 goto out;
2187 newoff = SCARG(uap, offset) + vattr.va_size;
2188 break;
2189 case SEEK_SET:
2190 newoff = SCARG(uap, offset);
2191 break;
2192 default:
2193 error = EINVAL;
2194 goto out;
2195 }
2196 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2197 goto out;
2198
2199 *(off_t *)retval = fp->f_offset = newoff;
2200 out:
2201 FILE_UNUSE(fp, l);
2202 return (error);
2203 }
2204
2205 /*
2206 * Positional read system call.
2207 */
2208 int
2209 sys_pread(struct lwp *l, void *v, register_t *retval)
2210 {
2211 struct sys_pread_args /* {
2212 syscallarg(int) fd;
2213 syscallarg(void *) buf;
2214 syscallarg(size_t) nbyte;
2215 syscallarg(off_t) offset;
2216 } */ *uap = v;
2217 struct proc *p = l->l_proc;
2218 struct filedesc *fdp = p->p_fd;
2219 struct file *fp;
2220 struct vnode *vp;
2221 off_t offset;
2222 int error, fd = SCARG(uap, fd);
2223
2224 if ((fp = fd_getfile(fdp, fd)) == NULL)
2225 return (EBADF);
2226
2227 if ((fp->f_flag & FREAD) == 0) {
2228 simple_unlock(&fp->f_slock);
2229 return (EBADF);
2230 }
2231
2232 FILE_USE(fp);
2233
2234 vp = (struct vnode *)fp->f_data;
2235 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2236 error = ESPIPE;
2237 goto out;
2238 }
2239
2240 offset = SCARG(uap, offset);
2241
2242 /*
2243 * XXX This works because no file systems actually
2244 * XXX take any action on the seek operation.
2245 */
2246 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2247 goto out;
2248
2249 /* dofileread() will unuse the descriptor for us */
2250 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2251 &offset, 0, retval));
2252
2253 out:
2254 FILE_UNUSE(fp, l);
2255 return (error);
2256 }
2257
2258 /*
2259 * Positional scatter read system call.
2260 */
2261 int
2262 sys_preadv(struct lwp *l, void *v, register_t *retval)
2263 {
2264 struct sys_preadv_args /* {
2265 syscallarg(int) fd;
2266 syscallarg(const struct iovec *) iovp;
2267 syscallarg(int) iovcnt;
2268 syscallarg(off_t) offset;
2269 } */ *uap = v;
2270 struct proc *p = l->l_proc;
2271 struct filedesc *fdp = p->p_fd;
2272 struct file *fp;
2273 struct vnode *vp;
2274 off_t offset;
2275 int error, fd = SCARG(uap, fd);
2276
2277 if ((fp = fd_getfile(fdp, fd)) == NULL)
2278 return (EBADF);
2279
2280 if ((fp->f_flag & FREAD) == 0) {
2281 simple_unlock(&fp->f_slock);
2282 return (EBADF);
2283 }
2284
2285 FILE_USE(fp);
2286
2287 vp = (struct vnode *)fp->f_data;
2288 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2289 error = ESPIPE;
2290 goto out;
2291 }
2292
2293 offset = SCARG(uap, offset);
2294
2295 /*
2296 * XXX This works because no file systems actually
2297 * XXX take any action on the seek operation.
2298 */
2299 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2300 goto out;
2301
2302 /* dofilereadv() will unuse the descriptor for us */
2303 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2304 &offset, 0, retval));
2305
2306 out:
2307 FILE_UNUSE(fp, l);
2308 return (error);
2309 }
2310
2311 /*
2312 * Positional write system call.
2313 */
2314 int
2315 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2316 {
2317 struct sys_pwrite_args /* {
2318 syscallarg(int) fd;
2319 syscallarg(const void *) buf;
2320 syscallarg(size_t) nbyte;
2321 syscallarg(off_t) offset;
2322 } */ *uap = v;
2323 struct proc *p = l->l_proc;
2324 struct filedesc *fdp = p->p_fd;
2325 struct file *fp;
2326 struct vnode *vp;
2327 off_t offset;
2328 int error, fd = SCARG(uap, fd);
2329
2330 if ((fp = fd_getfile(fdp, fd)) == NULL)
2331 return (EBADF);
2332
2333 if ((fp->f_flag & FWRITE) == 0) {
2334 simple_unlock(&fp->f_slock);
2335 return (EBADF);
2336 }
2337
2338 FILE_USE(fp);
2339
2340 vp = (struct vnode *)fp->f_data;
2341 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2342 error = ESPIPE;
2343 goto out;
2344 }
2345
2346 offset = SCARG(uap, offset);
2347
2348 /*
2349 * XXX This works because no file systems actually
2350 * XXX take any action on the seek operation.
2351 */
2352 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2353 goto out;
2354
2355 /* dofilewrite() will unuse the descriptor for us */
2356 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2357 &offset, 0, retval));
2358
2359 out:
2360 FILE_UNUSE(fp, l);
2361 return (error);
2362 }
2363
2364 /*
2365 * Positional gather write system call.
2366 */
2367 int
2368 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2369 {
2370 struct sys_pwritev_args /* {
2371 syscallarg(int) fd;
2372 syscallarg(const struct iovec *) iovp;
2373 syscallarg(int) iovcnt;
2374 syscallarg(off_t) offset;
2375 } */ *uap = v;
2376 struct proc *p = l->l_proc;
2377 struct filedesc *fdp = p->p_fd;
2378 struct file *fp;
2379 struct vnode *vp;
2380 off_t offset;
2381 int error, fd = SCARG(uap, fd);
2382
2383 if ((fp = fd_getfile(fdp, fd)) == NULL)
2384 return (EBADF);
2385
2386 if ((fp->f_flag & FWRITE) == 0) {
2387 simple_unlock(&fp->f_slock);
2388 return (EBADF);
2389 }
2390
2391 FILE_USE(fp);
2392
2393 vp = (struct vnode *)fp->f_data;
2394 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2395 error = ESPIPE;
2396 goto out;
2397 }
2398
2399 offset = SCARG(uap, offset);
2400
2401 /*
2402 * XXX This works because no file systems actually
2403 * XXX take any action on the seek operation.
2404 */
2405 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2406 goto out;
2407
2408 /* dofilewritev() will unuse the descriptor for us */
2409 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2410 &offset, 0, retval));
2411
2412 out:
2413 FILE_UNUSE(fp, l);
2414 return (error);
2415 }
2416
2417 /*
2418 * Check access permissions.
2419 */
2420 int
2421 sys_access(struct lwp *l, void *v, register_t *retval)
2422 {
2423 struct sys_access_args /* {
2424 syscallarg(const char *) path;
2425 syscallarg(int) flags;
2426 } */ *uap = v;
2427 kauth_cred_t cred;
2428 struct vnode *vp;
2429 int error, flags;
2430 struct nameidata nd;
2431
2432 cred = kauth_cred_dup(l->l_cred);
2433 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2434 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2435 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2436 SCARG(uap, path), l);
2437 /* Override default credentials */
2438 nd.ni_cnd.cn_cred = cred;
2439 if ((error = namei(&nd)) != 0)
2440 goto out;
2441 vp = nd.ni_vp;
2442
2443 /* Flags == 0 means only check for existence. */
2444 if (SCARG(uap, flags)) {
2445 flags = 0;
2446 if (SCARG(uap, flags) & R_OK)
2447 flags |= VREAD;
2448 if (SCARG(uap, flags) & W_OK)
2449 flags |= VWRITE;
2450 if (SCARG(uap, flags) & X_OK)
2451 flags |= VEXEC;
2452
2453 error = VOP_ACCESS(vp, flags, cred, l);
2454 if (!error && (flags & VWRITE))
2455 error = vn_writechk(vp);
2456 }
2457 vput(vp);
2458 out:
2459 kauth_cred_free(cred);
2460 return (error);
2461 }
2462
2463 /*
2464 * Get file status; this version follows links.
2465 */
2466 /* ARGSUSED */
2467 int
2468 sys___stat30(struct lwp *l, void *v, register_t *retval)
2469 {
2470 struct sys___stat30_args /* {
2471 syscallarg(const char *) path;
2472 syscallarg(struct stat *) ub;
2473 } */ *uap = v;
2474 struct stat sb;
2475 int error;
2476 struct nameidata nd;
2477
2478 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2479 SCARG(uap, path), l);
2480 if ((error = namei(&nd)) != 0)
2481 return (error);
2482 error = vn_stat(nd.ni_vp, &sb, l);
2483 vput(nd.ni_vp);
2484 if (error)
2485 return (error);
2486 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2487 return (error);
2488 }
2489
2490 /*
2491 * Get file status; this version does not follow links.
2492 */
2493 /* ARGSUSED */
2494 int
2495 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2496 {
2497 struct sys___lstat30_args /* {
2498 syscallarg(const char *) path;
2499 syscallarg(struct stat *) ub;
2500 } */ *uap = v;
2501 struct stat sb;
2502 int error;
2503 struct nameidata nd;
2504
2505 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2506 SCARG(uap, path), l);
2507 if ((error = namei(&nd)) != 0)
2508 return (error);
2509 error = vn_stat(nd.ni_vp, &sb, l);
2510 vput(nd.ni_vp);
2511 if (error)
2512 return (error);
2513 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2514 return (error);
2515 }
2516
2517 /*
2518 * Get configurable pathname variables.
2519 */
2520 /* ARGSUSED */
2521 int
2522 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2523 {
2524 struct sys_pathconf_args /* {
2525 syscallarg(const char *) path;
2526 syscallarg(int) name;
2527 } */ *uap = v;
2528 int error;
2529 struct nameidata nd;
2530
2531 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2532 SCARG(uap, path), l);
2533 if ((error = namei(&nd)) != 0)
2534 return (error);
2535 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2536 vput(nd.ni_vp);
2537 return (error);
2538 }
2539
2540 /*
2541 * Return target name of a symbolic link.
2542 */
2543 /* ARGSUSED */
2544 int
2545 sys_readlink(struct lwp *l, void *v, register_t *retval)
2546 {
2547 struct sys_readlink_args /* {
2548 syscallarg(const char *) path;
2549 syscallarg(char *) buf;
2550 syscallarg(size_t) count;
2551 } */ *uap = v;
2552 struct vnode *vp;
2553 struct iovec aiov;
2554 struct uio auio;
2555 int error;
2556 struct nameidata nd;
2557
2558 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2559 SCARG(uap, path), l);
2560 if ((error = namei(&nd)) != 0)
2561 return (error);
2562 vp = nd.ni_vp;
2563 if (vp->v_type != VLNK)
2564 error = EINVAL;
2565 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2566 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2567 aiov.iov_base = SCARG(uap, buf);
2568 aiov.iov_len = SCARG(uap, count);
2569 auio.uio_iov = &aiov;
2570 auio.uio_iovcnt = 1;
2571 auio.uio_offset = 0;
2572 auio.uio_rw = UIO_READ;
2573 KASSERT(l == curlwp);
2574 auio.uio_vmspace = l->l_proc->p_vmspace;
2575 auio.uio_resid = SCARG(uap, count);
2576 error = VOP_READLINK(vp, &auio, l->l_cred);
2577 }
2578 vput(vp);
2579 *retval = SCARG(uap, count) - auio.uio_resid;
2580 return (error);
2581 }
2582
2583 /*
2584 * Change flags of a file given a path name.
2585 */
2586 /* ARGSUSED */
2587 int
2588 sys_chflags(struct lwp *l, void *v, register_t *retval)
2589 {
2590 struct sys_chflags_args /* {
2591 syscallarg(const char *) path;
2592 syscallarg(u_long) flags;
2593 } */ *uap = v;
2594 struct vnode *vp;
2595 int error;
2596 struct nameidata nd;
2597
2598 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2599 if ((error = namei(&nd)) != 0)
2600 return (error);
2601 vp = nd.ni_vp;
2602 error = change_flags(vp, SCARG(uap, flags), l);
2603 vput(vp);
2604 return (error);
2605 }
2606
2607 /*
2608 * Change flags of a file given a file descriptor.
2609 */
2610 /* ARGSUSED */
2611 int
2612 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2613 {
2614 struct sys_fchflags_args /* {
2615 syscallarg(int) fd;
2616 syscallarg(u_long) flags;
2617 } */ *uap = v;
2618 struct proc *p = l->l_proc;
2619 struct vnode *vp;
2620 struct file *fp;
2621 int error;
2622
2623 /* getvnode() will use the descriptor for us */
2624 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2625 return (error);
2626 vp = (struct vnode *)fp->f_data;
2627 error = change_flags(vp, SCARG(uap, flags), l);
2628 VOP_UNLOCK(vp, 0);
2629 FILE_UNUSE(fp, l);
2630 return (error);
2631 }
2632
2633 /*
2634 * Change flags of a file given a path name; this version does
2635 * not follow links.
2636 */
2637 int
2638 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2639 {
2640 struct sys_lchflags_args /* {
2641 syscallarg(const char *) path;
2642 syscallarg(u_long) flags;
2643 } */ *uap = v;
2644 struct vnode *vp;
2645 int error;
2646 struct nameidata nd;
2647
2648 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2649 if ((error = namei(&nd)) != 0)
2650 return (error);
2651 vp = nd.ni_vp;
2652 error = change_flags(vp, SCARG(uap, flags), l);
2653 vput(vp);
2654 return (error);
2655 }
2656
2657 /*
2658 * Common routine to change flags of a file.
2659 */
2660 int
2661 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2662 {
2663 struct mount *mp;
2664 struct vattr vattr;
2665 int error;
2666
2667 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2668 return (error);
2669 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2670 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2671 /*
2672 * Non-superusers cannot change the flags on devices, even if they
2673 * own them.
2674 */
2675 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2676 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2677 goto out;
2678 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2679 error = EINVAL;
2680 goto out;
2681 }
2682 }
2683 VATTR_NULL(&vattr);
2684 vattr.va_flags = flags;
2685 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2686 out:
2687 vn_finished_write(mp, 0);
2688 return (error);
2689 }
2690
2691 /*
2692 * Change mode of a file given path name; this version follows links.
2693 */
2694 /* ARGSUSED */
2695 int
2696 sys_chmod(struct lwp *l, void *v, register_t *retval)
2697 {
2698 struct sys_chmod_args /* {
2699 syscallarg(const char *) path;
2700 syscallarg(int) mode;
2701 } */ *uap = v;
2702 int error;
2703 struct nameidata nd;
2704
2705 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2706 if ((error = namei(&nd)) != 0)
2707 return (error);
2708
2709 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2710
2711 vrele(nd.ni_vp);
2712 return (error);
2713 }
2714
2715 /*
2716 * Change mode of a file given a file descriptor.
2717 */
2718 /* ARGSUSED */
2719 int
2720 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2721 {
2722 struct sys_fchmod_args /* {
2723 syscallarg(int) fd;
2724 syscallarg(int) mode;
2725 } */ *uap = v;
2726 struct proc *p = l->l_proc;
2727 struct file *fp;
2728 int error;
2729
2730 /* getvnode() will use the descriptor for us */
2731 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2732 return (error);
2733
2734 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2735 FILE_UNUSE(fp, l);
2736 return (error);
2737 }
2738
2739 /*
2740 * Change mode of a file given path name; this version does not follow links.
2741 */
2742 /* ARGSUSED */
2743 int
2744 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2745 {
2746 struct sys_lchmod_args /* {
2747 syscallarg(const char *) path;
2748 syscallarg(int) mode;
2749 } */ *uap = v;
2750 int error;
2751 struct nameidata nd;
2752
2753 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2754 if ((error = namei(&nd)) != 0)
2755 return (error);
2756
2757 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2758
2759 vrele(nd.ni_vp);
2760 return (error);
2761 }
2762
2763 /*
2764 * Common routine to set mode given a vnode.
2765 */
2766 static int
2767 change_mode(struct vnode *vp, int mode, struct lwp *l)
2768 {
2769 struct mount *mp;
2770 struct vattr vattr;
2771 int error;
2772
2773 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2774 return (error);
2775 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2776 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2777 VATTR_NULL(&vattr);
2778 vattr.va_mode = mode & ALLPERMS;
2779 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2780 VOP_UNLOCK(vp, 0);
2781 vn_finished_write(mp, 0);
2782 return (error);
2783 }
2784
2785 /*
2786 * Set ownership given a path name; this version follows links.
2787 */
2788 /* ARGSUSED */
2789 int
2790 sys_chown(struct lwp *l, void *v, register_t *retval)
2791 {
2792 struct sys_chown_args /* {
2793 syscallarg(const char *) path;
2794 syscallarg(uid_t) uid;
2795 syscallarg(gid_t) gid;
2796 } */ *uap = v;
2797 int error;
2798 struct nameidata nd;
2799
2800 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2801 if ((error = namei(&nd)) != 0)
2802 return (error);
2803
2804 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2805
2806 vrele(nd.ni_vp);
2807 return (error);
2808 }
2809
2810 /*
2811 * Set ownership given a path name; this version follows links.
2812 * Provides POSIX semantics.
2813 */
2814 /* ARGSUSED */
2815 int
2816 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2817 {
2818 struct sys_chown_args /* {
2819 syscallarg(const char *) path;
2820 syscallarg(uid_t) uid;
2821 syscallarg(gid_t) gid;
2822 } */ *uap = v;
2823 int error;
2824 struct nameidata nd;
2825
2826 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2827 if ((error = namei(&nd)) != 0)
2828 return (error);
2829
2830 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2831
2832 vrele(nd.ni_vp);
2833 return (error);
2834 }
2835
2836 /*
2837 * Set ownership given a file descriptor.
2838 */
2839 /* ARGSUSED */
2840 int
2841 sys_fchown(struct lwp *l, void *v, register_t *retval)
2842 {
2843 struct sys_fchown_args /* {
2844 syscallarg(int) fd;
2845 syscallarg(uid_t) uid;
2846 syscallarg(gid_t) gid;
2847 } */ *uap = v;
2848 struct proc *p = l->l_proc;
2849 int error;
2850 struct file *fp;
2851
2852 /* getvnode() will use the descriptor for us */
2853 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2854 return (error);
2855
2856 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2857 SCARG(uap, gid), l, 0);
2858 FILE_UNUSE(fp, l);
2859 return (error);
2860 }
2861
2862 /*
2863 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2864 */
2865 /* ARGSUSED */
2866 int
2867 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2868 {
2869 struct sys_fchown_args /* {
2870 syscallarg(int) fd;
2871 syscallarg(uid_t) uid;
2872 syscallarg(gid_t) gid;
2873 } */ *uap = v;
2874 struct proc *p = l->l_proc;
2875 int error;
2876 struct file *fp;
2877
2878 /* getvnode() will use the descriptor for us */
2879 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2880 return (error);
2881
2882 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2883 SCARG(uap, gid), l, 1);
2884 FILE_UNUSE(fp, l);
2885 return (error);
2886 }
2887
2888 /*
2889 * Set ownership given a path name; this version does not follow links.
2890 */
2891 /* ARGSUSED */
2892 int
2893 sys_lchown(struct lwp *l, void *v, register_t *retval)
2894 {
2895 struct sys_lchown_args /* {
2896 syscallarg(const char *) path;
2897 syscallarg(uid_t) uid;
2898 syscallarg(gid_t) gid;
2899 } */ *uap = v;
2900 int error;
2901 struct nameidata nd;
2902
2903 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2904 if ((error = namei(&nd)) != 0)
2905 return (error);
2906
2907 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2908
2909 vrele(nd.ni_vp);
2910 return (error);
2911 }
2912
2913 /*
2914 * Set ownership given a path name; this version does not follow links.
2915 * Provides POSIX/XPG semantics.
2916 */
2917 /* ARGSUSED */
2918 int
2919 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2920 {
2921 struct sys_lchown_args /* {
2922 syscallarg(const char *) path;
2923 syscallarg(uid_t) uid;
2924 syscallarg(gid_t) gid;
2925 } */ *uap = v;
2926 int error;
2927 struct nameidata nd;
2928
2929 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2930 if ((error = namei(&nd)) != 0)
2931 return (error);
2932
2933 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2934
2935 vrele(nd.ni_vp);
2936 return (error);
2937 }
2938
2939 /*
2940 * Common routine to set ownership given a vnode.
2941 */
2942 static int
2943 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2944 int posix_semantics)
2945 {
2946 struct mount *mp;
2947 struct vattr vattr;
2948 mode_t newmode;
2949 int error;
2950
2951 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2952 return (error);
2953 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2954 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2955 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2956 goto out;
2957
2958 #define CHANGED(x) ((int)(x) != -1)
2959 newmode = vattr.va_mode;
2960 if (posix_semantics) {
2961 /*
2962 * POSIX/XPG semantics: if the caller is not the super-user,
2963 * clear set-user-id and set-group-id bits. Both POSIX and
2964 * the XPG consider the behaviour for calls by the super-user
2965 * implementation-defined; we leave the set-user-id and set-
2966 * group-id settings intact in that case.
2967 */
2968 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2969 NULL) != 0)
2970 newmode &= ~(S_ISUID | S_ISGID);
2971 } else {
2972 /*
2973 * NetBSD semantics: when changing owner and/or group,
2974 * clear the respective bit(s).
2975 */
2976 if (CHANGED(uid))
2977 newmode &= ~S_ISUID;
2978 if (CHANGED(gid))
2979 newmode &= ~S_ISGID;
2980 }
2981 /* Update va_mode iff altered. */
2982 if (vattr.va_mode == newmode)
2983 newmode = VNOVAL;
2984
2985 VATTR_NULL(&vattr);
2986 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2987 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2988 vattr.va_mode = newmode;
2989 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2990 #undef CHANGED
2991
2992 out:
2993 VOP_UNLOCK(vp, 0);
2994 vn_finished_write(mp, 0);
2995 return (error);
2996 }
2997
2998 /*
2999 * Set the access and modification times given a path name; this
3000 * version follows links.
3001 */
3002 /* ARGSUSED */
3003 int
3004 sys_utimes(struct lwp *l, void *v, register_t *retval)
3005 {
3006 struct sys_utimes_args /* {
3007 syscallarg(const char *) path;
3008 syscallarg(const struct timeval *) tptr;
3009 } */ *uap = v;
3010 int error;
3011 struct nameidata nd;
3012
3013 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3014 if ((error = namei(&nd)) != 0)
3015 return (error);
3016
3017 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3018
3019 vrele(nd.ni_vp);
3020 return (error);
3021 }
3022
3023 /*
3024 * Set the access and modification times given a file descriptor.
3025 */
3026 /* ARGSUSED */
3027 int
3028 sys_futimes(struct lwp *l, void *v, register_t *retval)
3029 {
3030 struct sys_futimes_args /* {
3031 syscallarg(int) fd;
3032 syscallarg(const struct timeval *) tptr;
3033 } */ *uap = v;
3034 struct proc *p = l->l_proc;
3035 int error;
3036 struct file *fp;
3037
3038 /* getvnode() will use the descriptor for us */
3039 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3040 return (error);
3041
3042 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
3043 FILE_UNUSE(fp, l);
3044 return (error);
3045 }
3046
3047 /*
3048 * Set the access and modification times given a path name; this
3049 * version does not follow links.
3050 */
3051 /* ARGSUSED */
3052 int
3053 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3054 {
3055 struct sys_lutimes_args /* {
3056 syscallarg(const char *) path;
3057 syscallarg(const struct timeval *) tptr;
3058 } */ *uap = v;
3059 int error;
3060 struct nameidata nd;
3061
3062 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3063 if ((error = namei(&nd)) != 0)
3064 return (error);
3065
3066 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3067
3068 vrele(nd.ni_vp);
3069 return (error);
3070 }
3071
3072 /*
3073 * Common routine to set access and modification times given a vnode.
3074 */
3075 static int
3076 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
3077 {
3078 struct mount *mp;
3079 struct vattr vattr;
3080 int error;
3081
3082 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3083 return (error);
3084 VATTR_NULL(&vattr);
3085 if (tptr == NULL) {
3086 nanotime(&vattr.va_atime);
3087 vattr.va_mtime = vattr.va_atime;
3088 vattr.va_vaflags |= VA_UTIMES_NULL;
3089 } else {
3090 struct timeval tv[2];
3091
3092 error = copyin(tptr, tv, sizeof(tv));
3093 if (error)
3094 goto out;
3095 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3096 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3097 }
3098 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3099 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3100 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3101 VOP_UNLOCK(vp, 0);
3102 out:
3103 vn_finished_write(mp, 0);
3104 return (error);
3105 }
3106
3107 /*
3108 * Truncate a file given its path name.
3109 */
3110 /* ARGSUSED */
3111 int
3112 sys_truncate(struct lwp *l, void *v, register_t *retval)
3113 {
3114 struct sys_truncate_args /* {
3115 syscallarg(const char *) path;
3116 syscallarg(int) pad;
3117 syscallarg(off_t) length;
3118 } */ *uap = v;
3119 struct vnode *vp;
3120 struct mount *mp;
3121 struct vattr vattr;
3122 int error;
3123 struct nameidata nd;
3124
3125 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3126 if ((error = namei(&nd)) != 0)
3127 return (error);
3128 vp = nd.ni_vp;
3129 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3130 vrele(vp);
3131 return (error);
3132 }
3133 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3134 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3135 if (vp->v_type == VDIR)
3136 error = EISDIR;
3137 else if ((error = vn_writechk(vp)) == 0 &&
3138 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3139 VATTR_NULL(&vattr);
3140 vattr.va_size = SCARG(uap, length);
3141 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3142 }
3143 vput(vp);
3144 vn_finished_write(mp, 0);
3145 return (error);
3146 }
3147
3148 /*
3149 * Truncate a file given a file descriptor.
3150 */
3151 /* ARGSUSED */
3152 int
3153 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3154 {
3155 struct sys_ftruncate_args /* {
3156 syscallarg(int) fd;
3157 syscallarg(int) pad;
3158 syscallarg(off_t) length;
3159 } */ *uap = v;
3160 struct proc *p = l->l_proc;
3161 struct mount *mp;
3162 struct vattr vattr;
3163 struct vnode *vp;
3164 struct file *fp;
3165 int error;
3166
3167 /* getvnode() will use the descriptor for us */
3168 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3169 return (error);
3170 if ((fp->f_flag & FWRITE) == 0) {
3171 error = EINVAL;
3172 goto out;
3173 }
3174 vp = (struct vnode *)fp->f_data;
3175 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3176 FILE_UNUSE(fp, l);
3177 return (error);
3178 }
3179 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3180 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3181 if (vp->v_type == VDIR)
3182 error = EISDIR;
3183 else if ((error = vn_writechk(vp)) == 0) {
3184 VATTR_NULL(&vattr);
3185 vattr.va_size = SCARG(uap, length);
3186 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3187 }
3188 VOP_UNLOCK(vp, 0);
3189 vn_finished_write(mp, 0);
3190 out:
3191 FILE_UNUSE(fp, l);
3192 return (error);
3193 }
3194
3195 /*
3196 * Sync an open file.
3197 */
3198 /* ARGSUSED */
3199 int
3200 sys_fsync(struct lwp *l, void *v, register_t *retval)
3201 {
3202 struct sys_fsync_args /* {
3203 syscallarg(int) fd;
3204 } */ *uap = v;
3205 struct proc *p = l->l_proc;
3206 struct vnode *vp;
3207 struct mount *mp;
3208 struct file *fp;
3209 int error;
3210
3211 /* getvnode() will use the descriptor for us */
3212 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3213 return (error);
3214 vp = (struct vnode *)fp->f_data;
3215 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3216 FILE_UNUSE(fp, l);
3217 return (error);
3218 }
3219 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3220 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3221 if (error == 0 && bioops.io_fsync != NULL &&
3222 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3223 (*bioops.io_fsync)(vp, 0);
3224 VOP_UNLOCK(vp, 0);
3225 vn_finished_write(mp, 0);
3226 FILE_UNUSE(fp, l);
3227 return (error);
3228 }
3229
3230 /*
3231 * Sync a range of file data. API modeled after that found in AIX.
3232 *
3233 * FDATASYNC indicates that we need only save enough metadata to be able
3234 * to re-read the written data. Note we duplicate AIX's requirement that
3235 * the file be open for writing.
3236 */
3237 /* ARGSUSED */
3238 int
3239 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3240 {
3241 struct sys_fsync_range_args /* {
3242 syscallarg(int) fd;
3243 syscallarg(int) flags;
3244 syscallarg(off_t) start;
3245 syscallarg(off_t) length;
3246 } */ *uap = v;
3247 struct proc *p = l->l_proc;
3248 struct vnode *vp;
3249 struct file *fp;
3250 int flags, nflags;
3251 off_t s, e, len;
3252 int error;
3253
3254 /* getvnode() will use the descriptor for us */
3255 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3256 return (error);
3257
3258 if ((fp->f_flag & FWRITE) == 0) {
3259 error = EBADF;
3260 goto out;
3261 }
3262
3263 flags = SCARG(uap, flags);
3264 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3265 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3266 error = EINVAL;
3267 goto out;
3268 }
3269 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3270 if (flags & FDATASYNC)
3271 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3272 else
3273 nflags = FSYNC_WAIT;
3274 if (flags & FDISKSYNC)
3275 nflags |= FSYNC_CACHE;
3276
3277 len = SCARG(uap, length);
3278 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3279 if (len) {
3280 s = SCARG(uap, start);
3281 e = s + len;
3282 if (e < s) {
3283 error = EINVAL;
3284 goto out;
3285 }
3286 } else {
3287 e = 0;
3288 s = 0;
3289 }
3290
3291 vp = (struct vnode *)fp->f_data;
3292 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3293 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3294
3295 if (error == 0 && bioops.io_fsync != NULL &&
3296 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3297 (*bioops.io_fsync)(vp, nflags);
3298
3299 VOP_UNLOCK(vp, 0);
3300 out:
3301 FILE_UNUSE(fp, l);
3302 return (error);
3303 }
3304
3305 /*
3306 * Sync the data of an open file.
3307 */
3308 /* ARGSUSED */
3309 int
3310 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3311 {
3312 struct sys_fdatasync_args /* {
3313 syscallarg(int) fd;
3314 } */ *uap = v;
3315 struct proc *p = l->l_proc;
3316 struct vnode *vp;
3317 struct file *fp;
3318 int error;
3319
3320 /* getvnode() will use the descriptor for us */
3321 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3322 return (error);
3323 if ((fp->f_flag & FWRITE) == 0) {
3324 FILE_UNUSE(fp, l);
3325 return (EBADF);
3326 }
3327 vp = (struct vnode *)fp->f_data;
3328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3329 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3330 VOP_UNLOCK(vp, 0);
3331 FILE_UNUSE(fp, l);
3332 return (error);
3333 }
3334
3335 /*
3336 * Rename files, (standard) BSD semantics frontend.
3337 */
3338 /* ARGSUSED */
3339 int
3340 sys_rename(struct lwp *l, void *v, register_t *retval)
3341 {
3342 struct sys_rename_args /* {
3343 syscallarg(const char *) from;
3344 syscallarg(const char *) to;
3345 } */ *uap = v;
3346
3347 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3348 }
3349
3350 /*
3351 * Rename files, POSIX semantics frontend.
3352 */
3353 /* ARGSUSED */
3354 int
3355 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3356 {
3357 struct sys___posix_rename_args /* {
3358 syscallarg(const char *) from;
3359 syscallarg(const char *) to;
3360 } */ *uap = v;
3361
3362 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3363 }
3364
3365 /*
3366 * Rename files. Source and destination must either both be directories,
3367 * or both not be directories. If target is a directory, it must be empty.
3368 * If `from' and `to' refer to the same object, the value of the `retain'
3369 * argument is used to determine whether `from' will be
3370 *
3371 * (retain == 0) deleted unless `from' and `to' refer to the same
3372 * object in the file system's name space (BSD).
3373 * (retain == 1) always retained (POSIX).
3374 */
3375 static int
3376 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3377 {
3378 struct mount *mp = NULL;
3379 struct vnode *tvp, *fvp, *tdvp;
3380 struct nameidata fromnd, tond;
3381 struct proc *p;
3382 int error;
3383
3384 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART, UIO_USERSPACE,
3385 from, l);
3386 if ((error = namei(&fromnd)) != 0)
3387 return (error);
3388 if (fromnd.ni_dvp != fromnd.ni_vp)
3389 VOP_UNLOCK(fromnd.ni_dvp, 0);
3390 fvp = fromnd.ni_vp;
3391 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3392 if (error != 0) {
3393 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3394 vrele(fromnd.ni_dvp);
3395 vrele(fvp);
3396 if (fromnd.ni_startdir)
3397 vrele(fromnd.ni_startdir);
3398 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3399 return (error);
3400 }
3401 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3402 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3403 if ((error = namei(&tond)) != 0) {
3404 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3405 vrele(fromnd.ni_dvp);
3406 vrele(fvp);
3407 goto out1;
3408 }
3409 tdvp = tond.ni_dvp;
3410 tvp = tond.ni_vp;
3411
3412 if (tvp != NULL) {
3413 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3414 error = ENOTDIR;
3415 goto out;
3416 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3417 error = EISDIR;
3418 goto out;
3419 }
3420 }
3421
3422 if (fvp == tdvp)
3423 error = EINVAL;
3424
3425 /*
3426 * Source and destination refer to the same object.
3427 */
3428 if (fvp == tvp) {
3429 if (retain)
3430 error = -1;
3431 else if (fromnd.ni_dvp == tdvp &&
3432 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3433 !memcmp(fromnd.ni_cnd.cn_nameptr,
3434 tond.ni_cnd.cn_nameptr,
3435 fromnd.ni_cnd.cn_namelen))
3436 error = -1;
3437 }
3438
3439 #if NVERIEXEC > 0
3440 if (!error) {
3441 pathname_t frompath = NULL, topath = NULL;
3442
3443 error = pathname_get(fromnd.ni_dirp, fromnd.ni_segflg,
3444 &frompath);
3445 if (!error)
3446 error = pathname_get(tond.ni_dirp, tond.ni_segflg,
3447 &topath);
3448 if (!error)
3449 error = veriexec_renamechk(fvp, pathname_path(frompath),
3450 tvp, pathname_path(topath), l);
3451
3452 pathname_put(frompath);
3453 pathname_put(topath);
3454 }
3455 #endif /* NVERIEXEC > 0 */
3456
3457 out:
3458 p = l->l_proc;
3459 if (!error) {
3460 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3461 if (fromnd.ni_dvp != tdvp)
3462 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3463 if (tvp) {
3464 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3465 }
3466 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3467 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3468 } else {
3469 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3470 if (tdvp == tvp)
3471 vrele(tdvp);
3472 else
3473 vput(tdvp);
3474 if (tvp)
3475 vput(tvp);
3476 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3477 vrele(fromnd.ni_dvp);
3478 vrele(fvp);
3479 }
3480 vrele(tond.ni_startdir);
3481 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3482 out1:
3483 vn_finished_write(mp, 0);
3484 if (fromnd.ni_startdir)
3485 vrele(fromnd.ni_startdir);
3486 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3487 return (error == -1 ? 0 : error);
3488 }
3489
3490 /*
3491 * Make a directory file.
3492 */
3493 /* ARGSUSED */
3494 int
3495 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3496 {
3497 struct sys_mkdir_args /* {
3498 syscallarg(const char *) path;
3499 syscallarg(int) mode;
3500 } */ *uap = v;
3501 struct proc *p = l->l_proc;
3502 struct mount *mp;
3503 struct vnode *vp;
3504 struct vattr vattr;
3505 int error;
3506 struct nameidata nd;
3507
3508 restart:
3509 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3510 SCARG(uap, path), l);
3511 if ((error = namei(&nd)) != 0)
3512 return (error);
3513 vp = nd.ni_vp;
3514 if (vp != NULL) {
3515 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3516 if (nd.ni_dvp == vp)
3517 vrele(nd.ni_dvp);
3518 else
3519 vput(nd.ni_dvp);
3520 vrele(vp);
3521 return (EEXIST);
3522 }
3523 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3524 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3525 if (nd.ni_dvp == vp)
3526 vrele(nd.ni_dvp);
3527 else
3528 vput(nd.ni_dvp);
3529 if ((error = vn_start_write(NULL, &mp,
3530 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3531 return (error);
3532 goto restart;
3533 }
3534 VATTR_NULL(&vattr);
3535 vattr.va_type = VDIR;
3536 vattr.va_mode =
3537 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3538 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3539 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3540 if (!error)
3541 vput(nd.ni_vp);
3542 vn_finished_write(mp, 0);
3543 return (error);
3544 }
3545
3546 /*
3547 * Remove a directory file.
3548 */
3549 /* ARGSUSED */
3550 int
3551 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3552 {
3553 struct sys_rmdir_args /* {
3554 syscallarg(const char *) path;
3555 } */ *uap = v;
3556 struct mount *mp;
3557 struct vnode *vp;
3558 int error;
3559 struct nameidata nd;
3560
3561 restart:
3562 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3563 SCARG(uap, path), l);
3564 if ((error = namei(&nd)) != 0)
3565 return (error);
3566 vp = nd.ni_vp;
3567 if (vp->v_type != VDIR) {
3568 error = ENOTDIR;
3569 goto out;
3570 }
3571 /*
3572 * No rmdir "." please.
3573 */
3574 if (nd.ni_dvp == vp) {
3575 error = EINVAL;
3576 goto out;
3577 }
3578 /*
3579 * The root of a mounted filesystem cannot be deleted.
3580 */
3581 if (vp->v_flag & VROOT) {
3582 error = EBUSY;
3583 goto out;
3584 }
3585 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3586 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3587 if (nd.ni_dvp == vp)
3588 vrele(nd.ni_dvp);
3589 else
3590 vput(nd.ni_dvp);
3591 vput(vp);
3592 if ((error = vn_start_write(NULL, &mp,
3593 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3594 return (error);
3595 goto restart;
3596 }
3597 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3598 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3599 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3600 vn_finished_write(mp, 0);
3601 return (error);
3602
3603 out:
3604 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3605 if (nd.ni_dvp == vp)
3606 vrele(nd.ni_dvp);
3607 else
3608 vput(nd.ni_dvp);
3609 vput(vp);
3610 return (error);
3611 }
3612
3613 /*
3614 * Read a block of directory entries in a file system independent format.
3615 */
3616 int
3617 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3618 {
3619 struct sys___getdents30_args /* {
3620 syscallarg(int) fd;
3621 syscallarg(char *) buf;
3622 syscallarg(size_t) count;
3623 } */ *uap = v;
3624 struct proc *p = l->l_proc;
3625 struct file *fp;
3626 int error, done;
3627
3628 /* getvnode() will use the descriptor for us */
3629 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3630 return (error);
3631 if ((fp->f_flag & FREAD) == 0) {
3632 error = EBADF;
3633 goto out;
3634 }
3635 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3636 SCARG(uap, count), &done, l, 0, 0);
3637 #ifdef KTRACE
3638 if (!error && KTRPOINT(p, KTR_GENIO)) {
3639 struct iovec iov;
3640 iov.iov_base = SCARG(uap, buf);
3641 iov.iov_len = done;
3642 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3643 }
3644 #endif
3645 *retval = done;
3646 out:
3647 FILE_UNUSE(fp, l);
3648 return (error);
3649 }
3650
3651 /*
3652 * Set the mode mask for creation of filesystem nodes.
3653 */
3654 int
3655 sys_umask(struct lwp *l, void *v, register_t *retval)
3656 {
3657 struct sys_umask_args /* {
3658 syscallarg(mode_t) newmask;
3659 } */ *uap = v;
3660 struct proc *p = l->l_proc;
3661 struct cwdinfo *cwdi;
3662
3663 cwdi = p->p_cwdi;
3664 *retval = cwdi->cwdi_cmask;
3665 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3666 return (0);
3667 }
3668
3669 /*
3670 * Void all references to file by ripping underlying filesystem
3671 * away from vnode.
3672 */
3673 /* ARGSUSED */
3674 int
3675 sys_revoke(struct lwp *l, void *v, register_t *retval)
3676 {
3677 struct sys_revoke_args /* {
3678 syscallarg(const char *) path;
3679 } */ *uap = v;
3680 struct mount *mp;
3681 struct vnode *vp;
3682 struct vattr vattr;
3683 int error;
3684 struct nameidata nd;
3685
3686 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3687 if ((error = namei(&nd)) != 0)
3688 return (error);
3689 vp = nd.ni_vp;
3690 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3691 goto out;
3692 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3693 (error = kauth_authorize_generic(l->l_cred,
3694 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3695 goto out;
3696 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3697 goto out;
3698 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3699 VOP_REVOKE(vp, REVOKEALL);
3700 vn_finished_write(mp, 0);
3701 out:
3702 vrele(vp);
3703 return (error);
3704 }
3705
3706 /*
3707 * Convert a user file descriptor to a kernel file entry.
3708 */
3709 int
3710 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3711 {
3712 struct vnode *vp;
3713 struct file *fp;
3714
3715 if ((fp = fd_getfile(fdp, fd)) == NULL)
3716 return (EBADF);
3717
3718 FILE_USE(fp);
3719
3720 if (fp->f_type != DTYPE_VNODE) {
3721 FILE_UNUSE(fp, NULL);
3722 return (EINVAL);
3723 }
3724
3725 vp = (struct vnode *)fp->f_data;
3726 if (vp->v_type == VBAD) {
3727 FILE_UNUSE(fp, NULL);
3728 return (EBADF);
3729 }
3730
3731 *fpp = fp;
3732 return (0);
3733 }
3734