vfs_syscalls.c revision 1.322.2.1 1 /* $NetBSD: vfs_syscalls.c,v 1.322.2.1 2007/08/15 13:49:24 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.322.2.1 2007/08/15 13:49:24 skrll Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/syscallargs.h>
65 #include <sys/vfs_syscalls.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #include <sys/verified_exec.h>
73 #include <sys/kauth.h>
74
75 #include <miscfs/genfs/genfs.h>
76 #include <miscfs/syncfs/syncfs.h>
77
78 #ifdef COMPAT_30
79 #include "opt_nfsserver.h"
80 #include <nfs/rpcv2.h>
81 #endif
82 #include <nfs/nfsproto.h>
83 #ifdef COMPAT_30
84 #include <nfs/nfs.h>
85 #include <nfs/nfs_var.h>
86 #endif
87
88 #if NFSS > 0
89 #include <dev/fssvar.h>
90 #endif
91
92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
93
94 static int change_dir(struct nameidata *, struct lwp *);
95 static int change_flags(struct vnode *, u_long, struct lwp *);
96 static int change_mode(struct vnode *, int, struct lwp *l);
97 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
98 static int rename_files(const char *, const char *, struct lwp *, int);
99
100 void checkdirs(struct vnode *);
101
102 int dovfsusermount = 0;
103
104 /*
105 * Virtual File System System Calls
106 */
107
108 /*
109 * Mount a file system.
110 */
111
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Compatibility table for the 4.3BSD and NetBSD 0.9 mount syscalls,
 * which named file systems by number.  The array index is the historic
 * type number, so the order of the entries is significant.
 *
 * Do not modify this table.  It should only contain file systems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	/* 0 = MOUNT_NONE */			NULL,
	/* 1 = MOUNT_UFS */			MOUNT_FFS,
	/* 2 */					MOUNT_NFS,
	/* 3 */					MOUNT_MFS,
	/* 4 */					MOUNT_MSDOS,
	/* 5 = MOUNT_ISOFS */			MOUNT_CD9660,
	/* 6 */					MOUNT_FDESC,
	/* 7 */					MOUNT_KERNFS,
	/* 8 = MOUNT_DEVFS */			NULL,
	/* 9 */					MOUNT_AFS,
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
135
136 static int
137 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
138 void *data, size_t *data_len)
139 {
140 struct mount *mp;
141 int error = 0, saved_flags;
142
143 mp = vp->v_mount;
144 saved_flags = mp->mnt_flag;
145
146 /* We can operate only on VROOT nodes. */
147 if ((vp->v_flag & VROOT) == 0)
148 return EINVAL;
149
150 /*
151 * We only allow the filesystem to be reloaded if it
152 * is currently mounted read-only.
153 */
154 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY))
155 return EOPNOTSUPP; /* Needs translation */
156
157 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
158 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
159 if (error)
160 return error;
161
162 if (vfs_busy(mp, LK_NOWAIT, 0))
163 return EPERM;
164
165 mp->mnt_flag &= ~MNT_OP_FLAGS;
166 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
167
168 /*
169 * Set the mount level flags.
170 */
171 if (flags & MNT_RDONLY)
172 mp->mnt_flag |= MNT_RDONLY;
173 else if (mp->mnt_flag & MNT_RDONLY)
174 mp->mnt_iflag |= IMNT_WANTRDWR;
175 mp->mnt_flag &=
176 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
177 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
178 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
179 mp->mnt_flag |= flags &
180 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
181 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
182 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
183 MNT_IGNORE);
184
185 error = VFS_MOUNT(mp, path, data, data_len, l);
186
187 #if defined(COMPAT_30) && defined(NFSSERVER)
188 if (error && data != NULL) {
189 int error2;
190
191 /* Update failed; let's try and see if it was an
192 * export request. */
193 error2 = nfs_update_exports_30(mp, path, data, l);
194
195 /* Only update error code if the export request was
196 * understood but some problem occurred while
197 * processing it. */
198 if (error2 != EJUSTRETURN)
199 error = error2;
200 }
201 #endif
202 if (mp->mnt_iflag & IMNT_WANTRDWR)
203 mp->mnt_flag &= ~MNT_RDONLY;
204 if (error)
205 mp->mnt_flag = saved_flags;
206 mp->mnt_flag &= ~MNT_OP_FLAGS;
207 mp->mnt_iflag &= ~IMNT_WANTRDWR;
208 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
209 if (mp->mnt_syncer == NULL)
210 error = vfs_allocate_syncvnode(mp);
211 } else {
212 if (mp->mnt_syncer != NULL)
213 vfs_deallocate_syncvnode(mp);
214 }
215 vfs_unbusy(mp);
216
217 return (error);
218 }
219
220 static int
221 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
222 {
223 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
224 int error;
225
226 /* Copy file-system type from userspace. */
227 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
228 if (error) {
229 #if defined(COMPAT_09) || defined(COMPAT_43)
230 /*
231 * Historically, filesystem types were identified by numbers.
232 * If we get an integer for the filesystem type instead of a
233 * string, we check to see if it matches one of the historic
234 * filesystem types.
235 */
236 u_long fsindex = (u_long)fstype;
237 if (fsindex >= nmountcompatnames ||
238 mountcompatnames[fsindex] == NULL)
239 return ENODEV;
240 strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
241 #else
242 return error;
243 #endif
244 }
245
246 #ifdef COMPAT_10
247 /* Accept `ufs' as an alias for `ffs'. */
248 if (strcmp(fstypename, "ufs") == 0)
249 fstypename[0] = 'f';
250 #endif
251
252 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL)
253 return ENODEV;
254 return 0;
255 }
256
257 static int
258 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
259 const char *path, int flags, void *data, size_t *data_len)
260 {
261 struct mount *mp = NULL;
262 struct vnode *vp = *vpp;
263 struct vattr va;
264 int error;
265
266 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
267 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
268 if (error)
269 return error;
270
271 /* Can't make a non-dir a mount-point (from here anyway). */
272 if (vp->v_type != VDIR)
273 return ENOTDIR;
274
275 /*
276 * If the user is not root, ensure that they own the directory
277 * onto which we are attempting to mount.
278 */
279 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
280 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
281 (error = kauth_authorize_generic(l->l_cred,
282 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
283 return error;
284 }
285
286 if (flags & MNT_EXPORTED)
287 return EINVAL;
288
289 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
290 return error;
291
292 /*
293 * Check if a file-system is not already mounted on this vnode.
294 */
295 if (vp->v_mountedhere != NULL)
296 return EBUSY;
297
298 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
299
300 mp->mnt_op = vfsops;
301
302 TAILQ_INIT(&mp->mnt_vnodelist);
303 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
304 simple_lock_init(&mp->mnt_slock);
305 (void)vfs_busy(mp, LK_NOWAIT, 0);
306
307 mp->mnt_op->vfs_refcount++;
308 mp->mnt_vnodecovered = vp;
309 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
310 mp->mnt_unmounter = NULL;
311 mount_initspecific(mp);
312
313 /*
314 * The underlying file system may refuse the mount for
315 * various reasons. Allow the user to force it to happen.
316 *
317 * Set the mount level flags.
318 */
319 mp->mnt_flag = flags &
320 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
321 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
322 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
323 MNT_IGNORE | MNT_RDONLY);
324
325 error = VFS_MOUNT(mp, path, data, data_len, l);
326 mp->mnt_flag &= ~MNT_OP_FLAGS;
327
328 /*
329 * Put the new filesystem on the mount list after root.
330 */
331 cache_purge(vp);
332 if (error != 0) {
333 vp->v_mountedhere = NULL;
334 mp->mnt_op->vfs_refcount--;
335 vfs_unbusy(mp);
336 free(mp, M_MOUNT);
337 return error;
338 }
339
340 mp->mnt_iflag &= ~IMNT_WANTRDWR;
341 vp->v_mountedhere = mp;
342 simple_lock(&mountlist_slock);
343 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
344 simple_unlock(&mountlist_slock);
345 VOP_UNLOCK(vp, 0);
346 checkdirs(vp);
347 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
348 error = vfs_allocate_syncvnode(mp);
349 vfs_unbusy(mp);
350 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
351 error = VFS_START(mp, 0, l);
352 if (error)
353 vrele(vp);
354 *vpp = NULL;
355 return error;
356 }
357
358 static int
359 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
360 void *data, size_t *data_len)
361 {
362 struct mount *mp;
363 int error;
364
365 /* If MNT_GETARGS is specified, it should be the only flag. */
366 if (flags & ~MNT_GETARGS)
367 return EINVAL;
368
369 mp = vp->v_mount;
370
371 /* XXX: probably some notion of "can see" here if we want isolation. */
372 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
373 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
374 if (error)
375 return error;
376
377 if ((vp->v_flag & VROOT) == 0)
378 return EINVAL;
379
380 if (vfs_busy(mp, LK_NOWAIT, 0))
381 return EPERM;
382
383 mp->mnt_flag &= ~MNT_OP_FLAGS;
384 mp->mnt_flag |= MNT_GETARGS;
385 error = VFS_MOUNT(mp, path, data, data_len, l);
386 mp->mnt_flag &= ~MNT_OP_FLAGS;
387
388 vfs_unbusy(mp);
389 return (error);
390 }
391
#ifdef COMPAT_40
/*
 * Compat mount syscall without a data length argument: forwards to
 * do_sys_mount() with a data length of 0 and a scratch return value.
 */
/* ARGSUSED */
int
compat_40_sys_mount(struct lwp *l, void *v, register_t *retval)
{
	struct compat_40_sys_mount_args /* {
		syscallarg(const char *) type;
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(void *) data;
	} */ *uap = v;
	register_t scratch;
	int error;

	error = do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
	    SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &scratch);
	return error;
}
#endif
409
410 int
411 sys___mount50(struct lwp *l, void *v, register_t *retval)
412 {
413 struct sys___mount50_args /* {
414 syscallarg(const char *) type;
415 syscallarg(const char *) path;
416 syscallarg(int) flags;
417 syscallarg(void *) data;
418 syscallarg(size_t) data_len;
419 } */ *uap = v;
420
421 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
422 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
423 SCARG(uap, data_len), retval);
424 }
425
426 int
427 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
428 const char *path, int flags, void *data, enum uio_seg data_seg,
429 size_t data_len, register_t *retval)
430 {
431 struct vnode *vp;
432 struct nameidata nd;
433 void *data_buf = data;
434 int error;
435
436 /*
437 * Get vnode to be covered
438 */
439 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l);
440 if ((error = namei(&nd)) != 0)
441 return (error);
442 vp = nd.ni_vp;
443
444 /*
445 * A lookup in VFS_MOUNT might result in an attempt to
446 * lock this vnode again, so make the lock recursive.
447 */
448 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
449
450 if (vfsops == NULL) {
451 if (flags & (MNT_GETARGS | MNT_UPDATE))
452 vfsops = vp->v_mount->mnt_op;
453 else {
454 /* 'type' is userspace */
455 error = mount_get_vfsops(type, &vfsops);
456 if (error != 0)
457 goto done;
458 }
459 }
460
461 if (data != NULL && data_seg == UIO_USERSPACE) {
462 if (data_len == 0) {
463 /* No length supplied, use default for filesystem */
464 data_len = vfsops->vfs_min_mount_data;
465 if (data_len > VFS_MAX_MOUNT_DATA) {
466 /* maybe a force loaded old LKM */
467 error = EINVAL;
468 goto done;
469 }
470 #ifdef COMPAT_30
471 /* Hopefully a longer buffer won't make copyin() fail */
472 if (flags & MNT_UPDATE
473 && data_len < sizeof (struct mnt_export_args30))
474 data_len = sizeof (struct mnt_export_args30);
475 #endif
476 }
477 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
478
479 /* NFS needs the buffer even for mnt_getargs .... */
480 error = copyin(data, data_buf, data_len);
481 if (error != 0)
482 goto done;
483 }
484
485 if (flags & MNT_GETARGS) {
486 if (data_len == 0) {
487 error = EINVAL;
488 goto done;
489 }
490 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
491 if (error != 0)
492 goto done;
493 if (data_seg == UIO_USERSPACE)
494 error = copyout(data_buf, data, data_len);
495 *retval = data_len;
496 } else if (flags & MNT_UPDATE) {
497 error = mount_update(l, vp, path, flags, data_buf, &data_len);
498 } else {
499 /* Locking is handled internally in mount_domount(). */
500 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
501 &data_len);
502 }
503
504 done:
505 if (vp)
506 vput(vp);
507 if (data_buf != data)
508 free(data_buf, M_TEMP);
509 return (error);
510 }
511
512 /*
513 * Scan all active processes to see if any of them have a current
514 * or root directory onto which the new filesystem has just been
515 * mounted. If so, replace them with the new mount point.
516 */
517 void
518 checkdirs(struct vnode *olddp)
519 {
520 struct cwdinfo *cwdi;
521 struct vnode *newdp;
522 struct proc *p;
523
524 if (olddp->v_usecount == 1)
525 return;
526 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
527 panic("mount: lost mount");
528 mutex_enter(&proclist_lock);
529 PROCLIST_FOREACH(p, &allproc) {
530 cwdi = p->p_cwdi;
531 if (!cwdi)
532 continue;
533 if (cwdi->cwdi_cdir == olddp) {
534 vrele(cwdi->cwdi_cdir);
535 VREF(newdp);
536 cwdi->cwdi_cdir = newdp;
537 }
538 if (cwdi->cwdi_rdir == olddp) {
539 vrele(cwdi->cwdi_rdir);
540 VREF(newdp);
541 cwdi->cwdi_rdir = newdp;
542 }
543 }
544 mutex_exit(&proclist_lock);
545 if (rootvnode == olddp) {
546 vrele(rootvnode);
547 VREF(newdp);
548 rootvnode = newdp;
549 }
550 vput(newdp);
551 }
552
553 /*
554 * Unmount a file system.
555 *
556 * Note: unmount takes a path to the vnode mounted on as argument,
557 * not special file (as before).
558 */
559 /* ARGSUSED */
560 int
561 sys_unmount(struct lwp *l, void *v, register_t *retval)
562 {
563 struct sys_unmount_args /* {
564 syscallarg(const char *) path;
565 syscallarg(int) flags;
566 } */ *uap = v;
567 struct vnode *vp;
568 struct mount *mp;
569 int error;
570 struct nameidata nd;
571
572 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
573 SCARG(uap, path), l);
574 if ((error = namei(&nd)) != 0)
575 return (error);
576 vp = nd.ni_vp;
577 mp = vp->v_mount;
578
579 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
580 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
581 if (error) {
582 vput(vp);
583 return (error);
584 }
585
586 /*
587 * Don't allow unmounting the root file system.
588 */
589 if (mp->mnt_flag & MNT_ROOTFS) {
590 vput(vp);
591 return (EINVAL);
592 }
593
594 /*
595 * Must be the root of the filesystem
596 */
597 if ((vp->v_flag & VROOT) == 0) {
598 vput(vp);
599 return (EINVAL);
600 }
601 vput(vp);
602
603 /*
604 * XXX Freeze syncer. Must do this before locking the
605 * mount point. See dounmount() for details.
606 */
607 mutex_enter(&syncer_mutex);
608
609 if (vfs_busy(mp, 0, 0)) {
610 mutex_exit(&syncer_mutex);
611 return (EBUSY);
612 }
613
614 return (dounmount(mp, SCARG(uap, flags), l));
615 }
616
617 /*
618 * Do the actual file system unmount. File system is assumed to have been
619 * marked busy by the caller.
620 */
621 int
622 dounmount(struct mount *mp, int flags, struct lwp *l)
623 {
624 struct vnode *coveredvp;
625 int error;
626 int async;
627 int used_syncer;
628
629 #if NVERIEXEC > 0
630 error = veriexec_unmountchk(mp);
631 if (error)
632 return (error);
633 #endif /* NVERIEXEC > 0 */
634
635 simple_lock(&mountlist_slock);
636 vfs_unbusy(mp);
637 used_syncer = (mp->mnt_syncer != NULL);
638
639 /*
640 * XXX Syncer must be frozen when we get here. This should really
641 * be done on a per-mountpoint basis, but especially the softdep
642 * code possibly called from the syncer doesn't exactly work on a
643 * per-mountpoint basis, so the softdep code would become a maze
644 * of vfs_busy() calls.
645 *
646 * The caller of dounmount() must acquire syncer_mutex because
647 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
648 * order, and we must preserve that order to avoid deadlock.
649 *
650 * So, if the file system did not use the syncer, now is
651 * the time to release the syncer_mutex.
652 */
653 if (used_syncer == 0)
654 mutex_exit(&syncer_mutex);
655
656 mp->mnt_iflag |= IMNT_UNMOUNT;
657 mp->mnt_unmounter = l;
658 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
659
660 async = mp->mnt_flag & MNT_ASYNC;
661 mp->mnt_flag &= ~MNT_ASYNC;
662 cache_purgevfs(mp); /* remove cache entries for this file sys */
663 if (mp->mnt_syncer != NULL)
664 vfs_deallocate_syncvnode(mp);
665 error = 0;
666 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
667 #if NFSS > 0
668 error = fss_umount_hook(mp, (flags & MNT_FORCE));
669 #endif
670 if (error == 0)
671 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
672 }
673 if (error == 0 || (flags & MNT_FORCE))
674 error = VFS_UNMOUNT(mp, flags, l);
675 if (error) {
676 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
677 (void) vfs_allocate_syncvnode(mp);
678 simple_lock(&mountlist_slock);
679 mp->mnt_iflag &= ~IMNT_UNMOUNT;
680 mp->mnt_unmounter = NULL;
681 mp->mnt_flag |= async;
682 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
683 &mountlist_slock);
684 if (used_syncer)
685 mutex_exit(&syncer_mutex);
686 simple_lock(&mp->mnt_slock);
687 while (mp->mnt_wcnt > 0) {
688 wakeup(mp);
689 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
690 0, &mp->mnt_slock);
691 }
692 simple_unlock(&mp->mnt_slock);
693 return (error);
694 }
695 simple_lock(&mountlist_slock);
696 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
697 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
698 coveredvp->v_mountedhere = NULL;
699 mp->mnt_op->vfs_refcount--;
700 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
701 panic("unmount: dangling vnode");
702 mp->mnt_iflag |= IMNT_GONE;
703 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
704 if (coveredvp != NULLVP)
705 vrele(coveredvp);
706 mount_finispecific(mp);
707 if (used_syncer)
708 mutex_exit(&syncer_mutex);
709 simple_lock(&mp->mnt_slock);
710 while (mp->mnt_wcnt > 0) {
711 wakeup(mp);
712 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
713 }
714 simple_unlock(&mp->mnt_slock);
715 vfs_hooks_unmount(mp);
716 free(mp, M_MOUNT);
717 return (0);
718 }
719
720 /*
721 * Sync each mounted filesystem.
722 */
723 #ifdef DEBUG
724 int syncprt = 0;
725 struct ctldebug debug0 = { "syncprt", &syncprt };
726 #endif
727
728 /* ARGSUSED */
729 int
730 sys_sync(struct lwp *l, void *v, register_t *retval)
731 {
732 struct mount *mp, *nmp;
733 int asyncflag;
734
735 if (l == NULL)
736 l = &lwp0;
737
738 simple_lock(&mountlist_slock);
739 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
740 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
741 nmp = mp->mnt_list.cqe_prev;
742 continue;
743 }
744 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
745 asyncflag = mp->mnt_flag & MNT_ASYNC;
746 mp->mnt_flag &= ~MNT_ASYNC;
747 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
748 if (asyncflag)
749 mp->mnt_flag |= MNT_ASYNC;
750 }
751 simple_lock(&mountlist_slock);
752 nmp = mp->mnt_list.cqe_prev;
753 vfs_unbusy(mp);
754
755 }
756 simple_unlock(&mountlist_slock);
757 #ifdef DEBUG
758 if (syncprt)
759 vfs_bufstats();
760 #endif /* DEBUG */
761 return (0);
762 }
763
764 /*
765 * Change filesystem quotas.
766 */
767 /* ARGSUSED */
768 int
769 sys_quotactl(struct lwp *l, void *v, register_t *retval)
770 {
771 struct sys_quotactl_args /* {
772 syscallarg(const char *) path;
773 syscallarg(int) cmd;
774 syscallarg(int) uid;
775 syscallarg(void *) arg;
776 } */ *uap = v;
777 struct mount *mp;
778 int error;
779 struct nameidata nd;
780
781 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
782 if ((error = namei(&nd)) != 0)
783 return (error);
784 mp = nd.ni_vp->v_mount;
785 vrele(nd.ni_vp);
786 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
787 SCARG(uap, arg), l);
788 return (error);
789 }
790
791 int
792 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
793 int root)
794 {
795 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
796 int error = 0;
797
798 /*
799 * If MNT_NOWAIT or MNT_LAZY is specified, do not
800 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
801 * overrides MNT_NOWAIT.
802 */
803 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
804 (flags != MNT_WAIT && flags != 0)) {
805 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
806 goto done;
807 }
808
809 /* Get the filesystem stats now */
810 memset(sp, 0, sizeof(*sp));
811 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
812 return error;
813 }
814
815 if (cwdi->cwdi_rdir == NULL)
816 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
817 done:
818 if (cwdi->cwdi_rdir != NULL) {
819 size_t len;
820 char *bp;
821 char *path = PNBUF_GET();
822
823 bp = path + MAXPATHLEN;
824 *--bp = '\0';
825 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
826 MAXPATHLEN / 2, 0, l);
827 if (error) {
828 PNBUF_PUT(path);
829 return error;
830 }
831 len = strlen(bp);
832 /*
833 * for mount points that are below our root, we can see
834 * them, so we fix up the pathname and return them. The
835 * rest we cannot see, so we don't allow viewing the
836 * data.
837 */
838 if (strncmp(bp, sp->f_mntonname, len) == 0) {
839 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
840 sizeof(sp->f_mntonname));
841 if (sp->f_mntonname[0] == '\0')
842 (void)strlcpy(sp->f_mntonname, "/",
843 sizeof(sp->f_mntonname));
844 } else {
845 if (root)
846 (void)strlcpy(sp->f_mntonname, "/",
847 sizeof(sp->f_mntonname));
848 else
849 error = EPERM;
850 }
851 PNBUF_PUT(path);
852 }
853 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
854 return error;
855 }
856
857 /*
858 * Get filesystem statistics by path.
859 */
860 int
861 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
862 {
863 struct mount *mp;
864 int error;
865 struct nameidata nd;
866
867 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l);
868 if ((error = namei(&nd)) != 0)
869 return error;
870 mp = nd.ni_vp->v_mount;
871 error = dostatvfs(mp, sb, l, flags, 1);
872 vrele(nd.ni_vp);
873 return error;
874 }
875
876 /* ARGSUSED */
877 int
878 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
879 {
880 struct sys_statvfs1_args /* {
881 syscallarg(const char *) path;
882 syscallarg(struct statvfs *) buf;
883 syscallarg(int) flags;
884 } */ *uap = v;
885 struct statvfs *sb;
886 int error;
887
888 sb = STATVFSBUF_GET();
889 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
890 if (error == 0)
891 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
892 STATVFSBUF_PUT(sb);
893 return error;
894 }
895
896 /*
897 * Get filesystem statistics by fd.
898 */
899 int
900 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
901 {
902 struct proc *p = l->l_proc;
903 struct file *fp;
904 struct mount *mp;
905 int error;
906
907 /* getvnode() will use the descriptor for us */
908 if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
909 return (error);
910 mp = ((struct vnode *)fp->f_data)->v_mount;
911 error = dostatvfs(mp, sb, l, flags, 1);
912 FILE_UNUSE(fp, l);
913 return error;
914 }
915
916 /* ARGSUSED */
917 int
918 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
919 {
920 struct sys_fstatvfs1_args /* {
921 syscallarg(int) fd;
922 syscallarg(struct statvfs *) buf;
923 syscallarg(int) flags;
924 } */ *uap = v;
925 struct statvfs *sb;
926 int error;
927
928 sb = STATVFSBUF_GET();
929 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
930 if (error == 0)
931 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
932 STATVFSBUF_PUT(sb);
933 return error;
934 }
935
936
937 /*
938 * Get statistics on all filesystems.
939 */
940 int
941 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
942 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
943 register_t *retval)
944 {
945 int root = 0;
946 struct proc *p = l->l_proc;
947 struct mount *mp, *nmp;
948 struct statvfs *sb;
949 size_t count, maxcount;
950 int error = 0;
951
952 sb = STATVFSBUF_GET();
953 maxcount = bufsize / entry_sz;
954 simple_lock(&mountlist_slock);
955 count = 0;
956 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
957 mp = nmp) {
958 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
959 nmp = CIRCLEQ_NEXT(mp, mnt_list);
960 continue;
961 }
962 if (sfsp && count < maxcount) {
963 error = dostatvfs(mp, sb, l, flags, 0);
964 if (error) {
965 simple_lock(&mountlist_slock);
966 nmp = CIRCLEQ_NEXT(mp, mnt_list);
967 vfs_unbusy(mp);
968 continue;
969 }
970 error = copyfn(sb, sfsp, entry_sz);
971 if (error) {
972 vfs_unbusy(mp);
973 goto out;
974 }
975 sfsp = (char *)sfsp + entry_sz;
976 root |= strcmp(sb->f_mntonname, "/") == 0;
977 }
978 count++;
979 simple_lock(&mountlist_slock);
980 nmp = CIRCLEQ_NEXT(mp, mnt_list);
981 vfs_unbusy(mp);
982 }
983 simple_unlock(&mountlist_slock);
984 if (root == 0 && p->p_cwdi->cwdi_rdir) {
985 /*
986 * fake a root entry
987 */
988 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l, flags, 1);
989 if (error != 0)
990 goto out;
991 if (sfsp)
992 error = copyfn(sb, sfsp, entry_sz);
993 count++;
994 }
995 if (sfsp && count > maxcount)
996 *retval = maxcount;
997 else
998 *retval = count;
999 out:
1000 STATVFSBUF_PUT(sb);
1001 return error;
1002 }
1003
1004 int
1005 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
1006 {
1007 struct sys_getvfsstat_args /* {
1008 syscallarg(struct statvfs *) buf;
1009 syscallarg(size_t) bufsize;
1010 syscallarg(int) flags;
1011 } */ *uap = v;
1012
1013 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1014 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1015 }
1016
1017 /*
1018 * Change current working directory to a given file descriptor.
1019 */
1020 /* ARGSUSED */
1021 int
1022 sys_fchdir(struct lwp *l, void *v, register_t *retval)
1023 {
1024 struct sys_fchdir_args /* {
1025 syscallarg(int) fd;
1026 } */ *uap = v;
1027 struct proc *p = l->l_proc;
1028 struct filedesc *fdp = p->p_fd;
1029 struct cwdinfo *cwdi = p->p_cwdi;
1030 struct vnode *vp, *tdp;
1031 struct mount *mp;
1032 struct file *fp;
1033 int error;
1034
1035 /* getvnode() will use the descriptor for us */
1036 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1037 return (error);
1038 vp = (struct vnode *)fp->f_data;
1039
1040 VREF(vp);
1041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1042 if (vp->v_type != VDIR)
1043 error = ENOTDIR;
1044 else
1045 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1046 if (error) {
1047 vput(vp);
1048 goto out;
1049 }
1050 while ((mp = vp->v_mountedhere) != NULL) {
1051 if (vfs_busy(mp, 0, 0))
1052 continue;
1053
1054 vput(vp);
1055 error = VFS_ROOT(mp, &tdp);
1056 vfs_unbusy(mp);
1057 if (error)
1058 goto out;
1059 vp = tdp;
1060 }
1061 VOP_UNLOCK(vp, 0);
1062
1063 /*
1064 * Disallow changing to a directory not under the process's
1065 * current root directory (if there is one).
1066 */
1067 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1068 vrele(vp);
1069 error = EPERM; /* operation not permitted */
1070 goto out;
1071 }
1072
1073 vrele(cwdi->cwdi_cdir);
1074 cwdi->cwdi_cdir = vp;
1075 out:
1076 FILE_UNUSE(fp, l);
1077 return (error);
1078 }
1079
1080 /*
1081 * Change this process's notion of the root directory to a given file
1082 * descriptor.
1083 */
1084 int
1085 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1086 {
1087 struct sys_fchroot_args *uap = v;
1088 struct proc *p = l->l_proc;
1089 struct filedesc *fdp = p->p_fd;
1090 struct cwdinfo *cwdi = p->p_cwdi;
1091 struct vnode *vp;
1092 struct file *fp;
1093 int error;
1094
1095 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1096 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1097 return error;
1098 /* getvnode() will use the descriptor for us */
1099 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1100 return error;
1101 vp = (struct vnode *) fp->f_data;
1102 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1103 if (vp->v_type != VDIR)
1104 error = ENOTDIR;
1105 else
1106 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1107 VOP_UNLOCK(vp, 0);
1108 if (error)
1109 goto out;
1110 VREF(vp);
1111
1112 /*
1113 * Prevent escaping from chroot by putting the root under
1114 * the working directory. Silently chdir to / if we aren't
1115 * already there.
1116 */
1117 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1118 /*
1119 * XXX would be more failsafe to change directory to a
1120 * deadfs node here instead
1121 */
1122 vrele(cwdi->cwdi_cdir);
1123 VREF(vp);
1124 cwdi->cwdi_cdir = vp;
1125 }
1126
1127 if (cwdi->cwdi_rdir != NULL)
1128 vrele(cwdi->cwdi_rdir);
1129 cwdi->cwdi_rdir = vp;
1130 out:
1131 FILE_UNUSE(fp, l);
1132 return (error);
1133 }
1134
1135 /*
1136 * Change current working directory (``.'').
1137 */
1138 /* ARGSUSED */
1139 int
1140 sys_chdir(struct lwp *l, void *v, register_t *retval)
1141 {
1142 struct sys_chdir_args /* {
1143 syscallarg(const char *) path;
1144 } */ *uap = v;
1145 struct proc *p = l->l_proc;
1146 struct cwdinfo *cwdi = p->p_cwdi;
1147 int error;
1148 struct nameidata nd;
1149
1150 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1151 SCARG(uap, path), l);
1152 if ((error = change_dir(&nd, l)) != 0)
1153 return (error);
1154 vrele(cwdi->cwdi_cdir);
1155 cwdi->cwdi_cdir = nd.ni_vp;
1156 return (0);
1157 }
1158
1159 /*
1160 * Change notion of root (``/'') directory.
1161 */
1162 /* ARGSUSED */
1163 int
1164 sys_chroot(struct lwp *l, void *v, register_t *retval)
1165 {
1166 struct sys_chroot_args /* {
1167 syscallarg(const char *) path;
1168 } */ *uap = v;
1169 struct proc *p = l->l_proc;
1170 struct cwdinfo *cwdi = p->p_cwdi;
1171 struct vnode *vp;
1172 int error;
1173 struct nameidata nd;
1174
1175 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1176 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1177 return (error);
1178 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1179 SCARG(uap, path), l);
1180 if ((error = change_dir(&nd, l)) != 0)
1181 return (error);
1182 if (cwdi->cwdi_rdir != NULL)
1183 vrele(cwdi->cwdi_rdir);
1184 vp = nd.ni_vp;
1185 cwdi->cwdi_rdir = vp;
1186
1187 /*
1188 * Prevent escaping from chroot by putting the root under
1189 * the working directory. Silently chdir to / if we aren't
1190 * already there.
1191 */
1192 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1193 /*
1194 * XXX would be more failsafe to change directory to a
1195 * deadfs node here instead
1196 */
1197 vrele(cwdi->cwdi_cdir);
1198 VREF(vp);
1199 cwdi->cwdi_cdir = vp;
1200 }
1201
1202 return (0);
1203 }
1204
1205 /*
1206 * Common routine for chroot and chdir.
1207 */
1208 static int
1209 change_dir(struct nameidata *ndp, struct lwp *l)
1210 {
1211 struct vnode *vp;
1212 int error;
1213
1214 if ((error = namei(ndp)) != 0)
1215 return (error);
1216 vp = ndp->ni_vp;
1217 if (vp->v_type != VDIR)
1218 error = ENOTDIR;
1219 else
1220 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1221
1222 if (error)
1223 vput(vp);
1224 else
1225 VOP_UNLOCK(vp, 0);
1226 return (error);
1227 }
1228
1229 /*
1230 * Check permissions, allocate an open file structure,
1231 * and call the device open routine if any.
1232 */
1233 int
1234 sys_open(struct lwp *l, void *v, register_t *retval)
1235 {
1236 struct sys_open_args /* {
1237 syscallarg(const char *) path;
1238 syscallarg(int) flags;
1239 syscallarg(int) mode;
1240 } */ *uap = v;
1241 struct proc *p = l->l_proc;
1242 struct cwdinfo *cwdi = p->p_cwdi;
1243 struct filedesc *fdp = p->p_fd;
1244 struct file *fp;
1245 struct vnode *vp;
1246 int flags, cmode;
1247 int type, indx, error;
1248 struct flock lf;
1249 struct nameidata nd;
1250
1251 flags = FFLAGS(SCARG(uap, flags));
1252 if ((flags & (FREAD | FWRITE)) == 0)
1253 return (EINVAL);
1254 /* falloc() will use the file descriptor for us */
1255 if ((error = falloc(l, &fp, &indx)) != 0)
1256 return (error);
1257 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1258 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1259 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1260 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1261 FILE_UNUSE(fp, l);
1262 fdp->fd_ofiles[indx] = NULL;
1263 ffree(fp);
1264 if ((error == EDUPFD || error == EMOVEFD) &&
1265 l->l_dupfd >= 0 && /* XXX from fdopen */
1266 (error =
1267 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1268 *retval = indx;
1269 return (0);
1270 }
1271 if (error == ERESTART)
1272 error = EINTR;
1273 fdremove(fdp, indx);
1274 return (error);
1275 }
1276 l->l_dupfd = 0;
1277 vp = nd.ni_vp;
1278 fp->f_flag = flags & FMASK;
1279 fp->f_type = DTYPE_VNODE;
1280 fp->f_ops = &vnops;
1281 fp->f_data = vp;
1282 if (flags & (O_EXLOCK | O_SHLOCK)) {
1283 lf.l_whence = SEEK_SET;
1284 lf.l_start = 0;
1285 lf.l_len = 0;
1286 if (flags & O_EXLOCK)
1287 lf.l_type = F_WRLCK;
1288 else
1289 lf.l_type = F_RDLCK;
1290 type = F_FLOCK;
1291 if ((flags & FNONBLOCK) == 0)
1292 type |= F_WAIT;
1293 VOP_UNLOCK(vp, 0);
1294 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1295 if (error) {
1296 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1297 FILE_UNUSE(fp, l);
1298 ffree(fp);
1299 fdremove(fdp, indx);
1300 return (error);
1301 }
1302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1303 fp->f_flag |= FHASLOCK;
1304 }
1305 VOP_UNLOCK(vp, 0);
1306 *retval = indx;
1307 FILE_SET_MATURE(fp);
1308 FILE_UNUSE(fp, l);
1309 return (0);
1310 }
1311
1312 static void
1313 vfs__fhfree(fhandle_t *fhp)
1314 {
1315 size_t fhsize;
1316
1317 if (fhp == NULL) {
1318 return;
1319 }
1320 fhsize = FHANDLE_SIZE(fhp);
1321 kmem_free(fhp, fhsize);
1322 }
1323
1324 /*
1325 * vfs_composefh: compose a filehandle.
1326 */
1327
1328 int
1329 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1330 {
1331 struct mount *mp;
1332 struct fid *fidp;
1333 int error;
1334 size_t needfhsize;
1335 size_t fidsize;
1336
1337 mp = vp->v_mount;
1338 fidp = NULL;
1339 if (*fh_size < FHANDLE_SIZE_MIN) {
1340 fidsize = 0;
1341 } else {
1342 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1343 if (fhp != NULL) {
1344 memset(fhp, 0, *fh_size);
1345 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1346 fidp = &fhp->fh_fid;
1347 }
1348 }
1349 error = VFS_VPTOFH(vp, fidp, &fidsize);
1350 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1351 if (error == 0 && *fh_size < needfhsize) {
1352 error = E2BIG;
1353 }
1354 *fh_size = needfhsize;
1355 return error;
1356 }
1357
1358 int
1359 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1360 {
1361 struct mount *mp;
1362 fhandle_t *fhp;
1363 size_t fhsize;
1364 size_t fidsize;
1365 int error;
1366
1367 *fhpp = NULL;
1368 mp = vp->v_mount;
1369 fidsize = 0;
1370 error = VFS_VPTOFH(vp, NULL, &fidsize);
1371 KASSERT(error != 0);
1372 if (error != E2BIG) {
1373 goto out;
1374 }
1375 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1376 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1377 if (fhp == NULL) {
1378 error = ENOMEM;
1379 goto out;
1380 }
1381 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1382 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1383 if (error == 0) {
1384 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1385 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1386 *fhpp = fhp;
1387 } else {
1388 kmem_free(fhp, fhsize);
1389 }
1390 out:
1391 return error;
1392 }
1393
1394 void
1395 vfs_composefh_free(fhandle_t *fhp)
1396 {
1397
1398 vfs__fhfree(fhp);
1399 }
1400
1401 /*
1402 * vfs_fhtovp: lookup a vnode by a filehandle.
1403 */
1404
1405 int
1406 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1407 {
1408 struct mount *mp;
1409 int error;
1410
1411 *vpp = NULL;
1412 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1413 if (mp == NULL) {
1414 error = ESTALE;
1415 goto out;
1416 }
1417 if (mp->mnt_op->vfs_fhtovp == NULL) {
1418 error = EOPNOTSUPP;
1419 goto out;
1420 }
1421 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1422 out:
1423 return error;
1424 }
1425
1426 /*
1427 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1428 * the needed size.
1429 */
1430
1431 int
1432 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1433 {
1434 fhandle_t *fhp;
1435 int error;
1436
1437 *fhpp = NULL;
1438 if (fhsize > FHANDLE_SIZE_MAX) {
1439 return EINVAL;
1440 }
1441 if (fhsize < FHANDLE_SIZE_MIN) {
1442 return EINVAL;
1443 }
1444 again:
1445 fhp = kmem_alloc(fhsize, KM_SLEEP);
1446 if (fhp == NULL) {
1447 return ENOMEM;
1448 }
1449 error = copyin(ufhp, fhp, fhsize);
1450 if (error == 0) {
1451 /* XXX this check shouldn't be here */
1452 if (FHANDLE_SIZE(fhp) == fhsize) {
1453 *fhpp = fhp;
1454 return 0;
1455 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1456 /*
1457 * a kludge for nfsv2 padded handles.
1458 */
1459 size_t sz;
1460
1461 sz = FHANDLE_SIZE(fhp);
1462 kmem_free(fhp, fhsize);
1463 fhsize = sz;
1464 goto again;
1465 } else {
1466 /*
1467 * userland told us wrong size.
1468 */
1469 error = EINVAL;
1470 }
1471 }
1472 kmem_free(fhp, fhsize);
1473 return error;
1474 }
1475
1476 void
1477 vfs_copyinfh_free(fhandle_t *fhp)
1478 {
1479
1480 vfs__fhfree(fhp);
1481 }
1482
1483 /*
1484 * Get file handle system call
1485 */
1486 int
1487 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1488 {
1489 struct sys___getfh30_args /* {
1490 syscallarg(char *) fname;
1491 syscallarg(fhandle_t *) fhp;
1492 syscallarg(size_t *) fh_size;
1493 } */ *uap = v;
1494 struct vnode *vp;
1495 fhandle_t *fh;
1496 int error;
1497 struct nameidata nd;
1498 size_t sz;
1499 size_t usz;
1500
1501 /*
1502 * Must be super user
1503 */
1504 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1505 0, NULL, NULL, NULL);
1506 if (error)
1507 return (error);
1508 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1509 SCARG(uap, fname), l);
1510 error = namei(&nd);
1511 if (error)
1512 return (error);
1513 vp = nd.ni_vp;
1514 error = vfs_composefh_alloc(vp, &fh);
1515 vput(vp);
1516 if (error != 0) {
1517 goto out;
1518 }
1519 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1520 if (error != 0) {
1521 goto out;
1522 }
1523 sz = FHANDLE_SIZE(fh);
1524 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1525 if (error != 0) {
1526 goto out;
1527 }
1528 if (usz >= sz) {
1529 error = copyout(fh, SCARG(uap, fhp), sz);
1530 } else {
1531 error = E2BIG;
1532 }
1533 out:
1534 vfs_composefh_free(fh);
1535 return (error);
1536 }
1537
1538 /*
1539 * Open a file given a file handle.
1540 *
1541 * Check permissions, allocate an open file structure,
1542 * and call the device open routine if any.
1543 */
1544
1545 int
1546 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1547 register_t *retval)
1548 {
1549 struct filedesc *fdp = l->l_proc->p_fd;
1550 struct file *fp;
1551 struct vnode *vp = NULL;
1552 kauth_cred_t cred = l->l_cred;
1553 struct file *nfp;
1554 int type, indx, error=0;
1555 struct flock lf;
1556 struct vattr va;
1557 fhandle_t *fh;
1558 int flags;
1559
1560 /*
1561 * Must be super user
1562 */
1563 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1564 0, NULL, NULL, NULL)))
1565 return (error);
1566
1567 flags = FFLAGS(oflags);
1568 if ((flags & (FREAD | FWRITE)) == 0)
1569 return (EINVAL);
1570 if ((flags & O_CREAT))
1571 return (EINVAL);
1572 /* falloc() will use the file descriptor for us */
1573 if ((error = falloc(l, &nfp, &indx)) != 0)
1574 return (error);
1575 fp = nfp;
1576 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1577 if (error != 0) {
1578 goto bad;
1579 }
1580 error = vfs_fhtovp(fh, &vp);
1581 if (error != 0) {
1582 goto bad;
1583 }
1584
1585 /* Now do an effective vn_open */
1586
1587 if (vp->v_type == VSOCK) {
1588 error = EOPNOTSUPP;
1589 goto bad;
1590 }
1591 if (flags & FREAD) {
1592 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1593 goto bad;
1594 }
1595 if (flags & (FWRITE | O_TRUNC)) {
1596 if (vp->v_type == VDIR) {
1597 error = EISDIR;
1598 goto bad;
1599 }
1600 if ((error = vn_writechk(vp)) != 0 ||
1601 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1602 goto bad;
1603 }
1604 if (flags & O_TRUNC) {
1605 VOP_UNLOCK(vp, 0); /* XXX */
1606 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1607 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1608 VATTR_NULL(&va);
1609 va.va_size = 0;
1610 error = VOP_SETATTR(vp, &va, cred, l);
1611 if (error)
1612 goto bad;
1613 }
1614 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1615 goto bad;
1616 if (flags & FWRITE)
1617 vp->v_writecount++;
1618
1619 /* done with modified vn_open, now finish what sys_open does. */
1620
1621 fp->f_flag = flags & FMASK;
1622 fp->f_type = DTYPE_VNODE;
1623 fp->f_ops = &vnops;
1624 fp->f_data = vp;
1625 if (flags & (O_EXLOCK | O_SHLOCK)) {
1626 lf.l_whence = SEEK_SET;
1627 lf.l_start = 0;
1628 lf.l_len = 0;
1629 if (flags & O_EXLOCK)
1630 lf.l_type = F_WRLCK;
1631 else
1632 lf.l_type = F_RDLCK;
1633 type = F_FLOCK;
1634 if ((flags & FNONBLOCK) == 0)
1635 type |= F_WAIT;
1636 VOP_UNLOCK(vp, 0);
1637 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1638 if (error) {
1639 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1640 FILE_UNUSE(fp, l);
1641 ffree(fp);
1642 fdremove(fdp, indx);
1643 return (error);
1644 }
1645 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1646 fp->f_flag |= FHASLOCK;
1647 }
1648 VOP_UNLOCK(vp, 0);
1649 *retval = indx;
1650 FILE_SET_MATURE(fp);
1651 FILE_UNUSE(fp, l);
1652 vfs_copyinfh_free(fh);
1653 return (0);
1654
1655 bad:
1656 FILE_UNUSE(fp, l);
1657 ffree(fp);
1658 fdremove(fdp, indx);
1659 if (vp != NULL)
1660 vput(vp);
1661 vfs_copyinfh_free(fh);
1662 return (error);
1663 }
1664
1665 int
1666 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1667 {
1668 struct sys___fhopen40_args /* {
1669 syscallarg(const void *) fhp;
1670 syscallarg(size_t) fh_size;
1671 syscallarg(int) flags;
1672 } */ *uap = v;
1673
1674 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1675 SCARG(uap, flags), retval);
1676 }
1677
1678 int
1679 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1680 {
1681 int error;
1682 fhandle_t *fh;
1683 struct vnode *vp;
1684
1685 /*
1686 * Must be super user
1687 */
1688 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1689 0, NULL, NULL, NULL)))
1690 return (error);
1691
1692 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1693 if (error != 0)
1694 return error;
1695
1696 error = vfs_fhtovp(fh, &vp);
1697 vfs_copyinfh_free(fh);
1698 if (error != 0)
1699 return error;
1700
1701 error = vn_stat(vp, sb, l);
1702 vput(vp);
1703 return error;
1704 }
1705
1706
1707 /* ARGSUSED */
1708 int
1709 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1710 {
1711 struct sys___fhstat40_args /* {
1712 syscallarg(const void *) fhp;
1713 syscallarg(size_t) fh_size;
1714 syscallarg(struct stat *) sb;
1715 } */ *uap = v;
1716 struct stat sb;
1717 int error;
1718
1719 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1720 if (error)
1721 return error;
1722 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1723 }
1724
1725 int
1726 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1727 int flags)
1728 {
1729 fhandle_t *fh;
1730 struct mount *mp;
1731 struct vnode *vp;
1732 int error;
1733
1734 /*
1735 * Must be super user
1736 */
1737 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1738 0, NULL, NULL, NULL)))
1739 return error;
1740
1741 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1742 if (error != 0)
1743 return error;
1744
1745 error = vfs_fhtovp(fh, &vp);
1746 vfs_copyinfh_free(fh);
1747 if (error != 0)
1748 return error;
1749
1750 mp = vp->v_mount;
1751 error = dostatvfs(mp, sb, l, flags, 1);
1752 vput(vp);
1753 return error;
1754 }
1755
1756 /* ARGSUSED */
1757 int
1758 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1759 {
1760 struct sys___fhstatvfs140_args /* {
1761 syscallarg(const void *) fhp;
1762 syscallarg(size_t) fh_size;
1763 syscallarg(struct statvfs *) buf;
1764 syscallarg(int) flags;
1765 } */ *uap = v;
1766 struct statvfs *sb = STATVFSBUF_GET();
1767 int error;
1768
1769 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1770 SCARG(uap, flags));
1771 if (error == 0)
1772 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1773 STATVFSBUF_PUT(sb);
1774 return error;
1775 }
1776
1777 /*
1778 * Create a special file.
1779 */
1780 /* ARGSUSED */
1781 int
1782 sys_mknod(struct lwp *l, void *v, register_t *retval)
1783 {
1784 struct sys_mknod_args /* {
1785 syscallarg(const char *) path;
1786 syscallarg(int) mode;
1787 syscallarg(int) dev;
1788 } */ *uap = v;
1789 struct proc *p = l->l_proc;
1790 struct vnode *vp;
1791 struct vattr vattr;
1792 int error, optype;
1793 struct nameidata nd;
1794 char *path;
1795 const char *cpath;
1796 enum uio_seg seg = UIO_USERSPACE;
1797
1798 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1799 0, NULL, NULL, NULL)) != 0)
1800 return (error);
1801
1802 optype = VOP_MKNOD_DESCOFFSET;
1803
1804 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1805 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath, l);
1806
1807 if ((error = namei(&nd)) != 0)
1808 goto out;
1809 vp = nd.ni_vp;
1810 if (vp != NULL)
1811 error = EEXIST;
1812 else {
1813 VATTR_NULL(&vattr);
1814 vattr.va_mode =
1815 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1816 vattr.va_rdev = SCARG(uap, dev);
1817
1818 switch (SCARG(uap, mode) & S_IFMT) {
1819 case S_IFMT: /* used by badsect to flag bad sectors */
1820 vattr.va_type = VBAD;
1821 break;
1822 case S_IFCHR:
1823 vattr.va_type = VCHR;
1824 break;
1825 case S_IFBLK:
1826 vattr.va_type = VBLK;
1827 break;
1828 case S_IFWHT:
1829 optype = VOP_WHITEOUT_DESCOFFSET;
1830 break;
1831 case S_IFREG:
1832 #if NVERIEXEC > 0
1833 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1834 O_CREAT);
1835 #endif /* NVERIEXEC > 0 */
1836 vattr.va_type = VREG;
1837 vattr.va_rdev = VNOVAL;
1838 optype = VOP_CREATE_DESCOFFSET;
1839 break;
1840 default:
1841 error = EINVAL;
1842 break;
1843 }
1844 }
1845 if (!error) {
1846 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1847 switch (optype) {
1848 case VOP_WHITEOUT_DESCOFFSET:
1849 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1850 if (error)
1851 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1852 vput(nd.ni_dvp);
1853 break;
1854
1855 case VOP_MKNOD_DESCOFFSET:
1856 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1857 &nd.ni_cnd, &vattr);
1858 if (error == 0)
1859 vput(nd.ni_vp);
1860 break;
1861
1862 case VOP_CREATE_DESCOFFSET:
1863 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1864 &nd.ni_cnd, &vattr);
1865 if (error == 0)
1866 vput(nd.ni_vp);
1867 break;
1868 }
1869 } else {
1870 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1871 if (nd.ni_dvp == vp)
1872 vrele(nd.ni_dvp);
1873 else
1874 vput(nd.ni_dvp);
1875 if (vp)
1876 vrele(vp);
1877 }
1878 out:
1879 VERIEXEC_PATH_PUT(path);
1880 return (error);
1881 }
1882
1883 /*
1884 * Create a named pipe.
1885 */
1886 /* ARGSUSED */
1887 int
1888 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1889 {
1890 struct sys_mkfifo_args /* {
1891 syscallarg(const char *) path;
1892 syscallarg(int) mode;
1893 } */ *uap = v;
1894 struct proc *p = l->l_proc;
1895 struct vattr vattr;
1896 int error;
1897 struct nameidata nd;
1898
1899 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1900 if ((error = namei(&nd)) != 0)
1901 return (error);
1902 if (nd.ni_vp != NULL) {
1903 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1904 if (nd.ni_dvp == nd.ni_vp)
1905 vrele(nd.ni_dvp);
1906 else
1907 vput(nd.ni_dvp);
1908 vrele(nd.ni_vp);
1909 return (EEXIST);
1910 }
1911 VATTR_NULL(&vattr);
1912 vattr.va_type = VFIFO;
1913 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1914 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1915 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1916 if (error == 0)
1917 vput(nd.ni_vp);
1918 return (error);
1919 }
1920
1921 /*
1922 * Make a hard file link.
1923 */
1924 /* ARGSUSED */
1925 int
1926 sys_link(struct lwp *l, void *v, register_t *retval)
1927 {
1928 struct sys_link_args /* {
1929 syscallarg(const char *) path;
1930 syscallarg(const char *) link;
1931 } */ *uap = v;
1932 struct vnode *vp;
1933 struct nameidata nd;
1934 int error;
1935
1936 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1937 if ((error = namei(&nd)) != 0)
1938 return (error);
1939 vp = nd.ni_vp;
1940 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1941 if ((error = namei(&nd)) != 0)
1942 goto out;
1943 if (nd.ni_vp) {
1944 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1945 if (nd.ni_dvp == nd.ni_vp)
1946 vrele(nd.ni_dvp);
1947 else
1948 vput(nd.ni_dvp);
1949 vrele(nd.ni_vp);
1950 error = EEXIST;
1951 goto out;
1952 }
1953 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1954 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1955 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1956 out:
1957 vrele(vp);
1958 return (error);
1959 }
1960
1961 /*
1962 * Make a symbolic link.
1963 */
1964 /* ARGSUSED */
1965 int
1966 sys_symlink(struct lwp *l, void *v, register_t *retval)
1967 {
1968 struct sys_symlink_args /* {
1969 syscallarg(const char *) path;
1970 syscallarg(const char *) link;
1971 } */ *uap = v;
1972 struct proc *p = l->l_proc;
1973 struct vattr vattr;
1974 char *path;
1975 int error;
1976 struct nameidata nd;
1977
1978 path = PNBUF_GET();
1979 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1980 if (error)
1981 goto out;
1982 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1983 if ((error = namei(&nd)) != 0)
1984 goto out;
1985 if (nd.ni_vp) {
1986 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1987 if (nd.ni_dvp == nd.ni_vp)
1988 vrele(nd.ni_dvp);
1989 else
1990 vput(nd.ni_dvp);
1991 vrele(nd.ni_vp);
1992 error = EEXIST;
1993 goto out;
1994 }
1995 VATTR_NULL(&vattr);
1996 vattr.va_type = VLNK;
1997 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1998 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1999 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2000 if (error == 0)
2001 vput(nd.ni_vp);
2002 out:
2003 PNBUF_PUT(path);
2004 return (error);
2005 }
2006
2007 /*
2008 * Delete a whiteout from the filesystem.
2009 */
2010 /* ARGSUSED */
2011 int
2012 sys_undelete(struct lwp *l, void *v, register_t *retval)
2013 {
2014 struct sys_undelete_args /* {
2015 syscallarg(const char *) path;
2016 } */ *uap = v;
2017 int error;
2018 struct nameidata nd;
2019
2020 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, UIO_USERSPACE,
2021 SCARG(uap, path), l);
2022 error = namei(&nd);
2023 if (error)
2024 return (error);
2025
2026 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2027 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2028 if (nd.ni_dvp == nd.ni_vp)
2029 vrele(nd.ni_dvp);
2030 else
2031 vput(nd.ni_dvp);
2032 if (nd.ni_vp)
2033 vrele(nd.ni_vp);
2034 return (EEXIST);
2035 }
2036 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2037 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2038 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2039 vput(nd.ni_dvp);
2040 return (error);
2041 }
2042
2043 /*
2044 * Delete a name from the filesystem.
2045 */
2046 /* ARGSUSED */
2047 int
2048 sys_unlink(struct lwp *l, void *v, register_t *retval)
2049 {
2050 struct sys_unlink_args /* {
2051 syscallarg(const char *) path;
2052 } */ *uap = v;
2053 struct vnode *vp;
2054 int error;
2055 struct nameidata nd;
2056 char *path;
2057 const char *cpath;
2058 enum uio_seg seg = UIO_USERSPACE;
2059
2060 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
2061 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath, l);
2062
2063 if ((error = namei(&nd)) != 0)
2064 goto out;
2065 vp = nd.ni_vp;
2066
2067 /*
2068 * The root of a mounted filesystem cannot be deleted.
2069 */
2070 if (vp->v_flag & VROOT) {
2071 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2072 if (nd.ni_dvp == vp)
2073 vrele(nd.ni_dvp);
2074 else
2075 vput(nd.ni_dvp);
2076 vput(vp);
2077 error = EBUSY;
2078 goto out;
2079 }
2080
2081 #if NVERIEXEC > 0
2082 /* Handle remove requests for veriexec entries. */
2083 if ((error = veriexec_removechk(l, nd.ni_vp, nd.ni_dirp)) != 0) {
2084 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2085 if (nd.ni_dvp == vp)
2086 vrele(nd.ni_dvp);
2087 else
2088 vput(nd.ni_dvp);
2089 vput(vp);
2090 goto out;
2091 }
2092 #endif /* NVERIEXEC > 0 */
2093
2094 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2095 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2096 #ifdef FILEASSOC
2097 (void)fileassoc_file_delete(vp);
2098 #endif /* FILEASSOC */
2099 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2100 out:
2101 VERIEXEC_PATH_PUT(path);
2102 return (error);
2103 }
2104
2105 /*
2106 * Reposition read/write file offset.
2107 */
2108 int
2109 sys_lseek(struct lwp *l, void *v, register_t *retval)
2110 {
2111 struct sys_lseek_args /* {
2112 syscallarg(int) fd;
2113 syscallarg(int) pad;
2114 syscallarg(off_t) offset;
2115 syscallarg(int) whence;
2116 } */ *uap = v;
2117 struct proc *p = l->l_proc;
2118 kauth_cred_t cred = l->l_cred;
2119 struct filedesc *fdp = p->p_fd;
2120 struct file *fp;
2121 struct vnode *vp;
2122 struct vattr vattr;
2123 off_t newoff;
2124 int error;
2125
2126 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2127 return (EBADF);
2128
2129 FILE_USE(fp);
2130
2131 vp = (struct vnode *)fp->f_data;
2132 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2133 error = ESPIPE;
2134 goto out;
2135 }
2136
2137 switch (SCARG(uap, whence)) {
2138 case SEEK_CUR:
2139 newoff = fp->f_offset + SCARG(uap, offset);
2140 break;
2141 case SEEK_END:
2142 error = VOP_GETATTR(vp, &vattr, cred, l);
2143 if (error)
2144 goto out;
2145 newoff = SCARG(uap, offset) + vattr.va_size;
2146 break;
2147 case SEEK_SET:
2148 newoff = SCARG(uap, offset);
2149 break;
2150 default:
2151 error = EINVAL;
2152 goto out;
2153 }
2154 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2155 goto out;
2156
2157 *(off_t *)retval = fp->f_offset = newoff;
2158 out:
2159 FILE_UNUSE(fp, l);
2160 return (error);
2161 }
2162
2163 /*
2164 * Positional read system call.
2165 */
2166 int
2167 sys_pread(struct lwp *l, void *v, register_t *retval)
2168 {
2169 struct sys_pread_args /* {
2170 syscallarg(int) fd;
2171 syscallarg(void *) buf;
2172 syscallarg(size_t) nbyte;
2173 syscallarg(off_t) offset;
2174 } */ *uap = v;
2175 struct proc *p = l->l_proc;
2176 struct filedesc *fdp = p->p_fd;
2177 struct file *fp;
2178 struct vnode *vp;
2179 off_t offset;
2180 int error, fd = SCARG(uap, fd);
2181
2182 if ((fp = fd_getfile(fdp, fd)) == NULL)
2183 return (EBADF);
2184
2185 if ((fp->f_flag & FREAD) == 0) {
2186 simple_unlock(&fp->f_slock);
2187 return (EBADF);
2188 }
2189
2190 FILE_USE(fp);
2191
2192 vp = (struct vnode *)fp->f_data;
2193 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2194 error = ESPIPE;
2195 goto out;
2196 }
2197
2198 offset = SCARG(uap, offset);
2199
2200 /*
2201 * XXX This works because no file systems actually
2202 * XXX take any action on the seek operation.
2203 */
2204 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2205 goto out;
2206
2207 /* dofileread() will unuse the descriptor for us */
2208 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2209 &offset, 0, retval));
2210
2211 out:
2212 FILE_UNUSE(fp, l);
2213 return (error);
2214 }
2215
2216 /*
2217 * Positional scatter read system call.
2218 */
2219 int
2220 sys_preadv(struct lwp *l, void *v, register_t *retval)
2221 {
2222 struct sys_preadv_args /* {
2223 syscallarg(int) fd;
2224 syscallarg(const struct iovec *) iovp;
2225 syscallarg(int) iovcnt;
2226 syscallarg(off_t) offset;
2227 } */ *uap = v;
2228
2229 return do_filereadv(l, SCARG(uap, fd), SCARG(uap, iovp),
2230 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval);
2231 }
2232
2233 /*
2234 * Positional write system call.
2235 */
2236 int
2237 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2238 {
2239 struct sys_pwrite_args /* {
2240 syscallarg(int) fd;
2241 syscallarg(const void *) buf;
2242 syscallarg(size_t) nbyte;
2243 syscallarg(off_t) offset;
2244 } */ *uap = v;
2245 struct proc *p = l->l_proc;
2246 struct filedesc *fdp = p->p_fd;
2247 struct file *fp;
2248 struct vnode *vp;
2249 off_t offset;
2250 int error, fd = SCARG(uap, fd);
2251
2252 if ((fp = fd_getfile(fdp, fd)) == NULL)
2253 return (EBADF);
2254
2255 if ((fp->f_flag & FWRITE) == 0) {
2256 simple_unlock(&fp->f_slock);
2257 return (EBADF);
2258 }
2259
2260 FILE_USE(fp);
2261
2262 vp = (struct vnode *)fp->f_data;
2263 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2264 error = ESPIPE;
2265 goto out;
2266 }
2267
2268 offset = SCARG(uap, offset);
2269
2270 /*
2271 * XXX This works because no file systems actually
2272 * XXX take any action on the seek operation.
2273 */
2274 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2275 goto out;
2276
2277 /* dofilewrite() will unuse the descriptor for us */
2278 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2279 &offset, 0, retval));
2280
2281 out:
2282 FILE_UNUSE(fp, l);
2283 return (error);
2284 }
2285
2286 /*
2287 * Positional gather write system call.
2288 */
2289 int
2290 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2291 {
2292 struct sys_pwritev_args /* {
2293 syscallarg(int) fd;
2294 syscallarg(const struct iovec *) iovp;
2295 syscallarg(int) iovcnt;
2296 syscallarg(off_t) offset;
2297 } */ *uap = v;
2298
2299 return do_filewritev(l, SCARG(uap, fd), SCARG(uap, iovp),
2300 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval);
2301 }
2302
2303 /*
2304 * Check access permissions.
2305 */
2306 int
2307 sys_access(struct lwp *l, void *v, register_t *retval)
2308 {
2309 struct sys_access_args /* {
2310 syscallarg(const char *) path;
2311 syscallarg(int) flags;
2312 } */ *uap = v;
2313 kauth_cred_t cred;
2314 struct vnode *vp;
2315 int error, flags;
2316 struct nameidata nd;
2317
2318 cred = kauth_cred_dup(l->l_cred);
2319 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2320 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2321 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2322 SCARG(uap, path), l);
2323 /* Override default credentials */
2324 nd.ni_cnd.cn_cred = cred;
2325 if ((error = namei(&nd)) != 0)
2326 goto out;
2327 vp = nd.ni_vp;
2328
2329 /* Flags == 0 means only check for existence. */
2330 if (SCARG(uap, flags)) {
2331 flags = 0;
2332 if (SCARG(uap, flags) & R_OK)
2333 flags |= VREAD;
2334 if (SCARG(uap, flags) & W_OK)
2335 flags |= VWRITE;
2336 if (SCARG(uap, flags) & X_OK)
2337 flags |= VEXEC;
2338
2339 error = VOP_ACCESS(vp, flags, cred, l);
2340 if (!error && (flags & VWRITE))
2341 error = vn_writechk(vp);
2342 }
2343 vput(vp);
2344 out:
2345 kauth_cred_free(cred);
2346 return (error);
2347 }
2348
2349 /*
2350 * Common code for all sys_stat functions, including compat versions.
2351 */
2352 int
2353 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags,
2354 struct stat *sb)
2355 {
2356 int error;
2357 struct nameidata nd;
2358
2359 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT , UIO_USERSPACE, path, l);
2360 error = namei(&nd);
2361 if (error != 0)
2362 return error;
2363 error = vn_stat(nd.ni_vp, sb, l);
2364 vput(nd.ni_vp);
2365 return error;
2366 }
2367
2368 /*
2369 * Get file status; this version follows links.
2370 */
2371 /* ARGSUSED */
2372 int
2373 sys___stat30(struct lwp *l, void *v, register_t *retval)
2374 {
2375 struct sys___stat30_args /* {
2376 syscallarg(const char *) path;
2377 syscallarg(struct stat *) ub;
2378 } */ *uap = v;
2379 struct stat sb;
2380 int error;
2381
2382 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb);
2383 if (error)
2384 return error;
2385 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2386 }
2387
2388 /*
2389 * Get file status; this version does not follow links.
2390 */
2391 /* ARGSUSED */
2392 int
2393 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2394 {
2395 struct sys___lstat30_args /* {
2396 syscallarg(const char *) path;
2397 syscallarg(struct stat *) ub;
2398 } */ *uap = v;
2399 struct stat sb;
2400 int error;
2401
2402 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb);
2403 if (error)
2404 return error;
2405 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2406 }
2407
2408 /*
2409 * Get configurable pathname variables.
2410 */
2411 /* ARGSUSED */
2412 int
2413 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2414 {
2415 struct sys_pathconf_args /* {
2416 syscallarg(const char *) path;
2417 syscallarg(int) name;
2418 } */ *uap = v;
2419 int error;
2420 struct nameidata nd;
2421
2422 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2423 SCARG(uap, path), l);
2424 if ((error = namei(&nd)) != 0)
2425 return (error);
2426 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2427 vput(nd.ni_vp);
2428 return (error);
2429 }
2430
2431 /*
2432 * Return target name of a symbolic link.
2433 */
2434 /* ARGSUSED */
2435 int
2436 sys_readlink(struct lwp *l, void *v, register_t *retval)
2437 {
2438 struct sys_readlink_args /* {
2439 syscallarg(const char *) path;
2440 syscallarg(char *) buf;
2441 syscallarg(size_t) count;
2442 } */ *uap = v;
2443 struct vnode *vp;
2444 struct iovec aiov;
2445 struct uio auio;
2446 int error;
2447 struct nameidata nd;
2448
2449 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2450 SCARG(uap, path), l);
2451 if ((error = namei(&nd)) != 0)
2452 return (error);
2453 vp = nd.ni_vp;
2454 if (vp->v_type != VLNK)
2455 error = EINVAL;
2456 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2457 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2458 aiov.iov_base = SCARG(uap, buf);
2459 aiov.iov_len = SCARG(uap, count);
2460 auio.uio_iov = &aiov;
2461 auio.uio_iovcnt = 1;
2462 auio.uio_offset = 0;
2463 auio.uio_rw = UIO_READ;
2464 KASSERT(l == curlwp);
2465 auio.uio_vmspace = l->l_proc->p_vmspace;
2466 auio.uio_resid = SCARG(uap, count);
2467 error = VOP_READLINK(vp, &auio, l->l_cred);
2468 }
2469 vput(vp);
2470 *retval = SCARG(uap, count) - auio.uio_resid;
2471 return (error);
2472 }
2473
2474 /*
2475 * Change flags of a file given a path name.
2476 */
2477 /* ARGSUSED */
2478 int
2479 sys_chflags(struct lwp *l, void *v, register_t *retval)
2480 {
2481 struct sys_chflags_args /* {
2482 syscallarg(const char *) path;
2483 syscallarg(u_long) flags;
2484 } */ *uap = v;
2485 struct vnode *vp;
2486 int error;
2487 struct nameidata nd;
2488
2489 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2490 if ((error = namei(&nd)) != 0)
2491 return (error);
2492 vp = nd.ni_vp;
2493 error = change_flags(vp, SCARG(uap, flags), l);
2494 vput(vp);
2495 return (error);
2496 }
2497
2498 /*
2499 * Change flags of a file given a file descriptor.
2500 */
2501 /* ARGSUSED */
2502 int
2503 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2504 {
2505 struct sys_fchflags_args /* {
2506 syscallarg(int) fd;
2507 syscallarg(u_long) flags;
2508 } */ *uap = v;
2509 struct proc *p = l->l_proc;
2510 struct vnode *vp;
2511 struct file *fp;
2512 int error;
2513
2514 /* getvnode() will use the descriptor for us */
2515 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2516 return (error);
2517 vp = (struct vnode *)fp->f_data;
2518 error = change_flags(vp, SCARG(uap, flags), l);
2519 VOP_UNLOCK(vp, 0);
2520 FILE_UNUSE(fp, l);
2521 return (error);
2522 }
2523
2524 /*
2525 * Change flags of a file given a path name; this version does
2526 * not follow links.
2527 */
2528 int
2529 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2530 {
2531 struct sys_lchflags_args /* {
2532 syscallarg(const char *) path;
2533 syscallarg(u_long) flags;
2534 } */ *uap = v;
2535 struct vnode *vp;
2536 int error;
2537 struct nameidata nd;
2538
2539 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2540 if ((error = namei(&nd)) != 0)
2541 return (error);
2542 vp = nd.ni_vp;
2543 error = change_flags(vp, SCARG(uap, flags), l);
2544 vput(vp);
2545 return (error);
2546 }
2547
2548 /*
2549 * Common routine to change flags of a file.
2550 */
2551 int
2552 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2553 {
2554 struct vattr vattr;
2555 int error;
2556
2557 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2559 /*
2560 * Non-superusers cannot change the flags on devices, even if they
2561 * own them.
2562 */
2563 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2564 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2565 goto out;
2566 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2567 error = EINVAL;
2568 goto out;
2569 }
2570 }
2571 VATTR_NULL(&vattr);
2572 vattr.va_flags = flags;
2573 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2574 out:
2575 return (error);
2576 }
2577
2578 /*
2579 * Change mode of a file given path name; this version follows links.
2580 */
2581 /* ARGSUSED */
2582 int
2583 sys_chmod(struct lwp *l, void *v, register_t *retval)
2584 {
2585 struct sys_chmod_args /* {
2586 syscallarg(const char *) path;
2587 syscallarg(int) mode;
2588 } */ *uap = v;
2589 int error;
2590 struct nameidata nd;
2591
2592 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2593 if ((error = namei(&nd)) != 0)
2594 return (error);
2595
2596 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2597
2598 vrele(nd.ni_vp);
2599 return (error);
2600 }
2601
2602 /*
2603 * Change mode of a file given a file descriptor.
2604 */
2605 /* ARGSUSED */
2606 int
2607 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2608 {
2609 struct sys_fchmod_args /* {
2610 syscallarg(int) fd;
2611 syscallarg(int) mode;
2612 } */ *uap = v;
2613 struct proc *p = l->l_proc;
2614 struct file *fp;
2615 int error;
2616
2617 /* getvnode() will use the descriptor for us */
2618 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2619 return (error);
2620
2621 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2622 FILE_UNUSE(fp, l);
2623 return (error);
2624 }
2625
2626 /*
2627 * Change mode of a file given path name; this version does not follow links.
2628 */
2629 /* ARGSUSED */
2630 int
2631 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2632 {
2633 struct sys_lchmod_args /* {
2634 syscallarg(const char *) path;
2635 syscallarg(int) mode;
2636 } */ *uap = v;
2637 int error;
2638 struct nameidata nd;
2639
2640 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2641 if ((error = namei(&nd)) != 0)
2642 return (error);
2643
2644 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2645
2646 vrele(nd.ni_vp);
2647 return (error);
2648 }
2649
2650 /*
2651 * Common routine to set mode given a vnode.
2652 */
2653 static int
2654 change_mode(struct vnode *vp, int mode, struct lwp *l)
2655 {
2656 struct vattr vattr;
2657 int error;
2658
2659 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2660 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2661 VATTR_NULL(&vattr);
2662 vattr.va_mode = mode & ALLPERMS;
2663 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2664 VOP_UNLOCK(vp, 0);
2665 return (error);
2666 }
2667
2668 /*
2669 * Set ownership given a path name; this version follows links.
2670 */
2671 /* ARGSUSED */
2672 int
2673 sys_chown(struct lwp *l, void *v, register_t *retval)
2674 {
2675 struct sys_chown_args /* {
2676 syscallarg(const char *) path;
2677 syscallarg(uid_t) uid;
2678 syscallarg(gid_t) gid;
2679 } */ *uap = v;
2680 int error;
2681 struct nameidata nd;
2682
2683 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2684 if ((error = namei(&nd)) != 0)
2685 return (error);
2686
2687 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2688
2689 vrele(nd.ni_vp);
2690 return (error);
2691 }
2692
2693 /*
2694 * Set ownership given a path name; this version follows links.
2695 * Provides POSIX semantics.
2696 */
2697 /* ARGSUSED */
2698 int
2699 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2700 {
2701 struct sys_chown_args /* {
2702 syscallarg(const char *) path;
2703 syscallarg(uid_t) uid;
2704 syscallarg(gid_t) gid;
2705 } */ *uap = v;
2706 int error;
2707 struct nameidata nd;
2708
2709 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2710 if ((error = namei(&nd)) != 0)
2711 return (error);
2712
2713 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2714
2715 vrele(nd.ni_vp);
2716 return (error);
2717 }
2718
2719 /*
2720 * Set ownership given a file descriptor.
2721 */
2722 /* ARGSUSED */
2723 int
2724 sys_fchown(struct lwp *l, void *v, register_t *retval)
2725 {
2726 struct sys_fchown_args /* {
2727 syscallarg(int) fd;
2728 syscallarg(uid_t) uid;
2729 syscallarg(gid_t) gid;
2730 } */ *uap = v;
2731 struct proc *p = l->l_proc;
2732 int error;
2733 struct file *fp;
2734
2735 /* getvnode() will use the descriptor for us */
2736 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2737 return (error);
2738
2739 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2740 SCARG(uap, gid), l, 0);
2741 FILE_UNUSE(fp, l);
2742 return (error);
2743 }
2744
2745 /*
2746 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2747 */
2748 /* ARGSUSED */
2749 int
2750 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2751 {
2752 struct sys_fchown_args /* {
2753 syscallarg(int) fd;
2754 syscallarg(uid_t) uid;
2755 syscallarg(gid_t) gid;
2756 } */ *uap = v;
2757 struct proc *p = l->l_proc;
2758 int error;
2759 struct file *fp;
2760
2761 /* getvnode() will use the descriptor for us */
2762 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2763 return (error);
2764
2765 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2766 SCARG(uap, gid), l, 1);
2767 FILE_UNUSE(fp, l);
2768 return (error);
2769 }
2770
2771 /*
2772 * Set ownership given a path name; this version does not follow links.
2773 */
2774 /* ARGSUSED */
2775 int
2776 sys_lchown(struct lwp *l, void *v, register_t *retval)
2777 {
2778 struct sys_lchown_args /* {
2779 syscallarg(const char *) path;
2780 syscallarg(uid_t) uid;
2781 syscallarg(gid_t) gid;
2782 } */ *uap = v;
2783 int error;
2784 struct nameidata nd;
2785
2786 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2787 if ((error = namei(&nd)) != 0)
2788 return (error);
2789
2790 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2791
2792 vrele(nd.ni_vp);
2793 return (error);
2794 }
2795
2796 /*
2797 * Set ownership given a path name; this version does not follow links.
2798 * Provides POSIX/XPG semantics.
2799 */
2800 /* ARGSUSED */
2801 int
2802 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2803 {
2804 struct sys_lchown_args /* {
2805 syscallarg(const char *) path;
2806 syscallarg(uid_t) uid;
2807 syscallarg(gid_t) gid;
2808 } */ *uap = v;
2809 int error;
2810 struct nameidata nd;
2811
2812 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2813 if ((error = namei(&nd)) != 0)
2814 return (error);
2815
2816 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2817
2818 vrele(nd.ni_vp);
2819 return (error);
2820 }
2821
2822 /*
2823 * Common routine to set ownership given a vnode.
2824 */
2825 static int
2826 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2827 int posix_semantics)
2828 {
2829 struct vattr vattr;
2830 mode_t newmode;
2831 int error;
2832
2833 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2834 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2835 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2836 goto out;
2837
2838 #define CHANGED(x) ((int)(x) != -1)
2839 newmode = vattr.va_mode;
2840 if (posix_semantics) {
2841 /*
2842 * POSIX/XPG semantics: if the caller is not the super-user,
2843 * clear set-user-id and set-group-id bits. Both POSIX and
2844 * the XPG consider the behaviour for calls by the super-user
2845 * implementation-defined; we leave the set-user-id and set-
2846 * group-id settings intact in that case.
2847 */
2848 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2849 NULL) != 0)
2850 newmode &= ~(S_ISUID | S_ISGID);
2851 } else {
2852 /*
2853 * NetBSD semantics: when changing owner and/or group,
2854 * clear the respective bit(s).
2855 */
2856 if (CHANGED(uid))
2857 newmode &= ~S_ISUID;
2858 if (CHANGED(gid))
2859 newmode &= ~S_ISGID;
2860 }
2861 /* Update va_mode iff altered. */
2862 if (vattr.va_mode == newmode)
2863 newmode = VNOVAL;
2864
2865 VATTR_NULL(&vattr);
2866 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2867 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2868 vattr.va_mode = newmode;
2869 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2870 #undef CHANGED
2871
2872 out:
2873 VOP_UNLOCK(vp, 0);
2874 return (error);
2875 }
2876
2877 /*
2878 * Set the access and modification times given a path name; this
2879 * version follows links.
2880 */
2881 /* ARGSUSED */
2882 int
2883 sys_utimes(struct lwp *l, void *v, register_t *retval)
2884 {
2885 struct sys_utimes_args /* {
2886 syscallarg(const char *) path;
2887 syscallarg(const struct timeval *) tptr;
2888 } */ *uap = v;
2889
2890 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2891 SCARG(uap, tptr), UIO_USERSPACE);
2892 }
2893
2894 /*
2895 * Set the access and modification times given a file descriptor.
2896 */
2897 /* ARGSUSED */
2898 int
2899 sys_futimes(struct lwp *l, void *v, register_t *retval)
2900 {
2901 struct sys_futimes_args /* {
2902 syscallarg(int) fd;
2903 syscallarg(const struct timeval *) tptr;
2904 } */ *uap = v;
2905 int error;
2906 struct file *fp;
2907
2908 /* getvnode() will use the descriptor for us */
2909 if ((error = getvnode(l->l_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2910 return (error);
2911
2912 error = do_sys_utimes(l, fp->f_data, NULL, 0,
2913 SCARG(uap, tptr), UIO_USERSPACE);
2914
2915 FILE_UNUSE(fp, l);
2916 return (error);
2917 }
2918
2919 /*
2920 * Set the access and modification times given a path name; this
2921 * version does not follow links.
2922 */
2923 int
2924 sys_lutimes(struct lwp *l, void *v, register_t *retval)
2925 {
2926 struct sys_lutimes_args /* {
2927 syscallarg(const char *) path;
2928 syscallarg(const struct timeval *) tptr;
2929 } */ *uap = v;
2930
2931 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2932 SCARG(uap, tptr), UIO_USERSPACE);
2933 }
2934
2935 /*
2936 * Common routine to set access and modification times given a vnode.
2937 */
2938 int
2939 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2940 const struct timeval *tptr, enum uio_seg seg)
2941 {
2942 struct vattr vattr;
2943 struct nameidata nd;
2944 int error;
2945
2946 VATTR_NULL(&vattr);
2947 if (tptr == NULL) {
2948 nanotime(&vattr.va_atime);
2949 vattr.va_mtime = vattr.va_atime;
2950 vattr.va_vaflags |= VA_UTIMES_NULL;
2951 } else {
2952 struct timeval tv[2];
2953
2954 if (seg != UIO_SYSSPACE) {
2955 error = copyin(tptr, &tv, sizeof (tv));
2956 if (error != 0)
2957 return error;
2958 tptr = tv;
2959 }
2960 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime);
2961 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime);
2962 }
2963
2964 if (vp == NULL) {
2965 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path, l);
2966 if ((error = namei(&nd)) != 0)
2967 return (error);
2968 vp = nd.ni_vp;
2969 } else
2970 nd.ni_vp = NULL;
2971
2972 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2973 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2974 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2975 VOP_UNLOCK(vp, 0);
2976
2977 if (nd.ni_vp != NULL)
2978 vrele(nd.ni_vp);
2979
2980 return (error);
2981 }
2982
2983 /*
2984 * Truncate a file given its path name.
2985 */
2986 /* ARGSUSED */
2987 int
2988 sys_truncate(struct lwp *l, void *v, register_t *retval)
2989 {
2990 struct sys_truncate_args /* {
2991 syscallarg(const char *) path;
2992 syscallarg(int) pad;
2993 syscallarg(off_t) length;
2994 } */ *uap = v;
2995 struct vnode *vp;
2996 struct vattr vattr;
2997 int error;
2998 struct nameidata nd;
2999
3000 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
3001 if ((error = namei(&nd)) != 0)
3002 return (error);
3003 vp = nd.ni_vp;
3004 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3005 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3006 if (vp->v_type == VDIR)
3007 error = EISDIR;
3008 else if ((error = vn_writechk(vp)) == 0 &&
3009 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3010 VATTR_NULL(&vattr);
3011 vattr.va_size = SCARG(uap, length);
3012 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3013 }
3014 vput(vp);
3015 return (error);
3016 }
3017
3018 /*
3019 * Truncate a file given a file descriptor.
3020 */
3021 /* ARGSUSED */
3022 int
3023 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3024 {
3025 struct sys_ftruncate_args /* {
3026 syscallarg(int) fd;
3027 syscallarg(int) pad;
3028 syscallarg(off_t) length;
3029 } */ *uap = v;
3030 struct proc *p = l->l_proc;
3031 struct vattr vattr;
3032 struct vnode *vp;
3033 struct file *fp;
3034 int error;
3035
3036 /* getvnode() will use the descriptor for us */
3037 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3038 return (error);
3039 if ((fp->f_flag & FWRITE) == 0) {
3040 error = EINVAL;
3041 goto out;
3042 }
3043 vp = (struct vnode *)fp->f_data;
3044 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3045 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3046 if (vp->v_type == VDIR)
3047 error = EISDIR;
3048 else if ((error = vn_writechk(vp)) == 0) {
3049 VATTR_NULL(&vattr);
3050 vattr.va_size = SCARG(uap, length);
3051 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3052 }
3053 VOP_UNLOCK(vp, 0);
3054 out:
3055 FILE_UNUSE(fp, l);
3056 return (error);
3057 }
3058
3059 /*
3060 * Sync an open file.
3061 */
3062 /* ARGSUSED */
3063 int
3064 sys_fsync(struct lwp *l, void *v, register_t *retval)
3065 {
3066 struct sys_fsync_args /* {
3067 syscallarg(int) fd;
3068 } */ *uap = v;
3069 struct proc *p = l->l_proc;
3070 struct vnode *vp;
3071 struct file *fp;
3072 int error;
3073
3074 /* getvnode() will use the descriptor for us */
3075 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3076 return (error);
3077 vp = (struct vnode *)fp->f_data;
3078 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3079 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3080 if (error == 0 && bioops.io_fsync != NULL &&
3081 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3082 (*bioops.io_fsync)(vp, 0);
3083 VOP_UNLOCK(vp, 0);
3084 FILE_UNUSE(fp, l);
3085 return (error);
3086 }
3087
3088 /*
3089 * Sync a range of file data. API modeled after that found in AIX.
3090 *
3091 * FDATASYNC indicates that we need only save enough metadata to be able
3092 * to re-read the written data. Note we duplicate AIX's requirement that
3093 * the file be open for writing.
3094 */
3095 /* ARGSUSED */
3096 int
3097 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3098 {
3099 struct sys_fsync_range_args /* {
3100 syscallarg(int) fd;
3101 syscallarg(int) flags;
3102 syscallarg(off_t) start;
3103 syscallarg(off_t) length;
3104 } */ *uap = v;
3105 struct proc *p = l->l_proc;
3106 struct vnode *vp;
3107 struct file *fp;
3108 int flags, nflags;
3109 off_t s, e, len;
3110 int error;
3111
3112 /* getvnode() will use the descriptor for us */
3113 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3114 return (error);
3115
3116 if ((fp->f_flag & FWRITE) == 0) {
3117 error = EBADF;
3118 goto out;
3119 }
3120
3121 flags = SCARG(uap, flags);
3122 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3123 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3124 error = EINVAL;
3125 goto out;
3126 }
3127 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3128 if (flags & FDATASYNC)
3129 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3130 else
3131 nflags = FSYNC_WAIT;
3132 if (flags & FDISKSYNC)
3133 nflags |= FSYNC_CACHE;
3134
3135 len = SCARG(uap, length);
3136 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3137 if (len) {
3138 s = SCARG(uap, start);
3139 e = s + len;
3140 if (e < s) {
3141 error = EINVAL;
3142 goto out;
3143 }
3144 } else {
3145 e = 0;
3146 s = 0;
3147 }
3148
3149 vp = (struct vnode *)fp->f_data;
3150 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3151 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3152
3153 if (error == 0 && bioops.io_fsync != NULL &&
3154 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3155 (*bioops.io_fsync)(vp, nflags);
3156
3157 VOP_UNLOCK(vp, 0);
3158 out:
3159 FILE_UNUSE(fp, l);
3160 return (error);
3161 }
3162
3163 /*
3164 * Sync the data of an open file.
3165 */
3166 /* ARGSUSED */
3167 int
3168 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3169 {
3170 struct sys_fdatasync_args /* {
3171 syscallarg(int) fd;
3172 } */ *uap = v;
3173 struct proc *p = l->l_proc;
3174 struct vnode *vp;
3175 struct file *fp;
3176 int error;
3177
3178 /* getvnode() will use the descriptor for us */
3179 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3180 return (error);
3181 if ((fp->f_flag & FWRITE) == 0) {
3182 FILE_UNUSE(fp, l);
3183 return (EBADF);
3184 }
3185 vp = (struct vnode *)fp->f_data;
3186 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3187 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3188 VOP_UNLOCK(vp, 0);
3189 FILE_UNUSE(fp, l);
3190 return (error);
3191 }
3192
3193 /*
3194 * Rename files, (standard) BSD semantics frontend.
3195 */
3196 /* ARGSUSED */
3197 int
3198 sys_rename(struct lwp *l, void *v, register_t *retval)
3199 {
3200 struct sys_rename_args /* {
3201 syscallarg(const char *) from;
3202 syscallarg(const char *) to;
3203 } */ *uap = v;
3204
3205 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3206 }
3207
3208 /*
3209 * Rename files, POSIX semantics frontend.
3210 */
3211 /* ARGSUSED */
3212 int
3213 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3214 {
3215 struct sys___posix_rename_args /* {
3216 syscallarg(const char *) from;
3217 syscallarg(const char *) to;
3218 } */ *uap = v;
3219
3220 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3221 }
3222
3223 /*
3224 * Rename files. Source and destination must either both be directories,
3225 * or both not be directories. If target is a directory, it must be empty.
3226 * If `from' and `to' refer to the same object, the value of the `retain'
3227 * argument is used to determine whether `from' will be
3228 *
3229 * (retain == 0) deleted unless `from' and `to' refer to the same
3230 * object in the file system's name space (BSD).
3231 * (retain == 1) always retained (POSIX).
3232 */
3233 static int
3234 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3235 {
3236 struct vnode *tvp, *fvp, *tdvp;
3237 struct nameidata fromnd, tond;
3238 struct proc *p;
3239 int error;
3240
3241 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, UIO_USERSPACE,
3242 from, l);
3243 if ((error = namei(&fromnd)) != 0)
3244 return (error);
3245 if (fromnd.ni_dvp != fromnd.ni_vp)
3246 VOP_UNLOCK(fromnd.ni_dvp, 0);
3247 fvp = fromnd.ni_vp;
3248 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT |
3249 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3250 if ((error = namei(&tond)) != 0) {
3251 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3252 vrele(fromnd.ni_dvp);
3253 vrele(fvp);
3254 goto out1;
3255 }
3256 tdvp = tond.ni_dvp;
3257 tvp = tond.ni_vp;
3258
3259 if (tvp != NULL) {
3260 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3261 error = ENOTDIR;
3262 goto out;
3263 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3264 error = EISDIR;
3265 goto out;
3266 }
3267 }
3268
3269 if (fvp == tdvp)
3270 error = EINVAL;
3271
3272 /*
3273 * Source and destination refer to the same object.
3274 */
3275 if (fvp == tvp) {
3276 if (retain)
3277 error = -1;
3278 else if (fromnd.ni_dvp == tdvp &&
3279 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3280 !memcmp(fromnd.ni_cnd.cn_nameptr,
3281 tond.ni_cnd.cn_nameptr,
3282 fromnd.ni_cnd.cn_namelen))
3283 error = -1;
3284 }
3285
3286 #if NVERIEXEC > 0
3287 if (!error) {
3288 char *f1, *f2;
3289
3290 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3291 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3292
3293 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3294 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3295
3296 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3297
3298 free(f1, M_TEMP);
3299 free(f2, M_TEMP);
3300 }
3301 #endif /* NVERIEXEC > 0 */
3302
3303 out:
3304 p = l->l_proc;
3305 if (!error) {
3306 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3307 if (fromnd.ni_dvp != tdvp)
3308 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3309 if (tvp) {
3310 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3311 }
3312 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3313 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3314 } else {
3315 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3316 if (tdvp == tvp)
3317 vrele(tdvp);
3318 else
3319 vput(tdvp);
3320 if (tvp)
3321 vput(tvp);
3322 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3323 vrele(fromnd.ni_dvp);
3324 vrele(fvp);
3325 }
3326 vrele(tond.ni_startdir);
3327 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3328 out1:
3329 if (fromnd.ni_startdir)
3330 vrele(fromnd.ni_startdir);
3331 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3332 return (error == -1 ? 0 : error);
3333 }
3334
3335 /*
3336 * Make a directory file.
3337 */
3338 /* ARGSUSED */
3339 int
3340 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3341 {
3342 struct sys_mkdir_args /* {
3343 syscallarg(const char *) path;
3344 syscallarg(int) mode;
3345 } */ *uap = v;
3346 struct proc *p = l->l_proc;
3347 struct vnode *vp;
3348 struct vattr vattr;
3349 int error;
3350 struct nameidata nd;
3351
3352 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3353 SCARG(uap, path), l);
3354 if ((error = namei(&nd)) != 0)
3355 return (error);
3356 vp = nd.ni_vp;
3357 if (vp != NULL) {
3358 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3359 if (nd.ni_dvp == vp)
3360 vrele(nd.ni_dvp);
3361 else
3362 vput(nd.ni_dvp);
3363 vrele(vp);
3364 return (EEXIST);
3365 }
3366 VATTR_NULL(&vattr);
3367 vattr.va_type = VDIR;
3368 vattr.va_mode =
3369 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3370 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3371 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3372 if (!error)
3373 vput(nd.ni_vp);
3374 return (error);
3375 }
3376
3377 /*
3378 * Remove a directory file.
3379 */
3380 /* ARGSUSED */
3381 int
3382 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3383 {
3384 struct sys_rmdir_args /* {
3385 syscallarg(const char *) path;
3386 } */ *uap = v;
3387 struct vnode *vp;
3388 int error;
3389 struct nameidata nd;
3390
3391 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3392 SCARG(uap, path), l);
3393 if ((error = namei(&nd)) != 0)
3394 return (error);
3395 vp = nd.ni_vp;
3396 if (vp->v_type != VDIR) {
3397 error = ENOTDIR;
3398 goto out;
3399 }
3400 /*
3401 * No rmdir "." please.
3402 */
3403 if (nd.ni_dvp == vp) {
3404 error = EINVAL;
3405 goto out;
3406 }
3407 /*
3408 * The root of a mounted filesystem cannot be deleted.
3409 */
3410 if (vp->v_flag & VROOT) {
3411 error = EBUSY;
3412 goto out;
3413 }
3414 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3415 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3416 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3417 return (error);
3418
3419 out:
3420 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3421 if (nd.ni_dvp == vp)
3422 vrele(nd.ni_dvp);
3423 else
3424 vput(nd.ni_dvp);
3425 vput(vp);
3426 return (error);
3427 }
3428
3429 /*
3430 * Read a block of directory entries in a file system independent format.
3431 */
3432 int
3433 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3434 {
3435 struct sys___getdents30_args /* {
3436 syscallarg(int) fd;
3437 syscallarg(char *) buf;
3438 syscallarg(size_t) count;
3439 } */ *uap = v;
3440 struct proc *p = l->l_proc;
3441 struct file *fp;
3442 int error, done;
3443
3444 /* getvnode() will use the descriptor for us */
3445 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3446 return (error);
3447 if ((fp->f_flag & FREAD) == 0) {
3448 error = EBADF;
3449 goto out;
3450 }
3451 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3452 SCARG(uap, count), &done, l, 0, 0);
3453 #ifdef KTRACE
3454 if (!error && KTRPOINT(p, KTR_GENIO)) {
3455 struct iovec iov;
3456 iov.iov_base = SCARG(uap, buf);
3457 iov.iov_len = done;
3458 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3459 }
3460 #endif
3461 *retval = done;
3462 out:
3463 FILE_UNUSE(fp, l);
3464 return (error);
3465 }
3466
3467 /*
3468 * Set the mode mask for creation of filesystem nodes.
3469 */
3470 int
3471 sys_umask(struct lwp *l, void *v, register_t *retval)
3472 {
3473 struct sys_umask_args /* {
3474 syscallarg(mode_t) newmask;
3475 } */ *uap = v;
3476 struct proc *p = l->l_proc;
3477 struct cwdinfo *cwdi;
3478
3479 cwdi = p->p_cwdi;
3480 *retval = cwdi->cwdi_cmask;
3481 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3482 return (0);
3483 }
3484
3485 /*
3486 * Void all references to file by ripping underlying filesystem
3487 * away from vnode.
3488 */
3489 /* ARGSUSED */
3490 int
3491 sys_revoke(struct lwp *l, void *v, register_t *retval)
3492 {
3493 struct sys_revoke_args /* {
3494 syscallarg(const char *) path;
3495 } */ *uap = v;
3496 struct vnode *vp;
3497 struct vattr vattr;
3498 int error;
3499 struct nameidata nd;
3500
3501 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
3502 if ((error = namei(&nd)) != 0)
3503 return (error);
3504 vp = nd.ni_vp;
3505 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3506 goto out;
3507 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3508 (error = kauth_authorize_generic(l->l_cred,
3509 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3510 goto out;
3511 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3512 VOP_REVOKE(vp, REVOKEALL);
3513 out:
3514 vrele(vp);
3515 return (error);
3516 }
3517
3518 /*
3519 * Convert a user file descriptor to a kernel file entry.
3520 */
3521 int
3522 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3523 {
3524 struct vnode *vp;
3525 struct file *fp;
3526
3527 if ((fp = fd_getfile(fdp, fd)) == NULL)
3528 return (EBADF);
3529
3530 FILE_USE(fp);
3531
3532 if (fp->f_type != DTYPE_VNODE) {
3533 FILE_UNUSE(fp, NULL);
3534 return (EINVAL);
3535 }
3536
3537 vp = (struct vnode *)fp->f_data;
3538 if (vp->v_type == VBAD) {
3539 FILE_UNUSE(fp, NULL);
3540 return (EBADF);
3541 }
3542
3543 *fpp = fp;
3544 return (0);
3545 }
3546