vfs_syscalls.c revision 1.325 1 /* $NetBSD: vfs_syscalls.c,v 1.325 2007/08/15 12:07:35 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.325 2007/08/15 12:07:35 ad Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "fss.h"
46 #include "veriexec.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/namei.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/stat.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/uio.h>
59 #include <sys/malloc.h>
60 #include <sys/kmem.h>
61 #include <sys/dirent.h>
62 #include <sys/sysctl.h>
63 #include <sys/syscallargs.h>
64 #include <sys/vfs_syscalls.h>
65 #include <sys/ktrace.h>
66 #ifdef FILEASSOC
67 #include <sys/fileassoc.h>
68 #endif /* FILEASSOC */
69 #include <sys/verified_exec.h>
70 #include <sys/kauth.h>
71
72 #include <miscfs/genfs/genfs.h>
73 #include <miscfs/syncfs/syncfs.h>
74
75 #ifdef COMPAT_30
76 #include "opt_nfsserver.h"
77 #include <nfs/rpcv2.h>
78 #endif
79 #include <nfs/nfsproto.h>
80 #ifdef COMPAT_30
81 #include <nfs/nfs.h>
82 #include <nfs/nfs_var.h>
83 #endif
84
85 #if NFSS > 0
86 #include <dev/fssvar.h>
87 #endif
88
89 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
90
91 static int change_dir(struct nameidata *, struct lwp *);
92 static int change_flags(struct vnode *, u_long, struct lwp *);
93 static int change_mode(struct vnode *, int, struct lwp *l);
94 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
95 static int rename_files(const char *, const char *, struct lwp *, int);
96
97 void checkdirs(struct vnode *);
98
99 int dovfsusermount = 0;
100
101 /*
102 * Virtual File System System Calls
103 */
104
105 /*
106 * Mount a file system.
107 */
108
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Names of the filesystems that the 4.3BSD and NetBSD 0.9 mount
 * syscalls identified by number.  The array index IS the historic
 * filesystem number, so the order is significant.
 *
 * Do not modify this table: it should only ever contain filesystems
 * that NetBSD 0.9 and 4.3BSD supported.  A NULL entry marks a number
 * that has no filesystem name to map to.
 */
const char * const mountcompatnames[] = {
	[0] = NULL,		/* MOUNT_NONE */
	[1] = MOUNT_FFS,	/* MOUNT_UFS */
	[2] = MOUNT_NFS,
	[3] = MOUNT_MFS,
	[4] = MOUNT_MSDOS,
	[5] = MOUNT_CD9660,	/* MOUNT_ISOFS */
	[6] = MOUNT_FDESC,
	[7] = MOUNT_KERNFS,
	[8] = NULL,		/* MOUNT_DEVFS */
	[9] = MOUNT_AFS,
};

/* Number of entries in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
132
133 static int
134 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
135 void *data, size_t *data_len)
136 {
137 struct mount *mp;
138 int error = 0, saved_flags;
139
140 mp = vp->v_mount;
141 saved_flags = mp->mnt_flag;
142
143 /* We can operate only on VROOT nodes. */
144 if ((vp->v_flag & VROOT) == 0)
145 return EINVAL;
146
147 /*
148 * We only allow the filesystem to be reloaded if it
149 * is currently mounted read-only.
150 */
151 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY))
152 return EOPNOTSUPP; /* Needs translation */
153
154 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
155 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
156 if (error)
157 return error;
158
159 if (vfs_busy(mp, LK_NOWAIT, 0))
160 return EPERM;
161
162 mp->mnt_flag &= ~MNT_OP_FLAGS;
163 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
164
165 /*
166 * Set the mount level flags.
167 */
168 if (flags & MNT_RDONLY)
169 mp->mnt_flag |= MNT_RDONLY;
170 else if (mp->mnt_flag & MNT_RDONLY)
171 mp->mnt_iflag |= IMNT_WANTRDWR;
172 mp->mnt_flag &=
173 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
174 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
175 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
176 mp->mnt_flag |= flags &
177 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
178 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
179 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
180 MNT_IGNORE);
181
182 error = VFS_MOUNT(mp, path, data, data_len, l);
183
184 #if defined(COMPAT_30) && defined(NFSSERVER)
185 if (error && data != NULL) {
186 int error2;
187
188 /* Update failed; let's try and see if it was an
189 * export request. */
190 error2 = nfs_update_exports_30(mp, path, data, l);
191
192 /* Only update error code if the export request was
193 * understood but some problem occurred while
194 * processing it. */
195 if (error2 != EJUSTRETURN)
196 error = error2;
197 }
198 #endif
199 if (mp->mnt_iflag & IMNT_WANTRDWR)
200 mp->mnt_flag &= ~MNT_RDONLY;
201 if (error)
202 mp->mnt_flag = saved_flags;
203 mp->mnt_flag &= ~MNT_OP_FLAGS;
204 mp->mnt_iflag &= ~IMNT_WANTRDWR;
205 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
206 if (mp->mnt_syncer == NULL)
207 error = vfs_allocate_syncvnode(mp);
208 } else {
209 if (mp->mnt_syncer != NULL)
210 vfs_deallocate_syncvnode(mp);
211 }
212 vfs_unbusy(mp);
213
214 return (error);
215 }
216
217 static int
218 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
219 {
220 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
221 int error;
222
223 /* Copy file-system type from userspace. */
224 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
225 if (error) {
226 #if defined(COMPAT_09) || defined(COMPAT_43)
227 /*
228 * Historically, filesystem types were identified by numbers.
229 * If we get an integer for the filesystem type instead of a
230 * string, we check to see if it matches one of the historic
231 * filesystem types.
232 */
233 u_long fsindex = (u_long)fstype;
234 if (fsindex >= nmountcompatnames ||
235 mountcompatnames[fsindex] == NULL)
236 return ENODEV;
237 strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
238 #else
239 return error;
240 #endif
241 }
242
243 #ifdef COMPAT_10
244 /* Accept `ufs' as an alias for `ffs'. */
245 if (strcmp(fstypename, "ufs") == 0)
246 fstypename[0] = 'f';
247 #endif
248
249 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL)
250 return ENODEV;
251 return 0;
252 }
253
254 static int
255 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
256 const char *path, int flags, void *data, size_t *data_len)
257 {
258 struct mount *mp = NULL;
259 struct vnode *vp = *vpp;
260 struct vattr va;
261 int error;
262
263 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
264 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
265 if (error)
266 return error;
267
268 /* Can't make a non-dir a mount-point (from here anyway). */
269 if (vp->v_type != VDIR)
270 return ENOTDIR;
271
272 /*
273 * If the user is not root, ensure that they own the directory
274 * onto which we are attempting to mount.
275 */
276 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
277 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
278 (error = kauth_authorize_generic(l->l_cred,
279 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
280 return error;
281 }
282
283 if (flags & MNT_EXPORTED)
284 return EINVAL;
285
286 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
287 return error;
288
289 /*
290 * Check if a file-system is not already mounted on this vnode.
291 */
292 if (vp->v_mountedhere != NULL)
293 return EBUSY;
294
295 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
296
297 mp->mnt_op = vfsops;
298
299 TAILQ_INIT(&mp->mnt_vnodelist);
300 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
301 simple_lock_init(&mp->mnt_slock);
302 (void)vfs_busy(mp, LK_NOWAIT, 0);
303
304 mp->mnt_op->vfs_refcount++;
305 mp->mnt_vnodecovered = vp;
306 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
307 mp->mnt_unmounter = NULL;
308 mount_initspecific(mp);
309
310 /*
311 * The underlying file system may refuse the mount for
312 * various reasons. Allow the user to force it to happen.
313 *
314 * Set the mount level flags.
315 */
316 mp->mnt_flag = flags &
317 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
318 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
319 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
320 MNT_IGNORE | MNT_RDONLY);
321
322 error = VFS_MOUNT(mp, path, data, data_len, l);
323 mp->mnt_flag &= ~MNT_OP_FLAGS;
324
325 /*
326 * Put the new filesystem on the mount list after root.
327 */
328 cache_purge(vp);
329 if (error != 0) {
330 vp->v_mountedhere = NULL;
331 mp->mnt_op->vfs_refcount--;
332 vfs_unbusy(mp);
333 free(mp, M_MOUNT);
334 return error;
335 }
336
337 mp->mnt_iflag &= ~IMNT_WANTRDWR;
338 vp->v_mountedhere = mp;
339 simple_lock(&mountlist_slock);
340 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
341 simple_unlock(&mountlist_slock);
342 VOP_UNLOCK(vp, 0);
343 checkdirs(vp);
344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
345 error = vfs_allocate_syncvnode(mp);
346 vfs_unbusy(mp);
347 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
348 error = VFS_START(mp, 0, l);
349 if (error)
350 vrele(vp);
351 *vpp = NULL;
352 return error;
353 }
354
355 static int
356 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
357 void *data, size_t *data_len)
358 {
359 struct mount *mp;
360 int error;
361
362 /* If MNT_GETARGS is specified, it should be the only flag. */
363 if (flags & ~MNT_GETARGS)
364 return EINVAL;
365
366 mp = vp->v_mount;
367
368 /* XXX: probably some notion of "can see" here if we want isolation. */
369 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
370 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
371 if (error)
372 return error;
373
374 if ((vp->v_flag & VROOT) == 0)
375 return EINVAL;
376
377 if (vfs_busy(mp, LK_NOWAIT, 0))
378 return EPERM;
379
380 mp->mnt_flag &= ~MNT_OP_FLAGS;
381 mp->mnt_flag |= MNT_GETARGS;
382 error = VFS_MOUNT(mp, path, data, data_len, l);
383 mp->mnt_flag &= ~MNT_OP_FLAGS;
384
385 vfs_unbusy(mp);
386 return (error);
387 }
388
#ifdef COMPAT_40
/*
 * Old mount system call without a data length argument.  A length of 0
 * is passed to do_sys_mount(); the value do_sys_mount() would store in
 * the syscall return slot goes to a scratch variable instead, so
 * retval is left untouched.
 */
/* ARGSUSED */
int
compat_40_sys_mount(struct lwp *l, void *v, register_t *retval)
{
	struct compat_40_sys_mount_args /* {
		syscallarg(const char *) type;
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(void *) data;
	} */ *uap = v;
	const char *fstype = SCARG(uap, type);
	const char *mntpath = SCARG(uap, path);
	register_t ignored;

	return do_sys_mount(l, NULL, fstype, mntpath, SCARG(uap, flags),
	    SCARG(uap, data), UIO_USERSPACE, 0, &ignored);
}
#endif
406
407 int
408 sys___mount50(struct lwp *l, void *v, register_t *retval)
409 {
410 struct sys___mount50_args /* {
411 syscallarg(const char *) type;
412 syscallarg(const char *) path;
413 syscallarg(int) flags;
414 syscallarg(void *) data;
415 syscallarg(size_t) data_len;
416 } */ *uap = v;
417
418 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
419 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
420 SCARG(uap, data_len), retval);
421 }
422
423 int
424 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
425 const char *path, int flags, void *data, enum uio_seg data_seg,
426 size_t data_len, register_t *retval)
427 {
428 struct vnode *vp;
429 struct nameidata nd;
430 void *data_buf = data;
431 int error;
432
433 /*
434 * Get vnode to be covered
435 */
436 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l);
437 if ((error = namei(&nd)) != 0)
438 return (error);
439 vp = nd.ni_vp;
440
441 /*
442 * A lookup in VFS_MOUNT might result in an attempt to
443 * lock this vnode again, so make the lock recursive.
444 */
445 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
446
447 if (vfsops == NULL) {
448 if (flags & (MNT_GETARGS | MNT_UPDATE))
449 vfsops = vp->v_mount->mnt_op;
450 else {
451 /* 'type' is userspace */
452 error = mount_get_vfsops(type, &vfsops);
453 if (error != 0)
454 goto done;
455 }
456 }
457
458 if (data != NULL && data_seg == UIO_USERSPACE) {
459 if (data_len == 0) {
460 /* No length supplied, use default for filesystem */
461 data_len = vfsops->vfs_min_mount_data;
462 if (data_len > VFS_MAX_MOUNT_DATA) {
463 /* maybe a force loaded old LKM */
464 error = EINVAL;
465 goto done;
466 }
467 #ifdef COMPAT_30
468 /* Hopefully a longer buffer won't make copyin() fail */
469 if (flags & MNT_UPDATE
470 && data_len < sizeof (struct mnt_export_args30))
471 data_len = sizeof (struct mnt_export_args30);
472 #endif
473 }
474 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
475
476 /* NFS needs the buffer even for mnt_getargs .... */
477 error = copyin(data, data_buf, data_len);
478 if (error != 0)
479 goto done;
480 }
481
482 if (flags & MNT_GETARGS) {
483 if (data_len == 0) {
484 error = EINVAL;
485 goto done;
486 }
487 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
488 if (error != 0)
489 goto done;
490 if (data_seg == UIO_USERSPACE)
491 error = copyout(data_buf, data, data_len);
492 *retval = data_len;
493 } else if (flags & MNT_UPDATE) {
494 error = mount_update(l, vp, path, flags, data_buf, &data_len);
495 } else {
496 /* Locking is handled internally in mount_domount(). */
497 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
498 &data_len);
499 }
500
501 done:
502 if (vp)
503 vput(vp);
504 if (data_buf != data)
505 free(data_buf, M_TEMP);
506 return (error);
507 }
508
509 /*
510 * Scan all active processes to see if any of them have a current
511 * or root directory onto which the new filesystem has just been
512 * mounted. If so, replace them with the new mount point.
513 */
514 void
515 checkdirs(struct vnode *olddp)
516 {
517 struct cwdinfo *cwdi;
518 struct vnode *newdp;
519 struct proc *p;
520
521 if (olddp->v_usecount == 1)
522 return;
523 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
524 panic("mount: lost mount");
525 mutex_enter(&proclist_lock);
526 PROCLIST_FOREACH(p, &allproc) {
527 cwdi = p->p_cwdi;
528 if (!cwdi)
529 continue;
530 if (cwdi->cwdi_cdir == olddp) {
531 vrele(cwdi->cwdi_cdir);
532 VREF(newdp);
533 cwdi->cwdi_cdir = newdp;
534 }
535 if (cwdi->cwdi_rdir == olddp) {
536 vrele(cwdi->cwdi_rdir);
537 VREF(newdp);
538 cwdi->cwdi_rdir = newdp;
539 }
540 }
541 mutex_exit(&proclist_lock);
542 if (rootvnode == olddp) {
543 vrele(rootvnode);
544 VREF(newdp);
545 rootvnode = newdp;
546 }
547 vput(newdp);
548 }
549
550 /*
551 * Unmount a file system.
552 *
553 * Note: unmount takes a path to the vnode mounted on as argument,
554 * not special file (as before).
555 */
556 /* ARGSUSED */
557 int
558 sys_unmount(struct lwp *l, void *v, register_t *retval)
559 {
560 struct sys_unmount_args /* {
561 syscallarg(const char *) path;
562 syscallarg(int) flags;
563 } */ *uap = v;
564 struct vnode *vp;
565 struct mount *mp;
566 int error;
567 struct nameidata nd;
568
569 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
570 SCARG(uap, path), l);
571 if ((error = namei(&nd)) != 0)
572 return (error);
573 vp = nd.ni_vp;
574 mp = vp->v_mount;
575
576 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
577 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
578 if (error) {
579 vput(vp);
580 return (error);
581 }
582
583 /*
584 * Don't allow unmounting the root file system.
585 */
586 if (mp->mnt_flag & MNT_ROOTFS) {
587 vput(vp);
588 return (EINVAL);
589 }
590
591 /*
592 * Must be the root of the filesystem
593 */
594 if ((vp->v_flag & VROOT) == 0) {
595 vput(vp);
596 return (EINVAL);
597 }
598 vput(vp);
599
600 /*
601 * XXX Freeze syncer. Must do this before locking the
602 * mount point. See dounmount() for details.
603 */
604 mutex_enter(&syncer_mutex);
605
606 if (vfs_busy(mp, 0, 0)) {
607 mutex_exit(&syncer_mutex);
608 return (EBUSY);
609 }
610
611 return (dounmount(mp, SCARG(uap, flags), l));
612 }
613
614 /*
615 * Do the actual file system unmount. File system is assumed to have been
616 * marked busy by the caller.
617 */
618 int
619 dounmount(struct mount *mp, int flags, struct lwp *l)
620 {
621 struct vnode *coveredvp;
622 int error;
623 int async;
624 int used_syncer;
625
626 #if NVERIEXEC > 0
627 error = veriexec_unmountchk(mp);
628 if (error)
629 return (error);
630 #endif /* NVERIEXEC > 0 */
631
632 simple_lock(&mountlist_slock);
633 vfs_unbusy(mp);
634 used_syncer = (mp->mnt_syncer != NULL);
635
636 /*
637 * XXX Syncer must be frozen when we get here. This should really
638 * be done on a per-mountpoint basis, but especially the softdep
639 * code possibly called from the syncer doesn't exactly work on a
640 * per-mountpoint basis, so the softdep code would become a maze
641 * of vfs_busy() calls.
642 *
643 * The caller of dounmount() must acquire syncer_mutex because
644 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
645 * order, and we must preserve that order to avoid deadlock.
646 *
647 * So, if the file system did not use the syncer, now is
648 * the time to release the syncer_mutex.
649 */
650 if (used_syncer == 0)
651 mutex_exit(&syncer_mutex);
652
653 mp->mnt_iflag |= IMNT_UNMOUNT;
654 mp->mnt_unmounter = l;
655 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
656
657 async = mp->mnt_flag & MNT_ASYNC;
658 mp->mnt_flag &= ~MNT_ASYNC;
659 cache_purgevfs(mp); /* remove cache entries for this file sys */
660 if (mp->mnt_syncer != NULL)
661 vfs_deallocate_syncvnode(mp);
662 error = 0;
663 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
664 #if NFSS > 0
665 error = fss_umount_hook(mp, (flags & MNT_FORCE));
666 #endif
667 if (error == 0)
668 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
669 }
670 if (error == 0 || (flags & MNT_FORCE))
671 error = VFS_UNMOUNT(mp, flags, l);
672 if (error) {
673 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
674 (void) vfs_allocate_syncvnode(mp);
675 simple_lock(&mountlist_slock);
676 mp->mnt_iflag &= ~IMNT_UNMOUNT;
677 mp->mnt_unmounter = NULL;
678 mp->mnt_flag |= async;
679 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
680 &mountlist_slock);
681 if (used_syncer)
682 mutex_exit(&syncer_mutex);
683 simple_lock(&mp->mnt_slock);
684 while (mp->mnt_wcnt > 0) {
685 wakeup(mp);
686 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
687 0, &mp->mnt_slock);
688 }
689 simple_unlock(&mp->mnt_slock);
690 return (error);
691 }
692 simple_lock(&mountlist_slock);
693 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
694 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
695 coveredvp->v_mountedhere = NULL;
696 mp->mnt_op->vfs_refcount--;
697 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
698 panic("unmount: dangling vnode");
699 mp->mnt_iflag |= IMNT_GONE;
700 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
701 if (coveredvp != NULLVP)
702 vrele(coveredvp);
703 mount_finispecific(mp);
704 if (used_syncer)
705 mutex_exit(&syncer_mutex);
706 simple_lock(&mp->mnt_slock);
707 while (mp->mnt_wcnt > 0) {
708 wakeup(mp);
709 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
710 }
711 simple_unlock(&mp->mnt_slock);
712 vfs_hooks_unmount(mp);
713 free(mp, M_MOUNT);
714 return (0);
715 }
716
717 /*
718 * Sync each mounted filesystem.
719 */
720 #ifdef DEBUG
721 int syncprt = 0;
722 struct ctldebug debug0 = { "syncprt", &syncprt };
723 #endif
724
725 /* ARGSUSED */
726 int
727 sys_sync(struct lwp *l, void *v, register_t *retval)
728 {
729 struct mount *mp, *nmp;
730 int asyncflag;
731
732 if (l == NULL)
733 l = &lwp0;
734
735 simple_lock(&mountlist_slock);
736 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
737 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
738 nmp = mp->mnt_list.cqe_prev;
739 continue;
740 }
741 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
742 asyncflag = mp->mnt_flag & MNT_ASYNC;
743 mp->mnt_flag &= ~MNT_ASYNC;
744 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
745 if (asyncflag)
746 mp->mnt_flag |= MNT_ASYNC;
747 }
748 simple_lock(&mountlist_slock);
749 nmp = mp->mnt_list.cqe_prev;
750 vfs_unbusy(mp);
751
752 }
753 simple_unlock(&mountlist_slock);
754 #ifdef DEBUG
755 if (syncprt)
756 vfs_bufstats();
757 #endif /* DEBUG */
758 return (0);
759 }
760
761 /*
762 * Change filesystem quotas.
763 */
764 /* ARGSUSED */
765 int
766 sys_quotactl(struct lwp *l, void *v, register_t *retval)
767 {
768 struct sys_quotactl_args /* {
769 syscallarg(const char *) path;
770 syscallarg(int) cmd;
771 syscallarg(int) uid;
772 syscallarg(void *) arg;
773 } */ *uap = v;
774 struct mount *mp;
775 int error;
776 struct nameidata nd;
777
778 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
779 if ((error = namei(&nd)) != 0)
780 return (error);
781 mp = nd.ni_vp->v_mount;
782 vrele(nd.ni_vp);
783 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
784 SCARG(uap, arg), l);
785 return (error);
786 }
787
788 int
789 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
790 int root)
791 {
792 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
793 int error = 0;
794
795 /*
796 * If MNT_NOWAIT or MNT_LAZY is specified, do not
797 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
798 * overrides MNT_NOWAIT.
799 */
800 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
801 (flags != MNT_WAIT && flags != 0)) {
802 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
803 goto done;
804 }
805
806 /* Get the filesystem stats now */
807 memset(sp, 0, sizeof(*sp));
808 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
809 return error;
810 }
811
812 if (cwdi->cwdi_rdir == NULL)
813 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
814 done:
815 if (cwdi->cwdi_rdir != NULL) {
816 size_t len;
817 char *bp;
818 char *path = PNBUF_GET();
819
820 bp = path + MAXPATHLEN;
821 *--bp = '\0';
822 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
823 MAXPATHLEN / 2, 0, l);
824 if (error) {
825 PNBUF_PUT(path);
826 return error;
827 }
828 len = strlen(bp);
829 /*
830 * for mount points that are below our root, we can see
831 * them, so we fix up the pathname and return them. The
832 * rest we cannot see, so we don't allow viewing the
833 * data.
834 */
835 if (strncmp(bp, sp->f_mntonname, len) == 0) {
836 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
837 sizeof(sp->f_mntonname));
838 if (sp->f_mntonname[0] == '\0')
839 (void)strlcpy(sp->f_mntonname, "/",
840 sizeof(sp->f_mntonname));
841 } else {
842 if (root)
843 (void)strlcpy(sp->f_mntonname, "/",
844 sizeof(sp->f_mntonname));
845 else
846 error = EPERM;
847 }
848 PNBUF_PUT(path);
849 }
850 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
851 return error;
852 }
853
854 /*
855 * Get filesystem statistics by path.
856 */
857 int
858 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
859 {
860 struct mount *mp;
861 int error;
862 struct nameidata nd;
863
864 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l);
865 if ((error = namei(&nd)) != 0)
866 return error;
867 mp = nd.ni_vp->v_mount;
868 error = dostatvfs(mp, sb, l, flags, 1);
869 vrele(nd.ni_vp);
870 return error;
871 }
872
873 /* ARGSUSED */
874 int
875 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
876 {
877 struct sys_statvfs1_args /* {
878 syscallarg(const char *) path;
879 syscallarg(struct statvfs *) buf;
880 syscallarg(int) flags;
881 } */ *uap = v;
882 struct statvfs *sb;
883 int error;
884
885 sb = STATVFSBUF_GET();
886 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
887 if (error == 0)
888 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
889 STATVFSBUF_PUT(sb);
890 return error;
891 }
892
893 /*
894 * Get filesystem statistics by fd.
895 */
896 int
897 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
898 {
899 struct proc *p = l->l_proc;
900 struct file *fp;
901 struct mount *mp;
902 int error;
903
904 /* getvnode() will use the descriptor for us */
905 if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
906 return (error);
907 mp = ((struct vnode *)fp->f_data)->v_mount;
908 error = dostatvfs(mp, sb, l, flags, 1);
909 FILE_UNUSE(fp, l);
910 return error;
911 }
912
913 /* ARGSUSED */
914 int
915 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
916 {
917 struct sys_fstatvfs1_args /* {
918 syscallarg(int) fd;
919 syscallarg(struct statvfs *) buf;
920 syscallarg(int) flags;
921 } */ *uap = v;
922 struct statvfs *sb;
923 int error;
924
925 sb = STATVFSBUF_GET();
926 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
927 if (error == 0)
928 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
929 STATVFSBUF_PUT(sb);
930 return error;
931 }
932
933
934 /*
935 * Get statistics on all filesystems.
936 */
937 int
938 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
939 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
940 register_t *retval)
941 {
942 int root = 0;
943 struct proc *p = l->l_proc;
944 struct mount *mp, *nmp;
945 struct statvfs *sb;
946 size_t count, maxcount;
947 int error = 0;
948
949 sb = STATVFSBUF_GET();
950 maxcount = bufsize / entry_sz;
951 simple_lock(&mountlist_slock);
952 count = 0;
953 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
954 mp = nmp) {
955 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
956 nmp = CIRCLEQ_NEXT(mp, mnt_list);
957 continue;
958 }
959 if (sfsp && count < maxcount) {
960 error = dostatvfs(mp, sb, l, flags, 0);
961 if (error) {
962 simple_lock(&mountlist_slock);
963 nmp = CIRCLEQ_NEXT(mp, mnt_list);
964 vfs_unbusy(mp);
965 continue;
966 }
967 error = copyfn(sb, sfsp, entry_sz);
968 if (error) {
969 vfs_unbusy(mp);
970 goto out;
971 }
972 sfsp = (char *)sfsp + entry_sz;
973 root |= strcmp(sb->f_mntonname, "/") == 0;
974 }
975 count++;
976 simple_lock(&mountlist_slock);
977 nmp = CIRCLEQ_NEXT(mp, mnt_list);
978 vfs_unbusy(mp);
979 }
980 simple_unlock(&mountlist_slock);
981 if (root == 0 && p->p_cwdi->cwdi_rdir) {
982 /*
983 * fake a root entry
984 */
985 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l, flags, 1);
986 if (error != 0)
987 goto out;
988 if (sfsp)
989 error = copyfn(sb, sfsp, entry_sz);
990 count++;
991 }
992 if (sfsp && count > maxcount)
993 *retval = maxcount;
994 else
995 *retval = count;
996 out:
997 STATVFSBUF_PUT(sb);
998 return error;
999 }
1000
1001 int
1002 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
1003 {
1004 struct sys_getvfsstat_args /* {
1005 syscallarg(struct statvfs *) buf;
1006 syscallarg(size_t) bufsize;
1007 syscallarg(int) flags;
1008 } */ *uap = v;
1009
1010 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1011 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1012 }
1013
1014 /*
1015 * Change current working directory to a given file descriptor.
1016 */
1017 /* ARGSUSED */
1018 int
1019 sys_fchdir(struct lwp *l, void *v, register_t *retval)
1020 {
1021 struct sys_fchdir_args /* {
1022 syscallarg(int) fd;
1023 } */ *uap = v;
1024 struct proc *p = l->l_proc;
1025 struct filedesc *fdp = p->p_fd;
1026 struct cwdinfo *cwdi = p->p_cwdi;
1027 struct vnode *vp, *tdp;
1028 struct mount *mp;
1029 struct file *fp;
1030 int error;
1031
1032 /* getvnode() will use the descriptor for us */
1033 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1034 return (error);
1035 vp = (struct vnode *)fp->f_data;
1036
1037 VREF(vp);
1038 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1039 if (vp->v_type != VDIR)
1040 error = ENOTDIR;
1041 else
1042 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1043 if (error) {
1044 vput(vp);
1045 goto out;
1046 }
1047 while ((mp = vp->v_mountedhere) != NULL) {
1048 if (vfs_busy(mp, 0, 0))
1049 continue;
1050
1051 vput(vp);
1052 error = VFS_ROOT(mp, &tdp);
1053 vfs_unbusy(mp);
1054 if (error)
1055 goto out;
1056 vp = tdp;
1057 }
1058 VOP_UNLOCK(vp, 0);
1059
1060 /*
1061 * Disallow changing to a directory not under the process's
1062 * current root directory (if there is one).
1063 */
1064 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1065 vrele(vp);
1066 error = EPERM; /* operation not permitted */
1067 goto out;
1068 }
1069
1070 vrele(cwdi->cwdi_cdir);
1071 cwdi->cwdi_cdir = vp;
1072 out:
1073 FILE_UNUSE(fp, l);
1074 return (error);
1075 }
1076
1077 /*
1078 * Change this process's notion of the root directory to a given file
1079 * descriptor.
1080 */
1081 int
1082 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1083 {
1084 struct sys_fchroot_args *uap = v;
1085 struct proc *p = l->l_proc;
1086 struct filedesc *fdp = p->p_fd;
1087 struct cwdinfo *cwdi = p->p_cwdi;
1088 struct vnode *vp;
1089 struct file *fp;
1090 int error;
1091
1092 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1093 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1094 return error;
1095 /* getvnode() will use the descriptor for us */
1096 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1097 return error;
1098 vp = (struct vnode *) fp->f_data;
1099 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1100 if (vp->v_type != VDIR)
1101 error = ENOTDIR;
1102 else
1103 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1104 VOP_UNLOCK(vp, 0);
1105 if (error)
1106 goto out;
1107 VREF(vp);
1108
1109 /*
1110 * Prevent escaping from chroot by putting the root under
1111 * the working directory. Silently chdir to / if we aren't
1112 * already there.
1113 */
1114 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1115 /*
1116 * XXX would be more failsafe to change directory to a
1117 * deadfs node here instead
1118 */
1119 vrele(cwdi->cwdi_cdir);
1120 VREF(vp);
1121 cwdi->cwdi_cdir = vp;
1122 }
1123
1124 if (cwdi->cwdi_rdir != NULL)
1125 vrele(cwdi->cwdi_rdir);
1126 cwdi->cwdi_rdir = vp;
1127 out:
1128 FILE_UNUSE(fp, l);
1129 return (error);
1130 }
1131
1132 /*
1133 * Change current working directory (``.'').
1134 */
1135 /* ARGSUSED */
1136 int
1137 sys_chdir(struct lwp *l, void *v, register_t *retval)
1138 {
1139 struct sys_chdir_args /* {
1140 syscallarg(const char *) path;
1141 } */ *uap = v;
1142 struct proc *p = l->l_proc;
1143 struct cwdinfo *cwdi = p->p_cwdi;
1144 int error;
1145 struct nameidata nd;
1146
1147 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1148 SCARG(uap, path), l);
1149 if ((error = change_dir(&nd, l)) != 0)
1150 return (error);
1151 vrele(cwdi->cwdi_cdir);
1152 cwdi->cwdi_cdir = nd.ni_vp;
1153 return (0);
1154 }
1155
1156 /*
1157 * Change notion of root (``/'') directory.
1158 */
1159 /* ARGSUSED */
1160 int
1161 sys_chroot(struct lwp *l, void *v, register_t *retval)
1162 {
1163 struct sys_chroot_args /* {
1164 syscallarg(const char *) path;
1165 } */ *uap = v;
1166 struct proc *p = l->l_proc;
1167 struct cwdinfo *cwdi = p->p_cwdi;
1168 struct vnode *vp;
1169 int error;
1170 struct nameidata nd;
1171
1172 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1173 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1174 return (error);
1175 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1176 SCARG(uap, path), l);
1177 if ((error = change_dir(&nd, l)) != 0)
1178 return (error);
1179 if (cwdi->cwdi_rdir != NULL)
1180 vrele(cwdi->cwdi_rdir);
1181 vp = nd.ni_vp;
1182 cwdi->cwdi_rdir = vp;
1183
1184 /*
1185 * Prevent escaping from chroot by putting the root under
1186 * the working directory. Silently chdir to / if we aren't
1187 * already there.
1188 */
1189 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1190 /*
1191 * XXX would be more failsafe to change directory to a
1192 * deadfs node here instead
1193 */
1194 vrele(cwdi->cwdi_cdir);
1195 VREF(vp);
1196 cwdi->cwdi_cdir = vp;
1197 }
1198
1199 return (0);
1200 }
1201
1202 /*
1203 * Common routine for chroot and chdir.
1204 */
1205 static int
1206 change_dir(struct nameidata *ndp, struct lwp *l)
1207 {
1208 struct vnode *vp;
1209 int error;
1210
1211 if ((error = namei(ndp)) != 0)
1212 return (error);
1213 vp = ndp->ni_vp;
1214 if (vp->v_type != VDIR)
1215 error = ENOTDIR;
1216 else
1217 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1218
1219 if (error)
1220 vput(vp);
1221 else
1222 VOP_UNLOCK(vp, 0);
1223 return (error);
1224 }
1225
1226 /*
1227 * Check permissions, allocate an open file structure,
1228 * and call the device open routine if any.
1229 */
1230 int
1231 sys_open(struct lwp *l, void *v, register_t *retval)
1232 {
1233 struct sys_open_args /* {
1234 syscallarg(const char *) path;
1235 syscallarg(int) flags;
1236 syscallarg(int) mode;
1237 } */ *uap = v;
1238 struct proc *p = l->l_proc;
1239 struct cwdinfo *cwdi = p->p_cwdi;
1240 struct filedesc *fdp = p->p_fd;
1241 struct file *fp;
1242 struct vnode *vp;
1243 int flags, cmode;
1244 int type, indx, error;
1245 struct flock lf;
1246 struct nameidata nd;
1247
1248 flags = FFLAGS(SCARG(uap, flags));
1249 if ((flags & (FREAD | FWRITE)) == 0)
1250 return (EINVAL);
1251 /* falloc() will use the file descriptor for us */
1252 if ((error = falloc(l, &fp, &indx)) != 0)
1253 return (error);
1254 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1255 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1256 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1257 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1258 FILE_UNUSE(fp, l);
1259 fdp->fd_ofiles[indx] = NULL;
1260 ffree(fp);
1261 if ((error == EDUPFD || error == EMOVEFD) &&
1262 l->l_dupfd >= 0 && /* XXX from fdopen */
1263 (error =
1264 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1265 *retval = indx;
1266 return (0);
1267 }
1268 if (error == ERESTART)
1269 error = EINTR;
1270 fdremove(fdp, indx);
1271 return (error);
1272 }
1273 l->l_dupfd = 0;
1274 vp = nd.ni_vp;
1275 fp->f_flag = flags & FMASK;
1276 fp->f_type = DTYPE_VNODE;
1277 fp->f_ops = &vnops;
1278 fp->f_data = vp;
1279 if (flags & (O_EXLOCK | O_SHLOCK)) {
1280 lf.l_whence = SEEK_SET;
1281 lf.l_start = 0;
1282 lf.l_len = 0;
1283 if (flags & O_EXLOCK)
1284 lf.l_type = F_WRLCK;
1285 else
1286 lf.l_type = F_RDLCK;
1287 type = F_FLOCK;
1288 if ((flags & FNONBLOCK) == 0)
1289 type |= F_WAIT;
1290 VOP_UNLOCK(vp, 0);
1291 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1292 if (error) {
1293 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1294 FILE_UNUSE(fp, l);
1295 ffree(fp);
1296 fdremove(fdp, indx);
1297 return (error);
1298 }
1299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1300 fp->f_flag |= FHASLOCK;
1301 }
1302 VOP_UNLOCK(vp, 0);
1303 *retval = indx;
1304 FILE_SET_MATURE(fp);
1305 FILE_UNUSE(fp, l);
1306 return (0);
1307 }
1308
1309 static void
1310 vfs__fhfree(fhandle_t *fhp)
1311 {
1312 size_t fhsize;
1313
1314 if (fhp == NULL) {
1315 return;
1316 }
1317 fhsize = FHANDLE_SIZE(fhp);
1318 kmem_free(fhp, fhsize);
1319 }
1320
1321 /*
1322 * vfs_composefh: compose a filehandle.
1323 */
1324
1325 int
1326 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1327 {
1328 struct mount *mp;
1329 struct fid *fidp;
1330 int error;
1331 size_t needfhsize;
1332 size_t fidsize;
1333
1334 mp = vp->v_mount;
1335 fidp = NULL;
1336 if (*fh_size < FHANDLE_SIZE_MIN) {
1337 fidsize = 0;
1338 } else {
1339 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1340 if (fhp != NULL) {
1341 memset(fhp, 0, *fh_size);
1342 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1343 fidp = &fhp->fh_fid;
1344 }
1345 }
1346 error = VFS_VPTOFH(vp, fidp, &fidsize);
1347 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1348 if (error == 0 && *fh_size < needfhsize) {
1349 error = E2BIG;
1350 }
1351 *fh_size = needfhsize;
1352 return error;
1353 }
1354
1355 int
1356 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1357 {
1358 struct mount *mp;
1359 fhandle_t *fhp;
1360 size_t fhsize;
1361 size_t fidsize;
1362 int error;
1363
1364 *fhpp = NULL;
1365 mp = vp->v_mount;
1366 fidsize = 0;
1367 error = VFS_VPTOFH(vp, NULL, &fidsize);
1368 KASSERT(error != 0);
1369 if (error != E2BIG) {
1370 goto out;
1371 }
1372 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1373 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1374 if (fhp == NULL) {
1375 error = ENOMEM;
1376 goto out;
1377 }
1378 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1379 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1380 if (error == 0) {
1381 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1382 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1383 *fhpp = fhp;
1384 } else {
1385 kmem_free(fhp, fhsize);
1386 }
1387 out:
1388 return error;
1389 }
1390
1391 void
1392 vfs_composefh_free(fhandle_t *fhp)
1393 {
1394
1395 vfs__fhfree(fhp);
1396 }
1397
1398 /*
1399 * vfs_fhtovp: lookup a vnode by a filehandle.
1400 */
1401
1402 int
1403 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1404 {
1405 struct mount *mp;
1406 int error;
1407
1408 *vpp = NULL;
1409 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1410 if (mp == NULL) {
1411 error = ESTALE;
1412 goto out;
1413 }
1414 if (mp->mnt_op->vfs_fhtovp == NULL) {
1415 error = EOPNOTSUPP;
1416 goto out;
1417 }
1418 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1419 out:
1420 return error;
1421 }
1422
1423 /*
1424 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1425 * the needed size.
1426 */
1427
1428 int
1429 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1430 {
1431 fhandle_t *fhp;
1432 int error;
1433
1434 *fhpp = NULL;
1435 if (fhsize > FHANDLE_SIZE_MAX) {
1436 return EINVAL;
1437 }
1438 if (fhsize < FHANDLE_SIZE_MIN) {
1439 return EINVAL;
1440 }
1441 again:
1442 fhp = kmem_alloc(fhsize, KM_SLEEP);
1443 if (fhp == NULL) {
1444 return ENOMEM;
1445 }
1446 error = copyin(ufhp, fhp, fhsize);
1447 if (error == 0) {
1448 /* XXX this check shouldn't be here */
1449 if (FHANDLE_SIZE(fhp) == fhsize) {
1450 *fhpp = fhp;
1451 return 0;
1452 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1453 /*
1454 * a kludge for nfsv2 padded handles.
1455 */
1456 size_t sz;
1457
1458 sz = FHANDLE_SIZE(fhp);
1459 kmem_free(fhp, fhsize);
1460 fhsize = sz;
1461 goto again;
1462 } else {
1463 /*
1464 * userland told us wrong size.
1465 */
1466 error = EINVAL;
1467 }
1468 }
1469 kmem_free(fhp, fhsize);
1470 return error;
1471 }
1472
1473 void
1474 vfs_copyinfh_free(fhandle_t *fhp)
1475 {
1476
1477 vfs__fhfree(fhp);
1478 }
1479
1480 /*
1481 * Get file handle system call
1482 */
1483 int
1484 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1485 {
1486 struct sys___getfh30_args /* {
1487 syscallarg(char *) fname;
1488 syscallarg(fhandle_t *) fhp;
1489 syscallarg(size_t *) fh_size;
1490 } */ *uap = v;
1491 struct vnode *vp;
1492 fhandle_t *fh;
1493 int error;
1494 struct nameidata nd;
1495 size_t sz;
1496 size_t usz;
1497
1498 /*
1499 * Must be super user
1500 */
1501 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1502 0, NULL, NULL, NULL);
1503 if (error)
1504 return (error);
1505 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1506 SCARG(uap, fname), l);
1507 error = namei(&nd);
1508 if (error)
1509 return (error);
1510 vp = nd.ni_vp;
1511 error = vfs_composefh_alloc(vp, &fh);
1512 vput(vp);
1513 if (error != 0) {
1514 goto out;
1515 }
1516 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1517 if (error != 0) {
1518 goto out;
1519 }
1520 sz = FHANDLE_SIZE(fh);
1521 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1522 if (error != 0) {
1523 goto out;
1524 }
1525 if (usz >= sz) {
1526 error = copyout(fh, SCARG(uap, fhp), sz);
1527 } else {
1528 error = E2BIG;
1529 }
1530 out:
1531 vfs_composefh_free(fh);
1532 return (error);
1533 }
1534
1535 /*
1536 * Open a file given a file handle.
1537 *
1538 * Check permissions, allocate an open file structure,
1539 * and call the device open routine if any.
1540 */
1541
1542 int
1543 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1544 register_t *retval)
1545 {
1546 struct filedesc *fdp = l->l_proc->p_fd;
1547 struct file *fp;
1548 struct vnode *vp = NULL;
1549 kauth_cred_t cred = l->l_cred;
1550 struct file *nfp;
1551 int type, indx, error=0;
1552 struct flock lf;
1553 struct vattr va;
1554 fhandle_t *fh;
1555 int flags;
1556
1557 /*
1558 * Must be super user
1559 */
1560 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1561 0, NULL, NULL, NULL)))
1562 return (error);
1563
1564 flags = FFLAGS(oflags);
1565 if ((flags & (FREAD | FWRITE)) == 0)
1566 return (EINVAL);
1567 if ((flags & O_CREAT))
1568 return (EINVAL);
1569 /* falloc() will use the file descriptor for us */
1570 if ((error = falloc(l, &nfp, &indx)) != 0)
1571 return (error);
1572 fp = nfp;
1573 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1574 if (error != 0) {
1575 goto bad;
1576 }
1577 error = vfs_fhtovp(fh, &vp);
1578 if (error != 0) {
1579 goto bad;
1580 }
1581
1582 /* Now do an effective vn_open */
1583
1584 if (vp->v_type == VSOCK) {
1585 error = EOPNOTSUPP;
1586 goto bad;
1587 }
1588 if (flags & FREAD) {
1589 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1590 goto bad;
1591 }
1592 if (flags & (FWRITE | O_TRUNC)) {
1593 if (vp->v_type == VDIR) {
1594 error = EISDIR;
1595 goto bad;
1596 }
1597 if ((error = vn_writechk(vp)) != 0 ||
1598 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1599 goto bad;
1600 }
1601 if (flags & O_TRUNC) {
1602 VOP_UNLOCK(vp, 0); /* XXX */
1603 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1604 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1605 VATTR_NULL(&va);
1606 va.va_size = 0;
1607 error = VOP_SETATTR(vp, &va, cred, l);
1608 if (error)
1609 goto bad;
1610 }
1611 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1612 goto bad;
1613 if (flags & FWRITE)
1614 vp->v_writecount++;
1615
1616 /* done with modified vn_open, now finish what sys_open does. */
1617
1618 fp->f_flag = flags & FMASK;
1619 fp->f_type = DTYPE_VNODE;
1620 fp->f_ops = &vnops;
1621 fp->f_data = vp;
1622 if (flags & (O_EXLOCK | O_SHLOCK)) {
1623 lf.l_whence = SEEK_SET;
1624 lf.l_start = 0;
1625 lf.l_len = 0;
1626 if (flags & O_EXLOCK)
1627 lf.l_type = F_WRLCK;
1628 else
1629 lf.l_type = F_RDLCK;
1630 type = F_FLOCK;
1631 if ((flags & FNONBLOCK) == 0)
1632 type |= F_WAIT;
1633 VOP_UNLOCK(vp, 0);
1634 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1635 if (error) {
1636 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1637 FILE_UNUSE(fp, l);
1638 ffree(fp);
1639 fdremove(fdp, indx);
1640 return (error);
1641 }
1642 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1643 fp->f_flag |= FHASLOCK;
1644 }
1645 VOP_UNLOCK(vp, 0);
1646 *retval = indx;
1647 FILE_SET_MATURE(fp);
1648 FILE_UNUSE(fp, l);
1649 vfs_copyinfh_free(fh);
1650 return (0);
1651
1652 bad:
1653 FILE_UNUSE(fp, l);
1654 ffree(fp);
1655 fdremove(fdp, indx);
1656 if (vp != NULL)
1657 vput(vp);
1658 vfs_copyinfh_free(fh);
1659 return (error);
1660 }
1661
1662 int
1663 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1664 {
1665 struct sys___fhopen40_args /* {
1666 syscallarg(const void *) fhp;
1667 syscallarg(size_t) fh_size;
1668 syscallarg(int) flags;
1669 } */ *uap = v;
1670
1671 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1672 SCARG(uap, flags), retval);
1673 }
1674
1675 int
1676 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1677 {
1678 int error;
1679 fhandle_t *fh;
1680 struct vnode *vp;
1681
1682 /*
1683 * Must be super user
1684 */
1685 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1686 0, NULL, NULL, NULL)))
1687 return (error);
1688
1689 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1690 if (error != 0)
1691 return error;
1692
1693 error = vfs_fhtovp(fh, &vp);
1694 vfs_copyinfh_free(fh);
1695 if (error != 0)
1696 return error;
1697
1698 error = vn_stat(vp, sb, l);
1699 vput(vp);
1700 return error;
1701 }
1702
1703
1704 /* ARGSUSED */
1705 int
1706 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1707 {
1708 struct sys___fhstat40_args /* {
1709 syscallarg(const void *) fhp;
1710 syscallarg(size_t) fh_size;
1711 syscallarg(struct stat *) sb;
1712 } */ *uap = v;
1713 struct stat sb;
1714 int error;
1715
1716 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1717 if (error)
1718 return error;
1719 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1720 }
1721
1722 int
1723 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1724 int flags)
1725 {
1726 fhandle_t *fh;
1727 struct mount *mp;
1728 struct vnode *vp;
1729 int error;
1730
1731 /*
1732 * Must be super user
1733 */
1734 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1735 0, NULL, NULL, NULL)))
1736 return error;
1737
1738 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1739 if (error != 0)
1740 return error;
1741
1742 error = vfs_fhtovp(fh, &vp);
1743 vfs_copyinfh_free(fh);
1744 if (error != 0)
1745 return error;
1746
1747 mp = vp->v_mount;
1748 error = dostatvfs(mp, sb, l, flags, 1);
1749 vput(vp);
1750 return error;
1751 }
1752
1753 /* ARGSUSED */
1754 int
1755 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1756 {
1757 struct sys___fhstatvfs140_args /* {
1758 syscallarg(const void *) fhp;
1759 syscallarg(size_t) fh_size;
1760 syscallarg(struct statvfs *) buf;
1761 syscallarg(int) flags;
1762 } */ *uap = v;
1763 struct statvfs *sb = STATVFSBUF_GET();
1764 int error;
1765
1766 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1767 SCARG(uap, flags));
1768 if (error == 0)
1769 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1770 STATVFSBUF_PUT(sb);
1771 return error;
1772 }
1773
1774 /*
1775 * Create a special file.
1776 */
1777 /* ARGSUSED */
1778 int
1779 sys_mknod(struct lwp *l, void *v, register_t *retval)
1780 {
1781 struct sys_mknod_args /* {
1782 syscallarg(const char *) path;
1783 syscallarg(int) mode;
1784 syscallarg(int) dev;
1785 } */ *uap = v;
1786 struct proc *p = l->l_proc;
1787 struct vnode *vp;
1788 struct vattr vattr;
1789 int error, optype;
1790 struct nameidata nd;
1791 char *path;
1792 const char *cpath;
1793 enum uio_seg seg = UIO_USERSPACE;
1794
1795 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1796 0, NULL, NULL, NULL)) != 0)
1797 return (error);
1798
1799 optype = VOP_MKNOD_DESCOFFSET;
1800
1801 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1802 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath, l);
1803
1804 if ((error = namei(&nd)) != 0)
1805 goto out;
1806 vp = nd.ni_vp;
1807 if (vp != NULL)
1808 error = EEXIST;
1809 else {
1810 VATTR_NULL(&vattr);
1811 vattr.va_mode =
1812 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1813 vattr.va_rdev = SCARG(uap, dev);
1814
1815 switch (SCARG(uap, mode) & S_IFMT) {
1816 case S_IFMT: /* used by badsect to flag bad sectors */
1817 vattr.va_type = VBAD;
1818 break;
1819 case S_IFCHR:
1820 vattr.va_type = VCHR;
1821 break;
1822 case S_IFBLK:
1823 vattr.va_type = VBLK;
1824 break;
1825 case S_IFWHT:
1826 optype = VOP_WHITEOUT_DESCOFFSET;
1827 break;
1828 case S_IFREG:
1829 #if NVERIEXEC > 0
1830 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1831 O_CREAT);
1832 #endif /* NVERIEXEC > 0 */
1833 vattr.va_type = VREG;
1834 vattr.va_rdev = VNOVAL;
1835 optype = VOP_CREATE_DESCOFFSET;
1836 break;
1837 default:
1838 error = EINVAL;
1839 break;
1840 }
1841 }
1842 if (!error) {
1843 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1844 switch (optype) {
1845 case VOP_WHITEOUT_DESCOFFSET:
1846 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1847 if (error)
1848 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1849 vput(nd.ni_dvp);
1850 break;
1851
1852 case VOP_MKNOD_DESCOFFSET:
1853 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1854 &nd.ni_cnd, &vattr);
1855 if (error == 0)
1856 vput(nd.ni_vp);
1857 break;
1858
1859 case VOP_CREATE_DESCOFFSET:
1860 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1861 &nd.ni_cnd, &vattr);
1862 if (error == 0)
1863 vput(nd.ni_vp);
1864 break;
1865 }
1866 } else {
1867 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1868 if (nd.ni_dvp == vp)
1869 vrele(nd.ni_dvp);
1870 else
1871 vput(nd.ni_dvp);
1872 if (vp)
1873 vrele(vp);
1874 }
1875 out:
1876 VERIEXEC_PATH_PUT(path);
1877 return (error);
1878 }
1879
1880 /*
1881 * Create a named pipe.
1882 */
1883 /* ARGSUSED */
1884 int
1885 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1886 {
1887 struct sys_mkfifo_args /* {
1888 syscallarg(const char *) path;
1889 syscallarg(int) mode;
1890 } */ *uap = v;
1891 struct proc *p = l->l_proc;
1892 struct vattr vattr;
1893 int error;
1894 struct nameidata nd;
1895
1896 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1897 if ((error = namei(&nd)) != 0)
1898 return (error);
1899 if (nd.ni_vp != NULL) {
1900 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1901 if (nd.ni_dvp == nd.ni_vp)
1902 vrele(nd.ni_dvp);
1903 else
1904 vput(nd.ni_dvp);
1905 vrele(nd.ni_vp);
1906 return (EEXIST);
1907 }
1908 VATTR_NULL(&vattr);
1909 vattr.va_type = VFIFO;
1910 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1911 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1912 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1913 if (error == 0)
1914 vput(nd.ni_vp);
1915 return (error);
1916 }
1917
1918 /*
1919 * Make a hard file link.
1920 */
1921 /* ARGSUSED */
1922 int
1923 sys_link(struct lwp *l, void *v, register_t *retval)
1924 {
1925 struct sys_link_args /* {
1926 syscallarg(const char *) path;
1927 syscallarg(const char *) link;
1928 } */ *uap = v;
1929 struct vnode *vp;
1930 struct nameidata nd;
1931 int error;
1932
1933 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1934 if ((error = namei(&nd)) != 0)
1935 return (error);
1936 vp = nd.ni_vp;
1937 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1938 if ((error = namei(&nd)) != 0)
1939 goto out;
1940 if (nd.ni_vp) {
1941 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1942 if (nd.ni_dvp == nd.ni_vp)
1943 vrele(nd.ni_dvp);
1944 else
1945 vput(nd.ni_dvp);
1946 vrele(nd.ni_vp);
1947 error = EEXIST;
1948 goto out;
1949 }
1950 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1951 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1952 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1953 out:
1954 vrele(vp);
1955 return (error);
1956 }
1957
1958 /*
1959 * Make a symbolic link.
1960 */
1961 /* ARGSUSED */
1962 int
1963 sys_symlink(struct lwp *l, void *v, register_t *retval)
1964 {
1965 struct sys_symlink_args /* {
1966 syscallarg(const char *) path;
1967 syscallarg(const char *) link;
1968 } */ *uap = v;
1969 struct proc *p = l->l_proc;
1970 struct vattr vattr;
1971 char *path;
1972 int error;
1973 struct nameidata nd;
1974
1975 path = PNBUF_GET();
1976 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1977 if (error)
1978 goto out;
1979 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1980 if ((error = namei(&nd)) != 0)
1981 goto out;
1982 if (nd.ni_vp) {
1983 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1984 if (nd.ni_dvp == nd.ni_vp)
1985 vrele(nd.ni_dvp);
1986 else
1987 vput(nd.ni_dvp);
1988 vrele(nd.ni_vp);
1989 error = EEXIST;
1990 goto out;
1991 }
1992 VATTR_NULL(&vattr);
1993 vattr.va_type = VLNK;
1994 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1995 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1996 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1997 if (error == 0)
1998 vput(nd.ni_vp);
1999 out:
2000 PNBUF_PUT(path);
2001 return (error);
2002 }
2003
2004 /*
2005 * Delete a whiteout from the filesystem.
2006 */
2007 /* ARGSUSED */
2008 int
2009 sys_undelete(struct lwp *l, void *v, register_t *retval)
2010 {
2011 struct sys_undelete_args /* {
2012 syscallarg(const char *) path;
2013 } */ *uap = v;
2014 int error;
2015 struct nameidata nd;
2016
2017 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, UIO_USERSPACE,
2018 SCARG(uap, path), l);
2019 error = namei(&nd);
2020 if (error)
2021 return (error);
2022
2023 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2024 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2025 if (nd.ni_dvp == nd.ni_vp)
2026 vrele(nd.ni_dvp);
2027 else
2028 vput(nd.ni_dvp);
2029 if (nd.ni_vp)
2030 vrele(nd.ni_vp);
2031 return (EEXIST);
2032 }
2033 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2034 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2035 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2036 vput(nd.ni_dvp);
2037 return (error);
2038 }
2039
2040 /*
2041 * Delete a name from the filesystem.
2042 */
2043 /* ARGSUSED */
2044 int
2045 sys_unlink(struct lwp *l, void *v, register_t *retval)
2046 {
2047 struct sys_unlink_args /* {
2048 syscallarg(const char *) path;
2049 } */ *uap = v;
2050 struct vnode *vp;
2051 int error;
2052 struct nameidata nd;
2053 char *path;
2054 const char *cpath;
2055 enum uio_seg seg = UIO_USERSPACE;
2056
2057 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
2058 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath, l);
2059
2060 if ((error = namei(&nd)) != 0)
2061 goto out;
2062 vp = nd.ni_vp;
2063
2064 /*
2065 * The root of a mounted filesystem cannot be deleted.
2066 */
2067 if (vp->v_flag & VROOT) {
2068 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2069 if (nd.ni_dvp == vp)
2070 vrele(nd.ni_dvp);
2071 else
2072 vput(nd.ni_dvp);
2073 vput(vp);
2074 error = EBUSY;
2075 goto out;
2076 }
2077
2078 #if NVERIEXEC > 0
2079 /* Handle remove requests for veriexec entries. */
2080 if ((error = veriexec_removechk(l, nd.ni_vp, nd.ni_dirp)) != 0) {
2081 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2082 if (nd.ni_dvp == vp)
2083 vrele(nd.ni_dvp);
2084 else
2085 vput(nd.ni_dvp);
2086 vput(vp);
2087 goto out;
2088 }
2089 #endif /* NVERIEXEC > 0 */
2090
2091 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2092 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2093 #ifdef FILEASSOC
2094 (void)fileassoc_file_delete(vp);
2095 #endif /* FILEASSOC */
2096 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2097 out:
2098 VERIEXEC_PATH_PUT(path);
2099 return (error);
2100 }
2101
2102 /*
2103 * Reposition read/write file offset.
2104 */
2105 int
2106 sys_lseek(struct lwp *l, void *v, register_t *retval)
2107 {
2108 struct sys_lseek_args /* {
2109 syscallarg(int) fd;
2110 syscallarg(int) pad;
2111 syscallarg(off_t) offset;
2112 syscallarg(int) whence;
2113 } */ *uap = v;
2114 struct proc *p = l->l_proc;
2115 kauth_cred_t cred = l->l_cred;
2116 struct filedesc *fdp = p->p_fd;
2117 struct file *fp;
2118 struct vnode *vp;
2119 struct vattr vattr;
2120 off_t newoff;
2121 int error;
2122
2123 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2124 return (EBADF);
2125
2126 FILE_USE(fp);
2127
2128 vp = (struct vnode *)fp->f_data;
2129 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2130 error = ESPIPE;
2131 goto out;
2132 }
2133
2134 switch (SCARG(uap, whence)) {
2135 case SEEK_CUR:
2136 newoff = fp->f_offset + SCARG(uap, offset);
2137 break;
2138 case SEEK_END:
2139 error = VOP_GETATTR(vp, &vattr, cred, l);
2140 if (error)
2141 goto out;
2142 newoff = SCARG(uap, offset) + vattr.va_size;
2143 break;
2144 case SEEK_SET:
2145 newoff = SCARG(uap, offset);
2146 break;
2147 default:
2148 error = EINVAL;
2149 goto out;
2150 }
2151 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2152 goto out;
2153
2154 *(off_t *)retval = fp->f_offset = newoff;
2155 out:
2156 FILE_UNUSE(fp, l);
2157 return (error);
2158 }
2159
2160 /*
2161 * Positional read system call.
2162 */
2163 int
2164 sys_pread(struct lwp *l, void *v, register_t *retval)
2165 {
2166 struct sys_pread_args /* {
2167 syscallarg(int) fd;
2168 syscallarg(void *) buf;
2169 syscallarg(size_t) nbyte;
2170 syscallarg(off_t) offset;
2171 } */ *uap = v;
2172 struct proc *p = l->l_proc;
2173 struct filedesc *fdp = p->p_fd;
2174 struct file *fp;
2175 struct vnode *vp;
2176 off_t offset;
2177 int error, fd = SCARG(uap, fd);
2178
2179 if ((fp = fd_getfile(fdp, fd)) == NULL)
2180 return (EBADF);
2181
2182 if ((fp->f_flag & FREAD) == 0) {
2183 simple_unlock(&fp->f_slock);
2184 return (EBADF);
2185 }
2186
2187 FILE_USE(fp);
2188
2189 vp = (struct vnode *)fp->f_data;
2190 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2191 error = ESPIPE;
2192 goto out;
2193 }
2194
2195 offset = SCARG(uap, offset);
2196
2197 /*
2198 * XXX This works because no file systems actually
2199 * XXX take any action on the seek operation.
2200 */
2201 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2202 goto out;
2203
2204 /* dofileread() will unuse the descriptor for us */
2205 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2206 &offset, 0, retval));
2207
2208 out:
2209 FILE_UNUSE(fp, l);
2210 return (error);
2211 }
2212
2213 /*
2214 * Positional scatter read system call.
2215 */
2216 int
2217 sys_preadv(struct lwp *l, void *v, register_t *retval)
2218 {
2219 struct sys_preadv_args /* {
2220 syscallarg(int) fd;
2221 syscallarg(const struct iovec *) iovp;
2222 syscallarg(int) iovcnt;
2223 syscallarg(off_t) offset;
2224 } */ *uap = v;
2225
2226 return do_filereadv(l, SCARG(uap, fd), SCARG(uap, iovp),
2227 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval);
2228 }
2229
2230 /*
2231 * Positional write system call.
2232 */
2233 int
2234 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2235 {
2236 struct sys_pwrite_args /* {
2237 syscallarg(int) fd;
2238 syscallarg(const void *) buf;
2239 syscallarg(size_t) nbyte;
2240 syscallarg(off_t) offset;
2241 } */ *uap = v;
2242 struct proc *p = l->l_proc;
2243 struct filedesc *fdp = p->p_fd;
2244 struct file *fp;
2245 struct vnode *vp;
2246 off_t offset;
2247 int error, fd = SCARG(uap, fd);
2248
2249 if ((fp = fd_getfile(fdp, fd)) == NULL)
2250 return (EBADF);
2251
2252 if ((fp->f_flag & FWRITE) == 0) {
2253 simple_unlock(&fp->f_slock);
2254 return (EBADF);
2255 }
2256
2257 FILE_USE(fp);
2258
2259 vp = (struct vnode *)fp->f_data;
2260 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2261 error = ESPIPE;
2262 goto out;
2263 }
2264
2265 offset = SCARG(uap, offset);
2266
2267 /*
2268 * XXX This works because no file systems actually
2269 * XXX take any action on the seek operation.
2270 */
2271 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2272 goto out;
2273
2274 /* dofilewrite() will unuse the descriptor for us */
2275 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2276 &offset, 0, retval));
2277
2278 out:
2279 FILE_UNUSE(fp, l);
2280 return (error);
2281 }
2282
2283 /*
2284 * Positional gather write system call.
2285 */
2286 int
2287 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2288 {
2289 struct sys_pwritev_args /* {
2290 syscallarg(int) fd;
2291 syscallarg(const struct iovec *) iovp;
2292 syscallarg(int) iovcnt;
2293 syscallarg(off_t) offset;
2294 } */ *uap = v;
2295
2296 return do_filewritev(l, SCARG(uap, fd), SCARG(uap, iovp),
2297 SCARG(uap, iovcnt), &SCARG(uap, offset), 0, retval);
2298 }
2299
2300 /*
2301 * Check access permissions.
2302 */
2303 int
2304 sys_access(struct lwp *l, void *v, register_t *retval)
2305 {
2306 struct sys_access_args /* {
2307 syscallarg(const char *) path;
2308 syscallarg(int) flags;
2309 } */ *uap = v;
2310 kauth_cred_t cred;
2311 struct vnode *vp;
2312 int error, flags;
2313 struct nameidata nd;
2314
2315 cred = kauth_cred_dup(l->l_cred);
2316 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2317 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2318 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2319 SCARG(uap, path), l);
2320 /* Override default credentials */
2321 nd.ni_cnd.cn_cred = cred;
2322 if ((error = namei(&nd)) != 0)
2323 goto out;
2324 vp = nd.ni_vp;
2325
2326 /* Flags == 0 means only check for existence. */
2327 if (SCARG(uap, flags)) {
2328 flags = 0;
2329 if (SCARG(uap, flags) & R_OK)
2330 flags |= VREAD;
2331 if (SCARG(uap, flags) & W_OK)
2332 flags |= VWRITE;
2333 if (SCARG(uap, flags) & X_OK)
2334 flags |= VEXEC;
2335
2336 error = VOP_ACCESS(vp, flags, cred, l);
2337 if (!error && (flags & VWRITE))
2338 error = vn_writechk(vp);
2339 }
2340 vput(vp);
2341 out:
2342 kauth_cred_free(cred);
2343 return (error);
2344 }
2345
2346 /*
2347 * Common code for all sys_stat functions, including compat versions.
2348 */
2349 int
2350 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags,
2351 struct stat *sb)
2352 {
2353 int error;
2354 struct nameidata nd;
2355
2356 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT , UIO_USERSPACE, path, l);
2357 error = namei(&nd);
2358 if (error != 0)
2359 return error;
2360 error = vn_stat(nd.ni_vp, sb, l);
2361 vput(nd.ni_vp);
2362 return error;
2363 }
2364
2365 /*
2366 * Get file status; this version follows links.
2367 */
2368 /* ARGSUSED */
2369 int
2370 sys___stat30(struct lwp *l, void *v, register_t *retval)
2371 {
2372 struct sys___stat30_args /* {
2373 syscallarg(const char *) path;
2374 syscallarg(struct stat *) ub;
2375 } */ *uap = v;
2376 struct stat sb;
2377 int error;
2378
2379 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb);
2380 if (error)
2381 return error;
2382 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2383 }
2384
2385 /*
2386 * Get file status; this version does not follow links.
2387 */
2388 /* ARGSUSED */
2389 int
2390 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2391 {
2392 struct sys___lstat30_args /* {
2393 syscallarg(const char *) path;
2394 syscallarg(struct stat *) ub;
2395 } */ *uap = v;
2396 struct stat sb;
2397 int error;
2398
2399 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb);
2400 if (error)
2401 return error;
2402 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2403 }
2404
2405 /*
2406 * Get configurable pathname variables.
2407 */
2408 /* ARGSUSED */
2409 int
2410 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2411 {
2412 struct sys_pathconf_args /* {
2413 syscallarg(const char *) path;
2414 syscallarg(int) name;
2415 } */ *uap = v;
2416 int error;
2417 struct nameidata nd;
2418
2419 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2420 SCARG(uap, path), l);
2421 if ((error = namei(&nd)) != 0)
2422 return (error);
2423 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2424 vput(nd.ni_vp);
2425 return (error);
2426 }
2427
2428 /*
2429 * Return target name of a symbolic link.
2430 */
2431 /* ARGSUSED */
2432 int
2433 sys_readlink(struct lwp *l, void *v, register_t *retval)
2434 {
2435 struct sys_readlink_args /* {
2436 syscallarg(const char *) path;
2437 syscallarg(char *) buf;
2438 syscallarg(size_t) count;
2439 } */ *uap = v;
2440 struct vnode *vp;
2441 struct iovec aiov;
2442 struct uio auio;
2443 int error;
2444 struct nameidata nd;
2445
2446 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2447 SCARG(uap, path), l);
2448 if ((error = namei(&nd)) != 0)
2449 return (error);
2450 vp = nd.ni_vp;
2451 if (vp->v_type != VLNK)
2452 error = EINVAL;
2453 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2454 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2455 aiov.iov_base = SCARG(uap, buf);
2456 aiov.iov_len = SCARG(uap, count);
2457 auio.uio_iov = &aiov;
2458 auio.uio_iovcnt = 1;
2459 auio.uio_offset = 0;
2460 auio.uio_rw = UIO_READ;
2461 KASSERT(l == curlwp);
2462 auio.uio_vmspace = l->l_proc->p_vmspace;
2463 auio.uio_resid = SCARG(uap, count);
2464 error = VOP_READLINK(vp, &auio, l->l_cred);
2465 }
2466 vput(vp);
2467 *retval = SCARG(uap, count) - auio.uio_resid;
2468 return (error);
2469 }
2470
2471 /*
2472 * Change flags of a file given a path name.
2473 */
2474 /* ARGSUSED */
2475 int
2476 sys_chflags(struct lwp *l, void *v, register_t *retval)
2477 {
2478 struct sys_chflags_args /* {
2479 syscallarg(const char *) path;
2480 syscallarg(u_long) flags;
2481 } */ *uap = v;
2482 struct vnode *vp;
2483 int error;
2484 struct nameidata nd;
2485
2486 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2487 if ((error = namei(&nd)) != 0)
2488 return (error);
2489 vp = nd.ni_vp;
2490 error = change_flags(vp, SCARG(uap, flags), l);
2491 vput(vp);
2492 return (error);
2493 }
2494
2495 /*
2496 * Change flags of a file given a file descriptor.
2497 */
2498 /* ARGSUSED */
2499 int
2500 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2501 {
2502 struct sys_fchflags_args /* {
2503 syscallarg(int) fd;
2504 syscallarg(u_long) flags;
2505 } */ *uap = v;
2506 struct proc *p = l->l_proc;
2507 struct vnode *vp;
2508 struct file *fp;
2509 int error;
2510
2511 /* getvnode() will use the descriptor for us */
2512 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2513 return (error);
2514 vp = (struct vnode *)fp->f_data;
2515 error = change_flags(vp, SCARG(uap, flags), l);
2516 VOP_UNLOCK(vp, 0);
2517 FILE_UNUSE(fp, l);
2518 return (error);
2519 }
2520
2521 /*
2522 * Change flags of a file given a path name; this version does
2523 * not follow links.
2524 */
2525 int
2526 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2527 {
2528 struct sys_lchflags_args /* {
2529 syscallarg(const char *) path;
2530 syscallarg(u_long) flags;
2531 } */ *uap = v;
2532 struct vnode *vp;
2533 int error;
2534 struct nameidata nd;
2535
2536 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2537 if ((error = namei(&nd)) != 0)
2538 return (error);
2539 vp = nd.ni_vp;
2540 error = change_flags(vp, SCARG(uap, flags), l);
2541 vput(vp);
2542 return (error);
2543 }
2544
2545 /*
2546 * Common routine to change flags of a file.
2547 */
2548 int
2549 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2550 {
2551 struct vattr vattr;
2552 int error;
2553
2554 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2555 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2556 /*
2557 * Non-superusers cannot change the flags on devices, even if they
2558 * own them.
2559 */
2560 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2561 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2562 goto out;
2563 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2564 error = EINVAL;
2565 goto out;
2566 }
2567 }
2568 VATTR_NULL(&vattr);
2569 vattr.va_flags = flags;
2570 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2571 out:
2572 return (error);
2573 }
2574
2575 /*
2576 * Change mode of a file given path name; this version follows links.
2577 */
2578 /* ARGSUSED */
2579 int
2580 sys_chmod(struct lwp *l, void *v, register_t *retval)
2581 {
2582 struct sys_chmod_args /* {
2583 syscallarg(const char *) path;
2584 syscallarg(int) mode;
2585 } */ *uap = v;
2586 int error;
2587 struct nameidata nd;
2588
2589 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2590 if ((error = namei(&nd)) != 0)
2591 return (error);
2592
2593 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2594
2595 vrele(nd.ni_vp);
2596 return (error);
2597 }
2598
2599 /*
2600 * Change mode of a file given a file descriptor.
2601 */
2602 /* ARGSUSED */
2603 int
2604 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2605 {
2606 struct sys_fchmod_args /* {
2607 syscallarg(int) fd;
2608 syscallarg(int) mode;
2609 } */ *uap = v;
2610 struct proc *p = l->l_proc;
2611 struct file *fp;
2612 int error;
2613
2614 /* getvnode() will use the descriptor for us */
2615 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2616 return (error);
2617
2618 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2619 FILE_UNUSE(fp, l);
2620 return (error);
2621 }
2622
2623 /*
2624 * Change mode of a file given path name; this version does not follow links.
2625 */
2626 /* ARGSUSED */
2627 int
2628 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2629 {
2630 struct sys_lchmod_args /* {
2631 syscallarg(const char *) path;
2632 syscallarg(int) mode;
2633 } */ *uap = v;
2634 int error;
2635 struct nameidata nd;
2636
2637 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2638 if ((error = namei(&nd)) != 0)
2639 return (error);
2640
2641 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2642
2643 vrele(nd.ni_vp);
2644 return (error);
2645 }
2646
2647 /*
2648 * Common routine to set mode given a vnode.
2649 */
2650 static int
2651 change_mode(struct vnode *vp, int mode, struct lwp *l)
2652 {
2653 struct vattr vattr;
2654 int error;
2655
2656 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2657 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2658 VATTR_NULL(&vattr);
2659 vattr.va_mode = mode & ALLPERMS;
2660 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2661 VOP_UNLOCK(vp, 0);
2662 return (error);
2663 }
2664
2665 /*
2666 * Set ownership given a path name; this version follows links.
2667 */
2668 /* ARGSUSED */
2669 int
2670 sys_chown(struct lwp *l, void *v, register_t *retval)
2671 {
2672 struct sys_chown_args /* {
2673 syscallarg(const char *) path;
2674 syscallarg(uid_t) uid;
2675 syscallarg(gid_t) gid;
2676 } */ *uap = v;
2677 int error;
2678 struct nameidata nd;
2679
2680 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2681 if ((error = namei(&nd)) != 0)
2682 return (error);
2683
2684 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2685
2686 vrele(nd.ni_vp);
2687 return (error);
2688 }
2689
2690 /*
2691 * Set ownership given a path name; this version follows links.
2692 * Provides POSIX semantics.
2693 */
2694 /* ARGSUSED */
2695 int
2696 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2697 {
2698 struct sys_chown_args /* {
2699 syscallarg(const char *) path;
2700 syscallarg(uid_t) uid;
2701 syscallarg(gid_t) gid;
2702 } */ *uap = v;
2703 int error;
2704 struct nameidata nd;
2705
2706 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2707 if ((error = namei(&nd)) != 0)
2708 return (error);
2709
2710 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2711
2712 vrele(nd.ni_vp);
2713 return (error);
2714 }
2715
2716 /*
2717 * Set ownership given a file descriptor.
2718 */
2719 /* ARGSUSED */
2720 int
2721 sys_fchown(struct lwp *l, void *v, register_t *retval)
2722 {
2723 struct sys_fchown_args /* {
2724 syscallarg(int) fd;
2725 syscallarg(uid_t) uid;
2726 syscallarg(gid_t) gid;
2727 } */ *uap = v;
2728 struct proc *p = l->l_proc;
2729 int error;
2730 struct file *fp;
2731
2732 /* getvnode() will use the descriptor for us */
2733 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2734 return (error);
2735
2736 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2737 SCARG(uap, gid), l, 0);
2738 FILE_UNUSE(fp, l);
2739 return (error);
2740 }
2741
2742 /*
2743 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2744 */
2745 /* ARGSUSED */
2746 int
2747 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2748 {
2749 struct sys_fchown_args /* {
2750 syscallarg(int) fd;
2751 syscallarg(uid_t) uid;
2752 syscallarg(gid_t) gid;
2753 } */ *uap = v;
2754 struct proc *p = l->l_proc;
2755 int error;
2756 struct file *fp;
2757
2758 /* getvnode() will use the descriptor for us */
2759 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2760 return (error);
2761
2762 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2763 SCARG(uap, gid), l, 1);
2764 FILE_UNUSE(fp, l);
2765 return (error);
2766 }
2767
2768 /*
2769 * Set ownership given a path name; this version does not follow links.
2770 */
2771 /* ARGSUSED */
2772 int
2773 sys_lchown(struct lwp *l, void *v, register_t *retval)
2774 {
2775 struct sys_lchown_args /* {
2776 syscallarg(const char *) path;
2777 syscallarg(uid_t) uid;
2778 syscallarg(gid_t) gid;
2779 } */ *uap = v;
2780 int error;
2781 struct nameidata nd;
2782
2783 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2784 if ((error = namei(&nd)) != 0)
2785 return (error);
2786
2787 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2788
2789 vrele(nd.ni_vp);
2790 return (error);
2791 }
2792
2793 /*
2794 * Set ownership given a path name; this version does not follow links.
2795 * Provides POSIX/XPG semantics.
2796 */
2797 /* ARGSUSED */
2798 int
2799 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2800 {
2801 struct sys_lchown_args /* {
2802 syscallarg(const char *) path;
2803 syscallarg(uid_t) uid;
2804 syscallarg(gid_t) gid;
2805 } */ *uap = v;
2806 int error;
2807 struct nameidata nd;
2808
2809 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2810 if ((error = namei(&nd)) != 0)
2811 return (error);
2812
2813 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2814
2815 vrele(nd.ni_vp);
2816 return (error);
2817 }
2818
2819 /*
2820 * Common routine to set ownership given a vnode.
2821 */
2822 static int
2823 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2824 int posix_semantics)
2825 {
2826 struct vattr vattr;
2827 mode_t newmode;
2828 int error;
2829
2830 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2831 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2832 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2833 goto out;
2834
2835 #define CHANGED(x) ((int)(x) != -1)
2836 newmode = vattr.va_mode;
2837 if (posix_semantics) {
2838 /*
2839 * POSIX/XPG semantics: if the caller is not the super-user,
2840 * clear set-user-id and set-group-id bits. Both POSIX and
2841 * the XPG consider the behaviour for calls by the super-user
2842 * implementation-defined; we leave the set-user-id and set-
2843 * group-id settings intact in that case.
2844 */
2845 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2846 NULL) != 0)
2847 newmode &= ~(S_ISUID | S_ISGID);
2848 } else {
2849 /*
2850 * NetBSD semantics: when changing owner and/or group,
2851 * clear the respective bit(s).
2852 */
2853 if (CHANGED(uid))
2854 newmode &= ~S_ISUID;
2855 if (CHANGED(gid))
2856 newmode &= ~S_ISGID;
2857 }
2858 /* Update va_mode iff altered. */
2859 if (vattr.va_mode == newmode)
2860 newmode = VNOVAL;
2861
2862 VATTR_NULL(&vattr);
2863 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2864 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2865 vattr.va_mode = newmode;
2866 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2867 #undef CHANGED
2868
2869 out:
2870 VOP_UNLOCK(vp, 0);
2871 return (error);
2872 }
2873
2874 /*
2875 * Set the access and modification times given a path name; this
2876 * version follows links.
2877 */
2878 /* ARGSUSED */
2879 int
2880 sys_utimes(struct lwp *l, void *v, register_t *retval)
2881 {
2882 struct sys_utimes_args /* {
2883 syscallarg(const char *) path;
2884 syscallarg(const struct timeval *) tptr;
2885 } */ *uap = v;
2886
2887 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2888 SCARG(uap, tptr), UIO_USERSPACE);
2889 }
2890
2891 /*
2892 * Set the access and modification times given a file descriptor.
2893 */
2894 /* ARGSUSED */
2895 int
2896 sys_futimes(struct lwp *l, void *v, register_t *retval)
2897 {
2898 struct sys_futimes_args /* {
2899 syscallarg(int) fd;
2900 syscallarg(const struct timeval *) tptr;
2901 } */ *uap = v;
2902 int error;
2903 struct file *fp;
2904
2905 /* getvnode() will use the descriptor for us */
2906 if ((error = getvnode(l->l_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
2907 return (error);
2908
2909 error = do_sys_utimes(l, fp->f_data, NULL, 0,
2910 SCARG(uap, tptr), UIO_USERSPACE);
2911
2912 FILE_UNUSE(fp, l);
2913 return (error);
2914 }
2915
2916 /*
2917 * Set the access and modification times given a path name; this
2918 * version does not follow links.
2919 */
2920 int
2921 sys_lutimes(struct lwp *l, void *v, register_t *retval)
2922 {
2923 struct sys_lutimes_args /* {
2924 syscallarg(const char *) path;
2925 syscallarg(const struct timeval *) tptr;
2926 } */ *uap = v;
2927
2928 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2929 SCARG(uap, tptr), UIO_USERSPACE);
2930 }
2931
2932 /*
2933 * Common routine to set access and modification times given a vnode.
2934 */
2935 int
2936 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2937 const struct timeval *tptr, enum uio_seg seg)
2938 {
2939 struct vattr vattr;
2940 struct nameidata nd;
2941 int error;
2942
2943 VATTR_NULL(&vattr);
2944 if (tptr == NULL) {
2945 nanotime(&vattr.va_atime);
2946 vattr.va_mtime = vattr.va_atime;
2947 vattr.va_vaflags |= VA_UTIMES_NULL;
2948 } else {
2949 struct timeval tv[2];
2950
2951 if (seg != UIO_SYSSPACE) {
2952 error = copyin(tptr, &tv, sizeof (tv));
2953 if (error != 0)
2954 return error;
2955 tptr = tv;
2956 }
2957 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime);
2958 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime);
2959 }
2960
2961 if (vp == NULL) {
2962 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path, l);
2963 if ((error = namei(&nd)) != 0)
2964 return (error);
2965 vp = nd.ni_vp;
2966 } else
2967 nd.ni_vp = NULL;
2968
2969 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2970 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2971 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2972 VOP_UNLOCK(vp, 0);
2973
2974 if (nd.ni_vp != NULL)
2975 vrele(nd.ni_vp);
2976
2977 return (error);
2978 }
2979
2980 /*
2981 * Truncate a file given its path name.
2982 */
2983 /* ARGSUSED */
2984 int
2985 sys_truncate(struct lwp *l, void *v, register_t *retval)
2986 {
2987 struct sys_truncate_args /* {
2988 syscallarg(const char *) path;
2989 syscallarg(int) pad;
2990 syscallarg(off_t) length;
2991 } */ *uap = v;
2992 struct vnode *vp;
2993 struct vattr vattr;
2994 int error;
2995 struct nameidata nd;
2996
2997 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2998 if ((error = namei(&nd)) != 0)
2999 return (error);
3000 vp = nd.ni_vp;
3001 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3002 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3003 if (vp->v_type == VDIR)
3004 error = EISDIR;
3005 else if ((error = vn_writechk(vp)) == 0 &&
3006 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3007 VATTR_NULL(&vattr);
3008 vattr.va_size = SCARG(uap, length);
3009 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3010 }
3011 vput(vp);
3012 return (error);
3013 }
3014
3015 /*
3016 * Truncate a file given a file descriptor.
3017 */
3018 /* ARGSUSED */
3019 int
3020 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3021 {
3022 struct sys_ftruncate_args /* {
3023 syscallarg(int) fd;
3024 syscallarg(int) pad;
3025 syscallarg(off_t) length;
3026 } */ *uap = v;
3027 struct proc *p = l->l_proc;
3028 struct vattr vattr;
3029 struct vnode *vp;
3030 struct file *fp;
3031 int error;
3032
3033 /* getvnode() will use the descriptor for us */
3034 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3035 return (error);
3036 if ((fp->f_flag & FWRITE) == 0) {
3037 error = EINVAL;
3038 goto out;
3039 }
3040 vp = (struct vnode *)fp->f_data;
3041 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3042 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3043 if (vp->v_type == VDIR)
3044 error = EISDIR;
3045 else if ((error = vn_writechk(vp)) == 0) {
3046 VATTR_NULL(&vattr);
3047 vattr.va_size = SCARG(uap, length);
3048 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3049 }
3050 VOP_UNLOCK(vp, 0);
3051 out:
3052 FILE_UNUSE(fp, l);
3053 return (error);
3054 }
3055
3056 /*
3057 * Sync an open file.
3058 */
3059 /* ARGSUSED */
3060 int
3061 sys_fsync(struct lwp *l, void *v, register_t *retval)
3062 {
3063 struct sys_fsync_args /* {
3064 syscallarg(int) fd;
3065 } */ *uap = v;
3066 struct proc *p = l->l_proc;
3067 struct vnode *vp;
3068 struct file *fp;
3069 int error;
3070
3071 /* getvnode() will use the descriptor for us */
3072 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3073 return (error);
3074 vp = (struct vnode *)fp->f_data;
3075 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3076 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3077 if (error == 0 && bioops.io_fsync != NULL &&
3078 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3079 (*bioops.io_fsync)(vp, 0);
3080 VOP_UNLOCK(vp, 0);
3081 FILE_UNUSE(fp, l);
3082 return (error);
3083 }
3084
3085 /*
3086 * Sync a range of file data. API modeled after that found in AIX.
3087 *
3088 * FDATASYNC indicates that we need only save enough metadata to be able
3089 * to re-read the written data. Note we duplicate AIX's requirement that
3090 * the file be open for writing.
3091 */
3092 /* ARGSUSED */
3093 int
3094 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3095 {
3096 struct sys_fsync_range_args /* {
3097 syscallarg(int) fd;
3098 syscallarg(int) flags;
3099 syscallarg(off_t) start;
3100 syscallarg(off_t) length;
3101 } */ *uap = v;
3102 struct proc *p = l->l_proc;
3103 struct vnode *vp;
3104 struct file *fp;
3105 int flags, nflags;
3106 off_t s, e, len;
3107 int error;
3108
3109 /* getvnode() will use the descriptor for us */
3110 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3111 return (error);
3112
3113 if ((fp->f_flag & FWRITE) == 0) {
3114 error = EBADF;
3115 goto out;
3116 }
3117
3118 flags = SCARG(uap, flags);
3119 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3120 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3121 error = EINVAL;
3122 goto out;
3123 }
3124 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3125 if (flags & FDATASYNC)
3126 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3127 else
3128 nflags = FSYNC_WAIT;
3129 if (flags & FDISKSYNC)
3130 nflags |= FSYNC_CACHE;
3131
3132 len = SCARG(uap, length);
3133 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3134 if (len) {
3135 s = SCARG(uap, start);
3136 e = s + len;
3137 if (e < s) {
3138 error = EINVAL;
3139 goto out;
3140 }
3141 } else {
3142 e = 0;
3143 s = 0;
3144 }
3145
3146 vp = (struct vnode *)fp->f_data;
3147 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3148 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3149
3150 if (error == 0 && bioops.io_fsync != NULL &&
3151 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3152 (*bioops.io_fsync)(vp, nflags);
3153
3154 VOP_UNLOCK(vp, 0);
3155 out:
3156 FILE_UNUSE(fp, l);
3157 return (error);
3158 }
3159
3160 /*
3161 * Sync the data of an open file.
3162 */
3163 /* ARGSUSED */
3164 int
3165 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3166 {
3167 struct sys_fdatasync_args /* {
3168 syscallarg(int) fd;
3169 } */ *uap = v;
3170 struct proc *p = l->l_proc;
3171 struct vnode *vp;
3172 struct file *fp;
3173 int error;
3174
3175 /* getvnode() will use the descriptor for us */
3176 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3177 return (error);
3178 if ((fp->f_flag & FWRITE) == 0) {
3179 FILE_UNUSE(fp, l);
3180 return (EBADF);
3181 }
3182 vp = (struct vnode *)fp->f_data;
3183 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3184 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3185 VOP_UNLOCK(vp, 0);
3186 FILE_UNUSE(fp, l);
3187 return (error);
3188 }
3189
3190 /*
3191 * Rename files, (standard) BSD semantics frontend.
3192 */
3193 /* ARGSUSED */
3194 int
3195 sys_rename(struct lwp *l, void *v, register_t *retval)
3196 {
3197 struct sys_rename_args /* {
3198 syscallarg(const char *) from;
3199 syscallarg(const char *) to;
3200 } */ *uap = v;
3201
3202 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3203 }
3204
3205 /*
3206 * Rename files, POSIX semantics frontend.
3207 */
3208 /* ARGSUSED */
3209 int
3210 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3211 {
3212 struct sys___posix_rename_args /* {
3213 syscallarg(const char *) from;
3214 syscallarg(const char *) to;
3215 } */ *uap = v;
3216
3217 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3218 }
3219
3220 /*
3221 * Rename files. Source and destination must either both be directories,
3222 * or both not be directories. If target is a directory, it must be empty.
3223 * If `from' and `to' refer to the same object, the value of the `retain'
3224 * argument is used to determine whether `from' will be
3225 *
3226 * (retain == 0) deleted unless `from' and `to' refer to the same
3227 * object in the file system's name space (BSD).
3228 * (retain == 1) always retained (POSIX).
3229 */
3230 static int
3231 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3232 {
3233 struct vnode *tvp, *fvp, *tdvp;
3234 struct nameidata fromnd, tond;
3235 struct proc *p;
3236 int error;
3237
3238 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, UIO_USERSPACE,
3239 from, l);
3240 if ((error = namei(&fromnd)) != 0)
3241 return (error);
3242 if (fromnd.ni_dvp != fromnd.ni_vp)
3243 VOP_UNLOCK(fromnd.ni_dvp, 0);
3244 fvp = fromnd.ni_vp;
3245 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT |
3246 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3247 if ((error = namei(&tond)) != 0) {
3248 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3249 vrele(fromnd.ni_dvp);
3250 vrele(fvp);
3251 goto out1;
3252 }
3253 tdvp = tond.ni_dvp;
3254 tvp = tond.ni_vp;
3255
3256 if (tvp != NULL) {
3257 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3258 error = ENOTDIR;
3259 goto out;
3260 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3261 error = EISDIR;
3262 goto out;
3263 }
3264 }
3265
3266 if (fvp == tdvp)
3267 error = EINVAL;
3268
3269 /*
3270 * Source and destination refer to the same object.
3271 */
3272 if (fvp == tvp) {
3273 if (retain)
3274 error = -1;
3275 else if (fromnd.ni_dvp == tdvp &&
3276 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3277 !memcmp(fromnd.ni_cnd.cn_nameptr,
3278 tond.ni_cnd.cn_nameptr,
3279 fromnd.ni_cnd.cn_namelen))
3280 error = -1;
3281 }
3282
3283 #if NVERIEXEC > 0
3284 if (!error) {
3285 char *f1, *f2;
3286
3287 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3288 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3289
3290 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3291 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3292
3293 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3294
3295 free(f1, M_TEMP);
3296 free(f2, M_TEMP);
3297 }
3298 #endif /* NVERIEXEC > 0 */
3299
3300 out:
3301 p = l->l_proc;
3302 if (!error) {
3303 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3304 if (fromnd.ni_dvp != tdvp)
3305 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3306 if (tvp) {
3307 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3308 }
3309 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3310 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3311 } else {
3312 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3313 if (tdvp == tvp)
3314 vrele(tdvp);
3315 else
3316 vput(tdvp);
3317 if (tvp)
3318 vput(tvp);
3319 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3320 vrele(fromnd.ni_dvp);
3321 vrele(fvp);
3322 }
3323 vrele(tond.ni_startdir);
3324 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3325 out1:
3326 if (fromnd.ni_startdir)
3327 vrele(fromnd.ni_startdir);
3328 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3329 return (error == -1 ? 0 : error);
3330 }
3331
3332 /*
3333 * Make a directory file.
3334 */
3335 /* ARGSUSED */
3336 int
3337 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3338 {
3339 struct sys_mkdir_args /* {
3340 syscallarg(const char *) path;
3341 syscallarg(int) mode;
3342 } */ *uap = v;
3343 struct proc *p = l->l_proc;
3344 struct vnode *vp;
3345 struct vattr vattr;
3346 int error;
3347 struct nameidata nd;
3348
3349 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3350 SCARG(uap, path), l);
3351 if ((error = namei(&nd)) != 0)
3352 return (error);
3353 vp = nd.ni_vp;
3354 if (vp != NULL) {
3355 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3356 if (nd.ni_dvp == vp)
3357 vrele(nd.ni_dvp);
3358 else
3359 vput(nd.ni_dvp);
3360 vrele(vp);
3361 return (EEXIST);
3362 }
3363 VATTR_NULL(&vattr);
3364 vattr.va_type = VDIR;
3365 vattr.va_mode =
3366 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3367 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3368 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3369 if (!error)
3370 vput(nd.ni_vp);
3371 return (error);
3372 }
3373
3374 /*
3375 * Remove a directory file.
3376 */
3377 /* ARGSUSED */
3378 int
3379 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3380 {
3381 struct sys_rmdir_args /* {
3382 syscallarg(const char *) path;
3383 } */ *uap = v;
3384 struct vnode *vp;
3385 int error;
3386 struct nameidata nd;
3387
3388 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3389 SCARG(uap, path), l);
3390 if ((error = namei(&nd)) != 0)
3391 return (error);
3392 vp = nd.ni_vp;
3393 if (vp->v_type != VDIR) {
3394 error = ENOTDIR;
3395 goto out;
3396 }
3397 /*
3398 * No rmdir "." please.
3399 */
3400 if (nd.ni_dvp == vp) {
3401 error = EINVAL;
3402 goto out;
3403 }
3404 /*
3405 * The root of a mounted filesystem cannot be deleted.
3406 */
3407 if (vp->v_flag & VROOT) {
3408 error = EBUSY;
3409 goto out;
3410 }
3411 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3412 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3413 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3414 return (error);
3415
3416 out:
3417 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3418 if (nd.ni_dvp == vp)
3419 vrele(nd.ni_dvp);
3420 else
3421 vput(nd.ni_dvp);
3422 vput(vp);
3423 return (error);
3424 }
3425
3426 /*
3427 * Read a block of directory entries in a file system independent format.
3428 */
3429 int
3430 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3431 {
3432 struct sys___getdents30_args /* {
3433 syscallarg(int) fd;
3434 syscallarg(char *) buf;
3435 syscallarg(size_t) count;
3436 } */ *uap = v;
3437 struct proc *p = l->l_proc;
3438 struct file *fp;
3439 int error, done;
3440
3441 /* getvnode() will use the descriptor for us */
3442 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3443 return (error);
3444 if ((fp->f_flag & FREAD) == 0) {
3445 error = EBADF;
3446 goto out;
3447 }
3448 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3449 SCARG(uap, count), &done, l, 0, 0);
3450 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3451 *retval = done;
3452 out:
3453 FILE_UNUSE(fp, l);
3454 return (error);
3455 }
3456
3457 /*
3458 * Set the mode mask for creation of filesystem nodes.
3459 */
3460 int
3461 sys_umask(struct lwp *l, void *v, register_t *retval)
3462 {
3463 struct sys_umask_args /* {
3464 syscallarg(mode_t) newmask;
3465 } */ *uap = v;
3466 struct proc *p = l->l_proc;
3467 struct cwdinfo *cwdi;
3468
3469 cwdi = p->p_cwdi;
3470 *retval = cwdi->cwdi_cmask;
3471 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3472 return (0);
3473 }
3474
3475 /*
3476 * Void all references to file by ripping underlying filesystem
3477 * away from vnode.
3478 */
3479 /* ARGSUSED */
3480 int
3481 sys_revoke(struct lwp *l, void *v, register_t *retval)
3482 {
3483 struct sys_revoke_args /* {
3484 syscallarg(const char *) path;
3485 } */ *uap = v;
3486 struct vnode *vp;
3487 struct vattr vattr;
3488 int error;
3489 struct nameidata nd;
3490
3491 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
3492 if ((error = namei(&nd)) != 0)
3493 return (error);
3494 vp = nd.ni_vp;
3495 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3496 goto out;
3497 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3498 (error = kauth_authorize_generic(l->l_cred,
3499 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3500 goto out;
3501 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3502 VOP_REVOKE(vp, REVOKEALL);
3503 out:
3504 vrele(vp);
3505 return (error);
3506 }
3507
3508 /*
3509 * Convert a user file descriptor to a kernel file entry.
3510 */
3511 int
3512 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3513 {
3514 struct vnode *vp;
3515 struct file *fp;
3516
3517 if ((fp = fd_getfile(fdp, fd)) == NULL)
3518 return (EBADF);
3519
3520 FILE_USE(fp);
3521
3522 if (fp->f_type != DTYPE_VNODE) {
3523 FILE_UNUSE(fp, NULL);
3524 return (EINVAL);
3525 }
3526
3527 vp = (struct vnode *)fp->f_data;
3528 if (vp->v_type == VBAD) {
3529 FILE_UNUSE(fp, NULL);
3530 return (EBADF);
3531 }
3532
3533 *fpp = fp;
3534 return (0);
3535 }
3536