vfs_syscalls.c revision 1.367 1 /* $NetBSD: vfs_syscalls.c,v 1.367 2008/06/17 16:18:01 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.367 2008/06/17 16:18:01 christos Exp $");
67
68 #include "opt_compat_netbsd.h"
69 #include "opt_compat_43.h"
70 #include "opt_fileassoc.h"
71 #include "fss.h"
72 #include "veriexec.h"
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file.h>
80 #include <sys/stat.h>
81 #include <sys/vnode.h>
82 #include <sys/mount.h>
83 #include <sys/proc.h>
84 #include <sys/uio.h>
85 #include <sys/malloc.h>
86 #include <sys/kmem.h>
87 #include <sys/dirent.h>
88 #include <sys/sysctl.h>
89 #include <sys/syscallargs.h>
90 #include <sys/vfs_syscalls.h>
91 #include <sys/ktrace.h>
92 #ifdef FILEASSOC
93 #include <sys/fileassoc.h>
94 #endif /* FILEASSOC */
95 #include <sys/verified_exec.h>
96 #include <sys/kauth.h>
97 #include <sys/atomic.h>
98 #include <sys/module.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102 #include <miscfs/specfs/specdev.h>
103
104 #ifdef COMPAT_30
105 #include "opt_nfsserver.h"
106 #include <nfs/rpcv2.h>
107 #endif
108 #include <nfs/nfsproto.h>
109 #ifdef COMPAT_30
110 #include <nfs/nfs.h>
111 #include <nfs/nfs_var.h>
112 #endif
113
114 #if NFSS > 0
115 #include <dev/fssvar.h>
116 #endif
117
/* Define the M_MOUNT malloc type (short name "mount"). */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");

/*
 * Prototypes for file-local helpers; their definitions are not part of
 * the section of the file that contains the mount code.
 */
static int change_dir(struct nameidata *, struct lwp *);
static int change_flags(struct vnode *, u_long, struct lwp *);
static int change_mode(struct vnode *, int, struct lwp *l);
static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);

/* Called after a successful mount to re-point process directories. */
void checkdirs(struct vnode *);

/*
 * NOTE(review): presumably a tunable that permits mounts by non-root
 * users; it is not referenced by the mount code near this definition,
 * so confirm against its users before relying on that.
 */
int dovfsusermount = 0;
128
129 /*
130 * Virtual File System System Calls
131 */
132
133 /*
134 * Mount a file system.
135 */
136
137 #if defined(COMPAT_09) || defined(COMPAT_43)
138 /*
139 * This table is used to maintain compatibility with 4.3BSD
140 * and NetBSD 0.9 mount syscalls. Note, the order is important!
141 *
142 * Do not modify this table. It should only contain filesystems
143 * supported by NetBSD 0.9 and 4.3BSD.
144 */
/*
 * Indexed by the historic numeric file system type.  A NULL slot marks
 * a number that has no corresponding file system name.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};
/* Number of slots in mountcompatnames[], NULL placeholders included. */
const int nmountcompatnames = sizeof(mountcompatnames) /
    sizeof(mountcompatnames[0]);
159 #endif /* COMPAT_09 || COMPAT_43 */
160
161 static int
162 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
163 void *data, size_t *data_len)
164 {
165 struct mount *mp;
166 int error = 0, saved_flags;
167
168 mp = vp->v_mount;
169 saved_flags = mp->mnt_flag;
170
171 /* We can operate only on VV_ROOT nodes. */
172 if ((vp->v_vflag & VV_ROOT) == 0) {
173 error = EINVAL;
174 goto out;
175 }
176
177 /*
178 * We only allow the filesystem to be reloaded if it
179 * is currently mounted read-only.
180 */
181 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
182 error = EOPNOTSUPP; /* Needs translation */
183 goto out;
184 }
185
186 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
187 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
188 if (error)
189 goto out;
190
191 if (vfs_busy(mp, NULL)) {
192 error = EPERM;
193 goto out;
194 }
195
196 mutex_enter(&mp->mnt_updating);
197
198 mp->mnt_flag &= ~MNT_OP_FLAGS;
199 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
200
201 /*
202 * Set the mount level flags.
203 */
204 if (flags & MNT_RDONLY)
205 mp->mnt_flag |= MNT_RDONLY;
206 else if (mp->mnt_flag & MNT_RDONLY)
207 mp->mnt_iflag |= IMNT_WANTRDWR;
208 mp->mnt_flag &=
209 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
210 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
211 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
212 mp->mnt_flag |= flags &
213 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
214 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
215 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
216 MNT_IGNORE);
217
218 error = VFS_MOUNT(mp, path, data, data_len);
219
220 #if defined(COMPAT_30) && defined(NFSSERVER)
221 if (error && data != NULL) {
222 int error2;
223
224 /* Update failed; let's try and see if it was an
225 * export request. */
226 error2 = nfs_update_exports_30(mp, path, data, l);
227
228 /* Only update error code if the export request was
229 * understood but some problem occurred while
230 * processing it. */
231 if (error2 != EJUSTRETURN)
232 error = error2;
233 }
234 #endif
235 if (mp->mnt_iflag & IMNT_WANTRDWR)
236 mp->mnt_flag &= ~MNT_RDONLY;
237 if (error)
238 mp->mnt_flag = saved_flags;
239 mp->mnt_flag &= ~MNT_OP_FLAGS;
240 mp->mnt_iflag &= ~IMNT_WANTRDWR;
241 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
242 if (mp->mnt_syncer == NULL)
243 error = vfs_allocate_syncvnode(mp);
244 } else {
245 if (mp->mnt_syncer != NULL)
246 vfs_deallocate_syncvnode(mp);
247 }
248 mutex_exit(&mp->mnt_updating);
249 vfs_unbusy(mp, false, NULL);
250
251 out:
252 return (error);
253 }
254
255 static int
256 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
257 {
258 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
259 int error;
260
261 /* Copy file-system type from userspace. */
262 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
263 if (error) {
264 #if defined(COMPAT_09) || defined(COMPAT_43)
265 /*
266 * Historically, filesystem types were identified by numbers.
267 * If we get an integer for the filesystem type instead of a
268 * string, we check to see if it matches one of the historic
269 * filesystem types.
270 */
271 u_long fsindex = (u_long)fstype;
272 if (fsindex >= nmountcompatnames ||
273 mountcompatnames[fsindex] == NULL)
274 return ENODEV;
275 strlcpy(fstypename, mountcompatnames[fsindex],
276 sizeof(fstypename));
277 #else
278 return error;
279 #endif
280 }
281
282 #ifdef COMPAT_10
283 /* Accept `ufs' as an alias for `ffs'. */
284 if (strcmp(fstypename, "ufs") == 0)
285 fstypename[0] = 'f';
286 #endif
287
288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
289 return 0;
290
291 /* If we can autoload a vfs module, try again */
292 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true);
293
294 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
295 return 0;
296
297 return ENODEV;
298 }
299
/*
 * Mount a new file system of type vfsops on the directory *vpp.
 *
 *	l		calling lwp; l->l_cred is used for all checks
 *	vpp		in: vnode to be covered (do_sys_mount() locks it with
 *			vn_lock() before calling); out: set to NULL once the
 *			mount has been entered on the mount list
 *	vfsops		operations vector of the file system to mount
 *	path		mount path, handed unchanged to VFS_MOUNT()
 *	flags		mount flags requested by the caller
 *	data		file system specific arguments for VFS_MOUNT()
 *	data_len	length of data, handed unchanged to VFS_MOUNT()
 *	recurse		value for vn_restorerecurse() on the covered vnode
 *
 * Returns 0 or an errno value.  On every return that happens before the
 * mount is put on the mount list, *vpp is left untouched and the vnode
 * is not unlocked here.
 */
static int
mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len, u_int recurse)
{
	struct mount *mp;
	struct vnode *vp = *vpp;
	struct vattr va;
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error)
		return error;

	/* Can't make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR)
		return ENOTDIR;

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
	    (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
	    (error = kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
		return error;
	}

	/* MNT_EXPORTED is not accepted for a new mount. */
	if (flags & MNT_EXPORTED)
		return EINVAL;

	if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
		return error;

	/*
	 * Check if a file-system is not already mounted on this vnode.
	 */
	if (vp->v_mountedhere != NULL)
		return EBUSY;

	/*
	 * NOTE(review): the allocation uses KM_SLEEP; confirm whether the
	 * NULL check below can ever trigger.
	 */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return ENOMEM;

	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;

	/*
	 * Initialize the list and locks of the new mount, then busy it and
	 * take mnt_updating for the duration of the setup.
	 */
	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_unmounting);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
	error = vfs_busy(mp, NULL);
	KASSERT(error == 0);
	mutex_enter(&mp->mnt_updating);

	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
	mount_initspecific(mp);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags &
	    (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_IGNORE | MNT_RDONLY);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	/*
	 * Purge name cache entries for the covered vnode.  If the file
	 * system refused the mount, tear the new mount structure down and
	 * return with *vpp untouched.
	 */
	cache_purge(vp);
	if (error != 0) {
		vp->v_mountedhere = NULL;
		mutex_exit(&mp->mnt_updating);
		vfs_unbusy(mp, false, NULL);
		vfs_destroy(mp);
		return error;
	}

	/*
	 * Put the new filesystem at the tail of the mount list and make
	 * the covered vnode point at it.
	 */
	mp->mnt_iflag &= ~IMNT_WANTRDWR;
	mutex_enter(&mountlist_lock);
	vp->v_mountedhere = mp;
	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mutex_exit(&mountlist_lock);
	vn_restorerecurse(vp, recurse);
	VOP_UNLOCK(vp, 0);
	checkdirs(vp);
	/*
	 * Only a read-write, non-async mount gets a syncer vnode.
	 * NOTE(review): this result is overwritten by VFS_START() below
	 * without being examined.
	 */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		error = vfs_allocate_syncvnode(mp);
	/* Hold an additional reference to the mount across VFS_START(). */
	mutex_exit(&mp->mnt_updating);
	vfs_unbusy(mp, true, NULL);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		/*
		 * NOTE(review): at this point mp is still on mountlist and
		 * vp->v_mountedhere still refers to it -- confirm that
		 * dropping the references here is sufficient.
		 */
		vrele(vp);
		vfs_destroy(mp);
	}
	/* Drop reference held for VFS_START(). */
	vfs_destroy(mp);
	/* Tell the caller not to release the covered vnode. */
	*vpp = NULL;
	return error;
}
411
412 static int
413 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
414 void *data, size_t *data_len)
415 {
416 struct mount *mp;
417 int error;
418
419 /* If MNT_GETARGS is specified, it should be the only flag. */
420 if (flags & ~MNT_GETARGS)
421 return EINVAL;
422
423 mp = vp->v_mount;
424
425 /* XXX: probably some notion of "can see" here if we want isolation. */
426 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
427 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
428 if (error)
429 return error;
430
431 if ((vp->v_vflag & VV_ROOT) == 0)
432 return EINVAL;
433
434 if (vfs_busy(mp, NULL))
435 return EPERM;
436
437 mutex_enter(&mp->mnt_updating);
438 mp->mnt_flag &= ~MNT_OP_FLAGS;
439 mp->mnt_flag |= MNT_GETARGS;
440 error = VFS_MOUNT(mp, path, data, data_len);
441 mp->mnt_flag &= ~MNT_OP_FLAGS;
442 mutex_exit(&mp->mnt_updating);
443
444 vfs_unbusy(mp, false, NULL);
445 return (error);
446 }
447
448 #ifdef COMPAT_40
449 /* ARGSUSED */
450 int
451 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval)
452 {
453 /* {
454 syscallarg(const char *) type;
455 syscallarg(const char *) path;
456 syscallarg(int) flags;
457 syscallarg(void *) data;
458 } */
459 register_t dummy;
460
461 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
462 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy);
463 }
464 #endif
465
466 int
467 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
468 {
469 /* {
470 syscallarg(const char *) type;
471 syscallarg(const char *) path;
472 syscallarg(int) flags;
473 syscallarg(void *) data;
474 syscallarg(size_t) data_len;
475 } */
476
477 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
478 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
479 SCARG(uap, data_len), retval);
480 }
481
482 int
483 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
484 const char *path, int flags, void *data, enum uio_seg data_seg,
485 size_t data_len, register_t *retval)
486 {
487 struct vnode *vp;
488 struct nameidata nd;
489 void *data_buf = data;
490 u_int recurse;
491 int error;
492
493 /*
494 * Get vnode to be covered
495 */
496 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
497 if ((error = namei(&nd)) != 0)
498 return (error);
499 vp = nd.ni_vp;
500
501 /*
502 * A lookup in VFS_MOUNT might result in an attempt to
503 * lock this vnode again, so make the lock recursive.
504 */
505 if (vfsops == NULL) {
506 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
507 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
508 recurse = vn_setrecurse(vp);
509 vfsops = vp->v_mount->mnt_op;
510 } else {
511 /* 'type' is userspace */
512 error = mount_get_vfsops(type, &vfsops);
513 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
514 recurse = vn_setrecurse(vp);
515 if (error != 0)
516 goto done;
517 }
518 } else {
519 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
520 recurse = vn_setrecurse(vp);
521 }
522
523 if (data != NULL && data_seg == UIO_USERSPACE) {
524 if (data_len == 0) {
525 /* No length supplied, use default for filesystem */
526 data_len = vfsops->vfs_min_mount_data;
527 if (data_len > VFS_MAX_MOUNT_DATA) {
528 /* maybe a force loaded old LKM */
529 error = EINVAL;
530 goto done;
531 }
532 #ifdef COMPAT_30
533 /* Hopefully a longer buffer won't make copyin() fail */
534 if (flags & MNT_UPDATE
535 && data_len < sizeof (struct mnt_export_args30))
536 data_len = sizeof (struct mnt_export_args30);
537 #endif
538 }
539 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
540
541 /* NFS needs the buffer even for mnt_getargs .... */
542 error = copyin(data, data_buf, data_len);
543 if (error != 0)
544 goto done;
545 }
546
547 if (flags & MNT_GETARGS) {
548 if (data_len == 0) {
549 error = EINVAL;
550 goto done;
551 }
552 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
553 if (error != 0)
554 goto done;
555 if (data_seg == UIO_USERSPACE)
556 error = copyout(data_buf, data, data_len);
557 *retval = data_len;
558 } else if (flags & MNT_UPDATE) {
559 error = mount_update(l, vp, path, flags, data_buf, &data_len);
560 } else {
561 /* Locking is handled internally in mount_domount(). */
562 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
563 &data_len, recurse);
564 }
565
566 done:
567 if (vp != NULL) {
568 vn_restorerecurse(vp, recurse);
569 vput(vp);
570 }
571 if (data_buf != data)
572 free(data_buf, M_TEMP);
573 return (error);
574 }
575
576 /*
577 * Scan all active processes to see if any of them have a current
578 * or root directory onto which the new filesystem has just been
579 * mounted. If so, replace them with the new mount point.
580 */
void
checkdirs(struct vnode *olddp)
{
	struct cwdinfo *cwdi;
	struct vnode *newdp, *rele1, *rele2;
	struct proc *p;
	bool retry;

	/* A use count of one means there is nothing to re-point. */
	if (olddp->v_usecount == 1)
		return;
	/* Fetch the root vnode of the file system now mounted on olddp. */
	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
		panic("mount: lost mount");

	/*
	 * Fix up one process per pass: proc_lock has to be dropped to take
	 * the cwdinfo lock, so the scan restarts from the beginning until a
	 * complete pass finds no process that still uses olddp.
	 */
	do {
		retry = false;
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((p->p_flag & PK_MARKER) != 0)
				continue;
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Can't change to the old directory any more,
			 * so even if we see a stale value it's not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Keep cwdi alive while proc_lock is not held. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			/* Re-check under the lock and swap in newdp. */
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				VREF(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				VREF(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			/* Drop the cwdinfo reference taken above. */
			cwdfree(cwdi);
			/* Release the replaced references to olddp. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			/* Re-take proc_lock for the unlock after the loop. */
			mutex_enter(proc_lock);
			break;
		}
		mutex_exit(proc_lock);
	} while (retry);

	/* The global root vnode follows the new mount as well. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		VREF(newdp);
		rootvnode = newdp;
	}
	/* Release the vnode obtained from VFS_ROOT(). */
	vput(newdp);
}
645
646 /*
647 * Unmount a file system.
648 *
649 * Note: unmount takes a path to the vnode mounted on as argument,
650 * not special file (as before).
651 */
652 /* ARGSUSED */
653 int
654 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
655 {
656 /* {
657 syscallarg(const char *) path;
658 syscallarg(int) flags;
659 } */
660 struct vnode *vp;
661 struct mount *mp;
662 int error;
663 struct nameidata nd;
664
665 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
666 SCARG(uap, path));
667 if ((error = namei(&nd)) != 0)
668 return (error);
669 vp = nd.ni_vp;
670 mp = vp->v_mount;
671 atomic_inc_uint(&mp->mnt_refcnt);
672 VOP_UNLOCK(vp, 0);
673
674 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
675 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
676 if (error) {
677 vrele(vp);
678 vfs_destroy(mp);
679 return (error);
680 }
681
682 /*
683 * Don't allow unmounting the root file system.
684 */
685 if (mp->mnt_flag & MNT_ROOTFS) {
686 vrele(vp);
687 vfs_destroy(mp);
688 return (EINVAL);
689 }
690
691 /*
692 * Must be the root of the filesystem
693 */
694 if ((vp->v_vflag & VV_ROOT) == 0) {
695 vrele(vp);
696 vfs_destroy(mp);
697 return (EINVAL);
698 }
699
700 vrele(vp);
701 error = dounmount(mp, SCARG(uap, flags), l);
702 return error;
703 }
704
705 /*
706 * Do the actual file system unmount. File system is assumed to have
707 * been locked by the caller.
708 *
 * => Caller acquires a reference to the mount, explicitly for unmount.
710 * => Reference will be dropped in all cases.
711 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	struct vnode *coveredvp;
	int error;
	int async;
	int used_syncer;

	/*
	 * NOTE(review): this early return, like the VFS_UNMOUNT() failure
	 * path further down, leaves without calling vfs_destroy(mp) --
	 * confirm who drops the caller's reference in those cases.
	 */
#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	/*
	 * XXX Freeze syncer.  Must do this before locking the
	 * mount point.  See the comment further below for details.
	 */
	mutex_enter(&syncer_mutex);
	rw_enter(&mp->mnt_unmounting, RW_WRITER);
	/* Already unmounted: just drop the caller's reference. */
	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		rw_exit(&mp->mnt_unmounting);
		mutex_exit(&syncer_mutex);
		vfs_destroy(mp);
		return ENOENT;
	}

	used_syncer = (mp->mnt_syncer != NULL);

	/*
	 * XXX Syncer must be frozen when we get here.  This should really
	 * be done on a per-mountpoint basis, but especially the softdep
	 * code possibly called from the syncer doesn't exactly work on a
	 * per-mountpoint basis, so the softdep code would become a maze
	 * of vfs_busy() calls.
	 *
	 * The caller of dounmount() must acquire syncer_mutex because
	 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
	 * order, and we must preserve that order to avoid deadlock.
	 *
	 * So, if the file system did not use the syncer, now is
	 * the time to release the syncer_mutex.
	 */
	if (used_syncer == 0)
		mutex_exit(&syncer_mutex);

	/*
	 * Mark the unmount as in progress and switch MNT_ASYNC off,
	 * remembering it so that a failed unmount can restore it.
	 */
	mp->mnt_iflag |= IMNT_UNMOUNT;
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vfs_deallocate_syncvnode(mp);
	error = 0;
	/* A writable file system is synced before it is unmounted. */
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
#if NFSS > 0
		error = fss_umount_hook(mp, (flags & MNT_FORCE));
#endif
		if (error == 0)
			error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	vfs_scrubvnlist(mp);
	/* MNT_FORCE attempts the unmount even when the sync failed. */
	if (error == 0 || (flags & MNT_FORCE))
		error = VFS_UNMOUNT(mp, flags);
	if (error) {
		/* Undo the preparation: syncer vnode, flags and locks. */
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			(void) vfs_allocate_syncvnode(mp);
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		mp->mnt_flag |= async;
		rw_exit(&mp->mnt_unmounting);
		if (used_syncer)
			mutex_exit(&syncer_mutex);
		return (error);
	}
	vfs_scrubvnlist(mp);
	/* Unhook the mount from the covered vnode and the mount list. */
	mutex_enter(&mountlist_lock);
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
		coveredvp->v_mountedhere = NULL;
	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
	mp->mnt_iflag |= IMNT_GONE;
	mutex_exit(&mountlist_lock);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	if (used_syncer)
		mutex_exit(&syncer_mutex);
	vfs_hooks_unmount(mp);
	rw_exit(&mp->mnt_unmounting);
	vfs_destroy(mp);	/* caller provided reference */
	vfs_destroy(mp);	/* from mount(), final nail in coffin */
	/* Finally release the vnode the file system was mounted on. */
	if (coveredvp != NULLVP)
		vrele(coveredvp);
	return (0);
}
804
805 /*
806 * Sync each mounted filesystem.
807 */
808 #ifdef DEBUG
809 int syncprt = 0;
810 struct ctldebug debug0 = { "syncprt", &syncprt };
811 #endif
812
813 /* ARGSUSED */
814 int
815 sys_sync(struct lwp *l, const void *v, register_t *retval)
816 {
817 struct mount *mp, *nmp;
818 int asyncflag;
819
820 if (l == NULL)
821 l = &lwp0;
822
823 mutex_enter(&mountlist_lock);
824 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
825 mp = nmp) {
826 if (vfs_busy(mp, &nmp)) {
827 continue;
828 }
829 mutex_enter(&mp->mnt_updating);
830 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
831 asyncflag = mp->mnt_flag & MNT_ASYNC;
832 mp->mnt_flag &= ~MNT_ASYNC;
833 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
834 if (asyncflag)
835 mp->mnt_flag |= MNT_ASYNC;
836 }
837 mutex_exit(&mp->mnt_updating);
838 vfs_unbusy(mp, false, &nmp);
839 }
840 mutex_exit(&mountlist_lock);
841 #ifdef DEBUG
842 if (syncprt)
843 vfs_bufstats();
844 #endif /* DEBUG */
845 return (0);
846 }
847
848 /*
849 * Change filesystem quotas.
850 */
851 /* ARGSUSED */
852 int
853 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
854 {
855 /* {
856 syscallarg(const char *) path;
857 syscallarg(int) cmd;
858 syscallarg(int) uid;
859 syscallarg(void *) arg;
860 } */
861 struct mount *mp;
862 int error;
863 struct nameidata nd;
864
865 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
866 SCARG(uap, path));
867 if ((error = namei(&nd)) != 0)
868 return (error);
869 mp = nd.ni_vp->v_mount;
870 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
871 SCARG(uap, arg));
872 vrele(nd.ni_vp);
873 return (error);
874 }
875
876 int
877 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
878 int root)
879 {
880 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
881 int error = 0;
882
883 /*
884 * If MNT_NOWAIT or MNT_LAZY is specified, do not
885 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
886 * overrides MNT_NOWAIT.
887 */
888 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
889 (flags != MNT_WAIT && flags != 0)) {
890 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
891 goto done;
892 }
893
894 /* Get the filesystem stats now */
895 memset(sp, 0, sizeof(*sp));
896 if ((error = VFS_STATVFS(mp, sp)) != 0) {
897 return error;
898 }
899
900 if (cwdi->cwdi_rdir == NULL)
901 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
902 done:
903 if (cwdi->cwdi_rdir != NULL) {
904 size_t len;
905 char *bp;
906 char c;
907 char *path = PNBUF_GET();
908
909 bp = path + MAXPATHLEN;
910 *--bp = '\0';
911 rw_enter(&cwdi->cwdi_lock, RW_READER);
912 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
913 MAXPATHLEN / 2, 0, l);
914 rw_exit(&cwdi->cwdi_lock);
915 if (error) {
916 PNBUF_PUT(path);
917 return error;
918 }
919 len = strlen(bp);
920 /*
921 * for mount points that are below our root, we can see
922 * them, so we fix up the pathname and return them. The
923 * rest we cannot see, so we don't allow viewing the
924 * data.
925 */
926 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
927 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
928 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
929 sizeof(sp->f_mntonname));
930 if (sp->f_mntonname[0] == '\0')
931 (void)strlcpy(sp->f_mntonname, "/",
932 sizeof(sp->f_mntonname));
933 } else {
934 if (root)
935 (void)strlcpy(sp->f_mntonname, "/",
936 sizeof(sp->f_mntonname));
937 else
938 error = EPERM;
939 }
940 PNBUF_PUT(path);
941 }
942 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
943 return error;
944 }
945
946 /*
947 * Get filesystem statistics by path.
948 */
949 int
950 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
951 {
952 struct mount *mp;
953 int error;
954 struct nameidata nd;
955
956 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
957 if ((error = namei(&nd)) != 0)
958 return error;
959 mp = nd.ni_vp->v_mount;
960 error = dostatvfs(mp, sb, l, flags, 1);
961 vrele(nd.ni_vp);
962 return error;
963 }
964
965 /* ARGSUSED */
966 int
967 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
968 {
969 /* {
970 syscallarg(const char *) path;
971 syscallarg(struct statvfs *) buf;
972 syscallarg(int) flags;
973 } */
974 struct statvfs *sb;
975 int error;
976
977 sb = STATVFSBUF_GET();
978 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
979 if (error == 0)
980 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
981 STATVFSBUF_PUT(sb);
982 return error;
983 }
984
985 /*
986 * Get filesystem statistics by fd.
987 */
988 int
989 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
990 {
991 file_t *fp;
992 struct mount *mp;
993 int error;
994
995 /* fd_getvnode() will use the descriptor for us */
996 if ((error = fd_getvnode(fd, &fp)) != 0)
997 return (error);
998 mp = ((struct vnode *)fp->f_data)->v_mount;
999 error = dostatvfs(mp, sb, curlwp, flags, 1);
1000 fd_putfile(fd);
1001 return error;
1002 }
1003
1004 /* ARGSUSED */
1005 int
1006 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1007 {
1008 /* {
1009 syscallarg(int) fd;
1010 syscallarg(struct statvfs *) buf;
1011 syscallarg(int) flags;
1012 } */
1013 struct statvfs *sb;
1014 int error;
1015
1016 sb = STATVFSBUF_GET();
1017 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1018 if (error == 0)
1019 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1020 STATVFSBUF_PUT(sb);
1021 return error;
1022 }
1023
1024
1025 /*
1026 * Get statistics on all filesystems.
1027 */
1028 int
1029 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1030 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1031 register_t *retval)
1032 {
1033 int root = 0;
1034 struct proc *p = l->l_proc;
1035 struct mount *mp, *nmp;
1036 struct statvfs *sb;
1037 size_t count, maxcount;
1038 int error = 0;
1039
1040 sb = STATVFSBUF_GET();
1041 maxcount = bufsize / entry_sz;
1042 mutex_enter(&mountlist_lock);
1043 count = 0;
1044 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1045 mp = nmp) {
1046 if (vfs_busy(mp, &nmp)) {
1047 continue;
1048 }
1049 if (sfsp && count < maxcount) {
1050 error = dostatvfs(mp, sb, l, flags, 0);
1051 if (error) {
1052 vfs_unbusy(mp, false, &nmp);
1053 error = 0;
1054 continue;
1055 }
1056 error = copyfn(sb, sfsp, entry_sz);
1057 if (error) {
1058 vfs_unbusy(mp, false, NULL);
1059 goto out;
1060 }
1061 sfsp = (char *)sfsp + entry_sz;
1062 root |= strcmp(sb->f_mntonname, "/") == 0;
1063 }
1064 count++;
1065 vfs_unbusy(mp, false, &nmp);
1066 }
1067 mutex_exit(&mountlist_lock);
1068
1069 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1070 /*
1071 * fake a root entry
1072 */
1073 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1074 sb, l, flags, 1);
1075 if (error != 0)
1076 goto out;
1077 if (sfsp) {
1078 error = copyfn(sb, sfsp, entry_sz);
1079 if (error != 0)
1080 goto out;
1081 }
1082 count++;
1083 }
1084 if (sfsp && count > maxcount)
1085 *retval = maxcount;
1086 else
1087 *retval = count;
1088 out:
1089 STATVFSBUF_PUT(sb);
1090 return error;
1091 }
1092
1093 int
1094 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1095 {
1096 /* {
1097 syscallarg(struct statvfs *) buf;
1098 syscallarg(size_t) bufsize;
1099 syscallarg(int) flags;
1100 } */
1101
1102 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1103 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1104 }
1105
1106 /*
1107 * Change current working directory to a given file descriptor.
1108 */
1109 /* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error, fd;

	/* fd_getvnode() will use the descriptor for us */
	fd = SCARG(uap, fd);
	if ((error = fd_getvnode(fd, &fp)) != 0)
		return (error);
	vp = fp->f_data;

	/*
	 * Take our own reference and lock the vnode, then require a
	 * directory that the caller is allowed to search.
	 */
	VREF(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}
	/*
	 * While a file system is mounted on the directory, move on to the
	 * root of that file system; the loop repeats for stacked mounts.
	 * The covered vnode is released with vput() before the step.
	 * NOTE(review): the code after the loop unlocks and later releases
	 * the vnode, so VFS_ROOT() presumably returns it locked and
	 * referenced -- confirm.
	 */
	while ((mp = vp->v_mountedhere) != NULL) {
		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, false, NULL);
		if (error)
			goto out;
		vp = tdp;
	}
	VOP_UNLOCK(vp, 0);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Install vp; its reference now belongs to cwdi_cdir. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	/* Release the descriptor obtained through fd_getvnode(). */
	fd_putfile(fd);
	return (error);
}
1171
1172 /*
1173 * Change this process's notion of the root directory to a given file
1174 * descriptor.
1175 */
1176 int
1177 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1178 {
1179 struct proc *p = l->l_proc;
1180 struct cwdinfo *cwdi;
1181 struct vnode *vp;
1182 file_t *fp;
1183 int error, fd = SCARG(uap, fd);
1184
1185 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1186 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1187 return error;
1188 /* fd_getvnode() will use the descriptor for us */
1189 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1190 return error;
1191 vp = fp->f_data;
1192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1193 if (vp->v_type != VDIR)
1194 error = ENOTDIR;
1195 else
1196 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1197 VOP_UNLOCK(vp, 0);
1198 if (error)
1199 goto out;
1200 VREF(vp);
1201
1202 /*
1203 * Prevent escaping from chroot by putting the root under
1204 * the working directory. Silently chdir to / if we aren't
1205 * already there.
1206 */
1207 cwdi = p->p_cwdi;
1208 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1209 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1210 /*
1211 * XXX would be more failsafe to change directory to a
1212 * deadfs node here instead
1213 */
1214 vrele(cwdi->cwdi_cdir);
1215 VREF(vp);
1216 cwdi->cwdi_cdir = vp;
1217 }
1218
1219 if (cwdi->cwdi_rdir != NULL)
1220 vrele(cwdi->cwdi_rdir);
1221 cwdi->cwdi_rdir = vp;
1222 rw_exit(&cwdi->cwdi_lock);
1223
1224 out:
1225 fd_putfile(fd);
1226 return (error);
1227 }
1228
1229 /*
1230 * Change current working directory (``.'').
1231 */
1232 /* ARGSUSED */
1233 int
1234 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1235 {
1236 /* {
1237 syscallarg(const char *) path;
1238 } */
1239 struct proc *p = l->l_proc;
1240 struct cwdinfo *cwdi;
1241 int error;
1242 struct nameidata nd;
1243
1244 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1245 SCARG(uap, path));
1246 if ((error = change_dir(&nd, l)) != 0)
1247 return (error);
1248 cwdi = p->p_cwdi;
1249 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1250 vrele(cwdi->cwdi_cdir);
1251 cwdi->cwdi_cdir = nd.ni_vp;
1252 rw_exit(&cwdi->cwdi_lock);
1253 return (0);
1254 }
1255
1256 /*
1257 * Change notion of root (``/'') directory.
1258 */
1259 /* ARGSUSED */
1260 int
1261 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1262 {
1263 /* {
1264 syscallarg(const char *) path;
1265 } */
1266 struct proc *p = l->l_proc;
1267 struct cwdinfo *cwdi;
1268 struct vnode *vp;
1269 int error;
1270 struct nameidata nd;
1271
1272 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1273 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1274 return (error);
1275 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1276 SCARG(uap, path));
1277 if ((error = change_dir(&nd, l)) != 0)
1278 return (error);
1279
1280 cwdi = p->p_cwdi;
1281 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1282 if (cwdi->cwdi_rdir != NULL)
1283 vrele(cwdi->cwdi_rdir);
1284 vp = nd.ni_vp;
1285 cwdi->cwdi_rdir = vp;
1286
1287 /*
1288 * Prevent escaping from chroot by putting the root under
1289 * the working directory. Silently chdir to / if we aren't
1290 * already there.
1291 */
1292 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1293 /*
1294 * XXX would be more failsafe to change directory to a
1295 * deadfs node here instead
1296 */
1297 vrele(cwdi->cwdi_cdir);
1298 VREF(vp);
1299 cwdi->cwdi_cdir = vp;
1300 }
1301 rw_exit(&cwdi->cwdi_lock);
1302
1303 return (0);
1304 }
1305
1306 /*
1307 * Common routine for chroot and chdir.
1308 */
1309 static int
1310 change_dir(struct nameidata *ndp, struct lwp *l)
1311 {
1312 struct vnode *vp;
1313 int error;
1314
1315 if ((error = namei(ndp)) != 0)
1316 return (error);
1317 vp = ndp->ni_vp;
1318 if (vp->v_type != VDIR)
1319 error = ENOTDIR;
1320 else
1321 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1322
1323 if (error)
1324 vput(vp);
1325 else
1326 VOP_UNLOCK(vp, 0);
1327 return (error);
1328 }
1329
1330 /*
1331 * Check permissions, allocate an open file structure,
1332 * and call the device open routine if any.
1333 */
1334 int
1335 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1336 {
1337 /* {
1338 syscallarg(const char *) path;
1339 syscallarg(int) flags;
1340 syscallarg(int) mode;
1341 } */
1342 struct proc *p = l->l_proc;
1343 struct cwdinfo *cwdi = p->p_cwdi;
1344 file_t *fp;
1345 struct vnode *vp;
1346 int flags, cmode;
1347 int type, indx, error;
1348 struct flock lf;
1349 struct nameidata nd;
1350
1351 flags = FFLAGS(SCARG(uap, flags));
1352 if ((flags & (FREAD | FWRITE)) == 0)
1353 return (EINVAL);
1354 if ((error = fd_allocfile(&fp, &indx)) != 0)
1355 return (error);
1356 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1357 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1358 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1359 SCARG(uap, path));
1360 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1361 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1362 fd_abort(p, fp, indx);
1363 if ((error == EDUPFD || error == EMOVEFD) &&
1364 l->l_dupfd >= 0 && /* XXX from fdopen */
1365 (error =
1366 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1367 *retval = indx;
1368 return (0);
1369 }
1370 if (error == ERESTART)
1371 error = EINTR;
1372 return (error);
1373 }
1374
1375 l->l_dupfd = 0;
1376 vp = nd.ni_vp;
1377 fp->f_flag = flags & FMASK;
1378 fp->f_type = DTYPE_VNODE;
1379 fp->f_ops = &vnops;
1380 fp->f_data = vp;
1381 if (flags & (O_EXLOCK | O_SHLOCK)) {
1382 lf.l_whence = SEEK_SET;
1383 lf.l_start = 0;
1384 lf.l_len = 0;
1385 if (flags & O_EXLOCK)
1386 lf.l_type = F_WRLCK;
1387 else
1388 lf.l_type = F_RDLCK;
1389 type = F_FLOCK;
1390 if ((flags & FNONBLOCK) == 0)
1391 type |= F_WAIT;
1392 VOP_UNLOCK(vp, 0);
1393 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1394 if (error) {
1395 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1396 fd_abort(p, fp, indx);
1397 return (error);
1398 }
1399 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1400 atomic_or_uint(&fp->f_flag, FHASLOCK);
1401 }
1402 VOP_UNLOCK(vp, 0);
1403 *retval = indx;
1404 fd_affix(p, fp, indx);
1405 return (0);
1406 }
1407
1408 static void
1409 vfs__fhfree(fhandle_t *fhp)
1410 {
1411 size_t fhsize;
1412
1413 if (fhp == NULL) {
1414 return;
1415 }
1416 fhsize = FHANDLE_SIZE(fhp);
1417 kmem_free(fhp, fhsize);
1418 }
1419
1420 /*
1421 * vfs_composefh: compose a filehandle.
1422 */
1423
1424 int
1425 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1426 {
1427 struct mount *mp;
1428 struct fid *fidp;
1429 int error;
1430 size_t needfhsize;
1431 size_t fidsize;
1432
1433 mp = vp->v_mount;
1434 fidp = NULL;
1435 if (*fh_size < FHANDLE_SIZE_MIN) {
1436 fidsize = 0;
1437 } else {
1438 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1439 if (fhp != NULL) {
1440 memset(fhp, 0, *fh_size);
1441 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1442 fidp = &fhp->fh_fid;
1443 }
1444 }
1445 error = VFS_VPTOFH(vp, fidp, &fidsize);
1446 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1447 if (error == 0 && *fh_size < needfhsize) {
1448 error = E2BIG;
1449 }
1450 *fh_size = needfhsize;
1451 return error;
1452 }
1453
1454 int
1455 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1456 {
1457 struct mount *mp;
1458 fhandle_t *fhp;
1459 size_t fhsize;
1460 size_t fidsize;
1461 int error;
1462
1463 *fhpp = NULL;
1464 mp = vp->v_mount;
1465 fidsize = 0;
1466 error = VFS_VPTOFH(vp, NULL, &fidsize);
1467 KASSERT(error != 0);
1468 if (error != E2BIG) {
1469 goto out;
1470 }
1471 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1472 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1473 if (fhp == NULL) {
1474 error = ENOMEM;
1475 goto out;
1476 }
1477 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1478 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1479 if (error == 0) {
1480 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1481 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1482 *fhpp = fhp;
1483 } else {
1484 kmem_free(fhp, fhsize);
1485 }
1486 out:
1487 return error;
1488 }
1489
1490 void
1491 vfs_composefh_free(fhandle_t *fhp)
1492 {
1493
1494 vfs__fhfree(fhp);
1495 }
1496
1497 /*
1498 * vfs_fhtovp: lookup a vnode by a filehandle.
1499 */
1500
1501 int
1502 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1503 {
1504 struct mount *mp;
1505 int error;
1506
1507 *vpp = NULL;
1508 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1509 if (mp == NULL) {
1510 error = ESTALE;
1511 goto out;
1512 }
1513 if (mp->mnt_op->vfs_fhtovp == NULL) {
1514 error = EOPNOTSUPP;
1515 goto out;
1516 }
1517 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1518 out:
1519 return error;
1520 }
1521
1522 /*
1523 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1524 * the needed size.
1525 */
1526
1527 int
1528 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1529 {
1530 fhandle_t *fhp;
1531 int error;
1532
1533 *fhpp = NULL;
1534 if (fhsize > FHANDLE_SIZE_MAX) {
1535 return EINVAL;
1536 }
1537 if (fhsize < FHANDLE_SIZE_MIN) {
1538 return EINVAL;
1539 }
1540 again:
1541 fhp = kmem_alloc(fhsize, KM_SLEEP);
1542 if (fhp == NULL) {
1543 return ENOMEM;
1544 }
1545 error = copyin(ufhp, fhp, fhsize);
1546 if (error == 0) {
1547 /* XXX this check shouldn't be here */
1548 if (FHANDLE_SIZE(fhp) == fhsize) {
1549 *fhpp = fhp;
1550 return 0;
1551 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1552 /*
1553 * a kludge for nfsv2 padded handles.
1554 */
1555 size_t sz;
1556
1557 sz = FHANDLE_SIZE(fhp);
1558 kmem_free(fhp, fhsize);
1559 fhsize = sz;
1560 goto again;
1561 } else {
1562 /*
1563 * userland told us wrong size.
1564 */
1565 error = EINVAL;
1566 }
1567 }
1568 kmem_free(fhp, fhsize);
1569 return error;
1570 }
1571
1572 void
1573 vfs_copyinfh_free(fhandle_t *fhp)
1574 {
1575
1576 vfs__fhfree(fhp);
1577 }
1578
1579 /*
1580 * Get file handle system call
1581 */
1582 int
1583 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1584 {
1585 /* {
1586 syscallarg(char *) fname;
1587 syscallarg(fhandle_t *) fhp;
1588 syscallarg(size_t *) fh_size;
1589 } */
1590 struct vnode *vp;
1591 fhandle_t *fh;
1592 int error;
1593 struct nameidata nd;
1594 size_t sz;
1595 size_t usz;
1596
1597 /*
1598 * Must be super user
1599 */
1600 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1601 0, NULL, NULL, NULL);
1602 if (error)
1603 return (error);
1604 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1605 SCARG(uap, fname));
1606 error = namei(&nd);
1607 if (error)
1608 return (error);
1609 vp = nd.ni_vp;
1610 error = vfs_composefh_alloc(vp, &fh);
1611 vput(vp);
1612 if (error != 0) {
1613 goto out;
1614 }
1615 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1616 if (error != 0) {
1617 goto out;
1618 }
1619 sz = FHANDLE_SIZE(fh);
1620 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1621 if (error != 0) {
1622 goto out;
1623 }
1624 if (usz >= sz) {
1625 error = copyout(fh, SCARG(uap, fhp), sz);
1626 } else {
1627 error = E2BIG;
1628 }
1629 out:
1630 vfs_composefh_free(fh);
1631 return (error);
1632 }
1633
1634 /*
1635 * Open a file given a file handle.
1636 *
1637 * Check permissions, allocate an open file structure,
1638 * and call the device open routine if any.
1639 */
1640
1641 int
1642 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1643 register_t *retval)
1644 {
1645 file_t *fp;
1646 struct vnode *vp = NULL;
1647 kauth_cred_t cred = l->l_cred;
1648 file_t *nfp;
1649 int type, indx, error=0;
1650 struct flock lf;
1651 struct vattr va;
1652 fhandle_t *fh;
1653 int flags;
1654 proc_t *p;
1655
1656 p = curproc;
1657
1658 /*
1659 * Must be super user
1660 */
1661 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1662 0, NULL, NULL, NULL)))
1663 return (error);
1664
1665 flags = FFLAGS(oflags);
1666 if ((flags & (FREAD | FWRITE)) == 0)
1667 return (EINVAL);
1668 if ((flags & O_CREAT))
1669 return (EINVAL);
1670 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1671 return (error);
1672 fp = nfp;
1673 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1674 if (error != 0) {
1675 goto bad;
1676 }
1677 error = vfs_fhtovp(fh, &vp);
1678 if (error != 0) {
1679 goto bad;
1680 }
1681
1682 /* Now do an effective vn_open */
1683
1684 if (vp->v_type == VSOCK) {
1685 error = EOPNOTSUPP;
1686 goto bad;
1687 }
1688 error = vn_openchk(vp, cred, flags);
1689 if (error != 0)
1690 goto bad;
1691 if (flags & O_TRUNC) {
1692 VOP_UNLOCK(vp, 0); /* XXX */
1693 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1694 VATTR_NULL(&va);
1695 va.va_size = 0;
1696 error = VOP_SETATTR(vp, &va, cred);
1697 if (error)
1698 goto bad;
1699 }
1700 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1701 goto bad;
1702 if (flags & FWRITE) {
1703 mutex_enter(&vp->v_interlock);
1704 vp->v_writecount++;
1705 mutex_exit(&vp->v_interlock);
1706 }
1707
1708 /* done with modified vn_open, now finish what sys_open does. */
1709
1710 fp->f_flag = flags & FMASK;
1711 fp->f_type = DTYPE_VNODE;
1712 fp->f_ops = &vnops;
1713 fp->f_data = vp;
1714 if (flags & (O_EXLOCK | O_SHLOCK)) {
1715 lf.l_whence = SEEK_SET;
1716 lf.l_start = 0;
1717 lf.l_len = 0;
1718 if (flags & O_EXLOCK)
1719 lf.l_type = F_WRLCK;
1720 else
1721 lf.l_type = F_RDLCK;
1722 type = F_FLOCK;
1723 if ((flags & FNONBLOCK) == 0)
1724 type |= F_WAIT;
1725 VOP_UNLOCK(vp, 0);
1726 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1727 if (error) {
1728 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1729 fd_abort(p, fp, indx);
1730 return (error);
1731 }
1732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1733 atomic_or_uint(&fp->f_flag, FHASLOCK);
1734 }
1735 VOP_UNLOCK(vp, 0);
1736 *retval = indx;
1737 fd_affix(p, fp, indx);
1738 vfs_copyinfh_free(fh);
1739 return (0);
1740
1741 bad:
1742 fd_abort(p, fp, indx);
1743 if (vp != NULL)
1744 vput(vp);
1745 vfs_copyinfh_free(fh);
1746 return (error);
1747 }
1748
1749 int
1750 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1751 {
1752 /* {
1753 syscallarg(const void *) fhp;
1754 syscallarg(size_t) fh_size;
1755 syscallarg(int) flags;
1756 } */
1757
1758 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1759 SCARG(uap, flags), retval);
1760 }
1761
1762 int
1763 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1764 {
1765 int error;
1766 fhandle_t *fh;
1767 struct vnode *vp;
1768
1769 /*
1770 * Must be super user
1771 */
1772 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1773 0, NULL, NULL, NULL)))
1774 return (error);
1775
1776 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1777 if (error != 0)
1778 return error;
1779
1780 error = vfs_fhtovp(fh, &vp);
1781 vfs_copyinfh_free(fh);
1782 if (error != 0)
1783 return error;
1784
1785 error = vn_stat(vp, sb);
1786 vput(vp);
1787 return error;
1788 }
1789
1790
1791 /* ARGSUSED */
1792 int
1793 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
1794 {
1795 /* {
1796 syscallarg(const void *) fhp;
1797 syscallarg(size_t) fh_size;
1798 syscallarg(struct stat *) sb;
1799 } */
1800 struct stat sb;
1801 int error;
1802
1803 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1804 if (error)
1805 return error;
1806 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1807 }
1808
1809 int
1810 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1811 int flags)
1812 {
1813 fhandle_t *fh;
1814 struct mount *mp;
1815 struct vnode *vp;
1816 int error;
1817
1818 /*
1819 * Must be super user
1820 */
1821 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1822 0, NULL, NULL, NULL)))
1823 return error;
1824
1825 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1826 if (error != 0)
1827 return error;
1828
1829 error = vfs_fhtovp(fh, &vp);
1830 vfs_copyinfh_free(fh);
1831 if (error != 0)
1832 return error;
1833
1834 mp = vp->v_mount;
1835 error = dostatvfs(mp, sb, l, flags, 1);
1836 vput(vp);
1837 return error;
1838 }
1839
1840 /* ARGSUSED */
1841 int
1842 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1843 {
1844 /* {
1845 syscallarg(const void *) fhp;
1846 syscallarg(size_t) fh_size;
1847 syscallarg(struct statvfs *) buf;
1848 syscallarg(int) flags;
1849 } */
1850 struct statvfs *sb = STATVFSBUF_GET();
1851 int error;
1852
1853 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1854 SCARG(uap, flags));
1855 if (error == 0)
1856 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1857 STATVFSBUF_PUT(sb);
1858 return error;
1859 }
1860
1861 /*
1862 * Create a special file.
1863 */
1864 /* ARGSUSED */
1865 int
1866 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
1867 {
1868 /* {
1869 syscallarg(const char *) path;
1870 syscallarg(int) mode;
1871 syscallarg(int) dev;
1872 } */
1873 struct proc *p = l->l_proc;
1874 struct vnode *vp;
1875 struct vattr vattr;
1876 int error, optype;
1877 struct nameidata nd;
1878 char *path;
1879 const char *cpath;
1880 enum uio_seg seg = UIO_USERSPACE;
1881
1882 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1883 0, NULL, NULL, NULL)) != 0)
1884 return (error);
1885
1886 optype = VOP_MKNOD_DESCOFFSET;
1887
1888 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1889 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1890
1891 if ((error = namei(&nd)) != 0)
1892 goto out;
1893 vp = nd.ni_vp;
1894 if (vp != NULL)
1895 error = EEXIST;
1896 else {
1897 VATTR_NULL(&vattr);
1898 /* We will read cwdi->cwdi_cmask unlocked. */
1899 vattr.va_mode =
1900 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1901 vattr.va_rdev = SCARG(uap, dev);
1902
1903 switch (SCARG(uap, mode) & S_IFMT) {
1904 case S_IFMT: /* used by badsect to flag bad sectors */
1905 vattr.va_type = VBAD;
1906 break;
1907 case S_IFCHR:
1908 vattr.va_type = VCHR;
1909 break;
1910 case S_IFBLK:
1911 vattr.va_type = VBLK;
1912 break;
1913 case S_IFWHT:
1914 optype = VOP_WHITEOUT_DESCOFFSET;
1915 break;
1916 case S_IFREG:
1917 #if NVERIEXEC > 0
1918 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1919 O_CREAT);
1920 #endif /* NVERIEXEC > 0 */
1921 vattr.va_type = VREG;
1922 vattr.va_rdev = VNOVAL;
1923 optype = VOP_CREATE_DESCOFFSET;
1924 break;
1925 default:
1926 error = EINVAL;
1927 break;
1928 }
1929 }
1930 if (!error) {
1931 switch (optype) {
1932 case VOP_WHITEOUT_DESCOFFSET:
1933 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1934 if (error)
1935 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1936 vput(nd.ni_dvp);
1937 break;
1938
1939 case VOP_MKNOD_DESCOFFSET:
1940 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1941 &nd.ni_cnd, &vattr);
1942 if (error == 0)
1943 vput(nd.ni_vp);
1944 break;
1945
1946 case VOP_CREATE_DESCOFFSET:
1947 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1948 &nd.ni_cnd, &vattr);
1949 if (error == 0)
1950 vput(nd.ni_vp);
1951 break;
1952 }
1953 } else {
1954 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1955 if (nd.ni_dvp == vp)
1956 vrele(nd.ni_dvp);
1957 else
1958 vput(nd.ni_dvp);
1959 if (vp)
1960 vrele(vp);
1961 }
1962 out:
1963 VERIEXEC_PATH_PUT(path);
1964 return (error);
1965 }
1966
1967 /*
1968 * Create a named pipe.
1969 */
1970 /* ARGSUSED */
1971 int
1972 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1973 {
1974 /* {
1975 syscallarg(const char *) path;
1976 syscallarg(int) mode;
1977 } */
1978 struct proc *p = l->l_proc;
1979 struct vattr vattr;
1980 int error;
1981 struct nameidata nd;
1982
1983 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1984 SCARG(uap, path));
1985 if ((error = namei(&nd)) != 0)
1986 return (error);
1987 if (nd.ni_vp != NULL) {
1988 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1989 if (nd.ni_dvp == nd.ni_vp)
1990 vrele(nd.ni_dvp);
1991 else
1992 vput(nd.ni_dvp);
1993 vrele(nd.ni_vp);
1994 return (EEXIST);
1995 }
1996 VATTR_NULL(&vattr);
1997 vattr.va_type = VFIFO;
1998 /* We will read cwdi->cwdi_cmask unlocked. */
1999 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2000 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2001 if (error == 0)
2002 vput(nd.ni_vp);
2003 return (error);
2004 }
2005
2006 /*
2007 * Make a hard file link.
2008 */
2009 /* ARGSUSED */
2010 int
2011 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2012 {
2013 /* {
2014 syscallarg(const char *) path;
2015 syscallarg(const char *) link;
2016 } */
2017 struct vnode *vp;
2018 struct nameidata nd;
2019 int error;
2020
2021 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2022 SCARG(uap, path));
2023 if ((error = namei(&nd)) != 0)
2024 return (error);
2025 vp = nd.ni_vp;
2026 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2027 SCARG(uap, link));
2028 if ((error = namei(&nd)) != 0)
2029 goto out;
2030 if (nd.ni_vp) {
2031 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2032 if (nd.ni_dvp == nd.ni_vp)
2033 vrele(nd.ni_dvp);
2034 else
2035 vput(nd.ni_dvp);
2036 vrele(nd.ni_vp);
2037 error = EEXIST;
2038 goto out;
2039 }
2040 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2041 out:
2042 vrele(vp);
2043 return (error);
2044 }
2045
2046 /*
2047 * Make a symbolic link.
2048 */
2049 /* ARGSUSED */
2050 int
2051 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2052 {
2053 /* {
2054 syscallarg(const char *) path;
2055 syscallarg(const char *) link;
2056 } */
2057 struct proc *p = l->l_proc;
2058 struct vattr vattr;
2059 char *path;
2060 int error;
2061 struct nameidata nd;
2062
2063 path = PNBUF_GET();
2064 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2065 if (error)
2066 goto out;
2067 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2068 SCARG(uap, link));
2069 if ((error = namei(&nd)) != 0)
2070 goto out;
2071 if (nd.ni_vp) {
2072 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2073 if (nd.ni_dvp == nd.ni_vp)
2074 vrele(nd.ni_dvp);
2075 else
2076 vput(nd.ni_dvp);
2077 vrele(nd.ni_vp);
2078 error = EEXIST;
2079 goto out;
2080 }
2081 VATTR_NULL(&vattr);
2082 vattr.va_type = VLNK;
2083 /* We will read cwdi->cwdi_cmask unlocked. */
2084 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2085 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2086 if (error == 0)
2087 vput(nd.ni_vp);
2088 out:
2089 PNBUF_PUT(path);
2090 return (error);
2091 }
2092
2093 /*
2094 * Delete a whiteout from the filesystem.
2095 */
2096 /* ARGSUSED */
2097 int
2098 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2099 {
2100 /* {
2101 syscallarg(const char *) path;
2102 } */
2103 int error;
2104 struct nameidata nd;
2105
2106 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2107 UIO_USERSPACE, SCARG(uap, path));
2108 error = namei(&nd);
2109 if (error)
2110 return (error);
2111
2112 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2114 if (nd.ni_dvp == nd.ni_vp)
2115 vrele(nd.ni_dvp);
2116 else
2117 vput(nd.ni_dvp);
2118 if (nd.ni_vp)
2119 vrele(nd.ni_vp);
2120 return (EEXIST);
2121 }
2122 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2123 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2124 vput(nd.ni_dvp);
2125 return (error);
2126 }
2127
2128 /*
2129 * Delete a name from the filesystem.
2130 */
2131 /* ARGSUSED */
2132 int
2133 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2134 {
2135 /* {
2136 syscallarg(const char *) path;
2137 } */
2138
2139 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2140 }
2141
2142 int
2143 do_sys_unlink(const char *arg, enum uio_seg seg)
2144 {
2145 struct vnode *vp;
2146 int error;
2147 struct nameidata nd;
2148 kauth_cred_t cred;
2149 char *path;
2150 const char *cpath;
2151
2152 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2153 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2154
2155 if ((error = namei(&nd)) != 0)
2156 goto out;
2157 vp = nd.ni_vp;
2158
2159 /*
2160 * The root of a mounted filesystem cannot be deleted.
2161 */
2162 if (vp->v_vflag & VV_ROOT) {
2163 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2164 if (nd.ni_dvp == vp)
2165 vrele(nd.ni_dvp);
2166 else
2167 vput(nd.ni_dvp);
2168 vput(vp);
2169 error = EBUSY;
2170 goto out;
2171 }
2172
2173 #if NVERIEXEC > 0
2174 /* Handle remove requests for veriexec entries. */
2175 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2176 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2177 if (nd.ni_dvp == vp)
2178 vrele(nd.ni_dvp);
2179 else
2180 vput(nd.ni_dvp);
2181 vput(vp);
2182 goto out;
2183 }
2184 #endif /* NVERIEXEC > 0 */
2185
2186 cred = kauth_cred_get();
2187 #ifdef FILEASSOC
2188 (void)fileassoc_file_delete(vp);
2189 #endif /* FILEASSOC */
2190 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2191 out:
2192 VERIEXEC_PATH_PUT(path);
2193 return (error);
2194 }
2195
2196 /*
2197 * Reposition read/write file offset.
2198 */
2199 int
2200 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2201 {
2202 /* {
2203 syscallarg(int) fd;
2204 syscallarg(int) pad;
2205 syscallarg(off_t) offset;
2206 syscallarg(int) whence;
2207 } */
2208 kauth_cred_t cred = l->l_cred;
2209 file_t *fp;
2210 struct vnode *vp;
2211 struct vattr vattr;
2212 off_t newoff;
2213 int error, fd;
2214
2215 fd = SCARG(uap, fd);
2216
2217 if ((fp = fd_getfile(fd)) == NULL)
2218 return (EBADF);
2219
2220 vp = fp->f_data;
2221 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2222 error = ESPIPE;
2223 goto out;
2224 }
2225
2226 switch (SCARG(uap, whence)) {
2227 case SEEK_CUR:
2228 newoff = fp->f_offset + SCARG(uap, offset);
2229 break;
2230 case SEEK_END:
2231 error = VOP_GETATTR(vp, &vattr, cred);
2232 if (error) {
2233 goto out;
2234 }
2235 newoff = SCARG(uap, offset) + vattr.va_size;
2236 break;
2237 case SEEK_SET:
2238 newoff = SCARG(uap, offset);
2239 break;
2240 default:
2241 error = EINVAL;
2242 goto out;
2243 }
2244 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2245 *(off_t *)retval = fp->f_offset = newoff;
2246 }
2247 out:
2248 fd_putfile(fd);
2249 return (error);
2250 }
2251
2252 /*
2253 * Positional read system call.
2254 */
2255 int
2256 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2257 {
2258 /* {
2259 syscallarg(int) fd;
2260 syscallarg(void *) buf;
2261 syscallarg(size_t) nbyte;
2262 syscallarg(off_t) offset;
2263 } */
2264 file_t *fp;
2265 struct vnode *vp;
2266 off_t offset;
2267 int error, fd = SCARG(uap, fd);
2268
2269 if ((fp = fd_getfile(fd)) == NULL)
2270 return (EBADF);
2271
2272 if ((fp->f_flag & FREAD) == 0) {
2273 fd_putfile(fd);
2274 return (EBADF);
2275 }
2276
2277 vp = fp->f_data;
2278 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2279 error = ESPIPE;
2280 goto out;
2281 }
2282
2283 offset = SCARG(uap, offset);
2284
2285 /*
2286 * XXX This works because no file systems actually
2287 * XXX take any action on the seek operation.
2288 */
2289 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2290 goto out;
2291
2292 /* dofileread() will unuse the descriptor for us */
2293 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2294 &offset, 0, retval));
2295
2296 out:
2297 fd_putfile(fd);
2298 return (error);
2299 }
2300
2301 /*
2302 * Positional scatter read system call.
2303 */
2304 int
2305 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2306 {
2307 /* {
2308 syscallarg(int) fd;
2309 syscallarg(const struct iovec *) iovp;
2310 syscallarg(int) iovcnt;
2311 syscallarg(off_t) offset;
2312 } */
2313 off_t offset = SCARG(uap, offset);
2314
2315 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2316 SCARG(uap, iovcnt), &offset, 0, retval);
2317 }
2318
2319 /*
2320 * Positional write system call.
2321 */
2322 int
2323 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2324 {
2325 /* {
2326 syscallarg(int) fd;
2327 syscallarg(const void *) buf;
2328 syscallarg(size_t) nbyte;
2329 syscallarg(off_t) offset;
2330 } */
2331 file_t *fp;
2332 struct vnode *vp;
2333 off_t offset;
2334 int error, fd = SCARG(uap, fd);
2335
2336 if ((fp = fd_getfile(fd)) == NULL)
2337 return (EBADF);
2338
2339 if ((fp->f_flag & FWRITE) == 0) {
2340 fd_putfile(fd);
2341 return (EBADF);
2342 }
2343
2344 vp = fp->f_data;
2345 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2346 error = ESPIPE;
2347 goto out;
2348 }
2349
2350 offset = SCARG(uap, offset);
2351
2352 /*
2353 * XXX This works because no file systems actually
2354 * XXX take any action on the seek operation.
2355 */
2356 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2357 goto out;
2358
2359 /* dofilewrite() will unuse the descriptor for us */
2360 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2361 &offset, 0, retval));
2362
2363 out:
2364 fd_putfile(fd);
2365 return (error);
2366 }
2367
2368 /*
2369 * Positional gather write system call.
2370 */
2371 int
2372 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2373 {
2374 /* {
2375 syscallarg(int) fd;
2376 syscallarg(const struct iovec *) iovp;
2377 syscallarg(int) iovcnt;
2378 syscallarg(off_t) offset;
2379 } */
2380 off_t offset = SCARG(uap, offset);
2381
2382 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2383 SCARG(uap, iovcnt), &offset, 0, retval);
2384 }
2385
2386 /*
2387 * Check access permissions.
2388 */
2389 int
2390 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2391 {
2392 /* {
2393 syscallarg(const char *) path;
2394 syscallarg(int) flags;
2395 } */
2396 kauth_cred_t cred;
2397 struct vnode *vp;
2398 int error, flags;
2399 struct nameidata nd;
2400
2401 cred = kauth_cred_dup(l->l_cred);
2402 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2403 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2404 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2405 SCARG(uap, path));
2406 /* Override default credentials */
2407 nd.ni_cnd.cn_cred = cred;
2408 if ((error = namei(&nd)) != 0)
2409 goto out;
2410 vp = nd.ni_vp;
2411
2412 /* Flags == 0 means only check for existence. */
2413 if (SCARG(uap, flags)) {
2414 flags = 0;
2415 if (SCARG(uap, flags) & R_OK)
2416 flags |= VREAD;
2417 if (SCARG(uap, flags) & W_OK)
2418 flags |= VWRITE;
2419 if (SCARG(uap, flags) & X_OK)
2420 flags |= VEXEC;
2421
2422 error = VOP_ACCESS(vp, flags, cred);
2423 if (!error && (flags & VWRITE))
2424 error = vn_writechk(vp);
2425 }
2426 vput(vp);
2427 out:
2428 kauth_cred_free(cred);
2429 return (error);
2430 }
2431
2432 /*
2433 * Common code for all sys_stat functions, including compat versions.
2434 */
2435 int
2436 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2437 {
2438 int error;
2439 struct nameidata nd;
2440
2441 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2442 UIO_USERSPACE, path);
2443 error = namei(&nd);
2444 if (error != 0)
2445 return error;
2446 error = vn_stat(nd.ni_vp, sb);
2447 vput(nd.ni_vp);
2448 return error;
2449 }
2450
2451 /*
2452 * Get file status; this version follows links.
2453 */
2454 /* ARGSUSED */
2455 int
2456 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
2457 {
2458 /* {
2459 syscallarg(const char *) path;
2460 syscallarg(struct stat *) ub;
2461 } */
2462 struct stat sb;
2463 int error;
2464
2465 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2466 if (error)
2467 return error;
2468 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2469 }
2470
2471 /*
2472 * Get file status; this version does not follow links.
2473 */
2474 /* ARGSUSED */
2475 int
2476 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
2477 {
2478 /* {
2479 syscallarg(const char *) path;
2480 syscallarg(struct stat *) ub;
2481 } */
2482 struct stat sb;
2483 int error;
2484
2485 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2486 if (error)
2487 return error;
2488 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2489 }
2490
2491 /*
2492 * Get configurable pathname variables.
2493 */
2494 /* ARGSUSED */
2495 int
2496 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2497 {
2498 /* {
2499 syscallarg(const char *) path;
2500 syscallarg(int) name;
2501 } */
2502 int error;
2503 struct nameidata nd;
2504
2505 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2506 SCARG(uap, path));
2507 if ((error = namei(&nd)) != 0)
2508 return (error);
2509 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2510 vput(nd.ni_vp);
2511 return (error);
2512 }
2513
2514 /*
2515 * Return target name of a symbolic link.
2516 */
2517 /* ARGSUSED */
2518 int
2519 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2520 {
2521 /* {
2522 syscallarg(const char *) path;
2523 syscallarg(char *) buf;
2524 syscallarg(size_t) count;
2525 } */
2526 struct vnode *vp;
2527 struct iovec aiov;
2528 struct uio auio;
2529 int error;
2530 struct nameidata nd;
2531
2532 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2533 SCARG(uap, path));
2534 if ((error = namei(&nd)) != 0)
2535 return (error);
2536 vp = nd.ni_vp;
2537 if (vp->v_type != VLNK)
2538 error = EINVAL;
2539 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2540 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2541 aiov.iov_base = SCARG(uap, buf);
2542 aiov.iov_len = SCARG(uap, count);
2543 auio.uio_iov = &aiov;
2544 auio.uio_iovcnt = 1;
2545 auio.uio_offset = 0;
2546 auio.uio_rw = UIO_READ;
2547 KASSERT(l == curlwp);
2548 auio.uio_vmspace = l->l_proc->p_vmspace;
2549 auio.uio_resid = SCARG(uap, count);
2550 error = VOP_READLINK(vp, &auio, l->l_cred);
2551 }
2552 vput(vp);
2553 *retval = SCARG(uap, count) - auio.uio_resid;
2554 return (error);
2555 }
2556
2557 /*
2558 * Change flags of a file given a path name.
2559 */
2560 /* ARGSUSED */
2561 int
2562 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2563 {
2564 /* {
2565 syscallarg(const char *) path;
2566 syscallarg(u_long) flags;
2567 } */
2568 struct vnode *vp;
2569 int error;
2570 struct nameidata nd;
2571
2572 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2573 SCARG(uap, path));
2574 if ((error = namei(&nd)) != 0)
2575 return (error);
2576 vp = nd.ni_vp;
2577 error = change_flags(vp, SCARG(uap, flags), l);
2578 vput(vp);
2579 return (error);
2580 }
2581
2582 /*
2583 * Change flags of a file given a file descriptor.
2584 */
2585 /* ARGSUSED */
2586 int
2587 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2588 {
2589 /* {
2590 syscallarg(int) fd;
2591 syscallarg(u_long) flags;
2592 } */
2593 struct vnode *vp;
2594 file_t *fp;
2595 int error;
2596
2597 /* fd_getvnode() will use the descriptor for us */
2598 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2599 return (error);
2600 vp = fp->f_data;
2601 error = change_flags(vp, SCARG(uap, flags), l);
2602 VOP_UNLOCK(vp, 0);
2603 fd_putfile(SCARG(uap, fd));
2604 return (error);
2605 }
2606
2607 /*
2608 * Change flags of a file given a path name; this version does
2609 * not follow links.
2610 */
2611 int
2612 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2613 {
2614 /* {
2615 syscallarg(const char *) path;
2616 syscallarg(u_long) flags;
2617 } */
2618 struct vnode *vp;
2619 int error;
2620 struct nameidata nd;
2621
2622 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2623 SCARG(uap, path));
2624 if ((error = namei(&nd)) != 0)
2625 return (error);
2626 vp = nd.ni_vp;
2627 error = change_flags(vp, SCARG(uap, flags), l);
2628 vput(vp);
2629 return (error);
2630 }
2631
2632 /*
2633 * Common routine to change flags of a file.
2634 */
2635 int
2636 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2637 {
2638 struct vattr vattr;
2639 int error;
2640
2641 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2642 /*
2643 * Non-superusers cannot change the flags on devices, even if they
2644 * own them.
2645 */
2646 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2647 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2648 goto out;
2649 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2650 error = EINVAL;
2651 goto out;
2652 }
2653 }
2654 VATTR_NULL(&vattr);
2655 vattr.va_flags = flags;
2656 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2657 out:
2658 return (error);
2659 }
2660
2661 /*
2662 * Change mode of a file given path name; this version follows links.
2663 */
2664 /* ARGSUSED */
2665 int
2666 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2667 {
2668 /* {
2669 syscallarg(const char *) path;
2670 syscallarg(int) mode;
2671 } */
2672 int error;
2673 struct nameidata nd;
2674
2675 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2676 SCARG(uap, path));
2677 if ((error = namei(&nd)) != 0)
2678 return (error);
2679
2680 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2681
2682 vrele(nd.ni_vp);
2683 return (error);
2684 }
2685
2686 /*
2687 * Change mode of a file given a file descriptor.
2688 */
2689 /* ARGSUSED */
2690 int
2691 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2692 {
2693 /* {
2694 syscallarg(int) fd;
2695 syscallarg(int) mode;
2696 } */
2697 file_t *fp;
2698 int error;
2699
2700 /* fd_getvnode() will use the descriptor for us */
2701 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2702 return (error);
2703 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2704 fd_putfile(SCARG(uap, fd));
2705 return (error);
2706 }
2707
2708 /*
2709 * Change mode of a file given path name; this version does not follow links.
2710 */
2711 /* ARGSUSED */
2712 int
2713 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2714 {
2715 /* {
2716 syscallarg(const char *) path;
2717 syscallarg(int) mode;
2718 } */
2719 int error;
2720 struct nameidata nd;
2721
2722 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2723 SCARG(uap, path));
2724 if ((error = namei(&nd)) != 0)
2725 return (error);
2726
2727 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2728
2729 vrele(nd.ni_vp);
2730 return (error);
2731 }
2732
2733 /*
2734 * Common routine to set mode given a vnode.
2735 */
2736 static int
2737 change_mode(struct vnode *vp, int mode, struct lwp *l)
2738 {
2739 struct vattr vattr;
2740 int error;
2741
2742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2743 VATTR_NULL(&vattr);
2744 vattr.va_mode = mode & ALLPERMS;
2745 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2746 VOP_UNLOCK(vp, 0);
2747 return (error);
2748 }
2749
2750 /*
2751 * Set ownership given a path name; this version follows links.
2752 */
2753 /* ARGSUSED */
2754 int
2755 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2756 {
2757 /* {
2758 syscallarg(const char *) path;
2759 syscallarg(uid_t) uid;
2760 syscallarg(gid_t) gid;
2761 } */
2762 int error;
2763 struct nameidata nd;
2764
2765 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2766 SCARG(uap, path));
2767 if ((error = namei(&nd)) != 0)
2768 return (error);
2769
2770 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2771
2772 vrele(nd.ni_vp);
2773 return (error);
2774 }
2775
2776 /*
2777 * Set ownership given a path name; this version follows links.
2778 * Provides POSIX semantics.
2779 */
2780 /* ARGSUSED */
2781 int
2782 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2783 {
2784 /* {
2785 syscallarg(const char *) path;
2786 syscallarg(uid_t) uid;
2787 syscallarg(gid_t) gid;
2788 } */
2789 int error;
2790 struct nameidata nd;
2791
2792 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2793 SCARG(uap, path));
2794 if ((error = namei(&nd)) != 0)
2795 return (error);
2796
2797 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2798
2799 vrele(nd.ni_vp);
2800 return (error);
2801 }
2802
2803 /*
2804 * Set ownership given a file descriptor.
2805 */
2806 /* ARGSUSED */
2807 int
2808 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2809 {
2810 /* {
2811 syscallarg(int) fd;
2812 syscallarg(uid_t) uid;
2813 syscallarg(gid_t) gid;
2814 } */
2815 int error;
2816 file_t *fp;
2817
2818 /* fd_getvnode() will use the descriptor for us */
2819 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2820 return (error);
2821 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2822 l, 0);
2823 fd_putfile(SCARG(uap, fd));
2824 return (error);
2825 }
2826
2827 /*
2828 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2829 */
2830 /* ARGSUSED */
2831 int
2832 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2833 {
2834 /* {
2835 syscallarg(int) fd;
2836 syscallarg(uid_t) uid;
2837 syscallarg(gid_t) gid;
2838 } */
2839 int error;
2840 file_t *fp;
2841
2842 /* fd_getvnode() will use the descriptor for us */
2843 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2844 return (error);
2845 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2846 l, 1);
2847 fd_putfile(SCARG(uap, fd));
2848 return (error);
2849 }
2850
2851 /*
2852 * Set ownership given a path name; this version does not follow links.
2853 */
2854 /* ARGSUSED */
2855 int
2856 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2857 {
2858 /* {
2859 syscallarg(const char *) path;
2860 syscallarg(uid_t) uid;
2861 syscallarg(gid_t) gid;
2862 } */
2863 int error;
2864 struct nameidata nd;
2865
2866 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2867 SCARG(uap, path));
2868 if ((error = namei(&nd)) != 0)
2869 return (error);
2870
2871 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2872
2873 vrele(nd.ni_vp);
2874 return (error);
2875 }
2876
2877 /*
2878 * Set ownership given a path name; this version does not follow links.
2879 * Provides POSIX/XPG semantics.
2880 */
2881 /* ARGSUSED */
2882 int
2883 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2884 {
2885 /* {
2886 syscallarg(const char *) path;
2887 syscallarg(uid_t) uid;
2888 syscallarg(gid_t) gid;
2889 } */
2890 int error;
2891 struct nameidata nd;
2892
2893 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2894 SCARG(uap, path));
2895 if ((error = namei(&nd)) != 0)
2896 return (error);
2897
2898 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2899
2900 vrele(nd.ni_vp);
2901 return (error);
2902 }
2903
2904 /*
2905 * Common routine to set ownership given a vnode.
2906 */
2907 static int
2908 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2909 int posix_semantics)
2910 {
2911 struct vattr vattr;
2912 mode_t newmode;
2913 int error;
2914
2915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2916 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2917 goto out;
2918
2919 #define CHANGED(x) ((int)(x) != -1)
2920 newmode = vattr.va_mode;
2921 if (posix_semantics) {
2922 /*
2923 * POSIX/XPG semantics: if the caller is not the super-user,
2924 * clear set-user-id and set-group-id bits. Both POSIX and
2925 * the XPG consider the behaviour for calls by the super-user
2926 * implementation-defined; we leave the set-user-id and set-
2927 * group-id settings intact in that case.
2928 */
2929 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2930 NULL) != 0)
2931 newmode &= ~(S_ISUID | S_ISGID);
2932 } else {
2933 /*
2934 * NetBSD semantics: when changing owner and/or group,
2935 * clear the respective bit(s).
2936 */
2937 if (CHANGED(uid))
2938 newmode &= ~S_ISUID;
2939 if (CHANGED(gid))
2940 newmode &= ~S_ISGID;
2941 }
2942 /* Update va_mode iff altered. */
2943 if (vattr.va_mode == newmode)
2944 newmode = VNOVAL;
2945
2946 VATTR_NULL(&vattr);
2947 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2948 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2949 vattr.va_mode = newmode;
2950 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2951 #undef CHANGED
2952
2953 out:
2954 VOP_UNLOCK(vp, 0);
2955 return (error);
2956 }
2957
2958 /*
2959 * Set the access and modification times given a path name; this
2960 * version follows links.
2961 */
2962 /* ARGSUSED */
2963 int
2964 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
2965 {
2966 /* {
2967 syscallarg(const char *) path;
2968 syscallarg(const struct timeval *) tptr;
2969 } */
2970
2971 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2972 SCARG(uap, tptr), UIO_USERSPACE);
2973 }
2974
2975 /*
2976 * Set the access and modification times given a file descriptor.
2977 */
2978 /* ARGSUSED */
2979 int
2980 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
2981 {
2982 /* {
2983 syscallarg(int) fd;
2984 syscallarg(const struct timeval *) tptr;
2985 } */
2986 int error;
2987 file_t *fp;
2988
2989 /* fd_getvnode() will use the descriptor for us */
2990 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2991 return (error);
2992 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2993 UIO_USERSPACE);
2994 fd_putfile(SCARG(uap, fd));
2995 return (error);
2996 }
2997
2998 /*
2999 * Set the access and modification times given a path name; this
3000 * version does not follow links.
3001 */
3002 int
3003 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
3004 {
3005 /* {
3006 syscallarg(const char *) path;
3007 syscallarg(const struct timeval *) tptr;
3008 } */
3009
3010 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3011 SCARG(uap, tptr), UIO_USERSPACE);
3012 }
3013
3014 /*
3015 * Common routine to set access and modification times given a vnode.
3016 */
3017 int
3018 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3019 const struct timeval *tptr, enum uio_seg seg)
3020 {
3021 struct vattr vattr;
3022 struct nameidata nd;
3023 int error;
3024 bool vanull, setbirthtime;
3025 struct timespec ts[2];
3026
3027 if (tptr == NULL) {
3028 vanull = true;
3029 nanotime(&ts[0]);
3030 ts[1] = ts[0];
3031 } else {
3032 struct timeval tv[2];
3033
3034 vanull = false;
3035 if (seg != UIO_SYSSPACE) {
3036 error = copyin(tptr, &tv, sizeof (tv));
3037 if (error != 0)
3038 return error;
3039 tptr = tv;
3040 }
3041 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3042 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3043 }
3044
3045 if (vp == NULL) {
3046 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3047 if ((error = namei(&nd)) != 0)
3048 return error;
3049 vp = nd.ni_vp;
3050 } else
3051 nd.ni_vp = NULL;
3052
3053 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3054 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3055 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3056 VATTR_NULL(&vattr);
3057 vattr.va_mtime = ts[0];
3058 vattr.va_atime = ts[1];
3059 if (setbirthtime)
3060 vattr.va_birthtime = ts[1];
3061 if (vanull)
3062 vattr.va_flags |= VA_UTIMES_NULL;
3063 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3064 VOP_UNLOCK(vp, 0);
3065
3066 if (nd.ni_vp != NULL)
3067 vrele(nd.ni_vp);
3068
3069 return error;
3070 }
3071
3072 /*
3073 * Truncate a file given its path name.
3074 */
3075 /* ARGSUSED */
3076 int
3077 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3078 {
3079 /* {
3080 syscallarg(const char *) path;
3081 syscallarg(int) pad;
3082 syscallarg(off_t) length;
3083 } */
3084 struct vnode *vp;
3085 struct vattr vattr;
3086 int error;
3087 struct nameidata nd;
3088
3089 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3090 SCARG(uap, path));
3091 if ((error = namei(&nd)) != 0)
3092 return (error);
3093 vp = nd.ni_vp;
3094 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3095 if (vp->v_type == VDIR)
3096 error = EISDIR;
3097 else if ((error = vn_writechk(vp)) == 0 &&
3098 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3099 VATTR_NULL(&vattr);
3100 vattr.va_size = SCARG(uap, length);
3101 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3102 }
3103 vput(vp);
3104 return (error);
3105 }
3106
3107 /*
3108 * Truncate a file given a file descriptor.
3109 */
3110 /* ARGSUSED */
3111 int
3112 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3113 {
3114 /* {
3115 syscallarg(int) fd;
3116 syscallarg(int) pad;
3117 syscallarg(off_t) length;
3118 } */
3119 struct vattr vattr;
3120 struct vnode *vp;
3121 file_t *fp;
3122 int error;
3123
3124 /* fd_getvnode() will use the descriptor for us */
3125 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3126 return (error);
3127 if ((fp->f_flag & FWRITE) == 0) {
3128 error = EINVAL;
3129 goto out;
3130 }
3131 vp = fp->f_data;
3132 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3133 if (vp->v_type == VDIR)
3134 error = EISDIR;
3135 else if ((error = vn_writechk(vp)) == 0) {
3136 VATTR_NULL(&vattr);
3137 vattr.va_size = SCARG(uap, length);
3138 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3139 }
3140 VOP_UNLOCK(vp, 0);
3141 out:
3142 fd_putfile(SCARG(uap, fd));
3143 return (error);
3144 }
3145
3146 /*
3147 * Sync an open file.
3148 */
3149 /* ARGSUSED */
3150 int
3151 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3152 {
3153 /* {
3154 syscallarg(int) fd;
3155 } */
3156 struct vnode *vp;
3157 file_t *fp;
3158 int error;
3159
3160 /* fd_getvnode() will use the descriptor for us */
3161 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3162 return (error);
3163 vp = fp->f_data;
3164 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3165 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3166 if (error == 0 && bioopsp != NULL &&
3167 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3168 (*bioopsp->io_fsync)(vp, 0);
3169 VOP_UNLOCK(vp, 0);
3170 fd_putfile(SCARG(uap, fd));
3171 return (error);
3172 }
3173
3174 /*
3175 * Sync a range of file data. API modeled after that found in AIX.
3176 *
3177 * FDATASYNC indicates that we need only save enough metadata to be able
3178 * to re-read the written data. Note we duplicate AIX's requirement that
3179 * the file be open for writing.
3180 */
3181 /* ARGSUSED */
3182 int
3183 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3184 {
3185 /* {
3186 syscallarg(int) fd;
3187 syscallarg(int) flags;
3188 syscallarg(off_t) start;
3189 syscallarg(off_t) length;
3190 } */
3191 struct vnode *vp;
3192 file_t *fp;
3193 int flags, nflags;
3194 off_t s, e, len;
3195 int error;
3196
3197 /* fd_getvnode() will use the descriptor for us */
3198 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3199 return (error);
3200
3201 if ((fp->f_flag & FWRITE) == 0) {
3202 error = EBADF;
3203 goto out;
3204 }
3205
3206 flags = SCARG(uap, flags);
3207 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3208 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3209 error = EINVAL;
3210 goto out;
3211 }
3212 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3213 if (flags & FDATASYNC)
3214 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3215 else
3216 nflags = FSYNC_WAIT;
3217 if (flags & FDISKSYNC)
3218 nflags |= FSYNC_CACHE;
3219
3220 len = SCARG(uap, length);
3221 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3222 if (len) {
3223 s = SCARG(uap, start);
3224 e = s + len;
3225 if (e < s) {
3226 error = EINVAL;
3227 goto out;
3228 }
3229 } else {
3230 e = 0;
3231 s = 0;
3232 }
3233
3234 vp = fp->f_data;
3235 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3236 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3237
3238 if (error == 0 && bioopsp != NULL &&
3239 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3240 (*bioopsp->io_fsync)(vp, nflags);
3241
3242 VOP_UNLOCK(vp, 0);
3243 out:
3244 fd_putfile(SCARG(uap, fd));
3245 return (error);
3246 }
3247
3248 /*
3249 * Sync the data of an open file.
3250 */
3251 /* ARGSUSED */
3252 int
3253 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3254 {
3255 /* {
3256 syscallarg(int) fd;
3257 } */
3258 struct vnode *vp;
3259 file_t *fp;
3260 int error;
3261
3262 /* fd_getvnode() will use the descriptor for us */
3263 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3264 return (error);
3265 if ((fp->f_flag & FWRITE) == 0) {
3266 fd_putfile(SCARG(uap, fd));
3267 return (EBADF);
3268 }
3269 vp = fp->f_data;
3270 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3271 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3272 VOP_UNLOCK(vp, 0);
3273 fd_putfile(SCARG(uap, fd));
3274 return (error);
3275 }
3276
3277 /*
3278 * Rename files, (standard) BSD semantics frontend.
3279 */
3280 /* ARGSUSED */
3281 int
3282 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3283 {
3284 /* {
3285 syscallarg(const char *) from;
3286 syscallarg(const char *) to;
3287 } */
3288
3289 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3290 }
3291
3292 /*
3293 * Rename files, POSIX semantics frontend.
3294 */
3295 /* ARGSUSED */
3296 int
3297 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3298 {
3299 /* {
3300 syscallarg(const char *) from;
3301 syscallarg(const char *) to;
3302 } */
3303
3304 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3305 }
3306
3307 /*
3308 * Rename files. Source and destination must either both be directories,
3309 * or both not be directories. If target is a directory, it must be empty.
3310 * If `from' and `to' refer to the same object, the value of the `retain'
3311 * argument is used to determine whether `from' will be
3312 *
3313 * (retain == 0) deleted unless `from' and `to' refer to the same
3314 * object in the file system's name space (BSD).
3315 * (retain == 1) always retained (POSIX).
3316 */
3317 int
3318 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3319 {
3320 struct vnode *tvp, *fvp, *tdvp;
3321 struct nameidata fromnd, tond;
3322 struct mount *fs;
3323 struct lwp *l = curlwp;
3324 struct proc *p;
3325 uint32_t saveflag;
3326 int error;
3327
3328 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3329 seg, from);
3330 if ((error = namei(&fromnd)) != 0)
3331 return (error);
3332 if (fromnd.ni_dvp != fromnd.ni_vp)
3333 VOP_UNLOCK(fromnd.ni_dvp, 0);
3334 fvp = fromnd.ni_vp;
3335
3336 fs = fvp->v_mount;
3337 error = VFS_RENAMELOCK_ENTER(fs);
3338 if (error) {
3339 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3340 vrele(fromnd.ni_dvp);
3341 vrele(fvp);
3342 goto out1;
3343 }
3344
3345 /*
3346 * close, partially, yet another race - ideally we should only
3347 * go as far as getting fromnd.ni_dvp before getting the per-fs
3348 * lock, and then continue to get fromnd.ni_vp, but we can't do
3349 * that with namei as it stands.
3350 *
3351 * This still won't prevent rmdir from nuking fromnd.ni_vp
3352 * under us. The real fix is to get the locks in the right
3353 * order and do the lookups in the right places, but that's a
3354 * major rototill.
3355 *
3356 * Preserve the SAVESTART in cn_flags, because who knows what
3357 * might happen if we don't.
3358 *
3359 * Note: this logic (as well as this whole function) is cloned
3360 * in nfs_serv.c. Proceed accordingly.
3361 */
3362 vrele(fvp);
3363 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3364 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3365 (fromnd.ni_cnd.cn_namelen == 2 &&
3366 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3367 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3368 error = EINVAL;
3369 VFS_RENAMELOCK_EXIT(fs);
3370 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3371 vrele(fromnd.ni_dvp);
3372 goto out1;
3373 }
3374 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3375 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3376 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3377 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3378 fromnd.ni_cnd.cn_flags |= saveflag;
3379 if (error) {
3380 VOP_UNLOCK(fromnd.ni_dvp, 0);
3381 VFS_RENAMELOCK_EXIT(fs);
3382 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3383 vrele(fromnd.ni_dvp);
3384 goto out1;
3385 }
3386 VOP_UNLOCK(fromnd.ni_vp, 0);
3387 if (fromnd.ni_dvp != fromnd.ni_vp)
3388 VOP_UNLOCK(fromnd.ni_dvp, 0);
3389 fvp = fromnd.ni_vp;
3390
3391 NDINIT(&tond, RENAME,
3392 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3393 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3394 seg, to);
3395 if ((error = namei(&tond)) != 0) {
3396 VFS_RENAMELOCK_EXIT(fs);
3397 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3398 vrele(fromnd.ni_dvp);
3399 vrele(fvp);
3400 goto out1;
3401 }
3402 tdvp = tond.ni_dvp;
3403 tvp = tond.ni_vp;
3404
3405 if (tvp != NULL) {
3406 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3407 error = ENOTDIR;
3408 goto out;
3409 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3410 error = EISDIR;
3411 goto out;
3412 }
3413 }
3414
3415 if (fvp == tdvp)
3416 error = EINVAL;
3417
3418 /*
3419 * Source and destination refer to the same object.
3420 */
3421 if (fvp == tvp) {
3422 if (retain)
3423 error = -1;
3424 else if (fromnd.ni_dvp == tdvp &&
3425 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3426 !memcmp(fromnd.ni_cnd.cn_nameptr,
3427 tond.ni_cnd.cn_nameptr,
3428 fromnd.ni_cnd.cn_namelen))
3429 error = -1;
3430 }
3431
3432 #if NVERIEXEC > 0
3433 if (!error) {
3434 char *f1, *f2;
3435
3436 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3437 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3438
3439 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3440 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3441
3442 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3443
3444 free(f1, M_TEMP);
3445 free(f2, M_TEMP);
3446 }
3447 #endif /* NVERIEXEC > 0 */
3448
3449 out:
3450 p = l->l_proc;
3451 if (!error) {
3452 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3453 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3454 VFS_RENAMELOCK_EXIT(fs);
3455 } else {
3456 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3457 if (tdvp == tvp)
3458 vrele(tdvp);
3459 else
3460 vput(tdvp);
3461 if (tvp)
3462 vput(tvp);
3463 VFS_RENAMELOCK_EXIT(fs);
3464 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3465 vrele(fromnd.ni_dvp);
3466 vrele(fvp);
3467 }
3468 vrele(tond.ni_startdir);
3469 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3470 out1:
3471 if (fromnd.ni_startdir)
3472 vrele(fromnd.ni_startdir);
3473 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3474 return (error == -1 ? 0 : error);
3475 }
3476
3477 /*
3478 * Make a directory file.
3479 */
3480 /* ARGSUSED */
3481 int
3482 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3483 {
3484 /* {
3485 syscallarg(const char *) path;
3486 syscallarg(int) mode;
3487 } */
3488 struct proc *p = l->l_proc;
3489 struct vnode *vp;
3490 struct vattr vattr;
3491 int error;
3492 struct nameidata nd;
3493
3494 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3495 SCARG(uap, path));
3496 if ((error = namei(&nd)) != 0)
3497 return (error);
3498 vp = nd.ni_vp;
3499 if (vp != NULL) {
3500 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3501 if (nd.ni_dvp == vp)
3502 vrele(nd.ni_dvp);
3503 else
3504 vput(nd.ni_dvp);
3505 vrele(vp);
3506 return (EEXIST);
3507 }
3508 VATTR_NULL(&vattr);
3509 vattr.va_type = VDIR;
3510 /* We will read cwdi->cwdi_cmask unlocked. */
3511 vattr.va_mode =
3512 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3513 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3514 if (!error)
3515 vput(nd.ni_vp);
3516 return (error);
3517 }
3518
3519 /*
3520 * Remove a directory file.
3521 */
3522 /* ARGSUSED */
3523 int
3524 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3525 {
3526 /* {
3527 syscallarg(const char *) path;
3528 } */
3529 struct vnode *vp;
3530 int error;
3531 struct nameidata nd;
3532
3533 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3534 SCARG(uap, path));
3535 if ((error = namei(&nd)) != 0)
3536 return (error);
3537 vp = nd.ni_vp;
3538 if (vp->v_type != VDIR) {
3539 error = ENOTDIR;
3540 goto out;
3541 }
3542 /*
3543 * No rmdir "." please.
3544 */
3545 if (nd.ni_dvp == vp) {
3546 error = EINVAL;
3547 goto out;
3548 }
3549 /*
3550 * The root of a mounted filesystem cannot be deleted.
3551 */
3552 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3553 error = EBUSY;
3554 goto out;
3555 }
3556 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3557 return (error);
3558
3559 out:
3560 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3561 if (nd.ni_dvp == vp)
3562 vrele(nd.ni_dvp);
3563 else
3564 vput(nd.ni_dvp);
3565 vput(vp);
3566 return (error);
3567 }
3568
3569 /*
3570 * Read a block of directory entries in a file system independent format.
3571 */
3572 int
3573 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3574 {
3575 /* {
3576 syscallarg(int) fd;
3577 syscallarg(char *) buf;
3578 syscallarg(size_t) count;
3579 } */
3580 file_t *fp;
3581 int error, done;
3582
3583 /* fd_getvnode() will use the descriptor for us */
3584 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3585 return (error);
3586 if ((fp->f_flag & FREAD) == 0) {
3587 error = EBADF;
3588 goto out;
3589 }
3590 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3591 SCARG(uap, count), &done, l, 0, 0);
3592 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3593 *retval = done;
3594 out:
3595 fd_putfile(SCARG(uap, fd));
3596 return (error);
3597 }
3598
3599 /*
3600 * Set the mode mask for creation of filesystem nodes.
3601 */
3602 int
3603 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3604 {
3605 /* {
3606 syscallarg(mode_t) newmask;
3607 } */
3608 struct proc *p = l->l_proc;
3609 struct cwdinfo *cwdi;
3610
3611 /*
3612 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3613 * important is that we serialize changes to the mask. The
3614 * rw_exit() will issue a write memory barrier on our behalf,
3615 * and force the changes out to other CPUs (as it must use an
3616 * atomic operation, draining the local CPU's store buffers).
3617 */
3618 cwdi = p->p_cwdi;
3619 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3620 *retval = cwdi->cwdi_cmask;
3621 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3622 rw_exit(&cwdi->cwdi_lock);
3623
3624 return (0);
3625 }
3626
3627 int
3628 dorevoke(struct vnode *vp, kauth_cred_t cred)
3629 {
3630 struct vattr vattr;
3631 int error;
3632
3633 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3634 return error;
3635 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3636 (error = kauth_authorize_generic(cred,
3637 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3638 VOP_REVOKE(vp, REVOKEALL);
3639 return (error);
3640 }
3641
3642 /*
3643 * Void all references to file by ripping underlying filesystem
3644 * away from vnode.
3645 */
3646 /* ARGSUSED */
3647 int
3648 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3649 {
3650 /* {
3651 syscallarg(const char *) path;
3652 } */
3653 struct vnode *vp;
3654 int error;
3655 struct nameidata nd;
3656
3657 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3658 SCARG(uap, path));
3659 if ((error = namei(&nd)) != 0)
3660 return (error);
3661 vp = nd.ni_vp;
3662 error = dorevoke(vp, l->l_cred);
3663 vrele(vp);
3664 return (error);
3665 }
3666
3667 /*
3668 * Convert a user file descriptor to a kernel file entry.
3669 */
3670 int
3671 getvnode(int fd, file_t **fpp)
3672 {
3673 struct vnode *vp;
3674 file_t *fp;
3675
3676 if ((fp = fd_getfile(fd)) == NULL)
3677 return (EBADF);
3678
3679 if (fp->f_type != DTYPE_VNODE) {
3680 fd_putfile(fd);
3681 return (EINVAL);
3682 }
3683
3684 vp = fp->f_data;
3685 if (vp->v_type == VBAD) {
3686 fd_putfile(fd);
3687 return (EBADF);
3688 }
3689
3690 *fpp = fp;
3691 return (0);
3692 }
3693