vfs_syscalls.c revision 1.392 1 /* $NetBSD: vfs_syscalls.c,v 1.392 2009/04/28 03:01:15 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.392 2009/04/28 03:01:15 yamt Exp $");
70
71 #ifdef _KERNEL_OPT
72 #include "opt_fileassoc.h"
73 #include "veriexec.h"
74 #endif
75
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/namei.h>
79 #include <sys/filedesc.h>
80 #include <sys/kernel.h>
81 #include <sys/file.h>
82 #include <sys/stat.h>
83 #include <sys/vnode.h>
84 #include <sys/mount.h>
85 #include <sys/proc.h>
86 #include <sys/uio.h>
87 #include <sys/kmem.h>
88 #include <sys/dirent.h>
89 #include <sys/sysctl.h>
90 #include <sys/syscallargs.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/ktrace.h>
93 #ifdef FILEASSOC
94 #include <sys/fileassoc.h>
95 #endif /* FILEASSOC */
96 #include <sys/verified_exec.h>
97 #include <sys/kauth.h>
98 #include <sys/atomic.h>
99 #include <sys/module.h>
100 #include <sys/buf.h>
101
102 #include <miscfs/genfs/genfs.h>
103 #include <miscfs/syncfs/syncfs.h>
104 #include <miscfs/specfs/specdev.h>
105
106 #include <nfs/rpcv2.h>
107 #include <nfs/nfsproto.h>
108 #include <nfs/nfs.h>
109 #include <nfs/nfs_var.h>
110
111 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
112
113 static int change_dir(struct nameidata *, struct lwp *);
114 static int change_flags(struct vnode *, u_long, struct lwp *);
115 static int change_mode(struct vnode *, int, struct lwp *l);
116 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
117
118 void checkdirs(struct vnode *);
119
120 int dovfsusermount = 0;
121
122 /*
123 * Virtual File System System Calls
124 */
125
126 /*
127 * Mount a file system.
128 */
129
130 /*
131 * This table is used to maintain compatibility with 4.3BSD
132 * and NetBSD 0.9 mount syscalls - and possibly other systems.
133 * Note, the order is important!
134 *
135 * Do not modify this table. It should only contain filesystems
136 * supported by NetBSD 0.9 and 4.3BSD.
137 */
138 const char * const mountcompatnames[] = {
139 NULL, /* 0 = MOUNT_NONE */
140 MOUNT_FFS, /* 1 = MOUNT_UFS */
141 MOUNT_NFS, /* 2 */
142 MOUNT_MFS, /* 3 */
143 MOUNT_MSDOS, /* 4 */
144 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
145 MOUNT_FDESC, /* 6 */
146 MOUNT_KERNFS, /* 7 */
147 NULL, /* 8 = MOUNT_DEVFS */
148 MOUNT_AFS, /* 9 */
149 };
150 const int nmountcompatnames = sizeof(mountcompatnames) /
151 sizeof(mountcompatnames[0]);
152
153 static int
154 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
155 void *data, size_t *data_len)
156 {
157 struct mount *mp;
158 int error = 0, saved_flags;
159
160 mp = vp->v_mount;
161 saved_flags = mp->mnt_flag;
162
163 /* We can operate only on VV_ROOT nodes. */
164 if ((vp->v_vflag & VV_ROOT) == 0) {
165 error = EINVAL;
166 goto out;
167 }
168
169 /*
170 * We only allow the filesystem to be reloaded if it
171 * is currently mounted read-only. Additionally, we
172 * prevent read-write to read-only downgrades.
173 */
174 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
175 (mp->mnt_flag & MNT_RDONLY) == 0) {
176 error = EOPNOTSUPP; /* Needs translation */
177 goto out;
178 }
179
180 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
181 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
182 if (error)
183 goto out;
184
185 if (vfs_busy(mp, NULL)) {
186 error = EPERM;
187 goto out;
188 }
189
190 mutex_enter(&mp->mnt_updating);
191
192 mp->mnt_flag &= ~MNT_OP_FLAGS;
193 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
194
195 /*
196 * Set the mount level flags.
197 */
198 if (flags & MNT_RDONLY)
199 mp->mnt_flag |= MNT_RDONLY;
200 else if (mp->mnt_flag & MNT_RDONLY)
201 mp->mnt_iflag |= IMNT_WANTRDWR;
202 mp->mnt_flag &=
203 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
204 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
205 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
206 MNT_LOG);
207 mp->mnt_flag |= flags &
208 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
209 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
210 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
211 MNT_LOG | MNT_IGNORE);
212
213 error = VFS_MOUNT(mp, path, data, data_len);
214
215 if (error && data != NULL) {
216 int error2;
217
218 /*
219 * Update failed; let's try and see if it was an
220 * export request. For compat with 3.0 and earlier.
221 */
222 error2 = vfs_hooks_reexport(mp, path, data);
223
224 /*
225 * Only update error code if the export request was
226 * understood but some problem occurred while
227 * processing it.
228 */
229 if (error2 != EJUSTRETURN)
230 error = error2;
231 }
232
233 if (mp->mnt_iflag & IMNT_WANTRDWR)
234 mp->mnt_flag &= ~MNT_RDONLY;
235 if (error)
236 mp->mnt_flag = saved_flags;
237 mp->mnt_flag &= ~MNT_OP_FLAGS;
238 mp->mnt_iflag &= ~IMNT_WANTRDWR;
239 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
240 if (mp->mnt_syncer == NULL)
241 error = vfs_allocate_syncvnode(mp);
242 } else {
243 if (mp->mnt_syncer != NULL)
244 vfs_deallocate_syncvnode(mp);
245 }
246 mutex_exit(&mp->mnt_updating);
247 vfs_unbusy(mp, false, NULL);
248
249 out:
250 return (error);
251 }
252
253 static int
254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
255 {
256 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
257 int error;
258
259 /* Copy file-system type from userspace. */
260 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
261 if (error) {
262 /*
263 * Historically, filesystem types were identified by numbers.
264 * If we get an integer for the filesystem type instead of a
265 * string, we check to see if it matches one of the historic
266 * filesystem types.
267 */
268 u_long fsindex = (u_long)fstype;
269 if (fsindex >= nmountcompatnames ||
270 mountcompatnames[fsindex] == NULL)
271 return ENODEV;
272 strlcpy(fstypename, mountcompatnames[fsindex],
273 sizeof(fstypename));
274 }
275
276 /* Accept `ufs' as an alias for `ffs', for compatibility. */
277 if (strcmp(fstypename, "ufs") == 0)
278 fstypename[0] = 'f';
279
280 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
281 return 0;
282
283 /* If we can autoload a vfs module, try again */
284 mutex_enter(&module_lock);
285 (void)module_autoload(fstype, MODULE_CLASS_VFS);
286 mutex_exit(&module_lock);
287
288 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
289 return 0;
290
291 return ENODEV;
292 }
293
294 static int
295 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
296 const char *path, int flags, void *data, size_t *data_len, u_int recurse)
297 {
298 struct mount *mp;
299 struct vnode *vp = *vpp;
300 struct vattr va;
301 int error;
302
303 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
304 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
305 if (error)
306 return error;
307
308 /* Can't make a non-dir a mount-point (from here anyway). */
309 if (vp->v_type != VDIR)
310 return ENOTDIR;
311
312 /*
313 * If the user is not root, ensure that they own the directory
314 * onto which we are attempting to mount.
315 */
316 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
317 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
318 (error = kauth_authorize_generic(l->l_cred,
319 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
320 return error;
321 }
322
323 if (flags & MNT_EXPORTED)
324 return EINVAL;
325
326 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
327 return error;
328
329 /*
330 * Check if a file-system is not already mounted on this vnode.
331 */
332 if (vp->v_mountedhere != NULL)
333 return EBUSY;
334
335 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
336 if (mp == NULL)
337 return ENOMEM;
338
339 mp->mnt_op = vfsops;
340 mp->mnt_refcnt = 1;
341
342 TAILQ_INIT(&mp->mnt_vnodelist);
343 rw_init(&mp->mnt_unmounting);
344 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
345 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
346 error = vfs_busy(mp, NULL);
347 KASSERT(error == 0);
348 mutex_enter(&mp->mnt_updating);
349
350 mp->mnt_vnodecovered = vp;
351 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
352 mount_initspecific(mp);
353
354 /*
355 * The underlying file system may refuse the mount for
356 * various reasons. Allow the user to force it to happen.
357 *
358 * Set the mount level flags.
359 */
360 mp->mnt_flag = flags &
361 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
362 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
363 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
364 MNT_LOG | MNT_IGNORE | MNT_RDONLY);
365
366 error = VFS_MOUNT(mp, path, data, data_len);
367 mp->mnt_flag &= ~MNT_OP_FLAGS;
368
369 /*
370 * Put the new filesystem on the mount list after root.
371 */
372 cache_purge(vp);
373 if (error != 0) {
374 vp->v_mountedhere = NULL;
375 mutex_exit(&mp->mnt_updating);
376 vfs_unbusy(mp, false, NULL);
377 vfs_destroy(mp);
378 return error;
379 }
380
381 mp->mnt_iflag &= ~IMNT_WANTRDWR;
382 mutex_enter(&mountlist_lock);
383 vp->v_mountedhere = mp;
384 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
385 mutex_exit(&mountlist_lock);
386 vn_restorerecurse(vp, recurse);
387 VOP_UNLOCK(vp, 0);
388 checkdirs(vp);
389 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
390 error = vfs_allocate_syncvnode(mp);
391 /* Hold an additional reference to the mount across VFS_START(). */
392 mutex_exit(&mp->mnt_updating);
393 vfs_unbusy(mp, true, NULL);
394 (void) VFS_STATVFS(mp, &mp->mnt_stat);
395 error = VFS_START(mp, 0);
396 if (error)
397 vrele(vp);
398 /* Drop reference held for VFS_START(). */
399 vfs_destroy(mp);
400 *vpp = NULL;
401 return error;
402 }
403
404 static int
405 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
406 void *data, size_t *data_len)
407 {
408 struct mount *mp;
409 int error;
410
411 /* If MNT_GETARGS is specified, it should be the only flag. */
412 if (flags & ~MNT_GETARGS)
413 return EINVAL;
414
415 mp = vp->v_mount;
416
417 /* XXX: probably some notion of "can see" here if we want isolation. */
418 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
419 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
420 if (error)
421 return error;
422
423 if ((vp->v_vflag & VV_ROOT) == 0)
424 return EINVAL;
425
426 if (vfs_busy(mp, NULL))
427 return EPERM;
428
429 mutex_enter(&mp->mnt_updating);
430 mp->mnt_flag &= ~MNT_OP_FLAGS;
431 mp->mnt_flag |= MNT_GETARGS;
432 error = VFS_MOUNT(mp, path, data, data_len);
433 mp->mnt_flag &= ~MNT_OP_FLAGS;
434 mutex_exit(&mp->mnt_updating);
435
436 vfs_unbusy(mp, false, NULL);
437 return (error);
438 }
439
440 int
441 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
442 {
443 /* {
444 syscallarg(const char *) type;
445 syscallarg(const char *) path;
446 syscallarg(int) flags;
447 syscallarg(void *) data;
448 syscallarg(size_t) data_len;
449 } */
450
451 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
452 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
453 SCARG(uap, data_len), retval);
454 }
455
456 int
457 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
458 const char *path, int flags, void *data, enum uio_seg data_seg,
459 size_t data_len, register_t *retval)
460 {
461 struct vnode *vp;
462 struct nameidata nd;
463 void *data_buf = data;
464 u_int recurse;
465 int error;
466
467 /*
468 * Get vnode to be covered
469 */
470 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
471 if ((error = namei(&nd)) != 0)
472 return (error);
473 vp = nd.ni_vp;
474
475 /*
476 * A lookup in VFS_MOUNT might result in an attempt to
477 * lock this vnode again, so make the lock recursive.
478 */
479 if (vfsops == NULL) {
480 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
481 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
482 recurse = vn_setrecurse(vp);
483 vfsops = vp->v_mount->mnt_op;
484 } else {
485 /* 'type' is userspace */
486 error = mount_get_vfsops(type, &vfsops);
487 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
488 recurse = vn_setrecurse(vp);
489 if (error != 0)
490 goto done;
491 }
492 } else {
493 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
494 recurse = vn_setrecurse(vp);
495 }
496
497 if (data != NULL && data_seg == UIO_USERSPACE) {
498 if (data_len == 0) {
499 /* No length supplied, use default for filesystem */
500 data_len = vfsops->vfs_min_mount_data;
501 if (data_len > VFS_MAX_MOUNT_DATA) {
502 error = EINVAL;
503 goto done;
504 }
505 /*
506 * Hopefully a longer buffer won't make copyin() fail.
507 * For compatibility with 3.0 and earlier.
508 */
509 if (flags & MNT_UPDATE
510 && data_len < sizeof (struct mnt_export_args30))
511 data_len = sizeof (struct mnt_export_args30);
512 }
513 data_buf = kmem_alloc(data_len, KM_SLEEP);
514
515 /* NFS needs the buffer even for mnt_getargs .... */
516 error = copyin(data, data_buf, data_len);
517 if (error != 0)
518 goto done;
519 }
520
521 if (flags & MNT_GETARGS) {
522 if (data_len == 0) {
523 error = EINVAL;
524 goto done;
525 }
526 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
527 if (error != 0)
528 goto done;
529 if (data_seg == UIO_USERSPACE)
530 error = copyout(data_buf, data, data_len);
531 *retval = data_len;
532 } else if (flags & MNT_UPDATE) {
533 error = mount_update(l, vp, path, flags, data_buf, &data_len);
534 } else {
535 /* Locking is handled internally in mount_domount(). */
536 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
537 &data_len, recurse);
538 }
539
540 done:
541 if (vp != NULL) {
542 vn_restorerecurse(vp, recurse);
543 vput(vp);
544 }
545 if (data_buf != data)
546 kmem_free(data_buf, data_len);
547 return (error);
548 }
549
550 /*
551 * Scan all active processes to see if any of them have a current
552 * or root directory onto which the new filesystem has just been
553 * mounted. If so, replace them with the new mount point.
554 */
555 void
556 checkdirs(struct vnode *olddp)
557 {
558 struct cwdinfo *cwdi;
559 struct vnode *newdp, *rele1, *rele2;
560 struct proc *p;
561 bool retry;
562
563 if (olddp->v_usecount == 1)
564 return;
565 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
566 panic("mount: lost mount");
567
568 do {
569 retry = false;
570 mutex_enter(proc_lock);
571 PROCLIST_FOREACH(p, &allproc) {
572 if ((p->p_flag & PK_MARKER) != 0)
573 continue;
574 if ((cwdi = p->p_cwdi) == NULL)
575 continue;
576 /*
577 * Can't change to the old directory any more,
578 * so even if we see a stale value it's not a
579 * problem.
580 */
581 if (cwdi->cwdi_cdir != olddp &&
582 cwdi->cwdi_rdir != olddp)
583 continue;
584 retry = true;
585 rele1 = NULL;
586 rele2 = NULL;
587 atomic_inc_uint(&cwdi->cwdi_refcnt);
588 mutex_exit(proc_lock);
589 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
590 if (cwdi->cwdi_cdir == olddp) {
591 rele1 = cwdi->cwdi_cdir;
592 VREF(newdp);
593 cwdi->cwdi_cdir = newdp;
594 }
595 if (cwdi->cwdi_rdir == olddp) {
596 rele2 = cwdi->cwdi_rdir;
597 VREF(newdp);
598 cwdi->cwdi_rdir = newdp;
599 }
600 rw_exit(&cwdi->cwdi_lock);
601 cwdfree(cwdi);
602 if (rele1 != NULL)
603 vrele(rele1);
604 if (rele2 != NULL)
605 vrele(rele2);
606 mutex_enter(proc_lock);
607 break;
608 }
609 mutex_exit(proc_lock);
610 } while (retry);
611
612 if (rootvnode == olddp) {
613 vrele(rootvnode);
614 VREF(newdp);
615 rootvnode = newdp;
616 }
617 vput(newdp);
618 }
619
620 /*
621 * Unmount a file system.
622 *
623 * Note: unmount takes a path to the vnode mounted on as argument,
624 * not special file (as before).
625 */
626 /* ARGSUSED */
627 int
628 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
629 {
630 /* {
631 syscallarg(const char *) path;
632 syscallarg(int) flags;
633 } */
634 struct vnode *vp;
635 struct mount *mp;
636 int error;
637 struct nameidata nd;
638
639 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
640 SCARG(uap, path));
641 if ((error = namei(&nd)) != 0)
642 return (error);
643 vp = nd.ni_vp;
644 mp = vp->v_mount;
645 atomic_inc_uint(&mp->mnt_refcnt);
646 VOP_UNLOCK(vp, 0);
647
648 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
649 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
650 if (error) {
651 vrele(vp);
652 vfs_destroy(mp);
653 return (error);
654 }
655
656 /*
657 * Don't allow unmounting the root file system.
658 */
659 if (mp->mnt_flag & MNT_ROOTFS) {
660 vrele(vp);
661 vfs_destroy(mp);
662 return (EINVAL);
663 }
664
665 /*
666 * Must be the root of the filesystem
667 */
668 if ((vp->v_vflag & VV_ROOT) == 0) {
669 vrele(vp);
670 vfs_destroy(mp);
671 return (EINVAL);
672 }
673
674 vrele(vp);
675 error = dounmount(mp, SCARG(uap, flags), l);
676 vfs_destroy(mp);
677 return error;
678 }
679
680 /*
681 * Do the actual file system unmount. File system is assumed to have
682 * been locked by the caller.
683 *
684 * => Caller hold reference to the mount, explicitly for dounmount().
685 */
686 int
687 dounmount(struct mount *mp, int flags, struct lwp *l)
688 {
689 struct vnode *coveredvp;
690 int error;
691 int async;
692 int used_syncer;
693
694 #if NVERIEXEC > 0
695 error = veriexec_unmountchk(mp);
696 if (error)
697 return (error);
698 #endif /* NVERIEXEC > 0 */
699
700 /*
701 * XXX Freeze syncer. Must do this before locking the
702 * mount point. See dounmount() for details.
703 */
704 mutex_enter(&syncer_mutex);
705 rw_enter(&mp->mnt_unmounting, RW_WRITER);
706 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
707 rw_exit(&mp->mnt_unmounting);
708 mutex_exit(&syncer_mutex);
709 return ENOENT;
710 }
711
712 used_syncer = (mp->mnt_syncer != NULL);
713
714 /*
715 * XXX Syncer must be frozen when we get here. This should really
716 * be done on a per-mountpoint basis, but the syncer doesn't work
717 * like that.
718 *
719 * The caller of dounmount() must acquire syncer_mutex because
720 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
721 * order, and we must preserve that order to avoid deadlock.
722 *
723 * So, if the file system did not use the syncer, now is
724 * the time to release the syncer_mutex.
725 */
726 if (used_syncer == 0)
727 mutex_exit(&syncer_mutex);
728
729 mp->mnt_iflag |= IMNT_UNMOUNT;
730 async = mp->mnt_flag & MNT_ASYNC;
731 mp->mnt_flag &= ~MNT_ASYNC;
732 cache_purgevfs(mp); /* remove cache entries for this file sys */
733 if (mp->mnt_syncer != NULL)
734 vfs_deallocate_syncvnode(mp);
735 error = 0;
736 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
737 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
738 }
739 vfs_scrubvnlist(mp);
740 if (error == 0 || (flags & MNT_FORCE))
741 error = VFS_UNMOUNT(mp, flags);
742 if (error) {
743 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
744 (void) vfs_allocate_syncvnode(mp);
745 mp->mnt_iflag &= ~IMNT_UNMOUNT;
746 mp->mnt_flag |= async;
747 rw_exit(&mp->mnt_unmounting);
748 if (used_syncer)
749 mutex_exit(&syncer_mutex);
750 return (error);
751 }
752 vfs_scrubvnlist(mp);
753 mutex_enter(&mountlist_lock);
754 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
755 coveredvp->v_mountedhere = NULL;
756 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
757 mp->mnt_iflag |= IMNT_GONE;
758 mutex_exit(&mountlist_lock);
759 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
760 panic("unmount: dangling vnode");
761 if (used_syncer)
762 mutex_exit(&syncer_mutex);
763 vfs_hooks_unmount(mp);
764 rw_exit(&mp->mnt_unmounting);
765 vfs_destroy(mp); /* reference from mount() */
766 if (coveredvp != NULLVP)
767 vrele(coveredvp);
768 return (0);
769 }
770
771 /*
772 * Sync each mounted filesystem.
773 */
#ifdef DEBUG
/* When non-zero, sys_sync() calls vfs_bufstats() after syncing. */
int syncprt = 0;
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif
778
779 /* ARGSUSED */
780 int
781 sys_sync(struct lwp *l, const void *v, register_t *retval)
782 {
783 struct mount *mp, *nmp;
784 int asyncflag;
785
786 if (l == NULL)
787 l = &lwp0;
788
789 mutex_enter(&mountlist_lock);
790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
791 mp = nmp) {
792 if (vfs_busy(mp, &nmp)) {
793 continue;
794 }
795 mutex_enter(&mp->mnt_updating);
796 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
797 asyncflag = mp->mnt_flag & MNT_ASYNC;
798 mp->mnt_flag &= ~MNT_ASYNC;
799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
800 if (asyncflag)
801 mp->mnt_flag |= MNT_ASYNC;
802 }
803 mutex_exit(&mp->mnt_updating);
804 vfs_unbusy(mp, false, &nmp);
805 }
806 mutex_exit(&mountlist_lock);
807 #ifdef DEBUG
808 if (syncprt)
809 vfs_bufstats();
810 #endif /* DEBUG */
811 return (0);
812 }
813
814 /*
815 * Change filesystem quotas.
816 */
817 /* ARGSUSED */
818 int
819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
820 {
821 /* {
822 syscallarg(const char *) path;
823 syscallarg(int) cmd;
824 syscallarg(int) uid;
825 syscallarg(void *) arg;
826 } */
827 struct mount *mp;
828 int error;
829 struct nameidata nd;
830
831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
832 SCARG(uap, path));
833 if ((error = namei(&nd)) != 0)
834 return (error);
835 mp = nd.ni_vp->v_mount;
836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
837 SCARG(uap, arg));
838 vrele(nd.ni_vp);
839 return (error);
840 }
841
842 int
843 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
844 int root)
845 {
846 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
847 int error = 0;
848
849 /*
850 * If MNT_NOWAIT or MNT_LAZY is specified, do not
851 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
852 * overrides MNT_NOWAIT.
853 */
854 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
855 (flags != MNT_WAIT && flags != 0)) {
856 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
857 goto done;
858 }
859
860 /* Get the filesystem stats now */
861 memset(sp, 0, sizeof(*sp));
862 if ((error = VFS_STATVFS(mp, sp)) != 0) {
863 return error;
864 }
865
866 if (cwdi->cwdi_rdir == NULL)
867 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
868 done:
869 if (cwdi->cwdi_rdir != NULL) {
870 size_t len;
871 char *bp;
872 char c;
873 char *path = PNBUF_GET();
874
875 bp = path + MAXPATHLEN;
876 *--bp = '\0';
877 rw_enter(&cwdi->cwdi_lock, RW_READER);
878 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
879 MAXPATHLEN / 2, 0, l);
880 rw_exit(&cwdi->cwdi_lock);
881 if (error) {
882 PNBUF_PUT(path);
883 return error;
884 }
885 len = strlen(bp);
886 if (len != 1) {
887 /*
888 * for mount points that are below our root, we can see
889 * them, so we fix up the pathname and return them. The
890 * rest we cannot see, so we don't allow viewing the
891 * data.
892 */
893 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
894 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
895 (void)strlcpy(sp->f_mntonname,
896 c == '\0' ? "/" : &sp->f_mntonname[len],
897 sizeof(sp->f_mntonname));
898 } else {
899 if (root)
900 (void)strlcpy(sp->f_mntonname, "/",
901 sizeof(sp->f_mntonname));
902 else
903 error = EPERM;
904 }
905 }
906 PNBUF_PUT(path);
907 }
908 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
909 return error;
910 }
911
912 /*
913 * Get filesystem statistics by path.
914 */
915 int
916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
917 {
918 struct mount *mp;
919 int error;
920 struct nameidata nd;
921
922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
923 if ((error = namei(&nd)) != 0)
924 return error;
925 mp = nd.ni_vp->v_mount;
926 error = dostatvfs(mp, sb, l, flags, 1);
927 vrele(nd.ni_vp);
928 return error;
929 }
930
931 /* ARGSUSED */
932 int
933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
934 {
935 /* {
936 syscallarg(const char *) path;
937 syscallarg(struct statvfs *) buf;
938 syscallarg(int) flags;
939 } */
940 struct statvfs *sb;
941 int error;
942
943 sb = STATVFSBUF_GET();
944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
945 if (error == 0)
946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
947 STATVFSBUF_PUT(sb);
948 return error;
949 }
950
951 /*
952 * Get filesystem statistics by fd.
953 */
954 int
955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
956 {
957 file_t *fp;
958 struct mount *mp;
959 int error;
960
961 /* fd_getvnode() will use the descriptor for us */
962 if ((error = fd_getvnode(fd, &fp)) != 0)
963 return (error);
964 mp = ((struct vnode *)fp->f_data)->v_mount;
965 error = dostatvfs(mp, sb, curlwp, flags, 1);
966 fd_putfile(fd);
967 return error;
968 }
969
970 /* ARGSUSED */
971 int
972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
973 {
974 /* {
975 syscallarg(int) fd;
976 syscallarg(struct statvfs *) buf;
977 syscallarg(int) flags;
978 } */
979 struct statvfs *sb;
980 int error;
981
982 sb = STATVFSBUF_GET();
983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
984 if (error == 0)
985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
986 STATVFSBUF_PUT(sb);
987 return error;
988 }
989
990
991 /*
992 * Get statistics on all filesystems.
993 */
994 int
995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
997 register_t *retval)
998 {
999 int root = 0;
1000 struct proc *p = l->l_proc;
1001 struct mount *mp, *nmp;
1002 struct statvfs *sb;
1003 size_t count, maxcount;
1004 int error = 0;
1005
1006 sb = STATVFSBUF_GET();
1007 maxcount = bufsize / entry_sz;
1008 mutex_enter(&mountlist_lock);
1009 count = 0;
1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1011 mp = nmp) {
1012 if (vfs_busy(mp, &nmp)) {
1013 continue;
1014 }
1015 if (sfsp && count < maxcount) {
1016 error = dostatvfs(mp, sb, l, flags, 0);
1017 if (error) {
1018 vfs_unbusy(mp, false, &nmp);
1019 error = 0;
1020 continue;
1021 }
1022 error = copyfn(sb, sfsp, entry_sz);
1023 if (error) {
1024 vfs_unbusy(mp, false, NULL);
1025 goto out;
1026 }
1027 sfsp = (char *)sfsp + entry_sz;
1028 root |= strcmp(sb->f_mntonname, "/") == 0;
1029 }
1030 count++;
1031 vfs_unbusy(mp, false, &nmp);
1032 }
1033 mutex_exit(&mountlist_lock);
1034
1035 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1036 /*
1037 * fake a root entry
1038 */
1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1040 sb, l, flags, 1);
1041 if (error != 0)
1042 goto out;
1043 if (sfsp) {
1044 error = copyfn(sb, sfsp, entry_sz);
1045 if (error != 0)
1046 goto out;
1047 }
1048 count++;
1049 }
1050 if (sfsp && count > maxcount)
1051 *retval = maxcount;
1052 else
1053 *retval = count;
1054 out:
1055 STATVFSBUF_PUT(sb);
1056 return error;
1057 }
1058
1059 int
1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1061 {
1062 /* {
1063 syscallarg(struct statvfs *) buf;
1064 syscallarg(size_t) bufsize;
1065 syscallarg(int) flags;
1066 } */
1067
1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1070 }
1071
1072 /*
1073 * Change current working directory to a given file descriptor.
1074 */
1075 /* ARGSUSED */
1076 int
1077 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1078 {
1079 /* {
1080 syscallarg(int) fd;
1081 } */
1082 struct proc *p = l->l_proc;
1083 struct cwdinfo *cwdi;
1084 struct vnode *vp, *tdp;
1085 struct mount *mp;
1086 file_t *fp;
1087 int error, fd;
1088
1089 /* fd_getvnode() will use the descriptor for us */
1090 fd = SCARG(uap, fd);
1091 if ((error = fd_getvnode(fd, &fp)) != 0)
1092 return (error);
1093 vp = fp->f_data;
1094
1095 VREF(vp);
1096 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1097 if (vp->v_type != VDIR)
1098 error = ENOTDIR;
1099 else
1100 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1101 if (error) {
1102 vput(vp);
1103 goto out;
1104 }
1105 while ((mp = vp->v_mountedhere) != NULL) {
1106 error = vfs_busy(mp, NULL);
1107 vput(vp);
1108 if (error != 0)
1109 goto out;
1110 error = VFS_ROOT(mp, &tdp);
1111 vfs_unbusy(mp, false, NULL);
1112 if (error)
1113 goto out;
1114 vp = tdp;
1115 }
1116 VOP_UNLOCK(vp, 0);
1117
1118 /*
1119 * Disallow changing to a directory not under the process's
1120 * current root directory (if there is one).
1121 */
1122 cwdi = p->p_cwdi;
1123 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1124 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1125 vrele(vp);
1126 error = EPERM; /* operation not permitted */
1127 } else {
1128 vrele(cwdi->cwdi_cdir);
1129 cwdi->cwdi_cdir = vp;
1130 }
1131 rw_exit(&cwdi->cwdi_lock);
1132
1133 out:
1134 fd_putfile(fd);
1135 return (error);
1136 }
1137
1138 /*
1139 * Change this process's notion of the root directory to a given file
1140 * descriptor.
1141 */
1142 int
1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1144 {
1145 struct proc *p = l->l_proc;
1146 struct cwdinfo *cwdi;
1147 struct vnode *vp;
1148 file_t *fp;
1149 int error, fd = SCARG(uap, fd);
1150
1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1153 return error;
1154 /* fd_getvnode() will use the descriptor for us */
1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1156 return error;
1157 vp = fp->f_data;
1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1159 if (vp->v_type != VDIR)
1160 error = ENOTDIR;
1161 else
1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1163 VOP_UNLOCK(vp, 0);
1164 if (error)
1165 goto out;
1166 VREF(vp);
1167
1168 /*
1169 * Prevent escaping from chroot by putting the root under
1170 * the working directory. Silently chdir to / if we aren't
1171 * already there.
1172 */
1173 cwdi = p->p_cwdi;
1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1176 /*
1177 * XXX would be more failsafe to change directory to a
1178 * deadfs node here instead
1179 */
1180 vrele(cwdi->cwdi_cdir);
1181 VREF(vp);
1182 cwdi->cwdi_cdir = vp;
1183 }
1184
1185 if (cwdi->cwdi_rdir != NULL)
1186 vrele(cwdi->cwdi_rdir);
1187 cwdi->cwdi_rdir = vp;
1188 rw_exit(&cwdi->cwdi_lock);
1189
1190 out:
1191 fd_putfile(fd);
1192 return (error);
1193 }
1194
1195 /*
1196 * Change current working directory (``.'').
1197 */
1198 /* ARGSUSED */
1199 int
1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1201 {
1202 /* {
1203 syscallarg(const char *) path;
1204 } */
1205 struct proc *p = l->l_proc;
1206 struct cwdinfo *cwdi;
1207 int error;
1208 struct nameidata nd;
1209
1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1211 SCARG(uap, path));
1212 if ((error = change_dir(&nd, l)) != 0)
1213 return (error);
1214 cwdi = p->p_cwdi;
1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1216 vrele(cwdi->cwdi_cdir);
1217 cwdi->cwdi_cdir = nd.ni_vp;
1218 rw_exit(&cwdi->cwdi_lock);
1219 return (0);
1220 }
1221
1222 /*
1223 * Change notion of root (``/'') directory.
1224 */
1225 /* ARGSUSED */
1226 int
1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1228 {
1229 /* {
1230 syscallarg(const char *) path;
1231 } */
1232 struct proc *p = l->l_proc;
1233 struct cwdinfo *cwdi;
1234 struct vnode *vp;
1235 int error;
1236 struct nameidata nd;
1237
1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1240 return (error);
1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1242 SCARG(uap, path));
1243 if ((error = change_dir(&nd, l)) != 0)
1244 return (error);
1245
1246 cwdi = p->p_cwdi;
1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1248 if (cwdi->cwdi_rdir != NULL)
1249 vrele(cwdi->cwdi_rdir);
1250 vp = nd.ni_vp;
1251 cwdi->cwdi_rdir = vp;
1252
1253 /*
1254 * Prevent escaping from chroot by putting the root under
1255 * the working directory. Silently chdir to / if we aren't
1256 * already there.
1257 */
1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1259 /*
1260 * XXX would be more failsafe to change directory to a
1261 * deadfs node here instead
1262 */
1263 vrele(cwdi->cwdi_cdir);
1264 VREF(vp);
1265 cwdi->cwdi_cdir = vp;
1266 }
1267 rw_exit(&cwdi->cwdi_lock);
1268
1269 return (0);
1270 }
1271
1272 /*
1273 * Common routine for chroot and chdir.
1274 */
1275 static int
1276 change_dir(struct nameidata *ndp, struct lwp *l)
1277 {
1278 struct vnode *vp;
1279 int error;
1280
1281 if ((error = namei(ndp)) != 0)
1282 return (error);
1283 vp = ndp->ni_vp;
1284 if (vp->v_type != VDIR)
1285 error = ENOTDIR;
1286 else
1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1288
1289 if (error)
1290 vput(vp);
1291 else
1292 VOP_UNLOCK(vp, 0);
1293 return (error);
1294 }
1295
1296 /*
 * open(2) system call.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * Returns EINVAL when the converted flags request neither FREAD nor
 * FWRITE.  On success the new descriptor index is stored in *retval
 * and 0 is returned; otherwise an errno value is returned (ERESTART
 * from vn_open() is reported as EINTR).
 */
1300 int
1301 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1302 {
1303 /* {
1304 syscallarg(const char *) path;
1305 syscallarg(int) flags;
1306 syscallarg(int) mode;
1307 } */
1308 struct proc *p = l->l_proc;
1309 struct cwdinfo *cwdi = p->p_cwdi;
1310 file_t *fp;
1311 struct vnode *vp;
1312 int flags, cmode;
1313 int type, indx, error;
1314 struct flock lf;
1315 struct nameidata nd;
1316
/* Convert the user open flags with FFLAGS(); need read and/or write. */
1317 flags = FFLAGS(SCARG(uap, flags));
1318 if ((flags & (FREAD | FWRITE)) == 0)
1319 return (EINVAL);
/* Obtain the file structure and descriptor index before the lookup. */
1320 if ((error = fd_allocfile(&fp, &indx)) != 0)
1321 return (error);
1322 /* We're going to read cwdi->cwdi_cmask unlocked here. */
/* Creation mode: caller's mode minus cwdi_cmask, permission bits only, S_ISTXT cleared. */
1323 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1324 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1325 SCARG(uap, path));
/*
 * Store -indx - 1 in l_dupfd as a marker.  After a failed vn_open()
 * a value >= 0 is taken below to mean it was replaced during the open
 * (per the XXX notes, by fdopen -- not visible here, confirm).
 */
1326 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1327 if ((error = vn_open(&nd, flags, cmode)) != 0) {
/* Give up the file/slot from fd_allocfile() before anything else. */
1328 fd_abort(p, fp, indx);
/* EDUPFD/EMOVEFD with a valid l_dupfd: let fd_dupopen() produce the descriptor. */
1329 if ((error == EDUPFD || error == EMOVEFD) &&
1330 l->l_dupfd >= 0 && /* XXX from fdopen */
1331 (error =
1332 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1333 *retval = indx;
1334 return (0);
1335 }
/* Report EINTR rather than ERESTART to the caller. */
1336 if (error == ERESTART)
1337 error = EINTR;
1338 return (error);
1339 }
1340
/* Open succeeded: clear the marker and fill in the file structure. */
1341 l->l_dupfd = 0;
1342 vp = nd.ni_vp;
1343 fp->f_flag = flags & FMASK;
1344 fp->f_type = DTYPE_VNODE;
1345 fp->f_ops = &vnops;
1346 fp->f_data = vp;
/*
 * O_EXLOCK / O_SHLOCK: request an F_FLOCK-type advisory lock with
 * l_start = 0 and l_len = 0; F_WRLCK for O_EXLOCK, F_RDLCK otherwise.
 * F_WAIT is added unless FNONBLOCK is set.
 */
1347 if (flags & (O_EXLOCK | O_SHLOCK)) {
1348 lf.l_whence = SEEK_SET;
1349 lf.l_start = 0;
1350 lf.l_len = 0;
1351 if (flags & O_EXLOCK)
1352 lf.l_type = F_WRLCK;
1353 else
1354 lf.l_type = F_RDLCK;
1355 type = F_FLOCK;
1356 if ((flags & FNONBLOCK) == 0)
1357 type |= F_WAIT;
/* The vnode is unlocked around VOP_ADVLOCK() and re-locked after it. */
1358 VOP_UNLOCK(vp, 0);
1359 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1360 if (error) {
/* Lock failed: close the vnode and give up the descriptor. */
1361 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1362 fd_abort(p, fp, indx);
1363 return (error);
1364 }
1365 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
/* Record in the file flags that this file holds a lock. */
1366 atomic_or_uint(&fp->f_flag, FHASLOCK);
1367 }
1368 VOP_UNLOCK(vp, 0);
/* Hand the descriptor index to the caller and install the file with fd_affix(). */
1369 *retval = indx;
1370 fd_affix(p, fp, indx);
1371 return (0);
1372 }
1373
1374 static void
1375 vfs__fhfree(fhandle_t *fhp)
1376 {
1377 size_t fhsize;
1378
1379 if (fhp == NULL) {
1380 return;
1381 }
1382 fhsize = FHANDLE_SIZE(fhp);
1383 kmem_free(fhp, fhsize);
1384 }
1385
1386 /*
1387 * vfs_composefh: compose a filehandle.
1388 */
1389
1390 int
1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1392 {
1393 struct mount *mp;
1394 struct fid *fidp;
1395 int error;
1396 size_t needfhsize;
1397 size_t fidsize;
1398
1399 mp = vp->v_mount;
1400 fidp = NULL;
1401 if (*fh_size < FHANDLE_SIZE_MIN) {
1402 fidsize = 0;
1403 } else {
1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1405 if (fhp != NULL) {
1406 memset(fhp, 0, *fh_size);
1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1408 fidp = &fhp->fh_fid;
1409 }
1410 }
1411 error = VFS_VPTOFH(vp, fidp, &fidsize);
1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1413 if (error == 0 && *fh_size < needfhsize) {
1414 error = E2BIG;
1415 }
1416 *fh_size = needfhsize;
1417 return error;
1418 }
1419
1420 int
1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1422 {
1423 struct mount *mp;
1424 fhandle_t *fhp;
1425 size_t fhsize;
1426 size_t fidsize;
1427 int error;
1428
1429 *fhpp = NULL;
1430 mp = vp->v_mount;
1431 fidsize = 0;
1432 error = VFS_VPTOFH(vp, NULL, &fidsize);
1433 KASSERT(error != 0);
1434 if (error != E2BIG) {
1435 goto out;
1436 }
1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1438 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1439 if (fhp == NULL) {
1440 error = ENOMEM;
1441 goto out;
1442 }
1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1445 if (error == 0) {
1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1447 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1448 *fhpp = fhp;
1449 } else {
1450 kmem_free(fhp, fhsize);
1451 }
1452 out:
1453 return error;
1454 }
1455
1456 void
1457 vfs_composefh_free(fhandle_t *fhp)
1458 {
1459
1460 vfs__fhfree(fhp);
1461 }
1462
1463 /*
1464 * vfs_fhtovp: lookup a vnode by a filehandle.
1465 */
1466
1467 int
1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1469 {
1470 struct mount *mp;
1471 int error;
1472
1473 *vpp = NULL;
1474 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1475 if (mp == NULL) {
1476 error = ESTALE;
1477 goto out;
1478 }
1479 if (mp->mnt_op->vfs_fhtovp == NULL) {
1480 error = EOPNOTSUPP;
1481 goto out;
1482 }
1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1484 out:
1485 return error;
1486 }
1487
1488 /*
1489 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1490 * the needed size.
1491 */
1492
1493 int
1494 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1495 {
1496 fhandle_t *fhp;
1497 int error;
1498
1499 *fhpp = NULL;
1500 if (fhsize > FHANDLE_SIZE_MAX) {
1501 return EINVAL;
1502 }
1503 if (fhsize < FHANDLE_SIZE_MIN) {
1504 return EINVAL;
1505 }
1506 again:
1507 fhp = kmem_alloc(fhsize, KM_SLEEP);
1508 if (fhp == NULL) {
1509 return ENOMEM;
1510 }
1511 error = copyin(ufhp, fhp, fhsize);
1512 if (error == 0) {
1513 /* XXX this check shouldn't be here */
1514 if (FHANDLE_SIZE(fhp) == fhsize) {
1515 *fhpp = fhp;
1516 return 0;
1517 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1518 /*
1519 * a kludge for nfsv2 padded handles.
1520 */
1521 size_t sz;
1522
1523 sz = FHANDLE_SIZE(fhp);
1524 kmem_free(fhp, fhsize);
1525 fhsize = sz;
1526 goto again;
1527 } else {
1528 /*
1529 * userland told us wrong size.
1530 */
1531 error = EINVAL;
1532 }
1533 }
1534 kmem_free(fhp, fhsize);
1535 return error;
1536 }
1537
1538 void
1539 vfs_copyinfh_free(fhandle_t *fhp)
1540 {
1541
1542 vfs__fhfree(fhp);
1543 }
1544
1545 /*
1546 * Get file handle system call
1547 */
1548 int
1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1550 {
1551 /* {
1552 syscallarg(char *) fname;
1553 syscallarg(fhandle_t *) fhp;
1554 syscallarg(size_t *) fh_size;
1555 } */
1556 struct vnode *vp;
1557 fhandle_t *fh;
1558 int error;
1559 struct nameidata nd;
1560 size_t sz;
1561 size_t usz;
1562
1563 /*
1564 * Must be super user
1565 */
1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1567 0, NULL, NULL, NULL);
1568 if (error)
1569 return (error);
1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1571 SCARG(uap, fname));
1572 error = namei(&nd);
1573 if (error)
1574 return (error);
1575 vp = nd.ni_vp;
1576 error = vfs_composefh_alloc(vp, &fh);
1577 vput(vp);
1578 if (error != 0) {
1579 goto out;
1580 }
1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1582 if (error != 0) {
1583 goto out;
1584 }
1585 sz = FHANDLE_SIZE(fh);
1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1587 if (error != 0) {
1588 goto out;
1589 }
1590 if (usz >= sz) {
1591 error = copyout(fh, SCARG(uap, fhp), sz);
1592 } else {
1593 error = E2BIG;
1594 }
1595 out:
1596 vfs_composefh_free(fh);
1597 return (error);
1598 }
1599
1600 /*
1601 * Open a file given a file handle.
1602 *
1603 * Check permissions, allocate an open file structure,
1604 * and call the device open routine if any.
1605 */
1606
1607 int
1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1609 register_t *retval)
1610 {
1611 file_t *fp;
1612 struct vnode *vp = NULL;
1613 kauth_cred_t cred = l->l_cred;
1614 file_t *nfp;
1615 int type, indx, error=0;
1616 struct flock lf;
1617 struct vattr va;
1618 fhandle_t *fh;
1619 int flags;
1620 proc_t *p;
1621
1622 p = curproc;
1623
1624 /*
1625 * Must be super user
1626 */
1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1628 0, NULL, NULL, NULL)))
1629 return (error);
1630
1631 flags = FFLAGS(oflags);
1632 if ((flags & (FREAD | FWRITE)) == 0)
1633 return (EINVAL);
1634 if ((flags & O_CREAT))
1635 return (EINVAL);
1636 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1637 return (error);
1638 fp = nfp;
1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1640 if (error != 0) {
1641 goto bad;
1642 }
1643 error = vfs_fhtovp(fh, &vp);
1644 if (error != 0) {
1645 goto bad;
1646 }
1647
1648 /* Now do an effective vn_open */
1649
1650 if (vp->v_type == VSOCK) {
1651 error = EOPNOTSUPP;
1652 goto bad;
1653 }
1654 error = vn_openchk(vp, cred, flags);
1655 if (error != 0)
1656 goto bad;
1657 if (flags & O_TRUNC) {
1658 VOP_UNLOCK(vp, 0); /* XXX */
1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1660 VATTR_NULL(&va);
1661 va.va_size = 0;
1662 error = VOP_SETATTR(vp, &va, cred);
1663 if (error)
1664 goto bad;
1665 }
1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1667 goto bad;
1668 if (flags & FWRITE) {
1669 mutex_enter(&vp->v_interlock);
1670 vp->v_writecount++;
1671 mutex_exit(&vp->v_interlock);
1672 }
1673
1674 /* done with modified vn_open, now finish what sys_open does. */
1675
1676 fp->f_flag = flags & FMASK;
1677 fp->f_type = DTYPE_VNODE;
1678 fp->f_ops = &vnops;
1679 fp->f_data = vp;
1680 if (flags & (O_EXLOCK | O_SHLOCK)) {
1681 lf.l_whence = SEEK_SET;
1682 lf.l_start = 0;
1683 lf.l_len = 0;
1684 if (flags & O_EXLOCK)
1685 lf.l_type = F_WRLCK;
1686 else
1687 lf.l_type = F_RDLCK;
1688 type = F_FLOCK;
1689 if ((flags & FNONBLOCK) == 0)
1690 type |= F_WAIT;
1691 VOP_UNLOCK(vp, 0);
1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1693 if (error) {
1694 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1695 fd_abort(p, fp, indx);
1696 return (error);
1697 }
1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1699 atomic_or_uint(&fp->f_flag, FHASLOCK);
1700 }
1701 VOP_UNLOCK(vp, 0);
1702 *retval = indx;
1703 fd_affix(p, fp, indx);
1704 vfs_copyinfh_free(fh);
1705 return (0);
1706
1707 bad:
1708 fd_abort(p, fp, indx);
1709 if (vp != NULL)
1710 vput(vp);
1711 vfs_copyinfh_free(fh);
1712 return (error);
1713 }
1714
1715 int
1716 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1717 {
1718 /* {
1719 syscallarg(const void *) fhp;
1720 syscallarg(size_t) fh_size;
1721 syscallarg(int) flags;
1722 } */
1723
1724 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1725 SCARG(uap, flags), retval);
1726 }
1727
1728 int
1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1730 {
1731 int error;
1732 fhandle_t *fh;
1733 struct vnode *vp;
1734
1735 /*
1736 * Must be super user
1737 */
1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1739 0, NULL, NULL, NULL)))
1740 return (error);
1741
1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1743 if (error != 0)
1744 return error;
1745
1746 error = vfs_fhtovp(fh, &vp);
1747 vfs_copyinfh_free(fh);
1748 if (error != 0)
1749 return error;
1750
1751 error = vn_stat(vp, sb);
1752 vput(vp);
1753 return error;
1754 }
1755
1756
1757 /* ARGSUSED */
1758 int
1759 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
1760 {
1761 /* {
1762 syscallarg(const void *) fhp;
1763 syscallarg(size_t) fh_size;
1764 syscallarg(struct stat *) sb;
1765 } */
1766 struct stat sb;
1767 int error;
1768
1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1770 if (error)
1771 return error;
1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1773 }
1774
1775 int
1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1777 int flags)
1778 {
1779 fhandle_t *fh;
1780 struct mount *mp;
1781 struct vnode *vp;
1782 int error;
1783
1784 /*
1785 * Must be super user
1786 */
1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1788 0, NULL, NULL, NULL)))
1789 return error;
1790
1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1792 if (error != 0)
1793 return error;
1794
1795 error = vfs_fhtovp(fh, &vp);
1796 vfs_copyinfh_free(fh);
1797 if (error != 0)
1798 return error;
1799
1800 mp = vp->v_mount;
1801 error = dostatvfs(mp, sb, l, flags, 1);
1802 vput(vp);
1803 return error;
1804 }
1805
1806 /* ARGSUSED */
1807 int
1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1809 {
1810 /* {
1811 syscallarg(const void *) fhp;
1812 syscallarg(size_t) fh_size;
1813 syscallarg(struct statvfs *) buf;
1814 syscallarg(int) flags;
1815 } */
1816 struct statvfs *sb = STATVFSBUF_GET();
1817 int error;
1818
1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1820 SCARG(uap, flags));
1821 if (error == 0)
1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1823 STATVFSBUF_PUT(sb);
1824 return error;
1825 }
1826
1827 /*
1828 * Create a special file.
1829 */
1830 /* ARGSUSED */
1831 int
1832 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
1833 register_t *retval)
1834 {
1835 /* {
1836 syscallarg(const char *) path;
1837 syscallarg(mode_t) mode;
1838 syscallarg(dev_t) dev;
1839 } */
1840 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
1841 SCARG(uap, dev), retval);
1842 }
1843
/*
 * do_sys_mknod: create a file system node at pathname.
 *
 * l		calling LWP; must pass the KAUTH_SYSTEM_MKNOD check
 * pathname	name of the node to create
 * mode		type (S_IFMT bits) and permissions; the permission bits
 *		are masked with the process's cwdi_cmask
 * dev		device number stored in va_rdev (replaced by VNOVAL for
 *		regular files)
 * retval	not referenced by this function
 *
 * The S_IFMT bits select the operation: S_IFMT itself -> VBAD node,
 * S_IFCHR / S_IFBLK -> device node via VOP_MKNOD(), S_IFWHT ->
 * VOP_WHITEOUT(), S_IFREG -> VOP_CREATE(); anything else is EINVAL.
 * Returns EEXIST if the name already exists.
 */
1844 int
1845 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
1846 register_t *retval)
1847 {
1848 struct proc *p = l->l_proc;
1849 struct vnode *vp;
1850 struct vattr vattr;
1851 int error, optype;
1852 struct nameidata nd;
1853 char *path;
1854 const char *cpath;
1855 enum uio_seg seg = UIO_USERSPACE;
1856
1857 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1858 0, NULL, NULL, NULL)) != 0)
1859 return (error);
1860
/* Default operation; overridden below for whiteouts and regular files. */
1861 optype = VOP_MKNOD_DESCOFFSET;
1862
/*
 * NOTE(review): VERIEXEC_PATH_GET is a macro defined elsewhere; it is
 * given seg, cpath and path by name and presumably sets them up --
 * confirm against its definition.  It is paired with
 * VERIEXEC_PATH_PUT(path) at "out".
 */
1863 VERIEXEC_PATH_GET(pathname, seg, cpath, path);
1864 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1865
1866 if ((error = namei(&nd)) != 0)
1867 goto out;
1868 vp = nd.ni_vp;
/* A non-NULL ni_vp means the name already exists. */
1869 if (vp != NULL)
1870 error = EEXIST;
1871 else {
1872 VATTR_NULL(&vattr);
1873 /* We will read cwdi->cwdi_cmask unlocked. */
1874 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1875 vattr.va_rdev = dev;
1876
/* Translate the S_IFMT bits into a vnode type and/or operation. */
1877 switch (mode & S_IFMT) {
1878 case S_IFMT: /* used by badsect to flag bad sectors */
1879 vattr.va_type = VBAD;
1880 break;
1881 case S_IFCHR:
1882 vattr.va_type = VCHR;
1883 break;
1884 case S_IFBLK:
1885 vattr.va_type = VBLK;
1886 break;
1887 case S_IFWHT:
1888 optype = VOP_WHITEOUT_DESCOFFSET;
1889 break;
1890 case S_IFREG:
1891 #if NVERIEXEC > 0
/* An error from this check is acted on at the "if (!error)" below. */
1892 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1893 O_CREAT);
1894 #endif /* NVERIEXEC > 0 */
1895 vattr.va_type = VREG;
1896 vattr.va_rdev = VNOVAL;
1897 optype = VOP_CREATE_DESCOFFSET;
1898 break;
1899 default:
1900 error = EINVAL;
1901 break;
1902 }
1903 }
1904 if (!error) {
/* Perform the operation selected above. */
1905 switch (optype) {
1906 case VOP_WHITEOUT_DESCOFFSET:
1907 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1908 if (error)
1909 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1910 vput(nd.ni_dvp);
1911 break;
1912
/*
 * NOTE(review): unlike the whiteout case, ni_dvp is not released here
 * for VOP_MKNOD / VOP_CREATE -- presumably the VOP disposes of it;
 * confirm.  Only the new vnode is vput() on success.
 */
1913 case VOP_MKNOD_DESCOFFSET:
1914 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1915 &nd.ni_cnd, &vattr);
1916 if (error == 0)
1917 vput(nd.ni_vp);
1918 break;
1919
1920 case VOP_CREATE_DESCOFFSET:
1921 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1922 &nd.ni_cnd, &vattr);
1923 if (error == 0)
1924 vput(nd.ni_vp);
1925 break;
1926 }
1927 } else {
/*
 * Failure before any VOP was attempted: abort the lookup and release
 * the parent (vrele() if it is the same vnode as vp, vput() otherwise)
 * and the existing vnode, if any.
 */
1928 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1929 if (nd.ni_dvp == vp)
1930 vrele(nd.ni_dvp);
1931 else
1932 vput(nd.ni_dvp);
1933 if (vp)
1934 vrele(vp);
1935 }
1936 out:
1937 VERIEXEC_PATH_PUT(path);
1938 return (error);
1939 }
1940
1941 /*
1942 * Create a named pipe.
1943 */
1944 /* ARGSUSED */
1945 int
1946 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1947 {
1948 /* {
1949 syscallarg(const char *) path;
1950 syscallarg(int) mode;
1951 } */
1952 struct proc *p = l->l_proc;
1953 struct vattr vattr;
1954 int error;
1955 struct nameidata nd;
1956
1957 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1958 SCARG(uap, path));
1959 if ((error = namei(&nd)) != 0)
1960 return (error);
1961 if (nd.ni_vp != NULL) {
1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1963 if (nd.ni_dvp == nd.ni_vp)
1964 vrele(nd.ni_dvp);
1965 else
1966 vput(nd.ni_dvp);
1967 vrele(nd.ni_vp);
1968 return (EEXIST);
1969 }
1970 VATTR_NULL(&vattr);
1971 vattr.va_type = VFIFO;
1972 /* We will read cwdi->cwdi_cmask unlocked. */
1973 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1975 if (error == 0)
1976 vput(nd.ni_vp);
1977 return (error);
1978 }
1979
1980 /*
1981 * Make a hard file link.
1982 */
1983 /* ARGSUSED */
1984 int
1985 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
1986 {
1987 /* {
1988 syscallarg(const char *) path;
1989 syscallarg(const char *) link;
1990 } */
1991 struct vnode *vp;
1992 struct nameidata nd;
1993 int error;
1994
1995 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1996 SCARG(uap, path));
1997 if ((error = namei(&nd)) != 0)
1998 return (error);
1999 vp = nd.ni_vp;
2000 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2001 SCARG(uap, link));
2002 if ((error = namei(&nd)) != 0)
2003 goto out;
2004 if (nd.ni_vp) {
2005 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2006 if (nd.ni_dvp == nd.ni_vp)
2007 vrele(nd.ni_dvp);
2008 else
2009 vput(nd.ni_dvp);
2010 vrele(nd.ni_vp);
2011 error = EEXIST;
2012 goto out;
2013 }
2014 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2015 out:
2016 vrele(vp);
2017 return (error);
2018 }
2019
2020 /*
2021 * Make a symbolic link.
2022 */
2023 /* ARGSUSED */
2024 int
2025 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2026 {
2027 /* {
2028 syscallarg(const char *) path;
2029 syscallarg(const char *) link;
2030 } */
2031 struct proc *p = l->l_proc;
2032 struct vattr vattr;
2033 char *path;
2034 int error;
2035 struct nameidata nd;
2036
2037 path = PNBUF_GET();
2038 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2039 if (error)
2040 goto out;
2041 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2042 SCARG(uap, link));
2043 if ((error = namei(&nd)) != 0)
2044 goto out;
2045 if (nd.ni_vp) {
2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2047 if (nd.ni_dvp == nd.ni_vp)
2048 vrele(nd.ni_dvp);
2049 else
2050 vput(nd.ni_dvp);
2051 vrele(nd.ni_vp);
2052 error = EEXIST;
2053 goto out;
2054 }
2055 VATTR_NULL(&vattr);
2056 vattr.va_type = VLNK;
2057 /* We will read cwdi->cwdi_cmask unlocked. */
2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2060 if (error == 0)
2061 vput(nd.ni_vp);
2062 out:
2063 PNBUF_PUT(path);
2064 return (error);
2065 }
2066
2067 /*
2068 * Delete a whiteout from the filesystem.
2069 */
2070 /* ARGSUSED */
2071 int
2072 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2073 {
2074 /* {
2075 syscallarg(const char *) path;
2076 } */
2077 int error;
2078 struct nameidata nd;
2079
2080 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2081 UIO_USERSPACE, SCARG(uap, path));
2082 error = namei(&nd);
2083 if (error)
2084 return (error);
2085
2086 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2087 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2088 if (nd.ni_dvp == nd.ni_vp)
2089 vrele(nd.ni_dvp);
2090 else
2091 vput(nd.ni_dvp);
2092 if (nd.ni_vp)
2093 vrele(nd.ni_vp);
2094 return (EEXIST);
2095 }
2096 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2097 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2098 vput(nd.ni_dvp);
2099 return (error);
2100 }
2101
2102 /*
2103 * Delete a name from the filesystem.
2104 */
2105 /* ARGSUSED */
2106 int
2107 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2108 {
2109 /* {
2110 syscallarg(const char *) path;
2111 } */
2112
2113 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2114 }
2115
2116 int
2117 do_sys_unlink(const char *arg, enum uio_seg seg)
2118 {
2119 struct vnode *vp;
2120 int error;
2121 struct nameidata nd;
2122 char *path;
2123 const char *cpath;
2124
2125 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2126 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2127
2128 if ((error = namei(&nd)) != 0)
2129 goto out;
2130 vp = nd.ni_vp;
2131
2132 /*
2133 * The root of a mounted filesystem cannot be deleted.
2134 */
2135 if (vp->v_vflag & VV_ROOT) {
2136 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2137 if (nd.ni_dvp == vp)
2138 vrele(nd.ni_dvp);
2139 else
2140 vput(nd.ni_dvp);
2141 vput(vp);
2142 error = EBUSY;
2143 goto out;
2144 }
2145
2146 #if NVERIEXEC > 0
2147 /* Handle remove requests for veriexec entries. */
2148 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2149 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2150 if (nd.ni_dvp == vp)
2151 vrele(nd.ni_dvp);
2152 else
2153 vput(nd.ni_dvp);
2154 vput(vp);
2155 goto out;
2156 }
2157 #endif /* NVERIEXEC > 0 */
2158
2159 #ifdef FILEASSOC
2160 (void)fileassoc_file_delete(vp);
2161 #endif /* FILEASSOC */
2162 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2163 out:
2164 VERIEXEC_PATH_PUT(path);
2165 return (error);
2166 }
2167
2168 /*
2169 * Reposition read/write file offset.
2170 */
2171 int
2172 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2173 {
2174 /* {
2175 syscallarg(int) fd;
2176 syscallarg(int) pad;
2177 syscallarg(off_t) offset;
2178 syscallarg(int) whence;
2179 } */
2180 kauth_cred_t cred = l->l_cred;
2181 file_t *fp;
2182 struct vnode *vp;
2183 struct vattr vattr;
2184 off_t newoff;
2185 int error, fd;
2186
2187 fd = SCARG(uap, fd);
2188
2189 if ((fp = fd_getfile(fd)) == NULL)
2190 return (EBADF);
2191
2192 vp = fp->f_data;
2193 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2194 error = ESPIPE;
2195 goto out;
2196 }
2197
2198 switch (SCARG(uap, whence)) {
2199 case SEEK_CUR:
2200 newoff = fp->f_offset + SCARG(uap, offset);
2201 break;
2202 case SEEK_END:
2203 error = VOP_GETATTR(vp, &vattr, cred);
2204 if (error) {
2205 goto out;
2206 }
2207 newoff = SCARG(uap, offset) + vattr.va_size;
2208 break;
2209 case SEEK_SET:
2210 newoff = SCARG(uap, offset);
2211 break;
2212 default:
2213 error = EINVAL;
2214 goto out;
2215 }
2216 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2217 *(off_t *)retval = fp->f_offset = newoff;
2218 }
2219 out:
2220 fd_putfile(fd);
2221 return (error);
2222 }
2223
2224 /*
2225 * Positional read system call.
2226 */
2227 int
2228 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2229 {
2230 /* {
2231 syscallarg(int) fd;
2232 syscallarg(void *) buf;
2233 syscallarg(size_t) nbyte;
2234 syscallarg(off_t) offset;
2235 } */
2236 file_t *fp;
2237 struct vnode *vp;
2238 off_t offset;
2239 int error, fd = SCARG(uap, fd);
2240
2241 if ((fp = fd_getfile(fd)) == NULL)
2242 return (EBADF);
2243
2244 if ((fp->f_flag & FREAD) == 0) {
2245 fd_putfile(fd);
2246 return (EBADF);
2247 }
2248
2249 vp = fp->f_data;
2250 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2251 error = ESPIPE;
2252 goto out;
2253 }
2254
2255 offset = SCARG(uap, offset);
2256
2257 /*
2258 * XXX This works because no file systems actually
2259 * XXX take any action on the seek operation.
2260 */
2261 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2262 goto out;
2263
2264 /* dofileread() will unuse the descriptor for us */
2265 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2266 &offset, 0, retval));
2267
2268 out:
2269 fd_putfile(fd);
2270 return (error);
2271 }
2272
2273 /*
2274 * Positional scatter read system call.
2275 */
2276 int
2277 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2278 {
2279 /* {
2280 syscallarg(int) fd;
2281 syscallarg(const struct iovec *) iovp;
2282 syscallarg(int) iovcnt;
2283 syscallarg(off_t) offset;
2284 } */
2285 off_t offset = SCARG(uap, offset);
2286
2287 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2288 SCARG(uap, iovcnt), &offset, 0, retval);
2289 }
2290
2291 /*
2292 * Positional write system call.
2293 */
2294 int
2295 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2296 {
2297 /* {
2298 syscallarg(int) fd;
2299 syscallarg(const void *) buf;
2300 syscallarg(size_t) nbyte;
2301 syscallarg(off_t) offset;
2302 } */
2303 file_t *fp;
2304 struct vnode *vp;
2305 off_t offset;
2306 int error, fd = SCARG(uap, fd);
2307
2308 if ((fp = fd_getfile(fd)) == NULL)
2309 return (EBADF);
2310
2311 if ((fp->f_flag & FWRITE) == 0) {
2312 fd_putfile(fd);
2313 return (EBADF);
2314 }
2315
2316 vp = fp->f_data;
2317 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2318 error = ESPIPE;
2319 goto out;
2320 }
2321
2322 offset = SCARG(uap, offset);
2323
2324 /*
2325 * XXX This works because no file systems actually
2326 * XXX take any action on the seek operation.
2327 */
2328 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2329 goto out;
2330
2331 /* dofilewrite() will unuse the descriptor for us */
2332 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2333 &offset, 0, retval));
2334
2335 out:
2336 fd_putfile(fd);
2337 return (error);
2338 }
2339
2340 /*
2341 * Positional gather write system call.
2342 */
2343 int
2344 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2345 {
2346 /* {
2347 syscallarg(int) fd;
2348 syscallarg(const struct iovec *) iovp;
2349 syscallarg(int) iovcnt;
2350 syscallarg(off_t) offset;
2351 } */
2352 off_t offset = SCARG(uap, offset);
2353
2354 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2355 SCARG(uap, iovcnt), &offset, 0, retval);
2356 }
2357
2358 /*
2359 * Check access permissions.
2360 */
2361 int
2362 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2363 {
2364 /* {
2365 syscallarg(const char *) path;
2366 syscallarg(int) flags;
2367 } */
2368 kauth_cred_t cred;
2369 struct vnode *vp;
2370 int error, flags;
2371 struct nameidata nd;
2372
2373 cred = kauth_cred_dup(l->l_cred);
2374 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2375 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2376 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2377 SCARG(uap, path));
2378 /* Override default credentials */
2379 nd.ni_cnd.cn_cred = cred;
2380 if ((error = namei(&nd)) != 0)
2381 goto out;
2382 vp = nd.ni_vp;
2383
2384 /* Flags == 0 means only check for existence. */
2385 if (SCARG(uap, flags)) {
2386 flags = 0;
2387 if (SCARG(uap, flags) & R_OK)
2388 flags |= VREAD;
2389 if (SCARG(uap, flags) & W_OK)
2390 flags |= VWRITE;
2391 if (SCARG(uap, flags) & X_OK)
2392 flags |= VEXEC;
2393
2394 error = VOP_ACCESS(vp, flags, cred);
2395 if (!error && (flags & VWRITE))
2396 error = vn_writechk(vp);
2397 }
2398 vput(vp);
2399 out:
2400 kauth_cred_free(cred);
2401 return (error);
2402 }
2403
2404 /*
2405 * Common code for all sys_stat functions, including compat versions.
2406 */
2407 int
2408 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2409 {
2410 int error;
2411 struct nameidata nd;
2412
2413 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2414 UIO_USERSPACE, path);
2415 error = namei(&nd);
2416 if (error != 0)
2417 return error;
2418 error = vn_stat(nd.ni_vp, sb);
2419 vput(nd.ni_vp);
2420 return error;
2421 }
2422
2423 /*
2424 * Get file status; this version follows links.
2425 */
2426 /* ARGSUSED */
2427 int
2428 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
2429 {
2430 /* {
2431 syscallarg(const char *) path;
2432 syscallarg(struct stat *) ub;
2433 } */
2434 struct stat sb;
2435 int error;
2436
2437 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2438 if (error)
2439 return error;
2440 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2441 }
2442
2443 /*
2444 * Get file status; this version does not follow links.
2445 */
2446 /* ARGSUSED */
2447 int
2448 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
2449 {
2450 /* {
2451 syscallarg(const char *) path;
2452 syscallarg(struct stat *) ub;
2453 } */
2454 struct stat sb;
2455 int error;
2456
2457 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2458 if (error)
2459 return error;
2460 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2461 }
2462
2463 /*
2464 * Get configurable pathname variables.
2465 */
2466 /* ARGSUSED */
2467 int
2468 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2469 {
2470 /* {
2471 syscallarg(const char *) path;
2472 syscallarg(int) name;
2473 } */
2474 int error;
2475 struct nameidata nd;
2476
2477 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2478 SCARG(uap, path));
2479 if ((error = namei(&nd)) != 0)
2480 return (error);
2481 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2482 vput(nd.ni_vp);
2483 return (error);
2484 }
2485
2486 /*
2487 * Return target name of a symbolic link.
2488 */
2489 /* ARGSUSED */
2490 int
2491 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2492 {
2493 /* {
2494 syscallarg(const char *) path;
2495 syscallarg(char *) buf;
2496 syscallarg(size_t) count;
2497 } */
2498 struct vnode *vp;
2499 struct iovec aiov;
2500 struct uio auio;
2501 int error;
2502 struct nameidata nd;
2503
2504 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2505 SCARG(uap, path));
2506 if ((error = namei(&nd)) != 0)
2507 return (error);
2508 vp = nd.ni_vp;
2509 if (vp->v_type != VLNK)
2510 error = EINVAL;
2511 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2512 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2513 aiov.iov_base = SCARG(uap, buf);
2514 aiov.iov_len = SCARG(uap, count);
2515 auio.uio_iov = &aiov;
2516 auio.uio_iovcnt = 1;
2517 auio.uio_offset = 0;
2518 auio.uio_rw = UIO_READ;
2519 KASSERT(l == curlwp);
2520 auio.uio_vmspace = l->l_proc->p_vmspace;
2521 auio.uio_resid = SCARG(uap, count);
2522 error = VOP_READLINK(vp, &auio, l->l_cred);
2523 }
2524 vput(vp);
2525 *retval = SCARG(uap, count) - auio.uio_resid;
2526 return (error);
2527 }
2528
2529 /*
2530 * Change flags of a file given a path name.
2531 */
2532 /* ARGSUSED */
2533 int
2534 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2535 {
2536 /* {
2537 syscallarg(const char *) path;
2538 syscallarg(u_long) flags;
2539 } */
2540 struct vnode *vp;
2541 int error;
2542 struct nameidata nd;
2543
2544 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2545 SCARG(uap, path));
2546 if ((error = namei(&nd)) != 0)
2547 return (error);
2548 vp = nd.ni_vp;
2549 error = change_flags(vp, SCARG(uap, flags), l);
2550 vput(vp);
2551 return (error);
2552 }
2553
2554 /*
2555 * Change flags of a file given a file descriptor.
2556 */
2557 /* ARGSUSED */
2558 int
2559 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2560 {
2561 /* {
2562 syscallarg(int) fd;
2563 syscallarg(u_long) flags;
2564 } */
2565 struct vnode *vp;
2566 file_t *fp;
2567 int error;
2568
2569 /* fd_getvnode() will use the descriptor for us */
2570 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2571 return (error);
2572 vp = fp->f_data;
2573 error = change_flags(vp, SCARG(uap, flags), l);
2574 VOP_UNLOCK(vp, 0);
2575 fd_putfile(SCARG(uap, fd));
2576 return (error);
2577 }
2578
2579 /*
2580 * Change flags of a file given a path name; this version does
2581 * not follow links.
2582 */
2583 int
2584 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2585 {
2586 /* {
2587 syscallarg(const char *) path;
2588 syscallarg(u_long) flags;
2589 } */
2590 struct vnode *vp;
2591 int error;
2592 struct nameidata nd;
2593
2594 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2595 SCARG(uap, path));
2596 if ((error = namei(&nd)) != 0)
2597 return (error);
2598 vp = nd.ni_vp;
2599 error = change_flags(vp, SCARG(uap, flags), l);
2600 vput(vp);
2601 return (error);
2602 }
2603
2604 /*
2605 * Common routine to change flags of a file.
2606 */
2607 int
2608 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2609 {
2610 struct vattr vattr;
2611 int error;
2612
2613 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2614 /*
2615 * Non-superusers cannot change the flags on devices, even if they
2616 * own them.
2617 */
2618 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2619 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2620 goto out;
2621 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2622 error = EINVAL;
2623 goto out;
2624 }
2625 }
2626 VATTR_NULL(&vattr);
2627 vattr.va_flags = flags;
2628 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2629 out:
2630 return (error);
2631 }
2632
2633 /*
2634 * Change mode of a file given path name; this version follows links.
2635 */
2636 /* ARGSUSED */
2637 int
2638 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2639 {
2640 /* {
2641 syscallarg(const char *) path;
2642 syscallarg(int) mode;
2643 } */
2644 int error;
2645 struct nameidata nd;
2646
2647 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2648 SCARG(uap, path));
2649 if ((error = namei(&nd)) != 0)
2650 return (error);
2651
2652 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2653
2654 vrele(nd.ni_vp);
2655 return (error);
2656 }
2657
2658 /*
2659 * Change mode of a file given a file descriptor.
2660 */
2661 /* ARGSUSED */
2662 int
2663 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2664 {
2665 /* {
2666 syscallarg(int) fd;
2667 syscallarg(int) mode;
2668 } */
2669 file_t *fp;
2670 int error;
2671
2672 /* fd_getvnode() will use the descriptor for us */
2673 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2674 return (error);
2675 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2676 fd_putfile(SCARG(uap, fd));
2677 return (error);
2678 }
2679
2680 /*
2681 * Change mode of a file given path name; this version does not follow links.
2682 */
2683 /* ARGSUSED */
2684 int
2685 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2686 {
2687 /* {
2688 syscallarg(const char *) path;
2689 syscallarg(int) mode;
2690 } */
2691 int error;
2692 struct nameidata nd;
2693
2694 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2695 SCARG(uap, path));
2696 if ((error = namei(&nd)) != 0)
2697 return (error);
2698
2699 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2700
2701 vrele(nd.ni_vp);
2702 return (error);
2703 }
2704
2705 /*
2706 * Common routine to set mode given a vnode.
2707 */
2708 static int
2709 change_mode(struct vnode *vp, int mode, struct lwp *l)
2710 {
2711 struct vattr vattr;
2712 int error;
2713
2714 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2715 VATTR_NULL(&vattr);
2716 vattr.va_mode = mode & ALLPERMS;
2717 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2718 VOP_UNLOCK(vp, 0);
2719 return (error);
2720 }
2721
2722 /*
2723 * Set ownership given a path name; this version follows links.
2724 */
2725 /* ARGSUSED */
2726 int
2727 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2728 {
2729 /* {
2730 syscallarg(const char *) path;
2731 syscallarg(uid_t) uid;
2732 syscallarg(gid_t) gid;
2733 } */
2734 int error;
2735 struct nameidata nd;
2736
2737 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2738 SCARG(uap, path));
2739 if ((error = namei(&nd)) != 0)
2740 return (error);
2741
2742 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2743
2744 vrele(nd.ni_vp);
2745 return (error);
2746 }
2747
2748 /*
2749 * Set ownership given a path name; this version follows links.
2750 * Provides POSIX semantics.
2751 */
2752 /* ARGSUSED */
2753 int
2754 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2755 {
2756 /* {
2757 syscallarg(const char *) path;
2758 syscallarg(uid_t) uid;
2759 syscallarg(gid_t) gid;
2760 } */
2761 int error;
2762 struct nameidata nd;
2763
2764 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2765 SCARG(uap, path));
2766 if ((error = namei(&nd)) != 0)
2767 return (error);
2768
2769 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2770
2771 vrele(nd.ni_vp);
2772 return (error);
2773 }
2774
2775 /*
2776 * Set ownership given a file descriptor.
2777 */
2778 /* ARGSUSED */
2779 int
2780 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2781 {
2782 /* {
2783 syscallarg(int) fd;
2784 syscallarg(uid_t) uid;
2785 syscallarg(gid_t) gid;
2786 } */
2787 int error;
2788 file_t *fp;
2789
2790 /* fd_getvnode() will use the descriptor for us */
2791 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2792 return (error);
2793 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2794 l, 0);
2795 fd_putfile(SCARG(uap, fd));
2796 return (error);
2797 }
2798
2799 /*
2800 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2801 */
2802 /* ARGSUSED */
2803 int
2804 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2805 {
2806 /* {
2807 syscallarg(int) fd;
2808 syscallarg(uid_t) uid;
2809 syscallarg(gid_t) gid;
2810 } */
2811 int error;
2812 file_t *fp;
2813
2814 /* fd_getvnode() will use the descriptor for us */
2815 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2816 return (error);
2817 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2818 l, 1);
2819 fd_putfile(SCARG(uap, fd));
2820 return (error);
2821 }
2822
2823 /*
2824 * Set ownership given a path name; this version does not follow links.
2825 */
2826 /* ARGSUSED */
2827 int
2828 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2829 {
2830 /* {
2831 syscallarg(const char *) path;
2832 syscallarg(uid_t) uid;
2833 syscallarg(gid_t) gid;
2834 } */
2835 int error;
2836 struct nameidata nd;
2837
2838 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2839 SCARG(uap, path));
2840 if ((error = namei(&nd)) != 0)
2841 return (error);
2842
2843 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2844
2845 vrele(nd.ni_vp);
2846 return (error);
2847 }
2848
2849 /*
2850 * Set ownership given a path name; this version does not follow links.
2851 * Provides POSIX/XPG semantics.
2852 */
2853 /* ARGSUSED */
2854 int
2855 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2856 {
2857 /* {
2858 syscallarg(const char *) path;
2859 syscallarg(uid_t) uid;
2860 syscallarg(gid_t) gid;
2861 } */
2862 int error;
2863 struct nameidata nd;
2864
2865 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2866 SCARG(uap, path));
2867 if ((error = namei(&nd)) != 0)
2868 return (error);
2869
2870 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2871
2872 vrele(nd.ni_vp);
2873 return (error);
2874 }
2875
2876 /*
2877 * Common routine to set ownership given a vnode.
2878 */
2879 static int
2880 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2881 int posix_semantics)
2882 {
2883 struct vattr vattr;
2884 mode_t newmode;
2885 int error;
2886
2887 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2888 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2889 goto out;
2890
2891 #define CHANGED(x) ((int)(x) != -1)
2892 newmode = vattr.va_mode;
2893 if (posix_semantics) {
2894 /*
2895 * POSIX/XPG semantics: if the caller is not the super-user,
2896 * clear set-user-id and set-group-id bits. Both POSIX and
2897 * the XPG consider the behaviour for calls by the super-user
2898 * implementation-defined; we leave the set-user-id and set-
2899 * group-id settings intact in that case.
2900 */
2901 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2902 NULL) != 0)
2903 newmode &= ~(S_ISUID | S_ISGID);
2904 } else {
2905 /*
2906 * NetBSD semantics: when changing owner and/or group,
2907 * clear the respective bit(s).
2908 */
2909 if (CHANGED(uid))
2910 newmode &= ~S_ISUID;
2911 if (CHANGED(gid))
2912 newmode &= ~S_ISGID;
2913 }
2914 /* Update va_mode iff altered. */
2915 if (vattr.va_mode == newmode)
2916 newmode = VNOVAL;
2917
2918 VATTR_NULL(&vattr);
2919 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2920 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2921 vattr.va_mode = newmode;
2922 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2923 #undef CHANGED
2924
2925 out:
2926 VOP_UNLOCK(vp, 0);
2927 return (error);
2928 }
2929
2930 /*
2931 * Set the access and modification times given a path name; this
2932 * version follows links.
2933 */
2934 /* ARGSUSED */
2935 int
2936 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
2937 register_t *retval)
2938 {
2939 /* {
2940 syscallarg(const char *) path;
2941 syscallarg(const struct timeval *) tptr;
2942 } */
2943
2944 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2945 SCARG(uap, tptr), UIO_USERSPACE);
2946 }
2947
2948 /*
2949 * Set the access and modification times given a file descriptor.
2950 */
2951 /* ARGSUSED */
2952 int
2953 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
2954 register_t *retval)
2955 {
2956 /* {
2957 syscallarg(int) fd;
2958 syscallarg(const struct timeval *) tptr;
2959 } */
2960 int error;
2961 file_t *fp;
2962
2963 /* fd_getvnode() will use the descriptor for us */
2964 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2965 return (error);
2966 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2967 UIO_USERSPACE);
2968 fd_putfile(SCARG(uap, fd));
2969 return (error);
2970 }
2971
2972 /*
2973 * Set the access and modification times given a path name; this
2974 * version does not follow links.
2975 */
2976 int
2977 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
2978 register_t *retval)
2979 {
2980 /* {
2981 syscallarg(const char *) path;
2982 syscallarg(const struct timeval *) tptr;
2983 } */
2984
2985 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2986 SCARG(uap, tptr), UIO_USERSPACE);
2987 }
2988
2989 /*
2990 * Common routine to set access and modification times given a vnode.
2991 */
2992 int
2993 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2994 const struct timeval *tptr, enum uio_seg seg)
2995 {
2996 struct vattr vattr;
2997 struct nameidata nd;
2998 int error;
2999 bool vanull, setbirthtime;
3000 struct timespec ts[2];
3001
3002 if (tptr == NULL) {
3003 vanull = true;
3004 nanotime(&ts[0]);
3005 ts[1] = ts[0];
3006 } else {
3007 struct timeval tv[2];
3008
3009 vanull = false;
3010 if (seg != UIO_SYSSPACE) {
3011 error = copyin(tptr, tv, sizeof (tv));
3012 if (error != 0)
3013 return error;
3014 tptr = tv;
3015 }
3016 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3017 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3018 }
3019
3020 if (vp == NULL) {
3021 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3022 if ((error = namei(&nd)) != 0)
3023 return error;
3024 vp = nd.ni_vp;
3025 } else
3026 nd.ni_vp = NULL;
3027
3028 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3029 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3030 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3031 VATTR_NULL(&vattr);
3032 vattr.va_atime = ts[0];
3033 vattr.va_mtime = ts[1];
3034 if (setbirthtime)
3035 vattr.va_birthtime = ts[1];
3036 if (vanull)
3037 vattr.va_vaflags |= VA_UTIMES_NULL;
3038 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3039 VOP_UNLOCK(vp, 0);
3040
3041 if (nd.ni_vp != NULL)
3042 vrele(nd.ni_vp);
3043
3044 return error;
3045 }
3046
3047 /*
3048 * Truncate a file given its path name.
3049 */
3050 /* ARGSUSED */
3051 int
3052 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3053 {
3054 /* {
3055 syscallarg(const char *) path;
3056 syscallarg(int) pad;
3057 syscallarg(off_t) length;
3058 } */
3059 struct vnode *vp;
3060 struct vattr vattr;
3061 int error;
3062 struct nameidata nd;
3063
3064 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3065 SCARG(uap, path));
3066 if ((error = namei(&nd)) != 0)
3067 return (error);
3068 vp = nd.ni_vp;
3069 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3070 if (vp->v_type == VDIR)
3071 error = EISDIR;
3072 else if ((error = vn_writechk(vp)) == 0 &&
3073 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3074 VATTR_NULL(&vattr);
3075 vattr.va_size = SCARG(uap, length);
3076 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3077 }
3078 vput(vp);
3079 return (error);
3080 }
3081
3082 /*
3083 * Truncate a file given a file descriptor.
3084 */
3085 /* ARGSUSED */
3086 int
3087 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3088 {
3089 /* {
3090 syscallarg(int) fd;
3091 syscallarg(int) pad;
3092 syscallarg(off_t) length;
3093 } */
3094 struct vattr vattr;
3095 struct vnode *vp;
3096 file_t *fp;
3097 int error;
3098
3099 /* fd_getvnode() will use the descriptor for us */
3100 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3101 return (error);
3102 if ((fp->f_flag & FWRITE) == 0) {
3103 error = EINVAL;
3104 goto out;
3105 }
3106 vp = fp->f_data;
3107 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3108 if (vp->v_type == VDIR)
3109 error = EISDIR;
3110 else if ((error = vn_writechk(vp)) == 0) {
3111 VATTR_NULL(&vattr);
3112 vattr.va_size = SCARG(uap, length);
3113 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3114 }
3115 VOP_UNLOCK(vp, 0);
3116 out:
3117 fd_putfile(SCARG(uap, fd));
3118 return (error);
3119 }
3120
3121 /*
3122 * Sync an open file.
3123 */
3124 /* ARGSUSED */
3125 int
3126 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3127 {
3128 /* {
3129 syscallarg(int) fd;
3130 } */
3131 struct vnode *vp;
3132 file_t *fp;
3133 int error;
3134
3135 /* fd_getvnode() will use the descriptor for us */
3136 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3137 return (error);
3138 vp = fp->f_data;
3139 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3140 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3141 VOP_UNLOCK(vp, 0);
3142 fd_putfile(SCARG(uap, fd));
3143 return (error);
3144 }
3145
3146 /*
3147 * Sync a range of file data. API modeled after that found in AIX.
3148 *
3149 * FDATASYNC indicates that we need only save enough metadata to be able
3150 * to re-read the written data. Note we duplicate AIX's requirement that
3151 * the file be open for writing.
3152 */
3153 /* ARGSUSED */
3154 int
3155 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3156 {
3157 /* {
3158 syscallarg(int) fd;
3159 syscallarg(int) flags;
3160 syscallarg(off_t) start;
3161 syscallarg(off_t) length;
3162 } */
3163 struct vnode *vp;
3164 file_t *fp;
3165 int flags, nflags;
3166 off_t s, e, len;
3167 int error;
3168
3169 /* fd_getvnode() will use the descriptor for us */
3170 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3171 return (error);
3172
3173 if ((fp->f_flag & FWRITE) == 0) {
3174 error = EBADF;
3175 goto out;
3176 }
3177
3178 flags = SCARG(uap, flags);
3179 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3180 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3181 error = EINVAL;
3182 goto out;
3183 }
3184 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3185 if (flags & FDATASYNC)
3186 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3187 else
3188 nflags = FSYNC_WAIT;
3189 if (flags & FDISKSYNC)
3190 nflags |= FSYNC_CACHE;
3191
3192 len = SCARG(uap, length);
3193 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3194 if (len) {
3195 s = SCARG(uap, start);
3196 e = s + len;
3197 if (e < s) {
3198 error = EINVAL;
3199 goto out;
3200 }
3201 } else {
3202 e = 0;
3203 s = 0;
3204 }
3205
3206 vp = fp->f_data;
3207 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3208 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3209 VOP_UNLOCK(vp, 0);
3210 out:
3211 fd_putfile(SCARG(uap, fd));
3212 return (error);
3213 }
3214
3215 /*
3216 * Sync the data of an open file.
3217 */
3218 /* ARGSUSED */
3219 int
3220 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3221 {
3222 /* {
3223 syscallarg(int) fd;
3224 } */
3225 struct vnode *vp;
3226 file_t *fp;
3227 int error;
3228
3229 /* fd_getvnode() will use the descriptor for us */
3230 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3231 return (error);
3232 if ((fp->f_flag & FWRITE) == 0) {
3233 fd_putfile(SCARG(uap, fd));
3234 return (EBADF);
3235 }
3236 vp = fp->f_data;
3237 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3238 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3239 VOP_UNLOCK(vp, 0);
3240 fd_putfile(SCARG(uap, fd));
3241 return (error);
3242 }
3243
3244 /*
3245 * Rename files, (standard) BSD semantics frontend.
3246 */
3247 /* ARGSUSED */
3248 int
3249 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3250 {
3251 /* {
3252 syscallarg(const char *) from;
3253 syscallarg(const char *) to;
3254 } */
3255
3256 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3257 }
3258
3259 /*
3260 * Rename files, POSIX semantics frontend.
3261 */
3262 /* ARGSUSED */
3263 int
3264 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3265 {
3266 /* {
3267 syscallarg(const char *) from;
3268 syscallarg(const char *) to;
3269 } */
3270
3271 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3272 }
3273
3274 /*
3275 * Rename files. Source and destination must either both be directories,
3276 * or both not be directories. If target is a directory, it must be empty.
3277 * If `from' and `to' refer to the same object, the value of the `retain'
3278 * argument is used to determine whether `from' will be
3279 *
3280 * (retain == 0) deleted unless `from' and `to' refer to the same
3281 * object in the file system's name space (BSD).
3282 * (retain == 1) always retained (POSIX).
3283 */
3284 int
3285 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3286 {
3287 struct vnode *tvp, *fvp, *tdvp;
3288 struct nameidata fromnd, tond;
3289 struct mount *fs;
3290 struct lwp *l = curlwp;
3291 struct proc *p;
3292 uint32_t saveflag;
3293 int error;
3294
3295 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3296 seg, from);
3297 if ((error = namei(&fromnd)) != 0)
3298 return (error);
3299 if (fromnd.ni_dvp != fromnd.ni_vp)
3300 VOP_UNLOCK(fromnd.ni_dvp, 0);
3301 fvp = fromnd.ni_vp;
3302
3303 fs = fvp->v_mount;
3304 error = VFS_RENAMELOCK_ENTER(fs);
3305 if (error) {
3306 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3307 vrele(fromnd.ni_dvp);
3308 vrele(fvp);
3309 goto out1;
3310 }
3311
3312 /*
3313 * close, partially, yet another race - ideally we should only
3314 * go as far as getting fromnd.ni_dvp before getting the per-fs
3315 * lock, and then continue to get fromnd.ni_vp, but we can't do
3316 * that with namei as it stands.
3317 *
3318 * This still won't prevent rmdir from nuking fromnd.ni_vp
3319 * under us. The real fix is to get the locks in the right
3320 * order and do the lookups in the right places, but that's a
3321 * major rototill.
3322 *
3323 * Preserve the SAVESTART in cn_flags, because who knows what
3324 * might happen if we don't.
3325 *
3326 * Note: this logic (as well as this whole function) is cloned
3327 * in nfs_serv.c. Proceed accordingly.
3328 */
3329 vrele(fvp);
3330 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3331 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3332 (fromnd.ni_cnd.cn_namelen == 2 &&
3333 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3334 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3335 error = EINVAL;
3336 VFS_RENAMELOCK_EXIT(fs);
3337 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3338 vrele(fromnd.ni_dvp);
3339 goto out1;
3340 }
3341 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3342 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3343 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3344 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3345 fromnd.ni_cnd.cn_flags |= saveflag;
3346 if (error) {
3347 VOP_UNLOCK(fromnd.ni_dvp, 0);
3348 VFS_RENAMELOCK_EXIT(fs);
3349 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3350 vrele(fromnd.ni_dvp);
3351 goto out1;
3352 }
3353 VOP_UNLOCK(fromnd.ni_vp, 0);
3354 if (fromnd.ni_dvp != fromnd.ni_vp)
3355 VOP_UNLOCK(fromnd.ni_dvp, 0);
3356 fvp = fromnd.ni_vp;
3357
3358 NDINIT(&tond, RENAME,
3359 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3360 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3361 seg, to);
3362 if ((error = namei(&tond)) != 0) {
3363 VFS_RENAMELOCK_EXIT(fs);
3364 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3365 vrele(fromnd.ni_dvp);
3366 vrele(fvp);
3367 goto out1;
3368 }
3369 tdvp = tond.ni_dvp;
3370 tvp = tond.ni_vp;
3371
3372 if (tvp != NULL) {
3373 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3374 error = ENOTDIR;
3375 goto out;
3376 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3377 error = EISDIR;
3378 goto out;
3379 }
3380 }
3381
3382 if (fvp == tdvp)
3383 error = EINVAL;
3384
3385 /*
3386 * Source and destination refer to the same object.
3387 */
3388 if (fvp == tvp) {
3389 if (retain)
3390 error = -1;
3391 else if (fromnd.ni_dvp == tdvp &&
3392 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3393 !memcmp(fromnd.ni_cnd.cn_nameptr,
3394 tond.ni_cnd.cn_nameptr,
3395 fromnd.ni_cnd.cn_namelen))
3396 error = -1;
3397 }
3398
3399 #if NVERIEXEC > 0
3400 if (!error) {
3401 char *f1, *f2;
3402 size_t f1_len;
3403 size_t f2_len;
3404
3405 f1_len = fromnd.ni_cnd.cn_namelen + 1;
3406 f1 = kmem_alloc(f1_len, KM_SLEEP);
3407 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);
3408
3409 f2_len = tond.ni_cnd.cn_namelen + 1;
3410 f2 = kmem_alloc(f2_len, KM_SLEEP);
3411 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);
3412
3413 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3414
3415 kmem_free(f1, f1_len);
3416 kmem_free(f2, f2_len);
3417 }
3418 #endif /* NVERIEXEC > 0 */
3419
3420 out:
3421 p = l->l_proc;
3422 if (!error) {
3423 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3424 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3425 VFS_RENAMELOCK_EXIT(fs);
3426 } else {
3427 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3428 if (tdvp == tvp)
3429 vrele(tdvp);
3430 else
3431 vput(tdvp);
3432 if (tvp)
3433 vput(tvp);
3434 VFS_RENAMELOCK_EXIT(fs);
3435 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3436 vrele(fromnd.ni_dvp);
3437 vrele(fvp);
3438 }
3439 vrele(tond.ni_startdir);
3440 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3441 out1:
3442 if (fromnd.ni_startdir)
3443 vrele(fromnd.ni_startdir);
3444 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3445 return (error == -1 ? 0 : error);
3446 }
3447
3448 /*
3449 * Make a directory file.
3450 */
3451 /* ARGSUSED */
3452 int
3453 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3454 {
3455 /* {
3456 syscallarg(const char *) path;
3457 syscallarg(int) mode;
3458 } */
3459 struct proc *p = l->l_proc;
3460 struct vnode *vp;
3461 struct vattr vattr;
3462 int error;
3463 struct nameidata nd;
3464
3465 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3466 SCARG(uap, path));
3467 if ((error = namei(&nd)) != 0)
3468 return (error);
3469 vp = nd.ni_vp;
3470 if (vp != NULL) {
3471 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3472 if (nd.ni_dvp == vp)
3473 vrele(nd.ni_dvp);
3474 else
3475 vput(nd.ni_dvp);
3476 vrele(vp);
3477 return (EEXIST);
3478 }
3479 VATTR_NULL(&vattr);
3480 vattr.va_type = VDIR;
3481 /* We will read cwdi->cwdi_cmask unlocked. */
3482 vattr.va_mode =
3483 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3484 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3485 if (!error)
3486 vput(nd.ni_vp);
3487 return (error);
3488 }
3489
3490 /*
3491 * Remove a directory file.
3492 */
3493 /* ARGSUSED */
3494 int
3495 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3496 {
3497 /* {
3498 syscallarg(const char *) path;
3499 } */
3500 struct vnode *vp;
3501 int error;
3502 struct nameidata nd;
3503
3504 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3505 SCARG(uap, path));
3506 if ((error = namei(&nd)) != 0)
3507 return (error);
3508 vp = nd.ni_vp;
3509 if (vp->v_type != VDIR) {
3510 error = ENOTDIR;
3511 goto out;
3512 }
3513 /*
3514 * No rmdir "." please.
3515 */
3516 if (nd.ni_dvp == vp) {
3517 error = EINVAL;
3518 goto out;
3519 }
3520 /*
3521 * The root of a mounted filesystem cannot be deleted.
3522 */
3523 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3524 error = EBUSY;
3525 goto out;
3526 }
3527 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3528 return (error);
3529
3530 out:
3531 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3532 if (nd.ni_dvp == vp)
3533 vrele(nd.ni_dvp);
3534 else
3535 vput(nd.ni_dvp);
3536 vput(vp);
3537 return (error);
3538 }
3539
3540 /*
3541 * Read a block of directory entries in a file system independent format.
3542 */
3543 int
3544 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3545 {
3546 /* {
3547 syscallarg(int) fd;
3548 syscallarg(char *) buf;
3549 syscallarg(size_t) count;
3550 } */
3551 file_t *fp;
3552 int error, done;
3553
3554 /* fd_getvnode() will use the descriptor for us */
3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3556 return (error);
3557 if ((fp->f_flag & FREAD) == 0) {
3558 error = EBADF;
3559 goto out;
3560 }
3561 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3562 SCARG(uap, count), &done, l, 0, 0);
3563 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3564 *retval = done;
3565 out:
3566 fd_putfile(SCARG(uap, fd));
3567 return (error);
3568 }
3569
3570 /*
3571 * Set the mode mask for creation of filesystem nodes.
3572 */
3573 int
3574 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3575 {
3576 /* {
3577 syscallarg(mode_t) newmask;
3578 } */
3579 struct proc *p = l->l_proc;
3580 struct cwdinfo *cwdi;
3581
3582 /*
3583 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3584 * important is that we serialize changes to the mask. The
3585 * rw_exit() will issue a write memory barrier on our behalf,
3586 * and force the changes out to other CPUs (as it must use an
3587 * atomic operation, draining the local CPU's store buffers).
3588 */
3589 cwdi = p->p_cwdi;
3590 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3591 *retval = cwdi->cwdi_cmask;
3592 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3593 rw_exit(&cwdi->cwdi_lock);
3594
3595 return (0);
3596 }
3597
3598 int
3599 dorevoke(struct vnode *vp, kauth_cred_t cred)
3600 {
3601 struct vattr vattr;
3602 int error;
3603
3604 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3605 return error;
3606 if (kauth_cred_geteuid(cred) == vattr.va_uid ||
3607 (error = kauth_authorize_generic(cred,
3608 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3609 VOP_REVOKE(vp, REVOKEALL);
3610 return (error);
3611 }
3612
3613 /*
3614 * Void all references to file by ripping underlying filesystem
3615 * away from vnode.
3616 */
3617 /* ARGSUSED */
3618 int
3619 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3620 {
3621 /* {
3622 syscallarg(const char *) path;
3623 } */
3624 struct vnode *vp;
3625 int error;
3626 struct nameidata nd;
3627
3628 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3629 SCARG(uap, path));
3630 if ((error = namei(&nd)) != 0)
3631 return (error);
3632 vp = nd.ni_vp;
3633 error = dorevoke(vp, l->l_cred);
3634 vrele(vp);
3635 return (error);
3636 }
3637