vfs_syscalls.c revision 1.383 1 /* $NetBSD: vfs_syscalls.c,v 1.383 2009/01/11 02:45:53 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.383 2009/01/11 02:45:53 christos Exp $");
67
68 #ifdef _KERNEL_OPT
69 #include "opt_fileassoc.h"
70 #include "veriexec.h"
71 #endif
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/namei.h>
76 #include <sys/filedesc.h>
77 #include <sys/kernel.h>
78 #include <sys/file.h>
79 #include <sys/stat.h>
80 #include <sys/vnode.h>
81 #include <sys/mount.h>
82 #include <sys/proc.h>
83 #include <sys/uio.h>
84 #include <sys/malloc.h>
85 #include <sys/kmem.h>
86 #include <sys/dirent.h>
87 #include <sys/sysctl.h>
88 #include <sys/syscallargs.h>
89 #include <sys/vfs_syscalls.h>
90 #include <sys/ktrace.h>
91 #ifdef FILEASSOC
92 #include <sys/fileassoc.h>
93 #endif /* FILEASSOC */
94 #include <sys/verified_exec.h>
95 #include <sys/kauth.h>
96 #include <sys/atomic.h>
97 #include <sys/module.h>
98 #include <sys/buf.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102 #include <miscfs/specfs/specdev.h>
103
104 #include <nfs/rpcv2.h>
105 #include <nfs/nfsproto.h>
106 #include <nfs/nfs.h>
107 #include <nfs/nfs_var.h>
108
/* malloc(9) type tag named "mount", described as "vfs mount struct". */
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");

/*
 * Forward declarations of file-local helpers.  Their definitions are not
 * part of the code visible around here; change_dir() is used by sys_chdir().
 */
static int change_dir(struct nameidata *, struct lwp *);
static int change_flags(struct vnode *, u_long, struct lwp *);
static int change_mode(struct vnode *, int, struct lwp *l);
static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);

/* Defined below; called by mount_domount() after a successful mount. */
void checkdirs(struct vnode *);

/*
 * NOTE(review): not referenced by the code visible here; presumably a
 * tunable controlling mounts by unprivileged users -- confirm before use.
 */
int dovfsusermount = 0;
119
120 /*
121 * Virtual File System System Calls
122 */
123
124 /*
125 * Mount a file system.
126 */
127
/*
 * This table is used to maintain compatibility with 4.3BSD
 * and NetBSD 0.9 mount syscalls - and possibly other systems.
 * Note, the order is important!
 *
 * Do not modify this table. It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 *
 * mount_get_vfsops() indexes this table with the numeric file-system
 * type when the `type' argument cannot be copied in as a string; a NULL
 * slot means that the historic number has no usable file system.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};
/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames = sizeof(mountcompatnames) /
    sizeof(mountcompatnames[0]);
150
/*
 * Handle an MNT_UPDATE request on the already mounted file system whose
 * root vnode is vp.
 *
 * l		calling lwp; its credentials are used for authorization
 * vp		vnode named by the caller; must be a file-system root
 * path		mount point path, passed through to VFS_MOUNT()
 * flags	MNT_* flags requested by the caller
 * data		file-system specific arguments (may be NULL)
 * data_len	in/out length of data, passed through to VFS_MOUNT()
 *
 * Returns 0 on success, otherwise an errno value:
 *	EINVAL		vp is not the root of a file system
 *	EOPNOTSUPP	MNT_RELOAD or MNT_RDONLY requested on a read-write mount
 *	EPERM		vfs_busy() failed
 *	other		from kauth, VFS_MOUNT(), vfs_hooks_reexport() or
 *			vfs_allocate_syncvnode()
 */
static int
mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
    void *data, size_t *data_len)
{
	struct mount *mp;
	int error = 0, saved_flags;

	mp = vp->v_mount;
	/* Remember the current flags so a failed update can be rolled back. */
	saved_flags = mp->mnt_flag;

	/* We can operate only on VV_ROOT nodes. */
	if ((vp->v_vflag & VV_ROOT) == 0) {
		error = EINVAL;
		goto out;
	}

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.  Additionally, we
	 * prevent read-write to read-only downgrades.
	 */
	if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
	    (mp->mnt_flag & MNT_RDONLY) == 0) {
		error = EOPNOTSUPP;	/* Needs translation */
		goto out;
	}

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
	if (error)
		goto out;

	if (vfs_busy(mp, NULL)) {
		error = EPERM;
		goto out;
	}

	/* mnt_updating is held for the whole flag update and VFS_MOUNT(). */
	mutex_enter(&mp->mnt_updating);

	/* Replace the operation flags with the ones of this request. */
	mp->mnt_flag &= ~MNT_OP_FLAGS;
	mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);

	/*
	 * Set the mount level flags.
	 *
	 * A read-only mount updated without MNT_RDONLY is flagged with
	 * IMNT_WANTRDWR; MNT_RDONLY itself is only cleared after
	 * VFS_MOUNT() has run.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_iflag |= IMNT_WANTRDWR;
	/*
	 * NOTE(review): MNT_IGNORE is in the set-mask below but not in this
	 * clear-mask, so an update can set it but never clear it -- confirm
	 * that this is intended.
	 */
	mp->mnt_flag &=
	    ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_LOG);
	mp->mnt_flag |= flags &
	    (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_LOG | MNT_IGNORE);

	error = VFS_MOUNT(mp, path, data, data_len);

	if (error && data != NULL) {
		int error2;

		/*
		 * Update failed; let's try and see if it was an
		 * export request.  For compat with 3.0 and earlier.
		 */
		error2 = vfs_hooks_reexport(mp, path, data);

		/*
		 * Only update error code if the export request was
		 * understood but some problem occurred while
		 * processing it.
		 */
		if (error2 != EJUSTRETURN)
			error = error2;
	}

	if (mp->mnt_iflag & IMNT_WANTRDWR)
		mp->mnt_flag &= ~MNT_RDONLY;
	/* On failure roll back to the flags saved on entry. */
	if (error)
		mp->mnt_flag = saved_flags;
	mp->mnt_flag &= ~MNT_OP_FLAGS;
	mp->mnt_iflag &= ~IMNT_WANTRDWR;
	/*
	 * A sync vnode is wanted exactly when the mount is neither read-only
	 * nor async.  Note that the result of vfs_allocate_syncvnode()
	 * replaces whatever is in `error' at this point.
	 */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
		if (mp->mnt_syncer == NULL)
			error = vfs_allocate_syncvnode(mp);
	} else {
		if (mp->mnt_syncer != NULL)
			vfs_deallocate_syncvnode(mp);
	}
	mutex_exit(&mp->mnt_updating);
	vfs_unbusy(mp, false, NULL);

 out:
	return (error);
}
250
251 static int
252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
253 {
254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
255 int error;
256
257 /* Copy file-system type from userspace. */
258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
259 if (error) {
260 /*
261 * Historically, filesystem types were identified by numbers.
262 * If we get an integer for the filesystem type instead of a
263 * string, we check to see if it matches one of the historic
264 * filesystem types.
265 */
266 u_long fsindex = (u_long)fstype;
267 if (fsindex >= nmountcompatnames ||
268 mountcompatnames[fsindex] == NULL)
269 return ENODEV;
270 strlcpy(fstypename, mountcompatnames[fsindex],
271 sizeof(fstypename));
272 }
273
274 /* Accept `ufs' as an alias for `ffs', for compatibility. */
275 if (strcmp(fstypename, "ufs") == 0)
276 fstypename[0] = 'f';
277
278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
279 return 0;
280
281 /* If we can autoload a vfs module, try again */
282 mutex_enter(&module_lock);
283 (void)module_autoload(fstype, MODULE_CLASS_VFS);
284 mutex_exit(&module_lock);
285
286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
287 return 0;
288
289 return ENODEV;
290 }
291
/*
 * Mount a new file system of type vfsops on the directory *vpp.
 *
 * l		calling lwp; its credentials are used for all checks
 * vpp		in: the vnode to cover, which do_sys_mount() passes in
 *		locked; out: set to NULL once the mount has been entered
 *		on the mount list, so the caller must not release it again
 * vfsops	operations vector of the file system to mount
 * path		mount point path, passed through to VFS_MOUNT()
 * flags	MNT_* flags requested by the caller
 * data		file-system specific arguments (may be NULL)
 * data_len	in/out length of data, passed through to VFS_MOUNT()
 * recurse	value handed to vn_restorerecurse() before vp is unlocked
 *
 * Returns 0 on success or an errno value.  On every failure that returns
 * before the mount is on the mount list, *vpp is left untouched and the
 * caller still owns the vnode.
 */
static int
mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len, u_int recurse)
{
	struct mount *mp;
	struct vnode *vp = *vpp;
	struct vattr va;
	int error;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error)
		return error;

	/* Can't make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR)
		return ENOTDIR;

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
	    (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
	    (error = kauth_authorize_generic(l->l_cred,
	    KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
		return error;
	}

	/* MNT_EXPORTED is not accepted when creating a new mount. */
	if (flags & MNT_EXPORTED)
		return EINVAL;

	if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
		return error;

	/*
	 * Check if a file-system is not already mounted on this vnode.
	 */
	if (vp->v_mountedhere != NULL)
		return EBUSY;

	/*
	 * NOTE(review): with KM_SLEEP the allocation presumably cannot
	 * fail, which would make the NULL check below dead code -- confirm.
	 */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	if (mp == NULL)
		return ENOMEM;

	mp->mnt_op = vfsops;
	/* Initial reference; dounmount() drops it as "reference from mount()". */
	mp->mnt_refcnt = 1;

	TAILQ_INIT(&mp->mnt_vnodelist);
	rw_init(&mp->mnt_unmounting);
	mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
	/* The mount is brand new, so busying it is asserted to succeed. */
	error = vfs_busy(mp, NULL);
	KASSERT(error == 0);
	mutex_enter(&mp->mnt_updating);

	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
	mount_initspecific(mp);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags &
	    (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
	    MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
	    MNT_LOG | MNT_IGNORE | MNT_RDONLY);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	/* Purge name cache entries for the covered vnode in either case. */
	cache_purge(vp);
	if (error != 0) {
		/* The file system refused: undo everything set up above. */
		vp->v_mountedhere = NULL;
		mutex_exit(&mp->mnt_updating);
		vfs_unbusy(mp, false, NULL);
		vfs_destroy(mp);
		return error;
	}

	/*
	 * Put the new filesystem at the tail of the mount list and make
	 * the covered vnode point at it.
	 */
	mp->mnt_iflag &= ~IMNT_WANTRDWR;
	mutex_enter(&mountlist_lock);
	vp->v_mountedhere = mp;
	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mutex_exit(&mountlist_lock);
	vn_restorerecurse(vp, recurse);
	VOP_UNLOCK(vp, 0);
	/* Move processes sitting on the covered directory to the new root. */
	checkdirs(vp);
	/*
	 * NOTE(review): a failure of vfs_allocate_syncvnode() is recorded
	 * in `error' here but overwritten by the VFS_START() result below.
	 */
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		error = vfs_allocate_syncvnode(mp);
	/* Hold an additional reference to the mount across VFS_START(). */
	mutex_exit(&mp->mnt_updating);
	vfs_unbusy(mp, true, NULL);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error)
		vrele(vp);
	/* Drop reference held for VFS_START(). */
	vfs_destroy(mp);
	/* Tell the caller that the vnode has been dealt with here. */
	*vpp = NULL;
	return error;
}
401
402 static int
403 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
404 void *data, size_t *data_len)
405 {
406 struct mount *mp;
407 int error;
408
409 /* If MNT_GETARGS is specified, it should be the only flag. */
410 if (flags & ~MNT_GETARGS)
411 return EINVAL;
412
413 mp = vp->v_mount;
414
415 /* XXX: probably some notion of "can see" here if we want isolation. */
416 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
417 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
418 if (error)
419 return error;
420
421 if ((vp->v_vflag & VV_ROOT) == 0)
422 return EINVAL;
423
424 if (vfs_busy(mp, NULL))
425 return EPERM;
426
427 mutex_enter(&mp->mnt_updating);
428 mp->mnt_flag &= ~MNT_OP_FLAGS;
429 mp->mnt_flag |= MNT_GETARGS;
430 error = VFS_MOUNT(mp, path, data, data_len);
431 mp->mnt_flag &= ~MNT_OP_FLAGS;
432 mutex_exit(&mp->mnt_updating);
433
434 vfs_unbusy(mp, false, NULL);
435 return (error);
436 }
437
438 int
439 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
440 {
441 /* {
442 syscallarg(const char *) type;
443 syscallarg(const char *) path;
444 syscallarg(int) flags;
445 syscallarg(void *) data;
446 syscallarg(size_t) data_len;
447 } */
448
449 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
450 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
451 SCARG(uap, data_len), retval);
452 }
453
454 int
455 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
456 const char *path, int flags, void *data, enum uio_seg data_seg,
457 size_t data_len, register_t *retval)
458 {
459 struct vnode *vp;
460 struct nameidata nd;
461 void *data_buf = data;
462 u_int recurse;
463 int error;
464
465 /*
466 * Get vnode to be covered
467 */
468 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
469 if ((error = namei(&nd)) != 0)
470 return (error);
471 vp = nd.ni_vp;
472
473 /*
474 * A lookup in VFS_MOUNT might result in an attempt to
475 * lock this vnode again, so make the lock recursive.
476 */
477 if (vfsops == NULL) {
478 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
479 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
480 recurse = vn_setrecurse(vp);
481 vfsops = vp->v_mount->mnt_op;
482 } else {
483 /* 'type' is userspace */
484 error = mount_get_vfsops(type, &vfsops);
485 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
486 recurse = vn_setrecurse(vp);
487 if (error != 0)
488 goto done;
489 }
490 } else {
491 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
492 recurse = vn_setrecurse(vp);
493 }
494
495 if (data != NULL && data_seg == UIO_USERSPACE) {
496 if (data_len == 0) {
497 /* No length supplied, use default for filesystem */
498 data_len = vfsops->vfs_min_mount_data;
499 if (data_len > VFS_MAX_MOUNT_DATA) {
500 error = EINVAL;
501 goto done;
502 }
503 /*
504 * Hopefully a longer buffer won't make copyin() fail.
505 * For compatibility with 3.0 and earlier.
506 */
507 if (flags & MNT_UPDATE
508 && data_len < sizeof (struct mnt_export_args30))
509 data_len = sizeof (struct mnt_export_args30);
510 }
511 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
512
513 /* NFS needs the buffer even for mnt_getargs .... */
514 error = copyin(data, data_buf, data_len);
515 if (error != 0)
516 goto done;
517 }
518
519 if (flags & MNT_GETARGS) {
520 if (data_len == 0) {
521 error = EINVAL;
522 goto done;
523 }
524 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
525 if (error != 0)
526 goto done;
527 if (data_seg == UIO_USERSPACE)
528 error = copyout(data_buf, data, data_len);
529 *retval = data_len;
530 } else if (flags & MNT_UPDATE) {
531 error = mount_update(l, vp, path, flags, data_buf, &data_len);
532 } else {
533 /* Locking is handled internally in mount_domount(). */
534 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
535 &data_len, recurse);
536 }
537
538 done:
539 if (vp != NULL) {
540 vn_restorerecurse(vp, recurse);
541 vput(vp);
542 }
543 if (data_buf != data)
544 free(data_buf, M_TEMP);
545 return (error);
546 }
547
/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * olddp is the vnode that has just been covered; olddp->v_mountedhere
 * must already point at the new mount.  Panics if the root of the new
 * file system cannot be obtained.
 */
void
checkdirs(struct vnode *olddp)
{
	struct cwdinfo *cwdi;
	struct vnode *newdp, *rele1, *rele2;
	struct proc *p;
	bool retry;

	/*
	 * Only one reference exists (presumably the one held on behalf of
	 * the mount), so nothing can be using olddp as a directory.
	 */
	if (olddp->v_usecount == 1)
		return;
	/* newdp is released with vput() at the end of this function. */
	if (VFS_ROOT(olddp->v_mountedhere, &newdp))
		panic("mount: lost mount");

	/*
	 * proc_lock is dropped in order to take cwdi_lock, so the process
	 * list walk cannot simply continue afterwards: each time a process
	 * is fixed up, the scan is restarted from the beginning.  The loop
	 * ends once a complete pass finds nothing left to change.
	 */
	do {
		retry = false;
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((p->p_flag & PK_MARKER) != 0)
				continue;
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Can't change to the old directory any more,
			 * so even if we see a stale value it's not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Pin the cwdinfo before letting go of proc_lock. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			/* Re-check under cwdi_lock and swap in the new root. */
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				VREF(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				VREF(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Old references are released with no lock held. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			/* Re-take proc_lock for the mutex_exit() below. */
			mutex_enter(proc_lock);
			break;
		}
		mutex_exit(proc_lock);
	} while (retry);

	/* The system-wide root vnode is moved over as well. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		VREF(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}
617
618 /*
619 * Unmount a file system.
620 *
621 * Note: unmount takes a path to the vnode mounted on as argument,
622 * not special file (as before).
623 */
624 /* ARGSUSED */
625 int
626 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
627 {
628 /* {
629 syscallarg(const char *) path;
630 syscallarg(int) flags;
631 } */
632 struct vnode *vp;
633 struct mount *mp;
634 int error;
635 struct nameidata nd;
636
637 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
638 SCARG(uap, path));
639 if ((error = namei(&nd)) != 0)
640 return (error);
641 vp = nd.ni_vp;
642 mp = vp->v_mount;
643 atomic_inc_uint(&mp->mnt_refcnt);
644 VOP_UNLOCK(vp, 0);
645
646 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
647 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
648 if (error) {
649 vrele(vp);
650 vfs_destroy(mp);
651 return (error);
652 }
653
654 /*
655 * Don't allow unmounting the root file system.
656 */
657 if (mp->mnt_flag & MNT_ROOTFS) {
658 vrele(vp);
659 vfs_destroy(mp);
660 return (EINVAL);
661 }
662
663 /*
664 * Must be the root of the filesystem
665 */
666 if ((vp->v_vflag & VV_ROOT) == 0) {
667 vrele(vp);
668 vfs_destroy(mp);
669 return (EINVAL);
670 }
671
672 vrele(vp);
673 error = dounmount(mp, SCARG(uap, flags), l);
674 vfs_destroy(mp);
675 return error;
676 }
677
/*
 * Do the actual file system unmount.
 *
 * mp		mount to take down
 * flags	MNT_* flags; MNT_FORCE makes VFS_UNMOUNT() run even if the
 *		preceding VFS_SYNC() failed
 * l		calling lwp; its credentials are used for VFS_SYNC()
 *
 * => Caller holds a reference to the mount, explicitly for dounmount().
 * => syncer_mutex and the mnt_unmounting writer lock are taken and
 *    released in here.
 *
 * Returns 0 on success, ENOENT if the mount is already gone (IMNT_GONE),
 * otherwise the error from veriexec, VFS_SYNC() or VFS_UNMOUNT(); in the
 * failure cases the mount is left in place.
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	struct vnode *coveredvp;
	int error;
	int async;
	int used_syncer;

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	/*
	 * XXX Freeze syncer.  Must do this before locking the
	 * mount point.  See the lock ordering note below for details.
	 */
	mutex_enter(&syncer_mutex);
	rw_enter(&mp->mnt_unmounting, RW_WRITER);
	if ((mp->mnt_iflag & IMNT_GONE) != 0) {
		/* Someone else has already unmounted this file system. */
		rw_exit(&mp->mnt_unmounting);
		mutex_exit(&syncer_mutex);
		return ENOENT;
	}

	used_syncer = (mp->mnt_syncer != NULL);

	/*
	 * XXX Syncer must be frozen when we get here.  This should really
	 * be done on a per-mountpoint basis, but especially the softdep
	 * code possibly called from the syncer doesn't exactly work on a
	 * per-mountpoint basis, so the softdep code would become a maze
	 * of vfs_busy() calls.
	 *
	 * syncer_mutex is acquired above, before mnt_unmounting, because
	 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
	 * order, and we must preserve that order to avoid deadlock.
	 *
	 * So, if the file system did not use the syncer, now is
	 * the time to release the syncer_mutex.
	 */
	if (used_syncer == 0)
		mutex_exit(&syncer_mutex);

	mp->mnt_iflag |= IMNT_UNMOUNT;
	/* MNT_ASYNC is cleared for the unmount and restored on failure. */
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (mp->mnt_syncer != NULL)
		vfs_deallocate_syncvnode(mp);
	error = 0;
	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	vfs_scrubvnlist(mp);
	if (error == 0 || (flags & MNT_FORCE))
		error = VFS_UNMOUNT(mp, flags);
	if (error) {
		/* Unmount failed: put the mount back into its prior state. */
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			(void) vfs_allocate_syncvnode(mp);
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		mp->mnt_flag |= async;
		rw_exit(&mp->mnt_unmounting);
		if (used_syncer)
			mutex_exit(&syncer_mutex);
		return (error);
	}
	vfs_scrubvnlist(mp);
	/* Unhook from the covered vnode and the mount list; mark as gone. */
	mutex_enter(&mountlist_lock);
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
		coveredvp->v_mountedhere = NULL;
	CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
	mp->mnt_iflag |= IMNT_GONE;
	mutex_exit(&mountlist_lock);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	if (used_syncer)
		mutex_exit(&syncer_mutex);
	vfs_hooks_unmount(mp);
	rw_exit(&mp->mnt_unmounting);
	vfs_destroy(mp);	/* reference from mount() */
	/* Release the reference the mount held on the covered vnode. */
	if (coveredvp != NULLVP)
		vrele(coveredvp);
	return (0);
}
770
771 /*
772 * Sync each mounted filesystem.
773 */
774 #ifdef DEBUG
775 int syncprt = 0;
776 struct ctldebug debug0 = { "syncprt", &syncprt };
777 #endif
778
779 /* ARGSUSED */
780 int
781 sys_sync(struct lwp *l, const void *v, register_t *retval)
782 {
783 struct mount *mp, *nmp;
784 int asyncflag;
785
786 if (l == NULL)
787 l = &lwp0;
788
789 mutex_enter(&mountlist_lock);
790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
791 mp = nmp) {
792 if (vfs_busy(mp, &nmp)) {
793 continue;
794 }
795 mutex_enter(&mp->mnt_updating);
796 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
797 asyncflag = mp->mnt_flag & MNT_ASYNC;
798 mp->mnt_flag &= ~MNT_ASYNC;
799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
800 if (asyncflag)
801 mp->mnt_flag |= MNT_ASYNC;
802 }
803 mutex_exit(&mp->mnt_updating);
804 vfs_unbusy(mp, false, &nmp);
805 }
806 mutex_exit(&mountlist_lock);
807 #ifdef DEBUG
808 if (syncprt)
809 vfs_bufstats();
810 #endif /* DEBUG */
811 return (0);
812 }
813
814 /*
815 * Change filesystem quotas.
816 */
817 /* ARGSUSED */
818 int
819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
820 {
821 /* {
822 syscallarg(const char *) path;
823 syscallarg(int) cmd;
824 syscallarg(int) uid;
825 syscallarg(void *) arg;
826 } */
827 struct mount *mp;
828 int error;
829 struct nameidata nd;
830
831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
832 SCARG(uap, path));
833 if ((error = namei(&nd)) != 0)
834 return (error);
835 mp = nd.ni_vp->v_mount;
836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
837 SCARG(uap, arg));
838 vrele(nd.ni_vp);
839 return (error);
840 }
841
/*
 * Fill *sp with the statistics of mount mp as seen by lwp l.
 *
 * mp		mount to report on
 * sp		result buffer
 * l		lwp on whose behalf the call is made; its process's root
 *		directory determines what is visible
 * flags	MNT_WAIT or 0 queries the file system; any other value
 *		(e.g. MNT_NOWAIT, MNT_LAZY) returns the cached mnt_stat
 * root		what to do when the caller is chrooted and mp's mount point
 *		is not below its root: non-zero reports the mount as "/",
 *		zero fails with EPERM
 *
 * Returns 0 on success or an errno value.  Note that sp->f_flag is not
 * filled in when VFS_STATVFS() or getcwd_common() fails, and that sp may
 * already have been written when EPERM is returned.
 */
int
dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
    int root)
{
	struct cwdinfo *cwdi = l->l_proc->p_cwdi;
	int error = 0;

	/*
	 * Unless MNT_WAIT or 0 is specified, do not refresh the
	 * fsstat cache; hand out the cached copy instead.
	 */
	if (flags == MNT_NOWAIT	|| flags == MNT_LAZY ||
	    (flags != MNT_WAIT && flags != 0)) {
		memcpy(sp, &mp->mnt_stat, sizeof(*sp));
		goto done;
	}

	/* Get the filesystem stats now */
	memset(sp, 0, sizeof(*sp));
	if ((error = VFS_STATVFS(mp, sp)) != 0) {
		return error;
	}

	/* The cache is only refreshed on behalf of non-chrooted processes. */
	if (cwdi->cwdi_rdir == NULL)
		(void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
done:
	if (cwdi->cwdi_rdir != NULL) {
		size_t len;
		char *bp;
		char c;
		char *path = PNBUF_GET();

		/*
		 * Build the path of the process's root directory; the
		 * string is assembled backwards from the end of the
		 * buffer, so bp ends up pointing at its start.
		 */
		bp = path + MAXPATHLEN;
		*--bp = '\0';
		rw_enter(&cwdi->cwdi_lock, RW_READER);
		error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
		    MAXPATHLEN / 2, 0, l);
		rw_exit(&cwdi->cwdi_lock);
		if (error) {
			PNBUF_PUT(path);
			return error;
		}
		len = strlen(bp);
		/*
		 * for mount points that are below our root, we can see
		 * them, so we fix up the pathname and return them. The
		 * rest we cannot see, so we don't allow viewing the
		 * data.
		 */
		if (strncmp(bp, sp->f_mntonname, len) == 0 &&
		    ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
			/* Strip the chroot prefix from the mount point. */
			(void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
			    sizeof(sp->f_mntonname));
			/* The mount point is the chroot directory itself. */
			if (sp->f_mntonname[0] == '\0')
				(void)strlcpy(sp->f_mntonname, "/",
				    sizeof(sp->f_mntonname));
		} else {
			if (root)
				(void)strlcpy(sp->f_mntonname, "/",
				    sizeof(sp->f_mntonname));
			else
				error = EPERM;
		}
		PNBUF_PUT(path);
	}
	/* Only the flags in MNT_VISFLAGMASK are reported. */
	sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
	return error;
}
911
912 /*
913 * Get filesystem statistics by path.
914 */
915 int
916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
917 {
918 struct mount *mp;
919 int error;
920 struct nameidata nd;
921
922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
923 if ((error = namei(&nd)) != 0)
924 return error;
925 mp = nd.ni_vp->v_mount;
926 error = dostatvfs(mp, sb, l, flags, 1);
927 vrele(nd.ni_vp);
928 return error;
929 }
930
931 /* ARGSUSED */
932 int
933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
934 {
935 /* {
936 syscallarg(const char *) path;
937 syscallarg(struct statvfs *) buf;
938 syscallarg(int) flags;
939 } */
940 struct statvfs *sb;
941 int error;
942
943 sb = STATVFSBUF_GET();
944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
945 if (error == 0)
946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
947 STATVFSBUF_PUT(sb);
948 return error;
949 }
950
951 /*
952 * Get filesystem statistics by fd.
953 */
954 int
955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
956 {
957 file_t *fp;
958 struct mount *mp;
959 int error;
960
961 /* fd_getvnode() will use the descriptor for us */
962 if ((error = fd_getvnode(fd, &fp)) != 0)
963 return (error);
964 mp = ((struct vnode *)fp->f_data)->v_mount;
965 error = dostatvfs(mp, sb, curlwp, flags, 1);
966 fd_putfile(fd);
967 return error;
968 }
969
970 /* ARGSUSED */
971 int
972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
973 {
974 /* {
975 syscallarg(int) fd;
976 syscallarg(struct statvfs *) buf;
977 syscallarg(int) flags;
978 } */
979 struct statvfs *sb;
980 int error;
981
982 sb = STATVFSBUF_GET();
983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
984 if (error == 0)
985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
986 STATVFSBUF_PUT(sb);
987 return error;
988 }
989
990
991 /*
992 * Get statistics on all filesystems.
993 */
994 int
995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
997 register_t *retval)
998 {
999 int root = 0;
1000 struct proc *p = l->l_proc;
1001 struct mount *mp, *nmp;
1002 struct statvfs *sb;
1003 size_t count, maxcount;
1004 int error = 0;
1005
1006 sb = STATVFSBUF_GET();
1007 maxcount = bufsize / entry_sz;
1008 mutex_enter(&mountlist_lock);
1009 count = 0;
1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1011 mp = nmp) {
1012 if (vfs_busy(mp, &nmp)) {
1013 continue;
1014 }
1015 if (sfsp && count < maxcount) {
1016 error = dostatvfs(mp, sb, l, flags, 0);
1017 if (error) {
1018 vfs_unbusy(mp, false, &nmp);
1019 error = 0;
1020 continue;
1021 }
1022 error = copyfn(sb, sfsp, entry_sz);
1023 if (error) {
1024 vfs_unbusy(mp, false, NULL);
1025 goto out;
1026 }
1027 sfsp = (char *)sfsp + entry_sz;
1028 root |= strcmp(sb->f_mntonname, "/") == 0;
1029 }
1030 count++;
1031 vfs_unbusy(mp, false, &nmp);
1032 }
1033 mutex_exit(&mountlist_lock);
1034
1035 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1036 /*
1037 * fake a root entry
1038 */
1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1040 sb, l, flags, 1);
1041 if (error != 0)
1042 goto out;
1043 if (sfsp) {
1044 error = copyfn(sb, sfsp, entry_sz);
1045 if (error != 0)
1046 goto out;
1047 }
1048 count++;
1049 }
1050 if (sfsp && count > maxcount)
1051 *retval = maxcount;
1052 else
1053 *retval = count;
1054 out:
1055 STATVFSBUF_PUT(sb);
1056 return error;
1057 }
1058
1059 int
1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1061 {
1062 /* {
1063 syscallarg(struct statvfs *) buf;
1064 syscallarg(size_t) bufsize;
1065 syscallarg(int) flags;
1066 } */
1067
1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1070 }
1071
/*
 * Change current working directory to a given file descriptor.
 *
 * Fails with ENOTDIR if the descriptor does not refer to a directory,
 * with the VOP_ACCESS() error if the caller lacks search permission,
 * and with EPERM if the process has a root directory set and
 * vn_isunder() rejects the target.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error, fd;

	/* fd_getvnode() will use the descriptor for us */
	fd = SCARG(uap, fd);
	if ((error = fd_getvnode(fd, &fp)) != 0)
		return (error);
	vp = fp->f_data;

	/* Take our own reference; it ends up in cwdi_cdir on success. */
	VREF(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}
	/*
	 * If a file system is mounted on the directory, descend to the
	 * root of the (topmost) mounted file system.  The covered vnode
	 * is released with vput() in every iteration; the root vnode
	 * from VFS_ROOT() is presumably returned locked and referenced,
	 * matching the VOP_UNLOCK()/vrele() done on it later -- confirm.
	 */
	while ((mp = vp->v_mountedhere) != NULL) {
		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, false, NULL);
		if (error)
			goto out;
		vp = tdp;
	}
	VOP_UNLOCK(vp, 0);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap directories; our reference on vp is handed over. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return (error);
}
1137
1138 /*
1139 * Change this process's notion of the root directory to a given file
1140 * descriptor.
1141 */
1142 int
1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1144 {
1145 struct proc *p = l->l_proc;
1146 struct cwdinfo *cwdi;
1147 struct vnode *vp;
1148 file_t *fp;
1149 int error, fd = SCARG(uap, fd);
1150
1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1153 return error;
1154 /* fd_getvnode() will use the descriptor for us */
1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1156 return error;
1157 vp = fp->f_data;
1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1159 if (vp->v_type != VDIR)
1160 error = ENOTDIR;
1161 else
1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1163 VOP_UNLOCK(vp, 0);
1164 if (error)
1165 goto out;
1166 VREF(vp);
1167
1168 /*
1169 * Prevent escaping from chroot by putting the root under
1170 * the working directory. Silently chdir to / if we aren't
1171 * already there.
1172 */
1173 cwdi = p->p_cwdi;
1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1176 /*
1177 * XXX would be more failsafe to change directory to a
1178 * deadfs node here instead
1179 */
1180 vrele(cwdi->cwdi_cdir);
1181 VREF(vp);
1182 cwdi->cwdi_cdir = vp;
1183 }
1184
1185 if (cwdi->cwdi_rdir != NULL)
1186 vrele(cwdi->cwdi_rdir);
1187 cwdi->cwdi_rdir = vp;
1188 rw_exit(&cwdi->cwdi_lock);
1189
1190 out:
1191 fd_putfile(fd);
1192 return (error);
1193 }
1194
1195 /*
1196 * Change current working directory (``.'').
1197 */
1198 /* ARGSUSED */
1199 int
1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1201 {
1202 /* {
1203 syscallarg(const char *) path;
1204 } */
1205 struct proc *p = l->l_proc;
1206 struct cwdinfo *cwdi;
1207 int error;
1208 struct nameidata nd;
1209
1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1211 SCARG(uap, path));
1212 if ((error = change_dir(&nd, l)) != 0)
1213 return (error);
1214 cwdi = p->p_cwdi;
1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1216 vrele(cwdi->cwdi_cdir);
1217 cwdi->cwdi_cdir = nd.ni_vp;
1218 rw_exit(&cwdi->cwdi_lock);
1219 return (0);
1220 }
1221
1222 /*
1223 * Change notion of root (``/'') directory.
1224 */
1225 /* ARGSUSED */
1226 int
1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1228 {
1229 /* {
1230 syscallarg(const char *) path;
1231 } */
1232 struct proc *p = l->l_proc;
1233 struct cwdinfo *cwdi;
1234 struct vnode *vp;
1235 int error;
1236 struct nameidata nd;
1237
1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1240 return (error);
1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1242 SCARG(uap, path));
1243 if ((error = change_dir(&nd, l)) != 0)
1244 return (error);
1245
1246 cwdi = p->p_cwdi;
1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1248 if (cwdi->cwdi_rdir != NULL)
1249 vrele(cwdi->cwdi_rdir);
1250 vp = nd.ni_vp;
1251 cwdi->cwdi_rdir = vp;
1252
1253 /*
1254 * Prevent escaping from chroot by putting the root under
1255 * the working directory. Silently chdir to / if we aren't
1256 * already there.
1257 */
1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1259 /*
1260 * XXX would be more failsafe to change directory to a
1261 * deadfs node here instead
1262 */
1263 vrele(cwdi->cwdi_cdir);
1264 VREF(vp);
1265 cwdi->cwdi_cdir = vp;
1266 }
1267 rw_exit(&cwdi->cwdi_lock);
1268
1269 return (0);
1270 }
1271
1272 /*
1273 * Common routine for chroot and chdir.
1274 */
1275 static int
1276 change_dir(struct nameidata *ndp, struct lwp *l)
1277 {
1278 struct vnode *vp;
1279 int error;
1280
1281 if ((error = namei(ndp)) != 0)
1282 return (error);
1283 vp = ndp->ni_vp;
1284 if (vp->v_type != VDIR)
1285 error = ENOTDIR;
1286 else
1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1288
1289 if (error)
1290 vput(vp);
1291 else
1292 VOP_UNLOCK(vp, 0);
1293 return (error);
1294 }
1295
/*
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * On success the new descriptor index is stored in *retval and 0 is
 * returned.  EINVAL is returned when neither FREAD nor FWRITE is set
 * after FFLAGS() conversion; otherwise the error from fd_allocfile(),
 * vn_open(), fd_dupopen() or VOP_ADVLOCK() is returned.  An ERESTART
 * result on the vn_open() failure path is reported as EINTR.
 */
int
sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(int) mode;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	file_t *fp;
	struct vnode *vp;
	int flags, cmode;
	int type, indx, error;
	struct flock lf;
	struct nameidata nd;

	flags = FFLAGS(SCARG(uap, flags));
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((error = fd_allocfile(&fp, &indx)) != 0)
		return (error);
	/* We're going to read cwdi->cwdi_cmask unlocked here. */
	cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
	    SCARG(uap, path));
	l->l_dupfd = -indx - 1;			/* XXX check for fdopen */
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/* The open failed: give back the file and descriptor slot. */
		fd_abort(p, fp, indx);
		/*
		 * EDUPFD/EMOVEFD with a non-negative l_dupfd is handed to
		 * fd_dupopen(), which supplies the descriptor to return.
		 */
		if ((error == EDUPFD || error == EMOVEFD) &&
		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
			*retval = indx;
			return (0);
		}
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	l->l_dupfd = 0;
	vp = nd.ni_vp;
	/* Fill in the file structure for the opened vnode. */
	fp->f_flag = flags & FMASK;
	fp->f_type = DTYPE_VNODE;
	fp->f_ops = &vnops;
	fp->f_data = vp;
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock request: start 0, length 0, from SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		/* F_WAIT is requested unless the open is non-blocking. */
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* The vnode is unlocked around the VOP_ADVLOCK() call. */
		VOP_UNLOCK(vp, 0);
		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
		if (error) {
			(void) vn_close(vp, fp->f_flag, fp->f_cred);
			fd_abort(p, fp, indx);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_or_uint(&fp->f_flag, FHASLOCK);
	}
	VOP_UNLOCK(vp, 0);
	*retval = indx;
	fd_affix(p, fp, indx);
	return (0);
}
1373
1374 static void
1375 vfs__fhfree(fhandle_t *fhp)
1376 {
1377 size_t fhsize;
1378
1379 if (fhp == NULL) {
1380 return;
1381 }
1382 fhsize = FHANDLE_SIZE(fhp);
1383 kmem_free(fhp, fhsize);
1384 }
1385
1386 /*
1387 * vfs_composefh: compose a filehandle.
1388 */
1389
1390 int
1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1392 {
1393 struct mount *mp;
1394 struct fid *fidp;
1395 int error;
1396 size_t needfhsize;
1397 size_t fidsize;
1398
1399 mp = vp->v_mount;
1400 fidp = NULL;
1401 if (*fh_size < FHANDLE_SIZE_MIN) {
1402 fidsize = 0;
1403 } else {
1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1405 if (fhp != NULL) {
1406 memset(fhp, 0, *fh_size);
1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1408 fidp = &fhp->fh_fid;
1409 }
1410 }
1411 error = VFS_VPTOFH(vp, fidp, &fidsize);
1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1413 if (error == 0 && *fh_size < needfhsize) {
1414 error = E2BIG;
1415 }
1416 *fh_size = needfhsize;
1417 return error;
1418 }
1419
1420 int
1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1422 {
1423 struct mount *mp;
1424 fhandle_t *fhp;
1425 size_t fhsize;
1426 size_t fidsize;
1427 int error;
1428
1429 *fhpp = NULL;
1430 mp = vp->v_mount;
1431 fidsize = 0;
1432 error = VFS_VPTOFH(vp, NULL, &fidsize);
1433 KASSERT(error != 0);
1434 if (error != E2BIG) {
1435 goto out;
1436 }
1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1438 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1439 if (fhp == NULL) {
1440 error = ENOMEM;
1441 goto out;
1442 }
1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1445 if (error == 0) {
1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1447 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1448 *fhpp = fhp;
1449 } else {
1450 kmem_free(fhp, fhsize);
1451 }
1452 out:
1453 return error;
1454 }
1455
/*
 * vfs_composefh_free: release a file handle; simply forwards to
 * vfs__fhfree().
 */
void
vfs_composefh_free(fhandle_t *fhp)
{

	vfs__fhfree(fhp);
}
1462
1463 /*
1464 * vfs_fhtovp: lookup a vnode by a filehandle.
1465 */
1466
1467 int
1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1469 {
1470 struct mount *mp;
1471 int error;
1472
1473 *vpp = NULL;
1474 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1475 if (mp == NULL) {
1476 error = ESTALE;
1477 goto out;
1478 }
1479 if (mp->mnt_op->vfs_fhtovp == NULL) {
1480 error = EOPNOTSUPP;
1481 goto out;
1482 }
1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1484 out:
1485 return error;
1486 }
1487
/*
 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
 * the needed size.
 *
 * ufhp    address to copyin() the handle from
 * fhsize  size the caller claims for the handle
 * fhpp    receives the allocated handle on success, NULL otherwise
 *
 * Returns 0 on success, EINVAL when fhsize is outside
 * [FHANDLE_SIZE_MIN, FHANDLE_SIZE_MAX] or does not match the size
 * recorded in the handle, or the copyin() error.
 */

int
vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
{
	fhandle_t *fhp;
	int error;

	*fhpp = NULL;
	if (fhsize > FHANDLE_SIZE_MAX) {
		return EINVAL;
	}
	if (fhsize < FHANDLE_SIZE_MIN) {
		return EINVAL;
	}
again:
	fhp = kmem_alloc(fhsize, KM_SLEEP);
	if (fhp == NULL) {
		return ENOMEM;
	}
	error = copyin(ufhp, fhp, fhsize);
	if (error == 0) {
		/* XXX this check shouldn't be here */
		if (FHANDLE_SIZE(fhp) == fhsize) {
			*fhpp = fhp;
			return 0;
		} else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
			/*
			 * a kludge for nfsv2 padded handles.
			 *
			 * Retry with the smaller size recorded in the
			 * handle.  The buffer is freed with the size it was
			 * allocated with before fhsize is updated.
			 */
			size_t sz;

			sz = FHANDLE_SIZE(fhp);
			kmem_free(fhp, fhsize);
			fhsize = sz;
			goto again;
		} else {
			/*
			 * userland told us wrong size.
			 */
			error = EINVAL;
		}
	}
	/* Failure: release the buffer using its allocation size. */
	kmem_free(fhp, fhsize);
	return error;
}
1537
/*
 * vfs_copyinfh_free: release a file handle; simply forwards to
 * vfs__fhfree().
 */
void
vfs_copyinfh_free(fhandle_t *fhp)
{

	vfs__fhfree(fhp);
}
1544
1545 /*
1546 * Get file handle system call
1547 */
1548 int
1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1550 {
1551 /* {
1552 syscallarg(char *) fname;
1553 syscallarg(fhandle_t *) fhp;
1554 syscallarg(size_t *) fh_size;
1555 } */
1556 struct vnode *vp;
1557 fhandle_t *fh;
1558 int error;
1559 struct nameidata nd;
1560 size_t sz;
1561 size_t usz;
1562
1563 /*
1564 * Must be super user
1565 */
1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1567 0, NULL, NULL, NULL);
1568 if (error)
1569 return (error);
1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1571 SCARG(uap, fname));
1572 error = namei(&nd);
1573 if (error)
1574 return (error);
1575 vp = nd.ni_vp;
1576 error = vfs_composefh_alloc(vp, &fh);
1577 vput(vp);
1578 if (error != 0) {
1579 goto out;
1580 }
1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1582 if (error != 0) {
1583 goto out;
1584 }
1585 sz = FHANDLE_SIZE(fh);
1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1587 if (error != 0) {
1588 goto out;
1589 }
1590 if (usz >= sz) {
1591 error = copyout(fh, SCARG(uap, fhp), sz);
1592 } else {
1593 error = E2BIG;
1594 }
1595 out:
1596 vfs_composefh_free(fh);
1597 return (error);
1598 }
1599
1600 /*
1601 * Open a file given a file handle.
1602 *
1603 * Check permissions, allocate an open file structure,
1604 * and call the device open routine if any.
1605 */
1606
1607 int
1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1609 register_t *retval)
1610 {
1611 file_t *fp;
1612 struct vnode *vp = NULL;
1613 kauth_cred_t cred = l->l_cred;
1614 file_t *nfp;
1615 int type, indx, error=0;
1616 struct flock lf;
1617 struct vattr va;
1618 fhandle_t *fh;
1619 int flags;
1620 proc_t *p;
1621
1622 p = curproc;
1623
1624 /*
1625 * Must be super user
1626 */
1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1628 0, NULL, NULL, NULL)))
1629 return (error);
1630
1631 flags = FFLAGS(oflags);
1632 if ((flags & (FREAD | FWRITE)) == 0)
1633 return (EINVAL);
1634 if ((flags & O_CREAT))
1635 return (EINVAL);
1636 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1637 return (error);
1638 fp = nfp;
1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1640 if (error != 0) {
1641 goto bad;
1642 }
1643 error = vfs_fhtovp(fh, &vp);
1644 if (error != 0) {
1645 goto bad;
1646 }
1647
1648 /* Now do an effective vn_open */
1649
1650 if (vp->v_type == VSOCK) {
1651 error = EOPNOTSUPP;
1652 goto bad;
1653 }
1654 error = vn_openchk(vp, cred, flags);
1655 if (error != 0)
1656 goto bad;
1657 if (flags & O_TRUNC) {
1658 VOP_UNLOCK(vp, 0); /* XXX */
1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1660 VATTR_NULL(&va);
1661 va.va_size = 0;
1662 error = VOP_SETATTR(vp, &va, cred);
1663 if (error)
1664 goto bad;
1665 }
1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1667 goto bad;
1668 if (flags & FWRITE) {
1669 mutex_enter(&vp->v_interlock);
1670 vp->v_writecount++;
1671 mutex_exit(&vp->v_interlock);
1672 }
1673
1674 /* done with modified vn_open, now finish what sys_open does. */
1675
1676 fp->f_flag = flags & FMASK;
1677 fp->f_type = DTYPE_VNODE;
1678 fp->f_ops = &vnops;
1679 fp->f_data = vp;
1680 if (flags & (O_EXLOCK | O_SHLOCK)) {
1681 lf.l_whence = SEEK_SET;
1682 lf.l_start = 0;
1683 lf.l_len = 0;
1684 if (flags & O_EXLOCK)
1685 lf.l_type = F_WRLCK;
1686 else
1687 lf.l_type = F_RDLCK;
1688 type = F_FLOCK;
1689 if ((flags & FNONBLOCK) == 0)
1690 type |= F_WAIT;
1691 VOP_UNLOCK(vp, 0);
1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1693 if (error) {
1694 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1695 fd_abort(p, fp, indx);
1696 return (error);
1697 }
1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1699 atomic_or_uint(&fp->f_flag, FHASLOCK);
1700 }
1701 VOP_UNLOCK(vp, 0);
1702 *retval = indx;
1703 fd_affix(p, fp, indx);
1704 vfs_copyinfh_free(fh);
1705 return (0);
1706
1707 bad:
1708 fd_abort(p, fp, indx);
1709 if (vp != NULL)
1710 vput(vp);
1711 vfs_copyinfh_free(fh);
1712 return (error);
1713 }
1714
/*
 * fhopen system call entry point: unpacks the syscall arguments and
 * passes them to dofhopen(), returning its result.
 */
int
sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) fhp;
		syscallarg(size_t) fh_size;
		syscallarg(int) flags;
	} */

	return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
	    SCARG(uap, flags), retval);
}
1727
1728 int
1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1730 {
1731 int error;
1732 fhandle_t *fh;
1733 struct vnode *vp;
1734
1735 /*
1736 * Must be super user
1737 */
1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1739 0, NULL, NULL, NULL)))
1740 return (error);
1741
1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1743 if (error != 0)
1744 return error;
1745
1746 error = vfs_fhtovp(fh, &vp);
1747 vfs_copyinfh_free(fh);
1748 if (error != 0)
1749 return error;
1750
1751 error = vn_stat(vp, sb);
1752 vput(vp);
1753 return error;
1754 }
1755
1756
1757 /* ARGSUSED */
1758 int
1759 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
1760 {
1761 /* {
1762 syscallarg(const void *) fhp;
1763 syscallarg(size_t) fh_size;
1764 syscallarg(struct stat *) sb;
1765 } */
1766 struct stat sb;
1767 int error;
1768
1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1770 if (error)
1771 return error;
1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1773 }
1774
1775 int
1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1777 int flags)
1778 {
1779 fhandle_t *fh;
1780 struct mount *mp;
1781 struct vnode *vp;
1782 int error;
1783
1784 /*
1785 * Must be super user
1786 */
1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1788 0, NULL, NULL, NULL)))
1789 return error;
1790
1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1792 if (error != 0)
1793 return error;
1794
1795 error = vfs_fhtovp(fh, &vp);
1796 vfs_copyinfh_free(fh);
1797 if (error != 0)
1798 return error;
1799
1800 mp = vp->v_mount;
1801 error = dostatvfs(mp, sb, l, flags, 1);
1802 vput(vp);
1803 return error;
1804 }
1805
1806 /* ARGSUSED */
1807 int
1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1809 {
1810 /* {
1811 syscallarg(const void *) fhp;
1812 syscallarg(size_t) fh_size;
1813 syscallarg(struct statvfs *) buf;
1814 syscallarg(int) flags;
1815 } */
1816 struct statvfs *sb = STATVFSBUF_GET();
1817 int error;
1818
1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1820 SCARG(uap, flags));
1821 if (error == 0)
1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1823 STATVFSBUF_PUT(sb);
1824 return error;
1825 }
1826
/*
 * Create a special file.
 *
 * System call entry point: unpacks the syscall arguments and passes
 * them to do_sys_mknod(), returning its result.
 */
/* ARGSUSED */
int
sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(mode_t) mode;
		syscallarg(dev_t) dev;
	} */
	return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
	    SCARG(uap, dev), retval);
}
1843
/*
 * do_sys_mknod: backend for sys___mknod50().
 *
 * Creates the node named by pathname.  The file-type bits of mode pick
 * both the vnode type and the operation used: VOP_MKNOD() for S_IFMT,
 * S_IFCHR and S_IFBLK, VOP_WHITEOUT() for S_IFWHT and VOP_CREATE() for
 * S_IFREG.  The permission bits are masked with the process's cmask.
 *
 * Returns 0 on success, EEXIST if the name already exists, EINVAL for
 * any other file type, or the error from the authorization check,
 * namei() or the vnode operation.  retval is not written here.
 */
int
do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
    register_t *retval)
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error, optype;
	struct nameidata nd;
	char *path;
	const char *cpath;
	enum uio_seg seg = UIO_USERSPACE;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
	    0, NULL, NULL, NULL)) != 0)
		return (error);

	/* Default operation; the file-type switch below may override it. */
	optype = VOP_MKNOD_DESCOFFSET;

	VERIEXEC_PATH_GET(pathname, seg, cpath, path);
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);

	if ((error = namei(&nd)) != 0)
		goto out;
	vp = nd.ni_vp;
	if (vp != NULL)
		error = EEXIST;
	else {
		VATTR_NULL(&vattr);
		/* We will read cwdi->cwdi_cmask unlocked. */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
		vattr.va_rdev = dev;

		/* Stage one: map the file type to a vnode type/operation. */
		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			optype = VOP_WHITEOUT_DESCOFFSET;
			break;
		case S_IFREG:
#if NVERIEXEC > 0
			error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
			    O_CREAT);
#endif /* NVERIEXEC > 0 */
			vattr.va_type = VREG;
			vattr.va_rdev = VNOVAL;
			optype = VOP_CREATE_DESCOFFSET;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	if (!error) {
		/* Stage two: perform the operation selected above. */
		switch (optype) {
		case VOP_WHITEOUT_DESCOFFSET:
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
			if (error)
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			break;

		case VOP_MKNOD_DESCOFFSET:
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vput(nd.ni_vp);
			break;

		case VOP_CREATE_DESCOFFSET:
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vput(nd.ni_vp);
			break;
		}
	} else {
		/*
		 * Nothing is created: abort the pending operation and
		 * release the parent directory and, if the name already
		 * existed, the vnode that was found.
		 */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
out:
	VERIEXEC_PATH_PUT(path);
	return (error);
}
1940
1941 /*
1942 * Create a named pipe.
1943 */
1944 /* ARGSUSED */
1945 int
1946 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1947 {
1948 /* {
1949 syscallarg(const char *) path;
1950 syscallarg(int) mode;
1951 } */
1952 struct proc *p = l->l_proc;
1953 struct vattr vattr;
1954 int error;
1955 struct nameidata nd;
1956
1957 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1958 SCARG(uap, path));
1959 if ((error = namei(&nd)) != 0)
1960 return (error);
1961 if (nd.ni_vp != NULL) {
1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1963 if (nd.ni_dvp == nd.ni_vp)
1964 vrele(nd.ni_dvp);
1965 else
1966 vput(nd.ni_dvp);
1967 vrele(nd.ni_vp);
1968 return (EEXIST);
1969 }
1970 VATTR_NULL(&vattr);
1971 vattr.va_type = VFIFO;
1972 /* We will read cwdi->cwdi_cmask unlocked. */
1973 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1974 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1975 if (error == 0)
1976 vput(nd.ni_vp);
1977 return (error);
1978 }
1979
1980 /*
1981 * Make a hard file link.
1982 */
1983 /* ARGSUSED */
1984 int
1985 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
1986 {
1987 /* {
1988 syscallarg(const char *) path;
1989 syscallarg(const char *) link;
1990 } */
1991 struct vnode *vp;
1992 struct nameidata nd;
1993 int error;
1994
1995 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1996 SCARG(uap, path));
1997 if ((error = namei(&nd)) != 0)
1998 return (error);
1999 vp = nd.ni_vp;
2000 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2001 SCARG(uap, link));
2002 if ((error = namei(&nd)) != 0)
2003 goto out;
2004 if (nd.ni_vp) {
2005 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2006 if (nd.ni_dvp == nd.ni_vp)
2007 vrele(nd.ni_dvp);
2008 else
2009 vput(nd.ni_dvp);
2010 vrele(nd.ni_vp);
2011 error = EEXIST;
2012 goto out;
2013 }
2014 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2015 out:
2016 vrele(vp);
2017 return (error);
2018 }
2019
2020 /*
2021 * Make a symbolic link.
2022 */
2023 /* ARGSUSED */
2024 int
2025 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2026 {
2027 /* {
2028 syscallarg(const char *) path;
2029 syscallarg(const char *) link;
2030 } */
2031 struct proc *p = l->l_proc;
2032 struct vattr vattr;
2033 char *path;
2034 int error;
2035 struct nameidata nd;
2036
2037 path = PNBUF_GET();
2038 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2039 if (error)
2040 goto out;
2041 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2042 SCARG(uap, link));
2043 if ((error = namei(&nd)) != 0)
2044 goto out;
2045 if (nd.ni_vp) {
2046 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2047 if (nd.ni_dvp == nd.ni_vp)
2048 vrele(nd.ni_dvp);
2049 else
2050 vput(nd.ni_dvp);
2051 vrele(nd.ni_vp);
2052 error = EEXIST;
2053 goto out;
2054 }
2055 VATTR_NULL(&vattr);
2056 vattr.va_type = VLNK;
2057 /* We will read cwdi->cwdi_cmask unlocked. */
2058 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2059 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2060 if (error == 0)
2061 vput(nd.ni_vp);
2062 out:
2063 PNBUF_PUT(path);
2064 return (error);
2065 }
2066
2067 /*
2068 * Delete a whiteout from the filesystem.
2069 */
2070 /* ARGSUSED */
2071 int
2072 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2073 {
2074 /* {
2075 syscallarg(const char *) path;
2076 } */
2077 int error;
2078 struct nameidata nd;
2079
2080 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2081 UIO_USERSPACE, SCARG(uap, path));
2082 error = namei(&nd);
2083 if (error)
2084 return (error);
2085
2086 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2087 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2088 if (nd.ni_dvp == nd.ni_vp)
2089 vrele(nd.ni_dvp);
2090 else
2091 vput(nd.ni_dvp);
2092 if (nd.ni_vp)
2093 vrele(nd.ni_vp);
2094 return (EEXIST);
2095 }
2096 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2097 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2098 vput(nd.ni_dvp);
2099 return (error);
2100 }
2101
/*
 * Delete a name from the filesystem.
 *
 * System call entry point: hands the path to do_sys_unlink() with
 * UIO_USERSPACE as the address space and returns its result.
 */
/* ARGSUSED */
int
sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */

	return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
}
2115
2116 int
2117 do_sys_unlink(const char *arg, enum uio_seg seg)
2118 {
2119 struct vnode *vp;
2120 int error;
2121 struct nameidata nd;
2122 kauth_cred_t cred;
2123 char *path;
2124 const char *cpath;
2125
2126 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2127 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2128
2129 if ((error = namei(&nd)) != 0)
2130 goto out;
2131 vp = nd.ni_vp;
2132
2133 /*
2134 * The root of a mounted filesystem cannot be deleted.
2135 */
2136 if (vp->v_vflag & VV_ROOT) {
2137 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2138 if (nd.ni_dvp == vp)
2139 vrele(nd.ni_dvp);
2140 else
2141 vput(nd.ni_dvp);
2142 vput(vp);
2143 error = EBUSY;
2144 goto out;
2145 }
2146
2147 #if NVERIEXEC > 0
2148 /* Handle remove requests for veriexec entries. */
2149 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2150 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2151 if (nd.ni_dvp == vp)
2152 vrele(nd.ni_dvp);
2153 else
2154 vput(nd.ni_dvp);
2155 vput(vp);
2156 goto out;
2157 }
2158 #endif /* NVERIEXEC > 0 */
2159
2160 cred = kauth_cred_get();
2161 #ifdef FILEASSOC
2162 (void)fileassoc_file_delete(vp);
2163 #endif /* FILEASSOC */
2164 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2165 out:
2166 VERIEXEC_PATH_PUT(path);
2167 return (error);
2168 }
2169
2170 /*
2171 * Reposition read/write file offset.
2172 */
2173 int
2174 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2175 {
2176 /* {
2177 syscallarg(int) fd;
2178 syscallarg(int) pad;
2179 syscallarg(off_t) offset;
2180 syscallarg(int) whence;
2181 } */
2182 kauth_cred_t cred = l->l_cred;
2183 file_t *fp;
2184 struct vnode *vp;
2185 struct vattr vattr;
2186 off_t newoff;
2187 int error, fd;
2188
2189 fd = SCARG(uap, fd);
2190
2191 if ((fp = fd_getfile(fd)) == NULL)
2192 return (EBADF);
2193
2194 vp = fp->f_data;
2195 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2196 error = ESPIPE;
2197 goto out;
2198 }
2199
2200 switch (SCARG(uap, whence)) {
2201 case SEEK_CUR:
2202 newoff = fp->f_offset + SCARG(uap, offset);
2203 break;
2204 case SEEK_END:
2205 error = VOP_GETATTR(vp, &vattr, cred);
2206 if (error) {
2207 goto out;
2208 }
2209 newoff = SCARG(uap, offset) + vattr.va_size;
2210 break;
2211 case SEEK_SET:
2212 newoff = SCARG(uap, offset);
2213 break;
2214 default:
2215 error = EINVAL;
2216 goto out;
2217 }
2218 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2219 *(off_t *)retval = fp->f_offset = newoff;
2220 }
2221 out:
2222 fd_putfile(fd);
2223 return (error);
2224 }
2225
2226 /*
2227 * Positional read system call.
2228 */
2229 int
2230 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2231 {
2232 /* {
2233 syscallarg(int) fd;
2234 syscallarg(void *) buf;
2235 syscallarg(size_t) nbyte;
2236 syscallarg(off_t) offset;
2237 } */
2238 file_t *fp;
2239 struct vnode *vp;
2240 off_t offset;
2241 int error, fd = SCARG(uap, fd);
2242
2243 if ((fp = fd_getfile(fd)) == NULL)
2244 return (EBADF);
2245
2246 if ((fp->f_flag & FREAD) == 0) {
2247 fd_putfile(fd);
2248 return (EBADF);
2249 }
2250
2251 vp = fp->f_data;
2252 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2253 error = ESPIPE;
2254 goto out;
2255 }
2256
2257 offset = SCARG(uap, offset);
2258
2259 /*
2260 * XXX This works because no file systems actually
2261 * XXX take any action on the seek operation.
2262 */
2263 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2264 goto out;
2265
2266 /* dofileread() will unuse the descriptor for us */
2267 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2268 &offset, 0, retval));
2269
2270 out:
2271 fd_putfile(fd);
2272 return (error);
2273 }
2274
/*
 * Positional scatter read system call.
 *
 * Thin wrapper: forwards the arguments to do_filereadv() and returns
 * its result.
 */
int
sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
		syscallarg(off_t) offset;
	} */
	/* Local copy, since do_filereadv() takes the offset by pointer. */
	off_t offset = SCARG(uap, offset);

	return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
	    SCARG(uap, iovcnt), &offset, 0, retval);
}
2292
2293 /*
2294 * Positional write system call.
2295 */
2296 int
2297 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2298 {
2299 /* {
2300 syscallarg(int) fd;
2301 syscallarg(const void *) buf;
2302 syscallarg(size_t) nbyte;
2303 syscallarg(off_t) offset;
2304 } */
2305 file_t *fp;
2306 struct vnode *vp;
2307 off_t offset;
2308 int error, fd = SCARG(uap, fd);
2309
2310 if ((fp = fd_getfile(fd)) == NULL)
2311 return (EBADF);
2312
2313 if ((fp->f_flag & FWRITE) == 0) {
2314 fd_putfile(fd);
2315 return (EBADF);
2316 }
2317
2318 vp = fp->f_data;
2319 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2320 error = ESPIPE;
2321 goto out;
2322 }
2323
2324 offset = SCARG(uap, offset);
2325
2326 /*
2327 * XXX This works because no file systems actually
2328 * XXX take any action on the seek operation.
2329 */
2330 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2331 goto out;
2332
2333 /* dofilewrite() will unuse the descriptor for us */
2334 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2335 &offset, 0, retval));
2336
2337 out:
2338 fd_putfile(fd);
2339 return (error);
2340 }
2341
/*
 * Positional gather write system call.
 *
 * Thin wrapper: forwards the arguments to do_filewritev() and returns
 * its result.
 */
int
sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const struct iovec *) iovp;
		syscallarg(int) iovcnt;
		syscallarg(off_t) offset;
	} */
	/* Local copy, since do_filewritev() takes the offset by pointer. */
	off_t offset = SCARG(uap, offset);

	return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
	    SCARG(uap, iovcnt), &offset, 0, retval);
}
2359
2360 /*
2361 * Check access permissions.
2362 */
2363 int
2364 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2365 {
2366 /* {
2367 syscallarg(const char *) path;
2368 syscallarg(int) flags;
2369 } */
2370 kauth_cred_t cred;
2371 struct vnode *vp;
2372 int error, flags;
2373 struct nameidata nd;
2374
2375 cred = kauth_cred_dup(l->l_cred);
2376 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2377 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2378 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2379 SCARG(uap, path));
2380 /* Override default credentials */
2381 nd.ni_cnd.cn_cred = cred;
2382 if ((error = namei(&nd)) != 0)
2383 goto out;
2384 vp = nd.ni_vp;
2385
2386 /* Flags == 0 means only check for existence. */
2387 if (SCARG(uap, flags)) {
2388 flags = 0;
2389 if (SCARG(uap, flags) & R_OK)
2390 flags |= VREAD;
2391 if (SCARG(uap, flags) & W_OK)
2392 flags |= VWRITE;
2393 if (SCARG(uap, flags) & X_OK)
2394 flags |= VEXEC;
2395
2396 error = VOP_ACCESS(vp, flags, cred);
2397 if (!error && (flags & VWRITE))
2398 error = vn_writechk(vp);
2399 }
2400 vput(vp);
2401 out:
2402 kauth_cred_free(cred);
2403 return (error);
2404 }
2405
2406 /*
2407 * Common code for all sys_stat functions, including compat versions.
2408 */
2409 int
2410 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2411 {
2412 int error;
2413 struct nameidata nd;
2414
2415 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2416 UIO_USERSPACE, path);
2417 error = namei(&nd);
2418 if (error != 0)
2419 return error;
2420 error = vn_stat(nd.ni_vp, sb);
2421 vput(nd.ni_vp);
2422 return error;
2423 }
2424
2425 /*
2426 * Get file status; this version follows links.
2427 */
2428 /* ARGSUSED */
2429 int
2430 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
2431 {
2432 /* {
2433 syscallarg(const char *) path;
2434 syscallarg(struct stat *) ub;
2435 } */
2436 struct stat sb;
2437 int error;
2438
2439 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2440 if (error)
2441 return error;
2442 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2443 }
2444
2445 /*
2446 * Get file status; this version does not follow links.
2447 */
2448 /* ARGSUSED */
2449 int
2450 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
2451 {
2452 /* {
2453 syscallarg(const char *) path;
2454 syscallarg(struct stat *) ub;
2455 } */
2456 struct stat sb;
2457 int error;
2458
2459 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2460 if (error)
2461 return error;
2462 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2463 }
2464
2465 /*
2466 * Get configurable pathname variables.
2467 */
2468 /* ARGSUSED */
2469 int
2470 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2471 {
2472 /* {
2473 syscallarg(const char *) path;
2474 syscallarg(int) name;
2475 } */
2476 int error;
2477 struct nameidata nd;
2478
2479 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2480 SCARG(uap, path));
2481 if ((error = namei(&nd)) != 0)
2482 return (error);
2483 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2484 vput(nd.ni_vp);
2485 return (error);
2486 }
2487
2488 /*
2489 * Return target name of a symbolic link.
2490 */
2491 /* ARGSUSED */
2492 int
2493 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2494 {
2495 /* {
2496 syscallarg(const char *) path;
2497 syscallarg(char *) buf;
2498 syscallarg(size_t) count;
2499 } */
2500 struct vnode *vp;
2501 struct iovec aiov;
2502 struct uio auio;
2503 int error;
2504 struct nameidata nd;
2505
2506 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2507 SCARG(uap, path));
2508 if ((error = namei(&nd)) != 0)
2509 return (error);
2510 vp = nd.ni_vp;
2511 if (vp->v_type != VLNK)
2512 error = EINVAL;
2513 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2514 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2515 aiov.iov_base = SCARG(uap, buf);
2516 aiov.iov_len = SCARG(uap, count);
2517 auio.uio_iov = &aiov;
2518 auio.uio_iovcnt = 1;
2519 auio.uio_offset = 0;
2520 auio.uio_rw = UIO_READ;
2521 KASSERT(l == curlwp);
2522 auio.uio_vmspace = l->l_proc->p_vmspace;
2523 auio.uio_resid = SCARG(uap, count);
2524 error = VOP_READLINK(vp, &auio, l->l_cred);
2525 }
2526 vput(vp);
2527 *retval = SCARG(uap, count) - auio.uio_resid;
2528 return (error);
2529 }
2530
2531 /*
2532 * Change flags of a file given a path name.
2533 */
2534 /* ARGSUSED */
2535 int
2536 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2537 {
2538 /* {
2539 syscallarg(const char *) path;
2540 syscallarg(u_long) flags;
2541 } */
2542 struct vnode *vp;
2543 int error;
2544 struct nameidata nd;
2545
2546 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2547 SCARG(uap, path));
2548 if ((error = namei(&nd)) != 0)
2549 return (error);
2550 vp = nd.ni_vp;
2551 error = change_flags(vp, SCARG(uap, flags), l);
2552 vput(vp);
2553 return (error);
2554 }
2555
2556 /*
2557 * Change flags of a file given a file descriptor.
2558 */
2559 /* ARGSUSED */
2560 int
2561 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2562 {
2563 /* {
2564 syscallarg(int) fd;
2565 syscallarg(u_long) flags;
2566 } */
2567 struct vnode *vp;
2568 file_t *fp;
2569 int error;
2570
2571 /* fd_getvnode() will use the descriptor for us */
2572 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2573 return (error);
2574 vp = fp->f_data;
2575 error = change_flags(vp, SCARG(uap, flags), l);
2576 VOP_UNLOCK(vp, 0);
2577 fd_putfile(SCARG(uap, fd));
2578 return (error);
2579 }
2580
2581 /*
2582 * Change flags of a file given a path name; this version does
2583 * not follow links.
2584 */
2585 int
2586 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2587 {
2588 /* {
2589 syscallarg(const char *) path;
2590 syscallarg(u_long) flags;
2591 } */
2592 struct vnode *vp;
2593 int error;
2594 struct nameidata nd;
2595
2596 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2597 SCARG(uap, path));
2598 if ((error = namei(&nd)) != 0)
2599 return (error);
2600 vp = nd.ni_vp;
2601 error = change_flags(vp, SCARG(uap, flags), l);
2602 vput(vp);
2603 return (error);
2604 }
2605
2606 /*
2607 * Common routine to change flags of a file.
2608 */
2609 int
2610 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2611 {
2612 struct vattr vattr;
2613 int error;
2614
2615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2616 /*
2617 * Non-superusers cannot change the flags on devices, even if they
2618 * own them.
2619 */
2620 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2621 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2622 goto out;
2623 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2624 error = EINVAL;
2625 goto out;
2626 }
2627 }
2628 VATTR_NULL(&vattr);
2629 vattr.va_flags = flags;
2630 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2631 out:
2632 return (error);
2633 }
2634
2635 /*
2636 * Change mode of a file given path name; this version follows links.
2637 */
2638 /* ARGSUSED */
2639 int
2640 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2641 {
2642 /* {
2643 syscallarg(const char *) path;
2644 syscallarg(int) mode;
2645 } */
2646 int error;
2647 struct nameidata nd;
2648
2649 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2650 SCARG(uap, path));
2651 if ((error = namei(&nd)) != 0)
2652 return (error);
2653
2654 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2655
2656 vrele(nd.ni_vp);
2657 return (error);
2658 }
2659
2660 /*
2661 * Change mode of a file given a file descriptor.
2662 */
2663 /* ARGSUSED */
2664 int
2665 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2666 {
2667 /* {
2668 syscallarg(int) fd;
2669 syscallarg(int) mode;
2670 } */
2671 file_t *fp;
2672 int error;
2673
2674 /* fd_getvnode() will use the descriptor for us */
2675 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2676 return (error);
2677 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2678 fd_putfile(SCARG(uap, fd));
2679 return (error);
2680 }
2681
2682 /*
2683 * Change mode of a file given path name; this version does not follow links.
2684 */
2685 /* ARGSUSED */
2686 int
2687 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2688 {
2689 /* {
2690 syscallarg(const char *) path;
2691 syscallarg(int) mode;
2692 } */
2693 int error;
2694 struct nameidata nd;
2695
2696 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2697 SCARG(uap, path));
2698 if ((error = namei(&nd)) != 0)
2699 return (error);
2700
2701 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2702
2703 vrele(nd.ni_vp);
2704 return (error);
2705 }
2706
2707 /*
2708 * Common routine to set mode given a vnode.
2709 */
2710 static int
2711 change_mode(struct vnode *vp, int mode, struct lwp *l)
2712 {
2713 struct vattr vattr;
2714 int error;
2715
2716 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2717 VATTR_NULL(&vattr);
2718 vattr.va_mode = mode & ALLPERMS;
2719 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2720 VOP_UNLOCK(vp, 0);
2721 return (error);
2722 }
2723
2724 /*
2725 * Set ownership given a path name; this version follows links.
2726 */
2727 /* ARGSUSED */
2728 int
2729 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2730 {
2731 /* {
2732 syscallarg(const char *) path;
2733 syscallarg(uid_t) uid;
2734 syscallarg(gid_t) gid;
2735 } */
2736 int error;
2737 struct nameidata nd;
2738
2739 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2740 SCARG(uap, path));
2741 if ((error = namei(&nd)) != 0)
2742 return (error);
2743
2744 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2745
2746 vrele(nd.ni_vp);
2747 return (error);
2748 }
2749
2750 /*
2751 * Set ownership given a path name; this version follows links.
2752 * Provides POSIX semantics.
2753 */
2754 /* ARGSUSED */
2755 int
2756 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2757 {
2758 /* {
2759 syscallarg(const char *) path;
2760 syscallarg(uid_t) uid;
2761 syscallarg(gid_t) gid;
2762 } */
2763 int error;
2764 struct nameidata nd;
2765
2766 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2767 SCARG(uap, path));
2768 if ((error = namei(&nd)) != 0)
2769 return (error);
2770
2771 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2772
2773 vrele(nd.ni_vp);
2774 return (error);
2775 }
2776
2777 /*
2778 * Set ownership given a file descriptor.
2779 */
2780 /* ARGSUSED */
2781 int
2782 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2783 {
2784 /* {
2785 syscallarg(int) fd;
2786 syscallarg(uid_t) uid;
2787 syscallarg(gid_t) gid;
2788 } */
2789 int error;
2790 file_t *fp;
2791
2792 /* fd_getvnode() will use the descriptor for us */
2793 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2794 return (error);
2795 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2796 l, 0);
2797 fd_putfile(SCARG(uap, fd));
2798 return (error);
2799 }
2800
2801 /*
2802 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2803 */
2804 /* ARGSUSED */
2805 int
2806 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2807 {
2808 /* {
2809 syscallarg(int) fd;
2810 syscallarg(uid_t) uid;
2811 syscallarg(gid_t) gid;
2812 } */
2813 int error;
2814 file_t *fp;
2815
2816 /* fd_getvnode() will use the descriptor for us */
2817 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2818 return (error);
2819 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2820 l, 1);
2821 fd_putfile(SCARG(uap, fd));
2822 return (error);
2823 }
2824
2825 /*
2826 * Set ownership given a path name; this version does not follow links.
2827 */
2828 /* ARGSUSED */
2829 int
2830 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2831 {
2832 /* {
2833 syscallarg(const char *) path;
2834 syscallarg(uid_t) uid;
2835 syscallarg(gid_t) gid;
2836 } */
2837 int error;
2838 struct nameidata nd;
2839
2840 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2841 SCARG(uap, path));
2842 if ((error = namei(&nd)) != 0)
2843 return (error);
2844
2845 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2846
2847 vrele(nd.ni_vp);
2848 return (error);
2849 }
2850
2851 /*
2852 * Set ownership given a path name; this version does not follow links.
2853 * Provides POSIX/XPG semantics.
2854 */
2855 /* ARGSUSED */
2856 int
2857 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2858 {
2859 /* {
2860 syscallarg(const char *) path;
2861 syscallarg(uid_t) uid;
2862 syscallarg(gid_t) gid;
2863 } */
2864 int error;
2865 struct nameidata nd;
2866
2867 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2868 SCARG(uap, path));
2869 if ((error = namei(&nd)) != 0)
2870 return (error);
2871
2872 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2873
2874 vrele(nd.ni_vp);
2875 return (error);
2876 }
2877
2878 /*
2879 * Common routine to set ownership given a vnode.
2880 */
2881 static int
2882 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2883 int posix_semantics)
2884 {
2885 struct vattr vattr;
2886 mode_t newmode;
2887 int error;
2888
2889 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2890 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2891 goto out;
2892
2893 #define CHANGED(x) ((int)(x) != -1)
2894 newmode = vattr.va_mode;
2895 if (posix_semantics) {
2896 /*
2897 * POSIX/XPG semantics: if the caller is not the super-user,
2898 * clear set-user-id and set-group-id bits. Both POSIX and
2899 * the XPG consider the behaviour for calls by the super-user
2900 * implementation-defined; we leave the set-user-id and set-
2901 * group-id settings intact in that case.
2902 */
2903 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2904 NULL) != 0)
2905 newmode &= ~(S_ISUID | S_ISGID);
2906 } else {
2907 /*
2908 * NetBSD semantics: when changing owner and/or group,
2909 * clear the respective bit(s).
2910 */
2911 if (CHANGED(uid))
2912 newmode &= ~S_ISUID;
2913 if (CHANGED(gid))
2914 newmode &= ~S_ISGID;
2915 }
2916 /* Update va_mode iff altered. */
2917 if (vattr.va_mode == newmode)
2918 newmode = VNOVAL;
2919
2920 VATTR_NULL(&vattr);
2921 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2922 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2923 vattr.va_mode = newmode;
2924 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2925 #undef CHANGED
2926
2927 out:
2928 VOP_UNLOCK(vp, 0);
2929 return (error);
2930 }
2931
2932 /*
2933 * Set the access and modification times given a path name; this
2934 * version follows links.
2935 */
2936 /* ARGSUSED */
2937 int
2938 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
2939 register_t *retval)
2940 {
2941 /* {
2942 syscallarg(const char *) path;
2943 syscallarg(const struct timeval *) tptr;
2944 } */
2945
2946 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2947 SCARG(uap, tptr), UIO_USERSPACE);
2948 }
2949
2950 /*
2951 * Set the access and modification times given a file descriptor.
2952 */
2953 /* ARGSUSED */
2954 int
2955 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
2956 register_t *retval)
2957 {
2958 /* {
2959 syscallarg(int) fd;
2960 syscallarg(const struct timeval *) tptr;
2961 } */
2962 int error;
2963 file_t *fp;
2964
2965 /* fd_getvnode() will use the descriptor for us */
2966 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2967 return (error);
2968 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2969 UIO_USERSPACE);
2970 fd_putfile(SCARG(uap, fd));
2971 return (error);
2972 }
2973
2974 /*
2975 * Set the access and modification times given a path name; this
2976 * version does not follow links.
2977 */
2978 int
2979 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
2980 register_t *retval)
2981 {
2982 /* {
2983 syscallarg(const char *) path;
2984 syscallarg(const struct timeval *) tptr;
2985 } */
2986
2987 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2988 SCARG(uap, tptr), UIO_USERSPACE);
2989 }
2990
2991 /*
2992 * Common routine to set access and modification times given a vnode.
2993 */
2994 int
2995 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2996 const struct timeval *tptr, enum uio_seg seg)
2997 {
2998 struct vattr vattr;
2999 struct nameidata nd;
3000 int error;
3001 bool vanull, setbirthtime;
3002 struct timespec ts[2];
3003
3004 if (tptr == NULL) {
3005 vanull = true;
3006 nanotime(&ts[0]);
3007 ts[1] = ts[0];
3008 } else {
3009 struct timeval tv[2];
3010
3011 vanull = false;
3012 if (seg != UIO_SYSSPACE) {
3013 error = copyin(tptr, tv, sizeof (tv));
3014 if (error != 0)
3015 return error;
3016 tptr = tv;
3017 }
3018 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3019 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3020 }
3021
3022 if (vp == NULL) {
3023 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3024 if ((error = namei(&nd)) != 0)
3025 return error;
3026 vp = nd.ni_vp;
3027 } else
3028 nd.ni_vp = NULL;
3029
3030 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3031 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3032 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3033 VATTR_NULL(&vattr);
3034 vattr.va_atime = ts[0];
3035 vattr.va_mtime = ts[1];
3036 if (setbirthtime)
3037 vattr.va_birthtime = ts[1];
3038 if (vanull)
3039 vattr.va_flags |= VA_UTIMES_NULL;
3040 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3041 VOP_UNLOCK(vp, 0);
3042
3043 if (nd.ni_vp != NULL)
3044 vrele(nd.ni_vp);
3045
3046 return error;
3047 }
3048
3049 /*
3050 * Truncate a file given its path name.
3051 */
3052 /* ARGSUSED */
3053 int
3054 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3055 {
3056 /* {
3057 syscallarg(const char *) path;
3058 syscallarg(int) pad;
3059 syscallarg(off_t) length;
3060 } */
3061 struct vnode *vp;
3062 struct vattr vattr;
3063 int error;
3064 struct nameidata nd;
3065
3066 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3067 SCARG(uap, path));
3068 if ((error = namei(&nd)) != 0)
3069 return (error);
3070 vp = nd.ni_vp;
3071 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3072 if (vp->v_type == VDIR)
3073 error = EISDIR;
3074 else if ((error = vn_writechk(vp)) == 0 &&
3075 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3076 VATTR_NULL(&vattr);
3077 vattr.va_size = SCARG(uap, length);
3078 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3079 }
3080 vput(vp);
3081 return (error);
3082 }
3083
3084 /*
3085 * Truncate a file given a file descriptor.
3086 */
3087 /* ARGSUSED */
3088 int
3089 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3090 {
3091 /* {
3092 syscallarg(int) fd;
3093 syscallarg(int) pad;
3094 syscallarg(off_t) length;
3095 } */
3096 struct vattr vattr;
3097 struct vnode *vp;
3098 file_t *fp;
3099 int error;
3100
3101 /* fd_getvnode() will use the descriptor for us */
3102 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3103 return (error);
3104 if ((fp->f_flag & FWRITE) == 0) {
3105 error = EINVAL;
3106 goto out;
3107 }
3108 vp = fp->f_data;
3109 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3110 if (vp->v_type == VDIR)
3111 error = EISDIR;
3112 else if ((error = vn_writechk(vp)) == 0) {
3113 VATTR_NULL(&vattr);
3114 vattr.va_size = SCARG(uap, length);
3115 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3116 }
3117 VOP_UNLOCK(vp, 0);
3118 out:
3119 fd_putfile(SCARG(uap, fd));
3120 return (error);
3121 }
3122
3123 /*
3124 * Sync an open file.
3125 */
3126 /* ARGSUSED */
3127 int
3128 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3129 {
3130 /* {
3131 syscallarg(int) fd;
3132 } */
3133 struct vnode *vp;
3134 file_t *fp;
3135 int error;
3136
3137 /* fd_getvnode() will use the descriptor for us */
3138 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3139 return (error);
3140 vp = fp->f_data;
3141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3142 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3143 if (error == 0 && bioopsp != NULL &&
3144 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3145 (*bioopsp->io_fsync)(vp, 0);
3146 VOP_UNLOCK(vp, 0);
3147 fd_putfile(SCARG(uap, fd));
3148 return (error);
3149 }
3150
3151 /*
3152 * Sync a range of file data. API modeled after that found in AIX.
3153 *
3154 * FDATASYNC indicates that we need only save enough metadata to be able
3155 * to re-read the written data. Note we duplicate AIX's requirement that
3156 * the file be open for writing.
3157 */
3158 /* ARGSUSED */
3159 int
3160 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3161 {
3162 /* {
3163 syscallarg(int) fd;
3164 syscallarg(int) flags;
3165 syscallarg(off_t) start;
3166 syscallarg(off_t) length;
3167 } */
3168 struct vnode *vp;
3169 file_t *fp;
3170 int flags, nflags;
3171 off_t s, e, len;
3172 int error;
3173
3174 /* fd_getvnode() will use the descriptor for us */
3175 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3176 return (error);
3177
3178 if ((fp->f_flag & FWRITE) == 0) {
3179 error = EBADF;
3180 goto out;
3181 }
3182
3183 flags = SCARG(uap, flags);
3184 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3185 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3186 error = EINVAL;
3187 goto out;
3188 }
3189 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3190 if (flags & FDATASYNC)
3191 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3192 else
3193 nflags = FSYNC_WAIT;
3194 if (flags & FDISKSYNC)
3195 nflags |= FSYNC_CACHE;
3196
3197 len = SCARG(uap, length);
3198 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3199 if (len) {
3200 s = SCARG(uap, start);
3201 e = s + len;
3202 if (e < s) {
3203 error = EINVAL;
3204 goto out;
3205 }
3206 } else {
3207 e = 0;
3208 s = 0;
3209 }
3210
3211 vp = fp->f_data;
3212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3213 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3214
3215 if (error == 0 && bioopsp != NULL &&
3216 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3217 (*bioopsp->io_fsync)(vp, nflags);
3218
3219 VOP_UNLOCK(vp, 0);
3220 out:
3221 fd_putfile(SCARG(uap, fd));
3222 return (error);
3223 }
3224
3225 /*
3226 * Sync the data of an open file.
3227 */
3228 /* ARGSUSED */
3229 int
3230 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3231 {
3232 /* {
3233 syscallarg(int) fd;
3234 } */
3235 struct vnode *vp;
3236 file_t *fp;
3237 int error;
3238
3239 /* fd_getvnode() will use the descriptor for us */
3240 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3241 return (error);
3242 if ((fp->f_flag & FWRITE) == 0) {
3243 fd_putfile(SCARG(uap, fd));
3244 return (EBADF);
3245 }
3246 vp = fp->f_data;
3247 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3248 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3249 VOP_UNLOCK(vp, 0);
3250 fd_putfile(SCARG(uap, fd));
3251 return (error);
3252 }
3253
3254 /*
3255 * Rename files, (standard) BSD semantics frontend.
3256 */
3257 /* ARGSUSED */
3258 int
3259 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3260 {
3261 /* {
3262 syscallarg(const char *) from;
3263 syscallarg(const char *) to;
3264 } */
3265
3266 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3267 }
3268
3269 /*
3270 * Rename files, POSIX semantics frontend.
3271 */
3272 /* ARGSUSED */
3273 int
3274 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3275 {
3276 /* {
3277 syscallarg(const char *) from;
3278 syscallarg(const char *) to;
3279 } */
3280
3281 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3282 }
3283
/*
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 * If `from' and `to' refer to the same object, the value of the `retain'
 * argument is used to determine whether `from' will be
 *
 * (retain == 0)	deleted unless `from' and `to' refer to the same
 *			object in the file system's name space (BSD).
 * (retain == 1)	always retained (POSIX).
 *
 * Both path names live in the address space given by `seg'.  Returns 0
 * or an errno value.  Internally `error == -1' is used as a sentinel for
 * "nothing to do"; it skips VOP_RENAME() and is mapped to 0 on return.
 */
int
do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
{
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	struct mount *fs;
	struct lwp *l = curlwp;
	struct proc *p;
	uint32_t saveflag;
	int error;

	/* First lookup of the source, used to find its mount point. */
	NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
	    seg, from);
	if ((error = namei(&fromnd)) != 0)
		return (error);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp, 0);
	fvp = fromnd.ni_vp;

	/* Take the per-file-system rename lock before going any further. */
	fs = fvp->v_mount;
	error = VFS_RENAMELOCK_ENTER(fs);
	if (error) {
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}

	/*
	 * close, partially, yet another race - ideally we should only
	 * go as far as getting fromnd.ni_dvp before getting the per-fs
	 * lock, and then continue to get fromnd.ni_vp, but we can't do
	 * that with namei as it stands.
	 *
	 * This still won't prevent rmdir from nuking fromnd.ni_vp
	 * under us.  The real fix is to get the locks in the right
	 * order and do the lookups in the right places, but that's a
	 * major rototill.
	 *
	 * Preserve the SAVESTART in cn_flags, because who knows what
	 * might happen if we don't.
	 *
	 * Note: this logic (as well as this whole function) is cloned
	 * in nfs_serv.c.  Proceed accordingly.
	 */
	vrele(fvp);
	/* Refuse a final source component of "." or "..". */
	if ((fromnd.ni_cnd.cn_namelen == 1 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.') ||
	    (fromnd.ni_cnd.cn_namelen == 2 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.' &&
	     fromnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	/* Look the source up again, now that the rename lock is held. */
	saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
	fromnd.ni_cnd.cn_flags &= ~SAVESTART;
	vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
	fromnd.ni_cnd.cn_flags |= saveflag;
	if (error) {
		VOP_UNLOCK(fromnd.ni_dvp, 0);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	VOP_UNLOCK(fromnd.ni_vp, 0);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp, 0);
	fvp = fromnd.ni_vp;

	/* Look up the destination; CREATEDIR is added for a directory source. */
	NDINIT(&tond, RENAME,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
	      | (fvp->v_type == VDIR ? CREATEDIR : 0),
	    seg, to);
	if ((error = namei(&tond)) != 0) {
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* An existing target must be of the same kind as the source. */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}

	/* The source may not be the directory the target lives in. */
	if (fvp == tdvp)
		error = EINVAL;

	/*
	 * Source and destination refer to the same object.
	 */
	if (fvp == tvp) {
		if (retain)
			error = -1;
		else if (fromnd.ni_dvp == tdvp &&
		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
		    !memcmp(fromnd.ni_cnd.cn_nameptr,
		          tond.ni_cnd.cn_nameptr,
		          fromnd.ni_cnd.cn_namelen))
			error = -1;
	}

#if NVERIEXEC > 0
	if (!error) {
		char *f1, *f2;

		/* NUL-terminated copies of the final path components. */
		f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
		strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen + 1);

		f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
		strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen + 1);

		error = veriexec_renamechk(l, fvp, f1, tvp, f2);

		free(f1, M_TEMP);
		free(f2, M_TEMP);
	}
#endif /* NVERIEXEC > 0 */

out:
	/* NOTE(review): p is assigned here but not otherwise used in this function. */
	p = l->l_proc;
	if (!error) {
		/*
		 * No vnode cleanup follows VOP_RENAME() on this path; only
		 * the rename lock is dropped here.
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		VFS_RENAMELOCK_EXIT(fs);
	} else {
		/* Undo both lookups by hand. */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
	/* -1 is the internal "nothing to do" sentinel; report success. */
	return (error == -1 ? 0 : error);
}
3453
3454 /*
3455 * Make a directory file.
3456 */
3457 /* ARGSUSED */
3458 int
3459 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3460 {
3461 /* {
3462 syscallarg(const char *) path;
3463 syscallarg(int) mode;
3464 } */
3465 struct proc *p = l->l_proc;
3466 struct vnode *vp;
3467 struct vattr vattr;
3468 int error;
3469 struct nameidata nd;
3470
3471 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3472 SCARG(uap, path));
3473 if ((error = namei(&nd)) != 0)
3474 return (error);
3475 vp = nd.ni_vp;
3476 if (vp != NULL) {
3477 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3478 if (nd.ni_dvp == vp)
3479 vrele(nd.ni_dvp);
3480 else
3481 vput(nd.ni_dvp);
3482 vrele(vp);
3483 return (EEXIST);
3484 }
3485 VATTR_NULL(&vattr);
3486 vattr.va_type = VDIR;
3487 /* We will read cwdi->cwdi_cmask unlocked. */
3488 vattr.va_mode =
3489 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3490 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3491 if (!error)
3492 vput(nd.ni_vp);
3493 return (error);
3494 }
3495
3496 /*
3497 * Remove a directory file.
3498 */
3499 /* ARGSUSED */
3500 int
3501 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3502 {
3503 /* {
3504 syscallarg(const char *) path;
3505 } */
3506 struct vnode *vp;
3507 int error;
3508 struct nameidata nd;
3509
3510 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3511 SCARG(uap, path));
3512 if ((error = namei(&nd)) != 0)
3513 return (error);
3514 vp = nd.ni_vp;
3515 if (vp->v_type != VDIR) {
3516 error = ENOTDIR;
3517 goto out;
3518 }
3519 /*
3520 * No rmdir "." please.
3521 */
3522 if (nd.ni_dvp == vp) {
3523 error = EINVAL;
3524 goto out;
3525 }
3526 /*
3527 * The root of a mounted filesystem cannot be deleted.
3528 */
3529 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3530 error = EBUSY;
3531 goto out;
3532 }
3533 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3534 return (error);
3535
3536 out:
3537 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3538 if (nd.ni_dvp == vp)
3539 vrele(nd.ni_dvp);
3540 else
3541 vput(nd.ni_dvp);
3542 vput(vp);
3543 return (error);
3544 }
3545
3546 /*
3547 * Read a block of directory entries in a file system independent format.
3548 */
3549 int
3550 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3551 {
3552 /* {
3553 syscallarg(int) fd;
3554 syscallarg(char *) buf;
3555 syscallarg(size_t) count;
3556 } */
3557 file_t *fp;
3558 int error, done;
3559
3560 /* fd_getvnode() will use the descriptor for us */
3561 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3562 return (error);
3563 if ((fp->f_flag & FREAD) == 0) {
3564 error = EBADF;
3565 goto out;
3566 }
3567 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3568 SCARG(uap, count), &done, l, 0, 0);
3569 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3570 *retval = done;
3571 out:
3572 fd_putfile(SCARG(uap, fd));
3573 return (error);
3574 }
3575
3576 /*
3577 * Set the mode mask for creation of filesystem nodes.
3578 */
3579 int
3580 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3581 {
3582 /* {
3583 syscallarg(mode_t) newmask;
3584 } */
3585 struct proc *p = l->l_proc;
3586 struct cwdinfo *cwdi;
3587
3588 /*
3589 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3590 * important is that we serialize changes to the mask. The
3591 * rw_exit() will issue a write memory barrier on our behalf,
3592 * and force the changes out to other CPUs (as it must use an
3593 * atomic operation, draining the local CPU's store buffers).
3594 */
3595 cwdi = p->p_cwdi;
3596 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3597 *retval = cwdi->cwdi_cmask;
3598 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3599 rw_exit(&cwdi->cwdi_lock);
3600
3601 return (0);
3602 }
3603
3604 int
3605 dorevoke(struct vnode *vp, kauth_cred_t cred)
3606 {
3607 struct vattr vattr;
3608 int error;
3609
3610 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3611 return error;
3612 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3613 (error = kauth_authorize_generic(cred,
3614 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3615 VOP_REVOKE(vp, REVOKEALL);
3616 return (error);
3617 }
3618
3619 /*
3620 * Void all references to file by ripping underlying filesystem
3621 * away from vnode.
3622 */
3623 /* ARGSUSED */
3624 int
3625 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3626 {
3627 /* {
3628 syscallarg(const char *) path;
3629 } */
3630 struct vnode *vp;
3631 int error;
3632 struct nameidata nd;
3633
3634 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3635 SCARG(uap, path));
3636 if ((error = namei(&nd)) != 0)
3637 return (error);
3638 vp = nd.ni_vp;
3639 error = dorevoke(vp, l->l_cred);
3640 vrele(vp);
3641 return (error);
3642 }
3643