vfs_syscalls.c revision 1.369.2.2 1 /* $NetBSD: vfs_syscalls.c,v 1.369.2.2 2008/12/13 01:15:09 haad Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.369.2.2 2008/12/13 01:15:09 haad Exp $");
67
68 #ifdef _KERNEL_OPT
69 #include "opt_fileassoc.h"
70 #include "veriexec.h"
71 #endif
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/namei.h>
76 #include <sys/filedesc.h>
77 #include <sys/kernel.h>
78 #include <sys/file.h>
79 #include <sys/stat.h>
80 #include <sys/vnode.h>
81 #include <sys/mount.h>
82 #include <sys/proc.h>
83 #include <sys/uio.h>
84 #include <sys/malloc.h>
85 #include <sys/kmem.h>
86 #include <sys/dirent.h>
87 #include <sys/sysctl.h>
88 #include <sys/syscallargs.h>
89 #include <sys/vfs_syscalls.h>
90 #include <sys/ktrace.h>
91 #ifdef FILEASSOC
92 #include <sys/fileassoc.h>
93 #endif /* FILEASSOC */
94 #include <sys/verified_exec.h>
95 #include <sys/kauth.h>
96 #include <sys/atomic.h>
97 #include <sys/module.h>
98 #include <sys/buf.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102 #include <miscfs/specfs/specdev.h>
103
104 #include <nfs/rpcv2.h>
105 #include <nfs/nfsproto.h>
106 #include <nfs/nfs.h>
107 #include <nfs/nfs_var.h>
108
109 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
110
111 static int change_dir(struct nameidata *, struct lwp *);
112 static int change_flags(struct vnode *, u_long, struct lwp *);
113 static int change_mode(struct vnode *, int, struct lwp *l);
114 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
115
116 void checkdirs(struct vnode *);
117
118 int dovfsusermount = 0;
119
120 /*
121 * Virtual File System System Calls
122 */
123
124 /*
125 * Mount a file system.
126 */
127
128 /*
129 * This table is used to maintain compatibility with 4.3BSD
130 * and NetBSD 0.9 mount syscalls - and possibly other systems.
131 * Note, the order is important!
132 *
133 * Do not modify this table. It should only contain filesystems
134 * supported by NetBSD 0.9 and 4.3BSD.
135 */
136 const char * const mountcompatnames[] = {
137 NULL, /* 0 = MOUNT_NONE */
138 MOUNT_FFS, /* 1 = MOUNT_UFS */
139 MOUNT_NFS, /* 2 */
140 MOUNT_MFS, /* 3 */
141 MOUNT_MSDOS, /* 4 */
142 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
143 MOUNT_FDESC, /* 6 */
144 MOUNT_KERNFS, /* 7 */
145 NULL, /* 8 = MOUNT_DEVFS */
146 MOUNT_AFS, /* 9 */
147 };
148 const int nmountcompatnames = sizeof(mountcompatnames) /
149 sizeof(mountcompatnames[0]);
150
151 static int
152 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
153 void *data, size_t *data_len)
154 {
155 struct mount *mp;
156 int error = 0, saved_flags;
157
158 mp = vp->v_mount;
159 saved_flags = mp->mnt_flag;
160
161 /* We can operate only on VV_ROOT nodes. */
162 if ((vp->v_vflag & VV_ROOT) == 0) {
163 error = EINVAL;
164 goto out;
165 }
166
167 /*
168 * We only allow the filesystem to be reloaded if it
169 * is currently mounted read-only. Additionally, we
170 * prevent read-write to read-only downgrades.
171 */
172 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
173 (mp->mnt_flag & MNT_RDONLY) == 0) {
174 error = EOPNOTSUPP; /* Needs translation */
175 goto out;
176 }
177
178 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
179 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
180 if (error)
181 goto out;
182
183 if (vfs_busy(mp, NULL)) {
184 error = EPERM;
185 goto out;
186 }
187
188 mutex_enter(&mp->mnt_updating);
189
190 mp->mnt_flag &= ~MNT_OP_FLAGS;
191 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
192
193 /*
194 * Set the mount level flags.
195 */
196 if (flags & MNT_RDONLY)
197 mp->mnt_flag |= MNT_RDONLY;
198 else if (mp->mnt_flag & MNT_RDONLY)
199 mp->mnt_iflag |= IMNT_WANTRDWR;
200 mp->mnt_flag &=
201 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
202 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
203 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
204 MNT_LOG);
205 mp->mnt_flag |= flags &
206 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
209 MNT_LOG | MNT_IGNORE);
210
211 error = VFS_MOUNT(mp, path, data, data_len);
212
213 if (error && data != NULL) {
214 int error2;
215
216 /*
217 * Update failed; let's try and see if it was an
218 * export request. For compat with 3.0 and earlier.
219 */
220 error2 = vfs_hooks_reexport(mp, path, data);
221
222 /*
223 * Only update error code if the export request was
224 * understood but some problem occurred while
225 * processing it.
226 */
227 if (error2 != EJUSTRETURN)
228 error = error2;
229 }
230
231 if (mp->mnt_iflag & IMNT_WANTRDWR)
232 mp->mnt_flag &= ~MNT_RDONLY;
233 if (error)
234 mp->mnt_flag = saved_flags;
235 mp->mnt_flag &= ~MNT_OP_FLAGS;
236 mp->mnt_iflag &= ~IMNT_WANTRDWR;
237 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
238 if (mp->mnt_syncer == NULL)
239 error = vfs_allocate_syncvnode(mp);
240 } else {
241 if (mp->mnt_syncer != NULL)
242 vfs_deallocate_syncvnode(mp);
243 }
244 mutex_exit(&mp->mnt_updating);
245 vfs_unbusy(mp, false, NULL);
246
247 out:
248 return (error);
249 }
250
251 static int
252 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
253 {
254 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
255 int error;
256
257 /* Copy file-system type from userspace. */
258 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
259 if (error) {
260 /*
261 * Historically, filesystem types were identified by numbers.
262 * If we get an integer for the filesystem type instead of a
263 * string, we check to see if it matches one of the historic
264 * filesystem types.
265 */
266 u_long fsindex = (u_long)fstype;
267 if (fsindex >= nmountcompatnames ||
268 mountcompatnames[fsindex] == NULL)
269 return ENODEV;
270 strlcpy(fstypename, mountcompatnames[fsindex],
271 sizeof(fstypename));
272 }
273
274 /* Accept `ufs' as an alias for `ffs', for compatibility. */
275 if (strcmp(fstypename, "ufs") == 0)
276 fstypename[0] = 'f';
277
278 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
279 return 0;
280
281 /* If we can autoload a vfs module, try again */
282 mutex_enter(&module_lock);
283 (void)module_autoload(fstype, MODULE_CLASS_VFS);
284 mutex_exit(&module_lock);
285
286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
287 return 0;
288
289 return ENODEV;
290 }
291
292 static int
293 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
294 const char *path, int flags, void *data, size_t *data_len, u_int recurse)
295 {
296 struct mount *mp;
297 struct vnode *vp = *vpp;
298 struct vattr va;
299 int error;
300
301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
303 if (error)
304 return error;
305
306 /* Can't make a non-dir a mount-point (from here anyway). */
307 if (vp->v_type != VDIR)
308 return ENOTDIR;
309
310 /*
311 * If the user is not root, ensure that they own the directory
312 * onto which we are attempting to mount.
313 */
314 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
315 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
316 (error = kauth_authorize_generic(l->l_cred,
317 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
318 return error;
319 }
320
321 if (flags & MNT_EXPORTED)
322 return EINVAL;
323
324 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
325 return error;
326
327 /*
328 * Check if a file-system is not already mounted on this vnode.
329 */
330 if (vp->v_mountedhere != NULL)
331 return EBUSY;
332
333 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
334 if (mp == NULL)
335 return ENOMEM;
336
337 mp->mnt_op = vfsops;
338 mp->mnt_refcnt = 1;
339
340 TAILQ_INIT(&mp->mnt_vnodelist);
341 rw_init(&mp->mnt_unmounting);
342 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
343 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
344 error = vfs_busy(mp, NULL);
345 KASSERT(error == 0);
346 mutex_enter(&mp->mnt_updating);
347
348 mp->mnt_vnodecovered = vp;
349 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
350 mount_initspecific(mp);
351
352 /*
353 * The underlying file system may refuse the mount for
354 * various reasons. Allow the user to force it to happen.
355 *
356 * Set the mount level flags.
357 */
358 mp->mnt_flag = flags &
359 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
360 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
361 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
362 MNT_LOG | MNT_IGNORE | MNT_RDONLY);
363
364 error = VFS_MOUNT(mp, path, data, data_len);
365 mp->mnt_flag &= ~MNT_OP_FLAGS;
366
367 /*
368 * Put the new filesystem on the mount list after root.
369 */
370 cache_purge(vp);
371 if (error != 0) {
372 vp->v_mountedhere = NULL;
373 mutex_exit(&mp->mnt_updating);
374 vfs_unbusy(mp, false, NULL);
375 vfs_destroy(mp);
376 return error;
377 }
378
379 mp->mnt_iflag &= ~IMNT_WANTRDWR;
380 mutex_enter(&mountlist_lock);
381 vp->v_mountedhere = mp;
382 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
383 mutex_exit(&mountlist_lock);
384 vn_restorerecurse(vp, recurse);
385 VOP_UNLOCK(vp, 0);
386 checkdirs(vp);
387 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
388 error = vfs_allocate_syncvnode(mp);
389 /* Hold an additional reference to the mount across VFS_START(). */
390 mutex_exit(&mp->mnt_updating);
391 vfs_unbusy(mp, true, NULL);
392 (void) VFS_STATVFS(mp, &mp->mnt_stat);
393 error = VFS_START(mp, 0);
394 if (error)
395 vrele(vp);
396 /* Drop reference held for VFS_START(). */
397 vfs_destroy(mp);
398 *vpp = NULL;
399 return error;
400 }
401
402 static int
403 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
404 void *data, size_t *data_len)
405 {
406 struct mount *mp;
407 int error;
408
409 /* If MNT_GETARGS is specified, it should be the only flag. */
410 if (flags & ~MNT_GETARGS)
411 return EINVAL;
412
413 mp = vp->v_mount;
414
415 /* XXX: probably some notion of "can see" here if we want isolation. */
416 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
417 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
418 if (error)
419 return error;
420
421 if ((vp->v_vflag & VV_ROOT) == 0)
422 return EINVAL;
423
424 if (vfs_busy(mp, NULL))
425 return EPERM;
426
427 mutex_enter(&mp->mnt_updating);
428 mp->mnt_flag &= ~MNT_OP_FLAGS;
429 mp->mnt_flag |= MNT_GETARGS;
430 error = VFS_MOUNT(mp, path, data, data_len);
431 mp->mnt_flag &= ~MNT_OP_FLAGS;
432 mutex_exit(&mp->mnt_updating);
433
434 vfs_unbusy(mp, false, NULL);
435 return (error);
436 }
437
438 int
439 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
440 {
441 /* {
442 syscallarg(const char *) type;
443 syscallarg(const char *) path;
444 syscallarg(int) flags;
445 syscallarg(void *) data;
446 syscallarg(size_t) data_len;
447 } */
448
449 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
450 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
451 SCARG(uap, data_len), retval);
452 }
453
454 int
455 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
456 const char *path, int flags, void *data, enum uio_seg data_seg,
457 size_t data_len, register_t *retval)
458 {
459 struct vnode *vp;
460 struct nameidata nd;
461 void *data_buf = data;
462 u_int recurse;
463 int error;
464
465 /*
466 * Get vnode to be covered
467 */
468 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
469 if ((error = namei(&nd)) != 0)
470 return (error);
471 vp = nd.ni_vp;
472
473 /*
474 * A lookup in VFS_MOUNT might result in an attempt to
475 * lock this vnode again, so make the lock recursive.
476 */
477 if (vfsops == NULL) {
478 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
479 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
480 recurse = vn_setrecurse(vp);
481 vfsops = vp->v_mount->mnt_op;
482 } else {
483 /* 'type' is userspace */
484 error = mount_get_vfsops(type, &vfsops);
485 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
486 recurse = vn_setrecurse(vp);
487 if (error != 0)
488 goto done;
489 }
490 } else {
491 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
492 recurse = vn_setrecurse(vp);
493 }
494
495 if (data != NULL && data_seg == UIO_USERSPACE) {
496 if (data_len == 0) {
497 /* No length supplied, use default for filesystem */
498 data_len = vfsops->vfs_min_mount_data;
499 if (data_len > VFS_MAX_MOUNT_DATA) {
500 error = EINVAL;
501 goto done;
502 }
503 /*
504 * Hopefully a longer buffer won't make copyin() fail.
505 * For compatibility with 3.0 and earlier.
506 */
507 if (flags & MNT_UPDATE
508 && data_len < sizeof (struct mnt_export_args30))
509 data_len = sizeof (struct mnt_export_args30);
510 }
511 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
512
513 /* NFS needs the buffer even for mnt_getargs .... */
514 error = copyin(data, data_buf, data_len);
515 if (error != 0)
516 goto done;
517 }
518
519 if (flags & MNT_GETARGS) {
520 if (data_len == 0) {
521 error = EINVAL;
522 goto done;
523 }
524 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
525 if (error != 0)
526 goto done;
527 if (data_seg == UIO_USERSPACE)
528 error = copyout(data_buf, data, data_len);
529 *retval = data_len;
530 } else if (flags & MNT_UPDATE) {
531 error = mount_update(l, vp, path, flags, data_buf, &data_len);
532 } else {
533 /* Locking is handled internally in mount_domount(). */
534 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
535 &data_len, recurse);
536 }
537
538 done:
539 if (vp != NULL) {
540 vn_restorerecurse(vp, recurse);
541 vput(vp);
542 }
543 if (data_buf != data)
544 free(data_buf, M_TEMP);
545 return (error);
546 }
547
548 /*
549 * Scan all active processes to see if any of them have a current
550 * or root directory onto which the new filesystem has just been
551 * mounted. If so, replace them with the new mount point.
552 */
553 void
554 checkdirs(struct vnode *olddp)
555 {
556 struct cwdinfo *cwdi;
557 struct vnode *newdp, *rele1, *rele2;
558 struct proc *p;
559 bool retry;
560
561 if (olddp->v_usecount == 1)
562 return;
563 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
564 panic("mount: lost mount");
565
566 do {
567 retry = false;
568 mutex_enter(proc_lock);
569 PROCLIST_FOREACH(p, &allproc) {
570 if ((p->p_flag & PK_MARKER) != 0)
571 continue;
572 if ((cwdi = p->p_cwdi) == NULL)
573 continue;
574 /*
575 * Can't change to the old directory any more,
576 * so even if we see a stale value it's not a
577 * problem.
578 */
579 if (cwdi->cwdi_cdir != olddp &&
580 cwdi->cwdi_rdir != olddp)
581 continue;
582 retry = true;
583 rele1 = NULL;
584 rele2 = NULL;
585 atomic_inc_uint(&cwdi->cwdi_refcnt);
586 mutex_exit(proc_lock);
587 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
588 if (cwdi->cwdi_cdir == olddp) {
589 rele1 = cwdi->cwdi_cdir;
590 VREF(newdp);
591 cwdi->cwdi_cdir = newdp;
592 }
593 if (cwdi->cwdi_rdir == olddp) {
594 rele2 = cwdi->cwdi_rdir;
595 VREF(newdp);
596 cwdi->cwdi_rdir = newdp;
597 }
598 rw_exit(&cwdi->cwdi_lock);
599 cwdfree(cwdi);
600 if (rele1 != NULL)
601 vrele(rele1);
602 if (rele2 != NULL)
603 vrele(rele2);
604 mutex_enter(proc_lock);
605 break;
606 }
607 mutex_exit(proc_lock);
608 } while (retry);
609
610 if (rootvnode == olddp) {
611 vrele(rootvnode);
612 VREF(newdp);
613 rootvnode = newdp;
614 }
615 vput(newdp);
616 }
617
618 /*
619 * Unmount a file system.
620 *
621 * Note: unmount takes a path to the vnode mounted on as argument,
622 * not special file (as before).
623 */
624 /* ARGSUSED */
625 int
626 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
627 {
628 /* {
629 syscallarg(const char *) path;
630 syscallarg(int) flags;
631 } */
632 struct vnode *vp;
633 struct mount *mp;
634 int error;
635 struct nameidata nd;
636
637 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
638 SCARG(uap, path));
639 if ((error = namei(&nd)) != 0)
640 return (error);
641 vp = nd.ni_vp;
642 mp = vp->v_mount;
643 atomic_inc_uint(&mp->mnt_refcnt);
644 VOP_UNLOCK(vp, 0);
645
646 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
647 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
648 if (error) {
649 vrele(vp);
650 vfs_destroy(mp);
651 return (error);
652 }
653
654 /*
655 * Don't allow unmounting the root file system.
656 */
657 if (mp->mnt_flag & MNT_ROOTFS) {
658 vrele(vp);
659 vfs_destroy(mp);
660 return (EINVAL);
661 }
662
663 /*
664 * Must be the root of the filesystem
665 */
666 if ((vp->v_vflag & VV_ROOT) == 0) {
667 vrele(vp);
668 vfs_destroy(mp);
669 return (EINVAL);
670 }
671
672 vrele(vp);
673 error = dounmount(mp, SCARG(uap, flags), l);
674 vfs_destroy(mp);
675 return error;
676 }
677
678 /*
679 * Do the actual file system unmount. File system is assumed to have
680 * been locked by the caller.
681 *
 * => Caller holds a reference to the mount, explicitly for dounmount().
683 */
684 int
685 dounmount(struct mount *mp, int flags, struct lwp *l)
686 {
687 struct vnode *coveredvp;
688 int error;
689 int async;
690 int used_syncer;
691
692 #if NVERIEXEC > 0
693 error = veriexec_unmountchk(mp);
694 if (error)
695 return (error);
696 #endif /* NVERIEXEC > 0 */
697
698 /*
699 * XXX Freeze syncer. Must do this before locking the
700 * mount point. See dounmount() for details.
701 */
702 mutex_enter(&syncer_mutex);
703 rw_enter(&mp->mnt_unmounting, RW_WRITER);
704 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
705 rw_exit(&mp->mnt_unmounting);
706 mutex_exit(&syncer_mutex);
707 return ENOENT;
708 }
709
710 used_syncer = (mp->mnt_syncer != NULL);
711
712 /*
713 * XXX Syncer must be frozen when we get here. This should really
714 * be done on a per-mountpoint basis, but especially the softdep
715 * code possibly called from the syncer doesn't exactly work on a
716 * per-mountpoint basis, so the softdep code would become a maze
717 * of vfs_busy() calls.
718 *
719 * The caller of dounmount() must acquire syncer_mutex because
720 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
721 * order, and we must preserve that order to avoid deadlock.
722 *
723 * So, if the file system did not use the syncer, now is
724 * the time to release the syncer_mutex.
725 */
726 if (used_syncer == 0)
727 mutex_exit(&syncer_mutex);
728
729 mp->mnt_iflag |= IMNT_UNMOUNT;
730 async = mp->mnt_flag & MNT_ASYNC;
731 mp->mnt_flag &= ~MNT_ASYNC;
732 cache_purgevfs(mp); /* remove cache entries for this file sys */
733 if (mp->mnt_syncer != NULL)
734 vfs_deallocate_syncvnode(mp);
735 error = 0;
736 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
737 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
738 }
739 vfs_scrubvnlist(mp);
740 if (error == 0 || (flags & MNT_FORCE))
741 error = VFS_UNMOUNT(mp, flags);
742 if (error) {
743 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
744 (void) vfs_allocate_syncvnode(mp);
745 mp->mnt_iflag &= ~IMNT_UNMOUNT;
746 mp->mnt_flag |= async;
747 rw_exit(&mp->mnt_unmounting);
748 if (used_syncer)
749 mutex_exit(&syncer_mutex);
750 return (error);
751 }
752 vfs_scrubvnlist(mp);
753 mutex_enter(&mountlist_lock);
754 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
755 coveredvp->v_mountedhere = NULL;
756 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
757 mp->mnt_iflag |= IMNT_GONE;
758 mutex_exit(&mountlist_lock);
759 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
760 panic("unmount: dangling vnode");
761 if (used_syncer)
762 mutex_exit(&syncer_mutex);
763 vfs_hooks_unmount(mp);
764 rw_exit(&mp->mnt_unmounting);
765 vfs_destroy(mp); /* reference from mount() */
766 if (coveredvp != NULLVP)
767 vrele(coveredvp);
768 return (0);
769 }
770
771 /*
772 * Sync each mounted filesystem.
773 */
774 #ifdef DEBUG
775 int syncprt = 0;
776 struct ctldebug debug0 = { "syncprt", &syncprt };
777 #endif
778
779 /* ARGSUSED */
780 int
781 sys_sync(struct lwp *l, const void *v, register_t *retval)
782 {
783 struct mount *mp, *nmp;
784 int asyncflag;
785
786 if (l == NULL)
787 l = &lwp0;
788
789 mutex_enter(&mountlist_lock);
790 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
791 mp = nmp) {
792 if (vfs_busy(mp, &nmp)) {
793 continue;
794 }
795 mutex_enter(&mp->mnt_updating);
796 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
797 asyncflag = mp->mnt_flag & MNT_ASYNC;
798 mp->mnt_flag &= ~MNT_ASYNC;
799 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
800 if (asyncflag)
801 mp->mnt_flag |= MNT_ASYNC;
802 }
803 mutex_exit(&mp->mnt_updating);
804 vfs_unbusy(mp, false, &nmp);
805 }
806 mutex_exit(&mountlist_lock);
807 #ifdef DEBUG
808 if (syncprt)
809 vfs_bufstats();
810 #endif /* DEBUG */
811 return (0);
812 }
813
814 /*
815 * Change filesystem quotas.
816 */
817 /* ARGSUSED */
818 int
819 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
820 {
821 /* {
822 syscallarg(const char *) path;
823 syscallarg(int) cmd;
824 syscallarg(int) uid;
825 syscallarg(void *) arg;
826 } */
827 struct mount *mp;
828 int error;
829 struct nameidata nd;
830
831 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
832 SCARG(uap, path));
833 if ((error = namei(&nd)) != 0)
834 return (error);
835 mp = nd.ni_vp->v_mount;
836 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
837 SCARG(uap, arg));
838 vrele(nd.ni_vp);
839 return (error);
840 }
841
842 int
843 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
844 int root)
845 {
846 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
847 int error = 0;
848
849 /*
850 * If MNT_NOWAIT or MNT_LAZY is specified, do not
851 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
852 * overrides MNT_NOWAIT.
853 */
854 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
855 (flags != MNT_WAIT && flags != 0)) {
856 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
857 goto done;
858 }
859
860 /* Get the filesystem stats now */
861 memset(sp, 0, sizeof(*sp));
862 if ((error = VFS_STATVFS(mp, sp)) != 0) {
863 return error;
864 }
865
866 if (cwdi->cwdi_rdir == NULL)
867 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
868 done:
869 if (cwdi->cwdi_rdir != NULL) {
870 size_t len;
871 char *bp;
872 char c;
873 char *path = PNBUF_GET();
874
875 bp = path + MAXPATHLEN;
876 *--bp = '\0';
877 rw_enter(&cwdi->cwdi_lock, RW_READER);
878 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
879 MAXPATHLEN / 2, 0, l);
880 rw_exit(&cwdi->cwdi_lock);
881 if (error) {
882 PNBUF_PUT(path);
883 return error;
884 }
885 len = strlen(bp);
886 /*
887 * for mount points that are below our root, we can see
888 * them, so we fix up the pathname and return them. The
889 * rest we cannot see, so we don't allow viewing the
890 * data.
891 */
892 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
893 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
894 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
895 sizeof(sp->f_mntonname));
896 if (sp->f_mntonname[0] == '\0')
897 (void)strlcpy(sp->f_mntonname, "/",
898 sizeof(sp->f_mntonname));
899 } else {
900 if (root)
901 (void)strlcpy(sp->f_mntonname, "/",
902 sizeof(sp->f_mntonname));
903 else
904 error = EPERM;
905 }
906 PNBUF_PUT(path);
907 }
908 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
909 return error;
910 }
911
912 /*
913 * Get filesystem statistics by path.
914 */
915 int
916 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
917 {
918 struct mount *mp;
919 int error;
920 struct nameidata nd;
921
922 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
923 if ((error = namei(&nd)) != 0)
924 return error;
925 mp = nd.ni_vp->v_mount;
926 error = dostatvfs(mp, sb, l, flags, 1);
927 vrele(nd.ni_vp);
928 return error;
929 }
930
931 /* ARGSUSED */
932 int
933 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
934 {
935 /* {
936 syscallarg(const char *) path;
937 syscallarg(struct statvfs *) buf;
938 syscallarg(int) flags;
939 } */
940 struct statvfs *sb;
941 int error;
942
943 sb = STATVFSBUF_GET();
944 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
945 if (error == 0)
946 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
947 STATVFSBUF_PUT(sb);
948 return error;
949 }
950
951 /*
952 * Get filesystem statistics by fd.
953 */
954 int
955 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
956 {
957 file_t *fp;
958 struct mount *mp;
959 int error;
960
961 /* fd_getvnode() will use the descriptor for us */
962 if ((error = fd_getvnode(fd, &fp)) != 0)
963 return (error);
964 mp = ((struct vnode *)fp->f_data)->v_mount;
965 error = dostatvfs(mp, sb, curlwp, flags, 1);
966 fd_putfile(fd);
967 return error;
968 }
969
970 /* ARGSUSED */
971 int
972 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
973 {
974 /* {
975 syscallarg(int) fd;
976 syscallarg(struct statvfs *) buf;
977 syscallarg(int) flags;
978 } */
979 struct statvfs *sb;
980 int error;
981
982 sb = STATVFSBUF_GET();
983 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
984 if (error == 0)
985 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
986 STATVFSBUF_PUT(sb);
987 return error;
988 }
989
990
991 /*
992 * Get statistics on all filesystems.
993 */
994 int
995 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
996 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
997 register_t *retval)
998 {
999 int root = 0;
1000 struct proc *p = l->l_proc;
1001 struct mount *mp, *nmp;
1002 struct statvfs *sb;
1003 size_t count, maxcount;
1004 int error = 0;
1005
1006 sb = STATVFSBUF_GET();
1007 maxcount = bufsize / entry_sz;
1008 mutex_enter(&mountlist_lock);
1009 count = 0;
1010 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1011 mp = nmp) {
1012 if (vfs_busy(mp, &nmp)) {
1013 continue;
1014 }
1015 if (sfsp && count < maxcount) {
1016 error = dostatvfs(mp, sb, l, flags, 0);
1017 if (error) {
1018 vfs_unbusy(mp, false, &nmp);
1019 error = 0;
1020 continue;
1021 }
1022 error = copyfn(sb, sfsp, entry_sz);
1023 if (error) {
1024 vfs_unbusy(mp, false, NULL);
1025 goto out;
1026 }
1027 sfsp = (char *)sfsp + entry_sz;
1028 root |= strcmp(sb->f_mntonname, "/") == 0;
1029 }
1030 count++;
1031 vfs_unbusy(mp, false, &nmp);
1032 }
1033 mutex_exit(&mountlist_lock);
1034
1035 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1036 /*
1037 * fake a root entry
1038 */
1039 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1040 sb, l, flags, 1);
1041 if (error != 0)
1042 goto out;
1043 if (sfsp) {
1044 error = copyfn(sb, sfsp, entry_sz);
1045 if (error != 0)
1046 goto out;
1047 }
1048 count++;
1049 }
1050 if (sfsp && count > maxcount)
1051 *retval = maxcount;
1052 else
1053 *retval = count;
1054 out:
1055 STATVFSBUF_PUT(sb);
1056 return error;
1057 }
1058
1059 int
1060 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1061 {
1062 /* {
1063 syscallarg(struct statvfs *) buf;
1064 syscallarg(size_t) bufsize;
1065 syscallarg(int) flags;
1066 } */
1067
1068 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1069 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1070 }
1071
1072 /*
1073 * Change current working directory to a given file descriptor.
1074 */
1075 /* ARGSUSED */
1076 int
1077 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1078 {
1079 /* {
1080 syscallarg(int) fd;
1081 } */
1082 struct proc *p = l->l_proc;
1083 struct cwdinfo *cwdi;
1084 struct vnode *vp, *tdp;
1085 struct mount *mp;
1086 file_t *fp;
1087 int error, fd;
1088
1089 /* fd_getvnode() will use the descriptor for us */
1090 fd = SCARG(uap, fd);
1091 if ((error = fd_getvnode(fd, &fp)) != 0)
1092 return (error);
1093 vp = fp->f_data;
1094
1095 VREF(vp);
1096 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1097 if (vp->v_type != VDIR)
1098 error = ENOTDIR;
1099 else
1100 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1101 if (error) {
1102 vput(vp);
1103 goto out;
1104 }
1105 while ((mp = vp->v_mountedhere) != NULL) {
1106 error = vfs_busy(mp, NULL);
1107 vput(vp);
1108 if (error != 0)
1109 goto out;
1110 error = VFS_ROOT(mp, &tdp);
1111 vfs_unbusy(mp, false, NULL);
1112 if (error)
1113 goto out;
1114 vp = tdp;
1115 }
1116 VOP_UNLOCK(vp, 0);
1117
1118 /*
1119 * Disallow changing to a directory not under the process's
1120 * current root directory (if there is one).
1121 */
1122 cwdi = p->p_cwdi;
1123 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1124 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1125 vrele(vp);
1126 error = EPERM; /* operation not permitted */
1127 } else {
1128 vrele(cwdi->cwdi_cdir);
1129 cwdi->cwdi_cdir = vp;
1130 }
1131 rw_exit(&cwdi->cwdi_lock);
1132
1133 out:
1134 fd_putfile(fd);
1135 return (error);
1136 }
1137
1138 /*
1139 * Change this process's notion of the root directory to a given file
1140 * descriptor.
1141 */
1142 int
1143 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1144 {
1145 struct proc *p = l->l_proc;
1146 struct cwdinfo *cwdi;
1147 struct vnode *vp;
1148 file_t *fp;
1149 int error, fd = SCARG(uap, fd);
1150
1151 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1152 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1153 return error;
1154 /* fd_getvnode() will use the descriptor for us */
1155 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1156 return error;
1157 vp = fp->f_data;
1158 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1159 if (vp->v_type != VDIR)
1160 error = ENOTDIR;
1161 else
1162 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1163 VOP_UNLOCK(vp, 0);
1164 if (error)
1165 goto out;
1166 VREF(vp);
1167
1168 /*
1169 * Prevent escaping from chroot by putting the root under
1170 * the working directory. Silently chdir to / if we aren't
1171 * already there.
1172 */
1173 cwdi = p->p_cwdi;
1174 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1175 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1176 /*
1177 * XXX would be more failsafe to change directory to a
1178 * deadfs node here instead
1179 */
1180 vrele(cwdi->cwdi_cdir);
1181 VREF(vp);
1182 cwdi->cwdi_cdir = vp;
1183 }
1184
1185 if (cwdi->cwdi_rdir != NULL)
1186 vrele(cwdi->cwdi_rdir);
1187 cwdi->cwdi_rdir = vp;
1188 rw_exit(&cwdi->cwdi_lock);
1189
1190 out:
1191 fd_putfile(fd);
1192 return (error);
1193 }
1194
1195 /*
1196 * Change current working directory (``.'').
1197 */
1198 /* ARGSUSED */
1199 int
1200 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1201 {
1202 /* {
1203 syscallarg(const char *) path;
1204 } */
1205 struct proc *p = l->l_proc;
1206 struct cwdinfo *cwdi;
1207 int error;
1208 struct nameidata nd;
1209
1210 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1211 SCARG(uap, path));
1212 if ((error = change_dir(&nd, l)) != 0)
1213 return (error);
1214 cwdi = p->p_cwdi;
1215 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1216 vrele(cwdi->cwdi_cdir);
1217 cwdi->cwdi_cdir = nd.ni_vp;
1218 rw_exit(&cwdi->cwdi_lock);
1219 return (0);
1220 }
1221
1222 /*
1223 * Change notion of root (``/'') directory.
1224 */
1225 /* ARGSUSED */
1226 int
1227 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1228 {
1229 /* {
1230 syscallarg(const char *) path;
1231 } */
1232 struct proc *p = l->l_proc;
1233 struct cwdinfo *cwdi;
1234 struct vnode *vp;
1235 int error;
1236 struct nameidata nd;
1237
1238 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1239 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1240 return (error);
1241 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1242 SCARG(uap, path));
1243 if ((error = change_dir(&nd, l)) != 0)
1244 return (error);
1245
1246 cwdi = p->p_cwdi;
1247 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1248 if (cwdi->cwdi_rdir != NULL)
1249 vrele(cwdi->cwdi_rdir);
1250 vp = nd.ni_vp;
1251 cwdi->cwdi_rdir = vp;
1252
1253 /*
1254 * Prevent escaping from chroot by putting the root under
1255 * the working directory. Silently chdir to / if we aren't
1256 * already there.
1257 */
1258 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1259 /*
1260 * XXX would be more failsafe to change directory to a
1261 * deadfs node here instead
1262 */
1263 vrele(cwdi->cwdi_cdir);
1264 VREF(vp);
1265 cwdi->cwdi_cdir = vp;
1266 }
1267 rw_exit(&cwdi->cwdi_lock);
1268
1269 return (0);
1270 }
1271
1272 /*
1273 * Common routine for chroot and chdir.
1274 */
1275 static int
1276 change_dir(struct nameidata *ndp, struct lwp *l)
1277 {
1278 struct vnode *vp;
1279 int error;
1280
1281 if ((error = namei(ndp)) != 0)
1282 return (error);
1283 vp = ndp->ni_vp;
1284 if (vp->v_type != VDIR)
1285 error = ENOTDIR;
1286 else
1287 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1288
1289 if (error)
1290 vput(vp);
1291 else
1292 VOP_UNLOCK(vp, 0);
1293 return (error);
1294 }
1295
1296 /*
1297 * Check permissions, allocate an open file structure,
1298 * and call the device open routine if any.
1299 */
1300 int
1301 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1302 {
1303 /* {
1304 syscallarg(const char *) path;
1305 syscallarg(int) flags;
1306 syscallarg(int) mode;
1307 } */
1308 struct proc *p = l->l_proc;
1309 struct cwdinfo *cwdi = p->p_cwdi;
1310 file_t *fp;
1311 struct vnode *vp;
1312 int flags, cmode;
1313 int type, indx, error;
1314 struct flock lf;
1315 struct nameidata nd;
1316
1317 flags = FFLAGS(SCARG(uap, flags));
1318 if ((flags & (FREAD | FWRITE)) == 0)
1319 return (EINVAL);
1320 if ((error = fd_allocfile(&fp, &indx)) != 0)
1321 return (error);
1322 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1323 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1324 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1325 SCARG(uap, path));
1326 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1327 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1328 fd_abort(p, fp, indx);
1329 if ((error == EDUPFD || error == EMOVEFD) &&
1330 l->l_dupfd >= 0 && /* XXX from fdopen */
1331 (error =
1332 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1333 *retval = indx;
1334 return (0);
1335 }
1336 if (error == ERESTART)
1337 error = EINTR;
1338 return (error);
1339 }
1340
1341 l->l_dupfd = 0;
1342 vp = nd.ni_vp;
1343 fp->f_flag = flags & FMASK;
1344 fp->f_type = DTYPE_VNODE;
1345 fp->f_ops = &vnops;
1346 fp->f_data = vp;
1347 if (flags & (O_EXLOCK | O_SHLOCK)) {
1348 lf.l_whence = SEEK_SET;
1349 lf.l_start = 0;
1350 lf.l_len = 0;
1351 if (flags & O_EXLOCK)
1352 lf.l_type = F_WRLCK;
1353 else
1354 lf.l_type = F_RDLCK;
1355 type = F_FLOCK;
1356 if ((flags & FNONBLOCK) == 0)
1357 type |= F_WAIT;
1358 VOP_UNLOCK(vp, 0);
1359 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1360 if (error) {
1361 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1362 fd_abort(p, fp, indx);
1363 return (error);
1364 }
1365 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1366 atomic_or_uint(&fp->f_flag, FHASLOCK);
1367 }
1368 VOP_UNLOCK(vp, 0);
1369 *retval = indx;
1370 fd_affix(p, fp, indx);
1371 return (0);
1372 }
1373
1374 static void
1375 vfs__fhfree(fhandle_t *fhp)
1376 {
1377 size_t fhsize;
1378
1379 if (fhp == NULL) {
1380 return;
1381 }
1382 fhsize = FHANDLE_SIZE(fhp);
1383 kmem_free(fhp, fhsize);
1384 }
1385
1386 /*
1387 * vfs_composefh: compose a filehandle.
1388 */
1389
1390 int
1391 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1392 {
1393 struct mount *mp;
1394 struct fid *fidp;
1395 int error;
1396 size_t needfhsize;
1397 size_t fidsize;
1398
1399 mp = vp->v_mount;
1400 fidp = NULL;
1401 if (*fh_size < FHANDLE_SIZE_MIN) {
1402 fidsize = 0;
1403 } else {
1404 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1405 if (fhp != NULL) {
1406 memset(fhp, 0, *fh_size);
1407 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1408 fidp = &fhp->fh_fid;
1409 }
1410 }
1411 error = VFS_VPTOFH(vp, fidp, &fidsize);
1412 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1413 if (error == 0 && *fh_size < needfhsize) {
1414 error = E2BIG;
1415 }
1416 *fh_size = needfhsize;
1417 return error;
1418 }
1419
1420 int
1421 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1422 {
1423 struct mount *mp;
1424 fhandle_t *fhp;
1425 size_t fhsize;
1426 size_t fidsize;
1427 int error;
1428
1429 *fhpp = NULL;
1430 mp = vp->v_mount;
1431 fidsize = 0;
1432 error = VFS_VPTOFH(vp, NULL, &fidsize);
1433 KASSERT(error != 0);
1434 if (error != E2BIG) {
1435 goto out;
1436 }
1437 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1438 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1439 if (fhp == NULL) {
1440 error = ENOMEM;
1441 goto out;
1442 }
1443 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1444 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1445 if (error == 0) {
1446 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1447 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1448 *fhpp = fhp;
1449 } else {
1450 kmem_free(fhp, fhsize);
1451 }
1452 out:
1453 return error;
1454 }
1455
1456 void
1457 vfs_composefh_free(fhandle_t *fhp)
1458 {
1459
1460 vfs__fhfree(fhp);
1461 }
1462
1463 /*
1464 * vfs_fhtovp: lookup a vnode by a filehandle.
1465 */
1466
1467 int
1468 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1469 {
1470 struct mount *mp;
1471 int error;
1472
1473 *vpp = NULL;
1474 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1475 if (mp == NULL) {
1476 error = ESTALE;
1477 goto out;
1478 }
1479 if (mp->mnt_op->vfs_fhtovp == NULL) {
1480 error = EOPNOTSUPP;
1481 goto out;
1482 }
1483 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1484 out:
1485 return error;
1486 }
1487
1488 /*
1489 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1490 * the needed size.
1491 */
1492
1493 int
1494 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1495 {
1496 fhandle_t *fhp;
1497 int error;
1498
1499 *fhpp = NULL;
1500 if (fhsize > FHANDLE_SIZE_MAX) {
1501 return EINVAL;
1502 }
1503 if (fhsize < FHANDLE_SIZE_MIN) {
1504 return EINVAL;
1505 }
1506 again:
1507 fhp = kmem_alloc(fhsize, KM_SLEEP);
1508 if (fhp == NULL) {
1509 return ENOMEM;
1510 }
1511 error = copyin(ufhp, fhp, fhsize);
1512 if (error == 0) {
1513 /* XXX this check shouldn't be here */
1514 if (FHANDLE_SIZE(fhp) == fhsize) {
1515 *fhpp = fhp;
1516 return 0;
1517 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1518 /*
1519 * a kludge for nfsv2 padded handles.
1520 */
1521 size_t sz;
1522
1523 sz = FHANDLE_SIZE(fhp);
1524 kmem_free(fhp, fhsize);
1525 fhsize = sz;
1526 goto again;
1527 } else {
1528 /*
1529 * userland told us wrong size.
1530 */
1531 error = EINVAL;
1532 }
1533 }
1534 kmem_free(fhp, fhsize);
1535 return error;
1536 }
1537
1538 void
1539 vfs_copyinfh_free(fhandle_t *fhp)
1540 {
1541
1542 vfs__fhfree(fhp);
1543 }
1544
1545 /*
1546 * Get file handle system call
1547 */
1548 int
1549 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1550 {
1551 /* {
1552 syscallarg(char *) fname;
1553 syscallarg(fhandle_t *) fhp;
1554 syscallarg(size_t *) fh_size;
1555 } */
1556 struct vnode *vp;
1557 fhandle_t *fh;
1558 int error;
1559 struct nameidata nd;
1560 size_t sz;
1561 size_t usz;
1562
1563 /*
1564 * Must be super user
1565 */
1566 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1567 0, NULL, NULL, NULL);
1568 if (error)
1569 return (error);
1570 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1571 SCARG(uap, fname));
1572 error = namei(&nd);
1573 if (error)
1574 return (error);
1575 vp = nd.ni_vp;
1576 error = vfs_composefh_alloc(vp, &fh);
1577 vput(vp);
1578 if (error != 0) {
1579 goto out;
1580 }
1581 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1582 if (error != 0) {
1583 goto out;
1584 }
1585 sz = FHANDLE_SIZE(fh);
1586 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1587 if (error != 0) {
1588 goto out;
1589 }
1590 if (usz >= sz) {
1591 error = copyout(fh, SCARG(uap, fhp), sz);
1592 } else {
1593 error = E2BIG;
1594 }
1595 out:
1596 vfs_composefh_free(fh);
1597 return (error);
1598 }
1599
1600 /*
1601 * Open a file given a file handle.
1602 *
1603 * Check permissions, allocate an open file structure,
1604 * and call the device open routine if any.
1605 */
1606
1607 int
1608 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1609 register_t *retval)
1610 {
1611 file_t *fp;
1612 struct vnode *vp = NULL;
1613 kauth_cred_t cred = l->l_cred;
1614 file_t *nfp;
1615 int type, indx, error=0;
1616 struct flock lf;
1617 struct vattr va;
1618 fhandle_t *fh;
1619 int flags;
1620 proc_t *p;
1621
1622 p = curproc;
1623
1624 /*
1625 * Must be super user
1626 */
1627 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1628 0, NULL, NULL, NULL)))
1629 return (error);
1630
1631 flags = FFLAGS(oflags);
1632 if ((flags & (FREAD | FWRITE)) == 0)
1633 return (EINVAL);
1634 if ((flags & O_CREAT))
1635 return (EINVAL);
1636 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1637 return (error);
1638 fp = nfp;
1639 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1640 if (error != 0) {
1641 goto bad;
1642 }
1643 error = vfs_fhtovp(fh, &vp);
1644 if (error != 0) {
1645 goto bad;
1646 }
1647
1648 /* Now do an effective vn_open */
1649
1650 if (vp->v_type == VSOCK) {
1651 error = EOPNOTSUPP;
1652 goto bad;
1653 }
1654 error = vn_openchk(vp, cred, flags);
1655 if (error != 0)
1656 goto bad;
1657 if (flags & O_TRUNC) {
1658 VOP_UNLOCK(vp, 0); /* XXX */
1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1660 VATTR_NULL(&va);
1661 va.va_size = 0;
1662 error = VOP_SETATTR(vp, &va, cred);
1663 if (error)
1664 goto bad;
1665 }
1666 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1667 goto bad;
1668 if (flags & FWRITE) {
1669 mutex_enter(&vp->v_interlock);
1670 vp->v_writecount++;
1671 mutex_exit(&vp->v_interlock);
1672 }
1673
1674 /* done with modified vn_open, now finish what sys_open does. */
1675
1676 fp->f_flag = flags & FMASK;
1677 fp->f_type = DTYPE_VNODE;
1678 fp->f_ops = &vnops;
1679 fp->f_data = vp;
1680 if (flags & (O_EXLOCK | O_SHLOCK)) {
1681 lf.l_whence = SEEK_SET;
1682 lf.l_start = 0;
1683 lf.l_len = 0;
1684 if (flags & O_EXLOCK)
1685 lf.l_type = F_WRLCK;
1686 else
1687 lf.l_type = F_RDLCK;
1688 type = F_FLOCK;
1689 if ((flags & FNONBLOCK) == 0)
1690 type |= F_WAIT;
1691 VOP_UNLOCK(vp, 0);
1692 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1693 if (error) {
1694 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1695 fd_abort(p, fp, indx);
1696 return (error);
1697 }
1698 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1699 atomic_or_uint(&fp->f_flag, FHASLOCK);
1700 }
1701 VOP_UNLOCK(vp, 0);
1702 *retval = indx;
1703 fd_affix(p, fp, indx);
1704 vfs_copyinfh_free(fh);
1705 return (0);
1706
1707 bad:
1708 fd_abort(p, fp, indx);
1709 if (vp != NULL)
1710 vput(vp);
1711 vfs_copyinfh_free(fh);
1712 return (error);
1713 }
1714
1715 int
1716 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1717 {
1718 /* {
1719 syscallarg(const void *) fhp;
1720 syscallarg(size_t) fh_size;
1721 syscallarg(int) flags;
1722 } */
1723
1724 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1725 SCARG(uap, flags), retval);
1726 }
1727
1728 int
1729 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1730 {
1731 int error;
1732 fhandle_t *fh;
1733 struct vnode *vp;
1734
1735 /*
1736 * Must be super user
1737 */
1738 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1739 0, NULL, NULL, NULL)))
1740 return (error);
1741
1742 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1743 if (error != 0)
1744 return error;
1745
1746 error = vfs_fhtovp(fh, &vp);
1747 vfs_copyinfh_free(fh);
1748 if (error != 0)
1749 return error;
1750
1751 error = vn_stat(vp, sb);
1752 vput(vp);
1753 return error;
1754 }
1755
1756
1757 /* ARGSUSED */
1758 int
1759 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
1760 {
1761 /* {
1762 syscallarg(const void *) fhp;
1763 syscallarg(size_t) fh_size;
1764 syscallarg(struct stat *) sb;
1765 } */
1766 struct stat sb;
1767 int error;
1768
1769 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1770 if (error)
1771 return error;
1772 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1773 }
1774
1775 int
1776 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1777 int flags)
1778 {
1779 fhandle_t *fh;
1780 struct mount *mp;
1781 struct vnode *vp;
1782 int error;
1783
1784 /*
1785 * Must be super user
1786 */
1787 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1788 0, NULL, NULL, NULL)))
1789 return error;
1790
1791 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1792 if (error != 0)
1793 return error;
1794
1795 error = vfs_fhtovp(fh, &vp);
1796 vfs_copyinfh_free(fh);
1797 if (error != 0)
1798 return error;
1799
1800 mp = vp->v_mount;
1801 error = dostatvfs(mp, sb, l, flags, 1);
1802 vput(vp);
1803 return error;
1804 }
1805
1806 /* ARGSUSED */
1807 int
1808 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1809 {
1810 /* {
1811 syscallarg(const void *) fhp;
1812 syscallarg(size_t) fh_size;
1813 syscallarg(struct statvfs *) buf;
1814 syscallarg(int) flags;
1815 } */
1816 struct statvfs *sb = STATVFSBUF_GET();
1817 int error;
1818
1819 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1820 SCARG(uap, flags));
1821 if (error == 0)
1822 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1823 STATVFSBUF_PUT(sb);
1824 return error;
1825 }
1826
1827 /*
1828 * Create a special file.
1829 */
1830 /* ARGSUSED */
1831 int
1832 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
1833 {
1834 /* {
1835 syscallarg(const char *) path;
1836 syscallarg(int) mode;
1837 syscallarg(int) dev;
1838 } */
1839 struct proc *p = l->l_proc;
1840 struct vnode *vp;
1841 struct vattr vattr;
1842 int error, optype;
1843 struct nameidata nd;
1844 char *path;
1845 const char *cpath;
1846 enum uio_seg seg = UIO_USERSPACE;
1847
1848 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1849 0, NULL, NULL, NULL)) != 0)
1850 return (error);
1851
1852 optype = VOP_MKNOD_DESCOFFSET;
1853
1854 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1855 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1856
1857 if ((error = namei(&nd)) != 0)
1858 goto out;
1859 vp = nd.ni_vp;
1860 if (vp != NULL)
1861 error = EEXIST;
1862 else {
1863 VATTR_NULL(&vattr);
1864 /* We will read cwdi->cwdi_cmask unlocked. */
1865 vattr.va_mode =
1866 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1867 vattr.va_rdev = SCARG(uap, dev);
1868
1869 switch (SCARG(uap, mode) & S_IFMT) {
1870 case S_IFMT: /* used by badsect to flag bad sectors */
1871 vattr.va_type = VBAD;
1872 break;
1873 case S_IFCHR:
1874 vattr.va_type = VCHR;
1875 break;
1876 case S_IFBLK:
1877 vattr.va_type = VBLK;
1878 break;
1879 case S_IFWHT:
1880 optype = VOP_WHITEOUT_DESCOFFSET;
1881 break;
1882 case S_IFREG:
1883 #if NVERIEXEC > 0
1884 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1885 O_CREAT);
1886 #endif /* NVERIEXEC > 0 */
1887 vattr.va_type = VREG;
1888 vattr.va_rdev = VNOVAL;
1889 optype = VOP_CREATE_DESCOFFSET;
1890 break;
1891 default:
1892 error = EINVAL;
1893 break;
1894 }
1895 }
1896 if (!error) {
1897 switch (optype) {
1898 case VOP_WHITEOUT_DESCOFFSET:
1899 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1900 if (error)
1901 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1902 vput(nd.ni_dvp);
1903 break;
1904
1905 case VOP_MKNOD_DESCOFFSET:
1906 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1907 &nd.ni_cnd, &vattr);
1908 if (error == 0)
1909 vput(nd.ni_vp);
1910 break;
1911
1912 case VOP_CREATE_DESCOFFSET:
1913 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1914 &nd.ni_cnd, &vattr);
1915 if (error == 0)
1916 vput(nd.ni_vp);
1917 break;
1918 }
1919 } else {
1920 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1921 if (nd.ni_dvp == vp)
1922 vrele(nd.ni_dvp);
1923 else
1924 vput(nd.ni_dvp);
1925 if (vp)
1926 vrele(vp);
1927 }
1928 out:
1929 VERIEXEC_PATH_PUT(path);
1930 return (error);
1931 }
1932
1933 /*
1934 * Create a named pipe.
1935 */
1936 /* ARGSUSED */
1937 int
1938 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1939 {
1940 /* {
1941 syscallarg(const char *) path;
1942 syscallarg(int) mode;
1943 } */
1944 struct proc *p = l->l_proc;
1945 struct vattr vattr;
1946 int error;
1947 struct nameidata nd;
1948
1949 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1950 SCARG(uap, path));
1951 if ((error = namei(&nd)) != 0)
1952 return (error);
1953 if (nd.ni_vp != NULL) {
1954 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1955 if (nd.ni_dvp == nd.ni_vp)
1956 vrele(nd.ni_dvp);
1957 else
1958 vput(nd.ni_dvp);
1959 vrele(nd.ni_vp);
1960 return (EEXIST);
1961 }
1962 VATTR_NULL(&vattr);
1963 vattr.va_type = VFIFO;
1964 /* We will read cwdi->cwdi_cmask unlocked. */
1965 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1966 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1967 if (error == 0)
1968 vput(nd.ni_vp);
1969 return (error);
1970 }
1971
1972 /*
1973 * Make a hard file link.
1974 */
1975 /* ARGSUSED */
1976 int
1977 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
1978 {
1979 /* {
1980 syscallarg(const char *) path;
1981 syscallarg(const char *) link;
1982 } */
1983 struct vnode *vp;
1984 struct nameidata nd;
1985 int error;
1986
1987 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1988 SCARG(uap, path));
1989 if ((error = namei(&nd)) != 0)
1990 return (error);
1991 vp = nd.ni_vp;
1992 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1993 SCARG(uap, link));
1994 if ((error = namei(&nd)) != 0)
1995 goto out;
1996 if (nd.ni_vp) {
1997 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1998 if (nd.ni_dvp == nd.ni_vp)
1999 vrele(nd.ni_dvp);
2000 else
2001 vput(nd.ni_dvp);
2002 vrele(nd.ni_vp);
2003 error = EEXIST;
2004 goto out;
2005 }
2006 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2007 out:
2008 vrele(vp);
2009 return (error);
2010 }
2011
2012 /*
2013 * Make a symbolic link.
2014 */
2015 /* ARGSUSED */
2016 int
2017 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2018 {
2019 /* {
2020 syscallarg(const char *) path;
2021 syscallarg(const char *) link;
2022 } */
2023 struct proc *p = l->l_proc;
2024 struct vattr vattr;
2025 char *path;
2026 int error;
2027 struct nameidata nd;
2028
2029 path = PNBUF_GET();
2030 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2031 if (error)
2032 goto out;
2033 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2034 SCARG(uap, link));
2035 if ((error = namei(&nd)) != 0)
2036 goto out;
2037 if (nd.ni_vp) {
2038 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2039 if (nd.ni_dvp == nd.ni_vp)
2040 vrele(nd.ni_dvp);
2041 else
2042 vput(nd.ni_dvp);
2043 vrele(nd.ni_vp);
2044 error = EEXIST;
2045 goto out;
2046 }
2047 VATTR_NULL(&vattr);
2048 vattr.va_type = VLNK;
2049 /* We will read cwdi->cwdi_cmask unlocked. */
2050 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2051 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2052 if (error == 0)
2053 vput(nd.ni_vp);
2054 out:
2055 PNBUF_PUT(path);
2056 return (error);
2057 }
2058
2059 /*
2060 * Delete a whiteout from the filesystem.
2061 */
2062 /* ARGSUSED */
2063 int
2064 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2065 {
2066 /* {
2067 syscallarg(const char *) path;
2068 } */
2069 int error;
2070 struct nameidata nd;
2071
2072 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2073 UIO_USERSPACE, SCARG(uap, path));
2074 error = namei(&nd);
2075 if (error)
2076 return (error);
2077
2078 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2079 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2080 if (nd.ni_dvp == nd.ni_vp)
2081 vrele(nd.ni_dvp);
2082 else
2083 vput(nd.ni_dvp);
2084 if (nd.ni_vp)
2085 vrele(nd.ni_vp);
2086 return (EEXIST);
2087 }
2088 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2089 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2090 vput(nd.ni_dvp);
2091 return (error);
2092 }
2093
2094 /*
2095 * Delete a name from the filesystem.
2096 */
2097 /* ARGSUSED */
2098 int
2099 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2100 {
2101 /* {
2102 syscallarg(const char *) path;
2103 } */
2104
2105 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2106 }
2107
2108 int
2109 do_sys_unlink(const char *arg, enum uio_seg seg)
2110 {
2111 struct vnode *vp;
2112 int error;
2113 struct nameidata nd;
2114 kauth_cred_t cred;
2115 char *path;
2116 const char *cpath;
2117
2118 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2119 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2120
2121 if ((error = namei(&nd)) != 0)
2122 goto out;
2123 vp = nd.ni_vp;
2124
2125 /*
2126 * The root of a mounted filesystem cannot be deleted.
2127 */
2128 if (vp->v_vflag & VV_ROOT) {
2129 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2130 if (nd.ni_dvp == vp)
2131 vrele(nd.ni_dvp);
2132 else
2133 vput(nd.ni_dvp);
2134 vput(vp);
2135 error = EBUSY;
2136 goto out;
2137 }
2138
2139 #if NVERIEXEC > 0
2140 /* Handle remove requests for veriexec entries. */
2141 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2142 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2143 if (nd.ni_dvp == vp)
2144 vrele(nd.ni_dvp);
2145 else
2146 vput(nd.ni_dvp);
2147 vput(vp);
2148 goto out;
2149 }
2150 #endif /* NVERIEXEC > 0 */
2151
2152 cred = kauth_cred_get();
2153 #ifdef FILEASSOC
2154 (void)fileassoc_file_delete(vp);
2155 #endif /* FILEASSOC */
2156 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2157 out:
2158 VERIEXEC_PATH_PUT(path);
2159 return (error);
2160 }
2161
2162 /*
2163 * Reposition read/write file offset.
2164 */
2165 int
2166 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2167 {
2168 /* {
2169 syscallarg(int) fd;
2170 syscallarg(int) pad;
2171 syscallarg(off_t) offset;
2172 syscallarg(int) whence;
2173 } */
2174 kauth_cred_t cred = l->l_cred;
2175 file_t *fp;
2176 struct vnode *vp;
2177 struct vattr vattr;
2178 off_t newoff;
2179 int error, fd;
2180
2181 fd = SCARG(uap, fd);
2182
2183 if ((fp = fd_getfile(fd)) == NULL)
2184 return (EBADF);
2185
2186 vp = fp->f_data;
2187 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2188 error = ESPIPE;
2189 goto out;
2190 }
2191
2192 switch (SCARG(uap, whence)) {
2193 case SEEK_CUR:
2194 newoff = fp->f_offset + SCARG(uap, offset);
2195 break;
2196 case SEEK_END:
2197 error = VOP_GETATTR(vp, &vattr, cred);
2198 if (error) {
2199 goto out;
2200 }
2201 newoff = SCARG(uap, offset) + vattr.va_size;
2202 break;
2203 case SEEK_SET:
2204 newoff = SCARG(uap, offset);
2205 break;
2206 default:
2207 error = EINVAL;
2208 goto out;
2209 }
2210 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2211 *(off_t *)retval = fp->f_offset = newoff;
2212 }
2213 out:
2214 fd_putfile(fd);
2215 return (error);
2216 }
2217
2218 /*
2219 * Positional read system call.
2220 */
2221 int
2222 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2223 {
2224 /* {
2225 syscallarg(int) fd;
2226 syscallarg(void *) buf;
2227 syscallarg(size_t) nbyte;
2228 syscallarg(off_t) offset;
2229 } */
2230 file_t *fp;
2231 struct vnode *vp;
2232 off_t offset;
2233 int error, fd = SCARG(uap, fd);
2234
2235 if ((fp = fd_getfile(fd)) == NULL)
2236 return (EBADF);
2237
2238 if ((fp->f_flag & FREAD) == 0) {
2239 fd_putfile(fd);
2240 return (EBADF);
2241 }
2242
2243 vp = fp->f_data;
2244 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2245 error = ESPIPE;
2246 goto out;
2247 }
2248
2249 offset = SCARG(uap, offset);
2250
2251 /*
2252 * XXX This works because no file systems actually
2253 * XXX take any action on the seek operation.
2254 */
2255 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2256 goto out;
2257
2258 /* dofileread() will unuse the descriptor for us */
2259 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2260 &offset, 0, retval));
2261
2262 out:
2263 fd_putfile(fd);
2264 return (error);
2265 }
2266
2267 /*
2268 * Positional scatter read system call.
2269 */
2270 int
2271 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2272 {
2273 /* {
2274 syscallarg(int) fd;
2275 syscallarg(const struct iovec *) iovp;
2276 syscallarg(int) iovcnt;
2277 syscallarg(off_t) offset;
2278 } */
2279 off_t offset = SCARG(uap, offset);
2280
2281 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2282 SCARG(uap, iovcnt), &offset, 0, retval);
2283 }
2284
2285 /*
2286 * Positional write system call.
2287 */
2288 int
2289 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2290 {
2291 /* {
2292 syscallarg(int) fd;
2293 syscallarg(const void *) buf;
2294 syscallarg(size_t) nbyte;
2295 syscallarg(off_t) offset;
2296 } */
2297 file_t *fp;
2298 struct vnode *vp;
2299 off_t offset;
2300 int error, fd = SCARG(uap, fd);
2301
2302 if ((fp = fd_getfile(fd)) == NULL)
2303 return (EBADF);
2304
2305 if ((fp->f_flag & FWRITE) == 0) {
2306 fd_putfile(fd);
2307 return (EBADF);
2308 }
2309
2310 vp = fp->f_data;
2311 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2312 error = ESPIPE;
2313 goto out;
2314 }
2315
2316 offset = SCARG(uap, offset);
2317
2318 /*
2319 * XXX This works because no file systems actually
2320 * XXX take any action on the seek operation.
2321 */
2322 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2323 goto out;
2324
2325 /* dofilewrite() will unuse the descriptor for us */
2326 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2327 &offset, 0, retval));
2328
2329 out:
2330 fd_putfile(fd);
2331 return (error);
2332 }
2333
2334 /*
2335 * Positional gather write system call.
2336 */
2337 int
2338 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2339 {
2340 /* {
2341 syscallarg(int) fd;
2342 syscallarg(const struct iovec *) iovp;
2343 syscallarg(int) iovcnt;
2344 syscallarg(off_t) offset;
2345 } */
2346 off_t offset = SCARG(uap, offset);
2347
2348 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2349 SCARG(uap, iovcnt), &offset, 0, retval);
2350 }
2351
2352 /*
2353 * Check access permissions.
2354 */
2355 int
2356 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2357 {
2358 /* {
2359 syscallarg(const char *) path;
2360 syscallarg(int) flags;
2361 } */
2362 kauth_cred_t cred;
2363 struct vnode *vp;
2364 int error, flags;
2365 struct nameidata nd;
2366
2367 cred = kauth_cred_dup(l->l_cred);
2368 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2369 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2370 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2371 SCARG(uap, path));
2372 /* Override default credentials */
2373 nd.ni_cnd.cn_cred = cred;
2374 if ((error = namei(&nd)) != 0)
2375 goto out;
2376 vp = nd.ni_vp;
2377
2378 /* Flags == 0 means only check for existence. */
2379 if (SCARG(uap, flags)) {
2380 flags = 0;
2381 if (SCARG(uap, flags) & R_OK)
2382 flags |= VREAD;
2383 if (SCARG(uap, flags) & W_OK)
2384 flags |= VWRITE;
2385 if (SCARG(uap, flags) & X_OK)
2386 flags |= VEXEC;
2387
2388 error = VOP_ACCESS(vp, flags, cred);
2389 if (!error && (flags & VWRITE))
2390 error = vn_writechk(vp);
2391 }
2392 vput(vp);
2393 out:
2394 kauth_cred_free(cred);
2395 return (error);
2396 }
2397
2398 /*
2399 * Common code for all sys_stat functions, including compat versions.
2400 */
2401 int
2402 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2403 {
2404 int error;
2405 struct nameidata nd;
2406
2407 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2408 UIO_USERSPACE, path);
2409 error = namei(&nd);
2410 if (error != 0)
2411 return error;
2412 error = vn_stat(nd.ni_vp, sb);
2413 vput(nd.ni_vp);
2414 return error;
2415 }
2416
2417 /*
2418 * Get file status; this version follows links.
2419 */
2420 /* ARGSUSED */
2421 int
2422 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
2423 {
2424 /* {
2425 syscallarg(const char *) path;
2426 syscallarg(struct stat *) ub;
2427 } */
2428 struct stat sb;
2429 int error;
2430
2431 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2432 if (error)
2433 return error;
2434 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2435 }
2436
2437 /*
2438 * Get file status; this version does not follow links.
2439 */
2440 /* ARGSUSED */
2441 int
2442 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
2443 {
2444 /* {
2445 syscallarg(const char *) path;
2446 syscallarg(struct stat *) ub;
2447 } */
2448 struct stat sb;
2449 int error;
2450
2451 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2452 if (error)
2453 return error;
2454 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2455 }
2456
2457 /*
2458 * Get configurable pathname variables.
2459 */
2460 /* ARGSUSED */
2461 int
2462 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2463 {
2464 /* {
2465 syscallarg(const char *) path;
2466 syscallarg(int) name;
2467 } */
2468 int error;
2469 struct nameidata nd;
2470
2471 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2472 SCARG(uap, path));
2473 if ((error = namei(&nd)) != 0)
2474 return (error);
2475 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2476 vput(nd.ni_vp);
2477 return (error);
2478 }
2479
2480 /*
2481 * Return target name of a symbolic link.
2482 */
2483 /* ARGSUSED */
2484 int
2485 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2486 {
2487 /* {
2488 syscallarg(const char *) path;
2489 syscallarg(char *) buf;
2490 syscallarg(size_t) count;
2491 } */
2492 struct vnode *vp;
2493 struct iovec aiov;
2494 struct uio auio;
2495 int error;
2496 struct nameidata nd;
2497
2498 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2499 SCARG(uap, path));
2500 if ((error = namei(&nd)) != 0)
2501 return (error);
2502 vp = nd.ni_vp;
2503 if (vp->v_type != VLNK)
2504 error = EINVAL;
2505 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2506 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2507 aiov.iov_base = SCARG(uap, buf);
2508 aiov.iov_len = SCARG(uap, count);
2509 auio.uio_iov = &aiov;
2510 auio.uio_iovcnt = 1;
2511 auio.uio_offset = 0;
2512 auio.uio_rw = UIO_READ;
2513 KASSERT(l == curlwp);
2514 auio.uio_vmspace = l->l_proc->p_vmspace;
2515 auio.uio_resid = SCARG(uap, count);
2516 error = VOP_READLINK(vp, &auio, l->l_cred);
2517 }
2518 vput(vp);
2519 *retval = SCARG(uap, count) - auio.uio_resid;
2520 return (error);
2521 }
2522
2523 /*
2524 * Change flags of a file given a path name.
2525 */
2526 /* ARGSUSED */
2527 int
2528 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2529 {
2530 /* {
2531 syscallarg(const char *) path;
2532 syscallarg(u_long) flags;
2533 } */
2534 struct vnode *vp;
2535 int error;
2536 struct nameidata nd;
2537
2538 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2539 SCARG(uap, path));
2540 if ((error = namei(&nd)) != 0)
2541 return (error);
2542 vp = nd.ni_vp;
2543 error = change_flags(vp, SCARG(uap, flags), l);
2544 vput(vp);
2545 return (error);
2546 }
2547
2548 /*
2549 * Change flags of a file given a file descriptor.
2550 */
2551 /* ARGSUSED */
2552 int
2553 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2554 {
2555 /* {
2556 syscallarg(int) fd;
2557 syscallarg(u_long) flags;
2558 } */
2559 struct vnode *vp;
2560 file_t *fp;
2561 int error;
2562
2563 /* fd_getvnode() will use the descriptor for us */
2564 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2565 return (error);
2566 vp = fp->f_data;
2567 error = change_flags(vp, SCARG(uap, flags), l);
2568 VOP_UNLOCK(vp, 0);
2569 fd_putfile(SCARG(uap, fd));
2570 return (error);
2571 }
2572
2573 /*
2574 * Change flags of a file given a path name; this version does
2575 * not follow links.
2576 */
2577 int
2578 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2579 {
2580 /* {
2581 syscallarg(const char *) path;
2582 syscallarg(u_long) flags;
2583 } */
2584 struct vnode *vp;
2585 int error;
2586 struct nameidata nd;
2587
2588 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2589 SCARG(uap, path));
2590 if ((error = namei(&nd)) != 0)
2591 return (error);
2592 vp = nd.ni_vp;
2593 error = change_flags(vp, SCARG(uap, flags), l);
2594 vput(vp);
2595 return (error);
2596 }
2597
2598 /*
2599 * Common routine to change flags of a file.
2600 */
2601 int
2602 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2603 {
2604 struct vattr vattr;
2605 int error;
2606
2607 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2608 /*
2609 * Non-superusers cannot change the flags on devices, even if they
2610 * own them.
2611 */
2612 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2613 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2614 goto out;
2615 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2616 error = EINVAL;
2617 goto out;
2618 }
2619 }
2620 VATTR_NULL(&vattr);
2621 vattr.va_flags = flags;
2622 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2623 out:
2624 return (error);
2625 }
2626
2627 /*
2628 * Change mode of a file given path name; this version follows links.
2629 */
2630 /* ARGSUSED */
2631 int
2632 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2633 {
2634 /* {
2635 syscallarg(const char *) path;
2636 syscallarg(int) mode;
2637 } */
2638 int error;
2639 struct nameidata nd;
2640
2641 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2642 SCARG(uap, path));
2643 if ((error = namei(&nd)) != 0)
2644 return (error);
2645
2646 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2647
2648 vrele(nd.ni_vp);
2649 return (error);
2650 }
2651
2652 /*
2653 * Change mode of a file given a file descriptor.
2654 */
2655 /* ARGSUSED */
2656 int
2657 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2658 {
2659 /* {
2660 syscallarg(int) fd;
2661 syscallarg(int) mode;
2662 } */
2663 file_t *fp;
2664 int error;
2665
2666 /* fd_getvnode() will use the descriptor for us */
2667 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2668 return (error);
2669 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2670 fd_putfile(SCARG(uap, fd));
2671 return (error);
2672 }
2673
2674 /*
2675 * Change mode of a file given path name; this version does not follow links.
2676 */
2677 /* ARGSUSED */
2678 int
2679 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2680 {
2681 /* {
2682 syscallarg(const char *) path;
2683 syscallarg(int) mode;
2684 } */
2685 int error;
2686 struct nameidata nd;
2687
2688 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2689 SCARG(uap, path));
2690 if ((error = namei(&nd)) != 0)
2691 return (error);
2692
2693 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2694
2695 vrele(nd.ni_vp);
2696 return (error);
2697 }
2698
2699 /*
2700 * Common routine to set mode given a vnode.
2701 */
2702 static int
2703 change_mode(struct vnode *vp, int mode, struct lwp *l)
2704 {
2705 struct vattr vattr;
2706 int error;
2707
2708 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2709 VATTR_NULL(&vattr);
2710 vattr.va_mode = mode & ALLPERMS;
2711 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2712 VOP_UNLOCK(vp, 0);
2713 return (error);
2714 }
2715
2716 /*
2717 * Set ownership given a path name; this version follows links.
2718 */
2719 /* ARGSUSED */
2720 int
2721 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2722 {
2723 /* {
2724 syscallarg(const char *) path;
2725 syscallarg(uid_t) uid;
2726 syscallarg(gid_t) gid;
2727 } */
2728 int error;
2729 struct nameidata nd;
2730
2731 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2732 SCARG(uap, path));
2733 if ((error = namei(&nd)) != 0)
2734 return (error);
2735
2736 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2737
2738 vrele(nd.ni_vp);
2739 return (error);
2740 }
2741
2742 /*
2743 * Set ownership given a path name; this version follows links.
2744 * Provides POSIX semantics.
2745 */
2746 /* ARGSUSED */
2747 int
2748 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2749 {
2750 /* {
2751 syscallarg(const char *) path;
2752 syscallarg(uid_t) uid;
2753 syscallarg(gid_t) gid;
2754 } */
2755 int error;
2756 struct nameidata nd;
2757
2758 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2759 SCARG(uap, path));
2760 if ((error = namei(&nd)) != 0)
2761 return (error);
2762
2763 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2764
2765 vrele(nd.ni_vp);
2766 return (error);
2767 }
2768
2769 /*
2770 * Set ownership given a file descriptor.
2771 */
2772 /* ARGSUSED */
2773 int
2774 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2775 {
2776 /* {
2777 syscallarg(int) fd;
2778 syscallarg(uid_t) uid;
2779 syscallarg(gid_t) gid;
2780 } */
2781 int error;
2782 file_t *fp;
2783
2784 /* fd_getvnode() will use the descriptor for us */
2785 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2786 return (error);
2787 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2788 l, 0);
2789 fd_putfile(SCARG(uap, fd));
2790 return (error);
2791 }
2792
2793 /*
2794 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2795 */
2796 /* ARGSUSED */
2797 int
2798 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2799 {
2800 /* {
2801 syscallarg(int) fd;
2802 syscallarg(uid_t) uid;
2803 syscallarg(gid_t) gid;
2804 } */
2805 int error;
2806 file_t *fp;
2807
2808 /* fd_getvnode() will use the descriptor for us */
2809 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2810 return (error);
2811 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2812 l, 1);
2813 fd_putfile(SCARG(uap, fd));
2814 return (error);
2815 }
2816
2817 /*
2818 * Set ownership given a path name; this version does not follow links.
2819 */
2820 /* ARGSUSED */
2821 int
2822 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2823 {
2824 /* {
2825 syscallarg(const char *) path;
2826 syscallarg(uid_t) uid;
2827 syscallarg(gid_t) gid;
2828 } */
2829 int error;
2830 struct nameidata nd;
2831
2832 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2833 SCARG(uap, path));
2834 if ((error = namei(&nd)) != 0)
2835 return (error);
2836
2837 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2838
2839 vrele(nd.ni_vp);
2840 return (error);
2841 }
2842
2843 /*
2844 * Set ownership given a path name; this version does not follow links.
2845 * Provides POSIX/XPG semantics.
2846 */
2847 /* ARGSUSED */
2848 int
2849 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2850 {
2851 /* {
2852 syscallarg(const char *) path;
2853 syscallarg(uid_t) uid;
2854 syscallarg(gid_t) gid;
2855 } */
2856 int error;
2857 struct nameidata nd;
2858
2859 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2860 SCARG(uap, path));
2861 if ((error = namei(&nd)) != 0)
2862 return (error);
2863
2864 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2865
2866 vrele(nd.ni_vp);
2867 return (error);
2868 }
2869
2870 /*
2871 * Common routine to set ownership given a vnode.
2872 */
2873 static int
2874 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2875 int posix_semantics)
2876 {
2877 struct vattr vattr;
2878 mode_t newmode;
2879 int error;
2880
2881 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2882 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2883 goto out;
2884
2885 #define CHANGED(x) ((int)(x) != -1)
2886 newmode = vattr.va_mode;
2887 if (posix_semantics) {
2888 /*
2889 * POSIX/XPG semantics: if the caller is not the super-user,
2890 * clear set-user-id and set-group-id bits. Both POSIX and
2891 * the XPG consider the behaviour for calls by the super-user
2892 * implementation-defined; we leave the set-user-id and set-
2893 * group-id settings intact in that case.
2894 */
2895 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2896 NULL) != 0)
2897 newmode &= ~(S_ISUID | S_ISGID);
2898 } else {
2899 /*
2900 * NetBSD semantics: when changing owner and/or group,
2901 * clear the respective bit(s).
2902 */
2903 if (CHANGED(uid))
2904 newmode &= ~S_ISUID;
2905 if (CHANGED(gid))
2906 newmode &= ~S_ISGID;
2907 }
2908 /* Update va_mode iff altered. */
2909 if (vattr.va_mode == newmode)
2910 newmode = VNOVAL;
2911
2912 VATTR_NULL(&vattr);
2913 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2914 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2915 vattr.va_mode = newmode;
2916 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2917 #undef CHANGED
2918
2919 out:
2920 VOP_UNLOCK(vp, 0);
2921 return (error);
2922 }
2923
2924 /*
2925 * Set the access and modification times given a path name; this
2926 * version follows links.
2927 */
2928 /* ARGSUSED */
2929 int
2930 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
2931 {
2932 /* {
2933 syscallarg(const char *) path;
2934 syscallarg(const struct timeval *) tptr;
2935 } */
2936
2937 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2938 SCARG(uap, tptr), UIO_USERSPACE);
2939 }
2940
2941 /*
2942 * Set the access and modification times given a file descriptor.
2943 */
2944 /* ARGSUSED */
2945 int
2946 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
2947 {
2948 /* {
2949 syscallarg(int) fd;
2950 syscallarg(const struct timeval *) tptr;
2951 } */
2952 int error;
2953 file_t *fp;
2954
2955 /* fd_getvnode() will use the descriptor for us */
2956 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2957 return (error);
2958 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2959 UIO_USERSPACE);
2960 fd_putfile(SCARG(uap, fd));
2961 return (error);
2962 }
2963
2964 /*
2965 * Set the access and modification times given a path name; this
2966 * version does not follow links.
2967 */
2968 int
2969 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
2970 {
2971 /* {
2972 syscallarg(const char *) path;
2973 syscallarg(const struct timeval *) tptr;
2974 } */
2975
2976 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2977 SCARG(uap, tptr), UIO_USERSPACE);
2978 }
2979
2980 /*
2981 * Common routine to set access and modification times given a vnode.
2982 */
2983 int
2984 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
2985 const struct timeval *tptr, enum uio_seg seg)
2986 {
2987 struct vattr vattr;
2988 struct nameidata nd;
2989 int error;
2990 bool vanull, setbirthtime;
2991 struct timespec ts[2];
2992
2993 if (tptr == NULL) {
2994 vanull = true;
2995 nanotime(&ts[0]);
2996 ts[1] = ts[0];
2997 } else {
2998 struct timeval tv[2];
2999
3000 vanull = false;
3001 if (seg != UIO_SYSSPACE) {
3002 error = copyin(tptr, &tv, sizeof (tv));
3003 if (error != 0)
3004 return error;
3005 tptr = tv;
3006 }
3007 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3008 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3009 }
3010
3011 if (vp == NULL) {
3012 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3013 if ((error = namei(&nd)) != 0)
3014 return error;
3015 vp = nd.ni_vp;
3016 } else
3017 nd.ni_vp = NULL;
3018
3019 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3020 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3021 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3022 VATTR_NULL(&vattr);
3023 vattr.va_atime = ts[0];
3024 vattr.va_mtime = ts[1];
3025 if (setbirthtime)
3026 vattr.va_birthtime = ts[1];
3027 if (vanull)
3028 vattr.va_flags |= VA_UTIMES_NULL;
3029 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3030 VOP_UNLOCK(vp, 0);
3031
3032 if (nd.ni_vp != NULL)
3033 vrele(nd.ni_vp);
3034
3035 return error;
3036 }
3037
3038 /*
3039 * Truncate a file given its path name.
3040 */
3041 /* ARGSUSED */
3042 int
3043 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3044 {
3045 /* {
3046 syscallarg(const char *) path;
3047 syscallarg(int) pad;
3048 syscallarg(off_t) length;
3049 } */
3050 struct vnode *vp;
3051 struct vattr vattr;
3052 int error;
3053 struct nameidata nd;
3054
3055 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3056 SCARG(uap, path));
3057 if ((error = namei(&nd)) != 0)
3058 return (error);
3059 vp = nd.ni_vp;
3060 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3061 if (vp->v_type == VDIR)
3062 error = EISDIR;
3063 else if ((error = vn_writechk(vp)) == 0 &&
3064 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3065 VATTR_NULL(&vattr);
3066 vattr.va_size = SCARG(uap, length);
3067 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3068 }
3069 vput(vp);
3070 return (error);
3071 }
3072
3073 /*
3074 * Truncate a file given a file descriptor.
3075 */
3076 /* ARGSUSED */
3077 int
3078 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3079 {
3080 /* {
3081 syscallarg(int) fd;
3082 syscallarg(int) pad;
3083 syscallarg(off_t) length;
3084 } */
3085 struct vattr vattr;
3086 struct vnode *vp;
3087 file_t *fp;
3088 int error;
3089
3090 /* fd_getvnode() will use the descriptor for us */
3091 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3092 return (error);
3093 if ((fp->f_flag & FWRITE) == 0) {
3094 error = EINVAL;
3095 goto out;
3096 }
3097 vp = fp->f_data;
3098 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3099 if (vp->v_type == VDIR)
3100 error = EISDIR;
3101 else if ((error = vn_writechk(vp)) == 0) {
3102 VATTR_NULL(&vattr);
3103 vattr.va_size = SCARG(uap, length);
3104 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3105 }
3106 VOP_UNLOCK(vp, 0);
3107 out:
3108 fd_putfile(SCARG(uap, fd));
3109 return (error);
3110 }
3111
3112 /*
3113 * Sync an open file.
3114 */
3115 /* ARGSUSED */
3116 int
3117 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3118 {
3119 /* {
3120 syscallarg(int) fd;
3121 } */
3122 struct vnode *vp;
3123 file_t *fp;
3124 int error;
3125
3126 /* fd_getvnode() will use the descriptor for us */
3127 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3128 return (error);
3129 vp = fp->f_data;
3130 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3131 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3132 if (error == 0 && bioopsp != NULL &&
3133 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3134 (*bioopsp->io_fsync)(vp, 0);
3135 VOP_UNLOCK(vp, 0);
3136 fd_putfile(SCARG(uap, fd));
3137 return (error);
3138 }
3139
3140 /*
3141 * Sync a range of file data. API modeled after that found in AIX.
3142 *
3143 * FDATASYNC indicates that we need only save enough metadata to be able
3144 * to re-read the written data. Note we duplicate AIX's requirement that
3145 * the file be open for writing.
3146 */
3147 /* ARGSUSED */
3148 int
3149 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3150 {
3151 /* {
3152 syscallarg(int) fd;
3153 syscallarg(int) flags;
3154 syscallarg(off_t) start;
3155 syscallarg(off_t) length;
3156 } */
3157 struct vnode *vp;
3158 file_t *fp;
3159 int flags, nflags;
3160 off_t s, e, len;
3161 int error;
3162
3163 /* fd_getvnode() will use the descriptor for us */
3164 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3165 return (error);
3166
3167 if ((fp->f_flag & FWRITE) == 0) {
3168 error = EBADF;
3169 goto out;
3170 }
3171
3172 flags = SCARG(uap, flags);
3173 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3174 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3175 error = EINVAL;
3176 goto out;
3177 }
3178 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3179 if (flags & FDATASYNC)
3180 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3181 else
3182 nflags = FSYNC_WAIT;
3183 if (flags & FDISKSYNC)
3184 nflags |= FSYNC_CACHE;
3185
3186 len = SCARG(uap, length);
3187 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3188 if (len) {
3189 s = SCARG(uap, start);
3190 e = s + len;
3191 if (e < s) {
3192 error = EINVAL;
3193 goto out;
3194 }
3195 } else {
3196 e = 0;
3197 s = 0;
3198 }
3199
3200 vp = fp->f_data;
3201 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3202 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3203
3204 if (error == 0 && bioopsp != NULL &&
3205 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3206 (*bioopsp->io_fsync)(vp, nflags);
3207
3208 VOP_UNLOCK(vp, 0);
3209 out:
3210 fd_putfile(SCARG(uap, fd));
3211 return (error);
3212 }
3213
3214 /*
3215 * Sync the data of an open file.
3216 */
3217 /* ARGSUSED */
3218 int
3219 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3220 {
3221 /* {
3222 syscallarg(int) fd;
3223 } */
3224 struct vnode *vp;
3225 file_t *fp;
3226 int error;
3227
3228 /* fd_getvnode() will use the descriptor for us */
3229 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3230 return (error);
3231 if ((fp->f_flag & FWRITE) == 0) {
3232 fd_putfile(SCARG(uap, fd));
3233 return (EBADF);
3234 }
3235 vp = fp->f_data;
3236 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3237 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3238 VOP_UNLOCK(vp, 0);
3239 fd_putfile(SCARG(uap, fd));
3240 return (error);
3241 }
3242
3243 /*
3244 * Rename files, (standard) BSD semantics frontend.
3245 */
3246 /* ARGSUSED */
3247 int
3248 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3249 {
3250 /* {
3251 syscallarg(const char *) from;
3252 syscallarg(const char *) to;
3253 } */
3254
3255 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3256 }
3257
3258 /*
3259 * Rename files, POSIX semantics frontend.
3260 */
3261 /* ARGSUSED */
3262 int
3263 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3264 {
3265 /* {
3266 syscallarg(const char *) from;
3267 syscallarg(const char *) to;
3268 } */
3269
3270 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3271 }
3272
3273 /*
3274 * Rename files. Source and destination must either both be directories,
3275 * or both not be directories. If target is a directory, it must be empty.
3276 * If `from' and `to' refer to the same object, the value of the `retain'
3277 * argument is used to determine whether `from' will be
3278 *
3279 * (retain == 0) deleted unless `from' and `to' refer to the same
3280 * object in the file system's name space (BSD).
3281 * (retain == 1) always retained (POSIX).
3282 */
3283 int
3284 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3285 {
3286 struct vnode *tvp, *fvp, *tdvp;
3287 struct nameidata fromnd, tond;
3288 struct mount *fs;
3289 struct lwp *l = curlwp;
3290 struct proc *p;
3291 uint32_t saveflag;
3292 int error;
3293
3294 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3295 seg, from);
3296 if ((error = namei(&fromnd)) != 0)
3297 return (error);
3298 if (fromnd.ni_dvp != fromnd.ni_vp)
3299 VOP_UNLOCK(fromnd.ni_dvp, 0);
3300 fvp = fromnd.ni_vp;
3301
3302 fs = fvp->v_mount;
3303 error = VFS_RENAMELOCK_ENTER(fs);
3304 if (error) {
3305 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3306 vrele(fromnd.ni_dvp);
3307 vrele(fvp);
3308 goto out1;
3309 }
3310
3311 /*
3312 * close, partially, yet another race - ideally we should only
3313 * go as far as getting fromnd.ni_dvp before getting the per-fs
3314 * lock, and then continue to get fromnd.ni_vp, but we can't do
3315 * that with namei as it stands.
3316 *
3317 * This still won't prevent rmdir from nuking fromnd.ni_vp
3318 * under us. The real fix is to get the locks in the right
3319 * order and do the lookups in the right places, but that's a
3320 * major rototill.
3321 *
3322 * Preserve the SAVESTART in cn_flags, because who knows what
3323 * might happen if we don't.
3324 *
3325 * Note: this logic (as well as this whole function) is cloned
3326 * in nfs_serv.c. Proceed accordingly.
3327 */
3328 vrele(fvp);
3329 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3330 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3331 (fromnd.ni_cnd.cn_namelen == 2 &&
3332 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3333 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3334 error = EINVAL;
3335 VFS_RENAMELOCK_EXIT(fs);
3336 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3337 vrele(fromnd.ni_dvp);
3338 goto out1;
3339 }
3340 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3341 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3342 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3343 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3344 fromnd.ni_cnd.cn_flags |= saveflag;
3345 if (error) {
3346 VOP_UNLOCK(fromnd.ni_dvp, 0);
3347 VFS_RENAMELOCK_EXIT(fs);
3348 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3349 vrele(fromnd.ni_dvp);
3350 goto out1;
3351 }
3352 VOP_UNLOCK(fromnd.ni_vp, 0);
3353 if (fromnd.ni_dvp != fromnd.ni_vp)
3354 VOP_UNLOCK(fromnd.ni_dvp, 0);
3355 fvp = fromnd.ni_vp;
3356
3357 NDINIT(&tond, RENAME,
3358 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3359 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3360 seg, to);
3361 if ((error = namei(&tond)) != 0) {
3362 VFS_RENAMELOCK_EXIT(fs);
3363 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3364 vrele(fromnd.ni_dvp);
3365 vrele(fvp);
3366 goto out1;
3367 }
3368 tdvp = tond.ni_dvp;
3369 tvp = tond.ni_vp;
3370
3371 if (tvp != NULL) {
3372 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3373 error = ENOTDIR;
3374 goto out;
3375 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3376 error = EISDIR;
3377 goto out;
3378 }
3379 }
3380
3381 if (fvp == tdvp)
3382 error = EINVAL;
3383
3384 /*
3385 * Source and destination refer to the same object.
3386 */
3387 if (fvp == tvp) {
3388 if (retain)
3389 error = -1;
3390 else if (fromnd.ni_dvp == tdvp &&
3391 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3392 !memcmp(fromnd.ni_cnd.cn_nameptr,
3393 tond.ni_cnd.cn_nameptr,
3394 fromnd.ni_cnd.cn_namelen))
3395 error = -1;
3396 }
3397
3398 #if NVERIEXEC > 0
3399 if (!error) {
3400 char *f1, *f2;
3401
3402 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3403 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3404
3405 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3406 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3407
3408 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3409
3410 free(f1, M_TEMP);
3411 free(f2, M_TEMP);
3412 }
3413 #endif /* NVERIEXEC > 0 */
3414
3415 out:
3416 p = l->l_proc;
3417 if (!error) {
3418 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3419 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3420 VFS_RENAMELOCK_EXIT(fs);
3421 } else {
3422 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3423 if (tdvp == tvp)
3424 vrele(tdvp);
3425 else
3426 vput(tdvp);
3427 if (tvp)
3428 vput(tvp);
3429 VFS_RENAMELOCK_EXIT(fs);
3430 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3431 vrele(fromnd.ni_dvp);
3432 vrele(fvp);
3433 }
3434 vrele(tond.ni_startdir);
3435 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3436 out1:
3437 if (fromnd.ni_startdir)
3438 vrele(fromnd.ni_startdir);
3439 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3440 return (error == -1 ? 0 : error);
3441 }
3442
3443 /*
3444 * Make a directory file.
3445 */
3446 /* ARGSUSED */
3447 int
3448 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3449 {
3450 /* {
3451 syscallarg(const char *) path;
3452 syscallarg(int) mode;
3453 } */
3454 struct proc *p = l->l_proc;
3455 struct vnode *vp;
3456 struct vattr vattr;
3457 int error;
3458 struct nameidata nd;
3459
3460 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3461 SCARG(uap, path));
3462 if ((error = namei(&nd)) != 0)
3463 return (error);
3464 vp = nd.ni_vp;
3465 if (vp != NULL) {
3466 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3467 if (nd.ni_dvp == vp)
3468 vrele(nd.ni_dvp);
3469 else
3470 vput(nd.ni_dvp);
3471 vrele(vp);
3472 return (EEXIST);
3473 }
3474 VATTR_NULL(&vattr);
3475 vattr.va_type = VDIR;
3476 /* We will read cwdi->cwdi_cmask unlocked. */
3477 vattr.va_mode =
3478 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3479 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3480 if (!error)
3481 vput(nd.ni_vp);
3482 return (error);
3483 }
3484
3485 /*
3486 * Remove a directory file.
3487 */
3488 /* ARGSUSED */
3489 int
3490 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3491 {
3492 /* {
3493 syscallarg(const char *) path;
3494 } */
3495 struct vnode *vp;
3496 int error;
3497 struct nameidata nd;
3498
3499 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3500 SCARG(uap, path));
3501 if ((error = namei(&nd)) != 0)
3502 return (error);
3503 vp = nd.ni_vp;
3504 if (vp->v_type != VDIR) {
3505 error = ENOTDIR;
3506 goto out;
3507 }
3508 /*
3509 * No rmdir "." please.
3510 */
3511 if (nd.ni_dvp == vp) {
3512 error = EINVAL;
3513 goto out;
3514 }
3515 /*
3516 * The root of a mounted filesystem cannot be deleted.
3517 */
3518 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3519 error = EBUSY;
3520 goto out;
3521 }
3522 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3523 return (error);
3524
3525 out:
3526 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3527 if (nd.ni_dvp == vp)
3528 vrele(nd.ni_dvp);
3529 else
3530 vput(nd.ni_dvp);
3531 vput(vp);
3532 return (error);
3533 }
3534
3535 /*
3536 * Read a block of directory entries in a file system independent format.
3537 */
3538 int
3539 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3540 {
3541 /* {
3542 syscallarg(int) fd;
3543 syscallarg(char *) buf;
3544 syscallarg(size_t) count;
3545 } */
3546 file_t *fp;
3547 int error, done;
3548
3549 /* fd_getvnode() will use the descriptor for us */
3550 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3551 return (error);
3552 if ((fp->f_flag & FREAD) == 0) {
3553 error = EBADF;
3554 goto out;
3555 }
3556 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3557 SCARG(uap, count), &done, l, 0, 0);
3558 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3559 *retval = done;
3560 out:
3561 fd_putfile(SCARG(uap, fd));
3562 return (error);
3563 }
3564
3565 /*
3566 * Set the mode mask for creation of filesystem nodes.
3567 */
3568 int
3569 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3570 {
3571 /* {
3572 syscallarg(mode_t) newmask;
3573 } */
3574 struct proc *p = l->l_proc;
3575 struct cwdinfo *cwdi;
3576
3577 /*
3578 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3579 * important is that we serialize changes to the mask. The
3580 * rw_exit() will issue a write memory barrier on our behalf,
3581 * and force the changes out to other CPUs (as it must use an
3582 * atomic operation, draining the local CPU's store buffers).
3583 */
3584 cwdi = p->p_cwdi;
3585 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3586 *retval = cwdi->cwdi_cmask;
3587 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3588 rw_exit(&cwdi->cwdi_lock);
3589
3590 return (0);
3591 }
3592
3593 int
3594 dorevoke(struct vnode *vp, kauth_cred_t cred)
3595 {
3596 struct vattr vattr;
3597 int error;
3598
3599 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3600 return error;
3601 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3602 (error = kauth_authorize_generic(cred,
3603 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3604 VOP_REVOKE(vp, REVOKEALL);
3605 return (error);
3606 }
3607
3608 /*
3609 * Void all references to file by ripping underlying filesystem
3610 * away from vnode.
3611 */
3612 /* ARGSUSED */
3613 int
3614 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3615 {
3616 /* {
3617 syscallarg(const char *) path;
3618 } */
3619 struct vnode *vp;
3620 int error;
3621 struct nameidata nd;
3622
3623 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3624 SCARG(uap, path));
3625 if ((error = namei(&nd)) != 0)
3626 return (error);
3627 vp = nd.ni_vp;
3628 error = dorevoke(vp, l->l_cred);
3629 vrele(vp);
3630 return (error);
3631 }
3632