vfs_syscalls.c revision 1.359.2.4 1 /* $NetBSD: vfs_syscalls.c,v 1.359.2.4 2008/09/18 04:31:45 wrstuden Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.359.2.4 2008/09/18 04:31:45 wrstuden Exp $");
67
68 #include "opt_compat_netbsd.h"
69 #include "opt_compat_43.h"
70 #include "opt_fileassoc.h"
71 #include "fss.h"
72 #include "veriexec.h"
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file.h>
80 #include <sys/stat.h>
81 #include <sys/vnode.h>
82 #include <sys/mount.h>
83 #include <sys/proc.h>
84 #include <sys/uio.h>
85 #include <sys/malloc.h>
86 #include <sys/kmem.h>
87 #include <sys/dirent.h>
88 #include <sys/sysctl.h>
89 #include <sys/syscallargs.h>
90 #include <sys/vfs_syscalls.h>
91 #include <sys/ktrace.h>
92 #ifdef FILEASSOC
93 #include <sys/fileassoc.h>
94 #endif /* FILEASSOC */
95 #include <sys/verified_exec.h>
96 #include <sys/kauth.h>
97 #include <sys/atomic.h>
98 #include <sys/module.h>
99
100 #include <miscfs/genfs/genfs.h>
101 #include <miscfs/syncfs/syncfs.h>
102 #include <miscfs/specfs/specdev.h>
103
104 #ifdef COMPAT_30
105 #include "opt_nfsserver.h"
106 #include <nfs/rpcv2.h>
107 #endif
108 #include <nfs/nfsproto.h>
109 #ifdef COMPAT_30
110 #include <nfs/nfs.h>
111 #include <nfs/nfs_var.h>
112 #endif
113
114 #if NFSS > 0
115 #include <dev/fssvar.h>
116 #endif
117
118 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
119
120 static int change_dir(struct nameidata *, struct lwp *);
121 static int change_flags(struct vnode *, u_long, struct lwp *);
122 static int change_mode(struct vnode *, int, struct lwp *l);
123 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
124
125 void checkdirs(struct vnode *);
126
127 int dovfsusermount = 0;
128
129 /*
130 * Virtual File System System Calls
131 */
132
133 /*
134 * Mount a file system.
135 */
136
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Translation table from the historic numeric file system types used by
 * the 4.3BSD and NetBSD 0.9 mount syscalls to file system names.  The
 * array index IS the historic type number, so the order is significant.
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	[0] = NULL,		/* MOUNT_NONE */
	[1] = MOUNT_FFS,	/* MOUNT_UFS */
	[2] = MOUNT_NFS,
	[3] = MOUNT_MFS,
	[4] = MOUNT_MSDOS,
	[5] = MOUNT_CD9660,	/* MOUNT_ISOFS */
	[6] = MOUNT_FDESC,
	[7] = MOUNT_KERNFS,
	[8] = NULL,		/* MOUNT_DEVFS */
	[9] = MOUNT_AFS,
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(*mountcompatnames);
#endif /* COMPAT_09 || COMPAT_43 */
160
161 static int
162 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
163 void *data, size_t *data_len)
164 {
165 struct mount *mp;
166 int error = 0, saved_flags;
167
168 mp = vp->v_mount;
169 saved_flags = mp->mnt_flag;
170
171 /* We can operate only on VV_ROOT nodes. */
172 if ((vp->v_vflag & VV_ROOT) == 0) {
173 error = EINVAL;
174 goto out;
175 }
176
177 /*
178 * We only allow the filesystem to be reloaded if it
179 * is currently mounted read-only.
180 */
181 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
182 error = EOPNOTSUPP; /* Needs translation */
183 goto out;
184 }
185
186 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
187 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
188 if (error)
189 goto out;
190
191 if (vfs_busy(mp, NULL)) {
192 error = EPERM;
193 goto out;
194 }
195
196 mutex_enter(&mp->mnt_updating);
197
198 mp->mnt_flag &= ~MNT_OP_FLAGS;
199 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
200
201 /*
202 * Set the mount level flags.
203 */
204 if (flags & MNT_RDONLY)
205 mp->mnt_flag |= MNT_RDONLY;
206 else if (mp->mnt_flag & MNT_RDONLY)
207 mp->mnt_iflag |= IMNT_WANTRDWR;
208 mp->mnt_flag &=
209 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
210 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
211 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
212 MNT_LOG);
213 mp->mnt_flag |= flags &
214 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
215 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
216 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
217 MNT_LOG | MNT_IGNORE);
218
219 error = VFS_MOUNT(mp, path, data, data_len);
220
221 #if defined(COMPAT_30) && defined(NFSSERVER)
222 if (error && data != NULL) {
223 int error2;
224
225 /* Update failed; let's try and see if it was an
226 * export request. */
227 error2 = nfs_update_exports_30(mp, path, data, l);
228
229 /* Only update error code if the export request was
230 * understood but some problem occurred while
231 * processing it. */
232 if (error2 != EJUSTRETURN)
233 error = error2;
234 }
235 #endif
236 if (mp->mnt_iflag & IMNT_WANTRDWR)
237 mp->mnt_flag &= ~MNT_RDONLY;
238 if (error)
239 mp->mnt_flag = saved_flags;
240 mp->mnt_flag &= ~MNT_OP_FLAGS;
241 mp->mnt_iflag &= ~IMNT_WANTRDWR;
242 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
243 if (mp->mnt_syncer == NULL)
244 error = vfs_allocate_syncvnode(mp);
245 } else {
246 if (mp->mnt_syncer != NULL)
247 vfs_deallocate_syncvnode(mp);
248 }
249 mutex_exit(&mp->mnt_updating);
250 vfs_unbusy(mp, false, NULL);
251
252 out:
253 return (error);
254 }
255
256 static int
257 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
258 {
259 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
260 int error;
261
262 /* Copy file-system type from userspace. */
263 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
264 if (error) {
265 #if defined(COMPAT_09) || defined(COMPAT_43)
266 /*
267 * Historically, filesystem types were identified by numbers.
268 * If we get an integer for the filesystem type instead of a
269 * string, we check to see if it matches one of the historic
270 * filesystem types.
271 */
272 u_long fsindex = (u_long)fstype;
273 if (fsindex >= nmountcompatnames ||
274 mountcompatnames[fsindex] == NULL)
275 return ENODEV;
276 strlcpy(fstypename, mountcompatnames[fsindex],
277 sizeof(fstypename));
278 #else
279 return error;
280 #endif
281 }
282
283 #ifdef COMPAT_10
284 /* Accept `ufs' as an alias for `ffs'. */
285 if (strcmp(fstypename, "ufs") == 0)
286 fstypename[0] = 'f';
287 #endif
288
289 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
290 return 0;
291
292 /* If we can autoload a vfs module, try again */
293 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true);
294
295 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
296 return 0;
297
298 return ENODEV;
299 }
300
301 static int
302 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
303 const char *path, int flags, void *data, size_t *data_len, u_int recurse)
304 {
305 struct mount *mp;
306 struct vnode *vp = *vpp;
307 struct vattr va;
308 int error;
309
310 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
311 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
312 if (error)
313 return error;
314
315 /* Can't make a non-dir a mount-point (from here anyway). */
316 if (vp->v_type != VDIR)
317 return ENOTDIR;
318
319 /*
320 * If the user is not root, ensure that they own the directory
321 * onto which we are attempting to mount.
322 */
323 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
324 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
325 (error = kauth_authorize_generic(l->l_cred,
326 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
327 return error;
328 }
329
330 if (flags & MNT_EXPORTED)
331 return EINVAL;
332
333 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
334 return error;
335
336 /*
337 * Check if a file-system is not already mounted on this vnode.
338 */
339 if (vp->v_mountedhere != NULL)
340 return EBUSY;
341
342 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
343 if (mp == NULL)
344 return ENOMEM;
345
346 mp->mnt_op = vfsops;
347 mp->mnt_refcnt = 1;
348
349 TAILQ_INIT(&mp->mnt_vnodelist);
350 rw_init(&mp->mnt_unmounting);
351 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
352 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
353 error = vfs_busy(mp, NULL);
354 KASSERT(error == 0);
355 mutex_enter(&mp->mnt_updating);
356
357 mp->mnt_vnodecovered = vp;
358 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
359 mount_initspecific(mp);
360
361 /*
362 * The underlying file system may refuse the mount for
363 * various reasons. Allow the user to force it to happen.
364 *
365 * Set the mount level flags.
366 */
367 mp->mnt_flag = flags &
368 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
369 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
370 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
371 MNT_LOG | MNT_IGNORE | MNT_RDONLY);
372
373 error = VFS_MOUNT(mp, path, data, data_len);
374 mp->mnt_flag &= ~MNT_OP_FLAGS;
375
376 /*
377 * Put the new filesystem on the mount list after root.
378 */
379 cache_purge(vp);
380 if (error != 0) {
381 vp->v_mountedhere = NULL;
382 mutex_exit(&mp->mnt_updating);
383 vfs_unbusy(mp, false, NULL);
384 vfs_destroy(mp);
385 return error;
386 }
387
388 mp->mnt_iflag &= ~IMNT_WANTRDWR;
389 mutex_enter(&mountlist_lock);
390 vp->v_mountedhere = mp;
391 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
392 mutex_exit(&mountlist_lock);
393 vn_restorerecurse(vp, recurse);
394 VOP_UNLOCK(vp, 0);
395 checkdirs(vp);
396 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
397 error = vfs_allocate_syncvnode(mp);
398 /* Hold an additional reference to the mount across VFS_START(). */
399 mutex_exit(&mp->mnt_updating);
400 vfs_unbusy(mp, true, NULL);
401 (void) VFS_STATVFS(mp, &mp->mnt_stat);
402 error = VFS_START(mp, 0);
403 if (error) {
404 vrele(vp);
405 vfs_destroy(mp);
406 }
407 /* Drop reference held for VFS_START(). */
408 vfs_destroy(mp);
409 *vpp = NULL;
410 return error;
411 }
412
413 static int
414 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
415 void *data, size_t *data_len)
416 {
417 struct mount *mp;
418 int error;
419
420 /* If MNT_GETARGS is specified, it should be the only flag. */
421 if (flags & ~MNT_GETARGS)
422 return EINVAL;
423
424 mp = vp->v_mount;
425
426 /* XXX: probably some notion of "can see" here if we want isolation. */
427 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
428 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
429 if (error)
430 return error;
431
432 if ((vp->v_vflag & VV_ROOT) == 0)
433 return EINVAL;
434
435 if (vfs_busy(mp, NULL))
436 return EPERM;
437
438 mutex_enter(&mp->mnt_updating);
439 mp->mnt_flag &= ~MNT_OP_FLAGS;
440 mp->mnt_flag |= MNT_GETARGS;
441 error = VFS_MOUNT(mp, path, data, data_len);
442 mp->mnt_flag &= ~MNT_OP_FLAGS;
443 mutex_exit(&mp->mnt_updating);
444
445 vfs_unbusy(mp, false, NULL);
446 return (error);
447 }
448
#ifdef COMPAT_40
/*
 * NetBSD 4.0 compatible mount(2): identical to sys___mount50() except that
 * no data length is supplied (0 is passed) and the value do_sys_mount()
 * would return through retval is discarded.
 */
/* ARGSUSED */
int
compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) type;
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(void *) data;
	} */
	const char *type = SCARG(uap, type);
	const char *path = SCARG(uap, path);
	register_t ignored;

	return do_sys_mount(l, NULL, type, path, SCARG(uap, flags),
	    SCARG(uap, data), UIO_USERSPACE, 0, &ignored);
}
#endif
466
467 int
468 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
469 {
470 /* {
471 syscallarg(const char *) type;
472 syscallarg(const char *) path;
473 syscallarg(int) flags;
474 syscallarg(void *) data;
475 syscallarg(size_t) data_len;
476 } */
477
478 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
479 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
480 SCARG(uap, data_len), retval);
481 }
482
483 int
484 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
485 const char *path, int flags, void *data, enum uio_seg data_seg,
486 size_t data_len, register_t *retval)
487 {
488 struct vnode *vp;
489 struct nameidata nd;
490 void *data_buf = data;
491 u_int recurse;
492 int error;
493
494 /*
495 * Get vnode to be covered
496 */
497 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
498 if ((error = namei(&nd)) != 0)
499 return (error);
500 vp = nd.ni_vp;
501
502 /*
503 * A lookup in VFS_MOUNT might result in an attempt to
504 * lock this vnode again, so make the lock recursive.
505 */
506 if (vfsops == NULL) {
507 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
508 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
509 recurse = vn_setrecurse(vp);
510 vfsops = vp->v_mount->mnt_op;
511 } else {
512 /* 'type' is userspace */
513 error = mount_get_vfsops(type, &vfsops);
514 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
515 recurse = vn_setrecurse(vp);
516 if (error != 0)
517 goto done;
518 }
519 } else {
520 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
521 recurse = vn_setrecurse(vp);
522 }
523
524 if (data != NULL && data_seg == UIO_USERSPACE) {
525 if (data_len == 0) {
526 /* No length supplied, use default for filesystem */
527 data_len = vfsops->vfs_min_mount_data;
528 if (data_len > VFS_MAX_MOUNT_DATA) {
529 /* maybe a force loaded old LKM */
530 error = EINVAL;
531 goto done;
532 }
533 #ifdef COMPAT_30
534 /* Hopefully a longer buffer won't make copyin() fail */
535 if (flags & MNT_UPDATE
536 && data_len < sizeof (struct mnt_export_args30))
537 data_len = sizeof (struct mnt_export_args30);
538 #endif
539 }
540 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
541
542 /* NFS needs the buffer even for mnt_getargs .... */
543 error = copyin(data, data_buf, data_len);
544 if (error != 0)
545 goto done;
546 }
547
548 if (flags & MNT_GETARGS) {
549 if (data_len == 0) {
550 error = EINVAL;
551 goto done;
552 }
553 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
554 if (error != 0)
555 goto done;
556 if (data_seg == UIO_USERSPACE)
557 error = copyout(data_buf, data, data_len);
558 *retval = data_len;
559 } else if (flags & MNT_UPDATE) {
560 error = mount_update(l, vp, path, flags, data_buf, &data_len);
561 } else {
562 /* Locking is handled internally in mount_domount(). */
563 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
564 &data_len, recurse);
565 }
566
567 done:
568 if (vp != NULL) {
569 vn_restorerecurse(vp, recurse);
570 vput(vp);
571 }
572 if (data_buf != data)
573 free(data_buf, M_TEMP);
574 return (error);
575 }
576
577 /*
578 * Scan all active processes to see if any of them have a current
579 * or root directory onto which the new filesystem has just been
580 * mounted. If so, replace them with the new mount point.
581 */
582 void
583 checkdirs(struct vnode *olddp)
584 {
585 struct cwdinfo *cwdi;
586 struct vnode *newdp, *rele1, *rele2;
587 struct proc *p;
588 bool retry;
589
590 if (olddp->v_usecount == 1)
591 return;
592 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
593 panic("mount: lost mount");
594
595 do {
596 retry = false;
597 mutex_enter(proc_lock);
598 PROCLIST_FOREACH(p, &allproc) {
599 if ((p->p_flag & PK_MARKER) != 0)
600 continue;
601 if ((cwdi = p->p_cwdi) == NULL)
602 continue;
603 /*
604 * Can't change to the old directory any more,
605 * so even if we see a stale value it's not a
606 * problem.
607 */
608 if (cwdi->cwdi_cdir != olddp &&
609 cwdi->cwdi_rdir != olddp)
610 continue;
611 retry = true;
612 rele1 = NULL;
613 rele2 = NULL;
614 atomic_inc_uint(&cwdi->cwdi_refcnt);
615 mutex_exit(proc_lock);
616 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
617 if (cwdi->cwdi_cdir == olddp) {
618 rele1 = cwdi->cwdi_cdir;
619 VREF(newdp);
620 cwdi->cwdi_cdir = newdp;
621 }
622 if (cwdi->cwdi_rdir == olddp) {
623 rele2 = cwdi->cwdi_rdir;
624 VREF(newdp);
625 cwdi->cwdi_rdir = newdp;
626 }
627 rw_exit(&cwdi->cwdi_lock);
628 cwdfree(cwdi);
629 if (rele1 != NULL)
630 vrele(rele1);
631 if (rele2 != NULL)
632 vrele(rele2);
633 mutex_enter(proc_lock);
634 break;
635 }
636 mutex_exit(proc_lock);
637 } while (retry);
638
639 if (rootvnode == olddp) {
640 vrele(rootvnode);
641 VREF(newdp);
642 rootvnode = newdp;
643 }
644 vput(newdp);
645 }
646
647 /*
648 * Unmount a file system.
649 *
650 * Note: unmount takes a path to the vnode mounted on as argument,
651 * not special file (as before).
652 */
653 /* ARGSUSED */
654 int
655 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
656 {
657 /* {
658 syscallarg(const char *) path;
659 syscallarg(int) flags;
660 } */
661 struct vnode *vp;
662 struct mount *mp;
663 int error;
664 struct nameidata nd;
665
666 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
667 SCARG(uap, path));
668 if ((error = namei(&nd)) != 0)
669 return (error);
670 vp = nd.ni_vp;
671 mp = vp->v_mount;
672 atomic_inc_uint(&mp->mnt_refcnt);
673 VOP_UNLOCK(vp, 0);
674
675 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
676 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
677 if (error) {
678 vrele(vp);
679 vfs_destroy(mp);
680 return (error);
681 }
682
683 /*
684 * Don't allow unmounting the root file system.
685 */
686 if (mp->mnt_flag & MNT_ROOTFS) {
687 vrele(vp);
688 vfs_destroy(mp);
689 return (EINVAL);
690 }
691
692 /*
693 * Must be the root of the filesystem
694 */
695 if ((vp->v_vflag & VV_ROOT) == 0) {
696 vrele(vp);
697 vfs_destroy(mp);
698 return (EINVAL);
699 }
700
701 vrele(vp);
702 error = dounmount(mp, SCARG(uap, flags), l);
703 return error;
704 }
705
706 /*
707 * Do the actual file system unmount. File system is assumed to have
708 * been locked by the caller.
709 *
710 * => Caller gain reference to the mount, explicility for unmount.
711 * => Reference will be dropped in all cases.
712 */
713 int
714 dounmount(struct mount *mp, int flags, struct lwp *l)
715 {
716 struct vnode *coveredvp;
717 int error;
718 int async;
719 int used_syncer;
720
721 #if NVERIEXEC > 0
722 error = veriexec_unmountchk(mp);
723 if (error)
724 return (error);
725 #endif /* NVERIEXEC > 0 */
726
727 /*
728 * XXX Freeze syncer. Must do this before locking the
729 * mount point. See dounmount() for details.
730 */
731 mutex_enter(&syncer_mutex);
732 rw_enter(&mp->mnt_unmounting, RW_WRITER);
733 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
734 rw_exit(&mp->mnt_unmounting);
735 mutex_exit(&syncer_mutex);
736 vfs_destroy(mp);
737 return ENOENT;
738 }
739
740 used_syncer = (mp->mnt_syncer != NULL);
741
742 /*
743 * XXX Syncer must be frozen when we get here. This should really
744 * be done on a per-mountpoint basis, but especially the softdep
745 * code possibly called from the syncer doesn't exactly work on a
746 * per-mountpoint basis, so the softdep code would become a maze
747 * of vfs_busy() calls.
748 *
749 * The caller of dounmount() must acquire syncer_mutex because
750 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
751 * order, and we must preserve that order to avoid deadlock.
752 *
753 * So, if the file system did not use the syncer, now is
754 * the time to release the syncer_mutex.
755 */
756 if (used_syncer == 0)
757 mutex_exit(&syncer_mutex);
758
759 mp->mnt_iflag |= IMNT_UNMOUNT;
760 async = mp->mnt_flag & MNT_ASYNC;
761 mp->mnt_flag &= ~MNT_ASYNC;
762 cache_purgevfs(mp); /* remove cache entries for this file sys */
763 if (mp->mnt_syncer != NULL)
764 vfs_deallocate_syncvnode(mp);
765 error = 0;
766 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
767 #if NFSS > 0
768 error = fss_umount_hook(mp, (flags & MNT_FORCE));
769 #endif
770 if (error == 0)
771 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
772 }
773 vfs_scrubvnlist(mp);
774 if (error == 0 || (flags & MNT_FORCE))
775 error = VFS_UNMOUNT(mp, flags);
776 if (error) {
777 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
778 (void) vfs_allocate_syncvnode(mp);
779 mp->mnt_iflag &= ~IMNT_UNMOUNT;
780 mp->mnt_flag |= async;
781 rw_exit(&mp->mnt_unmounting);
782 if (used_syncer)
783 mutex_exit(&syncer_mutex);
784 return (error);
785 }
786 vfs_scrubvnlist(mp);
787 mutex_enter(&mountlist_lock);
788 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
789 coveredvp->v_mountedhere = NULL;
790 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
791 mp->mnt_iflag |= IMNT_GONE;
792 mutex_exit(&mountlist_lock);
793 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
794 panic("unmount: dangling vnode");
795 if (used_syncer)
796 mutex_exit(&syncer_mutex);
797 vfs_hooks_unmount(mp);
798 rw_exit(&mp->mnt_unmounting);
799 vfs_destroy(mp); /* caller provided reference */
800 vfs_destroy(mp); /* from mount(), final nail in coffin */
801 if (coveredvp != NULLVP)
802 vrele(coveredvp);
803 return (0);
804 }
805
806 /*
807 * Sync each mounted filesystem.
808 */
809 #ifdef DEBUG
810 int syncprt = 0;
811 struct ctldebug debug0 = { "syncprt", &syncprt };
812 #endif
813
814 /* ARGSUSED */
815 int
816 sys_sync(struct lwp *l, const void *v, register_t *retval)
817 {
818 struct mount *mp, *nmp;
819 int asyncflag;
820
821 if (l == NULL)
822 l = &lwp0;
823
824 mutex_enter(&mountlist_lock);
825 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
826 mp = nmp) {
827 if (vfs_busy(mp, &nmp)) {
828 continue;
829 }
830 mutex_enter(&mp->mnt_updating);
831 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
832 asyncflag = mp->mnt_flag & MNT_ASYNC;
833 mp->mnt_flag &= ~MNT_ASYNC;
834 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
835 if (asyncflag)
836 mp->mnt_flag |= MNT_ASYNC;
837 }
838 mutex_exit(&mp->mnt_updating);
839 vfs_unbusy(mp, false, &nmp);
840 }
841 mutex_exit(&mountlist_lock);
842 #ifdef DEBUG
843 if (syncprt)
844 vfs_bufstats();
845 #endif /* DEBUG */
846 return (0);
847 }
848
849 /*
850 * Change filesystem quotas.
851 */
852 /* ARGSUSED */
853 int
854 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
855 {
856 /* {
857 syscallarg(const char *) path;
858 syscallarg(int) cmd;
859 syscallarg(int) uid;
860 syscallarg(void *) arg;
861 } */
862 struct mount *mp;
863 int error;
864 struct nameidata nd;
865
866 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
867 SCARG(uap, path));
868 if ((error = namei(&nd)) != 0)
869 return (error);
870 mp = nd.ni_vp->v_mount;
871 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
872 SCARG(uap, arg));
873 vrele(nd.ni_vp);
874 return (error);
875 }
876
877 int
878 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
879 int root)
880 {
881 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
882 int error = 0;
883
884 /*
885 * If MNT_NOWAIT or MNT_LAZY is specified, do not
886 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
887 * overrides MNT_NOWAIT.
888 */
889 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
890 (flags != MNT_WAIT && flags != 0)) {
891 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
892 goto done;
893 }
894
895 /* Get the filesystem stats now */
896 memset(sp, 0, sizeof(*sp));
897 if ((error = VFS_STATVFS(mp, sp)) != 0) {
898 return error;
899 }
900
901 if (cwdi->cwdi_rdir == NULL)
902 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
903 done:
904 if (cwdi->cwdi_rdir != NULL) {
905 size_t len;
906 char *bp;
907 char c;
908 char *path = PNBUF_GET();
909
910 bp = path + MAXPATHLEN;
911 *--bp = '\0';
912 rw_enter(&cwdi->cwdi_lock, RW_READER);
913 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
914 MAXPATHLEN / 2, 0, l);
915 rw_exit(&cwdi->cwdi_lock);
916 if (error) {
917 PNBUF_PUT(path);
918 return error;
919 }
920 len = strlen(bp);
921 /*
922 * for mount points that are below our root, we can see
923 * them, so we fix up the pathname and return them. The
924 * rest we cannot see, so we don't allow viewing the
925 * data.
926 */
927 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
928 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
929 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
930 sizeof(sp->f_mntonname));
931 if (sp->f_mntonname[0] == '\0')
932 (void)strlcpy(sp->f_mntonname, "/",
933 sizeof(sp->f_mntonname));
934 } else {
935 if (root)
936 (void)strlcpy(sp->f_mntonname, "/",
937 sizeof(sp->f_mntonname));
938 else
939 error = EPERM;
940 }
941 PNBUF_PUT(path);
942 }
943 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
944 return error;
945 }
946
947 /*
948 * Get filesystem statistics by path.
949 */
950 int
951 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
952 {
953 struct mount *mp;
954 int error;
955 struct nameidata nd;
956
957 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
958 if ((error = namei(&nd)) != 0)
959 return error;
960 mp = nd.ni_vp->v_mount;
961 error = dostatvfs(mp, sb, l, flags, 1);
962 vrele(nd.ni_vp);
963 return error;
964 }
965
966 /* ARGSUSED */
967 int
968 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
969 {
970 /* {
971 syscallarg(const char *) path;
972 syscallarg(struct statvfs *) buf;
973 syscallarg(int) flags;
974 } */
975 struct statvfs *sb;
976 int error;
977
978 sb = STATVFSBUF_GET();
979 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
980 if (error == 0)
981 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
982 STATVFSBUF_PUT(sb);
983 return error;
984 }
985
986 /*
987 * Get filesystem statistics by fd.
988 */
989 int
990 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
991 {
992 file_t *fp;
993 struct mount *mp;
994 int error;
995
996 /* fd_getvnode() will use the descriptor for us */
997 if ((error = fd_getvnode(fd, &fp)) != 0)
998 return (error);
999 mp = ((struct vnode *)fp->f_data)->v_mount;
1000 error = dostatvfs(mp, sb, curlwp, flags, 1);
1001 fd_putfile(fd);
1002 return error;
1003 }
1004
1005 /* ARGSUSED */
1006 int
1007 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1008 {
1009 /* {
1010 syscallarg(int) fd;
1011 syscallarg(struct statvfs *) buf;
1012 syscallarg(int) flags;
1013 } */
1014 struct statvfs *sb;
1015 int error;
1016
1017 sb = STATVFSBUF_GET();
1018 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1019 if (error == 0)
1020 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1021 STATVFSBUF_PUT(sb);
1022 return error;
1023 }
1024
1025
1026 /*
1027 * Get statistics on all filesystems.
1028 */
1029 int
1030 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1031 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1032 register_t *retval)
1033 {
1034 int root = 0;
1035 struct proc *p = l->l_proc;
1036 struct mount *mp, *nmp;
1037 struct statvfs *sb;
1038 size_t count, maxcount;
1039 int error = 0;
1040
1041 sb = STATVFSBUF_GET();
1042 maxcount = bufsize / entry_sz;
1043 mutex_enter(&mountlist_lock);
1044 count = 0;
1045 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1046 mp = nmp) {
1047 if (vfs_busy(mp, &nmp)) {
1048 continue;
1049 }
1050 if (sfsp && count < maxcount) {
1051 error = dostatvfs(mp, sb, l, flags, 0);
1052 if (error) {
1053 vfs_unbusy(mp, false, &nmp);
1054 error = 0;
1055 continue;
1056 }
1057 error = copyfn(sb, sfsp, entry_sz);
1058 if (error) {
1059 vfs_unbusy(mp, false, NULL);
1060 goto out;
1061 }
1062 sfsp = (char *)sfsp + entry_sz;
1063 root |= strcmp(sb->f_mntonname, "/") == 0;
1064 }
1065 count++;
1066 vfs_unbusy(mp, false, &nmp);
1067 }
1068 mutex_exit(&mountlist_lock);
1069
1070 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1071 /*
1072 * fake a root entry
1073 */
1074 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1075 sb, l, flags, 1);
1076 if (error != 0)
1077 goto out;
1078 if (sfsp) {
1079 error = copyfn(sb, sfsp, entry_sz);
1080 if (error != 0)
1081 goto out;
1082 }
1083 count++;
1084 }
1085 if (sfsp && count > maxcount)
1086 *retval = maxcount;
1087 else
1088 *retval = count;
1089 out:
1090 STATVFSBUF_PUT(sb);
1091 return error;
1092 }
1093
1094 int
1095 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1096 {
1097 /* {
1098 syscallarg(struct statvfs *) buf;
1099 syscallarg(size_t) bufsize;
1100 syscallarg(int) flags;
1101 } */
1102
1103 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1104 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1105 }
1106
1107 /*
1108 * Change current working directory to a given file descriptor.
1109 */
1110 /* ARGSUSED */
1111 int
1112 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1113 {
1114 /* {
1115 syscallarg(int) fd;
1116 } */
1117 struct proc *p = l->l_proc;
1118 struct cwdinfo *cwdi;
1119 struct vnode *vp, *tdp;
1120 struct mount *mp;
1121 file_t *fp;
1122 int error, fd;
1123
1124 /* fd_getvnode() will use the descriptor for us */
1125 fd = SCARG(uap, fd);
1126 if ((error = fd_getvnode(fd, &fp)) != 0)
1127 return (error);
1128 vp = fp->f_data;
1129
1130 VREF(vp);
1131 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1132 if (vp->v_type != VDIR)
1133 error = ENOTDIR;
1134 else
1135 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1136 if (error) {
1137 vput(vp);
1138 goto out;
1139 }
1140 while ((mp = vp->v_mountedhere) != NULL) {
1141 error = vfs_busy(mp, NULL);
1142 vput(vp);
1143 if (error != 0)
1144 goto out;
1145 error = VFS_ROOT(mp, &tdp);
1146 vfs_unbusy(mp, false, NULL);
1147 if (error)
1148 goto out;
1149 vp = tdp;
1150 }
1151 VOP_UNLOCK(vp, 0);
1152
1153 /*
1154 * Disallow changing to a directory not under the process's
1155 * current root directory (if there is one).
1156 */
1157 cwdi = p->p_cwdi;
1158 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1159 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1160 vrele(vp);
1161 error = EPERM; /* operation not permitted */
1162 } else {
1163 vrele(cwdi->cwdi_cdir);
1164 cwdi->cwdi_cdir = vp;
1165 }
1166 rw_exit(&cwdi->cwdi_lock);
1167
1168 out:
1169 fd_putfile(fd);
1170 return (error);
1171 }
1172
1173 /*
1174 * Change this process's notion of the root directory to a given file
1175 * descriptor.
1176 */
1177 int
1178 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1179 {
1180 struct proc *p = l->l_proc;
1181 struct cwdinfo *cwdi;
1182 struct vnode *vp;
1183 file_t *fp;
1184 int error, fd = SCARG(uap, fd);
1185
1186 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1187 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1188 return error;
1189 /* fd_getvnode() will use the descriptor for us */
1190 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1191 return error;
1192 vp = fp->f_data;
1193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1194 if (vp->v_type != VDIR)
1195 error = ENOTDIR;
1196 else
1197 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1198 VOP_UNLOCK(vp, 0);
1199 if (error)
1200 goto out;
1201 VREF(vp);
1202
1203 /*
1204 * Prevent escaping from chroot by putting the root under
1205 * the working directory. Silently chdir to / if we aren't
1206 * already there.
1207 */
1208 cwdi = p->p_cwdi;
1209 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1210 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1211 /*
1212 * XXX would be more failsafe to change directory to a
1213 * deadfs node here instead
1214 */
1215 vrele(cwdi->cwdi_cdir);
1216 VREF(vp);
1217 cwdi->cwdi_cdir = vp;
1218 }
1219
1220 if (cwdi->cwdi_rdir != NULL)
1221 vrele(cwdi->cwdi_rdir);
1222 cwdi->cwdi_rdir = vp;
1223 rw_exit(&cwdi->cwdi_lock);
1224
1225 out:
1226 fd_putfile(fd);
1227 return (error);
1228 }
1229
1230 /*
1231 * Change current working directory (``.'').
1232 */
1233 /* ARGSUSED */
1234 int
1235 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1236 {
1237 /* {
1238 syscallarg(const char *) path;
1239 } */
1240 struct proc *p = l->l_proc;
1241 struct cwdinfo *cwdi;
1242 int error;
1243 struct nameidata nd;
1244
1245 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1246 SCARG(uap, path));
1247 if ((error = change_dir(&nd, l)) != 0)
1248 return (error);
1249 cwdi = p->p_cwdi;
1250 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1251 vrele(cwdi->cwdi_cdir);
1252 cwdi->cwdi_cdir = nd.ni_vp;
1253 rw_exit(&cwdi->cwdi_lock);
1254 return (0);
1255 }
1256
1257 /*
1258 * Change notion of root (``/'') directory.
1259 */
1260 /* ARGSUSED */
1261 int
1262 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1263 {
1264 /* {
1265 syscallarg(const char *) path;
1266 } */
1267 struct proc *p = l->l_proc;
1268 struct cwdinfo *cwdi;
1269 struct vnode *vp;
1270 int error;
1271 struct nameidata nd;
1272
1273 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1274 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1275 return (error);
1276 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1277 SCARG(uap, path));
1278 if ((error = change_dir(&nd, l)) != 0)
1279 return (error);
1280
1281 cwdi = p->p_cwdi;
1282 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1283 if (cwdi->cwdi_rdir != NULL)
1284 vrele(cwdi->cwdi_rdir);
1285 vp = nd.ni_vp;
1286 cwdi->cwdi_rdir = vp;
1287
1288 /*
1289 * Prevent escaping from chroot by putting the root under
1290 * the working directory. Silently chdir to / if we aren't
1291 * already there.
1292 */
1293 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1294 /*
1295 * XXX would be more failsafe to change directory to a
1296 * deadfs node here instead
1297 */
1298 vrele(cwdi->cwdi_cdir);
1299 VREF(vp);
1300 cwdi->cwdi_cdir = vp;
1301 }
1302 rw_exit(&cwdi->cwdi_lock);
1303
1304 return (0);
1305 }
1306
1307 /*
1308 * Common routine for chroot and chdir.
1309 */
1310 static int
1311 change_dir(struct nameidata *ndp, struct lwp *l)
1312 {
1313 struct vnode *vp;
1314 int error;
1315
1316 if ((error = namei(ndp)) != 0)
1317 return (error);
1318 vp = ndp->ni_vp;
1319 if (vp->v_type != VDIR)
1320 error = ENOTDIR;
1321 else
1322 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1323
1324 if (error)
1325 vput(vp);
1326 else
1327 VOP_UNLOCK(vp, 0);
1328 return (error);
1329 }
1330
1331 /*
1332 * Check permissions, allocate an open file structure,
1333 * and call the device open routine if any.
1334 */
1335 int
1336 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1337 {
1338 /* {
1339 syscallarg(const char *) path;
1340 syscallarg(int) flags;
1341 syscallarg(int) mode;
1342 } */
1343 struct proc *p = l->l_proc;
1344 struct cwdinfo *cwdi = p->p_cwdi;
1345 file_t *fp;
1346 struct vnode *vp;
1347 int flags, cmode;
1348 int type, indx, error;
1349 struct flock lf;
1350 struct nameidata nd;
1351
1352 flags = FFLAGS(SCARG(uap, flags));
1353 if ((flags & (FREAD | FWRITE)) == 0)
1354 return (EINVAL);
1355 if ((error = fd_allocfile(&fp, &indx)) != 0)
1356 return (error);
1357 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1358 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1359 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1360 SCARG(uap, path));
1361 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1362 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1363 fd_abort(p, fp, indx);
1364 if ((error == EDUPFD || error == EMOVEFD) &&
1365 l->l_dupfd >= 0 && /* XXX from fdopen */
1366 (error =
1367 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1368 *retval = indx;
1369 return (0);
1370 }
1371 if (error == ERESTART)
1372 error = EINTR;
1373 return (error);
1374 }
1375
1376 l->l_dupfd = 0;
1377 vp = nd.ni_vp;
1378 fp->f_flag = flags & FMASK;
1379 fp->f_type = DTYPE_VNODE;
1380 fp->f_ops = &vnops;
1381 fp->f_data = vp;
1382 if (flags & (O_EXLOCK | O_SHLOCK)) {
1383 lf.l_whence = SEEK_SET;
1384 lf.l_start = 0;
1385 lf.l_len = 0;
1386 if (flags & O_EXLOCK)
1387 lf.l_type = F_WRLCK;
1388 else
1389 lf.l_type = F_RDLCK;
1390 type = F_FLOCK;
1391 if ((flags & FNONBLOCK) == 0)
1392 type |= F_WAIT;
1393 VOP_UNLOCK(vp, 0);
1394 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1395 if (error) {
1396 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1397 fd_abort(p, fp, indx);
1398 return (error);
1399 }
1400 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1401 atomic_or_uint(&fp->f_flag, FHASLOCK);
1402 }
1403 VOP_UNLOCK(vp, 0);
1404 *retval = indx;
1405 fd_affix(p, fp, indx);
1406 return (0);
1407 }
1408
1409 static void
1410 vfs__fhfree(fhandle_t *fhp)
1411 {
1412 size_t fhsize;
1413
1414 if (fhp == NULL) {
1415 return;
1416 }
1417 fhsize = FHANDLE_SIZE(fhp);
1418 kmem_free(fhp, fhsize);
1419 }
1420
1421 /*
1422 * vfs_composefh: compose a filehandle.
1423 */
1424
1425 int
1426 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1427 {
1428 struct mount *mp;
1429 struct fid *fidp;
1430 int error;
1431 size_t needfhsize;
1432 size_t fidsize;
1433
1434 mp = vp->v_mount;
1435 fidp = NULL;
1436 if (*fh_size < FHANDLE_SIZE_MIN) {
1437 fidsize = 0;
1438 } else {
1439 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1440 if (fhp != NULL) {
1441 memset(fhp, 0, *fh_size);
1442 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1443 fidp = &fhp->fh_fid;
1444 }
1445 }
1446 error = VFS_VPTOFH(vp, fidp, &fidsize);
1447 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1448 if (error == 0 && *fh_size < needfhsize) {
1449 error = E2BIG;
1450 }
1451 *fh_size = needfhsize;
1452 return error;
1453 }
1454
1455 int
1456 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1457 {
1458 struct mount *mp;
1459 fhandle_t *fhp;
1460 size_t fhsize;
1461 size_t fidsize;
1462 int error;
1463
1464 *fhpp = NULL;
1465 mp = vp->v_mount;
1466 fidsize = 0;
1467 error = VFS_VPTOFH(vp, NULL, &fidsize);
1468 KASSERT(error != 0);
1469 if (error != E2BIG) {
1470 goto out;
1471 }
1472 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1473 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1474 if (fhp == NULL) {
1475 error = ENOMEM;
1476 goto out;
1477 }
1478 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1479 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1480 if (error == 0) {
1481 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1482 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1483 *fhpp = fhp;
1484 } else {
1485 kmem_free(fhp, fhsize);
1486 }
1487 out:
1488 return error;
1489 }
1490
1491 void
1492 vfs_composefh_free(fhandle_t *fhp)
1493 {
1494
1495 vfs__fhfree(fhp);
1496 }
1497
1498 /*
1499 * vfs_fhtovp: lookup a vnode by a filehandle.
1500 */
1501
1502 int
1503 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1504 {
1505 struct mount *mp;
1506 int error;
1507
1508 *vpp = NULL;
1509 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1510 if (mp == NULL) {
1511 error = ESTALE;
1512 goto out;
1513 }
1514 if (mp->mnt_op->vfs_fhtovp == NULL) {
1515 error = EOPNOTSUPP;
1516 goto out;
1517 }
1518 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1519 out:
1520 return error;
1521 }
1522
1523 /*
1524 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1525 * the needed size.
1526 */
1527
1528 int
1529 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1530 {
1531 fhandle_t *fhp;
1532 int error;
1533
1534 *fhpp = NULL;
1535 if (fhsize > FHANDLE_SIZE_MAX) {
1536 return EINVAL;
1537 }
1538 if (fhsize < FHANDLE_SIZE_MIN) {
1539 return EINVAL;
1540 }
1541 again:
1542 fhp = kmem_alloc(fhsize, KM_SLEEP);
1543 if (fhp == NULL) {
1544 return ENOMEM;
1545 }
1546 error = copyin(ufhp, fhp, fhsize);
1547 if (error == 0) {
1548 /* XXX this check shouldn't be here */
1549 if (FHANDLE_SIZE(fhp) == fhsize) {
1550 *fhpp = fhp;
1551 return 0;
1552 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1553 /*
1554 * a kludge for nfsv2 padded handles.
1555 */
1556 size_t sz;
1557
1558 sz = FHANDLE_SIZE(fhp);
1559 kmem_free(fhp, fhsize);
1560 fhsize = sz;
1561 goto again;
1562 } else {
1563 /*
1564 * userland told us wrong size.
1565 */
1566 error = EINVAL;
1567 }
1568 }
1569 kmem_free(fhp, fhsize);
1570 return error;
1571 }
1572
1573 void
1574 vfs_copyinfh_free(fhandle_t *fhp)
1575 {
1576
1577 vfs__fhfree(fhp);
1578 }
1579
1580 /*
1581 * Get file handle system call
1582 */
1583 int
1584 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1585 {
1586 /* {
1587 syscallarg(char *) fname;
1588 syscallarg(fhandle_t *) fhp;
1589 syscallarg(size_t *) fh_size;
1590 } */
1591 struct vnode *vp;
1592 fhandle_t *fh;
1593 int error;
1594 struct nameidata nd;
1595 size_t sz;
1596 size_t usz;
1597
1598 /*
1599 * Must be super user
1600 */
1601 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1602 0, NULL, NULL, NULL);
1603 if (error)
1604 return (error);
1605 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1606 SCARG(uap, fname));
1607 error = namei(&nd);
1608 if (error)
1609 return (error);
1610 vp = nd.ni_vp;
1611 error = vfs_composefh_alloc(vp, &fh);
1612 vput(vp);
1613 if (error != 0) {
1614 goto out;
1615 }
1616 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1617 if (error != 0) {
1618 goto out;
1619 }
1620 sz = FHANDLE_SIZE(fh);
1621 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1622 if (error != 0) {
1623 goto out;
1624 }
1625 if (usz >= sz) {
1626 error = copyout(fh, SCARG(uap, fhp), sz);
1627 } else {
1628 error = E2BIG;
1629 }
1630 out:
1631 vfs_composefh_free(fh);
1632 return (error);
1633 }
1634
1635 /*
1636 * Open a file given a file handle.
1637 *
1638 * Check permissions, allocate an open file structure,
1639 * and call the device open routine if any.
1640 */
1641
1642 int
1643 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1644 register_t *retval)
1645 {
1646 file_t *fp;
1647 struct vnode *vp = NULL;
1648 kauth_cred_t cred = l->l_cred;
1649 file_t *nfp;
1650 int type, indx, error=0;
1651 struct flock lf;
1652 struct vattr va;
1653 fhandle_t *fh;
1654 int flags;
1655 proc_t *p;
1656
1657 p = curproc;
1658
1659 /*
1660 * Must be super user
1661 */
1662 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1663 0, NULL, NULL, NULL)))
1664 return (error);
1665
1666 flags = FFLAGS(oflags);
1667 if ((flags & (FREAD | FWRITE)) == 0)
1668 return (EINVAL);
1669 if ((flags & O_CREAT))
1670 return (EINVAL);
1671 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1672 return (error);
1673 fp = nfp;
1674 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1675 if (error != 0) {
1676 goto bad;
1677 }
1678 error = vfs_fhtovp(fh, &vp);
1679 if (error != 0) {
1680 goto bad;
1681 }
1682
1683 /* Now do an effective vn_open */
1684
1685 if (vp->v_type == VSOCK) {
1686 error = EOPNOTSUPP;
1687 goto bad;
1688 }
1689 error = vn_openchk(vp, cred, flags);
1690 if (error != 0)
1691 goto bad;
1692 if (flags & O_TRUNC) {
1693 VOP_UNLOCK(vp, 0); /* XXX */
1694 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1695 VATTR_NULL(&va);
1696 va.va_size = 0;
1697 error = VOP_SETATTR(vp, &va, cred);
1698 if (error)
1699 goto bad;
1700 }
1701 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1702 goto bad;
1703 if (flags & FWRITE) {
1704 mutex_enter(&vp->v_interlock);
1705 vp->v_writecount++;
1706 mutex_exit(&vp->v_interlock);
1707 }
1708
1709 /* done with modified vn_open, now finish what sys_open does. */
1710
1711 fp->f_flag = flags & FMASK;
1712 fp->f_type = DTYPE_VNODE;
1713 fp->f_ops = &vnops;
1714 fp->f_data = vp;
1715 if (flags & (O_EXLOCK | O_SHLOCK)) {
1716 lf.l_whence = SEEK_SET;
1717 lf.l_start = 0;
1718 lf.l_len = 0;
1719 if (flags & O_EXLOCK)
1720 lf.l_type = F_WRLCK;
1721 else
1722 lf.l_type = F_RDLCK;
1723 type = F_FLOCK;
1724 if ((flags & FNONBLOCK) == 0)
1725 type |= F_WAIT;
1726 VOP_UNLOCK(vp, 0);
1727 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1728 if (error) {
1729 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1730 fd_abort(p, fp, indx);
1731 return (error);
1732 }
1733 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1734 atomic_or_uint(&fp->f_flag, FHASLOCK);
1735 }
1736 VOP_UNLOCK(vp, 0);
1737 *retval = indx;
1738 fd_affix(p, fp, indx);
1739 vfs_copyinfh_free(fh);
1740 return (0);
1741
1742 bad:
1743 fd_abort(p, fp, indx);
1744 if (vp != NULL)
1745 vput(vp);
1746 vfs_copyinfh_free(fh);
1747 return (error);
1748 }
1749
1750 int
1751 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1752 {
1753 /* {
1754 syscallarg(const void *) fhp;
1755 syscallarg(size_t) fh_size;
1756 syscallarg(int) flags;
1757 } */
1758
1759 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1760 SCARG(uap, flags), retval);
1761 }
1762
1763 int
1764 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1765 {
1766 int error;
1767 fhandle_t *fh;
1768 struct vnode *vp;
1769
1770 /*
1771 * Must be super user
1772 */
1773 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1774 0, NULL, NULL, NULL)))
1775 return (error);
1776
1777 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1778 if (error != 0)
1779 return error;
1780
1781 error = vfs_fhtovp(fh, &vp);
1782 vfs_copyinfh_free(fh);
1783 if (error != 0)
1784 return error;
1785
1786 error = vn_stat(vp, sb);
1787 vput(vp);
1788 return error;
1789 }
1790
1791
1792 /* ARGSUSED */
1793 int
1794 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
1795 {
1796 /* {
1797 syscallarg(const void *) fhp;
1798 syscallarg(size_t) fh_size;
1799 syscallarg(struct stat *) sb;
1800 } */
1801 struct stat sb;
1802 int error;
1803
1804 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1805 if (error)
1806 return error;
1807 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1808 }
1809
1810 int
1811 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1812 int flags)
1813 {
1814 fhandle_t *fh;
1815 struct mount *mp;
1816 struct vnode *vp;
1817 int error;
1818
1819 /*
1820 * Must be super user
1821 */
1822 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1823 0, NULL, NULL, NULL)))
1824 return error;
1825
1826 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1827 if (error != 0)
1828 return error;
1829
1830 error = vfs_fhtovp(fh, &vp);
1831 vfs_copyinfh_free(fh);
1832 if (error != 0)
1833 return error;
1834
1835 mp = vp->v_mount;
1836 error = dostatvfs(mp, sb, l, flags, 1);
1837 vput(vp);
1838 return error;
1839 }
1840
1841 /* ARGSUSED */
1842 int
1843 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1844 {
1845 /* {
1846 syscallarg(const void *) fhp;
1847 syscallarg(size_t) fh_size;
1848 syscallarg(struct statvfs *) buf;
1849 syscallarg(int) flags;
1850 } */
1851 struct statvfs *sb = STATVFSBUF_GET();
1852 int error;
1853
1854 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1855 SCARG(uap, flags));
1856 if (error == 0)
1857 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1858 STATVFSBUF_PUT(sb);
1859 return error;
1860 }
1861
1862 /*
1863 * Create a special file.
1864 */
1865 /* ARGSUSED */
1866 int
1867 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
1868 {
1869 /* {
1870 syscallarg(const char *) path;
1871 syscallarg(int) mode;
1872 syscallarg(int) dev;
1873 } */
1874 struct proc *p = l->l_proc;
1875 struct vnode *vp;
1876 struct vattr vattr;
1877 int error, optype;
1878 struct nameidata nd;
1879 char *path;
1880 const char *cpath;
1881 enum uio_seg seg = UIO_USERSPACE;
1882
1883 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1884 0, NULL, NULL, NULL)) != 0)
1885 return (error);
1886
1887 optype = VOP_MKNOD_DESCOFFSET;
1888
1889 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1890 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1891
1892 if ((error = namei(&nd)) != 0)
1893 goto out;
1894 vp = nd.ni_vp;
1895 if (vp != NULL)
1896 error = EEXIST;
1897 else {
1898 VATTR_NULL(&vattr);
1899 /* We will read cwdi->cwdi_cmask unlocked. */
1900 vattr.va_mode =
1901 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1902 vattr.va_rdev = SCARG(uap, dev);
1903
1904 switch (SCARG(uap, mode) & S_IFMT) {
1905 case S_IFMT: /* used by badsect to flag bad sectors */
1906 vattr.va_type = VBAD;
1907 break;
1908 case S_IFCHR:
1909 vattr.va_type = VCHR;
1910 break;
1911 case S_IFBLK:
1912 vattr.va_type = VBLK;
1913 break;
1914 case S_IFWHT:
1915 optype = VOP_WHITEOUT_DESCOFFSET;
1916 break;
1917 case S_IFREG:
1918 #if NVERIEXEC > 0
1919 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1920 O_CREAT);
1921 #endif /* NVERIEXEC > 0 */
1922 vattr.va_type = VREG;
1923 vattr.va_rdev = VNOVAL;
1924 optype = VOP_CREATE_DESCOFFSET;
1925 break;
1926 default:
1927 error = EINVAL;
1928 break;
1929 }
1930 }
1931 if (!error) {
1932 switch (optype) {
1933 case VOP_WHITEOUT_DESCOFFSET:
1934 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1935 if (error)
1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1937 vput(nd.ni_dvp);
1938 break;
1939
1940 case VOP_MKNOD_DESCOFFSET:
1941 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1942 &nd.ni_cnd, &vattr);
1943 if (error == 0)
1944 vput(nd.ni_vp);
1945 break;
1946
1947 case VOP_CREATE_DESCOFFSET:
1948 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1949 &nd.ni_cnd, &vattr);
1950 if (error == 0)
1951 vput(nd.ni_vp);
1952 break;
1953 }
1954 } else {
1955 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1956 if (nd.ni_dvp == vp)
1957 vrele(nd.ni_dvp);
1958 else
1959 vput(nd.ni_dvp);
1960 if (vp)
1961 vrele(vp);
1962 }
1963 out:
1964 VERIEXEC_PATH_PUT(path);
1965 return (error);
1966 }
1967
1968 /*
1969 * Create a named pipe.
1970 */
1971 /* ARGSUSED */
1972 int
1973 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1974 {
1975 /* {
1976 syscallarg(const char *) path;
1977 syscallarg(int) mode;
1978 } */
1979 struct proc *p = l->l_proc;
1980 struct vattr vattr;
1981 int error;
1982 struct nameidata nd;
1983
1984 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1985 SCARG(uap, path));
1986 if ((error = namei(&nd)) != 0)
1987 return (error);
1988 if (nd.ni_vp != NULL) {
1989 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1990 if (nd.ni_dvp == nd.ni_vp)
1991 vrele(nd.ni_dvp);
1992 else
1993 vput(nd.ni_dvp);
1994 vrele(nd.ni_vp);
1995 return (EEXIST);
1996 }
1997 VATTR_NULL(&vattr);
1998 vattr.va_type = VFIFO;
1999 /* We will read cwdi->cwdi_cmask unlocked. */
2000 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2001 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2002 if (error == 0)
2003 vput(nd.ni_vp);
2004 return (error);
2005 }
2006
2007 /*
2008 * Make a hard file link.
2009 */
2010 /* ARGSUSED */
2011 int
2012 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2013 {
2014 /* {
2015 syscallarg(const char *) path;
2016 syscallarg(const char *) link;
2017 } */
2018 struct vnode *vp;
2019 struct nameidata nd;
2020 int error;
2021
2022 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2023 SCARG(uap, path));
2024 if ((error = namei(&nd)) != 0)
2025 return (error);
2026 vp = nd.ni_vp;
2027 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2028 SCARG(uap, link));
2029 if ((error = namei(&nd)) != 0)
2030 goto out;
2031 if (nd.ni_vp) {
2032 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2033 if (nd.ni_dvp == nd.ni_vp)
2034 vrele(nd.ni_dvp);
2035 else
2036 vput(nd.ni_dvp);
2037 vrele(nd.ni_vp);
2038 error = EEXIST;
2039 goto out;
2040 }
2041 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2042 out:
2043 vrele(vp);
2044 return (error);
2045 }
2046
2047 /*
2048 * Make a symbolic link.
2049 */
2050 /* ARGSUSED */
2051 int
2052 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2053 {
2054 /* {
2055 syscallarg(const char *) path;
2056 syscallarg(const char *) link;
2057 } */
2058 struct proc *p = l->l_proc;
2059 struct vattr vattr;
2060 char *path;
2061 int error;
2062 struct nameidata nd;
2063
2064 path = PNBUF_GET();
2065 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2066 if (error)
2067 goto out;
2068 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2069 SCARG(uap, link));
2070 if ((error = namei(&nd)) != 0)
2071 goto out;
2072 if (nd.ni_vp) {
2073 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2074 if (nd.ni_dvp == nd.ni_vp)
2075 vrele(nd.ni_dvp);
2076 else
2077 vput(nd.ni_dvp);
2078 vrele(nd.ni_vp);
2079 error = EEXIST;
2080 goto out;
2081 }
2082 VATTR_NULL(&vattr);
2083 vattr.va_type = VLNK;
2084 /* We will read cwdi->cwdi_cmask unlocked. */
2085 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2086 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2087 if (error == 0)
2088 vput(nd.ni_vp);
2089 out:
2090 PNBUF_PUT(path);
2091 return (error);
2092 }
2093
2094 /*
2095 * Delete a whiteout from the filesystem.
2096 */
2097 /* ARGSUSED */
2098 int
2099 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2100 {
2101 /* {
2102 syscallarg(const char *) path;
2103 } */
2104 int error;
2105 struct nameidata nd;
2106
2107 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2108 UIO_USERSPACE, SCARG(uap, path));
2109 error = namei(&nd);
2110 if (error)
2111 return (error);
2112
2113 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2114 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2115 if (nd.ni_dvp == nd.ni_vp)
2116 vrele(nd.ni_dvp);
2117 else
2118 vput(nd.ni_dvp);
2119 if (nd.ni_vp)
2120 vrele(nd.ni_vp);
2121 return (EEXIST);
2122 }
2123 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2124 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2125 vput(nd.ni_dvp);
2126 return (error);
2127 }
2128
2129 /*
2130 * Delete a name from the filesystem.
2131 */
2132 /* ARGSUSED */
2133 int
2134 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2135 {
2136 /* {
2137 syscallarg(const char *) path;
2138 } */
2139
2140 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2141 }
2142
2143 int
2144 do_sys_unlink(const char *arg, enum uio_seg seg)
2145 {
2146 struct vnode *vp;
2147 int error;
2148 struct nameidata nd;
2149 kauth_cred_t cred;
2150 char *path;
2151 const char *cpath;
2152
2153 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2154 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2155
2156 if ((error = namei(&nd)) != 0)
2157 goto out;
2158 vp = nd.ni_vp;
2159
2160 /*
2161 * The root of a mounted filesystem cannot be deleted.
2162 */
2163 if (vp->v_vflag & VV_ROOT) {
2164 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2165 if (nd.ni_dvp == vp)
2166 vrele(nd.ni_dvp);
2167 else
2168 vput(nd.ni_dvp);
2169 vput(vp);
2170 error = EBUSY;
2171 goto out;
2172 }
2173
2174 #if NVERIEXEC > 0
2175 /* Handle remove requests for veriexec entries. */
2176 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2177 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2178 if (nd.ni_dvp == vp)
2179 vrele(nd.ni_dvp);
2180 else
2181 vput(nd.ni_dvp);
2182 vput(vp);
2183 goto out;
2184 }
2185 #endif /* NVERIEXEC > 0 */
2186
2187 cred = kauth_cred_get();
2188 #ifdef FILEASSOC
2189 (void)fileassoc_file_delete(vp);
2190 #endif /* FILEASSOC */
2191 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2192 out:
2193 VERIEXEC_PATH_PUT(path);
2194 return (error);
2195 }
2196
2197 /*
2198 * Reposition read/write file offset.
2199 */
2200 int
2201 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2202 {
2203 /* {
2204 syscallarg(int) fd;
2205 syscallarg(int) pad;
2206 syscallarg(off_t) offset;
2207 syscallarg(int) whence;
2208 } */
2209 kauth_cred_t cred = l->l_cred;
2210 file_t *fp;
2211 struct vnode *vp;
2212 struct vattr vattr;
2213 off_t newoff;
2214 int error, fd;
2215
2216 fd = SCARG(uap, fd);
2217
2218 if ((fp = fd_getfile(fd)) == NULL)
2219 return (EBADF);
2220
2221 vp = fp->f_data;
2222 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2223 error = ESPIPE;
2224 goto out;
2225 }
2226
2227 switch (SCARG(uap, whence)) {
2228 case SEEK_CUR:
2229 newoff = fp->f_offset + SCARG(uap, offset);
2230 break;
2231 case SEEK_END:
2232 error = VOP_GETATTR(vp, &vattr, cred);
2233 if (error) {
2234 goto out;
2235 }
2236 newoff = SCARG(uap, offset) + vattr.va_size;
2237 break;
2238 case SEEK_SET:
2239 newoff = SCARG(uap, offset);
2240 break;
2241 default:
2242 error = EINVAL;
2243 goto out;
2244 }
2245 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2246 *(off_t *)retval = fp->f_offset = newoff;
2247 }
2248 out:
2249 fd_putfile(fd);
2250 return (error);
2251 }
2252
2253 /*
2254 * Positional read system call.
2255 */
2256 int
2257 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2258 {
2259 /* {
2260 syscallarg(int) fd;
2261 syscallarg(void *) buf;
2262 syscallarg(size_t) nbyte;
2263 syscallarg(off_t) offset;
2264 } */
2265 file_t *fp;
2266 struct vnode *vp;
2267 off_t offset;
2268 int error, fd = SCARG(uap, fd);
2269
2270 if ((fp = fd_getfile(fd)) == NULL)
2271 return (EBADF);
2272
2273 if ((fp->f_flag & FREAD) == 0) {
2274 fd_putfile(fd);
2275 return (EBADF);
2276 }
2277
2278 vp = fp->f_data;
2279 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2280 error = ESPIPE;
2281 goto out;
2282 }
2283
2284 offset = SCARG(uap, offset);
2285
2286 /*
2287 * XXX This works because no file systems actually
2288 * XXX take any action on the seek operation.
2289 */
2290 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2291 goto out;
2292
2293 /* dofileread() will unuse the descriptor for us */
2294 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2295 &offset, 0, retval));
2296
2297 out:
2298 fd_putfile(fd);
2299 return (error);
2300 }
2301
2302 /*
2303 * Positional scatter read system call.
2304 */
2305 int
2306 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2307 {
2308 /* {
2309 syscallarg(int) fd;
2310 syscallarg(const struct iovec *) iovp;
2311 syscallarg(int) iovcnt;
2312 syscallarg(off_t) offset;
2313 } */
2314 off_t offset = SCARG(uap, offset);
2315
2316 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2317 SCARG(uap, iovcnt), &offset, 0, retval);
2318 }
2319
2320 /*
2321 * Positional write system call.
2322 */
2323 int
2324 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2325 {
2326 /* {
2327 syscallarg(int) fd;
2328 syscallarg(const void *) buf;
2329 syscallarg(size_t) nbyte;
2330 syscallarg(off_t) offset;
2331 } */
2332 file_t *fp;
2333 struct vnode *vp;
2334 off_t offset;
2335 int error, fd = SCARG(uap, fd);
2336
2337 if ((fp = fd_getfile(fd)) == NULL)
2338 return (EBADF);
2339
2340 if ((fp->f_flag & FWRITE) == 0) {
2341 fd_putfile(fd);
2342 return (EBADF);
2343 }
2344
2345 vp = fp->f_data;
2346 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2347 error = ESPIPE;
2348 goto out;
2349 }
2350
2351 offset = SCARG(uap, offset);
2352
2353 /*
2354 * XXX This works because no file systems actually
2355 * XXX take any action on the seek operation.
2356 */
2357 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2358 goto out;
2359
2360 /* dofilewrite() will unuse the descriptor for us */
2361 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2362 &offset, 0, retval));
2363
2364 out:
2365 fd_putfile(fd);
2366 return (error);
2367 }
2368
2369 /*
2370 * Positional gather write system call.
2371 */
2372 int
2373 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2374 {
2375 /* {
2376 syscallarg(int) fd;
2377 syscallarg(const struct iovec *) iovp;
2378 syscallarg(int) iovcnt;
2379 syscallarg(off_t) offset;
2380 } */
2381 off_t offset = SCARG(uap, offset);
2382
2383 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2384 SCARG(uap, iovcnt), &offset, 0, retval);
2385 }
2386
2387 /*
2388 * Check access permissions.
2389 */
2390 int
2391 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2392 {
2393 /* {
2394 syscallarg(const char *) path;
2395 syscallarg(int) flags;
2396 } */
2397 kauth_cred_t cred;
2398 struct vnode *vp;
2399 int error, flags;
2400 struct nameidata nd;
2401
2402 cred = kauth_cred_dup(l->l_cred);
2403 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2404 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2405 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2406 SCARG(uap, path));
2407 /* Override default credentials */
2408 nd.ni_cnd.cn_cred = cred;
2409 if ((error = namei(&nd)) != 0)
2410 goto out;
2411 vp = nd.ni_vp;
2412
2413 /* Flags == 0 means only check for existence. */
2414 if (SCARG(uap, flags)) {
2415 flags = 0;
2416 if (SCARG(uap, flags) & R_OK)
2417 flags |= VREAD;
2418 if (SCARG(uap, flags) & W_OK)
2419 flags |= VWRITE;
2420 if (SCARG(uap, flags) & X_OK)
2421 flags |= VEXEC;
2422
2423 error = VOP_ACCESS(vp, flags, cred);
2424 if (!error && (flags & VWRITE))
2425 error = vn_writechk(vp);
2426 }
2427 vput(vp);
2428 out:
2429 kauth_cred_free(cred);
2430 return (error);
2431 }
2432
2433 /*
2434 * Common code for all sys_stat functions, including compat versions.
2435 */
2436 int
2437 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2438 {
2439 int error;
2440 struct nameidata nd;
2441
2442 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2443 UIO_USERSPACE, path);
2444 error = namei(&nd);
2445 if (error != 0)
2446 return error;
2447 error = vn_stat(nd.ni_vp, sb);
2448 vput(nd.ni_vp);
2449 return error;
2450 }
2451
2452 /*
2453 * Get file status; this version follows links.
2454 */
2455 /* ARGSUSED */
2456 int
2457 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
2458 {
2459 /* {
2460 syscallarg(const char *) path;
2461 syscallarg(struct stat *) ub;
2462 } */
2463 struct stat sb;
2464 int error;
2465
2466 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2467 if (error)
2468 return error;
2469 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2470 }
2471
2472 /*
2473 * Get file status; this version does not follow links.
2474 */
2475 /* ARGSUSED */
2476 int
2477 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
2478 {
2479 /* {
2480 syscallarg(const char *) path;
2481 syscallarg(struct stat *) ub;
2482 } */
2483 struct stat sb;
2484 int error;
2485
2486 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2487 if (error)
2488 return error;
2489 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2490 }
2491
2492 /*
2493 * Get configurable pathname variables.
2494 */
2495 /* ARGSUSED */
2496 int
2497 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2498 {
2499 /* {
2500 syscallarg(const char *) path;
2501 syscallarg(int) name;
2502 } */
2503 int error;
2504 struct nameidata nd;
2505
2506 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2507 SCARG(uap, path));
2508 if ((error = namei(&nd)) != 0)
2509 return (error);
2510 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2511 vput(nd.ni_vp);
2512 return (error);
2513 }
2514
2515 /*
2516 * Return target name of a symbolic link.
2517 */
2518 /* ARGSUSED */
2519 int
2520 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2521 {
2522 /* {
2523 syscallarg(const char *) path;
2524 syscallarg(char *) buf;
2525 syscallarg(size_t) count;
2526 } */
2527 struct vnode *vp;
2528 struct iovec aiov;
2529 struct uio auio;
2530 int error;
2531 struct nameidata nd;
2532
2533 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2534 SCARG(uap, path));
2535 if ((error = namei(&nd)) != 0)
2536 return (error);
2537 vp = nd.ni_vp;
2538 if (vp->v_type != VLNK)
2539 error = EINVAL;
2540 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2541 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2542 aiov.iov_base = SCARG(uap, buf);
2543 aiov.iov_len = SCARG(uap, count);
2544 auio.uio_iov = &aiov;
2545 auio.uio_iovcnt = 1;
2546 auio.uio_offset = 0;
2547 auio.uio_rw = UIO_READ;
2548 KASSERT(l == curlwp);
2549 auio.uio_vmspace = l->l_proc->p_vmspace;
2550 auio.uio_resid = SCARG(uap, count);
2551 error = VOP_READLINK(vp, &auio, l->l_cred);
2552 }
2553 vput(vp);
2554 *retval = SCARG(uap, count) - auio.uio_resid;
2555 return (error);
2556 }
2557
2558 /*
2559 * Change flags of a file given a path name.
2560 */
2561 /* ARGSUSED */
2562 int
2563 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2564 {
2565 /* {
2566 syscallarg(const char *) path;
2567 syscallarg(u_long) flags;
2568 } */
2569 struct vnode *vp;
2570 int error;
2571 struct nameidata nd;
2572
2573 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2574 SCARG(uap, path));
2575 if ((error = namei(&nd)) != 0)
2576 return (error);
2577 vp = nd.ni_vp;
2578 error = change_flags(vp, SCARG(uap, flags), l);
2579 vput(vp);
2580 return (error);
2581 }
2582
2583 /*
2584 * Change flags of a file given a file descriptor.
2585 */
2586 /* ARGSUSED */
2587 int
2588 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2589 {
2590 /* {
2591 syscallarg(int) fd;
2592 syscallarg(u_long) flags;
2593 } */
2594 struct vnode *vp;
2595 file_t *fp;
2596 int error;
2597
2598 /* fd_getvnode() will use the descriptor for us */
2599 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2600 return (error);
2601 vp = fp->f_data;
2602 error = change_flags(vp, SCARG(uap, flags), l);
2603 VOP_UNLOCK(vp, 0);
2604 fd_putfile(SCARG(uap, fd));
2605 return (error);
2606 }
2607
2608 /*
2609 * Change flags of a file given a path name; this version does
2610 * not follow links.
2611 */
2612 int
2613 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2614 {
2615 /* {
2616 syscallarg(const char *) path;
2617 syscallarg(u_long) flags;
2618 } */
2619 struct vnode *vp;
2620 int error;
2621 struct nameidata nd;
2622
2623 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2624 SCARG(uap, path));
2625 if ((error = namei(&nd)) != 0)
2626 return (error);
2627 vp = nd.ni_vp;
2628 error = change_flags(vp, SCARG(uap, flags), l);
2629 vput(vp);
2630 return (error);
2631 }
2632
2633 /*
2634 * Common routine to change flags of a file.
2635 */
2636 int
2637 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2638 {
2639 struct vattr vattr;
2640 int error;
2641
2642 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2643 /*
2644 * Non-superusers cannot change the flags on devices, even if they
2645 * own them.
2646 */
2647 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2648 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2649 goto out;
2650 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2651 error = EINVAL;
2652 goto out;
2653 }
2654 }
2655 VATTR_NULL(&vattr);
2656 vattr.va_flags = flags;
2657 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2658 out:
2659 return (error);
2660 }
2661
2662 /*
2663 * Change mode of a file given path name; this version follows links.
2664 */
2665 /* ARGSUSED */
2666 int
2667 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2668 {
2669 /* {
2670 syscallarg(const char *) path;
2671 syscallarg(int) mode;
2672 } */
2673 int error;
2674 struct nameidata nd;
2675
2676 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2677 SCARG(uap, path));
2678 if ((error = namei(&nd)) != 0)
2679 return (error);
2680
2681 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2682
2683 vrele(nd.ni_vp);
2684 return (error);
2685 }
2686
2687 /*
2688 * Change mode of a file given a file descriptor.
2689 */
2690 /* ARGSUSED */
2691 int
2692 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2693 {
2694 /* {
2695 syscallarg(int) fd;
2696 syscallarg(int) mode;
2697 } */
2698 file_t *fp;
2699 int error;
2700
2701 /* fd_getvnode() will use the descriptor for us */
2702 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2703 return (error);
2704 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2705 fd_putfile(SCARG(uap, fd));
2706 return (error);
2707 }
2708
2709 /*
2710 * Change mode of a file given path name; this version does not follow links.
2711 */
2712 /* ARGSUSED */
2713 int
2714 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2715 {
2716 /* {
2717 syscallarg(const char *) path;
2718 syscallarg(int) mode;
2719 } */
2720 int error;
2721 struct nameidata nd;
2722
2723 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2724 SCARG(uap, path));
2725 if ((error = namei(&nd)) != 0)
2726 return (error);
2727
2728 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2729
2730 vrele(nd.ni_vp);
2731 return (error);
2732 }
2733
2734 /*
2735 * Common routine to set mode given a vnode.
2736 */
2737 static int
2738 change_mode(struct vnode *vp, int mode, struct lwp *l)
2739 {
2740 struct vattr vattr;
2741 int error;
2742
2743 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2744 VATTR_NULL(&vattr);
2745 vattr.va_mode = mode & ALLPERMS;
2746 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2747 VOP_UNLOCK(vp, 0);
2748 return (error);
2749 }
2750
2751 /*
2752 * Set ownership given a path name; this version follows links.
2753 */
2754 /* ARGSUSED */
2755 int
2756 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2757 {
2758 /* {
2759 syscallarg(const char *) path;
2760 syscallarg(uid_t) uid;
2761 syscallarg(gid_t) gid;
2762 } */
2763 int error;
2764 struct nameidata nd;
2765
2766 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2767 SCARG(uap, path));
2768 if ((error = namei(&nd)) != 0)
2769 return (error);
2770
2771 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2772
2773 vrele(nd.ni_vp);
2774 return (error);
2775 }
2776
2777 /*
2778 * Set ownership given a path name; this version follows links.
2779 * Provides POSIX semantics.
2780 */
2781 /* ARGSUSED */
2782 int
2783 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2784 {
2785 /* {
2786 syscallarg(const char *) path;
2787 syscallarg(uid_t) uid;
2788 syscallarg(gid_t) gid;
2789 } */
2790 int error;
2791 struct nameidata nd;
2792
2793 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2794 SCARG(uap, path));
2795 if ((error = namei(&nd)) != 0)
2796 return (error);
2797
2798 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2799
2800 vrele(nd.ni_vp);
2801 return (error);
2802 }
2803
2804 /*
2805 * Set ownership given a file descriptor.
2806 */
2807 /* ARGSUSED */
2808 int
2809 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2810 {
2811 /* {
2812 syscallarg(int) fd;
2813 syscallarg(uid_t) uid;
2814 syscallarg(gid_t) gid;
2815 } */
2816 int error;
2817 file_t *fp;
2818
2819 /* fd_getvnode() will use the descriptor for us */
2820 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2821 return (error);
2822 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2823 l, 0);
2824 fd_putfile(SCARG(uap, fd));
2825 return (error);
2826 }
2827
2828 /*
2829 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2830 */
2831 /* ARGSUSED */
2832 int
2833 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2834 {
2835 /* {
2836 syscallarg(int) fd;
2837 syscallarg(uid_t) uid;
2838 syscallarg(gid_t) gid;
2839 } */
2840 int error;
2841 file_t *fp;
2842
2843 /* fd_getvnode() will use the descriptor for us */
2844 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2845 return (error);
2846 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2847 l, 1);
2848 fd_putfile(SCARG(uap, fd));
2849 return (error);
2850 }
2851
2852 /*
2853 * Set ownership given a path name; this version does not follow links.
2854 */
2855 /* ARGSUSED */
2856 int
2857 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2858 {
2859 /* {
2860 syscallarg(const char *) path;
2861 syscallarg(uid_t) uid;
2862 syscallarg(gid_t) gid;
2863 } */
2864 int error;
2865 struct nameidata nd;
2866
2867 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2868 SCARG(uap, path));
2869 if ((error = namei(&nd)) != 0)
2870 return (error);
2871
2872 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2873
2874 vrele(nd.ni_vp);
2875 return (error);
2876 }
2877
2878 /*
2879 * Set ownership given a path name; this version does not follow links.
2880 * Provides POSIX/XPG semantics.
2881 */
2882 /* ARGSUSED */
2883 int
2884 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2885 {
2886 /* {
2887 syscallarg(const char *) path;
2888 syscallarg(uid_t) uid;
2889 syscallarg(gid_t) gid;
2890 } */
2891 int error;
2892 struct nameidata nd;
2893
2894 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2895 SCARG(uap, path));
2896 if ((error = namei(&nd)) != 0)
2897 return (error);
2898
2899 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2900
2901 vrele(nd.ni_vp);
2902 return (error);
2903 }
2904
2905 /*
2906 * Common routine to set ownership given a vnode.
2907 */
2908 static int
2909 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2910 int posix_semantics)
2911 {
2912 struct vattr vattr;
2913 mode_t newmode;
2914 int error;
2915
2916 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2917 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2918 goto out;
2919
2920 #define CHANGED(x) ((int)(x) != -1)
2921 newmode = vattr.va_mode;
2922 if (posix_semantics) {
2923 /*
2924 * POSIX/XPG semantics: if the caller is not the super-user,
2925 * clear set-user-id and set-group-id bits. Both POSIX and
2926 * the XPG consider the behaviour for calls by the super-user
2927 * implementation-defined; we leave the set-user-id and set-
2928 * group-id settings intact in that case.
2929 */
2930 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2931 NULL) != 0)
2932 newmode &= ~(S_ISUID | S_ISGID);
2933 } else {
2934 /*
2935 * NetBSD semantics: when changing owner and/or group,
2936 * clear the respective bit(s).
2937 */
2938 if (CHANGED(uid))
2939 newmode &= ~S_ISUID;
2940 if (CHANGED(gid))
2941 newmode &= ~S_ISGID;
2942 }
2943 /* Update va_mode iff altered. */
2944 if (vattr.va_mode == newmode)
2945 newmode = VNOVAL;
2946
2947 VATTR_NULL(&vattr);
2948 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2949 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2950 vattr.va_mode = newmode;
2951 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2952 #undef CHANGED
2953
2954 out:
2955 VOP_UNLOCK(vp, 0);
2956 return (error);
2957 }
2958
2959 /*
2960 * Set the access and modification times given a path name; this
2961 * version follows links.
2962 */
2963 /* ARGSUSED */
2964 int
2965 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
2966 {
2967 /* {
2968 syscallarg(const char *) path;
2969 syscallarg(const struct timeval *) tptr;
2970 } */
2971
2972 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2973 SCARG(uap, tptr), UIO_USERSPACE);
2974 }
2975
2976 /*
2977 * Set the access and modification times given a file descriptor.
2978 */
2979 /* ARGSUSED */
2980 int
2981 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
2982 {
2983 /* {
2984 syscallarg(int) fd;
2985 syscallarg(const struct timeval *) tptr;
2986 } */
2987 int error;
2988 file_t *fp;
2989
2990 /* fd_getvnode() will use the descriptor for us */
2991 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2992 return (error);
2993 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2994 UIO_USERSPACE);
2995 fd_putfile(SCARG(uap, fd));
2996 return (error);
2997 }
2998
2999 /*
3000 * Set the access and modification times given a path name; this
3001 * version does not follow links.
3002 */
3003 int
3004 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
3005 {
3006 /* {
3007 syscallarg(const char *) path;
3008 syscallarg(const struct timeval *) tptr;
3009 } */
3010
3011 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3012 SCARG(uap, tptr), UIO_USERSPACE);
3013 }
3014
3015 /*
3016 * Common routine to set access and modification times given a vnode.
3017 */
3018 int
3019 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3020 const struct timeval *tptr, enum uio_seg seg)
3021 {
3022 struct vattr vattr;
3023 struct nameidata nd;
3024 int error;
3025 bool vanull, setbirthtime;
3026 struct timespec ts[2];
3027
3028 if (tptr == NULL) {
3029 vanull = true;
3030 nanotime(&ts[0]);
3031 ts[1] = ts[0];
3032 } else {
3033 struct timeval tv[2];
3034
3035 vanull = false;
3036 if (seg != UIO_SYSSPACE) {
3037 error = copyin(tptr, &tv, sizeof (tv));
3038 if (error != 0)
3039 return error;
3040 tptr = tv;
3041 }
3042 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3043 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3044 }
3045
3046 if (vp == NULL) {
3047 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3048 if ((error = namei(&nd)) != 0)
3049 return error;
3050 vp = nd.ni_vp;
3051 } else
3052 nd.ni_vp = NULL;
3053
3054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3055 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3056 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3057 VATTR_NULL(&vattr);
3058 vattr.va_atime = ts[0];
3059 vattr.va_mtime = ts[1];
3060 if (setbirthtime)
3061 vattr.va_birthtime = ts[1];
3062 if (vanull)
3063 vattr.va_flags |= VA_UTIMES_NULL;
3064 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3065 VOP_UNLOCK(vp, 0);
3066
3067 if (nd.ni_vp != NULL)
3068 vrele(nd.ni_vp);
3069
3070 return error;
3071 }
3072
3073 /*
3074 * Truncate a file given its path name.
3075 */
3076 /* ARGSUSED */
3077 int
3078 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3079 {
3080 /* {
3081 syscallarg(const char *) path;
3082 syscallarg(int) pad;
3083 syscallarg(off_t) length;
3084 } */
3085 struct vnode *vp;
3086 struct vattr vattr;
3087 int error;
3088 struct nameidata nd;
3089
3090 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3091 SCARG(uap, path));
3092 if ((error = namei(&nd)) != 0)
3093 return (error);
3094 vp = nd.ni_vp;
3095 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3096 if (vp->v_type == VDIR)
3097 error = EISDIR;
3098 else if ((error = vn_writechk(vp)) == 0 &&
3099 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3100 VATTR_NULL(&vattr);
3101 vattr.va_size = SCARG(uap, length);
3102 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3103 }
3104 vput(vp);
3105 return (error);
3106 }
3107
3108 /*
3109 * Truncate a file given a file descriptor.
3110 */
3111 /* ARGSUSED */
3112 int
3113 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3114 {
3115 /* {
3116 syscallarg(int) fd;
3117 syscallarg(int) pad;
3118 syscallarg(off_t) length;
3119 } */
3120 struct vattr vattr;
3121 struct vnode *vp;
3122 file_t *fp;
3123 int error;
3124
3125 /* fd_getvnode() will use the descriptor for us */
3126 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3127 return (error);
3128 if ((fp->f_flag & FWRITE) == 0) {
3129 error = EINVAL;
3130 goto out;
3131 }
3132 vp = fp->f_data;
3133 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3134 if (vp->v_type == VDIR)
3135 error = EISDIR;
3136 else if ((error = vn_writechk(vp)) == 0) {
3137 VATTR_NULL(&vattr);
3138 vattr.va_size = SCARG(uap, length);
3139 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3140 }
3141 VOP_UNLOCK(vp, 0);
3142 out:
3143 fd_putfile(SCARG(uap, fd));
3144 return (error);
3145 }
3146
3147 /*
3148 * Sync an open file.
3149 */
3150 /* ARGSUSED */
3151 int
3152 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3153 {
3154 /* {
3155 syscallarg(int) fd;
3156 } */
3157 struct vnode *vp;
3158 file_t *fp;
3159 int error;
3160
3161 /* fd_getvnode() will use the descriptor for us */
3162 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3163 return (error);
3164 vp = fp->f_data;
3165 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3166 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3167 if (error == 0 && bioopsp != NULL &&
3168 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3169 (*bioopsp->io_fsync)(vp, 0);
3170 VOP_UNLOCK(vp, 0);
3171 fd_putfile(SCARG(uap, fd));
3172 return (error);
3173 }
3174
3175 /*
3176 * Sync a range of file data. API modeled after that found in AIX.
3177 *
3178 * FDATASYNC indicates that we need only save enough metadata to be able
3179 * to re-read the written data. Note we duplicate AIX's requirement that
3180 * the file be open for writing.
3181 */
3182 /* ARGSUSED */
3183 int
3184 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3185 {
3186 /* {
3187 syscallarg(int) fd;
3188 syscallarg(int) flags;
3189 syscallarg(off_t) start;
3190 syscallarg(off_t) length;
3191 } */
3192 struct vnode *vp;
3193 file_t *fp;
3194 int flags, nflags;
3195 off_t s, e, len;
3196 int error;
3197
3198 /* fd_getvnode() will use the descriptor for us */
3199 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3200 return (error);
3201
3202 if ((fp->f_flag & FWRITE) == 0) {
3203 error = EBADF;
3204 goto out;
3205 }
3206
3207 flags = SCARG(uap, flags);
3208 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3209 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3210 error = EINVAL;
3211 goto out;
3212 }
3213 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3214 if (flags & FDATASYNC)
3215 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3216 else
3217 nflags = FSYNC_WAIT;
3218 if (flags & FDISKSYNC)
3219 nflags |= FSYNC_CACHE;
3220
3221 len = SCARG(uap, length);
3222 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3223 if (len) {
3224 s = SCARG(uap, start);
3225 e = s + len;
3226 if (e < s) {
3227 error = EINVAL;
3228 goto out;
3229 }
3230 } else {
3231 e = 0;
3232 s = 0;
3233 }
3234
3235 vp = fp->f_data;
3236 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3237 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3238
3239 if (error == 0 && bioopsp != NULL &&
3240 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3241 (*bioopsp->io_fsync)(vp, nflags);
3242
3243 VOP_UNLOCK(vp, 0);
3244 out:
3245 fd_putfile(SCARG(uap, fd));
3246 return (error);
3247 }
3248
3249 /*
3250 * Sync the data of an open file.
3251 */
3252 /* ARGSUSED */
3253 int
3254 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3255 {
3256 /* {
3257 syscallarg(int) fd;
3258 } */
3259 struct vnode *vp;
3260 file_t *fp;
3261 int error;
3262
3263 /* fd_getvnode() will use the descriptor for us */
3264 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3265 return (error);
3266 if ((fp->f_flag & FWRITE) == 0) {
3267 fd_putfile(SCARG(uap, fd));
3268 return (EBADF);
3269 }
3270 vp = fp->f_data;
3271 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3272 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3273 VOP_UNLOCK(vp, 0);
3274 fd_putfile(SCARG(uap, fd));
3275 return (error);
3276 }
3277
3278 /*
3279 * Rename files, (standard) BSD semantics frontend.
3280 */
3281 /* ARGSUSED */
3282 int
3283 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3284 {
3285 /* {
3286 syscallarg(const char *) from;
3287 syscallarg(const char *) to;
3288 } */
3289
3290 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3291 }
3292
3293 /*
3294 * Rename files, POSIX semantics frontend.
3295 */
3296 /* ARGSUSED */
3297 int
3298 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3299 {
3300 /* {
3301 syscallarg(const char *) from;
3302 syscallarg(const char *) to;
3303 } */
3304
3305 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3306 }
3307
3308 /*
3309 * Rename files. Source and destination must either both be directories,
3310 * or both not be directories. If target is a directory, it must be empty.
3311 * If `from' and `to' refer to the same object, the value of the `retain'
3312 * argument is used to determine whether `from' will be
3313 *
3314 * (retain == 0) deleted unless `from' and `to' refer to the same
3315 * object in the file system's name space (BSD).
3316 * (retain == 1) always retained (POSIX).
3317 */
3318 int
3319 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3320 {
3321 struct vnode *tvp, *fvp, *tdvp;
3322 struct nameidata fromnd, tond;
3323 struct mount *fs;
3324 struct lwp *l = curlwp;
3325 struct proc *p;
3326 uint32_t saveflag;
3327 int error;
3328
3329 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3330 seg, from);
3331 if ((error = namei(&fromnd)) != 0)
3332 return (error);
3333 if (fromnd.ni_dvp != fromnd.ni_vp)
3334 VOP_UNLOCK(fromnd.ni_dvp, 0);
3335 fvp = fromnd.ni_vp;
3336
3337 fs = fvp->v_mount;
3338 error = VFS_RENAMELOCK_ENTER(fs);
3339 if (error) {
3340 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3341 vrele(fromnd.ni_dvp);
3342 vrele(fvp);
3343 goto out1;
3344 }
3345
3346 /*
3347 * close, partially, yet another race - ideally we should only
3348 * go as far as getting fromnd.ni_dvp before getting the per-fs
3349 * lock, and then continue to get fromnd.ni_vp, but we can't do
3350 * that with namei as it stands.
3351 *
3352 * This still won't prevent rmdir from nuking fromnd.ni_vp
3353 * under us. The real fix is to get the locks in the right
3354 * order and do the lookups in the right places, but that's a
3355 * major rototill.
3356 *
3357 * Preserve the SAVESTART in cn_flags, because who knows what
3358 * might happen if we don't.
3359 *
3360 * Note: this logic (as well as this whole function) is cloned
3361 * in nfs_serv.c. Proceed accordingly.
3362 */
3363 vrele(fvp);
3364 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3365 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3366 (fromnd.ni_cnd.cn_namelen == 2 &&
3367 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3368 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3369 error = EINVAL;
3370 VFS_RENAMELOCK_EXIT(fs);
3371 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3372 vrele(fromnd.ni_dvp);
3373 goto out1;
3374 }
3375 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3376 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3377 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3378 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3379 fromnd.ni_cnd.cn_flags |= saveflag;
3380 if (error) {
3381 VOP_UNLOCK(fromnd.ni_dvp, 0);
3382 VFS_RENAMELOCK_EXIT(fs);
3383 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3384 vrele(fromnd.ni_dvp);
3385 goto out1;
3386 }
3387 VOP_UNLOCK(fromnd.ni_vp, 0);
3388 if (fromnd.ni_dvp != fromnd.ni_vp)
3389 VOP_UNLOCK(fromnd.ni_dvp, 0);
3390 fvp = fromnd.ni_vp;
3391
3392 NDINIT(&tond, RENAME,
3393 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3394 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3395 seg, to);
3396 if ((error = namei(&tond)) != 0) {
3397 VFS_RENAMELOCK_EXIT(fs);
3398 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3399 vrele(fromnd.ni_dvp);
3400 vrele(fvp);
3401 goto out1;
3402 }
3403 tdvp = tond.ni_dvp;
3404 tvp = tond.ni_vp;
3405
3406 if (tvp != NULL) {
3407 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3408 error = ENOTDIR;
3409 goto out;
3410 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3411 error = EISDIR;
3412 goto out;
3413 }
3414 }
3415
3416 if (fvp == tdvp)
3417 error = EINVAL;
3418
3419 /*
3420 * Source and destination refer to the same object.
3421 */
3422 if (fvp == tvp) {
3423 if (retain)
3424 error = -1;
3425 else if (fromnd.ni_dvp == tdvp &&
3426 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3427 !memcmp(fromnd.ni_cnd.cn_nameptr,
3428 tond.ni_cnd.cn_nameptr,
3429 fromnd.ni_cnd.cn_namelen))
3430 error = -1;
3431 }
3432
3433 #if NVERIEXEC > 0
3434 if (!error) {
3435 char *f1, *f2;
3436
3437 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3438 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3439
3440 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3441 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3442
3443 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3444
3445 free(f1, M_TEMP);
3446 free(f2, M_TEMP);
3447 }
3448 #endif /* NVERIEXEC > 0 */
3449
3450 out:
3451 p = l->l_proc;
3452 if (!error) {
3453 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3454 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3455 VFS_RENAMELOCK_EXIT(fs);
3456 } else {
3457 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3458 if (tdvp == tvp)
3459 vrele(tdvp);
3460 else
3461 vput(tdvp);
3462 if (tvp)
3463 vput(tvp);
3464 VFS_RENAMELOCK_EXIT(fs);
3465 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3466 vrele(fromnd.ni_dvp);
3467 vrele(fvp);
3468 }
3469 vrele(tond.ni_startdir);
3470 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3471 out1:
3472 if (fromnd.ni_startdir)
3473 vrele(fromnd.ni_startdir);
3474 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3475 return (error == -1 ? 0 : error);
3476 }
3477
3478 /*
3479 * Make a directory file.
3480 */
3481 /* ARGSUSED */
3482 int
3483 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3484 {
3485 /* {
3486 syscallarg(const char *) path;
3487 syscallarg(int) mode;
3488 } */
3489 struct proc *p = l->l_proc;
3490 struct vnode *vp;
3491 struct vattr vattr;
3492 int error;
3493 struct nameidata nd;
3494
3495 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3496 SCARG(uap, path));
3497 if ((error = namei(&nd)) != 0)
3498 return (error);
3499 vp = nd.ni_vp;
3500 if (vp != NULL) {
3501 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3502 if (nd.ni_dvp == vp)
3503 vrele(nd.ni_dvp);
3504 else
3505 vput(nd.ni_dvp);
3506 vrele(vp);
3507 return (EEXIST);
3508 }
3509 VATTR_NULL(&vattr);
3510 vattr.va_type = VDIR;
3511 /* We will read cwdi->cwdi_cmask unlocked. */
3512 vattr.va_mode =
3513 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3514 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3515 if (!error)
3516 vput(nd.ni_vp);
3517 return (error);
3518 }
3519
3520 /*
3521 * Remove a directory file.
3522 */
3523 /* ARGSUSED */
3524 int
3525 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3526 {
3527 /* {
3528 syscallarg(const char *) path;
3529 } */
3530 struct vnode *vp;
3531 int error;
3532 struct nameidata nd;
3533
3534 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3535 SCARG(uap, path));
3536 if ((error = namei(&nd)) != 0)
3537 return (error);
3538 vp = nd.ni_vp;
3539 if (vp->v_type != VDIR) {
3540 error = ENOTDIR;
3541 goto out;
3542 }
3543 /*
3544 * No rmdir "." please.
3545 */
3546 if (nd.ni_dvp == vp) {
3547 error = EINVAL;
3548 goto out;
3549 }
3550 /*
3551 * The root of a mounted filesystem cannot be deleted.
3552 */
3553 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3554 error = EBUSY;
3555 goto out;
3556 }
3557 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3558 return (error);
3559
3560 out:
3561 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3562 if (nd.ni_dvp == vp)
3563 vrele(nd.ni_dvp);
3564 else
3565 vput(nd.ni_dvp);
3566 vput(vp);
3567 return (error);
3568 }
3569
3570 /*
3571 * Read a block of directory entries in a file system independent format.
3572 */
3573 int
3574 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3575 {
3576 /* {
3577 syscallarg(int) fd;
3578 syscallarg(char *) buf;
3579 syscallarg(size_t) count;
3580 } */
3581 file_t *fp;
3582 int error, done;
3583
3584 /* fd_getvnode() will use the descriptor for us */
3585 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3586 return (error);
3587 if ((fp->f_flag & FREAD) == 0) {
3588 error = EBADF;
3589 goto out;
3590 }
3591 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3592 SCARG(uap, count), &done, l, 0, 0);
3593 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3594 *retval = done;
3595 out:
3596 fd_putfile(SCARG(uap, fd));
3597 return (error);
3598 }
3599
3600 /*
3601 * Set the mode mask for creation of filesystem nodes.
3602 */
3603 int
3604 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3605 {
3606 /* {
3607 syscallarg(mode_t) newmask;
3608 } */
3609 struct proc *p = l->l_proc;
3610 struct cwdinfo *cwdi;
3611
3612 /*
3613 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3614 * important is that we serialize changes to the mask. The
3615 * rw_exit() will issue a write memory barrier on our behalf,
3616 * and force the changes out to other CPUs (as it must use an
3617 * atomic operation, draining the local CPU's store buffers).
3618 */
3619 cwdi = p->p_cwdi;
3620 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3621 *retval = cwdi->cwdi_cmask;
3622 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3623 rw_exit(&cwdi->cwdi_lock);
3624
3625 return (0);
3626 }
3627
3628 int
3629 dorevoke(struct vnode *vp, kauth_cred_t cred)
3630 {
3631 struct vattr vattr;
3632 int error;
3633
3634 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3635 return error;
3636 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3637 (error = kauth_authorize_generic(cred,
3638 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3639 VOP_REVOKE(vp, REVOKEALL);
3640 return (error);
3641 }
3642
3643 /*
3644 * Void all references to file by ripping underlying filesystem
3645 * away from vnode.
3646 */
3647 /* ARGSUSED */
3648 int
3649 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3650 {
3651 /* {
3652 syscallarg(const char *) path;
3653 } */
3654 struct vnode *vp;
3655 int error;
3656 struct nameidata nd;
3657
3658 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3659 SCARG(uap, path));
3660 if ((error = namei(&nd)) != 0)
3661 return (error);
3662 vp = nd.ni_vp;
3663 error = dorevoke(vp, l->l_cred);
3664 vrele(vp);
3665 return (error);
3666 }
3667