vfs_syscalls.c revision 1.358 1 /* $NetBSD: vfs_syscalls.c,v 1.358 2008/05/06 18:43:44 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.358 2008/05/06 18:43:44 ad Exp $");
67
68 #include "opt_compat_netbsd.h"
69 #include "opt_compat_43.h"
70 #include "opt_fileassoc.h"
71 #include "fss.h"
72 #include "veriexec.h"
73
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/namei.h>
77 #include <sys/filedesc.h>
78 #include <sys/kernel.h>
79 #include <sys/file.h>
80 #include <sys/stat.h>
81 #include <sys/vnode.h>
82 #include <sys/mount.h>
83 #include <sys/proc.h>
84 #include <sys/uio.h>
85 #include <sys/malloc.h>
86 #include <sys/kmem.h>
87 #include <sys/dirent.h>
88 #include <sys/sysctl.h>
89 #include <sys/syscallargs.h>
90 #include <sys/vfs_syscalls.h>
91 #include <sys/ktrace.h>
92 #ifdef FILEASSOC
93 #include <sys/fileassoc.h>
94 #endif /* FILEASSOC */
95 #include <sys/verified_exec.h>
96 #include <sys/kauth.h>
97 #include <sys/atomic.h>
98
99 #include <miscfs/genfs/genfs.h>
100 #include <miscfs/syncfs/syncfs.h>
101 #include <miscfs/specfs/specdev.h>
102
103 #ifdef COMPAT_30
104 #include "opt_nfsserver.h"
105 #include <nfs/rpcv2.h>
106 #endif
107 #include <nfs/nfsproto.h>
108 #ifdef COMPAT_30
109 #include <nfs/nfs.h>
110 #include <nfs/nfs_var.h>
111 #endif
112
113 #if NFSS > 0
114 #include <dev/fssvar.h>
115 #endif
116
117 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
118
119 static int change_dir(struct nameidata *, struct lwp *);
120 static int change_flags(struct vnode *, u_long, struct lwp *);
121 static int change_mode(struct vnode *, int, struct lwp *l);
122 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
123
124 void checkdirs(struct vnode *);
125
126 int dovfsusermount = 0;
127
128 /*
129 * Virtual File System System Calls
130 */
131
132 /*
133 * Mount a file system.
134 */
135
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Translation table for the 4.3BSD and NetBSD 0.9 mount syscalls, which
 * named file systems by number.  The array index IS the historic type
 * number, so the order of the entries is significant.
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	[0] = NULL,		/* MOUNT_NONE */
	[1] = MOUNT_FFS,	/* MOUNT_UFS */
	[2] = MOUNT_NFS,
	[3] = MOUNT_MFS,
	[4] = MOUNT_MSDOS,
	[5] = MOUNT_CD9660,	/* MOUNT_ISOFS */
	[6] = MOUNT_FDESC,
	[7] = MOUNT_KERNFS,
	[8] = NULL,		/* MOUNT_DEVFS */
	[9] = MOUNT_AFS,
};
/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
159
160 static int
161 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
162 void *data, size_t *data_len)
163 {
164 struct mount *mp;
165 int error = 0, saved_flags;
166
167 mp = vp->v_mount;
168 saved_flags = mp->mnt_flag;
169
170 /* We can operate only on VV_ROOT nodes. */
171 if ((vp->v_vflag & VV_ROOT) == 0) {
172 error = EINVAL;
173 goto out;
174 }
175
176 /*
177 * We only allow the filesystem to be reloaded if it
178 * is currently mounted read-only.
179 */
180 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
181 error = EOPNOTSUPP; /* Needs translation */
182 goto out;
183 }
184
185 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
186 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
187 if (error)
188 goto out;
189
190 if (vfs_busy(mp, NULL)) {
191 error = EPERM;
192 goto out;
193 }
194
195 mutex_enter(&mp->mnt_updating);
196
197 mp->mnt_flag &= ~MNT_OP_FLAGS;
198 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
199
200 /*
201 * Set the mount level flags.
202 */
203 if (flags & MNT_RDONLY)
204 mp->mnt_flag |= MNT_RDONLY;
205 else if (mp->mnt_flag & MNT_RDONLY)
206 mp->mnt_iflag |= IMNT_WANTRDWR;
207 mp->mnt_flag &=
208 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
209 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
210 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
211 mp->mnt_flag |= flags &
212 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
213 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
214 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
215 MNT_IGNORE);
216
217 error = VFS_MOUNT(mp, path, data, data_len);
218
219 #if defined(COMPAT_30) && defined(NFSSERVER)
220 if (error && data != NULL) {
221 int error2;
222
223 /* Update failed; let's try and see if it was an
224 * export request. */
225 error2 = nfs_update_exports_30(mp, path, data, l);
226
227 /* Only update error code if the export request was
228 * understood but some problem occurred while
229 * processing it. */
230 if (error2 != EJUSTRETURN)
231 error = error2;
232 }
233 #endif
234 if (mp->mnt_iflag & IMNT_WANTRDWR)
235 mp->mnt_flag &= ~MNT_RDONLY;
236 if (error)
237 mp->mnt_flag = saved_flags;
238 mp->mnt_flag &= ~MNT_OP_FLAGS;
239 mp->mnt_iflag &= ~IMNT_WANTRDWR;
240 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
241 if (mp->mnt_syncer == NULL)
242 error = vfs_allocate_syncvnode(mp);
243 } else {
244 if (mp->mnt_syncer != NULL)
245 vfs_deallocate_syncvnode(mp);
246 }
247 mutex_exit(&mp->mnt_updating);
248 vfs_unbusy(mp, false, NULL);
249
250 out:
251 return (error);
252 }
253
254 static int
255 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
256 {
257 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
258 int error;
259
260 /* Copy file-system type from userspace. */
261 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
262 if (error) {
263 #if defined(COMPAT_09) || defined(COMPAT_43)
264 /*
265 * Historically, filesystem types were identified by numbers.
266 * If we get an integer for the filesystem type instead of a
267 * string, we check to see if it matches one of the historic
268 * filesystem types.
269 */
270 u_long fsindex = (u_long)fstype;
271 if (fsindex >= nmountcompatnames ||
272 mountcompatnames[fsindex] == NULL)
273 return ENODEV;
274 strlcpy(fstypename, mountcompatnames[fsindex],
275 sizeof(fstypename));
276 #else
277 return error;
278 #endif
279 }
280
281 #ifdef COMPAT_10
282 /* Accept `ufs' as an alias for `ffs'. */
283 if (strcmp(fstypename, "ufs") == 0)
284 fstypename[0] = 'f';
285 #endif
286
287 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL)
288 return ENODEV;
289 return 0;
290 }
291
292 static int
293 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
294 const char *path, int flags, void *data, size_t *data_len, u_int recurse)
295 {
296 struct mount *mp = NULL;
297 struct vnode *vp = *vpp;
298 struct vattr va;
299 int error;
300
301 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
302 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
303 if (error)
304 return error;
305
306 /* Can't make a non-dir a mount-point (from here anyway). */
307 if (vp->v_type != VDIR)
308 return ENOTDIR;
309
310 /*
311 * If the user is not root, ensure that they own the directory
312 * onto which we are attempting to mount.
313 */
314 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
315 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
316 (error = kauth_authorize_generic(l->l_cred,
317 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
318 return error;
319 }
320
321 if (flags & MNT_EXPORTED)
322 return EINVAL;
323
324 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
325 return error;
326
327 /*
328 * Check if a file-system is not already mounted on this vnode.
329 */
330 if (vp->v_mountedhere != NULL)
331 return EBUSY;
332
333 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
334 if (mp == NULL)
335 return ENOMEM;
336
337 mp->mnt_op = vfsops;
338 mp->mnt_refcnt = 1;
339
340 TAILQ_INIT(&mp->mnt_vnodelist);
341 rw_init(&mp->mnt_unmounting);
342 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
343 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
344 error = vfs_busy(mp, NULL);
345 KASSERT(error == 0);
346 mutex_enter(&mp->mnt_updating);
347
348 mp->mnt_vnodecovered = vp;
349 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
350 mount_initspecific(mp);
351
352 /*
353 * The underlying file system may refuse the mount for
354 * various reasons. Allow the user to force it to happen.
355 *
356 * Set the mount level flags.
357 */
358 mp->mnt_flag = flags &
359 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
360 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
361 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
362 MNT_IGNORE | MNT_RDONLY);
363
364 error = VFS_MOUNT(mp, path, data, data_len);
365 mp->mnt_flag &= ~MNT_OP_FLAGS;
366
367 /*
368 * Put the new filesystem on the mount list after root.
369 */
370 cache_purge(vp);
371 if (error != 0) {
372 vp->v_mountedhere = NULL;
373 mutex_exit(&mp->mnt_updating);
374 vfs_unbusy(mp, false, NULL);
375 vfs_destroy(mp);
376 return error;
377 }
378
379 mp->mnt_iflag &= ~IMNT_WANTRDWR;
380 mutex_enter(&mountlist_lock);
381 vp->v_mountedhere = mp;
382 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
383 mutex_exit(&mountlist_lock);
384 vn_restorerecurse(vp, recurse);
385 VOP_UNLOCK(vp, 0);
386 checkdirs(vp);
387 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
388 error = vfs_allocate_syncvnode(mp);
389 /* Hold an additional reference to the mount across VFS_START(). */
390 mutex_exit(&mp->mnt_updating);
391 vfs_unbusy(mp, true, NULL);
392 (void) VFS_STATVFS(mp, &mp->mnt_stat);
393 error = VFS_START(mp, 0);
394 if (error) {
395 vrele(vp);
396 vfs_destroy(mp);
397 }
398 /* Drop reference held for VFS_START(). */
399 vfs_destroy(mp);
400 *vpp = NULL;
401 return error;
402 }
403
404 static int
405 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
406 void *data, size_t *data_len)
407 {
408 struct mount *mp;
409 int error;
410
411 /* If MNT_GETARGS is specified, it should be the only flag. */
412 if (flags & ~MNT_GETARGS)
413 return EINVAL;
414
415 mp = vp->v_mount;
416
417 /* XXX: probably some notion of "can see" here if we want isolation. */
418 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
419 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
420 if (error)
421 return error;
422
423 if ((vp->v_vflag & VV_ROOT) == 0)
424 return EINVAL;
425
426 if (vfs_busy(mp, NULL))
427 return EPERM;
428
429 mutex_enter(&mp->mnt_updating);
430 mp->mnt_flag &= ~MNT_OP_FLAGS;
431 mp->mnt_flag |= MNT_GETARGS;
432 error = VFS_MOUNT(mp, path, data, data_len);
433 mp->mnt_flag &= ~MNT_OP_FLAGS;
434 mutex_exit(&mp->mnt_updating);
435
436 vfs_unbusy(mp, false, NULL);
437 return (error);
438 }
439
#ifdef COMPAT_40
/*
 * Old mount(2) entry point without a data_len argument.  A length of 0
 * is passed on, which makes do_sys_mount() use the file system's default
 * data size; the value do_sys_mount() stores through its retval pointer
 * is discarded.
 */
/* ARGSUSED */
int
compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) type;
		syscallarg(const char *) path;
		syscallarg(int) flags;
		syscallarg(void *) data;
	} */
	const char *type = SCARG(uap, type);
	const char *path = SCARG(uap, path);
	int flags = SCARG(uap, flags);
	void *data = SCARG(uap, data);
	register_t dummy;

	return do_sys_mount(l, NULL, type, path, flags, data,
	    UIO_USERSPACE, 0, &dummy);
}
#endif
457
458 int
459 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
460 {
461 /* {
462 syscallarg(const char *) type;
463 syscallarg(const char *) path;
464 syscallarg(int) flags;
465 syscallarg(void *) data;
466 syscallarg(size_t) data_len;
467 } */
468
469 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
470 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
471 SCARG(uap, data_len), retval);
472 }
473
474 int
475 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
476 const char *path, int flags, void *data, enum uio_seg data_seg,
477 size_t data_len, register_t *retval)
478 {
479 struct vnode *vp;
480 struct nameidata nd;
481 void *data_buf = data;
482 u_int recurse;
483 int error;
484
485 /*
486 * Get vnode to be covered
487 */
488 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
489 if ((error = namei(&nd)) != 0)
490 return (error);
491 vp = nd.ni_vp;
492
493 /*
494 * A lookup in VFS_MOUNT might result in an attempt to
495 * lock this vnode again, so make the lock recursive.
496 */
497 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
498 recurse = vn_setrecurse(vp);
499
500 if (vfsops == NULL) {
501 if (flags & (MNT_GETARGS | MNT_UPDATE))
502 vfsops = vp->v_mount->mnt_op;
503 else {
504 /* 'type' is userspace */
505 error = mount_get_vfsops(type, &vfsops);
506 if (error != 0)
507 goto done;
508 }
509 }
510
511 if (data != NULL && data_seg == UIO_USERSPACE) {
512 if (data_len == 0) {
513 /* No length supplied, use default for filesystem */
514 data_len = vfsops->vfs_min_mount_data;
515 if (data_len > VFS_MAX_MOUNT_DATA) {
516 /* maybe a force loaded old LKM */
517 error = EINVAL;
518 goto done;
519 }
520 #ifdef COMPAT_30
521 /* Hopefully a longer buffer won't make copyin() fail */
522 if (flags & MNT_UPDATE
523 && data_len < sizeof (struct mnt_export_args30))
524 data_len = sizeof (struct mnt_export_args30);
525 #endif
526 }
527 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
528
529 /* NFS needs the buffer even for mnt_getargs .... */
530 error = copyin(data, data_buf, data_len);
531 if (error != 0)
532 goto done;
533 }
534
535 if (flags & MNT_GETARGS) {
536 if (data_len == 0) {
537 error = EINVAL;
538 goto done;
539 }
540 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
541 if (error != 0)
542 goto done;
543 if (data_seg == UIO_USERSPACE)
544 error = copyout(data_buf, data, data_len);
545 *retval = data_len;
546 } else if (flags & MNT_UPDATE) {
547 error = mount_update(l, vp, path, flags, data_buf, &data_len);
548 } else {
549 /* Locking is handled internally in mount_domount(). */
550 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
551 &data_len, recurse);
552 }
553
554 done:
555 if (vp != NULL) {
556 vn_restorerecurse(vp, recurse);
557 vput(vp);
558 }
559 if (data_buf != data)
560 free(data_buf, M_TEMP);
561 return (error);
562 }
563
564 /*
565 * Scan all active processes to see if any of them have a current
566 * or root directory onto which the new filesystem has just been
567 * mounted. If so, replace them with the new mount point.
568 */
569 void
570 checkdirs(struct vnode *olddp)
571 {
572 struct cwdinfo *cwdi;
573 struct vnode *newdp, *rele1, *rele2;
574 struct proc *p;
575 bool retry;
576
577 if (olddp->v_usecount == 1)
578 return;
579 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
580 panic("mount: lost mount");
581
582 do {
583 retry = false;
584 mutex_enter(proc_lock);
585 PROCLIST_FOREACH(p, &allproc) {
586 if ((p->p_flag & PK_MARKER) != 0)
587 continue;
588 if ((cwdi = p->p_cwdi) == NULL)
589 continue;
590 /*
591 * Can't change to the old directory any more,
592 * so even if we see a stale value it's not a
593 * problem.
594 */
595 if (cwdi->cwdi_cdir != olddp &&
596 cwdi->cwdi_rdir != olddp)
597 continue;
598 retry = true;
599 rele1 = NULL;
600 rele2 = NULL;
601 atomic_inc_uint(&cwdi->cwdi_refcnt);
602 mutex_exit(proc_lock);
603 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
604 if (cwdi->cwdi_cdir == olddp) {
605 rele1 = cwdi->cwdi_cdir;
606 VREF(newdp);
607 cwdi->cwdi_cdir = newdp;
608 }
609 if (cwdi->cwdi_rdir == olddp) {
610 rele2 = cwdi->cwdi_rdir;
611 VREF(newdp);
612 cwdi->cwdi_rdir = newdp;
613 }
614 rw_exit(&cwdi->cwdi_lock);
615 cwdfree(cwdi);
616 if (rele1 != NULL)
617 vrele(rele1);
618 if (rele2 != NULL)
619 vrele(rele2);
620 mutex_enter(proc_lock);
621 break;
622 }
623 mutex_exit(proc_lock);
624 } while (retry);
625
626 if (rootvnode == olddp) {
627 vrele(rootvnode);
628 VREF(newdp);
629 rootvnode = newdp;
630 }
631 vput(newdp);
632 }
633
634 /*
635 * Unmount a file system.
636 *
637 * Note: unmount takes a path to the vnode mounted on as argument,
638 * not special file (as before).
639 */
640 /* ARGSUSED */
641 int
642 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
643 {
644 /* {
645 syscallarg(const char *) path;
646 syscallarg(int) flags;
647 } */
648 struct vnode *vp;
649 struct mount *mp;
650 int error;
651 struct nameidata nd;
652
653 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
654 SCARG(uap, path));
655 if ((error = namei(&nd)) != 0)
656 return (error);
657 vp = nd.ni_vp;
658 mp = vp->v_mount;
659 atomic_inc_uint(&mp->mnt_refcnt);
660 VOP_UNLOCK(vp, 0);
661
662 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
663 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
664 if (error) {
665 vrele(vp);
666 vfs_destroy(mp);
667 return (error);
668 }
669
670 /*
671 * Don't allow unmounting the root file system.
672 */
673 if (mp->mnt_flag & MNT_ROOTFS) {
674 vrele(vp);
675 vfs_destroy(mp);
676 return (EINVAL);
677 }
678
679 /*
680 * Must be the root of the filesystem
681 */
682 if ((vp->v_vflag & VV_ROOT) == 0) {
683 vrele(vp);
684 vfs_destroy(mp);
685 return (EINVAL);
686 }
687
688 error = dounmount(mp, SCARG(uap, flags), l);
689 vrele(vp);
690 return error;
691 }
692
693 /*
694 * Do the actual file system unmount. File system is assumed to have
695 * been locked by the caller.
696 *
697 * => Caller gain reference to the mount, explicility for unmount.
698 * => Reference will be dropped in all cases.
699 */
700 int
701 dounmount(struct mount *mp, int flags, struct lwp *l)
702 {
703 struct vnode *coveredvp;
704 int error;
705 int async;
706 int used_syncer;
707
708 #if NVERIEXEC > 0
709 error = veriexec_unmountchk(mp);
710 if (error)
711 return (error);
712 #endif /* NVERIEXEC > 0 */
713
714 /*
715 * XXX Freeze syncer. Must do this before locking the
716 * mount point. See dounmount() for details.
717 */
718 mutex_enter(&syncer_mutex);
719 rw_enter(&mp->mnt_unmounting, RW_WRITER);
720 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
721 rw_exit(&mp->mnt_unmounting);
722 mutex_exit(&syncer_mutex);
723 vfs_destroy(mp);
724 return ENOENT;
725 }
726
727 used_syncer = (mp->mnt_syncer != NULL);
728
729 /*
730 * XXX Syncer must be frozen when we get here. This should really
731 * be done on a per-mountpoint basis, but especially the softdep
732 * code possibly called from the syncer doesn't exactly work on a
733 * per-mountpoint basis, so the softdep code would become a maze
734 * of vfs_busy() calls.
735 *
736 * The caller of dounmount() must acquire syncer_mutex because
737 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
738 * order, and we must preserve that order to avoid deadlock.
739 *
740 * So, if the file system did not use the syncer, now is
741 * the time to release the syncer_mutex.
742 */
743 if (used_syncer == 0)
744 mutex_exit(&syncer_mutex);
745
746 mp->mnt_iflag |= IMNT_UNMOUNT;
747 async = mp->mnt_flag & MNT_ASYNC;
748 mp->mnt_flag &= ~MNT_ASYNC;
749 cache_purgevfs(mp); /* remove cache entries for this file sys */
750 if (mp->mnt_syncer != NULL)
751 vfs_deallocate_syncvnode(mp);
752 error = 0;
753 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
754 #if NFSS > 0
755 error = fss_umount_hook(mp, (flags & MNT_FORCE));
756 #endif
757 if (error == 0)
758 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
759 }
760 vfs_scrubvnlist(mp);
761 if (error == 0 || (flags & MNT_FORCE))
762 error = VFS_UNMOUNT(mp, flags);
763 if (error) {
764 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
765 (void) vfs_allocate_syncvnode(mp);
766 mp->mnt_iflag &= ~IMNT_UNMOUNT;
767 mp->mnt_flag |= async;
768 rw_exit(&mp->mnt_unmounting);
769 if (used_syncer)
770 mutex_exit(&syncer_mutex);
771 return (error);
772 }
773 vfs_scrubvnlist(mp);
774 mutex_enter(&mountlist_lock);
775 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
776 coveredvp->v_mountedhere = NULL;
777 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
778 mp->mnt_iflag |= IMNT_GONE;
779 mutex_exit(&mountlist_lock);
780 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
781 panic("unmount: dangling vnode");
782 if (used_syncer)
783 mutex_exit(&syncer_mutex);
784 vfs_hooks_unmount(mp);
785 rw_exit(&mp->mnt_unmounting);
786 vfs_destroy(mp); /* caller provided reference */
787 vfs_destroy(mp); /* from mount(), final nail in coffin */
788 if (coveredvp != NULLVP)
789 vrele(coveredvp);
790 return (0);
791 }
792
793 /*
794 * Sync each mounted filesystem.
795 */
796 #ifdef DEBUG
797 int syncprt = 0;
798 struct ctldebug debug0 = { "syncprt", &syncprt };
799 #endif
800
801 /* ARGSUSED */
802 int
803 sys_sync(struct lwp *l, const void *v, register_t *retval)
804 {
805 struct mount *mp, *nmp;
806 int asyncflag;
807
808 if (l == NULL)
809 l = &lwp0;
810
811 mutex_enter(&mountlist_lock);
812 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
813 mp = nmp) {
814 if (vfs_busy(mp, &nmp)) {
815 continue;
816 }
817 mutex_enter(&mp->mnt_updating);
818 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
819 asyncflag = mp->mnt_flag & MNT_ASYNC;
820 mp->mnt_flag &= ~MNT_ASYNC;
821 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
822 if (asyncflag)
823 mp->mnt_flag |= MNT_ASYNC;
824 }
825 mutex_exit(&mp->mnt_updating);
826 vfs_unbusy(mp, false, &nmp);
827 }
828 mutex_exit(&mountlist_lock);
829 #ifdef DEBUG
830 if (syncprt)
831 vfs_bufstats();
832 #endif /* DEBUG */
833 return (0);
834 }
835
836 /*
837 * Change filesystem quotas.
838 */
839 /* ARGSUSED */
840 int
841 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
842 {
843 /* {
844 syscallarg(const char *) path;
845 syscallarg(int) cmd;
846 syscallarg(int) uid;
847 syscallarg(void *) arg;
848 } */
849 struct mount *mp;
850 int error;
851 struct nameidata nd;
852
853 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
854 SCARG(uap, path));
855 if ((error = namei(&nd)) != 0)
856 return (error);
857 mp = nd.ni_vp->v_mount;
858 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
859 SCARG(uap, arg));
860 vrele(nd.ni_vp);
861 return (error);
862 }
863
864 int
865 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
866 int root)
867 {
868 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
869 int error = 0;
870
871 /*
872 * If MNT_NOWAIT or MNT_LAZY is specified, do not
873 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
874 * overrides MNT_NOWAIT.
875 */
876 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
877 (flags != MNT_WAIT && flags != 0)) {
878 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
879 goto done;
880 }
881
882 /* Get the filesystem stats now */
883 memset(sp, 0, sizeof(*sp));
884 if ((error = VFS_STATVFS(mp, sp)) != 0) {
885 return error;
886 }
887
888 if (cwdi->cwdi_rdir == NULL)
889 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
890 done:
891 if (cwdi->cwdi_rdir != NULL) {
892 size_t len;
893 char *bp;
894 char *path = PNBUF_GET();
895
896 bp = path + MAXPATHLEN;
897 *--bp = '\0';
898 rw_enter(&cwdi->cwdi_lock, RW_READER);
899 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
900 MAXPATHLEN / 2, 0, l);
901 rw_exit(&cwdi->cwdi_lock);
902 if (error) {
903 PNBUF_PUT(path);
904 return error;
905 }
906 len = strlen(bp);
907 /*
908 * for mount points that are below our root, we can see
909 * them, so we fix up the pathname and return them. The
910 * rest we cannot see, so we don't allow viewing the
911 * data.
912 */
913 if (strncmp(bp, sp->f_mntonname, len) == 0) {
914 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
915 sizeof(sp->f_mntonname));
916 if (sp->f_mntonname[0] == '\0')
917 (void)strlcpy(sp->f_mntonname, "/",
918 sizeof(sp->f_mntonname));
919 } else {
920 if (root)
921 (void)strlcpy(sp->f_mntonname, "/",
922 sizeof(sp->f_mntonname));
923 else
924 error = EPERM;
925 }
926 PNBUF_PUT(path);
927 }
928 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
929 return error;
930 }
931
932 /*
933 * Get filesystem statistics by path.
934 */
935 int
936 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
937 {
938 struct mount *mp;
939 int error;
940 struct nameidata nd;
941
942 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
943 if ((error = namei(&nd)) != 0)
944 return error;
945 mp = nd.ni_vp->v_mount;
946 error = dostatvfs(mp, sb, l, flags, 1);
947 vrele(nd.ni_vp);
948 return error;
949 }
950
951 /* ARGSUSED */
952 int
953 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
954 {
955 /* {
956 syscallarg(const char *) path;
957 syscallarg(struct statvfs *) buf;
958 syscallarg(int) flags;
959 } */
960 struct statvfs *sb;
961 int error;
962
963 sb = STATVFSBUF_GET();
964 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
965 if (error == 0)
966 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
967 STATVFSBUF_PUT(sb);
968 return error;
969 }
970
971 /*
972 * Get filesystem statistics by fd.
973 */
974 int
975 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
976 {
977 file_t *fp;
978 struct mount *mp;
979 int error;
980
981 /* fd_getvnode() will use the descriptor for us */
982 if ((error = fd_getvnode(fd, &fp)) != 0)
983 return (error);
984 mp = ((struct vnode *)fp->f_data)->v_mount;
985 error = dostatvfs(mp, sb, curlwp, flags, 1);
986 fd_putfile(fd);
987 return error;
988 }
989
990 /* ARGSUSED */
991 int
992 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
993 {
994 /* {
995 syscallarg(int) fd;
996 syscallarg(struct statvfs *) buf;
997 syscallarg(int) flags;
998 } */
999 struct statvfs *sb;
1000 int error;
1001
1002 sb = STATVFSBUF_GET();
1003 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1004 if (error == 0)
1005 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1006 STATVFSBUF_PUT(sb);
1007 return error;
1008 }
1009
1010
1011 /*
1012 * Get statistics on all filesystems.
1013 */
1014 int
1015 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1016 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1017 register_t *retval)
1018 {
1019 int root = 0;
1020 struct proc *p = l->l_proc;
1021 struct mount *mp, *nmp;
1022 struct statvfs *sb;
1023 size_t count, maxcount;
1024 int error = 0;
1025
1026 sb = STATVFSBUF_GET();
1027 maxcount = bufsize / entry_sz;
1028 mutex_enter(&mountlist_lock);
1029 count = 0;
1030 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1031 mp = nmp) {
1032 if (vfs_busy(mp, &nmp)) {
1033 continue;
1034 }
1035 if (sfsp && count < maxcount) {
1036 error = dostatvfs(mp, sb, l, flags, 0);
1037 if (error) {
1038 vfs_unbusy(mp, false, &nmp);
1039 continue;
1040 }
1041 error = copyfn(sb, sfsp, entry_sz);
1042 if (error) {
1043 vfs_unbusy(mp, false, NULL);
1044 goto out;
1045 }
1046 sfsp = (char *)sfsp + entry_sz;
1047 root |= strcmp(sb->f_mntonname, "/") == 0;
1048 }
1049 count++;
1050 vfs_unbusy(mp, false, &nmp);
1051 }
1052 mutex_exit(&mountlist_lock);
1053
1054 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1055 /*
1056 * fake a root entry
1057 */
1058 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1059 sb, l, flags, 1);
1060 if (error != 0)
1061 goto out;
1062 if (sfsp)
1063 error = copyfn(sb, sfsp, entry_sz);
1064 count++;
1065 }
1066 if (sfsp && count > maxcount)
1067 *retval = maxcount;
1068 else
1069 *retval = count;
1070 out:
1071 STATVFSBUF_PUT(sb);
1072 return error;
1073 }
1074
1075 int
1076 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1077 {
1078 /* {
1079 syscallarg(struct statvfs *) buf;
1080 syscallarg(size_t) bufsize;
1081 syscallarg(int) flags;
1082 } */
1083
1084 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1085 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1086 }
1087
1088 /*
1089 * Change current working directory to a given file descriptor.
1090 */
1091 /* ARGSUSED */
1092 int
1093 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1094 {
1095 /* {
1096 syscallarg(int) fd;
1097 } */
1098 struct proc *p = l->l_proc;
1099 struct cwdinfo *cwdi;
1100 struct vnode *vp, *tdp;
1101 struct mount *mp;
1102 file_t *fp;
1103 int error, fd;
1104
1105 /* fd_getvnode() will use the descriptor for us */
1106 fd = SCARG(uap, fd);
1107 if ((error = fd_getvnode(fd, &fp)) != 0)
1108 return (error);
1109 vp = fp->f_data;
1110
1111 VREF(vp);
1112 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1113 if (vp->v_type != VDIR)
1114 error = ENOTDIR;
1115 else
1116 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1117 if (error) {
1118 vput(vp);
1119 goto out;
1120 }
1121 while ((mp = vp->v_mountedhere) != NULL) {
1122 error = vfs_busy(mp, NULL);
1123 vput(vp);
1124 if (error != 0)
1125 goto out;
1126 error = VFS_ROOT(mp, &tdp);
1127 vfs_unbusy(mp, false, NULL);
1128 if (error)
1129 goto out;
1130 vp = tdp;
1131 }
1132 VOP_UNLOCK(vp, 0);
1133
1134 /*
1135 * Disallow changing to a directory not under the process's
1136 * current root directory (if there is one).
1137 */
1138 cwdi = p->p_cwdi;
1139 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1140 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1141 vrele(vp);
1142 error = EPERM; /* operation not permitted */
1143 } else {
1144 vrele(cwdi->cwdi_cdir);
1145 cwdi->cwdi_cdir = vp;
1146 }
1147 rw_exit(&cwdi->cwdi_lock);
1148
1149 out:
1150 fd_putfile(fd);
1151 return (error);
1152 }
1153
1154 /*
1155 * Change this process's notion of the root directory to a given file
1156 * descriptor.
1157 */
1158 int
1159 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1160 {
1161 struct proc *p = l->l_proc;
1162 struct cwdinfo *cwdi;
1163 struct vnode *vp;
1164 file_t *fp;
1165 int error, fd = SCARG(uap, fd);
1166
1167 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1168 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1169 return error;
1170 /* fd_getvnode() will use the descriptor for us */
1171 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1172 return error;
1173 vp = fp->f_data;
1174 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1175 if (vp->v_type != VDIR)
1176 error = ENOTDIR;
1177 else
1178 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1179 VOP_UNLOCK(vp, 0);
1180 if (error)
1181 goto out;
1182 VREF(vp);
1183
1184 /*
1185 * Prevent escaping from chroot by putting the root under
1186 * the working directory. Silently chdir to / if we aren't
1187 * already there.
1188 */
1189 cwdi = p->p_cwdi;
1190 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1191 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1192 /*
1193 * XXX would be more failsafe to change directory to a
1194 * deadfs node here instead
1195 */
1196 vrele(cwdi->cwdi_cdir);
1197 VREF(vp);
1198 cwdi->cwdi_cdir = vp;
1199 }
1200
1201 if (cwdi->cwdi_rdir != NULL)
1202 vrele(cwdi->cwdi_rdir);
1203 cwdi->cwdi_rdir = vp;
1204 rw_exit(&cwdi->cwdi_lock);
1205
1206 out:
1207 fd_putfile(fd);
1208 return (error);
1209 }
1210
1211 /*
1212 * Change current working directory (``.'').
1213 */
1214 /* ARGSUSED */
1215 int
1216 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1217 {
1218 /* {
1219 syscallarg(const char *) path;
1220 } */
1221 struct proc *p = l->l_proc;
1222 struct cwdinfo *cwdi;
1223 int error;
1224 struct nameidata nd;
1225
1226 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1227 SCARG(uap, path));
1228 if ((error = change_dir(&nd, l)) != 0)
1229 return (error);
1230 cwdi = p->p_cwdi;
1231 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1232 vrele(cwdi->cwdi_cdir);
1233 cwdi->cwdi_cdir = nd.ni_vp;
1234 rw_exit(&cwdi->cwdi_lock);
1235 return (0);
1236 }
1237
1238 /*
1239 * Change notion of root (``/'') directory.
1240 */
1241 /* ARGSUSED */
1242 int
1243 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1244 {
1245 /* {
1246 syscallarg(const char *) path;
1247 } */
1248 struct proc *p = l->l_proc;
1249 struct cwdinfo *cwdi;
1250 struct vnode *vp;
1251 int error;
1252 struct nameidata nd;
1253
1254 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1255 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1256 return (error);
1257 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1258 SCARG(uap, path));
1259 if ((error = change_dir(&nd, l)) != 0)
1260 return (error);
1261
1262 cwdi = p->p_cwdi;
1263 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1264 if (cwdi->cwdi_rdir != NULL)
1265 vrele(cwdi->cwdi_rdir);
1266 vp = nd.ni_vp;
1267 cwdi->cwdi_rdir = vp;
1268
1269 /*
1270 * Prevent escaping from chroot by putting the root under
1271 * the working directory. Silently chdir to / if we aren't
1272 * already there.
1273 */
1274 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1275 /*
1276 * XXX would be more failsafe to change directory to a
1277 * deadfs node here instead
1278 */
1279 vrele(cwdi->cwdi_cdir);
1280 VREF(vp);
1281 cwdi->cwdi_cdir = vp;
1282 }
1283 rw_exit(&cwdi->cwdi_lock);
1284
1285 return (0);
1286 }
1287
1288 /*
1289 * Common routine for chroot and chdir.
1290 */
1291 static int
1292 change_dir(struct nameidata *ndp, struct lwp *l)
1293 {
1294 struct vnode *vp;
1295 int error;
1296
1297 if ((error = namei(ndp)) != 0)
1298 return (error);
1299 vp = ndp->ni_vp;
1300 if (vp->v_type != VDIR)
1301 error = ENOTDIR;
1302 else
1303 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1304
1305 if (error)
1306 vput(vp);
1307 else
1308 VOP_UNLOCK(vp, 0);
1309 return (error);
1310 }
1311
1312 /*
1313 * Check permissions, allocate an open file structure,
1314 * and call the device open routine if any.
1315 */
1316 int
1317 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1318 {
1319 /* {
1320 syscallarg(const char *) path;
1321 syscallarg(int) flags;
1322 syscallarg(int) mode;
1323 } */
1324 struct proc *p = l->l_proc;
1325 struct cwdinfo *cwdi = p->p_cwdi;
1326 file_t *fp;
1327 struct vnode *vp;
1328 int flags, cmode;
1329 int type, indx, error;
1330 struct flock lf;
1331 struct nameidata nd;
1332
1333 flags = FFLAGS(SCARG(uap, flags));
1334 if ((flags & (FREAD | FWRITE)) == 0)
1335 return (EINVAL);
1336 if ((error = fd_allocfile(&fp, &indx)) != 0)
1337 return (error);
1338 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1339 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1340 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1341 SCARG(uap, path));
1342 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1343 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1344 fd_abort(p, fp, indx);
1345 if ((error == EDUPFD || error == EMOVEFD) &&
1346 l->l_dupfd >= 0 && /* XXX from fdopen */
1347 (error =
1348 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1349 *retval = indx;
1350 return (0);
1351 }
1352 if (error == ERESTART)
1353 error = EINTR;
1354 return (error);
1355 }
1356
1357 l->l_dupfd = 0;
1358 vp = nd.ni_vp;
1359 fp->f_flag = flags & FMASK;
1360 fp->f_type = DTYPE_VNODE;
1361 fp->f_ops = &vnops;
1362 fp->f_data = vp;
1363 if (flags & (O_EXLOCK | O_SHLOCK)) {
1364 lf.l_whence = SEEK_SET;
1365 lf.l_start = 0;
1366 lf.l_len = 0;
1367 if (flags & O_EXLOCK)
1368 lf.l_type = F_WRLCK;
1369 else
1370 lf.l_type = F_RDLCK;
1371 type = F_FLOCK;
1372 if ((flags & FNONBLOCK) == 0)
1373 type |= F_WAIT;
1374 VOP_UNLOCK(vp, 0);
1375 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1376 if (error) {
1377 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1378 fd_abort(p, fp, indx);
1379 return (error);
1380 }
1381 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1382 atomic_or_uint(&fp->f_flag, FHASLOCK);
1383 }
1384 VOP_UNLOCK(vp, 0);
1385 *retval = indx;
1386 fd_affix(p, fp, indx);
1387 return (0);
1388 }
1389
1390 static void
1391 vfs__fhfree(fhandle_t *fhp)
1392 {
1393 size_t fhsize;
1394
1395 if (fhp == NULL) {
1396 return;
1397 }
1398 fhsize = FHANDLE_SIZE(fhp);
1399 kmem_free(fhp, fhsize);
1400 }
1401
1402 /*
1403 * vfs_composefh: compose a filehandle.
1404 */
1405
1406 int
1407 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1408 {
1409 struct mount *mp;
1410 struct fid *fidp;
1411 int error;
1412 size_t needfhsize;
1413 size_t fidsize;
1414
1415 mp = vp->v_mount;
1416 fidp = NULL;
1417 if (*fh_size < FHANDLE_SIZE_MIN) {
1418 fidsize = 0;
1419 } else {
1420 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1421 if (fhp != NULL) {
1422 memset(fhp, 0, *fh_size);
1423 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1424 fidp = &fhp->fh_fid;
1425 }
1426 }
1427 error = VFS_VPTOFH(vp, fidp, &fidsize);
1428 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1429 if (error == 0 && *fh_size < needfhsize) {
1430 error = E2BIG;
1431 }
1432 *fh_size = needfhsize;
1433 return error;
1434 }
1435
1436 int
1437 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1438 {
1439 struct mount *mp;
1440 fhandle_t *fhp;
1441 size_t fhsize;
1442 size_t fidsize;
1443 int error;
1444
1445 *fhpp = NULL;
1446 mp = vp->v_mount;
1447 fidsize = 0;
1448 error = VFS_VPTOFH(vp, NULL, &fidsize);
1449 KASSERT(error != 0);
1450 if (error != E2BIG) {
1451 goto out;
1452 }
1453 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1454 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1455 if (fhp == NULL) {
1456 error = ENOMEM;
1457 goto out;
1458 }
1459 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1460 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1461 if (error == 0) {
1462 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1463 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1464 *fhpp = fhp;
1465 } else {
1466 kmem_free(fhp, fhsize);
1467 }
1468 out:
1469 return error;
1470 }
1471
1472 void
1473 vfs_composefh_free(fhandle_t *fhp)
1474 {
1475
1476 vfs__fhfree(fhp);
1477 }
1478
1479 /*
1480 * vfs_fhtovp: lookup a vnode by a filehandle.
1481 */
1482
1483 int
1484 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1485 {
1486 struct mount *mp;
1487 int error;
1488
1489 *vpp = NULL;
1490 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1491 if (mp == NULL) {
1492 error = ESTALE;
1493 goto out;
1494 }
1495 if (mp->mnt_op->vfs_fhtovp == NULL) {
1496 error = EOPNOTSUPP;
1497 goto out;
1498 }
1499 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1500 out:
1501 return error;
1502 }
1503
1504 /*
1505 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1506 * the needed size.
1507 */
1508
1509 int
1510 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1511 {
1512 fhandle_t *fhp;
1513 int error;
1514
1515 *fhpp = NULL;
1516 if (fhsize > FHANDLE_SIZE_MAX) {
1517 return EINVAL;
1518 }
1519 if (fhsize < FHANDLE_SIZE_MIN) {
1520 return EINVAL;
1521 }
1522 again:
1523 fhp = kmem_alloc(fhsize, KM_SLEEP);
1524 if (fhp == NULL) {
1525 return ENOMEM;
1526 }
1527 error = copyin(ufhp, fhp, fhsize);
1528 if (error == 0) {
1529 /* XXX this check shouldn't be here */
1530 if (FHANDLE_SIZE(fhp) == fhsize) {
1531 *fhpp = fhp;
1532 return 0;
1533 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1534 /*
1535 * a kludge for nfsv2 padded handles.
1536 */
1537 size_t sz;
1538
1539 sz = FHANDLE_SIZE(fhp);
1540 kmem_free(fhp, fhsize);
1541 fhsize = sz;
1542 goto again;
1543 } else {
1544 /*
1545 * userland told us wrong size.
1546 */
1547 error = EINVAL;
1548 }
1549 }
1550 kmem_free(fhp, fhsize);
1551 return error;
1552 }
1553
1554 void
1555 vfs_copyinfh_free(fhandle_t *fhp)
1556 {
1557
1558 vfs__fhfree(fhp);
1559 }
1560
1561 /*
1562 * Get file handle system call
1563 */
1564 int
1565 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1566 {
1567 /* {
1568 syscallarg(char *) fname;
1569 syscallarg(fhandle_t *) fhp;
1570 syscallarg(size_t *) fh_size;
1571 } */
1572 struct vnode *vp;
1573 fhandle_t *fh;
1574 int error;
1575 struct nameidata nd;
1576 size_t sz;
1577 size_t usz;
1578
1579 /*
1580 * Must be super user
1581 */
1582 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1583 0, NULL, NULL, NULL);
1584 if (error)
1585 return (error);
1586 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1587 SCARG(uap, fname));
1588 error = namei(&nd);
1589 if (error)
1590 return (error);
1591 vp = nd.ni_vp;
1592 error = vfs_composefh_alloc(vp, &fh);
1593 vput(vp);
1594 if (error != 0) {
1595 goto out;
1596 }
1597 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1598 if (error != 0) {
1599 goto out;
1600 }
1601 sz = FHANDLE_SIZE(fh);
1602 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1603 if (error != 0) {
1604 goto out;
1605 }
1606 if (usz >= sz) {
1607 error = copyout(fh, SCARG(uap, fhp), sz);
1608 } else {
1609 error = E2BIG;
1610 }
1611 out:
1612 vfs_composefh_free(fh);
1613 return (error);
1614 }
1615
1616 /*
1617 * Open a file given a file handle.
1618 *
1619 * Check permissions, allocate an open file structure,
1620 * and call the device open routine if any.
1621 */
1622
1623 int
1624 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1625 register_t *retval)
1626 {
1627 file_t *fp;
1628 struct vnode *vp = NULL;
1629 kauth_cred_t cred = l->l_cred;
1630 file_t *nfp;
1631 int type, indx, error=0;
1632 struct flock lf;
1633 struct vattr va;
1634 fhandle_t *fh;
1635 int flags;
1636 proc_t *p;
1637
1638 p = curproc;
1639
1640 /*
1641 * Must be super user
1642 */
1643 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1644 0, NULL, NULL, NULL)))
1645 return (error);
1646
1647 flags = FFLAGS(oflags);
1648 if ((flags & (FREAD | FWRITE)) == 0)
1649 return (EINVAL);
1650 if ((flags & O_CREAT))
1651 return (EINVAL);
1652 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1653 return (error);
1654 fp = nfp;
1655 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1656 if (error != 0) {
1657 goto bad;
1658 }
1659 error = vfs_fhtovp(fh, &vp);
1660 if (error != 0) {
1661 goto bad;
1662 }
1663
1664 /* Now do an effective vn_open */
1665
1666 if (vp->v_type == VSOCK) {
1667 error = EOPNOTSUPP;
1668 goto bad;
1669 }
1670 error = vn_openchk(vp, cred, flags);
1671 if (error != 0)
1672 goto bad;
1673 if (flags & O_TRUNC) {
1674 VOP_UNLOCK(vp, 0); /* XXX */
1675 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1676 VATTR_NULL(&va);
1677 va.va_size = 0;
1678 error = VOP_SETATTR(vp, &va, cred);
1679 if (error)
1680 goto bad;
1681 }
1682 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1683 goto bad;
1684 if (flags & FWRITE) {
1685 mutex_enter(&vp->v_interlock);
1686 vp->v_writecount++;
1687 mutex_exit(&vp->v_interlock);
1688 }
1689
1690 /* done with modified vn_open, now finish what sys_open does. */
1691
1692 fp->f_flag = flags & FMASK;
1693 fp->f_type = DTYPE_VNODE;
1694 fp->f_ops = &vnops;
1695 fp->f_data = vp;
1696 if (flags & (O_EXLOCK | O_SHLOCK)) {
1697 lf.l_whence = SEEK_SET;
1698 lf.l_start = 0;
1699 lf.l_len = 0;
1700 if (flags & O_EXLOCK)
1701 lf.l_type = F_WRLCK;
1702 else
1703 lf.l_type = F_RDLCK;
1704 type = F_FLOCK;
1705 if ((flags & FNONBLOCK) == 0)
1706 type |= F_WAIT;
1707 VOP_UNLOCK(vp, 0);
1708 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1709 if (error) {
1710 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1711 fd_abort(p, fp, indx);
1712 return (error);
1713 }
1714 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1715 atomic_or_uint(&fp->f_flag, FHASLOCK);
1716 }
1717 VOP_UNLOCK(vp, 0);
1718 *retval = indx;
1719 fd_affix(p, fp, indx);
1720 vfs_copyinfh_free(fh);
1721 return (0);
1722
1723 bad:
1724 fd_abort(p, fp, indx);
1725 if (vp != NULL)
1726 vput(vp);
1727 vfs_copyinfh_free(fh);
1728 return (error);
1729 }
1730
1731 int
1732 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1733 {
1734 /* {
1735 syscallarg(const void *) fhp;
1736 syscallarg(size_t) fh_size;
1737 syscallarg(int) flags;
1738 } */
1739
1740 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1741 SCARG(uap, flags), retval);
1742 }
1743
1744 int
1745 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1746 {
1747 int error;
1748 fhandle_t *fh;
1749 struct vnode *vp;
1750
1751 /*
1752 * Must be super user
1753 */
1754 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1755 0, NULL, NULL, NULL)))
1756 return (error);
1757
1758 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1759 if (error != 0)
1760 return error;
1761
1762 error = vfs_fhtovp(fh, &vp);
1763 vfs_copyinfh_free(fh);
1764 if (error != 0)
1765 return error;
1766
1767 error = vn_stat(vp, sb);
1768 vput(vp);
1769 return error;
1770 }
1771
1772
1773 /* ARGSUSED */
1774 int
1775 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
1776 {
1777 /* {
1778 syscallarg(const void *) fhp;
1779 syscallarg(size_t) fh_size;
1780 syscallarg(struct stat *) sb;
1781 } */
1782 struct stat sb;
1783 int error;
1784
1785 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1786 if (error)
1787 return error;
1788 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1789 }
1790
1791 int
1792 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1793 int flags)
1794 {
1795 fhandle_t *fh;
1796 struct mount *mp;
1797 struct vnode *vp;
1798 int error;
1799
1800 /*
1801 * Must be super user
1802 */
1803 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1804 0, NULL, NULL, NULL)))
1805 return error;
1806
1807 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1808 if (error != 0)
1809 return error;
1810
1811 error = vfs_fhtovp(fh, &vp);
1812 vfs_copyinfh_free(fh);
1813 if (error != 0)
1814 return error;
1815
1816 mp = vp->v_mount;
1817 error = dostatvfs(mp, sb, l, flags, 1);
1818 vput(vp);
1819 return error;
1820 }
1821
1822 /* ARGSUSED */
1823 int
1824 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1825 {
1826 /* {
1827 syscallarg(const void *) fhp;
1828 syscallarg(size_t) fh_size;
1829 syscallarg(struct statvfs *) buf;
1830 syscallarg(int) flags;
1831 } */
1832 struct statvfs *sb = STATVFSBUF_GET();
1833 int error;
1834
1835 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1836 SCARG(uap, flags));
1837 if (error == 0)
1838 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1839 STATVFSBUF_PUT(sb);
1840 return error;
1841 }
1842
1843 /*
1844 * Create a special file.
1845 */
1846 /* ARGSUSED */
1847 int
1848 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
1849 {
1850 /* {
1851 syscallarg(const char *) path;
1852 syscallarg(int) mode;
1853 syscallarg(int) dev;
1854 } */
1855 struct proc *p = l->l_proc;
1856 struct vnode *vp;
1857 struct vattr vattr;
1858 int error, optype;
1859 struct nameidata nd;
1860 char *path;
1861 const char *cpath;
1862 enum uio_seg seg = UIO_USERSPACE;
1863
1864 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1865 0, NULL, NULL, NULL)) != 0)
1866 return (error);
1867
1868 optype = VOP_MKNOD_DESCOFFSET;
1869
1870 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1871 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1872
1873 if ((error = namei(&nd)) != 0)
1874 goto out;
1875 vp = nd.ni_vp;
1876 if (vp != NULL)
1877 error = EEXIST;
1878 else {
1879 VATTR_NULL(&vattr);
1880 /* We will read cwdi->cwdi_cmask unlocked. */
1881 vattr.va_mode =
1882 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1883 vattr.va_rdev = SCARG(uap, dev);
1884
1885 switch (SCARG(uap, mode) & S_IFMT) {
1886 case S_IFMT: /* used by badsect to flag bad sectors */
1887 vattr.va_type = VBAD;
1888 break;
1889 case S_IFCHR:
1890 vattr.va_type = VCHR;
1891 break;
1892 case S_IFBLK:
1893 vattr.va_type = VBLK;
1894 break;
1895 case S_IFWHT:
1896 optype = VOP_WHITEOUT_DESCOFFSET;
1897 break;
1898 case S_IFREG:
1899 #if NVERIEXEC > 0
1900 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1901 O_CREAT);
1902 #endif /* NVERIEXEC > 0 */
1903 vattr.va_type = VREG;
1904 vattr.va_rdev = VNOVAL;
1905 optype = VOP_CREATE_DESCOFFSET;
1906 break;
1907 default:
1908 error = EINVAL;
1909 break;
1910 }
1911 }
1912 if (!error) {
1913 switch (optype) {
1914 case VOP_WHITEOUT_DESCOFFSET:
1915 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1916 if (error)
1917 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1918 vput(nd.ni_dvp);
1919 break;
1920
1921 case VOP_MKNOD_DESCOFFSET:
1922 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1923 &nd.ni_cnd, &vattr);
1924 if (error == 0)
1925 vput(nd.ni_vp);
1926 break;
1927
1928 case VOP_CREATE_DESCOFFSET:
1929 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1930 &nd.ni_cnd, &vattr);
1931 if (error == 0)
1932 vput(nd.ni_vp);
1933 break;
1934 }
1935 } else {
1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1937 if (nd.ni_dvp == vp)
1938 vrele(nd.ni_dvp);
1939 else
1940 vput(nd.ni_dvp);
1941 if (vp)
1942 vrele(vp);
1943 }
1944 out:
1945 VERIEXEC_PATH_PUT(path);
1946 return (error);
1947 }
1948
1949 /*
1950 * Create a named pipe.
1951 */
1952 /* ARGSUSED */
1953 int
1954 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1955 {
1956 /* {
1957 syscallarg(const char *) path;
1958 syscallarg(int) mode;
1959 } */
1960 struct proc *p = l->l_proc;
1961 struct vattr vattr;
1962 int error;
1963 struct nameidata nd;
1964
1965 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1966 SCARG(uap, path));
1967 if ((error = namei(&nd)) != 0)
1968 return (error);
1969 if (nd.ni_vp != NULL) {
1970 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1971 if (nd.ni_dvp == nd.ni_vp)
1972 vrele(nd.ni_dvp);
1973 else
1974 vput(nd.ni_dvp);
1975 vrele(nd.ni_vp);
1976 return (EEXIST);
1977 }
1978 VATTR_NULL(&vattr);
1979 vattr.va_type = VFIFO;
1980 /* We will read cwdi->cwdi_cmask unlocked. */
1981 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1982 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1983 if (error == 0)
1984 vput(nd.ni_vp);
1985 return (error);
1986 }
1987
1988 /*
1989 * Make a hard file link.
1990 */
1991 /* ARGSUSED */
1992 int
1993 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
1994 {
1995 /* {
1996 syscallarg(const char *) path;
1997 syscallarg(const char *) link;
1998 } */
1999 struct vnode *vp;
2000 struct nameidata nd;
2001 int error;
2002
2003 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2004 SCARG(uap, path));
2005 if ((error = namei(&nd)) != 0)
2006 return (error);
2007 vp = nd.ni_vp;
2008 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2009 SCARG(uap, link));
2010 if ((error = namei(&nd)) != 0)
2011 goto out;
2012 if (nd.ni_vp) {
2013 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2014 if (nd.ni_dvp == nd.ni_vp)
2015 vrele(nd.ni_dvp);
2016 else
2017 vput(nd.ni_dvp);
2018 vrele(nd.ni_vp);
2019 error = EEXIST;
2020 goto out;
2021 }
2022 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2023 out:
2024 vrele(vp);
2025 return (error);
2026 }
2027
2028 /*
2029 * Make a symbolic link.
2030 */
2031 /* ARGSUSED */
2032 int
2033 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2034 {
2035 /* {
2036 syscallarg(const char *) path;
2037 syscallarg(const char *) link;
2038 } */
2039 struct proc *p = l->l_proc;
2040 struct vattr vattr;
2041 char *path;
2042 int error;
2043 struct nameidata nd;
2044
2045 path = PNBUF_GET();
2046 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2047 if (error)
2048 goto out;
2049 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2050 SCARG(uap, link));
2051 if ((error = namei(&nd)) != 0)
2052 goto out;
2053 if (nd.ni_vp) {
2054 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2055 if (nd.ni_dvp == nd.ni_vp)
2056 vrele(nd.ni_dvp);
2057 else
2058 vput(nd.ni_dvp);
2059 vrele(nd.ni_vp);
2060 error = EEXIST;
2061 goto out;
2062 }
2063 VATTR_NULL(&vattr);
2064 vattr.va_type = VLNK;
2065 /* We will read cwdi->cwdi_cmask unlocked. */
2066 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2067 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2068 if (error == 0)
2069 vput(nd.ni_vp);
2070 out:
2071 PNBUF_PUT(path);
2072 return (error);
2073 }
2074
2075 /*
2076 * Delete a whiteout from the filesystem.
2077 */
2078 /* ARGSUSED */
2079 int
2080 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2081 {
2082 /* {
2083 syscallarg(const char *) path;
2084 } */
2085 int error;
2086 struct nameidata nd;
2087
2088 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2089 UIO_USERSPACE, SCARG(uap, path));
2090 error = namei(&nd);
2091 if (error)
2092 return (error);
2093
2094 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2095 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2096 if (nd.ni_dvp == nd.ni_vp)
2097 vrele(nd.ni_dvp);
2098 else
2099 vput(nd.ni_dvp);
2100 if (nd.ni_vp)
2101 vrele(nd.ni_vp);
2102 return (EEXIST);
2103 }
2104 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2105 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2106 vput(nd.ni_dvp);
2107 return (error);
2108 }
2109
2110 /*
2111 * Delete a name from the filesystem.
2112 */
2113 /* ARGSUSED */
2114 int
2115 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2116 {
2117 /* {
2118 syscallarg(const char *) path;
2119 } */
2120
2121 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2122 }
2123
2124 int
2125 do_sys_unlink(const char *arg, enum uio_seg seg)
2126 {
2127 struct vnode *vp;
2128 int error;
2129 struct nameidata nd;
2130 kauth_cred_t cred;
2131 char *path;
2132 const char *cpath;
2133
2134 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2135 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2136
2137 if ((error = namei(&nd)) != 0)
2138 goto out;
2139 vp = nd.ni_vp;
2140
2141 /*
2142 * The root of a mounted filesystem cannot be deleted.
2143 */
2144 if (vp->v_vflag & VV_ROOT) {
2145 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2146 if (nd.ni_dvp == vp)
2147 vrele(nd.ni_dvp);
2148 else
2149 vput(nd.ni_dvp);
2150 vput(vp);
2151 error = EBUSY;
2152 goto out;
2153 }
2154
2155 #if NVERIEXEC > 0
2156 /* Handle remove requests for veriexec entries. */
2157 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2158 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2159 if (nd.ni_dvp == vp)
2160 vrele(nd.ni_dvp);
2161 else
2162 vput(nd.ni_dvp);
2163 vput(vp);
2164 goto out;
2165 }
2166 #endif /* NVERIEXEC > 0 */
2167
2168 cred = kauth_cred_get();
2169 #ifdef FILEASSOC
2170 (void)fileassoc_file_delete(vp);
2171 #endif /* FILEASSOC */
2172 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2173 out:
2174 VERIEXEC_PATH_PUT(path);
2175 return (error);
2176 }
2177
2178 /*
2179 * Reposition read/write file offset.
2180 */
2181 int
2182 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2183 {
2184 /* {
2185 syscallarg(int) fd;
2186 syscallarg(int) pad;
2187 syscallarg(off_t) offset;
2188 syscallarg(int) whence;
2189 } */
2190 kauth_cred_t cred = l->l_cred;
2191 file_t *fp;
2192 struct vnode *vp;
2193 struct vattr vattr;
2194 off_t newoff;
2195 int error, fd;
2196
2197 fd = SCARG(uap, fd);
2198
2199 if ((fp = fd_getfile(fd)) == NULL)
2200 return (EBADF);
2201
2202 vp = fp->f_data;
2203 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2204 error = ESPIPE;
2205 goto out;
2206 }
2207
2208 switch (SCARG(uap, whence)) {
2209 case SEEK_CUR:
2210 newoff = fp->f_offset + SCARG(uap, offset);
2211 break;
2212 case SEEK_END:
2213 error = VOP_GETATTR(vp, &vattr, cred);
2214 if (error) {
2215 goto out;
2216 }
2217 newoff = SCARG(uap, offset) + vattr.va_size;
2218 break;
2219 case SEEK_SET:
2220 newoff = SCARG(uap, offset);
2221 break;
2222 default:
2223 error = EINVAL;
2224 goto out;
2225 }
2226 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2227 *(off_t *)retval = fp->f_offset = newoff;
2228 }
2229 out:
2230 fd_putfile(fd);
2231 return (error);
2232 }
2233
2234 /*
2235 * Positional read system call.
2236 */
2237 int
2238 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2239 {
2240 /* {
2241 syscallarg(int) fd;
2242 syscallarg(void *) buf;
2243 syscallarg(size_t) nbyte;
2244 syscallarg(off_t) offset;
2245 } */
2246 file_t *fp;
2247 struct vnode *vp;
2248 off_t offset;
2249 int error, fd = SCARG(uap, fd);
2250
2251 if ((fp = fd_getfile(fd)) == NULL)
2252 return (EBADF);
2253
2254 if ((fp->f_flag & FREAD) == 0) {
2255 fd_putfile(fd);
2256 return (EBADF);
2257 }
2258
2259 vp = fp->f_data;
2260 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2261 error = ESPIPE;
2262 goto out;
2263 }
2264
2265 offset = SCARG(uap, offset);
2266
2267 /*
2268 * XXX This works because no file systems actually
2269 * XXX take any action on the seek operation.
2270 */
2271 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2272 goto out;
2273
2274 /* dofileread() will unuse the descriptor for us */
2275 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2276 &offset, 0, retval));
2277
2278 out:
2279 fd_putfile(fd);
2280 return (error);
2281 }
2282
2283 /*
2284 * Positional scatter read system call.
2285 */
2286 int
2287 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2288 {
2289 /* {
2290 syscallarg(int) fd;
2291 syscallarg(const struct iovec *) iovp;
2292 syscallarg(int) iovcnt;
2293 syscallarg(off_t) offset;
2294 } */
2295 off_t offset = SCARG(uap, offset);
2296
2297 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2298 SCARG(uap, iovcnt), &offset, 0, retval);
2299 }
2300
2301 /*
2302 * Positional write system call.
2303 */
2304 int
2305 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2306 {
2307 /* {
2308 syscallarg(int) fd;
2309 syscallarg(const void *) buf;
2310 syscallarg(size_t) nbyte;
2311 syscallarg(off_t) offset;
2312 } */
2313 file_t *fp;
2314 struct vnode *vp;
2315 off_t offset;
2316 int error, fd = SCARG(uap, fd);
2317
2318 if ((fp = fd_getfile(fd)) == NULL)
2319 return (EBADF);
2320
2321 if ((fp->f_flag & FWRITE) == 0) {
2322 fd_putfile(fd);
2323 return (EBADF);
2324 }
2325
2326 vp = fp->f_data;
2327 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2328 error = ESPIPE;
2329 goto out;
2330 }
2331
2332 offset = SCARG(uap, offset);
2333
2334 /*
2335 * XXX This works because no file systems actually
2336 * XXX take any action on the seek operation.
2337 */
2338 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2339 goto out;
2340
2341 /* dofilewrite() will unuse the descriptor for us */
2342 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2343 &offset, 0, retval));
2344
2345 out:
2346 fd_putfile(fd);
2347 return (error);
2348 }
2349
2350 /*
2351 * Positional gather write system call.
2352 */
2353 int
2354 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2355 {
2356 /* {
2357 syscallarg(int) fd;
2358 syscallarg(const struct iovec *) iovp;
2359 syscallarg(int) iovcnt;
2360 syscallarg(off_t) offset;
2361 } */
2362 off_t offset = SCARG(uap, offset);
2363
2364 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2365 SCARG(uap, iovcnt), &offset, 0, retval);
2366 }
2367
2368 /*
2369 * Check access permissions.
2370 */
2371 int
2372 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2373 {
2374 /* {
2375 syscallarg(const char *) path;
2376 syscallarg(int) flags;
2377 } */
2378 kauth_cred_t cred;
2379 struct vnode *vp;
2380 int error, flags;
2381 struct nameidata nd;
2382
2383 cred = kauth_cred_dup(l->l_cred);
2384 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2385 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2386 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2387 SCARG(uap, path));
2388 /* Override default credentials */
2389 nd.ni_cnd.cn_cred = cred;
2390 if ((error = namei(&nd)) != 0)
2391 goto out;
2392 vp = nd.ni_vp;
2393
2394 /* Flags == 0 means only check for existence. */
2395 if (SCARG(uap, flags)) {
2396 flags = 0;
2397 if (SCARG(uap, flags) & R_OK)
2398 flags |= VREAD;
2399 if (SCARG(uap, flags) & W_OK)
2400 flags |= VWRITE;
2401 if (SCARG(uap, flags) & X_OK)
2402 flags |= VEXEC;
2403
2404 error = VOP_ACCESS(vp, flags, cred);
2405 if (!error && (flags & VWRITE))
2406 error = vn_writechk(vp);
2407 }
2408 vput(vp);
2409 out:
2410 kauth_cred_free(cred);
2411 return (error);
2412 }
2413
2414 /*
2415 * Common code for all sys_stat functions, including compat versions.
2416 */
2417 int
2418 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2419 {
2420 int error;
2421 struct nameidata nd;
2422
2423 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2424 UIO_USERSPACE, path);
2425 error = namei(&nd);
2426 if (error != 0)
2427 return error;
2428 error = vn_stat(nd.ni_vp, sb);
2429 vput(nd.ni_vp);
2430 return error;
2431 }
2432
2433 /*
2434 * Get file status; this version follows links.
2435 */
2436 /* ARGSUSED */
2437 int
2438 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
2439 {
2440 /* {
2441 syscallarg(const char *) path;
2442 syscallarg(struct stat *) ub;
2443 } */
2444 struct stat sb;
2445 int error;
2446
2447 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2448 if (error)
2449 return error;
2450 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2451 }
2452
2453 /*
2454 * Get file status; this version does not follow links.
2455 */
2456 /* ARGSUSED */
2457 int
2458 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
2459 {
2460 /* {
2461 syscallarg(const char *) path;
2462 syscallarg(struct stat *) ub;
2463 } */
2464 struct stat sb;
2465 int error;
2466
2467 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2468 if (error)
2469 return error;
2470 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2471 }
2472
2473 /*
2474 * Get configurable pathname variables.
2475 */
2476 /* ARGSUSED */
2477 int
2478 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2479 {
2480 /* {
2481 syscallarg(const char *) path;
2482 syscallarg(int) name;
2483 } */
2484 int error;
2485 struct nameidata nd;
2486
2487 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2488 SCARG(uap, path));
2489 if ((error = namei(&nd)) != 0)
2490 return (error);
2491 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2492 vput(nd.ni_vp);
2493 return (error);
2494 }
2495
2496 /*
2497 * Return target name of a symbolic link.
2498 */
2499 /* ARGSUSED */
2500 int
2501 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2502 {
2503 /* {
2504 syscallarg(const char *) path;
2505 syscallarg(char *) buf;
2506 syscallarg(size_t) count;
2507 } */
2508 struct vnode *vp;
2509 struct iovec aiov;
2510 struct uio auio;
2511 int error;
2512 struct nameidata nd;
2513
2514 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2515 SCARG(uap, path));
2516 if ((error = namei(&nd)) != 0)
2517 return (error);
2518 vp = nd.ni_vp;
2519 if (vp->v_type != VLNK)
2520 error = EINVAL;
2521 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2522 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2523 aiov.iov_base = SCARG(uap, buf);
2524 aiov.iov_len = SCARG(uap, count);
2525 auio.uio_iov = &aiov;
2526 auio.uio_iovcnt = 1;
2527 auio.uio_offset = 0;
2528 auio.uio_rw = UIO_READ;
2529 KASSERT(l == curlwp);
2530 auio.uio_vmspace = l->l_proc->p_vmspace;
2531 auio.uio_resid = SCARG(uap, count);
2532 error = VOP_READLINK(vp, &auio, l->l_cred);
2533 }
2534 vput(vp);
2535 *retval = SCARG(uap, count) - auio.uio_resid;
2536 return (error);
2537 }
2538
2539 /*
2540 * Change flags of a file given a path name.
2541 */
2542 /* ARGSUSED */
2543 int
2544 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2545 {
2546 /* {
2547 syscallarg(const char *) path;
2548 syscallarg(u_long) flags;
2549 } */
2550 struct vnode *vp;
2551 int error;
2552 struct nameidata nd;
2553
2554 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2555 SCARG(uap, path));
2556 if ((error = namei(&nd)) != 0)
2557 return (error);
2558 vp = nd.ni_vp;
2559 error = change_flags(vp, SCARG(uap, flags), l);
2560 vput(vp);
2561 return (error);
2562 }
2563
2564 /*
2565 * Change flags of a file given a file descriptor.
2566 */
2567 /* ARGSUSED */
2568 int
2569 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2570 {
2571 /* {
2572 syscallarg(int) fd;
2573 syscallarg(u_long) flags;
2574 } */
2575 struct vnode *vp;
2576 file_t *fp;
2577 int error;
2578
2579 /* fd_getvnode() will use the descriptor for us */
2580 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2581 return (error);
2582 vp = fp->f_data;
2583 error = change_flags(vp, SCARG(uap, flags), l);
2584 VOP_UNLOCK(vp, 0);
2585 fd_putfile(SCARG(uap, fd));
2586 return (error);
2587 }
2588
2589 /*
2590 * Change flags of a file given a path name; this version does
2591 * not follow links.
2592 */
2593 int
2594 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2595 {
2596 /* {
2597 syscallarg(const char *) path;
2598 syscallarg(u_long) flags;
2599 } */
2600 struct vnode *vp;
2601 int error;
2602 struct nameidata nd;
2603
2604 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2605 SCARG(uap, path));
2606 if ((error = namei(&nd)) != 0)
2607 return (error);
2608 vp = nd.ni_vp;
2609 error = change_flags(vp, SCARG(uap, flags), l);
2610 vput(vp);
2611 return (error);
2612 }
2613
2614 /*
2615 * Common routine to change flags of a file.
2616 */
2617 int
2618 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2619 {
2620 struct vattr vattr;
2621 int error;
2622
2623 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2624 /*
2625 * Non-superusers cannot change the flags on devices, even if they
2626 * own them.
2627 */
2628 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2629 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2630 goto out;
2631 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2632 error = EINVAL;
2633 goto out;
2634 }
2635 }
2636 VATTR_NULL(&vattr);
2637 vattr.va_flags = flags;
2638 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2639 out:
2640 return (error);
2641 }
2642
2643 /*
2644 * Change mode of a file given path name; this version follows links.
2645 */
2646 /* ARGSUSED */
2647 int
2648 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2649 {
2650 /* {
2651 syscallarg(const char *) path;
2652 syscallarg(int) mode;
2653 } */
2654 int error;
2655 struct nameidata nd;
2656
2657 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2658 SCARG(uap, path));
2659 if ((error = namei(&nd)) != 0)
2660 return (error);
2661
2662 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2663
2664 vrele(nd.ni_vp);
2665 return (error);
2666 }
2667
2668 /*
2669 * Change mode of a file given a file descriptor.
2670 */
2671 /* ARGSUSED */
2672 int
2673 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2674 {
2675 /* {
2676 syscallarg(int) fd;
2677 syscallarg(int) mode;
2678 } */
2679 file_t *fp;
2680 int error;
2681
2682 /* fd_getvnode() will use the descriptor for us */
2683 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2684 return (error);
2685 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2686 fd_putfile(SCARG(uap, fd));
2687 return (error);
2688 }
2689
2690 /*
2691 * Change mode of a file given path name; this version does not follow links.
2692 */
2693 /* ARGSUSED */
2694 int
2695 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2696 {
2697 /* {
2698 syscallarg(const char *) path;
2699 syscallarg(int) mode;
2700 } */
2701 int error;
2702 struct nameidata nd;
2703
2704 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2705 SCARG(uap, path));
2706 if ((error = namei(&nd)) != 0)
2707 return (error);
2708
2709 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2710
2711 vrele(nd.ni_vp);
2712 return (error);
2713 }
2714
2715 /*
2716 * Common routine to set mode given a vnode.
2717 */
2718 static int
2719 change_mode(struct vnode *vp, int mode, struct lwp *l)
2720 {
2721 struct vattr vattr;
2722 int error;
2723
2724 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2725 VATTR_NULL(&vattr);
2726 vattr.va_mode = mode & ALLPERMS;
2727 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2728 VOP_UNLOCK(vp, 0);
2729 return (error);
2730 }
2731
2732 /*
2733 * Set ownership given a path name; this version follows links.
2734 */
2735 /* ARGSUSED */
2736 int
2737 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2738 {
2739 /* {
2740 syscallarg(const char *) path;
2741 syscallarg(uid_t) uid;
2742 syscallarg(gid_t) gid;
2743 } */
2744 int error;
2745 struct nameidata nd;
2746
2747 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2748 SCARG(uap, path));
2749 if ((error = namei(&nd)) != 0)
2750 return (error);
2751
2752 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2753
2754 vrele(nd.ni_vp);
2755 return (error);
2756 }
2757
2758 /*
2759 * Set ownership given a path name; this version follows links.
2760 * Provides POSIX semantics.
2761 */
2762 /* ARGSUSED */
2763 int
2764 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2765 {
2766 /* {
2767 syscallarg(const char *) path;
2768 syscallarg(uid_t) uid;
2769 syscallarg(gid_t) gid;
2770 } */
2771 int error;
2772 struct nameidata nd;
2773
2774 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2775 SCARG(uap, path));
2776 if ((error = namei(&nd)) != 0)
2777 return (error);
2778
2779 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2780
2781 vrele(nd.ni_vp);
2782 return (error);
2783 }
2784
2785 /*
2786 * Set ownership given a file descriptor.
2787 */
2788 /* ARGSUSED */
2789 int
2790 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2791 {
2792 /* {
2793 syscallarg(int) fd;
2794 syscallarg(uid_t) uid;
2795 syscallarg(gid_t) gid;
2796 } */
2797 int error;
2798 file_t *fp;
2799
2800 /* fd_getvnode() will use the descriptor for us */
2801 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2802 return (error);
2803 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2804 l, 0);
2805 fd_putfile(SCARG(uap, fd));
2806 return (error);
2807 }
2808
2809 /*
2810 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2811 */
2812 /* ARGSUSED */
2813 int
2814 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2815 {
2816 /* {
2817 syscallarg(int) fd;
2818 syscallarg(uid_t) uid;
2819 syscallarg(gid_t) gid;
2820 } */
2821 int error;
2822 file_t *fp;
2823
2824 /* fd_getvnode() will use the descriptor for us */
2825 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2826 return (error);
2827 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2828 l, 1);
2829 fd_putfile(SCARG(uap, fd));
2830 return (error);
2831 }
2832
2833 /*
2834 * Set ownership given a path name; this version does not follow links.
2835 */
2836 /* ARGSUSED */
2837 int
2838 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2839 {
2840 /* {
2841 syscallarg(const char *) path;
2842 syscallarg(uid_t) uid;
2843 syscallarg(gid_t) gid;
2844 } */
2845 int error;
2846 struct nameidata nd;
2847
2848 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2849 SCARG(uap, path));
2850 if ((error = namei(&nd)) != 0)
2851 return (error);
2852
2853 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2854
2855 vrele(nd.ni_vp);
2856 return (error);
2857 }
2858
2859 /*
2860 * Set ownership given a path name; this version does not follow links.
2861 * Provides POSIX/XPG semantics.
2862 */
2863 /* ARGSUSED */
2864 int
2865 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2866 {
2867 /* {
2868 syscallarg(const char *) path;
2869 syscallarg(uid_t) uid;
2870 syscallarg(gid_t) gid;
2871 } */
2872 int error;
2873 struct nameidata nd;
2874
2875 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2876 SCARG(uap, path));
2877 if ((error = namei(&nd)) != 0)
2878 return (error);
2879
2880 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2881
2882 vrele(nd.ni_vp);
2883 return (error);
2884 }
2885
2886 /*
2887 * Common routine to set ownership given a vnode.
2888 */
2889 static int
2890 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2891 int posix_semantics)
2892 {
2893 struct vattr vattr;
2894 mode_t newmode;
2895 int error;
2896
2897 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2898 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2899 goto out;
2900
2901 #define CHANGED(x) ((int)(x) != -1)
2902 newmode = vattr.va_mode;
2903 if (posix_semantics) {
2904 /*
2905 * POSIX/XPG semantics: if the caller is not the super-user,
2906 * clear set-user-id and set-group-id bits. Both POSIX and
2907 * the XPG consider the behaviour for calls by the super-user
2908 * implementation-defined; we leave the set-user-id and set-
2909 * group-id settings intact in that case.
2910 */
2911 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2912 NULL) != 0)
2913 newmode &= ~(S_ISUID | S_ISGID);
2914 } else {
2915 /*
2916 * NetBSD semantics: when changing owner and/or group,
2917 * clear the respective bit(s).
2918 */
2919 if (CHANGED(uid))
2920 newmode &= ~S_ISUID;
2921 if (CHANGED(gid))
2922 newmode &= ~S_ISGID;
2923 }
2924 /* Update va_mode iff altered. */
2925 if (vattr.va_mode == newmode)
2926 newmode = VNOVAL;
2927
2928 VATTR_NULL(&vattr);
2929 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2930 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2931 vattr.va_mode = newmode;
2932 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2933 #undef CHANGED
2934
2935 out:
2936 VOP_UNLOCK(vp, 0);
2937 return (error);
2938 }
2939
2940 /*
2941 * Set the access and modification times given a path name; this
2942 * version follows links.
2943 */
2944 /* ARGSUSED */
2945 int
2946 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
2947 {
2948 /* {
2949 syscallarg(const char *) path;
2950 syscallarg(const struct timeval *) tptr;
2951 } */
2952
2953 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2954 SCARG(uap, tptr), UIO_USERSPACE);
2955 }
2956
2957 /*
2958 * Set the access and modification times given a file descriptor.
2959 */
2960 /* ARGSUSED */
2961 int
2962 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
2963 {
2964 /* {
2965 syscallarg(int) fd;
2966 syscallarg(const struct timeval *) tptr;
2967 } */
2968 int error;
2969 file_t *fp;
2970
2971 /* fd_getvnode() will use the descriptor for us */
2972 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2973 return (error);
2974 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2975 UIO_USERSPACE);
2976 fd_putfile(SCARG(uap, fd));
2977 return (error);
2978 }
2979
2980 /*
2981 * Set the access and modification times given a path name; this
2982 * version does not follow links.
2983 */
2984 int
2985 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
2986 {
2987 /* {
2988 syscallarg(const char *) path;
2989 syscallarg(const struct timeval *) tptr;
2990 } */
2991
2992 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
2993 SCARG(uap, tptr), UIO_USERSPACE);
2994 }
2995
2996 /*
2997 * Common routine to set access and modification times given a vnode.
2998 */
2999 int
3000 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3001 const struct timeval *tptr, enum uio_seg seg)
3002 {
3003 struct vattr vattr;
3004 struct nameidata nd;
3005 int error;
3006
3007 VATTR_NULL(&vattr);
3008 if (tptr == NULL) {
3009 nanotime(&vattr.va_atime);
3010 vattr.va_mtime = vattr.va_atime;
3011 vattr.va_vaflags |= VA_UTIMES_NULL;
3012 } else {
3013 struct timeval tv[2];
3014
3015 if (seg != UIO_SYSSPACE) {
3016 error = copyin(tptr, &tv, sizeof (tv));
3017 if (error != 0)
3018 return error;
3019 tptr = tv;
3020 }
3021 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime);
3022 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime);
3023 }
3024
3025 if (vp == NULL) {
3026 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3027 if ((error = namei(&nd)) != 0)
3028 return (error);
3029 vp = nd.ni_vp;
3030 } else
3031 nd.ni_vp = NULL;
3032
3033 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3034 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3035 VOP_UNLOCK(vp, 0);
3036
3037 if (nd.ni_vp != NULL)
3038 vrele(nd.ni_vp);
3039
3040 return (error);
3041 }
3042
3043 /*
3044 * Truncate a file given its path name.
3045 */
3046 /* ARGSUSED */
3047 int
3048 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3049 {
3050 /* {
3051 syscallarg(const char *) path;
3052 syscallarg(int) pad;
3053 syscallarg(off_t) length;
3054 } */
3055 struct vnode *vp;
3056 struct vattr vattr;
3057 int error;
3058 struct nameidata nd;
3059
3060 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3061 SCARG(uap, path));
3062 if ((error = namei(&nd)) != 0)
3063 return (error);
3064 vp = nd.ni_vp;
3065 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3066 if (vp->v_type == VDIR)
3067 error = EISDIR;
3068 else if ((error = vn_writechk(vp)) == 0 &&
3069 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3070 VATTR_NULL(&vattr);
3071 vattr.va_size = SCARG(uap, length);
3072 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3073 }
3074 vput(vp);
3075 return (error);
3076 }
3077
3078 /*
3079 * Truncate a file given a file descriptor.
3080 */
3081 /* ARGSUSED */
3082 int
3083 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3084 {
3085 /* {
3086 syscallarg(int) fd;
3087 syscallarg(int) pad;
3088 syscallarg(off_t) length;
3089 } */
3090 struct vattr vattr;
3091 struct vnode *vp;
3092 file_t *fp;
3093 int error;
3094
3095 /* fd_getvnode() will use the descriptor for us */
3096 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3097 return (error);
3098 if ((fp->f_flag & FWRITE) == 0) {
3099 error = EINVAL;
3100 goto out;
3101 }
3102 vp = fp->f_data;
3103 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3104 if (vp->v_type == VDIR)
3105 error = EISDIR;
3106 else if ((error = vn_writechk(vp)) == 0) {
3107 VATTR_NULL(&vattr);
3108 vattr.va_size = SCARG(uap, length);
3109 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3110 }
3111 VOP_UNLOCK(vp, 0);
3112 out:
3113 fd_putfile(SCARG(uap, fd));
3114 return (error);
3115 }
3116
3117 /*
3118 * Sync an open file.
3119 */
3120 /* ARGSUSED */
3121 int
3122 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3123 {
3124 /* {
3125 syscallarg(int) fd;
3126 } */
3127 struct vnode *vp;
3128 file_t *fp;
3129 int error;
3130
3131 /* fd_getvnode() will use the descriptor for us */
3132 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3133 return (error);
3134 vp = fp->f_data;
3135 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3136 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3137 if (error == 0 && bioopsp != NULL &&
3138 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3139 (*bioopsp->io_fsync)(vp, 0);
3140 VOP_UNLOCK(vp, 0);
3141 fd_putfile(SCARG(uap, fd));
3142 return (error);
3143 }
3144
3145 /*
3146 * Sync a range of file data. API modeled after that found in AIX.
3147 *
3148 * FDATASYNC indicates that we need only save enough metadata to be able
3149 * to re-read the written data. Note we duplicate AIX's requirement that
3150 * the file be open for writing.
3151 */
3152 /* ARGSUSED */
3153 int
3154 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3155 {
3156 /* {
3157 syscallarg(int) fd;
3158 syscallarg(int) flags;
3159 syscallarg(off_t) start;
3160 syscallarg(off_t) length;
3161 } */
3162 struct vnode *vp;
3163 file_t *fp;
3164 int flags, nflags;
3165 off_t s, e, len;
3166 int error;
3167
3168 /* fd_getvnode() will use the descriptor for us */
3169 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3170 return (error);
3171
3172 if ((fp->f_flag & FWRITE) == 0) {
3173 error = EBADF;
3174 goto out;
3175 }
3176
3177 flags = SCARG(uap, flags);
3178 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3179 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3180 error = EINVAL;
3181 goto out;
3182 }
3183 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3184 if (flags & FDATASYNC)
3185 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3186 else
3187 nflags = FSYNC_WAIT;
3188 if (flags & FDISKSYNC)
3189 nflags |= FSYNC_CACHE;
3190
3191 len = SCARG(uap, length);
3192 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3193 if (len) {
3194 s = SCARG(uap, start);
3195 e = s + len;
3196 if (e < s) {
3197 error = EINVAL;
3198 goto out;
3199 }
3200 } else {
3201 e = 0;
3202 s = 0;
3203 }
3204
3205 vp = fp->f_data;
3206 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3207 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3208
3209 if (error == 0 && bioopsp != NULL &&
3210 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3211 (*bioopsp->io_fsync)(vp, nflags);
3212
3213 VOP_UNLOCK(vp, 0);
3214 out:
3215 fd_putfile(SCARG(uap, fd));
3216 return (error);
3217 }
3218
3219 /*
3220 * Sync the data of an open file.
3221 */
3222 /* ARGSUSED */
3223 int
3224 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3225 {
3226 /* {
3227 syscallarg(int) fd;
3228 } */
3229 struct vnode *vp;
3230 file_t *fp;
3231 int error;
3232
3233 /* fd_getvnode() will use the descriptor for us */
3234 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3235 return (error);
3236 if ((fp->f_flag & FWRITE) == 0) {
3237 fd_putfile(SCARG(uap, fd));
3238 return (EBADF);
3239 }
3240 vp = fp->f_data;
3241 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3242 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3243 VOP_UNLOCK(vp, 0);
3244 fd_putfile(SCARG(uap, fd));
3245 return (error);
3246 }
3247
3248 /*
3249 * Rename files, (standard) BSD semantics frontend.
3250 */
3251 /* ARGSUSED */
3252 int
3253 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3254 {
3255 /* {
3256 syscallarg(const char *) from;
3257 syscallarg(const char *) to;
3258 } */
3259
3260 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3261 }
3262
3263 /*
3264 * Rename files, POSIX semantics frontend.
3265 */
3266 /* ARGSUSED */
3267 int
3268 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3269 {
3270 /* {
3271 syscallarg(const char *) from;
3272 syscallarg(const char *) to;
3273 } */
3274
3275 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3276 }
3277
3278 /*
3279 * Rename files. Source and destination must either both be directories,
3280 * or both not be directories. If target is a directory, it must be empty.
3281 * If `from' and `to' refer to the same object, the value of the `retain'
3282 * argument is used to determine whether `from' will be
3283 *
3284 * (retain == 0) deleted unless `from' and `to' refer to the same
3285 * object in the file system's name space (BSD).
3286 * (retain == 1) always retained (POSIX).
3287 */
3288 int
3289 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3290 {
3291 struct vnode *tvp, *fvp, *tdvp;
3292 struct nameidata fromnd, tond;
3293 struct mount *fs;
3294 struct lwp *l = curlwp;
3295 struct proc *p;
3296 uint32_t saveflag;
3297 int error;
3298
3299 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3300 seg, from);
3301 if ((error = namei(&fromnd)) != 0)
3302 return (error);
3303 if (fromnd.ni_dvp != fromnd.ni_vp)
3304 VOP_UNLOCK(fromnd.ni_dvp, 0);
3305 fvp = fromnd.ni_vp;
3306
3307 fs = fvp->v_mount;
3308 error = VFS_RENAMELOCK_ENTER(fs);
3309 if (error) {
3310 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3311 vrele(fromnd.ni_dvp);
3312 vrele(fvp);
3313 goto out1;
3314 }
3315
3316 /*
3317 * close, partially, yet another race - ideally we should only
3318 * go as far as getting fromnd.ni_dvp before getting the per-fs
3319 * lock, and then continue to get fromnd.ni_vp, but we can't do
3320 * that with namei as it stands.
3321 *
3322 * This still won't prevent rmdir from nuking fromnd.ni_vp
3323 * under us. The real fix is to get the locks in the right
3324 * order and do the lookups in the right places, but that's a
3325 * major rototill.
3326 *
3327 * Preserve the SAVESTART in cn_flags, because who knows what
3328 * might happen if we don't.
3329 *
3330 * Note: this logic (as well as this whole function) is cloned
3331 * in nfs_serv.c. Proceed accordingly.
3332 */
3333 vrele(fvp);
3334 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3335 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3336 (fromnd.ni_cnd.cn_namelen == 2 &&
3337 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3338 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3339 error = EINVAL;
3340 VFS_RENAMELOCK_EXIT(fs);
3341 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3342 vrele(fromnd.ni_dvp);
3343 goto out1;
3344 }
3345 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3346 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3347 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3348 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3349 fromnd.ni_cnd.cn_flags |= saveflag;
3350 if (error) {
3351 VOP_UNLOCK(fromnd.ni_dvp, 0);
3352 VFS_RENAMELOCK_EXIT(fs);
3353 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3354 vrele(fromnd.ni_dvp);
3355 goto out1;
3356 }
3357 VOP_UNLOCK(fromnd.ni_vp, 0);
3358 if (fromnd.ni_dvp != fromnd.ni_vp)
3359 VOP_UNLOCK(fromnd.ni_dvp, 0);
3360 fvp = fromnd.ni_vp;
3361
3362 NDINIT(&tond, RENAME,
3363 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3364 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3365 seg, to);
3366 if ((error = namei(&tond)) != 0) {
3367 VFS_RENAMELOCK_EXIT(fs);
3368 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3369 vrele(fromnd.ni_dvp);
3370 vrele(fvp);
3371 goto out1;
3372 }
3373 tdvp = tond.ni_dvp;
3374 tvp = tond.ni_vp;
3375
3376 if (tvp != NULL) {
3377 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3378 error = ENOTDIR;
3379 goto out;
3380 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3381 error = EISDIR;
3382 goto out;
3383 }
3384 }
3385
3386 if (fvp == tdvp)
3387 error = EINVAL;
3388
3389 /*
3390 * Source and destination refer to the same object.
3391 */
3392 if (fvp == tvp) {
3393 if (retain)
3394 error = -1;
3395 else if (fromnd.ni_dvp == tdvp &&
3396 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3397 !memcmp(fromnd.ni_cnd.cn_nameptr,
3398 tond.ni_cnd.cn_nameptr,
3399 fromnd.ni_cnd.cn_namelen))
3400 error = -1;
3401 }
3402
3403 #if NVERIEXEC > 0
3404 if (!error) {
3405 char *f1, *f2;
3406
3407 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3408 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3409
3410 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3411 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3412
3413 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3414
3415 free(f1, M_TEMP);
3416 free(f2, M_TEMP);
3417 }
3418 #endif /* NVERIEXEC > 0 */
3419
3420 out:
3421 p = l->l_proc;
3422 if (!error) {
3423 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3424 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3425 VFS_RENAMELOCK_EXIT(fs);
3426 } else {
3427 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3428 if (tdvp == tvp)
3429 vrele(tdvp);
3430 else
3431 vput(tdvp);
3432 if (tvp)
3433 vput(tvp);
3434 VFS_RENAMELOCK_EXIT(fs);
3435 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3436 vrele(fromnd.ni_dvp);
3437 vrele(fvp);
3438 }
3439 vrele(tond.ni_startdir);
3440 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3441 out1:
3442 if (fromnd.ni_startdir)
3443 vrele(fromnd.ni_startdir);
3444 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3445 return (error == -1 ? 0 : error);
3446 }
3447
3448 /*
3449 * Make a directory file.
3450 */
3451 /* ARGSUSED */
3452 int
3453 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3454 {
3455 /* {
3456 syscallarg(const char *) path;
3457 syscallarg(int) mode;
3458 } */
3459 struct proc *p = l->l_proc;
3460 struct vnode *vp;
3461 struct vattr vattr;
3462 int error;
3463 struct nameidata nd;
3464
3465 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3466 SCARG(uap, path));
3467 if ((error = namei(&nd)) != 0)
3468 return (error);
3469 vp = nd.ni_vp;
3470 if (vp != NULL) {
3471 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3472 if (nd.ni_dvp == vp)
3473 vrele(nd.ni_dvp);
3474 else
3475 vput(nd.ni_dvp);
3476 vrele(vp);
3477 return (EEXIST);
3478 }
3479 VATTR_NULL(&vattr);
3480 vattr.va_type = VDIR;
3481 /* We will read cwdi->cwdi_cmask unlocked. */
3482 vattr.va_mode =
3483 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3484 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3485 if (!error)
3486 vput(nd.ni_vp);
3487 return (error);
3488 }
3489
3490 /*
3491 * Remove a directory file.
3492 */
3493 /* ARGSUSED */
3494 int
3495 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3496 {
3497 /* {
3498 syscallarg(const char *) path;
3499 } */
3500 struct vnode *vp;
3501 int error;
3502 struct nameidata nd;
3503
3504 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3505 SCARG(uap, path));
3506 if ((error = namei(&nd)) != 0)
3507 return (error);
3508 vp = nd.ni_vp;
3509 if (vp->v_type != VDIR) {
3510 error = ENOTDIR;
3511 goto out;
3512 }
3513 /*
3514 * No rmdir "." please.
3515 */
3516 if (nd.ni_dvp == vp) {
3517 error = EINVAL;
3518 goto out;
3519 }
3520 /*
3521 * The root of a mounted filesystem cannot be deleted.
3522 */
3523 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3524 error = EBUSY;
3525 goto out;
3526 }
3527 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3528 return (error);
3529
3530 out:
3531 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3532 if (nd.ni_dvp == vp)
3533 vrele(nd.ni_dvp);
3534 else
3535 vput(nd.ni_dvp);
3536 vput(vp);
3537 return (error);
3538 }
3539
3540 /*
3541 * Read a block of directory entries in a file system independent format.
3542 */
3543 int
3544 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3545 {
3546 /* {
3547 syscallarg(int) fd;
3548 syscallarg(char *) buf;
3549 syscallarg(size_t) count;
3550 } */
3551 file_t *fp;
3552 int error, done;
3553
3554 /* fd_getvnode() will use the descriptor for us */
3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3556 return (error);
3557 if ((fp->f_flag & FREAD) == 0) {
3558 error = EBADF;
3559 goto out;
3560 }
3561 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3562 SCARG(uap, count), &done, l, 0, 0);
3563 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3564 *retval = done;
3565 out:
3566 fd_putfile(SCARG(uap, fd));
3567 return (error);
3568 }
3569
3570 /*
3571 * Set the mode mask for creation of filesystem nodes.
3572 */
3573 int
3574 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3575 {
3576 /* {
3577 syscallarg(mode_t) newmask;
3578 } */
3579 struct proc *p = l->l_proc;
3580 struct cwdinfo *cwdi;
3581
3582 /*
3583 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3584 * important is that we serialize changes to the mask. The
3585 * rw_exit() will issue a write memory barrier on our behalf,
3586 * and force the changes out to other CPUs (as it must use an
3587 * atomic operation, draining the local CPU's store buffers).
3588 */
3589 cwdi = p->p_cwdi;
3590 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3591 *retval = cwdi->cwdi_cmask;
3592 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3593 rw_exit(&cwdi->cwdi_lock);
3594
3595 return (0);
3596 }
3597
3598 int
3599 dorevoke(struct vnode *vp, kauth_cred_t cred)
3600 {
3601 struct vattr vattr;
3602 int error;
3603
3604 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3605 return error;
3606 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3607 (error = kauth_authorize_generic(cred,
3608 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3609 VOP_REVOKE(vp, REVOKEALL);
3610 return (error);
3611 }
3612
3613 /*
3614 * Void all references to file by ripping underlying filesystem
3615 * away from vnode.
3616 */
3617 /* ARGSUSED */
3618 int
3619 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3620 {
3621 /* {
3622 syscallarg(const char *) path;
3623 } */
3624 struct vnode *vp;
3625 int error;
3626 struct nameidata nd;
3627
3628 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3629 SCARG(uap, path));
3630 if ((error = namei(&nd)) != 0)
3631 return (error);
3632 vp = nd.ni_vp;
3633 error = dorevoke(vp, l->l_cred);
3634 vrele(vp);
3635 return (error);
3636 }
3637
3638 /*
3639 * Convert a user file descriptor to a kernel file entry.
3640 */
3641 int
3642 getvnode(int fd, file_t **fpp)
3643 {
3644 struct vnode *vp;
3645 file_t *fp;
3646
3647 if ((fp = fd_getfile(fd)) == NULL)
3648 return (EBADF);
3649
3650 if (fp->f_type != DTYPE_VNODE) {
3651 fd_putfile(fd);
3652 return (EINVAL);
3653 }
3654
3655 vp = fp->f_data;
3656 if (vp->v_type == VBAD) {
3657 fd_putfile(fd);
3658 return (EBADF);
3659 }
3660
3661 *fpp = fp;
3662 return (0);
3663 }
3664