vfs_syscalls.c revision 1.359.2.6 1 /* $NetBSD: vfs_syscalls.c,v 1.359.2.6 2008/10/10 22:34:14 skrll Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 1989, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
63 */
64
65 #include <sys/cdefs.h>
66 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.359.2.6 2008/10/10 22:34:14 skrll Exp $");
67
68 #include "opt_compat_netbsd.h"
69 #include "opt_compat_43.h"
70 #include "opt_fileassoc.h"
71 #include "veriexec.h"
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/namei.h>
76 #include <sys/filedesc.h>
77 #include <sys/kernel.h>
78 #include <sys/file.h>
79 #include <sys/stat.h>
80 #include <sys/vnode.h>
81 #include <sys/mount.h>
82 #include <sys/proc.h>
83 #include <sys/uio.h>
84 #include <sys/malloc.h>
85 #include <sys/kmem.h>
86 #include <sys/dirent.h>
87 #include <sys/sysctl.h>
88 #include <sys/syscallargs.h>
89 #include <sys/vfs_syscalls.h>
90 #include <sys/ktrace.h>
91 #ifdef FILEASSOC
92 #include <sys/fileassoc.h>
93 #endif /* FILEASSOC */
94 #include <sys/verified_exec.h>
95 #include <sys/kauth.h>
96 #include <sys/atomic.h>
97 #include <sys/module.h>
98
99 #include <miscfs/genfs/genfs.h>
100 #include <miscfs/syncfs/syncfs.h>
101 #include <miscfs/specfs/specdev.h>
102
103 #ifdef COMPAT_30
104 #include "opt_nfsserver.h"
105 #include <nfs/rpcv2.h>
106 #endif
107 #include <nfs/nfsproto.h>
108 #ifdef COMPAT_30
109 #include <nfs/nfs.h>
110 #include <nfs/nfs_var.h>
111 #endif
112
113 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
114
115 static int change_dir(struct nameidata *, struct lwp *);
116 static int change_flags(struct vnode *, u_long, struct lwp *);
117 static int change_mode(struct vnode *, int, struct lwp *l);
118 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
119
120 void checkdirs(struct vnode *);
121
122 int dovfsusermount = 0;
123
124 /*
125 * Virtual File System System Calls
126 */
127
128 /*
129 * Mount a file system.
130 */
131
132 #if defined(COMPAT_09) || defined(COMPAT_43)
133 /*
134 * This table is used to maintain compatibility with 4.3BSD
135 * and NetBSD 0.9 mount syscalls. Note, the order is important!
136 *
137 * Do not modify this table. It should only contain filesystems
138 * supported by NetBSD 0.9 and 4.3BSD.
139 */
140 const char * const mountcompatnames[] = {
141 NULL, /* 0 = MOUNT_NONE */
142 MOUNT_FFS, /* 1 = MOUNT_UFS */
143 MOUNT_NFS, /* 2 */
144 MOUNT_MFS, /* 3 */
145 MOUNT_MSDOS, /* 4 */
146 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
147 MOUNT_FDESC, /* 6 */
148 MOUNT_KERNFS, /* 7 */
149 NULL, /* 8 = MOUNT_DEVFS */
150 MOUNT_AFS, /* 9 */
151 };
152 const int nmountcompatnames = sizeof(mountcompatnames) /
153 sizeof(mountcompatnames[0]);
154 #endif /* COMPAT_09 || COMPAT_43 */
155
156 static int
157 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
158 void *data, size_t *data_len)
159 {
160 struct mount *mp;
161 int error = 0, saved_flags;
162
163 mp = vp->v_mount;
164 saved_flags = mp->mnt_flag;
165
166 /* We can operate only on VV_ROOT nodes. */
167 if ((vp->v_vflag & VV_ROOT) == 0) {
168 error = EINVAL;
169 goto out;
170 }
171
172 /*
173 * We only allow the filesystem to be reloaded if it
174 * is currently mounted read-only. Additionally, we
175 * prevent read-write to read-only downgrades.
176 */
177 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
178 (mp->mnt_flag & MNT_RDONLY) == 0) {
179 error = EOPNOTSUPP; /* Needs translation */
180 goto out;
181 }
182
183 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
184 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
185 if (error)
186 goto out;
187
188 if (vfs_busy(mp, NULL)) {
189 error = EPERM;
190 goto out;
191 }
192
193 mutex_enter(&mp->mnt_updating);
194
195 mp->mnt_flag &= ~MNT_OP_FLAGS;
196 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
197
198 /*
199 * Set the mount level flags.
200 */
201 if (flags & MNT_RDONLY)
202 mp->mnt_flag |= MNT_RDONLY;
203 else if (mp->mnt_flag & MNT_RDONLY)
204 mp->mnt_iflag |= IMNT_WANTRDWR;
205 mp->mnt_flag &=
206 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
207 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
208 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
209 MNT_LOG);
210 mp->mnt_flag |= flags &
211 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
212 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
213 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
214 MNT_LOG | MNT_IGNORE);
215
216 error = VFS_MOUNT(mp, path, data, data_len);
217
218 #if defined(COMPAT_30) && defined(NFSSERVER)
219 if (error && data != NULL) {
220 int error2;
221
222 /* Update failed; let's try and see if it was an
223 * export request. */
224 error2 = nfs_update_exports_30(mp, path, data, l);
225
226 /* Only update error code if the export request was
227 * understood but some problem occurred while
228 * processing it. */
229 if (error2 != EJUSTRETURN)
230 error = error2;
231 }
232 #endif
233 if (mp->mnt_iflag & IMNT_WANTRDWR)
234 mp->mnt_flag &= ~MNT_RDONLY;
235 if (error)
236 mp->mnt_flag = saved_flags;
237 mp->mnt_flag &= ~MNT_OP_FLAGS;
238 mp->mnt_iflag &= ~IMNT_WANTRDWR;
239 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
240 if (mp->mnt_syncer == NULL)
241 error = vfs_allocate_syncvnode(mp);
242 } else {
243 if (mp->mnt_syncer != NULL)
244 vfs_deallocate_syncvnode(mp);
245 }
246 mutex_exit(&mp->mnt_updating);
247 vfs_unbusy(mp, false, NULL);
248
249 out:
250 return (error);
251 }
252
253 static int
254 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
255 {
256 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
257 int error;
258
259 /* Copy file-system type from userspace. */
260 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
261 if (error) {
262 #if defined(COMPAT_09) || defined(COMPAT_43)
263 /*
264 * Historically, filesystem types were identified by numbers.
265 * If we get an integer for the filesystem type instead of a
266 * string, we check to see if it matches one of the historic
267 * filesystem types.
268 */
269 u_long fsindex = (u_long)fstype;
270 if (fsindex >= nmountcompatnames ||
271 mountcompatnames[fsindex] == NULL)
272 return ENODEV;
273 strlcpy(fstypename, mountcompatnames[fsindex],
274 sizeof(fstypename));
275 #else
276 return error;
277 #endif
278 }
279
280 #ifdef COMPAT_10
281 /* Accept `ufs' as an alias for `ffs'. */
282 if (strcmp(fstypename, "ufs") == 0)
283 fstypename[0] = 'f';
284 #endif
285
286 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
287 return 0;
288
289 /* If we can autoload a vfs module, try again */
290 (void)module_load(fstype, 0, NULL, MODULE_CLASS_VFS, true);
291
292 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
293 return 0;
294
295 return ENODEV;
296 }
297
298 static int
299 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
300 const char *path, int flags, void *data, size_t *data_len, u_int recurse)
301 {
302 struct mount *mp;
303 struct vnode *vp = *vpp;
304 struct vattr va;
305 int error;
306
307 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
308 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
309 if (error)
310 return error;
311
312 /* Can't make a non-dir a mount-point (from here anyway). */
313 if (vp->v_type != VDIR)
314 return ENOTDIR;
315
316 /*
317 * If the user is not root, ensure that they own the directory
318 * onto which we are attempting to mount.
319 */
320 if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
321 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
322 (error = kauth_authorize_generic(l->l_cred,
323 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
324 return error;
325 }
326
327 if (flags & MNT_EXPORTED)
328 return EINVAL;
329
330 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
331 return error;
332
333 /*
334 * Check if a file-system is not already mounted on this vnode.
335 */
336 if (vp->v_mountedhere != NULL)
337 return EBUSY;
338
339 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
340 if (mp == NULL)
341 return ENOMEM;
342
343 mp->mnt_op = vfsops;
344 mp->mnt_refcnt = 1;
345
346 TAILQ_INIT(&mp->mnt_vnodelist);
347 rw_init(&mp->mnt_unmounting);
348 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
349 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
350 error = vfs_busy(mp, NULL);
351 KASSERT(error == 0);
352 mutex_enter(&mp->mnt_updating);
353
354 mp->mnt_vnodecovered = vp;
355 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
356 mount_initspecific(mp);
357
358 /*
359 * The underlying file system may refuse the mount for
360 * various reasons. Allow the user to force it to happen.
361 *
362 * Set the mount level flags.
363 */
364 mp->mnt_flag = flags &
365 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
366 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
367 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
368 MNT_LOG | MNT_IGNORE | MNT_RDONLY);
369
370 error = VFS_MOUNT(mp, path, data, data_len);
371 mp->mnt_flag &= ~MNT_OP_FLAGS;
372
373 /*
374 * Put the new filesystem on the mount list after root.
375 */
376 cache_purge(vp);
377 if (error != 0) {
378 vp->v_mountedhere = NULL;
379 mutex_exit(&mp->mnt_updating);
380 vfs_unbusy(mp, false, NULL);
381 vfs_destroy(mp);
382 return error;
383 }
384
385 mp->mnt_iflag &= ~IMNT_WANTRDWR;
386 mutex_enter(&mountlist_lock);
387 vp->v_mountedhere = mp;
388 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
389 mutex_exit(&mountlist_lock);
390 vn_restorerecurse(vp, recurse);
391 VOP_UNLOCK(vp, 0);
392 checkdirs(vp);
393 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
394 error = vfs_allocate_syncvnode(mp);
395 /* Hold an additional reference to the mount across VFS_START(). */
396 mutex_exit(&mp->mnt_updating);
397 vfs_unbusy(mp, true, NULL);
398 (void) VFS_STATVFS(mp, &mp->mnt_stat);
399 error = VFS_START(mp, 0);
400 if (error)
401 vrele(vp);
402 /* Drop reference held for VFS_START(). */
403 vfs_destroy(mp);
404 *vpp = NULL;
405 return error;
406 }
407
408 static int
409 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
410 void *data, size_t *data_len)
411 {
412 struct mount *mp;
413 int error;
414
415 /* If MNT_GETARGS is specified, it should be the only flag. */
416 if (flags & ~MNT_GETARGS)
417 return EINVAL;
418
419 mp = vp->v_mount;
420
421 /* XXX: probably some notion of "can see" here if we want isolation. */
422 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
423 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
424 if (error)
425 return error;
426
427 if ((vp->v_vflag & VV_ROOT) == 0)
428 return EINVAL;
429
430 if (vfs_busy(mp, NULL))
431 return EPERM;
432
433 mutex_enter(&mp->mnt_updating);
434 mp->mnt_flag &= ~MNT_OP_FLAGS;
435 mp->mnt_flag |= MNT_GETARGS;
436 error = VFS_MOUNT(mp, path, data, data_len);
437 mp->mnt_flag &= ~MNT_OP_FLAGS;
438 mutex_exit(&mp->mnt_updating);
439
440 vfs_unbusy(mp, false, NULL);
441 return (error);
442 }
443
444 #ifdef COMPAT_40
445 /* ARGSUSED */
446 int
447 compat_40_sys_mount(struct lwp *l, const struct compat_40_sys_mount_args *uap, register_t *retval)
448 {
449 /* {
450 syscallarg(const char *) type;
451 syscallarg(const char *) path;
452 syscallarg(int) flags;
453 syscallarg(void *) data;
454 } */
455 register_t dummy;
456
457 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
458 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 0, &dummy);
459 }
460 #endif
461
462 int
463 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
464 {
465 /* {
466 syscallarg(const char *) type;
467 syscallarg(const char *) path;
468 syscallarg(int) flags;
469 syscallarg(void *) data;
470 syscallarg(size_t) data_len;
471 } */
472
473 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
474 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
475 SCARG(uap, data_len), retval);
476 }
477
478 int
479 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
480 const char *path, int flags, void *data, enum uio_seg data_seg,
481 size_t data_len, register_t *retval)
482 {
483 struct vnode *vp;
484 struct nameidata nd;
485 void *data_buf = data;
486 u_int recurse;
487 int error;
488
489 /*
490 * Get vnode to be covered
491 */
492 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
493 if ((error = namei(&nd)) != 0)
494 return (error);
495 vp = nd.ni_vp;
496
497 /*
498 * A lookup in VFS_MOUNT might result in an attempt to
499 * lock this vnode again, so make the lock recursive.
500 */
501 if (vfsops == NULL) {
502 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
503 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
504 recurse = vn_setrecurse(vp);
505 vfsops = vp->v_mount->mnt_op;
506 } else {
507 /* 'type' is userspace */
508 error = mount_get_vfsops(type, &vfsops);
509 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
510 recurse = vn_setrecurse(vp);
511 if (error != 0)
512 goto done;
513 }
514 } else {
515 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
516 recurse = vn_setrecurse(vp);
517 }
518
519 if (data != NULL && data_seg == UIO_USERSPACE) {
520 if (data_len == 0) {
521 /* No length supplied, use default for filesystem */
522 data_len = vfsops->vfs_min_mount_data;
523 if (data_len > VFS_MAX_MOUNT_DATA) {
524 /* maybe a force loaded old LKM */
525 error = EINVAL;
526 goto done;
527 }
528 #ifdef COMPAT_30
529 /* Hopefully a longer buffer won't make copyin() fail */
530 if (flags & MNT_UPDATE
531 && data_len < sizeof (struct mnt_export_args30))
532 data_len = sizeof (struct mnt_export_args30);
533 #endif
534 }
535 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
536
537 /* NFS needs the buffer even for mnt_getargs .... */
538 error = copyin(data, data_buf, data_len);
539 if (error != 0)
540 goto done;
541 }
542
543 if (flags & MNT_GETARGS) {
544 if (data_len == 0) {
545 error = EINVAL;
546 goto done;
547 }
548 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
549 if (error != 0)
550 goto done;
551 if (data_seg == UIO_USERSPACE)
552 error = copyout(data_buf, data, data_len);
553 *retval = data_len;
554 } else if (flags & MNT_UPDATE) {
555 error = mount_update(l, vp, path, flags, data_buf, &data_len);
556 } else {
557 /* Locking is handled internally in mount_domount(). */
558 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
559 &data_len, recurse);
560 }
561
562 done:
563 if (vp != NULL) {
564 vn_restorerecurse(vp, recurse);
565 vput(vp);
566 }
567 if (data_buf != data)
568 free(data_buf, M_TEMP);
569 return (error);
570 }
571
572 /*
573 * Scan all active processes to see if any of them have a current
574 * or root directory onto which the new filesystem has just been
575 * mounted. If so, replace them with the new mount point.
576 */
577 void
578 checkdirs(struct vnode *olddp)
579 {
580 struct cwdinfo *cwdi;
581 struct vnode *newdp, *rele1, *rele2;
582 struct proc *p;
583 bool retry;
584
585 if (olddp->v_usecount == 1)
586 return;
587 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
588 panic("mount: lost mount");
589
590 do {
591 retry = false;
592 mutex_enter(proc_lock);
593 PROCLIST_FOREACH(p, &allproc) {
594 if ((p->p_flag & PK_MARKER) != 0)
595 continue;
596 if ((cwdi = p->p_cwdi) == NULL)
597 continue;
598 /*
599 * Can't change to the old directory any more,
600 * so even if we see a stale value it's not a
601 * problem.
602 */
603 if (cwdi->cwdi_cdir != olddp &&
604 cwdi->cwdi_rdir != olddp)
605 continue;
606 retry = true;
607 rele1 = NULL;
608 rele2 = NULL;
609 atomic_inc_uint(&cwdi->cwdi_refcnt);
610 mutex_exit(proc_lock);
611 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
612 if (cwdi->cwdi_cdir == olddp) {
613 rele1 = cwdi->cwdi_cdir;
614 VREF(newdp);
615 cwdi->cwdi_cdir = newdp;
616 }
617 if (cwdi->cwdi_rdir == olddp) {
618 rele2 = cwdi->cwdi_rdir;
619 VREF(newdp);
620 cwdi->cwdi_rdir = newdp;
621 }
622 rw_exit(&cwdi->cwdi_lock);
623 cwdfree(cwdi);
624 if (rele1 != NULL)
625 vrele(rele1);
626 if (rele2 != NULL)
627 vrele(rele2);
628 mutex_enter(proc_lock);
629 break;
630 }
631 mutex_exit(proc_lock);
632 } while (retry);
633
634 if (rootvnode == olddp) {
635 vrele(rootvnode);
636 VREF(newdp);
637 rootvnode = newdp;
638 }
639 vput(newdp);
640 }
641
642 /*
643 * Unmount a file system.
644 *
645 * Note: unmount takes a path to the vnode mounted on as argument,
646 * not special file (as before).
647 */
648 /* ARGSUSED */
649 int
650 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
651 {
652 /* {
653 syscallarg(const char *) path;
654 syscallarg(int) flags;
655 } */
656 struct vnode *vp;
657 struct mount *mp;
658 int error;
659 struct nameidata nd;
660
661 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
662 SCARG(uap, path));
663 if ((error = namei(&nd)) != 0)
664 return (error);
665 vp = nd.ni_vp;
666 mp = vp->v_mount;
667 atomic_inc_uint(&mp->mnt_refcnt);
668 VOP_UNLOCK(vp, 0);
669
670 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
671 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
672 if (error) {
673 vrele(vp);
674 vfs_destroy(mp);
675 return (error);
676 }
677
678 /*
679 * Don't allow unmounting the root file system.
680 */
681 if (mp->mnt_flag & MNT_ROOTFS) {
682 vrele(vp);
683 vfs_destroy(mp);
684 return (EINVAL);
685 }
686
687 /*
688 * Must be the root of the filesystem
689 */
690 if ((vp->v_vflag & VV_ROOT) == 0) {
691 vrele(vp);
692 vfs_destroy(mp);
693 return (EINVAL);
694 }
695
696 vrele(vp);
697 error = dounmount(mp, SCARG(uap, flags), l);
698 vfs_destroy(mp);
699 return error;
700 }
701
702 /*
703 * Do the actual file system unmount. File system is assumed to have
704 * been locked by the caller.
705 *
706 * => Caller hold reference to the mount, explicitly for dounmount().
707 */
708 int
709 dounmount(struct mount *mp, int flags, struct lwp *l)
710 {
711 struct vnode *coveredvp;
712 int error;
713 int async;
714 int used_syncer;
715
716 #if NVERIEXEC > 0
717 error = veriexec_unmountchk(mp);
718 if (error)
719 return (error);
720 #endif /* NVERIEXEC > 0 */
721
722 /*
723 * XXX Freeze syncer. Must do this before locking the
724 * mount point. See dounmount() for details.
725 */
726 mutex_enter(&syncer_mutex);
727 rw_enter(&mp->mnt_unmounting, RW_WRITER);
728 if ((mp->mnt_iflag & IMNT_GONE) != 0) {
729 rw_exit(&mp->mnt_unmounting);
730 mutex_exit(&syncer_mutex);
731 return ENOENT;
732 }
733
734 used_syncer = (mp->mnt_syncer != NULL);
735
736 /*
737 * XXX Syncer must be frozen when we get here. This should really
738 * be done on a per-mountpoint basis, but especially the softdep
739 * code possibly called from the syncer doesn't exactly work on a
740 * per-mountpoint basis, so the softdep code would become a maze
741 * of vfs_busy() calls.
742 *
743 * The caller of dounmount() must acquire syncer_mutex because
744 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
745 * order, and we must preserve that order to avoid deadlock.
746 *
747 * So, if the file system did not use the syncer, now is
748 * the time to release the syncer_mutex.
749 */
750 if (used_syncer == 0)
751 mutex_exit(&syncer_mutex);
752
753 mp->mnt_iflag |= IMNT_UNMOUNT;
754 async = mp->mnt_flag & MNT_ASYNC;
755 mp->mnt_flag &= ~MNT_ASYNC;
756 cache_purgevfs(mp); /* remove cache entries for this file sys */
757 if (mp->mnt_syncer != NULL)
758 vfs_deallocate_syncvnode(mp);
759 error = 0;
760 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
761 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
762 }
763 vfs_scrubvnlist(mp);
764 if (error == 0 || (flags & MNT_FORCE))
765 error = VFS_UNMOUNT(mp, flags);
766 if (error) {
767 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
768 (void) vfs_allocate_syncvnode(mp);
769 mp->mnt_iflag &= ~IMNT_UNMOUNT;
770 mp->mnt_flag |= async;
771 rw_exit(&mp->mnt_unmounting);
772 if (used_syncer)
773 mutex_exit(&syncer_mutex);
774 return (error);
775 }
776 vfs_scrubvnlist(mp);
777 mutex_enter(&mountlist_lock);
778 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
779 coveredvp->v_mountedhere = NULL;
780 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
781 mp->mnt_iflag |= IMNT_GONE;
782 mutex_exit(&mountlist_lock);
783 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
784 panic("unmount: dangling vnode");
785 if (used_syncer)
786 mutex_exit(&syncer_mutex);
787 vfs_hooks_unmount(mp);
788 rw_exit(&mp->mnt_unmounting);
789 vfs_destroy(mp); /* reference from mount() */
790 if (coveredvp != NULLVP)
791 vrele(coveredvp);
792 return (0);
793 }
794
795 /*
796 * Sync each mounted filesystem.
797 */
798 #ifdef DEBUG
799 int syncprt = 0;
800 struct ctldebug debug0 = { "syncprt", &syncprt };
801 #endif
802
803 /* ARGSUSED */
804 int
805 sys_sync(struct lwp *l, const void *v, register_t *retval)
806 {
807 struct mount *mp, *nmp;
808 int asyncflag;
809
810 if (l == NULL)
811 l = &lwp0;
812
813 mutex_enter(&mountlist_lock);
814 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
815 mp = nmp) {
816 if (vfs_busy(mp, &nmp)) {
817 continue;
818 }
819 mutex_enter(&mp->mnt_updating);
820 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
821 asyncflag = mp->mnt_flag & MNT_ASYNC;
822 mp->mnt_flag &= ~MNT_ASYNC;
823 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
824 if (asyncflag)
825 mp->mnt_flag |= MNT_ASYNC;
826 }
827 mutex_exit(&mp->mnt_updating);
828 vfs_unbusy(mp, false, &nmp);
829 }
830 mutex_exit(&mountlist_lock);
831 #ifdef DEBUG
832 if (syncprt)
833 vfs_bufstats();
834 #endif /* DEBUG */
835 return (0);
836 }
837
838 /*
839 * Change filesystem quotas.
840 */
841 /* ARGSUSED */
842 int
843 sys_quotactl(struct lwp *l, const struct sys_quotactl_args *uap, register_t *retval)
844 {
845 /* {
846 syscallarg(const char *) path;
847 syscallarg(int) cmd;
848 syscallarg(int) uid;
849 syscallarg(void *) arg;
850 } */
851 struct mount *mp;
852 int error;
853 struct nameidata nd;
854
855 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
856 SCARG(uap, path));
857 if ((error = namei(&nd)) != 0)
858 return (error);
859 mp = nd.ni_vp->v_mount;
860 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
861 SCARG(uap, arg));
862 vrele(nd.ni_vp);
863 return (error);
864 }
865
866 int
867 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
868 int root)
869 {
870 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
871 int error = 0;
872
873 /*
874 * If MNT_NOWAIT or MNT_LAZY is specified, do not
875 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
876 * overrides MNT_NOWAIT.
877 */
878 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
879 (flags != MNT_WAIT && flags != 0)) {
880 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
881 goto done;
882 }
883
884 /* Get the filesystem stats now */
885 memset(sp, 0, sizeof(*sp));
886 if ((error = VFS_STATVFS(mp, sp)) != 0) {
887 return error;
888 }
889
890 if (cwdi->cwdi_rdir == NULL)
891 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
892 done:
893 if (cwdi->cwdi_rdir != NULL) {
894 size_t len;
895 char *bp;
896 char c;
897 char *path = PNBUF_GET();
898
899 bp = path + MAXPATHLEN;
900 *--bp = '\0';
901 rw_enter(&cwdi->cwdi_lock, RW_READER);
902 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
903 MAXPATHLEN / 2, 0, l);
904 rw_exit(&cwdi->cwdi_lock);
905 if (error) {
906 PNBUF_PUT(path);
907 return error;
908 }
909 len = strlen(bp);
910 /*
911 * for mount points that are below our root, we can see
912 * them, so we fix up the pathname and return them. The
913 * rest we cannot see, so we don't allow viewing the
914 * data.
915 */
916 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
917 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
918 (void)strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
919 sizeof(sp->f_mntonname));
920 if (sp->f_mntonname[0] == '\0')
921 (void)strlcpy(sp->f_mntonname, "/",
922 sizeof(sp->f_mntonname));
923 } else {
924 if (root)
925 (void)strlcpy(sp->f_mntonname, "/",
926 sizeof(sp->f_mntonname));
927 else
928 error = EPERM;
929 }
930 PNBUF_PUT(path);
931 }
932 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
933 return error;
934 }
935
936 /*
937 * Get filesystem statistics by path.
938 */
939 int
940 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
941 {
942 struct mount *mp;
943 int error;
944 struct nameidata nd;
945
946 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path);
947 if ((error = namei(&nd)) != 0)
948 return error;
949 mp = nd.ni_vp->v_mount;
950 error = dostatvfs(mp, sb, l, flags, 1);
951 vrele(nd.ni_vp);
952 return error;
953 }
954
955 /* ARGSUSED */
956 int
957 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
958 {
959 /* {
960 syscallarg(const char *) path;
961 syscallarg(struct statvfs *) buf;
962 syscallarg(int) flags;
963 } */
964 struct statvfs *sb;
965 int error;
966
967 sb = STATVFSBUF_GET();
968 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
969 if (error == 0)
970 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
971 STATVFSBUF_PUT(sb);
972 return error;
973 }
974
975 /*
976 * Get filesystem statistics by fd.
977 */
978 int
979 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
980 {
981 file_t *fp;
982 struct mount *mp;
983 int error;
984
985 /* fd_getvnode() will use the descriptor for us */
986 if ((error = fd_getvnode(fd, &fp)) != 0)
987 return (error);
988 mp = ((struct vnode *)fp->f_data)->v_mount;
989 error = dostatvfs(mp, sb, curlwp, flags, 1);
990 fd_putfile(fd);
991 return error;
992 }
993
994 /* ARGSUSED */
995 int
996 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
997 {
998 /* {
999 syscallarg(int) fd;
1000 syscallarg(struct statvfs *) buf;
1001 syscallarg(int) flags;
1002 } */
1003 struct statvfs *sb;
1004 int error;
1005
1006 sb = STATVFSBUF_GET();
1007 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1008 if (error == 0)
1009 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1010 STATVFSBUF_PUT(sb);
1011 return error;
1012 }
1013
1014
1015 /*
1016 * Get statistics on all filesystems.
1017 */
1018 int
1019 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1020 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1021 register_t *retval)
1022 {
1023 int root = 0;
1024 struct proc *p = l->l_proc;
1025 struct mount *mp, *nmp;
1026 struct statvfs *sb;
1027 size_t count, maxcount;
1028 int error = 0;
1029
1030 sb = STATVFSBUF_GET();
1031 maxcount = bufsize / entry_sz;
1032 mutex_enter(&mountlist_lock);
1033 count = 0;
1034 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1035 mp = nmp) {
1036 if (vfs_busy(mp, &nmp)) {
1037 continue;
1038 }
1039 if (sfsp && count < maxcount) {
1040 error = dostatvfs(mp, sb, l, flags, 0);
1041 if (error) {
1042 vfs_unbusy(mp, false, &nmp);
1043 error = 0;
1044 continue;
1045 }
1046 error = copyfn(sb, sfsp, entry_sz);
1047 if (error) {
1048 vfs_unbusy(mp, false, NULL);
1049 goto out;
1050 }
1051 sfsp = (char *)sfsp + entry_sz;
1052 root |= strcmp(sb->f_mntonname, "/") == 0;
1053 }
1054 count++;
1055 vfs_unbusy(mp, false, &nmp);
1056 }
1057 mutex_exit(&mountlist_lock);
1058
1059 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1060 /*
1061 * fake a root entry
1062 */
1063 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1064 sb, l, flags, 1);
1065 if (error != 0)
1066 goto out;
1067 if (sfsp) {
1068 error = copyfn(sb, sfsp, entry_sz);
1069 if (error != 0)
1070 goto out;
1071 }
1072 count++;
1073 }
1074 if (sfsp && count > maxcount)
1075 *retval = maxcount;
1076 else
1077 *retval = count;
1078 out:
1079 STATVFSBUF_PUT(sb);
1080 return error;
1081 }
1082
1083 int
1084 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1085 {
1086 /* {
1087 syscallarg(struct statvfs *) buf;
1088 syscallarg(size_t) bufsize;
1089 syscallarg(int) flags;
1090 } */
1091
1092 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1093 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1094 }
1095
1096 /*
1097 * Change current working directory to a given file descriptor.
1098 */
1099 /* ARGSUSED */
1100 int
1101 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1102 {
1103 /* {
1104 syscallarg(int) fd;
1105 } */
1106 struct proc *p = l->l_proc;
1107 struct cwdinfo *cwdi;
1108 struct vnode *vp, *tdp;
1109 struct mount *mp;
1110 file_t *fp;
1111 int error, fd;
1112
1113 /* fd_getvnode() will use the descriptor for us */
1114 fd = SCARG(uap, fd);
1115 if ((error = fd_getvnode(fd, &fp)) != 0)
1116 return (error);
1117 vp = fp->f_data;
1118
1119 VREF(vp);
1120 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1121 if (vp->v_type != VDIR)
1122 error = ENOTDIR;
1123 else
1124 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1125 if (error) {
1126 vput(vp);
1127 goto out;
1128 }
1129 while ((mp = vp->v_mountedhere) != NULL) {
1130 error = vfs_busy(mp, NULL);
1131 vput(vp);
1132 if (error != 0)
1133 goto out;
1134 error = VFS_ROOT(mp, &tdp);
1135 vfs_unbusy(mp, false, NULL);
1136 if (error)
1137 goto out;
1138 vp = tdp;
1139 }
1140 VOP_UNLOCK(vp, 0);
1141
1142 /*
1143 * Disallow changing to a directory not under the process's
1144 * current root directory (if there is one).
1145 */
1146 cwdi = p->p_cwdi;
1147 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1148 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1149 vrele(vp);
1150 error = EPERM; /* operation not permitted */
1151 } else {
1152 vrele(cwdi->cwdi_cdir);
1153 cwdi->cwdi_cdir = vp;
1154 }
1155 rw_exit(&cwdi->cwdi_lock);
1156
1157 out:
1158 fd_putfile(fd);
1159 return (error);
1160 }
1161
1162 /*
1163 * Change this process's notion of the root directory to a given file
1164 * descriptor.
1165 */
1166 int
1167 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1168 {
1169 struct proc *p = l->l_proc;
1170 struct cwdinfo *cwdi;
1171 struct vnode *vp;
1172 file_t *fp;
1173 int error, fd = SCARG(uap, fd);
1174
1175 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1176 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1177 return error;
1178 /* fd_getvnode() will use the descriptor for us */
1179 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
1180 return error;
1181 vp = fp->f_data;
1182 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1183 if (vp->v_type != VDIR)
1184 error = ENOTDIR;
1185 else
1186 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1187 VOP_UNLOCK(vp, 0);
1188 if (error)
1189 goto out;
1190 VREF(vp);
1191
1192 /*
1193 * Prevent escaping from chroot by putting the root under
1194 * the working directory. Silently chdir to / if we aren't
1195 * already there.
1196 */
1197 cwdi = p->p_cwdi;
1198 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1199 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1200 /*
1201 * XXX would be more failsafe to change directory to a
1202 * deadfs node here instead
1203 */
1204 vrele(cwdi->cwdi_cdir);
1205 VREF(vp);
1206 cwdi->cwdi_cdir = vp;
1207 }
1208
1209 if (cwdi->cwdi_rdir != NULL)
1210 vrele(cwdi->cwdi_rdir);
1211 cwdi->cwdi_rdir = vp;
1212 rw_exit(&cwdi->cwdi_lock);
1213
1214 out:
1215 fd_putfile(fd);
1216 return (error);
1217 }
1218
1219 /*
1220 * Change current working directory (``.'').
1221 */
1222 /* ARGSUSED */
1223 int
1224 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1225 {
1226 /* {
1227 syscallarg(const char *) path;
1228 } */
1229 struct proc *p = l->l_proc;
1230 struct cwdinfo *cwdi;
1231 int error;
1232 struct nameidata nd;
1233
1234 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1235 SCARG(uap, path));
1236 if ((error = change_dir(&nd, l)) != 0)
1237 return (error);
1238 cwdi = p->p_cwdi;
1239 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1240 vrele(cwdi->cwdi_cdir);
1241 cwdi->cwdi_cdir = nd.ni_vp;
1242 rw_exit(&cwdi->cwdi_lock);
1243 return (0);
1244 }
1245
1246 /*
1247 * Change notion of root (``/'') directory.
1248 */
1249 /* ARGSUSED */
1250 int
1251 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1252 {
1253 /* {
1254 syscallarg(const char *) path;
1255 } */
1256 struct proc *p = l->l_proc;
1257 struct cwdinfo *cwdi;
1258 struct vnode *vp;
1259 int error;
1260 struct nameidata nd;
1261
1262 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1263 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1264 return (error);
1265 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1266 SCARG(uap, path));
1267 if ((error = change_dir(&nd, l)) != 0)
1268 return (error);
1269
1270 cwdi = p->p_cwdi;
1271 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1272 if (cwdi->cwdi_rdir != NULL)
1273 vrele(cwdi->cwdi_rdir);
1274 vp = nd.ni_vp;
1275 cwdi->cwdi_rdir = vp;
1276
1277 /*
1278 * Prevent escaping from chroot by putting the root under
1279 * the working directory. Silently chdir to / if we aren't
1280 * already there.
1281 */
1282 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1283 /*
1284 * XXX would be more failsafe to change directory to a
1285 * deadfs node here instead
1286 */
1287 vrele(cwdi->cwdi_cdir);
1288 VREF(vp);
1289 cwdi->cwdi_cdir = vp;
1290 }
1291 rw_exit(&cwdi->cwdi_lock);
1292
1293 return (0);
1294 }
1295
1296 /*
1297 * Common routine for chroot and chdir.
1298 */
1299 static int
1300 change_dir(struct nameidata *ndp, struct lwp *l)
1301 {
1302 struct vnode *vp;
1303 int error;
1304
1305 if ((error = namei(ndp)) != 0)
1306 return (error);
1307 vp = ndp->ni_vp;
1308 if (vp->v_type != VDIR)
1309 error = ENOTDIR;
1310 else
1311 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1312
1313 if (error)
1314 vput(vp);
1315 else
1316 VOP_UNLOCK(vp, 0);
1317 return (error);
1318 }
1319
1320 /*
1321 * Check permissions, allocate an open file structure,
1322 * and call the device open routine if any.
1323 */
1324 int
1325 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1326 {
1327 /* {
1328 syscallarg(const char *) path;
1329 syscallarg(int) flags;
1330 syscallarg(int) mode;
1331 } */
1332 struct proc *p = l->l_proc;
1333 struct cwdinfo *cwdi = p->p_cwdi;
1334 file_t *fp;
1335 struct vnode *vp;
1336 int flags, cmode;
1337 int type, indx, error;
1338 struct flock lf;
1339 struct nameidata nd;
1340
1341 flags = FFLAGS(SCARG(uap, flags));
1342 if ((flags & (FREAD | FWRITE)) == 0)
1343 return (EINVAL);
1344 if ((error = fd_allocfile(&fp, &indx)) != 0)
1345 return (error);
1346 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1347 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1348 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
1349 SCARG(uap, path));
1350 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1351 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1352 fd_abort(p, fp, indx);
1353 if ((error == EDUPFD || error == EMOVEFD) &&
1354 l->l_dupfd >= 0 && /* XXX from fdopen */
1355 (error =
1356 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1357 *retval = indx;
1358 return (0);
1359 }
1360 if (error == ERESTART)
1361 error = EINTR;
1362 return (error);
1363 }
1364
1365 l->l_dupfd = 0;
1366 vp = nd.ni_vp;
1367 fp->f_flag = flags & FMASK;
1368 fp->f_type = DTYPE_VNODE;
1369 fp->f_ops = &vnops;
1370 fp->f_data = vp;
1371 if (flags & (O_EXLOCK | O_SHLOCK)) {
1372 lf.l_whence = SEEK_SET;
1373 lf.l_start = 0;
1374 lf.l_len = 0;
1375 if (flags & O_EXLOCK)
1376 lf.l_type = F_WRLCK;
1377 else
1378 lf.l_type = F_RDLCK;
1379 type = F_FLOCK;
1380 if ((flags & FNONBLOCK) == 0)
1381 type |= F_WAIT;
1382 VOP_UNLOCK(vp, 0);
1383 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1384 if (error) {
1385 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1386 fd_abort(p, fp, indx);
1387 return (error);
1388 }
1389 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1390 atomic_or_uint(&fp->f_flag, FHASLOCK);
1391 }
1392 VOP_UNLOCK(vp, 0);
1393 *retval = indx;
1394 fd_affix(p, fp, indx);
1395 return (0);
1396 }
1397
1398 static void
1399 vfs__fhfree(fhandle_t *fhp)
1400 {
1401 size_t fhsize;
1402
1403 if (fhp == NULL) {
1404 return;
1405 }
1406 fhsize = FHANDLE_SIZE(fhp);
1407 kmem_free(fhp, fhsize);
1408 }
1409
1410 /*
1411 * vfs_composefh: compose a filehandle.
1412 */
1413
1414 int
1415 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1416 {
1417 struct mount *mp;
1418 struct fid *fidp;
1419 int error;
1420 size_t needfhsize;
1421 size_t fidsize;
1422
1423 mp = vp->v_mount;
1424 fidp = NULL;
1425 if (*fh_size < FHANDLE_SIZE_MIN) {
1426 fidsize = 0;
1427 } else {
1428 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1429 if (fhp != NULL) {
1430 memset(fhp, 0, *fh_size);
1431 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1432 fidp = &fhp->fh_fid;
1433 }
1434 }
1435 error = VFS_VPTOFH(vp, fidp, &fidsize);
1436 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1437 if (error == 0 && *fh_size < needfhsize) {
1438 error = E2BIG;
1439 }
1440 *fh_size = needfhsize;
1441 return error;
1442 }
1443
1444 int
1445 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1446 {
1447 struct mount *mp;
1448 fhandle_t *fhp;
1449 size_t fhsize;
1450 size_t fidsize;
1451 int error;
1452
1453 *fhpp = NULL;
1454 mp = vp->v_mount;
1455 fidsize = 0;
1456 error = VFS_VPTOFH(vp, NULL, &fidsize);
1457 KASSERT(error != 0);
1458 if (error != E2BIG) {
1459 goto out;
1460 }
1461 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1462 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1463 if (fhp == NULL) {
1464 error = ENOMEM;
1465 goto out;
1466 }
1467 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1468 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1469 if (error == 0) {
1470 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1471 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1472 *fhpp = fhp;
1473 } else {
1474 kmem_free(fhp, fhsize);
1475 }
1476 out:
1477 return error;
1478 }
1479
1480 void
1481 vfs_composefh_free(fhandle_t *fhp)
1482 {
1483
1484 vfs__fhfree(fhp);
1485 }
1486
1487 /*
1488 * vfs_fhtovp: lookup a vnode by a filehandle.
1489 */
1490
1491 int
1492 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1493 {
1494 struct mount *mp;
1495 int error;
1496
1497 *vpp = NULL;
1498 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1499 if (mp == NULL) {
1500 error = ESTALE;
1501 goto out;
1502 }
1503 if (mp->mnt_op->vfs_fhtovp == NULL) {
1504 error = EOPNOTSUPP;
1505 goto out;
1506 }
1507 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1508 out:
1509 return error;
1510 }
1511
1512 /*
1513 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1514 * the needed size.
1515 */
1516
1517 int
1518 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1519 {
1520 fhandle_t *fhp;
1521 int error;
1522
1523 *fhpp = NULL;
1524 if (fhsize > FHANDLE_SIZE_MAX) {
1525 return EINVAL;
1526 }
1527 if (fhsize < FHANDLE_SIZE_MIN) {
1528 return EINVAL;
1529 }
1530 again:
1531 fhp = kmem_alloc(fhsize, KM_SLEEP);
1532 if (fhp == NULL) {
1533 return ENOMEM;
1534 }
1535 error = copyin(ufhp, fhp, fhsize);
1536 if (error == 0) {
1537 /* XXX this check shouldn't be here */
1538 if (FHANDLE_SIZE(fhp) == fhsize) {
1539 *fhpp = fhp;
1540 return 0;
1541 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1542 /*
1543 * a kludge for nfsv2 padded handles.
1544 */
1545 size_t sz;
1546
1547 sz = FHANDLE_SIZE(fhp);
1548 kmem_free(fhp, fhsize);
1549 fhsize = sz;
1550 goto again;
1551 } else {
1552 /*
1553 * userland told us wrong size.
1554 */
1555 error = EINVAL;
1556 }
1557 }
1558 kmem_free(fhp, fhsize);
1559 return error;
1560 }
1561
1562 void
1563 vfs_copyinfh_free(fhandle_t *fhp)
1564 {
1565
1566 vfs__fhfree(fhp);
1567 }
1568
1569 /*
1570 * Get file handle system call
1571 */
1572 int
1573 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1574 {
1575 /* {
1576 syscallarg(char *) fname;
1577 syscallarg(fhandle_t *) fhp;
1578 syscallarg(size_t *) fh_size;
1579 } */
1580 struct vnode *vp;
1581 fhandle_t *fh;
1582 int error;
1583 struct nameidata nd;
1584 size_t sz;
1585 size_t usz;
1586
1587 /*
1588 * Must be super user
1589 */
1590 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1591 0, NULL, NULL, NULL);
1592 if (error)
1593 return (error);
1594 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1595 SCARG(uap, fname));
1596 error = namei(&nd);
1597 if (error)
1598 return (error);
1599 vp = nd.ni_vp;
1600 error = vfs_composefh_alloc(vp, &fh);
1601 vput(vp);
1602 if (error != 0) {
1603 goto out;
1604 }
1605 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1606 if (error != 0) {
1607 goto out;
1608 }
1609 sz = FHANDLE_SIZE(fh);
1610 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1611 if (error != 0) {
1612 goto out;
1613 }
1614 if (usz >= sz) {
1615 error = copyout(fh, SCARG(uap, fhp), sz);
1616 } else {
1617 error = E2BIG;
1618 }
1619 out:
1620 vfs_composefh_free(fh);
1621 return (error);
1622 }
1623
1624 /*
1625 * Open a file given a file handle.
1626 *
1627 * Check permissions, allocate an open file structure,
1628 * and call the device open routine if any.
1629 */
1630
1631 int
1632 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1633 register_t *retval)
1634 {
1635 file_t *fp;
1636 struct vnode *vp = NULL;
1637 kauth_cred_t cred = l->l_cred;
1638 file_t *nfp;
1639 int type, indx, error=0;
1640 struct flock lf;
1641 struct vattr va;
1642 fhandle_t *fh;
1643 int flags;
1644 proc_t *p;
1645
1646 p = curproc;
1647
1648 /*
1649 * Must be super user
1650 */
1651 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1652 0, NULL, NULL, NULL)))
1653 return (error);
1654
1655 flags = FFLAGS(oflags);
1656 if ((flags & (FREAD | FWRITE)) == 0)
1657 return (EINVAL);
1658 if ((flags & O_CREAT))
1659 return (EINVAL);
1660 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1661 return (error);
1662 fp = nfp;
1663 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1664 if (error != 0) {
1665 goto bad;
1666 }
1667 error = vfs_fhtovp(fh, &vp);
1668 if (error != 0) {
1669 goto bad;
1670 }
1671
1672 /* Now do an effective vn_open */
1673
1674 if (vp->v_type == VSOCK) {
1675 error = EOPNOTSUPP;
1676 goto bad;
1677 }
1678 error = vn_openchk(vp, cred, flags);
1679 if (error != 0)
1680 goto bad;
1681 if (flags & O_TRUNC) {
1682 VOP_UNLOCK(vp, 0); /* XXX */
1683 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1684 VATTR_NULL(&va);
1685 va.va_size = 0;
1686 error = VOP_SETATTR(vp, &va, cred);
1687 if (error)
1688 goto bad;
1689 }
1690 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1691 goto bad;
1692 if (flags & FWRITE) {
1693 mutex_enter(&vp->v_interlock);
1694 vp->v_writecount++;
1695 mutex_exit(&vp->v_interlock);
1696 }
1697
1698 /* done with modified vn_open, now finish what sys_open does. */
1699
1700 fp->f_flag = flags & FMASK;
1701 fp->f_type = DTYPE_VNODE;
1702 fp->f_ops = &vnops;
1703 fp->f_data = vp;
1704 if (flags & (O_EXLOCK | O_SHLOCK)) {
1705 lf.l_whence = SEEK_SET;
1706 lf.l_start = 0;
1707 lf.l_len = 0;
1708 if (flags & O_EXLOCK)
1709 lf.l_type = F_WRLCK;
1710 else
1711 lf.l_type = F_RDLCK;
1712 type = F_FLOCK;
1713 if ((flags & FNONBLOCK) == 0)
1714 type |= F_WAIT;
1715 VOP_UNLOCK(vp, 0);
1716 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1717 if (error) {
1718 (void) vn_close(vp, fp->f_flag, fp->f_cred);
1719 fd_abort(p, fp, indx);
1720 return (error);
1721 }
1722 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1723 atomic_or_uint(&fp->f_flag, FHASLOCK);
1724 }
1725 VOP_UNLOCK(vp, 0);
1726 *retval = indx;
1727 fd_affix(p, fp, indx);
1728 vfs_copyinfh_free(fh);
1729 return (0);
1730
1731 bad:
1732 fd_abort(p, fp, indx);
1733 if (vp != NULL)
1734 vput(vp);
1735 vfs_copyinfh_free(fh);
1736 return (error);
1737 }
1738
1739 int
1740 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1741 {
1742 /* {
1743 syscallarg(const void *) fhp;
1744 syscallarg(size_t) fh_size;
1745 syscallarg(int) flags;
1746 } */
1747
1748 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1749 SCARG(uap, flags), retval);
1750 }
1751
1752 int
1753 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1754 {
1755 int error;
1756 fhandle_t *fh;
1757 struct vnode *vp;
1758
1759 /*
1760 * Must be super user
1761 */
1762 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1763 0, NULL, NULL, NULL)))
1764 return (error);
1765
1766 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1767 if (error != 0)
1768 return error;
1769
1770 error = vfs_fhtovp(fh, &vp);
1771 vfs_copyinfh_free(fh);
1772 if (error != 0)
1773 return error;
1774
1775 error = vn_stat(vp, sb);
1776 vput(vp);
1777 return error;
1778 }
1779
1780
1781 /* ARGSUSED */
1782 int
1783 sys___fhstat40(struct lwp *l, const struct sys___fhstat40_args *uap, register_t *retval)
1784 {
1785 /* {
1786 syscallarg(const void *) fhp;
1787 syscallarg(size_t) fh_size;
1788 syscallarg(struct stat *) sb;
1789 } */
1790 struct stat sb;
1791 int error;
1792
1793 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1794 if (error)
1795 return error;
1796 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1797 }
1798
1799 int
1800 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1801 int flags)
1802 {
1803 fhandle_t *fh;
1804 struct mount *mp;
1805 struct vnode *vp;
1806 int error;
1807
1808 /*
1809 * Must be super user
1810 */
1811 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1812 0, NULL, NULL, NULL)))
1813 return error;
1814
1815 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1816 if (error != 0)
1817 return error;
1818
1819 error = vfs_fhtovp(fh, &vp);
1820 vfs_copyinfh_free(fh);
1821 if (error != 0)
1822 return error;
1823
1824 mp = vp->v_mount;
1825 error = dostatvfs(mp, sb, l, flags, 1);
1826 vput(vp);
1827 return error;
1828 }
1829
1830 /* ARGSUSED */
1831 int
1832 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1833 {
1834 /* {
1835 syscallarg(const void *) fhp;
1836 syscallarg(size_t) fh_size;
1837 syscallarg(struct statvfs *) buf;
1838 syscallarg(int) flags;
1839 } */
1840 struct statvfs *sb = STATVFSBUF_GET();
1841 int error;
1842
1843 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1844 SCARG(uap, flags));
1845 if (error == 0)
1846 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1847 STATVFSBUF_PUT(sb);
1848 return error;
1849 }
1850
1851 /*
1852 * Create a special file.
1853 */
1854 /* ARGSUSED */
1855 int
1856 sys_mknod(struct lwp *l, const struct sys_mknod_args *uap, register_t *retval)
1857 {
1858 /* {
1859 syscallarg(const char *) path;
1860 syscallarg(int) mode;
1861 syscallarg(int) dev;
1862 } */
1863 struct proc *p = l->l_proc;
1864 struct vnode *vp;
1865 struct vattr vattr;
1866 int error, optype;
1867 struct nameidata nd;
1868 char *path;
1869 const char *cpath;
1870 enum uio_seg seg = UIO_USERSPACE;
1871
1872 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1873 0, NULL, NULL, NULL)) != 0)
1874 return (error);
1875
1876 optype = VOP_MKNOD_DESCOFFSET;
1877
1878 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1879 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath);
1880
1881 if ((error = namei(&nd)) != 0)
1882 goto out;
1883 vp = nd.ni_vp;
1884 if (vp != NULL)
1885 error = EEXIST;
1886 else {
1887 VATTR_NULL(&vattr);
1888 /* We will read cwdi->cwdi_cmask unlocked. */
1889 vattr.va_mode =
1890 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1891 vattr.va_rdev = SCARG(uap, dev);
1892
1893 switch (SCARG(uap, mode) & S_IFMT) {
1894 case S_IFMT: /* used by badsect to flag bad sectors */
1895 vattr.va_type = VBAD;
1896 break;
1897 case S_IFCHR:
1898 vattr.va_type = VCHR;
1899 break;
1900 case S_IFBLK:
1901 vattr.va_type = VBLK;
1902 break;
1903 case S_IFWHT:
1904 optype = VOP_WHITEOUT_DESCOFFSET;
1905 break;
1906 case S_IFREG:
1907 #if NVERIEXEC > 0
1908 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1909 O_CREAT);
1910 #endif /* NVERIEXEC > 0 */
1911 vattr.va_type = VREG;
1912 vattr.va_rdev = VNOVAL;
1913 optype = VOP_CREATE_DESCOFFSET;
1914 break;
1915 default:
1916 error = EINVAL;
1917 break;
1918 }
1919 }
1920 if (!error) {
1921 switch (optype) {
1922 case VOP_WHITEOUT_DESCOFFSET:
1923 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1924 if (error)
1925 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1926 vput(nd.ni_dvp);
1927 break;
1928
1929 case VOP_MKNOD_DESCOFFSET:
1930 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1931 &nd.ni_cnd, &vattr);
1932 if (error == 0)
1933 vput(nd.ni_vp);
1934 break;
1935
1936 case VOP_CREATE_DESCOFFSET:
1937 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1938 &nd.ni_cnd, &vattr);
1939 if (error == 0)
1940 vput(nd.ni_vp);
1941 break;
1942 }
1943 } else {
1944 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1945 if (nd.ni_dvp == vp)
1946 vrele(nd.ni_dvp);
1947 else
1948 vput(nd.ni_dvp);
1949 if (vp)
1950 vrele(vp);
1951 }
1952 out:
1953 VERIEXEC_PATH_PUT(path);
1954 return (error);
1955 }
1956
1957 /*
1958 * Create a named pipe.
1959 */
1960 /* ARGSUSED */
1961 int
1962 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
1963 {
1964 /* {
1965 syscallarg(const char *) path;
1966 syscallarg(int) mode;
1967 } */
1968 struct proc *p = l->l_proc;
1969 struct vattr vattr;
1970 int error;
1971 struct nameidata nd;
1972
1973 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
1974 SCARG(uap, path));
1975 if ((error = namei(&nd)) != 0)
1976 return (error);
1977 if (nd.ni_vp != NULL) {
1978 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1979 if (nd.ni_dvp == nd.ni_vp)
1980 vrele(nd.ni_dvp);
1981 else
1982 vput(nd.ni_dvp);
1983 vrele(nd.ni_vp);
1984 return (EEXIST);
1985 }
1986 VATTR_NULL(&vattr);
1987 vattr.va_type = VFIFO;
1988 /* We will read cwdi->cwdi_cmask unlocked. */
1989 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1990 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1991 if (error == 0)
1992 vput(nd.ni_vp);
1993 return (error);
1994 }
1995
1996 /*
1997 * Make a hard file link.
1998 */
1999 /* ARGSUSED */
2000 int
2001 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2002 {
2003 /* {
2004 syscallarg(const char *) path;
2005 syscallarg(const char *) link;
2006 } */
2007 struct vnode *vp;
2008 struct nameidata nd;
2009 int error;
2010
2011 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2012 SCARG(uap, path));
2013 if ((error = namei(&nd)) != 0)
2014 return (error);
2015 vp = nd.ni_vp;
2016 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2017 SCARG(uap, link));
2018 if ((error = namei(&nd)) != 0)
2019 goto out;
2020 if (nd.ni_vp) {
2021 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2022 if (nd.ni_dvp == nd.ni_vp)
2023 vrele(nd.ni_dvp);
2024 else
2025 vput(nd.ni_dvp);
2026 vrele(nd.ni_vp);
2027 error = EEXIST;
2028 goto out;
2029 }
2030 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2031 out:
2032 vrele(vp);
2033 return (error);
2034 }
2035
2036 /*
2037 * Make a symbolic link.
2038 */
2039 /* ARGSUSED */
2040 int
2041 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2042 {
2043 /* {
2044 syscallarg(const char *) path;
2045 syscallarg(const char *) link;
2046 } */
2047 struct proc *p = l->l_proc;
2048 struct vattr vattr;
2049 char *path;
2050 int error;
2051 struct nameidata nd;
2052
2053 path = PNBUF_GET();
2054 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
2055 if (error)
2056 goto out;
2057 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE,
2058 SCARG(uap, link));
2059 if ((error = namei(&nd)) != 0)
2060 goto out;
2061 if (nd.ni_vp) {
2062 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2063 if (nd.ni_dvp == nd.ni_vp)
2064 vrele(nd.ni_dvp);
2065 else
2066 vput(nd.ni_dvp);
2067 vrele(nd.ni_vp);
2068 error = EEXIST;
2069 goto out;
2070 }
2071 VATTR_NULL(&vattr);
2072 vattr.va_type = VLNK;
2073 /* We will read cwdi->cwdi_cmask unlocked. */
2074 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2075 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2076 if (error == 0)
2077 vput(nd.ni_vp);
2078 out:
2079 PNBUF_PUT(path);
2080 return (error);
2081 }
2082
2083 /*
2084 * Delete a whiteout from the filesystem.
2085 */
2086 /* ARGSUSED */
2087 int
2088 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2089 {
2090 /* {
2091 syscallarg(const char *) path;
2092 } */
2093 int error;
2094 struct nameidata nd;
2095
2096 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT,
2097 UIO_USERSPACE, SCARG(uap, path));
2098 error = namei(&nd);
2099 if (error)
2100 return (error);
2101
2102 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2103 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2104 if (nd.ni_dvp == nd.ni_vp)
2105 vrele(nd.ni_dvp);
2106 else
2107 vput(nd.ni_dvp);
2108 if (nd.ni_vp)
2109 vrele(nd.ni_vp);
2110 return (EEXIST);
2111 }
2112 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2113 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2114 vput(nd.ni_dvp);
2115 return (error);
2116 }
2117
2118 /*
2119 * Delete a name from the filesystem.
2120 */
2121 /* ARGSUSED */
2122 int
2123 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2124 {
2125 /* {
2126 syscallarg(const char *) path;
2127 } */
2128
2129 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2130 }
2131
2132 int
2133 do_sys_unlink(const char *arg, enum uio_seg seg)
2134 {
2135 struct vnode *vp;
2136 int error;
2137 struct nameidata nd;
2138 kauth_cred_t cred;
2139 char *path;
2140 const char *cpath;
2141
2142 VERIEXEC_PATH_GET(arg, seg, cpath, path);
2143 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath);
2144
2145 if ((error = namei(&nd)) != 0)
2146 goto out;
2147 vp = nd.ni_vp;
2148
2149 /*
2150 * The root of a mounted filesystem cannot be deleted.
2151 */
2152 if (vp->v_vflag & VV_ROOT) {
2153 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2154 if (nd.ni_dvp == vp)
2155 vrele(nd.ni_dvp);
2156 else
2157 vput(nd.ni_dvp);
2158 vput(vp);
2159 error = EBUSY;
2160 goto out;
2161 }
2162
2163 #if NVERIEXEC > 0
2164 /* Handle remove requests for veriexec entries. */
2165 if ((error = veriexec_removechk(curlwp, nd.ni_vp, nd.ni_dirp)) != 0) {
2166 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2167 if (nd.ni_dvp == vp)
2168 vrele(nd.ni_dvp);
2169 else
2170 vput(nd.ni_dvp);
2171 vput(vp);
2172 goto out;
2173 }
2174 #endif /* NVERIEXEC > 0 */
2175
2176 cred = kauth_cred_get();
2177 #ifdef FILEASSOC
2178 (void)fileassoc_file_delete(vp);
2179 #endif /* FILEASSOC */
2180 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2181 out:
2182 VERIEXEC_PATH_PUT(path);
2183 return (error);
2184 }
2185
2186 /*
2187 * Reposition read/write file offset.
2188 */
2189 int
2190 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2191 {
2192 /* {
2193 syscallarg(int) fd;
2194 syscallarg(int) pad;
2195 syscallarg(off_t) offset;
2196 syscallarg(int) whence;
2197 } */
2198 kauth_cred_t cred = l->l_cred;
2199 file_t *fp;
2200 struct vnode *vp;
2201 struct vattr vattr;
2202 off_t newoff;
2203 int error, fd;
2204
2205 fd = SCARG(uap, fd);
2206
2207 if ((fp = fd_getfile(fd)) == NULL)
2208 return (EBADF);
2209
2210 vp = fp->f_data;
2211 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2212 error = ESPIPE;
2213 goto out;
2214 }
2215
2216 switch (SCARG(uap, whence)) {
2217 case SEEK_CUR:
2218 newoff = fp->f_offset + SCARG(uap, offset);
2219 break;
2220 case SEEK_END:
2221 error = VOP_GETATTR(vp, &vattr, cred);
2222 if (error) {
2223 goto out;
2224 }
2225 newoff = SCARG(uap, offset) + vattr.va_size;
2226 break;
2227 case SEEK_SET:
2228 newoff = SCARG(uap, offset);
2229 break;
2230 default:
2231 error = EINVAL;
2232 goto out;
2233 }
2234 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2235 *(off_t *)retval = fp->f_offset = newoff;
2236 }
2237 out:
2238 fd_putfile(fd);
2239 return (error);
2240 }
2241
2242 /*
2243 * Positional read system call.
2244 */
2245 int
2246 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2247 {
2248 /* {
2249 syscallarg(int) fd;
2250 syscallarg(void *) buf;
2251 syscallarg(size_t) nbyte;
2252 syscallarg(off_t) offset;
2253 } */
2254 file_t *fp;
2255 struct vnode *vp;
2256 off_t offset;
2257 int error, fd = SCARG(uap, fd);
2258
2259 if ((fp = fd_getfile(fd)) == NULL)
2260 return (EBADF);
2261
2262 if ((fp->f_flag & FREAD) == 0) {
2263 fd_putfile(fd);
2264 return (EBADF);
2265 }
2266
2267 vp = fp->f_data;
2268 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2269 error = ESPIPE;
2270 goto out;
2271 }
2272
2273 offset = SCARG(uap, offset);
2274
2275 /*
2276 * XXX This works because no file systems actually
2277 * XXX take any action on the seek operation.
2278 */
2279 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2280 goto out;
2281
2282 /* dofileread() will unuse the descriptor for us */
2283 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2284 &offset, 0, retval));
2285
2286 out:
2287 fd_putfile(fd);
2288 return (error);
2289 }
2290
2291 /*
2292 * Positional scatter read system call.
2293 */
2294 int
2295 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2296 {
2297 /* {
2298 syscallarg(int) fd;
2299 syscallarg(const struct iovec *) iovp;
2300 syscallarg(int) iovcnt;
2301 syscallarg(off_t) offset;
2302 } */
2303 off_t offset = SCARG(uap, offset);
2304
2305 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2306 SCARG(uap, iovcnt), &offset, 0, retval);
2307 }
2308
2309 /*
2310 * Positional write system call.
2311 */
2312 int
2313 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2314 {
2315 /* {
2316 syscallarg(int) fd;
2317 syscallarg(const void *) buf;
2318 syscallarg(size_t) nbyte;
2319 syscallarg(off_t) offset;
2320 } */
2321 file_t *fp;
2322 struct vnode *vp;
2323 off_t offset;
2324 int error, fd = SCARG(uap, fd);
2325
2326 if ((fp = fd_getfile(fd)) == NULL)
2327 return (EBADF);
2328
2329 if ((fp->f_flag & FWRITE) == 0) {
2330 fd_putfile(fd);
2331 return (EBADF);
2332 }
2333
2334 vp = fp->f_data;
2335 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2336 error = ESPIPE;
2337 goto out;
2338 }
2339
2340 offset = SCARG(uap, offset);
2341
2342 /*
2343 * XXX This works because no file systems actually
2344 * XXX take any action on the seek operation.
2345 */
2346 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2347 goto out;
2348
2349 /* dofilewrite() will unuse the descriptor for us */
2350 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2351 &offset, 0, retval));
2352
2353 out:
2354 fd_putfile(fd);
2355 return (error);
2356 }
2357
2358 /*
2359 * Positional gather write system call.
2360 */
2361 int
2362 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2363 {
2364 /* {
2365 syscallarg(int) fd;
2366 syscallarg(const struct iovec *) iovp;
2367 syscallarg(int) iovcnt;
2368 syscallarg(off_t) offset;
2369 } */
2370 off_t offset = SCARG(uap, offset);
2371
2372 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2373 SCARG(uap, iovcnt), &offset, 0, retval);
2374 }
2375
2376 /*
2377 * Check access permissions.
2378 */
2379 int
2380 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2381 {
2382 /* {
2383 syscallarg(const char *) path;
2384 syscallarg(int) flags;
2385 } */
2386 kauth_cred_t cred;
2387 struct vnode *vp;
2388 int error, flags;
2389 struct nameidata nd;
2390
2391 cred = kauth_cred_dup(l->l_cred);
2392 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2393 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2394 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2395 SCARG(uap, path));
2396 /* Override default credentials */
2397 nd.ni_cnd.cn_cred = cred;
2398 if ((error = namei(&nd)) != 0)
2399 goto out;
2400 vp = nd.ni_vp;
2401
2402 /* Flags == 0 means only check for existence. */
2403 if (SCARG(uap, flags)) {
2404 flags = 0;
2405 if (SCARG(uap, flags) & R_OK)
2406 flags |= VREAD;
2407 if (SCARG(uap, flags) & W_OK)
2408 flags |= VWRITE;
2409 if (SCARG(uap, flags) & X_OK)
2410 flags |= VEXEC;
2411
2412 error = VOP_ACCESS(vp, flags, cred);
2413 if (!error && (flags & VWRITE))
2414 error = vn_writechk(vp);
2415 }
2416 vput(vp);
2417 out:
2418 kauth_cred_free(cred);
2419 return (error);
2420 }
2421
2422 /*
2423 * Common code for all sys_stat functions, including compat versions.
2424 */
2425 int
2426 do_sys_stat(const char *path, unsigned int nd_flags, struct stat *sb)
2427 {
2428 int error;
2429 struct nameidata nd;
2430
2431 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT,
2432 UIO_USERSPACE, path);
2433 error = namei(&nd);
2434 if (error != 0)
2435 return error;
2436 error = vn_stat(nd.ni_vp, sb);
2437 vput(nd.ni_vp);
2438 return error;
2439 }
2440
2441 /*
2442 * Get file status; this version follows links.
2443 */
2444 /* ARGSUSED */
2445 int
2446 sys___stat30(struct lwp *l, const struct sys___stat30_args *uap, register_t *retval)
2447 {
2448 /* {
2449 syscallarg(const char *) path;
2450 syscallarg(struct stat *) ub;
2451 } */
2452 struct stat sb;
2453 int error;
2454
2455 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2456 if (error)
2457 return error;
2458 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2459 }
2460
2461 /*
2462 * Get file status; this version does not follow links.
2463 */
2464 /* ARGSUSED */
2465 int
2466 sys___lstat30(struct lwp *l, const struct sys___lstat30_args *uap, register_t *retval)
2467 {
2468 /* {
2469 syscallarg(const char *) path;
2470 syscallarg(struct stat *) ub;
2471 } */
2472 struct stat sb;
2473 int error;
2474
2475 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2476 if (error)
2477 return error;
2478 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2479 }
2480
2481 /*
2482 * Get configurable pathname variables.
2483 */
2484 /* ARGSUSED */
2485 int
2486 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2487 {
2488 /* {
2489 syscallarg(const char *) path;
2490 syscallarg(int) name;
2491 } */
2492 int error;
2493 struct nameidata nd;
2494
2495 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2496 SCARG(uap, path));
2497 if ((error = namei(&nd)) != 0)
2498 return (error);
2499 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2500 vput(nd.ni_vp);
2501 return (error);
2502 }
2503
2504 /*
2505 * Return target name of a symbolic link.
2506 */
2507 /* ARGSUSED */
2508 int
2509 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2510 {
2511 /* {
2512 syscallarg(const char *) path;
2513 syscallarg(char *) buf;
2514 syscallarg(size_t) count;
2515 } */
2516 struct vnode *vp;
2517 struct iovec aiov;
2518 struct uio auio;
2519 int error;
2520 struct nameidata nd;
2521
2522 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2523 SCARG(uap, path));
2524 if ((error = namei(&nd)) != 0)
2525 return (error);
2526 vp = nd.ni_vp;
2527 if (vp->v_type != VLNK)
2528 error = EINVAL;
2529 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2530 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2531 aiov.iov_base = SCARG(uap, buf);
2532 aiov.iov_len = SCARG(uap, count);
2533 auio.uio_iov = &aiov;
2534 auio.uio_iovcnt = 1;
2535 auio.uio_offset = 0;
2536 auio.uio_rw = UIO_READ;
2537 KASSERT(l == curlwp);
2538 auio.uio_vmspace = l->l_proc->p_vmspace;
2539 auio.uio_resid = SCARG(uap, count);
2540 error = VOP_READLINK(vp, &auio, l->l_cred);
2541 }
2542 vput(vp);
2543 *retval = SCARG(uap, count) - auio.uio_resid;
2544 return (error);
2545 }
2546
2547 /*
2548 * Change flags of a file given a path name.
2549 */
2550 /* ARGSUSED */
2551 int
2552 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2553 {
2554 /* {
2555 syscallarg(const char *) path;
2556 syscallarg(u_long) flags;
2557 } */
2558 struct vnode *vp;
2559 int error;
2560 struct nameidata nd;
2561
2562 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2563 SCARG(uap, path));
2564 if ((error = namei(&nd)) != 0)
2565 return (error);
2566 vp = nd.ni_vp;
2567 error = change_flags(vp, SCARG(uap, flags), l);
2568 vput(vp);
2569 return (error);
2570 }
2571
2572 /*
2573 * Change flags of a file given a file descriptor.
2574 */
2575 /* ARGSUSED */
2576 int
2577 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2578 {
2579 /* {
2580 syscallarg(int) fd;
2581 syscallarg(u_long) flags;
2582 } */
2583 struct vnode *vp;
2584 file_t *fp;
2585 int error;
2586
2587 /* fd_getvnode() will use the descriptor for us */
2588 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2589 return (error);
2590 vp = fp->f_data;
2591 error = change_flags(vp, SCARG(uap, flags), l);
2592 VOP_UNLOCK(vp, 0);
2593 fd_putfile(SCARG(uap, fd));
2594 return (error);
2595 }
2596
2597 /*
2598 * Change flags of a file given a path name; this version does
2599 * not follow links.
2600 */
2601 int
2602 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2603 {
2604 /* {
2605 syscallarg(const char *) path;
2606 syscallarg(u_long) flags;
2607 } */
2608 struct vnode *vp;
2609 int error;
2610 struct nameidata nd;
2611
2612 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2613 SCARG(uap, path));
2614 if ((error = namei(&nd)) != 0)
2615 return (error);
2616 vp = nd.ni_vp;
2617 error = change_flags(vp, SCARG(uap, flags), l);
2618 vput(vp);
2619 return (error);
2620 }
2621
2622 /*
2623 * Common routine to change flags of a file.
2624 */
2625 int
2626 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2627 {
2628 struct vattr vattr;
2629 int error;
2630
2631 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2632 /*
2633 * Non-superusers cannot change the flags on devices, even if they
2634 * own them.
2635 */
2636 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2637 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2638 goto out;
2639 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2640 error = EINVAL;
2641 goto out;
2642 }
2643 }
2644 VATTR_NULL(&vattr);
2645 vattr.va_flags = flags;
2646 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2647 out:
2648 return (error);
2649 }
2650
2651 /*
2652 * Change mode of a file given path name; this version follows links.
2653 */
2654 /* ARGSUSED */
2655 int
2656 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
2657 {
2658 /* {
2659 syscallarg(const char *) path;
2660 syscallarg(int) mode;
2661 } */
2662 int error;
2663 struct nameidata nd;
2664
2665 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2666 SCARG(uap, path));
2667 if ((error = namei(&nd)) != 0)
2668 return (error);
2669
2670 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2671
2672 vrele(nd.ni_vp);
2673 return (error);
2674 }
2675
2676 /*
2677 * Change mode of a file given a file descriptor.
2678 */
2679 /* ARGSUSED */
2680 int
2681 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
2682 {
2683 /* {
2684 syscallarg(int) fd;
2685 syscallarg(int) mode;
2686 } */
2687 file_t *fp;
2688 int error;
2689
2690 /* fd_getvnode() will use the descriptor for us */
2691 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2692 return (error);
2693 error = change_mode(fp->f_data, SCARG(uap, mode), l);
2694 fd_putfile(SCARG(uap, fd));
2695 return (error);
2696 }
2697
2698 /*
2699 * Change mode of a file given path name; this version does not follow links.
2700 */
2701 /* ARGSUSED */
2702 int
2703 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
2704 {
2705 /* {
2706 syscallarg(const char *) path;
2707 syscallarg(int) mode;
2708 } */
2709 int error;
2710 struct nameidata nd;
2711
2712 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2713 SCARG(uap, path));
2714 if ((error = namei(&nd)) != 0)
2715 return (error);
2716
2717 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2718
2719 vrele(nd.ni_vp);
2720 return (error);
2721 }
2722
2723 /*
2724 * Common routine to set mode given a vnode.
2725 */
2726 static int
2727 change_mode(struct vnode *vp, int mode, struct lwp *l)
2728 {
2729 struct vattr vattr;
2730 int error;
2731
2732 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2733 VATTR_NULL(&vattr);
2734 vattr.va_mode = mode & ALLPERMS;
2735 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2736 VOP_UNLOCK(vp, 0);
2737 return (error);
2738 }
2739
2740 /*
2741 * Set ownership given a path name; this version follows links.
2742 */
2743 /* ARGSUSED */
2744 int
2745 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
2746 {
2747 /* {
2748 syscallarg(const char *) path;
2749 syscallarg(uid_t) uid;
2750 syscallarg(gid_t) gid;
2751 } */
2752 int error;
2753 struct nameidata nd;
2754
2755 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2756 SCARG(uap, path));
2757 if ((error = namei(&nd)) != 0)
2758 return (error);
2759
2760 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2761
2762 vrele(nd.ni_vp);
2763 return (error);
2764 }
2765
2766 /*
2767 * Set ownership given a path name; this version follows links.
2768 * Provides POSIX semantics.
2769 */
2770 /* ARGSUSED */
2771 int
2772 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
2773 {
2774 /* {
2775 syscallarg(const char *) path;
2776 syscallarg(uid_t) uid;
2777 syscallarg(gid_t) gid;
2778 } */
2779 int error;
2780 struct nameidata nd;
2781
2782 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
2783 SCARG(uap, path));
2784 if ((error = namei(&nd)) != 0)
2785 return (error);
2786
2787 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2788
2789 vrele(nd.ni_vp);
2790 return (error);
2791 }
2792
2793 /*
2794 * Set ownership given a file descriptor.
2795 */
2796 /* ARGSUSED */
2797 int
2798 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
2799 {
2800 /* {
2801 syscallarg(int) fd;
2802 syscallarg(uid_t) uid;
2803 syscallarg(gid_t) gid;
2804 } */
2805 int error;
2806 file_t *fp;
2807
2808 /* fd_getvnode() will use the descriptor for us */
2809 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2810 return (error);
2811 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2812 l, 0);
2813 fd_putfile(SCARG(uap, fd));
2814 return (error);
2815 }
2816
2817 /*
2818 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2819 */
2820 /* ARGSUSED */
2821 int
2822 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
2823 {
2824 /* {
2825 syscallarg(int) fd;
2826 syscallarg(uid_t) uid;
2827 syscallarg(gid_t) gid;
2828 } */
2829 int error;
2830 file_t *fp;
2831
2832 /* fd_getvnode() will use the descriptor for us */
2833 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2834 return (error);
2835 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
2836 l, 1);
2837 fd_putfile(SCARG(uap, fd));
2838 return (error);
2839 }
2840
2841 /*
2842 * Set ownership given a path name; this version does not follow links.
2843 */
2844 /* ARGSUSED */
2845 int
2846 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
2847 {
2848 /* {
2849 syscallarg(const char *) path;
2850 syscallarg(uid_t) uid;
2851 syscallarg(gid_t) gid;
2852 } */
2853 int error;
2854 struct nameidata nd;
2855
2856 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2857 SCARG(uap, path));
2858 if ((error = namei(&nd)) != 0)
2859 return (error);
2860
2861 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2862
2863 vrele(nd.ni_vp);
2864 return (error);
2865 }
2866
2867 /*
2868 * Set ownership given a path name; this version does not follow links.
2869 * Provides POSIX/XPG semantics.
2870 */
2871 /* ARGSUSED */
2872 int
2873 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
2874 {
2875 /* {
2876 syscallarg(const char *) path;
2877 syscallarg(uid_t) uid;
2878 syscallarg(gid_t) gid;
2879 } */
2880 int error;
2881 struct nameidata nd;
2882
2883 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE,
2884 SCARG(uap, path));
2885 if ((error = namei(&nd)) != 0)
2886 return (error);
2887
2888 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2889
2890 vrele(nd.ni_vp);
2891 return (error);
2892 }
2893
2894 /*
2895 * Common routine to set ownership given a vnode.
2896 */
2897 static int
2898 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2899 int posix_semantics)
2900 {
2901 struct vattr vattr;
2902 mode_t newmode;
2903 int error;
2904
2905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2906 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
2907 goto out;
2908
2909 #define CHANGED(x) ((int)(x) != -1)
2910 newmode = vattr.va_mode;
2911 if (posix_semantics) {
2912 /*
2913 * POSIX/XPG semantics: if the caller is not the super-user,
2914 * clear set-user-id and set-group-id bits. Both POSIX and
2915 * the XPG consider the behaviour for calls by the super-user
2916 * implementation-defined; we leave the set-user-id and set-
2917 * group-id settings intact in that case.
2918 */
2919 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2920 NULL) != 0)
2921 newmode &= ~(S_ISUID | S_ISGID);
2922 } else {
2923 /*
2924 * NetBSD semantics: when changing owner and/or group,
2925 * clear the respective bit(s).
2926 */
2927 if (CHANGED(uid))
2928 newmode &= ~S_ISUID;
2929 if (CHANGED(gid))
2930 newmode &= ~S_ISGID;
2931 }
2932 /* Update va_mode iff altered. */
2933 if (vattr.va_mode == newmode)
2934 newmode = VNOVAL;
2935
2936 VATTR_NULL(&vattr);
2937 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2938 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2939 vattr.va_mode = newmode;
2940 error = VOP_SETATTR(vp, &vattr, l->l_cred);
2941 #undef CHANGED
2942
2943 out:
2944 VOP_UNLOCK(vp, 0);
2945 return (error);
2946 }
2947
2948 /*
2949 * Set the access and modification times given a path name; this
2950 * version follows links.
2951 */
2952 /* ARGSUSED */
2953 int
2954 sys_utimes(struct lwp *l, const struct sys_utimes_args *uap, register_t *retval)
2955 {
2956 /* {
2957 syscallarg(const char *) path;
2958 syscallarg(const struct timeval *) tptr;
2959 } */
2960
2961 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2962 SCARG(uap, tptr), UIO_USERSPACE);
2963 }
2964
2965 /*
2966 * Set the access and modification times given a file descriptor.
2967 */
2968 /* ARGSUSED */
2969 int
2970 sys_futimes(struct lwp *l, const struct sys_futimes_args *uap, register_t *retval)
2971 {
2972 /* {
2973 syscallarg(int) fd;
2974 syscallarg(const struct timeval *) tptr;
2975 } */
2976 int error;
2977 file_t *fp;
2978
2979 /* fd_getvnode() will use the descriptor for us */
2980 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2981 return (error);
2982 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
2983 UIO_USERSPACE);
2984 fd_putfile(SCARG(uap, fd));
2985 return (error);
2986 }
2987
2988 /*
2989 * Set the access and modification times given a path name; this
2990 * version does not follow links.
2991 */
2992 int
2993 sys_lutimes(struct lwp *l, const struct sys_lutimes_args *uap, register_t *retval)
2994 {
2995 /* {
2996 syscallarg(const char *) path;
2997 syscallarg(const struct timeval *) tptr;
2998 } */
2999
3000 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3001 SCARG(uap, tptr), UIO_USERSPACE);
3002 }
3003
3004 /*
3005 * Common routine to set access and modification times given a vnode.
3006 */
3007 int
3008 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3009 const struct timeval *tptr, enum uio_seg seg)
3010 {
3011 struct vattr vattr;
3012 struct nameidata nd;
3013 int error;
3014 bool vanull, setbirthtime;
3015 struct timespec ts[2];
3016
3017 if (tptr == NULL) {
3018 vanull = true;
3019 nanotime(&ts[0]);
3020 ts[1] = ts[0];
3021 } else {
3022 struct timeval tv[2];
3023
3024 vanull = false;
3025 if (seg != UIO_SYSSPACE) {
3026 error = copyin(tptr, &tv, sizeof (tv));
3027 if (error != 0)
3028 return error;
3029 tptr = tv;
3030 }
3031 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3032 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3033 }
3034
3035 if (vp == NULL) {
3036 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path);
3037 if ((error = namei(&nd)) != 0)
3038 return error;
3039 vp = nd.ni_vp;
3040 } else
3041 nd.ni_vp = NULL;
3042
3043 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3044 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3045 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3046 VATTR_NULL(&vattr);
3047 vattr.va_atime = ts[0];
3048 vattr.va_mtime = ts[1];
3049 if (setbirthtime)
3050 vattr.va_birthtime = ts[1];
3051 if (vanull)
3052 vattr.va_flags |= VA_UTIMES_NULL;
3053 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3054 VOP_UNLOCK(vp, 0);
3055
3056 if (nd.ni_vp != NULL)
3057 vrele(nd.ni_vp);
3058
3059 return error;
3060 }
3061
3062 /*
3063 * Truncate a file given its path name.
3064 */
3065 /* ARGSUSED */
3066 int
3067 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3068 {
3069 /* {
3070 syscallarg(const char *) path;
3071 syscallarg(int) pad;
3072 syscallarg(off_t) length;
3073 } */
3074 struct vnode *vp;
3075 struct vattr vattr;
3076 int error;
3077 struct nameidata nd;
3078
3079 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3080 SCARG(uap, path));
3081 if ((error = namei(&nd)) != 0)
3082 return (error);
3083 vp = nd.ni_vp;
3084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3085 if (vp->v_type == VDIR)
3086 error = EISDIR;
3087 else if ((error = vn_writechk(vp)) == 0 &&
3088 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3089 VATTR_NULL(&vattr);
3090 vattr.va_size = SCARG(uap, length);
3091 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3092 }
3093 vput(vp);
3094 return (error);
3095 }
3096
3097 /*
3098 * Truncate a file given a file descriptor.
3099 */
3100 /* ARGSUSED */
3101 int
3102 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3103 {
3104 /* {
3105 syscallarg(int) fd;
3106 syscallarg(int) pad;
3107 syscallarg(off_t) length;
3108 } */
3109 struct vattr vattr;
3110 struct vnode *vp;
3111 file_t *fp;
3112 int error;
3113
3114 /* fd_getvnode() will use the descriptor for us */
3115 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3116 return (error);
3117 if ((fp->f_flag & FWRITE) == 0) {
3118 error = EINVAL;
3119 goto out;
3120 }
3121 vp = fp->f_data;
3122 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3123 if (vp->v_type == VDIR)
3124 error = EISDIR;
3125 else if ((error = vn_writechk(vp)) == 0) {
3126 VATTR_NULL(&vattr);
3127 vattr.va_size = SCARG(uap, length);
3128 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3129 }
3130 VOP_UNLOCK(vp, 0);
3131 out:
3132 fd_putfile(SCARG(uap, fd));
3133 return (error);
3134 }
3135
3136 /*
3137 * Sync an open file.
3138 */
3139 /* ARGSUSED */
3140 int
3141 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3142 {
3143 /* {
3144 syscallarg(int) fd;
3145 } */
3146 struct vnode *vp;
3147 file_t *fp;
3148 int error;
3149
3150 /* fd_getvnode() will use the descriptor for us */
3151 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3152 return (error);
3153 vp = fp->f_data;
3154 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3155 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3156 if (error == 0 && bioopsp != NULL &&
3157 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3158 (*bioopsp->io_fsync)(vp, 0);
3159 VOP_UNLOCK(vp, 0);
3160 fd_putfile(SCARG(uap, fd));
3161 return (error);
3162 }
3163
3164 /*
3165 * Sync a range of file data. API modeled after that found in AIX.
3166 *
3167 * FDATASYNC indicates that we need only save enough metadata to be able
3168 * to re-read the written data. Note we duplicate AIX's requirement that
3169 * the file be open for writing.
3170 */
3171 /* ARGSUSED */
3172 int
3173 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3174 {
3175 /* {
3176 syscallarg(int) fd;
3177 syscallarg(int) flags;
3178 syscallarg(off_t) start;
3179 syscallarg(off_t) length;
3180 } */
3181 struct vnode *vp;
3182 file_t *fp;
3183 int flags, nflags;
3184 off_t s, e, len;
3185 int error;
3186
3187 /* fd_getvnode() will use the descriptor for us */
3188 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3189 return (error);
3190
3191 if ((fp->f_flag & FWRITE) == 0) {
3192 error = EBADF;
3193 goto out;
3194 }
3195
3196 flags = SCARG(uap, flags);
3197 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3198 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3199 error = EINVAL;
3200 goto out;
3201 }
3202 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3203 if (flags & FDATASYNC)
3204 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3205 else
3206 nflags = FSYNC_WAIT;
3207 if (flags & FDISKSYNC)
3208 nflags |= FSYNC_CACHE;
3209
3210 len = SCARG(uap, length);
3211 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3212 if (len) {
3213 s = SCARG(uap, start);
3214 e = s + len;
3215 if (e < s) {
3216 error = EINVAL;
3217 goto out;
3218 }
3219 } else {
3220 e = 0;
3221 s = 0;
3222 }
3223
3224 vp = fp->f_data;
3225 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3226 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3227
3228 if (error == 0 && bioopsp != NULL &&
3229 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3230 (*bioopsp->io_fsync)(vp, nflags);
3231
3232 VOP_UNLOCK(vp, 0);
3233 out:
3234 fd_putfile(SCARG(uap, fd));
3235 return (error);
3236 }
3237
3238 /*
3239 * Sync the data of an open file.
3240 */
3241 /* ARGSUSED */
3242 int
3243 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3244 {
3245 /* {
3246 syscallarg(int) fd;
3247 } */
3248 struct vnode *vp;
3249 file_t *fp;
3250 int error;
3251
3252 /* fd_getvnode() will use the descriptor for us */
3253 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3254 return (error);
3255 if ((fp->f_flag & FWRITE) == 0) {
3256 fd_putfile(SCARG(uap, fd));
3257 return (EBADF);
3258 }
3259 vp = fp->f_data;
3260 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3261 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3262 VOP_UNLOCK(vp, 0);
3263 fd_putfile(SCARG(uap, fd));
3264 return (error);
3265 }
3266
3267 /*
3268 * Rename files, (standard) BSD semantics frontend.
3269 */
3270 /* ARGSUSED */
3271 int
3272 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3273 {
3274 /* {
3275 syscallarg(const char *) from;
3276 syscallarg(const char *) to;
3277 } */
3278
3279 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3280 }
3281
3282 /*
3283 * Rename files, POSIX semantics frontend.
3284 */
3285 /* ARGSUSED */
3286 int
3287 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3288 {
3289 /* {
3290 syscallarg(const char *) from;
3291 syscallarg(const char *) to;
3292 } */
3293
3294 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3295 }
3296
3297 /*
3298 * Rename files. Source and destination must either both be directories,
3299 * or both not be directories. If target is a directory, it must be empty.
3300 * If `from' and `to' refer to the same object, the value of the `retain'
3301 * argument is used to determine whether `from' will be
3302 *
3303 * (retain == 0) deleted unless `from' and `to' refer to the same
3304 * object in the file system's name space (BSD).
3305 * (retain == 1) always retained (POSIX).
3306 */
3307 int
3308 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3309 {
3310 struct vnode *tvp, *fvp, *tdvp;
3311 struct nameidata fromnd, tond;
3312 struct mount *fs;
3313 struct lwp *l = curlwp;
3314 struct proc *p;
3315 uint32_t saveflag;
3316 int error;
3317
3318 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT,
3319 seg, from);
3320 if ((error = namei(&fromnd)) != 0)
3321 return (error);
3322 if (fromnd.ni_dvp != fromnd.ni_vp)
3323 VOP_UNLOCK(fromnd.ni_dvp, 0);
3324 fvp = fromnd.ni_vp;
3325
3326 fs = fvp->v_mount;
3327 error = VFS_RENAMELOCK_ENTER(fs);
3328 if (error) {
3329 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3330 vrele(fromnd.ni_dvp);
3331 vrele(fvp);
3332 goto out1;
3333 }
3334
3335 /*
3336 * close, partially, yet another race - ideally we should only
3337 * go as far as getting fromnd.ni_dvp before getting the per-fs
3338 * lock, and then continue to get fromnd.ni_vp, but we can't do
3339 * that with namei as it stands.
3340 *
3341 * This still won't prevent rmdir from nuking fromnd.ni_vp
3342 * under us. The real fix is to get the locks in the right
3343 * order and do the lookups in the right places, but that's a
3344 * major rototill.
3345 *
3346 * Preserve the SAVESTART in cn_flags, because who knows what
3347 * might happen if we don't.
3348 *
3349 * Note: this logic (as well as this whole function) is cloned
3350 * in nfs_serv.c. Proceed accordingly.
3351 */
3352 vrele(fvp);
3353 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3354 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3355 (fromnd.ni_cnd.cn_namelen == 2 &&
3356 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3357 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3358 error = EINVAL;
3359 VFS_RENAMELOCK_EXIT(fs);
3360 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3361 vrele(fromnd.ni_dvp);
3362 goto out1;
3363 }
3364 saveflag = fromnd.ni_cnd.cn_flags & SAVESTART;
3365 fromnd.ni_cnd.cn_flags &= ~SAVESTART;
3366 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3367 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd);
3368 fromnd.ni_cnd.cn_flags |= saveflag;
3369 if (error) {
3370 VOP_UNLOCK(fromnd.ni_dvp, 0);
3371 VFS_RENAMELOCK_EXIT(fs);
3372 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3373 vrele(fromnd.ni_dvp);
3374 goto out1;
3375 }
3376 VOP_UNLOCK(fromnd.ni_vp, 0);
3377 if (fromnd.ni_dvp != fromnd.ni_vp)
3378 VOP_UNLOCK(fromnd.ni_dvp, 0);
3379 fvp = fromnd.ni_vp;
3380
3381 NDINIT(&tond, RENAME,
3382 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT
3383 | (fvp->v_type == VDIR ? CREATEDIR : 0),
3384 seg, to);
3385 if ((error = namei(&tond)) != 0) {
3386 VFS_RENAMELOCK_EXIT(fs);
3387 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3388 vrele(fromnd.ni_dvp);
3389 vrele(fvp);
3390 goto out1;
3391 }
3392 tdvp = tond.ni_dvp;
3393 tvp = tond.ni_vp;
3394
3395 if (tvp != NULL) {
3396 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3397 error = ENOTDIR;
3398 goto out;
3399 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3400 error = EISDIR;
3401 goto out;
3402 }
3403 }
3404
3405 if (fvp == tdvp)
3406 error = EINVAL;
3407
3408 /*
3409 * Source and destination refer to the same object.
3410 */
3411 if (fvp == tvp) {
3412 if (retain)
3413 error = -1;
3414 else if (fromnd.ni_dvp == tdvp &&
3415 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3416 !memcmp(fromnd.ni_cnd.cn_nameptr,
3417 tond.ni_cnd.cn_nameptr,
3418 fromnd.ni_cnd.cn_namelen))
3419 error = -1;
3420 }
3421
3422 #if NVERIEXEC > 0
3423 if (!error) {
3424 char *f1, *f2;
3425
3426 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3427 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3428
3429 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3430 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3431
3432 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3433
3434 free(f1, M_TEMP);
3435 free(f2, M_TEMP);
3436 }
3437 #endif /* NVERIEXEC > 0 */
3438
3439 out:
3440 p = l->l_proc;
3441 if (!error) {
3442 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3443 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3444 VFS_RENAMELOCK_EXIT(fs);
3445 } else {
3446 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3447 if (tdvp == tvp)
3448 vrele(tdvp);
3449 else
3450 vput(tdvp);
3451 if (tvp)
3452 vput(tvp);
3453 VFS_RENAMELOCK_EXIT(fs);
3454 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3455 vrele(fromnd.ni_dvp);
3456 vrele(fvp);
3457 }
3458 vrele(tond.ni_startdir);
3459 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3460 out1:
3461 if (fromnd.ni_startdir)
3462 vrele(fromnd.ni_startdir);
3463 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3464 return (error == -1 ? 0 : error);
3465 }
3466
3467 /*
3468 * Make a directory file.
3469 */
3470 /* ARGSUSED */
3471 int
3472 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
3473 {
3474 /* {
3475 syscallarg(const char *) path;
3476 syscallarg(int) mode;
3477 } */
3478 struct proc *p = l->l_proc;
3479 struct vnode *vp;
3480 struct vattr vattr;
3481 int error;
3482 struct nameidata nd;
3483
3484 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3485 SCARG(uap, path));
3486 if ((error = namei(&nd)) != 0)
3487 return (error);
3488 vp = nd.ni_vp;
3489 if (vp != NULL) {
3490 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3491 if (nd.ni_dvp == vp)
3492 vrele(nd.ni_dvp);
3493 else
3494 vput(nd.ni_dvp);
3495 vrele(vp);
3496 return (EEXIST);
3497 }
3498 VATTR_NULL(&vattr);
3499 vattr.va_type = VDIR;
3500 /* We will read cwdi->cwdi_cmask unlocked. */
3501 vattr.va_mode =
3502 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3503 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3504 if (!error)
3505 vput(nd.ni_vp);
3506 return (error);
3507 }
3508
3509 /*
3510 * Remove a directory file.
3511 */
3512 /* ARGSUSED */
3513 int
3514 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
3515 {
3516 /* {
3517 syscallarg(const char *) path;
3518 } */
3519 struct vnode *vp;
3520 int error;
3521 struct nameidata nd;
3522
3523 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3524 SCARG(uap, path));
3525 if ((error = namei(&nd)) != 0)
3526 return (error);
3527 vp = nd.ni_vp;
3528 if (vp->v_type != VDIR) {
3529 error = ENOTDIR;
3530 goto out;
3531 }
3532 /*
3533 * No rmdir "." please.
3534 */
3535 if (nd.ni_dvp == vp) {
3536 error = EINVAL;
3537 goto out;
3538 }
3539 /*
3540 * The root of a mounted filesystem cannot be deleted.
3541 */
3542 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
3543 error = EBUSY;
3544 goto out;
3545 }
3546 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3547 return (error);
3548
3549 out:
3550 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3551 if (nd.ni_dvp == vp)
3552 vrele(nd.ni_dvp);
3553 else
3554 vput(nd.ni_dvp);
3555 vput(vp);
3556 return (error);
3557 }
3558
3559 /*
3560 * Read a block of directory entries in a file system independent format.
3561 */
3562 int
3563 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
3564 {
3565 /* {
3566 syscallarg(int) fd;
3567 syscallarg(char *) buf;
3568 syscallarg(size_t) count;
3569 } */
3570 file_t *fp;
3571 int error, done;
3572
3573 /* fd_getvnode() will use the descriptor for us */
3574 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3575 return (error);
3576 if ((fp->f_flag & FREAD) == 0) {
3577 error = EBADF;
3578 goto out;
3579 }
3580 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3581 SCARG(uap, count), &done, l, 0, 0);
3582 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
3583 *retval = done;
3584 out:
3585 fd_putfile(SCARG(uap, fd));
3586 return (error);
3587 }
3588
3589 /*
3590 * Set the mode mask for creation of filesystem nodes.
3591 */
3592 int
3593 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
3594 {
3595 /* {
3596 syscallarg(mode_t) newmask;
3597 } */
3598 struct proc *p = l->l_proc;
3599 struct cwdinfo *cwdi;
3600
3601 /*
3602 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3603 * important is that we serialize changes to the mask. The
3604 * rw_exit() will issue a write memory barrier on our behalf,
3605 * and force the changes out to other CPUs (as it must use an
3606 * atomic operation, draining the local CPU's store buffers).
3607 */
3608 cwdi = p->p_cwdi;
3609 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3610 *retval = cwdi->cwdi_cmask;
3611 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3612 rw_exit(&cwdi->cwdi_lock);
3613
3614 return (0);
3615 }
3616
3617 int
3618 dorevoke(struct vnode *vp, kauth_cred_t cred)
3619 {
3620 struct vattr vattr;
3621 int error;
3622
3623 if ((error = VOP_GETATTR(vp, &vattr, cred)) != 0)
3624 return error;
3625 if (kauth_cred_geteuid(cred) != vattr.va_uid &&
3626 (error = kauth_authorize_generic(cred,
3627 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
3628 VOP_REVOKE(vp, REVOKEALL);
3629 return (error);
3630 }
3631
3632 /*
3633 * Void all references to file by ripping underlying filesystem
3634 * away from vnode.
3635 */
3636 /* ARGSUSED */
3637 int
3638 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
3639 {
3640 /* {
3641 syscallarg(const char *) path;
3642 } */
3643 struct vnode *vp;
3644 int error;
3645 struct nameidata nd;
3646
3647 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
3648 SCARG(uap, path));
3649 if ((error = namei(&nd)) != 0)
3650 return (error);
3651 vp = nd.ni_vp;
3652 error = dorevoke(vp, l->l_cred);
3653 vrele(vp);
3654 return (error);
3655 }
3656