vfs_syscalls.c revision 1.306.2.9 1 /* $NetBSD: vfs_syscalls.c,v 1.306.2.9 2007/07/15 13:27:48 ad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.306.2.9 2007/07/15 13:27:48 ad Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/syscallargs.h>
65 #include <sys/vfs_syscalls.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #include <sys/verified_exec.h>
73 #include <sys/kauth.h>
74
75 #include <miscfs/genfs/genfs.h>
76 #include <miscfs/syncfs/syncfs.h>
77
78 #ifdef COMPAT_30
79 #include "opt_nfsserver.h"
80 #include <nfs/rpcv2.h>
81 #endif
82 #include <nfs/nfsproto.h>
83 #ifdef COMPAT_30
84 #include <nfs/nfs.h>
85 #include <nfs/nfs_var.h>
86 #endif
87
88 #if NFSS > 0
89 #include <dev/fssvar.h>
90 #endif
91
92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
93
94 static int change_dir(struct nameidata *, struct lwp *);
95 static int change_flags(struct vnode *, u_long, struct lwp *);
96 static int change_mode(struct vnode *, int, struct lwp *l);
97 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
98 static int rename_files(const char *, const char *, struct lwp *, int);
99
100 void checkdirs(struct vnode *);
101
102 int dovfsusermount = 0;
103
104 /*
105 * Virtual File System System Calls
106 */
107
108 /*
109 * Mount a file system.
110 */
111
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * File system names for the 4.3BSD and NetBSD 0.9 mount syscalls,
 * which identified file systems by number.  The array index is the
 * historic number, so the order of the entries is significant.
 *
 * Do not modify this table: it should only list file systems that
 * NetBSD 0.9 and 4.3BSD supported.  A NULL slot is a number without
 * a usable file system.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};
/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(*mountcompatnames);
#endif /* COMPAT_09 || COMPAT_43 */
135
136 static int
137 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
138 void *data, size_t *data_len, struct nameidata *ndp)
139 {
140 struct mount *mp;
141 int error = 0, saved_flags;
142
143 mp = vp->v_mount;
144 saved_flags = mp->mnt_flag;
145
146 /* We can operate only on VROOT nodes. */
147 if ((vp->v_vflag & VV_ROOT) == 0) {
148 error = EINVAL;
149 goto out;
150 }
151
152 /*
153 * We only allow the filesystem to be reloaded if it
154 * is currently mounted read-only.
155 */
156 if (flags & MNT_RELOAD && !(mp->mnt_flag & MNT_RDONLY)) {
157 error = EOPNOTSUPP; /* Needs translation */
158 goto out;
159 }
160
161 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
162 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
163 if (error)
164 goto out;
165
166 if (vfs_busy(mp, LK_NOWAIT, 0)) {
167 error = EPERM;
168 goto out;
169 }
170
171 mp->mnt_flag &= ~MNT_OP_FLAGS;
172 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
173
174 /*
175 * Set the mount level flags.
176 */
177 if (flags & MNT_RDONLY)
178 mp->mnt_flag |= MNT_RDONLY;
179 else if (mp->mnt_flag & MNT_RDONLY)
180 mp->mnt_iflag |= IMNT_WANTRDWR;
181 mp->mnt_flag &=
182 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
183 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
184 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
185 mp->mnt_flag |= flags &
186 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
187 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
188 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
189 MNT_IGNORE);
190
191 error = VFS_MOUNT(mp, path, data, data_len, ndp, l);
192
193 #if defined(COMPAT_30) && defined(NFSSERVER)
194 if (error && data != NULL) {
195 int error2;
196
197 /* Update failed; let's try and see if it was an
198 * export request. */
199 error2 = nfs_update_exports_30(mp, path, data, l);
200
201 /* Only update error code if the export request was
202 * understood but some problem occurred while
203 * processing it. */
204 if (error2 != EJUSTRETURN)
205 error = error2;
206 }
207 #endif
208 if (mp->mnt_iflag & IMNT_WANTRDWR)
209 mp->mnt_flag &= ~MNT_RDONLY;
210 if (error)
211 mp->mnt_flag = saved_flags;
212 mp->mnt_flag &= ~MNT_OP_FLAGS;
213 mp->mnt_iflag &= ~IMNT_WANTRDWR;
214 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
215 if (mp->mnt_syncer == NULL)
216 error = vfs_allocate_syncvnode(mp);
217 } else {
218 if (mp->mnt_syncer != NULL)
219 vfs_deallocate_syncvnode(mp);
220 }
221 vfs_unbusy(mp);
222
223 return (error);
224 }
225
226 static int
227 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
228 {
229 char fstypename[MFSNAMELEN];
230 int error;
231
232 /* Copy file-system type from userspace. */
233 error = copyinstr(fstype, fstypename, MFSNAMELEN, NULL);
234 if (error) {
235 #if defined(COMPAT_09) || defined(COMPAT_43)
236 /*
237 * Historically, filesystem types were identified by numbers.
238 * If we get an integer for the filesystem type instead of a
239 * string, we check to see if it matches one of the historic
240 * filesystem types.
241 */
242 u_long fsindex = (u_long)fstype;
243 if (fsindex >= nmountcompatnames ||
244 mountcompatnames[fsindex] == NULL)
245 return ENODEV;
246 strlcpy(fstypename, mountcompatnames[fsindex], sizeof(fstypename));
247 #else
248 return error;
249 #endif
250 }
251
252 #ifdef COMPAT_10
253 /* Accept `ufs' as an alias for `ffs'. */
254 if (strcmp(fstypename, "ufs") == 0)
255 fstypename[0] = 'f';
256 #endif
257
258 if ((*vfsops = vfs_getopsbyname(fstypename)) == NULL)
259 return ENODEV;
260 return 0;
261 }
262
263 static int
264 mount_domount(struct lwp *l, struct vnode **vpp, struct vfsops *vfsops,
265 const char *path, int flags, void *data, size_t *data_len,
266 struct nameidata *ndp)
267 {
268 struct mount *mp = NULL;
269 struct vnode *vp = *vpp;
270 struct vattr va;
271 int error;
272
273 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
274 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
275 if (error)
276 return error;
277
278 /* Can't make a non-dir a mount-point (from here anyway). */
279 if (vp->v_type != VDIR)
280 return ENOTDIR;
281
282 /*
283 * If the user is not root, ensure that they own the directory
284 * onto which we are attempting to mount.
285 */
286 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
287 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
288 (error = kauth_authorize_generic(l->l_cred,
289 KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
290 return error;
291 }
292
293 if (flags & MNT_EXPORTED)
294 return EINVAL;
295
296 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0)
297 return error;
298
299 /*
300 * Check if a file-system is not already mounted on this vnode.
301 */
302 if (vp->v_mountedhere != NULL)
303 return EBUSY;
304
305 mp = malloc(sizeof(*mp), M_MOUNT, M_WAITOK|M_ZERO);
306
307 mp->mnt_op = vfsops;
308
309 TAILQ_INIT(&mp->mnt_vnodelist);
310 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
311 mutex_init(&mp->mnt_mutex, MUTEX_DEFAULT, IPL_NONE);
312 (void)vfs_busy(mp, LK_NOWAIT, 0);
313
314 mp->mnt_vnodecovered = vp;
315 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
316 mp->mnt_unmounter = NULL;
317 mount_initspecific(mp);
318
319 /*
320 * The underlying file system may refuse the mount for
321 * various reasons. Allow the user to force it to happen.
322 *
323 * Set the mount level flags.
324 */
325 mp->mnt_flag = flags &
326 (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
327 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
328 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
329 MNT_IGNORE | MNT_RDONLY);
330
331 error = VFS_MOUNT(mp, path, data, ndp, l);
332 mp->mnt_flag &= ~MNT_OP_FLAGS;
333
334 /*
335 * Put the new filesystem on the mount list after root.
336 */
337 cache_purge(vp);
338 if (!error) {
339 mp->mnt_iflag &= ~IMNT_WANTRDWR;
340 vp->v_mountedhere = mp;
341 mutex_enter(&mountlist_lock);
342 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
343 mutex_exit(&mountlist_lock);
344 VOP_UNLOCK(vp, 0);
345 checkdirs(vp);
346 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
347 error = vfs_allocate_syncvnode(mp);
348 vfs_unbusy(mp);
349 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
350 error = VFS_START(mp, 0, l);
351 if (error)
352 vrele(vp);
353 } else {
354 vp->v_mountedhere = NULL;
355 vfs_unbusy(mp);
356 vfs_delref(mp->mnt_op);
357 free(mp, M_MOUNT);
358 vput(vp);
359 }
360
361 out:
362 return (error);
363 }
364
365 static int
366 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
367 void *data, struct nameidata *ndp)
368 {
369 struct mount *mp;
370 int error;
371
372 /* If MNT_GETARGS is specified, it should be the only flag. */
373 if (flags & ~MNT_GETARGS) {
374 error = EINVAL;
375 goto out;
376 }
377
378 mp = vp->v_mount;
379
380 /* XXX: probably some notion of "can see" here if we want isolation. */
381 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
382 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
383 if (error)
384 goto out;
385
386 if ((vp->v_vflag & VV_ROOT) == 0) {
387 error = EINVAL;
388 goto out;
389 }
390
391 if (vfs_busy(mp, LK_NOWAIT, 0)) {
392 error = EPERM;
393 goto out;
394 }
395
396 mp->mnt_flag &= ~MNT_OP_FLAGS;
397 mp->mnt_flag |= MNT_GETARGS;
398 error = VFS_MOUNT(mp, path, data, ndp, l);
399 mp->mnt_flag &= ~MNT_OP_FLAGS;
400
401 vfs_unbusy(mp);
402 out:
403 return (error);
404 }
405
406 /* ARGSUSED */
407 int
408 sys_mount(struct lwp *l, void *v, register_t *retval)
409 {
410 struct sys_mount_args /* {
411 syscallarg(const char *) type;
412 syscallarg(const char *) path;
413 syscallarg(int) flags;
414 syscallarg(void *) data;
415 } */ *uap = v;
416 struct vnode *vp;
417 struct nameidata nd;
418 int error;
419
420 /*
421 * Get vnode to be covered
422 */
423 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE,
424 SCARG(uap, path), l);
425 if ((error = namei(&nd)) != 0)
426 return (error);
427 vp = nd.ni_vp;
428
429 /*
430 * A lookup in VFS_MOUNT might result in an attempt to
431 * lock this vnode again, so make the lock recursive.
432 */
433 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
434
435 if (vfsops == NULL) {
436 if (flags & (MNT_GETARGS | MNT_UPDATE))
437 vfsops = vp->v_mount->mnt_op;
438 else {
439 /* 'type' is userspace */
440 error = mount_get_vfsops(type, &vfsops);
441 if (error != 0)
442 goto done;
443 }
444 }
445
446 if (data != NULL && data_seg == UIO_USERSPACE) {
447 if (data_len == 0) {
448 /* No length supplied, use default for filesystem */
449 data_len = vfsops->vfs_min_mount_data;
450 if (data_len > VFS_MAX_MOUNT_DATA) {
451 /* maybe a force loaded old LKM */
452 error = EINVAL;
453 goto done;
454 }
455 #ifdef COMPAT_30
456 /* Hopefully a longer buffer won't make copyin() fail */
457 if (flags & MNT_UPDATE
458 && data_len < sizeof (struct mnt_export_args30))
459 data_len = sizeof (struct mnt_export_args30);
460 #endif
461 }
462 data_buf = malloc(data_len, M_TEMP, M_WAITOK);
463
464 /* NFS needs the buffer even for mnt_getargs .... */
465 error = copyin(data, data_buf, data_len);
466 if (error != 0)
467 goto done;
468 }
469
470 if (flags & MNT_GETARGS) {
471 if (data_len == 0) {
472 error = EINVAL;
473 goto done;
474 }
475 error = mount_getargs(l, vp, path, flags, data_buf,
476 &data_len, &nd);
477 if (error != 0)
478 goto done;
479 if (data_seg == UIO_USERSPACE)
480 error = copyout(data_buf, data, data_len);
481 *retval = data_len;
482 } else if (flags & MNT_UPDATE) {
483 error = mount_update(l, vp, path, flags, data_buf, &data_len,
484 &nd);
485 } else {
486 /* Locking is handled internally in mount_domount(). */
487 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
488 &data_len, &nd);
489 }
490
491 done:
492 if (vp)
493 vput(vp);
494 if (data_buf != data)
495 free(data_buf, M_TEMP);
496 return (error);
497 }
498
499 /*
500 * Scan all active processes to see if any of them have a current
501 * or root directory onto which the new filesystem has just been
502 * mounted. If so, replace them with the new mount point.
503 */
504 void
505 checkdirs(struct vnode *olddp)
506 {
507 struct cwdinfo *cwdi;
508 struct vnode *newdp;
509 struct proc *p;
510
511 if (olddp->v_usecount == 1)
512 return;
513 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
514 panic("mount: lost mount");
515 mutex_enter(&proclist_lock);
516 PROCLIST_FOREACH(p, &allproc) {
517 cwdi = p->p_cwdi;
518 if (!cwdi)
519 continue;
520 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
521 if (cwdi->cwdi_cdir == olddp) {
522 vrele(cwdi->cwdi_cdir);
523 VREF(newdp);
524 cwdi->cwdi_cdir = newdp;
525 }
526 if (cwdi->cwdi_rdir == olddp) {
527 vrele(cwdi->cwdi_rdir);
528 VREF(newdp);
529 cwdi->cwdi_rdir = newdp;
530 }
531 rw_exit(&cwdi->cwdi_lock);
532 }
533 mutex_exit(&proclist_lock);
534 if (rootvnode == olddp) {
535 vrele(rootvnode);
536 VREF(newdp);
537 rootvnode = newdp;
538 }
539 vput(newdp);
540 }
541
542 /*
543 * Unmount a file system.
544 *
545 * Note: unmount takes a path to the vnode mounted on as argument,
546 * not special file (as before).
547 */
548 /* ARGSUSED */
549 int
550 sys_unmount(struct lwp *l, void *v, register_t *retval)
551 {
552 struct sys_unmount_args /* {
553 syscallarg(const char *) path;
554 syscallarg(int) flags;
555 } */ *uap = v;
556 struct vnode *vp;
557 struct mount *mp;
558 int error;
559 struct nameidata nd;
560
561 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
562 SCARG(uap, path), l);
563 if ((error = namei(&nd)) != 0)
564 return (error);
565 vp = nd.ni_vp;
566 mp = vp->v_mount;
567
568 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
569 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
570 if (error) {
571 vput(vp);
572 return (error);
573 }
574
575 /*
576 * Don't allow unmounting the root file system.
577 */
578 if (mp->mnt_flag & MNT_ROOTFS) {
579 vput(vp);
580 return (EINVAL);
581 }
582
583 /*
584 * Must be the root of the filesystem
585 */
586 if ((vp->v_vflag & VV_ROOT) == 0) {
587 vput(vp);
588 return (EINVAL);
589 }
590 vput(vp);
591
592 /*
593 * XXX Freeze syncer. Must do this before locking the
594 * mount point. See dounmount() for details.
595 */
596 mutex_enter(&syncer_mutex);
597
598 if (vfs_busy(mp, 0, 0)) {
599 mutex_exit(&syncer_mutex);
600 return (EBUSY);
601 }
602
603 return (dounmount(mp, SCARG(uap, flags), l));
604 }
605
606 /*
607 * Do the actual file system unmount. File system is assumed to have been
608 * marked busy by the caller.
609 */
610 int
611 dounmount(struct mount *mp, int flags, struct lwp *l)
612 {
613 struct vnode *coveredvp;
614 int error;
615 int async;
616 int used_syncer;
617
618 #if NVERIEXEC > 0
619 error = veriexec_unmountchk(mp);
620 if (error)
621 return (error);
622 #endif /* NVERIEXEC > 0 */
623
624 mutex_enter(&mountlist_lock);
625 vfs_unbusy(mp);
626 used_syncer = (mp->mnt_syncer != NULL);
627
628 /*
629 * XXX Syncer must be frozen when we get here. This should really
630 * be done on a per-mountpoint basis, but especially the softdep
631 * code possibly called from the syncer doesn't exactly work on a
632 * per-mountpoint basis, so the softdep code would become a maze
633 * of vfs_busy() calls.
634 *
635 * The caller of dounmount() must acquire syncer_mutex because
636 * the syncer itself acquires locks in syncer_mutex -> vfs_busy
637 * order, and we must preserve that order to avoid deadlock.
638 *
639 * So, if the file system did not use the syncer, now is
640 * the time to release the syncer_mutex.
641 */
642 if (used_syncer == 0)
643 mutex_exit(&syncer_mutex);
644
645 mp->mnt_iflag |= IMNT_UNMOUNT;
646 mp->mnt_unmounter = l;
647 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_lock);
648
649 async = mp->mnt_flag & MNT_ASYNC;
650 mp->mnt_flag &= ~MNT_ASYNC;
651 cache_purgevfs(mp); /* remove cache entries for this file sys */
652 if (mp->mnt_syncer != NULL)
653 vfs_deallocate_syncvnode(mp);
654 error = 0;
655 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
656 #if NFSS > 0
657 error = fss_umount_hook(mp, (flags & MNT_FORCE));
658 #endif
659 if (error == 0)
660 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
661 }
662 if (error == 0 || (flags & MNT_FORCE))
663 error = VFS_UNMOUNT(mp, flags, l);
664 if (error) {
665 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
666 (void) vfs_allocate_syncvnode(mp);
667 mutex_enter(&mountlist_lock);
668 mp->mnt_iflag &= ~IMNT_UNMOUNT;
669 mp->mnt_unmounter = NULL;
670 mp->mnt_flag |= async;
671 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
672 &mountlist_lock);
673 if (used_syncer)
674 mutex_exit(&syncer_mutex);
675 mutex_enter(&mp->mnt_mutex);
676 while (mp->mnt_wcnt > 0) {
677 wakeup(mp);
678 mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
679 0, &mp->mnt_mutex);
680 }
681 mutex_exit(&mp->mnt_mutex);
682 return (error);
683 }
684 mutex_enter(&mountlist_lock);
685 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
686 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
687 coveredvp->v_mountedhere = NULL;
688 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
689 panic("unmount: dangling vnode");
690 mp->mnt_iflag |= IMNT_GONE;
691 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_lock);
692 if (coveredvp != NULLVP)
693 vrele(coveredvp);
694 mount_finispecific(mp);
695 if (used_syncer)
696 mutex_exit(&syncer_mutex);
697 mutex_enter(&mp->mnt_mutex);
698 while (mp->mnt_wcnt > 0) {
699 wakeup(mp);
700 mtsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_mutex);
701 }
702 mutex_exit(&mp->mnt_mutex);
703 vfs_hooks_unmount(mp);
704 mutex_destroy(&mp->mnt_mutex);
705 vfs_delref(mp->mnt_op);
706 free(mp, M_MOUNT);
707 return (0);
708 }
709
710 /*
711 * Sync each mounted filesystem.
712 */
713 #ifdef DEBUG
714 int syncprt = 0;
715 struct ctldebug debug0 = { "syncprt", &syncprt };
716 #endif
717
718 /* ARGSUSED */
719 int
720 sys_sync(struct lwp *l, void *v, register_t *retval)
721 {
722 struct mount *mp, *nmp;
723 int asyncflag;
724
725 if (l == NULL)
726 l = &lwp0;
727
728 mutex_enter(&mountlist_lock);
729 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
730 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) {
731 nmp = mp->mnt_list.cqe_prev;
732 continue;
733 }
734 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
735 asyncflag = mp->mnt_flag & MNT_ASYNC;
736 mp->mnt_flag &= ~MNT_ASYNC;
737 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
738 if (asyncflag)
739 mp->mnt_flag |= MNT_ASYNC;
740 }
741 mutex_enter(&mountlist_lock);
742 nmp = mp->mnt_list.cqe_prev;
743 vfs_unbusy(mp);
744
745 }
746 mutex_exit(&mountlist_lock);
747 #ifdef DEBUG
748 if (syncprt)
749 vfs_bufstats();
750 #endif /* DEBUG */
751 return (0);
752 }
753
754 /*
755 * Change filesystem quotas.
756 */
757 /* ARGSUSED */
758 int
759 sys_quotactl(struct lwp *l, void *v, register_t *retval)
760 {
761 struct sys_quotactl_args /* {
762 syscallarg(const char *) path;
763 syscallarg(int) cmd;
764 syscallarg(int) uid;
765 syscallarg(void *) arg;
766 } */ *uap = v;
767 struct mount *mp;
768 int error;
769 struct nameidata nd;
770
771 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
772 if ((error = namei(&nd)) != 0)
773 return (error);
774 mp = nd.ni_vp->v_mount;
775 vrele(nd.ni_vp);
776 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
777 SCARG(uap, arg), l);
778 return (error);
779 }
780
781 int
782 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
783 int root)
784 {
785 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
786 int error = 0;
787
788 /*
789 * If MNT_NOWAIT or MNT_LAZY is specified, do not
790 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
791 * overrides MNT_NOWAIT.
792 */
793 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
794 (flags != MNT_WAIT && flags != 0)) {
795 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
796 goto done;
797 }
798
799 /* Get the filesystem stats now */
800 memset(sp, 0, sizeof(*sp));
801 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
802 return error;
803 }
804
805 if (cwdi->cwdi_rdir == NULL)
806 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
807 done:
808 if (cwdi->cwdi_rdir != NULL) {
809 size_t len;
810 char *bp;
811 char *path = PNBUF_GET();
812
813 bp = path + MAXPATHLEN;
814 *--bp = '\0';
815 rw_enter(&cwdi->cwdi_lock, RW_READER);
816 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
817 MAXPATHLEN / 2, 0, l);
818 rw_exit(&cwdi->cwdi_lock);
819 if (error) {
820 PNBUF_PUT(path);
821 return error;
822 }
823 len = strlen(bp);
824 /*
825 * for mount points that are below our root, we can see
826 * them, so we fix up the pathname and return them. The
827 * rest we cannot see, so we don't allow viewing the
828 * data.
829 */
830 if (strncmp(bp, sp->f_mntonname, len) == 0) {
831 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
832 sizeof(sp->f_mntonname));
833 if (sp->f_mntonname[0] == '\0')
834 (void)strlcpy(sp->f_mntonname, "/",
835 sizeof(sp->f_mntonname));
836 } else {
837 if (root)
838 (void)strlcpy(sp->f_mntonname, "/",
839 sizeof(sp->f_mntonname));
840 else
841 error = EPERM;
842 }
843 PNBUF_PUT(path);
844 }
845 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
846 return error;
847 }
848
849 /*
850 * Get filesystem statistics by path.
851 */
852 int
853 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
854 {
855 struct mount *mp;
856 int error;
857 struct nameidata nd;
858
859 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, path, l);
860 if ((error = namei(&nd)) != 0)
861 return error;
862 mp = nd.ni_vp->v_mount;
863 error = dostatvfs(mp, sb, l, flags, 1);
864 vrele(nd.ni_vp);
865 return error;
866 }
867
868 /* ARGSUSED */
869 int
870 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
871 {
872 struct sys_statvfs1_args /* {
873 syscallarg(const char *) path;
874 syscallarg(struct statvfs *) buf;
875 syscallarg(int) flags;
876 } */ *uap = v;
877 struct statvfs *sb;
878 int error;
879
880 sb = STATVFSBUF_GET();
881 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
882 if (error == 0)
883 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
884 STATVFSBUF_PUT(sb);
885 return error;
886 }
887
888 /*
889 * Get filesystem statistics by fd.
890 */
891 int
892 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
893 {
894 struct proc *p = l->l_proc;
895 struct file *fp;
896 struct mount *mp;
897 int error;
898
899 /* getvnode() will use the descriptor for us */
900 if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
901 return (error);
902 mp = ((struct vnode *)fp->f_data)->v_mount;
903 error = dostatvfs(mp, sb, l, flags, 1);
904 FILE_UNUSE(fp, l);
905 return error;
906 }
907
908 /* ARGSUSED */
909 int
910 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
911 {
912 struct sys_fstatvfs1_args /* {
913 syscallarg(int) fd;
914 syscallarg(struct statvfs *) buf;
915 syscallarg(int) flags;
916 } */ *uap = v;
917 struct statvfs *sb;
918 int error;
919
920 sb = STATVFSBUF_GET();
921 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
922 if (error == 0)
923 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
924 STATVFSBUF_PUT(sb);
925 return error;
926 }
927
928
929 /*
930 * Get statistics on all filesystems.
931 */
932 int
933 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
934 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
935 register_t *retval)
936 {
937 int root = 0;
938 struct proc *p = l->l_proc;
939 struct mount *mp, *nmp;
940 struct statvfs *sb;
941 size_t count, maxcount;
942 int error = 0;
943
944 sb = STATVFSBUF_GET();
945 maxcount = bufsize / entry_sz;
946 mutex_enter(&mountlist_lock);
947 count = 0;
948 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
949 mp = nmp) {
950 if (vfs_busy(mp, LK_NOWAIT, &mountlist_lock)) {
951 nmp = CIRCLEQ_NEXT(mp, mnt_list);
952 continue;
953 }
954 if (sfsp && count < maxcount) {
955 error = dostatvfs(mp, sb, l, flags, 0);
956 if (error) {
957 mutex_enter(&mountlist_lock);
958 nmp = CIRCLEQ_NEXT(mp, mnt_list);
959 vfs_unbusy(mp);
960 continue;
961 }
962 error = copyfn(sb, sfsp, entry_sz);
963 if (error) {
964 vfs_unbusy(mp);
965 goto out;
966 }
967 sfsp = (char *)sfsp + entry_sz;
968 root |= strcmp(sb->f_mntonname, "/") == 0;
969 }
970 count++;
971 mutex_enter(&mountlist_lock);
972 nmp = CIRCLEQ_NEXT(mp, mnt_list);
973 vfs_unbusy(mp);
974 }
975 mutex_exit(&mountlist_lock);
976 if (root == 0 && p->p_cwdi->cwdi_rdir) {
977 /*
978 * fake a root entry
979 */
980 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l, flags, 1);
981 if (error != 0)
982 goto out;
983 if (sfsp)
984 error = copyfn(sb, sfsp, entry_sz);
985 count++;
986 }
987 if (sfsp && count > maxcount)
988 *retval = maxcount;
989 else
990 *retval = count;
991 out:
992 STATVFSBUF_PUT(sb);
993 return error;
994 }
995
996 int
997 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
998 {
999 struct sys_getvfsstat_args /* {
1000 syscallarg(struct statvfs *) buf;
1001 syscallarg(size_t) bufsize;
1002 syscallarg(int) flags;
1003 } */ *uap = v;
1004
1005 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1006 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1007 }
1008
1009 /*
1010 * Change current working directory to a given file descriptor.
1011 */
1012 /* ARGSUSED */
1013 int
1014 sys_fchdir(struct lwp *l, void *v, register_t *retval)
1015 {
1016 struct sys_fchdir_args /* {
1017 syscallarg(int) fd;
1018 } */ *uap = v;
1019 struct proc *p = l->l_proc;
1020 struct filedesc *fdp = p->p_fd;
1021 struct cwdinfo *cwdi;
1022 struct vnode *vp, *tdp;
1023 struct mount *mp;
1024 struct file *fp;
1025 int error;
1026
1027 /* getvnode() will use the descriptor for us */
1028 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1029 return (error);
1030 vp = (struct vnode *)fp->f_data;
1031
1032 VREF(vp);
1033 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1034 if (vp->v_type != VDIR)
1035 error = ENOTDIR;
1036 else
1037 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1038 if (error) {
1039 vput(vp);
1040 goto out;
1041 }
1042 while ((mp = vp->v_mountedhere) != NULL) {
1043 if (vfs_busy(mp, 0, 0))
1044 continue;
1045
1046 vput(vp);
1047 error = VFS_ROOT(mp, &tdp);
1048 vfs_unbusy(mp);
1049 if (error)
1050 goto out;
1051 vp = tdp;
1052 }
1053 VOP_UNLOCK(vp, 0);
1054
1055 /*
1056 * Disallow changing to a directory not under the process's
1057 * current root directory (if there is one).
1058 */
1059 cwdi = p->p_cwdi;
1060 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1061 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1062 vrele(vp);
1063 error = EPERM; /* operation not permitted */
1064 } else {
1065 vrele(cwdi->cwdi_cdir);
1066 cwdi->cwdi_cdir = vp;
1067 }
1068 rw_exit(&cwdi->cwdi_lock);
1069
1070 out:
1071 FILE_UNUSE(fp, l);
1072 return (error);
1073 }
1074
1075 /*
1076 * Change this process's notion of the root directory to a given file
1077 * descriptor.
1078 */
1079 int
1080 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1081 {
1082 struct sys_fchroot_args *uap = v;
1083 struct proc *p = l->l_proc;
1084 struct filedesc *fdp = p->p_fd;
1085 struct cwdinfo *cwdi;
1086 struct vnode *vp;
1087 struct file *fp;
1088 int error;
1089
1090 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1091 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1092 return error;
1093 /* getvnode() will use the descriptor for us */
1094 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1095 return error;
1096 vp = (struct vnode *) fp->f_data;
1097 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1098 if (vp->v_type != VDIR)
1099 error = ENOTDIR;
1100 else
1101 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1102 VOP_UNLOCK(vp, 0);
1103 if (error)
1104 goto out;
1105 VREF(vp);
1106
1107 /*
1108 * Prevent escaping from chroot by putting the root under
1109 * the working directory. Silently chdir to / if we aren't
1110 * already there.
1111 */
1112 cwdi = p->p_cwdi;
1113 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1114 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1115 /*
1116 * XXX would be more failsafe to change directory to a
1117 * deadfs node here instead
1118 */
1119 vrele(cwdi->cwdi_cdir);
1120 VREF(vp);
1121 cwdi->cwdi_cdir = vp;
1122 }
1123
1124 if (cwdi->cwdi_rdir != NULL)
1125 vrele(cwdi->cwdi_rdir);
1126 cwdi->cwdi_rdir = vp;
1127 rw_exit(&cwdi->cwdi_lock);
1128
1129 out:
1130 FILE_UNUSE(fp, l);
1131 return (error);
1132 }
1133
1134 /*
1135 * Change current working directory (``.'').
1136 */
1137 /* ARGSUSED */
1138 int
1139 sys_chdir(struct lwp *l, void *v, register_t *retval)
1140 {
1141 struct sys_chdir_args /* {
1142 syscallarg(const char *) path;
1143 } */ *uap = v;
1144 struct proc *p = l->l_proc;
1145 struct cwdinfo *cwdi;
1146 int error;
1147 struct nameidata nd;
1148
1149 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1150 SCARG(uap, path), l);
1151 if ((error = change_dir(&nd, l)) != 0)
1152 return (error);
1153 cwdi = p->p_cwdi;
1154 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1155 vrele(cwdi->cwdi_cdir);
1156 cwdi->cwdi_cdir = nd.ni_vp;
1157 rw_exit(&cwdi->cwdi_lock);
1158 return (0);
1159 }
1160
1161 /*
1162 * Change notion of root (``/'') directory.
1163 */
1164 /* ARGSUSED */
1165 int
1166 sys_chroot(struct lwp *l, void *v, register_t *retval)
1167 {
1168 struct sys_chroot_args /* {
1169 syscallarg(const char *) path;
1170 } */ *uap = v;
1171 struct proc *p = l->l_proc;
1172 struct cwdinfo *cwdi;
1173 struct vnode *vp;
1174 int error;
1175 struct nameidata nd;
1176
1177 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1178 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1179 return (error);
1180 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1181 SCARG(uap, path), l);
1182 if ((error = change_dir(&nd, l)) != 0)
1183 return (error);
1184
1185 cwdi = p->p_cwdi;
1186 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1187 if (cwdi->cwdi_rdir != NULL)
1188 vrele(cwdi->cwdi_rdir);
1189 vp = nd.ni_vp;
1190 cwdi->cwdi_rdir = vp;
1191
1192 /*
1193 * Prevent escaping from chroot by putting the root under
1194 * the working directory. Silently chdir to / if we aren't
1195 * already there.
1196 */
1197 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1198 /*
1199 * XXX would be more failsafe to change directory to a
1200 * deadfs node here instead
1201 */
1202 vrele(cwdi->cwdi_cdir);
1203 VREF(vp);
1204 cwdi->cwdi_cdir = vp;
1205 }
1206 rw_exit(&cwdi->cwdi_lock);
1207
1208 return (0);
1209 }
1210
1211 /*
1212 * Common routine for chroot and chdir.
1213 */
1214 static int
1215 change_dir(struct nameidata *ndp, struct lwp *l)
1216 {
1217 struct vnode *vp;
1218 int error;
1219
1220 if ((error = namei(ndp)) != 0)
1221 return (error);
1222 vp = ndp->ni_vp;
1223 if (vp->v_type != VDIR)
1224 error = ENOTDIR;
1225 else
1226 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1227
1228 if (error)
1229 vput(vp);
1230 else
1231 VOP_UNLOCK(vp, 0);
1232 return (error);
1233 }
1234
1235 /*
1236 * Check permissions, allocate an open file structure,
1237 * and call the device open routine if any.
1238 */
1239 int
1240 sys_open(struct lwp *l, void *v, register_t *retval)
1241 {
1242 struct sys_open_args /* {
1243 syscallarg(const char *) path;
1244 syscallarg(int) flags;
1245 syscallarg(int) mode;
1246 } */ *uap = v;
1247 struct proc *p = l->l_proc;
1248 struct cwdinfo *cwdi = p->p_cwdi;
1249 struct filedesc *fdp = p->p_fd;
1250 struct file *fp;
1251 struct vnode *vp;
1252 int flags, cmode;
1253 int type, indx, error;
1254 struct flock lf;
1255 struct nameidata nd;
1256
1257 flags = FFLAGS(SCARG(uap, flags));
1258 if ((flags & (FREAD | FWRITE)) == 0)
1259 return (EINVAL);
1260 /* falloc() will use the file descriptor for us */
1261 if ((error = falloc(l, &fp, &indx)) != 0)
1262 return (error);
1263 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1264 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1265 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1266 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1267 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1268 rw_enter(&fdp->fd_lock, RW_WRITER);
1269 FILE_UNUSE(fp, l);
1270 fdp->fd_ofiles[indx] = NULL;
1271 rw_exit(&fdp->fd_lock);
1272 ffree(fp);
1273 if ((error == EDUPFD || error == EMOVEFD) &&
1274 l->l_dupfd >= 0 && /* XXX from fdopen */
1275 (error =
1276 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1277 *retval = indx;
1278 return (0);
1279 }
1280 if (error == ERESTART)
1281 error = EINTR;
1282 fdremove(fdp, indx);
1283 return (error);
1284 }
1285 l->l_dupfd = 0;
1286 vp = nd.ni_vp;
1287 fp->f_flag = flags & FMASK;
1288 fp->f_type = DTYPE_VNODE;
1289 fp->f_ops = &vnops;
1290 fp->f_data = vp;
1291 if (flags & (O_EXLOCK | O_SHLOCK)) {
1292 lf.l_whence = SEEK_SET;
1293 lf.l_start = 0;
1294 lf.l_len = 0;
1295 if (flags & O_EXLOCK)
1296 lf.l_type = F_WRLCK;
1297 else
1298 lf.l_type = F_RDLCK;
1299 type = F_FLOCK;
1300 if ((flags & FNONBLOCK) == 0)
1301 type |= F_WAIT;
1302 VOP_UNLOCK(vp, 0);
1303 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1304 if (error) {
1305 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1306 FILE_UNUSE(fp, l);
1307 ffree(fp);
1308 fdremove(fdp, indx);
1309 return (error);
1310 }
1311 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1312 fp->f_flag |= FHASLOCK;
1313 }
1314 VOP_UNLOCK(vp, 0);
1315 *retval = indx;
1316 FILE_SET_MATURE(fp);
1317 FILE_UNUSE(fp, l);
1318 return (0);
1319 }
1320
1321 static void
1322 vfs__fhfree(fhandle_t *fhp)
1323 {
1324 size_t fhsize;
1325
1326 if (fhp == NULL) {
1327 return;
1328 }
1329 fhsize = FHANDLE_SIZE(fhp);
1330 kmem_free(fhp, fhsize);
1331 }
1332
1333 /*
1334 * vfs_composefh: compose a filehandle.
1335 */
1336
1337 int
1338 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1339 {
1340 struct mount *mp;
1341 struct fid *fidp;
1342 int error;
1343 size_t needfhsize;
1344 size_t fidsize;
1345
1346 mp = vp->v_mount;
1347 fidp = NULL;
1348 if (*fh_size < FHANDLE_SIZE_MIN) {
1349 fidsize = 0;
1350 } else {
1351 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1352 if (fhp != NULL) {
1353 memset(fhp, 0, *fh_size);
1354 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1355 fidp = &fhp->fh_fid;
1356 }
1357 }
1358 error = VFS_VPTOFH(vp, fidp, &fidsize);
1359 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1360 if (error == 0 && *fh_size < needfhsize) {
1361 error = E2BIG;
1362 }
1363 *fh_size = needfhsize;
1364 return error;
1365 }
1366
1367 int
1368 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1369 {
1370 struct mount *mp;
1371 fhandle_t *fhp;
1372 size_t fhsize;
1373 size_t fidsize;
1374 int error;
1375
1376 *fhpp = NULL;
1377 mp = vp->v_mount;
1378 fidsize = 0;
1379 error = VFS_VPTOFH(vp, NULL, &fidsize);
1380 KASSERT(error != 0);
1381 if (error != E2BIG) {
1382 goto out;
1383 }
1384 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1385 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1386 if (fhp == NULL) {
1387 error = ENOMEM;
1388 goto out;
1389 }
1390 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1391 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1392 if (error == 0) {
1393 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1394 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1395 *fhpp = fhp;
1396 } else {
1397 kmem_free(fhp, fhsize);
1398 }
1399 out:
1400 return error;
1401 }
1402
1403 void
1404 vfs_composefh_free(fhandle_t *fhp)
1405 {
1406
1407 vfs__fhfree(fhp);
1408 }
1409
1410 /*
1411 * vfs_fhtovp: lookup a vnode by a filehandle.
1412 */
1413
1414 int
1415 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1416 {
1417 struct mount *mp;
1418 int error;
1419
1420 *vpp = NULL;
1421 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1422 if (mp == NULL) {
1423 error = ESTALE;
1424 goto out;
1425 }
1426 if (mp->mnt_op->vfs_fhtovp == NULL) {
1427 error = EOPNOTSUPP;
1428 goto out;
1429 }
1430 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1431 out:
1432 return error;
1433 }
1434
1435 /*
1436 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1437 * the needed size.
1438 */
1439
1440 int
1441 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1442 {
1443 fhandle_t *fhp;
1444 int error;
1445
1446 *fhpp = NULL;
1447 if (fhsize > FHANDLE_SIZE_MAX) {
1448 return EINVAL;
1449 }
1450 if (fhsize < FHANDLE_SIZE_MIN) {
1451 return EINVAL;
1452 }
1453 again:
1454 fhp = kmem_alloc(fhsize, KM_SLEEP);
1455 if (fhp == NULL) {
1456 return ENOMEM;
1457 }
1458 error = copyin(ufhp, fhp, fhsize);
1459 if (error == 0) {
1460 /* XXX this check shouldn't be here */
1461 if (FHANDLE_SIZE(fhp) == fhsize) {
1462 *fhpp = fhp;
1463 return 0;
1464 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1465 /*
1466 * a kludge for nfsv2 padded handles.
1467 */
1468 size_t sz;
1469
1470 sz = FHANDLE_SIZE(fhp);
1471 kmem_free(fhp, fhsize);
1472 fhsize = sz;
1473 goto again;
1474 } else {
1475 /*
1476 * userland told us wrong size.
1477 */
1478 error = EINVAL;
1479 }
1480 }
1481 kmem_free(fhp, fhsize);
1482 return error;
1483 }
1484
1485 void
1486 vfs_copyinfh_free(fhandle_t *fhp)
1487 {
1488
1489 vfs__fhfree(fhp);
1490 }
1491
1492 /*
1493 * Get file handle system call
1494 */
1495 int
1496 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1497 {
1498 struct sys___getfh30_args /* {
1499 syscallarg(char *) fname;
1500 syscallarg(fhandle_t *) fhp;
1501 syscallarg(size_t *) fh_size;
1502 } */ *uap = v;
1503 struct vnode *vp;
1504 fhandle_t *fh;
1505 int error;
1506 struct nameidata nd;
1507 size_t sz;
1508 size_t usz;
1509
1510 /*
1511 * Must be super user
1512 */
1513 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1514 0, NULL, NULL, NULL);
1515 if (error)
1516 return (error);
1517 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
1518 SCARG(uap, fname), l);
1519 error = namei(&nd);
1520 if (error)
1521 return (error);
1522 vp = nd.ni_vp;
1523 error = vfs_composefh_alloc(vp, &fh);
1524 vput(vp);
1525 if (error != 0) {
1526 goto out;
1527 }
1528 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1529 if (error != 0) {
1530 goto out;
1531 }
1532 sz = FHANDLE_SIZE(fh);
1533 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1534 if (error != 0) {
1535 goto out;
1536 }
1537 if (usz >= sz) {
1538 error = copyout(fh, SCARG(uap, fhp), sz);
1539 } else {
1540 error = E2BIG;
1541 }
1542 out:
1543 vfs_composefh_free(fh);
1544 return (error);
1545 }
1546
1547 /*
1548 * Open a file given a file handle.
1549 *
1550 * Check permissions, allocate an open file structure,
1551 * and call the device open routine if any.
1552 */
1553
1554 int
1555 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1556 register_t *retval)
1557 {
1558 struct filedesc *fdp = l->l_proc->p_fd;
1559 struct file *fp;
1560 struct vnode *vp = NULL;
1561 kauth_cred_t cred = l->l_cred;
1562 struct file *nfp;
1563 int type, indx, error=0;
1564 struct flock lf;
1565 struct vattr va;
1566 fhandle_t *fh;
1567 int flags;
1568
1569 /*
1570 * Must be super user
1571 */
1572 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1573 0, NULL, NULL, NULL)))
1574 return (error);
1575
1576 flags = FFLAGS(oflags);
1577 if ((flags & (FREAD | FWRITE)) == 0)
1578 return (EINVAL);
1579 if ((flags & O_CREAT))
1580 return (EINVAL);
1581 /* falloc() will use the file descriptor for us */
1582 if ((error = falloc(l, &nfp, &indx)) != 0)
1583 return (error);
1584 fp = nfp;
1585 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1586 if (error != 0) {
1587 goto bad;
1588 }
1589 error = vfs_fhtovp(fh, &vp);
1590 if (error != 0) {
1591 goto bad;
1592 }
1593
1594 /* Now do an effective vn_open */
1595
1596 if (vp->v_type == VSOCK) {
1597 error = EOPNOTSUPP;
1598 goto bad;
1599 }
1600 if (flags & FREAD) {
1601 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1602 goto bad;
1603 }
1604 if (flags & (FWRITE | O_TRUNC)) {
1605 if (vp->v_type == VDIR) {
1606 error = EISDIR;
1607 goto bad;
1608 }
1609 if ((error = vn_writechk(vp)) != 0 ||
1610 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1611 goto bad;
1612 }
1613 if (flags & O_TRUNC) {
1614 VOP_UNLOCK(vp, 0); /* XXX */
1615 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1616 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1617 VATTR_NULL(&va);
1618 va.va_size = 0;
1619 error = VOP_SETATTR(vp, &va, cred, l);
1620 if (error)
1621 goto bad;
1622 }
1623 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1624 goto bad;
1625 if (vp->v_type == VREG &&
1626 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1627 error = EIO;
1628 goto bad;
1629 }
1630 if (flags & FWRITE)
1631 vp->v_writecount++;
1632
1633 /* done with modified vn_open, now finish what sys_open does. */
1634
1635 fp->f_flag = flags & FMASK;
1636 fp->f_type = DTYPE_VNODE;
1637 fp->f_ops = &vnops;
1638 fp->f_data = vp;
1639 if (flags & (O_EXLOCK | O_SHLOCK)) {
1640 lf.l_whence = SEEK_SET;
1641 lf.l_start = 0;
1642 lf.l_len = 0;
1643 if (flags & O_EXLOCK)
1644 lf.l_type = F_WRLCK;
1645 else
1646 lf.l_type = F_RDLCK;
1647 type = F_FLOCK;
1648 if ((flags & FNONBLOCK) == 0)
1649 type |= F_WAIT;
1650 VOP_UNLOCK(vp, 0);
1651 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1652 if (error) {
1653 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1654 FILE_UNUSE(fp, l);
1655 ffree(fp);
1656 fdremove(fdp, indx);
1657 return (error);
1658 }
1659 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1660 fp->f_flag |= FHASLOCK;
1661 }
1662 VOP_UNLOCK(vp, 0);
1663 *retval = indx;
1664 FILE_SET_MATURE(fp);
1665 FILE_UNUSE(fp, l);
1666 vfs_copyinfh_free(fh);
1667 return (0);
1668
1669 bad:
1670 FILE_UNUSE(fp, l);
1671 ffree(fp);
1672 fdremove(fdp, indx);
1673 if (vp != NULL)
1674 vput(vp);
1675 vfs_copyinfh_free(fh);
1676 return (error);
1677 }
1678
1679 int
1680 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1681 {
1682 struct sys___fhopen40_args /* {
1683 syscallarg(const void *) fhp;
1684 syscallarg(size_t) fh_size;
1685 syscallarg(int) flags;
1686 } */ *uap = v;
1687
1688 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1689 SCARG(uap, flags), retval);
1690 }
1691
1692 int
1693 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1694 {
1695 int error;
1696 fhandle_t *fh;
1697 struct vnode *vp;
1698
1699 /*
1700 * Must be super user
1701 */
1702 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1703 0, NULL, NULL, NULL)))
1704 return (error);
1705
1706 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1707 if (error != 0)
1708 return error;
1709
1710 error = vfs_fhtovp(fh, &vp);
1711 vfs_copyinfh_free(fh);
1712 if (error != 0)
1713 return error;
1714
1715 error = vn_stat(vp, sb, l);
1716 vput(vp);
1717 return error;
1718 }
1719
1720
1721 /* ARGSUSED */
1722 int
1723 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1724 {
1725 struct sys___fhstat40_args /* {
1726 syscallarg(const void *) fhp;
1727 syscallarg(size_t) fh_size;
1728 syscallarg(struct stat *) sb;
1729 } */ *uap = v;
1730 struct stat sb;
1731 int error;
1732
1733 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1734 if (error)
1735 return error;
1736 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1737 }
1738
1739 int
1740 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1741 int flags)
1742 {
1743 fhandle_t *fh;
1744 struct mount *mp;
1745 struct vnode *vp;
1746 int error;
1747
1748 /*
1749 * Must be super user
1750 */
1751 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1752 0, NULL, NULL, NULL)))
1753 return error;
1754
1755 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1756 if (error != 0)
1757 return error;
1758
1759 error = vfs_fhtovp(fh, &vp);
1760 vfs_copyinfh_free(fh);
1761 if (error != 0)
1762 return error;
1763
1764 mp = vp->v_mount;
1765 error = dostatvfs(mp, sb, l, flags, 1);
1766 vput(vp);
1767 return error;
1768 }
1769
1770 /* ARGSUSED */
1771 int
1772 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1773 {
1774 struct sys___fhstatvfs140_args /* {
1775 syscallarg(const void *) fhp;
1776 syscallarg(size_t) fh_size;
1777 syscallarg(struct statvfs *) buf;
1778 syscallarg(int) flags;
1779 } */ *uap = v;
1780 struct statvfs *sb = STATVFSBUF_GET();
1781 int error;
1782
1783 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1784 SCARG(uap, flags));
1785 if (error == 0)
1786 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1787 STATVFSBUF_PUT(sb);
1788 return error;
1789 }
1790
1791 /*
1792 * Create a special file.
1793 */
1794 /* ARGSUSED */
1795 int
1796 sys_mknod(struct lwp *l, void *v, register_t *retval)
1797 {
1798 struct sys_mknod_args /* {
1799 syscallarg(const char *) path;
1800 syscallarg(int) mode;
1801 syscallarg(int) dev;
1802 } */ *uap = v;
1803 struct proc *p = l->l_proc;
1804 struct vnode *vp;
1805 struct vattr vattr;
1806 int error, optype;
1807 struct nameidata nd;
1808 char *path;
1809 const char *cpath;
1810 enum uio_seg seg = UIO_USERSPACE;
1811
1812 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1813 0, NULL, NULL, NULL)) != 0)
1814 return (error);
1815
1816 optype = VOP_MKNOD_DESCOFFSET;
1817
1818 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
1819 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, seg, cpath, l);
1820
1821 if ((error = namei(&nd)) != 0)
1822 goto out;
1823 vp = nd.ni_vp;
1824 if (vp != NULL)
1825 error = EEXIST;
1826 else {
1827 VATTR_NULL(&vattr);
1828 /* We will read cwdi->cwdi_cmask unlocked. */
1829 vattr.va_mode =
1830 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1831 vattr.va_rdev = SCARG(uap, dev);
1832
1833 switch (SCARG(uap, mode) & S_IFMT) {
1834 case S_IFMT: /* used by badsect to flag bad sectors */
1835 vattr.va_type = VBAD;
1836 break;
1837 case S_IFCHR:
1838 vattr.va_type = VCHR;
1839 break;
1840 case S_IFBLK:
1841 vattr.va_type = VBLK;
1842 break;
1843 case S_IFWHT:
1844 optype = VOP_WHITEOUT_DESCOFFSET;
1845 break;
1846 case S_IFREG:
1847 #if NVERIEXEC > 0
1848 error = veriexec_openchk(l, nd.ni_vp, nd.ni_dirp,
1849 O_CREAT);
1850 #endif /* NVERIEXEC > 0 */
1851 vattr.va_type = VREG;
1852 vattr.va_rdev = VNOVAL;
1853 optype = VOP_CREATE_DESCOFFSET;
1854 break;
1855 default:
1856 error = EINVAL;
1857 break;
1858 }
1859 }
1860 if (!error) {
1861 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1862 switch (optype) {
1863 case VOP_WHITEOUT_DESCOFFSET:
1864 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1865 if (error)
1866 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1867 vput(nd.ni_dvp);
1868 break;
1869
1870 case VOP_MKNOD_DESCOFFSET:
1871 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1872 &nd.ni_cnd, &vattr);
1873 if (error == 0)
1874 vput(nd.ni_vp);
1875 break;
1876
1877 case VOP_CREATE_DESCOFFSET:
1878 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
1879 &nd.ni_cnd, &vattr);
1880 if (error == 0)
1881 vput(nd.ni_vp);
1882 break;
1883 }
1884 } else {
1885 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1886 if (nd.ni_dvp == vp)
1887 vrele(nd.ni_dvp);
1888 else
1889 vput(nd.ni_dvp);
1890 if (vp)
1891 vrele(vp);
1892 }
1893 out:
1894 VERIEXEC_PATH_PUT(path);
1895 return (error);
1896 }
1897
1898 /*
1899 * Create a named pipe.
1900 */
1901 /* ARGSUSED */
1902 int
1903 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1904 {
1905 struct sys_mkfifo_args /* {
1906 syscallarg(const char *) path;
1907 syscallarg(int) mode;
1908 } */ *uap = v;
1909 struct proc *p = l->l_proc;
1910 struct vattr vattr;
1911 int error;
1912 struct nameidata nd;
1913
1914 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1915 if ((error = namei(&nd)) != 0)
1916 return (error);
1917 if (nd.ni_vp != NULL) {
1918 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1919 if (nd.ni_dvp == nd.ni_vp)
1920 vrele(nd.ni_dvp);
1921 else
1922 vput(nd.ni_dvp);
1923 vrele(nd.ni_vp);
1924 return (EEXIST);
1925 }
1926 VATTR_NULL(&vattr);
1927 vattr.va_type = VFIFO;
1928 /* We will read cwdi->cwdi_cmask unlocked. */
1929 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1930 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1931 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1932 if (error == 0)
1933 vput(nd.ni_vp);
1934 return (error);
1935 }
1936
1937 /*
1938 * Make a hard file link.
1939 */
1940 /* ARGSUSED */
1941 int
1942 sys_link(struct lwp *l, void *v, register_t *retval)
1943 {
1944 struct sys_link_args /* {
1945 syscallarg(const char *) path;
1946 syscallarg(const char *) link;
1947 } */ *uap = v;
1948 struct vnode *vp;
1949 struct nameidata nd;
1950 int error;
1951
1952 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
1953 if ((error = namei(&nd)) != 0)
1954 return (error);
1955 vp = nd.ni_vp;
1956 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1957 if ((error = namei(&nd)) != 0)
1958 goto out;
1959 if (nd.ni_vp) {
1960 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1961 if (nd.ni_dvp == nd.ni_vp)
1962 vrele(nd.ni_dvp);
1963 else
1964 vput(nd.ni_dvp);
1965 vrele(nd.ni_vp);
1966 error = EEXIST;
1967 goto out;
1968 }
1969 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1970 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1971 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1972 out:
1973 vrele(vp);
1974 return (error);
1975 }
1976
1977 /*
1978 * Make a symbolic link.
1979 */
1980 /* ARGSUSED */
1981 int
1982 sys_symlink(struct lwp *l, void *v, register_t *retval)
1983 {
1984 struct sys_symlink_args /* {
1985 syscallarg(const char *) path;
1986 syscallarg(const char *) link;
1987 } */ *uap = v;
1988 struct proc *p = l->l_proc;
1989 struct vattr vattr;
1990 char *path;
1991 int error;
1992 struct nameidata nd;
1993
1994 path = PNBUF_GET();
1995 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1996 if (error)
1997 goto out;
1998 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, link), l);
1999 if ((error = namei(&nd)) != 0)
2000 goto out;
2001 if (nd.ni_vp) {
2002 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2003 if (nd.ni_dvp == nd.ni_vp)
2004 vrele(nd.ni_dvp);
2005 else
2006 vput(nd.ni_dvp);
2007 vrele(nd.ni_vp);
2008 error = EEXIST;
2009 goto out;
2010 }
2011 VATTR_NULL(&vattr);
2012 vattr.va_type = VLNK;
2013 /* We will read cwdi->cwdi_cmask unlocked. */
2014 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2015 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2016 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2017 if (error == 0)
2018 vput(nd.ni_vp);
2019 out:
2020 PNBUF_PUT(path);
2021 return (error);
2022 }
2023
2024 /*
2025 * Delete a whiteout from the filesystem.
2026 */
2027 /* ARGSUSED */
2028 int
2029 sys_undelete(struct lwp *l, void *v, register_t *retval)
2030 {
2031 struct sys_undelete_args /* {
2032 syscallarg(const char *) path;
2033 } */ *uap = v;
2034 int error;
2035 struct nameidata nd;
2036
2037 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, UIO_USERSPACE,
2038 SCARG(uap, path), l);
2039 error = namei(&nd);
2040 if (error)
2041 return (error);
2042
2043 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2044 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2045 if (nd.ni_dvp == nd.ni_vp)
2046 vrele(nd.ni_dvp);
2047 else
2048 vput(nd.ni_dvp);
2049 if (nd.ni_vp)
2050 vrele(nd.ni_vp);
2051 return (EEXIST);
2052 }
2053 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2054 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2055 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2056 vput(nd.ni_dvp);
2057 return (error);
2058 }
2059
2060 /*
2061 * Delete a name from the filesystem.
2062 */
2063 /* ARGSUSED */
2064 int
2065 sys_unlink(struct lwp *l, void *v, register_t *retval)
2066 {
2067 struct sys_unlink_args /* {
2068 syscallarg(const char *) path;
2069 } */ *uap = v;
2070 struct vnode *vp;
2071 int error;
2072 struct nameidata nd;
2073 char *path;
2074 const char *cpath;
2075 enum uio_seg seg = UIO_USERSPACE;
2076
2077 VERIEXEC_PATH_GET(SCARG(uap, path), seg, cpath, path);
2078 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, seg, cpath, l);
2079
2080 if ((error = namei(&nd)) != 0)
2081 goto out;
2082 vp = nd.ni_vp;
2083
2084 /*
2085 * The root of a mounted filesystem cannot be deleted.
2086 */
2087 if (vp->v_vflag & VV_ROOT) {
2088 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2089 if (nd.ni_dvp == vp)
2090 vrele(nd.ni_dvp);
2091 else
2092 vput(nd.ni_dvp);
2093 vput(vp);
2094 error = EBUSY;
2095 goto out;
2096 }
2097
2098 #if NVERIEXEC > 0
2099 /* Handle remove requests for veriexec entries. */
2100 if ((error = veriexec_removechk(l, nd.ni_vp, nd.ni_dirp)) != 0) {
2101 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2102 if (nd.ni_dvp == vp)
2103 vrele(nd.ni_dvp);
2104 else
2105 vput(nd.ni_dvp);
2106 vput(vp);
2107 goto out;
2108 }
2109 #endif /* NVERIEXEC > 0 */
2110
2111 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2112 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2113 #ifdef FILEASSOC
2114 (void)fileassoc_file_delete(vp);
2115 #endif /* FILEASSOC */
2116 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2117 out:
2118 VERIEXEC_PATH_PUT(path);
2119 return (error);
2120 }
2121
2122 /*
2123 * Reposition read/write file offset.
2124 */
2125 int
2126 sys_lseek(struct lwp *l, void *v, register_t *retval)
2127 {
2128 struct sys_lseek_args /* {
2129 syscallarg(int) fd;
2130 syscallarg(int) pad;
2131 syscallarg(off_t) offset;
2132 syscallarg(int) whence;
2133 } */ *uap = v;
2134 struct proc *p = l->l_proc;
2135 kauth_cred_t cred = l->l_cred;
2136 struct filedesc *fdp = p->p_fd;
2137 struct file *fp;
2138 struct vnode *vp;
2139 struct vattr vattr;
2140 off_t newoff;
2141 int error;
2142
2143 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2144 return (EBADF);
2145
2146 vp = (struct vnode *)fp->f_data;
2147 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2148 error = ESPIPE;
2149 mutex_exit(&fp->f_lock);
2150 goto out;
2151 }
2152
2153 switch (SCARG(uap, whence)) {
2154 case SEEK_CUR:
2155 newoff = fp->f_offset + SCARG(uap, offset);
2156 FILE_USE(fp);
2157 break;
2158 case SEEK_END:
2159 FILE_USE(fp);
2160 error = VOP_GETATTR(vp, &vattr, cred, l);
2161 if (error) {
2162 FILE_UNUSE(fp, l);
2163 goto out;
2164 }
2165 newoff = SCARG(uap, offset) + vattr.va_size;
2166 break;
2167 case SEEK_SET:
2168 FILE_USE(fp);
2169 newoff = SCARG(uap, offset);
2170 break;
2171 default:
2172 mutex_exit(&fp->f_lock);
2173 error = EINVAL;
2174 goto out;
2175 }
2176 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2177 mutex_enter(&fp->f_lock);
2178 *(off_t *)retval = fp->f_offset = newoff;
2179 mutex_exit(&fp->f_lock);
2180 }
2181 FILE_UNUSE(fp, l);
2182 out:
2183 return (error);
2184 }
2185
2186 /*
2187 * Positional read system call.
2188 */
2189 int
2190 sys_pread(struct lwp *l, void *v, register_t *retval)
2191 {
2192 struct sys_pread_args /* {
2193 syscallarg(int) fd;
2194 syscallarg(void *) buf;
2195 syscallarg(size_t) nbyte;
2196 syscallarg(off_t) offset;
2197 } */ *uap = v;
2198 struct proc *p = l->l_proc;
2199 struct filedesc *fdp = p->p_fd;
2200 struct file *fp;
2201 struct vnode *vp;
2202 off_t offset;
2203 int error, fd = SCARG(uap, fd);
2204
2205 if ((fp = fd_getfile(fdp, fd)) == NULL)
2206 return (EBADF);
2207
2208 if ((fp->f_flag & FREAD) == 0) {
2209 mutex_exit(&fp->f_lock);
2210 return (EBADF);
2211 }
2212
2213 FILE_USE(fp);
2214
2215 vp = (struct vnode *)fp->f_data;
2216 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2217 error = ESPIPE;
2218 goto out;
2219 }
2220
2221 offset = SCARG(uap, offset);
2222
2223 /*
2224 * XXX This works because no file systems actually
2225 * XXX take any action on the seek operation.
2226 */
2227 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2228 goto out;
2229
2230 /* dofileread() will unuse the descriptor for us */
2231 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2232 &offset, 0, retval));
2233
2234 out:
2235 FILE_UNUSE(fp, l);
2236 return (error);
2237 }
2238
2239 /*
2240 * Positional scatter read system call.
2241 */
2242 int
2243 sys_preadv(struct lwp *l, void *v, register_t *retval)
2244 {
2245 struct sys_preadv_args /* {
2246 syscallarg(int) fd;
2247 syscallarg(const struct iovec *) iovp;
2248 syscallarg(int) iovcnt;
2249 syscallarg(off_t) offset;
2250 } */ *uap = v;
2251 struct proc *p = l->l_proc;
2252 struct filedesc *fdp = p->p_fd;
2253 struct file *fp;
2254 struct vnode *vp;
2255 off_t offset;
2256 int error, fd = SCARG(uap, fd);
2257
2258 if ((fp = fd_getfile(fdp, fd)) == NULL)
2259 return (EBADF);
2260
2261 if ((fp->f_flag & FREAD) == 0) {
2262 mutex_exit(&fp->f_lock);
2263 return (EBADF);
2264 }
2265
2266 FILE_USE(fp);
2267
2268 vp = (struct vnode *)fp->f_data;
2269 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2270 error = ESPIPE;
2271 goto out;
2272 }
2273
2274 offset = SCARG(uap, offset);
2275
2276 /*
2277 * XXX This works because no file systems actually
2278 * XXX take any action on the seek operation.
2279 */
2280 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2281 goto out;
2282
2283 /* dofilereadv() will unuse the descriptor for us */
2284 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2285 &offset, 0, retval));
2286
2287 out:
2288 FILE_UNUSE(fp, l);
2289 return (error);
2290 }
2291
2292 /*
2293 * Positional write system call.
2294 */
2295 int
2296 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2297 {
2298 struct sys_pwrite_args /* {
2299 syscallarg(int) fd;
2300 syscallarg(const void *) buf;
2301 syscallarg(size_t) nbyte;
2302 syscallarg(off_t) offset;
2303 } */ *uap = v;
2304 struct proc *p = l->l_proc;
2305 struct filedesc *fdp = p->p_fd;
2306 struct file *fp;
2307 struct vnode *vp;
2308 off_t offset;
2309 int error, fd = SCARG(uap, fd);
2310
2311 if ((fp = fd_getfile(fdp, fd)) == NULL)
2312 return (EBADF);
2313
2314 if ((fp->f_flag & FWRITE) == 0) {
2315 mutex_exit(&fp->f_lock);
2316 return (EBADF);
2317 }
2318
2319 FILE_USE(fp);
2320
2321 vp = (struct vnode *)fp->f_data;
2322 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2323 error = ESPIPE;
2324 goto out;
2325 }
2326
2327 offset = SCARG(uap, offset);
2328
2329 /*
2330 * XXX This works because no file systems actually
2331 * XXX take any action on the seek operation.
2332 */
2333 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2334 goto out;
2335
2336 /* dofilewrite() will unuse the descriptor for us */
2337 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2338 &offset, 0, retval));
2339
2340 out:
2341 FILE_UNUSE(fp, l);
2342 return (error);
2343 }
2344
2345 /*
2346 * Positional gather write system call.
2347 */
2348 int
2349 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2350 {
2351 struct sys_pwritev_args /* {
2352 syscallarg(int) fd;
2353 syscallarg(const struct iovec *) iovp;
2354 syscallarg(int) iovcnt;
2355 syscallarg(off_t) offset;
2356 } */ *uap = v;
2357 struct proc *p = l->l_proc;
2358 struct filedesc *fdp = p->p_fd;
2359 struct file *fp;
2360 struct vnode *vp;
2361 off_t offset;
2362 int error, fd = SCARG(uap, fd);
2363
2364 if ((fp = fd_getfile(fdp, fd)) == NULL)
2365 return (EBADF);
2366
2367 if ((fp->f_flag & FWRITE) == 0) {
2368 mutex_exit(&fp->f_lock);
2369 return (EBADF);
2370 }
2371
2372 FILE_USE(fp);
2373
2374 vp = (struct vnode *)fp->f_data;
2375 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2376 error = ESPIPE;
2377 goto out;
2378 }
2379
2380 offset = SCARG(uap, offset);
2381
2382 /*
2383 * XXX This works because no file systems actually
2384 * XXX take any action on the seek operation.
2385 */
2386 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2387 goto out;
2388
2389 /* dofilewritev() will unuse the descriptor for us */
2390 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2391 &offset, 0, retval));
2392
2393 out:
2394 FILE_UNUSE(fp, l);
2395 return (error);
2396 }
2397
2398 /*
2399 * Check access permissions.
2400 */
2401 int
2402 sys_access(struct lwp *l, void *v, register_t *retval)
2403 {
2404 struct sys_access_args /* {
2405 syscallarg(const char *) path;
2406 syscallarg(int) flags;
2407 } */ *uap = v;
2408 kauth_cred_t cred;
2409 struct vnode *vp;
2410 int error, flags;
2411 struct nameidata nd;
2412
2413 cred = kauth_cred_dup(l->l_cred);
2414 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2415 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2416 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2417 SCARG(uap, path), l);
2418 /* Override default credentials */
2419 nd.ni_cnd.cn_cred = cred;
2420 if ((error = namei(&nd)) != 0)
2421 goto out;
2422 vp = nd.ni_vp;
2423
2424 /* Flags == 0 means only check for existence. */
2425 if (SCARG(uap, flags)) {
2426 flags = 0;
2427 if (SCARG(uap, flags) & R_OK)
2428 flags |= VREAD;
2429 if (SCARG(uap, flags) & W_OK)
2430 flags |= VWRITE;
2431 if (SCARG(uap, flags) & X_OK)
2432 flags |= VEXEC;
2433
2434 error = VOP_ACCESS(vp, flags, cred, l);
2435 if (!error && (flags & VWRITE))
2436 error = vn_writechk(vp);
2437 }
2438 vput(vp);
2439 out:
2440 kauth_cred_free(cred);
2441 return (error);
2442 }
2443
2444 /*
2445 * Common code for all sys_stat functions, including compat versions.
2446 */
2447 int
2448 do_sys_stat(struct lwp *l, const char *path, unsigned int nd_flags,
2449 struct stat *sb)
2450 {
2451 int error;
2452 struct nameidata nd;
2453
2454 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT , UIO_USERSPACE, path, l);
2455 error = namei(&nd);
2456 if (error != 0)
2457 return error;
2458 error = vn_stat(nd.ni_vp, sb, l);
2459 vput(nd.ni_vp);
2460 return error;
2461 }
2462
2463 /*
2464 * Get file status; this version follows links.
2465 */
2466 /* ARGSUSED */
2467 int
2468 sys___stat30(struct lwp *l, void *v, register_t *retval)
2469 {
2470 struct sys___stat30_args /* {
2471 syscallarg(const char *) path;
2472 syscallarg(struct stat *) ub;
2473 } */ *uap = v;
2474 struct stat sb;
2475 int error;
2476
2477 error = do_sys_stat(l, SCARG(uap, path), FOLLOW, &sb);
2478 if (error)
2479 return error;
2480 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2481 }
2482
2483 /*
2484 * Get file status; this version does not follow links.
2485 */
2486 /* ARGSUSED */
2487 int
2488 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2489 {
2490 struct sys___lstat30_args /* {
2491 syscallarg(const char *) path;
2492 syscallarg(struct stat *) ub;
2493 } */ *uap = v;
2494 struct stat sb;
2495 int error;
2496
2497 error = do_sys_stat(l, SCARG(uap, path), NOFOLLOW, &sb);
2498 if (error)
2499 return error;
2500 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2501 }
2502
2503 /*
2504 * Get configurable pathname variables.
2505 */
2506 /* ARGSUSED */
2507 int
2508 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2509 {
2510 struct sys_pathconf_args /* {
2511 syscallarg(const char *) path;
2512 syscallarg(int) name;
2513 } */ *uap = v;
2514 int error;
2515 struct nameidata nd;
2516
2517 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2518 SCARG(uap, path), l);
2519 if ((error = namei(&nd)) != 0)
2520 return (error);
2521 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2522 vput(nd.ni_vp);
2523 return (error);
2524 }
2525
2526 /*
2527 * Return target name of a symbolic link.
2528 */
2529 /* ARGSUSED */
2530 int
2531 sys_readlink(struct lwp *l, void *v, register_t *retval)
2532 {
2533 struct sys_readlink_args /* {
2534 syscallarg(const char *) path;
2535 syscallarg(char *) buf;
2536 syscallarg(size_t) count;
2537 } */ *uap = v;
2538 struct vnode *vp;
2539 struct iovec aiov;
2540 struct uio auio;
2541 int error;
2542 struct nameidata nd;
2543
2544 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
2545 SCARG(uap, path), l);
2546 if ((error = namei(&nd)) != 0)
2547 return (error);
2548 vp = nd.ni_vp;
2549 if (vp->v_type != VLNK)
2550 error = EINVAL;
2551 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2552 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2553 aiov.iov_base = SCARG(uap, buf);
2554 aiov.iov_len = SCARG(uap, count);
2555 auio.uio_iov = &aiov;
2556 auio.uio_iovcnt = 1;
2557 auio.uio_offset = 0;
2558 auio.uio_rw = UIO_READ;
2559 KASSERT(l == curlwp);
2560 auio.uio_vmspace = l->l_proc->p_vmspace;
2561 auio.uio_resid = SCARG(uap, count);
2562 error = VOP_READLINK(vp, &auio, l->l_cred);
2563 }
2564 vput(vp);
2565 *retval = SCARG(uap, count) - auio.uio_resid;
2566 return (error);
2567 }
2568
2569 /*
2570 * Change flags of a file given a path name.
2571 */
2572 /* ARGSUSED */
2573 int
2574 sys_chflags(struct lwp *l, void *v, register_t *retval)
2575 {
2576 struct sys_chflags_args /* {
2577 syscallarg(const char *) path;
2578 syscallarg(u_long) flags;
2579 } */ *uap = v;
2580 struct vnode *vp;
2581 int error;
2582 struct nameidata nd;
2583
2584 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2585 if ((error = namei(&nd)) != 0)
2586 return (error);
2587 vp = nd.ni_vp;
2588 error = change_flags(vp, SCARG(uap, flags), l);
2589 vput(vp);
2590 return (error);
2591 }
2592
2593 /*
2594 * Change flags of a file given a file descriptor.
2595 */
2596 /* ARGSUSED */
2597 int
2598 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2599 {
2600 struct sys_fchflags_args /* {
2601 syscallarg(int) fd;
2602 syscallarg(u_long) flags;
2603 } */ *uap = v;
2604 struct proc *p = l->l_proc;
2605 struct vnode *vp;
2606 struct file *fp;
2607 int error;
2608
2609 /* getvnode() will use the descriptor for us */
2610 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2611 return (error);
2612 vp = (struct vnode *)fp->f_data;
2613 error = change_flags(vp, SCARG(uap, flags), l);
2614 VOP_UNLOCK(vp, 0);
2615 FILE_UNUSE(fp, l);
2616 return (error);
2617 }
2618
2619 /*
2620 * Change flags of a file given a path name; this version does
2621 * not follow links.
2622 */
2623 int
2624 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2625 {
2626 struct sys_lchflags_args /* {
2627 syscallarg(const char *) path;
2628 syscallarg(u_long) flags;
2629 } */ *uap = v;
2630 struct vnode *vp;
2631 int error;
2632 struct nameidata nd;
2633
2634 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2635 if ((error = namei(&nd)) != 0)
2636 return (error);
2637 vp = nd.ni_vp;
2638 error = change_flags(vp, SCARG(uap, flags), l);
2639 vput(vp);
2640 return (error);
2641 }
2642
2643 /*
2644 * Common routine to change flags of a file.
2645 */
2646 int
2647 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2648 {
2649 struct vattr vattr;
2650 int error;
2651
2652 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2653 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2654 /*
2655 * Non-superusers cannot change the flags on devices, even if they
2656 * own them.
2657 */
2658 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
2659 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2660 goto out;
2661 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2662 error = EINVAL;
2663 goto out;
2664 }
2665 }
2666 VATTR_NULL(&vattr);
2667 vattr.va_flags = flags;
2668 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2669 out:
2670 return (error);
2671 }
2672
2673 /*
2674 * Change mode of a file given path name; this version follows links.
2675 */
2676 /* ARGSUSED */
2677 int
2678 sys_chmod(struct lwp *l, void *v, register_t *retval)
2679 {
2680 struct sys_chmod_args /* {
2681 syscallarg(const char *) path;
2682 syscallarg(int) mode;
2683 } */ *uap = v;
2684 int error;
2685 struct nameidata nd;
2686
2687 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2688 if ((error = namei(&nd)) != 0)
2689 return (error);
2690
2691 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2692
2693 vrele(nd.ni_vp);
2694 return (error);
2695 }
2696
2697 /*
2698 * Change mode of a file given a file descriptor.
2699 */
2700 /* ARGSUSED */
2701 int
2702 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2703 {
2704 struct sys_fchmod_args /* {
2705 syscallarg(int) fd;
2706 syscallarg(int) mode;
2707 } */ *uap = v;
2708 struct proc *p = l->l_proc;
2709 struct file *fp;
2710 int error;
2711
2712 /* getvnode() will use the descriptor for us */
2713 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2714 return (error);
2715
2716 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2717 FILE_UNUSE(fp, l);
2718 return (error);
2719 }
2720
2721 /*
2722 * Change mode of a file given path name; this version does not follow links.
2723 */
2724 /* ARGSUSED */
2725 int
2726 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2727 {
2728 struct sys_lchmod_args /* {
2729 syscallarg(const char *) path;
2730 syscallarg(int) mode;
2731 } */ *uap = v;
2732 int error;
2733 struct nameidata nd;
2734
2735 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2736 if ((error = namei(&nd)) != 0)
2737 return (error);
2738
2739 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2740
2741 vrele(nd.ni_vp);
2742 return (error);
2743 }
2744
2745 /*
2746 * Common routine to set mode given a vnode.
2747 */
2748 static int
2749 change_mode(struct vnode *vp, int mode, struct lwp *l)
2750 {
2751 struct vattr vattr;
2752 int error;
2753
2754 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2756 VATTR_NULL(&vattr);
2757 vattr.va_mode = mode & ALLPERMS;
2758 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2759 VOP_UNLOCK(vp, 0);
2760 return (error);
2761 }
2762
2763 /*
2764 * Set ownership given a path name; this version follows links.
2765 */
2766 /* ARGSUSED */
2767 int
2768 sys_chown(struct lwp *l, void *v, register_t *retval)
2769 {
2770 struct sys_chown_args /* {
2771 syscallarg(const char *) path;
2772 syscallarg(uid_t) uid;
2773 syscallarg(gid_t) gid;
2774 } */ *uap = v;
2775 int error;
2776 struct nameidata nd;
2777
2778 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2779 if ((error = namei(&nd)) != 0)
2780 return (error);
2781
2782 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2783
2784 vrele(nd.ni_vp);
2785 return (error);
2786 }
2787
2788 /*
2789 * Set ownership given a path name; this version follows links.
2790 * Provides POSIX semantics.
2791 */
2792 /* ARGSUSED */
2793 int
2794 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2795 {
2796 struct sys_chown_args /* {
2797 syscallarg(const char *) path;
2798 syscallarg(uid_t) uid;
2799 syscallarg(gid_t) gid;
2800 } */ *uap = v;
2801 int error;
2802 struct nameidata nd;
2803
2804 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2805 if ((error = namei(&nd)) != 0)
2806 return (error);
2807
2808 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2809
2810 vrele(nd.ni_vp);
2811 return (error);
2812 }
2813
2814 /*
2815 * Set ownership given a file descriptor.
2816 */
2817 /* ARGSUSED */
2818 int
2819 sys_fchown(struct lwp *l, void *v, register_t *retval)
2820 {
2821 struct sys_fchown_args /* {
2822 syscallarg(int) fd;
2823 syscallarg(uid_t) uid;
2824 syscallarg(gid_t) gid;
2825 } */ *uap = v;
2826 struct proc *p = l->l_proc;
2827 int error;
2828 struct file *fp;
2829
2830 /* getvnode() will use the descriptor for us */
2831 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2832 return (error);
2833
2834 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2835 SCARG(uap, gid), l, 0);
2836 FILE_UNUSE(fp, l);
2837 return (error);
2838 }
2839
2840 /*
2841 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2842 */
2843 /* ARGSUSED */
2844 int
2845 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2846 {
2847 struct sys_fchown_args /* {
2848 syscallarg(int) fd;
2849 syscallarg(uid_t) uid;
2850 syscallarg(gid_t) gid;
2851 } */ *uap = v;
2852 struct proc *p = l->l_proc;
2853 int error;
2854 struct file *fp;
2855
2856 /* getvnode() will use the descriptor for us */
2857 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2858 return (error);
2859
2860 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2861 SCARG(uap, gid), l, 1);
2862 FILE_UNUSE(fp, l);
2863 return (error);
2864 }
2865
2866 /*
2867 * Set ownership given a path name; this version does not follow links.
2868 */
2869 /* ARGSUSED */
2870 int
2871 sys_lchown(struct lwp *l, void *v, register_t *retval)
2872 {
2873 struct sys_lchown_args /* {
2874 syscallarg(const char *) path;
2875 syscallarg(uid_t) uid;
2876 syscallarg(gid_t) gid;
2877 } */ *uap = v;
2878 int error;
2879 struct nameidata nd;
2880
2881 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2882 if ((error = namei(&nd)) != 0)
2883 return (error);
2884
2885 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2886
2887 vrele(nd.ni_vp);
2888 return (error);
2889 }
2890
2891 /*
2892 * Set ownership given a path name; this version does not follow links.
2893 * Provides POSIX/XPG semantics.
2894 */
2895 /* ARGSUSED */
2896 int
2897 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2898 {
2899 struct sys_lchown_args /* {
2900 syscallarg(const char *) path;
2901 syscallarg(uid_t) uid;
2902 syscallarg(gid_t) gid;
2903 } */ *uap = v;
2904 int error;
2905 struct nameidata nd;
2906
2907 NDINIT(&nd, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
2908 if ((error = namei(&nd)) != 0)
2909 return (error);
2910
2911 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2912
2913 vrele(nd.ni_vp);
2914 return (error);
2915 }
2916
2917 /*
2918 * Common routine to set ownership given a vnode.
2919 */
2920 static int
2921 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2922 int posix_semantics)
2923 {
2924 struct vattr vattr;
2925 mode_t newmode;
2926 int error;
2927
2928 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2929 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2930 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2931 goto out;
2932
2933 #define CHANGED(x) ((int)(x) != -1)
2934 newmode = vattr.va_mode;
2935 if (posix_semantics) {
2936 /*
2937 * POSIX/XPG semantics: if the caller is not the super-user,
2938 * clear set-user-id and set-group-id bits. Both POSIX and
2939 * the XPG consider the behaviour for calls by the super-user
2940 * implementation-defined; we leave the set-user-id and set-
2941 * group-id settings intact in that case.
2942 */
2943 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2944 NULL) != 0)
2945 newmode &= ~(S_ISUID | S_ISGID);
2946 } else {
2947 /*
2948 * NetBSD semantics: when changing owner and/or group,
2949 * clear the respective bit(s).
2950 */
2951 if (CHANGED(uid))
2952 newmode &= ~S_ISUID;
2953 if (CHANGED(gid))
2954 newmode &= ~S_ISGID;
2955 }
2956 /* Update va_mode iff altered. */
2957 if (vattr.va_mode == newmode)
2958 newmode = VNOVAL;
2959
2960 VATTR_NULL(&vattr);
2961 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2962 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2963 vattr.va_mode = newmode;
2964 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2965 #undef CHANGED
2966
2967 out:
2968 VOP_UNLOCK(vp, 0);
2969 return (error);
2970 }
2971
2972 /*
2973 * Set the access and modification times given a path name; this
2974 * version follows links.
2975 */
2976 /* ARGSUSED */
2977 int
2978 sys_utimes(struct lwp *l, void *v, register_t *retval)
2979 {
2980 struct sys_utimes_args /* {
2981 syscallarg(const char *) path;
2982 syscallarg(const struct timeval *) tptr;
2983 } */ *uap = v;
2984
2985 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
2986 SCARG(uap, tptr), UIO_USERSPACE);
2987 }
2988
2989 /*
2990 * Set the access and modification times given a file descriptor.
2991 */
2992 /* ARGSUSED */
2993 int
2994 sys_futimes(struct lwp *l, void *v, register_t *retval)
2995 {
2996 struct sys_futimes_args /* {
2997 syscallarg(int) fd;
2998 syscallarg(const struct timeval *) tptr;
2999 } */ *uap = v;
3000 int error;
3001 struct file *fp;
3002
3003 /* getvnode() will use the descriptor for us */
3004 if ((error = getvnode(l->l_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
3005 return (error);
3006
3007 error = do_sys_utimes(l, fp->f_data, NULL, 0,
3008 SCARG(uap, tptr), UIO_USERSPACE);
3009
3010 FILE_UNUSE(fp, l);
3011 return (error);
3012 }
3013
3014 /*
3015 * Set the access and modification times given a path name; this
3016 * version does not follow links.
3017 */
3018 int
3019 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3020 {
3021 struct sys_lutimes_args /* {
3022 syscallarg(const char *) path;
3023 syscallarg(const struct timeval *) tptr;
3024 } */ *uap = v;
3025
3026 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3027 SCARG(uap, tptr), UIO_USERSPACE);
3028 }
3029
3030 /*
3031 * Common routine to set access and modification times given a vnode.
3032 */
3033 int
3034 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3035 const struct timeval *tptr, enum uio_seg seg)
3036 {
3037 struct vattr vattr;
3038 struct nameidata nd;
3039 int error;
3040
3041 VATTR_NULL(&vattr);
3042 if (tptr == NULL) {
3043 nanotime(&vattr.va_atime);
3044 vattr.va_mtime = vattr.va_atime;
3045 vattr.va_vaflags |= VA_UTIMES_NULL;
3046 } else {
3047 struct timeval tv[2];
3048
3049 if (seg != UIO_SYSSPACE) {
3050 error = copyin(tptr, &tv, sizeof (tv));
3051 if (error != 0)
3052 return error;
3053 tptr = tv;
3054 }
3055 TIMEVAL_TO_TIMESPEC(tptr, &vattr.va_atime);
3056 TIMEVAL_TO_TIMESPEC(tptr + 1, &vattr.va_mtime);
3057 }
3058
3059 if (vp == NULL) {
3060 NDINIT(&nd, LOOKUP, flag | TRYEMULROOT, UIO_USERSPACE, path, l);
3061 if ((error = namei(&nd)) != 0)
3062 return (error);
3063 vp = nd.ni_vp;
3064 } else
3065 nd.ni_vp = NULL;
3066
3067 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3069 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3070 VOP_UNLOCK(vp, 0);
3071
3072 if (nd.ni_vp != NULL)
3073 vrele(nd.ni_vp);
3074
3075 return (error);
3076 }
3077
3078 /*
3079 * Truncate a file given its path name.
3080 */
3081 /* ARGSUSED */
3082 int
3083 sys_truncate(struct lwp *l, void *v, register_t *retval)
3084 {
3085 struct sys_truncate_args /* {
3086 syscallarg(const char *) path;
3087 syscallarg(int) pad;
3088 syscallarg(off_t) length;
3089 } */ *uap = v;
3090 struct vnode *vp;
3091 struct vattr vattr;
3092 int error;
3093 struct nameidata nd;
3094
3095 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
3096 if ((error = namei(&nd)) != 0)
3097 return (error);
3098 vp = nd.ni_vp;
3099 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3100 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3101 if (vp->v_type == VDIR)
3102 error = EISDIR;
3103 else if ((error = vn_writechk(vp)) == 0 &&
3104 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3105 VATTR_NULL(&vattr);
3106 vattr.va_size = SCARG(uap, length);
3107 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3108 }
3109 vput(vp);
3110 return (error);
3111 }
3112
3113 /*
3114 * Truncate a file given a file descriptor.
3115 */
3116 /* ARGSUSED */
3117 int
3118 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3119 {
3120 struct sys_ftruncate_args /* {
3121 syscallarg(int) fd;
3122 syscallarg(int) pad;
3123 syscallarg(off_t) length;
3124 } */ *uap = v;
3125 struct proc *p = l->l_proc;
3126 struct vattr vattr;
3127 struct vnode *vp;
3128 struct file *fp;
3129 int error;
3130
3131 /* getvnode() will use the descriptor for us */
3132 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3133 return (error);
3134 if ((fp->f_flag & FWRITE) == 0) {
3135 error = EINVAL;
3136 goto out;
3137 }
3138 vp = (struct vnode *)fp->f_data;
3139 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3140 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3141 if (vp->v_type == VDIR)
3142 error = EISDIR;
3143 else if ((error = vn_writechk(vp)) == 0) {
3144 VATTR_NULL(&vattr);
3145 vattr.va_size = SCARG(uap, length);
3146 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3147 }
3148 VOP_UNLOCK(vp, 0);
3149 out:
3150 FILE_UNUSE(fp, l);
3151 return (error);
3152 }
3153
3154 /*
3155 * Sync an open file.
3156 */
3157 /* ARGSUSED */
3158 int
3159 sys_fsync(struct lwp *l, void *v, register_t *retval)
3160 {
3161 struct sys_fsync_args /* {
3162 syscallarg(int) fd;
3163 } */ *uap = v;
3164 struct proc *p = l->l_proc;
3165 struct vnode *vp;
3166 struct file *fp;
3167 int error;
3168
3169 /* getvnode() will use the descriptor for us */
3170 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3171 return (error);
3172 vp = (struct vnode *)fp->f_data;
3173 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3174 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3175 if (error == 0 && bioops.io_fsync != NULL &&
3176 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) {
3177 KERNEL_LOCK(1, l);
3178 (*bioops.io_fsync)(vp, 0);
3179 KERNEL_UNLOCK_ONE(l);
3180 }
3181 VOP_UNLOCK(vp, 0);
3182 FILE_UNUSE(fp, l);
3183 return (error);
3184 }
3185
3186 /*
3187 * Sync a range of file data. API modeled after that found in AIX.
3188 *
3189 * FDATASYNC indicates that we need only save enough metadata to be able
3190 * to re-read the written data. Note we duplicate AIX's requirement that
3191 * the file be open for writing.
3192 */
3193 /* ARGSUSED */
3194 int
3195 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3196 {
3197 struct sys_fsync_range_args /* {
3198 syscallarg(int) fd;
3199 syscallarg(int) flags;
3200 syscallarg(off_t) start;
3201 syscallarg(off_t) length;
3202 } */ *uap = v;
3203 struct proc *p = l->l_proc;
3204 struct vnode *vp;
3205 struct file *fp;
3206 int flags, nflags;
3207 off_t s, e, len;
3208 int error;
3209
3210 /* getvnode() will use the descriptor for us */
3211 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3212 return (error);
3213
3214 if ((fp->f_flag & FWRITE) == 0) {
3215 error = EBADF;
3216 goto out;
3217 }
3218
3219 flags = SCARG(uap, flags);
3220 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3221 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3222 error = EINVAL;
3223 goto out;
3224 }
3225 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3226 if (flags & FDATASYNC)
3227 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3228 else
3229 nflags = FSYNC_WAIT;
3230 if (flags & FDISKSYNC)
3231 nflags |= FSYNC_CACHE;
3232
3233 len = SCARG(uap, length);
3234 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3235 if (len) {
3236 s = SCARG(uap, start);
3237 e = s + len;
3238 if (e < s) {
3239 error = EINVAL;
3240 goto out;
3241 }
3242 } else {
3243 e = 0;
3244 s = 0;
3245 }
3246
3247 vp = (struct vnode *)fp->f_data;
3248 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3249 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3250
3251 if (error == 0 && bioops.io_fsync != NULL &&
3252 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) {
3253 KERNEL_LOCK(1, l);
3254 (*bioops.io_fsync)(vp, nflags);
3255 KERNEL_UNLOCK_ONE(l);
3256 }
3257
3258 VOP_UNLOCK(vp, 0);
3259 out:
3260 FILE_UNUSE(fp, l);
3261 return (error);
3262 }
3263
3264 /*
3265 * Sync the data of an open file.
3266 */
3267 /* ARGSUSED */
3268 int
3269 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3270 {
3271 struct sys_fdatasync_args /* {
3272 syscallarg(int) fd;
3273 } */ *uap = v;
3274 struct proc *p = l->l_proc;
3275 struct vnode *vp;
3276 struct file *fp;
3277 int error;
3278
3279 /* getvnode() will use the descriptor for us */
3280 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3281 return (error);
3282 if ((fp->f_flag & FWRITE) == 0) {
3283 FILE_UNUSE(fp, l);
3284 return (EBADF);
3285 }
3286 vp = (struct vnode *)fp->f_data;
3287 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3288 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3289 VOP_UNLOCK(vp, 0);
3290 FILE_UNUSE(fp, l);
3291 return (error);
3292 }
3293
3294 /*
3295 * Rename files, (standard) BSD semantics frontend.
3296 */
3297 /* ARGSUSED */
3298 int
3299 sys_rename(struct lwp *l, void *v, register_t *retval)
3300 {
3301 struct sys_rename_args /* {
3302 syscallarg(const char *) from;
3303 syscallarg(const char *) to;
3304 } */ *uap = v;
3305
3306 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3307 }
3308
3309 /*
3310 * Rename files, POSIX semantics frontend.
3311 */
3312 /* ARGSUSED */
3313 int
3314 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3315 {
3316 struct sys___posix_rename_args /* {
3317 syscallarg(const char *) from;
3318 syscallarg(const char *) to;
3319 } */ *uap = v;
3320
3321 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3322 }
3323
3324 /*
3325 * Rename files. Source and destination must either both be directories,
3326 * or both not be directories. If target is a directory, it must be empty.
3327 * If `from' and `to' refer to the same object, the value of the `retain'
3328 * argument is used to determine whether `from' will be
3329 *
3330 * (retain == 0) deleted unless `from' and `to' refer to the same
3331 * object in the file system's name space (BSD).
3332 * (retain == 1) always retained (POSIX).
3333 */
3334 static int
3335 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3336 {
3337 struct vnode *tvp, *fvp, *tdvp;
3338 struct nameidata fromnd, tond;
3339 struct proc *p;
3340 int error;
3341
3342 NDINIT(&fromnd, DELETE, LOCKPARENT | SAVESTART | TRYEMULROOT, UIO_USERSPACE,
3343 from, l);
3344 if ((error = namei(&fromnd)) != 0)
3345 return (error);
3346 if (fromnd.ni_dvp != fromnd.ni_vp)
3347 VOP_UNLOCK(fromnd.ni_dvp, 0);
3348 fvp = fromnd.ni_vp;
3349 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | TRYEMULROOT |
3350 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3351 if ((error = namei(&tond)) != 0) {
3352 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3353 vrele(fromnd.ni_dvp);
3354 vrele(fvp);
3355 goto out1;
3356 }
3357 tdvp = tond.ni_dvp;
3358 tvp = tond.ni_vp;
3359
3360 if (tvp != NULL) {
3361 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3362 error = ENOTDIR;
3363 goto out;
3364 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3365 error = EISDIR;
3366 goto out;
3367 }
3368 }
3369
3370 if (fvp == tdvp)
3371 error = EINVAL;
3372
3373 /*
3374 * Source and destination refer to the same object.
3375 */
3376 if (fvp == tvp) {
3377 if (retain)
3378 error = -1;
3379 else if (fromnd.ni_dvp == tdvp &&
3380 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3381 !memcmp(fromnd.ni_cnd.cn_nameptr,
3382 tond.ni_cnd.cn_nameptr,
3383 fromnd.ni_cnd.cn_namelen))
3384 error = -1;
3385 }
3386
3387 #if NVERIEXEC > 0
3388 if (!error) {
3389 char *f1, *f2;
3390
3391 f1 = malloc(fromnd.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3392 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen);
3393
3394 f2 = malloc(tond.ni_cnd.cn_namelen + 1, M_TEMP, M_WAITOK);
3395 strlcpy(f2, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen);
3396
3397 error = veriexec_renamechk(l, fvp, f1, tvp, f2);
3398
3399 free(f1, M_TEMP);
3400 free(f2, M_TEMP);
3401 }
3402 #endif /* NVERIEXEC > 0 */
3403
3404 out:
3405 p = l->l_proc;
3406 if (!error) {
3407 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3408 if (fromnd.ni_dvp != tdvp)
3409 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3410 if (tvp) {
3411 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3412 }
3413 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3414 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3415 } else {
3416 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3417 if (tdvp == tvp)
3418 vrele(tdvp);
3419 else
3420 vput(tdvp);
3421 if (tvp)
3422 vput(tvp);
3423 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3424 vrele(fromnd.ni_dvp);
3425 vrele(fvp);
3426 }
3427 vrele(tond.ni_startdir);
3428 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3429 out1:
3430 if (fromnd.ni_startdir)
3431 vrele(fromnd.ni_startdir);
3432 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3433 return (error == -1 ? 0 : error);
3434 }
3435
3436 /*
3437 * Make a directory file.
3438 */
3439 /* ARGSUSED */
3440 int
3441 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3442 {
3443 struct sys_mkdir_args /* {
3444 syscallarg(const char *) path;
3445 syscallarg(int) mode;
3446 } */ *uap = v;
3447 struct proc *p = l->l_proc;
3448 struct vnode *vp;
3449 struct vattr vattr;
3450 int error;
3451 struct nameidata nd;
3452
3453 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, UIO_USERSPACE,
3454 SCARG(uap, path), l);
3455 if ((error = namei(&nd)) != 0)
3456 return (error);
3457 vp = nd.ni_vp;
3458 if (vp != NULL) {
3459 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3460 if (nd.ni_dvp == vp)
3461 vrele(nd.ni_dvp);
3462 else
3463 vput(nd.ni_dvp);
3464 vrele(vp);
3465 return (EEXIST);
3466 }
3467 VATTR_NULL(&vattr);
3468 vattr.va_type = VDIR;
3469 /* We will read cwdi->cwdi_cmask unlocked. */
3470 vattr.va_mode =
3471 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3472 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3473 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3474 if (!error)
3475 vput(nd.ni_vp);
3476 return (error);
3477 }
3478
3479 /*
3480 * Remove a directory file.
3481 */
3482 /* ARGSUSED */
3483 int
3484 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3485 {
3486 struct sys_rmdir_args /* {
3487 syscallarg(const char *) path;
3488 } */ *uap = v;
3489 struct vnode *vp;
3490 int error;
3491 struct nameidata nd;
3492
3493 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE,
3494 SCARG(uap, path), l);
3495 if ((error = namei(&nd)) != 0)
3496 return (error);
3497 vp = nd.ni_vp;
3498 if (vp->v_type != VDIR) {
3499 error = ENOTDIR;
3500 goto out;
3501 }
3502 /*
3503 * No rmdir "." please.
3504 */
3505 if (nd.ni_dvp == vp) {
3506 error = EINVAL;
3507 goto out;
3508 }
3509 /*
3510 * The root of a mounted filesystem cannot be deleted.
3511 */
3512 if (vp->v_vflag & VV_ROOT) {
3513 error = EBUSY;
3514 goto out;
3515 }
3516 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3517 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3518 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3519 return (error);
3520
3521 out:
3522 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3523 if (nd.ni_dvp == vp)
3524 vrele(nd.ni_dvp);
3525 else
3526 vput(nd.ni_dvp);
3527 vput(vp);
3528 return (error);
3529 }
3530
3531 /*
3532 * Read a block of directory entries in a file system independent format.
3533 */
3534 int
3535 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3536 {
3537 struct sys___getdents30_args /* {
3538 syscallarg(int) fd;
3539 syscallarg(char *) buf;
3540 syscallarg(size_t) count;
3541 } */ *uap = v;
3542 struct proc *p = l->l_proc;
3543 struct file *fp;
3544 int error, done;
3545
3546 /* getvnode() will use the descriptor for us */
3547 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3548 return (error);
3549 if ((fp->f_flag & FREAD) == 0) {
3550 error = EBADF;
3551 goto out;
3552 }
3553 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3554 SCARG(uap, count), &done, l, 0, 0);
3555 #ifdef KTRACE
3556 if (!error && KTRPOINT(p, KTR_GENIO)) {
3557 struct iovec iov;
3558 iov.iov_base = SCARG(uap, buf);
3559 iov.iov_len = done;
3560 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3561 }
3562 #endif
3563 *retval = done;
3564 out:
3565 FILE_UNUSE(fp, l);
3566 return (error);
3567 }
3568
3569 /*
3570 * Set the mode mask for creation of filesystem nodes.
3571 */
3572 int
3573 sys_umask(struct lwp *l, void *v, register_t *retval)
3574 {
3575 struct sys_umask_args /* {
3576 syscallarg(mode_t) newmask;
3577 } */ *uap = v;
3578 struct proc *p = l->l_proc;
3579 struct cwdinfo *cwdi;
3580
3581 /*
3582 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
3583 * important is that we serialize changes to the mask. The
3584 * rw_exit() will issue a write memory barrier on our behalf,
3585 * and force the changes out to other CPUs (as it must use an
3586 * atomic operation, draining the local CPU's store buffers).
3587 */
3588 cwdi = p->p_cwdi;
3589 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
3590 *retval = cwdi->cwdi_cmask;
3591 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3592 rw_exit(&cwdi->cwdi_lock);
3593
3594 return (0);
3595 }
3596
3597 /*
3598 * Void all references to file by ripping underlying filesystem
3599 * away from vnode.
3600 */
3601 /* ARGSUSED */
3602 int
3603 sys_revoke(struct lwp *l, void *v, register_t *retval)
3604 {
3605 struct sys_revoke_args /* {
3606 syscallarg(const char *) path;
3607 } */ *uap = v;
3608 struct vnode *vp;
3609 struct vattr vattr;
3610 int error;
3611 bool revoke;
3612 struct nameidata nd;
3613
3614 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, UIO_USERSPACE, SCARG(uap, path), l);
3615 if ((error = namei(&nd)) != 0)
3616 return (error);
3617 vp = nd.ni_vp;
3618 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3619 goto out;
3620 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3621 (error = kauth_authorize_generic(l->l_cred,
3622 KAUTH_GENERIC_ISSUSER, NULL)) != 0)
3623 goto out;
3624 mutex_enter(&vp->v_interlock);
3625 revoke = (vp->v_usecount > 1 || (vp->v_iflag & (VI_ALIASED | VI_LAYER)));
3626 mutex_exit(&vp->v_interlock);
3627 if (revoke)
3628 VOP_REVOKE(vp, REVOKEALL);
3629 out:
3630 vrele(vp);
3631 return (error);
3632 }
3633
3634 /*
3635 * Convert a user file descriptor to a kernel file entry.
3636 */
3637 int
3638 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3639 {
3640 struct vnode *vp;
3641 struct file *fp;
3642
3643 if ((fp = fd_getfile(fdp, fd)) == NULL)
3644 return (EBADF);
3645
3646 FILE_USE(fp);
3647
3648 if (fp->f_type != DTYPE_VNODE) {
3649 FILE_UNUSE(fp, NULL);
3650 return (EINVAL);
3651 }
3652
3653 vp = (struct vnode *)fp->f_data;
3654 if (vp->v_type == VBAD) {
3655 FILE_UNUSE(fp, NULL);
3656 return (EBADF);
3657 }
3658
3659 *fpp = fp;
3660 return (0);
3661 }
3662