vfs_syscalls.c revision 1.248 1 /* $NetBSD: vfs_syscalls.c,v 1.248 2006/07/14 15:59:29 yamt Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.248 2006/07/14 15:59:29 yamt Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_ktrace.h"
45 #include "opt_verified_exec.h"
46 #include "fss.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/namei.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/file.h>
54 #include <sys/stat.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/uio.h>
59 #include <sys/malloc.h>
60 #include <sys/kmem.h>
61 #include <sys/dirent.h>
62 #include <sys/sysctl.h>
63 #include <sys/sa.h>
64 #include <sys/syscallargs.h>
65 #ifdef KTRACE
66 #include <sys/ktrace.h>
67 #endif
68 #ifdef VERIFIED_EXEC
69 #include <sys/verified_exec.h>
70 #endif /* VERIFIED_EXEC */
71 #include <sys/kauth.h>
72
73 #include <miscfs/genfs/genfs.h>
74 #include <miscfs/syncfs/syncfs.h>
75
76 #ifdef COMPAT_30
77 #include "opt_nfsserver.h"
78 #include <nfs/rpcv2.h>
79 #include <nfs/nfsproto.h>
80 #include <nfs/nfs.h>
81 #include <nfs/nfs_var.h>
82 #endif
83
84 #if NFSS > 0
85 #include <dev/fssvar.h>
86 #endif
87
88 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
89
90 static int change_dir(struct nameidata *, struct lwp *);
91 static int change_flags(struct vnode *, u_long, struct lwp *);
92 static int change_mode(struct vnode *, int, struct lwp *l);
93 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
94 static int change_utimes(struct vnode *vp, const struct timeval *,
95 struct lwp *l);
96 static int rename_files(const char *, const char *, struct lwp *, int);
97
98 void checkdirs(struct vnode *);
99
100 int dovfsusermount = 0;
101
102 /*
103 * Virtual File System System Calls
104 */
105
106 /*
107 * Mount a file system.
108 */
109
110 #if defined(COMPAT_09) || defined(COMPAT_43)
111 /*
112 * This table is used to maintain compatibility with 4.3BSD
113 * and NetBSD 0.9 mount syscalls. Note, the order is important!
114 *
115 * Do not modify this table. It should only contain filesystems
116 * supported by NetBSD 0.9 and 4.3BSD.
117 */
118 const char * const mountcompatnames[] = {
119 NULL, /* 0 = MOUNT_NONE */
120 MOUNT_FFS, /* 1 = MOUNT_UFS */
121 MOUNT_NFS, /* 2 */
122 MOUNT_MFS, /* 3 */
123 MOUNT_MSDOS, /* 4 */
124 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
125 MOUNT_FDESC, /* 6 */
126 MOUNT_KERNFS, /* 7 */
127 NULL, /* 8 = MOUNT_DEVFS */
128 MOUNT_AFS, /* 9 */
129 };
130 const int nmountcompatnames = sizeof(mountcompatnames) /
131 sizeof(mountcompatnames[0]);
132 #endif /* COMPAT_09 || COMPAT_43 */
133
134 /* ARGSUSED */
135 int
136 sys_mount(struct lwp *l, void *v, register_t *retval)
137 {
138 struct sys_mount_args /* {
139 syscallarg(const char *) type;
140 syscallarg(const char *) path;
141 syscallarg(int) flags;
142 syscallarg(void *) data;
143 } */ *uap = v;
144 struct proc *p = l->l_proc;
145 struct vnode *vp;
146 struct mount *mp;
147 int error, flag = 0;
148 char fstypename[MFSNAMELEN];
149 struct vattr va;
150 struct nameidata nd;
151 struct vfsops *vfs;
152
153 /*
154 * if MNT_GETARGS is specified, it should be only flag.
155 */
156
157 if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
158 (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
159 return EINVAL;
160 }
161
162 if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
163 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
164 &p->p_acflag)))
165 return (error);
166 /*
167 * Get vnode to be covered
168 */
169 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
170 SCARG(uap, path), l);
171 if ((error = namei(&nd)) != 0)
172 return (error);
173 vp = nd.ni_vp;
174 /*
175 * A lookup in VFS_MOUNT might result in an attempt to
176 * lock this vnode again, so make the lock recursive.
177 */
178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
179 if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
180 if ((vp->v_flag & VROOT) == 0) {
181 vput(vp);
182 return (EINVAL);
183 }
184 mp = vp->v_mount;
185 flag = mp->mnt_flag;
186 vfs = mp->mnt_op;
187 /*
188 * We only allow the filesystem to be reloaded if it
189 * is currently mounted read-only.
190 */
191 if ((SCARG(uap, flags) & MNT_RELOAD) &&
192 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
193 vput(vp);
194 return (EOPNOTSUPP); /* Needs translation */
195 }
196 /*
197 * In "highly secure" mode, don't let the caller do anything
198 * but downgrade a filesystem from read-write to read-only.
199 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
200 */
201 if (securelevel >= 2 &&
202 SCARG(uap, flags) != MNT_GETARGS &&
203 SCARG(uap, flags) !=
204 (mp->mnt_flag | MNT_RDONLY |
205 MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
206 vput(vp);
207 return (EPERM);
208 }
209 mp->mnt_flag |= SCARG(uap, flags) &
210 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
211 /*
212 * Only root, or the user that did the original mount is
213 * permitted to update it.
214 */
215 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
216 mp->mnt_stat.f_owner != kauth_cred_geteuid(p->p_cred) &&
217 (error = kauth_authorize_generic(p->p_cred,
218 KAUTH_GENERIC_ISSUSER,
219 &p->p_acflag)) != 0) {
220 vput(vp);
221 return (error);
222 }
223 /*
224 * Do not allow NFS export by non-root users. For non-root
225 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
226 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
227 */
228 if (kauth_cred_geteuid(p->p_cred) != 0) {
229 if (SCARG(uap, flags) & MNT_EXPORTED) {
230 vput(vp);
231 return (EPERM);
232 }
233 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
234 if (flag & MNT_NOEXEC)
235 SCARG(uap, flags) |= MNT_NOEXEC;
236 }
237 if (vfs_busy(mp, LK_NOWAIT, 0)) {
238 vput(vp);
239 return (EPERM);
240 }
241 goto update;
242 } else {
243 if (securelevel >= 2) {
244 vput(vp);
245 return (EPERM);
246 }
247 }
248 /*
249 * If the user is not root, ensure that they own the directory
250 * onto which we are attempting to mount.
251 */
252 if ((error = VOP_GETATTR(vp, &va, p->p_cred, l)) != 0 ||
253 (va.va_uid != kauth_cred_geteuid(p->p_cred) &&
254 (error = kauth_authorize_generic(p->p_cred,
255 KAUTH_GENERIC_ISSUSER,
256 &p->p_acflag)) != 0)) {
257 vput(vp);
258 return (error);
259 }
260 /*
261 * Do not allow NFS export by non-root users. For non-root users,
262 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
263 * mount point is already MNT_NOEXEC.
264 */
265 if (kauth_cred_geteuid(p->p_cred) != 0) {
266 if (SCARG(uap, flags) & MNT_EXPORTED) {
267 vput(vp);
268 return (EPERM);
269 }
270 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
271 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
272 SCARG(uap, flags) |= MNT_NOEXEC;
273 }
274 if ((error = vinvalbuf(vp, V_SAVE, p->p_cred, l, 0, 0)) != 0) {
275 vput(vp);
276 return (error);
277 }
278 if (vp->v_type != VDIR) {
279 vput(vp);
280 return (ENOTDIR);
281 }
282 error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
283 if (error) {
284 #if defined(COMPAT_09) || defined(COMPAT_43)
285 /*
286 * Historically, filesystem types were identified by numbers.
287 * If we get an integer for the filesystem type instead of a
288 * string, we check to see if it matches one of the historic
289 * filesystem types.
290 */
291 u_long fsindex = (u_long)SCARG(uap, type);
292 if (fsindex >= nmountcompatnames ||
293 mountcompatnames[fsindex] == NULL) {
294 vput(vp);
295 return (ENODEV);
296 }
297 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
298 #else
299 vput(vp);
300 return (error);
301 #endif
302 }
303 #ifdef COMPAT_10
304 /* Accept `ufs' as an alias for `ffs'. */
305 if (!strncmp(fstypename, "ufs", MFSNAMELEN))
306 strncpy(fstypename, "ffs", MFSNAMELEN);
307 #endif
308 if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
309 vput(vp);
310 return (ENODEV);
311 }
312 if (vp->v_mountedhere != NULL) {
313 vput(vp);
314 return (EBUSY);
315 }
316
317 /*
318 * Allocate and initialize the file system.
319 */
320 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
321 M_MOUNT, M_WAITOK);
322 memset((char *)mp, 0, (u_long)sizeof(struct mount));
323 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
324 simple_lock_init(&mp->mnt_slock);
325 (void)vfs_busy(mp, LK_NOWAIT, 0);
326 mp->mnt_op = vfs;
327 vfs->vfs_refcount++;
328 mp->mnt_vnodecovered = vp;
329 mp->mnt_stat.f_owner = kauth_cred_geteuid(p->p_cred);
330 mp->mnt_unmounter = NULL;
331 mp->mnt_leaf = mp;
332
333 /*
334 * The underlying file system may refuse the mount for
335 * various reasons. Allow the user to force it to happen.
336 */
337 mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
338 update:
339 if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
340 /*
341 * Set the mount level flags.
342 */
343 if (SCARG(uap, flags) & MNT_RDONLY)
344 mp->mnt_flag |= MNT_RDONLY;
345 else if (mp->mnt_flag & MNT_RDONLY)
346 mp->mnt_iflag |= IMNT_WANTRDWR;
347 mp->mnt_flag &=
348 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
349 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
350 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
351 mp->mnt_flag |= SCARG(uap, flags) &
352 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
353 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
354 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
355 MNT_IGNORE);
356 }
357 /*
358 * Mount the filesystem.
359 */
360 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
361 if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
362 #if defined(COMPAT_30) && defined(NFSSERVER)
363 if (mp->mnt_flag & MNT_UPDATE && error != 0) {
364 int error2;
365
366 /* Update failed; let's try and see if it was an
367 * export request. */
368 error2 = nfs_update_exports_30(mp, SCARG(uap, path),
369 SCARG(uap, data), l);
370
371 /* Only update error code if the export request was
372 * understood but some problem occurred while
373 * processing it. */
374 if (error2 != EJUSTRETURN)
375 error = error2;
376 }
377 #endif
378 if (mp->mnt_iflag & IMNT_WANTRDWR)
379 mp->mnt_flag &= ~MNT_RDONLY;
380 if (error)
381 mp->mnt_flag = flag;
382 mp->mnt_flag &=~
383 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
384 mp->mnt_iflag &=~ IMNT_WANTRDWR;
385 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
386 if (mp->mnt_syncer == NULL)
387 error = vfs_allocate_syncvnode(mp);
388 } else {
389 if (mp->mnt_syncer != NULL)
390 vfs_deallocate_syncvnode(mp);
391 }
392 vfs_unbusy(mp);
393 VOP_UNLOCK(vp, 0);
394 vrele(vp);
395 return (error);
396 }
397 /*
398 * Put the new filesystem on the mount list after root.
399 */
400 cache_purge(vp);
401 if (!error) {
402 mp->mnt_flag &=~
403 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
404 mp->mnt_iflag &=~ IMNT_WANTRDWR;
405 vp->v_mountedhere = mp;
406 simple_lock(&mountlist_slock);
407 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
408 simple_unlock(&mountlist_slock);
409 checkdirs(vp);
410 VOP_UNLOCK(vp, 0);
411 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
412 error = vfs_allocate_syncvnode(mp);
413 vfs_unbusy(mp);
414 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
415 if ((error = VFS_START(mp, 0, l)))
416 vrele(vp);
417 } else {
418 vp->v_mountedhere = (struct mount *)0;
419 vfs->vfs_refcount--;
420 vfs_unbusy(mp);
421 free(mp, M_MOUNT);
422 vput(vp);
423 }
424 return (error);
425 }
426
427 /*
428 * Scan all active processes to see if any of them have a current
429 * or root directory onto which the new filesystem has just been
430 * mounted. If so, replace them with the new mount point.
431 */
432 void
433 checkdirs(struct vnode *olddp)
434 {
435 struct cwdinfo *cwdi;
436 struct vnode *newdp;
437 struct proc *p;
438
439 if (olddp->v_usecount == 1)
440 return;
441 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
442 panic("mount: lost mount");
443 proclist_lock_read();
444 PROCLIST_FOREACH(p, &allproc) {
445 cwdi = p->p_cwdi;
446 if (!cwdi)
447 continue;
448 if (cwdi->cwdi_cdir == olddp) {
449 vrele(cwdi->cwdi_cdir);
450 VREF(newdp);
451 cwdi->cwdi_cdir = newdp;
452 }
453 if (cwdi->cwdi_rdir == olddp) {
454 vrele(cwdi->cwdi_rdir);
455 VREF(newdp);
456 cwdi->cwdi_rdir = newdp;
457 }
458 }
459 proclist_unlock_read();
460 if (rootvnode == olddp) {
461 vrele(rootvnode);
462 VREF(newdp);
463 rootvnode = newdp;
464 }
465 vput(newdp);
466 }
467
468 /*
469 * Unmount a file system.
470 *
471 * Note: unmount takes a path to the vnode mounted on as argument,
472 * not special file (as before).
473 */
474 /* ARGSUSED */
475 int
476 sys_unmount(struct lwp *l, void *v, register_t *retval)
477 {
478 struct sys_unmount_args /* {
479 syscallarg(const char *) path;
480 syscallarg(int) flags;
481 } */ *uap = v;
482 struct proc *p = l->l_proc;
483 struct vnode *vp;
484 struct mount *mp;
485 int error;
486 struct nameidata nd;
487
488 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
489 SCARG(uap, path), l);
490 if ((error = namei(&nd)) != 0)
491 return (error);
492 vp = nd.ni_vp;
493 mp = vp->v_mount;
494
495 /*
496 * Only root, or the user that did the original mount is
497 * permitted to unmount this filesystem.
498 */
499 if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(p->p_cred)) &&
500 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
501 &p->p_acflag)) != 0) {
502 vput(vp);
503 return (error);
504 }
505
506 /*
507 * Don't allow unmounting the root file system.
508 */
509 if (mp->mnt_flag & MNT_ROOTFS) {
510 vput(vp);
511 return (EINVAL);
512 }
513
514 /*
515 * Must be the root of the filesystem
516 */
517 if ((vp->v_flag & VROOT) == 0) {
518 vput(vp);
519 return (EINVAL);
520 }
521 vput(vp);
522
523 /*
524 * XXX Freeze syncer. Must do this before locking the
525 * mount point. See dounmount() for details.
526 */
527 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
528
529 if (vfs_busy(mp, 0, 0)) {
530 lockmgr(&syncer_lock, LK_RELEASE, NULL);
531 return (EBUSY);
532 }
533
534 return (dounmount(mp, SCARG(uap, flags), l));
535 }
536
537 /*
538 * Do the actual file system unmount. File system is assumed to have been
539 * marked busy by the caller.
540 */
541 int
542 dounmount(struct mount *mp, int flags, struct lwp *l)
543 {
544 struct vnode *coveredvp;
545 int error;
546 int async;
547 int used_syncer;
548
549 simple_lock(&mountlist_slock);
550 vfs_unbusy(mp);
551 used_syncer = (mp->mnt_syncer != NULL);
552
553 /*
554 * XXX Syncer must be frozen when we get here. This should really
555 * be done on a per-mountpoint basis, but especially the softdep
556 * code possibly called from the syncer doesn't exactly work on a
557 * per-mountpoint basis, so the softdep code would become a maze
558 * of vfs_busy() calls.
559 *
560 * The caller of dounmount() must acquire syncer_lock because
561 * the syncer itself acquires locks in syncer_lock -> vfs_busy
562 * order, and we must preserve that order to avoid deadlock.
563 *
564 * So, if the file system did not use the syncer, now is
565 * the time to release the syncer_lock.
566 */
567 if (used_syncer == 0)
568 lockmgr(&syncer_lock, LK_RELEASE, NULL);
569
570 mp->mnt_iflag |= IMNT_UNMOUNT;
571 mp->mnt_unmounter = l;
572 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
573 vn_start_write(NULL, &mp, V_WAIT);
574
575 async = mp->mnt_flag & MNT_ASYNC;
576 mp->mnt_flag &= ~MNT_ASYNC;
577 cache_purgevfs(mp); /* remove cache entries for this file sys */
578 if (mp->mnt_syncer != NULL)
579 vfs_deallocate_syncvnode(mp);
580 error = 0;
581 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
582 #if NFSS > 0
583 error = fss_umount_hook(mp, (flags & MNT_FORCE));
584 #endif
585 if (error == 0)
586 error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_cred, l);
587 }
588 if (error == 0 || (flags & MNT_FORCE))
589 error = VFS_UNMOUNT(mp, flags, l);
590 vn_finished_write(mp, 0);
591 simple_lock(&mountlist_slock);
592 if (error) {
593 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
594 (void) vfs_allocate_syncvnode(mp);
595 mp->mnt_iflag &= ~IMNT_UNMOUNT;
596 mp->mnt_unmounter = NULL;
597 mp->mnt_flag |= async;
598 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
599 &mountlist_slock);
600 if (used_syncer)
601 lockmgr(&syncer_lock, LK_RELEASE, NULL);
602 simple_lock(&mp->mnt_slock);
603 while (mp->mnt_wcnt > 0) {
604 wakeup(mp);
605 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
606 0, &mp->mnt_slock);
607 }
608 simple_unlock(&mp->mnt_slock);
609 return (error);
610 }
611 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
612 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
613 coveredvp->v_mountedhere = NULL;
614 vrele(coveredvp);
615 }
616 mp->mnt_op->vfs_refcount--;
617 if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
618 panic("unmount: dangling vnode");
619 mp->mnt_iflag |= IMNT_GONE;
620 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
621 if (used_syncer)
622 lockmgr(&syncer_lock, LK_RELEASE, NULL);
623 simple_lock(&mp->mnt_slock);
624 while (mp->mnt_wcnt > 0) {
625 wakeup(mp);
626 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
627 }
628 simple_unlock(&mp->mnt_slock);
629 vfs_hooks_unmount(mp);
630 free(mp, M_MOUNT);
631 return (0);
632 }
633
634 /*
635 * Sync each mounted filesystem.
636 */
637 #ifdef DEBUG
638 int syncprt = 0;
639 struct ctldebug debug0 = { "syncprt", &syncprt };
640 #endif
641
642 /* ARGSUSED */
643 int
644 sys_sync(struct lwp *l, void *v, register_t *retval)
645 {
646 struct mount *mp, *nmp;
647 int asyncflag;
648 struct proc *p = l == NULL ? &proc0 : l->l_proc;
649
650 simple_lock(&mountlist_slock);
651 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
652 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
653 nmp = mp->mnt_list.cqe_prev;
654 continue;
655 }
656 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
657 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
658 asyncflag = mp->mnt_flag & MNT_ASYNC;
659 mp->mnt_flag &= ~MNT_ASYNC;
660 VFS_SYNC(mp, MNT_NOWAIT, p->p_cred, l);
661 if (asyncflag)
662 mp->mnt_flag |= MNT_ASYNC;
663 vn_finished_write(mp, 0);
664 }
665 simple_lock(&mountlist_slock);
666 nmp = mp->mnt_list.cqe_prev;
667 vfs_unbusy(mp);
668
669 }
670 simple_unlock(&mountlist_slock);
671 #ifdef DEBUG
672 if (syncprt)
673 vfs_bufstats();
674 #endif /* DEBUG */
675 return (0);
676 }
677
678 /*
679 * Change filesystem quotas.
680 */
681 /* ARGSUSED */
682 int
683 sys_quotactl(struct lwp *l, void *v, register_t *retval)
684 {
685 struct sys_quotactl_args /* {
686 syscallarg(const char *) path;
687 syscallarg(int) cmd;
688 syscallarg(int) uid;
689 syscallarg(caddr_t) arg;
690 } */ *uap = v;
691 struct mount *mp;
692 int error;
693 struct nameidata nd;
694
695 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
696 if ((error = namei(&nd)) != 0)
697 return (error);
698 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
699 vrele(nd.ni_vp);
700 if (error)
701 return (error);
702 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
703 SCARG(uap, arg), l);
704 vn_finished_write(mp, 0);
705 return (error);
706 }
707
708 int
709 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
710 int root)
711 {
712 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
713 int error = 0;
714
715 /*
716 * If MNT_NOWAIT or MNT_LAZY is specified, do not
717 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
718 * overrides MNT_NOWAIT.
719 */
720 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
721 (flags != MNT_WAIT && flags != 0)) {
722 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
723 goto done;
724 }
725
726 /* Get the filesystem stats now */
727 memset(sp, 0, sizeof(*sp));
728 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
729 return error;
730 }
731
732 if (cwdi->cwdi_rdir == NULL)
733 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
734 done:
735 if (cwdi->cwdi_rdir != NULL) {
736 size_t len;
737 char *bp;
738 char *path = PNBUF_GET();
739 if (!path)
740 return ENOMEM;
741
742 bp = path + MAXPATHLEN;
743 *--bp = '\0';
744 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
745 MAXPATHLEN / 2, 0, l);
746 if (error) {
747 PNBUF_PUT(path);
748 return error;
749 }
750 len = strlen(bp);
751 /*
752 * for mount points that are below our root, we can see
753 * them, so we fix up the pathname and return them. The
754 * rest we cannot see, so we don't allow viewing the
755 * data.
756 */
757 if (strncmp(bp, sp->f_mntonname, len) == 0) {
758 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
759 sizeof(sp->f_mntonname));
760 if (sp->f_mntonname[0] == '\0')
761 (void)strlcpy(sp->f_mntonname, "/",
762 sizeof(sp->f_mntonname));
763 } else {
764 if (root)
765 (void)strlcpy(sp->f_mntonname, "/",
766 sizeof(sp->f_mntonname));
767 else
768 error = EPERM;
769 }
770 PNBUF_PUT(path);
771 }
772 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
773 return error;
774 }
775
776 /*
777 * Get filesystem statistics.
778 */
779 /* ARGSUSED */
780 int
781 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
782 {
783 struct sys_statvfs1_args /* {
784 syscallarg(const char *) path;
785 syscallarg(struct statvfs *) buf;
786 syscallarg(int) flags;
787 } */ *uap = v;
788 struct mount *mp;
789 struct statvfs *sb;
790 int error;
791 struct nameidata nd;
792
793 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
794 if ((error = namei(&nd)) != 0)
795 return error;
796 mp = nd.ni_vp->v_mount;
797 vrele(nd.ni_vp);
798 sb = STATVFSBUF_GET();
799 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
800 if (error == 0) {
801 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
802 }
803 STATVFSBUF_PUT(sb);
804 return error;
805 }
806
807 /*
808 * Get filesystem statistics.
809 */
810 /* ARGSUSED */
811 int
812 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
813 {
814 struct sys_fstatvfs1_args /* {
815 syscallarg(int) fd;
816 syscallarg(struct statvfs *) buf;
817 syscallarg(int) flags;
818 } */ *uap = v;
819 struct proc *p = l->l_proc;
820 struct file *fp;
821 struct mount *mp;
822 struct statvfs *sb;
823 int error;
824
825 /* getvnode() will use the descriptor for us */
826 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
827 return (error);
828 mp = ((struct vnode *)fp->f_data)->v_mount;
829 sb = STATVFSBUF_GET();
830 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
831 goto out;
832 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
833 out:
834 FILE_UNUSE(fp, l);
835 STATVFSBUF_PUT(sb);
836 return error;
837 }
838
839
840 /*
841 * Get statistics on all filesystems.
842 */
843 int
844 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
845 {
846 struct sys_getvfsstat_args /* {
847 syscallarg(struct statvfs *) buf;
848 syscallarg(size_t) bufsize;
849 syscallarg(int) flags;
850 } */ *uap = v;
851 int root = 0;
852 struct proc *p = l->l_proc;
853 struct mount *mp, *nmp;
854 struct statvfs *sb;
855 struct statvfs *sfsp;
856 size_t count, maxcount;
857 int error = 0;
858
859 sb = STATVFSBUF_GET();
860 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
861 sfsp = SCARG(uap, buf);
862 simple_lock(&mountlist_slock);
863 count = 0;
864 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
865 mp = nmp) {
866 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
867 nmp = CIRCLEQ_NEXT(mp, mnt_list);
868 continue;
869 }
870 if (sfsp && count < maxcount) {
871 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
872 if (error) {
873 simple_lock(&mountlist_slock);
874 nmp = CIRCLEQ_NEXT(mp, mnt_list);
875 vfs_unbusy(mp);
876 continue;
877 }
878 error = copyout(sb, sfsp, sizeof(*sfsp));
879 if (error) {
880 vfs_unbusy(mp);
881 goto out;
882 }
883 sfsp++;
884 root |= strcmp(sb->f_mntonname, "/") == 0;
885 }
886 count++;
887 simple_lock(&mountlist_slock);
888 nmp = CIRCLEQ_NEXT(mp, mnt_list);
889 vfs_unbusy(mp);
890 }
891 simple_unlock(&mountlist_slock);
892 if (root == 0 && p->p_cwdi->cwdi_rdir) {
893 /*
894 * fake a root entry
895 */
896 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
897 SCARG(uap, flags), 1)) != 0)
898 goto out;
899 if (sfsp)
900 error = copyout(sb, sfsp, sizeof(*sfsp));
901 count++;
902 }
903 if (sfsp && count > maxcount)
904 *retval = maxcount;
905 else
906 *retval = count;
907 out:
908 STATVFSBUF_PUT(sb);
909 return error;
910 }
911
912 /*
913 * Change current working directory to a given file descriptor.
914 */
915 /* ARGSUSED */
916 int
917 sys_fchdir(struct lwp *l, void *v, register_t *retval)
918 {
919 struct sys_fchdir_args /* {
920 syscallarg(int) fd;
921 } */ *uap = v;
922 struct proc *p = l->l_proc;
923 struct filedesc *fdp = p->p_fd;
924 struct cwdinfo *cwdi = p->p_cwdi;
925 struct vnode *vp, *tdp;
926 struct mount *mp;
927 struct file *fp;
928 int error;
929
930 /* getvnode() will use the descriptor for us */
931 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
932 return (error);
933 vp = (struct vnode *)fp->f_data;
934
935 VREF(vp);
936 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
937 if (vp->v_type != VDIR)
938 error = ENOTDIR;
939 else
940 error = VOP_ACCESS(vp, VEXEC, p->p_cred, l);
941 while (!error && (mp = vp->v_mountedhere) != NULL) {
942 if (vfs_busy(mp, 0, 0))
943 continue;
944 error = VFS_ROOT(mp, &tdp);
945 vfs_unbusy(mp);
946 if (error)
947 break;
948 vput(vp);
949 vp = tdp;
950 }
951 if (error) {
952 vput(vp);
953 goto out;
954 }
955 VOP_UNLOCK(vp, 0);
956
957 /*
958 * Disallow changing to a directory not under the process's
959 * current root directory (if there is one).
960 */
961 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
962 vrele(vp);
963 error = EPERM; /* operation not permitted */
964 goto out;
965 }
966
967 vrele(cwdi->cwdi_cdir);
968 cwdi->cwdi_cdir = vp;
969 out:
970 FILE_UNUSE(fp, l);
971 return (error);
972 }
973
974 /*
975 * Change this process's notion of the root directory to a given file
976 * descriptor.
977 */
978 int
979 sys_fchroot(struct lwp *l, void *v, register_t *retval)
980 {
981 struct sys_fchroot_args *uap = v;
982 struct proc *p = l->l_proc;
983 struct filedesc *fdp = p->p_fd;
984 struct cwdinfo *cwdi = p->p_cwdi;
985 struct vnode *vp;
986 struct file *fp;
987 int error;
988
989 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
990 &p->p_acflag)) != 0)
991 return error;
992 /* getvnode() will use the descriptor for us */
993 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
994 return error;
995 vp = (struct vnode *) fp->f_data;
996 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
997 if (vp->v_type != VDIR)
998 error = ENOTDIR;
999 else
1000 error = VOP_ACCESS(vp, VEXEC, p->p_cred, l);
1001 VOP_UNLOCK(vp, 0);
1002 if (error)
1003 goto out;
1004 VREF(vp);
1005
1006 /*
1007 * Prevent escaping from chroot by putting the root under
1008 * the working directory. Silently chdir to / if we aren't
1009 * already there.
1010 */
1011 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1012 /*
1013 * XXX would be more failsafe to change directory to a
1014 * deadfs node here instead
1015 */
1016 vrele(cwdi->cwdi_cdir);
1017 VREF(vp);
1018 cwdi->cwdi_cdir = vp;
1019 }
1020
1021 if (cwdi->cwdi_rdir != NULL)
1022 vrele(cwdi->cwdi_rdir);
1023 cwdi->cwdi_rdir = vp;
1024 out:
1025 FILE_UNUSE(fp, l);
1026 return (error);
1027 }
1028
1029 /*
1030 * Change current working directory (``.'').
1031 */
1032 /* ARGSUSED */
1033 int
1034 sys_chdir(struct lwp *l, void *v, register_t *retval)
1035 {
1036 struct sys_chdir_args /* {
1037 syscallarg(const char *) path;
1038 } */ *uap = v;
1039 struct proc *p = l->l_proc;
1040 struct cwdinfo *cwdi = p->p_cwdi;
1041 int error;
1042 struct nameidata nd;
1043
1044 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1045 SCARG(uap, path), l);
1046 if ((error = change_dir(&nd, l)) != 0)
1047 return (error);
1048 vrele(cwdi->cwdi_cdir);
1049 cwdi->cwdi_cdir = nd.ni_vp;
1050 return (0);
1051 }
1052
1053 /*
1054 * Change notion of root (``/'') directory.
1055 */
1056 /* ARGSUSED */
1057 int
1058 sys_chroot(struct lwp *l, void *v, register_t *retval)
1059 {
1060 struct sys_chroot_args /* {
1061 syscallarg(const char *) path;
1062 } */ *uap = v;
1063 struct proc *p = l->l_proc;
1064 struct cwdinfo *cwdi = p->p_cwdi;
1065 struct vnode *vp;
1066 int error;
1067 struct nameidata nd;
1068
1069 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1070 &p->p_acflag)) != 0)
1071 return (error);
1072 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1073 SCARG(uap, path), l);
1074 if ((error = change_dir(&nd, l)) != 0)
1075 return (error);
1076 if (cwdi->cwdi_rdir != NULL)
1077 vrele(cwdi->cwdi_rdir);
1078 vp = nd.ni_vp;
1079 cwdi->cwdi_rdir = vp;
1080
1081 /*
1082 * Prevent escaping from chroot by putting the root under
1083 * the working directory. Silently chdir to / if we aren't
1084 * already there.
1085 */
1086 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1087 /*
1088 * XXX would be more failsafe to change directory to a
1089 * deadfs node here instead
1090 */
1091 vrele(cwdi->cwdi_cdir);
1092 VREF(vp);
1093 cwdi->cwdi_cdir = vp;
1094 }
1095
1096 return (0);
1097 }
1098
1099 /*
1100 * Common routine for chroot and chdir.
1101 */
1102 static int
1103 change_dir(struct nameidata *ndp, struct lwp *l)
1104 {
1105 struct vnode *vp;
1106 int error;
1107
1108 if ((error = namei(ndp)) != 0)
1109 return (error);
1110 vp = ndp->ni_vp;
1111 if (vp->v_type != VDIR)
1112 error = ENOTDIR;
1113 else
1114 error = VOP_ACCESS(vp, VEXEC, l->l_proc->p_cred, l);
1115
1116 if (error)
1117 vput(vp);
1118 else
1119 VOP_UNLOCK(vp, 0);
1120 return (error);
1121 }
1122
1123 /*
1124 * Check permissions, allocate an open file structure,
1125 * and call the device open routine if any.
1126 */
1127 int
1128 sys_open(struct lwp *l, void *v, register_t *retval)
1129 {
1130 struct sys_open_args /* {
1131 syscallarg(const char *) path;
1132 syscallarg(int) flags;
1133 syscallarg(int) mode;
1134 } */ *uap = v;
1135 struct proc *p = l->l_proc;
1136 struct cwdinfo *cwdi = p->p_cwdi;
1137 struct filedesc *fdp = p->p_fd;
1138 struct file *fp;
1139 struct vnode *vp;
1140 int flags, cmode;
1141 int type, indx, error;
1142 struct flock lf;
1143 struct nameidata nd;
1144
1145 flags = FFLAGS(SCARG(uap, flags));
1146 if ((flags & (FREAD | FWRITE)) == 0)
1147 return (EINVAL);
1148 /* falloc() will use the file descriptor for us */
1149 if ((error = falloc(p, &fp, &indx)) != 0)
1150 return (error);
1151 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1152 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1153 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1154 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1155 FILE_UNUSE(fp, l);
1156 fdp->fd_ofiles[indx] = NULL;
1157 ffree(fp);
1158 if ((error == EDUPFD || error == EMOVEFD) &&
1159 l->l_dupfd >= 0 && /* XXX from fdopen */
1160 (error =
1161 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1162 *retval = indx;
1163 return (0);
1164 }
1165 if (error == ERESTART)
1166 error = EINTR;
1167 fdremove(fdp, indx);
1168 return (error);
1169 }
1170 l->l_dupfd = 0;
1171 vp = nd.ni_vp;
1172 fp->f_flag = flags & FMASK;
1173 fp->f_type = DTYPE_VNODE;
1174 fp->f_ops = &vnops;
1175 fp->f_data = vp;
1176 if (flags & (O_EXLOCK | O_SHLOCK)) {
1177 lf.l_whence = SEEK_SET;
1178 lf.l_start = 0;
1179 lf.l_len = 0;
1180 if (flags & O_EXLOCK)
1181 lf.l_type = F_WRLCK;
1182 else
1183 lf.l_type = F_RDLCK;
1184 type = F_FLOCK;
1185 if ((flags & FNONBLOCK) == 0)
1186 type |= F_WAIT;
1187 VOP_UNLOCK(vp, 0);
1188 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1189 if (error) {
1190 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1191 FILE_UNUSE(fp, l);
1192 ffree(fp);
1193 fdremove(fdp, indx);
1194 return (error);
1195 }
1196 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1197 fp->f_flag |= FHASLOCK;
1198 }
1199 VOP_UNLOCK(vp, 0);
1200 *retval = indx;
1201 FILE_SET_MATURE(fp);
1202 FILE_UNUSE(fp, l);
1203 return (0);
1204 }
1205
1206 /*
1207 * vfs_composefh: compose a filehandle.
1208 */
1209
1210 int
1211 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1212 {
1213 struct mount *mp;
1214 int error;
1215 size_t sz;
1216
1217 mp = vp->v_mount;
1218 if (mp->mnt_op->vfs_vptofh == NULL) {
1219 return EOPNOTSUPP;
1220 }
1221 if (fhp != NULL && *fh_size >= offsetof(fhandle_t, fh_fid)) {
1222 memset(fhp, 0, *fh_size);
1223 sz = *fh_size - offsetof(fhandle_t, fh_fid);
1224 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1225 error = VFS_VPTOFH(vp, &fhp->fh_fid, &sz);
1226 } else {
1227 /* just query the size */
1228 sz = 0;
1229 error = VFS_VPTOFH(vp, NULL, &sz);
1230 }
1231 *fh_size = sz + offsetof(fhandle_t, fh_fid);
1232 return error;
1233 }
1234
1235 /*
1236 * vfs_fhtovp: lookup a vnode by a filehandle.
1237 */
1238
1239 int
1240 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1241 {
1242 struct mount *mp;
1243 int error;
1244
1245 *vpp = NULL;
1246 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1247 if (mp == NULL) {
1248 error = ESTALE;
1249 goto out;
1250 }
1251 if (mp->mnt_op->vfs_fhtovp == NULL) {
1252 error = EOPNOTSUPP;
1253 goto out;
1254 }
1255 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1256 out:
1257 return error;
1258 }
1259
1260 /*
1261 * vfs_copyinfh: copyin a filehandle.
1262 */
1263
1264 int
1265 vfs_copyinfh(const void *ufhp, fhandle_t **fhpp)
1266 {
1267 fhandle_t *fhp;
1268 fhandle_t tempfh;
1269 size_t fhsize;
1270 int error;
1271
1272 *fhpp = NULL;
1273 error = copyin(ufhp, &tempfh, sizeof(tempfh));
1274 if (error) {
1275 return error;
1276 }
1277 fhsize = FHANDLE_SIZE(&tempfh);
1278 fhp = kmem_alloc(fhsize, KM_SLEEP);
1279 if (fhp == NULL) {
1280 return ENOMEM;
1281 }
1282 error = copyin(ufhp, fhp, fhsize);
1283 if (error == 0) {
1284 *fhpp = fhp;
1285 } else {
1286 kmem_free(fhp, fhsize);
1287 }
1288 return error;
1289 }
1290
1291 void
1292 vfs_copyinfh_free(fhandle_t *fhp)
1293 {
1294 size_t fhsize;
1295
1296 if (fhp == NULL) {
1297 return;
1298 }
1299 fhsize = FHANDLE_SIZE(fhp);
1300 kmem_free(fhp, fhsize);
1301 }
1302
1303 /*
1304 * Get file handle system call
1305 */
1306 int
1307 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1308 {
1309 struct sys___getfh30_args /* {
1310 syscallarg(char *) fname;
1311 syscallarg(fhandle_t *) fhp;
1312 syscallarg(size_t *) fh_size;
1313 } */ *uap = v;
1314 struct proc *p = l->l_proc;
1315 struct vnode *vp;
1316 fhandle_t *fh;
1317 int error;
1318 struct nameidata nd;
1319 size_t sz;
1320
1321 /*
1322 * Must be super user
1323 */
1324 error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1325 &p->p_acflag);
1326 if (error)
1327 return (error);
1328 fh = NULL;
1329 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1330 SCARG(uap, fname), l);
1331 error = namei(&nd);
1332 if (error)
1333 return (error);
1334 vp = nd.ni_vp;
1335 error = copyin(SCARG(uap, fh_size), &sz, sizeof(size_t));
1336 vput(vp);
1337 if (!error) {
1338 fh = malloc(sz, M_TEMP, M_WAITOK);
1339 if (fh == NULL)
1340 return EINVAL;
1341 error = vfs_composefh(vp, fh, &sz);
1342 }
1343 if (error == E2BIG)
1344 copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1345 if (error == 0) {
1346 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1347 if (!error)
1348 error = copyout(fh, SCARG(uap, fhp), sz);
1349 }
1350 free(fh, M_TEMP);
1351 return (error);
1352 }
1353
1354 /*
1355 * Open a file given a file handle.
1356 *
1357 * Check permissions, allocate an open file structure,
1358 * and call the device open routine if any.
1359 */
1360 int
1361 sys_fhopen(struct lwp *l, void *v, register_t *retval)
1362 {
1363 struct sys_fhopen_args /* {
1364 syscallarg(const fhandle_t *) fhp;
1365 syscallarg(int) flags;
1366 } */ *uap = v;
1367 struct proc *p = l->l_proc;
1368 struct filedesc *fdp = p->p_fd;
1369 struct file *fp;
1370 struct vnode *vp = NULL;
1371 struct mount *mp;
1372 kauth_cred_t cred = p->p_cred;
1373 int flags;
1374 struct file *nfp;
1375 int type, indx, error=0;
1376 struct flock lf;
1377 struct vattr va;
1378 fhandle_t *fh;
1379
1380 /*
1381 * Must be super user
1382 */
1383 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1384 &p->p_acflag)))
1385 return (error);
1386
1387 flags = FFLAGS(SCARG(uap, flags));
1388 if ((flags & (FREAD | FWRITE)) == 0)
1389 return (EINVAL);
1390 if ((flags & O_CREAT))
1391 return (EINVAL);
1392 /* falloc() will use the file descriptor for us */
1393 if ((error = falloc(p, &nfp, &indx)) != 0)
1394 return (error);
1395 fp = nfp;
1396 error = vfs_copyinfh(SCARG(uap, fhp), &fh);
1397 if (error != 0) {
1398 goto bad;
1399 }
1400 error = vfs_fhtovp(fh, &vp);
1401 if (error != 0) {
1402 goto bad;
1403 }
1404
1405 /* Now do an effective vn_open */
1406
1407 if (vp->v_type == VSOCK) {
1408 error = EOPNOTSUPP;
1409 goto bad;
1410 }
1411 if (flags & FREAD) {
1412 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1413 goto bad;
1414 }
1415 if (flags & (FWRITE | O_TRUNC)) {
1416 if (vp->v_type == VDIR) {
1417 error = EISDIR;
1418 goto bad;
1419 }
1420 if ((error = vn_writechk(vp)) != 0 ||
1421 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1422 goto bad;
1423 }
1424 if (flags & O_TRUNC) {
1425 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1426 goto bad;
1427 VOP_UNLOCK(vp, 0); /* XXX */
1428 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1429 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1430 VATTR_NULL(&va);
1431 va.va_size = 0;
1432 error = VOP_SETATTR(vp, &va, cred, l);
1433 vn_finished_write(mp, 0);
1434 if (error)
1435 goto bad;
1436 }
1437 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1438 goto bad;
1439 if (vp->v_type == VREG &&
1440 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1441 error = EIO;
1442 goto bad;
1443 }
1444 if (flags & FWRITE)
1445 vp->v_writecount++;
1446
1447 /* done with modified vn_open, now finish what sys_open does. */
1448
1449 fp->f_flag = flags & FMASK;
1450 fp->f_type = DTYPE_VNODE;
1451 fp->f_ops = &vnops;
1452 fp->f_data = vp;
1453 if (flags & (O_EXLOCK | O_SHLOCK)) {
1454 lf.l_whence = SEEK_SET;
1455 lf.l_start = 0;
1456 lf.l_len = 0;
1457 if (flags & O_EXLOCK)
1458 lf.l_type = F_WRLCK;
1459 else
1460 lf.l_type = F_RDLCK;
1461 type = F_FLOCK;
1462 if ((flags & FNONBLOCK) == 0)
1463 type |= F_WAIT;
1464 VOP_UNLOCK(vp, 0);
1465 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1466 if (error) {
1467 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1468 FILE_UNUSE(fp, l);
1469 ffree(fp);
1470 fdremove(fdp, indx);
1471 return (error);
1472 }
1473 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1474 fp->f_flag |= FHASLOCK;
1475 }
1476 VOP_UNLOCK(vp, 0);
1477 *retval = indx;
1478 FILE_SET_MATURE(fp);
1479 FILE_UNUSE(fp, l);
1480 vfs_copyinfh_free(fh);
1481 return (0);
1482
1483 bad:
1484 FILE_UNUSE(fp, l);
1485 ffree(fp);
1486 fdremove(fdp, indx);
1487 if (vp != NULL)
1488 vput(vp);
1489 vfs_copyinfh_free(fh);
1490 return (error);
1491 }
1492
1493 /* ARGSUSED */
1494 int
1495 sys___fhstat30(struct lwp *l, void *v, register_t *retval)
1496 {
1497 struct sys___fhstat30_args /* {
1498 syscallarg(const fhandle_t *) fhp;
1499 syscallarg(struct stat *) sb;
1500 } */ *uap = v;
1501 struct proc *p = l->l_proc;
1502 struct stat sb;
1503 int error;
1504 fhandle_t *fh;
1505 struct vnode *vp;
1506
1507 /*
1508 * Must be super user
1509 */
1510 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1511 &p->p_acflag)))
1512 return (error);
1513
1514 error = vfs_copyinfh(SCARG(uap, fhp), &fh);
1515 if (error != 0) {
1516 goto bad;
1517 }
1518 error = vfs_fhtovp(fh, &vp);
1519 if (error != 0) {
1520 goto bad;
1521 }
1522 error = vn_stat(vp, &sb, l);
1523 vput(vp);
1524 if (error) {
1525 goto bad;
1526 }
1527 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1528 bad:
1529 vfs_copyinfh_free(fh);
1530 return error;
1531 }
1532
1533 /* ARGSUSED */
1534 int
1535 sys_fhstatvfs1(struct lwp *l, void *v, register_t *retval)
1536 {
1537 struct sys_fhstatvfs1_args /* {
1538 syscallarg(const fhandle_t *) fhp;
1539 syscallarg(struct statvfs *) buf;
1540 syscallarg(int) flags;
1541 } */ *uap = v;
1542 struct proc *p = l->l_proc;
1543 struct statvfs *sb = NULL;
1544 fhandle_t *fh;
1545 struct mount *mp;
1546 struct vnode *vp;
1547 int error;
1548
1549 /*
1550 * Must be super user
1551 */
1552 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1553 &p->p_acflag)) != 0)
1554 return error;
1555
1556 error = vfs_copyinfh(SCARG(uap, fhp), &fh);
1557 if (error != 0) {
1558 goto out;
1559 }
1560 error = vfs_fhtovp(fh, &vp);
1561 if (error != 0) {
1562 goto out;
1563 }
1564 mp = vp->v_mount;
1565 sb = STATVFSBUF_GET();
1566 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0) {
1567 vput(vp);
1568 goto out;
1569 }
1570 vput(vp);
1571 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1572 out:
1573 if (sb != NULL) {
1574 STATVFSBUF_PUT(sb);
1575 }
1576 vfs_copyinfh_free(fh);
1577 return error;
1578 }
1579
1580 /*
1581 * Create a special file.
1582 */
1583 /* ARGSUSED */
1584 int
1585 sys_mknod(struct lwp *l, void *v, register_t *retval)
1586 {
1587 struct sys_mknod_args /* {
1588 syscallarg(const char *) path;
1589 syscallarg(int) mode;
1590 syscallarg(int) dev;
1591 } */ *uap = v;
1592 struct proc *p = l->l_proc;
1593 struct vnode *vp;
1594 struct mount *mp;
1595 struct vattr vattr;
1596 int error;
1597 int whiteout = 0;
1598 struct nameidata nd;
1599
1600 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1601 &p->p_acflag)) != 0)
1602 return (error);
1603 restart:
1604 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1605 if ((error = namei(&nd)) != 0)
1606 return (error);
1607 vp = nd.ni_vp;
1608 if (vp != NULL)
1609 error = EEXIST;
1610 else {
1611 VATTR_NULL(&vattr);
1612 vattr.va_mode =
1613 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1614 vattr.va_rdev = SCARG(uap, dev);
1615 whiteout = 0;
1616
1617 switch (SCARG(uap, mode) & S_IFMT) {
1618 case S_IFMT: /* used by badsect to flag bad sectors */
1619 vattr.va_type = VBAD;
1620 break;
1621 case S_IFCHR:
1622 vattr.va_type = VCHR;
1623 break;
1624 case S_IFBLK:
1625 vattr.va_type = VBLK;
1626 break;
1627 case S_IFWHT:
1628 whiteout = 1;
1629 break;
1630 default:
1631 error = EINVAL;
1632 break;
1633 }
1634 }
1635 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1636 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1637 if (nd.ni_dvp == vp)
1638 vrele(nd.ni_dvp);
1639 else
1640 vput(nd.ni_dvp);
1641 if (vp)
1642 vrele(vp);
1643 if ((error = vn_start_write(NULL, &mp,
1644 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1645 return (error);
1646 goto restart;
1647 }
1648 if (!error) {
1649 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1650 if (whiteout) {
1651 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1652 if (error)
1653 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1654 vput(nd.ni_dvp);
1655 } else {
1656 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1657 &nd.ni_cnd, &vattr);
1658 if (error == 0)
1659 vput(nd.ni_vp);
1660 }
1661 } else {
1662 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1663 if (nd.ni_dvp == vp)
1664 vrele(nd.ni_dvp);
1665 else
1666 vput(nd.ni_dvp);
1667 if (vp)
1668 vrele(vp);
1669 }
1670 vn_finished_write(mp, 0);
1671 return (error);
1672 }
1673
1674 /*
1675 * Create a named pipe.
1676 */
1677 /* ARGSUSED */
1678 int
1679 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1680 {
1681 struct sys_mkfifo_args /* {
1682 syscallarg(const char *) path;
1683 syscallarg(int) mode;
1684 } */ *uap = v;
1685 struct proc *p = l->l_proc;
1686 struct mount *mp;
1687 struct vattr vattr;
1688 int error;
1689 struct nameidata nd;
1690
1691 restart:
1692 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1693 if ((error = namei(&nd)) != 0)
1694 return (error);
1695 if (nd.ni_vp != NULL) {
1696 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1697 if (nd.ni_dvp == nd.ni_vp)
1698 vrele(nd.ni_dvp);
1699 else
1700 vput(nd.ni_dvp);
1701 vrele(nd.ni_vp);
1702 return (EEXIST);
1703 }
1704 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1705 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1706 if (nd.ni_dvp == nd.ni_vp)
1707 vrele(nd.ni_dvp);
1708 else
1709 vput(nd.ni_dvp);
1710 if (nd.ni_vp)
1711 vrele(nd.ni_vp);
1712 if ((error = vn_start_write(NULL, &mp,
1713 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1714 return (error);
1715 goto restart;
1716 }
1717 VATTR_NULL(&vattr);
1718 vattr.va_type = VFIFO;
1719 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1720 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1721 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1722 if (error == 0)
1723 vput(nd.ni_vp);
1724 vn_finished_write(mp, 0);
1725 return (error);
1726 }
1727
1728 /*
1729 * Make a hard file link.
1730 */
1731 /* ARGSUSED */
1732 int
1733 sys_link(struct lwp *l, void *v, register_t *retval)
1734 {
1735 struct sys_link_args /* {
1736 syscallarg(const char *) path;
1737 syscallarg(const char *) link;
1738 } */ *uap = v;
1739 struct proc *p = l->l_proc;
1740 struct vnode *vp;
1741 struct mount *mp;
1742 struct nameidata nd;
1743 int error;
1744
1745 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1746 if ((error = namei(&nd)) != 0)
1747 return (error);
1748 vp = nd.ni_vp;
1749 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1750 vrele(vp);
1751 return (error);
1752 }
1753 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1754 if ((error = namei(&nd)) != 0)
1755 goto out;
1756 if (nd.ni_vp) {
1757 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1758 if (nd.ni_dvp == nd.ni_vp)
1759 vrele(nd.ni_dvp);
1760 else
1761 vput(nd.ni_dvp);
1762 vrele(nd.ni_vp);
1763 error = EEXIST;
1764 goto out;
1765 }
1766 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1767 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
1768 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1769 out:
1770 vrele(vp);
1771 vn_finished_write(mp, 0);
1772 return (error);
1773 }
1774
1775 /*
1776 * Make a symbolic link.
1777 */
1778 /* ARGSUSED */
1779 int
1780 sys_symlink(struct lwp *l, void *v, register_t *retval)
1781 {
1782 struct sys_symlink_args /* {
1783 syscallarg(const char *) path;
1784 syscallarg(const char *) link;
1785 } */ *uap = v;
1786 struct proc *p = l->l_proc;
1787 struct mount *mp;
1788 struct vattr vattr;
1789 char *path;
1790 int error;
1791 struct nameidata nd;
1792
1793 path = PNBUF_GET();
1794 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1795 if (error)
1796 goto out;
1797 restart:
1798 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1799 if ((error = namei(&nd)) != 0)
1800 goto out;
1801 if (nd.ni_vp) {
1802 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1803 if (nd.ni_dvp == nd.ni_vp)
1804 vrele(nd.ni_dvp);
1805 else
1806 vput(nd.ni_dvp);
1807 vrele(nd.ni_vp);
1808 error = EEXIST;
1809 goto out;
1810 }
1811 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1812 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1813 if (nd.ni_dvp == nd.ni_vp)
1814 vrele(nd.ni_dvp);
1815 else
1816 vput(nd.ni_dvp);
1817 if ((error = vn_start_write(NULL, &mp,
1818 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1819 return (error);
1820 goto restart;
1821 }
1822 VATTR_NULL(&vattr);
1823 vattr.va_type = VLNK;
1824 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1825 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1826 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1827 if (error == 0)
1828 vput(nd.ni_vp);
1829 vn_finished_write(mp, 0);
1830 out:
1831 PNBUF_PUT(path);
1832 return (error);
1833 }
1834
1835 /*
1836 * Delete a whiteout from the filesystem.
1837 */
1838 /* ARGSUSED */
1839 int
1840 sys_undelete(struct lwp *l, void *v, register_t *retval)
1841 {
1842 struct sys_undelete_args /* {
1843 syscallarg(const char *) path;
1844 } */ *uap = v;
1845 struct proc *p = l->l_proc;
1846 int error;
1847 struct mount *mp;
1848 struct nameidata nd;
1849
1850 restart:
1851 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1852 SCARG(uap, path), l);
1853 error = namei(&nd);
1854 if (error)
1855 return (error);
1856
1857 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1858 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1859 if (nd.ni_dvp == nd.ni_vp)
1860 vrele(nd.ni_dvp);
1861 else
1862 vput(nd.ni_dvp);
1863 if (nd.ni_vp)
1864 vrele(nd.ni_vp);
1865 return (EEXIST);
1866 }
1867 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1868 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1869 if (nd.ni_dvp == nd.ni_vp)
1870 vrele(nd.ni_dvp);
1871 else
1872 vput(nd.ni_dvp);
1873 if ((error = vn_start_write(NULL, &mp,
1874 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1875 return (error);
1876 goto restart;
1877 }
1878 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1879 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
1880 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1881 vput(nd.ni_dvp);
1882 vn_finished_write(mp, 0);
1883 return (error);
1884 }
1885
1886 /*
1887 * Delete a name from the filesystem.
1888 */
1889 /* ARGSUSED */
1890 int
1891 sys_unlink(struct lwp *l, void *v, register_t *retval)
1892 {
1893 struct sys_unlink_args /* {
1894 syscallarg(const char *) path;
1895 } */ *uap = v;
1896 struct proc *p = l->l_proc;
1897 struct mount *mp;
1898 struct vnode *vp;
1899 int error;
1900 struct nameidata nd;
1901
1902 restart:
1903 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
1904 SCARG(uap, path), l);
1905 if ((error = namei(&nd)) != 0)
1906 return (error);
1907 vp = nd.ni_vp;
1908
1909 /*
1910 * The root of a mounted filesystem cannot be deleted.
1911 */
1912 if (vp->v_flag & VROOT) {
1913 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1914 if (nd.ni_dvp == vp)
1915 vrele(nd.ni_dvp);
1916 else
1917 vput(nd.ni_dvp);
1918 vput(vp);
1919 error = EBUSY;
1920 goto out;
1921 }
1922
1923 #ifdef VERIFIED_EXEC
1924 /* Handle remove requests for veriexec entries. */
1925 if ((error = veriexec_removechk(l, vp, nd.ni_dirp)) != 0) {
1926 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1927 if (nd.ni_dvp == vp)
1928 vrele(nd.ni_dvp);
1929 else
1930 vput(nd.ni_dvp);
1931 vput(vp);
1932 goto out;
1933 }
1934 #endif /* VERIFIED_EXEC */
1935
1936 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1937 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1938 if (nd.ni_dvp == vp)
1939 vrele(nd.ni_dvp);
1940 else
1941 vput(nd.ni_dvp);
1942 vput(vp);
1943 if ((error = vn_start_write(NULL, &mp,
1944 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1945 return (error);
1946 goto restart;
1947 }
1948 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1949 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
1950 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
1951 vn_finished_write(mp, 0);
1952 out:
1953 return (error);
1954 }
1955
1956 /*
1957 * Reposition read/write file offset.
1958 */
1959 int
1960 sys_lseek(struct lwp *l, void *v, register_t *retval)
1961 {
1962 struct sys_lseek_args /* {
1963 syscallarg(int) fd;
1964 syscallarg(int) pad;
1965 syscallarg(off_t) offset;
1966 syscallarg(int) whence;
1967 } */ *uap = v;
1968 struct proc *p = l->l_proc;
1969 kauth_cred_t cred = p->p_cred;
1970 struct filedesc *fdp = p->p_fd;
1971 struct file *fp;
1972 struct vnode *vp;
1973 struct vattr vattr;
1974 off_t newoff;
1975 int error;
1976
1977 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
1978 return (EBADF);
1979
1980 FILE_USE(fp);
1981
1982 vp = (struct vnode *)fp->f_data;
1983 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
1984 error = ESPIPE;
1985 goto out;
1986 }
1987
1988 switch (SCARG(uap, whence)) {
1989 case SEEK_CUR:
1990 newoff = fp->f_offset + SCARG(uap, offset);
1991 break;
1992 case SEEK_END:
1993 error = VOP_GETATTR(vp, &vattr, cred, l);
1994 if (error)
1995 goto out;
1996 newoff = SCARG(uap, offset) + vattr.va_size;
1997 break;
1998 case SEEK_SET:
1999 newoff = SCARG(uap, offset);
2000 break;
2001 default:
2002 error = EINVAL;
2003 goto out;
2004 }
2005 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2006 goto out;
2007
2008 *(off_t *)retval = fp->f_offset = newoff;
2009 out:
2010 FILE_UNUSE(fp, l);
2011 return (error);
2012 }
2013
2014 /*
2015 * Positional read system call.
2016 */
2017 int
2018 sys_pread(struct lwp *l, void *v, register_t *retval)
2019 {
2020 struct sys_pread_args /* {
2021 syscallarg(int) fd;
2022 syscallarg(void *) buf;
2023 syscallarg(size_t) nbyte;
2024 syscallarg(off_t) offset;
2025 } */ *uap = v;
2026 struct proc *p = l->l_proc;
2027 struct filedesc *fdp = p->p_fd;
2028 struct file *fp;
2029 struct vnode *vp;
2030 off_t offset;
2031 int error, fd = SCARG(uap, fd);
2032
2033 if ((fp = fd_getfile(fdp, fd)) == NULL)
2034 return (EBADF);
2035
2036 if ((fp->f_flag & FREAD) == 0) {
2037 simple_unlock(&fp->f_slock);
2038 return (EBADF);
2039 }
2040
2041 FILE_USE(fp);
2042
2043 vp = (struct vnode *)fp->f_data;
2044 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2045 error = ESPIPE;
2046 goto out;
2047 }
2048
2049 offset = SCARG(uap, offset);
2050
2051 /*
2052 * XXX This works because no file systems actually
2053 * XXX take any action on the seek operation.
2054 */
2055 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2056 goto out;
2057
2058 /* dofileread() will unuse the descriptor for us */
2059 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2060 &offset, 0, retval));
2061
2062 out:
2063 FILE_UNUSE(fp, l);
2064 return (error);
2065 }
2066
2067 /*
2068 * Positional scatter read system call.
2069 */
2070 int
2071 sys_preadv(struct lwp *l, void *v, register_t *retval)
2072 {
2073 struct sys_preadv_args /* {
2074 syscallarg(int) fd;
2075 syscallarg(const struct iovec *) iovp;
2076 syscallarg(int) iovcnt;
2077 syscallarg(off_t) offset;
2078 } */ *uap = v;
2079 struct proc *p = l->l_proc;
2080 struct filedesc *fdp = p->p_fd;
2081 struct file *fp;
2082 struct vnode *vp;
2083 off_t offset;
2084 int error, fd = SCARG(uap, fd);
2085
2086 if ((fp = fd_getfile(fdp, fd)) == NULL)
2087 return (EBADF);
2088
2089 if ((fp->f_flag & FREAD) == 0) {
2090 simple_unlock(&fp->f_slock);
2091 return (EBADF);
2092 }
2093
2094 FILE_USE(fp);
2095
2096 vp = (struct vnode *)fp->f_data;
2097 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2098 error = ESPIPE;
2099 goto out;
2100 }
2101
2102 offset = SCARG(uap, offset);
2103
2104 /*
2105 * XXX This works because no file systems actually
2106 * XXX take any action on the seek operation.
2107 */
2108 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2109 goto out;
2110
2111 /* dofilereadv() will unuse the descriptor for us */
2112 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2113 &offset, 0, retval));
2114
2115 out:
2116 FILE_UNUSE(fp, l);
2117 return (error);
2118 }
2119
2120 /*
2121 * Positional write system call.
2122 */
2123 int
2124 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2125 {
2126 struct sys_pwrite_args /* {
2127 syscallarg(int) fd;
2128 syscallarg(const void *) buf;
2129 syscallarg(size_t) nbyte;
2130 syscallarg(off_t) offset;
2131 } */ *uap = v;
2132 struct proc *p = l->l_proc;
2133 struct filedesc *fdp = p->p_fd;
2134 struct file *fp;
2135 struct vnode *vp;
2136 off_t offset;
2137 int error, fd = SCARG(uap, fd);
2138
2139 if ((fp = fd_getfile(fdp, fd)) == NULL)
2140 return (EBADF);
2141
2142 if ((fp->f_flag & FWRITE) == 0) {
2143 simple_unlock(&fp->f_slock);
2144 return (EBADF);
2145 }
2146
2147 FILE_USE(fp);
2148
2149 vp = (struct vnode *)fp->f_data;
2150 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2151 error = ESPIPE;
2152 goto out;
2153 }
2154
2155 offset = SCARG(uap, offset);
2156
2157 /*
2158 * XXX This works because no file systems actually
2159 * XXX take any action on the seek operation.
2160 */
2161 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2162 goto out;
2163
2164 /* dofilewrite() will unuse the descriptor for us */
2165 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2166 &offset, 0, retval));
2167
2168 out:
2169 FILE_UNUSE(fp, l);
2170 return (error);
2171 }
2172
2173 /*
2174 * Positional gather write system call.
2175 */
2176 int
2177 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2178 {
2179 struct sys_pwritev_args /* {
2180 syscallarg(int) fd;
2181 syscallarg(const struct iovec *) iovp;
2182 syscallarg(int) iovcnt;
2183 syscallarg(off_t) offset;
2184 } */ *uap = v;
2185 struct proc *p = l->l_proc;
2186 struct filedesc *fdp = p->p_fd;
2187 struct file *fp;
2188 struct vnode *vp;
2189 off_t offset;
2190 int error, fd = SCARG(uap, fd);
2191
2192 if ((fp = fd_getfile(fdp, fd)) == NULL)
2193 return (EBADF);
2194
2195 if ((fp->f_flag & FWRITE) == 0) {
2196 simple_unlock(&fp->f_slock);
2197 return (EBADF);
2198 }
2199
2200 FILE_USE(fp);
2201
2202 vp = (struct vnode *)fp->f_data;
2203 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2204 error = ESPIPE;
2205 goto out;
2206 }
2207
2208 offset = SCARG(uap, offset);
2209
2210 /*
2211 * XXX This works because no file systems actually
2212 * XXX take any action on the seek operation.
2213 */
2214 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2215 goto out;
2216
2217 /* dofilewritev() will unuse the descriptor for us */
2218 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2219 &offset, 0, retval));
2220
2221 out:
2222 FILE_UNUSE(fp, l);
2223 return (error);
2224 }
2225
2226 /*
2227 * Check access permissions.
2228 */
2229 int
2230 sys_access(struct lwp *l, void *v, register_t *retval)
2231 {
2232 struct sys_access_args /* {
2233 syscallarg(const char *) path;
2234 syscallarg(int) flags;
2235 } */ *uap = v;
2236 struct proc *p = l->l_proc;
2237 kauth_cred_t cred;
2238 struct vnode *vp;
2239 int error, flags;
2240 struct nameidata nd;
2241
2242 cred = kauth_cred_dup(p->p_cred);
2243 kauth_cred_seteuid(cred, kauth_cred_getuid(p->p_cred));
2244 kauth_cred_setegid(cred, kauth_cred_getgid(p->p_cred));
2245 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2246 SCARG(uap, path), l);
2247 /* Override default credentials */
2248 nd.ni_cnd.cn_cred = cred;
2249 if ((error = namei(&nd)) != 0)
2250 goto out;
2251 vp = nd.ni_vp;
2252
2253 /* Flags == 0 means only check for existence. */
2254 if (SCARG(uap, flags)) {
2255 flags = 0;
2256 if (SCARG(uap, flags) & R_OK)
2257 flags |= VREAD;
2258 if (SCARG(uap, flags) & W_OK)
2259 flags |= VWRITE;
2260 if (SCARG(uap, flags) & X_OK)
2261 flags |= VEXEC;
2262
2263 error = VOP_ACCESS(vp, flags, cred, l);
2264 if (!error && (flags & VWRITE))
2265 error = vn_writechk(vp);
2266 }
2267 vput(vp);
2268 out:
2269 kauth_cred_free(cred);
2270 return (error);
2271 }
2272
2273 /*
2274 * Get file status; this version follows links.
2275 */
2276 /* ARGSUSED */
2277 int
2278 sys___stat30(struct lwp *l, void *v, register_t *retval)
2279 {
2280 struct sys___stat30_args /* {
2281 syscallarg(const char *) path;
2282 syscallarg(struct stat *) ub;
2283 } */ *uap = v;
2284 struct stat sb;
2285 int error;
2286 struct nameidata nd;
2287
2288 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2289 SCARG(uap, path), l);
2290 if ((error = namei(&nd)) != 0)
2291 return (error);
2292 error = vn_stat(nd.ni_vp, &sb, l);
2293 vput(nd.ni_vp);
2294 if (error)
2295 return (error);
2296 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2297 return (error);
2298 }
2299
2300 /*
2301 * Get file status; this version does not follow links.
2302 */
2303 /* ARGSUSED */
2304 int
2305 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2306 {
2307 struct sys___lstat30_args /* {
2308 syscallarg(const char *) path;
2309 syscallarg(struct stat *) ub;
2310 } */ *uap = v;
2311 struct stat sb;
2312 int error;
2313 struct nameidata nd;
2314
2315 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2316 SCARG(uap, path), l);
2317 if ((error = namei(&nd)) != 0)
2318 return (error);
2319 error = vn_stat(nd.ni_vp, &sb, l);
2320 vput(nd.ni_vp);
2321 if (error)
2322 return (error);
2323 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2324 return (error);
2325 }
2326
2327 /*
2328 * Get configurable pathname variables.
2329 */
2330 /* ARGSUSED */
2331 int
2332 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2333 {
2334 struct sys_pathconf_args /* {
2335 syscallarg(const char *) path;
2336 syscallarg(int) name;
2337 } */ *uap = v;
2338 int error;
2339 struct nameidata nd;
2340
2341 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2342 SCARG(uap, path), l);
2343 if ((error = namei(&nd)) != 0)
2344 return (error);
2345 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2346 vput(nd.ni_vp);
2347 return (error);
2348 }
2349
2350 /*
2351 * Return target name of a symbolic link.
2352 */
2353 /* ARGSUSED */
2354 int
2355 sys_readlink(struct lwp *l, void *v, register_t *retval)
2356 {
2357 struct sys_readlink_args /* {
2358 syscallarg(const char *) path;
2359 syscallarg(char *) buf;
2360 syscallarg(size_t) count;
2361 } */ *uap = v;
2362 struct proc *p = l->l_proc;
2363 struct vnode *vp;
2364 struct iovec aiov;
2365 struct uio auio;
2366 int error;
2367 struct nameidata nd;
2368
2369 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2370 SCARG(uap, path), l);
2371 if ((error = namei(&nd)) != 0)
2372 return (error);
2373 vp = nd.ni_vp;
2374 if (vp->v_type != VLNK)
2375 error = EINVAL;
2376 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2377 (error = VOP_ACCESS(vp, VREAD, p->p_cred, l)) == 0) {
2378 aiov.iov_base = SCARG(uap, buf);
2379 aiov.iov_len = SCARG(uap, count);
2380 auio.uio_iov = &aiov;
2381 auio.uio_iovcnt = 1;
2382 auio.uio_offset = 0;
2383 auio.uio_rw = UIO_READ;
2384 KASSERT(l == curlwp);
2385 auio.uio_vmspace = l->l_proc->p_vmspace;
2386 auio.uio_resid = SCARG(uap, count);
2387 error = VOP_READLINK(vp, &auio, p->p_cred);
2388 }
2389 vput(vp);
2390 *retval = SCARG(uap, count) - auio.uio_resid;
2391 return (error);
2392 }
2393
2394 /*
2395 * Change flags of a file given a path name.
2396 */
2397 /* ARGSUSED */
2398 int
2399 sys_chflags(struct lwp *l, void *v, register_t *retval)
2400 {
2401 struct sys_chflags_args /* {
2402 syscallarg(const char *) path;
2403 syscallarg(u_long) flags;
2404 } */ *uap = v;
2405 struct vnode *vp;
2406 int error;
2407 struct nameidata nd;
2408
2409 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2410 if ((error = namei(&nd)) != 0)
2411 return (error);
2412 vp = nd.ni_vp;
2413 error = change_flags(vp, SCARG(uap, flags), l);
2414 vput(vp);
2415 return (error);
2416 }
2417
2418 /*
2419 * Change flags of a file given a file descriptor.
2420 */
2421 /* ARGSUSED */
2422 int
2423 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2424 {
2425 struct sys_fchflags_args /* {
2426 syscallarg(int) fd;
2427 syscallarg(u_long) flags;
2428 } */ *uap = v;
2429 struct proc *p = l->l_proc;
2430 struct vnode *vp;
2431 struct file *fp;
2432 int error;
2433
2434 /* getvnode() will use the descriptor for us */
2435 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2436 return (error);
2437 vp = (struct vnode *)fp->f_data;
2438 error = change_flags(vp, SCARG(uap, flags), l);
2439 VOP_UNLOCK(vp, 0);
2440 FILE_UNUSE(fp, l);
2441 return (error);
2442 }
2443
2444 /*
2445 * Change flags of a file given a path name; this version does
2446 * not follow links.
2447 */
2448 int
2449 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2450 {
2451 struct sys_lchflags_args /* {
2452 syscallarg(const char *) path;
2453 syscallarg(u_long) flags;
2454 } */ *uap = v;
2455 struct vnode *vp;
2456 int error;
2457 struct nameidata nd;
2458
2459 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2460 if ((error = namei(&nd)) != 0)
2461 return (error);
2462 vp = nd.ni_vp;
2463 error = change_flags(vp, SCARG(uap, flags), l);
2464 vput(vp);
2465 return (error);
2466 }
2467
2468 /*
2469 * Common routine to change flags of a file.
2470 */
2471 int
2472 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2473 {
2474 struct proc *p = l->l_proc;
2475 struct mount *mp;
2476 struct vattr vattr;
2477 int error;
2478
2479 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2480 return (error);
2481 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2482 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2483 /*
2484 * Non-superusers cannot change the flags on devices, even if they
2485 * own them.
2486 */
2487 if (kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
2488 &p->p_acflag) != 0) {
2489 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
2490 goto out;
2491 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2492 error = EINVAL;
2493 goto out;
2494 }
2495 }
2496 VATTR_NULL(&vattr);
2497 vattr.va_flags = flags;
2498 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2499 out:
2500 vn_finished_write(mp, 0);
2501 return (error);
2502 }
2503
2504 /*
2505 * Change mode of a file given path name; this version follows links.
2506 */
2507 /* ARGSUSED */
2508 int
2509 sys_chmod(struct lwp *l, void *v, register_t *retval)
2510 {
2511 struct sys_chmod_args /* {
2512 syscallarg(const char *) path;
2513 syscallarg(int) mode;
2514 } */ *uap = v;
2515 int error;
2516 struct nameidata nd;
2517
2518 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2519 if ((error = namei(&nd)) != 0)
2520 return (error);
2521
2522 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2523
2524 vrele(nd.ni_vp);
2525 return (error);
2526 }
2527
2528 /*
2529 * Change mode of a file given a file descriptor.
2530 */
2531 /* ARGSUSED */
2532 int
2533 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2534 {
2535 struct sys_fchmod_args /* {
2536 syscallarg(int) fd;
2537 syscallarg(int) mode;
2538 } */ *uap = v;
2539 struct proc *p = l->l_proc;
2540 struct file *fp;
2541 int error;
2542
2543 /* getvnode() will use the descriptor for us */
2544 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2545 return (error);
2546
2547 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2548 FILE_UNUSE(fp, l);
2549 return (error);
2550 }
2551
2552 /*
2553 * Change mode of a file given path name; this version does not follow links.
2554 */
2555 /* ARGSUSED */
2556 int
2557 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2558 {
2559 struct sys_lchmod_args /* {
2560 syscallarg(const char *) path;
2561 syscallarg(int) mode;
2562 } */ *uap = v;
2563 int error;
2564 struct nameidata nd;
2565
2566 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2567 if ((error = namei(&nd)) != 0)
2568 return (error);
2569
2570 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2571
2572 vrele(nd.ni_vp);
2573 return (error);
2574 }
2575
2576 /*
2577 * Common routine to set mode given a vnode.
2578 */
2579 static int
2580 change_mode(struct vnode *vp, int mode, struct lwp *l)
2581 {
2582 struct proc *p = l->l_proc;
2583 struct mount *mp;
2584 struct vattr vattr;
2585 int error;
2586
2587 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2588 return (error);
2589 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2590 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2591 VATTR_NULL(&vattr);
2592 vattr.va_mode = mode & ALLPERMS;
2593 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2594 VOP_UNLOCK(vp, 0);
2595 vn_finished_write(mp, 0);
2596 return (error);
2597 }
2598
2599 /*
2600 * Set ownership given a path name; this version follows links.
2601 */
2602 /* ARGSUSED */
2603 int
2604 sys_chown(struct lwp *l, void *v, register_t *retval)
2605 {
2606 struct sys_chown_args /* {
2607 syscallarg(const char *) path;
2608 syscallarg(uid_t) uid;
2609 syscallarg(gid_t) gid;
2610 } */ *uap = v;
2611 int error;
2612 struct nameidata nd;
2613
2614 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2615 if ((error = namei(&nd)) != 0)
2616 return (error);
2617
2618 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2619
2620 vrele(nd.ni_vp);
2621 return (error);
2622 }
2623
2624 /*
2625 * Set ownership given a path name; this version follows links.
2626 * Provides POSIX semantics.
2627 */
2628 /* ARGSUSED */
2629 int
2630 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2631 {
2632 struct sys_chown_args /* {
2633 syscallarg(const char *) path;
2634 syscallarg(uid_t) uid;
2635 syscallarg(gid_t) gid;
2636 } */ *uap = v;
2637 int error;
2638 struct nameidata nd;
2639
2640 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2641 if ((error = namei(&nd)) != 0)
2642 return (error);
2643
2644 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2645
2646 vrele(nd.ni_vp);
2647 return (error);
2648 }
2649
2650 /*
2651 * Set ownership given a file descriptor.
2652 */
2653 /* ARGSUSED */
2654 int
2655 sys_fchown(struct lwp *l, void *v, register_t *retval)
2656 {
2657 struct sys_fchown_args /* {
2658 syscallarg(int) fd;
2659 syscallarg(uid_t) uid;
2660 syscallarg(gid_t) gid;
2661 } */ *uap = v;
2662 struct proc *p = l->l_proc;
2663 int error;
2664 struct file *fp;
2665
2666 /* getvnode() will use the descriptor for us */
2667 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2668 return (error);
2669
2670 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2671 SCARG(uap, gid), l, 0);
2672 FILE_UNUSE(fp, l);
2673 return (error);
2674 }
2675
2676 /*
2677 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2678 */
2679 /* ARGSUSED */
2680 int
2681 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2682 {
2683 struct sys_fchown_args /* {
2684 syscallarg(int) fd;
2685 syscallarg(uid_t) uid;
2686 syscallarg(gid_t) gid;
2687 } */ *uap = v;
2688 struct proc *p = l->l_proc;
2689 int error;
2690 struct file *fp;
2691
2692 /* getvnode() will use the descriptor for us */
2693 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2694 return (error);
2695
2696 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2697 SCARG(uap, gid), l, 1);
2698 FILE_UNUSE(fp, l);
2699 return (error);
2700 }
2701
2702 /*
2703 * Set ownership given a path name; this version does not follow links.
2704 */
2705 /* ARGSUSED */
2706 int
2707 sys_lchown(struct lwp *l, void *v, register_t *retval)
2708 {
2709 struct sys_lchown_args /* {
2710 syscallarg(const char *) path;
2711 syscallarg(uid_t) uid;
2712 syscallarg(gid_t) gid;
2713 } */ *uap = v;
2714 int error;
2715 struct nameidata nd;
2716
2717 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2718 if ((error = namei(&nd)) != 0)
2719 return (error);
2720
2721 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2722
2723 vrele(nd.ni_vp);
2724 return (error);
2725 }
2726
2727 /*
2728 * Set ownership given a path name; this version does not follow links.
2729 * Provides POSIX/XPG semantics.
2730 */
2731 /* ARGSUSED */
2732 int
2733 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2734 {
2735 struct sys_lchown_args /* {
2736 syscallarg(const char *) path;
2737 syscallarg(uid_t) uid;
2738 syscallarg(gid_t) gid;
2739 } */ *uap = v;
2740 int error;
2741 struct nameidata nd;
2742
2743 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2744 if ((error = namei(&nd)) != 0)
2745 return (error);
2746
2747 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2748
2749 vrele(nd.ni_vp);
2750 return (error);
2751 }
2752
2753 /*
2754 * Common routine to set ownership given a vnode.
2755 */
2756 static int
2757 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2758 int posix_semantics)
2759 {
2760 struct proc *p = l->l_proc;
2761 struct mount *mp;
2762 struct vattr vattr;
2763 mode_t newmode;
2764 int error;
2765
2766 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2767 return (error);
2768 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2769 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2770 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
2771 goto out;
2772
2773 #define CHANGED(x) ((int)(x) != -1)
2774 newmode = vattr.va_mode;
2775 if (posix_semantics) {
2776 /*
2777 * POSIX/XPG semantics: if the caller is not the super-user,
2778 * clear set-user-id and set-group-id bits. Both POSIX and
2779 * the XPG consider the behaviour for calls by the super-user
2780 * implementation-defined; we leave the set-user-id and set-
2781 * group-id settings intact in that case.
2782 */
2783 if (kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
2784 NULL) != 0)
2785 newmode &= ~(S_ISUID | S_ISGID);
2786 } else {
2787 /*
2788 * NetBSD semantics: when changing owner and/or group,
2789 * clear the respective bit(s).
2790 */
2791 if (CHANGED(uid))
2792 newmode &= ~S_ISUID;
2793 if (CHANGED(gid))
2794 newmode &= ~S_ISGID;
2795 }
2796 /* Update va_mode iff altered. */
2797 if (vattr.va_mode == newmode)
2798 newmode = VNOVAL;
2799
2800 VATTR_NULL(&vattr);
2801 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2802 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2803 vattr.va_mode = newmode;
2804 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2805 #undef CHANGED
2806
2807 out:
2808 VOP_UNLOCK(vp, 0);
2809 vn_finished_write(mp, 0);
2810 return (error);
2811 }
2812
2813 /*
2814 * Set the access and modification times given a path name; this
2815 * version follows links.
2816 */
2817 /* ARGSUSED */
2818 int
2819 sys_utimes(struct lwp *l, void *v, register_t *retval)
2820 {
2821 struct sys_utimes_args /* {
2822 syscallarg(const char *) path;
2823 syscallarg(const struct timeval *) tptr;
2824 } */ *uap = v;
2825 int error;
2826 struct nameidata nd;
2827
2828 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2829 if ((error = namei(&nd)) != 0)
2830 return (error);
2831
2832 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2833
2834 vrele(nd.ni_vp);
2835 return (error);
2836 }
2837
2838 /*
2839 * Set the access and modification times given a file descriptor.
2840 */
2841 /* ARGSUSED */
2842 int
2843 sys_futimes(struct lwp *l, void *v, register_t *retval)
2844 {
2845 struct sys_futimes_args /* {
2846 syscallarg(int) fd;
2847 syscallarg(const struct timeval *) tptr;
2848 } */ *uap = v;
2849 struct proc *p = l->l_proc;
2850 int error;
2851 struct file *fp;
2852
2853 /* getvnode() will use the descriptor for us */
2854 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2855 return (error);
2856
2857 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
2858 FILE_UNUSE(fp, l);
2859 return (error);
2860 }
2861
2862 /*
2863 * Set the access and modification times given a path name; this
2864 * version does not follow links.
2865 */
2866 /* ARGSUSED */
2867 int
2868 sys_lutimes(struct lwp *l, void *v, register_t *retval)
2869 {
2870 struct sys_lutimes_args /* {
2871 syscallarg(const char *) path;
2872 syscallarg(const struct timeval *) tptr;
2873 } */ *uap = v;
2874 int error;
2875 struct nameidata nd;
2876
2877 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2878 if ((error = namei(&nd)) != 0)
2879 return (error);
2880
2881 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2882
2883 vrele(nd.ni_vp);
2884 return (error);
2885 }
2886
2887 /*
2888 * Common routine to set access and modification times given a vnode.
2889 */
2890 static int
2891 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
2892 {
2893 struct proc *p = l->l_proc;
2894 struct mount *mp;
2895 struct vattr vattr;
2896 int error;
2897
2898 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2899 return (error);
2900 VATTR_NULL(&vattr);
2901 if (tptr == NULL) {
2902 nanotime(&vattr.va_atime);
2903 vattr.va_mtime = vattr.va_atime;
2904 vattr.va_vaflags |= VA_UTIMES_NULL;
2905 } else {
2906 struct timeval tv[2];
2907
2908 error = copyin(tptr, tv, sizeof(tv));
2909 if (error)
2910 goto out;
2911 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
2912 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
2913 }
2914 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2916 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2917 VOP_UNLOCK(vp, 0);
2918 out:
2919 vn_finished_write(mp, 0);
2920 return (error);
2921 }
2922
2923 /*
2924 * Truncate a file given its path name.
2925 */
2926 /* ARGSUSED */
2927 int
2928 sys_truncate(struct lwp *l, void *v, register_t *retval)
2929 {
2930 struct sys_truncate_args /* {
2931 syscallarg(const char *) path;
2932 syscallarg(int) pad;
2933 syscallarg(off_t) length;
2934 } */ *uap = v;
2935 struct proc *p = l->l_proc;
2936 struct vnode *vp;
2937 struct mount *mp;
2938 struct vattr vattr;
2939 int error;
2940 struct nameidata nd;
2941
2942 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2943 if ((error = namei(&nd)) != 0)
2944 return (error);
2945 vp = nd.ni_vp;
2946 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
2947 vrele(vp);
2948 return (error);
2949 }
2950 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2951 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2952 if (vp->v_type == VDIR)
2953 error = EISDIR;
2954 else if ((error = vn_writechk(vp)) == 0 &&
2955 (error = VOP_ACCESS(vp, VWRITE, p->p_cred, l)) == 0) {
2956 VATTR_NULL(&vattr);
2957 vattr.va_size = SCARG(uap, length);
2958 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2959 }
2960 vput(vp);
2961 vn_finished_write(mp, 0);
2962 return (error);
2963 }
2964
2965 /*
2966 * Truncate a file given a file descriptor.
2967 */
2968 /* ARGSUSED */
2969 int
2970 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
2971 {
2972 struct sys_ftruncate_args /* {
2973 syscallarg(int) fd;
2974 syscallarg(int) pad;
2975 syscallarg(off_t) length;
2976 } */ *uap = v;
2977 struct proc *p = l->l_proc;
2978 struct mount *mp;
2979 struct vattr vattr;
2980 struct vnode *vp;
2981 struct file *fp;
2982 int error;
2983
2984 /* getvnode() will use the descriptor for us */
2985 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2986 return (error);
2987 if ((fp->f_flag & FWRITE) == 0) {
2988 error = EINVAL;
2989 goto out;
2990 }
2991 vp = (struct vnode *)fp->f_data;
2992 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
2993 FILE_UNUSE(fp, l);
2994 return (error);
2995 }
2996 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2998 if (vp->v_type == VDIR)
2999 error = EISDIR;
3000 else if ((error = vn_writechk(vp)) == 0) {
3001 VATTR_NULL(&vattr);
3002 vattr.va_size = SCARG(uap, length);
3003 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3004 }
3005 VOP_UNLOCK(vp, 0);
3006 vn_finished_write(mp, 0);
3007 out:
3008 FILE_UNUSE(fp, l);
3009 return (error);
3010 }
3011
3012 /*
3013 * Sync an open file.
3014 */
3015 /* ARGSUSED */
3016 int
3017 sys_fsync(struct lwp *l, void *v, register_t *retval)
3018 {
3019 struct sys_fsync_args /* {
3020 syscallarg(int) fd;
3021 } */ *uap = v;
3022 struct proc *p = l->l_proc;
3023 struct vnode *vp;
3024 struct mount *mp;
3025 struct file *fp;
3026 int error;
3027
3028 /* getvnode() will use the descriptor for us */
3029 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3030 return (error);
3031 vp = (struct vnode *)fp->f_data;
3032 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3033 FILE_UNUSE(fp, l);
3034 return (error);
3035 }
3036 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3037 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3038 if (error == 0 && bioops.io_fsync != NULL &&
3039 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3040 (*bioops.io_fsync)(vp, 0);
3041 VOP_UNLOCK(vp, 0);
3042 vn_finished_write(mp, 0);
3043 FILE_UNUSE(fp, l);
3044 return (error);
3045 }
3046
3047 /*
3048 * Sync a range of file data. API modeled after that found in AIX.
3049 *
3050 * FDATASYNC indicates that we need only save enough metadata to be able
3051 * to re-read the written data. Note we duplicate AIX's requirement that
3052 * the file be open for writing.
3053 */
3054 /* ARGSUSED */
3055 int
3056 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3057 {
3058 struct sys_fsync_range_args /* {
3059 syscallarg(int) fd;
3060 syscallarg(int) flags;
3061 syscallarg(off_t) start;
3062 syscallarg(off_t) length;
3063 } */ *uap = v;
3064 struct proc *p = l->l_proc;
3065 struct vnode *vp;
3066 struct file *fp;
3067 int flags, nflags;
3068 off_t s, e, len;
3069 int error;
3070
3071 /* getvnode() will use the descriptor for us */
3072 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3073 return (error);
3074
3075 if ((fp->f_flag & FWRITE) == 0) {
3076 FILE_UNUSE(fp, l);
3077 return (EBADF);
3078 }
3079
3080 flags = SCARG(uap, flags);
3081 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3082 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3083 return (EINVAL);
3084 }
3085 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3086 if (flags & FDATASYNC)
3087 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3088 else
3089 nflags = FSYNC_WAIT;
3090 if (flags & FDISKSYNC)
3091 nflags |= FSYNC_CACHE;
3092
3093 len = SCARG(uap, length);
3094 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3095 if (len) {
3096 s = SCARG(uap, start);
3097 e = s + len;
3098 if (e < s) {
3099 FILE_UNUSE(fp, l);
3100 return (EINVAL);
3101 }
3102 } else {
3103 e = 0;
3104 s = 0;
3105 }
3106
3107 vp = (struct vnode *)fp->f_data;
3108 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3109 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3110
3111 if (error == 0 && bioops.io_fsync != NULL &&
3112 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3113 (*bioops.io_fsync)(vp, nflags);
3114
3115 VOP_UNLOCK(vp, 0);
3116 FILE_UNUSE(fp, l);
3117 return (error);
3118 }
3119
3120 /*
3121 * Sync the data of an open file.
3122 */
3123 /* ARGSUSED */
3124 int
3125 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3126 {
3127 struct sys_fdatasync_args /* {
3128 syscallarg(int) fd;
3129 } */ *uap = v;
3130 struct proc *p = l->l_proc;
3131 struct vnode *vp;
3132 struct file *fp;
3133 int error;
3134
3135 /* getvnode() will use the descriptor for us */
3136 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3137 return (error);
3138 if ((fp->f_flag & FWRITE) == 0) {
3139 FILE_UNUSE(fp, l);
3140 return (EBADF);
3141 }
3142 vp = (struct vnode *)fp->f_data;
3143 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3144 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3145 VOP_UNLOCK(vp, 0);
3146 FILE_UNUSE(fp, l);
3147 return (error);
3148 }
3149
3150 /*
3151 * Rename files, (standard) BSD semantics frontend.
3152 */
3153 /* ARGSUSED */
3154 int
3155 sys_rename(struct lwp *l, void *v, register_t *retval)
3156 {
3157 struct sys_rename_args /* {
3158 syscallarg(const char *) from;
3159 syscallarg(const char *) to;
3160 } */ *uap = v;
3161
3162 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3163 }
3164
3165 /*
3166 * Rename files, POSIX semantics frontend.
3167 */
3168 /* ARGSUSED */
3169 int
3170 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3171 {
3172 struct sys___posix_rename_args /* {
3173 syscallarg(const char *) from;
3174 syscallarg(const char *) to;
3175 } */ *uap = v;
3176
3177 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3178 }
3179
3180 /*
3181 * Rename files. Source and destination must either both be directories,
3182 * or both not be directories. If target is a directory, it must be empty.
3183 * If `from' and `to' refer to the same object, the value of the `retain'
3184 * argument is used to determine whether `from' will be
3185 *
3186 * (retain == 0) deleted unless `from' and `to' refer to the same
3187 * object in the file system's name space (BSD).
3188 * (retain == 1) always retained (POSIX).
3189 */
3190 static int
3191 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3192 {
3193 struct mount *mp = NULL;
3194 struct vnode *tvp, *fvp, *tdvp;
3195 struct nameidata fromnd, tond;
3196 struct proc *p;
3197 int error;
3198
3199 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3200 from, l);
3201 if ((error = namei(&fromnd)) != 0)
3202 return (error);
3203 fvp = fromnd.ni_vp;
3204 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3205 if (error != 0) {
3206 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3207 vrele(fromnd.ni_dvp);
3208 vrele(fvp);
3209 if (fromnd.ni_startdir)
3210 vrele(fromnd.ni_startdir);
3211 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3212 return (error);
3213 }
3214 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3215 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3216 if ((error = namei(&tond)) != 0) {
3217 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3218 vrele(fromnd.ni_dvp);
3219 vrele(fvp);
3220 goto out1;
3221 }
3222 tdvp = tond.ni_dvp;
3223 tvp = tond.ni_vp;
3224
3225 if (tvp != NULL) {
3226 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3227 error = ENOTDIR;
3228 goto out;
3229 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3230 error = EISDIR;
3231 goto out;
3232 }
3233 }
3234
3235 if (fvp == tdvp)
3236 error = EINVAL;
3237
3238 /*
3239 * Source and destination refer to the same object.
3240 */
3241 if (fvp == tvp) {
3242 if (retain)
3243 error = -1;
3244 else if (fromnd.ni_dvp == tdvp &&
3245 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3246 !memcmp(fromnd.ni_cnd.cn_nameptr,
3247 tond.ni_cnd.cn_nameptr,
3248 fromnd.ni_cnd.cn_namelen))
3249 error = -1;
3250 }
3251
3252 #ifdef VERIFIED_EXEC
3253 if (!error)
3254 error = veriexec_renamechk(fvp, fromnd.ni_dirp, tond.ni_dirp, l);
3255 #endif /* VERIFIED_EXEC */
3256
3257 out:
3258 p = l->l_proc;
3259 if (!error) {
3260 VOP_LEASE(tdvp, l, p->p_cred, LEASE_WRITE);
3261 if (fromnd.ni_dvp != tdvp)
3262 VOP_LEASE(fromnd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3263 if (tvp) {
3264 VOP_LEASE(tvp, l, p->p_cred, LEASE_WRITE);
3265 }
3266 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3267 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3268 } else {
3269 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3270 if (tdvp == tvp)
3271 vrele(tdvp);
3272 else
3273 vput(tdvp);
3274 if (tvp)
3275 vput(tvp);
3276 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3277 vrele(fromnd.ni_dvp);
3278 vrele(fvp);
3279 }
3280 vrele(tond.ni_startdir);
3281 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3282 out1:
3283 vn_finished_write(mp, 0);
3284 if (fromnd.ni_startdir)
3285 vrele(fromnd.ni_startdir);
3286 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3287 return (error == -1 ? 0 : error);
3288 }
3289
3290 /*
3291 * Make a directory file.
3292 */
3293 /* ARGSUSED */
3294 int
3295 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3296 {
3297 struct sys_mkdir_args /* {
3298 syscallarg(const char *) path;
3299 syscallarg(int) mode;
3300 } */ *uap = v;
3301 struct proc *p = l->l_proc;
3302 struct mount *mp;
3303 struct vnode *vp;
3304 struct vattr vattr;
3305 int error;
3306 struct nameidata nd;
3307
3308 restart:
3309 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3310 SCARG(uap, path), l);
3311 if ((error = namei(&nd)) != 0)
3312 return (error);
3313 vp = nd.ni_vp;
3314 if (vp != NULL) {
3315 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3316 if (nd.ni_dvp == vp)
3317 vrele(nd.ni_dvp);
3318 else
3319 vput(nd.ni_dvp);
3320 vrele(vp);
3321 return (EEXIST);
3322 }
3323 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3324 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3325 if (nd.ni_dvp == vp)
3326 vrele(nd.ni_dvp);
3327 else
3328 vput(nd.ni_dvp);
3329 if ((error = vn_start_write(NULL, &mp,
3330 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3331 return (error);
3332 goto restart;
3333 }
3334 VATTR_NULL(&vattr);
3335 vattr.va_type = VDIR;
3336 vattr.va_mode =
3337 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3338 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3339 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3340 if (!error)
3341 vput(nd.ni_vp);
3342 vn_finished_write(mp, 0);
3343 return (error);
3344 }
3345
3346 /*
3347 * Remove a directory file.
3348 */
3349 /* ARGSUSED */
3350 int
3351 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3352 {
3353 struct sys_rmdir_args /* {
3354 syscallarg(const char *) path;
3355 } */ *uap = v;
3356 struct proc *p = l->l_proc;
3357 struct mount *mp;
3358 struct vnode *vp;
3359 int error;
3360 struct nameidata nd;
3361
3362 restart:
3363 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3364 SCARG(uap, path), l);
3365 if ((error = namei(&nd)) != 0)
3366 return (error);
3367 vp = nd.ni_vp;
3368 if (vp->v_type != VDIR) {
3369 error = ENOTDIR;
3370 goto out;
3371 }
3372 /*
3373 * No rmdir "." please.
3374 */
3375 if (nd.ni_dvp == vp) {
3376 error = EINVAL;
3377 goto out;
3378 }
3379 /*
3380 * The root of a mounted filesystem cannot be deleted.
3381 */
3382 if (vp->v_flag & VROOT) {
3383 error = EBUSY;
3384 goto out;
3385 }
3386 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3387 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3388 if (nd.ni_dvp == vp)
3389 vrele(nd.ni_dvp);
3390 else
3391 vput(nd.ni_dvp);
3392 vput(vp);
3393 if ((error = vn_start_write(NULL, &mp,
3394 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3395 return (error);
3396 goto restart;
3397 }
3398 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3399 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
3400 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3401 vn_finished_write(mp, 0);
3402 return (error);
3403
3404 out:
3405 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3406 if (nd.ni_dvp == vp)
3407 vrele(nd.ni_dvp);
3408 else
3409 vput(nd.ni_dvp);
3410 vput(vp);
3411 return (error);
3412 }
3413
3414 /*
3415 * Read a block of directory entries in a file system independent format.
3416 */
3417 int
3418 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3419 {
3420 struct sys___getdents30_args /* {
3421 syscallarg(int) fd;
3422 syscallarg(char *) buf;
3423 syscallarg(size_t) count;
3424 } */ *uap = v;
3425 struct proc *p = l->l_proc;
3426 struct file *fp;
3427 int error, done;
3428
3429 /* getvnode() will use the descriptor for us */
3430 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3431 return (error);
3432 if ((fp->f_flag & FREAD) == 0) {
3433 error = EBADF;
3434 goto out;
3435 }
3436 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3437 SCARG(uap, count), &done, l, 0, 0);
3438 #ifdef KTRACE
3439 if (!error && KTRPOINT(p, KTR_GENIO)) {
3440 struct iovec iov;
3441 iov.iov_base = SCARG(uap, buf);
3442 iov.iov_len = done;
3443 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3444 }
3445 #endif
3446 *retval = done;
3447 out:
3448 FILE_UNUSE(fp, l);
3449 return (error);
3450 }
3451
3452 /*
3453 * Set the mode mask for creation of filesystem nodes.
3454 */
3455 int
3456 sys_umask(struct lwp *l, void *v, register_t *retval)
3457 {
3458 struct sys_umask_args /* {
3459 syscallarg(mode_t) newmask;
3460 } */ *uap = v;
3461 struct proc *p = l->l_proc;
3462 struct cwdinfo *cwdi;
3463
3464 cwdi = p->p_cwdi;
3465 *retval = cwdi->cwdi_cmask;
3466 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3467 return (0);
3468 }
3469
3470 /*
3471 * Void all references to file by ripping underlying filesystem
3472 * away from vnode.
3473 */
3474 /* ARGSUSED */
3475 int
3476 sys_revoke(struct lwp *l, void *v, register_t *retval)
3477 {
3478 struct sys_revoke_args /* {
3479 syscallarg(const char *) path;
3480 } */ *uap = v;
3481 struct proc *p = l->l_proc;
3482 struct mount *mp;
3483 struct vnode *vp;
3484 struct vattr vattr;
3485 int error;
3486 struct nameidata nd;
3487
3488 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3489 if ((error = namei(&nd)) != 0)
3490 return (error);
3491 vp = nd.ni_vp;
3492 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
3493 goto out;
3494 if (kauth_cred_geteuid(p->p_cred) != vattr.va_uid &&
3495 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
3496 &p->p_acflag)) != 0)
3497 goto out;
3498 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3499 goto out;
3500 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3501 VOP_REVOKE(vp, REVOKEALL);
3502 vn_finished_write(mp, 0);
3503 out:
3504 vrele(vp);
3505 return (error);
3506 }
3507
3508 /*
3509 * Convert a user file descriptor to a kernel file entry.
3510 */
3511 int
3512 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3513 {
3514 struct vnode *vp;
3515 struct file *fp;
3516
3517 if ((fp = fd_getfile(fdp, fd)) == NULL)
3518 return (EBADF);
3519
3520 FILE_USE(fp);
3521
3522 if (fp->f_type != DTYPE_VNODE) {
3523 FILE_UNUSE(fp, NULL);
3524 return (EINVAL);
3525 }
3526
3527 vp = (struct vnode *)fp->f_data;
3528 if (vp->v_type == VBAD) {
3529 FILE_UNUSE(fp, NULL);
3530 return (EBADF);
3531 }
3532
3533 *fpp = fp;
3534 return (0);
3535 }
3536