vfs_syscalls.c revision 1.251 1 /* $NetBSD: vfs_syscalls.c,v 1.251 2006/07/14 18:41:40 elad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.251 2006/07/14 18:41:40 elad Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_ktrace.h"
45 #include "opt_verified_exec.h"
46 #include "fss.h"
47 #include "opt_fileassoc.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/sa.h>
65 #include <sys/syscallargs.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #ifdef VERIFIED_EXEC
73 #include <sys/verified_exec.h>
74 #endif /* VERIFIED_EXEC */
75 #include <sys/kauth.h>
76
77 #include <miscfs/genfs/genfs.h>
78 #include <miscfs/syncfs/syncfs.h>
79
80 #ifdef COMPAT_30
81 #include "opt_nfsserver.h"
82 #include <nfs/rpcv2.h>
83 #include <nfs/nfsproto.h>
84 #include <nfs/nfs.h>
85 #include <nfs/nfs_var.h>
86 #endif
87
88 #if NFSS > 0
89 #include <dev/fssvar.h>
90 #endif
91
92 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
93
94 static int change_dir(struct nameidata *, struct lwp *);
95 static int change_flags(struct vnode *, u_long, struct lwp *);
96 static int change_mode(struct vnode *, int, struct lwp *l);
97 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
98 static int change_utimes(struct vnode *vp, const struct timeval *,
99 struct lwp *l);
100 static int rename_files(const char *, const char *, struct lwp *, int);
101
102 void checkdirs(struct vnode *);
103
104 int dovfsusermount = 0;
105
106 /*
107 * Virtual File System System Calls
108 */
109
110 /*
111 * Mount a file system.
112 */
113
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Historic numeric file system type codes, as used by the 4.3BSD and
 * NetBSD 0.9 mount system calls, mapped to file system names.  The
 * array index is the historic code, so the position of each entry is
 * significant.
 *
 * This table is frozen.  It must only list file systems that were
 * supported by NetBSD 0.9 and 4.3BSD; codes without a usable name map
 * to NULL.
 */
const char * const mountcompatnames[] = {
	[0] = NULL,		/* MOUNT_NONE */
	[1] = MOUNT_FFS,	/* MOUNT_UFS */
	[2] = MOUNT_NFS,
	[3] = MOUNT_MFS,
	[4] = MOUNT_MSDOS,
	[5] = MOUNT_CD9660,	/* MOUNT_ISOFS */
	[6] = MOUNT_FDESC,
	[7] = MOUNT_KERNFS,
	[8] = NULL,		/* MOUNT_DEVFS */
	[9] = MOUNT_AFS,
};

/* Number of entries in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
137
138 /* ARGSUSED */
139 int
140 sys_mount(struct lwp *l, void *v, register_t *retval)
141 {
142 struct sys_mount_args /* {
143 syscallarg(const char *) type;
144 syscallarg(const char *) path;
145 syscallarg(int) flags;
146 syscallarg(void *) data;
147 } */ *uap = v;
148 struct proc *p = l->l_proc;
149 struct vnode *vp;
150 struct mount *mp;
151 int error, flag = 0;
152 char fstypename[MFSNAMELEN];
153 struct vattr va;
154 struct nameidata nd;
155 struct vfsops *vfs;
156
157 /*
158 * if MNT_GETARGS is specified, it should be only flag.
159 */
160
161 if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
162 (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
163 return EINVAL;
164 }
165
166 if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
167 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
168 &p->p_acflag)))
169 return (error);
170 /*
171 * Get vnode to be covered
172 */
173 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
174 SCARG(uap, path), l);
175 if ((error = namei(&nd)) != 0)
176 return (error);
177 vp = nd.ni_vp;
178 /*
179 * A lookup in VFS_MOUNT might result in an attempt to
180 * lock this vnode again, so make the lock recursive.
181 */
182 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
183 if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
184 if ((vp->v_flag & VROOT) == 0) {
185 vput(vp);
186 return (EINVAL);
187 }
188 mp = vp->v_mount;
189 flag = mp->mnt_flag;
190 vfs = mp->mnt_op;
191 /*
192 * We only allow the filesystem to be reloaded if it
193 * is currently mounted read-only.
194 */
195 if ((SCARG(uap, flags) & MNT_RELOAD) &&
196 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
197 vput(vp);
198 return (EOPNOTSUPP); /* Needs translation */
199 }
200 /*
201 * In "highly secure" mode, don't let the caller do anything
202 * but downgrade a filesystem from read-write to read-only.
203 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
204 */
205 if (securelevel >= 2 &&
206 SCARG(uap, flags) != MNT_GETARGS &&
207 SCARG(uap, flags) !=
208 (mp->mnt_flag | MNT_RDONLY |
209 MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
210 vput(vp);
211 return (EPERM);
212 }
213 mp->mnt_flag |= SCARG(uap, flags) &
214 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
215 /*
216 * Only root, or the user that did the original mount is
217 * permitted to update it.
218 */
219 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
220 mp->mnt_stat.f_owner != kauth_cred_geteuid(p->p_cred) &&
221 (error = kauth_authorize_generic(p->p_cred,
222 KAUTH_GENERIC_ISSUSER,
223 &p->p_acflag)) != 0) {
224 vput(vp);
225 return (error);
226 }
227 /*
228 * Do not allow NFS export by non-root users. For non-root
229 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
230 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
231 */
232 if (kauth_cred_geteuid(p->p_cred) != 0) {
233 if (SCARG(uap, flags) & MNT_EXPORTED) {
234 vput(vp);
235 return (EPERM);
236 }
237 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
238 if (flag & MNT_NOEXEC)
239 SCARG(uap, flags) |= MNT_NOEXEC;
240 }
241 if (vfs_busy(mp, LK_NOWAIT, 0)) {
242 vput(vp);
243 return (EPERM);
244 }
245 goto update;
246 } else {
247 if (securelevel >= 2) {
248 vput(vp);
249 return (EPERM);
250 }
251 }
252 /*
253 * If the user is not root, ensure that they own the directory
254 * onto which we are attempting to mount.
255 */
256 if ((error = VOP_GETATTR(vp, &va, p->p_cred, l)) != 0 ||
257 (va.va_uid != kauth_cred_geteuid(p->p_cred) &&
258 (error = kauth_authorize_generic(p->p_cred,
259 KAUTH_GENERIC_ISSUSER,
260 &p->p_acflag)) != 0)) {
261 vput(vp);
262 return (error);
263 }
264 /*
265 * Do not allow NFS export by non-root users. For non-root users,
266 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
267 * mount point is already MNT_NOEXEC.
268 */
269 if (kauth_cred_geteuid(p->p_cred) != 0) {
270 if (SCARG(uap, flags) & MNT_EXPORTED) {
271 vput(vp);
272 return (EPERM);
273 }
274 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
275 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
276 SCARG(uap, flags) |= MNT_NOEXEC;
277 }
278 if ((error = vinvalbuf(vp, V_SAVE, p->p_cred, l, 0, 0)) != 0) {
279 vput(vp);
280 return (error);
281 }
282 if (vp->v_type != VDIR) {
283 vput(vp);
284 return (ENOTDIR);
285 }
286 error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
287 if (error) {
288 #if defined(COMPAT_09) || defined(COMPAT_43)
289 /*
290 * Historically, filesystem types were identified by numbers.
291 * If we get an integer for the filesystem type instead of a
292 * string, we check to see if it matches one of the historic
293 * filesystem types.
294 */
295 u_long fsindex = (u_long)SCARG(uap, type);
296 if (fsindex >= nmountcompatnames ||
297 mountcompatnames[fsindex] == NULL) {
298 vput(vp);
299 return (ENODEV);
300 }
301 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
302 #else
303 vput(vp);
304 return (error);
305 #endif
306 }
307 #ifdef COMPAT_10
308 /* Accept `ufs' as an alias for `ffs'. */
309 if (!strncmp(fstypename, "ufs", MFSNAMELEN))
310 strncpy(fstypename, "ffs", MFSNAMELEN);
311 #endif
312 if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
313 vput(vp);
314 return (ENODEV);
315 }
316 if (vp->v_mountedhere != NULL) {
317 vput(vp);
318 return (EBUSY);
319 }
320
321 /*
322 * Allocate and initialize the file system.
323 */
324 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
325 M_MOUNT, M_WAITOK);
326 memset((char *)mp, 0, (u_long)sizeof(struct mount));
327 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
328 simple_lock_init(&mp->mnt_slock);
329 (void)vfs_busy(mp, LK_NOWAIT, 0);
330 mp->mnt_op = vfs;
331 vfs->vfs_refcount++;
332 mp->mnt_vnodecovered = vp;
333 mp->mnt_stat.f_owner = kauth_cred_geteuid(p->p_cred);
334 mp->mnt_unmounter = NULL;
335 mp->mnt_leaf = mp;
336
337 /*
338 * The underlying file system may refuse the mount for
339 * various reasons. Allow the user to force it to happen.
340 */
341 mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
342 update:
343 if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
344 /*
345 * Set the mount level flags.
346 */
347 if (SCARG(uap, flags) & MNT_RDONLY)
348 mp->mnt_flag |= MNT_RDONLY;
349 else if (mp->mnt_flag & MNT_RDONLY)
350 mp->mnt_iflag |= IMNT_WANTRDWR;
351 mp->mnt_flag &=
352 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
353 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
354 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
355 mp->mnt_flag |= SCARG(uap, flags) &
356 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
357 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
358 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
359 MNT_IGNORE);
360 }
361 /*
362 * Mount the filesystem.
363 */
364 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
365 if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
366 #if defined(COMPAT_30) && defined(NFSSERVER)
367 if (mp->mnt_flag & MNT_UPDATE && error != 0) {
368 int error2;
369
370 /* Update failed; let's try and see if it was an
371 * export request. */
372 error2 = nfs_update_exports_30(mp, SCARG(uap, path),
373 SCARG(uap, data), l);
374
375 /* Only update error code if the export request was
376 * understood but some problem occurred while
377 * processing it. */
378 if (error2 != EJUSTRETURN)
379 error = error2;
380 }
381 #endif
382 if (mp->mnt_iflag & IMNT_WANTRDWR)
383 mp->mnt_flag &= ~MNT_RDONLY;
384 if (error)
385 mp->mnt_flag = flag;
386 mp->mnt_flag &=~
387 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
388 mp->mnt_iflag &=~ IMNT_WANTRDWR;
389 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
390 if (mp->mnt_syncer == NULL)
391 error = vfs_allocate_syncvnode(mp);
392 } else {
393 if (mp->mnt_syncer != NULL)
394 vfs_deallocate_syncvnode(mp);
395 }
396 vfs_unbusy(mp);
397 VOP_UNLOCK(vp, 0);
398 vrele(vp);
399 return (error);
400 }
401 /*
402 * Put the new filesystem on the mount list after root.
403 */
404 cache_purge(vp);
405 if (!error) {
406 mp->mnt_flag &=~
407 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
408 mp->mnt_iflag &=~ IMNT_WANTRDWR;
409 vp->v_mountedhere = mp;
410 simple_lock(&mountlist_slock);
411 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
412 simple_unlock(&mountlist_slock);
413 checkdirs(vp);
414 VOP_UNLOCK(vp, 0);
415 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
416 error = vfs_allocate_syncvnode(mp);
417 vfs_unbusy(mp);
418 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
419 if ((error = VFS_START(mp, 0, l)))
420 vrele(vp);
421 } else {
422 vp->v_mountedhere = (struct mount *)0;
423 vfs->vfs_refcount--;
424 vfs_unbusy(mp);
425 free(mp, M_MOUNT);
426 vput(vp);
427 }
428 return (error);
429 }
430
431 /*
432 * Scan all active processes to see if any of them have a current
433 * or root directory onto which the new filesystem has just been
434 * mounted. If so, replace them with the new mount point.
435 */
436 void
437 checkdirs(struct vnode *olddp)
438 {
439 struct cwdinfo *cwdi;
440 struct vnode *newdp;
441 struct proc *p;
442
443 if (olddp->v_usecount == 1)
444 return;
445 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
446 panic("mount: lost mount");
447 proclist_lock_read();
448 PROCLIST_FOREACH(p, &allproc) {
449 cwdi = p->p_cwdi;
450 if (!cwdi)
451 continue;
452 if (cwdi->cwdi_cdir == olddp) {
453 vrele(cwdi->cwdi_cdir);
454 VREF(newdp);
455 cwdi->cwdi_cdir = newdp;
456 }
457 if (cwdi->cwdi_rdir == olddp) {
458 vrele(cwdi->cwdi_rdir);
459 VREF(newdp);
460 cwdi->cwdi_rdir = newdp;
461 }
462 }
463 proclist_unlock_read();
464 if (rootvnode == olddp) {
465 vrele(rootvnode);
466 VREF(newdp);
467 rootvnode = newdp;
468 }
469 vput(newdp);
470 }
471
472 /*
473 * Unmount a file system.
474 *
475 * Note: unmount takes a path to the vnode mounted on as argument,
476 * not special file (as before).
477 */
478 /* ARGSUSED */
479 int
480 sys_unmount(struct lwp *l, void *v, register_t *retval)
481 {
482 struct sys_unmount_args /* {
483 syscallarg(const char *) path;
484 syscallarg(int) flags;
485 } */ *uap = v;
486 struct proc *p = l->l_proc;
487 struct vnode *vp;
488 struct mount *mp;
489 int error;
490 struct nameidata nd;
491
492 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
493 SCARG(uap, path), l);
494 if ((error = namei(&nd)) != 0)
495 return (error);
496 vp = nd.ni_vp;
497 mp = vp->v_mount;
498
499 /*
500 * Only root, or the user that did the original mount is
501 * permitted to unmount this filesystem.
502 */
503 if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(p->p_cred)) &&
504 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
505 &p->p_acflag)) != 0) {
506 vput(vp);
507 return (error);
508 }
509
510 /*
511 * Don't allow unmounting the root file system.
512 */
513 if (mp->mnt_flag & MNT_ROOTFS) {
514 vput(vp);
515 return (EINVAL);
516 }
517
518 /*
519 * Must be the root of the filesystem
520 */
521 if ((vp->v_flag & VROOT) == 0) {
522 vput(vp);
523 return (EINVAL);
524 }
525 vput(vp);
526
527 /*
528 * XXX Freeze syncer. Must do this before locking the
529 * mount point. See dounmount() for details.
530 */
531 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
532
533 if (vfs_busy(mp, 0, 0)) {
534 lockmgr(&syncer_lock, LK_RELEASE, NULL);
535 return (EBUSY);
536 }
537
538 return (dounmount(mp, SCARG(uap, flags), l));
539 }
540
541 /*
542 * Do the actual file system unmount. File system is assumed to have been
543 * marked busy by the caller.
544 */
545 int
546 dounmount(struct mount *mp, int flags, struct lwp *l)
547 {
548 struct vnode *coveredvp;
549 int error;
550 int async;
551 int used_syncer;
552
553 #ifdef VERIFIED_EXEC
554 if (!doing_shutdown) {
555 if (veriexec_strict >= 3) {
556 printf("Veriexec: Lockdown mode, preventing unmount of"
557 " \"%s\". (uid=%u)\n", mp->mnt_stat.f_mntonname,
558 kauth_cred_getuid(l->l_proc->p_cred));
559 return (EPERM);
560 }
561
562 if (veriexec_strict == 2) {
563 struct veriexec_table_entry *vte;
564
565 /* Check if we have fingerprints on mount. */
566 vte = fileassoc_tabledata_lookup(mp, veriexec_hook);
567 if ((vte != NULL) && (vte->vte_count > 0)) {
568 printf("Veriexec: IPS mode, preventing unmount"
569 " of \"%s\" with monitored files. "
570 "(uid=%u)\n", mp->mnt_stat.f_mntonname,
571 kauth_cred_getuid(l->l_proc->p_cred));
572 return (EPERM);
573 }
574 }
575 }
576 #endif /* VERIFIED_EXEC */
577
578 #ifdef FILEASSOC
579 (void)fileassoc_table_delete(mp);
580 #endif /* FILEASSOC */
581
582 simple_lock(&mountlist_slock);
583 vfs_unbusy(mp);
584 used_syncer = (mp->mnt_syncer != NULL);
585
586 /*
587 * XXX Syncer must be frozen when we get here. This should really
588 * be done on a per-mountpoint basis, but especially the softdep
589 * code possibly called from the syncer doesn't exactly work on a
590 * per-mountpoint basis, so the softdep code would become a maze
591 * of vfs_busy() calls.
592 *
593 * The caller of dounmount() must acquire syncer_lock because
594 * the syncer itself acquires locks in syncer_lock -> vfs_busy
595 * order, and we must preserve that order to avoid deadlock.
596 *
597 * So, if the file system did not use the syncer, now is
598 * the time to release the syncer_lock.
599 */
600 if (used_syncer == 0)
601 lockmgr(&syncer_lock, LK_RELEASE, NULL);
602
603 mp->mnt_iflag |= IMNT_UNMOUNT;
604 mp->mnt_unmounter = l;
605 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
606 vn_start_write(NULL, &mp, V_WAIT);
607
608 async = mp->mnt_flag & MNT_ASYNC;
609 mp->mnt_flag &= ~MNT_ASYNC;
610 cache_purgevfs(mp); /* remove cache entries for this file sys */
611 if (mp->mnt_syncer != NULL)
612 vfs_deallocate_syncvnode(mp);
613 error = 0;
614 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
615 #if NFSS > 0
616 error = fss_umount_hook(mp, (flags & MNT_FORCE));
617 #endif
618 if (error == 0)
619 error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_cred, l);
620 }
621 if (error == 0 || (flags & MNT_FORCE))
622 error = VFS_UNMOUNT(mp, flags, l);
623 vn_finished_write(mp, 0);
624 simple_lock(&mountlist_slock);
625 if (error) {
626 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
627 (void) vfs_allocate_syncvnode(mp);
628 mp->mnt_iflag &= ~IMNT_UNMOUNT;
629 mp->mnt_unmounter = NULL;
630 mp->mnt_flag |= async;
631 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
632 &mountlist_slock);
633 if (used_syncer)
634 lockmgr(&syncer_lock, LK_RELEASE, NULL);
635 simple_lock(&mp->mnt_slock);
636 while (mp->mnt_wcnt > 0) {
637 wakeup(mp);
638 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
639 0, &mp->mnt_slock);
640 }
641 simple_unlock(&mp->mnt_slock);
642 return (error);
643 }
644 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
645 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
646 coveredvp->v_mountedhere = NULL;
647 vrele(coveredvp);
648 }
649 mp->mnt_op->vfs_refcount--;
650 if (LIST_FIRST(&mp->mnt_vnodelist) != NULL)
651 panic("unmount: dangling vnode");
652 mp->mnt_iflag |= IMNT_GONE;
653 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
654 if (used_syncer)
655 lockmgr(&syncer_lock, LK_RELEASE, NULL);
656 simple_lock(&mp->mnt_slock);
657 while (mp->mnt_wcnt > 0) {
658 wakeup(mp);
659 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
660 }
661 simple_unlock(&mp->mnt_slock);
662 vfs_hooks_unmount(mp);
663 free(mp, M_MOUNT);
664 return (0);
665 }
666
667 /*
668 * Sync each mounted filesystem.
669 */
670 #ifdef DEBUG
671 int syncprt = 0;
672 struct ctldebug debug0 = { "syncprt", &syncprt };
673 #endif
674
675 /* ARGSUSED */
676 int
677 sys_sync(struct lwp *l, void *v, register_t *retval)
678 {
679 struct mount *mp, *nmp;
680 int asyncflag;
681 struct proc *p = l == NULL ? &proc0 : l->l_proc;
682
683 simple_lock(&mountlist_slock);
684 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
685 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
686 nmp = mp->mnt_list.cqe_prev;
687 continue;
688 }
689 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
690 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
691 asyncflag = mp->mnt_flag & MNT_ASYNC;
692 mp->mnt_flag &= ~MNT_ASYNC;
693 VFS_SYNC(mp, MNT_NOWAIT, p->p_cred, l);
694 if (asyncflag)
695 mp->mnt_flag |= MNT_ASYNC;
696 vn_finished_write(mp, 0);
697 }
698 simple_lock(&mountlist_slock);
699 nmp = mp->mnt_list.cqe_prev;
700 vfs_unbusy(mp);
701
702 }
703 simple_unlock(&mountlist_slock);
704 #ifdef DEBUG
705 if (syncprt)
706 vfs_bufstats();
707 #endif /* DEBUG */
708 return (0);
709 }
710
711 /*
712 * Change filesystem quotas.
713 */
714 /* ARGSUSED */
715 int
716 sys_quotactl(struct lwp *l, void *v, register_t *retval)
717 {
718 struct sys_quotactl_args /* {
719 syscallarg(const char *) path;
720 syscallarg(int) cmd;
721 syscallarg(int) uid;
722 syscallarg(caddr_t) arg;
723 } */ *uap = v;
724 struct mount *mp;
725 int error;
726 struct nameidata nd;
727
728 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
729 if ((error = namei(&nd)) != 0)
730 return (error);
731 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
732 vrele(nd.ni_vp);
733 if (error)
734 return (error);
735 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
736 SCARG(uap, arg), l);
737 vn_finished_write(mp, 0);
738 return (error);
739 }
740
741 int
742 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
743 int root)
744 {
745 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
746 int error = 0;
747
748 /*
749 * If MNT_NOWAIT or MNT_LAZY is specified, do not
750 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
751 * overrides MNT_NOWAIT.
752 */
753 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
754 (flags != MNT_WAIT && flags != 0)) {
755 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
756 goto done;
757 }
758
759 /* Get the filesystem stats now */
760 memset(sp, 0, sizeof(*sp));
761 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
762 return error;
763 }
764
765 if (cwdi->cwdi_rdir == NULL)
766 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
767 done:
768 if (cwdi->cwdi_rdir != NULL) {
769 size_t len;
770 char *bp;
771 char *path = PNBUF_GET();
772 if (!path)
773 return ENOMEM;
774
775 bp = path + MAXPATHLEN;
776 *--bp = '\0';
777 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
778 MAXPATHLEN / 2, 0, l);
779 if (error) {
780 PNBUF_PUT(path);
781 return error;
782 }
783 len = strlen(bp);
784 /*
785 * for mount points that are below our root, we can see
786 * them, so we fix up the pathname and return them. The
787 * rest we cannot see, so we don't allow viewing the
788 * data.
789 */
790 if (strncmp(bp, sp->f_mntonname, len) == 0) {
791 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
792 sizeof(sp->f_mntonname));
793 if (sp->f_mntonname[0] == '\0')
794 (void)strlcpy(sp->f_mntonname, "/",
795 sizeof(sp->f_mntonname));
796 } else {
797 if (root)
798 (void)strlcpy(sp->f_mntonname, "/",
799 sizeof(sp->f_mntonname));
800 else
801 error = EPERM;
802 }
803 PNBUF_PUT(path);
804 }
805 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
806 return error;
807 }
808
809 /*
810 * Get filesystem statistics.
811 */
812 /* ARGSUSED */
813 int
814 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
815 {
816 struct sys_statvfs1_args /* {
817 syscallarg(const char *) path;
818 syscallarg(struct statvfs *) buf;
819 syscallarg(int) flags;
820 } */ *uap = v;
821 struct mount *mp;
822 struct statvfs *sb;
823 int error;
824 struct nameidata nd;
825
826 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
827 if ((error = namei(&nd)) != 0)
828 return error;
829 mp = nd.ni_vp->v_mount;
830 vrele(nd.ni_vp);
831 sb = STATVFSBUF_GET();
832 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
833 if (error == 0) {
834 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
835 }
836 STATVFSBUF_PUT(sb);
837 return error;
838 }
839
840 /*
841 * Get filesystem statistics.
842 */
843 /* ARGSUSED */
844 int
845 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
846 {
847 struct sys_fstatvfs1_args /* {
848 syscallarg(int) fd;
849 syscallarg(struct statvfs *) buf;
850 syscallarg(int) flags;
851 } */ *uap = v;
852 struct proc *p = l->l_proc;
853 struct file *fp;
854 struct mount *mp;
855 struct statvfs *sb;
856 int error;
857
858 /* getvnode() will use the descriptor for us */
859 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
860 return (error);
861 mp = ((struct vnode *)fp->f_data)->v_mount;
862 sb = STATVFSBUF_GET();
863 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
864 goto out;
865 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
866 out:
867 FILE_UNUSE(fp, l);
868 STATVFSBUF_PUT(sb);
869 return error;
870 }
871
872
873 /*
874 * Get statistics on all filesystems.
875 */
876 int
877 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
878 {
879 struct sys_getvfsstat_args /* {
880 syscallarg(struct statvfs *) buf;
881 syscallarg(size_t) bufsize;
882 syscallarg(int) flags;
883 } */ *uap = v;
884 int root = 0;
885 struct proc *p = l->l_proc;
886 struct mount *mp, *nmp;
887 struct statvfs *sb;
888 struct statvfs *sfsp;
889 size_t count, maxcount;
890 int error = 0;
891
892 sb = STATVFSBUF_GET();
893 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
894 sfsp = SCARG(uap, buf);
895 simple_lock(&mountlist_slock);
896 count = 0;
897 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
898 mp = nmp) {
899 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
900 nmp = CIRCLEQ_NEXT(mp, mnt_list);
901 continue;
902 }
903 if (sfsp && count < maxcount) {
904 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
905 if (error) {
906 simple_lock(&mountlist_slock);
907 nmp = CIRCLEQ_NEXT(mp, mnt_list);
908 vfs_unbusy(mp);
909 continue;
910 }
911 error = copyout(sb, sfsp, sizeof(*sfsp));
912 if (error) {
913 vfs_unbusy(mp);
914 goto out;
915 }
916 sfsp++;
917 root |= strcmp(sb->f_mntonname, "/") == 0;
918 }
919 count++;
920 simple_lock(&mountlist_slock);
921 nmp = CIRCLEQ_NEXT(mp, mnt_list);
922 vfs_unbusy(mp);
923 }
924 simple_unlock(&mountlist_slock);
925 if (root == 0 && p->p_cwdi->cwdi_rdir) {
926 /*
927 * fake a root entry
928 */
929 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
930 SCARG(uap, flags), 1)) != 0)
931 goto out;
932 if (sfsp)
933 error = copyout(sb, sfsp, sizeof(*sfsp));
934 count++;
935 }
936 if (sfsp && count > maxcount)
937 *retval = maxcount;
938 else
939 *retval = count;
940 out:
941 STATVFSBUF_PUT(sb);
942 return error;
943 }
944
945 /*
946 * Change current working directory to a given file descriptor.
947 */
948 /* ARGSUSED */
949 int
950 sys_fchdir(struct lwp *l, void *v, register_t *retval)
951 {
952 struct sys_fchdir_args /* {
953 syscallarg(int) fd;
954 } */ *uap = v;
955 struct proc *p = l->l_proc;
956 struct filedesc *fdp = p->p_fd;
957 struct cwdinfo *cwdi = p->p_cwdi;
958 struct vnode *vp, *tdp;
959 struct mount *mp;
960 struct file *fp;
961 int error;
962
963 /* getvnode() will use the descriptor for us */
964 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
965 return (error);
966 vp = (struct vnode *)fp->f_data;
967
968 VREF(vp);
969 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
970 if (vp->v_type != VDIR)
971 error = ENOTDIR;
972 else
973 error = VOP_ACCESS(vp, VEXEC, p->p_cred, l);
974 while (!error && (mp = vp->v_mountedhere) != NULL) {
975 if (vfs_busy(mp, 0, 0))
976 continue;
977 error = VFS_ROOT(mp, &tdp);
978 vfs_unbusy(mp);
979 if (error)
980 break;
981 vput(vp);
982 vp = tdp;
983 }
984 if (error) {
985 vput(vp);
986 goto out;
987 }
988 VOP_UNLOCK(vp, 0);
989
990 /*
991 * Disallow changing to a directory not under the process's
992 * current root directory (if there is one).
993 */
994 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
995 vrele(vp);
996 error = EPERM; /* operation not permitted */
997 goto out;
998 }
999
1000 vrele(cwdi->cwdi_cdir);
1001 cwdi->cwdi_cdir = vp;
1002 out:
1003 FILE_UNUSE(fp, l);
1004 return (error);
1005 }
1006
1007 /*
1008 * Change this process's notion of the root directory to a given file
1009 * descriptor.
1010 */
1011 int
1012 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1013 {
1014 struct sys_fchroot_args *uap = v;
1015 struct proc *p = l->l_proc;
1016 struct filedesc *fdp = p->p_fd;
1017 struct cwdinfo *cwdi = p->p_cwdi;
1018 struct vnode *vp;
1019 struct file *fp;
1020 int error;
1021
1022 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1023 &p->p_acflag)) != 0)
1024 return error;
1025 /* getvnode() will use the descriptor for us */
1026 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1027 return error;
1028 vp = (struct vnode *) fp->f_data;
1029 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1030 if (vp->v_type != VDIR)
1031 error = ENOTDIR;
1032 else
1033 error = VOP_ACCESS(vp, VEXEC, p->p_cred, l);
1034 VOP_UNLOCK(vp, 0);
1035 if (error)
1036 goto out;
1037 VREF(vp);
1038
1039 /*
1040 * Prevent escaping from chroot by putting the root under
1041 * the working directory. Silently chdir to / if we aren't
1042 * already there.
1043 */
1044 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1045 /*
1046 * XXX would be more failsafe to change directory to a
1047 * deadfs node here instead
1048 */
1049 vrele(cwdi->cwdi_cdir);
1050 VREF(vp);
1051 cwdi->cwdi_cdir = vp;
1052 }
1053
1054 if (cwdi->cwdi_rdir != NULL)
1055 vrele(cwdi->cwdi_rdir);
1056 cwdi->cwdi_rdir = vp;
1057 out:
1058 FILE_UNUSE(fp, l);
1059 return (error);
1060 }
1061
1062 /*
1063 * Change current working directory (``.'').
1064 */
1065 /* ARGSUSED */
1066 int
1067 sys_chdir(struct lwp *l, void *v, register_t *retval)
1068 {
1069 struct sys_chdir_args /* {
1070 syscallarg(const char *) path;
1071 } */ *uap = v;
1072 struct proc *p = l->l_proc;
1073 struct cwdinfo *cwdi = p->p_cwdi;
1074 int error;
1075 struct nameidata nd;
1076
1077 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1078 SCARG(uap, path), l);
1079 if ((error = change_dir(&nd, l)) != 0)
1080 return (error);
1081 vrele(cwdi->cwdi_cdir);
1082 cwdi->cwdi_cdir = nd.ni_vp;
1083 return (0);
1084 }
1085
1086 /*
1087 * Change notion of root (``/'') directory.
1088 */
1089 /* ARGSUSED */
1090 int
1091 sys_chroot(struct lwp *l, void *v, register_t *retval)
1092 {
1093 struct sys_chroot_args /* {
1094 syscallarg(const char *) path;
1095 } */ *uap = v;
1096 struct proc *p = l->l_proc;
1097 struct cwdinfo *cwdi = p->p_cwdi;
1098 struct vnode *vp;
1099 int error;
1100 struct nameidata nd;
1101
1102 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1103 &p->p_acflag)) != 0)
1104 return (error);
1105 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1106 SCARG(uap, path), l);
1107 if ((error = change_dir(&nd, l)) != 0)
1108 return (error);
1109 if (cwdi->cwdi_rdir != NULL)
1110 vrele(cwdi->cwdi_rdir);
1111 vp = nd.ni_vp;
1112 cwdi->cwdi_rdir = vp;
1113
1114 /*
1115 * Prevent escaping from chroot by putting the root under
1116 * the working directory. Silently chdir to / if we aren't
1117 * already there.
1118 */
1119 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1120 /*
1121 * XXX would be more failsafe to change directory to a
1122 * deadfs node here instead
1123 */
1124 vrele(cwdi->cwdi_cdir);
1125 VREF(vp);
1126 cwdi->cwdi_cdir = vp;
1127 }
1128
1129 return (0);
1130 }
1131
1132 /*
1133 * Common routine for chroot and chdir.
1134 */
1135 static int
1136 change_dir(struct nameidata *ndp, struct lwp *l)
1137 {
1138 struct vnode *vp;
1139 int error;
1140
1141 if ((error = namei(ndp)) != 0)
1142 return (error);
1143 vp = ndp->ni_vp;
1144 if (vp->v_type != VDIR)
1145 error = ENOTDIR;
1146 else
1147 error = VOP_ACCESS(vp, VEXEC, l->l_proc->p_cred, l);
1148
1149 if (error)
1150 vput(vp);
1151 else
1152 VOP_UNLOCK(vp, 0);
1153 return (error);
1154 }
1155
1156 /*
1157 * Check permissions, allocate an open file structure,
1158 * and call the device open routine if any.
1159 */
1160 int
1161 sys_open(struct lwp *l, void *v, register_t *retval)
1162 {
1163 struct sys_open_args /* {
1164 syscallarg(const char *) path;
1165 syscallarg(int) flags;
1166 syscallarg(int) mode;
1167 } */ *uap = v;
1168 struct proc *p = l->l_proc;
1169 struct cwdinfo *cwdi = p->p_cwdi;
1170 struct filedesc *fdp = p->p_fd;
1171 struct file *fp;
1172 struct vnode *vp;
1173 int flags, cmode;
1174 int type, indx, error;
1175 struct flock lf;
1176 struct nameidata nd;
1177
1178 flags = FFLAGS(SCARG(uap, flags));
1179 if ((flags & (FREAD | FWRITE)) == 0)
1180 return (EINVAL);
1181 /* falloc() will use the file descriptor for us */
1182 if ((error = falloc(p, &fp, &indx)) != 0)
1183 return (error);
1184 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1185 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1186 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1187 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1188 FILE_UNUSE(fp, l);
1189 fdp->fd_ofiles[indx] = NULL;
1190 ffree(fp);
1191 if ((error == EDUPFD || error == EMOVEFD) &&
1192 l->l_dupfd >= 0 && /* XXX from fdopen */
1193 (error =
1194 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1195 *retval = indx;
1196 return (0);
1197 }
1198 if (error == ERESTART)
1199 error = EINTR;
1200 fdremove(fdp, indx);
1201 return (error);
1202 }
1203 l->l_dupfd = 0;
1204 vp = nd.ni_vp;
1205 fp->f_flag = flags & FMASK;
1206 fp->f_type = DTYPE_VNODE;
1207 fp->f_ops = &vnops;
1208 fp->f_data = vp;
1209 if (flags & (O_EXLOCK | O_SHLOCK)) {
1210 lf.l_whence = SEEK_SET;
1211 lf.l_start = 0;
1212 lf.l_len = 0;
1213 if (flags & O_EXLOCK)
1214 lf.l_type = F_WRLCK;
1215 else
1216 lf.l_type = F_RDLCK;
1217 type = F_FLOCK;
1218 if ((flags & FNONBLOCK) == 0)
1219 type |= F_WAIT;
1220 VOP_UNLOCK(vp, 0);
1221 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1222 if (error) {
1223 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1224 FILE_UNUSE(fp, l);
1225 ffree(fp);
1226 fdremove(fdp, indx);
1227 return (error);
1228 }
1229 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1230 fp->f_flag |= FHASLOCK;
1231 }
1232 VOP_UNLOCK(vp, 0);
1233 *retval = indx;
1234 FILE_SET_MATURE(fp);
1235 FILE_UNUSE(fp, l);
1236 return (0);
1237 }
1238
1239 static void
1240 vfs__fhfree(fhandle_t *fhp)
1241 {
1242 size_t fhsize;
1243
1244 if (fhp == NULL) {
1245 return;
1246 }
1247 fhsize = FHANDLE_SIZE(fhp);
1248 kmem_free(fhp, fhsize);
1249 }
1250
1251 /*
1252 * vfs_composefh: compose a filehandle.
1253 */
1254
1255 int
1256 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1257 {
1258 struct mount *mp;
1259 struct fid *fidp;
1260 int error;
1261 size_t needfhsize;
1262 size_t fidsize;
1263
1264 mp = vp->v_mount;
1265 if (mp->mnt_op->vfs_vptofh == NULL) {
1266 return EOPNOTSUPP;
1267 }
1268 fidp = NULL;
1269 if (*fh_size <= FHANDLE_SIZE_MIN) {
1270 fidsize = 0;
1271 } else {
1272 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1273 if (fhp != NULL) {
1274 memset(fhp, 0, *fh_size);
1275 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1276 fidp = &fhp->fh_fid;
1277 }
1278 }
1279 error = VFS_VPTOFH(vp, fidp, &fidsize);
1280 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1281 if (error == 0 && *fh_size < needfhsize) {
1282 error = E2BIG;
1283 }
1284 *fh_size = needfhsize;
1285 return error;
1286 }
1287
1288 int
1289 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1290 {
1291 struct mount *mp;
1292 fhandle_t *fhp;
1293 size_t fhsize;
1294 size_t fidsize;
1295 int error;
1296
1297 *fhpp = NULL;
1298 mp = vp->v_mount;
1299 if (mp->mnt_op->vfs_vptofh == NULL) {
1300 error = EOPNOTSUPP;
1301 goto out;
1302 }
1303 fhsize = 0;
1304 error = VFS_VPTOFH(vp, NULL, &fidsize);
1305 KASSERT(error != 0);
1306 if (error != E2BIG) {
1307 goto out;
1308 }
1309 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1310 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1311 if (fhp == NULL) {
1312 error = ENOMEM;
1313 goto out;
1314 }
1315 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1316 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1317 if (error == 0) {
1318 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1319 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1320 *fhpp = fhp;
1321 } else {
1322 kmem_free(fhp, fhsize);
1323 }
1324 out:
1325 return error;
1326 }
1327
1328 void
1329 vfs_composefh_free(fhandle_t *fhp)
1330 {
1331
1332 vfs__fhfree(fhp);
1333 }
1334
1335 /*
1336 * vfs_fhtovp: lookup a vnode by a filehandle.
1337 */
1338
1339 int
1340 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1341 {
1342 struct mount *mp;
1343 int error;
1344
1345 *vpp = NULL;
1346 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1347 if (mp == NULL) {
1348 error = ESTALE;
1349 goto out;
1350 }
1351 if (mp->mnt_op->vfs_fhtovp == NULL) {
1352 error = EOPNOTSUPP;
1353 goto out;
1354 }
1355 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1356 out:
1357 return error;
1358 }
1359
1360 /*
1361 * vfs_copyinfh: copyin a filehandle.
1362 */
1363
1364 int
1365 vfs_copyinfh_alloc(const void *ufhp, fhandle_t **fhpp)
1366 {
1367 fhandle_t *fhp;
1368 fhandle_t tempfh;
1369 size_t fhsize;
1370 int error;
1371
1372 *fhpp = NULL;
1373 error = copyin(ufhp, &tempfh, sizeof(tempfh));
1374 if (error) {
1375 return error;
1376 }
1377 fhsize = FHANDLE_SIZE(&tempfh);
1378 if (fhsize > FHANDLE_SIZE_MAX) {
1379 return EINVAL;
1380 }
1381 fhp = kmem_alloc(fhsize, KM_SLEEP);
1382 if (fhp == NULL) {
1383 return ENOMEM;
1384 }
1385 error = copyin(ufhp, fhp, fhsize);
1386 if (error == 0) {
1387 *fhpp = fhp;
1388 } else {
1389 kmem_free(fhp, fhsize);
1390 }
1391 return error;
1392 }
1393
1394 void
1395 vfs_copyinfh_free(fhandle_t *fhp)
1396 {
1397
1398 vfs__fhfree(fhp);
1399 }
1400
1401 /*
1402 * Get file handle system call
1403 */
1404 int
1405 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1406 {
1407 struct sys___getfh30_args /* {
1408 syscallarg(char *) fname;
1409 syscallarg(fhandle_t *) fhp;
1410 syscallarg(size_t *) fh_size;
1411 } */ *uap = v;
1412 struct proc *p = l->l_proc;
1413 struct vnode *vp;
1414 fhandle_t *fh;
1415 int error;
1416 struct nameidata nd;
1417 size_t sz;
1418 size_t usz;
1419
1420 /*
1421 * Must be super user
1422 */
1423 error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1424 &p->p_acflag);
1425 if (error)
1426 return (error);
1427 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1428 SCARG(uap, fname), l);
1429 error = namei(&nd);
1430 if (error)
1431 return (error);
1432 vp = nd.ni_vp;
1433 error = vfs_composefh_alloc(vp, &fh);
1434 vput(vp);
1435 if (error != 0) {
1436 goto out;
1437 }
1438 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1439 if (error != 0) {
1440 goto out;
1441 }
1442 sz = FHANDLE_SIZE(fh);
1443 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1444 if (error != 0) {
1445 goto out;
1446 }
1447 if (usz >= sz) {
1448 error = copyout(fh, SCARG(uap, fhp), sz);
1449 } else {
1450 error = E2BIG;
1451 }
1452 out:
1453 vfs_composefh_free(fh);
1454 return (error);
1455 }
1456
1457 /*
1458 * Open a file given a file handle.
1459 *
1460 * Check permissions, allocate an open file structure,
1461 * and call the device open routine if any.
1462 */
1463 int
1464 sys_fhopen(struct lwp *l, void *v, register_t *retval)
1465 {
1466 struct sys_fhopen_args /* {
1467 syscallarg(const fhandle_t *) fhp;
1468 syscallarg(int) flags;
1469 } */ *uap = v;
1470 struct proc *p = l->l_proc;
1471 struct filedesc *fdp = p->p_fd;
1472 struct file *fp;
1473 struct vnode *vp = NULL;
1474 struct mount *mp;
1475 kauth_cred_t cred = p->p_cred;
1476 int flags;
1477 struct file *nfp;
1478 int type, indx, error=0;
1479 struct flock lf;
1480 struct vattr va;
1481 fhandle_t *fh;
1482
1483 /*
1484 * Must be super user
1485 */
1486 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1487 &p->p_acflag)))
1488 return (error);
1489
1490 flags = FFLAGS(SCARG(uap, flags));
1491 if ((flags & (FREAD | FWRITE)) == 0)
1492 return (EINVAL);
1493 if ((flags & O_CREAT))
1494 return (EINVAL);
1495 /* falloc() will use the file descriptor for us */
1496 if ((error = falloc(p, &nfp, &indx)) != 0)
1497 return (error);
1498 fp = nfp;
1499 error = vfs_copyinfh_alloc(SCARG(uap, fhp), &fh);
1500 if (error != 0) {
1501 goto bad;
1502 }
1503 error = vfs_fhtovp(fh, &vp);
1504 if (error != 0) {
1505 goto bad;
1506 }
1507
1508 /* Now do an effective vn_open */
1509
1510 if (vp->v_type == VSOCK) {
1511 error = EOPNOTSUPP;
1512 goto bad;
1513 }
1514 if (flags & FREAD) {
1515 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1516 goto bad;
1517 }
1518 if (flags & (FWRITE | O_TRUNC)) {
1519 if (vp->v_type == VDIR) {
1520 error = EISDIR;
1521 goto bad;
1522 }
1523 if ((error = vn_writechk(vp)) != 0 ||
1524 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1525 goto bad;
1526 }
1527 if (flags & O_TRUNC) {
1528 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1529 goto bad;
1530 VOP_UNLOCK(vp, 0); /* XXX */
1531 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1532 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1533 VATTR_NULL(&va);
1534 va.va_size = 0;
1535 error = VOP_SETATTR(vp, &va, cred, l);
1536 vn_finished_write(mp, 0);
1537 if (error)
1538 goto bad;
1539 }
1540 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1541 goto bad;
1542 if (vp->v_type == VREG &&
1543 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1544 error = EIO;
1545 goto bad;
1546 }
1547 if (flags & FWRITE)
1548 vp->v_writecount++;
1549
1550 /* done with modified vn_open, now finish what sys_open does. */
1551
1552 fp->f_flag = flags & FMASK;
1553 fp->f_type = DTYPE_VNODE;
1554 fp->f_ops = &vnops;
1555 fp->f_data = vp;
1556 if (flags & (O_EXLOCK | O_SHLOCK)) {
1557 lf.l_whence = SEEK_SET;
1558 lf.l_start = 0;
1559 lf.l_len = 0;
1560 if (flags & O_EXLOCK)
1561 lf.l_type = F_WRLCK;
1562 else
1563 lf.l_type = F_RDLCK;
1564 type = F_FLOCK;
1565 if ((flags & FNONBLOCK) == 0)
1566 type |= F_WAIT;
1567 VOP_UNLOCK(vp, 0);
1568 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1569 if (error) {
1570 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1571 FILE_UNUSE(fp, l);
1572 ffree(fp);
1573 fdremove(fdp, indx);
1574 return (error);
1575 }
1576 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1577 fp->f_flag |= FHASLOCK;
1578 }
1579 VOP_UNLOCK(vp, 0);
1580 *retval = indx;
1581 FILE_SET_MATURE(fp);
1582 FILE_UNUSE(fp, l);
1583 vfs_copyinfh_free(fh);
1584 return (0);
1585
1586 bad:
1587 FILE_UNUSE(fp, l);
1588 ffree(fp);
1589 fdremove(fdp, indx);
1590 if (vp != NULL)
1591 vput(vp);
1592 vfs_copyinfh_free(fh);
1593 return (error);
1594 }
1595
1596 /* ARGSUSED */
1597 int
1598 sys___fhstat30(struct lwp *l, void *v, register_t *retval)
1599 {
1600 struct sys___fhstat30_args /* {
1601 syscallarg(const fhandle_t *) fhp;
1602 syscallarg(struct stat *) sb;
1603 } */ *uap = v;
1604 struct proc *p = l->l_proc;
1605 struct stat sb;
1606 int error;
1607 fhandle_t *fh;
1608 struct vnode *vp;
1609
1610 /*
1611 * Must be super user
1612 */
1613 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1614 &p->p_acflag)))
1615 return (error);
1616
1617 error = vfs_copyinfh_alloc(SCARG(uap, fhp), &fh);
1618 if (error != 0) {
1619 goto bad;
1620 }
1621 error = vfs_fhtovp(fh, &vp);
1622 if (error != 0) {
1623 goto bad;
1624 }
1625 error = vn_stat(vp, &sb, l);
1626 vput(vp);
1627 if (error) {
1628 goto bad;
1629 }
1630 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
1631 bad:
1632 vfs_copyinfh_free(fh);
1633 return error;
1634 }
1635
1636 /* ARGSUSED */
1637 int
1638 sys_fhstatvfs1(struct lwp *l, void *v, register_t *retval)
1639 {
1640 struct sys_fhstatvfs1_args /* {
1641 syscallarg(const fhandle_t *) fhp;
1642 syscallarg(struct statvfs *) buf;
1643 syscallarg(int) flags;
1644 } */ *uap = v;
1645 struct proc *p = l->l_proc;
1646 struct statvfs *sb = NULL;
1647 fhandle_t *fh;
1648 struct mount *mp;
1649 struct vnode *vp;
1650 int error;
1651
1652 /*
1653 * Must be super user
1654 */
1655 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1656 &p->p_acflag)) != 0)
1657 return error;
1658
1659 error = vfs_copyinfh_alloc(SCARG(uap, fhp), &fh);
1660 if (error != 0) {
1661 goto out;
1662 }
1663 error = vfs_fhtovp(fh, &vp);
1664 if (error != 0) {
1665 goto out;
1666 }
1667 mp = vp->v_mount;
1668 sb = STATVFSBUF_GET();
1669 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0) {
1670 vput(vp);
1671 goto out;
1672 }
1673 vput(vp);
1674 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1675 out:
1676 if (sb != NULL) {
1677 STATVFSBUF_PUT(sb);
1678 }
1679 vfs_copyinfh_free(fh);
1680 return error;
1681 }
1682
1683 /*
1684 * Create a special file.
1685 */
1686 /* ARGSUSED */
1687 int
1688 sys_mknod(struct lwp *l, void *v, register_t *retval)
1689 {
1690 struct sys_mknod_args /* {
1691 syscallarg(const char *) path;
1692 syscallarg(int) mode;
1693 syscallarg(int) dev;
1694 } */ *uap = v;
1695 struct proc *p = l->l_proc;
1696 struct vnode *vp;
1697 struct mount *mp;
1698 struct vattr vattr;
1699 int error;
1700 int whiteout = 0;
1701 struct nameidata nd;
1702
1703 if ((error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
1704 &p->p_acflag)) != 0)
1705 return (error);
1706 restart:
1707 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1708 if ((error = namei(&nd)) != 0)
1709 return (error);
1710 vp = nd.ni_vp;
1711 if (vp != NULL)
1712 error = EEXIST;
1713 else {
1714 VATTR_NULL(&vattr);
1715 vattr.va_mode =
1716 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1717 vattr.va_rdev = SCARG(uap, dev);
1718 whiteout = 0;
1719
1720 switch (SCARG(uap, mode) & S_IFMT) {
1721 case S_IFMT: /* used by badsect to flag bad sectors */
1722 vattr.va_type = VBAD;
1723 break;
1724 case S_IFCHR:
1725 vattr.va_type = VCHR;
1726 break;
1727 case S_IFBLK:
1728 vattr.va_type = VBLK;
1729 break;
1730 case S_IFWHT:
1731 whiteout = 1;
1732 break;
1733 default:
1734 error = EINVAL;
1735 break;
1736 }
1737 }
1738 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1739 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1740 if (nd.ni_dvp == vp)
1741 vrele(nd.ni_dvp);
1742 else
1743 vput(nd.ni_dvp);
1744 if (vp)
1745 vrele(vp);
1746 if ((error = vn_start_write(NULL, &mp,
1747 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1748 return (error);
1749 goto restart;
1750 }
1751 if (!error) {
1752 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1753 if (whiteout) {
1754 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1755 if (error)
1756 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1757 vput(nd.ni_dvp);
1758 } else {
1759 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1760 &nd.ni_cnd, &vattr);
1761 if (error == 0)
1762 vput(nd.ni_vp);
1763 }
1764 } else {
1765 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1766 if (nd.ni_dvp == vp)
1767 vrele(nd.ni_dvp);
1768 else
1769 vput(nd.ni_dvp);
1770 if (vp)
1771 vrele(vp);
1772 }
1773 vn_finished_write(mp, 0);
1774 return (error);
1775 }
1776
1777 /*
1778 * Create a named pipe.
1779 */
1780 /* ARGSUSED */
1781 int
1782 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1783 {
1784 struct sys_mkfifo_args /* {
1785 syscallarg(const char *) path;
1786 syscallarg(int) mode;
1787 } */ *uap = v;
1788 struct proc *p = l->l_proc;
1789 struct mount *mp;
1790 struct vattr vattr;
1791 int error;
1792 struct nameidata nd;
1793
1794 restart:
1795 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1796 if ((error = namei(&nd)) != 0)
1797 return (error);
1798 if (nd.ni_vp != NULL) {
1799 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1800 if (nd.ni_dvp == nd.ni_vp)
1801 vrele(nd.ni_dvp);
1802 else
1803 vput(nd.ni_dvp);
1804 vrele(nd.ni_vp);
1805 return (EEXIST);
1806 }
1807 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1808 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1809 if (nd.ni_dvp == nd.ni_vp)
1810 vrele(nd.ni_dvp);
1811 else
1812 vput(nd.ni_dvp);
1813 if (nd.ni_vp)
1814 vrele(nd.ni_vp);
1815 if ((error = vn_start_write(NULL, &mp,
1816 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1817 return (error);
1818 goto restart;
1819 }
1820 VATTR_NULL(&vattr);
1821 vattr.va_type = VFIFO;
1822 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1823 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1824 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1825 if (error == 0)
1826 vput(nd.ni_vp);
1827 vn_finished_write(mp, 0);
1828 return (error);
1829 }
1830
1831 /*
1832 * Make a hard file link.
1833 */
1834 /* ARGSUSED */
1835 int
1836 sys_link(struct lwp *l, void *v, register_t *retval)
1837 {
1838 struct sys_link_args /* {
1839 syscallarg(const char *) path;
1840 syscallarg(const char *) link;
1841 } */ *uap = v;
1842 struct proc *p = l->l_proc;
1843 struct vnode *vp;
1844 struct mount *mp;
1845 struct nameidata nd;
1846 int error;
1847
1848 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1849 if ((error = namei(&nd)) != 0)
1850 return (error);
1851 vp = nd.ni_vp;
1852 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1853 vrele(vp);
1854 return (error);
1855 }
1856 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1857 if ((error = namei(&nd)) != 0)
1858 goto out;
1859 if (nd.ni_vp) {
1860 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1861 if (nd.ni_dvp == nd.ni_vp)
1862 vrele(nd.ni_dvp);
1863 else
1864 vput(nd.ni_dvp);
1865 vrele(nd.ni_vp);
1866 error = EEXIST;
1867 goto out;
1868 }
1869 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1870 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
1871 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1872 out:
1873 vrele(vp);
1874 vn_finished_write(mp, 0);
1875 return (error);
1876 }
1877
1878 /*
1879 * Make a symbolic link.
1880 */
1881 /* ARGSUSED */
1882 int
1883 sys_symlink(struct lwp *l, void *v, register_t *retval)
1884 {
1885 struct sys_symlink_args /* {
1886 syscallarg(const char *) path;
1887 syscallarg(const char *) link;
1888 } */ *uap = v;
1889 struct proc *p = l->l_proc;
1890 struct mount *mp;
1891 struct vattr vattr;
1892 char *path;
1893 int error;
1894 struct nameidata nd;
1895
1896 path = PNBUF_GET();
1897 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1898 if (error)
1899 goto out;
1900 restart:
1901 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1902 if ((error = namei(&nd)) != 0)
1903 goto out;
1904 if (nd.ni_vp) {
1905 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1906 if (nd.ni_dvp == nd.ni_vp)
1907 vrele(nd.ni_dvp);
1908 else
1909 vput(nd.ni_dvp);
1910 vrele(nd.ni_vp);
1911 error = EEXIST;
1912 goto out;
1913 }
1914 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1915 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1916 if (nd.ni_dvp == nd.ni_vp)
1917 vrele(nd.ni_dvp);
1918 else
1919 vput(nd.ni_dvp);
1920 if ((error = vn_start_write(NULL, &mp,
1921 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1922 return (error);
1923 goto restart;
1924 }
1925 VATTR_NULL(&vattr);
1926 vattr.va_type = VLNK;
1927 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1928 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1929 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1930 if (error == 0)
1931 vput(nd.ni_vp);
1932 vn_finished_write(mp, 0);
1933 out:
1934 PNBUF_PUT(path);
1935 return (error);
1936 }
1937
1938 /*
1939 * Delete a whiteout from the filesystem.
1940 */
1941 /* ARGSUSED */
1942 int
1943 sys_undelete(struct lwp *l, void *v, register_t *retval)
1944 {
1945 struct sys_undelete_args /* {
1946 syscallarg(const char *) path;
1947 } */ *uap = v;
1948 struct proc *p = l->l_proc;
1949 int error;
1950 struct mount *mp;
1951 struct nameidata nd;
1952
1953 restart:
1954 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
1955 SCARG(uap, path), l);
1956 error = namei(&nd);
1957 if (error)
1958 return (error);
1959
1960 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
1961 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1962 if (nd.ni_dvp == nd.ni_vp)
1963 vrele(nd.ni_dvp);
1964 else
1965 vput(nd.ni_dvp);
1966 if (nd.ni_vp)
1967 vrele(nd.ni_vp);
1968 return (EEXIST);
1969 }
1970 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1971 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1972 if (nd.ni_dvp == nd.ni_vp)
1973 vrele(nd.ni_dvp);
1974 else
1975 vput(nd.ni_dvp);
1976 if ((error = vn_start_write(NULL, &mp,
1977 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1978 return (error);
1979 goto restart;
1980 }
1981 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
1982 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
1983 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1984 vput(nd.ni_dvp);
1985 vn_finished_write(mp, 0);
1986 return (error);
1987 }
1988
1989 /*
1990 * Delete a name from the filesystem.
1991 */
1992 /* ARGSUSED */
1993 int
1994 sys_unlink(struct lwp *l, void *v, register_t *retval)
1995 {
1996 struct sys_unlink_args /* {
1997 syscallarg(const char *) path;
1998 } */ *uap = v;
1999 struct proc *p = l->l_proc;
2000 struct mount *mp;
2001 struct vnode *vp;
2002 int error;
2003 struct nameidata nd;
2004
2005 restart:
2006 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2007 SCARG(uap, path), l);
2008 if ((error = namei(&nd)) != 0)
2009 return (error);
2010 vp = nd.ni_vp;
2011
2012 /*
2013 * The root of a mounted filesystem cannot be deleted.
2014 */
2015 if (vp->v_flag & VROOT) {
2016 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2017 if (nd.ni_dvp == vp)
2018 vrele(nd.ni_dvp);
2019 else
2020 vput(nd.ni_dvp);
2021 vput(vp);
2022 error = EBUSY;
2023 goto out;
2024 }
2025
2026 #ifdef VERIFIED_EXEC
2027 /* Handle remove requests for veriexec entries. */
2028 if ((error = veriexec_removechk(l, vp, nd.ni_dirp)) != 0) {
2029 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2030 if (nd.ni_dvp == vp)
2031 vrele(nd.ni_dvp);
2032 else
2033 vput(nd.ni_dvp);
2034 vput(vp);
2035 goto out;
2036 }
2037 #endif /* VERIFIED_EXEC */
2038
2039 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2040 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2041 if (nd.ni_dvp == vp)
2042 vrele(nd.ni_dvp);
2043 else
2044 vput(nd.ni_dvp);
2045 vput(vp);
2046 if ((error = vn_start_write(NULL, &mp,
2047 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2048 return (error);
2049 goto restart;
2050 }
2051 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
2052 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2053 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2054 vn_finished_write(mp, 0);
2055 #ifdef FILEASSOC
2056 if (!error)
2057 (void)fileassoc_file_delete(nd.ni_vp);
2058 #endif /* FILEASSOC */
2059 out:
2060 return (error);
2061 }
2062
2063 /*
2064 * Reposition read/write file offset.
2065 */
2066 int
2067 sys_lseek(struct lwp *l, void *v, register_t *retval)
2068 {
2069 struct sys_lseek_args /* {
2070 syscallarg(int) fd;
2071 syscallarg(int) pad;
2072 syscallarg(off_t) offset;
2073 syscallarg(int) whence;
2074 } */ *uap = v;
2075 struct proc *p = l->l_proc;
2076 kauth_cred_t cred = p->p_cred;
2077 struct filedesc *fdp = p->p_fd;
2078 struct file *fp;
2079 struct vnode *vp;
2080 struct vattr vattr;
2081 off_t newoff;
2082 int error;
2083
2084 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2085 return (EBADF);
2086
2087 FILE_USE(fp);
2088
2089 vp = (struct vnode *)fp->f_data;
2090 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2091 error = ESPIPE;
2092 goto out;
2093 }
2094
2095 switch (SCARG(uap, whence)) {
2096 case SEEK_CUR:
2097 newoff = fp->f_offset + SCARG(uap, offset);
2098 break;
2099 case SEEK_END:
2100 error = VOP_GETATTR(vp, &vattr, cred, l);
2101 if (error)
2102 goto out;
2103 newoff = SCARG(uap, offset) + vattr.va_size;
2104 break;
2105 case SEEK_SET:
2106 newoff = SCARG(uap, offset);
2107 break;
2108 default:
2109 error = EINVAL;
2110 goto out;
2111 }
2112 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2113 goto out;
2114
2115 *(off_t *)retval = fp->f_offset = newoff;
2116 out:
2117 FILE_UNUSE(fp, l);
2118 return (error);
2119 }
2120
2121 /*
2122 * Positional read system call.
2123 */
2124 int
2125 sys_pread(struct lwp *l, void *v, register_t *retval)
2126 {
2127 struct sys_pread_args /* {
2128 syscallarg(int) fd;
2129 syscallarg(void *) buf;
2130 syscallarg(size_t) nbyte;
2131 syscallarg(off_t) offset;
2132 } */ *uap = v;
2133 struct proc *p = l->l_proc;
2134 struct filedesc *fdp = p->p_fd;
2135 struct file *fp;
2136 struct vnode *vp;
2137 off_t offset;
2138 int error, fd = SCARG(uap, fd);
2139
2140 if ((fp = fd_getfile(fdp, fd)) == NULL)
2141 return (EBADF);
2142
2143 if ((fp->f_flag & FREAD) == 0) {
2144 simple_unlock(&fp->f_slock);
2145 return (EBADF);
2146 }
2147
2148 FILE_USE(fp);
2149
2150 vp = (struct vnode *)fp->f_data;
2151 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2152 error = ESPIPE;
2153 goto out;
2154 }
2155
2156 offset = SCARG(uap, offset);
2157
2158 /*
2159 * XXX This works because no file systems actually
2160 * XXX take any action on the seek operation.
2161 */
2162 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2163 goto out;
2164
2165 /* dofileread() will unuse the descriptor for us */
2166 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2167 &offset, 0, retval));
2168
2169 out:
2170 FILE_UNUSE(fp, l);
2171 return (error);
2172 }
2173
2174 /*
2175 * Positional scatter read system call.
2176 */
2177 int
2178 sys_preadv(struct lwp *l, void *v, register_t *retval)
2179 {
2180 struct sys_preadv_args /* {
2181 syscallarg(int) fd;
2182 syscallarg(const struct iovec *) iovp;
2183 syscallarg(int) iovcnt;
2184 syscallarg(off_t) offset;
2185 } */ *uap = v;
2186 struct proc *p = l->l_proc;
2187 struct filedesc *fdp = p->p_fd;
2188 struct file *fp;
2189 struct vnode *vp;
2190 off_t offset;
2191 int error, fd = SCARG(uap, fd);
2192
2193 if ((fp = fd_getfile(fdp, fd)) == NULL)
2194 return (EBADF);
2195
2196 if ((fp->f_flag & FREAD) == 0) {
2197 simple_unlock(&fp->f_slock);
2198 return (EBADF);
2199 }
2200
2201 FILE_USE(fp);
2202
2203 vp = (struct vnode *)fp->f_data;
2204 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2205 error = ESPIPE;
2206 goto out;
2207 }
2208
2209 offset = SCARG(uap, offset);
2210
2211 /*
2212 * XXX This works because no file systems actually
2213 * XXX take any action on the seek operation.
2214 */
2215 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2216 goto out;
2217
2218 /* dofilereadv() will unuse the descriptor for us */
2219 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2220 &offset, 0, retval));
2221
2222 out:
2223 FILE_UNUSE(fp, l);
2224 return (error);
2225 }
2226
2227 /*
2228 * Positional write system call.
2229 */
2230 int
2231 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2232 {
2233 struct sys_pwrite_args /* {
2234 syscallarg(int) fd;
2235 syscallarg(const void *) buf;
2236 syscallarg(size_t) nbyte;
2237 syscallarg(off_t) offset;
2238 } */ *uap = v;
2239 struct proc *p = l->l_proc;
2240 struct filedesc *fdp = p->p_fd;
2241 struct file *fp;
2242 struct vnode *vp;
2243 off_t offset;
2244 int error, fd = SCARG(uap, fd);
2245
2246 if ((fp = fd_getfile(fdp, fd)) == NULL)
2247 return (EBADF);
2248
2249 if ((fp->f_flag & FWRITE) == 0) {
2250 simple_unlock(&fp->f_slock);
2251 return (EBADF);
2252 }
2253
2254 FILE_USE(fp);
2255
2256 vp = (struct vnode *)fp->f_data;
2257 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2258 error = ESPIPE;
2259 goto out;
2260 }
2261
2262 offset = SCARG(uap, offset);
2263
2264 /*
2265 * XXX This works because no file systems actually
2266 * XXX take any action on the seek operation.
2267 */
2268 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2269 goto out;
2270
2271 /* dofilewrite() will unuse the descriptor for us */
2272 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2273 &offset, 0, retval));
2274
2275 out:
2276 FILE_UNUSE(fp, l);
2277 return (error);
2278 }
2279
2280 /*
2281 * Positional gather write system call.
2282 */
2283 int
2284 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2285 {
2286 struct sys_pwritev_args /* {
2287 syscallarg(int) fd;
2288 syscallarg(const struct iovec *) iovp;
2289 syscallarg(int) iovcnt;
2290 syscallarg(off_t) offset;
2291 } */ *uap = v;
2292 struct proc *p = l->l_proc;
2293 struct filedesc *fdp = p->p_fd;
2294 struct file *fp;
2295 struct vnode *vp;
2296 off_t offset;
2297 int error, fd = SCARG(uap, fd);
2298
2299 if ((fp = fd_getfile(fdp, fd)) == NULL)
2300 return (EBADF);
2301
2302 if ((fp->f_flag & FWRITE) == 0) {
2303 simple_unlock(&fp->f_slock);
2304 return (EBADF);
2305 }
2306
2307 FILE_USE(fp);
2308
2309 vp = (struct vnode *)fp->f_data;
2310 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2311 error = ESPIPE;
2312 goto out;
2313 }
2314
2315 offset = SCARG(uap, offset);
2316
2317 /*
2318 * XXX This works because no file systems actually
2319 * XXX take any action on the seek operation.
2320 */
2321 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2322 goto out;
2323
2324 /* dofilewritev() will unuse the descriptor for us */
2325 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2326 &offset, 0, retval));
2327
2328 out:
2329 FILE_UNUSE(fp, l);
2330 return (error);
2331 }
2332
2333 /*
2334 * Check access permissions.
2335 */
2336 int
2337 sys_access(struct lwp *l, void *v, register_t *retval)
2338 {
2339 struct sys_access_args /* {
2340 syscallarg(const char *) path;
2341 syscallarg(int) flags;
2342 } */ *uap = v;
2343 struct proc *p = l->l_proc;
2344 kauth_cred_t cred;
2345 struct vnode *vp;
2346 int error, flags;
2347 struct nameidata nd;
2348
2349 cred = kauth_cred_dup(p->p_cred);
2350 kauth_cred_seteuid(cred, kauth_cred_getuid(p->p_cred));
2351 kauth_cred_setegid(cred, kauth_cred_getgid(p->p_cred));
2352 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2353 SCARG(uap, path), l);
2354 /* Override default credentials */
2355 nd.ni_cnd.cn_cred = cred;
2356 if ((error = namei(&nd)) != 0)
2357 goto out;
2358 vp = nd.ni_vp;
2359
2360 /* Flags == 0 means only check for existence. */
2361 if (SCARG(uap, flags)) {
2362 flags = 0;
2363 if (SCARG(uap, flags) & R_OK)
2364 flags |= VREAD;
2365 if (SCARG(uap, flags) & W_OK)
2366 flags |= VWRITE;
2367 if (SCARG(uap, flags) & X_OK)
2368 flags |= VEXEC;
2369
2370 error = VOP_ACCESS(vp, flags, cred, l);
2371 if (!error && (flags & VWRITE))
2372 error = vn_writechk(vp);
2373 }
2374 vput(vp);
2375 out:
2376 kauth_cred_free(cred);
2377 return (error);
2378 }
2379
2380 /*
2381 * Get file status; this version follows links.
2382 */
2383 /* ARGSUSED */
2384 int
2385 sys___stat30(struct lwp *l, void *v, register_t *retval)
2386 {
2387 struct sys___stat30_args /* {
2388 syscallarg(const char *) path;
2389 syscallarg(struct stat *) ub;
2390 } */ *uap = v;
2391 struct stat sb;
2392 int error;
2393 struct nameidata nd;
2394
2395 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2396 SCARG(uap, path), l);
2397 if ((error = namei(&nd)) != 0)
2398 return (error);
2399 error = vn_stat(nd.ni_vp, &sb, l);
2400 vput(nd.ni_vp);
2401 if (error)
2402 return (error);
2403 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2404 return (error);
2405 }
2406
2407 /*
2408 * Get file status; this version does not follow links.
2409 */
2410 /* ARGSUSED */
2411 int
2412 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2413 {
2414 struct sys___lstat30_args /* {
2415 syscallarg(const char *) path;
2416 syscallarg(struct stat *) ub;
2417 } */ *uap = v;
2418 struct stat sb;
2419 int error;
2420 struct nameidata nd;
2421
2422 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2423 SCARG(uap, path), l);
2424 if ((error = namei(&nd)) != 0)
2425 return (error);
2426 error = vn_stat(nd.ni_vp, &sb, l);
2427 vput(nd.ni_vp);
2428 if (error)
2429 return (error);
2430 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2431 return (error);
2432 }
2433
2434 /*
2435 * Get configurable pathname variables.
2436 */
2437 /* ARGSUSED */
2438 int
2439 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2440 {
2441 struct sys_pathconf_args /* {
2442 syscallarg(const char *) path;
2443 syscallarg(int) name;
2444 } */ *uap = v;
2445 int error;
2446 struct nameidata nd;
2447
2448 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2449 SCARG(uap, path), l);
2450 if ((error = namei(&nd)) != 0)
2451 return (error);
2452 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2453 vput(nd.ni_vp);
2454 return (error);
2455 }
2456
2457 /*
2458 * Return target name of a symbolic link.
2459 */
2460 /* ARGSUSED */
2461 int
2462 sys_readlink(struct lwp *l, void *v, register_t *retval)
2463 {
2464 struct sys_readlink_args /* {
2465 syscallarg(const char *) path;
2466 syscallarg(char *) buf;
2467 syscallarg(size_t) count;
2468 } */ *uap = v;
2469 struct proc *p = l->l_proc;
2470 struct vnode *vp;
2471 struct iovec aiov;
2472 struct uio auio;
2473 int error;
2474 struct nameidata nd;
2475
2476 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2477 SCARG(uap, path), l);
2478 if ((error = namei(&nd)) != 0)
2479 return (error);
2480 vp = nd.ni_vp;
2481 if (vp->v_type != VLNK)
2482 error = EINVAL;
2483 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2484 (error = VOP_ACCESS(vp, VREAD, p->p_cred, l)) == 0) {
2485 aiov.iov_base = SCARG(uap, buf);
2486 aiov.iov_len = SCARG(uap, count);
2487 auio.uio_iov = &aiov;
2488 auio.uio_iovcnt = 1;
2489 auio.uio_offset = 0;
2490 auio.uio_rw = UIO_READ;
2491 KASSERT(l == curlwp);
2492 auio.uio_vmspace = l->l_proc->p_vmspace;
2493 auio.uio_resid = SCARG(uap, count);
2494 error = VOP_READLINK(vp, &auio, p->p_cred);
2495 }
2496 vput(vp);
2497 *retval = SCARG(uap, count) - auio.uio_resid;
2498 return (error);
2499 }
2500
2501 /*
2502 * Change flags of a file given a path name.
2503 */
2504 /* ARGSUSED */
2505 int
2506 sys_chflags(struct lwp *l, void *v, register_t *retval)
2507 {
2508 struct sys_chflags_args /* {
2509 syscallarg(const char *) path;
2510 syscallarg(u_long) flags;
2511 } */ *uap = v;
2512 struct vnode *vp;
2513 int error;
2514 struct nameidata nd;
2515
2516 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2517 if ((error = namei(&nd)) != 0)
2518 return (error);
2519 vp = nd.ni_vp;
2520 error = change_flags(vp, SCARG(uap, flags), l);
2521 vput(vp);
2522 return (error);
2523 }
2524
2525 /*
2526 * Change flags of a file given a file descriptor.
2527 */
2528 /* ARGSUSED */
2529 int
2530 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2531 {
2532 struct sys_fchflags_args /* {
2533 syscallarg(int) fd;
2534 syscallarg(u_long) flags;
2535 } */ *uap = v;
2536 struct proc *p = l->l_proc;
2537 struct vnode *vp;
2538 struct file *fp;
2539 int error;
2540
2541 /* getvnode() will use the descriptor for us */
2542 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2543 return (error);
2544 vp = (struct vnode *)fp->f_data;
2545 error = change_flags(vp, SCARG(uap, flags), l);
2546 VOP_UNLOCK(vp, 0);
2547 FILE_UNUSE(fp, l);
2548 return (error);
2549 }
2550
2551 /*
2552 * Change flags of a file given a path name; this version does
2553 * not follow links.
2554 */
2555 int
2556 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2557 {
2558 struct sys_lchflags_args /* {
2559 syscallarg(const char *) path;
2560 syscallarg(u_long) flags;
2561 } */ *uap = v;
2562 struct vnode *vp;
2563 int error;
2564 struct nameidata nd;
2565
2566 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2567 if ((error = namei(&nd)) != 0)
2568 return (error);
2569 vp = nd.ni_vp;
2570 error = change_flags(vp, SCARG(uap, flags), l);
2571 vput(vp);
2572 return (error);
2573 }
2574
2575 /*
2576 * Common routine to change flags of a file.
2577 */
2578 int
2579 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2580 {
2581 struct proc *p = l->l_proc;
2582 struct mount *mp;
2583 struct vattr vattr;
2584 int error;
2585
2586 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2587 return (error);
2588 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2589 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2590 /*
2591 * Non-superusers cannot change the flags on devices, even if they
2592 * own them.
2593 */
2594 if (kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
2595 &p->p_acflag) != 0) {
2596 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
2597 goto out;
2598 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2599 error = EINVAL;
2600 goto out;
2601 }
2602 }
2603 VATTR_NULL(&vattr);
2604 vattr.va_flags = flags;
2605 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2606 out:
2607 vn_finished_write(mp, 0);
2608 return (error);
2609 }
2610
2611 /*
2612 * Change mode of a file given path name; this version follows links.
2613 */
2614 /* ARGSUSED */
2615 int
2616 sys_chmod(struct lwp *l, void *v, register_t *retval)
2617 {
2618 struct sys_chmod_args /* {
2619 syscallarg(const char *) path;
2620 syscallarg(int) mode;
2621 } */ *uap = v;
2622 int error;
2623 struct nameidata nd;
2624
2625 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2626 if ((error = namei(&nd)) != 0)
2627 return (error);
2628
2629 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2630
2631 vrele(nd.ni_vp);
2632 return (error);
2633 }
2634
2635 /*
2636 * Change mode of a file given a file descriptor.
2637 */
2638 /* ARGSUSED */
2639 int
2640 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2641 {
2642 struct sys_fchmod_args /* {
2643 syscallarg(int) fd;
2644 syscallarg(int) mode;
2645 } */ *uap = v;
2646 struct proc *p = l->l_proc;
2647 struct file *fp;
2648 int error;
2649
2650 /* getvnode() will use the descriptor for us */
2651 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2652 return (error);
2653
2654 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2655 FILE_UNUSE(fp, l);
2656 return (error);
2657 }
2658
2659 /*
2660 * Change mode of a file given path name; this version does not follow links.
2661 */
2662 /* ARGSUSED */
2663 int
2664 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2665 {
2666 struct sys_lchmod_args /* {
2667 syscallarg(const char *) path;
2668 syscallarg(int) mode;
2669 } */ *uap = v;
2670 int error;
2671 struct nameidata nd;
2672
2673 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2674 if ((error = namei(&nd)) != 0)
2675 return (error);
2676
2677 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2678
2679 vrele(nd.ni_vp);
2680 return (error);
2681 }
2682
2683 /*
2684 * Common routine to set mode given a vnode.
2685 */
2686 static int
2687 change_mode(struct vnode *vp, int mode, struct lwp *l)
2688 {
2689 struct proc *p = l->l_proc;
2690 struct mount *mp;
2691 struct vattr vattr;
2692 int error;
2693
2694 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2695 return (error);
2696 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2697 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2698 VATTR_NULL(&vattr);
2699 vattr.va_mode = mode & ALLPERMS;
2700 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2701 VOP_UNLOCK(vp, 0);
2702 vn_finished_write(mp, 0);
2703 return (error);
2704 }
2705
2706 /*
2707 * Set ownership given a path name; this version follows links.
2708 */
2709 /* ARGSUSED */
2710 int
2711 sys_chown(struct lwp *l, void *v, register_t *retval)
2712 {
2713 struct sys_chown_args /* {
2714 syscallarg(const char *) path;
2715 syscallarg(uid_t) uid;
2716 syscallarg(gid_t) gid;
2717 } */ *uap = v;
2718 int error;
2719 struct nameidata nd;
2720
2721 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2722 if ((error = namei(&nd)) != 0)
2723 return (error);
2724
2725 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2726
2727 vrele(nd.ni_vp);
2728 return (error);
2729 }
2730
2731 /*
2732 * Set ownership given a path name; this version follows links.
2733 * Provides POSIX semantics.
2734 */
2735 /* ARGSUSED */
2736 int
2737 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2738 {
2739 struct sys_chown_args /* {
2740 syscallarg(const char *) path;
2741 syscallarg(uid_t) uid;
2742 syscallarg(gid_t) gid;
2743 } */ *uap = v;
2744 int error;
2745 struct nameidata nd;
2746
2747 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2748 if ((error = namei(&nd)) != 0)
2749 return (error);
2750
2751 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2752
2753 vrele(nd.ni_vp);
2754 return (error);
2755 }
2756
2757 /*
2758 * Set ownership given a file descriptor.
2759 */
2760 /* ARGSUSED */
2761 int
2762 sys_fchown(struct lwp *l, void *v, register_t *retval)
2763 {
2764 struct sys_fchown_args /* {
2765 syscallarg(int) fd;
2766 syscallarg(uid_t) uid;
2767 syscallarg(gid_t) gid;
2768 } */ *uap = v;
2769 struct proc *p = l->l_proc;
2770 int error;
2771 struct file *fp;
2772
2773 /* getvnode() will use the descriptor for us */
2774 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2775 return (error);
2776
2777 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2778 SCARG(uap, gid), l, 0);
2779 FILE_UNUSE(fp, l);
2780 return (error);
2781 }
2782
2783 /*
2784 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2785 */
2786 /* ARGSUSED */
2787 int
2788 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2789 {
2790 struct sys_fchown_args /* {
2791 syscallarg(int) fd;
2792 syscallarg(uid_t) uid;
2793 syscallarg(gid_t) gid;
2794 } */ *uap = v;
2795 struct proc *p = l->l_proc;
2796 int error;
2797 struct file *fp;
2798
2799 /* getvnode() will use the descriptor for us */
2800 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2801 return (error);
2802
2803 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2804 SCARG(uap, gid), l, 1);
2805 FILE_UNUSE(fp, l);
2806 return (error);
2807 }
2808
2809 /*
2810 * Set ownership given a path name; this version does not follow links.
2811 */
2812 /* ARGSUSED */
2813 int
2814 sys_lchown(struct lwp *l, void *v, register_t *retval)
2815 {
2816 struct sys_lchown_args /* {
2817 syscallarg(const char *) path;
2818 syscallarg(uid_t) uid;
2819 syscallarg(gid_t) gid;
2820 } */ *uap = v;
2821 int error;
2822 struct nameidata nd;
2823
2824 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2825 if ((error = namei(&nd)) != 0)
2826 return (error);
2827
2828 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2829
2830 vrele(nd.ni_vp);
2831 return (error);
2832 }
2833
2834 /*
2835 * Set ownership given a path name; this version does not follow links.
2836 * Provides POSIX/XPG semantics.
2837 */
2838 /* ARGSUSED */
2839 int
2840 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2841 {
2842 struct sys_lchown_args /* {
2843 syscallarg(const char *) path;
2844 syscallarg(uid_t) uid;
2845 syscallarg(gid_t) gid;
2846 } */ *uap = v;
2847 int error;
2848 struct nameidata nd;
2849
2850 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2851 if ((error = namei(&nd)) != 0)
2852 return (error);
2853
2854 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2855
2856 vrele(nd.ni_vp);
2857 return (error);
2858 }
2859
2860 /*
2861 * Common routine to set ownership given a vnode.
2862 */
2863 static int
2864 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2865 int posix_semantics)
2866 {
2867 struct proc *p = l->l_proc;
2868 struct mount *mp;
2869 struct vattr vattr;
2870 mode_t newmode;
2871 int error;
2872
2873 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2874 return (error);
2875 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
2876 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2877 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
2878 goto out;
2879
2880 #define CHANGED(x) ((int)(x) != -1)
2881 newmode = vattr.va_mode;
2882 if (posix_semantics) {
2883 /*
2884 * POSIX/XPG semantics: if the caller is not the super-user,
2885 * clear set-user-id and set-group-id bits. Both POSIX and
2886 * the XPG consider the behaviour for calls by the super-user
2887 * implementation-defined; we leave the set-user-id and set-
2888 * group-id settings intact in that case.
2889 */
2890 if (kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
2891 NULL) != 0)
2892 newmode &= ~(S_ISUID | S_ISGID);
2893 } else {
2894 /*
2895 * NetBSD semantics: when changing owner and/or group,
2896 * clear the respective bit(s).
2897 */
2898 if (CHANGED(uid))
2899 newmode &= ~S_ISUID;
2900 if (CHANGED(gid))
2901 newmode &= ~S_ISGID;
2902 }
2903 /* Update va_mode iff altered. */
2904 if (vattr.va_mode == newmode)
2905 newmode = VNOVAL;
2906
2907 VATTR_NULL(&vattr);
2908 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2909 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2910 vattr.va_mode = newmode;
2911 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
2912 #undef CHANGED
2913
2914 out:
2915 VOP_UNLOCK(vp, 0);
2916 vn_finished_write(mp, 0);
2917 return (error);
2918 }
2919
2920 /*
2921 * Set the access and modification times given a path name; this
2922 * version follows links.
2923 */
2924 /* ARGSUSED */
2925 int
2926 sys_utimes(struct lwp *l, void *v, register_t *retval)
2927 {
2928 struct sys_utimes_args /* {
2929 syscallarg(const char *) path;
2930 syscallarg(const struct timeval *) tptr;
2931 } */ *uap = v;
2932 int error;
2933 struct nameidata nd;
2934
2935 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2936 if ((error = namei(&nd)) != 0)
2937 return (error);
2938
2939 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2940
2941 vrele(nd.ni_vp);
2942 return (error);
2943 }
2944
2945 /*
2946 * Set the access and modification times given a file descriptor.
2947 */
2948 /* ARGSUSED */
2949 int
2950 sys_futimes(struct lwp *l, void *v, register_t *retval)
2951 {
2952 struct sys_futimes_args /* {
2953 syscallarg(int) fd;
2954 syscallarg(const struct timeval *) tptr;
2955 } */ *uap = v;
2956 struct proc *p = l->l_proc;
2957 int error;
2958 struct file *fp;
2959
2960 /* getvnode() will use the descriptor for us */
2961 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2962 return (error);
2963
2964 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
2965 FILE_UNUSE(fp, l);
2966 return (error);
2967 }
2968
2969 /*
2970 * Set the access and modification times given a path name; this
2971 * version does not follow links.
2972 */
2973 /* ARGSUSED */
2974 int
2975 sys_lutimes(struct lwp *l, void *v, register_t *retval)
2976 {
2977 struct sys_lutimes_args /* {
2978 syscallarg(const char *) path;
2979 syscallarg(const struct timeval *) tptr;
2980 } */ *uap = v;
2981 int error;
2982 struct nameidata nd;
2983
2984 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2985 if ((error = namei(&nd)) != 0)
2986 return (error);
2987
2988 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2989
2990 vrele(nd.ni_vp);
2991 return (error);
2992 }
2993
2994 /*
2995 * Common routine to set access and modification times given a vnode.
2996 */
2997 static int
2998 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
2999 {
3000 struct proc *p = l->l_proc;
3001 struct mount *mp;
3002 struct vattr vattr;
3003 int error;
3004
3005 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3006 return (error);
3007 VATTR_NULL(&vattr);
3008 if (tptr == NULL) {
3009 nanotime(&vattr.va_atime);
3010 vattr.va_mtime = vattr.va_atime;
3011 vattr.va_vaflags |= VA_UTIMES_NULL;
3012 } else {
3013 struct timeval tv[2];
3014
3015 error = copyin(tptr, tv, sizeof(tv));
3016 if (error)
3017 goto out;
3018 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3019 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3020 }
3021 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
3022 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3023 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
3024 VOP_UNLOCK(vp, 0);
3025 out:
3026 vn_finished_write(mp, 0);
3027 return (error);
3028 }
3029
3030 /*
3031 * Truncate a file given its path name.
3032 */
3033 /* ARGSUSED */
3034 int
3035 sys_truncate(struct lwp *l, void *v, register_t *retval)
3036 {
3037 struct sys_truncate_args /* {
3038 syscallarg(const char *) path;
3039 syscallarg(int) pad;
3040 syscallarg(off_t) length;
3041 } */ *uap = v;
3042 struct proc *p = l->l_proc;
3043 struct vnode *vp;
3044 struct mount *mp;
3045 struct vattr vattr;
3046 int error;
3047 struct nameidata nd;
3048
3049 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3050 if ((error = namei(&nd)) != 0)
3051 return (error);
3052 vp = nd.ni_vp;
3053 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3054 vrele(vp);
3055 return (error);
3056 }
3057 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
3058 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3059 if (vp->v_type == VDIR)
3060 error = EISDIR;
3061 else if ((error = vn_writechk(vp)) == 0 &&
3062 (error = VOP_ACCESS(vp, VWRITE, p->p_cred, l)) == 0) {
3063 VATTR_NULL(&vattr);
3064 vattr.va_size = SCARG(uap, length);
3065 error = VOP_SETATTR(vp, &vattr, p->p_cred, l);
3066 }
3067 vput(vp);
3068 vn_finished_write(mp, 0);
3069 return (error);
3070 }
3071
3072 /*
3073 * Truncate a file given a file descriptor.
3074 */
3075 /* ARGSUSED */
3076 int
3077 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3078 {
3079 struct sys_ftruncate_args /* {
3080 syscallarg(int) fd;
3081 syscallarg(int) pad;
3082 syscallarg(off_t) length;
3083 } */ *uap = v;
3084 struct proc *p = l->l_proc;
3085 struct mount *mp;
3086 struct vattr vattr;
3087 struct vnode *vp;
3088 struct file *fp;
3089 int error;
3090
3091 /* getvnode() will use the descriptor for us */
3092 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3093 return (error);
3094 if ((fp->f_flag & FWRITE) == 0) {
3095 error = EINVAL;
3096 goto out;
3097 }
3098 vp = (struct vnode *)fp->f_data;
3099 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3100 FILE_UNUSE(fp, l);
3101 return (error);
3102 }
3103 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
3104 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3105 if (vp->v_type == VDIR)
3106 error = EISDIR;
3107 else if ((error = vn_writechk(vp)) == 0) {
3108 VATTR_NULL(&vattr);
3109 vattr.va_size = SCARG(uap, length);
3110 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3111 }
3112 VOP_UNLOCK(vp, 0);
3113 vn_finished_write(mp, 0);
3114 out:
3115 FILE_UNUSE(fp, l);
3116 return (error);
3117 }
3118
3119 /*
3120 * Sync an open file.
3121 */
3122 /* ARGSUSED */
3123 int
3124 sys_fsync(struct lwp *l, void *v, register_t *retval)
3125 {
3126 struct sys_fsync_args /* {
3127 syscallarg(int) fd;
3128 } */ *uap = v;
3129 struct proc *p = l->l_proc;
3130 struct vnode *vp;
3131 struct mount *mp;
3132 struct file *fp;
3133 int error;
3134
3135 /* getvnode() will use the descriptor for us */
3136 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3137 return (error);
3138 vp = (struct vnode *)fp->f_data;
3139 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3140 FILE_UNUSE(fp, l);
3141 return (error);
3142 }
3143 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3144 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3145 if (error == 0 && bioops.io_fsync != NULL &&
3146 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3147 (*bioops.io_fsync)(vp, 0);
3148 VOP_UNLOCK(vp, 0);
3149 vn_finished_write(mp, 0);
3150 FILE_UNUSE(fp, l);
3151 return (error);
3152 }
3153
3154 /*
3155 * Sync a range of file data. API modeled after that found in AIX.
3156 *
3157 * FDATASYNC indicates that we need only save enough metadata to be able
3158 * to re-read the written data. Note we duplicate AIX's requirement that
3159 * the file be open for writing.
3160 */
3161 /* ARGSUSED */
3162 int
3163 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3164 {
3165 struct sys_fsync_range_args /* {
3166 syscallarg(int) fd;
3167 syscallarg(int) flags;
3168 syscallarg(off_t) start;
3169 syscallarg(off_t) length;
3170 } */ *uap = v;
3171 struct proc *p = l->l_proc;
3172 struct vnode *vp;
3173 struct file *fp;
3174 int flags, nflags;
3175 off_t s, e, len;
3176 int error;
3177
3178 /* getvnode() will use the descriptor for us */
3179 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3180 return (error);
3181
3182 if ((fp->f_flag & FWRITE) == 0) {
3183 FILE_UNUSE(fp, l);
3184 return (EBADF);
3185 }
3186
3187 flags = SCARG(uap, flags);
3188 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3189 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3190 return (EINVAL);
3191 }
3192 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3193 if (flags & FDATASYNC)
3194 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3195 else
3196 nflags = FSYNC_WAIT;
3197 if (flags & FDISKSYNC)
3198 nflags |= FSYNC_CACHE;
3199
3200 len = SCARG(uap, length);
3201 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3202 if (len) {
3203 s = SCARG(uap, start);
3204 e = s + len;
3205 if (e < s) {
3206 FILE_UNUSE(fp, l);
3207 return (EINVAL);
3208 }
3209 } else {
3210 e = 0;
3211 s = 0;
3212 }
3213
3214 vp = (struct vnode *)fp->f_data;
3215 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3216 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3217
3218 if (error == 0 && bioops.io_fsync != NULL &&
3219 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3220 (*bioops.io_fsync)(vp, nflags);
3221
3222 VOP_UNLOCK(vp, 0);
3223 FILE_UNUSE(fp, l);
3224 return (error);
3225 }
3226
3227 /*
3228 * Sync the data of an open file.
3229 */
3230 /* ARGSUSED */
3231 int
3232 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3233 {
3234 struct sys_fdatasync_args /* {
3235 syscallarg(int) fd;
3236 } */ *uap = v;
3237 struct proc *p = l->l_proc;
3238 struct vnode *vp;
3239 struct file *fp;
3240 int error;
3241
3242 /* getvnode() will use the descriptor for us */
3243 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3244 return (error);
3245 if ((fp->f_flag & FWRITE) == 0) {
3246 FILE_UNUSE(fp, l);
3247 return (EBADF);
3248 }
3249 vp = (struct vnode *)fp->f_data;
3250 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3251 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3252 VOP_UNLOCK(vp, 0);
3253 FILE_UNUSE(fp, l);
3254 return (error);
3255 }
3256
3257 /*
3258 * Rename files, (standard) BSD semantics frontend.
3259 */
3260 /* ARGSUSED */
3261 int
3262 sys_rename(struct lwp *l, void *v, register_t *retval)
3263 {
3264 struct sys_rename_args /* {
3265 syscallarg(const char *) from;
3266 syscallarg(const char *) to;
3267 } */ *uap = v;
3268
3269 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3270 }
3271
3272 /*
3273 * Rename files, POSIX semantics frontend.
3274 */
3275 /* ARGSUSED */
3276 int
3277 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3278 {
3279 struct sys___posix_rename_args /* {
3280 syscallarg(const char *) from;
3281 syscallarg(const char *) to;
3282 } */ *uap = v;
3283
3284 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3285 }
3286
3287 /*
3288 * Rename files. Source and destination must either both be directories,
3289 * or both not be directories. If target is a directory, it must be empty.
3290 * If `from' and `to' refer to the same object, the value of the `retain'
3291 * argument is used to determine whether `from' will be
3292 *
3293 * (retain == 0) deleted unless `from' and `to' refer to the same
3294 * object in the file system's name space (BSD).
3295 * (retain == 1) always retained (POSIX).
3296 */
3297 static int
3298 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3299 {
3300 struct mount *mp = NULL;
3301 struct vnode *tvp, *fvp, *tdvp;
3302 struct nameidata fromnd, tond;
3303 struct proc *p;
3304 int error;
3305
3306 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3307 from, l);
3308 if ((error = namei(&fromnd)) != 0)
3309 return (error);
3310 fvp = fromnd.ni_vp;
3311 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3312 if (error != 0) {
3313 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3314 vrele(fromnd.ni_dvp);
3315 vrele(fvp);
3316 if (fromnd.ni_startdir)
3317 vrele(fromnd.ni_startdir);
3318 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3319 return (error);
3320 }
3321 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3322 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3323 if ((error = namei(&tond)) != 0) {
3324 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3325 vrele(fromnd.ni_dvp);
3326 vrele(fvp);
3327 goto out1;
3328 }
3329 tdvp = tond.ni_dvp;
3330 tvp = tond.ni_vp;
3331
3332 if (tvp != NULL) {
3333 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3334 error = ENOTDIR;
3335 goto out;
3336 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3337 error = EISDIR;
3338 goto out;
3339 }
3340 }
3341
3342 if (fvp == tdvp)
3343 error = EINVAL;
3344
3345 /*
3346 * Source and destination refer to the same object.
3347 */
3348 if (fvp == tvp) {
3349 if (retain)
3350 error = -1;
3351 else if (fromnd.ni_dvp == tdvp &&
3352 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3353 !memcmp(fromnd.ni_cnd.cn_nameptr,
3354 tond.ni_cnd.cn_nameptr,
3355 fromnd.ni_cnd.cn_namelen))
3356 error = -1;
3357 }
3358
3359 #ifdef VERIFIED_EXEC
3360 if (!error)
3361 error = veriexec_renamechk(fvp, fromnd.ni_dirp, tond.ni_dirp, l);
3362 #endif /* VERIFIED_EXEC */
3363
3364 out:
3365 p = l->l_proc;
3366 if (!error) {
3367 VOP_LEASE(tdvp, l, p->p_cred, LEASE_WRITE);
3368 if (fromnd.ni_dvp != tdvp)
3369 VOP_LEASE(fromnd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3370 if (tvp) {
3371 VOP_LEASE(tvp, l, p->p_cred, LEASE_WRITE);
3372 }
3373 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3374 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3375 } else {
3376 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3377 if (tdvp == tvp)
3378 vrele(tdvp);
3379 else
3380 vput(tdvp);
3381 if (tvp)
3382 vput(tvp);
3383 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3384 vrele(fromnd.ni_dvp);
3385 vrele(fvp);
3386 }
3387 vrele(tond.ni_startdir);
3388 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3389 out1:
3390 vn_finished_write(mp, 0);
3391 if (fromnd.ni_startdir)
3392 vrele(fromnd.ni_startdir);
3393 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3394 return (error == -1 ? 0 : error);
3395 }
3396
3397 /*
3398 * Make a directory file.
3399 */
3400 /* ARGSUSED */
3401 int
3402 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3403 {
3404 struct sys_mkdir_args /* {
3405 syscallarg(const char *) path;
3406 syscallarg(int) mode;
3407 } */ *uap = v;
3408 struct proc *p = l->l_proc;
3409 struct mount *mp;
3410 struct vnode *vp;
3411 struct vattr vattr;
3412 int error;
3413 struct nameidata nd;
3414
3415 restart:
3416 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3417 SCARG(uap, path), l);
3418 if ((error = namei(&nd)) != 0)
3419 return (error);
3420 vp = nd.ni_vp;
3421 if (vp != NULL) {
3422 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3423 if (nd.ni_dvp == vp)
3424 vrele(nd.ni_dvp);
3425 else
3426 vput(nd.ni_dvp);
3427 vrele(vp);
3428 return (EEXIST);
3429 }
3430 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3431 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3432 if (nd.ni_dvp == vp)
3433 vrele(nd.ni_dvp);
3434 else
3435 vput(nd.ni_dvp);
3436 if ((error = vn_start_write(NULL, &mp,
3437 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3438 return (error);
3439 goto restart;
3440 }
3441 VATTR_NULL(&vattr);
3442 vattr.va_type = VDIR;
3443 vattr.va_mode =
3444 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3445 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3446 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3447 if (!error)
3448 vput(nd.ni_vp);
3449 vn_finished_write(mp, 0);
3450 return (error);
3451 }
3452
3453 /*
3454 * Remove a directory file.
3455 */
3456 /* ARGSUSED */
3457 int
3458 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3459 {
3460 struct sys_rmdir_args /* {
3461 syscallarg(const char *) path;
3462 } */ *uap = v;
3463 struct proc *p = l->l_proc;
3464 struct mount *mp;
3465 struct vnode *vp;
3466 int error;
3467 struct nameidata nd;
3468
3469 restart:
3470 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3471 SCARG(uap, path), l);
3472 if ((error = namei(&nd)) != 0)
3473 return (error);
3474 vp = nd.ni_vp;
3475 if (vp->v_type != VDIR) {
3476 error = ENOTDIR;
3477 goto out;
3478 }
3479 /*
3480 * No rmdir "." please.
3481 */
3482 if (nd.ni_dvp == vp) {
3483 error = EINVAL;
3484 goto out;
3485 }
3486 /*
3487 * The root of a mounted filesystem cannot be deleted.
3488 */
3489 if (vp->v_flag & VROOT) {
3490 error = EBUSY;
3491 goto out;
3492 }
3493 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3494 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3495 if (nd.ni_dvp == vp)
3496 vrele(nd.ni_dvp);
3497 else
3498 vput(nd.ni_dvp);
3499 vput(vp);
3500 if ((error = vn_start_write(NULL, &mp,
3501 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3502 return (error);
3503 goto restart;
3504 }
3505 VOP_LEASE(nd.ni_dvp, l, p->p_cred, LEASE_WRITE);
3506 VOP_LEASE(vp, l, p->p_cred, LEASE_WRITE);
3507 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3508 vn_finished_write(mp, 0);
3509 return (error);
3510
3511 out:
3512 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3513 if (nd.ni_dvp == vp)
3514 vrele(nd.ni_dvp);
3515 else
3516 vput(nd.ni_dvp);
3517 vput(vp);
3518 return (error);
3519 }
3520
3521 /*
3522 * Read a block of directory entries in a file system independent format.
3523 */
3524 int
3525 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3526 {
3527 struct sys___getdents30_args /* {
3528 syscallarg(int) fd;
3529 syscallarg(char *) buf;
3530 syscallarg(size_t) count;
3531 } */ *uap = v;
3532 struct proc *p = l->l_proc;
3533 struct file *fp;
3534 int error, done;
3535
3536 /* getvnode() will use the descriptor for us */
3537 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3538 return (error);
3539 if ((fp->f_flag & FREAD) == 0) {
3540 error = EBADF;
3541 goto out;
3542 }
3543 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3544 SCARG(uap, count), &done, l, 0, 0);
3545 #ifdef KTRACE
3546 if (!error && KTRPOINT(p, KTR_GENIO)) {
3547 struct iovec iov;
3548 iov.iov_base = SCARG(uap, buf);
3549 iov.iov_len = done;
3550 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3551 }
3552 #endif
3553 *retval = done;
3554 out:
3555 FILE_UNUSE(fp, l);
3556 return (error);
3557 }
3558
3559 /*
3560 * Set the mode mask for creation of filesystem nodes.
3561 */
3562 int
3563 sys_umask(struct lwp *l, void *v, register_t *retval)
3564 {
3565 struct sys_umask_args /* {
3566 syscallarg(mode_t) newmask;
3567 } */ *uap = v;
3568 struct proc *p = l->l_proc;
3569 struct cwdinfo *cwdi;
3570
3571 cwdi = p->p_cwdi;
3572 *retval = cwdi->cwdi_cmask;
3573 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3574 return (0);
3575 }
3576
3577 /*
3578 * Void all references to file by ripping underlying filesystem
3579 * away from vnode.
3580 */
3581 /* ARGSUSED */
3582 int
3583 sys_revoke(struct lwp *l, void *v, register_t *retval)
3584 {
3585 struct sys_revoke_args /* {
3586 syscallarg(const char *) path;
3587 } */ *uap = v;
3588 struct proc *p = l->l_proc;
3589 struct mount *mp;
3590 struct vnode *vp;
3591 struct vattr vattr;
3592 int error;
3593 struct nameidata nd;
3594
3595 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3596 if ((error = namei(&nd)) != 0)
3597 return (error);
3598 vp = nd.ni_vp;
3599 if ((error = VOP_GETATTR(vp, &vattr, p->p_cred, l)) != 0)
3600 goto out;
3601 if (kauth_cred_geteuid(p->p_cred) != vattr.va_uid &&
3602 (error = kauth_authorize_generic(p->p_cred, KAUTH_GENERIC_ISSUSER,
3603 &p->p_acflag)) != 0)
3604 goto out;
3605 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3606 goto out;
3607 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3608 VOP_REVOKE(vp, REVOKEALL);
3609 vn_finished_write(mp, 0);
3610 out:
3611 vrele(vp);
3612 return (error);
3613 }
3614
3615 /*
3616 * Convert a user file descriptor to a kernel file entry.
3617 */
3618 int
3619 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3620 {
3621 struct vnode *vp;
3622 struct file *fp;
3623
3624 if ((fp = fd_getfile(fdp, fd)) == NULL)
3625 return (EBADF);
3626
3627 FILE_USE(fp);
3628
3629 if (fp->f_type != DTYPE_VNODE) {
3630 FILE_UNUSE(fp, NULL);
3631 return (EINVAL);
3632 }
3633
3634 vp = (struct vnode *)fp->f_data;
3635 if (vp->v_type == VBAD) {
3636 FILE_UNUSE(fp, NULL);
3637 return (EBADF);
3638 }
3639
3640 *fpp = fp;
3641 return (0);
3642 }
3643