vfs_syscalls.c revision 1.278 1 /* $NetBSD: vfs_syscalls.c,v 1.278 2006/11/21 23:52:41 elad Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.278 2006/11/21 23:52:41 elad Exp $");
41
42 #include "opt_compat_netbsd.h"
43 #include "opt_compat_43.h"
44 #include "opt_fileassoc.h"
45 #include "opt_ktrace.h"
46 #include "fss.h"
47 #include "veriexec.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/namei.h>
52 #include <sys/filedesc.h>
53 #include <sys/kernel.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/vnode.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/uio.h>
60 #include <sys/malloc.h>
61 #include <sys/kmem.h>
62 #include <sys/dirent.h>
63 #include <sys/sysctl.h>
64 #include <sys/sa.h>
65 #include <sys/syscallargs.h>
66 #ifdef KTRACE
67 #include <sys/ktrace.h>
68 #endif
69 #ifdef FILEASSOC
70 #include <sys/fileassoc.h>
71 #endif /* FILEASSOC */
72 #if NVERIEXEC > 0
73 #include <sys/verified_exec.h>
74 #include <sys/syslog.h>
75 #endif /* NVERIEXEC > 0 */
76 #include <sys/kauth.h>
77
78 #include <miscfs/genfs/genfs.h>
79 #include <miscfs/syncfs/syncfs.h>
80
81 #ifdef COMPAT_30
82 #include "opt_nfsserver.h"
83 #include <nfs/rpcv2.h>
84 #endif
85 #include <nfs/nfsproto.h>
86 #ifdef COMPAT_30
87 #include <nfs/nfs.h>
88 #include <nfs/nfs_var.h>
89 #endif
90
91 #if NFSS > 0
92 #include <dev/fssvar.h>
93 #endif
94
95 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount struct");
96
97 static int change_dir(struct nameidata *, struct lwp *);
98 static int change_flags(struct vnode *, u_long, struct lwp *);
99 static int change_mode(struct vnode *, int, struct lwp *l);
100 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
101 static int change_utimes(struct vnode *vp, const struct timeval *,
102 struct lwp *l);
103 static int rename_files(const char *, const char *, struct lwp *, int);
104
105 void checkdirs(struct vnode *);
106
107 int dovfsusermount = 0;
108
109 /*
110 * Virtual File System System Calls
111 */
112
113 /*
114 * Mount a file system.
115 */
116
#if defined(COMPAT_09) || defined(COMPAT_43)
/*
 * Historic numeric file system types, kept for compatibility with the
 * 4.3BSD and NetBSD 0.9 mount system calls.  The array index is the old
 * type number, so the order of the entries is significant.
 *
 * Do not modify this table.  It should only contain file systems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};

/* Number of slots in mountcompatnames[]. */
const int nmountcompatnames =
    sizeof(mountcompatnames) / sizeof(mountcompatnames[0]);
#endif /* COMPAT_09 || COMPAT_43 */
140
141 /* ARGSUSED */
142 int
143 sys_mount(struct lwp *l, void *v, register_t *retval)
144 {
145 struct sys_mount_args /* {
146 syscallarg(const char *) type;
147 syscallarg(const char *) path;
148 syscallarg(int) flags;
149 syscallarg(void *) data;
150 } */ *uap = v;
151 struct vnode *vp;
152 struct mount *mp;
153 int error, flag = 0;
154 char fstypename[MFSNAMELEN];
155 struct vattr va;
156 struct nameidata nd;
157 struct vfsops *vfs;
158
159 /*
160 * if MNT_GETARGS is specified, it should be only flag.
161 */
162
163 if ((SCARG(uap, flags) & MNT_GETARGS) != 0 &&
164 (SCARG(uap, flags) & ~MNT_GETARGS) != 0) {
165 return EINVAL;
166 }
167
168 if (dovfsusermount == 0 && (SCARG(uap, flags) & MNT_GETARGS) == 0 &&
169 (error = kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
170 &l->l_acflag)))
171 return (error);
172 /*
173 * Get vnode to be covered
174 */
175 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
176 SCARG(uap, path), l);
177 if ((error = namei(&nd)) != 0)
178 return (error);
179 vp = nd.ni_vp;
180 /*
181 * A lookup in VFS_MOUNT might result in an attempt to
182 * lock this vnode again, so make the lock recursive.
183 */
184 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_SETRECURSE);
185 if (SCARG(uap, flags) & (MNT_UPDATE | MNT_GETARGS)) {
186 if ((vp->v_flag & VROOT) == 0) {
187 vput(vp);
188 return (EINVAL);
189 }
190 mp = vp->v_mount;
191 flag = mp->mnt_flag;
192 vfs = mp->mnt_op;
193 /*
194 * We only allow the filesystem to be reloaded if it
195 * is currently mounted read-only.
196 */
197 if ((SCARG(uap, flags) & MNT_RELOAD) &&
198 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
199 vput(vp);
200 return (EOPNOTSUPP); /* Needs translation */
201 }
202 /*
203 * In "highly secure" mode, don't let the caller do anything
204 * but downgrade a filesystem from read-write to read-only.
205 * (see also below; MNT_UPDATE or MNT_GETARGS is required.)
206 */
207 if (securelevel >= 2 &&
208 SCARG(uap, flags) != MNT_GETARGS &&
209 SCARG(uap, flags) !=
210 (mp->mnt_flag | MNT_RDONLY |
211 MNT_RELOAD | MNT_FORCE | MNT_UPDATE)) {
212 vput(vp);
213 return (EPERM);
214 }
215 mp->mnt_flag |= SCARG(uap, flags) &
216 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
217 /*
218 * Only root, or the user that did the original mount is
219 * permitted to update it.
220 */
221 if ((mp->mnt_flag & MNT_GETARGS) == 0 &&
222 mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred) &&
223 (error = kauth_authorize_generic(l->l_cred,
224 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
225 vput(vp);
226 return (error);
227 }
228 /*
229 * Do not allow NFS export by non-root users. For non-root
230 * users, silently enforce MNT_NOSUID and MNT_NODEV, and
231 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
232 */
233 if (kauth_cred_geteuid(l->l_cred) != 0) {
234 if (SCARG(uap, flags) & MNT_EXPORTED) {
235 vput(vp);
236 return (EPERM);
237 }
238 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
239 if (flag & MNT_NOEXEC)
240 SCARG(uap, flags) |= MNT_NOEXEC;
241 }
242 if (vfs_busy(mp, LK_NOWAIT, 0)) {
243 vput(vp);
244 return (EPERM);
245 }
246 goto update;
247 } else {
248 if (securelevel >= 2) {
249 vput(vp);
250 return (EPERM);
251 }
252 }
253 /*
254 * If the user is not root, ensure that they own the directory
255 * onto which we are attempting to mount.
256 */
257 if ((error = VOP_GETATTR(vp, &va, l->l_cred, l)) != 0 ||
258 (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
259 (error = kauth_authorize_generic(l->l_cred,
260 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)) {
261 vput(vp);
262 return (error);
263 }
264 /*
265 * Do not allow NFS export by non-root users. For non-root users,
266 * silently enforce MNT_NOSUID and MNT_NODEV, and MNT_NOEXEC if the
267 * mount point is already MNT_NOEXEC.
268 */
269 if (kauth_cred_geteuid(l->l_cred) != 0) {
270 if (SCARG(uap, flags) & MNT_EXPORTED) {
271 vput(vp);
272 return (EPERM);
273 }
274 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV;
275 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
276 SCARG(uap, flags) |= MNT_NOEXEC;
277 }
278 if ((error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0)) != 0) {
279 vput(vp);
280 return (error);
281 }
282 if (vp->v_type != VDIR) {
283 vput(vp);
284 return (ENOTDIR);
285 }
286 error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL);
287 if (error) {
288 #if defined(COMPAT_09) || defined(COMPAT_43)
289 /*
290 * Historically, filesystem types were identified by numbers.
291 * If we get an integer for the filesystem type instead of a
292 * string, we check to see if it matches one of the historic
293 * filesystem types.
294 */
295 u_long fsindex = (u_long)SCARG(uap, type);
296 if (fsindex >= nmountcompatnames ||
297 mountcompatnames[fsindex] == NULL) {
298 vput(vp);
299 return (ENODEV);
300 }
301 strncpy(fstypename, mountcompatnames[fsindex], MFSNAMELEN);
302 #else
303 vput(vp);
304 return (error);
305 #endif
306 }
307 #ifdef COMPAT_10
308 /* Accept `ufs' as an alias for `ffs'. */
309 if (!strncmp(fstypename, "ufs", MFSNAMELEN))
310 strncpy(fstypename, "ffs", MFSNAMELEN);
311 #endif
312 if ((vfs = vfs_getopsbyname(fstypename)) == NULL) {
313 vput(vp);
314 return (ENODEV);
315 }
316 if (vp->v_mountedhere != NULL) {
317 vput(vp);
318 return (EBUSY);
319 }
320
321 /*
322 * Allocate and initialize the file system.
323 */
324 mp = (struct mount *)malloc((u_long)sizeof(struct mount),
325 M_MOUNT, M_WAITOK);
326 memset((char *)mp, 0, (u_long)sizeof(struct mount));
327 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
328 simple_lock_init(&mp->mnt_slock);
329 (void)vfs_busy(mp, LK_NOWAIT, 0);
330 mp->mnt_op = vfs;
331 vfs->vfs_refcount++;
332 mp->mnt_vnodecovered = vp;
333 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
334 mp->mnt_unmounter = NULL;
335 mp->mnt_leaf = mp;
336 mount_initspecific(mp);
337
338 /*
339 * The underlying file system may refuse the mount for
340 * various reasons. Allow the user to force it to happen.
341 */
342 mp->mnt_flag |= SCARG(uap, flags) & MNT_FORCE;
343 update:
344 if ((SCARG(uap, flags) & MNT_GETARGS) == 0) {
345 /*
346 * Set the mount level flags.
347 */
348 if (SCARG(uap, flags) & MNT_RDONLY)
349 mp->mnt_flag |= MNT_RDONLY;
350 else if (mp->mnt_flag & MNT_RDONLY)
351 mp->mnt_iflag |= IMNT_WANTRDWR;
352 mp->mnt_flag &=
353 ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
354 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
355 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP);
356 mp->mnt_flag |= SCARG(uap, flags) &
357 (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
358 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
359 MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
360 MNT_IGNORE);
361 }
362 /*
363 * Mount the filesystem.
364 */
365 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, l);
366 if (mp->mnt_flag & (MNT_UPDATE | MNT_GETARGS)) {
367 #if defined(COMPAT_30) && defined(NFSSERVER)
368 if (mp->mnt_flag & MNT_UPDATE && error != 0) {
369 int error2;
370
371 /* Update failed; let's try and see if it was an
372 * export request. */
373 error2 = nfs_update_exports_30(mp, SCARG(uap, path),
374 SCARG(uap, data), l);
375
376 /* Only update error code if the export request was
377 * understood but some problem occurred while
378 * processing it. */
379 if (error2 != EJUSTRETURN)
380 error = error2;
381 }
382 #endif
383 if (mp->mnt_iflag & IMNT_WANTRDWR)
384 mp->mnt_flag &= ~MNT_RDONLY;
385 if (error)
386 mp->mnt_flag = flag;
387 mp->mnt_flag &=~
388 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
389 mp->mnt_iflag &=~ IMNT_WANTRDWR;
390 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
391 if (mp->mnt_syncer == NULL)
392 error = vfs_allocate_syncvnode(mp);
393 } else {
394 if (mp->mnt_syncer != NULL)
395 vfs_deallocate_syncvnode(mp);
396 }
397 vfs_unbusy(mp);
398 VOP_UNLOCK(vp, 0);
399 vrele(vp);
400 return (error);
401 }
402 /*
403 * Put the new filesystem on the mount list after root.
404 */
405 cache_purge(vp);
406 if (!error) {
407 mp->mnt_flag &=~
408 (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_GETARGS);
409 mp->mnt_iflag &=~ IMNT_WANTRDWR;
410 vp->v_mountedhere = mp;
411 simple_lock(&mountlist_slock);
412 CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
413 simple_unlock(&mountlist_slock);
414 checkdirs(vp);
415 VOP_UNLOCK(vp, 0);
416 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
417 error = vfs_allocate_syncvnode(mp);
418 vfs_unbusy(mp);
419 (void) VFS_STATVFS(mp, &mp->mnt_stat, l);
420 if ((error = VFS_START(mp, 0, l)))
421 vrele(vp);
422 } else {
423 vp->v_mountedhere = (struct mount *)0;
424 vfs->vfs_refcount--;
425 vfs_unbusy(mp);
426 free(mp, M_MOUNT);
427 vput(vp);
428 }
429 return (error);
430 }
431
432 /*
433 * Scan all active processes to see if any of them have a current
434 * or root directory onto which the new filesystem has just been
435 * mounted. If so, replace them with the new mount point.
436 */
437 void
438 checkdirs(struct vnode *olddp)
439 {
440 struct cwdinfo *cwdi;
441 struct vnode *newdp;
442 struct proc *p;
443
444 if (olddp->v_usecount == 1)
445 return;
446 if (VFS_ROOT(olddp->v_mountedhere, &newdp))
447 panic("mount: lost mount");
448 proclist_lock_read();
449 PROCLIST_FOREACH(p, &allproc) {
450 cwdi = p->p_cwdi;
451 if (!cwdi)
452 continue;
453 if (cwdi->cwdi_cdir == olddp) {
454 vrele(cwdi->cwdi_cdir);
455 VREF(newdp);
456 cwdi->cwdi_cdir = newdp;
457 }
458 if (cwdi->cwdi_rdir == olddp) {
459 vrele(cwdi->cwdi_rdir);
460 VREF(newdp);
461 cwdi->cwdi_rdir = newdp;
462 }
463 }
464 proclist_unlock_read();
465 if (rootvnode == olddp) {
466 vrele(rootvnode);
467 VREF(newdp);
468 rootvnode = newdp;
469 }
470 vput(newdp);
471 }
472
473 /*
474 * Unmount a file system.
475 *
476 * Note: unmount takes a path to the vnode mounted on as argument,
477 * not special file (as before).
478 */
479 /* ARGSUSED */
480 int
481 sys_unmount(struct lwp *l, void *v, register_t *retval)
482 {
483 struct sys_unmount_args /* {
484 syscallarg(const char *) path;
485 syscallarg(int) flags;
486 } */ *uap = v;
487 struct vnode *vp;
488 struct mount *mp;
489 int error;
490 struct nameidata nd;
491
492 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
493 SCARG(uap, path), l);
494 if ((error = namei(&nd)) != 0)
495 return (error);
496 vp = nd.ni_vp;
497 mp = vp->v_mount;
498
499 /*
500 * Only root, or the user that did the original mount is
501 * permitted to unmount this filesystem.
502 */
503 if ((mp->mnt_stat.f_owner != kauth_cred_geteuid(l->l_cred)) &&
504 (error = kauth_authorize_generic(l->l_cred,
505 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0) {
506 vput(vp);
507 return (error);
508 }
509
510 /*
511 * Don't allow unmounting the root file system.
512 */
513 if (mp->mnt_flag & MNT_ROOTFS) {
514 vput(vp);
515 return (EINVAL);
516 }
517
518 /*
519 * Must be the root of the filesystem
520 */
521 if ((vp->v_flag & VROOT) == 0) {
522 vput(vp);
523 return (EINVAL);
524 }
525 vput(vp);
526
527 /*
528 * XXX Freeze syncer. Must do this before locking the
529 * mount point. See dounmount() for details.
530 */
531 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
532
533 if (vfs_busy(mp, 0, 0)) {
534 lockmgr(&syncer_lock, LK_RELEASE, NULL);
535 return (EBUSY);
536 }
537
538 return (dounmount(mp, SCARG(uap, flags), l));
539 }
540
541 /*
542 * Do the actual file system unmount. File system is assumed to have been
543 * marked busy by the caller.
544 */
545 int
546 dounmount(struct mount *mp, int flags, struct lwp *l)
547 {
548 struct vnode *coveredvp;
549 int error;
550 int async;
551 int used_syncer;
552
553 #if NVERIEXEC > 0
554 if (!doing_shutdown) {
555 if (veriexec_strict >= VERIEXEC_LOCKDOWN) {
556 log(LOG_ALERT, "Veriexec: Lockdown mode, "
557 "preventing unmount of \"%s\". (uid=%u)\n",
558 mp->mnt_stat.f_mntonname,
559 kauth_cred_getuid(l->l_cred));
560 return (EPERM);
561 }
562
563 if (veriexec_strict == VERIEXEC_IPS) {
564 struct veriexec_table_entry *vte;
565
566 /* Check if we have fingerprints on mount. */
567 vte = fileassoc_tabledata_lookup(mp, veriexec_hook);
568 if ((vte != NULL) && (vte->vte_count > 0)) {
569 log(LOG_ALERT, "Veriexec: IPS mode, preventing"
570 " unmount of \"%s\" with monitored files. "
571 "(uid=%u)\n", mp->mnt_stat.f_mntonname,
572 kauth_cred_getuid(l->l_cred));
573 return (EPERM);
574 }
575 }
576 }
577 #endif /* NVERIEXEC > 0 */
578
579 #ifdef FILEASSOC
580 (void)fileassoc_table_delete(mp);
581 #endif /* FILEASSOC */
582
583 simple_lock(&mountlist_slock);
584 vfs_unbusy(mp);
585 used_syncer = (mp->mnt_syncer != NULL);
586
587 /*
588 * XXX Syncer must be frozen when we get here. This should really
589 * be done on a per-mountpoint basis, but especially the softdep
590 * code possibly called from the syncer doesn't exactly work on a
591 * per-mountpoint basis, so the softdep code would become a maze
592 * of vfs_busy() calls.
593 *
594 * The caller of dounmount() must acquire syncer_lock because
595 * the syncer itself acquires locks in syncer_lock -> vfs_busy
596 * order, and we must preserve that order to avoid deadlock.
597 *
598 * So, if the file system did not use the syncer, now is
599 * the time to release the syncer_lock.
600 */
601 if (used_syncer == 0)
602 lockmgr(&syncer_lock, LK_RELEASE, NULL);
603
604 mp->mnt_iflag |= IMNT_UNMOUNT;
605 mp->mnt_unmounter = l;
606 lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock);
607 vn_start_write(NULL, &mp, V_WAIT);
608
609 async = mp->mnt_flag & MNT_ASYNC;
610 mp->mnt_flag &= ~MNT_ASYNC;
611 cache_purgevfs(mp); /* remove cache entries for this file sys */
612 if (mp->mnt_syncer != NULL)
613 vfs_deallocate_syncvnode(mp);
614 error = 0;
615 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
616 #if NFSS > 0
617 error = fss_umount_hook(mp, (flags & MNT_FORCE));
618 #endif
619 if (error == 0)
620 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
621 }
622 if (error == 0 || (flags & MNT_FORCE))
623 error = VFS_UNMOUNT(mp, flags, l);
624 vn_finished_write(mp, 0);
625 simple_lock(&mountlist_slock);
626 if (error) {
627 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
628 (void) vfs_allocate_syncvnode(mp);
629 mp->mnt_iflag &= ~IMNT_UNMOUNT;
630 mp->mnt_unmounter = NULL;
631 mp->mnt_flag |= async;
632 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE,
633 &mountlist_slock);
634 if (used_syncer)
635 lockmgr(&syncer_lock, LK_RELEASE, NULL);
636 simple_lock(&mp->mnt_slock);
637 while (mp->mnt_wcnt > 0) {
638 wakeup(mp);
639 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt1",
640 0, &mp->mnt_slock);
641 }
642 simple_unlock(&mp->mnt_slock);
643 return (error);
644 }
645 CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
646 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
647 coveredvp->v_mountedhere = NULL;
648 vrele(coveredvp);
649 }
650 mp->mnt_op->vfs_refcount--;
651 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
652 panic("unmount: dangling vnode");
653 mp->mnt_iflag |= IMNT_GONE;
654 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock);
655 mount_finispecific(mp);
656 if (used_syncer)
657 lockmgr(&syncer_lock, LK_RELEASE, NULL);
658 simple_lock(&mp->mnt_slock);
659 while (mp->mnt_wcnt > 0) {
660 wakeup(mp);
661 ltsleep(&mp->mnt_wcnt, PVFS, "mntwcnt2", 0, &mp->mnt_slock);
662 }
663 simple_unlock(&mp->mnt_slock);
664 vfs_hooks_unmount(mp);
665 free(mp, M_MOUNT);
666 return (0);
667 }
668
669 /*
670 * Sync each mounted filesystem.
671 */
672 #ifdef DEBUG
673 int syncprt = 0;
674 struct ctldebug debug0 = { "syncprt", &syncprt };
675 #endif
676
677 /* ARGSUSED */
678 int
679 sys_sync(struct lwp *l, void *v, register_t *retval)
680 {
681 struct mount *mp, *nmp;
682 int asyncflag;
683
684 if (l == NULL)
685 l = &lwp0;
686
687 simple_lock(&mountlist_slock);
688 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
689 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
690 nmp = mp->mnt_list.cqe_prev;
691 continue;
692 }
693 if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
694 vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
695 asyncflag = mp->mnt_flag & MNT_ASYNC;
696 mp->mnt_flag &= ~MNT_ASYNC;
697 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred, l);
698 if (asyncflag)
699 mp->mnt_flag |= MNT_ASYNC;
700 vn_finished_write(mp, 0);
701 }
702 simple_lock(&mountlist_slock);
703 nmp = mp->mnt_list.cqe_prev;
704 vfs_unbusy(mp);
705
706 }
707 simple_unlock(&mountlist_slock);
708 #ifdef DEBUG
709 if (syncprt)
710 vfs_bufstats();
711 #endif /* DEBUG */
712 return (0);
713 }
714
715 /*
716 * Change filesystem quotas.
717 */
718 /* ARGSUSED */
719 int
720 sys_quotactl(struct lwp *l, void *v, register_t *retval)
721 {
722 struct sys_quotactl_args /* {
723 syscallarg(const char *) path;
724 syscallarg(int) cmd;
725 syscallarg(int) uid;
726 syscallarg(void *) arg;
727 } */ *uap = v;
728 struct mount *mp;
729 int error;
730 struct nameidata nd;
731
732 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
733 if ((error = namei(&nd)) != 0)
734 return (error);
735 error = vn_start_write(nd.ni_vp, &mp, V_WAIT | V_PCATCH);
736 vrele(nd.ni_vp);
737 if (error)
738 return (error);
739 error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
740 SCARG(uap, arg), l);
741 vn_finished_write(mp, 0);
742 return (error);
743 }
744
745 int
746 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
747 int root)
748 {
749 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
750 int error = 0;
751
752 /*
753 * If MNT_NOWAIT or MNT_LAZY is specified, do not
754 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
755 * overrides MNT_NOWAIT.
756 */
757 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
758 (flags != MNT_WAIT && flags != 0)) {
759 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
760 goto done;
761 }
762
763 /* Get the filesystem stats now */
764 memset(sp, 0, sizeof(*sp));
765 if ((error = VFS_STATVFS(mp, sp, l)) != 0) {
766 return error;
767 }
768
769 if (cwdi->cwdi_rdir == NULL)
770 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
771 done:
772 if (cwdi->cwdi_rdir != NULL) {
773 size_t len;
774 char *bp;
775 char *path = PNBUF_GET();
776 if (!path)
777 return ENOMEM;
778
779 bp = path + MAXPATHLEN;
780 *--bp = '\0';
781 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
782 MAXPATHLEN / 2, 0, l);
783 if (error) {
784 PNBUF_PUT(path);
785 return error;
786 }
787 len = strlen(bp);
788 /*
789 * for mount points that are below our root, we can see
790 * them, so we fix up the pathname and return them. The
791 * rest we cannot see, so we don't allow viewing the
792 * data.
793 */
794 if (strncmp(bp, sp->f_mntonname, len) == 0) {
795 strlcpy(sp->f_mntonname, &sp->f_mntonname[len],
796 sizeof(sp->f_mntonname));
797 if (sp->f_mntonname[0] == '\0')
798 (void)strlcpy(sp->f_mntonname, "/",
799 sizeof(sp->f_mntonname));
800 } else {
801 if (root)
802 (void)strlcpy(sp->f_mntonname, "/",
803 sizeof(sp->f_mntonname));
804 else
805 error = EPERM;
806 }
807 PNBUF_PUT(path);
808 }
809 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
810 return error;
811 }
812
813 /*
814 * Get filesystem statistics.
815 */
816 /* ARGSUSED */
817 int
818 sys_statvfs1(struct lwp *l, void *v, register_t *retval)
819 {
820 struct sys_statvfs1_args /* {
821 syscallarg(const char *) path;
822 syscallarg(struct statvfs *) buf;
823 syscallarg(int) flags;
824 } */ *uap = v;
825 struct mount *mp;
826 struct statvfs *sb;
827 int error;
828 struct nameidata nd;
829
830 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
831 if ((error = namei(&nd)) != 0)
832 return error;
833 mp = nd.ni_vp->v_mount;
834 vrele(nd.ni_vp);
835 sb = STATVFSBUF_GET();
836 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1);
837 if (error == 0) {
838 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
839 }
840 STATVFSBUF_PUT(sb);
841 return error;
842 }
843
844 /*
845 * Get filesystem statistics.
846 */
847 /* ARGSUSED */
848 int
849 sys_fstatvfs1(struct lwp *l, void *v, register_t *retval)
850 {
851 struct sys_fstatvfs1_args /* {
852 syscallarg(int) fd;
853 syscallarg(struct statvfs *) buf;
854 syscallarg(int) flags;
855 } */ *uap = v;
856 struct proc *p = l->l_proc;
857 struct file *fp;
858 struct mount *mp;
859 struct statvfs *sb;
860 int error;
861
862 /* getvnode() will use the descriptor for us */
863 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
864 return (error);
865 mp = ((struct vnode *)fp->f_data)->v_mount;
866 sb = STATVFSBUF_GET();
867 if ((error = dostatvfs(mp, sb, l, SCARG(uap, flags), 1)) != 0)
868 goto out;
869 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
870 out:
871 FILE_UNUSE(fp, l);
872 STATVFSBUF_PUT(sb);
873 return error;
874 }
875
876
877 /*
878 * Get statistics on all filesystems.
879 */
880 int
881 sys_getvfsstat(struct lwp *l, void *v, register_t *retval)
882 {
883 struct sys_getvfsstat_args /* {
884 syscallarg(struct statvfs *) buf;
885 syscallarg(size_t) bufsize;
886 syscallarg(int) flags;
887 } */ *uap = v;
888 int root = 0;
889 struct proc *p = l->l_proc;
890 struct mount *mp, *nmp;
891 struct statvfs *sb;
892 struct statvfs *sfsp;
893 size_t count, maxcount;
894 int error = 0;
895
896 sb = STATVFSBUF_GET();
897 maxcount = SCARG(uap, bufsize) / sizeof(struct statvfs);
898 sfsp = SCARG(uap, buf);
899 simple_lock(&mountlist_slock);
900 count = 0;
901 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
902 mp = nmp) {
903 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
904 nmp = CIRCLEQ_NEXT(mp, mnt_list);
905 continue;
906 }
907 if (sfsp && count < maxcount) {
908 error = dostatvfs(mp, sb, l, SCARG(uap, flags), 0);
909 if (error) {
910 simple_lock(&mountlist_slock);
911 nmp = CIRCLEQ_NEXT(mp, mnt_list);
912 vfs_unbusy(mp);
913 continue;
914 }
915 error = copyout(sb, sfsp, sizeof(*sfsp));
916 if (error) {
917 vfs_unbusy(mp);
918 goto out;
919 }
920 sfsp++;
921 root |= strcmp(sb->f_mntonname, "/") == 0;
922 }
923 count++;
924 simple_lock(&mountlist_slock);
925 nmp = CIRCLEQ_NEXT(mp, mnt_list);
926 vfs_unbusy(mp);
927 }
928 simple_unlock(&mountlist_slock);
929 if (root == 0 && p->p_cwdi->cwdi_rdir) {
930 /*
931 * fake a root entry
932 */
933 if ((error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, sb, l,
934 SCARG(uap, flags), 1)) != 0)
935 goto out;
936 if (sfsp)
937 error = copyout(sb, sfsp, sizeof(*sfsp));
938 count++;
939 }
940 if (sfsp && count > maxcount)
941 *retval = maxcount;
942 else
943 *retval = count;
944 out:
945 STATVFSBUF_PUT(sb);
946 return error;
947 }
948
949 /*
950 * Change current working directory to a given file descriptor.
951 */
952 /* ARGSUSED */
953 int
954 sys_fchdir(struct lwp *l, void *v, register_t *retval)
955 {
956 struct sys_fchdir_args /* {
957 syscallarg(int) fd;
958 } */ *uap = v;
959 struct proc *p = l->l_proc;
960 struct filedesc *fdp = p->p_fd;
961 struct cwdinfo *cwdi = p->p_cwdi;
962 struct vnode *vp, *tdp;
963 struct mount *mp;
964 struct file *fp;
965 int error;
966
967 /* getvnode() will use the descriptor for us */
968 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
969 return (error);
970 vp = (struct vnode *)fp->f_data;
971
972 VREF(vp);
973 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
974 if (vp->v_type != VDIR)
975 error = ENOTDIR;
976 else
977 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
978 while (!error && (mp = vp->v_mountedhere) != NULL) {
979 if (vfs_busy(mp, 0, 0))
980 continue;
981 error = VFS_ROOT(mp, &tdp);
982 vfs_unbusy(mp);
983 if (error)
984 break;
985 vput(vp);
986 vp = tdp;
987 }
988 if (error) {
989 vput(vp);
990 goto out;
991 }
992 VOP_UNLOCK(vp, 0);
993
994 /*
995 * Disallow changing to a directory not under the process's
996 * current root directory (if there is one).
997 */
998 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
999 vrele(vp);
1000 error = EPERM; /* operation not permitted */
1001 goto out;
1002 }
1003
1004 vrele(cwdi->cwdi_cdir);
1005 cwdi->cwdi_cdir = vp;
1006 out:
1007 FILE_UNUSE(fp, l);
1008 return (error);
1009 }
1010
1011 /*
1012 * Change this process's notion of the root directory to a given file
1013 * descriptor.
1014 */
1015 int
1016 sys_fchroot(struct lwp *l, void *v, register_t *retval)
1017 {
1018 struct sys_fchroot_args *uap = v;
1019 struct proc *p = l->l_proc;
1020 struct filedesc *fdp = p->p_fd;
1021 struct cwdinfo *cwdi = p->p_cwdi;
1022 struct vnode *vp;
1023 struct file *fp;
1024 int error;
1025
1026 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1027 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1028 return error;
1029 /* getvnode() will use the descriptor for us */
1030 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0)
1031 return error;
1032 vp = (struct vnode *) fp->f_data;
1033 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1034 if (vp->v_type != VDIR)
1035 error = ENOTDIR;
1036 else
1037 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1038 VOP_UNLOCK(vp, 0);
1039 if (error)
1040 goto out;
1041 VREF(vp);
1042
1043 /*
1044 * Prevent escaping from chroot by putting the root under
1045 * the working directory. Silently chdir to / if we aren't
1046 * already there.
1047 */
1048 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1049 /*
1050 * XXX would be more failsafe to change directory to a
1051 * deadfs node here instead
1052 */
1053 vrele(cwdi->cwdi_cdir);
1054 VREF(vp);
1055 cwdi->cwdi_cdir = vp;
1056 }
1057
1058 if (cwdi->cwdi_rdir != NULL)
1059 vrele(cwdi->cwdi_rdir);
1060 cwdi->cwdi_rdir = vp;
1061 out:
1062 FILE_UNUSE(fp, l);
1063 return (error);
1064 }
1065
1066 /*
1067 * Change current working directory (``.'').
1068 */
1069 /* ARGSUSED */
1070 int
1071 sys_chdir(struct lwp *l, void *v, register_t *retval)
1072 {
1073 struct sys_chdir_args /* {
1074 syscallarg(const char *) path;
1075 } */ *uap = v;
1076 struct proc *p = l->l_proc;
1077 struct cwdinfo *cwdi = p->p_cwdi;
1078 int error;
1079 struct nameidata nd;
1080
1081 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1082 SCARG(uap, path), l);
1083 if ((error = change_dir(&nd, l)) != 0)
1084 return (error);
1085 vrele(cwdi->cwdi_cdir);
1086 cwdi->cwdi_cdir = nd.ni_vp;
1087 return (0);
1088 }
1089
1090 /*
1091 * Change notion of root (``/'') directory.
1092 */
1093 /* ARGSUSED */
1094 int
1095 sys_chroot(struct lwp *l, void *v, register_t *retval)
1096 {
1097 struct sys_chroot_args /* {
1098 syscallarg(const char *) path;
1099 } */ *uap = v;
1100 struct proc *p = l->l_proc;
1101 struct cwdinfo *cwdi = p->p_cwdi;
1102 struct vnode *vp;
1103 int error;
1104 struct nameidata nd;
1105
1106 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1107 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1108 return (error);
1109 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1110 SCARG(uap, path), l);
1111 if ((error = change_dir(&nd, l)) != 0)
1112 return (error);
1113 if (cwdi->cwdi_rdir != NULL)
1114 vrele(cwdi->cwdi_rdir);
1115 vp = nd.ni_vp;
1116 cwdi->cwdi_rdir = vp;
1117
1118 /*
1119 * Prevent escaping from chroot by putting the root under
1120 * the working directory. Silently chdir to / if we aren't
1121 * already there.
1122 */
1123 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1124 /*
1125 * XXX would be more failsafe to change directory to a
1126 * deadfs node here instead
1127 */
1128 vrele(cwdi->cwdi_cdir);
1129 VREF(vp);
1130 cwdi->cwdi_cdir = vp;
1131 }
1132
1133 return (0);
1134 }
1135
1136 /*
1137 * Common routine for chroot and chdir.
1138 */
1139 static int
1140 change_dir(struct nameidata *ndp, struct lwp *l)
1141 {
1142 struct vnode *vp;
1143 int error;
1144
1145 if ((error = namei(ndp)) != 0)
1146 return (error);
1147 vp = ndp->ni_vp;
1148 if (vp->v_type != VDIR)
1149 error = ENOTDIR;
1150 else
1151 error = VOP_ACCESS(vp, VEXEC, l->l_cred, l);
1152
1153 if (error)
1154 vput(vp);
1155 else
1156 VOP_UNLOCK(vp, 0);
1157 return (error);
1158 }
1159
1160 /*
1161 * Check permissions, allocate an open file structure,
1162 * and call the device open routine if any.
1163 */
1164 int
1165 sys_open(struct lwp *l, void *v, register_t *retval)
1166 {
1167 struct sys_open_args /* {
1168 syscallarg(const char *) path;
1169 syscallarg(int) flags;
1170 syscallarg(int) mode;
1171 } */ *uap = v;
1172 struct proc *p = l->l_proc;
1173 struct cwdinfo *cwdi = p->p_cwdi;
1174 struct filedesc *fdp = p->p_fd;
1175 struct file *fp;
1176 struct vnode *vp;
1177 int flags, cmode;
1178 int type, indx, error;
1179 struct flock lf;
1180 struct nameidata nd;
1181
1182 flags = FFLAGS(SCARG(uap, flags));
1183 if ((flags & (FREAD | FWRITE)) == 0)
1184 return (EINVAL);
1185 /* falloc() will use the file descriptor for us */
1186 if ((error = falloc(l, &fp, &indx)) != 0)
1187 return (error);
1188 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1189 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1190 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1191 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1192 FILE_UNUSE(fp, l);
1193 fdp->fd_ofiles[indx] = NULL;
1194 ffree(fp);
1195 if ((error == EDUPFD || error == EMOVEFD) &&
1196 l->l_dupfd >= 0 && /* XXX from fdopen */
1197 (error =
1198 dupfdopen(l, indx, l->l_dupfd, flags, error)) == 0) {
1199 *retval = indx;
1200 return (0);
1201 }
1202 if (error == ERESTART)
1203 error = EINTR;
1204 fdremove(fdp, indx);
1205 return (error);
1206 }
1207 l->l_dupfd = 0;
1208 vp = nd.ni_vp;
1209 fp->f_flag = flags & FMASK;
1210 fp->f_type = DTYPE_VNODE;
1211 fp->f_ops = &vnops;
1212 fp->f_data = vp;
1213 if (flags & (O_EXLOCK | O_SHLOCK)) {
1214 lf.l_whence = SEEK_SET;
1215 lf.l_start = 0;
1216 lf.l_len = 0;
1217 if (flags & O_EXLOCK)
1218 lf.l_type = F_WRLCK;
1219 else
1220 lf.l_type = F_RDLCK;
1221 type = F_FLOCK;
1222 if ((flags & FNONBLOCK) == 0)
1223 type |= F_WAIT;
1224 VOP_UNLOCK(vp, 0);
1225 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1226 if (error) {
1227 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1228 FILE_UNUSE(fp, l);
1229 ffree(fp);
1230 fdremove(fdp, indx);
1231 return (error);
1232 }
1233 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1234 fp->f_flag |= FHASLOCK;
1235 }
1236 VOP_UNLOCK(vp, 0);
1237 *retval = indx;
1238 FILE_SET_MATURE(fp);
1239 FILE_UNUSE(fp, l);
1240 return (0);
1241 }
1242
1243 static void
1244 vfs__fhfree(fhandle_t *fhp)
1245 {
1246 size_t fhsize;
1247
1248 if (fhp == NULL) {
1249 return;
1250 }
1251 fhsize = FHANDLE_SIZE(fhp);
1252 kmem_free(fhp, fhsize);
1253 }
1254
1255 /*
1256 * vfs_composefh: compose a filehandle.
1257 */
1258
1259 int
1260 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1261 {
1262 struct mount *mp;
1263 struct fid *fidp;
1264 int error;
1265 size_t needfhsize;
1266 size_t fidsize;
1267
1268 mp = vp->v_mount;
1269 if (mp->mnt_op->vfs_vptofh == NULL) {
1270 return EOPNOTSUPP;
1271 }
1272 fidp = NULL;
1273 if (*fh_size < FHANDLE_SIZE_MIN) {
1274 fidsize = 0;
1275 } else {
1276 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1277 if (fhp != NULL) {
1278 memset(fhp, 0, *fh_size);
1279 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1280 fidp = &fhp->fh_fid;
1281 }
1282 }
1283 error = VFS_VPTOFH(vp, fidp, &fidsize);
1284 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1285 if (error == 0 && *fh_size < needfhsize) {
1286 error = E2BIG;
1287 }
1288 *fh_size = needfhsize;
1289 return error;
1290 }
1291
1292 int
1293 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1294 {
1295 struct mount *mp;
1296 fhandle_t *fhp;
1297 size_t fhsize;
1298 size_t fidsize;
1299 int error;
1300
1301 *fhpp = NULL;
1302 mp = vp->v_mount;
1303 if (mp->mnt_op->vfs_vptofh == NULL) {
1304 error = EOPNOTSUPP;
1305 goto out;
1306 }
1307 fidsize = 0;
1308 error = VFS_VPTOFH(vp, NULL, &fidsize);
1309 KASSERT(error != 0);
1310 if (error != E2BIG) {
1311 goto out;
1312 }
1313 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1314 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1315 if (fhp == NULL) {
1316 error = ENOMEM;
1317 goto out;
1318 }
1319 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1320 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1321 if (error == 0) {
1322 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1323 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1324 *fhpp = fhp;
1325 } else {
1326 kmem_free(fhp, fhsize);
1327 }
1328 out:
1329 return error;
1330 }
1331
1332 void
1333 vfs_composefh_free(fhandle_t *fhp)
1334 {
1335
1336 vfs__fhfree(fhp);
1337 }
1338
1339 /*
1340 * vfs_fhtovp: lookup a vnode by a filehandle.
1341 */
1342
1343 int
1344 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1345 {
1346 struct mount *mp;
1347 int error;
1348
1349 *vpp = NULL;
1350 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1351 if (mp == NULL) {
1352 error = ESTALE;
1353 goto out;
1354 }
1355 if (mp->mnt_op->vfs_fhtovp == NULL) {
1356 error = EOPNOTSUPP;
1357 goto out;
1358 }
1359 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1360 out:
1361 return error;
1362 }
1363
1364 /*
1365 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1366 * the needed size.
1367 */
1368
1369 int
1370 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1371 {
1372 fhandle_t *fhp;
1373 int error;
1374
1375 *fhpp = NULL;
1376 if (fhsize > FHANDLE_SIZE_MAX) {
1377 return EINVAL;
1378 }
1379 if (fhsize < FHANDLE_SIZE_MIN) {
1380 return EINVAL;
1381 }
1382 again:
1383 fhp = kmem_alloc(fhsize, KM_SLEEP);
1384 if (fhp == NULL) {
1385 return ENOMEM;
1386 }
1387 error = copyin(ufhp, fhp, fhsize);
1388 if (error == 0) {
1389 /* XXX this check shouldn't be here */
1390 if (FHANDLE_SIZE(fhp) == fhsize) {
1391 *fhpp = fhp;
1392 return 0;
1393 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1394 /*
1395 * a kludge for nfsv2 padded handles.
1396 */
1397 size_t sz;
1398
1399 sz = FHANDLE_SIZE(fhp);
1400 kmem_free(fhp, fhsize);
1401 fhsize = sz;
1402 goto again;
1403 } else {
1404 /*
1405 * userland told us wrong size.
1406 */
1407 error = EINVAL;
1408 }
1409 }
1410 kmem_free(fhp, fhsize);
1411 return error;
1412 }
1413
1414 void
1415 vfs_copyinfh_free(fhandle_t *fhp)
1416 {
1417
1418 vfs__fhfree(fhp);
1419 }
1420
1421 /*
1422 * Get file handle system call
1423 */
1424 int
1425 sys___getfh30(struct lwp *l, void *v, register_t *retval)
1426 {
1427 struct sys___getfh30_args /* {
1428 syscallarg(char *) fname;
1429 syscallarg(fhandle_t *) fhp;
1430 syscallarg(size_t *) fh_size;
1431 } */ *uap = v;
1432 struct vnode *vp;
1433 fhandle_t *fh;
1434 int error;
1435 struct nameidata nd;
1436 size_t sz;
1437 size_t usz;
1438
1439 /*
1440 * Must be super user
1441 */
1442 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1443 0, NULL, NULL, NULL);
1444 if (error)
1445 return (error);
1446 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
1447 SCARG(uap, fname), l);
1448 error = namei(&nd);
1449 if (error)
1450 return (error);
1451 vp = nd.ni_vp;
1452 error = vfs_composefh_alloc(vp, &fh);
1453 vput(vp);
1454 if (error != 0) {
1455 goto out;
1456 }
1457 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1458 if (error != 0) {
1459 goto out;
1460 }
1461 sz = FHANDLE_SIZE(fh);
1462 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1463 if (error != 0) {
1464 goto out;
1465 }
1466 if (usz >= sz) {
1467 error = copyout(fh, SCARG(uap, fhp), sz);
1468 } else {
1469 error = E2BIG;
1470 }
1471 out:
1472 vfs_composefh_free(fh);
1473 return (error);
1474 }
1475
1476 /*
1477 * Open a file given a file handle.
1478 *
1479 * Check permissions, allocate an open file structure,
1480 * and call the device open routine if any.
1481 */
1482
1483 int
1484 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1485 register_t *retval)
1486 {
1487 struct filedesc *fdp = l->l_proc->p_fd;
1488 struct file *fp;
1489 struct vnode *vp = NULL;
1490 struct mount *mp;
1491 kauth_cred_t cred = l->l_cred;
1492 struct file *nfp;
1493 int type, indx, error=0;
1494 struct flock lf;
1495 struct vattr va;
1496 fhandle_t *fh;
1497 int flags;
1498
1499 /*
1500 * Must be super user
1501 */
1502 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1503 0, NULL, NULL, NULL)))
1504 return (error);
1505
1506 flags = FFLAGS(oflags);
1507 if ((flags & (FREAD | FWRITE)) == 0)
1508 return (EINVAL);
1509 if ((flags & O_CREAT))
1510 return (EINVAL);
1511 /* falloc() will use the file descriptor for us */
1512 if ((error = falloc(l, &nfp, &indx)) != 0)
1513 return (error);
1514 fp = nfp;
1515 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1516 if (error != 0) {
1517 goto bad;
1518 }
1519 error = vfs_fhtovp(fh, &vp);
1520 if (error != 0) {
1521 goto bad;
1522 }
1523
1524 /* Now do an effective vn_open */
1525
1526 if (vp->v_type == VSOCK) {
1527 error = EOPNOTSUPP;
1528 goto bad;
1529 }
1530 if (flags & FREAD) {
1531 if ((error = VOP_ACCESS(vp, VREAD, cred, l)) != 0)
1532 goto bad;
1533 }
1534 if (flags & (FWRITE | O_TRUNC)) {
1535 if (vp->v_type == VDIR) {
1536 error = EISDIR;
1537 goto bad;
1538 }
1539 if ((error = vn_writechk(vp)) != 0 ||
1540 (error = VOP_ACCESS(vp, VWRITE, cred, l)) != 0)
1541 goto bad;
1542 }
1543 if (flags & O_TRUNC) {
1544 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
1545 goto bad;
1546 VOP_UNLOCK(vp, 0); /* XXX */
1547 VOP_LEASE(vp, l, cred, LEASE_WRITE);
1548 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1549 VATTR_NULL(&va);
1550 va.va_size = 0;
1551 error = VOP_SETATTR(vp, &va, cred, l);
1552 vn_finished_write(mp, 0);
1553 if (error)
1554 goto bad;
1555 }
1556 if ((error = VOP_OPEN(vp, flags, cred, l)) != 0)
1557 goto bad;
1558 if (vp->v_type == VREG &&
1559 uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
1560 error = EIO;
1561 goto bad;
1562 }
1563 if (flags & FWRITE)
1564 vp->v_writecount++;
1565
1566 /* done with modified vn_open, now finish what sys_open does. */
1567
1568 fp->f_flag = flags & FMASK;
1569 fp->f_type = DTYPE_VNODE;
1570 fp->f_ops = &vnops;
1571 fp->f_data = vp;
1572 if (flags & (O_EXLOCK | O_SHLOCK)) {
1573 lf.l_whence = SEEK_SET;
1574 lf.l_start = 0;
1575 lf.l_len = 0;
1576 if (flags & O_EXLOCK)
1577 lf.l_type = F_WRLCK;
1578 else
1579 lf.l_type = F_RDLCK;
1580 type = F_FLOCK;
1581 if ((flags & FNONBLOCK) == 0)
1582 type |= F_WAIT;
1583 VOP_UNLOCK(vp, 0);
1584 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
1585 if (error) {
1586 (void) vn_close(vp, fp->f_flag, fp->f_cred, l);
1587 FILE_UNUSE(fp, l);
1588 ffree(fp);
1589 fdremove(fdp, indx);
1590 return (error);
1591 }
1592 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1593 fp->f_flag |= FHASLOCK;
1594 }
1595 VOP_UNLOCK(vp, 0);
1596 *retval = indx;
1597 FILE_SET_MATURE(fp);
1598 FILE_UNUSE(fp, l);
1599 vfs_copyinfh_free(fh);
1600 return (0);
1601
1602 bad:
1603 FILE_UNUSE(fp, l);
1604 ffree(fp);
1605 fdremove(fdp, indx);
1606 if (vp != NULL)
1607 vput(vp);
1608 vfs_copyinfh_free(fh);
1609 return (error);
1610 }
1611
1612 int
1613 sys___fhopen40(struct lwp *l, void *v, register_t *retval)
1614 {
1615 struct sys___fhopen40_args /* {
1616 syscallarg(const void *) fhp;
1617 syscallarg(size_t) fh_size;
1618 syscallarg(int) flags;
1619 } */ *uap = v;
1620
1621 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1622 SCARG(uap, flags), retval);
1623 }
1624
1625 int
1626 dofhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sbp,
1627 register_t *retval)
1628 {
1629 struct stat sb;
1630 int error;
1631 fhandle_t *fh;
1632 struct vnode *vp;
1633
1634 /*
1635 * Must be super user
1636 */
1637 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1638 0, NULL, NULL, NULL)))
1639 return (error);
1640
1641 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1642 if (error != 0) {
1643 goto bad;
1644 }
1645 error = vfs_fhtovp(fh, &vp);
1646 if (error != 0) {
1647 goto bad;
1648 }
1649 error = vn_stat(vp, &sb, l);
1650 vput(vp);
1651 if (error) {
1652 goto bad;
1653 }
1654 error = copyout(&sb, sbp, sizeof(sb));
1655 bad:
1656 vfs_copyinfh_free(fh);
1657 return error;
1658 }
1659
1660
1661 /* ARGSUSED */
1662 int
1663 sys___fhstat40(struct lwp *l, void *v, register_t *retval)
1664 {
1665 struct sys___fhstat40_args /* {
1666 syscallarg(const void *) fhp;
1667 syscallarg(size_t) fh_size;
1668 syscallarg(struct stat *) sb;
1669 } */ *uap = v;
1670
1671 return dofhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), SCARG(uap, sb),
1672 retval);
1673 }
1674
1675 int
1676 dofhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *buf,
1677 int flags, register_t *retval)
1678 {
1679 struct statvfs *sb = NULL;
1680 fhandle_t *fh;
1681 struct mount *mp;
1682 struct vnode *vp;
1683 int error;
1684
1685 /*
1686 * Must be super user
1687 */
1688 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1689 0, NULL, NULL, NULL)))
1690 return error;
1691
1692 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1693 if (error != 0) {
1694 goto out;
1695 }
1696 error = vfs_fhtovp(fh, &vp);
1697 if (error != 0) {
1698 goto out;
1699 }
1700 mp = vp->v_mount;
1701 sb = STATVFSBUF_GET();
1702 if ((error = dostatvfs(mp, sb, l, flags, 1)) != 0) {
1703 vput(vp);
1704 goto out;
1705 }
1706 vput(vp);
1707 error = copyout(sb, buf, sizeof(*sb));
1708 out:
1709 if (sb != NULL) {
1710 STATVFSBUF_PUT(sb);
1711 }
1712 vfs_copyinfh_free(fh);
1713 return error;
1714 }
1715
1716 /* ARGSUSED */
1717 int
1718 sys___fhstatvfs140(struct lwp *l, void *v, register_t *retval)
1719 {
1720 struct sys___fhstatvfs140_args /* {
1721 syscallarg(const void *) fhp;
1722 syscallarg(size_t) fh_size;
1723 syscallarg(struct statvfs *) buf;
1724 syscallarg(int) flags;
1725 } */ *uap = v;
1726
1727 return dofhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1728 SCARG(uap, buf), SCARG(uap, flags), retval);
1729 }
1730
1731 /*
1732 * Create a special file.
1733 */
1734 /* ARGSUSED */
1735 int
1736 sys_mknod(struct lwp *l, void *v, register_t *retval)
1737 {
1738 struct sys_mknod_args /* {
1739 syscallarg(const char *) path;
1740 syscallarg(int) mode;
1741 syscallarg(int) dev;
1742 } */ *uap = v;
1743 struct proc *p = l->l_proc;
1744 struct vnode *vp;
1745 struct mount *mp;
1746 struct vattr vattr;
1747 int error;
1748 int whiteout = 0;
1749 struct nameidata nd;
1750
1751 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
1752 0, NULL, NULL, NULL)) != 0)
1753 return (error);
1754 restart:
1755 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1756 if ((error = namei(&nd)) != 0)
1757 return (error);
1758 vp = nd.ni_vp;
1759 if (vp != NULL)
1760 error = EEXIST;
1761 else {
1762 VATTR_NULL(&vattr);
1763 vattr.va_mode =
1764 (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1765 vattr.va_rdev = SCARG(uap, dev);
1766 whiteout = 0;
1767
1768 switch (SCARG(uap, mode) & S_IFMT) {
1769 case S_IFMT: /* used by badsect to flag bad sectors */
1770 vattr.va_type = VBAD;
1771 break;
1772 case S_IFCHR:
1773 vattr.va_type = VCHR;
1774 break;
1775 case S_IFBLK:
1776 vattr.va_type = VBLK;
1777 break;
1778 case S_IFWHT:
1779 whiteout = 1;
1780 break;
1781 default:
1782 error = EINVAL;
1783 break;
1784 }
1785 }
1786 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1787 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1788 if (nd.ni_dvp == vp)
1789 vrele(nd.ni_dvp);
1790 else
1791 vput(nd.ni_dvp);
1792 if (vp)
1793 vrele(vp);
1794 if ((error = vn_start_write(NULL, &mp,
1795 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1796 return (error);
1797 goto restart;
1798 }
1799 if (!error) {
1800 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1801 if (whiteout) {
1802 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
1803 if (error)
1804 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1805 vput(nd.ni_dvp);
1806 } else {
1807 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
1808 &nd.ni_cnd, &vattr);
1809 if (error == 0)
1810 vput(nd.ni_vp);
1811 }
1812 } else {
1813 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1814 if (nd.ni_dvp == vp)
1815 vrele(nd.ni_dvp);
1816 else
1817 vput(nd.ni_dvp);
1818 if (vp)
1819 vrele(vp);
1820 }
1821 vn_finished_write(mp, 0);
1822 return (error);
1823 }
1824
1825 /*
1826 * Create a named pipe.
1827 */
1828 /* ARGSUSED */
1829 int
1830 sys_mkfifo(struct lwp *l, void *v, register_t *retval)
1831 {
1832 struct sys_mkfifo_args /* {
1833 syscallarg(const char *) path;
1834 syscallarg(int) mode;
1835 } */ *uap = v;
1836 struct proc *p = l->l_proc;
1837 struct mount *mp;
1838 struct vattr vattr;
1839 int error;
1840 struct nameidata nd;
1841
1842 restart:
1843 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), l);
1844 if ((error = namei(&nd)) != 0)
1845 return (error);
1846 if (nd.ni_vp != NULL) {
1847 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1848 if (nd.ni_dvp == nd.ni_vp)
1849 vrele(nd.ni_dvp);
1850 else
1851 vput(nd.ni_dvp);
1852 vrele(nd.ni_vp);
1853 return (EEXIST);
1854 }
1855 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1856 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1857 if (nd.ni_dvp == nd.ni_vp)
1858 vrele(nd.ni_dvp);
1859 else
1860 vput(nd.ni_dvp);
1861 if (nd.ni_vp)
1862 vrele(nd.ni_vp);
1863 if ((error = vn_start_write(NULL, &mp,
1864 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1865 return (error);
1866 goto restart;
1867 }
1868 VATTR_NULL(&vattr);
1869 vattr.va_type = VFIFO;
1870 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
1871 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1872 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1873 if (error == 0)
1874 vput(nd.ni_vp);
1875 vn_finished_write(mp, 0);
1876 return (error);
1877 }
1878
1879 /*
1880 * Make a hard file link.
1881 */
1882 /* ARGSUSED */
1883 int
1884 sys_link(struct lwp *l, void *v, register_t *retval)
1885 {
1886 struct sys_link_args /* {
1887 syscallarg(const char *) path;
1888 syscallarg(const char *) link;
1889 } */ *uap = v;
1890 struct vnode *vp;
1891 struct mount *mp;
1892 struct nameidata nd;
1893 int error;
1894
1895 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
1896 if ((error = namei(&nd)) != 0)
1897 return (error);
1898 vp = nd.ni_vp;
1899 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
1900 vrele(vp);
1901 return (error);
1902 }
1903 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1904 if ((error = namei(&nd)) != 0)
1905 goto out;
1906 if (nd.ni_vp) {
1907 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1908 if (nd.ni_dvp == nd.ni_vp)
1909 vrele(nd.ni_dvp);
1910 else
1911 vput(nd.ni_dvp);
1912 vrele(nd.ni_vp);
1913 error = EEXIST;
1914 goto out;
1915 }
1916 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1917 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
1918 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
1919 out:
1920 vrele(vp);
1921 vn_finished_write(mp, 0);
1922 return (error);
1923 }
1924
1925 /*
1926 * Make a symbolic link.
1927 */
1928 /* ARGSUSED */
1929 int
1930 sys_symlink(struct lwp *l, void *v, register_t *retval)
1931 {
1932 struct sys_symlink_args /* {
1933 syscallarg(const char *) path;
1934 syscallarg(const char *) link;
1935 } */ *uap = v;
1936 struct proc *p = l->l_proc;
1937 struct mount *mp;
1938 struct vattr vattr;
1939 char *path;
1940 int error;
1941 struct nameidata nd;
1942
1943 path = PNBUF_GET();
1944 error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL);
1945 if (error)
1946 goto out;
1947 restart:
1948 NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), l);
1949 if ((error = namei(&nd)) != 0)
1950 goto out;
1951 if (nd.ni_vp) {
1952 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1953 if (nd.ni_dvp == nd.ni_vp)
1954 vrele(nd.ni_dvp);
1955 else
1956 vput(nd.ni_dvp);
1957 vrele(nd.ni_vp);
1958 error = EEXIST;
1959 goto out;
1960 }
1961 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1962 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
1963 if (nd.ni_dvp == nd.ni_vp)
1964 vrele(nd.ni_dvp);
1965 else
1966 vput(nd.ni_dvp);
1967 if ((error = vn_start_write(NULL, &mp,
1968 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
1969 return (error);
1970 goto restart;
1971 }
1972 VATTR_NULL(&vattr);
1973 vattr.va_type = VLNK;
1974 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
1975 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1976 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
1977 if (error == 0)
1978 vput(nd.ni_vp);
1979 vn_finished_write(mp, 0);
1980 out:
1981 PNBUF_PUT(path);
1982 return (error);
1983 }
1984
1985 /*
1986 * Delete a whiteout from the filesystem.
1987 */
1988 /* ARGSUSED */
1989 int
1990 sys_undelete(struct lwp *l, void *v, register_t *retval)
1991 {
1992 struct sys_undelete_args /* {
1993 syscallarg(const char *) path;
1994 } */ *uap = v;
1995 int error;
1996 struct mount *mp;
1997 struct nameidata nd;
1998
1999 restart:
2000 NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
2001 SCARG(uap, path), l);
2002 error = namei(&nd);
2003 if (error)
2004 return (error);
2005
2006 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2007 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2008 if (nd.ni_dvp == nd.ni_vp)
2009 vrele(nd.ni_dvp);
2010 else
2011 vput(nd.ni_dvp);
2012 if (nd.ni_vp)
2013 vrele(nd.ni_vp);
2014 return (EEXIST);
2015 }
2016 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2017 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2018 if (nd.ni_dvp == nd.ni_vp)
2019 vrele(nd.ni_dvp);
2020 else
2021 vput(nd.ni_dvp);
2022 if ((error = vn_start_write(NULL, &mp,
2023 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2024 return (error);
2025 goto restart;
2026 }
2027 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2028 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2029 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2030 vput(nd.ni_dvp);
2031 vn_finished_write(mp, 0);
2032 return (error);
2033 }
2034
2035 /*
2036 * Delete a name from the filesystem.
2037 */
2038 /* ARGSUSED */
2039 int
2040 sys_unlink(struct lwp *l, void *v, register_t *retval)
2041 {
2042 struct sys_unlink_args /* {
2043 syscallarg(const char *) path;
2044 } */ *uap = v;
2045 struct mount *mp;
2046 struct vnode *vp;
2047 int error;
2048 struct nameidata nd;
2049
2050 restart:
2051 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
2052 SCARG(uap, path), l);
2053 if ((error = namei(&nd)) != 0)
2054 return (error);
2055 vp = nd.ni_vp;
2056
2057 /*
2058 * The root of a mounted filesystem cannot be deleted.
2059 */
2060 if (vp->v_flag & VROOT) {
2061 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2062 if (nd.ni_dvp == vp)
2063 vrele(nd.ni_dvp);
2064 else
2065 vput(nd.ni_dvp);
2066 vput(vp);
2067 error = EBUSY;
2068 goto out;
2069 }
2070
2071 #if NVERIEXEC > 0
2072 /* Handle remove requests for veriexec entries. */
2073 if ((error = veriexec_removechk(vp, nd.ni_dirp, l)) != 0) {
2074 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2075 if (nd.ni_dvp == vp)
2076 vrele(nd.ni_dvp);
2077 else
2078 vput(nd.ni_dvp);
2079 vput(vp);
2080 goto out;
2081 }
2082 #endif /* NVERIEXEC > 0 */
2083
2084 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
2085 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2086 if (nd.ni_dvp == vp)
2087 vrele(nd.ni_dvp);
2088 else
2089 vput(nd.ni_dvp);
2090 vput(vp);
2091 if ((error = vn_start_write(NULL, &mp,
2092 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
2093 return (error);
2094 goto restart;
2095 }
2096 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
2097 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2098 #ifdef FILEASSOC
2099 (void)fileassoc_file_delete(vp);
2100 #endif /* FILEASSOC */
2101 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2102 vn_finished_write(mp, 0);
2103 out:
2104 return (error);
2105 }
2106
2107 /*
2108 * Reposition read/write file offset.
2109 */
2110 int
2111 sys_lseek(struct lwp *l, void *v, register_t *retval)
2112 {
2113 struct sys_lseek_args /* {
2114 syscallarg(int) fd;
2115 syscallarg(int) pad;
2116 syscallarg(off_t) offset;
2117 syscallarg(int) whence;
2118 } */ *uap = v;
2119 struct proc *p = l->l_proc;
2120 kauth_cred_t cred = l->l_cred;
2121 struct filedesc *fdp = p->p_fd;
2122 struct file *fp;
2123 struct vnode *vp;
2124 struct vattr vattr;
2125 off_t newoff;
2126 int error;
2127
2128 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
2129 return (EBADF);
2130
2131 FILE_USE(fp);
2132
2133 vp = (struct vnode *)fp->f_data;
2134 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2135 error = ESPIPE;
2136 goto out;
2137 }
2138
2139 switch (SCARG(uap, whence)) {
2140 case SEEK_CUR:
2141 newoff = fp->f_offset + SCARG(uap, offset);
2142 break;
2143 case SEEK_END:
2144 error = VOP_GETATTR(vp, &vattr, cred, l);
2145 if (error)
2146 goto out;
2147 newoff = SCARG(uap, offset) + vattr.va_size;
2148 break;
2149 case SEEK_SET:
2150 newoff = SCARG(uap, offset);
2151 break;
2152 default:
2153 error = EINVAL;
2154 goto out;
2155 }
2156 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
2157 goto out;
2158
2159 *(off_t *)retval = fp->f_offset = newoff;
2160 out:
2161 FILE_UNUSE(fp, l);
2162 return (error);
2163 }
2164
2165 /*
2166 * Positional read system call.
2167 */
2168 int
2169 sys_pread(struct lwp *l, void *v, register_t *retval)
2170 {
2171 struct sys_pread_args /* {
2172 syscallarg(int) fd;
2173 syscallarg(void *) buf;
2174 syscallarg(size_t) nbyte;
2175 syscallarg(off_t) offset;
2176 } */ *uap = v;
2177 struct proc *p = l->l_proc;
2178 struct filedesc *fdp = p->p_fd;
2179 struct file *fp;
2180 struct vnode *vp;
2181 off_t offset;
2182 int error, fd = SCARG(uap, fd);
2183
2184 if ((fp = fd_getfile(fdp, fd)) == NULL)
2185 return (EBADF);
2186
2187 if ((fp->f_flag & FREAD) == 0) {
2188 simple_unlock(&fp->f_slock);
2189 return (EBADF);
2190 }
2191
2192 FILE_USE(fp);
2193
2194 vp = (struct vnode *)fp->f_data;
2195 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2196 error = ESPIPE;
2197 goto out;
2198 }
2199
2200 offset = SCARG(uap, offset);
2201
2202 /*
2203 * XXX This works because no file systems actually
2204 * XXX take any action on the seek operation.
2205 */
2206 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2207 goto out;
2208
2209 /* dofileread() will unuse the descriptor for us */
2210 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2211 &offset, 0, retval));
2212
2213 out:
2214 FILE_UNUSE(fp, l);
2215 return (error);
2216 }
2217
2218 /*
2219 * Positional scatter read system call.
2220 */
2221 int
2222 sys_preadv(struct lwp *l, void *v, register_t *retval)
2223 {
2224 struct sys_preadv_args /* {
2225 syscallarg(int) fd;
2226 syscallarg(const struct iovec *) iovp;
2227 syscallarg(int) iovcnt;
2228 syscallarg(off_t) offset;
2229 } */ *uap = v;
2230 struct proc *p = l->l_proc;
2231 struct filedesc *fdp = p->p_fd;
2232 struct file *fp;
2233 struct vnode *vp;
2234 off_t offset;
2235 int error, fd = SCARG(uap, fd);
2236
2237 if ((fp = fd_getfile(fdp, fd)) == NULL)
2238 return (EBADF);
2239
2240 if ((fp->f_flag & FREAD) == 0) {
2241 simple_unlock(&fp->f_slock);
2242 return (EBADF);
2243 }
2244
2245 FILE_USE(fp);
2246
2247 vp = (struct vnode *)fp->f_data;
2248 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2249 error = ESPIPE;
2250 goto out;
2251 }
2252
2253 offset = SCARG(uap, offset);
2254
2255 /*
2256 * XXX This works because no file systems actually
2257 * XXX take any action on the seek operation.
2258 */
2259 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2260 goto out;
2261
2262 /* dofilereadv() will unuse the descriptor for us */
2263 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2264 &offset, 0, retval));
2265
2266 out:
2267 FILE_UNUSE(fp, l);
2268 return (error);
2269 }
2270
2271 /*
2272 * Positional write system call.
2273 */
2274 int
2275 sys_pwrite(struct lwp *l, void *v, register_t *retval)
2276 {
2277 struct sys_pwrite_args /* {
2278 syscallarg(int) fd;
2279 syscallarg(const void *) buf;
2280 syscallarg(size_t) nbyte;
2281 syscallarg(off_t) offset;
2282 } */ *uap = v;
2283 struct proc *p = l->l_proc;
2284 struct filedesc *fdp = p->p_fd;
2285 struct file *fp;
2286 struct vnode *vp;
2287 off_t offset;
2288 int error, fd = SCARG(uap, fd);
2289
2290 if ((fp = fd_getfile(fdp, fd)) == NULL)
2291 return (EBADF);
2292
2293 if ((fp->f_flag & FWRITE) == 0) {
2294 simple_unlock(&fp->f_slock);
2295 return (EBADF);
2296 }
2297
2298 FILE_USE(fp);
2299
2300 vp = (struct vnode *)fp->f_data;
2301 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2302 error = ESPIPE;
2303 goto out;
2304 }
2305
2306 offset = SCARG(uap, offset);
2307
2308 /*
2309 * XXX This works because no file systems actually
2310 * XXX take any action on the seek operation.
2311 */
2312 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2313 goto out;
2314
2315 /* dofilewrite() will unuse the descriptor for us */
2316 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2317 &offset, 0, retval));
2318
2319 out:
2320 FILE_UNUSE(fp, l);
2321 return (error);
2322 }
2323
2324 /*
2325 * Positional gather write system call.
2326 */
2327 int
2328 sys_pwritev(struct lwp *l, void *v, register_t *retval)
2329 {
2330 struct sys_pwritev_args /* {
2331 syscallarg(int) fd;
2332 syscallarg(const struct iovec *) iovp;
2333 syscallarg(int) iovcnt;
2334 syscallarg(off_t) offset;
2335 } */ *uap = v;
2336 struct proc *p = l->l_proc;
2337 struct filedesc *fdp = p->p_fd;
2338 struct file *fp;
2339 struct vnode *vp;
2340 off_t offset;
2341 int error, fd = SCARG(uap, fd);
2342
2343 if ((fp = fd_getfile(fdp, fd)) == NULL)
2344 return (EBADF);
2345
2346 if ((fp->f_flag & FWRITE) == 0) {
2347 simple_unlock(&fp->f_slock);
2348 return (EBADF);
2349 }
2350
2351 FILE_USE(fp);
2352
2353 vp = (struct vnode *)fp->f_data;
2354 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2355 error = ESPIPE;
2356 goto out;
2357 }
2358
2359 offset = SCARG(uap, offset);
2360
2361 /*
2362 * XXX This works because no file systems actually
2363 * XXX take any action on the seek operation.
2364 */
2365 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2366 goto out;
2367
2368 /* dofilewritev() will unuse the descriptor for us */
2369 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
2370 &offset, 0, retval));
2371
2372 out:
2373 FILE_UNUSE(fp, l);
2374 return (error);
2375 }
2376
2377 /*
2378 * Check access permissions.
2379 */
2380 int
2381 sys_access(struct lwp *l, void *v, register_t *retval)
2382 {
2383 struct sys_access_args /* {
2384 syscallarg(const char *) path;
2385 syscallarg(int) flags;
2386 } */ *uap = v;
2387 kauth_cred_t cred;
2388 struct vnode *vp;
2389 int error, flags;
2390 struct nameidata nd;
2391
2392 cred = kauth_cred_dup(l->l_cred);
2393 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2394 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2395 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2396 SCARG(uap, path), l);
2397 /* Override default credentials */
2398 nd.ni_cnd.cn_cred = cred;
2399 if ((error = namei(&nd)) != 0)
2400 goto out;
2401 vp = nd.ni_vp;
2402
2403 /* Flags == 0 means only check for existence. */
2404 if (SCARG(uap, flags)) {
2405 flags = 0;
2406 if (SCARG(uap, flags) & R_OK)
2407 flags |= VREAD;
2408 if (SCARG(uap, flags) & W_OK)
2409 flags |= VWRITE;
2410 if (SCARG(uap, flags) & X_OK)
2411 flags |= VEXEC;
2412
2413 error = VOP_ACCESS(vp, flags, cred, l);
2414 if (!error && (flags & VWRITE))
2415 error = vn_writechk(vp);
2416 }
2417 vput(vp);
2418 out:
2419 kauth_cred_free(cred);
2420 return (error);
2421 }
2422
2423 /*
2424 * Get file status; this version follows links.
2425 */
2426 /* ARGSUSED */
2427 int
2428 sys___stat30(struct lwp *l, void *v, register_t *retval)
2429 {
2430 struct sys___stat30_args /* {
2431 syscallarg(const char *) path;
2432 syscallarg(struct stat *) ub;
2433 } */ *uap = v;
2434 struct stat sb;
2435 int error;
2436 struct nameidata nd;
2437
2438 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2439 SCARG(uap, path), l);
2440 if ((error = namei(&nd)) != 0)
2441 return (error);
2442 error = vn_stat(nd.ni_vp, &sb, l);
2443 vput(nd.ni_vp);
2444 if (error)
2445 return (error);
2446 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2447 return (error);
2448 }
2449
2450 /*
2451 * Get file status; this version does not follow links.
2452 */
2453 /* ARGSUSED */
2454 int
2455 sys___lstat30(struct lwp *l, void *v, register_t *retval)
2456 {
2457 struct sys___lstat30_args /* {
2458 syscallarg(const char *) path;
2459 syscallarg(struct stat *) ub;
2460 } */ *uap = v;
2461 struct stat sb;
2462 int error;
2463 struct nameidata nd;
2464
2465 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2466 SCARG(uap, path), l);
2467 if ((error = namei(&nd)) != 0)
2468 return (error);
2469 error = vn_stat(nd.ni_vp, &sb, l);
2470 vput(nd.ni_vp);
2471 if (error)
2472 return (error);
2473 error = copyout(&sb, SCARG(uap, ub), sizeof(sb));
2474 return (error);
2475 }
2476
2477 /*
2478 * Get configurable pathname variables.
2479 */
2480 /* ARGSUSED */
2481 int
2482 sys_pathconf(struct lwp *l, void *v, register_t *retval)
2483 {
2484 struct sys_pathconf_args /* {
2485 syscallarg(const char *) path;
2486 syscallarg(int) name;
2487 } */ *uap = v;
2488 int error;
2489 struct nameidata nd;
2490
2491 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
2492 SCARG(uap, path), l);
2493 if ((error = namei(&nd)) != 0)
2494 return (error);
2495 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2496 vput(nd.ni_vp);
2497 return (error);
2498 }
2499
2500 /*
2501 * Return target name of a symbolic link.
2502 */
2503 /* ARGSUSED */
2504 int
2505 sys_readlink(struct lwp *l, void *v, register_t *retval)
2506 {
2507 struct sys_readlink_args /* {
2508 syscallarg(const char *) path;
2509 syscallarg(char *) buf;
2510 syscallarg(size_t) count;
2511 } */ *uap = v;
2512 struct vnode *vp;
2513 struct iovec aiov;
2514 struct uio auio;
2515 int error;
2516 struct nameidata nd;
2517
2518 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE,
2519 SCARG(uap, path), l);
2520 if ((error = namei(&nd)) != 0)
2521 return (error);
2522 vp = nd.ni_vp;
2523 if (vp->v_type != VLNK)
2524 error = EINVAL;
2525 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2526 (error = VOP_ACCESS(vp, VREAD, l->l_cred, l)) == 0) {
2527 aiov.iov_base = SCARG(uap, buf);
2528 aiov.iov_len = SCARG(uap, count);
2529 auio.uio_iov = &aiov;
2530 auio.uio_iovcnt = 1;
2531 auio.uio_offset = 0;
2532 auio.uio_rw = UIO_READ;
2533 KASSERT(l == curlwp);
2534 auio.uio_vmspace = l->l_proc->p_vmspace;
2535 auio.uio_resid = SCARG(uap, count);
2536 error = VOP_READLINK(vp, &auio, l->l_cred);
2537 }
2538 vput(vp);
2539 *retval = SCARG(uap, count) - auio.uio_resid;
2540 return (error);
2541 }
2542
2543 /*
2544 * Change flags of a file given a path name.
2545 */
2546 /* ARGSUSED */
2547 int
2548 sys_chflags(struct lwp *l, void *v, register_t *retval)
2549 {
2550 struct sys_chflags_args /* {
2551 syscallarg(const char *) path;
2552 syscallarg(u_long) flags;
2553 } */ *uap = v;
2554 struct vnode *vp;
2555 int error;
2556 struct nameidata nd;
2557
2558 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2559 if ((error = namei(&nd)) != 0)
2560 return (error);
2561 vp = nd.ni_vp;
2562 error = change_flags(vp, SCARG(uap, flags), l);
2563 vput(vp);
2564 return (error);
2565 }
2566
2567 /*
2568 * Change flags of a file given a file descriptor.
2569 */
2570 /* ARGSUSED */
2571 int
2572 sys_fchflags(struct lwp *l, void *v, register_t *retval)
2573 {
2574 struct sys_fchflags_args /* {
2575 syscallarg(int) fd;
2576 syscallarg(u_long) flags;
2577 } */ *uap = v;
2578 struct proc *p = l->l_proc;
2579 struct vnode *vp;
2580 struct file *fp;
2581 int error;
2582
2583 /* getvnode() will use the descriptor for us */
2584 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2585 return (error);
2586 vp = (struct vnode *)fp->f_data;
2587 error = change_flags(vp, SCARG(uap, flags), l);
2588 VOP_UNLOCK(vp, 0);
2589 FILE_UNUSE(fp, l);
2590 return (error);
2591 }
2592
2593 /*
2594 * Change flags of a file given a path name; this version does
2595 * not follow links.
2596 */
2597 int
2598 sys_lchflags(struct lwp *l, void *v, register_t *retval)
2599 {
2600 struct sys_lchflags_args /* {
2601 syscallarg(const char *) path;
2602 syscallarg(u_long) flags;
2603 } */ *uap = v;
2604 struct vnode *vp;
2605 int error;
2606 struct nameidata nd;
2607
2608 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2609 if ((error = namei(&nd)) != 0)
2610 return (error);
2611 vp = nd.ni_vp;
2612 error = change_flags(vp, SCARG(uap, flags), l);
2613 vput(vp);
2614 return (error);
2615 }
2616
2617 /*
2618 * Common routine to change flags of a file.
2619 */
2620 int
2621 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
2622 {
2623 struct mount *mp;
2624 struct vattr vattr;
2625 int error;
2626
2627 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2628 return (error);
2629 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2630 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2631 /*
2632 * Non-superusers cannot change the flags on devices, even if they
2633 * own them.
2634 */
2635 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2636 &l->l_acflag) != 0) {
2637 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2638 goto out;
2639 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
2640 error = EINVAL;
2641 goto out;
2642 }
2643 }
2644 VATTR_NULL(&vattr);
2645 vattr.va_flags = flags;
2646 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2647 out:
2648 vn_finished_write(mp, 0);
2649 return (error);
2650 }
2651
2652 /*
2653 * Change mode of a file given path name; this version follows links.
2654 */
2655 /* ARGSUSED */
2656 int
2657 sys_chmod(struct lwp *l, void *v, register_t *retval)
2658 {
2659 struct sys_chmod_args /* {
2660 syscallarg(const char *) path;
2661 syscallarg(int) mode;
2662 } */ *uap = v;
2663 int error;
2664 struct nameidata nd;
2665
2666 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2667 if ((error = namei(&nd)) != 0)
2668 return (error);
2669
2670 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2671
2672 vrele(nd.ni_vp);
2673 return (error);
2674 }
2675
2676 /*
2677 * Change mode of a file given a file descriptor.
2678 */
2679 /* ARGSUSED */
2680 int
2681 sys_fchmod(struct lwp *l, void *v, register_t *retval)
2682 {
2683 struct sys_fchmod_args /* {
2684 syscallarg(int) fd;
2685 syscallarg(int) mode;
2686 } */ *uap = v;
2687 struct proc *p = l->l_proc;
2688 struct file *fp;
2689 int error;
2690
2691 /* getvnode() will use the descriptor for us */
2692 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2693 return (error);
2694
2695 error = change_mode((struct vnode *)fp->f_data, SCARG(uap, mode), l);
2696 FILE_UNUSE(fp, l);
2697 return (error);
2698 }
2699
2700 /*
2701 * Change mode of a file given path name; this version does not follow links.
2702 */
2703 /* ARGSUSED */
2704 int
2705 sys_lchmod(struct lwp *l, void *v, register_t *retval)
2706 {
2707 struct sys_lchmod_args /* {
2708 syscallarg(const char *) path;
2709 syscallarg(int) mode;
2710 } */ *uap = v;
2711 int error;
2712 struct nameidata nd;
2713
2714 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2715 if ((error = namei(&nd)) != 0)
2716 return (error);
2717
2718 error = change_mode(nd.ni_vp, SCARG(uap, mode), l);
2719
2720 vrele(nd.ni_vp);
2721 return (error);
2722 }
2723
2724 /*
2725 * Common routine to set mode given a vnode.
2726 */
2727 static int
2728 change_mode(struct vnode *vp, int mode, struct lwp *l)
2729 {
2730 struct mount *mp;
2731 struct vattr vattr;
2732 int error;
2733
2734 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2735 return (error);
2736 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2737 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2738 VATTR_NULL(&vattr);
2739 vattr.va_mode = mode & ALLPERMS;
2740 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2741 VOP_UNLOCK(vp, 0);
2742 vn_finished_write(mp, 0);
2743 return (error);
2744 }
2745
2746 /*
2747 * Set ownership given a path name; this version follows links.
2748 */
2749 /* ARGSUSED */
2750 int
2751 sys_chown(struct lwp *l, void *v, register_t *retval)
2752 {
2753 struct sys_chown_args /* {
2754 syscallarg(const char *) path;
2755 syscallarg(uid_t) uid;
2756 syscallarg(gid_t) gid;
2757 } */ *uap = v;
2758 int error;
2759 struct nameidata nd;
2760
2761 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2762 if ((error = namei(&nd)) != 0)
2763 return (error);
2764
2765 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2766
2767 vrele(nd.ni_vp);
2768 return (error);
2769 }
2770
2771 /*
2772 * Set ownership given a path name; this version follows links.
2773 * Provides POSIX semantics.
2774 */
2775 /* ARGSUSED */
2776 int
2777 sys___posix_chown(struct lwp *l, void *v, register_t *retval)
2778 {
2779 struct sys_chown_args /* {
2780 syscallarg(const char *) path;
2781 syscallarg(uid_t) uid;
2782 syscallarg(gid_t) gid;
2783 } */ *uap = v;
2784 int error;
2785 struct nameidata nd;
2786
2787 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2788 if ((error = namei(&nd)) != 0)
2789 return (error);
2790
2791 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2792
2793 vrele(nd.ni_vp);
2794 return (error);
2795 }
2796
2797 /*
2798 * Set ownership given a file descriptor.
2799 */
2800 /* ARGSUSED */
2801 int
2802 sys_fchown(struct lwp *l, void *v, register_t *retval)
2803 {
2804 struct sys_fchown_args /* {
2805 syscallarg(int) fd;
2806 syscallarg(uid_t) uid;
2807 syscallarg(gid_t) gid;
2808 } */ *uap = v;
2809 struct proc *p = l->l_proc;
2810 int error;
2811 struct file *fp;
2812
2813 /* getvnode() will use the descriptor for us */
2814 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2815 return (error);
2816
2817 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2818 SCARG(uap, gid), l, 0);
2819 FILE_UNUSE(fp, l);
2820 return (error);
2821 }
2822
2823 /*
2824 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
2825 */
2826 /* ARGSUSED */
2827 int
2828 sys___posix_fchown(struct lwp *l, void *v, register_t *retval)
2829 {
2830 struct sys_fchown_args /* {
2831 syscallarg(int) fd;
2832 syscallarg(uid_t) uid;
2833 syscallarg(gid_t) gid;
2834 } */ *uap = v;
2835 struct proc *p = l->l_proc;
2836 int error;
2837 struct file *fp;
2838
2839 /* getvnode() will use the descriptor for us */
2840 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
2841 return (error);
2842
2843 error = change_owner((struct vnode *)fp->f_data, SCARG(uap, uid),
2844 SCARG(uap, gid), l, 1);
2845 FILE_UNUSE(fp, l);
2846 return (error);
2847 }
2848
2849 /*
2850 * Set ownership given a path name; this version does not follow links.
2851 */
2852 /* ARGSUSED */
2853 int
2854 sys_lchown(struct lwp *l, void *v, register_t *retval)
2855 {
2856 struct sys_lchown_args /* {
2857 syscallarg(const char *) path;
2858 syscallarg(uid_t) uid;
2859 syscallarg(gid_t) gid;
2860 } */ *uap = v;
2861 int error;
2862 struct nameidata nd;
2863
2864 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2865 if ((error = namei(&nd)) != 0)
2866 return (error);
2867
2868 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
2869
2870 vrele(nd.ni_vp);
2871 return (error);
2872 }
2873
2874 /*
2875 * Set ownership given a path name; this version does not follow links.
2876 * Provides POSIX/XPG semantics.
2877 */
2878 /* ARGSUSED */
2879 int
2880 sys___posix_lchown(struct lwp *l, void *v, register_t *retval)
2881 {
2882 struct sys_lchown_args /* {
2883 syscallarg(const char *) path;
2884 syscallarg(uid_t) uid;
2885 syscallarg(gid_t) gid;
2886 } */ *uap = v;
2887 int error;
2888 struct nameidata nd;
2889
2890 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2891 if ((error = namei(&nd)) != 0)
2892 return (error);
2893
2894 error = change_owner(nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
2895
2896 vrele(nd.ni_vp);
2897 return (error);
2898 }
2899
2900 /*
2901 * Common routine to set ownership given a vnode.
2902 */
2903 static int
2904 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
2905 int posix_semantics)
2906 {
2907 struct mount *mp;
2908 struct vattr vattr;
2909 mode_t newmode;
2910 int error;
2911
2912 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
2913 return (error);
2914 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
2915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2916 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
2917 goto out;
2918
2919 #define CHANGED(x) ((int)(x) != -1)
2920 newmode = vattr.va_mode;
2921 if (posix_semantics) {
2922 /*
2923 * POSIX/XPG semantics: if the caller is not the super-user,
2924 * clear set-user-id and set-group-id bits. Both POSIX and
2925 * the XPG consider the behaviour for calls by the super-user
2926 * implementation-defined; we leave the set-user-id and set-
2927 * group-id settings intact in that case.
2928 */
2929 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
2930 NULL) != 0)
2931 newmode &= ~(S_ISUID | S_ISGID);
2932 } else {
2933 /*
2934 * NetBSD semantics: when changing owner and/or group,
2935 * clear the respective bit(s).
2936 */
2937 if (CHANGED(uid))
2938 newmode &= ~S_ISUID;
2939 if (CHANGED(gid))
2940 newmode &= ~S_ISGID;
2941 }
2942 /* Update va_mode iff altered. */
2943 if (vattr.va_mode == newmode)
2944 newmode = VNOVAL;
2945
2946 VATTR_NULL(&vattr);
2947 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
2948 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
2949 vattr.va_mode = newmode;
2950 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
2951 #undef CHANGED
2952
2953 out:
2954 VOP_UNLOCK(vp, 0);
2955 vn_finished_write(mp, 0);
2956 return (error);
2957 }
2958
2959 /*
2960 * Set the access and modification times given a path name; this
2961 * version follows links.
2962 */
2963 /* ARGSUSED */
2964 int
2965 sys_utimes(struct lwp *l, void *v, register_t *retval)
2966 {
2967 struct sys_utimes_args /* {
2968 syscallarg(const char *) path;
2969 syscallarg(const struct timeval *) tptr;
2970 } */ *uap = v;
2971 int error;
2972 struct nameidata nd;
2973
2974 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
2975 if ((error = namei(&nd)) != 0)
2976 return (error);
2977
2978 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
2979
2980 vrele(nd.ni_vp);
2981 return (error);
2982 }
2983
2984 /*
2985 * Set the access and modification times given a file descriptor.
2986 */
2987 /* ARGSUSED */
2988 int
2989 sys_futimes(struct lwp *l, void *v, register_t *retval)
2990 {
2991 struct sys_futimes_args /* {
2992 syscallarg(int) fd;
2993 syscallarg(const struct timeval *) tptr;
2994 } */ *uap = v;
2995 struct proc *p = l->l_proc;
2996 int error;
2997 struct file *fp;
2998
2999 /* getvnode() will use the descriptor for us */
3000 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3001 return (error);
3002
3003 error = change_utimes((struct vnode *)fp->f_data, SCARG(uap, tptr), l);
3004 FILE_UNUSE(fp, l);
3005 return (error);
3006 }
3007
3008 /*
3009 * Set the access and modification times given a path name; this
3010 * version does not follow links.
3011 */
3012 /* ARGSUSED */
3013 int
3014 sys_lutimes(struct lwp *l, void *v, register_t *retval)
3015 {
3016 struct sys_lutimes_args /* {
3017 syscallarg(const char *) path;
3018 syscallarg(const struct timeval *) tptr;
3019 } */ *uap = v;
3020 int error;
3021 struct nameidata nd;
3022
3023 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3024 if ((error = namei(&nd)) != 0)
3025 return (error);
3026
3027 error = change_utimes(nd.ni_vp, SCARG(uap, tptr), l);
3028
3029 vrele(nd.ni_vp);
3030 return (error);
3031 }
3032
3033 /*
3034 * Common routine to set access and modification times given a vnode.
3035 */
3036 static int
3037 change_utimes(struct vnode *vp, const struct timeval *tptr, struct lwp *l)
3038 {
3039 struct mount *mp;
3040 struct vattr vattr;
3041 int error;
3042
3043 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3044 return (error);
3045 VATTR_NULL(&vattr);
3046 if (tptr == NULL) {
3047 nanotime(&vattr.va_atime);
3048 vattr.va_mtime = vattr.va_atime;
3049 vattr.va_vaflags |= VA_UTIMES_NULL;
3050 } else {
3051 struct timeval tv[2];
3052
3053 error = copyin(tptr, tv, sizeof(tv));
3054 if (error)
3055 goto out;
3056 TIMEVAL_TO_TIMESPEC(&tv[0], &vattr.va_atime);
3057 TIMEVAL_TO_TIMESPEC(&tv[1], &vattr.va_mtime);
3058 }
3059 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3060 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3061 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3062 VOP_UNLOCK(vp, 0);
3063 out:
3064 vn_finished_write(mp, 0);
3065 return (error);
3066 }
3067
3068 /*
3069 * Truncate a file given its path name.
3070 */
3071 /* ARGSUSED */
3072 int
3073 sys_truncate(struct lwp *l, void *v, register_t *retval)
3074 {
3075 struct sys_truncate_args /* {
3076 syscallarg(const char *) path;
3077 syscallarg(int) pad;
3078 syscallarg(off_t) length;
3079 } */ *uap = v;
3080 struct vnode *vp;
3081 struct mount *mp;
3082 struct vattr vattr;
3083 int error;
3084 struct nameidata nd;
3085
3086 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3087 if ((error = namei(&nd)) != 0)
3088 return (error);
3089 vp = nd.ni_vp;
3090 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3091 vrele(vp);
3092 return (error);
3093 }
3094 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3095 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3096 if (vp->v_type == VDIR)
3097 error = EISDIR;
3098 else if ((error = vn_writechk(vp)) == 0 &&
3099 (error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) == 0) {
3100 VATTR_NULL(&vattr);
3101 vattr.va_size = SCARG(uap, length);
3102 error = VOP_SETATTR(vp, &vattr, l->l_cred, l);
3103 }
3104 vput(vp);
3105 vn_finished_write(mp, 0);
3106 return (error);
3107 }
3108
3109 /*
3110 * Truncate a file given a file descriptor.
3111 */
3112 /* ARGSUSED */
3113 int
3114 sys_ftruncate(struct lwp *l, void *v, register_t *retval)
3115 {
3116 struct sys_ftruncate_args /* {
3117 syscallarg(int) fd;
3118 syscallarg(int) pad;
3119 syscallarg(off_t) length;
3120 } */ *uap = v;
3121 struct proc *p = l->l_proc;
3122 struct mount *mp;
3123 struct vattr vattr;
3124 struct vnode *vp;
3125 struct file *fp;
3126 int error;
3127
3128 /* getvnode() will use the descriptor for us */
3129 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3130 return (error);
3131 if ((fp->f_flag & FWRITE) == 0) {
3132 error = EINVAL;
3133 goto out;
3134 }
3135 vp = (struct vnode *)fp->f_data;
3136 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3137 FILE_UNUSE(fp, l);
3138 return (error);
3139 }
3140 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3142 if (vp->v_type == VDIR)
3143 error = EISDIR;
3144 else if ((error = vn_writechk(vp)) == 0) {
3145 VATTR_NULL(&vattr);
3146 vattr.va_size = SCARG(uap, length);
3147 error = VOP_SETATTR(vp, &vattr, fp->f_cred, l);
3148 }
3149 VOP_UNLOCK(vp, 0);
3150 vn_finished_write(mp, 0);
3151 out:
3152 FILE_UNUSE(fp, l);
3153 return (error);
3154 }
3155
3156 /*
3157 * Sync an open file.
3158 */
3159 /* ARGSUSED */
3160 int
3161 sys_fsync(struct lwp *l, void *v, register_t *retval)
3162 {
3163 struct sys_fsync_args /* {
3164 syscallarg(int) fd;
3165 } */ *uap = v;
3166 struct proc *p = l->l_proc;
3167 struct vnode *vp;
3168 struct mount *mp;
3169 struct file *fp;
3170 int error;
3171
3172 /* getvnode() will use the descriptor for us */
3173 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3174 return (error);
3175 vp = (struct vnode *)fp->f_data;
3176 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
3177 FILE_UNUSE(fp, l);
3178 return (error);
3179 }
3180 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3181 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0, l);
3182 if (error == 0 && bioops.io_fsync != NULL &&
3183 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3184 (*bioops.io_fsync)(vp, 0);
3185 VOP_UNLOCK(vp, 0);
3186 vn_finished_write(mp, 0);
3187 FILE_UNUSE(fp, l);
3188 return (error);
3189 }
3190
3191 /*
3192 * Sync a range of file data. API modeled after that found in AIX.
3193 *
3194 * FDATASYNC indicates that we need only save enough metadata to be able
3195 * to re-read the written data. Note we duplicate AIX's requirement that
3196 * the file be open for writing.
3197 */
3198 /* ARGSUSED */
3199 int
3200 sys_fsync_range(struct lwp *l, void *v, register_t *retval)
3201 {
3202 struct sys_fsync_range_args /* {
3203 syscallarg(int) fd;
3204 syscallarg(int) flags;
3205 syscallarg(off_t) start;
3206 syscallarg(off_t) length;
3207 } */ *uap = v;
3208 struct proc *p = l->l_proc;
3209 struct vnode *vp;
3210 struct file *fp;
3211 int flags, nflags;
3212 off_t s, e, len;
3213 int error;
3214
3215 /* getvnode() will use the descriptor for us */
3216 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3217 return (error);
3218
3219 if ((fp->f_flag & FWRITE) == 0) {
3220 FILE_UNUSE(fp, l);
3221 return (EBADF);
3222 }
3223
3224 flags = SCARG(uap, flags);
3225 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3226 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3227 return (EINVAL);
3228 }
3229 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3230 if (flags & FDATASYNC)
3231 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3232 else
3233 nflags = FSYNC_WAIT;
3234 if (flags & FDISKSYNC)
3235 nflags |= FSYNC_CACHE;
3236
3237 len = SCARG(uap, length);
3238 /* If length == 0, we do the whole file, and s = l = 0 will do that */
3239 if (len) {
3240 s = SCARG(uap, start);
3241 e = s + len;
3242 if (e < s) {
3243 FILE_UNUSE(fp, l);
3244 return (EINVAL);
3245 }
3246 } else {
3247 e = 0;
3248 s = 0;
3249 }
3250
3251 vp = (struct vnode *)fp->f_data;
3252 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3253 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e, l);
3254
3255 if (error == 0 && bioops.io_fsync != NULL &&
3256 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
3257 (*bioops.io_fsync)(vp, nflags);
3258
3259 VOP_UNLOCK(vp, 0);
3260 FILE_UNUSE(fp, l);
3261 return (error);
3262 }
3263
3264 /*
3265 * Sync the data of an open file.
3266 */
3267 /* ARGSUSED */
3268 int
3269 sys_fdatasync(struct lwp *l, void *v, register_t *retval)
3270 {
3271 struct sys_fdatasync_args /* {
3272 syscallarg(int) fd;
3273 } */ *uap = v;
3274 struct proc *p = l->l_proc;
3275 struct vnode *vp;
3276 struct file *fp;
3277 int error;
3278
3279 /* getvnode() will use the descriptor for us */
3280 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3281 return (error);
3282 if ((fp->f_flag & FWRITE) == 0) {
3283 FILE_UNUSE(fp, l);
3284 return (EBADF);
3285 }
3286 vp = (struct vnode *)fp->f_data;
3287 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3288 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0, l);
3289 VOP_UNLOCK(vp, 0);
3290 FILE_UNUSE(fp, l);
3291 return (error);
3292 }
3293
3294 /*
3295 * Rename files, (standard) BSD semantics frontend.
3296 */
3297 /* ARGSUSED */
3298 int
3299 sys_rename(struct lwp *l, void *v, register_t *retval)
3300 {
3301 struct sys_rename_args /* {
3302 syscallarg(const char *) from;
3303 syscallarg(const char *) to;
3304 } */ *uap = v;
3305
3306 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 0));
3307 }
3308
3309 /*
3310 * Rename files, POSIX semantics frontend.
3311 */
3312 /* ARGSUSED */
3313 int
3314 sys___posix_rename(struct lwp *l, void *v, register_t *retval)
3315 {
3316 struct sys___posix_rename_args /* {
3317 syscallarg(const char *) from;
3318 syscallarg(const char *) to;
3319 } */ *uap = v;
3320
3321 return (rename_files(SCARG(uap, from), SCARG(uap, to), l, 1));
3322 }
3323
3324 /*
3325 * Rename files. Source and destination must either both be directories,
3326 * or both not be directories. If target is a directory, it must be empty.
3327 * If `from' and `to' refer to the same object, the value of the `retain'
3328 * argument is used to determine whether `from' will be
3329 *
3330 * (retain == 0) deleted unless `from' and `to' refer to the same
3331 * object in the file system's name space (BSD).
3332 * (retain == 1) always retained (POSIX).
3333 */
3334 static int
3335 rename_files(const char *from, const char *to, struct lwp *l, int retain)
3336 {
3337 struct mount *mp = NULL;
3338 struct vnode *tvp, *fvp, *tdvp;
3339 struct nameidata fromnd, tond;
3340 struct proc *p;
3341 int error;
3342
3343 NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
3344 from, l);
3345 if ((error = namei(&fromnd)) != 0)
3346 return (error);
3347 fvp = fromnd.ni_vp;
3348 error = vn_start_write(fvp, &mp, V_WAIT | V_PCATCH);
3349 if (error != 0) {
3350 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3351 vrele(fromnd.ni_dvp);
3352 vrele(fvp);
3353 if (fromnd.ni_startdir)
3354 vrele(fromnd.ni_startdir);
3355 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3356 return (error);
3357 }
3358 NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
3359 (fvp->v_type == VDIR ? CREATEDIR : 0), UIO_USERSPACE, to, l);
3360 if ((error = namei(&tond)) != 0) {
3361 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3362 vrele(fromnd.ni_dvp);
3363 vrele(fvp);
3364 goto out1;
3365 }
3366 tdvp = tond.ni_dvp;
3367 tvp = tond.ni_vp;
3368
3369 if (tvp != NULL) {
3370 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3371 error = ENOTDIR;
3372 goto out;
3373 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3374 error = EISDIR;
3375 goto out;
3376 }
3377 }
3378
3379 if (fvp == tdvp)
3380 error = EINVAL;
3381
3382 /*
3383 * Source and destination refer to the same object.
3384 */
3385 if (fvp == tvp) {
3386 if (retain)
3387 error = -1;
3388 else if (fromnd.ni_dvp == tdvp &&
3389 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3390 !memcmp(fromnd.ni_cnd.cn_nameptr,
3391 tond.ni_cnd.cn_nameptr,
3392 fromnd.ni_cnd.cn_namelen))
3393 error = -1;
3394 }
3395
3396 #if NVERIEXEC > 0
3397 if (!error)
3398 error = veriexec_renamechk(fvp, fromnd.ni_dirp, tvp,
3399 tond.ni_dirp, l);
3400 #endif /* NVERIEXEC > 0 */
3401
3402 out:
3403 p = l->l_proc;
3404 if (!error) {
3405 VOP_LEASE(tdvp, l, l->l_cred, LEASE_WRITE);
3406 if (fromnd.ni_dvp != tdvp)
3407 VOP_LEASE(fromnd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3408 if (tvp) {
3409 VOP_LEASE(tvp, l, l->l_cred, LEASE_WRITE);
3410 }
3411 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3412 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3413 } else {
3414 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3415 if (tdvp == tvp)
3416 vrele(tdvp);
3417 else
3418 vput(tdvp);
3419 if (tvp)
3420 vput(tvp);
3421 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3422 vrele(fromnd.ni_dvp);
3423 vrele(fvp);
3424 }
3425 vrele(tond.ni_startdir);
3426 PNBUF_PUT(tond.ni_cnd.cn_pnbuf);
3427 out1:
3428 vn_finished_write(mp, 0);
3429 if (fromnd.ni_startdir)
3430 vrele(fromnd.ni_startdir);
3431 PNBUF_PUT(fromnd.ni_cnd.cn_pnbuf);
3432 return (error == -1 ? 0 : error);
3433 }
3434
3435 /*
3436 * Make a directory file.
3437 */
3438 /* ARGSUSED */
3439 int
3440 sys_mkdir(struct lwp *l, void *v, register_t *retval)
3441 {
3442 struct sys_mkdir_args /* {
3443 syscallarg(const char *) path;
3444 syscallarg(int) mode;
3445 } */ *uap = v;
3446 struct proc *p = l->l_proc;
3447 struct mount *mp;
3448 struct vnode *vp;
3449 struct vattr vattr;
3450 int error;
3451 struct nameidata nd;
3452
3453 restart:
3454 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR, UIO_USERSPACE,
3455 SCARG(uap, path), l);
3456 if ((error = namei(&nd)) != 0)
3457 return (error);
3458 vp = nd.ni_vp;
3459 if (vp != NULL) {
3460 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3461 if (nd.ni_dvp == vp)
3462 vrele(nd.ni_dvp);
3463 else
3464 vput(nd.ni_dvp);
3465 vrele(vp);
3466 return (EEXIST);
3467 }
3468 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3469 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3470 if (nd.ni_dvp == vp)
3471 vrele(nd.ni_dvp);
3472 else
3473 vput(nd.ni_dvp);
3474 if ((error = vn_start_write(NULL, &mp,
3475 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3476 return (error);
3477 goto restart;
3478 }
3479 VATTR_NULL(&vattr);
3480 vattr.va_type = VDIR;
3481 vattr.va_mode =
3482 (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
3483 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3484 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
3485 if (!error)
3486 vput(nd.ni_vp);
3487 vn_finished_write(mp, 0);
3488 return (error);
3489 }
3490
3491 /*
3492 * Remove a directory file.
3493 */
3494 /* ARGSUSED */
3495 int
3496 sys_rmdir(struct lwp *l, void *v, register_t *retval)
3497 {
3498 struct sys_rmdir_args /* {
3499 syscallarg(const char *) path;
3500 } */ *uap = v;
3501 struct mount *mp;
3502 struct vnode *vp;
3503 int error;
3504 struct nameidata nd;
3505
3506 restart:
3507 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
3508 SCARG(uap, path), l);
3509 if ((error = namei(&nd)) != 0)
3510 return (error);
3511 vp = nd.ni_vp;
3512 if (vp->v_type != VDIR) {
3513 error = ENOTDIR;
3514 goto out;
3515 }
3516 /*
3517 * No rmdir "." please.
3518 */
3519 if (nd.ni_dvp == vp) {
3520 error = EINVAL;
3521 goto out;
3522 }
3523 /*
3524 * The root of a mounted filesystem cannot be deleted.
3525 */
3526 if (vp->v_flag & VROOT) {
3527 error = EBUSY;
3528 goto out;
3529 }
3530 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
3531 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3532 if (nd.ni_dvp == vp)
3533 vrele(nd.ni_dvp);
3534 else
3535 vput(nd.ni_dvp);
3536 vput(vp);
3537 if ((error = vn_start_write(NULL, &mp,
3538 V_WAIT | V_SLEEPONLY | V_PCATCH)) != 0)
3539 return (error);
3540 goto restart;
3541 }
3542 VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
3543 VOP_LEASE(vp, l, l->l_cred, LEASE_WRITE);
3544 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
3545 vn_finished_write(mp, 0);
3546 return (error);
3547
3548 out:
3549 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
3550 if (nd.ni_dvp == vp)
3551 vrele(nd.ni_dvp);
3552 else
3553 vput(nd.ni_dvp);
3554 vput(vp);
3555 return (error);
3556 }
3557
3558 /*
3559 * Read a block of directory entries in a file system independent format.
3560 */
3561 int
3562 sys___getdents30(struct lwp *l, void *v, register_t *retval)
3563 {
3564 struct sys___getdents30_args /* {
3565 syscallarg(int) fd;
3566 syscallarg(char *) buf;
3567 syscallarg(size_t) count;
3568 } */ *uap = v;
3569 struct proc *p = l->l_proc;
3570 struct file *fp;
3571 int error, done;
3572
3573 /* getvnode() will use the descriptor for us */
3574 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
3575 return (error);
3576 if ((fp->f_flag & FREAD) == 0) {
3577 error = EBADF;
3578 goto out;
3579 }
3580 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
3581 SCARG(uap, count), &done, l, 0, 0);
3582 #ifdef KTRACE
3583 if (!error && KTRPOINT(p, KTR_GENIO)) {
3584 struct iovec iov;
3585 iov.iov_base = SCARG(uap, buf);
3586 iov.iov_len = done;
3587 ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov, done, 0);
3588 }
3589 #endif
3590 *retval = done;
3591 out:
3592 FILE_UNUSE(fp, l);
3593 return (error);
3594 }
3595
3596 /*
3597 * Set the mode mask for creation of filesystem nodes.
3598 */
3599 int
3600 sys_umask(struct lwp *l, void *v, register_t *retval)
3601 {
3602 struct sys_umask_args /* {
3603 syscallarg(mode_t) newmask;
3604 } */ *uap = v;
3605 struct proc *p = l->l_proc;
3606 struct cwdinfo *cwdi;
3607
3608 cwdi = p->p_cwdi;
3609 *retval = cwdi->cwdi_cmask;
3610 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
3611 return (0);
3612 }
3613
3614 /*
3615 * Void all references to file by ripping underlying filesystem
3616 * away from vnode.
3617 */
3618 /* ARGSUSED */
3619 int
3620 sys_revoke(struct lwp *l, void *v, register_t *retval)
3621 {
3622 struct sys_revoke_args /* {
3623 syscallarg(const char *) path;
3624 } */ *uap = v;
3625 struct mount *mp;
3626 struct vnode *vp;
3627 struct vattr vattr;
3628 int error;
3629 struct nameidata nd;
3630
3631 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), l);
3632 if ((error = namei(&nd)) != 0)
3633 return (error);
3634 vp = nd.ni_vp;
3635 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred, l)) != 0)
3636 goto out;
3637 if (kauth_cred_geteuid(l->l_cred) != vattr.va_uid &&
3638 (error = kauth_authorize_generic(l->l_cred,
3639 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
3640 goto out;
3641 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
3642 goto out;
3643 if (vp->v_usecount > 1 || (vp->v_flag & (VALIASED | VLAYER)))
3644 VOP_REVOKE(vp, REVOKEALL);
3645 vn_finished_write(mp, 0);
3646 out:
3647 vrele(vp);
3648 return (error);
3649 }
3650
3651 /*
3652 * Convert a user file descriptor to a kernel file entry.
3653 */
3654 int
3655 getvnode(struct filedesc *fdp, int fd, struct file **fpp)
3656 {
3657 struct vnode *vp;
3658 struct file *fp;
3659
3660 if ((fp = fd_getfile(fdp, fd)) == NULL)
3661 return (EBADF);
3662
3663 FILE_USE(fp);
3664
3665 if (fp->f_type != DTYPE_VNODE) {
3666 FILE_UNUSE(fp, NULL);
3667 return (EINVAL);
3668 }
3669
3670 vp = (struct vnode *)fp->f_data;
3671 if (vp->v_type == VBAD) {
3672 FILE_UNUSE(fp, NULL);
3673 return (EBADF);
3674 }
3675
3676 *fpp = fp;
3677 return (0);
3678 }
3679