vfs_syscalls.c revision 1.504.2.2 1 /* $NetBSD: vfs_syscalls.c,v 1.504.2.2 2017/03/20 06:57:48 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.504.2.2 2017/03/20 06:57:48 pgoyette Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/fstrans.h>
91 #include <sys/proc.h>
92 #include <sys/uio.h>
93 #include <sys/kmem.h>
94 #include <sys/dirent.h>
95 #include <sys/sysctl.h>
96 #include <sys/syscallargs.h>
97 #include <sys/vfs_syscalls.h>
98 #include <sys/quota.h>
99 #include <sys/quotactl.h>
100 #include <sys/ktrace.h>
101 #ifdef FILEASSOC
102 #include <sys/fileassoc.h>
103 #endif /* FILEASSOC */
104 #include <sys/extattr.h>
105 #include <sys/verified_exec.h>
106 #include <sys/kauth.h>
107 #include <sys/atomic.h>
108 #include <sys/module.h>
109 #include <sys/buf.h>
110
111 #include <miscfs/genfs/genfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
119 /* XXX this shouldn't be here */
120 #ifndef OFF_T_MAX
121 #define OFF_T_MAX __type_max(off_t)
122 #endif
123
124 static int change_flags(struct vnode *, u_long, struct lwp *);
125 static int change_mode(struct vnode *, int, struct lwp *);
126 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
127 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
128 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
129 enum uio_seg);
130 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
131 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
132 enum uio_seg);
133 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
134 enum uio_seg, int);
135 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
136 size_t, register_t *);
137 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
138
139 static int fd_nameiat(struct lwp *, int, struct nameidata *);
140 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
141 namei_simple_flags_t, struct vnode **);
142
143
144 /*
145 * This table is used to maintain compatibility with 4.3BSD
146 * and NetBSD 0.9 mount syscalls - and possibly other systems.
147 * Note, the order is important!
148 *
149 * Do not modify this table. It should only contain filesystems
150 * supported by NetBSD 0.9 and 4.3BSD.
151 */
152 const char * const mountcompatnames[] = {
153 NULL, /* 0 = MOUNT_NONE */
154 MOUNT_FFS, /* 1 = MOUNT_UFS */
155 MOUNT_NFS, /* 2 */
156 MOUNT_MFS, /* 3 */
157 MOUNT_MSDOS, /* 4 */
158 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
159 MOUNT_FDESC, /* 6 */
160 MOUNT_KERNFS, /* 7 */
161 NULL, /* 8 = MOUNT_DEVFS */
162 MOUNT_AFS, /* 9 */
163 };
164
165 const int nmountcompatnames = __arraycount(mountcompatnames);
166
167 static int
168 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
169 {
170 file_t *dfp;
171 int error;
172
173 if (fdat != AT_FDCWD) {
174 if ((error = fd_getvnode(fdat, &dfp)) != 0)
175 goto out;
176
177 NDAT(ndp, dfp->f_vnode);
178 }
179
180 error = namei(ndp);
181
182 if (fdat != AT_FDCWD)
183 fd_putfile(fdat);
184 out:
185 return error;
186 }
187
188 static int
189 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
190 namei_simple_flags_t sflags, struct vnode **vp_ret)
191 {
192 file_t *dfp;
193 struct vnode *dvp;
194 int error;
195
196 if (fdat != AT_FDCWD) {
197 if ((error = fd_getvnode(fdat, &dfp)) != 0)
198 goto out;
199
200 dvp = dfp->f_vnode;
201 } else {
202 dvp = NULL;
203 }
204
205 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
206
207 if (fdat != AT_FDCWD)
208 fd_putfile(fdat);
209 out:
210 return error;
211 }
212
213 static int
214 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
215 {
216 int error;
217
218 fp->f_flag = flags & FMASK;
219 fp->f_type = DTYPE_VNODE;
220 fp->f_ops = &vnops;
221 fp->f_vnode = vp;
222
223 if (flags & (O_EXLOCK | O_SHLOCK)) {
224 struct flock lf;
225 int type;
226
227 lf.l_whence = SEEK_SET;
228 lf.l_start = 0;
229 lf.l_len = 0;
230 if (flags & O_EXLOCK)
231 lf.l_type = F_WRLCK;
232 else
233 lf.l_type = F_RDLCK;
234 type = F_FLOCK;
235 if ((flags & FNONBLOCK) == 0)
236 type |= F_WAIT;
237 VOP_UNLOCK(vp);
238 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
239 if (error) {
240 (void) vn_close(vp, fp->f_flag, fp->f_cred);
241 fd_abort(l->l_proc, fp, indx);
242 return error;
243 }
244 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
245 atomic_or_uint(&fp->f_flag, FHASLOCK);
246 }
247 if (flags & O_CLOEXEC)
248 fd_set_exclose(l, indx, true);
249 return 0;
250 }
251
252 static int
253 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
254 void *data, size_t *data_len)
255 {
256 struct mount *mp;
257 int error = 0, saved_flags;
258
259 mp = vp->v_mount;
260 saved_flags = mp->mnt_flag;
261
262 /* We can operate only on VV_ROOT nodes. */
263 if ((vp->v_vflag & VV_ROOT) == 0) {
264 error = EINVAL;
265 goto out;
266 }
267
268 /*
269 * We only allow the filesystem to be reloaded if it
270 * is currently mounted read-only. Additionally, we
271 * prevent read-write to read-only downgrades.
272 */
273 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
274 (mp->mnt_flag & MNT_RDONLY) == 0 &&
275 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
276 error = EOPNOTSUPP; /* Needs translation */
277 goto out;
278 }
279
280 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
281 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
282 if (error)
283 goto out;
284
285 if (vfs_busy(mp, NULL)) {
286 error = EPERM;
287 goto out;
288 }
289
290 error = vfs_suspend(mp, 0);
291 if (error)
292 goto out;
293
294 mutex_enter(&mp->mnt_updating);
295
296 mp->mnt_flag &= ~MNT_OP_FLAGS;
297 mp->mnt_flag |= flags & MNT_OP_FLAGS;
298
299 /*
300 * Set the mount level flags.
301 */
302 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
303 if ((flags & MNT_RDONLY))
304 mp->mnt_iflag |= IMNT_WANTRDONLY;
305 else
306 mp->mnt_iflag |= IMNT_WANTRDWR;
307 }
308 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
309 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
310 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
311 mp->mnt_flag &= ~MNT_RDONLY;
312
313 error = VFS_MOUNT(mp, path, data, data_len);
314
315 if (error && data != NULL) {
316 int error2;
317
318 /*
319 * Update failed; let's try and see if it was an
320 * export request. For compat with 3.0 and earlier.
321 */
322 error2 = vfs_hooks_reexport(mp, path, data);
323
324 /*
325 * Only update error code if the export request was
326 * understood but some problem occurred while
327 * processing it.
328 */
329 if (error2 != EJUSTRETURN)
330 error = error2;
331 }
332
333 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
334 mp->mnt_flag |= MNT_RDONLY;
335 if (error)
336 mp->mnt_flag = saved_flags;
337 mp->mnt_flag &= ~MNT_OP_FLAGS;
338 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
339 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
340 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
341 vfs_syncer_add_to_worklist(mp);
342 } else {
343 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
344 vfs_syncer_remove_from_worklist(mp);
345 }
346 mutex_exit(&mp->mnt_updating);
347 vfs_resume(mp);
348 vfs_unbusy(mp, false, NULL);
349
350 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
351 (flags & MNT_EXTATTR)) {
352 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
353 NULL, 0, NULL) != 0) {
354 printf("%s: failed to start extattr, error = %d",
355 mp->mnt_stat.f_mntonname, error);
356 mp->mnt_flag &= ~MNT_EXTATTR;
357 }
358 }
359
360 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
361 !(flags & MNT_EXTATTR)) {
362 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
363 NULL, 0, NULL) != 0) {
364 printf("%s: failed to stop extattr, error = %d",
365 mp->mnt_stat.f_mntonname, error);
366 mp->mnt_flag |= MNT_RDONLY;
367 }
368 }
369 out:
370 return (error);
371 }
372
373 static int
374 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
375 struct vfsops **vfsops)
376 {
377 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
378 int error;
379
380 if (type_seg == UIO_USERSPACE) {
381 /* Copy file-system type from userspace. */
382 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
383 } else {
384 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
385 KASSERT(error == 0);
386 }
387
388 if (error) {
389 /*
390 * Historically, filesystem types were identified by numbers.
391 * If we get an integer for the filesystem type instead of a
392 * string, we check to see if it matches one of the historic
393 * filesystem types.
394 */
395 u_long fsindex = (u_long)fstype;
396 if (fsindex >= nmountcompatnames ||
397 mountcompatnames[fsindex] == NULL)
398 return ENODEV;
399 strlcpy(fstypename, mountcompatnames[fsindex],
400 sizeof(fstypename));
401 }
402
403 /* Accept `ufs' as an alias for `ffs', for compatibility. */
404 if (strcmp(fstypename, "ufs") == 0)
405 fstypename[0] = 'f';
406
407 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
408 return 0;
409
410 /* If we can autoload a vfs module, try again */
411 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
412
413 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
414 return 0;
415
416 return ENODEV;
417 }
418
419 static int
420 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
421 void *data, size_t *data_len)
422 {
423 struct mount *mp;
424 int error;
425
426 /* If MNT_GETARGS is specified, it should be the only flag. */
427 if (flags & ~MNT_GETARGS)
428 return EINVAL;
429
430 mp = vp->v_mount;
431
432 /* XXX: probably some notion of "can see" here if we want isolation. */
433 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
434 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
435 if (error)
436 return error;
437
438 if ((vp->v_vflag & VV_ROOT) == 0)
439 return EINVAL;
440
441 if (vfs_busy(mp, NULL))
442 return EPERM;
443
444 mutex_enter(&mp->mnt_updating);
445 mp->mnt_flag &= ~MNT_OP_FLAGS;
446 mp->mnt_flag |= MNT_GETARGS;
447 error = VFS_MOUNT(mp, path, data, data_len);
448 mp->mnt_flag &= ~MNT_OP_FLAGS;
449 mutex_exit(&mp->mnt_updating);
450
451 vfs_unbusy(mp, false, NULL);
452 return (error);
453 }
454
455 int
456 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
457 {
458 /* {
459 syscallarg(const char *) type;
460 syscallarg(const char *) path;
461 syscallarg(int) flags;
462 syscallarg(void *) data;
463 syscallarg(size_t) data_len;
464 } */
465
466 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
467 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
468 SCARG(uap, data_len), retval);
469 }
470
471 int
472 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
473 const char *path, int flags, void *data, enum uio_seg data_seg,
474 size_t data_len, register_t *retval)
475 {
476 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
477 struct vnode *vp;
478 void *data_buf = data;
479 bool vfsopsrele = false;
480 size_t alloc_sz = 0;
481 int error;
482
483 /*
484 * Get vnode to be covered
485 */
486 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
487 if (error != 0) {
488 vp = NULL;
489 goto done;
490 }
491
492 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
493 vfsops = vp->v_mount->mnt_op;
494 } else {
495 /* 'type' is userspace */
496 error = mount_get_vfsops(type, type_seg, &vfsops);
497 if (error != 0)
498 goto done;
499 vfsopsrele = true;
500 }
501
502 /*
503 * We allow data to be NULL, even for userspace. Some fs's don't need
504 * it. The others will handle NULL.
505 */
506 if (data != NULL && data_seg == UIO_USERSPACE) {
507 if (data_len == 0) {
508 /* No length supplied, use default for filesystem */
509 data_len = vfsops->vfs_min_mount_data;
510
511 /*
512 * Hopefully a longer buffer won't make copyin() fail.
513 * For compatibility with 3.0 and earlier.
514 */
515 if (flags & MNT_UPDATE
516 && data_len < sizeof (struct mnt_export_args30))
517 data_len = sizeof (struct mnt_export_args30);
518 }
519 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
520 error = EINVAL;
521 goto done;
522 }
523 alloc_sz = data_len;
524 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
525
526 /* NFS needs the buffer even for mnt_getargs .... */
527 error = copyin(data, data_buf, data_len);
528 if (error != 0)
529 goto done;
530 }
531
532 if (flags & MNT_GETARGS) {
533 if (data_len == 0) {
534 error = EINVAL;
535 goto done;
536 }
537 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
538 if (error != 0)
539 goto done;
540 if (data_seg == UIO_USERSPACE)
541 error = copyout(data_buf, data, data_len);
542 *retval = data_len;
543 } else if (flags & MNT_UPDATE) {
544 error = mount_update(l, vp, path, flags, data_buf, &data_len);
545 } else {
546 /* Locking is handled internally in mount_domount(). */
547 KASSERT(vfsopsrele == true);
548 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
549 &data_len);
550 vfsopsrele = false;
551 }
552
553 done:
554 if (vfsopsrele)
555 vfs_delref(vfsops);
556 if (vp != NULL) {
557 vrele(vp);
558 }
559 if (data_buf != data)
560 kmem_free(data_buf, alloc_sz);
561 return (error);
562 }
563
564 /*
565 * Unmount a file system.
566 *
567 * Note: unmount takes a path to the vnode mounted on as argument,
568 * not special file (as before).
569 */
570 /* ARGSUSED */
571 int
572 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
573 {
574 /* {
575 syscallarg(const char *) path;
576 syscallarg(int) flags;
577 } */
578 struct vnode *vp;
579 struct mount *mp;
580 int error;
581 struct pathbuf *pb;
582 struct nameidata nd;
583
584 error = pathbuf_copyin(SCARG(uap, path), &pb);
585 if (error) {
586 return error;
587 }
588
589 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
590 if ((error = namei(&nd)) != 0) {
591 pathbuf_destroy(pb);
592 return error;
593 }
594 vp = nd.ni_vp;
595 pathbuf_destroy(pb);
596
597 mp = vp->v_mount;
598 atomic_inc_uint(&mp->mnt_refcnt);
599 VOP_UNLOCK(vp);
600
601 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
602 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
603 if (error) {
604 vrele(vp);
605 vfs_destroy(mp);
606 return (error);
607 }
608
609 /*
610 * Don't allow unmounting the root file system.
611 */
612 if (mp->mnt_flag & MNT_ROOTFS) {
613 vrele(vp);
614 vfs_destroy(mp);
615 return (EINVAL);
616 }
617
618 /*
619 * Must be the root of the filesystem
620 */
621 if ((vp->v_vflag & VV_ROOT) == 0) {
622 vrele(vp);
623 vfs_destroy(mp);
624 return (EINVAL);
625 }
626
627 vrele(vp);
628 error = dounmount(mp, SCARG(uap, flags), l);
629 vfs_destroy(mp);
630 return error;
631 }
632
633 /*
634 * Sync each mounted filesystem.
635 */
636 #ifdef DEBUG
637 int syncprt = 0;
638 struct ctldebug debug0 = { "syncprt", &syncprt };
639 #endif
640
641 void
642 do_sys_sync(struct lwp *l)
643 {
644 struct mount *mp, *nmp;
645 int asyncflag;
646
647 mutex_enter(&mountlist_lock);
648 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
649 if (vfs_busy(mp, &nmp)) {
650 continue;
651 }
652 fstrans_start(mp, FSTRANS_SHARED);
653 mutex_enter(&mp->mnt_updating);
654 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
655 asyncflag = mp->mnt_flag & MNT_ASYNC;
656 mp->mnt_flag &= ~MNT_ASYNC;
657 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
658 if (asyncflag)
659 mp->mnt_flag |= MNT_ASYNC;
660 }
661 mutex_exit(&mp->mnt_updating);
662 fstrans_done(mp);
663 vfs_unbusy(mp, false, &nmp);
664 }
665 mutex_exit(&mountlist_lock);
666 #ifdef DEBUG
667 if (syncprt)
668 vfs_bufstats();
669 #endif /* DEBUG */
670 }
671
672 /* ARGSUSED */
673 int
674 sys_sync(struct lwp *l, const void *v, register_t *retval)
675 {
676 do_sys_sync(l);
677 return (0);
678 }
679
680
681 /*
682 * Access or change filesystem quotas.
683 *
684 * (this is really 14 different calls bundled into one)
685 */
686
687 static int
688 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
689 {
690 struct quotastat info_k;
691 int error;
692
693 /* ensure any padding bytes are cleared */
694 memset(&info_k, 0, sizeof(info_k));
695
696 error = vfs_quotactl_stat(mp, &info_k);
697 if (error) {
698 return error;
699 }
700
701 return copyout(&info_k, info_u, sizeof(info_k));
702 }
703
704 static int
705 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
706 struct quotaidtypestat *info_u)
707 {
708 struct quotaidtypestat info_k;
709 int error;
710
711 /* ensure any padding bytes are cleared */
712 memset(&info_k, 0, sizeof(info_k));
713
714 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
715 if (error) {
716 return error;
717 }
718
719 return copyout(&info_k, info_u, sizeof(info_k));
720 }
721
722 static int
723 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
724 struct quotaobjtypestat *info_u)
725 {
726 struct quotaobjtypestat info_k;
727 int error;
728
729 /* ensure any padding bytes are cleared */
730 memset(&info_k, 0, sizeof(info_k));
731
732 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
733 if (error) {
734 return error;
735 }
736
737 return copyout(&info_k, info_u, sizeof(info_k));
738 }
739
740 static int
741 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
742 struct quotaval *val_u)
743 {
744 struct quotakey key_k;
745 struct quotaval val_k;
746 int error;
747
748 /* ensure any padding bytes are cleared */
749 memset(&val_k, 0, sizeof(val_k));
750
751 error = copyin(key_u, &key_k, sizeof(key_k));
752 if (error) {
753 return error;
754 }
755
756 error = vfs_quotactl_get(mp, &key_k, &val_k);
757 if (error) {
758 return error;
759 }
760
761 return copyout(&val_k, val_u, sizeof(val_k));
762 }
763
764 static int
765 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
766 const struct quotaval *val_u)
767 {
768 struct quotakey key_k;
769 struct quotaval val_k;
770 int error;
771
772 error = copyin(key_u, &key_k, sizeof(key_k));
773 if (error) {
774 return error;
775 }
776
777 error = copyin(val_u, &val_k, sizeof(val_k));
778 if (error) {
779 return error;
780 }
781
782 return vfs_quotactl_put(mp, &key_k, &val_k);
783 }
784
785 static int
786 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
787 {
788 struct quotakey key_k;
789 int error;
790
791 error = copyin(key_u, &key_k, sizeof(key_k));
792 if (error) {
793 return error;
794 }
795
796 return vfs_quotactl_del(mp, &key_k);
797 }
798
799 static int
800 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
801 {
802 struct quotakcursor cursor_k;
803 int error;
804
805 /* ensure any padding bytes are cleared */
806 memset(&cursor_k, 0, sizeof(cursor_k));
807
808 error = vfs_quotactl_cursoropen(mp, &cursor_k);
809 if (error) {
810 return error;
811 }
812
813 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
814 }
815
816 static int
817 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
818 {
819 struct quotakcursor cursor_k;
820 int error;
821
822 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
823 if (error) {
824 return error;
825 }
826
827 return vfs_quotactl_cursorclose(mp, &cursor_k);
828 }
829
830 static int
831 do_sys_quotactl_cursorskipidtype(struct mount *mp,
832 struct quotakcursor *cursor_u, int idtype)
833 {
834 struct quotakcursor cursor_k;
835 int error;
836
837 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
838 if (error) {
839 return error;
840 }
841
842 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
843 if (error) {
844 return error;
845 }
846
847 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
848 }
849
850 static int
851 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
852 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
853 unsigned *ret_u)
854 {
855 #define CGET_STACK_MAX 8
856 struct quotakcursor cursor_k;
857 struct quotakey stackkeys[CGET_STACK_MAX];
858 struct quotaval stackvals[CGET_STACK_MAX];
859 struct quotakey *keys_k;
860 struct quotaval *vals_k;
861 unsigned ret_k;
862 int error;
863
864 if (maxnum > 128) {
865 maxnum = 128;
866 }
867
868 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
869 if (error) {
870 return error;
871 }
872
873 if (maxnum <= CGET_STACK_MAX) {
874 keys_k = stackkeys;
875 vals_k = stackvals;
876 /* ensure any padding bytes are cleared */
877 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
878 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
879 } else {
880 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
881 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
882 }
883
884 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
885 &ret_k);
886 if (error) {
887 goto fail;
888 }
889
890 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
891 if (error) {
892 goto fail;
893 }
894
895 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
896 if (error) {
897 goto fail;
898 }
899
900 error = copyout(&ret_k, ret_u, sizeof(ret_k));
901 if (error) {
902 goto fail;
903 }
904
905 /* do last to maximize the chance of being able to recover a failure */
906 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
907
908 fail:
909 if (keys_k != stackkeys) {
910 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
911 }
912 if (vals_k != stackvals) {
913 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
914 }
915 return error;
916 }
917
918 static int
919 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
920 int *ret_u)
921 {
922 struct quotakcursor cursor_k;
923 int ret_k;
924 int error;
925
926 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
927 if (error) {
928 return error;
929 }
930
931 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
932 if (error) {
933 return error;
934 }
935
936 error = copyout(&ret_k, ret_u, sizeof(ret_k));
937 if (error) {
938 return error;
939 }
940
941 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
942 }
943
944 static int
945 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
946 {
947 struct quotakcursor cursor_k;
948 int error;
949
950 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
951 if (error) {
952 return error;
953 }
954
955 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
956 if (error) {
957 return error;
958 }
959
960 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
961 }
962
963 static int
964 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
965 {
966 char *path_k;
967 int error;
968
969 /* XXX this should probably be a struct pathbuf */
970 path_k = PNBUF_GET();
971 error = copyin(path_u, path_k, PATH_MAX);
972 if (error) {
973 PNBUF_PUT(path_k);
974 return error;
975 }
976
977 error = vfs_quotactl_quotaon(mp, idtype, path_k);
978
979 PNBUF_PUT(path_k);
980 return error;
981 }
982
/*
 * QUOTACTL_QUOTAOFF: forward the request for 'idtype' on mp straight
 * to vfs_quotactl_quotaoff(); there is no user data to copy.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int error;

	error = vfs_quotactl_quotaoff(mp, idtype);
	return error;
}
988
989 int
990 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
991 {
992 struct mount *mp;
993 struct vnode *vp;
994 int error;
995
996 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
997 if (error != 0)
998 return (error);
999 mp = vp->v_mount;
1000
1001 switch (args->qc_op) {
1002 case QUOTACTL_STAT:
1003 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
1004 break;
1005 case QUOTACTL_IDTYPESTAT:
1006 error = do_sys_quotactl_idtypestat(mp,
1007 args->u.idtypestat.qc_idtype,
1008 args->u.idtypestat.qc_info);
1009 break;
1010 case QUOTACTL_OBJTYPESTAT:
1011 error = do_sys_quotactl_objtypestat(mp,
1012 args->u.objtypestat.qc_objtype,
1013 args->u.objtypestat.qc_info);
1014 break;
1015 case QUOTACTL_GET:
1016 error = do_sys_quotactl_get(mp,
1017 args->u.get.qc_key,
1018 args->u.get.qc_val);
1019 break;
1020 case QUOTACTL_PUT:
1021 error = do_sys_quotactl_put(mp,
1022 args->u.put.qc_key,
1023 args->u.put.qc_val);
1024 break;
1025 case QUOTACTL_DEL:
1026 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1027 break;
1028 case QUOTACTL_CURSOROPEN:
1029 error = do_sys_quotactl_cursoropen(mp,
1030 args->u.cursoropen.qc_cursor);
1031 break;
1032 case QUOTACTL_CURSORCLOSE:
1033 error = do_sys_quotactl_cursorclose(mp,
1034 args->u.cursorclose.qc_cursor);
1035 break;
1036 case QUOTACTL_CURSORSKIPIDTYPE:
1037 error = do_sys_quotactl_cursorskipidtype(mp,
1038 args->u.cursorskipidtype.qc_cursor,
1039 args->u.cursorskipidtype.qc_idtype);
1040 break;
1041 case QUOTACTL_CURSORGET:
1042 error = do_sys_quotactl_cursorget(mp,
1043 args->u.cursorget.qc_cursor,
1044 args->u.cursorget.qc_keys,
1045 args->u.cursorget.qc_vals,
1046 args->u.cursorget.qc_maxnum,
1047 args->u.cursorget.qc_ret);
1048 break;
1049 case QUOTACTL_CURSORATEND:
1050 error = do_sys_quotactl_cursoratend(mp,
1051 args->u.cursoratend.qc_cursor,
1052 args->u.cursoratend.qc_ret);
1053 break;
1054 case QUOTACTL_CURSORREWIND:
1055 error = do_sys_quotactl_cursorrewind(mp,
1056 args->u.cursorrewind.qc_cursor);
1057 break;
1058 case QUOTACTL_QUOTAON:
1059 error = do_sys_quotactl_quotaon(mp,
1060 args->u.quotaon.qc_idtype,
1061 args->u.quotaon.qc_quotafile);
1062 break;
1063 case QUOTACTL_QUOTAOFF:
1064 error = do_sys_quotactl_quotaoff(mp,
1065 args->u.quotaoff.qc_idtype);
1066 break;
1067 default:
1068 error = EINVAL;
1069 break;
1070 }
1071
1072 vrele(vp);
1073 return error;
1074 }
1075
1076 /* ARGSUSED */
1077 int
1078 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1079 register_t *retval)
1080 {
1081 /* {
1082 syscallarg(const char *) path;
1083 syscallarg(struct quotactl_args *) args;
1084 } */
1085 struct quotactl_args args;
1086 int error;
1087
1088 error = copyin(SCARG(uap, args), &args, sizeof(args));
1089 if (error) {
1090 return error;
1091 }
1092
1093 return do_sys_quotactl(SCARG(uap, path), &args);
1094 }
1095
1096 int
1097 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1098 int root)
1099 {
1100 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1101 int error = 0;
1102
1103 /*
1104 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1105 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1106 * overrides MNT_NOWAIT.
1107 */
1108 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1109 (flags != MNT_WAIT && flags != 0)) {
1110 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1111 goto done;
1112 }
1113
1114 /* Get the filesystem stats now */
1115 memset(sp, 0, sizeof(*sp));
1116 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1117 return error;
1118 }
1119
1120 if (cwdi->cwdi_rdir == NULL)
1121 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1122 done:
1123 if (cwdi->cwdi_rdir != NULL) {
1124 size_t len;
1125 char *bp;
1126 char c;
1127 char *path = PNBUF_GET();
1128
1129 bp = path + MAXPATHLEN;
1130 *--bp = '\0';
1131 rw_enter(&cwdi->cwdi_lock, RW_READER);
1132 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1133 MAXPATHLEN / 2, 0, l);
1134 rw_exit(&cwdi->cwdi_lock);
1135 if (error) {
1136 PNBUF_PUT(path);
1137 return error;
1138 }
1139 len = strlen(bp);
1140 if (len != 1) {
1141 /*
1142 * for mount points that are below our root, we can see
1143 * them, so we fix up the pathname and return them. The
1144 * rest we cannot see, so we don't allow viewing the
1145 * data.
1146 */
1147 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1148 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1149 (void)strlcpy(sp->f_mntonname,
1150 c == '\0' ? "/" : &sp->f_mntonname[len],
1151 sizeof(sp->f_mntonname));
1152 } else {
1153 if (root)
1154 (void)strlcpy(sp->f_mntonname, "/",
1155 sizeof(sp->f_mntonname));
1156 else
1157 error = EPERM;
1158 }
1159 }
1160 PNBUF_PUT(path);
1161 }
1162 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1163 return error;
1164 }
1165
1166 /*
1167 * Get filesystem statistics by path.
1168 */
1169 int
1170 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1171 {
1172 struct mount *mp;
1173 int error;
1174 struct vnode *vp;
1175
1176 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1177 if (error != 0)
1178 return error;
1179 mp = vp->v_mount;
1180 error = dostatvfs(mp, sb, l, flags, 1);
1181 vrele(vp);
1182 return error;
1183 }
1184
1185 /* ARGSUSED */
1186 int
1187 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1188 {
1189 /* {
1190 syscallarg(const char *) path;
1191 syscallarg(struct statvfs *) buf;
1192 syscallarg(int) flags;
1193 } */
1194 struct statvfs *sb;
1195 int error;
1196
1197 sb = STATVFSBUF_GET();
1198 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1199 if (error == 0)
1200 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1201 STATVFSBUF_PUT(sb);
1202 return error;
1203 }
1204
1205 /*
1206 * Get filesystem statistics by fd.
1207 */
1208 int
1209 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1210 {
1211 file_t *fp;
1212 struct mount *mp;
1213 int error;
1214
1215 /* fd_getvnode() will use the descriptor for us */
1216 if ((error = fd_getvnode(fd, &fp)) != 0)
1217 return (error);
1218 mp = fp->f_vnode->v_mount;
1219 error = dostatvfs(mp, sb, curlwp, flags, 1);
1220 fd_putfile(fd);
1221 return error;
1222 }
1223
1224 /* ARGSUSED */
1225 int
1226 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1227 {
1228 /* {
1229 syscallarg(int) fd;
1230 syscallarg(struct statvfs *) buf;
1231 syscallarg(int) flags;
1232 } */
1233 struct statvfs *sb;
1234 int error;
1235
1236 sb = STATVFSBUF_GET();
1237 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1238 if (error == 0)
1239 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1240 STATVFSBUF_PUT(sb);
1241 return error;
1242 }
1243
1244
1245 /*
1246 * Get statistics on all filesystems.
1247 */
1248 int
1249 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1250 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1251 register_t *retval)
1252 {
1253 int root = 0;
1254 struct proc *p = l->l_proc;
1255 struct mount *mp, *nmp;
1256 struct statvfs *sb;
1257 size_t count, maxcount;
1258 int error = 0;
1259
1260 sb = STATVFSBUF_GET();
1261 maxcount = bufsize / entry_sz;
1262 mutex_enter(&mountlist_lock);
1263 count = 0;
1264 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1265 if (vfs_busy(mp, &nmp)) {
1266 continue;
1267 }
1268 if (sfsp && count < maxcount) {
1269 error = dostatvfs(mp, sb, l, flags, 0);
1270 if (error) {
1271 vfs_unbusy(mp, false, &nmp);
1272 error = 0;
1273 continue;
1274 }
1275 error = copyfn(sb, sfsp, entry_sz);
1276 if (error) {
1277 vfs_unbusy(mp, false, NULL);
1278 goto out;
1279 }
1280 sfsp = (char *)sfsp + entry_sz;
1281 root |= strcmp(sb->f_mntonname, "/") == 0;
1282 }
1283 count++;
1284 vfs_unbusy(mp, false, &nmp);
1285 }
1286 mutex_exit(&mountlist_lock);
1287
1288 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1289 /*
1290 * fake a root entry
1291 */
1292 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1293 sb, l, flags, 1);
1294 if (error != 0)
1295 goto out;
1296 if (sfsp) {
1297 error = copyfn(sb, sfsp, entry_sz);
1298 if (error != 0)
1299 goto out;
1300 }
1301 count++;
1302 }
1303 if (sfsp && count > maxcount)
1304 *retval = maxcount;
1305 else
1306 *retval = count;
1307 out:
1308 STATVFSBUF_PUT(sb);
1309 return error;
1310 }
1311
1312 int
1313 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1314 {
1315 /* {
1316 syscallarg(struct statvfs *) buf;
1317 syscallarg(size_t) bufsize;
1318 syscallarg(int) flags;
1319 } */
1320
1321 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1322 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1323 }
1324
1325 /*
1326 * Change current working directory to a given file descriptor.
1327 */
1328 /* ARGSUSED */
1329 int
1330 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1331 {
1332 /* {
1333 syscallarg(int) fd;
1334 } */
1335 struct proc *p = l->l_proc;
1336 struct cwdinfo *cwdi;
1337 struct vnode *vp, *tdp;
1338 struct mount *mp;
1339 file_t *fp;
1340 int error, fd;
1341
1342 /* fd_getvnode() will use the descriptor for us */
1343 fd = SCARG(uap, fd);
1344 if ((error = fd_getvnode(fd, &fp)) != 0)
1345 return (error);
1346 vp = fp->f_vnode;
1347
1348 vref(vp);
1349 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1350 if (vp->v_type != VDIR)
1351 error = ENOTDIR;
1352 else
1353 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1354 if (error) {
1355 vput(vp);
1356 goto out;
1357 }
1358 while ((mp = vp->v_mountedhere) != NULL) {
1359 error = vfs_busy(mp, NULL);
1360 vput(vp);
1361 if (error != 0)
1362 goto out;
1363 error = VFS_ROOT(mp, &tdp);
1364 vfs_unbusy(mp, false, NULL);
1365 if (error)
1366 goto out;
1367 vp = tdp;
1368 }
1369 VOP_UNLOCK(vp);
1370
1371 /*
1372 * Disallow changing to a directory not under the process's
1373 * current root directory (if there is one).
1374 */
1375 cwdi = p->p_cwdi;
1376 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1377 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1378 vrele(vp);
1379 error = EPERM; /* operation not permitted */
1380 } else {
1381 vrele(cwdi->cwdi_cdir);
1382 cwdi->cwdi_cdir = vp;
1383 }
1384 rw_exit(&cwdi->cwdi_lock);
1385
1386 out:
1387 fd_putfile(fd);
1388 return (error);
1389 }
1390
1391 /*
1392 * Change this process's notion of the root directory to a given file
1393 * descriptor.
1394 */
1395 int
1396 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1397 {
1398 struct proc *p = l->l_proc;
1399 struct vnode *vp;
1400 file_t *fp;
1401 int error, fd = SCARG(uap, fd);
1402
1403 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1404 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1405 return error;
1406 /* fd_getvnode() will use the descriptor for us */
1407 if ((error = fd_getvnode(fd, &fp)) != 0)
1408 return error;
1409 vp = fp->f_vnode;
1410 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1411 if (vp->v_type != VDIR)
1412 error = ENOTDIR;
1413 else
1414 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1415 VOP_UNLOCK(vp);
1416 if (error)
1417 goto out;
1418 vref(vp);
1419
1420 change_root(p->p_cwdi, vp, l);
1421
1422 out:
1423 fd_putfile(fd);
1424 return (error);
1425 }
1426
1427 /*
1428 * Change current working directory (``.'').
1429 */
1430 /* ARGSUSED */
1431 int
1432 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1433 {
1434 /* {
1435 syscallarg(const char *) path;
1436 } */
1437 struct proc *p = l->l_proc;
1438 struct cwdinfo *cwdi;
1439 int error;
1440 struct vnode *vp;
1441
1442 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1443 &vp, l)) != 0)
1444 return (error);
1445 cwdi = p->p_cwdi;
1446 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1447 vrele(cwdi->cwdi_cdir);
1448 cwdi->cwdi_cdir = vp;
1449 rw_exit(&cwdi->cwdi_lock);
1450 return (0);
1451 }
1452
1453 /*
1454 * Change notion of root (``/'') directory.
1455 */
1456 /* ARGSUSED */
1457 int
1458 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1459 {
1460 /* {
1461 syscallarg(const char *) path;
1462 } */
1463 struct proc *p = l->l_proc;
1464 int error;
1465 struct vnode *vp;
1466
1467 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1468 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1469 return (error);
1470 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1471 &vp, l)) != 0)
1472 return (error);
1473
1474 change_root(p->p_cwdi, vp, l);
1475
1476 return (0);
1477 }
1478
1479 /*
1480 * Common routine for chroot and fchroot.
1481 * NB: callers need to properly authorize the change root operation.
1482 */
1483 void
1484 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1485 {
1486 struct proc *p = l->l_proc;
1487 kauth_cred_t ncred;
1488
1489 ncred = kauth_cred_alloc();
1490
1491 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1492 if (cwdi->cwdi_rdir != NULL)
1493 vrele(cwdi->cwdi_rdir);
1494 cwdi->cwdi_rdir = vp;
1495
1496 /*
1497 * Prevent escaping from chroot by putting the root under
1498 * the working directory. Silently chdir to / if we aren't
1499 * already there.
1500 */
1501 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1502 /*
1503 * XXX would be more failsafe to change directory to a
1504 * deadfs node here instead
1505 */
1506 vrele(cwdi->cwdi_cdir);
1507 vref(vp);
1508 cwdi->cwdi_cdir = vp;
1509 }
1510 rw_exit(&cwdi->cwdi_lock);
1511
1512 /* Get a write lock on the process credential. */
1513 proc_crmod_enter();
1514
1515 kauth_cred_clone(p->p_cred, ncred);
1516 kauth_proc_chroot(ncred, p->p_cwdi);
1517
1518 /* Broadcast our credentials to the process and other LWPs. */
1519 proc_crmod_leave(ncred, p->p_cred, true);
1520 }
1521
1522 /*
1523 * Common routine for chroot and chdir.
1524 * XXX "where" should be enum uio_seg
1525 */
1526 int
1527 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1528 {
1529 struct pathbuf *pb;
1530 struct nameidata nd;
1531 int error;
1532
1533 error = pathbuf_maybe_copyin(path, where, &pb);
1534 if (error) {
1535 return error;
1536 }
1537 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1538 if ((error = namei(&nd)) != 0) {
1539 pathbuf_destroy(pb);
1540 return error;
1541 }
1542 *vpp = nd.ni_vp;
1543 pathbuf_destroy(pb);
1544
1545 if ((*vpp)->v_type != VDIR)
1546 error = ENOTDIR;
1547 else
1548 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1549
1550 if (error)
1551 vput(*vpp);
1552 else
1553 VOP_UNLOCK(*vpp);
1554 return (error);
1555 }
1556
1557 /*
1558 * Internals of sys_open - path has already been converted into a pathbuf
1559 * (so we can easily reuse this function from other parts of the kernel,
1560 * like posix_spawn post-processing).
1561 */
1562 int
1563 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1564 int open_mode, int *fd)
1565 {
1566 struct proc *p = l->l_proc;
1567 struct cwdinfo *cwdi = p->p_cwdi;
1568 file_t *fp;
1569 struct vnode *vp;
1570 int flags, cmode;
1571 int indx, error;
1572 struct nameidata nd;
1573
1574 if (open_flags & O_SEARCH) {
1575 open_flags &= ~(int)O_SEARCH;
1576 }
1577
1578 flags = FFLAGS(open_flags);
1579 if ((flags & (FREAD | FWRITE)) == 0)
1580 return EINVAL;
1581
1582 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1583 return error;
1584 }
1585
1586 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1587 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1588 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1589 if (dvp != NULL)
1590 NDAT(&nd, dvp);
1591
1592 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1593 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1594 fd_abort(p, fp, indx);
1595 if ((error == EDUPFD || error == EMOVEFD) &&
1596 l->l_dupfd >= 0 && /* XXX from fdopen */
1597 (error =
1598 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1599 *fd = indx;
1600 return 0;
1601 }
1602 if (error == ERESTART)
1603 error = EINTR;
1604 return error;
1605 }
1606
1607 l->l_dupfd = 0;
1608 vp = nd.ni_vp;
1609
1610 if ((error = open_setfp(l, fp, vp, indx, flags)))
1611 return error;
1612
1613 VOP_UNLOCK(vp);
1614 *fd = indx;
1615 fd_affix(p, fp, indx);
1616 return 0;
1617 }
1618
1619 int
1620 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1621 {
1622 struct pathbuf *pb;
1623 int error, oflags;
1624
1625 oflags = FFLAGS(open_flags);
1626 if ((oflags & (FREAD | FWRITE)) == 0)
1627 return EINVAL;
1628
1629 pb = pathbuf_create(path);
1630 if (pb == NULL)
1631 return ENOMEM;
1632
1633 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1634 pathbuf_destroy(pb);
1635
1636 return error;
1637 }
1638
1639 /*
1640 * Check permissions, allocate an open file structure,
1641 * and call the device open routine if any.
1642 */
1643 static int
1644 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1645 int mode, int *fd)
1646 {
1647 file_t *dfp = NULL;
1648 struct vnode *dvp = NULL;
1649 struct pathbuf *pb;
1650 int error;
1651
1652 #ifdef COMPAT_10 /* XXX: and perhaps later */
1653 if (path == NULL) {
1654 pb = pathbuf_create(".");
1655 if (pb == NULL)
1656 return ENOMEM;
1657 } else
1658 #endif
1659 {
1660 error = pathbuf_copyin(path, &pb);
1661 if (error)
1662 return error;
1663 }
1664
1665 if (fdat != AT_FDCWD) {
1666 /* fd_getvnode() will use the descriptor for us */
1667 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1668 goto out;
1669
1670 dvp = dfp->f_vnode;
1671 }
1672
1673 error = do_open(l, dvp, pb, flags, mode, fd);
1674
1675 if (dfp != NULL)
1676 fd_putfile(fdat);
1677 out:
1678 pathbuf_destroy(pb);
1679 return error;
1680 }
1681
1682 int
1683 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1684 {
1685 /* {
1686 syscallarg(const char *) path;
1687 syscallarg(int) flags;
1688 syscallarg(int) mode;
1689 } */
1690 int error;
1691 int fd;
1692
1693 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1694 SCARG(uap, flags), SCARG(uap, mode), &fd);
1695
1696 if (error == 0)
1697 *retval = fd;
1698
1699 return error;
1700 }
1701
1702 int
1703 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1704 {
1705 /* {
1706 syscallarg(int) fd;
1707 syscallarg(const char *) path;
1708 syscallarg(int) oflags;
1709 syscallarg(int) mode;
1710 } */
1711 int error;
1712 int fd;
1713
1714 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1715 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1716
1717 if (error == 0)
1718 *retval = fd;
1719
1720 return error;
1721 }
1722
1723 static void
1724 vfs__fhfree(fhandle_t *fhp)
1725 {
1726 size_t fhsize;
1727
1728 fhsize = FHANDLE_SIZE(fhp);
1729 kmem_free(fhp, fhsize);
1730 }
1731
1732 /*
1733 * vfs_composefh: compose a filehandle.
1734 */
1735
1736 int
1737 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1738 {
1739 struct mount *mp;
1740 struct fid *fidp;
1741 int error;
1742 size_t needfhsize;
1743 size_t fidsize;
1744
1745 mp = vp->v_mount;
1746 fidp = NULL;
1747 if (*fh_size < FHANDLE_SIZE_MIN) {
1748 fidsize = 0;
1749 } else {
1750 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1751 if (fhp != NULL) {
1752 memset(fhp, 0, *fh_size);
1753 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1754 fidp = &fhp->fh_fid;
1755 }
1756 }
1757 error = VFS_VPTOFH(vp, fidp, &fidsize);
1758 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1759 if (error == 0 && *fh_size < needfhsize) {
1760 error = E2BIG;
1761 }
1762 *fh_size = needfhsize;
1763 return error;
1764 }
1765
1766 int
1767 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1768 {
1769 struct mount *mp;
1770 fhandle_t *fhp;
1771 size_t fhsize;
1772 size_t fidsize;
1773 int error;
1774
1775 mp = vp->v_mount;
1776 fidsize = 0;
1777 error = VFS_VPTOFH(vp, NULL, &fidsize);
1778 KASSERT(error != 0);
1779 if (error != E2BIG) {
1780 goto out;
1781 }
1782 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1783 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1784 if (fhp == NULL) {
1785 error = ENOMEM;
1786 goto out;
1787 }
1788 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1789 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1790 if (error == 0) {
1791 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1792 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1793 *fhpp = fhp;
1794 } else {
1795 kmem_free(fhp, fhsize);
1796 }
1797 out:
1798 return error;
1799 }
1800
1801 void
1802 vfs_composefh_free(fhandle_t *fhp)
1803 {
1804
1805 vfs__fhfree(fhp);
1806 }
1807
1808 /*
1809 * vfs_fhtovp: lookup a vnode by a filehandle.
1810 */
1811
1812 int
1813 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1814 {
1815 struct mount *mp;
1816 int error;
1817
1818 *vpp = NULL;
1819 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1820 if (mp == NULL) {
1821 error = ESTALE;
1822 goto out;
1823 }
1824 if (mp->mnt_op->vfs_fhtovp == NULL) {
1825 error = EOPNOTSUPP;
1826 goto out;
1827 }
1828 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1829 out:
1830 return error;
1831 }
1832
1833 /*
1834 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1835 * the needed size.
1836 */
1837
1838 int
1839 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1840 {
1841 fhandle_t *fhp;
1842 int error;
1843
1844 if (fhsize > FHANDLE_SIZE_MAX) {
1845 return EINVAL;
1846 }
1847 if (fhsize < FHANDLE_SIZE_MIN) {
1848 return EINVAL;
1849 }
1850 again:
1851 fhp = kmem_alloc(fhsize, KM_SLEEP);
1852 if (fhp == NULL) {
1853 return ENOMEM;
1854 }
1855 error = copyin(ufhp, fhp, fhsize);
1856 if (error == 0) {
1857 /* XXX this check shouldn't be here */
1858 if (FHANDLE_SIZE(fhp) == fhsize) {
1859 *fhpp = fhp;
1860 return 0;
1861 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1862 /*
1863 * a kludge for nfsv2 padded handles.
1864 */
1865 size_t sz;
1866
1867 sz = FHANDLE_SIZE(fhp);
1868 kmem_free(fhp, fhsize);
1869 fhsize = sz;
1870 goto again;
1871 } else {
1872 /*
1873 * userland told us wrong size.
1874 */
1875 error = EINVAL;
1876 }
1877 }
1878 kmem_free(fhp, fhsize);
1879 return error;
1880 }
1881
1882 void
1883 vfs_copyinfh_free(fhandle_t *fhp)
1884 {
1885
1886 vfs__fhfree(fhp);
1887 }
1888
1889 /*
1890 * Get file handle system call
1891 */
1892 int
1893 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1894 {
1895 /* {
1896 syscallarg(char *) fname;
1897 syscallarg(fhandle_t *) fhp;
1898 syscallarg(size_t *) fh_size;
1899 } */
1900 struct vnode *vp;
1901 fhandle_t *fh;
1902 int error;
1903 struct pathbuf *pb;
1904 struct nameidata nd;
1905 size_t sz;
1906 size_t usz;
1907
1908 /*
1909 * Must be super user
1910 */
1911 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1912 0, NULL, NULL, NULL);
1913 if (error)
1914 return (error);
1915
1916 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1917 if (error) {
1918 return error;
1919 }
1920 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1921 error = namei(&nd);
1922 if (error) {
1923 pathbuf_destroy(pb);
1924 return error;
1925 }
1926 vp = nd.ni_vp;
1927 pathbuf_destroy(pb);
1928
1929 error = vfs_composefh_alloc(vp, &fh);
1930 vput(vp);
1931 if (error != 0) {
1932 return error;
1933 }
1934 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1935 if (error != 0) {
1936 goto out;
1937 }
1938 sz = FHANDLE_SIZE(fh);
1939 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1940 if (error != 0) {
1941 goto out;
1942 }
1943 if (usz >= sz) {
1944 error = copyout(fh, SCARG(uap, fhp), sz);
1945 } else {
1946 error = E2BIG;
1947 }
1948 out:
1949 vfs_composefh_free(fh);
1950 return (error);
1951 }
1952
1953 /*
1954 * Open a file given a file handle.
1955 *
1956 * Check permissions, allocate an open file structure,
1957 * and call the device open routine if any.
1958 */
1959
1960 int
1961 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1962 register_t *retval)
1963 {
1964 file_t *fp;
1965 struct vnode *vp = NULL;
1966 kauth_cred_t cred = l->l_cred;
1967 file_t *nfp;
1968 int indx, error;
1969 struct vattr va;
1970 fhandle_t *fh;
1971 int flags;
1972 proc_t *p;
1973
1974 p = curproc;
1975
1976 /*
1977 * Must be super user
1978 */
1979 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1980 0, NULL, NULL, NULL)))
1981 return (error);
1982
1983 if (oflags & O_SEARCH) {
1984 oflags &= ~(int)O_SEARCH;
1985 }
1986
1987 flags = FFLAGS(oflags);
1988 if ((flags & (FREAD | FWRITE)) == 0)
1989 return (EINVAL);
1990 if ((flags & O_CREAT))
1991 return (EINVAL);
1992 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1993 return (error);
1994 fp = nfp;
1995 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1996 if (error != 0) {
1997 goto bad;
1998 }
1999 error = vfs_fhtovp(fh, &vp);
2000 vfs_copyinfh_free(fh);
2001 if (error != 0) {
2002 goto bad;
2003 }
2004
2005 /* Now do an effective vn_open */
2006
2007 if (vp->v_type == VSOCK) {
2008 error = EOPNOTSUPP;
2009 goto bad;
2010 }
2011 error = vn_openchk(vp, cred, flags);
2012 if (error != 0)
2013 goto bad;
2014 if (flags & O_TRUNC) {
2015 VOP_UNLOCK(vp); /* XXX */
2016 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
2017 vattr_null(&va);
2018 va.va_size = 0;
2019 error = VOP_SETATTR(vp, &va, cred);
2020 if (error)
2021 goto bad;
2022 }
2023 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
2024 goto bad;
2025 if (flags & FWRITE) {
2026 mutex_enter(vp->v_interlock);
2027 vp->v_writecount++;
2028 mutex_exit(vp->v_interlock);
2029 }
2030
2031 /* done with modified vn_open, now finish what sys_open does. */
2032 if ((error = open_setfp(l, fp, vp, indx, flags)))
2033 return error;
2034
2035 VOP_UNLOCK(vp);
2036 *retval = indx;
2037 fd_affix(p, fp, indx);
2038 return (0);
2039
2040 bad:
2041 fd_abort(p, fp, indx);
2042 if (vp != NULL)
2043 vput(vp);
2044 return (error);
2045 }
2046
2047 int
2048 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2049 {
2050 /* {
2051 syscallarg(const void *) fhp;
2052 syscallarg(size_t) fh_size;
2053 syscallarg(int) flags;
2054 } */
2055
2056 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2057 SCARG(uap, flags), retval);
2058 }
2059
2060 int
2061 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2062 {
2063 int error;
2064 fhandle_t *fh;
2065 struct vnode *vp;
2066
2067 /*
2068 * Must be super user
2069 */
2070 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2071 0, NULL, NULL, NULL)))
2072 return (error);
2073
2074 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2075 if (error != 0)
2076 return error;
2077
2078 error = vfs_fhtovp(fh, &vp);
2079 vfs_copyinfh_free(fh);
2080 if (error != 0)
2081 return error;
2082
2083 error = vn_stat(vp, sb);
2084 vput(vp);
2085 return error;
2086 }
2087
2088
2089 /* ARGSUSED */
2090 int
2091 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2092 {
2093 /* {
2094 syscallarg(const void *) fhp;
2095 syscallarg(size_t) fh_size;
2096 syscallarg(struct stat *) sb;
2097 } */
2098 struct stat sb;
2099 int error;
2100
2101 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2102 if (error)
2103 return error;
2104 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2105 }
2106
2107 int
2108 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2109 int flags)
2110 {
2111 fhandle_t *fh;
2112 struct mount *mp;
2113 struct vnode *vp;
2114 int error;
2115
2116 /*
2117 * Must be super user
2118 */
2119 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2120 0, NULL, NULL, NULL)))
2121 return error;
2122
2123 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2124 if (error != 0)
2125 return error;
2126
2127 error = vfs_fhtovp(fh, &vp);
2128 vfs_copyinfh_free(fh);
2129 if (error != 0)
2130 return error;
2131
2132 mp = vp->v_mount;
2133 error = dostatvfs(mp, sb, l, flags, 1);
2134 vput(vp);
2135 return error;
2136 }
2137
2138 /* ARGSUSED */
2139 int
2140 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2141 {
2142 /* {
2143 syscallarg(const void *) fhp;
2144 syscallarg(size_t) fh_size;
2145 syscallarg(struct statvfs *) buf;
2146 syscallarg(int) flags;
2147 } */
2148 struct statvfs *sb = STATVFSBUF_GET();
2149 int error;
2150
2151 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2152 SCARG(uap, flags));
2153 if (error == 0)
2154 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2155 STATVFSBUF_PUT(sb);
2156 return error;
2157 }
2158
2159 /*
2160 * Create a special file.
2161 */
2162 /* ARGSUSED */
2163 int
2164 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2165 register_t *retval)
2166 {
2167 /* {
2168 syscallarg(const char *) path;
2169 syscallarg(mode_t) mode;
2170 syscallarg(dev_t) dev;
2171 } */
2172 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2173 SCARG(uap, dev), retval, UIO_USERSPACE);
2174 }
2175
2176 int
2177 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2178 register_t *retval)
2179 {
2180 /* {
2181 syscallarg(int) fd;
2182 syscallarg(const char *) path;
2183 syscallarg(mode_t) mode;
2184 syscallarg(int) pad;
2185 syscallarg(dev_t) dev;
2186 } */
2187
2188 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2189 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2190 }
2191
2192 int
2193 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2194 register_t *retval, enum uio_seg seg)
2195 {
2196 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2197 }
2198
2199 int
2200 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2201 dev_t dev, register_t *retval, enum uio_seg seg)
2202 {
2203 struct proc *p = l->l_proc;
2204 struct vnode *vp;
2205 struct vattr vattr;
2206 int error, optype;
2207 struct pathbuf *pb;
2208 struct nameidata nd;
2209 const char *pathstring;
2210
2211 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2212 0, NULL, NULL, NULL)) != 0)
2213 return (error);
2214
2215 optype = VOP_MKNOD_DESCOFFSET;
2216
2217 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2218 if (error) {
2219 return error;
2220 }
2221 pathstring = pathbuf_stringcopy_get(pb);
2222 if (pathstring == NULL) {
2223 pathbuf_destroy(pb);
2224 return ENOMEM;
2225 }
2226
2227 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2228
2229 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2230 goto out;
2231 vp = nd.ni_vp;
2232
2233 if (vp != NULL)
2234 error = EEXIST;
2235 else {
2236 vattr_null(&vattr);
2237 /* We will read cwdi->cwdi_cmask unlocked. */
2238 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2239 vattr.va_rdev = dev;
2240
2241 switch (mode & S_IFMT) {
2242 case S_IFMT: /* used by badsect to flag bad sectors */
2243 vattr.va_type = VBAD;
2244 break;
2245 case S_IFCHR:
2246 vattr.va_type = VCHR;
2247 break;
2248 case S_IFBLK:
2249 vattr.va_type = VBLK;
2250 break;
2251 case S_IFWHT:
2252 optype = VOP_WHITEOUT_DESCOFFSET;
2253 break;
2254 case S_IFREG:
2255 #if NVERIEXEC > 0
2256 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2257 O_CREAT);
2258 #endif /* NVERIEXEC > 0 */
2259 vattr.va_type = VREG;
2260 vattr.va_rdev = VNOVAL;
2261 optype = VOP_CREATE_DESCOFFSET;
2262 break;
2263 default:
2264 error = EINVAL;
2265 break;
2266 }
2267 }
2268 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2269 && vattr.va_rdev == VNOVAL)
2270 error = EINVAL;
2271 if (!error) {
2272 switch (optype) {
2273 case VOP_WHITEOUT_DESCOFFSET:
2274 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2275 if (error)
2276 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2277 vput(nd.ni_dvp);
2278 break;
2279
2280 case VOP_MKNOD_DESCOFFSET:
2281 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2282 &nd.ni_cnd, &vattr);
2283 if (error == 0)
2284 vrele(nd.ni_vp);
2285 vput(nd.ni_dvp);
2286 break;
2287
2288 case VOP_CREATE_DESCOFFSET:
2289 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2290 &nd.ni_cnd, &vattr);
2291 if (error == 0)
2292 vrele(nd.ni_vp);
2293 vput(nd.ni_dvp);
2294 break;
2295 }
2296 } else {
2297 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2298 if (nd.ni_dvp == vp)
2299 vrele(nd.ni_dvp);
2300 else
2301 vput(nd.ni_dvp);
2302 if (vp)
2303 vrele(vp);
2304 }
2305 out:
2306 pathbuf_stringcopy_put(pb, pathstring);
2307 pathbuf_destroy(pb);
2308 return (error);
2309 }
2310
2311 /*
2312 * Create a named pipe.
2313 */
2314 /* ARGSUSED */
2315 int
2316 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2317 {
2318 /* {
2319 syscallarg(const char *) path;
2320 syscallarg(int) mode;
2321 } */
2322 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2323 }
2324
2325 int
2326 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2327 register_t *retval)
2328 {
2329 /* {
2330 syscallarg(int) fd;
2331 syscallarg(const char *) path;
2332 syscallarg(int) mode;
2333 } */
2334
2335 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2336 SCARG(uap, mode));
2337 }
2338
2339 static int
2340 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2341 {
2342 struct proc *p = l->l_proc;
2343 struct vattr vattr;
2344 int error;
2345 struct pathbuf *pb;
2346 struct nameidata nd;
2347
2348 error = pathbuf_copyin(path, &pb);
2349 if (error) {
2350 return error;
2351 }
2352 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2353
2354 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2355 pathbuf_destroy(pb);
2356 return error;
2357 }
2358 if (nd.ni_vp != NULL) {
2359 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2360 if (nd.ni_dvp == nd.ni_vp)
2361 vrele(nd.ni_dvp);
2362 else
2363 vput(nd.ni_dvp);
2364 vrele(nd.ni_vp);
2365 pathbuf_destroy(pb);
2366 return (EEXIST);
2367 }
2368 vattr_null(&vattr);
2369 vattr.va_type = VFIFO;
2370 /* We will read cwdi->cwdi_cmask unlocked. */
2371 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2372 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2373 if (error == 0)
2374 vrele(nd.ni_vp);
2375 vput(nd.ni_dvp);
2376 pathbuf_destroy(pb);
2377 return (error);
2378 }
2379
2380 /*
2381 * Make a hard file link.
2382 */
2383 /* ARGSUSED */
2384 int
2385 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2386 const char *link, int follow, register_t *retval)
2387 {
2388 struct vnode *vp;
2389 struct pathbuf *linkpb;
2390 struct nameidata nd;
2391 namei_simple_flags_t ns_flags;
2392 int error;
2393
2394 if (follow & AT_SYMLINK_FOLLOW)
2395 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2396 else
2397 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2398
2399 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2400 if (error != 0)
2401 return (error);
2402 error = pathbuf_copyin(link, &linkpb);
2403 if (error) {
2404 goto out1;
2405 }
2406 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2407 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2408 goto out2;
2409 if (nd.ni_vp) {
2410 error = EEXIST;
2411 goto abortop;
2412 }
2413 /* Prevent hard links on directories. */
2414 if (vp->v_type == VDIR) {
2415 error = EPERM;
2416 goto abortop;
2417 }
2418 /* Prevent cross-mount operation. */
2419 if (nd.ni_dvp->v_mount != vp->v_mount) {
2420 error = EXDEV;
2421 goto abortop;
2422 }
2423 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2424 VOP_UNLOCK(nd.ni_dvp);
2425 vrele(nd.ni_dvp);
2426 out2:
2427 pathbuf_destroy(linkpb);
2428 out1:
2429 vrele(vp);
2430 return (error);
2431 abortop:
2432 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2433 if (nd.ni_dvp == nd.ni_vp)
2434 vrele(nd.ni_dvp);
2435 else
2436 vput(nd.ni_dvp);
2437 if (nd.ni_vp != NULL)
2438 vrele(nd.ni_vp);
2439 goto out2;
2440 }
2441
2442 int
2443 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2444 {
2445 /* {
2446 syscallarg(const char *) path;
2447 syscallarg(const char *) link;
2448 } */
2449 const char *path = SCARG(uap, path);
2450 const char *link = SCARG(uap, link);
2451
2452 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2453 AT_SYMLINK_FOLLOW, retval);
2454 }
2455
2456 int
2457 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2458 register_t *retval)
2459 {
2460 /* {
2461 syscallarg(int) fd1;
2462 syscallarg(const char *) name1;
2463 syscallarg(int) fd2;
2464 syscallarg(const char *) name2;
2465 syscallarg(int) flags;
2466 } */
2467 int fd1 = SCARG(uap, fd1);
2468 const char *name1 = SCARG(uap, name1);
2469 int fd2 = SCARG(uap, fd2);
2470 const char *name2 = SCARG(uap, name2);
2471 int follow;
2472
2473 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2474
2475 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2476 }
2477
2478
2479 int
2480 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2481 {
2482 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2483 }
2484
2485 static int
2486 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2487 const char *link, enum uio_seg seg)
2488 {
2489 struct proc *p = curproc;
2490 struct vattr vattr;
2491 char *path;
2492 int error;
2493 struct pathbuf *linkpb;
2494 struct nameidata nd;
2495
2496 KASSERT(l != NULL || fdat == AT_FDCWD);
2497
2498 path = PNBUF_GET();
2499 if (seg == UIO_USERSPACE) {
2500 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2501 goto out1;
2502 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2503 goto out1;
2504 } else {
2505 KASSERT(strlen(patharg) < MAXPATHLEN);
2506 strcpy(path, patharg);
2507 linkpb = pathbuf_create(link);
2508 if (linkpb == NULL) {
2509 error = ENOMEM;
2510 goto out1;
2511 }
2512 }
2513 ktrkuser("symlink-target", path, strlen(path));
2514
2515 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2516 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2517 goto out2;
2518 if (nd.ni_vp) {
2519 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2520 if (nd.ni_dvp == nd.ni_vp)
2521 vrele(nd.ni_dvp);
2522 else
2523 vput(nd.ni_dvp);
2524 vrele(nd.ni_vp);
2525 error = EEXIST;
2526 goto out2;
2527 }
2528 vattr_null(&vattr);
2529 vattr.va_type = VLNK;
2530 /* We will read cwdi->cwdi_cmask unlocked. */
2531 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2532 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2533 if (error == 0)
2534 vrele(nd.ni_vp);
2535 vput(nd.ni_dvp);
2536 out2:
2537 pathbuf_destroy(linkpb);
2538 out1:
2539 PNBUF_PUT(path);
2540 return (error);
2541 }
2542
2543 /*
2544 * Make a symbolic link.
2545 */
2546 /* ARGSUSED */
2547 int
2548 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2549 {
2550 /* {
2551 syscallarg(const char *) path;
2552 syscallarg(const char *) link;
2553 } */
2554
2555 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2556 UIO_USERSPACE);
2557 }
2558
2559 int
2560 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2561 register_t *retval)
2562 {
2563 /* {
2564 syscallarg(const char *) path1;
2565 syscallarg(int) fd;
2566 syscallarg(const char *) path2;
2567 } */
2568
2569 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2570 SCARG(uap, path2), UIO_USERSPACE);
2571 }
2572
2573 /*
2574 * Delete a whiteout from the filesystem.
2575 */
2576 /* ARGSUSED */
2577 int
2578 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2579 {
2580 /* {
2581 syscallarg(const char *) path;
2582 } */
2583 int error;
2584 struct pathbuf *pb;
2585 struct nameidata nd;
2586
2587 error = pathbuf_copyin(SCARG(uap, path), &pb);
2588 if (error) {
2589 return error;
2590 }
2591
2592 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2593 error = namei(&nd);
2594 if (error) {
2595 pathbuf_destroy(pb);
2596 return (error);
2597 }
2598
2599 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2600 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2601 if (nd.ni_dvp == nd.ni_vp)
2602 vrele(nd.ni_dvp);
2603 else
2604 vput(nd.ni_dvp);
2605 if (nd.ni_vp)
2606 vrele(nd.ni_vp);
2607 pathbuf_destroy(pb);
2608 return (EEXIST);
2609 }
2610 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2611 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2612 vput(nd.ni_dvp);
2613 pathbuf_destroy(pb);
2614 return (error);
2615 }
2616
2617 /*
2618 * Delete a name from the filesystem.
2619 */
2620 /* ARGSUSED */
2621 int
2622 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2623 {
2624 /* {
2625 syscallarg(const char *) path;
2626 } */
2627
2628 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2629 }
2630
2631 int
2632 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2633 register_t *retval)
2634 {
2635 /* {
2636 syscallarg(int) fd;
2637 syscallarg(const char *) path;
2638 syscallarg(int) flag;
2639 } */
2640
2641 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2642 SCARG(uap, flag), UIO_USERSPACE);
2643 }
2644
2645 int
2646 do_sys_unlink(const char *arg, enum uio_seg seg)
2647 {
2648 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2649 }
2650
2651 static int
2652 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2653 enum uio_seg seg)
2654 {
2655 struct vnode *vp;
2656 int error;
2657 struct pathbuf *pb;
2658 struct nameidata nd;
2659 const char *pathstring;
2660
2661 KASSERT(l != NULL || fdat == AT_FDCWD);
2662
2663 error = pathbuf_maybe_copyin(arg, seg, &pb);
2664 if (error) {
2665 return error;
2666 }
2667 pathstring = pathbuf_stringcopy_get(pb);
2668 if (pathstring == NULL) {
2669 pathbuf_destroy(pb);
2670 return ENOMEM;
2671 }
2672
2673 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2674 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2675 goto out;
2676 vp = nd.ni_vp;
2677
2678 /*
2679 * The root of a mounted filesystem cannot be deleted.
2680 */
2681 if ((vp->v_vflag & VV_ROOT) != 0) {
2682 error = EBUSY;
2683 goto abort;
2684 }
2685
2686 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2687 error = EBUSY;
2688 goto abort;
2689 }
2690
2691 /*
2692 * No rmdir "." please.
2693 */
2694 if (nd.ni_dvp == vp) {
2695 error = EINVAL;
2696 goto abort;
2697 }
2698
2699 /*
2700 * AT_REMOVEDIR is required to remove a directory
2701 */
2702 if (vp->v_type == VDIR) {
2703 if (!(flags & AT_REMOVEDIR)) {
2704 error = EPERM;
2705 goto abort;
2706 } else {
2707 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2708 goto out;
2709 }
2710 }
2711
2712 /*
2713 * Starting here we only deal with non directories.
2714 */
2715 if (flags & AT_REMOVEDIR) {
2716 error = ENOTDIR;
2717 goto abort;
2718 }
2719
2720 #if NVERIEXEC > 0
2721 /* Handle remove requests for veriexec entries. */
2722 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2723 goto abort;
2724 }
2725 #endif /* NVERIEXEC > 0 */
2726
2727 #ifdef FILEASSOC
2728 (void)fileassoc_file_delete(vp);
2729 #endif /* FILEASSOC */
2730 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2731 goto out;
2732
2733 abort:
2734 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2735 if (nd.ni_dvp == vp)
2736 vrele(nd.ni_dvp);
2737 else
2738 vput(nd.ni_dvp);
2739 vput(vp);
2740
2741 out:
2742 pathbuf_stringcopy_put(pb, pathstring);
2743 pathbuf_destroy(pb);
2744 return (error);
2745 }
2746
2747 /*
2748 * Reposition read/write file offset.
2749 */
2750 int
2751 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2752 {
2753 /* {
2754 syscallarg(int) fd;
2755 syscallarg(int) pad;
2756 syscallarg(off_t) offset;
2757 syscallarg(int) whence;
2758 } */
2759 kauth_cred_t cred = l->l_cred;
2760 file_t *fp;
2761 struct vnode *vp;
2762 struct vattr vattr;
2763 off_t newoff;
2764 int error, fd;
2765
2766 fd = SCARG(uap, fd);
2767
2768 if ((fp = fd_getfile(fd)) == NULL)
2769 return (EBADF);
2770
2771 vp = fp->f_vnode;
2772 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2773 error = ESPIPE;
2774 goto out;
2775 }
2776
2777 vn_lock(vp, LK_SHARED | LK_RETRY);
2778
2779 switch (SCARG(uap, whence)) {
2780 case SEEK_CUR:
2781 newoff = fp->f_offset + SCARG(uap, offset);
2782 break;
2783 case SEEK_END:
2784 error = VOP_GETATTR(vp, &vattr, cred);
2785 if (error) {
2786 VOP_UNLOCK(vp);
2787 goto out;
2788 }
2789 newoff = SCARG(uap, offset) + vattr.va_size;
2790 break;
2791 case SEEK_SET:
2792 newoff = SCARG(uap, offset);
2793 break;
2794 default:
2795 error = EINVAL;
2796 VOP_UNLOCK(vp);
2797 goto out;
2798 }
2799 VOP_UNLOCK(vp);
2800 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2801 *(off_t *)retval = fp->f_offset = newoff;
2802 }
2803 out:
2804 fd_putfile(fd);
2805 return (error);
2806 }
2807
2808 /*
2809 * Positional read system call.
2810 */
2811 int
2812 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2813 {
2814 /* {
2815 syscallarg(int) fd;
2816 syscallarg(void *) buf;
2817 syscallarg(size_t) nbyte;
2818 syscallarg(off_t) offset;
2819 } */
2820 file_t *fp;
2821 struct vnode *vp;
2822 off_t offset;
2823 int error, fd = SCARG(uap, fd);
2824
2825 if ((fp = fd_getfile(fd)) == NULL)
2826 return (EBADF);
2827
2828 if ((fp->f_flag & FREAD) == 0) {
2829 fd_putfile(fd);
2830 return (EBADF);
2831 }
2832
2833 vp = fp->f_vnode;
2834 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2835 error = ESPIPE;
2836 goto out;
2837 }
2838
2839 offset = SCARG(uap, offset);
2840
2841 /*
2842 * XXX This works because no file systems actually
2843 * XXX take any action on the seek operation.
2844 */
2845 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2846 goto out;
2847
2848 /* dofileread() will unuse the descriptor for us */
2849 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2850 &offset, 0, retval));
2851
2852 out:
2853 fd_putfile(fd);
2854 return (error);
2855 }
2856
2857 /*
2858 * Positional scatter read system call.
2859 */
2860 int
2861 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2862 {
2863 /* {
2864 syscallarg(int) fd;
2865 syscallarg(const struct iovec *) iovp;
2866 syscallarg(int) iovcnt;
2867 syscallarg(off_t) offset;
2868 } */
2869 off_t offset = SCARG(uap, offset);
2870
2871 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2872 SCARG(uap, iovcnt), &offset, 0, retval);
2873 }
2874
2875 /*
2876 * Positional write system call.
2877 */
2878 int
2879 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2880 {
2881 /* {
2882 syscallarg(int) fd;
2883 syscallarg(const void *) buf;
2884 syscallarg(size_t) nbyte;
2885 syscallarg(off_t) offset;
2886 } */
2887 file_t *fp;
2888 struct vnode *vp;
2889 off_t offset;
2890 int error, fd = SCARG(uap, fd);
2891
2892 if ((fp = fd_getfile(fd)) == NULL)
2893 return (EBADF);
2894
2895 if ((fp->f_flag & FWRITE) == 0) {
2896 fd_putfile(fd);
2897 return (EBADF);
2898 }
2899
2900 vp = fp->f_vnode;
2901 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2902 error = ESPIPE;
2903 goto out;
2904 }
2905
2906 offset = SCARG(uap, offset);
2907
2908 /*
2909 * XXX This works because no file systems actually
2910 * XXX take any action on the seek operation.
2911 */
2912 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2913 goto out;
2914
2915 /* dofilewrite() will unuse the descriptor for us */
2916 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2917 &offset, 0, retval));
2918
2919 out:
2920 fd_putfile(fd);
2921 return (error);
2922 }
2923
2924 /*
2925 * Positional gather write system call.
2926 */
2927 int
2928 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2929 {
2930 /* {
2931 syscallarg(int) fd;
2932 syscallarg(const struct iovec *) iovp;
2933 syscallarg(int) iovcnt;
2934 syscallarg(off_t) offset;
2935 } */
2936 off_t offset = SCARG(uap, offset);
2937
2938 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2939 SCARG(uap, iovcnt), &offset, 0, retval);
2940 }
2941
2942 /*
2943 * Check access permissions.
2944 */
2945 int
2946 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2947 {
2948 /* {
2949 syscallarg(const char *) path;
2950 syscallarg(int) flags;
2951 } */
2952
2953 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2954 SCARG(uap, flags), 0);
2955 }
2956
2957 int
2958 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2959 int mode, int flags)
2960 {
2961 kauth_cred_t cred;
2962 struct vnode *vp;
2963 int error, nd_flag, vmode;
2964 struct pathbuf *pb;
2965 struct nameidata nd;
2966
2967 CTASSERT(F_OK == 0);
2968 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2969 /* nonsense mode */
2970 return EINVAL;
2971 }
2972
2973 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2974 if (flags & AT_SYMLINK_NOFOLLOW)
2975 nd_flag &= ~FOLLOW;
2976
2977 error = pathbuf_copyin(path, &pb);
2978 if (error)
2979 return error;
2980
2981 NDINIT(&nd, LOOKUP, nd_flag, pb);
2982
2983 /* Override default credentials */
2984 cred = kauth_cred_dup(l->l_cred);
2985 if (!(flags & AT_EACCESS)) {
2986 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2987 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2988 }
2989 nd.ni_cnd.cn_cred = cred;
2990
2991 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2992 pathbuf_destroy(pb);
2993 goto out;
2994 }
2995 vp = nd.ni_vp;
2996 pathbuf_destroy(pb);
2997
2998 /* Flags == 0 means only check for existence. */
2999 if (mode) {
3000 vmode = 0;
3001 if (mode & R_OK)
3002 vmode |= VREAD;
3003 if (mode & W_OK)
3004 vmode |= VWRITE;
3005 if (mode & X_OK)
3006 vmode |= VEXEC;
3007
3008 error = VOP_ACCESS(vp, vmode, cred);
3009 if (!error && (vmode & VWRITE))
3010 error = vn_writechk(vp);
3011 }
3012 vput(vp);
3013 out:
3014 kauth_cred_free(cred);
3015 return (error);
3016 }
3017
3018 int
3019 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3020 register_t *retval)
3021 {
3022 /* {
3023 syscallarg(int) fd;
3024 syscallarg(const char *) path;
3025 syscallarg(int) amode;
3026 syscallarg(int) flag;
3027 } */
3028
3029 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3030 SCARG(uap, amode), SCARG(uap, flag));
3031 }
3032
3033 /*
3034 * Common code for all sys_stat functions, including compat versions.
3035 */
3036 int
3037 do_sys_stat(const char *userpath, unsigned int nd_flag,
3038 struct stat *sb)
3039 {
3040 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3041 }
3042
3043 int
3044 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3045 unsigned int nd_flag, struct stat *sb)
3046 {
3047 int error;
3048 struct pathbuf *pb;
3049 struct nameidata nd;
3050
3051 KASSERT(l != NULL || fdat == AT_FDCWD);
3052
3053 error = pathbuf_copyin(userpath, &pb);
3054 if (error) {
3055 return error;
3056 }
3057
3058 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3059
3060 error = fd_nameiat(l, fdat, &nd);
3061 if (error != 0) {
3062 pathbuf_destroy(pb);
3063 return error;
3064 }
3065 error = vn_stat(nd.ni_vp, sb);
3066 vput(nd.ni_vp);
3067 pathbuf_destroy(pb);
3068 return error;
3069 }
3070
3071 /*
3072 * Get file status; this version follows links.
3073 */
3074 /* ARGSUSED */
3075 int
3076 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3077 {
3078 /* {
3079 syscallarg(const char *) path;
3080 syscallarg(struct stat *) ub;
3081 } */
3082 struct stat sb;
3083 int error;
3084
3085 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3086 if (error)
3087 return error;
3088 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3089 }
3090
3091 /*
3092 * Get file status; this version does not follow links.
3093 */
3094 /* ARGSUSED */
3095 int
3096 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3097 {
3098 /* {
3099 syscallarg(const char *) path;
3100 syscallarg(struct stat *) ub;
3101 } */
3102 struct stat sb;
3103 int error;
3104
3105 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3106 if (error)
3107 return error;
3108 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3109 }
3110
3111 int
3112 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3113 register_t *retval)
3114 {
3115 /* {
3116 syscallarg(int) fd;
3117 syscallarg(const char *) path;
3118 syscallarg(struct stat *) buf;
3119 syscallarg(int) flag;
3120 } */
3121 unsigned int nd_flag;
3122 struct stat sb;
3123 int error;
3124
3125 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3126 nd_flag = NOFOLLOW;
3127 else
3128 nd_flag = FOLLOW;
3129
3130 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3131 &sb);
3132 if (error)
3133 return error;
3134 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3135 }
3136
3137 /*
3138 * Get configurable pathname variables.
3139 */
3140 /* ARGSUSED */
3141 int
3142 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3143 {
3144 /* {
3145 syscallarg(const char *) path;
3146 syscallarg(int) name;
3147 } */
3148 int error;
3149 struct pathbuf *pb;
3150 struct nameidata nd;
3151
3152 error = pathbuf_copyin(SCARG(uap, path), &pb);
3153 if (error) {
3154 return error;
3155 }
3156 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3157 if ((error = namei(&nd)) != 0) {
3158 pathbuf_destroy(pb);
3159 return (error);
3160 }
3161 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3162 vput(nd.ni_vp);
3163 pathbuf_destroy(pb);
3164 return (error);
3165 }
3166
3167 /*
3168 * Return target name of a symbolic link.
3169 */
3170 /* ARGSUSED */
3171 int
3172 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3173 register_t *retval)
3174 {
3175 /* {
3176 syscallarg(const char *) path;
3177 syscallarg(char *) buf;
3178 syscallarg(size_t) count;
3179 } */
3180 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3181 SCARG(uap, buf), SCARG(uap, count), retval);
3182 }
3183
3184 static int
3185 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3186 size_t count, register_t *retval)
3187 {
3188 struct vnode *vp;
3189 struct iovec aiov;
3190 struct uio auio;
3191 int error;
3192 struct pathbuf *pb;
3193 struct nameidata nd;
3194
3195 error = pathbuf_copyin(path, &pb);
3196 if (error) {
3197 return error;
3198 }
3199 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3200 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3201 pathbuf_destroy(pb);
3202 return error;
3203 }
3204 vp = nd.ni_vp;
3205 pathbuf_destroy(pb);
3206 if (vp->v_type != VLNK)
3207 error = EINVAL;
3208 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3209 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3210 aiov.iov_base = buf;
3211 aiov.iov_len = count;
3212 auio.uio_iov = &aiov;
3213 auio.uio_iovcnt = 1;
3214 auio.uio_offset = 0;
3215 auio.uio_rw = UIO_READ;
3216 KASSERT(l == curlwp);
3217 auio.uio_vmspace = l->l_proc->p_vmspace;
3218 auio.uio_resid = count;
3219 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3220 *retval = count - auio.uio_resid;
3221 }
3222 vput(vp);
3223 return (error);
3224 }
3225
3226 int
3227 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3228 register_t *retval)
3229 {
3230 /* {
3231 syscallarg(int) fd;
3232 syscallarg(const char *) path;
3233 syscallarg(char *) buf;
3234 syscallarg(size_t) bufsize;
3235 } */
3236
3237 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3238 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3239 }
3240
3241 /*
3242 * Change flags of a file given a path name.
3243 */
3244 /* ARGSUSED */
3245 int
3246 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3247 {
3248 /* {
3249 syscallarg(const char *) path;
3250 syscallarg(u_long) flags;
3251 } */
3252 struct vnode *vp;
3253 int error;
3254
3255 error = namei_simple_user(SCARG(uap, path),
3256 NSM_FOLLOW_TRYEMULROOT, &vp);
3257 if (error != 0)
3258 return (error);
3259 error = change_flags(vp, SCARG(uap, flags), l);
3260 vput(vp);
3261 return (error);
3262 }
3263
3264 /*
3265 * Change flags of a file given a file descriptor.
3266 */
3267 /* ARGSUSED */
3268 int
3269 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3270 {
3271 /* {
3272 syscallarg(int) fd;
3273 syscallarg(u_long) flags;
3274 } */
3275 struct vnode *vp;
3276 file_t *fp;
3277 int error;
3278
3279 /* fd_getvnode() will use the descriptor for us */
3280 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3281 return (error);
3282 vp = fp->f_vnode;
3283 error = change_flags(vp, SCARG(uap, flags), l);
3284 VOP_UNLOCK(vp);
3285 fd_putfile(SCARG(uap, fd));
3286 return (error);
3287 }
3288
3289 /*
3290 * Change flags of a file given a path name; this version does
3291 * not follow links.
3292 */
3293 int
3294 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3295 {
3296 /* {
3297 syscallarg(const char *) path;
3298 syscallarg(u_long) flags;
3299 } */
3300 struct vnode *vp;
3301 int error;
3302
3303 error = namei_simple_user(SCARG(uap, path),
3304 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3305 if (error != 0)
3306 return (error);
3307 error = change_flags(vp, SCARG(uap, flags), l);
3308 vput(vp);
3309 return (error);
3310 }
3311
3312 /*
3313 * Common routine to change flags of a file.
3314 */
3315 int
3316 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3317 {
3318 struct vattr vattr;
3319 int error;
3320
3321 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3322
3323 vattr_null(&vattr);
3324 vattr.va_flags = flags;
3325 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3326
3327 return (error);
3328 }
3329
3330 /*
3331 * Change mode of a file given path name; this version follows links.
3332 */
3333 /* ARGSUSED */
3334 int
3335 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3336 {
3337 /* {
3338 syscallarg(const char *) path;
3339 syscallarg(int) mode;
3340 } */
3341 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3342 SCARG(uap, mode), 0);
3343 }
3344
3345 int
3346 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3347 {
3348 int error;
3349 struct vnode *vp;
3350 namei_simple_flags_t ns_flag;
3351
3352 if (flags & AT_SYMLINK_NOFOLLOW)
3353 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3354 else
3355 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3356
3357 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3358 if (error != 0)
3359 return error;
3360
3361 error = change_mode(vp, mode, l);
3362
3363 vrele(vp);
3364
3365 return (error);
3366 }
3367
3368 /*
3369 * Change mode of a file given a file descriptor.
3370 */
3371 /* ARGSUSED */
3372 int
3373 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3374 {
3375 /* {
3376 syscallarg(int) fd;
3377 syscallarg(int) mode;
3378 } */
3379 file_t *fp;
3380 int error;
3381
3382 /* fd_getvnode() will use the descriptor for us */
3383 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3384 return (error);
3385 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3386 fd_putfile(SCARG(uap, fd));
3387 return (error);
3388 }
3389
3390 int
3391 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3392 register_t *retval)
3393 {
3394 /* {
3395 syscallarg(int) fd;
3396 syscallarg(const char *) path;
3397 syscallarg(int) mode;
3398 syscallarg(int) flag;
3399 } */
3400
3401 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3402 SCARG(uap, mode), SCARG(uap, flag));
3403 }
3404
3405 /*
3406 * Change mode of a file given path name; this version does not follow links.
3407 */
3408 /* ARGSUSED */
3409 int
3410 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3411 {
3412 /* {
3413 syscallarg(const char *) path;
3414 syscallarg(int) mode;
3415 } */
3416 int error;
3417 struct vnode *vp;
3418
3419 error = namei_simple_user(SCARG(uap, path),
3420 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3421 if (error != 0)
3422 return (error);
3423
3424 error = change_mode(vp, SCARG(uap, mode), l);
3425
3426 vrele(vp);
3427 return (error);
3428 }
3429
3430 /*
3431 * Common routine to set mode given a vnode.
3432 */
3433 static int
3434 change_mode(struct vnode *vp, int mode, struct lwp *l)
3435 {
3436 struct vattr vattr;
3437 int error;
3438
3439 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3440 vattr_null(&vattr);
3441 vattr.va_mode = mode & ALLPERMS;
3442 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3443 VOP_UNLOCK(vp);
3444 return (error);
3445 }
3446
3447 /*
3448 * Set ownership given a path name; this version follows links.
3449 */
3450 /* ARGSUSED */
3451 int
3452 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3453 {
3454 /* {
3455 syscallarg(const char *) path;
3456 syscallarg(uid_t) uid;
3457 syscallarg(gid_t) gid;
3458 } */
3459 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3460 SCARG(uap, gid), 0);
3461 }
3462
3463 int
3464 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3465 gid_t gid, int flags)
3466 {
3467 int error;
3468 struct vnode *vp;
3469 namei_simple_flags_t ns_flag;
3470
3471 if (flags & AT_SYMLINK_NOFOLLOW)
3472 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3473 else
3474 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3475
3476 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3477 if (error != 0)
3478 return error;
3479
3480 error = change_owner(vp, uid, gid, l, 0);
3481
3482 vrele(vp);
3483
3484 return (error);
3485 }
3486
3487 /*
3488 * Set ownership given a path name; this version follows links.
3489 * Provides POSIX semantics.
3490 */
3491 /* ARGSUSED */
3492 int
3493 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3494 {
3495 /* {
3496 syscallarg(const char *) path;
3497 syscallarg(uid_t) uid;
3498 syscallarg(gid_t) gid;
3499 } */
3500 int error;
3501 struct vnode *vp;
3502
3503 error = namei_simple_user(SCARG(uap, path),
3504 NSM_FOLLOW_TRYEMULROOT, &vp);
3505 if (error != 0)
3506 return (error);
3507
3508 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3509
3510 vrele(vp);
3511 return (error);
3512 }
3513
3514 /*
3515 * Set ownership given a file descriptor.
3516 */
3517 /* ARGSUSED */
3518 int
3519 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3520 {
3521 /* {
3522 syscallarg(int) fd;
3523 syscallarg(uid_t) uid;
3524 syscallarg(gid_t) gid;
3525 } */
3526 int error;
3527 file_t *fp;
3528
3529 /* fd_getvnode() will use the descriptor for us */
3530 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3531 return (error);
3532 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3533 l, 0);
3534 fd_putfile(SCARG(uap, fd));
3535 return (error);
3536 }
3537
3538 int
3539 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3540 register_t *retval)
3541 {
3542 /* {
3543 syscallarg(int) fd;
3544 syscallarg(const char *) path;
3545 syscallarg(uid_t) owner;
3546 syscallarg(gid_t) group;
3547 syscallarg(int) flag;
3548 } */
3549
3550 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3551 SCARG(uap, owner), SCARG(uap, group),
3552 SCARG(uap, flag));
3553 }
3554
3555 /*
3556 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3557 */
3558 /* ARGSUSED */
3559 int
3560 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3561 {
3562 /* {
3563 syscallarg(int) fd;
3564 syscallarg(uid_t) uid;
3565 syscallarg(gid_t) gid;
3566 } */
3567 int error;
3568 file_t *fp;
3569
3570 /* fd_getvnode() will use the descriptor for us */
3571 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3572 return (error);
3573 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3574 l, 1);
3575 fd_putfile(SCARG(uap, fd));
3576 return (error);
3577 }
3578
3579 /*
3580 * Set ownership given a path name; this version does not follow links.
3581 */
3582 /* ARGSUSED */
3583 int
3584 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3585 {
3586 /* {
3587 syscallarg(const char *) path;
3588 syscallarg(uid_t) uid;
3589 syscallarg(gid_t) gid;
3590 } */
3591 int error;
3592 struct vnode *vp;
3593
3594 error = namei_simple_user(SCARG(uap, path),
3595 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3596 if (error != 0)
3597 return (error);
3598
3599 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3600
3601 vrele(vp);
3602 return (error);
3603 }
3604
3605 /*
3606 * Set ownership given a path name; this version does not follow links.
3607 * Provides POSIX/XPG semantics.
3608 */
3609 /* ARGSUSED */
3610 int
3611 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3612 {
3613 /* {
3614 syscallarg(const char *) path;
3615 syscallarg(uid_t) uid;
3616 syscallarg(gid_t) gid;
3617 } */
3618 int error;
3619 struct vnode *vp;
3620
3621 error = namei_simple_user(SCARG(uap, path),
3622 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3623 if (error != 0)
3624 return (error);
3625
3626 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3627
3628 vrele(vp);
3629 return (error);
3630 }
3631
3632 /*
3633 * Common routine to set ownership given a vnode.
3634 */
3635 static int
3636 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3637 int posix_semantics)
3638 {
3639 struct vattr vattr;
3640 mode_t newmode;
3641 int error;
3642
3643 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3644 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3645 goto out;
3646
3647 #define CHANGED(x) ((int)(x) != -1)
3648 newmode = vattr.va_mode;
3649 if (posix_semantics) {
3650 /*
3651 * POSIX/XPG semantics: if the caller is not the super-user,
3652 * clear set-user-id and set-group-id bits. Both POSIX and
3653 * the XPG consider the behaviour for calls by the super-user
3654 * implementation-defined; we leave the set-user-id and set-
3655 * group-id settings intact in that case.
3656 */
3657 if (vattr.va_mode & S_ISUID) {
3658 if (kauth_authorize_vnode(l->l_cred,
3659 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3660 newmode &= ~S_ISUID;
3661 }
3662 if (vattr.va_mode & S_ISGID) {
3663 if (kauth_authorize_vnode(l->l_cred,
3664 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3665 newmode &= ~S_ISGID;
3666 }
3667 } else {
3668 /*
3669 * NetBSD semantics: when changing owner and/or group,
3670 * clear the respective bit(s).
3671 */
3672 if (CHANGED(uid))
3673 newmode &= ~S_ISUID;
3674 if (CHANGED(gid))
3675 newmode &= ~S_ISGID;
3676 }
3677 /* Update va_mode iff altered. */
3678 if (vattr.va_mode == newmode)
3679 newmode = VNOVAL;
3680
3681 vattr_null(&vattr);
3682 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3683 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3684 vattr.va_mode = newmode;
3685 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3686 #undef CHANGED
3687
3688 out:
3689 VOP_UNLOCK(vp);
3690 return (error);
3691 }
3692
3693 /*
3694 * Set the access and modification times given a path name; this
3695 * version follows links.
3696 */
3697 /* ARGSUSED */
3698 int
3699 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3700 register_t *retval)
3701 {
3702 /* {
3703 syscallarg(const char *) path;
3704 syscallarg(const struct timeval *) tptr;
3705 } */
3706
3707 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3708 SCARG(uap, tptr), UIO_USERSPACE);
3709 }
3710
3711 /*
3712 * Set the access and modification times given a file descriptor.
3713 */
3714 /* ARGSUSED */
3715 int
3716 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3717 register_t *retval)
3718 {
3719 /* {
3720 syscallarg(int) fd;
3721 syscallarg(const struct timeval *) tptr;
3722 } */
3723 int error;
3724 file_t *fp;
3725
3726 /* fd_getvnode() will use the descriptor for us */
3727 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3728 return (error);
3729 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3730 UIO_USERSPACE);
3731 fd_putfile(SCARG(uap, fd));
3732 return (error);
3733 }
3734
3735 int
3736 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3737 register_t *retval)
3738 {
3739 /* {
3740 syscallarg(int) fd;
3741 syscallarg(const struct timespec *) tptr;
3742 } */
3743 int error;
3744 file_t *fp;
3745
3746 /* fd_getvnode() will use the descriptor for us */
3747 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3748 return (error);
3749 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3750 SCARG(uap, tptr), UIO_USERSPACE);
3751 fd_putfile(SCARG(uap, fd));
3752 return (error);
3753 }
3754
3755 /*
3756 * Set the access and modification times given a path name; this
3757 * version does not follow links.
3758 */
3759 int
3760 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3761 register_t *retval)
3762 {
3763 /* {
3764 syscallarg(const char *) path;
3765 syscallarg(const struct timeval *) tptr;
3766 } */
3767
3768 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3769 SCARG(uap, tptr), UIO_USERSPACE);
3770 }
3771
3772 int
3773 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3774 register_t *retval)
3775 {
3776 /* {
3777 syscallarg(int) fd;
3778 syscallarg(const char *) path;
3779 syscallarg(const struct timespec *) tptr;
3780 syscallarg(int) flag;
3781 } */
3782 int follow;
3783 const struct timespec *tptr;
3784 int error;
3785
3786 tptr = SCARG(uap, tptr);
3787 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3788
3789 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3790 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3791
3792 return error;
3793 }
3794
3795 /*
3796 * Common routine to set access and modification times given a vnode.
3797 */
3798 int
3799 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3800 const struct timespec *tptr, enum uio_seg seg)
3801 {
3802 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3803 }
3804
3805 int
3806 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3807 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3808 {
3809 struct vattr vattr;
3810 int error, dorele = 0;
3811 namei_simple_flags_t sflags;
3812 bool vanull, setbirthtime;
3813 struct timespec ts[2];
3814
3815 KASSERT(l != NULL || fdat == AT_FDCWD);
3816
3817 /*
3818 * I have checked all callers and they pass either FOLLOW,
3819 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3820 * is 0. More to the point, they don't pass anything else.
3821 * Let's keep it that way at least until the namei interfaces
3822 * are fully sanitized.
3823 */
3824 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3825 sflags = (flag == FOLLOW) ?
3826 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3827
3828 if (tptr == NULL) {
3829 vanull = true;
3830 nanotime(&ts[0]);
3831 ts[1] = ts[0];
3832 } else {
3833 vanull = false;
3834 if (seg != UIO_SYSSPACE) {
3835 error = copyin(tptr, ts, sizeof (ts));
3836 if (error != 0)
3837 return error;
3838 } else {
3839 ts[0] = tptr[0];
3840 ts[1] = tptr[1];
3841 }
3842 }
3843
3844 if (ts[0].tv_nsec == UTIME_NOW) {
3845 nanotime(&ts[0]);
3846 if (ts[1].tv_nsec == UTIME_NOW) {
3847 vanull = true;
3848 ts[1] = ts[0];
3849 }
3850 } else if (ts[1].tv_nsec == UTIME_NOW)
3851 nanotime(&ts[1]);
3852
3853 if (vp == NULL) {
3854 /* note: SEG describes TPTR, not PATH; PATH is always user */
3855 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3856 if (error != 0)
3857 return error;
3858 dorele = 1;
3859 }
3860
3861 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3862 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3863 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3864 vattr_null(&vattr);
3865
3866 if (ts[0].tv_nsec != UTIME_OMIT)
3867 vattr.va_atime = ts[0];
3868
3869 if (ts[1].tv_nsec != UTIME_OMIT) {
3870 vattr.va_mtime = ts[1];
3871 if (setbirthtime)
3872 vattr.va_birthtime = ts[1];
3873 }
3874
3875 if (vanull)
3876 vattr.va_vaflags |= VA_UTIMES_NULL;
3877 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3878 VOP_UNLOCK(vp);
3879
3880 if (dorele != 0)
3881 vrele(vp);
3882
3883 return error;
3884 }
3885
3886 int
3887 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3888 const struct timeval *tptr, enum uio_seg seg)
3889 {
3890 struct timespec ts[2];
3891 struct timespec *tsptr = NULL;
3892 int error;
3893
3894 if (tptr != NULL) {
3895 struct timeval tv[2];
3896
3897 if (seg != UIO_SYSSPACE) {
3898 error = copyin(tptr, tv, sizeof (tv));
3899 if (error != 0)
3900 return error;
3901 tptr = tv;
3902 }
3903
3904 if ((tv[0].tv_usec == UTIME_NOW) ||
3905 (tv[0].tv_usec == UTIME_OMIT))
3906 ts[0].tv_nsec = tv[0].tv_usec;
3907 else
3908 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3909
3910 if ((tv[1].tv_usec == UTIME_NOW) ||
3911 (tv[1].tv_usec == UTIME_OMIT))
3912 ts[1].tv_nsec = tv[1].tv_usec;
3913 else
3914 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3915
3916 tsptr = &ts[0];
3917 }
3918
3919 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3920 }
3921
3922 /*
3923 * Truncate a file given its path name.
3924 */
3925 /* ARGSUSED */
3926 int
3927 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3928 {
3929 /* {
3930 syscallarg(const char *) path;
3931 syscallarg(int) pad;
3932 syscallarg(off_t) length;
3933 } */
3934 struct vnode *vp;
3935 struct vattr vattr;
3936 int error;
3937
3938 if (SCARG(uap, length) < 0)
3939 return EINVAL;
3940
3941 error = namei_simple_user(SCARG(uap, path),
3942 NSM_FOLLOW_TRYEMULROOT, &vp);
3943 if (error != 0)
3944 return (error);
3945 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3946 if (vp->v_type == VDIR)
3947 error = EISDIR;
3948 else if ((error = vn_writechk(vp)) == 0 &&
3949 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3950 vattr_null(&vattr);
3951 vattr.va_size = SCARG(uap, length);
3952 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3953 }
3954 vput(vp);
3955 return (error);
3956 }
3957
3958 /*
3959 * Truncate a file given a file descriptor.
3960 */
3961 /* ARGSUSED */
3962 int
3963 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3964 {
3965 /* {
3966 syscallarg(int) fd;
3967 syscallarg(int) pad;
3968 syscallarg(off_t) length;
3969 } */
3970 struct vattr vattr;
3971 struct vnode *vp;
3972 file_t *fp;
3973 int error;
3974
3975 if (SCARG(uap, length) < 0)
3976 return EINVAL;
3977
3978 /* fd_getvnode() will use the descriptor for us */
3979 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3980 return (error);
3981 if ((fp->f_flag & FWRITE) == 0) {
3982 error = EINVAL;
3983 goto out;
3984 }
3985 vp = fp->f_vnode;
3986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3987 if (vp->v_type == VDIR)
3988 error = EISDIR;
3989 else if ((error = vn_writechk(vp)) == 0) {
3990 vattr_null(&vattr);
3991 vattr.va_size = SCARG(uap, length);
3992 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3993 }
3994 VOP_UNLOCK(vp);
3995 out:
3996 fd_putfile(SCARG(uap, fd));
3997 return (error);
3998 }
3999
4000 /*
4001 * Sync an open file.
4002 */
4003 /* ARGSUSED */
4004 int
4005 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
4006 {
4007 /* {
4008 syscallarg(int) fd;
4009 } */
4010 struct vnode *vp;
4011 file_t *fp;
4012 int error;
4013
4014 /* fd_getvnode() will use the descriptor for us */
4015 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4016 return (error);
4017 vp = fp->f_vnode;
4018 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4019 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4020 VOP_UNLOCK(vp);
4021 fd_putfile(SCARG(uap, fd));
4022 return (error);
4023 }
4024
4025 /*
4026 * Sync a range of file data. API modeled after that found in AIX.
4027 *
4028 * FDATASYNC indicates that we need only save enough metadata to be able
4029 * to re-read the written data. Note we duplicate AIX's requirement that
4030 * the file be open for writing.
4031 */
4032 /* ARGSUSED */
4033 int
4034 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4035 {
4036 /* {
4037 syscallarg(int) fd;
4038 syscallarg(int) flags;
4039 syscallarg(off_t) start;
4040 syscallarg(off_t) length;
4041 } */
4042 struct vnode *vp;
4043 file_t *fp;
4044 int flags, nflags;
4045 off_t s, e, len;
4046 int error;
4047
4048 /* fd_getvnode() will use the descriptor for us */
4049 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4050 return (error);
4051
4052 if ((fp->f_flag & FWRITE) == 0) {
4053 error = EBADF;
4054 goto out;
4055 }
4056
4057 flags = SCARG(uap, flags);
4058 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4059 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4060 error = EINVAL;
4061 goto out;
4062 }
4063 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4064 if (flags & FDATASYNC)
4065 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4066 else
4067 nflags = FSYNC_WAIT;
4068 if (flags & FDISKSYNC)
4069 nflags |= FSYNC_CACHE;
4070
4071 len = SCARG(uap, length);
4072 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4073 if (len) {
4074 s = SCARG(uap, start);
4075 e = s + len;
4076 if (e < s) {
4077 error = EINVAL;
4078 goto out;
4079 }
4080 } else {
4081 e = 0;
4082 s = 0;
4083 }
4084
4085 vp = fp->f_vnode;
4086 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4087 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4088 VOP_UNLOCK(vp);
4089 out:
4090 fd_putfile(SCARG(uap, fd));
4091 return (error);
4092 }
4093
4094 /*
4095 * Sync the data of an open file.
4096 */
4097 /* ARGSUSED */
4098 int
4099 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4100 {
4101 /* {
4102 syscallarg(int) fd;
4103 } */
4104 struct vnode *vp;
4105 file_t *fp;
4106 int error;
4107
4108 /* fd_getvnode() will use the descriptor for us */
4109 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4110 return (error);
4111 if ((fp->f_flag & FWRITE) == 0) {
4112 fd_putfile(SCARG(uap, fd));
4113 return (EBADF);
4114 }
4115 vp = fp->f_vnode;
4116 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4117 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4118 VOP_UNLOCK(vp);
4119 fd_putfile(SCARG(uap, fd));
4120 return (error);
4121 }
4122
4123 /*
4124 * Rename files, (standard) BSD semantics frontend.
4125 */
4126 /* ARGSUSED */
4127 int
4128 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4129 {
4130 /* {
4131 syscallarg(const char *) from;
4132 syscallarg(const char *) to;
4133 } */
4134
4135 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4136 SCARG(uap, to), UIO_USERSPACE, 0));
4137 }
4138
4139 int
4140 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4141 register_t *retval)
4142 {
4143 /* {
4144 syscallarg(int) fromfd;
4145 syscallarg(const char *) from;
4146 syscallarg(int) tofd;
4147 syscallarg(const char *) to;
4148 } */
4149
4150 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4151 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4152 }
4153
4154 /*
4155 * Rename files, POSIX semantics frontend.
4156 */
4157 /* ARGSUSED */
4158 int
4159 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4160 {
4161 /* {
4162 syscallarg(const char *) from;
4163 syscallarg(const char *) to;
4164 } */
4165
4166 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4167 SCARG(uap, to), UIO_USERSPACE, 1));
4168 }
4169
4170 /*
4171 * Rename files. Source and destination must either both be directories,
4172 * or both not be directories. If target is a directory, it must be empty.
4173 * If `from' and `to' refer to the same object, the value of the `retain'
4174 * argument is used to determine whether `from' will be
4175 *
4176 * (retain == 0) deleted unless `from' and `to' refer to the same
4177 * object in the file system's name space (BSD).
4178 * (retain == 1) always retained (POSIX).
4179 *
4180 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4181 */
4182 int
4183 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4184 {
4185 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4186 }
4187
/*
 * Common implementation of the rename system calls.
 *
 * `from' and `to' are path names located in the address space given by
 * `seg'; they are looked up relative to `fromfd' and `tofd'
 * respectively.  `l' may be NULL only if both descriptors are
 * AT_FDCWD (asserted below).  `retain' selects what happens when both
 * names resolve to the same vnode: nonzero returns success without
 * calling VOP_RENAME; zero does so only if the parent directory and
 * the final component name also match.
 *
 * Returns 0 on success or an errno value.
 */
4188 static int
4189 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
4190 const char *to, enum uio_seg seg, int retain)
4191 {
4192 struct pathbuf *fpb, *tpb;
4193 struct nameidata fnd, tnd;
4194 struct vnode *fdvp, *fvp;
4195 struct vnode *tdvp, *tvp;
4196 struct mount *mp, *tmp;
4197 int error;
4198
4199 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));
4200
4201 error = pathbuf_maybe_copyin(from, seg, &fpb);
4202 if (error)
4203 goto out0;
4204 KASSERT(fpb != NULL);
4205
4206 error = pathbuf_maybe_copyin(to, seg, &tpb);
4207 if (error)
4208 goto out1;
4209 KASSERT(tpb != NULL);
4210
4211 /*
4212 * Lookup from.
4213 *
4214 * XXX LOCKPARENT is wrong because we don't actually want it
4215 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
4216 * insane, so for the time being we need to leave it like this.
4217 */
4218 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
4219 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
4220 goto out2;
4221
4222 /*
4223 * Pull out the important results of the lookup, fdvp and fvp.
4224 * Of course, fvp is bogus because we're about to unlock fdvp.
4225 */
4226 fdvp = fnd.ni_dvp;
4227 fvp = fnd.ni_vp;
4228 KASSERT(fdvp != NULL);
4229 KASSERT(fvp != NULL);
4230 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));
4231
4232 /*
4233 * Make sure neither fdvp nor fvp is locked.
4234 */
4235 if (fdvp != fvp)
4236 VOP_UNLOCK(fdvp);
4237 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4238 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4239
4240 /*
4241 * Reject renaming `.' and `..'. Can't do this until after
4242 * namei because we need namei's parsing to find the final
4243 * component name. (namei should just leave us with the final
4244 * component name and not look it up itself, but anyway...)
4245 *
4246 * This was here before because we used to relookup from
4247 * instead of to and relookup requires the caller to check
4248 * this, but now file systems may depend on this check, so we
4249 * must retain it until the file systems are all rototilled.
4250 */
4251 if (((fnd.ni_cnd.cn_namelen == 1) &&
4252 (fnd.ni_cnd.cn_nameptr[0] == '.')) ||
4253 ((fnd.ni_cnd.cn_namelen == 2) &&
4254 (fnd.ni_cnd.cn_nameptr[0] == '.') &&
4255 (fnd.ni_cnd.cn_nameptr[1] == '.'))) {
4256 error = EINVAL; /* XXX EISDIR? */
4257 goto abort0;
4258 }
4259
4260 /*
4261 * Lookup to.
4262 *
4263 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using
4264 * fvp here to decide whether to add CREATEDIR is a load of
4265 * bollocks because fvp might be the wrong node by now, since
4266 * fdvp is unlocked.
4267 *
4268 * XXX Why not pass CREATEDIR always?
4269 */
4270 NDINIT(&tnd, RENAME,
4271 (LOCKPARENT | NOCACHE | TRYEMULROOT |
4272 ((fvp->v_type == VDIR)? CREATEDIR : 0)),
4273 tpb);
4274 if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
4275 goto abort0;
4276
4277 /*
4278 * Pull out the important results of the lookup, tdvp and tvp.
4279 * Of course, tvp is bogus because we're about to unlock tdvp.
4280 */
4281 tdvp = tnd.ni_dvp;
4282 tvp = tnd.ni_vp;
4283 KASSERT(tdvp != NULL);
4284 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));
4285
4286 /*
4287 * Make sure neither tdvp nor tvp is locked.
4288 */
4289 if (tdvp != tvp)
4290 VOP_UNLOCK(tdvp);
4291 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4292 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4293
4294 /*
4295 * Reject renaming onto `.' or `..'. relookup is unhappy with
4296 * these, which is why we must do this here. Once upon a time
4297 * we relooked up from instead of to, and consequently didn't
4298 * need this check, but now that we relookup to instead of
4299 * from, we need this; and we shall need it forever forward
4300 * until the VOP_RENAME protocol changes, because file systems
4301 * will no doubt begin to depend on this check.
4302 */
4303 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
4304 error = EISDIR;
4305 goto abort1;
4306 }
4307 if ((tnd.ni_cnd.cn_namelen == 2) &&
4308 (tnd.ni_cnd.cn_nameptr[0] == '.') &&
4309 (tnd.ni_cnd.cn_nameptr[1] == '.')) {
4310 error = EINVAL;
4311 goto abort1;
4312 }
4313
4314 /*
4315 * Get the mount point. If the file system has been unmounted,
4316 * which it may be because we're not holding any vnode locks,
4317 * then v_mount will be NULL. We're not really supposed to
4318 * read v_mount without holding the vnode lock, but since we
4319 * have fdvp referenced, if fdvp->v_mount changes then at worst
4320 * it will be set to NULL, not changed to another mount point.
4321 * And, of course, since it is up to the file system to
4322 * determine the real lock order, we can't lock both fdvp and
4323 * tdvp at the same time.
4324 */
4325 mp = fdvp->v_mount;
4326 if (mp == NULL) {
4327 error = ENOENT;
4328 goto abort1;
4329 }
4330
4331 /*
4332 * Make sure the mount points match. Again, although we don't
4333 * hold any vnode locks, the v_mount fields may change -- but
4334 * at worst they will change to NULL, so this will never become
4335 * a cross-device rename, because we hold vnode references.
4336 *
4337 * XXX Because nothing is locked and the compiler may reorder
4338 * things here, unmounting the file system at an inopportune
4339 * moment may cause rename to fail with EXDEV when it really
4340 * should fail with ENOENT.
4341 */
4342 tmp = tdvp->v_mount;
4343 if (tmp == NULL) {
4344 error = ENOENT;
4345 goto abort1;
4346 }
4347
4348 if (mp != tmp) {
4349 error = EXDEV;
4350 goto abort1;
4351 }
4352
4353 /*
4354 * Take the vfs rename lock to avoid cross-directory screw cases.
4355 * Nothing is locked currently, so taking this lock is safe.
4356 */
4357 error = VFS_RENAMELOCK_ENTER(mp);
4358 if (error)
4359 goto abort1;
4360
4361 /*
4362 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
4363 * and nothing is locked except for the vfs rename lock.
4364 *
4365 * The next step is a little rain dance to conform to the
4366 * insane lock protocol, even though it does nothing to ward
4367 * off race conditions.
4368 *
4369 * We need tdvp and tvp to be locked. However, because we have
4370 * unlocked tdvp in order to hold no locks while we take the
4371 * vfs rename lock, tvp may be wrong here, and we can't safely
4372 * lock it even if the sensible file systems will just unlock
4373 * it straight away. Consequently, we must lock tdvp and then
4374 * relookup tvp to get it locked.
4375 *
4376 * Finally, because the VOP_RENAME protocol is brain-damaged
4377 * and various file systems insanely depend on the semantics of
4378 * this brain damage, the lookup of to must be the last lookup
4379 * before VOP_RENAME.
4380 */
4381 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
4382 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
4383 if (error)
4384 goto abort2;
4385
4386 /*
4387 * Drop the old tvp and pick up the new one -- which might be
4388 * the same, but that doesn't matter to us. After this, tdvp
4389 * and tvp should both be locked.
4390 */
4391 if (tvp != NULL)
4392 vrele(tvp);
4393 tvp = tnd.ni_vp;
4394 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4395 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4396
4397 /*
4398 * The old do_sys_rename had various consistency checks here
4399 * involving fvp and tvp. fvp is bogus already here, and tvp
4400 * will become bogus soon in any sensible file system, so the
4401 * only purpose in putting these checks here is to give lip
4402 * service to these screw cases and to acknowledge that they
4403 * exist, not actually to handle them, but here you go
4404 * anyway...
4405 */
4406
4407 /*
4408 * Acknowledge that directories and non-directories aren't
4409 * supposed to mix.
4410 */
4411 if (tvp != NULL) {
4412 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
4413 error = ENOTDIR;
4414 goto abort3;
4415 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
4416 error = EISDIR;
4417 goto abort3;
4418 }
4419 }
4420
4421 /*
4422 * Acknowledge some random screw case, among the dozens that
4423 * might arise.
4424 */
4425 if (fvp == tdvp) {
4426 error = EINVAL;
4427 goto abort3;
4428 }
4429
4430 /*
4431 * Acknowledge that POSIX has a wacky screw case.
4432 *
4433 * XXX Eventually the retain flag needs to be passed on to
4434 * VOP_RENAME.
4435 *
4436 * Both branches below report success (error = 0) without
4437 * calling VOP_RENAME; they still leave through abort3 so that
4438 * the locks and references taken so far are unwound.
4439 */
4436 if (fvp == tvp) {
4437 if (retain) {
4438 error = 0;
4439 goto abort3;
4440 } else if ((fdvp == tdvp) &&
4441 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
4442 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
4443 fnd.ni_cnd.cn_namelen))) {
4444 error = 0;
4445 goto abort3;
4446 }
4447 }
4448
4449 /*
4450 * Make sure veriexec can screw us up. (But a race can screw
4451 * up veriexec, of course -- remember, fvp and (soon) tvp are
4452 * bogus.)
4453 */
4454 #if NVERIEXEC > 0
4455 {
4456 char *f1, *f2;
4457 size_t f1_len;
4458 size_t f2_len;
4459
4460 f1_len = fnd.ni_cnd.cn_namelen + 1;
4461 f1 = kmem_alloc(f1_len, KM_SLEEP);
4462 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);
4463
4464 f2_len = tnd.ni_cnd.cn_namelen + 1;
4465 f2 = kmem_alloc(f2_len, KM_SLEEP);
4466 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);
4467
4468 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4469
4470 kmem_free(f1, f1_len);
4471 kmem_free(f2, f2_len);
4472
4473 if (error)
4474 goto abort3;
4475 }
4476 #endif /* NVERIEXEC > 0 */
4477
4478 /*
4479 * All ready. Incant the rename vop.
4480 */
4481 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4482 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4483 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4484 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4485 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);
4486
4487 /*
4488 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
4489 * tdvp and tvp. But we can't assert any of that.
4490 */
4491 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4492 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4493 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4494 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4495
4496 /*
4497 * So all we have left to do is to drop the rename lock and
4498 * destroy the pathbufs.
4499 */
4500 VFS_RENAMELOCK_EXIT(mp);
4501 goto out2;
4502
/*
 * Unwinding for paths that do not reach VOP_RENAME; each label falls
 * through to the ones below it.
 *   abort3: unlock tvp (when set and distinct from tdvp)
 *   abort2: unlock tdvp and leave the vfs rename lock
 *   abort1: abort the `to' operation, release tdvp and tvp
 *   abort0: abort the `from' operation, release fdvp and fvp
 *   out2, out1: destroy the `to' and `from' pathbufs
 */
4503 abort3: if ((tvp != NULL) && (tvp != tdvp))
4504 VOP_UNLOCK(tvp);
4505 abort2: VOP_UNLOCK(tdvp);
4506 VFS_RENAMELOCK_EXIT(mp);
4507 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd);
4508 vrele(tdvp);
4509 if (tvp != NULL)
4510 vrele(tvp);
4511 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd);
4512 vrele(fdvp);
4513 vrele(fvp);
4514 out2: pathbuf_destroy(tpb);
4515 out1: pathbuf_destroy(fpb);
4516 out0: return error;
4517 }
4518
4519 /*
4520 * Make a directory file.
4521 */
4522 /* ARGSUSED */
4523 int
4524 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4525 {
4526 /* {
4527 syscallarg(const char *) path;
4528 syscallarg(int) mode;
4529 } */
4530
4531 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4532 SCARG(uap, mode), UIO_USERSPACE);
4533 }
4534
4535 int
4536 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4537 register_t *retval)
4538 {
4539 /* {
4540 syscallarg(int) fd;
4541 syscallarg(const char *) path;
4542 syscallarg(int) mode;
4543 } */
4544
4545 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4546 SCARG(uap, mode), UIO_USERSPACE);
4547 }
4548
4549
4550 int
4551 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4552 {
4553 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4554 }
4555
4556 static int
4557 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4558 enum uio_seg seg)
4559 {
4560 struct proc *p = curlwp->l_proc;
4561 struct vnode *vp;
4562 struct vattr vattr;
4563 int error;
4564 struct pathbuf *pb;
4565 struct nameidata nd;
4566
4567 KASSERT(l != NULL || fdat == AT_FDCWD);
4568
4569 /* XXX bollocks, should pass in a pathbuf */
4570 error = pathbuf_maybe_copyin(path, seg, &pb);
4571 if (error) {
4572 return error;
4573 }
4574
4575 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4576
4577 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4578 pathbuf_destroy(pb);
4579 return (error);
4580 }
4581 vp = nd.ni_vp;
4582 if (vp != NULL) {
4583 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4584 if (nd.ni_dvp == vp)
4585 vrele(nd.ni_dvp);
4586 else
4587 vput(nd.ni_dvp);
4588 vrele(vp);
4589 pathbuf_destroy(pb);
4590 return (EEXIST);
4591 }
4592 vattr_null(&vattr);
4593 vattr.va_type = VDIR;
4594 /* We will read cwdi->cwdi_cmask unlocked. */
4595 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4596 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4597 if (!error)
4598 vrele(nd.ni_vp);
4599 vput(nd.ni_dvp);
4600 pathbuf_destroy(pb);
4601 return (error);
4602 }
4603
4604 /*
4605 * Remove a directory file.
4606 */
4607 /* ARGSUSED */
4608 int
4609 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4610 {
4611 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4612 AT_REMOVEDIR, UIO_USERSPACE);
4613 }
4614
4615 /*
4616 * Read a block of directory entries in a file system independent format.
4617 */
4618 int
4619 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4620 {
4621 /* {
4622 syscallarg(int) fd;
4623 syscallarg(char *) buf;
4624 syscallarg(size_t) count;
4625 } */
4626 file_t *fp;
4627 int error, done;
4628
4629 /* fd_getvnode() will use the descriptor for us */
4630 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4631 return (error);
4632 if ((fp->f_flag & FREAD) == 0) {
4633 error = EBADF;
4634 goto out;
4635 }
4636 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4637 SCARG(uap, count), &done, l, 0, 0);
4638 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4639 *retval = done;
4640 out:
4641 fd_putfile(SCARG(uap, fd));
4642 return (error);
4643 }
4644
4645 /*
4646 * Set the mode mask for creation of filesystem nodes.
4647 */
4648 int
4649 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4650 {
4651 /* {
4652 syscallarg(mode_t) newmask;
4653 } */
4654 struct proc *p = l->l_proc;
4655 struct cwdinfo *cwdi;
4656
4657 /*
4658 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4659 * important is that we serialize changes to the mask. The
4660 * rw_exit() will issue a write memory barrier on our behalf,
4661 * and force the changes out to other CPUs (as it must use an
4662 * atomic operation, draining the local CPU's store buffers).
4663 */
4664 cwdi = p->p_cwdi;
4665 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4666 *retval = cwdi->cwdi_cmask;
4667 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4668 rw_exit(&cwdi->cwdi_lock);
4669
4670 return (0);
4671 }
4672
4673 int
4674 dorevoke(struct vnode *vp, kauth_cred_t cred)
4675 {
4676 struct vattr vattr;
4677 int error, fs_decision;
4678
4679 vn_lock(vp, LK_SHARED | LK_RETRY);
4680 error = VOP_GETATTR(vp, &vattr, cred);
4681 VOP_UNLOCK(vp);
4682 if (error != 0)
4683 return error;
4684 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4685 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4686 fs_decision);
4687 if (!error)
4688 VOP_REVOKE(vp, REVOKEALL);
4689 return (error);
4690 }
4691
4692 /*
4693 * Void all references to file by ripping underlying filesystem
4694 * away from vnode.
4695 */
4696 /* ARGSUSED */
4697 int
4698 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4699 {
4700 /* {
4701 syscallarg(const char *) path;
4702 } */
4703 struct vnode *vp;
4704 int error;
4705
4706 error = namei_simple_user(SCARG(uap, path),
4707 NSM_FOLLOW_TRYEMULROOT, &vp);
4708 if (error != 0)
4709 return (error);
4710 error = dorevoke(vp, l->l_cred);
4711 vrele(vp);
4712 return (error);
4713 }
4714
4715 /*
4716 * Allocate backing store for a file, filling a hole without having to
4717 * explicitly write anything out.
4718 */
4719 /* ARGSUSED */
4720 int
4721 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4722 register_t *retval)
4723 {
4724 /* {
4725 syscallarg(int) fd;
4726 syscallarg(off_t) pos;
4727 syscallarg(off_t) len;
4728 } */
4729 int fd;
4730 off_t pos, len;
4731 struct file *fp;
4732 struct vnode *vp;
4733 int error;
4734
4735 fd = SCARG(uap, fd);
4736 pos = SCARG(uap, pos);
4737 len = SCARG(uap, len);
4738
4739 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4740 *retval = EINVAL;
4741 return 0;
4742 }
4743
4744 error = fd_getvnode(fd, &fp);
4745 if (error) {
4746 *retval = error;
4747 return 0;
4748 }
4749 if ((fp->f_flag & FWRITE) == 0) {
4750 error = EBADF;
4751 goto fail;
4752 }
4753 vp = fp->f_vnode;
4754
4755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4756 if (vp->v_type == VDIR) {
4757 error = EISDIR;
4758 } else {
4759 error = VOP_FALLOCATE(vp, pos, len);
4760 }
4761 VOP_UNLOCK(vp);
4762
4763 fail:
4764 fd_putfile(fd);
4765 *retval = error;
4766 return 0;
4767 }
4768
4769 /*
4770 * Deallocate backing store for a file, creating a hole. Also used for
4771 * invoking TRIM on disks.
4772 */
4773 /* ARGSUSED */
4774 int
4775 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4776 register_t *retval)
4777 {
4778 /* {
4779 syscallarg(int) fd;
4780 syscallarg(off_t) pos;
4781 syscallarg(off_t) len;
4782 } */
4783 int fd;
4784 off_t pos, len;
4785 struct file *fp;
4786 struct vnode *vp;
4787 int error;
4788
4789 fd = SCARG(uap, fd);
4790 pos = SCARG(uap, pos);
4791 len = SCARG(uap, len);
4792
4793 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4794 return EINVAL;
4795 }
4796
4797 error = fd_getvnode(fd, &fp);
4798 if (error) {
4799 return error;
4800 }
4801 if ((fp->f_flag & FWRITE) == 0) {
4802 error = EBADF;
4803 goto fail;
4804 }
4805 vp = fp->f_vnode;
4806
4807 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4808 if (vp->v_type == VDIR) {
4809 error = EISDIR;
4810 } else {
4811 error = VOP_FDISCARD(vp, pos, len);
4812 }
4813 VOP_UNLOCK(vp);
4814
4815 fail:
4816 fd_putfile(fd);
4817 return error;
4818 }
4819