vfs_syscalls.c revision 1.511 1 /* $NetBSD: vfs_syscalls.c,v 1.511 2017/04/17 08:31:02 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.511 2017/04/17 08:31:02 hannken Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/fstrans.h>
91 #include <sys/proc.h>
92 #include <sys/uio.h>
93 #include <sys/kmem.h>
94 #include <sys/dirent.h>
95 #include <sys/sysctl.h>
96 #include <sys/syscallargs.h>
97 #include <sys/vfs_syscalls.h>
98 #include <sys/quota.h>
99 #include <sys/quotactl.h>
100 #include <sys/ktrace.h>
101 #ifdef FILEASSOC
102 #include <sys/fileassoc.h>
103 #endif /* FILEASSOC */
104 #include <sys/extattr.h>
105 #include <sys/verified_exec.h>
106 #include <sys/kauth.h>
107 #include <sys/atomic.h>
108 #include <sys/module.h>
109 #include <sys/buf.h>
110
111 #include <miscfs/genfs/genfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
119 /* XXX this shouldn't be here */
120 #ifndef OFF_T_MAX
121 #define OFF_T_MAX __type_max(off_t)
122 #endif
123
124 static int change_flags(struct vnode *, u_long, struct lwp *);
125 static int change_mode(struct vnode *, int, struct lwp *);
126 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
127 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
128 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
129 enum uio_seg);
130 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
131 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
132 enum uio_seg);
133 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
134 enum uio_seg, int);
135 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
136 size_t, register_t *);
137 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
138
139 static int fd_nameiat(struct lwp *, int, struct nameidata *);
140 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
141 namei_simple_flags_t, struct vnode **);
142
143
144 /*
145 * This table is used to maintain compatibility with 4.3BSD
146 * and NetBSD 0.9 mount syscalls - and possibly other systems.
147 * Note, the order is important!
148 *
149 * Do not modify this table. It should only contain filesystems
150 * supported by NetBSD 0.9 and 4.3BSD.
151 */
152 const char * const mountcompatnames[] = {
153 NULL, /* 0 = MOUNT_NONE */
154 MOUNT_FFS, /* 1 = MOUNT_UFS */
155 MOUNT_NFS, /* 2 */
156 MOUNT_MFS, /* 3 */
157 MOUNT_MSDOS, /* 4 */
158 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
159 MOUNT_FDESC, /* 6 */
160 MOUNT_KERNFS, /* 7 */
161 NULL, /* 8 = MOUNT_DEVFS */
162 MOUNT_AFS, /* 9 */
163 };
164
165 const int nmountcompatnames = __arraycount(mountcompatnames);
166
167 static int
168 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
169 {
170 file_t *dfp;
171 int error;
172
173 if (fdat != AT_FDCWD) {
174 if ((error = fd_getvnode(fdat, &dfp)) != 0)
175 goto out;
176
177 NDAT(ndp, dfp->f_vnode);
178 }
179
180 error = namei(ndp);
181
182 if (fdat != AT_FDCWD)
183 fd_putfile(fdat);
184 out:
185 return error;
186 }
187
188 static int
189 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
190 namei_simple_flags_t sflags, struct vnode **vp_ret)
191 {
192 file_t *dfp;
193 struct vnode *dvp;
194 int error;
195
196 if (fdat != AT_FDCWD) {
197 if ((error = fd_getvnode(fdat, &dfp)) != 0)
198 goto out;
199
200 dvp = dfp->f_vnode;
201 } else {
202 dvp = NULL;
203 }
204
205 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
206
207 if (fdat != AT_FDCWD)
208 fd_putfile(fdat);
209 out:
210 return error;
211 }
212
213 static int
214 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
215 {
216 int error;
217
218 fp->f_flag = flags & FMASK;
219 fp->f_type = DTYPE_VNODE;
220 fp->f_ops = &vnops;
221 fp->f_vnode = vp;
222
223 if (flags & (O_EXLOCK | O_SHLOCK)) {
224 struct flock lf;
225 int type;
226
227 lf.l_whence = SEEK_SET;
228 lf.l_start = 0;
229 lf.l_len = 0;
230 if (flags & O_EXLOCK)
231 lf.l_type = F_WRLCK;
232 else
233 lf.l_type = F_RDLCK;
234 type = F_FLOCK;
235 if ((flags & FNONBLOCK) == 0)
236 type |= F_WAIT;
237 VOP_UNLOCK(vp);
238 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
239 if (error) {
240 (void) vn_close(vp, fp->f_flag, fp->f_cred);
241 fd_abort(l->l_proc, fp, indx);
242 return error;
243 }
244 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
245 atomic_or_uint(&fp->f_flag, FHASLOCK);
246 }
247 if (flags & O_CLOEXEC)
248 fd_set_exclose(l, indx, true);
249 return 0;
250 }
251
252 static int
253 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
254 void *data, size_t *data_len)
255 {
256 struct mount *mp;
257 int error = 0, saved_flags;
258
259 mp = vp->v_mount;
260 saved_flags = mp->mnt_flag;
261
262 /* We can operate only on VV_ROOT nodes. */
263 if ((vp->v_vflag & VV_ROOT) == 0) {
264 error = EINVAL;
265 goto out;
266 }
267
268 /*
269 * We only allow the filesystem to be reloaded if it
270 * is currently mounted read-only. Additionally, we
271 * prevent read-write to read-only downgrades.
272 */
273 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
274 (mp->mnt_flag & MNT_RDONLY) == 0 &&
275 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
276 error = EOPNOTSUPP; /* Needs translation */
277 goto out;
278 }
279
280 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
281 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
282 if (error)
283 goto out;
284
285 if (vfs_busy(mp, NULL)) {
286 error = EPERM;
287 goto out;
288 }
289
290 error = vfs_suspend(mp, 0);
291 if (error)
292 goto out;
293
294 mutex_enter(&mp->mnt_updating);
295
296 mp->mnt_flag &= ~MNT_OP_FLAGS;
297 mp->mnt_flag |= flags & MNT_OP_FLAGS;
298
299 /*
300 * Set the mount level flags.
301 */
302 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
303 if ((flags & MNT_RDONLY))
304 mp->mnt_iflag |= IMNT_WANTRDONLY;
305 else
306 mp->mnt_iflag |= IMNT_WANTRDWR;
307 }
308 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
309 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
310 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
311 mp->mnt_flag &= ~MNT_RDONLY;
312
313 error = VFS_MOUNT(mp, path, data, data_len);
314
315 if (error && data != NULL) {
316 int error2;
317
318 /*
319 * Update failed; let's try and see if it was an
320 * export request. For compat with 3.0 and earlier.
321 */
322 error2 = vfs_hooks_reexport(mp, path, data);
323
324 /*
325 * Only update error code if the export request was
326 * understood but some problem occurred while
327 * processing it.
328 */
329 if (error2 != EJUSTRETURN)
330 error = error2;
331 }
332
333 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
334 mp->mnt_flag |= MNT_RDONLY;
335 if (error)
336 mp->mnt_flag = saved_flags;
337 mp->mnt_flag &= ~MNT_OP_FLAGS;
338 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
339 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
340 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
341 vfs_syncer_add_to_worklist(mp);
342 } else {
343 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
344 vfs_syncer_remove_from_worklist(mp);
345 }
346 mutex_exit(&mp->mnt_updating);
347 vfs_resume(mp);
348 vfs_unbusy(mp, false, NULL);
349
350 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
351 (flags & MNT_EXTATTR)) {
352 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
353 NULL, 0, NULL) != 0) {
354 printf("%s: failed to start extattr, error = %d",
355 mp->mnt_stat.f_mntonname, error);
356 mp->mnt_flag &= ~MNT_EXTATTR;
357 }
358 }
359
360 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
361 !(flags & MNT_EXTATTR)) {
362 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
363 NULL, 0, NULL) != 0) {
364 printf("%s: failed to stop extattr, error = %d",
365 mp->mnt_stat.f_mntonname, error);
366 mp->mnt_flag |= MNT_RDONLY;
367 }
368 }
369 out:
370 return (error);
371 }
372
373 static int
374 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
375 struct vfsops **vfsops)
376 {
377 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
378 int error;
379
380 if (type_seg == UIO_USERSPACE) {
381 /* Copy file-system type from userspace. */
382 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
383 } else {
384 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
385 KASSERT(error == 0);
386 }
387
388 if (error) {
389 /*
390 * Historically, filesystem types were identified by numbers.
391 * If we get an integer for the filesystem type instead of a
392 * string, we check to see if it matches one of the historic
393 * filesystem types.
394 */
395 u_long fsindex = (u_long)fstype;
396 if (fsindex >= nmountcompatnames ||
397 mountcompatnames[fsindex] == NULL)
398 return ENODEV;
399 strlcpy(fstypename, mountcompatnames[fsindex],
400 sizeof(fstypename));
401 }
402
403 /* Accept `ufs' as an alias for `ffs', for compatibility. */
404 if (strcmp(fstypename, "ufs") == 0)
405 fstypename[0] = 'f';
406
407 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
408 return 0;
409
410 /* If we can autoload a vfs module, try again */
411 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
412
413 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
414 return 0;
415
416 return ENODEV;
417 }
418
419 static int
420 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
421 void *data, size_t *data_len)
422 {
423 struct mount *mp;
424 int error;
425
426 /* If MNT_GETARGS is specified, it should be the only flag. */
427 if (flags & ~MNT_GETARGS)
428 return EINVAL;
429
430 mp = vp->v_mount;
431
432 /* XXX: probably some notion of "can see" here if we want isolation. */
433 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
434 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
435 if (error)
436 return error;
437
438 if ((vp->v_vflag & VV_ROOT) == 0)
439 return EINVAL;
440
441 if (vfs_busy(mp, NULL))
442 return EPERM;
443
444 mutex_enter(&mp->mnt_updating);
445 mp->mnt_flag &= ~MNT_OP_FLAGS;
446 mp->mnt_flag |= MNT_GETARGS;
447 error = VFS_MOUNT(mp, path, data, data_len);
448 mp->mnt_flag &= ~MNT_OP_FLAGS;
449 mutex_exit(&mp->mnt_updating);
450
451 vfs_unbusy(mp, false, NULL);
452 return (error);
453 }
454
455 int
456 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
457 {
458 /* {
459 syscallarg(const char *) type;
460 syscallarg(const char *) path;
461 syscallarg(int) flags;
462 syscallarg(void *) data;
463 syscallarg(size_t) data_len;
464 } */
465
466 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
467 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
468 SCARG(uap, data_len), retval);
469 }
470
471 int
472 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
473 const char *path, int flags, void *data, enum uio_seg data_seg,
474 size_t data_len, register_t *retval)
475 {
476 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
477 struct vnode *vp;
478 void *data_buf = data;
479 bool vfsopsrele = false;
480 size_t alloc_sz = 0;
481 int error;
482
483 /*
484 * Get vnode to be covered
485 */
486 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
487 if (error != 0) {
488 vp = NULL;
489 goto done;
490 }
491
492 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
493 vfsops = vp->v_mount->mnt_op;
494 } else {
495 /* 'type' is userspace */
496 error = mount_get_vfsops(type, type_seg, &vfsops);
497 if (error != 0)
498 goto done;
499 vfsopsrele = true;
500 }
501
502 /*
503 * We allow data to be NULL, even for userspace. Some fs's don't need
504 * it. The others will handle NULL.
505 */
506 if (data != NULL && data_seg == UIO_USERSPACE) {
507 if (data_len == 0) {
508 /* No length supplied, use default for filesystem */
509 data_len = vfsops->vfs_min_mount_data;
510
511 /*
512 * Hopefully a longer buffer won't make copyin() fail.
513 * For compatibility with 3.0 and earlier.
514 */
515 if (flags & MNT_UPDATE
516 && data_len < sizeof (struct mnt_export_args30))
517 data_len = sizeof (struct mnt_export_args30);
518 }
519 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
520 error = EINVAL;
521 goto done;
522 }
523 alloc_sz = data_len;
524 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
525
526 /* NFS needs the buffer even for mnt_getargs .... */
527 error = copyin(data, data_buf, data_len);
528 if (error != 0)
529 goto done;
530 }
531
532 if (flags & MNT_GETARGS) {
533 if (data_len == 0) {
534 error = EINVAL;
535 goto done;
536 }
537 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
538 if (error != 0)
539 goto done;
540 if (data_seg == UIO_USERSPACE)
541 error = copyout(data_buf, data, data_len);
542 *retval = data_len;
543 } else if (flags & MNT_UPDATE) {
544 error = mount_update(l, vp, path, flags, data_buf, &data_len);
545 } else {
546 /* Locking is handled internally in mount_domount(). */
547 KASSERT(vfsopsrele == true);
548 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
549 &data_len);
550 vfsopsrele = false;
551 }
552
553 done:
554 if (vfsopsrele)
555 vfs_delref(vfsops);
556 if (vp != NULL) {
557 vrele(vp);
558 }
559 if (data_buf != data)
560 kmem_free(data_buf, alloc_sz);
561 return (error);
562 }
563
564 /*
565 * Unmount a file system.
566 *
567 * Note: unmount takes a path to the vnode mounted on as argument,
568 * not special file (as before).
569 */
570 /* ARGSUSED */
571 int
572 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
573 {
574 /* {
575 syscallarg(const char *) path;
576 syscallarg(int) flags;
577 } */
578 struct vnode *vp;
579 struct mount *mp;
580 int error;
581 struct pathbuf *pb;
582 struct nameidata nd;
583
584 error = pathbuf_copyin(SCARG(uap, path), &pb);
585 if (error) {
586 return error;
587 }
588
589 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
590 if ((error = namei(&nd)) != 0) {
591 pathbuf_destroy(pb);
592 return error;
593 }
594 vp = nd.ni_vp;
595 pathbuf_destroy(pb);
596
597 mp = vp->v_mount;
598 vfs_ref(mp);
599 VOP_UNLOCK(vp);
600
601 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
602 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
603 if (error) {
604 vrele(vp);
605 vfs_rele(mp);
606 return (error);
607 }
608
609 /*
610 * Don't allow unmounting the root file system.
611 */
612 if (mp->mnt_flag & MNT_ROOTFS) {
613 vrele(vp);
614 vfs_rele(mp);
615 return (EINVAL);
616 }
617
618 /*
619 * Must be the root of the filesystem
620 */
621 if ((vp->v_vflag & VV_ROOT) == 0) {
622 vrele(vp);
623 vfs_rele(mp);
624 return (EINVAL);
625 }
626
627 vrele(vp);
628 error = dounmount(mp, SCARG(uap, flags), l);
629 vfs_rele(mp);
630 return error;
631 }
632
633 /*
634 * Sync each mounted filesystem.
635 */
636 #ifdef DEBUG
637 int syncprt = 0;
638 struct ctldebug debug0 = { "syncprt", &syncprt };
639 #endif
640
641 void
642 do_sys_sync(struct lwp *l)
643 {
644 mount_iterator_t *iter;
645 struct mount *mp;
646 int asyncflag;
647
648 mountlist_iterator_init(&iter);
649 while ((mp = mountlist_iterator_next(iter)) != NULL) {
650 fstrans_start(mp, FSTRANS_SHARED);
651 mutex_enter(&mp->mnt_updating);
652 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
653 asyncflag = mp->mnt_flag & MNT_ASYNC;
654 mp->mnt_flag &= ~MNT_ASYNC;
655 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
656 if (asyncflag)
657 mp->mnt_flag |= MNT_ASYNC;
658 }
659 mutex_exit(&mp->mnt_updating);
660 fstrans_done(mp);
661 }
662 mountlist_iterator_destroy(iter);
663 #ifdef DEBUG
664 if (syncprt)
665 vfs_bufstats();
666 #endif /* DEBUG */
667 }
668
669 /* ARGSUSED */
670 int
671 sys_sync(struct lwp *l, const void *v, register_t *retval)
672 {
673 do_sys_sync(l);
674 return (0);
675 }
676
677
678 /*
679 * Access or change filesystem quotas.
680 *
681 * (this is really 14 different calls bundled into one)
682 */
683
684 static int
685 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
686 {
687 struct quotastat info_k;
688 int error;
689
690 /* ensure any padding bytes are cleared */
691 memset(&info_k, 0, sizeof(info_k));
692
693 error = vfs_quotactl_stat(mp, &info_k);
694 if (error) {
695 return error;
696 }
697
698 return copyout(&info_k, info_u, sizeof(info_k));
699 }
700
701 static int
702 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
703 struct quotaidtypestat *info_u)
704 {
705 struct quotaidtypestat info_k;
706 int error;
707
708 /* ensure any padding bytes are cleared */
709 memset(&info_k, 0, sizeof(info_k));
710
711 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
712 if (error) {
713 return error;
714 }
715
716 return copyout(&info_k, info_u, sizeof(info_k));
717 }
718
719 static int
720 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
721 struct quotaobjtypestat *info_u)
722 {
723 struct quotaobjtypestat info_k;
724 int error;
725
726 /* ensure any padding bytes are cleared */
727 memset(&info_k, 0, sizeof(info_k));
728
729 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
730 if (error) {
731 return error;
732 }
733
734 return copyout(&info_k, info_u, sizeof(info_k));
735 }
736
737 static int
738 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
739 struct quotaval *val_u)
740 {
741 struct quotakey key_k;
742 struct quotaval val_k;
743 int error;
744
745 /* ensure any padding bytes are cleared */
746 memset(&val_k, 0, sizeof(val_k));
747
748 error = copyin(key_u, &key_k, sizeof(key_k));
749 if (error) {
750 return error;
751 }
752
753 error = vfs_quotactl_get(mp, &key_k, &val_k);
754 if (error) {
755 return error;
756 }
757
758 return copyout(&val_k, val_u, sizeof(val_k));
759 }
760
761 static int
762 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
763 const struct quotaval *val_u)
764 {
765 struct quotakey key_k;
766 struct quotaval val_k;
767 int error;
768
769 error = copyin(key_u, &key_k, sizeof(key_k));
770 if (error) {
771 return error;
772 }
773
774 error = copyin(val_u, &val_k, sizeof(val_k));
775 if (error) {
776 return error;
777 }
778
779 return vfs_quotactl_put(mp, &key_k, &val_k);
780 }
781
782 static int
783 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
784 {
785 struct quotakey key_k;
786 int error;
787
788 error = copyin(key_u, &key_k, sizeof(key_k));
789 if (error) {
790 return error;
791 }
792
793 return vfs_quotactl_del(mp, &key_k);
794 }
795
796 static int
797 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
798 {
799 struct quotakcursor cursor_k;
800 int error;
801
802 /* ensure any padding bytes are cleared */
803 memset(&cursor_k, 0, sizeof(cursor_k));
804
805 error = vfs_quotactl_cursoropen(mp, &cursor_k);
806 if (error) {
807 return error;
808 }
809
810 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
811 }
812
813 static int
814 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
815 {
816 struct quotakcursor cursor_k;
817 int error;
818
819 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
820 if (error) {
821 return error;
822 }
823
824 return vfs_quotactl_cursorclose(mp, &cursor_k);
825 }
826
827 static int
828 do_sys_quotactl_cursorskipidtype(struct mount *mp,
829 struct quotakcursor *cursor_u, int idtype)
830 {
831 struct quotakcursor cursor_k;
832 int error;
833
834 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
835 if (error) {
836 return error;
837 }
838
839 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
840 if (error) {
841 return error;
842 }
843
844 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
845 }
846
847 static int
848 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
849 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
850 unsigned *ret_u)
851 {
852 #define CGET_STACK_MAX 8
853 struct quotakcursor cursor_k;
854 struct quotakey stackkeys[CGET_STACK_MAX];
855 struct quotaval stackvals[CGET_STACK_MAX];
856 struct quotakey *keys_k;
857 struct quotaval *vals_k;
858 unsigned ret_k;
859 int error;
860
861 if (maxnum > 128) {
862 maxnum = 128;
863 }
864
865 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
866 if (error) {
867 return error;
868 }
869
870 if (maxnum <= CGET_STACK_MAX) {
871 keys_k = stackkeys;
872 vals_k = stackvals;
873 /* ensure any padding bytes are cleared */
874 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
875 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
876 } else {
877 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
878 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
879 }
880
881 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
882 &ret_k);
883 if (error) {
884 goto fail;
885 }
886
887 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
888 if (error) {
889 goto fail;
890 }
891
892 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
893 if (error) {
894 goto fail;
895 }
896
897 error = copyout(&ret_k, ret_u, sizeof(ret_k));
898 if (error) {
899 goto fail;
900 }
901
902 /* do last to maximize the chance of being able to recover a failure */
903 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
904
905 fail:
906 if (keys_k != stackkeys) {
907 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
908 }
909 if (vals_k != stackvals) {
910 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
911 }
912 return error;
913 }
914
915 static int
916 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
917 int *ret_u)
918 {
919 struct quotakcursor cursor_k;
920 int ret_k;
921 int error;
922
923 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
924 if (error) {
925 return error;
926 }
927
928 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
929 if (error) {
930 return error;
931 }
932
933 error = copyout(&ret_k, ret_u, sizeof(ret_k));
934 if (error) {
935 return error;
936 }
937
938 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
939 }
940
941 static int
942 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
943 {
944 struct quotakcursor cursor_k;
945 int error;
946
947 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
948 if (error) {
949 return error;
950 }
951
952 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
953 if (error) {
954 return error;
955 }
956
957 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
958 }
959
960 static int
961 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
962 {
963 char *path_k;
964 int error;
965
966 /* XXX this should probably be a struct pathbuf */
967 path_k = PNBUF_GET();
968 error = copyin(path_u, path_k, PATH_MAX);
969 if (error) {
970 PNBUF_PUT(path_k);
971 return error;
972 }
973
974 error = vfs_quotactl_quotaon(mp, idtype, path_k);
975
976 PNBUF_PUT(path_k);
977 return error;
978 }
979
/*
 * QUOTACTL_QUOTAOFF: pass straight through to vfs_quotactl_quotaoff()
 * for id type idtype on mp; there is no user data to copy.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);
	return rv;
}
985
986 int
987 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
988 {
989 struct mount *mp;
990 struct vnode *vp;
991 int error;
992
993 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
994 if (error != 0)
995 return (error);
996 mp = vp->v_mount;
997
998 switch (args->qc_op) {
999 case QUOTACTL_STAT:
1000 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
1001 break;
1002 case QUOTACTL_IDTYPESTAT:
1003 error = do_sys_quotactl_idtypestat(mp,
1004 args->u.idtypestat.qc_idtype,
1005 args->u.idtypestat.qc_info);
1006 break;
1007 case QUOTACTL_OBJTYPESTAT:
1008 error = do_sys_quotactl_objtypestat(mp,
1009 args->u.objtypestat.qc_objtype,
1010 args->u.objtypestat.qc_info);
1011 break;
1012 case QUOTACTL_GET:
1013 error = do_sys_quotactl_get(mp,
1014 args->u.get.qc_key,
1015 args->u.get.qc_val);
1016 break;
1017 case QUOTACTL_PUT:
1018 error = do_sys_quotactl_put(mp,
1019 args->u.put.qc_key,
1020 args->u.put.qc_val);
1021 break;
1022 case QUOTACTL_DEL:
1023 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1024 break;
1025 case QUOTACTL_CURSOROPEN:
1026 error = do_sys_quotactl_cursoropen(mp,
1027 args->u.cursoropen.qc_cursor);
1028 break;
1029 case QUOTACTL_CURSORCLOSE:
1030 error = do_sys_quotactl_cursorclose(mp,
1031 args->u.cursorclose.qc_cursor);
1032 break;
1033 case QUOTACTL_CURSORSKIPIDTYPE:
1034 error = do_sys_quotactl_cursorskipidtype(mp,
1035 args->u.cursorskipidtype.qc_cursor,
1036 args->u.cursorskipidtype.qc_idtype);
1037 break;
1038 case QUOTACTL_CURSORGET:
1039 error = do_sys_quotactl_cursorget(mp,
1040 args->u.cursorget.qc_cursor,
1041 args->u.cursorget.qc_keys,
1042 args->u.cursorget.qc_vals,
1043 args->u.cursorget.qc_maxnum,
1044 args->u.cursorget.qc_ret);
1045 break;
1046 case QUOTACTL_CURSORATEND:
1047 error = do_sys_quotactl_cursoratend(mp,
1048 args->u.cursoratend.qc_cursor,
1049 args->u.cursoratend.qc_ret);
1050 break;
1051 case QUOTACTL_CURSORREWIND:
1052 error = do_sys_quotactl_cursorrewind(mp,
1053 args->u.cursorrewind.qc_cursor);
1054 break;
1055 case QUOTACTL_QUOTAON:
1056 error = do_sys_quotactl_quotaon(mp,
1057 args->u.quotaon.qc_idtype,
1058 args->u.quotaon.qc_quotafile);
1059 break;
1060 case QUOTACTL_QUOTAOFF:
1061 error = do_sys_quotactl_quotaoff(mp,
1062 args->u.quotaoff.qc_idtype);
1063 break;
1064 default:
1065 error = EINVAL;
1066 break;
1067 }
1068
1069 vrele(vp);
1070 return error;
1071 }
1072
1073 /* ARGSUSED */
1074 int
1075 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1076 register_t *retval)
1077 {
1078 /* {
1079 syscallarg(const char *) path;
1080 syscallarg(struct quotactl_args *) args;
1081 } */
1082 struct quotactl_args args;
1083 int error;
1084
1085 error = copyin(SCARG(uap, args), &args, sizeof(args));
1086 if (error) {
1087 return error;
1088 }
1089
1090 return do_sys_quotactl(SCARG(uap, path), &args);
1091 }
1092
1093 int
1094 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1095 int root)
1096 {
1097 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1098 int error = 0;
1099
1100 /*
1101 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1102 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1103 * overrides MNT_NOWAIT.
1104 */
1105 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1106 (flags != MNT_WAIT && flags != 0)) {
1107 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1108 goto done;
1109 }
1110
1111 /* Get the filesystem stats now */
1112 memset(sp, 0, sizeof(*sp));
1113 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1114 return error;
1115 }
1116
1117 if (cwdi->cwdi_rdir == NULL)
1118 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1119 done:
1120 if (cwdi->cwdi_rdir != NULL) {
1121 size_t len;
1122 char *bp;
1123 char c;
1124 char *path = PNBUF_GET();
1125
1126 bp = path + MAXPATHLEN;
1127 *--bp = '\0';
1128 rw_enter(&cwdi->cwdi_lock, RW_READER);
1129 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1130 MAXPATHLEN / 2, 0, l);
1131 rw_exit(&cwdi->cwdi_lock);
1132 if (error) {
1133 PNBUF_PUT(path);
1134 return error;
1135 }
1136 len = strlen(bp);
1137 if (len != 1) {
1138 /*
1139 * for mount points that are below our root, we can see
1140 * them, so we fix up the pathname and return them. The
1141 * rest we cannot see, so we don't allow viewing the
1142 * data.
1143 */
1144 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1145 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1146 (void)strlcpy(sp->f_mntonname,
1147 c == '\0' ? "/" : &sp->f_mntonname[len],
1148 sizeof(sp->f_mntonname));
1149 } else {
1150 if (root)
1151 (void)strlcpy(sp->f_mntonname, "/",
1152 sizeof(sp->f_mntonname));
1153 else
1154 error = EPERM;
1155 }
1156 }
1157 PNBUF_PUT(path);
1158 }
1159 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1160 return error;
1161 }
1162
1163 /*
1164 * Get filesystem statistics by path.
1165 */
1166 int
1167 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1168 {
1169 struct mount *mp;
1170 int error;
1171 struct vnode *vp;
1172
1173 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1174 if (error != 0)
1175 return error;
1176 mp = vp->v_mount;
1177 error = dostatvfs(mp, sb, l, flags, 1);
1178 vrele(vp);
1179 return error;
1180 }
1181
1182 /* ARGSUSED */
1183 int
1184 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1185 {
1186 /* {
1187 syscallarg(const char *) path;
1188 syscallarg(struct statvfs *) buf;
1189 syscallarg(int) flags;
1190 } */
1191 struct statvfs *sb;
1192 int error;
1193
1194 sb = STATVFSBUF_GET();
1195 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1196 if (error == 0)
1197 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1198 STATVFSBUF_PUT(sb);
1199 return error;
1200 }
1201
1202 /*
1203 * Get filesystem statistics by fd.
1204 */
1205 int
1206 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1207 {
1208 file_t *fp;
1209 struct mount *mp;
1210 int error;
1211
1212 /* fd_getvnode() will use the descriptor for us */
1213 if ((error = fd_getvnode(fd, &fp)) != 0)
1214 return (error);
1215 mp = fp->f_vnode->v_mount;
1216 error = dostatvfs(mp, sb, curlwp, flags, 1);
1217 fd_putfile(fd);
1218 return error;
1219 }
1220
1221 /* ARGSUSED */
1222 int
1223 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1224 {
1225 /* {
1226 syscallarg(int) fd;
1227 syscallarg(struct statvfs *) buf;
1228 syscallarg(int) flags;
1229 } */
1230 struct statvfs *sb;
1231 int error;
1232
1233 sb = STATVFSBUF_GET();
1234 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1235 if (error == 0)
1236 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1237 STATVFSBUF_PUT(sb);
1238 return error;
1239 }
1240
1241
1242 /*
1243 * Get statistics on all filesystems.
1244 */
1245 int
1246 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1247 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1248 register_t *retval)
1249 {
1250 int root = 0;
1251 mount_iterator_t *iter;
1252 struct proc *p = l->l_proc;
1253 struct mount *mp;
1254 struct statvfs *sb;
1255 size_t count, maxcount;
1256 int error = 0;
1257
1258 sb = STATVFSBUF_GET();
1259 maxcount = bufsize / entry_sz;
1260 count = 0;
1261 mountlist_iterator_init(&iter);
1262 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1263 if (sfsp && count < maxcount) {
1264 error = dostatvfs(mp, sb, l, flags, 0);
1265 if (error) {
1266 error = 0;
1267 continue;
1268 }
1269 error = copyfn(sb, sfsp, entry_sz);
1270 if (error)
1271 goto out;
1272 sfsp = (char *)sfsp + entry_sz;
1273 root |= strcmp(sb->f_mntonname, "/") == 0;
1274 }
1275 count++;
1276 }
1277
1278 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1279 /*
1280 * fake a root entry
1281 */
1282 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1283 sb, l, flags, 1);
1284 if (error != 0)
1285 goto out;
1286 if (sfsp) {
1287 error = copyfn(sb, sfsp, entry_sz);
1288 if (error != 0)
1289 goto out;
1290 }
1291 count++;
1292 }
1293 if (sfsp && count > maxcount)
1294 *retval = maxcount;
1295 else
1296 *retval = count;
1297 out:
1298 mountlist_iterator_destroy(iter);
1299 STATVFSBUF_PUT(sb);
1300 return error;
1301 }
1302
1303 int
1304 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1305 {
1306 /* {
1307 syscallarg(struct statvfs *) buf;
1308 syscallarg(size_t) bufsize;
1309 syscallarg(int) flags;
1310 } */
1311
1312 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1313 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1314 }
1315
/*
 * Change current working directory to a given file descriptor.
 *
 * Fails with ENOTDIR if the descriptor's vnode is not a directory, with
 * the VOP_ACCESS() error if VEXEC access is denied, with an error from
 * vfs_busy()/VFS_ROOT() while crossing mount points, and with EPERM if
 * the process has a root directory and vn_isunder() rejects the target.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error, fd;

	/* fd_getvnode() will use the descriptor for us */
	fd = SCARG(uap, fd);
	if ((error = fd_getvnode(fd, &fp)) != 0)
		return (error);
	vp = fp->f_vnode;

	/*
	 * Take our own reference: on success it is what ends up stored
	 * in cwdi_cdir, on failure it is dropped again below.
	 */
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}
	/*
	 * While something is mounted on the directory, replace it with
	 * the vnode VFS_ROOT() returns for that mount.  The covered
	 * vnode is released (vput) before the root is fetched.
	 */
	while ((mp = vp->v_mountedhere) != NULL) {
		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, false, NULL);
		if (error)
			goto out;
		vp = tdp;
	}
	/* Keep the reference but drop the lock before taking cwdi_lock. */
	VOP_UNLOCK(vp);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap in the new directory, releasing the old one. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return (error);
}
1381
1382 /*
1383 * Change this process's notion of the root directory to a given file
1384 * descriptor.
1385 */
1386 int
1387 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1388 {
1389 struct proc *p = l->l_proc;
1390 struct vnode *vp;
1391 file_t *fp;
1392 int error, fd = SCARG(uap, fd);
1393
1394 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1395 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1396 return error;
1397 /* fd_getvnode() will use the descriptor for us */
1398 if ((error = fd_getvnode(fd, &fp)) != 0)
1399 return error;
1400 vp = fp->f_vnode;
1401 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1402 if (vp->v_type != VDIR)
1403 error = ENOTDIR;
1404 else
1405 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1406 VOP_UNLOCK(vp);
1407 if (error)
1408 goto out;
1409 vref(vp);
1410
1411 change_root(p->p_cwdi, vp, l);
1412
1413 out:
1414 fd_putfile(fd);
1415 return (error);
1416 }
1417
1418 /*
1419 * Change current working directory (``.'').
1420 */
1421 /* ARGSUSED */
1422 int
1423 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1424 {
1425 /* {
1426 syscallarg(const char *) path;
1427 } */
1428 struct proc *p = l->l_proc;
1429 struct cwdinfo *cwdi;
1430 int error;
1431 struct vnode *vp;
1432
1433 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1434 &vp, l)) != 0)
1435 return (error);
1436 cwdi = p->p_cwdi;
1437 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1438 vrele(cwdi->cwdi_cdir);
1439 cwdi->cwdi_cdir = vp;
1440 rw_exit(&cwdi->cwdi_lock);
1441 return (0);
1442 }
1443
1444 /*
1445 * Change notion of root (``/'') directory.
1446 */
1447 /* ARGSUSED */
1448 int
1449 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1450 {
1451 /* {
1452 syscallarg(const char *) path;
1453 } */
1454 struct proc *p = l->l_proc;
1455 int error;
1456 struct vnode *vp;
1457
1458 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1459 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1460 return (error);
1461 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1462 &vp, l)) != 0)
1463 return (error);
1464
1465 change_root(p->p_cwdi, vp, l);
1466
1467 return (0);
1468 }
1469
/*
 * Common routine for chroot and fchroot.
 * NB: callers need to properly authorize the change root operation.
 *
 * Installs `vp' as cwdi->cwdi_rdir.  The caller's reference on `vp' is
 * stored there as-is (no extra reference is taken for the root), and
 * any previous root directory is released.  Afterwards the process
 * credentials are cloned, passed through kauth_proc_chroot() and
 * installed with proc_crmod_leave().
 */
void
change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
{
	struct proc *p = l->l_proc;
	kauth_cred_t ncred;

	/* Allocate the new credential before any lock is taken. */
	ncred = kauth_cred_alloc();

	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir != NULL)
		vrele(cwdi->cwdi_rdir);
	cwdi->cwdi_rdir = vp;

	/*
	 * Prevent escaping from chroot by putting the root under
	 * the working directory.  Silently chdir to / if we aren't
	 * already there.
	 */
	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
		/*
		 * XXX would be more failsafe to change directory to a
		 * deadfs node here instead
		 */
		vrele(cwdi->cwdi_cdir);
		/* vp is now stored twice, so it needs a second reference. */
		vref(vp);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

	/* Get a write lock on the process credential. */
	proc_crmod_enter();

	kauth_cred_clone(p->p_cred, ncred);
	kauth_proc_chroot(ncred, p->p_cwdi);

	/* Broadcast our credentials to the process and other LWPs. */
	proc_crmod_leave(ncred, p->p_cred, true);
}
1512
1513 /*
1514 * Common routine for chroot and chdir.
1515 * XXX "where" should be enum uio_seg
1516 */
1517 int
1518 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1519 {
1520 struct pathbuf *pb;
1521 struct nameidata nd;
1522 int error;
1523
1524 error = pathbuf_maybe_copyin(path, where, &pb);
1525 if (error) {
1526 return error;
1527 }
1528 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1529 if ((error = namei(&nd)) != 0) {
1530 pathbuf_destroy(pb);
1531 return error;
1532 }
1533 *vpp = nd.ni_vp;
1534 pathbuf_destroy(pb);
1535
1536 if ((*vpp)->v_type != VDIR)
1537 error = ENOTDIR;
1538 else
1539 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1540
1541 if (error)
1542 vput(*vpp);
1543 else
1544 VOP_UNLOCK(*vpp);
1545 return (error);
1546 }
1547
/*
 * Internals of sys_open - path has already been converted into a pathbuf
 * (so we can easily reuse this function from other parts of the kernel,
 * like posix_spawn post-processing).
 *
 * `dvp', when non-NULL, is handed to the lookup with NDAT().  On
 * success the new descriptor number is stored in *fd and 0 is
 * returned.  Returns EINVAL if neither FREAD nor FWRITE results from
 * `open_flags'; ERESTART from vn_open() is turned into EINTR.
 */
int
do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
	int open_mode, int *fd)
{
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	file_t *fp;
	struct vnode *vp;
	int flags, cmode;
	int indx, error;
	struct nameidata nd;

	/* O_SEARCH is stripped before the flags are converted. */
	if (open_flags & O_SEARCH) {
		open_flags &= ~(int)O_SEARCH;
	}

	flags = FFLAGS(open_flags);
	if ((flags & (FREAD | FWRITE)) == 0)
		return EINVAL;

	if ((error = fd_allocfile(&fp, &indx)) != 0) {
		return error;
	}

	/* We're going to read cwdi->cwdi_cmask unlocked here. */
	cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
	if (dvp != NULL)
		NDAT(&nd, dvp);

	/*
	 * Seed l_dupfd with a negative value derived from the new index;
	 * a non-negative value after a failed vn_open() is what the
	 * EDUPFD/EMOVEFD handling below keys on.
	 */
	l->l_dupfd = -indx - 1;	/* XXX check for fdopen */
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		/* The file allocated above is discarded in every failure case. */
		fd_abort(p, fp, indx);
		if ((error == EDUPFD || error == EMOVEFD) &&
		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
			*fd = indx;
			return 0;
		}
		if (error == ERESTART)
			error = EINTR;
		return error;
	}

	l->l_dupfd = 0;
	vp = nd.ni_vp;

	/*
	 * NOTE(review): no cleanup of fp/vp happens here on failure;
	 * presumably open_setfp() takes care of it -- confirm.
	 */
	if ((error = open_setfp(l, fp, vp, indx, flags)))
		return error;

	VOP_UNLOCK(vp);
	*fd = indx;
	fd_affix(p, fp, indx);
	return 0;
}
1609
1610 int
1611 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1612 {
1613 struct pathbuf *pb;
1614 int error, oflags;
1615
1616 oflags = FFLAGS(open_flags);
1617 if ((oflags & (FREAD | FWRITE)) == 0)
1618 return EINVAL;
1619
1620 pb = pathbuf_create(path);
1621 if (pb == NULL)
1622 return ENOMEM;
1623
1624 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1625 pathbuf_destroy(pb);
1626
1627 return error;
1628 }
1629
1630 /*
1631 * Check permissions, allocate an open file structure,
1632 * and call the device open routine if any.
1633 */
1634 static int
1635 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1636 int mode, int *fd)
1637 {
1638 file_t *dfp = NULL;
1639 struct vnode *dvp = NULL;
1640 struct pathbuf *pb;
1641 int error;
1642
1643 #ifdef COMPAT_10 /* XXX: and perhaps later */
1644 if (path == NULL) {
1645 pb = pathbuf_create(".");
1646 if (pb == NULL)
1647 return ENOMEM;
1648 } else
1649 #endif
1650 {
1651 error = pathbuf_copyin(path, &pb);
1652 if (error)
1653 return error;
1654 }
1655
1656 if (fdat != AT_FDCWD) {
1657 /* fd_getvnode() will use the descriptor for us */
1658 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1659 goto out;
1660
1661 dvp = dfp->f_vnode;
1662 }
1663
1664 error = do_open(l, dvp, pb, flags, mode, fd);
1665
1666 if (dfp != NULL)
1667 fd_putfile(fdat);
1668 out:
1669 pathbuf_destroy(pb);
1670 return error;
1671 }
1672
1673 int
1674 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1675 {
1676 /* {
1677 syscallarg(const char *) path;
1678 syscallarg(int) flags;
1679 syscallarg(int) mode;
1680 } */
1681 int error;
1682 int fd;
1683
1684 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1685 SCARG(uap, flags), SCARG(uap, mode), &fd);
1686
1687 if (error == 0)
1688 *retval = fd;
1689
1690 return error;
1691 }
1692
1693 int
1694 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1695 {
1696 /* {
1697 syscallarg(int) fd;
1698 syscallarg(const char *) path;
1699 syscallarg(int) oflags;
1700 syscallarg(int) mode;
1701 } */
1702 int error;
1703 int fd;
1704
1705 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1706 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1707
1708 if (error == 0)
1709 *retval = fd;
1710
1711 return error;
1712 }
1713
1714 static void
1715 vfs__fhfree(fhandle_t *fhp)
1716 {
1717 size_t fhsize;
1718
1719 fhsize = FHANDLE_SIZE(fhp);
1720 kmem_free(fhp, fhsize);
1721 }
1722
1723 /*
1724 * vfs_composefh: compose a filehandle.
1725 */
1726
1727 int
1728 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1729 {
1730 struct mount *mp;
1731 struct fid *fidp;
1732 int error;
1733 size_t needfhsize;
1734 size_t fidsize;
1735
1736 mp = vp->v_mount;
1737 fidp = NULL;
1738 if (*fh_size < FHANDLE_SIZE_MIN) {
1739 fidsize = 0;
1740 } else {
1741 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1742 if (fhp != NULL) {
1743 memset(fhp, 0, *fh_size);
1744 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1745 fidp = &fhp->fh_fid;
1746 }
1747 }
1748 error = VFS_VPTOFH(vp, fidp, &fidsize);
1749 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1750 if (error == 0 && *fh_size < needfhsize) {
1751 error = E2BIG;
1752 }
1753 *fh_size = needfhsize;
1754 return error;
1755 }
1756
1757 int
1758 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1759 {
1760 struct mount *mp;
1761 fhandle_t *fhp;
1762 size_t fhsize;
1763 size_t fidsize;
1764 int error;
1765
1766 mp = vp->v_mount;
1767 fidsize = 0;
1768 error = VFS_VPTOFH(vp, NULL, &fidsize);
1769 KASSERT(error != 0);
1770 if (error != E2BIG) {
1771 goto out;
1772 }
1773 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1774 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1775 if (fhp == NULL) {
1776 error = ENOMEM;
1777 goto out;
1778 }
1779 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1780 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1781 if (error == 0) {
1782 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1783 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1784 *fhpp = fhp;
1785 } else {
1786 kmem_free(fhp, fhsize);
1787 }
1788 out:
1789 return error;
1790 }
1791
/*
 * vfs_composefh_free: release a filehandle obtained from
 * vfs_composefh_alloc().  Thin wrapper around vfs__fhfree().
 */
void
vfs_composefh_free(fhandle_t *fhp)
{

	vfs__fhfree(fhp);
}
1798
1799 /*
1800 * vfs_fhtovp: lookup a vnode by a filehandle.
1801 */
1802
1803 int
1804 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1805 {
1806 struct mount *mp;
1807 int error;
1808
1809 *vpp = NULL;
1810 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1811 if (mp == NULL) {
1812 error = ESTALE;
1813 goto out;
1814 }
1815 if (mp->mnt_op->vfs_fhtovp == NULL) {
1816 error = EOPNOTSUPP;
1817 goto out;
1818 }
1819 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1820 out:
1821 return error;
1822 }
1823
1824 /*
1825 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1826 * the needed size.
1827 */
1828
1829 int
1830 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1831 {
1832 fhandle_t *fhp;
1833 int error;
1834
1835 if (fhsize > FHANDLE_SIZE_MAX) {
1836 return EINVAL;
1837 }
1838 if (fhsize < FHANDLE_SIZE_MIN) {
1839 return EINVAL;
1840 }
1841 again:
1842 fhp = kmem_alloc(fhsize, KM_SLEEP);
1843 if (fhp == NULL) {
1844 return ENOMEM;
1845 }
1846 error = copyin(ufhp, fhp, fhsize);
1847 if (error == 0) {
1848 /* XXX this check shouldn't be here */
1849 if (FHANDLE_SIZE(fhp) == fhsize) {
1850 *fhpp = fhp;
1851 return 0;
1852 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1853 /*
1854 * a kludge for nfsv2 padded handles.
1855 */
1856 size_t sz;
1857
1858 sz = FHANDLE_SIZE(fhp);
1859 kmem_free(fhp, fhsize);
1860 fhsize = sz;
1861 goto again;
1862 } else {
1863 /*
1864 * userland told us wrong size.
1865 */
1866 error = EINVAL;
1867 }
1868 }
1869 kmem_free(fhp, fhsize);
1870 return error;
1871 }
1872
/*
 * vfs_copyinfh_free: release a filehandle obtained from
 * vfs_copyinfh_alloc().  Thin wrapper around vfs__fhfree().
 */
void
vfs_copyinfh_free(fhandle_t *fhp)
{

	vfs__fhfree(fhp);
}
1879
1880 /*
1881 * Get file handle system call
1882 */
1883 int
1884 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1885 {
1886 /* {
1887 syscallarg(char *) fname;
1888 syscallarg(fhandle_t *) fhp;
1889 syscallarg(size_t *) fh_size;
1890 } */
1891 struct vnode *vp;
1892 fhandle_t *fh;
1893 int error;
1894 struct pathbuf *pb;
1895 struct nameidata nd;
1896 size_t sz;
1897 size_t usz;
1898
1899 /*
1900 * Must be super user
1901 */
1902 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1903 0, NULL, NULL, NULL);
1904 if (error)
1905 return (error);
1906
1907 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1908 if (error) {
1909 return error;
1910 }
1911 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1912 error = namei(&nd);
1913 if (error) {
1914 pathbuf_destroy(pb);
1915 return error;
1916 }
1917 vp = nd.ni_vp;
1918 pathbuf_destroy(pb);
1919
1920 error = vfs_composefh_alloc(vp, &fh);
1921 vput(vp);
1922 if (error != 0) {
1923 return error;
1924 }
1925 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1926 if (error != 0) {
1927 goto out;
1928 }
1929 sz = FHANDLE_SIZE(fh);
1930 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1931 if (error != 0) {
1932 goto out;
1933 }
1934 if (usz >= sz) {
1935 error = copyout(fh, SCARG(uap, fhp), sz);
1936 } else {
1937 error = E2BIG;
1938 }
1939 out:
1940 vfs_composefh_free(fh);
1941 return (error);
1942 }
1943
/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * `ufhp'/`fhsize' describe the handle as passed to
 * vfs_copyinfh_alloc().  On success the new descriptor is stored in
 * *retval.  Returns EINVAL when neither FREAD nor FWRITE is requested
 * or O_CREAT is set, and EOPNOTSUPP for sockets.
 */

int
dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
    register_t *retval)
{
	file_t *fp;
	struct vnode *vp = NULL;
	kauth_cred_t cred = l->l_cred;
	file_t *nfp;
	int indx, error;
	struct vattr va;
	fhandle_t *fh;
	int flags;
	proc_t *p;

	p = curproc;

	/*
	 * Must be super user
	 */
	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
	    0, NULL, NULL, NULL)))
		return (error);

	/* O_SEARCH is stripped before the flags are converted. */
	if (oflags & O_SEARCH) {
		oflags &= ~(int)O_SEARCH;
	}

	flags = FFLAGS(oflags);
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	if ((flags & O_CREAT))
		return (EINVAL);
	if ((error = fd_allocfile(&nfp, &indx)) != 0)
		return (error);
	fp = nfp;
	error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
	if (error != 0) {
		goto bad;
	}
	/* The handle is only needed for the lookup; free it right after. */
	error = vfs_fhtovp(fh, &vp);
	vfs_copyinfh_free(fh);
	if (error != 0) {
		goto bad;
	}

	/* Now do an effective vn_open */

	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	error = vn_openchk(vp, cred, flags);
	if (error != 0)
		goto bad;
	if (flags & O_TRUNC) {
		VOP_UNLOCK(vp);			/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
		/* Truncate by setting only the size attribute to zero. */
		vattr_null(&va);
		va.va_size = 0;
		error = VOP_SETATTR(vp, &va, cred);
		if (error)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, flags, cred)) != 0)
		goto bad;
	if (flags & FWRITE) {
		/* v_writecount is updated under the vnode interlock. */
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	/* done with modified vn_open, now finish what sys_open does. */
	/*
	 * NOTE(review): no cleanup of fp/vp happens here on failure;
	 * presumably open_setfp() takes care of it -- confirm.
	 */
	if ((error = open_setfp(l, fp, vp, indx, flags)))
		return error;

	VOP_UNLOCK(vp);
	*retval = indx;
	fd_affix(p, fp, indx);
	return (0);

bad:
	/* Discard the allocated file and, if we got one, the vnode. */
	fd_abort(p, fp, indx);
	if (vp != NULL)
		vput(vp);
	return (error);
}
2037
2038 int
2039 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2040 {
2041 /* {
2042 syscallarg(const void *) fhp;
2043 syscallarg(size_t) fh_size;
2044 syscallarg(int) flags;
2045 } */
2046
2047 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2048 SCARG(uap, flags), retval);
2049 }
2050
2051 int
2052 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2053 {
2054 int error;
2055 fhandle_t *fh;
2056 struct vnode *vp;
2057
2058 /*
2059 * Must be super user
2060 */
2061 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2062 0, NULL, NULL, NULL)))
2063 return (error);
2064
2065 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2066 if (error != 0)
2067 return error;
2068
2069 error = vfs_fhtovp(fh, &vp);
2070 vfs_copyinfh_free(fh);
2071 if (error != 0)
2072 return error;
2073
2074 error = vn_stat(vp, sb);
2075 vput(vp);
2076 return error;
2077 }
2078
2079
2080 /* ARGSUSED */
2081 int
2082 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2083 {
2084 /* {
2085 syscallarg(const void *) fhp;
2086 syscallarg(size_t) fh_size;
2087 syscallarg(struct stat *) sb;
2088 } */
2089 struct stat sb;
2090 int error;
2091
2092 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2093 if (error)
2094 return error;
2095 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2096 }
2097
2098 int
2099 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2100 int flags)
2101 {
2102 fhandle_t *fh;
2103 struct mount *mp;
2104 struct vnode *vp;
2105 int error;
2106
2107 /*
2108 * Must be super user
2109 */
2110 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2111 0, NULL, NULL, NULL)))
2112 return error;
2113
2114 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2115 if (error != 0)
2116 return error;
2117
2118 error = vfs_fhtovp(fh, &vp);
2119 vfs_copyinfh_free(fh);
2120 if (error != 0)
2121 return error;
2122
2123 mp = vp->v_mount;
2124 error = dostatvfs(mp, sb, l, flags, 1);
2125 vput(vp);
2126 return error;
2127 }
2128
2129 /* ARGSUSED */
2130 int
2131 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2132 {
2133 /* {
2134 syscallarg(const void *) fhp;
2135 syscallarg(size_t) fh_size;
2136 syscallarg(struct statvfs *) buf;
2137 syscallarg(int) flags;
2138 } */
2139 struct statvfs *sb = STATVFSBUF_GET();
2140 int error;
2141
2142 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2143 SCARG(uap, flags));
2144 if (error == 0)
2145 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2146 STATVFSBUF_PUT(sb);
2147 return error;
2148 }
2149
2150 /*
2151 * Create a special file.
2152 */
2153 /* ARGSUSED */
2154 int
2155 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2156 register_t *retval)
2157 {
2158 /* {
2159 syscallarg(const char *) path;
2160 syscallarg(mode_t) mode;
2161 syscallarg(dev_t) dev;
2162 } */
2163 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2164 SCARG(uap, dev), retval, UIO_USERSPACE);
2165 }
2166
2167 int
2168 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2169 register_t *retval)
2170 {
2171 /* {
2172 syscallarg(int) fd;
2173 syscallarg(const char *) path;
2174 syscallarg(mode_t) mode;
2175 syscallarg(int) pad;
2176 syscallarg(dev_t) dev;
2177 } */
2178
2179 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2180 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2181 }
2182
/*
 * do_sys_mknod: do_sys_mknodat() with the path resolved relative to
 * the current working directory (AT_FDCWD).  `seg' says whether
 * `pathname' is a user-space or kernel-space pointer.
 */
int
do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
    register_t *retval, enum uio_seg seg)
{
	return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
}
2189
/*
 * do_sys_mknodat: common implementation of the mknod family.
 *
 * Requires the KAUTH_SYSTEM_MKNOD authorization.  The file-type bits of
 * `mode' choose the operation: device/bad-sector nodes go through
 * VOP_MKNOD(), S_IFWHT through VOP_WHITEOUT() and S_IFREG through
 * VOP_CREATE(); any other type yields EINVAL.  Returns EEXIST if the
 * name already exists, and EINVAL for a VOP_MKNOD request whose device
 * number is VNOVAL.  `retval' is not written by this function.
 */
int
do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
    dev_t dev, register_t *retval, enum uio_seg seg)
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error, optype;
	struct pathbuf *pb;
	struct nameidata nd;
	const char *pathstring;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
	    0, NULL, NULL, NULL)) != 0)
		return (error);

	/* Default operation; the type switch below may override it. */
	optype = VOP_MKNOD_DESCOFFSET;

	error = pathbuf_maybe_copyin(pathname, seg, &pb);
	if (error) {
		return error;
	}
	/* String copy of the path, handed to veriexec_openchk() below. */
	pathstring = pathbuf_stringcopy_get(pb);
	if (pathstring == NULL) {
		pathbuf_destroy(pb);
		return ENOMEM;
	}

	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);

	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	if (vp != NULL)
		error = EEXIST;
	else {
		vattr_null(&vattr);
		/* We will read cwdi->cwdi_cmask unlocked. */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
		vattr.va_rdev = dev;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			optype = VOP_WHITEOUT_DESCOFFSET;
			break;
		case S_IFREG:
#if NVERIEXEC > 0
			error = veriexec_openchk(l, nd.ni_vp, pathstring,
			    O_CREAT);
#endif /* NVERIEXEC > 0 */
			vattr.va_type = VREG;
			vattr.va_rdev = VNOVAL;
			optype = VOP_CREATE_DESCOFFSET;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * vattr is only initialized when the name did not exist; the
	 * error == 0 test keeps it from being read otherwise.
	 */
	if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
	    && vattr.va_rdev == VNOVAL)
		error = EINVAL;
	if (!error) {
		switch (optype) {
		case VOP_WHITEOUT_DESCOFFSET:
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
			if (error)
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			break;

		case VOP_MKNOD_DESCOFFSET:
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;

		case VOP_CREATE_DESCOFFSET:
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;
		}
	} else {
		/*
		 * Abort the pending create and release the lookup results:
		 * the parent via vrele() when it is the same vnode as the
		 * existing target, else via vput().
		 */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
out:
	pathbuf_stringcopy_put(pb, pathstring);
	pathbuf_destroy(pb);
	return (error);
}
2301
2302 /*
2303 * Create a named pipe.
2304 */
2305 /* ARGSUSED */
2306 int
2307 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2308 {
2309 /* {
2310 syscallarg(const char *) path;
2311 syscallarg(int) mode;
2312 } */
2313 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2314 }
2315
2316 int
2317 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2318 register_t *retval)
2319 {
2320 /* {
2321 syscallarg(int) fd;
2322 syscallarg(const char *) path;
2323 syscallarg(int) mode;
2324 } */
2325
2326 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2327 SCARG(uap, mode));
2328 }
2329
2330 static int
2331 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2332 {
2333 struct proc *p = l->l_proc;
2334 struct vattr vattr;
2335 int error;
2336 struct pathbuf *pb;
2337 struct nameidata nd;
2338
2339 error = pathbuf_copyin(path, &pb);
2340 if (error) {
2341 return error;
2342 }
2343 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2344
2345 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2346 pathbuf_destroy(pb);
2347 return error;
2348 }
2349 if (nd.ni_vp != NULL) {
2350 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2351 if (nd.ni_dvp == nd.ni_vp)
2352 vrele(nd.ni_dvp);
2353 else
2354 vput(nd.ni_dvp);
2355 vrele(nd.ni_vp);
2356 pathbuf_destroy(pb);
2357 return (EEXIST);
2358 }
2359 vattr_null(&vattr);
2360 vattr.va_type = VFIFO;
2361 /* We will read cwdi->cwdi_cmask unlocked. */
2362 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2363 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2364 if (error == 0)
2365 vrele(nd.ni_vp);
2366 vput(nd.ni_dvp);
2367 pathbuf_destroy(pb);
2368 return (error);
2369 }
2370
/*
 * Make a hard file link.
 *
 * Links the existing file `path' (resolved relative to `fdpath') under
 * the new name `link' (resolved relative to `fdlink').  If `follow'
 * has AT_SYMLINK_FOLLOW set the source lookup uses
 * NSM_FOLLOW_TRYEMULROOT, otherwise NSM_NOFOLLOW_TRYEMULROOT.
 * Returns EEXIST if the new name exists, EPERM if the source is a
 * directory and EXDEV if source and target directory are on different
 * mounts.  `retval' is not written by this function.
 */
/* ARGSUSED */
int
do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
    const char *link, int follow, register_t *retval)
{
	struct vnode *vp;
	struct pathbuf *linkpb;
	struct nameidata nd;
	namei_simple_flags_t ns_flags;
	int error;

	if (follow & AT_SYMLINK_FOLLOW)
		ns_flags = NSM_FOLLOW_TRYEMULROOT;
	else
		ns_flags = NSM_NOFOLLOW_TRYEMULROOT;

	/* vp is the link source; it is released at out1 on every path. */
	error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
	if (error != 0)
		return (error);
	error = pathbuf_copyin(link, &linkpb);
	if (error) {
		goto out1;
	}
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
	if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
		goto out2;
	if (nd.ni_vp) {
		error = EEXIST;
		goto abortop;
	}
	/* Prevent hard links on directories. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto abortop;
	}
	/* Prevent cross-mount operation. */
	if (nd.ni_dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto abortop;
	}
	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
	VOP_UNLOCK(nd.ni_dvp);
	vrele(nd.ni_dvp);
out2:
	pathbuf_destroy(linkpb);
out1:
	vrele(vp);
	return (error);
abortop:
	/*
	 * Error after a successful target lookup: abort the create,
	 * release the lookup results, then rejoin the common cleanup.
	 */
	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	if (nd.ni_dvp == nd.ni_vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	if (nd.ni_vp != NULL)
		vrele(nd.ni_vp);
	goto out2;
}
2432
2433 int
2434 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2435 {
2436 /* {
2437 syscallarg(const char *) path;
2438 syscallarg(const char *) link;
2439 } */
2440 const char *path = SCARG(uap, path);
2441 const char *link = SCARG(uap, link);
2442
2443 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2444 AT_SYMLINK_FOLLOW, retval);
2445 }
2446
2447 int
2448 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2449 register_t *retval)
2450 {
2451 /* {
2452 syscallarg(int) fd1;
2453 syscallarg(const char *) name1;
2454 syscallarg(int) fd2;
2455 syscallarg(const char *) name2;
2456 syscallarg(int) flags;
2457 } */
2458 int fd1 = SCARG(uap, fd1);
2459 const char *name1 = SCARG(uap, name1);
2460 int fd2 = SCARG(uap, fd2);
2461 const char *name2 = SCARG(uap, name2);
2462 int follow;
2463
2464 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2465
2466 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2467 }
2468
2469
/*
 * do_sys_symlink: do_sys_symlinkat() with the link name resolved
 * relative to the current working directory (AT_FDCWD) and no lwp
 * supplied.  `seg' says whether the two strings are user-space or
 * kernel-space pointers.
 */
int
do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
{
	return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
}
2475
2476 static int
2477 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2478 const char *link, enum uio_seg seg)
2479 {
2480 struct proc *p = curproc;
2481 struct vattr vattr;
2482 char *path;
2483 int error;
2484 struct pathbuf *linkpb;
2485 struct nameidata nd;
2486
2487 KASSERT(l != NULL || fdat == AT_FDCWD);
2488
2489 path = PNBUF_GET();
2490 if (seg == UIO_USERSPACE) {
2491 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2492 goto out1;
2493 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2494 goto out1;
2495 } else {
2496 KASSERT(strlen(patharg) < MAXPATHLEN);
2497 strcpy(path, patharg);
2498 linkpb = pathbuf_create(link);
2499 if (linkpb == NULL) {
2500 error = ENOMEM;
2501 goto out1;
2502 }
2503 }
2504 ktrkuser("symlink-target", path, strlen(path));
2505
2506 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2507 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2508 goto out2;
2509 if (nd.ni_vp) {
2510 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2511 if (nd.ni_dvp == nd.ni_vp)
2512 vrele(nd.ni_dvp);
2513 else
2514 vput(nd.ni_dvp);
2515 vrele(nd.ni_vp);
2516 error = EEXIST;
2517 goto out2;
2518 }
2519 vattr_null(&vattr);
2520 vattr.va_type = VLNK;
2521 /* We will read cwdi->cwdi_cmask unlocked. */
2522 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2523 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2524 if (error == 0)
2525 vrele(nd.ni_vp);
2526 vput(nd.ni_dvp);
2527 out2:
2528 pathbuf_destroy(linkpb);
2529 out1:
2530 PNBUF_PUT(path);
2531 return (error);
2532 }
2533
2534 /*
2535 * Make a symbolic link.
2536 */
2537 /* ARGSUSED */
2538 int
2539 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2540 {
2541 /* {
2542 syscallarg(const char *) path;
2543 syscallarg(const char *) link;
2544 } */
2545
2546 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2547 UIO_USERSPACE);
2548 }
2549
2550 int
2551 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2552 register_t *retval)
2553 {
2554 /* {
2555 syscallarg(const char *) path1;
2556 syscallarg(int) fd;
2557 syscallarg(const char *) path2;
2558 } */
2559
2560 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2561 SCARG(uap, path2), UIO_USERSPACE);
2562 }
2563
2564 /*
2565 * Delete a whiteout from the filesystem.
2566 */
2567 /* ARGSUSED */
2568 int
2569 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2570 {
2571 /* {
2572 syscallarg(const char *) path;
2573 } */
2574 int error;
2575 struct pathbuf *pb;
2576 struct nameidata nd;
2577
2578 error = pathbuf_copyin(SCARG(uap, path), &pb);
2579 if (error) {
2580 return error;
2581 }
2582
2583 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2584 error = namei(&nd);
2585 if (error) {
2586 pathbuf_destroy(pb);
2587 return (error);
2588 }
2589
2590 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2591 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2592 if (nd.ni_dvp == nd.ni_vp)
2593 vrele(nd.ni_dvp);
2594 else
2595 vput(nd.ni_dvp);
2596 if (nd.ni_vp)
2597 vrele(nd.ni_vp);
2598 pathbuf_destroy(pb);
2599 return (EEXIST);
2600 }
2601 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2602 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2603 vput(nd.ni_dvp);
2604 pathbuf_destroy(pb);
2605 return (error);
2606 }
2607
2608 /*
2609 * Delete a name from the filesystem.
2610 */
2611 /* ARGSUSED */
2612 int
2613 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2614 {
2615 /* {
2616 syscallarg(const char *) path;
2617 } */
2618
2619 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2620 }
2621
2622 int
2623 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2624 register_t *retval)
2625 {
2626 /* {
2627 syscallarg(int) fd;
2628 syscallarg(const char *) path;
2629 syscallarg(int) flag;
2630 } */
2631
2632 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2633 SCARG(uap, flag), UIO_USERSPACE);
2634 }
2635
2636 int
2637 do_sys_unlink(const char *arg, enum uio_seg seg)
2638 {
2639 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2640 }
2641
2642 static int
2643 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2644 enum uio_seg seg)
2645 {
2646 struct vnode *vp;
2647 int error;
2648 struct pathbuf *pb;
2649 struct nameidata nd;
2650 const char *pathstring;
2651
2652 KASSERT(l != NULL || fdat == AT_FDCWD);
2653
2654 error = pathbuf_maybe_copyin(arg, seg, &pb);
2655 if (error) {
2656 return error;
2657 }
2658 pathstring = pathbuf_stringcopy_get(pb);
2659 if (pathstring == NULL) {
2660 pathbuf_destroy(pb);
2661 return ENOMEM;
2662 }
2663
2664 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2665 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2666 goto out;
2667 vp = nd.ni_vp;
2668
2669 /*
2670 * The root of a mounted filesystem cannot be deleted.
2671 */
2672 if ((vp->v_vflag & VV_ROOT) != 0) {
2673 error = EBUSY;
2674 goto abort;
2675 }
2676
2677 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2678 error = EBUSY;
2679 goto abort;
2680 }
2681
2682 /*
2683 * No rmdir "." please.
2684 */
2685 if (nd.ni_dvp == vp) {
2686 error = EINVAL;
2687 goto abort;
2688 }
2689
2690 /*
2691 * AT_REMOVEDIR is required to remove a directory
2692 */
2693 if (vp->v_type == VDIR) {
2694 if (!(flags & AT_REMOVEDIR)) {
2695 error = EPERM;
2696 goto abort;
2697 } else {
2698 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2699 goto out;
2700 }
2701 }
2702
2703 /*
2704 * Starting here we only deal with non directories.
2705 */
2706 if (flags & AT_REMOVEDIR) {
2707 error = ENOTDIR;
2708 goto abort;
2709 }
2710
2711 #if NVERIEXEC > 0
2712 /* Handle remove requests for veriexec entries. */
2713 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2714 goto abort;
2715 }
2716 #endif /* NVERIEXEC > 0 */
2717
2718 #ifdef FILEASSOC
2719 (void)fileassoc_file_delete(vp);
2720 #endif /* FILEASSOC */
2721 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2722 goto out;
2723
2724 abort:
2725 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2726 if (nd.ni_dvp == vp)
2727 vrele(nd.ni_dvp);
2728 else
2729 vput(nd.ni_dvp);
2730 vput(vp);
2731
2732 out:
2733 pathbuf_stringcopy_put(pb, pathstring);
2734 pathbuf_destroy(pb);
2735 return (error);
2736 }
2737
2738 /*
2739 * Reposition read/write file offset.
2740 */
2741 int
2742 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2743 {
2744 /* {
2745 syscallarg(int) fd;
2746 syscallarg(int) pad;
2747 syscallarg(off_t) offset;
2748 syscallarg(int) whence;
2749 } */
2750 kauth_cred_t cred = l->l_cred;
2751 file_t *fp;
2752 struct vnode *vp;
2753 struct vattr vattr;
2754 off_t newoff;
2755 int error, fd;
2756
2757 fd = SCARG(uap, fd);
2758
2759 if ((fp = fd_getfile(fd)) == NULL)
2760 return (EBADF);
2761
2762 vp = fp->f_vnode;
2763 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2764 error = ESPIPE;
2765 goto out;
2766 }
2767
2768 vn_lock(vp, LK_SHARED | LK_RETRY);
2769
2770 switch (SCARG(uap, whence)) {
2771 case SEEK_CUR:
2772 newoff = fp->f_offset + SCARG(uap, offset);
2773 break;
2774 case SEEK_END:
2775 error = VOP_GETATTR(vp, &vattr, cred);
2776 if (error) {
2777 VOP_UNLOCK(vp);
2778 goto out;
2779 }
2780 newoff = SCARG(uap, offset) + vattr.va_size;
2781 break;
2782 case SEEK_SET:
2783 newoff = SCARG(uap, offset);
2784 break;
2785 default:
2786 error = EINVAL;
2787 VOP_UNLOCK(vp);
2788 goto out;
2789 }
2790 VOP_UNLOCK(vp);
2791 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2792 *(off_t *)retval = fp->f_offset = newoff;
2793 }
2794 out:
2795 fd_putfile(fd);
2796 return (error);
2797 }
2798
2799 /*
2800 * Positional read system call.
2801 */
2802 int
2803 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2804 {
2805 /* {
2806 syscallarg(int) fd;
2807 syscallarg(void *) buf;
2808 syscallarg(size_t) nbyte;
2809 syscallarg(off_t) offset;
2810 } */
2811 file_t *fp;
2812 struct vnode *vp;
2813 off_t offset;
2814 int error, fd = SCARG(uap, fd);
2815
2816 if ((fp = fd_getfile(fd)) == NULL)
2817 return (EBADF);
2818
2819 if ((fp->f_flag & FREAD) == 0) {
2820 fd_putfile(fd);
2821 return (EBADF);
2822 }
2823
2824 vp = fp->f_vnode;
2825 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2826 error = ESPIPE;
2827 goto out;
2828 }
2829
2830 offset = SCARG(uap, offset);
2831
2832 /*
2833 * XXX This works because no file systems actually
2834 * XXX take any action on the seek operation.
2835 */
2836 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2837 goto out;
2838
2839 /* dofileread() will unuse the descriptor for us */
2840 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2841 &offset, 0, retval));
2842
2843 out:
2844 fd_putfile(fd);
2845 return (error);
2846 }
2847
2848 /*
2849 * Positional scatter read system call.
2850 */
2851 int
2852 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2853 {
2854 /* {
2855 syscallarg(int) fd;
2856 syscallarg(const struct iovec *) iovp;
2857 syscallarg(int) iovcnt;
2858 syscallarg(off_t) offset;
2859 } */
2860 off_t offset = SCARG(uap, offset);
2861
2862 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2863 SCARG(uap, iovcnt), &offset, 0, retval);
2864 }
2865
2866 /*
2867 * Positional write system call.
2868 */
2869 int
2870 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2871 {
2872 /* {
2873 syscallarg(int) fd;
2874 syscallarg(const void *) buf;
2875 syscallarg(size_t) nbyte;
2876 syscallarg(off_t) offset;
2877 } */
2878 file_t *fp;
2879 struct vnode *vp;
2880 off_t offset;
2881 int error, fd = SCARG(uap, fd);
2882
2883 if ((fp = fd_getfile(fd)) == NULL)
2884 return (EBADF);
2885
2886 if ((fp->f_flag & FWRITE) == 0) {
2887 fd_putfile(fd);
2888 return (EBADF);
2889 }
2890
2891 vp = fp->f_vnode;
2892 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2893 error = ESPIPE;
2894 goto out;
2895 }
2896
2897 offset = SCARG(uap, offset);
2898
2899 /*
2900 * XXX This works because no file systems actually
2901 * XXX take any action on the seek operation.
2902 */
2903 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2904 goto out;
2905
2906 /* dofilewrite() will unuse the descriptor for us */
2907 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2908 &offset, 0, retval));
2909
2910 out:
2911 fd_putfile(fd);
2912 return (error);
2913 }
2914
2915 /*
2916 * Positional gather write system call.
2917 */
2918 int
2919 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2920 {
2921 /* {
2922 syscallarg(int) fd;
2923 syscallarg(const struct iovec *) iovp;
2924 syscallarg(int) iovcnt;
2925 syscallarg(off_t) offset;
2926 } */
2927 off_t offset = SCARG(uap, offset);
2928
2929 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2930 SCARG(uap, iovcnt), &offset, 0, retval);
2931 }
2932
2933 /*
2934 * Check access permissions.
2935 */
2936 int
2937 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2938 {
2939 /* {
2940 syscallarg(const char *) path;
2941 syscallarg(int) flags;
2942 } */
2943
2944 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2945 SCARG(uap, flags), 0);
2946 }
2947
2948 int
2949 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2950 int mode, int flags)
2951 {
2952 kauth_cred_t cred;
2953 struct vnode *vp;
2954 int error, nd_flag, vmode;
2955 struct pathbuf *pb;
2956 struct nameidata nd;
2957
2958 CTASSERT(F_OK == 0);
2959 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2960 /* nonsense mode */
2961 return EINVAL;
2962 }
2963
2964 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2965 if (flags & AT_SYMLINK_NOFOLLOW)
2966 nd_flag &= ~FOLLOW;
2967
2968 error = pathbuf_copyin(path, &pb);
2969 if (error)
2970 return error;
2971
2972 NDINIT(&nd, LOOKUP, nd_flag, pb);
2973
2974 /* Override default credentials */
2975 cred = kauth_cred_dup(l->l_cred);
2976 if (!(flags & AT_EACCESS)) {
2977 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2978 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2979 }
2980 nd.ni_cnd.cn_cred = cred;
2981
2982 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2983 pathbuf_destroy(pb);
2984 goto out;
2985 }
2986 vp = nd.ni_vp;
2987 pathbuf_destroy(pb);
2988
2989 /* Flags == 0 means only check for existence. */
2990 if (mode) {
2991 vmode = 0;
2992 if (mode & R_OK)
2993 vmode |= VREAD;
2994 if (mode & W_OK)
2995 vmode |= VWRITE;
2996 if (mode & X_OK)
2997 vmode |= VEXEC;
2998
2999 error = VOP_ACCESS(vp, vmode, cred);
3000 if (!error && (vmode & VWRITE))
3001 error = vn_writechk(vp);
3002 }
3003 vput(vp);
3004 out:
3005 kauth_cred_free(cred);
3006 return (error);
3007 }
3008
3009 int
3010 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3011 register_t *retval)
3012 {
3013 /* {
3014 syscallarg(int) fd;
3015 syscallarg(const char *) path;
3016 syscallarg(int) amode;
3017 syscallarg(int) flag;
3018 } */
3019
3020 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3021 SCARG(uap, amode), SCARG(uap, flag));
3022 }
3023
3024 /*
3025 * Common code for all sys_stat functions, including compat versions.
3026 */
3027 int
3028 do_sys_stat(const char *userpath, unsigned int nd_flag,
3029 struct stat *sb)
3030 {
3031 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3032 }
3033
3034 int
3035 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3036 unsigned int nd_flag, struct stat *sb)
3037 {
3038 int error;
3039 struct pathbuf *pb;
3040 struct nameidata nd;
3041
3042 KASSERT(l != NULL || fdat == AT_FDCWD);
3043
3044 error = pathbuf_copyin(userpath, &pb);
3045 if (error) {
3046 return error;
3047 }
3048
3049 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3050
3051 error = fd_nameiat(l, fdat, &nd);
3052 if (error != 0) {
3053 pathbuf_destroy(pb);
3054 return error;
3055 }
3056 error = vn_stat(nd.ni_vp, sb);
3057 vput(nd.ni_vp);
3058 pathbuf_destroy(pb);
3059 return error;
3060 }
3061
3062 /*
3063 * Get file status; this version follows links.
3064 */
3065 /* ARGSUSED */
3066 int
3067 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3068 {
3069 /* {
3070 syscallarg(const char *) path;
3071 syscallarg(struct stat *) ub;
3072 } */
3073 struct stat sb;
3074 int error;
3075
3076 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3077 if (error)
3078 return error;
3079 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3080 }
3081
3082 /*
3083 * Get file status; this version does not follow links.
3084 */
3085 /* ARGSUSED */
3086 int
3087 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3088 {
3089 /* {
3090 syscallarg(const char *) path;
3091 syscallarg(struct stat *) ub;
3092 } */
3093 struct stat sb;
3094 int error;
3095
3096 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3097 if (error)
3098 return error;
3099 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3100 }
3101
3102 int
3103 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3104 register_t *retval)
3105 {
3106 /* {
3107 syscallarg(int) fd;
3108 syscallarg(const char *) path;
3109 syscallarg(struct stat *) buf;
3110 syscallarg(int) flag;
3111 } */
3112 unsigned int nd_flag;
3113 struct stat sb;
3114 int error;
3115
3116 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3117 nd_flag = NOFOLLOW;
3118 else
3119 nd_flag = FOLLOW;
3120
3121 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3122 &sb);
3123 if (error)
3124 return error;
3125 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3126 }
3127
3128 /*
3129 * Get configurable pathname variables.
3130 */
3131 /* ARGSUSED */
3132 int
3133 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3134 {
3135 /* {
3136 syscallarg(const char *) path;
3137 syscallarg(int) name;
3138 } */
3139 int error;
3140 struct pathbuf *pb;
3141 struct nameidata nd;
3142
3143 error = pathbuf_copyin(SCARG(uap, path), &pb);
3144 if (error) {
3145 return error;
3146 }
3147 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3148 if ((error = namei(&nd)) != 0) {
3149 pathbuf_destroy(pb);
3150 return (error);
3151 }
3152 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3153 vput(nd.ni_vp);
3154 pathbuf_destroy(pb);
3155 return (error);
3156 }
3157
3158 /*
3159 * Return target name of a symbolic link.
3160 */
3161 /* ARGSUSED */
3162 int
3163 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3164 register_t *retval)
3165 {
3166 /* {
3167 syscallarg(const char *) path;
3168 syscallarg(char *) buf;
3169 syscallarg(size_t) count;
3170 } */
3171 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3172 SCARG(uap, buf), SCARG(uap, count), retval);
3173 }
3174
3175 static int
3176 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3177 size_t count, register_t *retval)
3178 {
3179 struct vnode *vp;
3180 struct iovec aiov;
3181 struct uio auio;
3182 int error;
3183 struct pathbuf *pb;
3184 struct nameidata nd;
3185
3186 error = pathbuf_copyin(path, &pb);
3187 if (error) {
3188 return error;
3189 }
3190 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3191 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3192 pathbuf_destroy(pb);
3193 return error;
3194 }
3195 vp = nd.ni_vp;
3196 pathbuf_destroy(pb);
3197 if (vp->v_type != VLNK)
3198 error = EINVAL;
3199 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3200 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3201 aiov.iov_base = buf;
3202 aiov.iov_len = count;
3203 auio.uio_iov = &aiov;
3204 auio.uio_iovcnt = 1;
3205 auio.uio_offset = 0;
3206 auio.uio_rw = UIO_READ;
3207 KASSERT(l == curlwp);
3208 auio.uio_vmspace = l->l_proc->p_vmspace;
3209 auio.uio_resid = count;
3210 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3211 *retval = count - auio.uio_resid;
3212 }
3213 vput(vp);
3214 return (error);
3215 }
3216
3217 int
3218 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3219 register_t *retval)
3220 {
3221 /* {
3222 syscallarg(int) fd;
3223 syscallarg(const char *) path;
3224 syscallarg(char *) buf;
3225 syscallarg(size_t) bufsize;
3226 } */
3227
3228 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3229 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3230 }
3231
3232 /*
3233 * Change flags of a file given a path name.
3234 */
3235 /* ARGSUSED */
3236 int
3237 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3238 {
3239 /* {
3240 syscallarg(const char *) path;
3241 syscallarg(u_long) flags;
3242 } */
3243 struct vnode *vp;
3244 int error;
3245
3246 error = namei_simple_user(SCARG(uap, path),
3247 NSM_FOLLOW_TRYEMULROOT, &vp);
3248 if (error != 0)
3249 return (error);
3250 error = change_flags(vp, SCARG(uap, flags), l);
3251 vput(vp);
3252 return (error);
3253 }
3254
3255 /*
3256 * Change flags of a file given a file descriptor.
3257 */
3258 /* ARGSUSED */
3259 int
3260 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3261 {
3262 /* {
3263 syscallarg(int) fd;
3264 syscallarg(u_long) flags;
3265 } */
3266 struct vnode *vp;
3267 file_t *fp;
3268 int error;
3269
3270 /* fd_getvnode() will use the descriptor for us */
3271 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3272 return (error);
3273 vp = fp->f_vnode;
3274 error = change_flags(vp, SCARG(uap, flags), l);
3275 VOP_UNLOCK(vp);
3276 fd_putfile(SCARG(uap, fd));
3277 return (error);
3278 }
3279
3280 /*
3281 * Change flags of a file given a path name; this version does
3282 * not follow links.
3283 */
3284 int
3285 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3286 {
3287 /* {
3288 syscallarg(const char *) path;
3289 syscallarg(u_long) flags;
3290 } */
3291 struct vnode *vp;
3292 int error;
3293
3294 error = namei_simple_user(SCARG(uap, path),
3295 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3296 if (error != 0)
3297 return (error);
3298 error = change_flags(vp, SCARG(uap, flags), l);
3299 vput(vp);
3300 return (error);
3301 }
3302
3303 /*
3304 * Common routine to change flags of a file.
3305 */
3306 int
3307 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3308 {
3309 struct vattr vattr;
3310 int error;
3311
3312 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3313
3314 vattr_null(&vattr);
3315 vattr.va_flags = flags;
3316 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3317
3318 return (error);
3319 }
3320
3321 /*
3322 * Change mode of a file given path name; this version follows links.
3323 */
3324 /* ARGSUSED */
3325 int
3326 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3327 {
3328 /* {
3329 syscallarg(const char *) path;
3330 syscallarg(int) mode;
3331 } */
3332 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3333 SCARG(uap, mode), 0);
3334 }
3335
3336 int
3337 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3338 {
3339 int error;
3340 struct vnode *vp;
3341 namei_simple_flags_t ns_flag;
3342
3343 if (flags & AT_SYMLINK_NOFOLLOW)
3344 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3345 else
3346 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3347
3348 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3349 if (error != 0)
3350 return error;
3351
3352 error = change_mode(vp, mode, l);
3353
3354 vrele(vp);
3355
3356 return (error);
3357 }
3358
3359 /*
3360 * Change mode of a file given a file descriptor.
3361 */
3362 /* ARGSUSED */
3363 int
3364 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3365 {
3366 /* {
3367 syscallarg(int) fd;
3368 syscallarg(int) mode;
3369 } */
3370 file_t *fp;
3371 int error;
3372
3373 /* fd_getvnode() will use the descriptor for us */
3374 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3375 return (error);
3376 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3377 fd_putfile(SCARG(uap, fd));
3378 return (error);
3379 }
3380
3381 int
3382 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3383 register_t *retval)
3384 {
3385 /* {
3386 syscallarg(int) fd;
3387 syscallarg(const char *) path;
3388 syscallarg(int) mode;
3389 syscallarg(int) flag;
3390 } */
3391
3392 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3393 SCARG(uap, mode), SCARG(uap, flag));
3394 }
3395
3396 /*
3397 * Change mode of a file given path name; this version does not follow links.
3398 */
3399 /* ARGSUSED */
3400 int
3401 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3402 {
3403 /* {
3404 syscallarg(const char *) path;
3405 syscallarg(int) mode;
3406 } */
3407 int error;
3408 struct vnode *vp;
3409
3410 error = namei_simple_user(SCARG(uap, path),
3411 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3412 if (error != 0)
3413 return (error);
3414
3415 error = change_mode(vp, SCARG(uap, mode), l);
3416
3417 vrele(vp);
3418 return (error);
3419 }
3420
3421 /*
3422 * Common routine to set mode given a vnode.
3423 */
3424 static int
3425 change_mode(struct vnode *vp, int mode, struct lwp *l)
3426 {
3427 struct vattr vattr;
3428 int error;
3429
3430 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3431 vattr_null(&vattr);
3432 vattr.va_mode = mode & ALLPERMS;
3433 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3434 VOP_UNLOCK(vp);
3435 return (error);
3436 }
3437
3438 /*
3439 * Set ownership given a path name; this version follows links.
3440 */
3441 /* ARGSUSED */
3442 int
3443 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3444 {
3445 /* {
3446 syscallarg(const char *) path;
3447 syscallarg(uid_t) uid;
3448 syscallarg(gid_t) gid;
3449 } */
3450 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3451 SCARG(uap, gid), 0);
3452 }
3453
3454 int
3455 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3456 gid_t gid, int flags)
3457 {
3458 int error;
3459 struct vnode *vp;
3460 namei_simple_flags_t ns_flag;
3461
3462 if (flags & AT_SYMLINK_NOFOLLOW)
3463 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3464 else
3465 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3466
3467 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3468 if (error != 0)
3469 return error;
3470
3471 error = change_owner(vp, uid, gid, l, 0);
3472
3473 vrele(vp);
3474
3475 return (error);
3476 }
3477
3478 /*
3479 * Set ownership given a path name; this version follows links.
3480 * Provides POSIX semantics.
3481 */
3482 /* ARGSUSED */
3483 int
3484 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3485 {
3486 /* {
3487 syscallarg(const char *) path;
3488 syscallarg(uid_t) uid;
3489 syscallarg(gid_t) gid;
3490 } */
3491 int error;
3492 struct vnode *vp;
3493
3494 error = namei_simple_user(SCARG(uap, path),
3495 NSM_FOLLOW_TRYEMULROOT, &vp);
3496 if (error != 0)
3497 return (error);
3498
3499 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3500
3501 vrele(vp);
3502 return (error);
3503 }
3504
3505 /*
3506 * Set ownership given a file descriptor.
3507 */
3508 /* ARGSUSED */
3509 int
3510 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3511 {
3512 /* {
3513 syscallarg(int) fd;
3514 syscallarg(uid_t) uid;
3515 syscallarg(gid_t) gid;
3516 } */
3517 int error;
3518 file_t *fp;
3519
3520 /* fd_getvnode() will use the descriptor for us */
3521 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3522 return (error);
3523 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3524 l, 0);
3525 fd_putfile(SCARG(uap, fd));
3526 return (error);
3527 }
3528
3529 int
3530 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3531 register_t *retval)
3532 {
3533 /* {
3534 syscallarg(int) fd;
3535 syscallarg(const char *) path;
3536 syscallarg(uid_t) owner;
3537 syscallarg(gid_t) group;
3538 syscallarg(int) flag;
3539 } */
3540
3541 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3542 SCARG(uap, owner), SCARG(uap, group),
3543 SCARG(uap, flag));
3544 }
3545
3546 /*
3547 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3548 */
3549 /* ARGSUSED */
3550 int
3551 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3552 {
3553 /* {
3554 syscallarg(int) fd;
3555 syscallarg(uid_t) uid;
3556 syscallarg(gid_t) gid;
3557 } */
3558 int error;
3559 file_t *fp;
3560
3561 /* fd_getvnode() will use the descriptor for us */
3562 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3563 return (error);
3564 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3565 l, 1);
3566 fd_putfile(SCARG(uap, fd));
3567 return (error);
3568 }
3569
3570 /*
3571 * Set ownership given a path name; this version does not follow links.
3572 */
3573 /* ARGSUSED */
3574 int
3575 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3576 {
3577 /* {
3578 syscallarg(const char *) path;
3579 syscallarg(uid_t) uid;
3580 syscallarg(gid_t) gid;
3581 } */
3582 int error;
3583 struct vnode *vp;
3584
3585 error = namei_simple_user(SCARG(uap, path),
3586 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3587 if (error != 0)
3588 return (error);
3589
3590 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3591
3592 vrele(vp);
3593 return (error);
3594 }
3595
3596 /*
3597 * Set ownership given a path name; this version does not follow links.
3598 * Provides POSIX/XPG semantics.
3599 */
3600 /* ARGSUSED */
3601 int
3602 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3603 {
3604 /* {
3605 syscallarg(const char *) path;
3606 syscallarg(uid_t) uid;
3607 syscallarg(gid_t) gid;
3608 } */
3609 int error;
3610 struct vnode *vp;
3611
3612 error = namei_simple_user(SCARG(uap, path),
3613 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3614 if (error != 0)
3615 return (error);
3616
3617 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3618
3619 vrele(vp);
3620 return (error);
3621 }
3622
3623 /*
3624 * Common routine to set ownership given a vnode.
3625 */
3626 static int
3627 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3628 int posix_semantics)
3629 {
3630 struct vattr vattr;
3631 mode_t newmode;
3632 int error;
3633
3634 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3635 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3636 goto out;
3637
3638 #define CHANGED(x) ((int)(x) != -1)
3639 newmode = vattr.va_mode;
3640 if (posix_semantics) {
3641 /*
3642 * POSIX/XPG semantics: if the caller is not the super-user,
3643 * clear set-user-id and set-group-id bits. Both POSIX and
3644 * the XPG consider the behaviour for calls by the super-user
3645 * implementation-defined; we leave the set-user-id and set-
3646 * group-id settings intact in that case.
3647 */
3648 if (vattr.va_mode & S_ISUID) {
3649 if (kauth_authorize_vnode(l->l_cred,
3650 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3651 newmode &= ~S_ISUID;
3652 }
3653 if (vattr.va_mode & S_ISGID) {
3654 if (kauth_authorize_vnode(l->l_cred,
3655 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3656 newmode &= ~S_ISGID;
3657 }
3658 } else {
3659 /*
3660 * NetBSD semantics: when changing owner and/or group,
3661 * clear the respective bit(s).
3662 */
3663 if (CHANGED(uid))
3664 newmode &= ~S_ISUID;
3665 if (CHANGED(gid))
3666 newmode &= ~S_ISGID;
3667 }
3668 /* Update va_mode iff altered. */
3669 if (vattr.va_mode == newmode)
3670 newmode = VNOVAL;
3671
3672 vattr_null(&vattr);
3673 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3674 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3675 vattr.va_mode = newmode;
3676 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3677 #undef CHANGED
3678
3679 out:
3680 VOP_UNLOCK(vp);
3681 return (error);
3682 }
3683
3684 /*
3685 * Set the access and modification times given a path name; this
3686 * version follows links.
3687 */
3688 /* ARGSUSED */
3689 int
3690 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3691 register_t *retval)
3692 {
3693 /* {
3694 syscallarg(const char *) path;
3695 syscallarg(const struct timeval *) tptr;
3696 } */
3697
3698 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3699 SCARG(uap, tptr), UIO_USERSPACE);
3700 }
3701
3702 /*
3703 * Set the access and modification times given a file descriptor.
3704 */
3705 /* ARGSUSED */
3706 int
3707 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3708 register_t *retval)
3709 {
3710 /* {
3711 syscallarg(int) fd;
3712 syscallarg(const struct timeval *) tptr;
3713 } */
3714 int error;
3715 file_t *fp;
3716
3717 /* fd_getvnode() will use the descriptor for us */
3718 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3719 return (error);
3720 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3721 UIO_USERSPACE);
3722 fd_putfile(SCARG(uap, fd));
3723 return (error);
3724 }
3725
3726 int
3727 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3728 register_t *retval)
3729 {
3730 /* {
3731 syscallarg(int) fd;
3732 syscallarg(const struct timespec *) tptr;
3733 } */
3734 int error;
3735 file_t *fp;
3736
3737 /* fd_getvnode() will use the descriptor for us */
3738 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3739 return (error);
3740 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3741 SCARG(uap, tptr), UIO_USERSPACE);
3742 fd_putfile(SCARG(uap, fd));
3743 return (error);
3744 }
3745
3746 /*
3747 * Set the access and modification times given a path name; this
3748 * version does not follow links.
3749 */
3750 int
3751 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3752 register_t *retval)
3753 {
3754 /* {
3755 syscallarg(const char *) path;
3756 syscallarg(const struct timeval *) tptr;
3757 } */
3758
3759 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3760 SCARG(uap, tptr), UIO_USERSPACE);
3761 }
3762
3763 int
3764 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3765 register_t *retval)
3766 {
3767 /* {
3768 syscallarg(int) fd;
3769 syscallarg(const char *) path;
3770 syscallarg(const struct timespec *) tptr;
3771 syscallarg(int) flag;
3772 } */
3773 int follow;
3774 const struct timespec *tptr;
3775 int error;
3776
3777 tptr = SCARG(uap, tptr);
3778 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3779
3780 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3781 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3782
3783 return error;
3784 }
3785
3786 /*
3787 * Common routine to set access and modification times given a vnode.
3788 */
3789 int
3790 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3791 const struct timespec *tptr, enum uio_seg seg)
3792 {
3793 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3794 }
3795
3796 int
3797 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3798 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3799 {
3800 struct vattr vattr;
3801 int error, dorele = 0;
3802 namei_simple_flags_t sflags;
3803 bool vanull, setbirthtime;
3804 struct timespec ts[2];
3805
3806 KASSERT(l != NULL || fdat == AT_FDCWD);
3807
3808 /*
3809 * I have checked all callers and they pass either FOLLOW,
3810 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3811 * is 0. More to the point, they don't pass anything else.
3812 * Let's keep it that way at least until the namei interfaces
3813 * are fully sanitized.
3814 */
3815 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3816 sflags = (flag == FOLLOW) ?
3817 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3818
3819 if (tptr == NULL) {
3820 vanull = true;
3821 nanotime(&ts[0]);
3822 ts[1] = ts[0];
3823 } else {
3824 vanull = false;
3825 if (seg != UIO_SYSSPACE) {
3826 error = copyin(tptr, ts, sizeof (ts));
3827 if (error != 0)
3828 return error;
3829 } else {
3830 ts[0] = tptr[0];
3831 ts[1] = tptr[1];
3832 }
3833 }
3834
3835 if (ts[0].tv_nsec == UTIME_NOW) {
3836 nanotime(&ts[0]);
3837 if (ts[1].tv_nsec == UTIME_NOW) {
3838 vanull = true;
3839 ts[1] = ts[0];
3840 }
3841 } else if (ts[1].tv_nsec == UTIME_NOW)
3842 nanotime(&ts[1]);
3843
3844 if (vp == NULL) {
3845 /* note: SEG describes TPTR, not PATH; PATH is always user */
3846 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3847 if (error != 0)
3848 return error;
3849 dorele = 1;
3850 }
3851
3852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3853 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3854 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3855 vattr_null(&vattr);
3856
3857 if (ts[0].tv_nsec != UTIME_OMIT)
3858 vattr.va_atime = ts[0];
3859
3860 if (ts[1].tv_nsec != UTIME_OMIT) {
3861 vattr.va_mtime = ts[1];
3862 if (setbirthtime)
3863 vattr.va_birthtime = ts[1];
3864 }
3865
3866 if (vanull)
3867 vattr.va_vaflags |= VA_UTIMES_NULL;
3868 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3869 VOP_UNLOCK(vp);
3870
3871 if (dorele != 0)
3872 vrele(vp);
3873
3874 return error;
3875 }
3876
3877 int
3878 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3879 const struct timeval *tptr, enum uio_seg seg)
3880 {
3881 struct timespec ts[2];
3882 struct timespec *tsptr = NULL;
3883 int error;
3884
3885 if (tptr != NULL) {
3886 struct timeval tv[2];
3887
3888 if (seg != UIO_SYSSPACE) {
3889 error = copyin(tptr, tv, sizeof (tv));
3890 if (error != 0)
3891 return error;
3892 tptr = tv;
3893 }
3894
3895 if ((tv[0].tv_usec == UTIME_NOW) ||
3896 (tv[0].tv_usec == UTIME_OMIT))
3897 ts[0].tv_nsec = tv[0].tv_usec;
3898 else
3899 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3900
3901 if ((tv[1].tv_usec == UTIME_NOW) ||
3902 (tv[1].tv_usec == UTIME_OMIT))
3903 ts[1].tv_nsec = tv[1].tv_usec;
3904 else
3905 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3906
3907 tsptr = &ts[0];
3908 }
3909
3910 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3911 }
3912
3913 /*
3914 * Truncate a file given its path name.
3915 */
3916 /* ARGSUSED */
3917 int
3918 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3919 {
3920 /* {
3921 syscallarg(const char *) path;
3922 syscallarg(int) pad;
3923 syscallarg(off_t) length;
3924 } */
3925 struct vnode *vp;
3926 struct vattr vattr;
3927 int error;
3928
3929 if (SCARG(uap, length) < 0)
3930 return EINVAL;
3931
3932 error = namei_simple_user(SCARG(uap, path),
3933 NSM_FOLLOW_TRYEMULROOT, &vp);
3934 if (error != 0)
3935 return (error);
3936 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3937 if (vp->v_type == VDIR)
3938 error = EISDIR;
3939 else if ((error = vn_writechk(vp)) == 0 &&
3940 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3941 vattr_null(&vattr);
3942 vattr.va_size = SCARG(uap, length);
3943 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3944 }
3945 vput(vp);
3946 return (error);
3947 }
3948
3949 /*
3950 * Truncate a file given a file descriptor.
3951 */
3952 /* ARGSUSED */
3953 int
3954 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3955 {
3956 /* {
3957 syscallarg(int) fd;
3958 syscallarg(int) pad;
3959 syscallarg(off_t) length;
3960 } */
3961 struct vattr vattr;
3962 struct vnode *vp;
3963 file_t *fp;
3964 int error;
3965
3966 if (SCARG(uap, length) < 0)
3967 return EINVAL;
3968
3969 /* fd_getvnode() will use the descriptor for us */
3970 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3971 return (error);
3972 if ((fp->f_flag & FWRITE) == 0) {
3973 error = EINVAL;
3974 goto out;
3975 }
3976 vp = fp->f_vnode;
3977 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3978 if (vp->v_type == VDIR)
3979 error = EISDIR;
3980 else if ((error = vn_writechk(vp)) == 0) {
3981 vattr_null(&vattr);
3982 vattr.va_size = SCARG(uap, length);
3983 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3984 }
3985 VOP_UNLOCK(vp);
3986 out:
3987 fd_putfile(SCARG(uap, fd));
3988 return (error);
3989 }
3990
3991 /*
3992 * Sync an open file.
3993 */
3994 /* ARGSUSED */
3995 int
3996 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3997 {
3998 /* {
3999 syscallarg(int) fd;
4000 } */
4001 struct vnode *vp;
4002 file_t *fp;
4003 int error;
4004
4005 /* fd_getvnode() will use the descriptor for us */
4006 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4007 return (error);
4008 vp = fp->f_vnode;
4009 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4010 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4011 VOP_UNLOCK(vp);
4012 fd_putfile(SCARG(uap, fd));
4013 return (error);
4014 }
4015
4016 /*
4017 * Sync a range of file data. API modeled after that found in AIX.
4018 *
4019 * FDATASYNC indicates that we need only save enough metadata to be able
4020 * to re-read the written data. Note we duplicate AIX's requirement that
4021 * the file be open for writing.
4022 */
4023 /* ARGSUSED */
4024 int
4025 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4026 {
4027 /* {
4028 syscallarg(int) fd;
4029 syscallarg(int) flags;
4030 syscallarg(off_t) start;
4031 syscallarg(off_t) length;
4032 } */
4033 struct vnode *vp;
4034 file_t *fp;
4035 int flags, nflags;
4036 off_t s, e, len;
4037 int error;
4038
4039 /* fd_getvnode() will use the descriptor for us */
4040 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4041 return (error);
4042
4043 if ((fp->f_flag & FWRITE) == 0) {
4044 error = EBADF;
4045 goto out;
4046 }
4047
4048 flags = SCARG(uap, flags);
4049 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4050 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4051 error = EINVAL;
4052 goto out;
4053 }
4054 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4055 if (flags & FDATASYNC)
4056 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4057 else
4058 nflags = FSYNC_WAIT;
4059 if (flags & FDISKSYNC)
4060 nflags |= FSYNC_CACHE;
4061
4062 len = SCARG(uap, length);
4063 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4064 if (len) {
4065 s = SCARG(uap, start);
4066 e = s + len;
4067 if (e < s) {
4068 error = EINVAL;
4069 goto out;
4070 }
4071 } else {
4072 e = 0;
4073 s = 0;
4074 }
4075
4076 vp = fp->f_vnode;
4077 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4078 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4079 VOP_UNLOCK(vp);
4080 out:
4081 fd_putfile(SCARG(uap, fd));
4082 return (error);
4083 }
4084
4085 /*
4086 * Sync the data of an open file.
4087 */
4088 /* ARGSUSED */
4089 int
4090 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4091 {
4092 /* {
4093 syscallarg(int) fd;
4094 } */
4095 struct vnode *vp;
4096 file_t *fp;
4097 int error;
4098
4099 /* fd_getvnode() will use the descriptor for us */
4100 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4101 return (error);
4102 if ((fp->f_flag & FWRITE) == 0) {
4103 fd_putfile(SCARG(uap, fd));
4104 return (EBADF);
4105 }
4106 vp = fp->f_vnode;
4107 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4108 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4109 VOP_UNLOCK(vp);
4110 fd_putfile(SCARG(uap, fd));
4111 return (error);
4112 }
4113
4114 /*
4115 * Rename files, (standard) BSD semantics frontend.
4116 */
4117 /* ARGSUSED */
4118 int
4119 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4120 {
4121 /* {
4122 syscallarg(const char *) from;
4123 syscallarg(const char *) to;
4124 } */
4125
4126 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4127 SCARG(uap, to), UIO_USERSPACE, 0));
4128 }
4129
4130 int
4131 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4132 register_t *retval)
4133 {
4134 /* {
4135 syscallarg(int) fromfd;
4136 syscallarg(const char *) from;
4137 syscallarg(int) tofd;
4138 syscallarg(const char *) to;
4139 } */
4140
4141 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4142 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4143 }
4144
4145 /*
4146 * Rename files, POSIX semantics frontend.
4147 */
4148 /* ARGSUSED */
4149 int
4150 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4151 {
4152 /* {
4153 syscallarg(const char *) from;
4154 syscallarg(const char *) to;
4155 } */
4156
4157 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4158 SCARG(uap, to), UIO_USERSPACE, 1));
4159 }
4160
4161 /*
4162 * Rename files. Source and destination must either both be directories,
4163 * or both not be directories. If target is a directory, it must be empty.
4164 * If `from' and `to' refer to the same object, the value of the `retain'
4165 * argument is used to determine whether `from' will be
4166 *
4167 * (retain == 0) deleted unless `from' and `to' refer to the same
4168 * object in the file system's name space (BSD).
4169 * (retain == 1) always retained (POSIX).
4170 *
4171 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4172 */
4173 int
4174 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4175 {
4176 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4177 }
4178
/*
 * Common implementation behind the rename entry points.
 *
 * Looks up `from' relative to fromfd and `to' relative to tofd, rejects
 * `.' and `..' as final components, checks that both parents are on
 * the same mount, takes the vfs rename lock, relooks up the target
 * under the locked target directory, and finally calls VOP_RENAME.
 *
 * l		may be NULL only when both fromfd and tofd are AT_FDCWD
 *		(asserted below).
 * seg		address space holding the `from' and `to' strings.
 * retain	when `from' and `to' resolve to the same vnode: nonzero
 *		returns success without renaming; zero does so only if
 *		the parent directory and the final component name are
 *		also identical, and otherwise goes on to VOP_RENAME.
 *
 * Returns 0 or an errno value.  On every path the pathbufs are
 * destroyed before returning; the abortN labels undo progressively
 * less of the locking/referencing done so far.
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR? */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 * Note that tvp may be NULL (only tdvp is asserted non-NULL).
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
		error = EISDIR;
		goto abort1;
	}
	if ((tnd.ni_cnd.cn_namelen == 2) &&
	    (tnd.ni_cnd.cn_nameptr[0] == '.') &&
	    (tnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		goto abort1;
	}

	/*
	 * Get the mount point.  If the file system has been unmounted,
	 * which it may be because we're not holding any vnode locks,
	 * then v_mount will be NULL.  We're not really supposed to
	 * read v_mount without holding the vnode lock, but since we
	 * have fdvp referenced, if fdvp->v_mount changes then at worst
	 * it will be set to NULL, not changed to another mount point.
	 * And, of course, since it is up to the file system to
	 * determine the real lock order, we can't lock both fdvp and
	 * tdvp at the same time.
	 */
	mp = fdvp->v_mount;
	if (mp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Again, although we don't
	 * hold any vnode locks, the v_mount fields may change -- but
	 * at worst they will change to NULL, so this will never become
	 * a cross-device rename, because we hold vnode references.
	 *
	 * XXX Because nothing is locked and the compiler may reorder
	 * things here, unmounting the file system at an inopportune
	 * moment may cause rename to fail with EXDEV when it really
	 * should fail with ENOENT.
	 */
	tmp = tdvp->v_mount;
	if (tmp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * With retain set, renaming a vnode onto itself succeeds
	 * without doing anything.  Without it, that only happens when
	 * the directory and the final component name also match.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/* NUL-terminated copies of the two final component names. */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	goto out2;

	/*
	 * Error unwinding, entered at the label matching how far we got:
	 *   abort3: tdvp and tvp locked, rename lock held.
	 *   abort2: tdvp locked, rename lock held.
	 *   abort1: `to' looked up; tdvp/tvp referenced, nothing locked.
	 *   abort0: `from' looked up; fdvp/fvp referenced, nothing locked.
	 *   out2/out1/out0: only pathbufs (both / from only / none) remain.
	 */
abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}
4509
4510 /*
4511 * Make a directory file.
4512 */
4513 /* ARGSUSED */
4514 int
4515 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4516 {
4517 /* {
4518 syscallarg(const char *) path;
4519 syscallarg(int) mode;
4520 } */
4521
4522 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4523 SCARG(uap, mode), UIO_USERSPACE);
4524 }
4525
4526 int
4527 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4528 register_t *retval)
4529 {
4530 /* {
4531 syscallarg(int) fd;
4532 syscallarg(const char *) path;
4533 syscallarg(int) mode;
4534 } */
4535
4536 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4537 SCARG(uap, mode), UIO_USERSPACE);
4538 }
4539
4540
4541 int
4542 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4543 {
4544 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4545 }
4546
4547 static int
4548 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4549 enum uio_seg seg)
4550 {
4551 struct proc *p = curlwp->l_proc;
4552 struct vnode *vp;
4553 struct vattr vattr;
4554 int error;
4555 struct pathbuf *pb;
4556 struct nameidata nd;
4557
4558 KASSERT(l != NULL || fdat == AT_FDCWD);
4559
4560 /* XXX bollocks, should pass in a pathbuf */
4561 error = pathbuf_maybe_copyin(path, seg, &pb);
4562 if (error) {
4563 return error;
4564 }
4565
4566 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4567
4568 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4569 pathbuf_destroy(pb);
4570 return (error);
4571 }
4572 vp = nd.ni_vp;
4573 if (vp != NULL) {
4574 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4575 if (nd.ni_dvp == vp)
4576 vrele(nd.ni_dvp);
4577 else
4578 vput(nd.ni_dvp);
4579 vrele(vp);
4580 pathbuf_destroy(pb);
4581 return (EEXIST);
4582 }
4583 vattr_null(&vattr);
4584 vattr.va_type = VDIR;
4585 /* We will read cwdi->cwdi_cmask unlocked. */
4586 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4587 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4588 if (!error)
4589 vrele(nd.ni_vp);
4590 vput(nd.ni_dvp);
4591 pathbuf_destroy(pb);
4592 return (error);
4593 }
4594
4595 /*
4596 * Remove a directory file.
4597 */
4598 /* ARGSUSED */
4599 int
4600 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4601 {
4602 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4603 AT_REMOVEDIR, UIO_USERSPACE);
4604 }
4605
4606 /*
4607 * Read a block of directory entries in a file system independent format.
4608 */
4609 int
4610 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4611 {
4612 /* {
4613 syscallarg(int) fd;
4614 syscallarg(char *) buf;
4615 syscallarg(size_t) count;
4616 } */
4617 file_t *fp;
4618 int error, done;
4619
4620 /* fd_getvnode() will use the descriptor for us */
4621 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4622 return (error);
4623 if ((fp->f_flag & FREAD) == 0) {
4624 error = EBADF;
4625 goto out;
4626 }
4627 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4628 SCARG(uap, count), &done, l, 0, 0);
4629 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4630 *retval = done;
4631 out:
4632 fd_putfile(SCARG(uap, fd));
4633 return (error);
4634 }
4635
4636 /*
4637 * Set the mode mask for creation of filesystem nodes.
4638 */
4639 int
4640 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4641 {
4642 /* {
4643 syscallarg(mode_t) newmask;
4644 } */
4645 struct proc *p = l->l_proc;
4646 struct cwdinfo *cwdi;
4647
4648 /*
4649 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4650 * important is that we serialize changes to the mask. The
4651 * rw_exit() will issue a write memory barrier on our behalf,
4652 * and force the changes out to other CPUs (as it must use an
4653 * atomic operation, draining the local CPU's store buffers).
4654 */
4655 cwdi = p->p_cwdi;
4656 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4657 *retval = cwdi->cwdi_cmask;
4658 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4659 rw_exit(&cwdi->cwdi_lock);
4660
4661 return (0);
4662 }
4663
4664 int
4665 dorevoke(struct vnode *vp, kauth_cred_t cred)
4666 {
4667 struct vattr vattr;
4668 int error, fs_decision;
4669
4670 vn_lock(vp, LK_SHARED | LK_RETRY);
4671 error = VOP_GETATTR(vp, &vattr, cred);
4672 VOP_UNLOCK(vp);
4673 if (error != 0)
4674 return error;
4675 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4676 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4677 fs_decision);
4678 if (!error)
4679 VOP_REVOKE(vp, REVOKEALL);
4680 return (error);
4681 }
4682
4683 /*
4684 * Void all references to file by ripping underlying filesystem
4685 * away from vnode.
4686 */
4687 /* ARGSUSED */
4688 int
4689 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4690 {
4691 /* {
4692 syscallarg(const char *) path;
4693 } */
4694 struct vnode *vp;
4695 int error;
4696
4697 error = namei_simple_user(SCARG(uap, path),
4698 NSM_FOLLOW_TRYEMULROOT, &vp);
4699 if (error != 0)
4700 return (error);
4701 error = dorevoke(vp, l->l_cred);
4702 vrele(vp);
4703 return (error);
4704 }
4705
4706 /*
4707 * Allocate backing store for a file, filling a hole without having to
4708 * explicitly write anything out.
4709 */
4710 /* ARGSUSED */
4711 int
4712 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4713 register_t *retval)
4714 {
4715 /* {
4716 syscallarg(int) fd;
4717 syscallarg(off_t) pos;
4718 syscallarg(off_t) len;
4719 } */
4720 int fd;
4721 off_t pos, len;
4722 struct file *fp;
4723 struct vnode *vp;
4724 int error;
4725
4726 fd = SCARG(uap, fd);
4727 pos = SCARG(uap, pos);
4728 len = SCARG(uap, len);
4729
4730 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4731 *retval = EINVAL;
4732 return 0;
4733 }
4734
4735 error = fd_getvnode(fd, &fp);
4736 if (error) {
4737 *retval = error;
4738 return 0;
4739 }
4740 if ((fp->f_flag & FWRITE) == 0) {
4741 error = EBADF;
4742 goto fail;
4743 }
4744 vp = fp->f_vnode;
4745
4746 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4747 if (vp->v_type == VDIR) {
4748 error = EISDIR;
4749 } else {
4750 error = VOP_FALLOCATE(vp, pos, len);
4751 }
4752 VOP_UNLOCK(vp);
4753
4754 fail:
4755 fd_putfile(fd);
4756 *retval = error;
4757 return 0;
4758 }
4759
4760 /*
4761 * Deallocate backing store for a file, creating a hole. Also used for
4762 * invoking TRIM on disks.
4763 */
4764 /* ARGSUSED */
4765 int
4766 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4767 register_t *retval)
4768 {
4769 /* {
4770 syscallarg(int) fd;
4771 syscallarg(off_t) pos;
4772 syscallarg(off_t) len;
4773 } */
4774 int fd;
4775 off_t pos, len;
4776 struct file *fp;
4777 struct vnode *vp;
4778 int error;
4779
4780 fd = SCARG(uap, fd);
4781 pos = SCARG(uap, pos);
4782 len = SCARG(uap, len);
4783
4784 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4785 return EINVAL;
4786 }
4787
4788 error = fd_getvnode(fd, &fp);
4789 if (error) {
4790 return error;
4791 }
4792 if ((fp->f_flag & FWRITE) == 0) {
4793 error = EBADF;
4794 goto fail;
4795 }
4796 vp = fp->f_vnode;
4797
4798 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4799 if (vp->v_type == VDIR) {
4800 error = EISDIR;
4801 } else {
4802 error = VOP_FDISCARD(vp, pos, len);
4803 }
4804 VOP_UNLOCK(vp);
4805
4806 fail:
4807 fd_putfile(fd);
4808 return error;
4809 }
4810