vfs_syscalls.c revision 1.503 1 /* $NetBSD: vfs_syscalls.c,v 1.503 2015/10/28 14:05:04 martin Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.503 2015/10/28 14:05:04 martin Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/specfs/specdev.h>
112
113 #include <nfs/rpcv2.h>
114 #include <nfs/nfsproto.h>
115 #include <nfs/nfs.h>
116 #include <nfs/nfs_var.h>
117
118 /* XXX this shouldn't be here */
119 #ifndef OFF_T_MAX
120 #define OFF_T_MAX __type_max(off_t)
121 #endif
122
123 static int change_flags(struct vnode *, u_long, struct lwp *);
124 static int change_mode(struct vnode *, int, struct lwp *);
125 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
126 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
127 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
128 enum uio_seg);
129 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
130 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
131 enum uio_seg);
132 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
133 enum uio_seg, int);
134 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
135 size_t, register_t *);
136 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
137
138 static int fd_nameiat(struct lwp *, int, struct nameidata *);
139 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
140 namei_simple_flags_t, struct vnode **);
141
142
143 /*
144 * This table is used to maintain compatibility with 4.3BSD
145 * and NetBSD 0.9 mount syscalls - and possibly other systems.
146 * Note, the order is important!
147 *
148 * Do not modify this table. It should only contain filesystems
149 * supported by NetBSD 0.9 and 4.3BSD.
150 */
151 const char * const mountcompatnames[] = {
152 NULL, /* 0 = MOUNT_NONE */
153 MOUNT_FFS, /* 1 = MOUNT_UFS */
154 MOUNT_NFS, /* 2 */
155 MOUNT_MFS, /* 3 */
156 MOUNT_MSDOS, /* 4 */
157 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
158 MOUNT_FDESC, /* 6 */
159 MOUNT_KERNFS, /* 7 */
160 NULL, /* 8 = MOUNT_DEVFS */
161 MOUNT_AFS, /* 9 */
162 };
163
164 const int nmountcompatnames = __arraycount(mountcompatnames);
165
166 static int
167 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
168 {
169 file_t *dfp;
170 int error;
171
172 if (fdat != AT_FDCWD) {
173 if ((error = fd_getvnode(fdat, &dfp)) != 0)
174 goto out;
175
176 NDAT(ndp, dfp->f_vnode);
177 }
178
179 error = namei(ndp);
180
181 if (fdat != AT_FDCWD)
182 fd_putfile(fdat);
183 out:
184 return error;
185 }
186
187 static int
188 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
189 namei_simple_flags_t sflags, struct vnode **vp_ret)
190 {
191 file_t *dfp;
192 struct vnode *dvp;
193 int error;
194
195 if (fdat != AT_FDCWD) {
196 if ((error = fd_getvnode(fdat, &dfp)) != 0)
197 goto out;
198
199 dvp = dfp->f_vnode;
200 } else {
201 dvp = NULL;
202 }
203
204 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
205
206 if (fdat != AT_FDCWD)
207 fd_putfile(fdat);
208 out:
209 return error;
210 }
211
212 static int
213 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
214 {
215 int error;
216
217 fp->f_flag = flags & FMASK;
218 fp->f_type = DTYPE_VNODE;
219 fp->f_ops = &vnops;
220 fp->f_vnode = vp;
221
222 if (flags & (O_EXLOCK | O_SHLOCK)) {
223 struct flock lf;
224 int type;
225
226 lf.l_whence = SEEK_SET;
227 lf.l_start = 0;
228 lf.l_len = 0;
229 if (flags & O_EXLOCK)
230 lf.l_type = F_WRLCK;
231 else
232 lf.l_type = F_RDLCK;
233 type = F_FLOCK;
234 if ((flags & FNONBLOCK) == 0)
235 type |= F_WAIT;
236 VOP_UNLOCK(vp);
237 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
238 if (error) {
239 (void) vn_close(vp, fp->f_flag, fp->f_cred);
240 fd_abort(l->l_proc, fp, indx);
241 return error;
242 }
243 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
244 atomic_or_uint(&fp->f_flag, FHASLOCK);
245 }
246 if (flags & O_CLOEXEC)
247 fd_set_exclose(l, indx, true);
248 return 0;
249 }
250
251 static int
252 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
253 void *data, size_t *data_len)
254 {
255 struct mount *mp;
256 int error = 0, saved_flags;
257
258 mp = vp->v_mount;
259 saved_flags = mp->mnt_flag;
260
261 /* We can operate only on VV_ROOT nodes. */
262 if ((vp->v_vflag & VV_ROOT) == 0) {
263 error = EINVAL;
264 goto out;
265 }
266
267 /*
268 * We only allow the filesystem to be reloaded if it
269 * is currently mounted read-only. Additionally, we
270 * prevent read-write to read-only downgrades.
271 */
272 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
273 (mp->mnt_flag & MNT_RDONLY) == 0 &&
274 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
275 error = EOPNOTSUPP; /* Needs translation */
276 goto out;
277 }
278
279 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
280 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
281 if (error)
282 goto out;
283
284 if (vfs_busy(mp, NULL)) {
285 error = EPERM;
286 goto out;
287 }
288
289 mutex_enter(&mp->mnt_updating);
290
291 mp->mnt_flag &= ~MNT_OP_FLAGS;
292 mp->mnt_flag |= flags & MNT_OP_FLAGS;
293
294 /*
295 * Set the mount level flags.
296 */
297 if (flags & MNT_RDONLY)
298 mp->mnt_flag |= MNT_RDONLY;
299 else if (mp->mnt_flag & MNT_RDONLY)
300 mp->mnt_iflag |= IMNT_WANTRDWR;
301 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
302 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
303 error = VFS_MOUNT(mp, path, data, data_len);
304
305 if (error && data != NULL) {
306 int error2;
307
308 /*
309 * Update failed; let's try and see if it was an
310 * export request. For compat with 3.0 and earlier.
311 */
312 error2 = vfs_hooks_reexport(mp, path, data);
313
314 /*
315 * Only update error code if the export request was
316 * understood but some problem occurred while
317 * processing it.
318 */
319 if (error2 != EJUSTRETURN)
320 error = error2;
321 }
322
323 if (mp->mnt_iflag & IMNT_WANTRDWR)
324 mp->mnt_flag &= ~MNT_RDONLY;
325 if (error)
326 mp->mnt_flag = saved_flags;
327 mp->mnt_flag &= ~MNT_OP_FLAGS;
328 mp->mnt_iflag &= ~IMNT_WANTRDWR;
329 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
330 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
331 vfs_syncer_add_to_worklist(mp);
332 } else {
333 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
334 vfs_syncer_remove_from_worklist(mp);
335 }
336 mutex_exit(&mp->mnt_updating);
337 vfs_unbusy(mp, false, NULL);
338
339 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
340 (flags & MNT_EXTATTR)) {
341 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
342 NULL, 0, NULL) != 0) {
343 printf("%s: failed to start extattr, error = %d",
344 mp->mnt_stat.f_mntonname, error);
345 mp->mnt_flag &= ~MNT_EXTATTR;
346 }
347 }
348
349 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
350 !(flags & MNT_EXTATTR)) {
351 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
352 NULL, 0, NULL) != 0) {
353 printf("%s: failed to stop extattr, error = %d",
354 mp->mnt_stat.f_mntonname, error);
355 mp->mnt_flag |= MNT_RDONLY;
356 }
357 }
358 out:
359 return (error);
360 }
361
362 static int
363 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
364 struct vfsops **vfsops)
365 {
366 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
367 int error;
368
369 if (type_seg == UIO_USERSPACE) {
370 /* Copy file-system type from userspace. */
371 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
372 } else {
373 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
374 KASSERT(error == 0);
375 }
376
377 if (error) {
378 /*
379 * Historically, filesystem types were identified by numbers.
380 * If we get an integer for the filesystem type instead of a
381 * string, we check to see if it matches one of the historic
382 * filesystem types.
383 */
384 u_long fsindex = (u_long)fstype;
385 if (fsindex >= nmountcompatnames ||
386 mountcompatnames[fsindex] == NULL)
387 return ENODEV;
388 strlcpy(fstypename, mountcompatnames[fsindex],
389 sizeof(fstypename));
390 }
391
392 /* Accept `ufs' as an alias for `ffs', for compatibility. */
393 if (strcmp(fstypename, "ufs") == 0)
394 fstypename[0] = 'f';
395
396 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
397 return 0;
398
399 /* If we can autoload a vfs module, try again */
400 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
401
402 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
403 return 0;
404
405 return ENODEV;
406 }
407
408 static int
409 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
410 void *data, size_t *data_len)
411 {
412 struct mount *mp;
413 int error;
414
415 /* If MNT_GETARGS is specified, it should be the only flag. */
416 if (flags & ~MNT_GETARGS)
417 return EINVAL;
418
419 mp = vp->v_mount;
420
421 /* XXX: probably some notion of "can see" here if we want isolation. */
422 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
423 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
424 if (error)
425 return error;
426
427 if ((vp->v_vflag & VV_ROOT) == 0)
428 return EINVAL;
429
430 if (vfs_busy(mp, NULL))
431 return EPERM;
432
433 mutex_enter(&mp->mnt_updating);
434 mp->mnt_flag &= ~MNT_OP_FLAGS;
435 mp->mnt_flag |= MNT_GETARGS;
436 error = VFS_MOUNT(mp, path, data, data_len);
437 mp->mnt_flag &= ~MNT_OP_FLAGS;
438 mutex_exit(&mp->mnt_updating);
439
440 vfs_unbusy(mp, false, NULL);
441 return (error);
442 }
443
444 int
445 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
446 {
447 /* {
448 syscallarg(const char *) type;
449 syscallarg(const char *) path;
450 syscallarg(int) flags;
451 syscallarg(void *) data;
452 syscallarg(size_t) data_len;
453 } */
454
455 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
456 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
457 SCARG(uap, data_len), retval);
458 }
459
460 int
461 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
462 const char *path, int flags, void *data, enum uio_seg data_seg,
463 size_t data_len, register_t *retval)
464 {
465 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
466 struct vnode *vp;
467 void *data_buf = data;
468 bool vfsopsrele = false;
469 size_t alloc_sz = 0;
470 int error;
471
472 /*
473 * Get vnode to be covered
474 */
475 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
476 if (error != 0) {
477 vp = NULL;
478 goto done;
479 }
480
481 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
482 vfsops = vp->v_mount->mnt_op;
483 } else {
484 /* 'type' is userspace */
485 error = mount_get_vfsops(type, type_seg, &vfsops);
486 if (error != 0)
487 goto done;
488 vfsopsrele = true;
489 }
490
491 /*
492 * We allow data to be NULL, even for userspace. Some fs's don't need
493 * it. The others will handle NULL.
494 */
495 if (data != NULL && data_seg == UIO_USERSPACE) {
496 if (data_len == 0) {
497 /* No length supplied, use default for filesystem */
498 data_len = vfsops->vfs_min_mount_data;
499
500 /*
501 * Hopefully a longer buffer won't make copyin() fail.
502 * For compatibility with 3.0 and earlier.
503 */
504 if (flags & MNT_UPDATE
505 && data_len < sizeof (struct mnt_export_args30))
506 data_len = sizeof (struct mnt_export_args30);
507 }
508 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
509 error = EINVAL;
510 goto done;
511 }
512 alloc_sz = data_len;
513 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
514
515 /* NFS needs the buffer even for mnt_getargs .... */
516 error = copyin(data, data_buf, data_len);
517 if (error != 0)
518 goto done;
519 }
520
521 if (flags & MNT_GETARGS) {
522 if (data_len == 0) {
523 error = EINVAL;
524 goto done;
525 }
526 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
527 if (error != 0)
528 goto done;
529 if (data_seg == UIO_USERSPACE)
530 error = copyout(data_buf, data, data_len);
531 *retval = data_len;
532 } else if (flags & MNT_UPDATE) {
533 error = mount_update(l, vp, path, flags, data_buf, &data_len);
534 } else {
535 /* Locking is handled internally in mount_domount(). */
536 KASSERT(vfsopsrele == true);
537 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
538 &data_len);
539 vfsopsrele = false;
540 }
541
542 done:
543 if (vfsopsrele)
544 vfs_delref(vfsops);
545 if (vp != NULL) {
546 vrele(vp);
547 }
548 if (data_buf != data)
549 kmem_free(data_buf, alloc_sz);
550 return (error);
551 }
552
553 /*
554 * Unmount a file system.
555 *
556 * Note: unmount takes a path to the vnode mounted on as argument,
557 * not special file (as before).
558 */
559 /* ARGSUSED */
560 int
561 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
562 {
563 /* {
564 syscallarg(const char *) path;
565 syscallarg(int) flags;
566 } */
567 struct vnode *vp;
568 struct mount *mp;
569 int error;
570 struct pathbuf *pb;
571 struct nameidata nd;
572
573 error = pathbuf_copyin(SCARG(uap, path), &pb);
574 if (error) {
575 return error;
576 }
577
578 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
579 if ((error = namei(&nd)) != 0) {
580 pathbuf_destroy(pb);
581 return error;
582 }
583 vp = nd.ni_vp;
584 pathbuf_destroy(pb);
585
586 mp = vp->v_mount;
587 atomic_inc_uint(&mp->mnt_refcnt);
588 VOP_UNLOCK(vp);
589
590 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
591 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
592 if (error) {
593 vrele(vp);
594 vfs_destroy(mp);
595 return (error);
596 }
597
598 /*
599 * Don't allow unmounting the root file system.
600 */
601 if (mp->mnt_flag & MNT_ROOTFS) {
602 vrele(vp);
603 vfs_destroy(mp);
604 return (EINVAL);
605 }
606
607 /*
608 * Must be the root of the filesystem
609 */
610 if ((vp->v_vflag & VV_ROOT) == 0) {
611 vrele(vp);
612 vfs_destroy(mp);
613 return (EINVAL);
614 }
615
616 vrele(vp);
617 error = dounmount(mp, SCARG(uap, flags), l);
618 vfs_destroy(mp);
619 return error;
620 }
621
622 /*
623 * Sync each mounted filesystem.
624 */
625 #ifdef DEBUG
626 int syncprt = 0;
627 struct ctldebug debug0 = { "syncprt", &syncprt };
628 #endif
629
630 void
631 do_sys_sync(struct lwp *l)
632 {
633 struct mount *mp, *nmp;
634 int asyncflag;
635
636 mutex_enter(&mountlist_lock);
637 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
638 if (vfs_busy(mp, &nmp)) {
639 continue;
640 }
641 mutex_enter(&mp->mnt_updating);
642 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
643 asyncflag = mp->mnt_flag & MNT_ASYNC;
644 mp->mnt_flag &= ~MNT_ASYNC;
645 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
646 if (asyncflag)
647 mp->mnt_flag |= MNT_ASYNC;
648 }
649 mutex_exit(&mp->mnt_updating);
650 vfs_unbusy(mp, false, &nmp);
651 }
652 mutex_exit(&mountlist_lock);
653 #ifdef DEBUG
654 if (syncprt)
655 vfs_bufstats();
656 #endif /* DEBUG */
657 }
658
659 /* ARGSUSED */
660 int
661 sys_sync(struct lwp *l, const void *v, register_t *retval)
662 {
663 do_sys_sync(l);
664 return (0);
665 }
666
667
668 /*
669 * Access or change filesystem quotas.
670 *
671 * (this is really 14 different calls bundled into one)
672 */
673
674 static int
675 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
676 {
677 struct quotastat info_k;
678 int error;
679
680 /* ensure any padding bytes are cleared */
681 memset(&info_k, 0, sizeof(info_k));
682
683 error = vfs_quotactl_stat(mp, &info_k);
684 if (error) {
685 return error;
686 }
687
688 return copyout(&info_k, info_u, sizeof(info_k));
689 }
690
691 static int
692 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
693 struct quotaidtypestat *info_u)
694 {
695 struct quotaidtypestat info_k;
696 int error;
697
698 /* ensure any padding bytes are cleared */
699 memset(&info_k, 0, sizeof(info_k));
700
701 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
702 if (error) {
703 return error;
704 }
705
706 return copyout(&info_k, info_u, sizeof(info_k));
707 }
708
709 static int
710 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
711 struct quotaobjtypestat *info_u)
712 {
713 struct quotaobjtypestat info_k;
714 int error;
715
716 /* ensure any padding bytes are cleared */
717 memset(&info_k, 0, sizeof(info_k));
718
719 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
720 if (error) {
721 return error;
722 }
723
724 return copyout(&info_k, info_u, sizeof(info_k));
725 }
726
727 static int
728 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
729 struct quotaval *val_u)
730 {
731 struct quotakey key_k;
732 struct quotaval val_k;
733 int error;
734
735 /* ensure any padding bytes are cleared */
736 memset(&val_k, 0, sizeof(val_k));
737
738 error = copyin(key_u, &key_k, sizeof(key_k));
739 if (error) {
740 return error;
741 }
742
743 error = vfs_quotactl_get(mp, &key_k, &val_k);
744 if (error) {
745 return error;
746 }
747
748 return copyout(&val_k, val_u, sizeof(val_k));
749 }
750
751 static int
752 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
753 const struct quotaval *val_u)
754 {
755 struct quotakey key_k;
756 struct quotaval val_k;
757 int error;
758
759 error = copyin(key_u, &key_k, sizeof(key_k));
760 if (error) {
761 return error;
762 }
763
764 error = copyin(val_u, &val_k, sizeof(val_k));
765 if (error) {
766 return error;
767 }
768
769 return vfs_quotactl_put(mp, &key_k, &val_k);
770 }
771
772 static int
773 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
774 {
775 struct quotakey key_k;
776 int error;
777
778 error = copyin(key_u, &key_k, sizeof(key_k));
779 if (error) {
780 return error;
781 }
782
783 return vfs_quotactl_del(mp, &key_k);
784 }
785
786 static int
787 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
788 {
789 struct quotakcursor cursor_k;
790 int error;
791
792 /* ensure any padding bytes are cleared */
793 memset(&cursor_k, 0, sizeof(cursor_k));
794
795 error = vfs_quotactl_cursoropen(mp, &cursor_k);
796 if (error) {
797 return error;
798 }
799
800 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
801 }
802
803 static int
804 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
805 {
806 struct quotakcursor cursor_k;
807 int error;
808
809 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
810 if (error) {
811 return error;
812 }
813
814 return vfs_quotactl_cursorclose(mp, &cursor_k);
815 }
816
817 static int
818 do_sys_quotactl_cursorskipidtype(struct mount *mp,
819 struct quotakcursor *cursor_u, int idtype)
820 {
821 struct quotakcursor cursor_k;
822 int error;
823
824 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
825 if (error) {
826 return error;
827 }
828
829 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
830 if (error) {
831 return error;
832 }
833
834 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
835 }
836
837 static int
838 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
839 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
840 unsigned *ret_u)
841 {
842 #define CGET_STACK_MAX 8
843 struct quotakcursor cursor_k;
844 struct quotakey stackkeys[CGET_STACK_MAX];
845 struct quotaval stackvals[CGET_STACK_MAX];
846 struct quotakey *keys_k;
847 struct quotaval *vals_k;
848 unsigned ret_k;
849 int error;
850
851 if (maxnum > 128) {
852 maxnum = 128;
853 }
854
855 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
856 if (error) {
857 return error;
858 }
859
860 if (maxnum <= CGET_STACK_MAX) {
861 keys_k = stackkeys;
862 vals_k = stackvals;
863 /* ensure any padding bytes are cleared */
864 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
865 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
866 } else {
867 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
868 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
869 }
870
871 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
872 &ret_k);
873 if (error) {
874 goto fail;
875 }
876
877 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
878 if (error) {
879 goto fail;
880 }
881
882 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
883 if (error) {
884 goto fail;
885 }
886
887 error = copyout(&ret_k, ret_u, sizeof(ret_k));
888 if (error) {
889 goto fail;
890 }
891
892 /* do last to maximize the chance of being able to recover a failure */
893 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
894
895 fail:
896 if (keys_k != stackkeys) {
897 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
898 }
899 if (vals_k != stackvals) {
900 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
901 }
902 return error;
903 }
904
905 static int
906 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
907 int *ret_u)
908 {
909 struct quotakcursor cursor_k;
910 int ret_k;
911 int error;
912
913 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
914 if (error) {
915 return error;
916 }
917
918 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
919 if (error) {
920 return error;
921 }
922
923 error = copyout(&ret_k, ret_u, sizeof(ret_k));
924 if (error) {
925 return error;
926 }
927
928 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
929 }
930
931 static int
932 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
933 {
934 struct quotakcursor cursor_k;
935 int error;
936
937 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
938 if (error) {
939 return error;
940 }
941
942 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
943 if (error) {
944 return error;
945 }
946
947 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
948 }
949
950 static int
951 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
952 {
953 char *path_k;
954 int error;
955
956 /* XXX this should probably be a struct pathbuf */
957 path_k = PNBUF_GET();
958 error = copyin(path_u, path_k, PATH_MAX);
959 if (error) {
960 PNBUF_PUT(path_k);
961 return error;
962 }
963
964 error = vfs_quotactl_quotaon(mp, idtype, path_k);
965
966 PNBUF_PUT(path_k);
967 return error;
968 }
969
/*
 * QUOTACTL_QUOTAOFF: no user data to transfer; just forward to
 * vfs_quotactl_quotaoff() for id type idtype on mp.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	const int rv = vfs_quotactl_quotaoff(mp, idtype);

	return rv;
}
975
976 int
977 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
978 {
979 struct mount *mp;
980 struct vnode *vp;
981 int error;
982
983 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
984 if (error != 0)
985 return (error);
986 mp = vp->v_mount;
987
988 switch (args->qc_op) {
989 case QUOTACTL_STAT:
990 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
991 break;
992 case QUOTACTL_IDTYPESTAT:
993 error = do_sys_quotactl_idtypestat(mp,
994 args->u.idtypestat.qc_idtype,
995 args->u.idtypestat.qc_info);
996 break;
997 case QUOTACTL_OBJTYPESTAT:
998 error = do_sys_quotactl_objtypestat(mp,
999 args->u.objtypestat.qc_objtype,
1000 args->u.objtypestat.qc_info);
1001 break;
1002 case QUOTACTL_GET:
1003 error = do_sys_quotactl_get(mp,
1004 args->u.get.qc_key,
1005 args->u.get.qc_val);
1006 break;
1007 case QUOTACTL_PUT:
1008 error = do_sys_quotactl_put(mp,
1009 args->u.put.qc_key,
1010 args->u.put.qc_val);
1011 break;
1012 case QUOTACTL_DEL:
1013 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1014 break;
1015 case QUOTACTL_CURSOROPEN:
1016 error = do_sys_quotactl_cursoropen(mp,
1017 args->u.cursoropen.qc_cursor);
1018 break;
1019 case QUOTACTL_CURSORCLOSE:
1020 error = do_sys_quotactl_cursorclose(mp,
1021 args->u.cursorclose.qc_cursor);
1022 break;
1023 case QUOTACTL_CURSORSKIPIDTYPE:
1024 error = do_sys_quotactl_cursorskipidtype(mp,
1025 args->u.cursorskipidtype.qc_cursor,
1026 args->u.cursorskipidtype.qc_idtype);
1027 break;
1028 case QUOTACTL_CURSORGET:
1029 error = do_sys_quotactl_cursorget(mp,
1030 args->u.cursorget.qc_cursor,
1031 args->u.cursorget.qc_keys,
1032 args->u.cursorget.qc_vals,
1033 args->u.cursorget.qc_maxnum,
1034 args->u.cursorget.qc_ret);
1035 break;
1036 case QUOTACTL_CURSORATEND:
1037 error = do_sys_quotactl_cursoratend(mp,
1038 args->u.cursoratend.qc_cursor,
1039 args->u.cursoratend.qc_ret);
1040 break;
1041 case QUOTACTL_CURSORREWIND:
1042 error = do_sys_quotactl_cursorrewind(mp,
1043 args->u.cursorrewind.qc_cursor);
1044 break;
1045 case QUOTACTL_QUOTAON:
1046 error = do_sys_quotactl_quotaon(mp,
1047 args->u.quotaon.qc_idtype,
1048 args->u.quotaon.qc_quotafile);
1049 break;
1050 case QUOTACTL_QUOTAOFF:
1051 error = do_sys_quotactl_quotaoff(mp,
1052 args->u.quotaoff.qc_idtype);
1053 break;
1054 default:
1055 error = EINVAL;
1056 break;
1057 }
1058
1059 vrele(vp);
1060 return error;
1061 }
1062
1063 /* ARGSUSED */
1064 int
1065 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1066 register_t *retval)
1067 {
1068 /* {
1069 syscallarg(const char *) path;
1070 syscallarg(struct quotactl_args *) args;
1071 } */
1072 struct quotactl_args args;
1073 int error;
1074
1075 error = copyin(SCARG(uap, args), &args, sizeof(args));
1076 if (error) {
1077 return error;
1078 }
1079
1080 return do_sys_quotactl(SCARG(uap, path), &args);
1081 }
1082
1083 int
1084 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1085 int root)
1086 {
1087 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1088 int error = 0;
1089
1090 /*
1091 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1092 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1093 * overrides MNT_NOWAIT.
1094 */
1095 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1096 (flags != MNT_WAIT && flags != 0)) {
1097 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1098 goto done;
1099 }
1100
1101 /* Get the filesystem stats now */
1102 memset(sp, 0, sizeof(*sp));
1103 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1104 return error;
1105 }
1106
1107 if (cwdi->cwdi_rdir == NULL)
1108 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1109 done:
1110 if (cwdi->cwdi_rdir != NULL) {
1111 size_t len;
1112 char *bp;
1113 char c;
1114 char *path = PNBUF_GET();
1115
1116 bp = path + MAXPATHLEN;
1117 *--bp = '\0';
1118 rw_enter(&cwdi->cwdi_lock, RW_READER);
1119 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1120 MAXPATHLEN / 2, 0, l);
1121 rw_exit(&cwdi->cwdi_lock);
1122 if (error) {
1123 PNBUF_PUT(path);
1124 return error;
1125 }
1126 len = strlen(bp);
1127 if (len != 1) {
1128 /*
1129 * for mount points that are below our root, we can see
1130 * them, so we fix up the pathname and return them. The
1131 * rest we cannot see, so we don't allow viewing the
1132 * data.
1133 */
1134 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1135 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1136 (void)strlcpy(sp->f_mntonname,
1137 c == '\0' ? "/" : &sp->f_mntonname[len],
1138 sizeof(sp->f_mntonname));
1139 } else {
1140 if (root)
1141 (void)strlcpy(sp->f_mntonname, "/",
1142 sizeof(sp->f_mntonname));
1143 else
1144 error = EPERM;
1145 }
1146 }
1147 PNBUF_PUT(path);
1148 }
1149 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1150 return error;
1151 }
1152
1153 /*
1154 * Get filesystem statistics by path.
1155 */
1156 int
1157 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1158 {
1159 struct mount *mp;
1160 int error;
1161 struct vnode *vp;
1162
1163 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1164 if (error != 0)
1165 return error;
1166 mp = vp->v_mount;
1167 error = dostatvfs(mp, sb, l, flags, 1);
1168 vrele(vp);
1169 return error;
1170 }
1171
1172 /* ARGSUSED */
1173 int
1174 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1175 {
1176 /* {
1177 syscallarg(const char *) path;
1178 syscallarg(struct statvfs *) buf;
1179 syscallarg(int) flags;
1180 } */
1181 struct statvfs *sb;
1182 int error;
1183
1184 sb = STATVFSBUF_GET();
1185 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1186 if (error == 0)
1187 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1188 STATVFSBUF_PUT(sb);
1189 return error;
1190 }
1191
1192 /*
1193 * Get filesystem statistics by fd.
1194 */
1195 int
1196 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1197 {
1198 file_t *fp;
1199 struct mount *mp;
1200 int error;
1201
1202 /* fd_getvnode() will use the descriptor for us */
1203 if ((error = fd_getvnode(fd, &fp)) != 0)
1204 return (error);
1205 mp = fp->f_vnode->v_mount;
1206 error = dostatvfs(mp, sb, curlwp, flags, 1);
1207 fd_putfile(fd);
1208 return error;
1209 }
1210
1211 /* ARGSUSED */
1212 int
1213 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1214 {
1215 /* {
1216 syscallarg(int) fd;
1217 syscallarg(struct statvfs *) buf;
1218 syscallarg(int) flags;
1219 } */
1220 struct statvfs *sb;
1221 int error;
1222
1223 sb = STATVFSBUF_GET();
1224 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1225 if (error == 0)
1226 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1227 STATVFSBUF_PUT(sb);
1228 return error;
1229 }
1230
1231
1232 /*
1233 * Get statistics on all filesystems.
1234 */
1235 int
1236 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1237 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1238 register_t *retval)
1239 {
1240 int root = 0;
1241 struct proc *p = l->l_proc;
1242 struct mount *mp, *nmp;
1243 struct statvfs *sb;
1244 size_t count, maxcount;
1245 int error = 0;
1246
1247 sb = STATVFSBUF_GET();
1248 maxcount = bufsize / entry_sz;
1249 mutex_enter(&mountlist_lock);
1250 count = 0;
1251 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1252 if (vfs_busy(mp, &nmp)) {
1253 continue;
1254 }
1255 if (sfsp && count < maxcount) {
1256 error = dostatvfs(mp, sb, l, flags, 0);
1257 if (error) {
1258 vfs_unbusy(mp, false, &nmp);
1259 error = 0;
1260 continue;
1261 }
1262 error = copyfn(sb, sfsp, entry_sz);
1263 if (error) {
1264 vfs_unbusy(mp, false, NULL);
1265 goto out;
1266 }
1267 sfsp = (char *)sfsp + entry_sz;
1268 root |= strcmp(sb->f_mntonname, "/") == 0;
1269 }
1270 count++;
1271 vfs_unbusy(mp, false, &nmp);
1272 }
1273 mutex_exit(&mountlist_lock);
1274
1275 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1276 /*
1277 * fake a root entry
1278 */
1279 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1280 sb, l, flags, 1);
1281 if (error != 0)
1282 goto out;
1283 if (sfsp) {
1284 error = copyfn(sb, sfsp, entry_sz);
1285 if (error != 0)
1286 goto out;
1287 }
1288 count++;
1289 }
1290 if (sfsp && count > maxcount)
1291 *retval = maxcount;
1292 else
1293 *retval = count;
1294 out:
1295 STATVFSBUF_PUT(sb);
1296 return error;
1297 }
1298
1299 int
1300 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1301 {
1302 /* {
1303 syscallarg(struct statvfs *) buf;
1304 syscallarg(size_t) bufsize;
1305 syscallarg(int) flags;
1306 } */
1307
1308 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1309 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1310 }
1311
1312 /*
1313 * Change current working directory to a given file descriptor.
1314 */
1315 /* ARGSUSED */
1316 int
1317 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1318 {
1319 /* {
1320 syscallarg(int) fd;
1321 } */
1322 struct proc *p = l->l_proc;
1323 struct cwdinfo *cwdi;
1324 struct vnode *vp, *tdp;
1325 struct mount *mp;
1326 file_t *fp;
1327 int error, fd;
1328
1329 /* fd_getvnode() will use the descriptor for us */
1330 fd = SCARG(uap, fd);
1331 if ((error = fd_getvnode(fd, &fp)) != 0)
1332 return (error);
1333 vp = fp->f_vnode;
1334
1335 vref(vp);
1336 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1337 if (vp->v_type != VDIR)
1338 error = ENOTDIR;
1339 else
1340 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1341 if (error) {
1342 vput(vp);
1343 goto out;
1344 }
1345 while ((mp = vp->v_mountedhere) != NULL) {
1346 error = vfs_busy(mp, NULL);
1347 vput(vp);
1348 if (error != 0)
1349 goto out;
1350 error = VFS_ROOT(mp, &tdp);
1351 vfs_unbusy(mp, false, NULL);
1352 if (error)
1353 goto out;
1354 vp = tdp;
1355 }
1356 VOP_UNLOCK(vp);
1357
1358 /*
1359 * Disallow changing to a directory not under the process's
1360 * current root directory (if there is one).
1361 */
1362 cwdi = p->p_cwdi;
1363 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1364 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1365 vrele(vp);
1366 error = EPERM; /* operation not permitted */
1367 } else {
1368 vrele(cwdi->cwdi_cdir);
1369 cwdi->cwdi_cdir = vp;
1370 }
1371 rw_exit(&cwdi->cwdi_lock);
1372
1373 out:
1374 fd_putfile(fd);
1375 return (error);
1376 }
1377
1378 /*
1379 * Change this process's notion of the root directory to a given file
1380 * descriptor.
1381 */
1382 int
1383 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1384 {
1385 struct proc *p = l->l_proc;
1386 struct vnode *vp;
1387 file_t *fp;
1388 int error, fd = SCARG(uap, fd);
1389
1390 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1391 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1392 return error;
1393 /* fd_getvnode() will use the descriptor for us */
1394 if ((error = fd_getvnode(fd, &fp)) != 0)
1395 return error;
1396 vp = fp->f_vnode;
1397 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1398 if (vp->v_type != VDIR)
1399 error = ENOTDIR;
1400 else
1401 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1402 VOP_UNLOCK(vp);
1403 if (error)
1404 goto out;
1405 vref(vp);
1406
1407 change_root(p->p_cwdi, vp, l);
1408
1409 out:
1410 fd_putfile(fd);
1411 return (error);
1412 }
1413
1414 /*
1415 * Change current working directory (``.'').
1416 */
1417 /* ARGSUSED */
1418 int
1419 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1420 {
1421 /* {
1422 syscallarg(const char *) path;
1423 } */
1424 struct proc *p = l->l_proc;
1425 struct cwdinfo *cwdi;
1426 int error;
1427 struct vnode *vp;
1428
1429 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1430 &vp, l)) != 0)
1431 return (error);
1432 cwdi = p->p_cwdi;
1433 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1434 vrele(cwdi->cwdi_cdir);
1435 cwdi->cwdi_cdir = vp;
1436 rw_exit(&cwdi->cwdi_lock);
1437 return (0);
1438 }
1439
1440 /*
1441 * Change notion of root (``/'') directory.
1442 */
1443 /* ARGSUSED */
1444 int
1445 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1446 {
1447 /* {
1448 syscallarg(const char *) path;
1449 } */
1450 struct proc *p = l->l_proc;
1451 int error;
1452 struct vnode *vp;
1453
1454 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1455 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1456 return (error);
1457 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1458 &vp, l)) != 0)
1459 return (error);
1460
1461 change_root(p->p_cwdi, vp, l);
1462
1463 return (0);
1464 }
1465
1466 /*
1467 * Common routine for chroot and fchroot.
1468 * NB: callers need to properly authorize the change root operation.
1469 */
1470 void
1471 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1472 {
1473 struct proc *p = l->l_proc;
1474 kauth_cred_t ncred;
1475
1476 ncred = kauth_cred_alloc();
1477
1478 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1479 if (cwdi->cwdi_rdir != NULL)
1480 vrele(cwdi->cwdi_rdir);
1481 cwdi->cwdi_rdir = vp;
1482
1483 /*
1484 * Prevent escaping from chroot by putting the root under
1485 * the working directory. Silently chdir to / if we aren't
1486 * already there.
1487 */
1488 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1489 /*
1490 * XXX would be more failsafe to change directory to a
1491 * deadfs node here instead
1492 */
1493 vrele(cwdi->cwdi_cdir);
1494 vref(vp);
1495 cwdi->cwdi_cdir = vp;
1496 }
1497 rw_exit(&cwdi->cwdi_lock);
1498
1499 /* Get a write lock on the process credential. */
1500 proc_crmod_enter();
1501
1502 kauth_cred_clone(p->p_cred, ncred);
1503 kauth_proc_chroot(ncred, p->p_cwdi);
1504
1505 /* Broadcast our credentials to the process and other LWPs. */
1506 proc_crmod_leave(ncred, p->p_cred, true);
1507 }
1508
1509 /*
1510 * Common routine for chroot and chdir.
1511 * XXX "where" should be enum uio_seg
1512 */
1513 int
1514 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1515 {
1516 struct pathbuf *pb;
1517 struct nameidata nd;
1518 int error;
1519
1520 error = pathbuf_maybe_copyin(path, where, &pb);
1521 if (error) {
1522 return error;
1523 }
1524 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1525 if ((error = namei(&nd)) != 0) {
1526 pathbuf_destroy(pb);
1527 return error;
1528 }
1529 *vpp = nd.ni_vp;
1530 pathbuf_destroy(pb);
1531
1532 if ((*vpp)->v_type != VDIR)
1533 error = ENOTDIR;
1534 else
1535 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1536
1537 if (error)
1538 vput(*vpp);
1539 else
1540 VOP_UNLOCK(*vpp);
1541 return (error);
1542 }
1543
1544 /*
1545 * Internals of sys_open - path has already been converted into a pathbuf
1546 * (so we can easily reuse this function from other parts of the kernel,
1547 * like posix_spawn post-processing).
1548 */
1549 int
1550 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1551 int open_mode, int *fd)
1552 {
1553 struct proc *p = l->l_proc;
1554 struct cwdinfo *cwdi = p->p_cwdi;
1555 file_t *fp;
1556 struct vnode *vp;
1557 int flags, cmode;
1558 int indx, error;
1559 struct nameidata nd;
1560
1561 if (open_flags & O_SEARCH) {
1562 open_flags &= ~(int)O_SEARCH;
1563 }
1564
1565 flags = FFLAGS(open_flags);
1566 if ((flags & (FREAD | FWRITE)) == 0)
1567 return EINVAL;
1568
1569 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1570 return error;
1571 }
1572
1573 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1574 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1575 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1576 if (dvp != NULL)
1577 NDAT(&nd, dvp);
1578
1579 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1580 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1581 fd_abort(p, fp, indx);
1582 if ((error == EDUPFD || error == EMOVEFD) &&
1583 l->l_dupfd >= 0 && /* XXX from fdopen */
1584 (error =
1585 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1586 *fd = indx;
1587 return 0;
1588 }
1589 if (error == ERESTART)
1590 error = EINTR;
1591 return error;
1592 }
1593
1594 l->l_dupfd = 0;
1595 vp = nd.ni_vp;
1596
1597 if ((error = open_setfp(l, fp, vp, indx, flags)))
1598 return error;
1599
1600 VOP_UNLOCK(vp);
1601 *fd = indx;
1602 fd_affix(p, fp, indx);
1603 return 0;
1604 }
1605
1606 int
1607 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1608 {
1609 struct pathbuf *pb;
1610 int error, oflags;
1611
1612 oflags = FFLAGS(open_flags);
1613 if ((oflags & (FREAD | FWRITE)) == 0)
1614 return EINVAL;
1615
1616 pb = pathbuf_create(path);
1617 if (pb == NULL)
1618 return ENOMEM;
1619
1620 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1621 pathbuf_destroy(pb);
1622
1623 return error;
1624 }
1625
1626 /*
1627 * Check permissions, allocate an open file structure,
1628 * and call the device open routine if any.
1629 */
1630 static int
1631 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1632 int mode, int *fd)
1633 {
1634 file_t *dfp = NULL;
1635 struct vnode *dvp = NULL;
1636 struct pathbuf *pb;
1637 int error;
1638
1639 #ifdef COMPAT_10 /* XXX: and perhaps later */
1640 if (path == NULL) {
1641 pb = pathbuf_create(".");
1642 if (pb == NULL)
1643 return ENOMEM;
1644 } else
1645 #endif
1646 {
1647 error = pathbuf_copyin(path, &pb);
1648 if (error)
1649 return error;
1650 }
1651
1652 if (fdat != AT_FDCWD) {
1653 /* fd_getvnode() will use the descriptor for us */
1654 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1655 goto out;
1656
1657 dvp = dfp->f_vnode;
1658 }
1659
1660 error = do_open(l, dvp, pb, flags, mode, fd);
1661
1662 if (dfp != NULL)
1663 fd_putfile(fdat);
1664 out:
1665 pathbuf_destroy(pb);
1666 return error;
1667 }
1668
1669 int
1670 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1671 {
1672 /* {
1673 syscallarg(const char *) path;
1674 syscallarg(int) flags;
1675 syscallarg(int) mode;
1676 } */
1677 int error;
1678 int fd;
1679
1680 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1681 SCARG(uap, flags), SCARG(uap, mode), &fd);
1682
1683 if (error == 0)
1684 *retval = fd;
1685
1686 return error;
1687 }
1688
1689 int
1690 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1691 {
1692 /* {
1693 syscallarg(int) fd;
1694 syscallarg(const char *) path;
1695 syscallarg(int) oflags;
1696 syscallarg(int) mode;
1697 } */
1698 int error;
1699 int fd;
1700
1701 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1702 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1703
1704 if (error == 0)
1705 *retval = fd;
1706
1707 return error;
1708 }
1709
1710 static void
1711 vfs__fhfree(fhandle_t *fhp)
1712 {
1713 size_t fhsize;
1714
1715 fhsize = FHANDLE_SIZE(fhp);
1716 kmem_free(fhp, fhsize);
1717 }
1718
1719 /*
1720 * vfs_composefh: compose a filehandle.
1721 */
1722
1723 int
1724 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1725 {
1726 struct mount *mp;
1727 struct fid *fidp;
1728 int error;
1729 size_t needfhsize;
1730 size_t fidsize;
1731
1732 mp = vp->v_mount;
1733 fidp = NULL;
1734 if (*fh_size < FHANDLE_SIZE_MIN) {
1735 fidsize = 0;
1736 } else {
1737 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1738 if (fhp != NULL) {
1739 memset(fhp, 0, *fh_size);
1740 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1741 fidp = &fhp->fh_fid;
1742 }
1743 }
1744 error = VFS_VPTOFH(vp, fidp, &fidsize);
1745 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1746 if (error == 0 && *fh_size < needfhsize) {
1747 error = E2BIG;
1748 }
1749 *fh_size = needfhsize;
1750 return error;
1751 }
1752
1753 int
1754 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1755 {
1756 struct mount *mp;
1757 fhandle_t *fhp;
1758 size_t fhsize;
1759 size_t fidsize;
1760 int error;
1761
1762 mp = vp->v_mount;
1763 fidsize = 0;
1764 error = VFS_VPTOFH(vp, NULL, &fidsize);
1765 KASSERT(error != 0);
1766 if (error != E2BIG) {
1767 goto out;
1768 }
1769 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1770 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1771 if (fhp == NULL) {
1772 error = ENOMEM;
1773 goto out;
1774 }
1775 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1776 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1777 if (error == 0) {
1778 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1779 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1780 *fhpp = fhp;
1781 } else {
1782 kmem_free(fhp, fhsize);
1783 }
1784 out:
1785 return error;
1786 }
1787
1788 void
1789 vfs_composefh_free(fhandle_t *fhp)
1790 {
1791
1792 vfs__fhfree(fhp);
1793 }
1794
1795 /*
1796 * vfs_fhtovp: lookup a vnode by a filehandle.
1797 */
1798
1799 int
1800 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1801 {
1802 struct mount *mp;
1803 int error;
1804
1805 *vpp = NULL;
1806 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1807 if (mp == NULL) {
1808 error = ESTALE;
1809 goto out;
1810 }
1811 if (mp->mnt_op->vfs_fhtovp == NULL) {
1812 error = EOPNOTSUPP;
1813 goto out;
1814 }
1815 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1816 out:
1817 return error;
1818 }
1819
1820 /*
1821 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1822 * the needed size.
1823 */
1824
1825 int
1826 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1827 {
1828 fhandle_t *fhp;
1829 int error;
1830
1831 if (fhsize > FHANDLE_SIZE_MAX) {
1832 return EINVAL;
1833 }
1834 if (fhsize < FHANDLE_SIZE_MIN) {
1835 return EINVAL;
1836 }
1837 again:
1838 fhp = kmem_alloc(fhsize, KM_SLEEP);
1839 if (fhp == NULL) {
1840 return ENOMEM;
1841 }
1842 error = copyin(ufhp, fhp, fhsize);
1843 if (error == 0) {
1844 /* XXX this check shouldn't be here */
1845 if (FHANDLE_SIZE(fhp) == fhsize) {
1846 *fhpp = fhp;
1847 return 0;
1848 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1849 /*
1850 * a kludge for nfsv2 padded handles.
1851 */
1852 size_t sz;
1853
1854 sz = FHANDLE_SIZE(fhp);
1855 kmem_free(fhp, fhsize);
1856 fhsize = sz;
1857 goto again;
1858 } else {
1859 /*
1860 * userland told us wrong size.
1861 */
1862 error = EINVAL;
1863 }
1864 }
1865 kmem_free(fhp, fhsize);
1866 return error;
1867 }
1868
1869 void
1870 vfs_copyinfh_free(fhandle_t *fhp)
1871 {
1872
1873 vfs__fhfree(fhp);
1874 }
1875
1876 /*
1877 * Get file handle system call
1878 */
1879 int
1880 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1881 {
1882 /* {
1883 syscallarg(char *) fname;
1884 syscallarg(fhandle_t *) fhp;
1885 syscallarg(size_t *) fh_size;
1886 } */
1887 struct vnode *vp;
1888 fhandle_t *fh;
1889 int error;
1890 struct pathbuf *pb;
1891 struct nameidata nd;
1892 size_t sz;
1893 size_t usz;
1894
1895 /*
1896 * Must be super user
1897 */
1898 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1899 0, NULL, NULL, NULL);
1900 if (error)
1901 return (error);
1902
1903 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1904 if (error) {
1905 return error;
1906 }
1907 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1908 error = namei(&nd);
1909 if (error) {
1910 pathbuf_destroy(pb);
1911 return error;
1912 }
1913 vp = nd.ni_vp;
1914 pathbuf_destroy(pb);
1915
1916 error = vfs_composefh_alloc(vp, &fh);
1917 vput(vp);
1918 if (error != 0) {
1919 return error;
1920 }
1921 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1922 if (error != 0) {
1923 goto out;
1924 }
1925 sz = FHANDLE_SIZE(fh);
1926 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1927 if (error != 0) {
1928 goto out;
1929 }
1930 if (usz >= sz) {
1931 error = copyout(fh, SCARG(uap, fhp), sz);
1932 } else {
1933 error = E2BIG;
1934 }
1935 out:
1936 vfs_composefh_free(fh);
1937 return (error);
1938 }
1939
1940 /*
1941 * Open a file given a file handle.
1942 *
1943 * Check permissions, allocate an open file structure,
1944 * and call the device open routine if any.
1945 */
1946
1947 int
1948 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1949 register_t *retval)
1950 {
1951 file_t *fp;
1952 struct vnode *vp = NULL;
1953 kauth_cred_t cred = l->l_cred;
1954 file_t *nfp;
1955 int indx, error;
1956 struct vattr va;
1957 fhandle_t *fh;
1958 int flags;
1959 proc_t *p;
1960
1961 p = curproc;
1962
1963 /*
1964 * Must be super user
1965 */
1966 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1967 0, NULL, NULL, NULL)))
1968 return (error);
1969
1970 if (oflags & O_SEARCH) {
1971 oflags &= ~(int)O_SEARCH;
1972 }
1973
1974 flags = FFLAGS(oflags);
1975 if ((flags & (FREAD | FWRITE)) == 0)
1976 return (EINVAL);
1977 if ((flags & O_CREAT))
1978 return (EINVAL);
1979 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1980 return (error);
1981 fp = nfp;
1982 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1983 if (error != 0) {
1984 goto bad;
1985 }
1986 error = vfs_fhtovp(fh, &vp);
1987 vfs_copyinfh_free(fh);
1988 if (error != 0) {
1989 goto bad;
1990 }
1991
1992 /* Now do an effective vn_open */
1993
1994 if (vp->v_type == VSOCK) {
1995 error = EOPNOTSUPP;
1996 goto bad;
1997 }
1998 error = vn_openchk(vp, cred, flags);
1999 if (error != 0)
2000 goto bad;
2001 if (flags & O_TRUNC) {
2002 VOP_UNLOCK(vp); /* XXX */
2003 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
2004 vattr_null(&va);
2005 va.va_size = 0;
2006 error = VOP_SETATTR(vp, &va, cred);
2007 if (error)
2008 goto bad;
2009 }
2010 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
2011 goto bad;
2012 if (flags & FWRITE) {
2013 mutex_enter(vp->v_interlock);
2014 vp->v_writecount++;
2015 mutex_exit(vp->v_interlock);
2016 }
2017
2018 /* done with modified vn_open, now finish what sys_open does. */
2019 if ((error = open_setfp(l, fp, vp, indx, flags)))
2020 return error;
2021
2022 VOP_UNLOCK(vp);
2023 *retval = indx;
2024 fd_affix(p, fp, indx);
2025 return (0);
2026
2027 bad:
2028 fd_abort(p, fp, indx);
2029 if (vp != NULL)
2030 vput(vp);
2031 return (error);
2032 }
2033
2034 int
2035 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2036 {
2037 /* {
2038 syscallarg(const void *) fhp;
2039 syscallarg(size_t) fh_size;
2040 syscallarg(int) flags;
2041 } */
2042
2043 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2044 SCARG(uap, flags), retval);
2045 }
2046
2047 int
2048 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2049 {
2050 int error;
2051 fhandle_t *fh;
2052 struct vnode *vp;
2053
2054 /*
2055 * Must be super user
2056 */
2057 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2058 0, NULL, NULL, NULL)))
2059 return (error);
2060
2061 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2062 if (error != 0)
2063 return error;
2064
2065 error = vfs_fhtovp(fh, &vp);
2066 vfs_copyinfh_free(fh);
2067 if (error != 0)
2068 return error;
2069
2070 error = vn_stat(vp, sb);
2071 vput(vp);
2072 return error;
2073 }
2074
2075
2076 /* ARGSUSED */
2077 int
2078 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2079 {
2080 /* {
2081 syscallarg(const void *) fhp;
2082 syscallarg(size_t) fh_size;
2083 syscallarg(struct stat *) sb;
2084 } */
2085 struct stat sb;
2086 int error;
2087
2088 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2089 if (error)
2090 return error;
2091 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2092 }
2093
2094 int
2095 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2096 int flags)
2097 {
2098 fhandle_t *fh;
2099 struct mount *mp;
2100 struct vnode *vp;
2101 int error;
2102
2103 /*
2104 * Must be super user
2105 */
2106 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2107 0, NULL, NULL, NULL)))
2108 return error;
2109
2110 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2111 if (error != 0)
2112 return error;
2113
2114 error = vfs_fhtovp(fh, &vp);
2115 vfs_copyinfh_free(fh);
2116 if (error != 0)
2117 return error;
2118
2119 mp = vp->v_mount;
2120 error = dostatvfs(mp, sb, l, flags, 1);
2121 vput(vp);
2122 return error;
2123 }
2124
2125 /* ARGSUSED */
2126 int
2127 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2128 {
2129 /* {
2130 syscallarg(const void *) fhp;
2131 syscallarg(size_t) fh_size;
2132 syscallarg(struct statvfs *) buf;
2133 syscallarg(int) flags;
2134 } */
2135 struct statvfs *sb = STATVFSBUF_GET();
2136 int error;
2137
2138 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2139 SCARG(uap, flags));
2140 if (error == 0)
2141 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2142 STATVFSBUF_PUT(sb);
2143 return error;
2144 }
2145
2146 /*
2147 * Create a special file.
2148 */
2149 /* ARGSUSED */
2150 int
2151 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2152 register_t *retval)
2153 {
2154 /* {
2155 syscallarg(const char *) path;
2156 syscallarg(mode_t) mode;
2157 syscallarg(dev_t) dev;
2158 } */
2159 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2160 SCARG(uap, dev), retval, UIO_USERSPACE);
2161 }
2162
2163 int
2164 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2165 register_t *retval)
2166 {
2167 /* {
2168 syscallarg(int) fd;
2169 syscallarg(const char *) path;
2170 syscallarg(mode_t) mode;
2171 syscallarg(int) pad;
2172 syscallarg(dev_t) dev;
2173 } */
2174
2175 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2176 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2177 }
2178
2179 int
2180 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2181 register_t *retval, enum uio_seg seg)
2182 {
2183 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2184 }
2185
2186 int
2187 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2188 dev_t dev, register_t *retval, enum uio_seg seg)
2189 {
2190 struct proc *p = l->l_proc;
2191 struct vnode *vp;
2192 struct vattr vattr;
2193 int error, optype;
2194 struct pathbuf *pb;
2195 struct nameidata nd;
2196 const char *pathstring;
2197
2198 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2199 0, NULL, NULL, NULL)) != 0)
2200 return (error);
2201
2202 optype = VOP_MKNOD_DESCOFFSET;
2203
2204 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2205 if (error) {
2206 return error;
2207 }
2208 pathstring = pathbuf_stringcopy_get(pb);
2209 if (pathstring == NULL) {
2210 pathbuf_destroy(pb);
2211 return ENOMEM;
2212 }
2213
2214 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2215
2216 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2217 goto out;
2218 vp = nd.ni_vp;
2219
2220 if (vp != NULL)
2221 error = EEXIST;
2222 else {
2223 vattr_null(&vattr);
2224 /* We will read cwdi->cwdi_cmask unlocked. */
2225 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2226 vattr.va_rdev = dev;
2227
2228 switch (mode & S_IFMT) {
2229 case S_IFMT: /* used by badsect to flag bad sectors */
2230 vattr.va_type = VBAD;
2231 break;
2232 case S_IFCHR:
2233 vattr.va_type = VCHR;
2234 break;
2235 case S_IFBLK:
2236 vattr.va_type = VBLK;
2237 break;
2238 case S_IFWHT:
2239 optype = VOP_WHITEOUT_DESCOFFSET;
2240 break;
2241 case S_IFREG:
2242 #if NVERIEXEC > 0
2243 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2244 O_CREAT);
2245 #endif /* NVERIEXEC > 0 */
2246 vattr.va_type = VREG;
2247 vattr.va_rdev = VNOVAL;
2248 optype = VOP_CREATE_DESCOFFSET;
2249 break;
2250 default:
2251 error = EINVAL;
2252 break;
2253 }
2254 }
2255 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2256 && vattr.va_rdev == VNOVAL)
2257 error = EINVAL;
2258 if (!error) {
2259 switch (optype) {
2260 case VOP_WHITEOUT_DESCOFFSET:
2261 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2262 if (error)
2263 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2264 vput(nd.ni_dvp);
2265 break;
2266
2267 case VOP_MKNOD_DESCOFFSET:
2268 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2269 &nd.ni_cnd, &vattr);
2270 if (error == 0)
2271 vrele(nd.ni_vp);
2272 vput(nd.ni_dvp);
2273 break;
2274
2275 case VOP_CREATE_DESCOFFSET:
2276 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2277 &nd.ni_cnd, &vattr);
2278 if (error == 0)
2279 vrele(nd.ni_vp);
2280 vput(nd.ni_dvp);
2281 break;
2282 }
2283 } else {
2284 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2285 if (nd.ni_dvp == vp)
2286 vrele(nd.ni_dvp);
2287 else
2288 vput(nd.ni_dvp);
2289 if (vp)
2290 vrele(vp);
2291 }
2292 out:
2293 pathbuf_stringcopy_put(pb, pathstring);
2294 pathbuf_destroy(pb);
2295 return (error);
2296 }
2297
2298 /*
2299 * Create a named pipe.
2300 */
2301 /* ARGSUSED */
2302 int
2303 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2304 {
2305 /* {
2306 syscallarg(const char *) path;
2307 syscallarg(int) mode;
2308 } */
2309 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2310 }
2311
2312 int
2313 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2314 register_t *retval)
2315 {
2316 /* {
2317 syscallarg(int) fd;
2318 syscallarg(const char *) path;
2319 syscallarg(int) mode;
2320 } */
2321
2322 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2323 SCARG(uap, mode));
2324 }
2325
2326 static int
2327 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2328 {
2329 struct proc *p = l->l_proc;
2330 struct vattr vattr;
2331 int error;
2332 struct pathbuf *pb;
2333 struct nameidata nd;
2334
2335 error = pathbuf_copyin(path, &pb);
2336 if (error) {
2337 return error;
2338 }
2339 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2340
2341 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2342 pathbuf_destroy(pb);
2343 return error;
2344 }
2345 if (nd.ni_vp != NULL) {
2346 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2347 if (nd.ni_dvp == nd.ni_vp)
2348 vrele(nd.ni_dvp);
2349 else
2350 vput(nd.ni_dvp);
2351 vrele(nd.ni_vp);
2352 pathbuf_destroy(pb);
2353 return (EEXIST);
2354 }
2355 vattr_null(&vattr);
2356 vattr.va_type = VFIFO;
2357 /* We will read cwdi->cwdi_cmask unlocked. */
2358 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2359 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2360 if (error == 0)
2361 vrele(nd.ni_vp);
2362 vput(nd.ni_dvp);
2363 pathbuf_destroy(pb);
2364 return (error);
2365 }
2366
2367 /*
2368 * Make a hard file link.
2369 */
2370 /* ARGSUSED */
2371 int
2372 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2373 const char *link, int follow, register_t *retval)
2374 {
2375 struct vnode *vp;
2376 struct pathbuf *linkpb;
2377 struct nameidata nd;
2378 namei_simple_flags_t ns_flags;
2379 int error;
2380
2381 if (follow & AT_SYMLINK_FOLLOW)
2382 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2383 else
2384 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2385
2386 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2387 if (error != 0)
2388 return (error);
2389 error = pathbuf_copyin(link, &linkpb);
2390 if (error) {
2391 goto out1;
2392 }
2393 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2394 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2395 goto out2;
2396 if (nd.ni_vp) {
2397 error = EEXIST;
2398 goto abortop;
2399 }
2400 /* Prevent hard links on directories. */
2401 if (vp->v_type == VDIR) {
2402 error = EPERM;
2403 goto abortop;
2404 }
2405 /* Prevent cross-mount operation. */
2406 if (nd.ni_dvp->v_mount != vp->v_mount) {
2407 error = EXDEV;
2408 goto abortop;
2409 }
2410 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2411 VOP_UNLOCK(nd.ni_dvp);
2412 vrele(nd.ni_dvp);
2413 out2:
2414 pathbuf_destroy(linkpb);
2415 out1:
2416 vrele(vp);
2417 return (error);
2418 abortop:
2419 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2420 if (nd.ni_dvp == nd.ni_vp)
2421 vrele(nd.ni_dvp);
2422 else
2423 vput(nd.ni_dvp);
2424 if (nd.ni_vp != NULL)
2425 vrele(nd.ni_vp);
2426 goto out2;
2427 }
2428
2429 int
2430 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2431 {
2432 /* {
2433 syscallarg(const char *) path;
2434 syscallarg(const char *) link;
2435 } */
2436 const char *path = SCARG(uap, path);
2437 const char *link = SCARG(uap, link);
2438
2439 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2440 AT_SYMLINK_FOLLOW, retval);
2441 }
2442
2443 int
2444 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2445 register_t *retval)
2446 {
2447 /* {
2448 syscallarg(int) fd1;
2449 syscallarg(const char *) name1;
2450 syscallarg(int) fd2;
2451 syscallarg(const char *) name2;
2452 syscallarg(int) flags;
2453 } */
2454 int fd1 = SCARG(uap, fd1);
2455 const char *name1 = SCARG(uap, name1);
2456 int fd2 = SCARG(uap, fd2);
2457 const char *name2 = SCARG(uap, name2);
2458 int follow;
2459
2460 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2461
2462 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2463 }
2464
2465
2466 int
2467 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2468 {
2469 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2470 }
2471
2472 static int
2473 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2474 const char *link, enum uio_seg seg)
2475 {
2476 struct proc *p = curproc;
2477 struct vattr vattr;
2478 char *path;
2479 int error;
2480 struct pathbuf *linkpb;
2481 struct nameidata nd;
2482
2483 KASSERT(l != NULL || fdat == AT_FDCWD);
2484
2485 path = PNBUF_GET();
2486 if (seg == UIO_USERSPACE) {
2487 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2488 goto out1;
2489 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2490 goto out1;
2491 } else {
2492 KASSERT(strlen(patharg) < MAXPATHLEN);
2493 strcpy(path, patharg);
2494 linkpb = pathbuf_create(link);
2495 if (linkpb == NULL) {
2496 error = ENOMEM;
2497 goto out1;
2498 }
2499 }
2500 ktrkuser("symlink-target", path, strlen(path));
2501
2502 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2503 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2504 goto out2;
2505 if (nd.ni_vp) {
2506 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2507 if (nd.ni_dvp == nd.ni_vp)
2508 vrele(nd.ni_dvp);
2509 else
2510 vput(nd.ni_dvp);
2511 vrele(nd.ni_vp);
2512 error = EEXIST;
2513 goto out2;
2514 }
2515 vattr_null(&vattr);
2516 vattr.va_type = VLNK;
2517 /* We will read cwdi->cwdi_cmask unlocked. */
2518 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2519 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2520 if (error == 0)
2521 vrele(nd.ni_vp);
2522 vput(nd.ni_dvp);
2523 out2:
2524 pathbuf_destroy(linkpb);
2525 out1:
2526 PNBUF_PUT(path);
2527 return (error);
2528 }
2529
2530 /*
2531 * Make a symbolic link.
2532 */
2533 /* ARGSUSED */
2534 int
2535 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2536 {
2537 /* {
2538 syscallarg(const char *) path;
2539 syscallarg(const char *) link;
2540 } */
2541
2542 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2543 UIO_USERSPACE);
2544 }
2545
2546 int
2547 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2548 register_t *retval)
2549 {
2550 /* {
2551 syscallarg(const char *) path1;
2552 syscallarg(int) fd;
2553 syscallarg(const char *) path2;
2554 } */
2555
2556 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2557 SCARG(uap, path2), UIO_USERSPACE);
2558 }
2559
2560 /*
2561 * Delete a whiteout from the filesystem.
2562 */
2563 /* ARGSUSED */
2564 int
2565 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2566 {
2567 /* {
2568 syscallarg(const char *) path;
2569 } */
2570 int error;
2571 struct pathbuf *pb;
2572 struct nameidata nd;
2573
2574 error = pathbuf_copyin(SCARG(uap, path), &pb);
2575 if (error) {
2576 return error;
2577 }
2578
2579 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2580 error = namei(&nd);
2581 if (error) {
2582 pathbuf_destroy(pb);
2583 return (error);
2584 }
2585
2586 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2587 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2588 if (nd.ni_dvp == nd.ni_vp)
2589 vrele(nd.ni_dvp);
2590 else
2591 vput(nd.ni_dvp);
2592 if (nd.ni_vp)
2593 vrele(nd.ni_vp);
2594 pathbuf_destroy(pb);
2595 return (EEXIST);
2596 }
2597 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2598 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2599 vput(nd.ni_dvp);
2600 pathbuf_destroy(pb);
2601 return (error);
2602 }
2603
2604 /*
2605 * Delete a name from the filesystem.
2606 */
2607 /* ARGSUSED */
2608 int
2609 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2610 {
2611 /* {
2612 syscallarg(const char *) path;
2613 } */
2614
2615 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2616 }
2617
2618 int
2619 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2620 register_t *retval)
2621 {
2622 /* {
2623 syscallarg(int) fd;
2624 syscallarg(const char *) path;
2625 syscallarg(int) flag;
2626 } */
2627
2628 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2629 SCARG(uap, flag), UIO_USERSPACE);
2630 }
2631
2632 int
2633 do_sys_unlink(const char *arg, enum uio_seg seg)
2634 {
2635 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2636 }
2637
2638 static int
2639 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2640 enum uio_seg seg)
2641 {
2642 struct vnode *vp;
2643 int error;
2644 struct pathbuf *pb;
2645 struct nameidata nd;
2646 const char *pathstring;
2647
2648 KASSERT(l != NULL || fdat == AT_FDCWD);
2649
2650 error = pathbuf_maybe_copyin(arg, seg, &pb);
2651 if (error) {
2652 return error;
2653 }
2654 pathstring = pathbuf_stringcopy_get(pb);
2655 if (pathstring == NULL) {
2656 pathbuf_destroy(pb);
2657 return ENOMEM;
2658 }
2659
2660 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2661 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2662 goto out;
2663 vp = nd.ni_vp;
2664
2665 /*
2666 * The root of a mounted filesystem cannot be deleted.
2667 */
2668 if ((vp->v_vflag & VV_ROOT) != 0) {
2669 error = EBUSY;
2670 goto abort;
2671 }
2672
2673 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2674 error = EBUSY;
2675 goto abort;
2676 }
2677
2678 /*
2679 * No rmdir "." please.
2680 */
2681 if (nd.ni_dvp == vp) {
2682 error = EINVAL;
2683 goto abort;
2684 }
2685
2686 /*
2687 * AT_REMOVEDIR is required to remove a directory
2688 */
2689 if (vp->v_type == VDIR) {
2690 if (!(flags & AT_REMOVEDIR)) {
2691 error = EPERM;
2692 goto abort;
2693 } else {
2694 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2695 goto out;
2696 }
2697 }
2698
2699 /*
2700 * Starting here we only deal with non directories.
2701 */
2702 if (flags & AT_REMOVEDIR) {
2703 error = ENOTDIR;
2704 goto abort;
2705 }
2706
2707 #if NVERIEXEC > 0
2708 /* Handle remove requests for veriexec entries. */
2709 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2710 goto abort;
2711 }
2712 #endif /* NVERIEXEC > 0 */
2713
2714 #ifdef FILEASSOC
2715 (void)fileassoc_file_delete(vp);
2716 #endif /* FILEASSOC */
2717 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2718 goto out;
2719
2720 abort:
2721 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2722 if (nd.ni_dvp == vp)
2723 vrele(nd.ni_dvp);
2724 else
2725 vput(nd.ni_dvp);
2726 vput(vp);
2727
2728 out:
2729 pathbuf_stringcopy_put(pb, pathstring);
2730 pathbuf_destroy(pb);
2731 return (error);
2732 }
2733
2734 /*
2735 * Reposition read/write file offset.
2736 */
2737 int
2738 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2739 {
2740 /* {
2741 syscallarg(int) fd;
2742 syscallarg(int) pad;
2743 syscallarg(off_t) offset;
2744 syscallarg(int) whence;
2745 } */
2746 kauth_cred_t cred = l->l_cred;
2747 file_t *fp;
2748 struct vnode *vp;
2749 struct vattr vattr;
2750 off_t newoff;
2751 int error, fd;
2752
2753 fd = SCARG(uap, fd);
2754
2755 if ((fp = fd_getfile(fd)) == NULL)
2756 return (EBADF);
2757
2758 vp = fp->f_vnode;
2759 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2760 error = ESPIPE;
2761 goto out;
2762 }
2763
2764 switch (SCARG(uap, whence)) {
2765 case SEEK_CUR:
2766 newoff = fp->f_offset + SCARG(uap, offset);
2767 break;
2768 case SEEK_END:
2769 vn_lock(vp, LK_SHARED | LK_RETRY);
2770 error = VOP_GETATTR(vp, &vattr, cred);
2771 VOP_UNLOCK(vp);
2772 if (error) {
2773 goto out;
2774 }
2775 newoff = SCARG(uap, offset) + vattr.va_size;
2776 break;
2777 case SEEK_SET:
2778 newoff = SCARG(uap, offset);
2779 break;
2780 default:
2781 error = EINVAL;
2782 goto out;
2783 }
2784 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2785 *(off_t *)retval = fp->f_offset = newoff;
2786 }
2787 out:
2788 fd_putfile(fd);
2789 return (error);
2790 }
2791
2792 /*
2793 * Positional read system call.
2794 */
2795 int
2796 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2797 {
2798 /* {
2799 syscallarg(int) fd;
2800 syscallarg(void *) buf;
2801 syscallarg(size_t) nbyte;
2802 syscallarg(off_t) offset;
2803 } */
2804 file_t *fp;
2805 struct vnode *vp;
2806 off_t offset;
2807 int error, fd = SCARG(uap, fd);
2808
2809 if ((fp = fd_getfile(fd)) == NULL)
2810 return (EBADF);
2811
2812 if ((fp->f_flag & FREAD) == 0) {
2813 fd_putfile(fd);
2814 return (EBADF);
2815 }
2816
2817 vp = fp->f_vnode;
2818 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2819 error = ESPIPE;
2820 goto out;
2821 }
2822
2823 offset = SCARG(uap, offset);
2824
2825 /*
2826 * XXX This works because no file systems actually
2827 * XXX take any action on the seek operation.
2828 */
2829 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2830 goto out;
2831
2832 /* dofileread() will unuse the descriptor for us */
2833 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2834 &offset, 0, retval));
2835
2836 out:
2837 fd_putfile(fd);
2838 return (error);
2839 }
2840
2841 /*
2842 * Positional scatter read system call.
2843 */
2844 int
2845 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2846 {
2847 /* {
2848 syscallarg(int) fd;
2849 syscallarg(const struct iovec *) iovp;
2850 syscallarg(int) iovcnt;
2851 syscallarg(off_t) offset;
2852 } */
2853 off_t offset = SCARG(uap, offset);
2854
2855 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2856 SCARG(uap, iovcnt), &offset, 0, retval);
2857 }
2858
2859 /*
2860 * Positional write system call.
2861 */
2862 int
2863 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2864 {
2865 /* {
2866 syscallarg(int) fd;
2867 syscallarg(const void *) buf;
2868 syscallarg(size_t) nbyte;
2869 syscallarg(off_t) offset;
2870 } */
2871 file_t *fp;
2872 struct vnode *vp;
2873 off_t offset;
2874 int error, fd = SCARG(uap, fd);
2875
2876 if ((fp = fd_getfile(fd)) == NULL)
2877 return (EBADF);
2878
2879 if ((fp->f_flag & FWRITE) == 0) {
2880 fd_putfile(fd);
2881 return (EBADF);
2882 }
2883
2884 vp = fp->f_vnode;
2885 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2886 error = ESPIPE;
2887 goto out;
2888 }
2889
2890 offset = SCARG(uap, offset);
2891
2892 /*
2893 * XXX This works because no file systems actually
2894 * XXX take any action on the seek operation.
2895 */
2896 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2897 goto out;
2898
2899 /* dofilewrite() will unuse the descriptor for us */
2900 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2901 &offset, 0, retval));
2902
2903 out:
2904 fd_putfile(fd);
2905 return (error);
2906 }
2907
2908 /*
2909 * Positional gather write system call.
2910 */
2911 int
2912 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2913 {
2914 /* {
2915 syscallarg(int) fd;
2916 syscallarg(const struct iovec *) iovp;
2917 syscallarg(int) iovcnt;
2918 syscallarg(off_t) offset;
2919 } */
2920 off_t offset = SCARG(uap, offset);
2921
2922 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2923 SCARG(uap, iovcnt), &offset, 0, retval);
2924 }
2925
2926 /*
2927 * Check access permissions.
2928 */
2929 int
2930 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2931 {
2932 /* {
2933 syscallarg(const char *) path;
2934 syscallarg(int) flags;
2935 } */
2936
2937 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2938 SCARG(uap, flags), 0);
2939 }
2940
2941 int
2942 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2943 int mode, int flags)
2944 {
2945 kauth_cred_t cred;
2946 struct vnode *vp;
2947 int error, nd_flag, vmode;
2948 struct pathbuf *pb;
2949 struct nameidata nd;
2950
2951 CTASSERT(F_OK == 0);
2952 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2953 /* nonsense mode */
2954 return EINVAL;
2955 }
2956
2957 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2958 if (flags & AT_SYMLINK_NOFOLLOW)
2959 nd_flag &= ~FOLLOW;
2960
2961 error = pathbuf_copyin(path, &pb);
2962 if (error)
2963 return error;
2964
2965 NDINIT(&nd, LOOKUP, nd_flag, pb);
2966
2967 /* Override default credentials */
2968 cred = kauth_cred_dup(l->l_cred);
2969 if (!(flags & AT_EACCESS)) {
2970 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2971 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2972 }
2973 nd.ni_cnd.cn_cred = cred;
2974
2975 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2976 pathbuf_destroy(pb);
2977 goto out;
2978 }
2979 vp = nd.ni_vp;
2980 pathbuf_destroy(pb);
2981
2982 /* Flags == 0 means only check for existence. */
2983 if (mode) {
2984 vmode = 0;
2985 if (mode & R_OK)
2986 vmode |= VREAD;
2987 if (mode & W_OK)
2988 vmode |= VWRITE;
2989 if (mode & X_OK)
2990 vmode |= VEXEC;
2991
2992 error = VOP_ACCESS(vp, vmode, cred);
2993 if (!error && (vmode & VWRITE))
2994 error = vn_writechk(vp);
2995 }
2996 vput(vp);
2997 out:
2998 kauth_cred_free(cred);
2999 return (error);
3000 }
3001
3002 int
3003 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3004 register_t *retval)
3005 {
3006 /* {
3007 syscallarg(int) fd;
3008 syscallarg(const char *) path;
3009 syscallarg(int) amode;
3010 syscallarg(int) flag;
3011 } */
3012
3013 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3014 SCARG(uap, amode), SCARG(uap, flag));
3015 }
3016
3017 /*
3018 * Common code for all sys_stat functions, including compat versions.
3019 */
3020 int
3021 do_sys_stat(const char *userpath, unsigned int nd_flag,
3022 struct stat *sb)
3023 {
3024 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3025 }
3026
3027 int
3028 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3029 unsigned int nd_flag, struct stat *sb)
3030 {
3031 int error;
3032 struct pathbuf *pb;
3033 struct nameidata nd;
3034
3035 KASSERT(l != NULL || fdat == AT_FDCWD);
3036
3037 error = pathbuf_copyin(userpath, &pb);
3038 if (error) {
3039 return error;
3040 }
3041
3042 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3043
3044 error = fd_nameiat(l, fdat, &nd);
3045 if (error != 0) {
3046 pathbuf_destroy(pb);
3047 return error;
3048 }
3049 error = vn_stat(nd.ni_vp, sb);
3050 vput(nd.ni_vp);
3051 pathbuf_destroy(pb);
3052 return error;
3053 }
3054
3055 /*
3056 * Get file status; this version follows links.
3057 */
3058 /* ARGSUSED */
3059 int
3060 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3061 {
3062 /* {
3063 syscallarg(const char *) path;
3064 syscallarg(struct stat *) ub;
3065 } */
3066 struct stat sb;
3067 int error;
3068
3069 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3070 if (error)
3071 return error;
3072 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3073 }
3074
3075 /*
3076 * Get file status; this version does not follow links.
3077 */
3078 /* ARGSUSED */
3079 int
3080 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3081 {
3082 /* {
3083 syscallarg(const char *) path;
3084 syscallarg(struct stat *) ub;
3085 } */
3086 struct stat sb;
3087 int error;
3088
3089 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3090 if (error)
3091 return error;
3092 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3093 }
3094
3095 int
3096 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3097 register_t *retval)
3098 {
3099 /* {
3100 syscallarg(int) fd;
3101 syscallarg(const char *) path;
3102 syscallarg(struct stat *) buf;
3103 syscallarg(int) flag;
3104 } */
3105 unsigned int nd_flag;
3106 struct stat sb;
3107 int error;
3108
3109 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3110 nd_flag = NOFOLLOW;
3111 else
3112 nd_flag = FOLLOW;
3113
3114 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3115 &sb);
3116 if (error)
3117 return error;
3118 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3119 }
3120
3121 /*
3122 * Get configurable pathname variables.
3123 */
3124 /* ARGSUSED */
3125 int
3126 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3127 {
3128 /* {
3129 syscallarg(const char *) path;
3130 syscallarg(int) name;
3131 } */
3132 int error;
3133 struct pathbuf *pb;
3134 struct nameidata nd;
3135
3136 error = pathbuf_copyin(SCARG(uap, path), &pb);
3137 if (error) {
3138 return error;
3139 }
3140 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3141 if ((error = namei(&nd)) != 0) {
3142 pathbuf_destroy(pb);
3143 return (error);
3144 }
3145 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3146 vput(nd.ni_vp);
3147 pathbuf_destroy(pb);
3148 return (error);
3149 }
3150
3151 /*
3152 * Return target name of a symbolic link.
3153 */
3154 /* ARGSUSED */
3155 int
3156 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3157 register_t *retval)
3158 {
3159 /* {
3160 syscallarg(const char *) path;
3161 syscallarg(char *) buf;
3162 syscallarg(size_t) count;
3163 } */
3164 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3165 SCARG(uap, buf), SCARG(uap, count), retval);
3166 }
3167
3168 static int
3169 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3170 size_t count, register_t *retval)
3171 {
3172 struct vnode *vp;
3173 struct iovec aiov;
3174 struct uio auio;
3175 int error;
3176 struct pathbuf *pb;
3177 struct nameidata nd;
3178
3179 error = pathbuf_copyin(path, &pb);
3180 if (error) {
3181 return error;
3182 }
3183 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3184 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3185 pathbuf_destroy(pb);
3186 return error;
3187 }
3188 vp = nd.ni_vp;
3189 pathbuf_destroy(pb);
3190 if (vp->v_type != VLNK)
3191 error = EINVAL;
3192 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3193 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3194 aiov.iov_base = buf;
3195 aiov.iov_len = count;
3196 auio.uio_iov = &aiov;
3197 auio.uio_iovcnt = 1;
3198 auio.uio_offset = 0;
3199 auio.uio_rw = UIO_READ;
3200 KASSERT(l == curlwp);
3201 auio.uio_vmspace = l->l_proc->p_vmspace;
3202 auio.uio_resid = count;
3203 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3204 *retval = count - auio.uio_resid;
3205 }
3206 vput(vp);
3207 return (error);
3208 }
3209
3210 int
3211 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3212 register_t *retval)
3213 {
3214 /* {
3215 syscallarg(int) fd;
3216 syscallarg(const char *) path;
3217 syscallarg(char *) buf;
3218 syscallarg(size_t) bufsize;
3219 } */
3220
3221 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3222 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3223 }
3224
3225 /*
3226 * Change flags of a file given a path name.
3227 */
3228 /* ARGSUSED */
3229 int
3230 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3231 {
3232 /* {
3233 syscallarg(const char *) path;
3234 syscallarg(u_long) flags;
3235 } */
3236 struct vnode *vp;
3237 int error;
3238
3239 error = namei_simple_user(SCARG(uap, path),
3240 NSM_FOLLOW_TRYEMULROOT, &vp);
3241 if (error != 0)
3242 return (error);
3243 error = change_flags(vp, SCARG(uap, flags), l);
3244 vput(vp);
3245 return (error);
3246 }
3247
3248 /*
3249 * Change flags of a file given a file descriptor.
3250 */
3251 /* ARGSUSED */
3252 int
3253 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3254 {
3255 /* {
3256 syscallarg(int) fd;
3257 syscallarg(u_long) flags;
3258 } */
3259 struct vnode *vp;
3260 file_t *fp;
3261 int error;
3262
3263 /* fd_getvnode() will use the descriptor for us */
3264 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3265 return (error);
3266 vp = fp->f_vnode;
3267 error = change_flags(vp, SCARG(uap, flags), l);
3268 VOP_UNLOCK(vp);
3269 fd_putfile(SCARG(uap, fd));
3270 return (error);
3271 }
3272
3273 /*
3274 * Change flags of a file given a path name; this version does
3275 * not follow links.
3276 */
3277 int
3278 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3279 {
3280 /* {
3281 syscallarg(const char *) path;
3282 syscallarg(u_long) flags;
3283 } */
3284 struct vnode *vp;
3285 int error;
3286
3287 error = namei_simple_user(SCARG(uap, path),
3288 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3289 if (error != 0)
3290 return (error);
3291 error = change_flags(vp, SCARG(uap, flags), l);
3292 vput(vp);
3293 return (error);
3294 }
3295
3296 /*
3297 * Common routine to change flags of a file.
3298 */
3299 int
3300 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3301 {
3302 struct vattr vattr;
3303 int error;
3304
3305 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3306
3307 vattr_null(&vattr);
3308 vattr.va_flags = flags;
3309 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3310
3311 return (error);
3312 }
3313
3314 /*
3315 * Change mode of a file given path name; this version follows links.
3316 */
3317 /* ARGSUSED */
3318 int
3319 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3320 {
3321 /* {
3322 syscallarg(const char *) path;
3323 syscallarg(int) mode;
3324 } */
3325 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3326 SCARG(uap, mode), 0);
3327 }
3328
3329 int
3330 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3331 {
3332 int error;
3333 struct vnode *vp;
3334 namei_simple_flags_t ns_flag;
3335
3336 if (flags & AT_SYMLINK_NOFOLLOW)
3337 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3338 else
3339 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3340
3341 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3342 if (error != 0)
3343 return error;
3344
3345 error = change_mode(vp, mode, l);
3346
3347 vrele(vp);
3348
3349 return (error);
3350 }
3351
3352 /*
3353 * Change mode of a file given a file descriptor.
3354 */
3355 /* ARGSUSED */
3356 int
3357 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3358 {
3359 /* {
3360 syscallarg(int) fd;
3361 syscallarg(int) mode;
3362 } */
3363 file_t *fp;
3364 int error;
3365
3366 /* fd_getvnode() will use the descriptor for us */
3367 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3368 return (error);
3369 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3370 fd_putfile(SCARG(uap, fd));
3371 return (error);
3372 }
3373
3374 int
3375 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3376 register_t *retval)
3377 {
3378 /* {
3379 syscallarg(int) fd;
3380 syscallarg(const char *) path;
3381 syscallarg(int) mode;
3382 syscallarg(int) flag;
3383 } */
3384
3385 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3386 SCARG(uap, mode), SCARG(uap, flag));
3387 }
3388
3389 /*
3390 * Change mode of a file given path name; this version does not follow links.
3391 */
3392 /* ARGSUSED */
3393 int
3394 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3395 {
3396 /* {
3397 syscallarg(const char *) path;
3398 syscallarg(int) mode;
3399 } */
3400 int error;
3401 struct vnode *vp;
3402
3403 error = namei_simple_user(SCARG(uap, path),
3404 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3405 if (error != 0)
3406 return (error);
3407
3408 error = change_mode(vp, SCARG(uap, mode), l);
3409
3410 vrele(vp);
3411 return (error);
3412 }
3413
3414 /*
3415 * Common routine to set mode given a vnode.
3416 */
3417 static int
3418 change_mode(struct vnode *vp, int mode, struct lwp *l)
3419 {
3420 struct vattr vattr;
3421 int error;
3422
3423 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3424 vattr_null(&vattr);
3425 vattr.va_mode = mode & ALLPERMS;
3426 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3427 VOP_UNLOCK(vp);
3428 return (error);
3429 }
3430
3431 /*
3432 * Set ownership given a path name; this version follows links.
3433 */
3434 /* ARGSUSED */
3435 int
3436 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3437 {
3438 /* {
3439 syscallarg(const char *) path;
3440 syscallarg(uid_t) uid;
3441 syscallarg(gid_t) gid;
3442 } */
3443 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3444 SCARG(uap, gid), 0);
3445 }
3446
3447 int
3448 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3449 gid_t gid, int flags)
3450 {
3451 int error;
3452 struct vnode *vp;
3453 namei_simple_flags_t ns_flag;
3454
3455 if (flags & AT_SYMLINK_NOFOLLOW)
3456 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3457 else
3458 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3459
3460 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3461 if (error != 0)
3462 return error;
3463
3464 error = change_owner(vp, uid, gid, l, 0);
3465
3466 vrele(vp);
3467
3468 return (error);
3469 }
3470
3471 /*
3472 * Set ownership given a path name; this version follows links.
3473 * Provides POSIX semantics.
3474 */
3475 /* ARGSUSED */
3476 int
3477 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3478 {
3479 /* {
3480 syscallarg(const char *) path;
3481 syscallarg(uid_t) uid;
3482 syscallarg(gid_t) gid;
3483 } */
3484 int error;
3485 struct vnode *vp;
3486
3487 error = namei_simple_user(SCARG(uap, path),
3488 NSM_FOLLOW_TRYEMULROOT, &vp);
3489 if (error != 0)
3490 return (error);
3491
3492 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3493
3494 vrele(vp);
3495 return (error);
3496 }
3497
3498 /*
3499 * Set ownership given a file descriptor.
3500 */
3501 /* ARGSUSED */
3502 int
3503 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3504 {
3505 /* {
3506 syscallarg(int) fd;
3507 syscallarg(uid_t) uid;
3508 syscallarg(gid_t) gid;
3509 } */
3510 int error;
3511 file_t *fp;
3512
3513 /* fd_getvnode() will use the descriptor for us */
3514 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3515 return (error);
3516 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3517 l, 0);
3518 fd_putfile(SCARG(uap, fd));
3519 return (error);
3520 }
3521
3522 int
3523 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3524 register_t *retval)
3525 {
3526 /* {
3527 syscallarg(int) fd;
3528 syscallarg(const char *) path;
3529 syscallarg(uid_t) owner;
3530 syscallarg(gid_t) group;
3531 syscallarg(int) flag;
3532 } */
3533
3534 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3535 SCARG(uap, owner), SCARG(uap, group),
3536 SCARG(uap, flag));
3537 }
3538
3539 /*
3540 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3541 */
3542 /* ARGSUSED */
3543 int
3544 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3545 {
3546 /* {
3547 syscallarg(int) fd;
3548 syscallarg(uid_t) uid;
3549 syscallarg(gid_t) gid;
3550 } */
3551 int error;
3552 file_t *fp;
3553
3554 /* fd_getvnode() will use the descriptor for us */
3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3556 return (error);
3557 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3558 l, 1);
3559 fd_putfile(SCARG(uap, fd));
3560 return (error);
3561 }
3562
3563 /*
3564 * Set ownership given a path name; this version does not follow links.
3565 */
3566 /* ARGSUSED */
3567 int
3568 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3569 {
3570 /* {
3571 syscallarg(const char *) path;
3572 syscallarg(uid_t) uid;
3573 syscallarg(gid_t) gid;
3574 } */
3575 int error;
3576 struct vnode *vp;
3577
3578 error = namei_simple_user(SCARG(uap, path),
3579 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3580 if (error != 0)
3581 return (error);
3582
3583 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3584
3585 vrele(vp);
3586 return (error);
3587 }
3588
3589 /*
3590 * Set ownership given a path name; this version does not follow links.
3591 * Provides POSIX/XPG semantics.
3592 */
3593 /* ARGSUSED */
3594 int
3595 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3596 {
3597 /* {
3598 syscallarg(const char *) path;
3599 syscallarg(uid_t) uid;
3600 syscallarg(gid_t) gid;
3601 } */
3602 int error;
3603 struct vnode *vp;
3604
3605 error = namei_simple_user(SCARG(uap, path),
3606 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3607 if (error != 0)
3608 return (error);
3609
3610 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3611
3612 vrele(vp);
3613 return (error);
3614 }
3615
3616 /*
3617 * Common routine to set ownership given a vnode.
3618 */
3619 static int
3620 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3621 int posix_semantics)
3622 {
3623 struct vattr vattr;
3624 mode_t newmode;
3625 int error;
3626
3627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3628 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3629 goto out;
3630
3631 #define CHANGED(x) ((int)(x) != -1)
3632 newmode = vattr.va_mode;
3633 if (posix_semantics) {
3634 /*
3635 * POSIX/XPG semantics: if the caller is not the super-user,
3636 * clear set-user-id and set-group-id bits. Both POSIX and
3637 * the XPG consider the behaviour for calls by the super-user
3638 * implementation-defined; we leave the set-user-id and set-
3639 * group-id settings intact in that case.
3640 */
3641 if (vattr.va_mode & S_ISUID) {
3642 if (kauth_authorize_vnode(l->l_cred,
3643 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3644 newmode &= ~S_ISUID;
3645 }
3646 if (vattr.va_mode & S_ISGID) {
3647 if (kauth_authorize_vnode(l->l_cred,
3648 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3649 newmode &= ~S_ISGID;
3650 }
3651 } else {
3652 /*
3653 * NetBSD semantics: when changing owner and/or group,
3654 * clear the respective bit(s).
3655 */
3656 if (CHANGED(uid))
3657 newmode &= ~S_ISUID;
3658 if (CHANGED(gid))
3659 newmode &= ~S_ISGID;
3660 }
3661 /* Update va_mode iff altered. */
3662 if (vattr.va_mode == newmode)
3663 newmode = VNOVAL;
3664
3665 vattr_null(&vattr);
3666 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3667 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3668 vattr.va_mode = newmode;
3669 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3670 #undef CHANGED
3671
3672 out:
3673 VOP_UNLOCK(vp);
3674 return (error);
3675 }
3676
3677 /*
3678 * Set the access and modification times given a path name; this
3679 * version follows links.
3680 */
3681 /* ARGSUSED */
3682 int
3683 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3684 register_t *retval)
3685 {
3686 /* {
3687 syscallarg(const char *) path;
3688 syscallarg(const struct timeval *) tptr;
3689 } */
3690
3691 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3692 SCARG(uap, tptr), UIO_USERSPACE);
3693 }
3694
3695 /*
3696 * Set the access and modification times given a file descriptor.
3697 */
3698 /* ARGSUSED */
3699 int
3700 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3701 register_t *retval)
3702 {
3703 /* {
3704 syscallarg(int) fd;
3705 syscallarg(const struct timeval *) tptr;
3706 } */
3707 int error;
3708 file_t *fp;
3709
3710 /* fd_getvnode() will use the descriptor for us */
3711 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3712 return (error);
3713 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3714 UIO_USERSPACE);
3715 fd_putfile(SCARG(uap, fd));
3716 return (error);
3717 }
3718
3719 int
3720 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3721 register_t *retval)
3722 {
3723 /* {
3724 syscallarg(int) fd;
3725 syscallarg(const struct timespec *) tptr;
3726 } */
3727 int error;
3728 file_t *fp;
3729
3730 /* fd_getvnode() will use the descriptor for us */
3731 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3732 return (error);
3733 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3734 SCARG(uap, tptr), UIO_USERSPACE);
3735 fd_putfile(SCARG(uap, fd));
3736 return (error);
3737 }
3738
3739 /*
3740 * Set the access and modification times given a path name; this
3741 * version does not follow links.
3742 */
3743 int
3744 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3745 register_t *retval)
3746 {
3747 /* {
3748 syscallarg(const char *) path;
3749 syscallarg(const struct timeval *) tptr;
3750 } */
3751
3752 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3753 SCARG(uap, tptr), UIO_USERSPACE);
3754 }
3755
3756 int
3757 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3758 register_t *retval)
3759 {
3760 /* {
3761 syscallarg(int) fd;
3762 syscallarg(const char *) path;
3763 syscallarg(const struct timespec *) tptr;
3764 syscallarg(int) flag;
3765 } */
3766 int follow;
3767 const struct timespec *tptr;
3768 int error;
3769
3770 tptr = SCARG(uap, tptr);
3771 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3772
3773 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3774 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3775
3776 return error;
3777 }
3778
3779 /*
3780 * Common routine to set access and modification times given a vnode.
3781 */
3782 int
3783 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3784 const struct timespec *tptr, enum uio_seg seg)
3785 {
3786 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3787 }
3788
3789 int
3790 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3791 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3792 {
3793 struct vattr vattr;
3794 int error, dorele = 0;
3795 namei_simple_flags_t sflags;
3796 bool vanull, setbirthtime;
3797 struct timespec ts[2];
3798
3799 KASSERT(l != NULL || fdat == AT_FDCWD);
3800
3801 /*
3802 * I have checked all callers and they pass either FOLLOW,
3803 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3804 * is 0. More to the point, they don't pass anything else.
3805 * Let's keep it that way at least until the namei interfaces
3806 * are fully sanitized.
3807 */
3808 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3809 sflags = (flag == FOLLOW) ?
3810 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3811
3812 if (tptr == NULL) {
3813 vanull = true;
3814 nanotime(&ts[0]);
3815 ts[1] = ts[0];
3816 } else {
3817 vanull = false;
3818 if (seg != UIO_SYSSPACE) {
3819 error = copyin(tptr, ts, sizeof (ts));
3820 if (error != 0)
3821 return error;
3822 } else {
3823 ts[0] = tptr[0];
3824 ts[1] = tptr[1];
3825 }
3826 }
3827
3828 if (ts[0].tv_nsec == UTIME_NOW) {
3829 nanotime(&ts[0]);
3830 if (ts[1].tv_nsec == UTIME_NOW) {
3831 vanull = true;
3832 ts[1] = ts[0];
3833 }
3834 } else if (ts[1].tv_nsec == UTIME_NOW)
3835 nanotime(&ts[1]);
3836
3837 if (vp == NULL) {
3838 /* note: SEG describes TPTR, not PATH; PATH is always user */
3839 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3840 if (error != 0)
3841 return error;
3842 dorele = 1;
3843 }
3844
3845 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3846 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3847 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3848 vattr_null(&vattr);
3849
3850 if (ts[0].tv_nsec != UTIME_OMIT)
3851 vattr.va_atime = ts[0];
3852
3853 if (ts[1].tv_nsec != UTIME_OMIT) {
3854 vattr.va_mtime = ts[1];
3855 if (setbirthtime)
3856 vattr.va_birthtime = ts[1];
3857 }
3858
3859 if (vanull)
3860 vattr.va_vaflags |= VA_UTIMES_NULL;
3861 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3862 VOP_UNLOCK(vp);
3863
3864 if (dorele != 0)
3865 vrele(vp);
3866
3867 return error;
3868 }
3869
3870 int
3871 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3872 const struct timeval *tptr, enum uio_seg seg)
3873 {
3874 struct timespec ts[2];
3875 struct timespec *tsptr = NULL;
3876 int error;
3877
3878 if (tptr != NULL) {
3879 struct timeval tv[2];
3880
3881 if (seg != UIO_SYSSPACE) {
3882 error = copyin(tptr, tv, sizeof (tv));
3883 if (error != 0)
3884 return error;
3885 tptr = tv;
3886 }
3887
3888 if ((tv[0].tv_usec == UTIME_NOW) ||
3889 (tv[0].tv_usec == UTIME_OMIT))
3890 ts[0].tv_nsec = tv[0].tv_usec;
3891 else
3892 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3893
3894 if ((tv[1].tv_usec == UTIME_NOW) ||
3895 (tv[1].tv_usec == UTIME_OMIT))
3896 ts[1].tv_nsec = tv[1].tv_usec;
3897 else
3898 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3899
3900 tsptr = &ts[0];
3901 }
3902
3903 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3904 }
3905
3906 /*
3907 * Truncate a file given its path name.
3908 */
3909 /* ARGSUSED */
3910 int
3911 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3912 {
3913 /* {
3914 syscallarg(const char *) path;
3915 syscallarg(int) pad;
3916 syscallarg(off_t) length;
3917 } */
3918 struct vnode *vp;
3919 struct vattr vattr;
3920 int error;
3921
3922 if (SCARG(uap, length) < 0)
3923 return EINVAL;
3924
3925 error = namei_simple_user(SCARG(uap, path),
3926 NSM_FOLLOW_TRYEMULROOT, &vp);
3927 if (error != 0)
3928 return (error);
3929 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3930 if (vp->v_type == VDIR)
3931 error = EISDIR;
3932 else if ((error = vn_writechk(vp)) == 0 &&
3933 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3934 vattr_null(&vattr);
3935 vattr.va_size = SCARG(uap, length);
3936 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3937 }
3938 vput(vp);
3939 return (error);
3940 }
3941
3942 /*
3943 * Truncate a file given a file descriptor.
3944 */
3945 /* ARGSUSED */
3946 int
3947 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3948 {
3949 /* {
3950 syscallarg(int) fd;
3951 syscallarg(int) pad;
3952 syscallarg(off_t) length;
3953 } */
3954 struct vattr vattr;
3955 struct vnode *vp;
3956 file_t *fp;
3957 int error;
3958
3959 if (SCARG(uap, length) < 0)
3960 return EINVAL;
3961
3962 /* fd_getvnode() will use the descriptor for us */
3963 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3964 return (error);
3965 if ((fp->f_flag & FWRITE) == 0) {
3966 error = EINVAL;
3967 goto out;
3968 }
3969 vp = fp->f_vnode;
3970 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3971 if (vp->v_type == VDIR)
3972 error = EISDIR;
3973 else if ((error = vn_writechk(vp)) == 0) {
3974 vattr_null(&vattr);
3975 vattr.va_size = SCARG(uap, length);
3976 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3977 }
3978 VOP_UNLOCK(vp);
3979 out:
3980 fd_putfile(SCARG(uap, fd));
3981 return (error);
3982 }
3983
3984 /*
3985 * Sync an open file.
3986 */
3987 /* ARGSUSED */
3988 int
3989 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3990 {
3991 /* {
3992 syscallarg(int) fd;
3993 } */
3994 struct vnode *vp;
3995 file_t *fp;
3996 int error;
3997
3998 /* fd_getvnode() will use the descriptor for us */
3999 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4000 return (error);
4001 vp = fp->f_vnode;
4002 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4003 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4004 VOP_UNLOCK(vp);
4005 fd_putfile(SCARG(uap, fd));
4006 return (error);
4007 }
4008
4009 /*
4010 * Sync a range of file data. API modeled after that found in AIX.
4011 *
4012 * FDATASYNC indicates that we need only save enough metadata to be able
4013 * to re-read the written data. Note we duplicate AIX's requirement that
4014 * the file be open for writing.
4015 */
4016 /* ARGSUSED */
4017 int
4018 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4019 {
4020 /* {
4021 syscallarg(int) fd;
4022 syscallarg(int) flags;
4023 syscallarg(off_t) start;
4024 syscallarg(off_t) length;
4025 } */
4026 struct vnode *vp;
4027 file_t *fp;
4028 int flags, nflags;
4029 off_t s, e, len;
4030 int error;
4031
4032 /* fd_getvnode() will use the descriptor for us */
4033 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4034 return (error);
4035
4036 if ((fp->f_flag & FWRITE) == 0) {
4037 error = EBADF;
4038 goto out;
4039 }
4040
4041 flags = SCARG(uap, flags);
4042 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4043 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4044 error = EINVAL;
4045 goto out;
4046 }
4047 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4048 if (flags & FDATASYNC)
4049 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4050 else
4051 nflags = FSYNC_WAIT;
4052 if (flags & FDISKSYNC)
4053 nflags |= FSYNC_CACHE;
4054
4055 len = SCARG(uap, length);
4056 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4057 if (len) {
4058 s = SCARG(uap, start);
4059 e = s + len;
4060 if (e < s) {
4061 error = EINVAL;
4062 goto out;
4063 }
4064 } else {
4065 e = 0;
4066 s = 0;
4067 }
4068
4069 vp = fp->f_vnode;
4070 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4071 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4072 VOP_UNLOCK(vp);
4073 out:
4074 fd_putfile(SCARG(uap, fd));
4075 return (error);
4076 }
4077
4078 /*
4079 * Sync the data of an open file.
4080 */
4081 /* ARGSUSED */
4082 int
4083 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4084 {
4085 /* {
4086 syscallarg(int) fd;
4087 } */
4088 struct vnode *vp;
4089 file_t *fp;
4090 int error;
4091
4092 /* fd_getvnode() will use the descriptor for us */
4093 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4094 return (error);
4095 if ((fp->f_flag & FWRITE) == 0) {
4096 fd_putfile(SCARG(uap, fd));
4097 return (EBADF);
4098 }
4099 vp = fp->f_vnode;
4100 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4101 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4102 VOP_UNLOCK(vp);
4103 fd_putfile(SCARG(uap, fd));
4104 return (error);
4105 }
4106
4107 /*
4108 * Rename files, (standard) BSD semantics frontend.
4109 */
4110 /* ARGSUSED */
4111 int
4112 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4113 {
4114 /* {
4115 syscallarg(const char *) from;
4116 syscallarg(const char *) to;
4117 } */
4118
4119 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4120 SCARG(uap, to), UIO_USERSPACE, 0));
4121 }
4122
4123 int
4124 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4125 register_t *retval)
4126 {
4127 /* {
4128 syscallarg(int) fromfd;
4129 syscallarg(const char *) from;
4130 syscallarg(int) tofd;
4131 syscallarg(const char *) to;
4132 } */
4133
4134 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4135 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4136 }
4137
4138 /*
4139 * Rename files, POSIX semantics frontend.
4140 */
4141 /* ARGSUSED */
4142 int
4143 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4144 {
4145 /* {
4146 syscallarg(const char *) from;
4147 syscallarg(const char *) to;
4148 } */
4149
4150 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4151 SCARG(uap, to), UIO_USERSPACE, 1));
4152 }
4153
4154 /*
4155 * Rename files. Source and destination must either both be directories,
4156 * or both not be directories. If target is a directory, it must be empty.
4157 * If `from' and `to' refer to the same object, the value of the `retain'
4158 * argument is used to determine whether `from' will be
4159 *
4160 * (retain == 0) deleted unless `from' and `to' refer to the same
4161 * object in the file system's name space (BSD).
4162 * (retain == 1) always retained (POSIX).
4163 *
4164 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4165 */
4166 int
4167 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4168 {
4169 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4170 }
4171
/*
 * Common rename implementation behind rename, renameat and the POSIX
 * rename variant.
 *
 * l		calling lwp; may be NULL only when both fromfd and tofd
 *		are AT_FDCWD (asserted below).
 * fromfd/from	directory descriptor and path of the source.
 * tofd/to	directory descriptor and path of the destination.
 * seg		address space holding both path strings.
 * retain	nonzero: when source and destination are the same
 *		vnode, succeed without calling VOP_RENAME; zero: do so
 *		only if they are also the same name in the same
 *		directory.
 *
 * Outline: copy in both paths, look up `from' (DELETE) and `to'
 * (RENAME), reject `.' and `..' as either final component, check that
 * both parent directories are on the same mount, take the per-mount
 * rename lock, relock tdvp and relookup the target, run the legacy
 * consistency checks (and veriexec when configured), then call
 * VOP_RENAME.  The abortN/outN labels at the bottom unwind in reverse
 * order of acquisition.
 *
 * Returns 0 on success or an errno value.
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR?  */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
		error = EISDIR;
		goto abort1;
	}
	if ((tnd.ni_cnd.cn_namelen == 2) &&
	    (tnd.ni_cnd.cn_nameptr[0] == '.') &&
	    (tnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		goto abort1;
	}

	/*
	 * Get the mount point.  If the file system has been unmounted,
	 * which it may be because we're not holding any vnode locks,
	 * then v_mount will be NULL.  We're not really supposed to
	 * read v_mount without holding the vnode lock, but since we
	 * have fdvp referenced, if fdvp->v_mount changes then at worst
	 * it will be set to NULL, not changed to another mount point.
	 * And, of course, since it is up to the file system to
	 * determine the real lock order, we can't lock both fdvp and
	 * tdvp at the same time.
	 */
	mp = fdvp->v_mount;
	if (mp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Again, although we don't
	 * hold any vnode locks, the v_mount fields may change -- but
	 * at worst they will change to NULL, so this will never become
	 * a cross-device rename, because we hold vnode references.
	 *
	 * XXX Because nothing is locked and the compiler may reorder
	 * things here, unmounting the file system at an inopportune
	 * moment may cause rename to fail with EXDEV when it really
	 * should fail with ENOENT.
	 */
	tmp = tdvp->v_mount;
	if (tmp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/* NUL-terminated copies of both final component names. */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	goto out2;

	/*
	 * Error unwinding.  Each label undoes one stage and falls
	 * through to the next:
	 *   abort3: tdvp and tvp locked, rename lock held
	 *   abort2: tdvp locked, rename lock held
	 *   abort1: `to' lookup done, nothing locked
	 *   abort0: `from' lookup done, nothing locked
	 */
abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}
4502
4503 /*
4504 * Make a directory file.
4505 */
4506 /* ARGSUSED */
4507 int
4508 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4509 {
4510 /* {
4511 syscallarg(const char *) path;
4512 syscallarg(int) mode;
4513 } */
4514
4515 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4516 SCARG(uap, mode), UIO_USERSPACE);
4517 }
4518
4519 int
4520 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4521 register_t *retval)
4522 {
4523 /* {
4524 syscallarg(int) fd;
4525 syscallarg(const char *) path;
4526 syscallarg(int) mode;
4527 } */
4528
4529 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4530 SCARG(uap, mode), UIO_USERSPACE);
4531 }
4532
4533
4534 int
4535 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4536 {
4537 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4538 }
4539
4540 static int
4541 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4542 enum uio_seg seg)
4543 {
4544 struct proc *p = curlwp->l_proc;
4545 struct vnode *vp;
4546 struct vattr vattr;
4547 int error;
4548 struct pathbuf *pb;
4549 struct nameidata nd;
4550
4551 KASSERT(l != NULL || fdat == AT_FDCWD);
4552
4553 /* XXX bollocks, should pass in a pathbuf */
4554 error = pathbuf_maybe_copyin(path, seg, &pb);
4555 if (error) {
4556 return error;
4557 }
4558
4559 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4560
4561 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4562 pathbuf_destroy(pb);
4563 return (error);
4564 }
4565 vp = nd.ni_vp;
4566 if (vp != NULL) {
4567 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4568 if (nd.ni_dvp == vp)
4569 vrele(nd.ni_dvp);
4570 else
4571 vput(nd.ni_dvp);
4572 vrele(vp);
4573 pathbuf_destroy(pb);
4574 return (EEXIST);
4575 }
4576 vattr_null(&vattr);
4577 vattr.va_type = VDIR;
4578 /* We will read cwdi->cwdi_cmask unlocked. */
4579 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4580 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4581 if (!error)
4582 vrele(nd.ni_vp);
4583 vput(nd.ni_dvp);
4584 pathbuf_destroy(pb);
4585 return (error);
4586 }
4587
4588 /*
4589 * Remove a directory file.
4590 */
4591 /* ARGSUSED */
4592 int
4593 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4594 {
4595 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4596 AT_REMOVEDIR, UIO_USERSPACE);
4597 }
4598
4599 /*
4600 * Read a block of directory entries in a file system independent format.
4601 */
4602 int
4603 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4604 {
4605 /* {
4606 syscallarg(int) fd;
4607 syscallarg(char *) buf;
4608 syscallarg(size_t) count;
4609 } */
4610 file_t *fp;
4611 int error, done;
4612
4613 /* fd_getvnode() will use the descriptor for us */
4614 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4615 return (error);
4616 if ((fp->f_flag & FREAD) == 0) {
4617 error = EBADF;
4618 goto out;
4619 }
4620 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4621 SCARG(uap, count), &done, l, 0, 0);
4622 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4623 *retval = done;
4624 out:
4625 fd_putfile(SCARG(uap, fd));
4626 return (error);
4627 }
4628
4629 /*
4630 * Set the mode mask for creation of filesystem nodes.
4631 */
4632 int
4633 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4634 {
4635 /* {
4636 syscallarg(mode_t) newmask;
4637 } */
4638 struct proc *p = l->l_proc;
4639 struct cwdinfo *cwdi;
4640
4641 /*
4642 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4643 * important is that we serialize changes to the mask. The
4644 * rw_exit() will issue a write memory barrier on our behalf,
4645 * and force the changes out to other CPUs (as it must use an
4646 * atomic operation, draining the local CPU's store buffers).
4647 */
4648 cwdi = p->p_cwdi;
4649 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4650 *retval = cwdi->cwdi_cmask;
4651 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4652 rw_exit(&cwdi->cwdi_lock);
4653
4654 return (0);
4655 }
4656
4657 int
4658 dorevoke(struct vnode *vp, kauth_cred_t cred)
4659 {
4660 struct vattr vattr;
4661 int error, fs_decision;
4662
4663 vn_lock(vp, LK_SHARED | LK_RETRY);
4664 error = VOP_GETATTR(vp, &vattr, cred);
4665 VOP_UNLOCK(vp);
4666 if (error != 0)
4667 return error;
4668 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4669 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4670 fs_decision);
4671 if (!error)
4672 VOP_REVOKE(vp, REVOKEALL);
4673 return (error);
4674 }
4675
4676 /*
4677 * Void all references to file by ripping underlying filesystem
4678 * away from vnode.
4679 */
4680 /* ARGSUSED */
4681 int
4682 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4683 {
4684 /* {
4685 syscallarg(const char *) path;
4686 } */
4687 struct vnode *vp;
4688 int error;
4689
4690 error = namei_simple_user(SCARG(uap, path),
4691 NSM_FOLLOW_TRYEMULROOT, &vp);
4692 if (error != 0)
4693 return (error);
4694 error = dorevoke(vp, l->l_cred);
4695 vrele(vp);
4696 return (error);
4697 }
4698
4699 /*
4700 * Allocate backing store for a file, filling a hole without having to
4701 * explicitly write anything out.
4702 */
4703 /* ARGSUSED */
4704 int
4705 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4706 register_t *retval)
4707 {
4708 /* {
4709 syscallarg(int) fd;
4710 syscallarg(off_t) pos;
4711 syscallarg(off_t) len;
4712 } */
4713 int fd;
4714 off_t pos, len;
4715 struct file *fp;
4716 struct vnode *vp;
4717 int error;
4718
4719 fd = SCARG(uap, fd);
4720 pos = SCARG(uap, pos);
4721 len = SCARG(uap, len);
4722
4723 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4724 *retval = EINVAL;
4725 return 0;
4726 }
4727
4728 error = fd_getvnode(fd, &fp);
4729 if (error) {
4730 *retval = error;
4731 return 0;
4732 }
4733 if ((fp->f_flag & FWRITE) == 0) {
4734 error = EBADF;
4735 goto fail;
4736 }
4737 vp = fp->f_vnode;
4738
4739 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4740 if (vp->v_type == VDIR) {
4741 error = EISDIR;
4742 } else {
4743 error = VOP_FALLOCATE(vp, pos, len);
4744 }
4745 VOP_UNLOCK(vp);
4746
4747 fail:
4748 fd_putfile(fd);
4749 *retval = error;
4750 return 0;
4751 }
4752
4753 /*
4754 * Deallocate backing store for a file, creating a hole. Also used for
4755 * invoking TRIM on disks.
4756 */
4757 /* ARGSUSED */
4758 int
4759 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4760 register_t *retval)
4761 {
4762 /* {
4763 syscallarg(int) fd;
4764 syscallarg(off_t) pos;
4765 syscallarg(off_t) len;
4766 } */
4767 int fd;
4768 off_t pos, len;
4769 struct file *fp;
4770 struct vnode *vp;
4771 int error;
4772
4773 fd = SCARG(uap, fd);
4774 pos = SCARG(uap, pos);
4775 len = SCARG(uap, len);
4776
4777 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4778 return EINVAL;
4779 }
4780
4781 error = fd_getvnode(fd, &fp);
4782 if (error) {
4783 return error;
4784 }
4785 if ((fp->f_flag & FWRITE) == 0) {
4786 error = EBADF;
4787 goto fail;
4788 }
4789 vp = fp->f_vnode;
4790
4791 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4792 if (vp->v_type == VDIR) {
4793 error = EISDIR;
4794 } else {
4795 error = VOP_FDISCARD(vp, pos, len);
4796 }
4797 VOP_UNLOCK(vp);
4798
4799 fail:
4800 fd_putfile(fd);
4801 return error;
4802 }
4803