vfs_syscalls.c revision 1.535 1 /* $NetBSD: vfs_syscalls.c,v 1.535 2019/09/20 13:29:31 kamil Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.535 2019/09/20 13:29:31 kamil Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/fstrans.h>
91 #include <sys/proc.h>
92 #include <sys/uio.h>
93 #include <sys/kmem.h>
94 #include <sys/dirent.h>
95 #include <sys/sysctl.h>
96 #include <sys/syscallargs.h>
97 #include <sys/vfs_syscalls.h>
98 #include <sys/quota.h>
99 #include <sys/quotactl.h>
100 #include <sys/ktrace.h>
101 #ifdef FILEASSOC
102 #include <sys/fileassoc.h>
103 #endif /* FILEASSOC */
104 #include <sys/extattr.h>
105 #include <sys/verified_exec.h>
106 #include <sys/kauth.h>
107 #include <sys/atomic.h>
108 #include <sys/module.h>
109 #include <sys/buf.h>
110 #include <sys/event.h>
111 #include <sys/compat_stub.h>
112
113 #include <miscfs/genfs/genfs.h>
114 #include <miscfs/specfs/specdev.h>
115
116 #include <nfs/rpcv2.h>
117 #include <nfs/nfsproto.h>
118 #include <nfs/nfs.h>
119 #include <nfs/nfs_var.h>
120
121 /* XXX this shouldn't be here */
122 #ifndef OFF_T_MAX
123 #define OFF_T_MAX __type_max(off_t)
124 #endif
125
126 static int change_flags(struct vnode *, u_long, struct lwp *);
127 static int change_mode(struct vnode *, int, struct lwp *);
128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
131 enum uio_seg);
132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
134 enum uio_seg);
135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
136 enum uio_seg, int);
137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
138 size_t, register_t *);
139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
140
141 static int fd_nameiat(struct lwp *, int, struct nameidata *);
142 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
143 namei_simple_flags_t, struct vnode **);
144
145 /*
146 * This table is used to maintain compatibility with 4.3BSD
147 * and NetBSD 0.9 mount syscalls - and possibly other systems.
148 * Note, the order is important!
149 *
150 * Do not modify this table. It should only contain filesystems
151 * supported by NetBSD 0.9 and 4.3BSD.
152 */
153 const char * const mountcompatnames[] = {
154 NULL, /* 0 = MOUNT_NONE */
155 MOUNT_FFS, /* 1 = MOUNT_UFS */
156 MOUNT_NFS, /* 2 */
157 MOUNT_MFS, /* 3 */
158 MOUNT_MSDOS, /* 4 */
159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
160 MOUNT_FDESC, /* 6 */
161 MOUNT_KERNFS, /* 7 */
162 NULL, /* 8 = MOUNT_DEVFS */
163 MOUNT_AFS, /* 9 */
164 };
165
166 const int nmountcompatnames = __arraycount(mountcompatnames);
167
168 static int
169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
170 {
171 file_t *dfp;
172 int error;
173
174 if (fdat != AT_FDCWD) {
175 if ((error = fd_getvnode(fdat, &dfp)) != 0)
176 goto out;
177
178 NDAT(ndp, dfp->f_vnode);
179 }
180
181 error = namei(ndp);
182
183 if (fdat != AT_FDCWD)
184 fd_putfile(fdat);
185 out:
186 return error;
187 }
188
189 static int
190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
191 namei_simple_flags_t sflags, struct vnode **vp_ret)
192 {
193 file_t *dfp;
194 struct vnode *dvp;
195 int error;
196
197 if (fdat != AT_FDCWD) {
198 if ((error = fd_getvnode(fdat, &dfp)) != 0)
199 goto out;
200
201 dvp = dfp->f_vnode;
202 } else {
203 dvp = NULL;
204 }
205
206 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
207
208 if (fdat != AT_FDCWD)
209 fd_putfile(fdat);
210 out:
211 return error;
212 }
213
214 static int
215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
216 {
217 int error;
218
219 fp->f_flag = flags & FMASK;
220 fp->f_type = DTYPE_VNODE;
221 fp->f_ops = &vnops;
222 fp->f_vnode = vp;
223
224 if (flags & (O_EXLOCK | O_SHLOCK)) {
225 struct flock lf;
226 int type;
227
228 lf.l_whence = SEEK_SET;
229 lf.l_start = 0;
230 lf.l_len = 0;
231 if (flags & O_EXLOCK)
232 lf.l_type = F_WRLCK;
233 else
234 lf.l_type = F_RDLCK;
235 type = F_FLOCK;
236 if ((flags & FNONBLOCK) == 0)
237 type |= F_WAIT;
238 VOP_UNLOCK(vp);
239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
240 if (error) {
241 (void) vn_close(vp, fp->f_flag, fp->f_cred);
242 fd_abort(l->l_proc, fp, indx);
243 return error;
244 }
245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
246 atomic_or_uint(&fp->f_flag, FHASLOCK);
247 }
248 if (flags & O_CLOEXEC)
249 fd_set_exclose(l, indx, true);
250 return 0;
251 }
252
253 static int
254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
255 void *data, size_t *data_len)
256 {
257 struct mount *mp;
258 int error = 0, saved_flags;
259
260 mp = vp->v_mount;
261 saved_flags = mp->mnt_flag;
262
263 /* We can operate only on VV_ROOT nodes. */
264 if ((vp->v_vflag & VV_ROOT) == 0) {
265 error = EINVAL;
266 goto out;
267 }
268
269 /*
270 * We only allow the filesystem to be reloaded if it
271 * is currently mounted read-only. Additionally, we
272 * prevent read-write to read-only downgrades.
273 */
274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
275 (mp->mnt_flag & MNT_RDONLY) == 0 &&
276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
277 error = EOPNOTSUPP; /* Needs translation */
278 goto out;
279 }
280
281 /*
282 * Enabling MNT_UNION requires a covered mountpoint and
283 * must not happen on the root mount.
284 */
285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) {
286 error = EOPNOTSUPP;
287 goto out;
288 }
289
290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
292 if (error)
293 goto out;
294
295 error = vfs_suspend(mp, 0);
296 if (error)
297 goto out;
298
299 mutex_enter(&mp->mnt_updating);
300
301 mp->mnt_flag &= ~MNT_OP_FLAGS;
302 mp->mnt_flag |= flags & MNT_OP_FLAGS;
303
304 /*
305 * Set the mount level flags.
306 */
307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
308 if ((flags & MNT_RDONLY))
309 mp->mnt_iflag |= IMNT_WANTRDONLY;
310 else
311 mp->mnt_iflag |= IMNT_WANTRDWR;
312 }
313 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
315 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
316 mp->mnt_flag &= ~MNT_RDONLY;
317
318 error = VFS_MOUNT(mp, path, data, data_len);
319
320 if (error && data != NULL) {
321 int error2;
322
323 /*
324 * Update failed; let's try and see if it was an
325 * export request. For compat with 3.0 and earlier.
326 */
327 error2 = vfs_hooks_reexport(mp, path, data);
328
329 /*
330 * Only update error code if the export request was
331 * understood but some problem occurred while
332 * processing it.
333 */
334 if (error2 != EJUSTRETURN)
335 error = error2;
336 }
337
338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
339 mp->mnt_flag |= MNT_RDONLY;
340 if (error)
341 mp->mnt_flag = saved_flags;
342 mp->mnt_flag &= ~MNT_OP_FLAGS;
343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
346 vfs_syncer_add_to_worklist(mp);
347 } else {
348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
349 vfs_syncer_remove_from_worklist(mp);
350 }
351 mutex_exit(&mp->mnt_updating);
352 vfs_resume(mp);
353
354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
355 (flags & MNT_EXTATTR)) {
356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
357 NULL, 0, NULL) != 0) {
358 printf("%s: failed to start extattr, error = %d",
359 mp->mnt_stat.f_mntonname, error);
360 mp->mnt_flag &= ~MNT_EXTATTR;
361 }
362 }
363
364 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
365 !(flags & MNT_EXTATTR)) {
366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
367 NULL, 0, NULL) != 0) {
368 printf("%s: failed to stop extattr, error = %d",
369 mp->mnt_stat.f_mntonname, error);
370 mp->mnt_flag |= MNT_RDONLY;
371 }
372 }
373 out:
374 return (error);
375 }
376
377 static int
378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
379 struct vfsops **vfsops)
380 {
381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
382 int error;
383
384 if (type_seg == UIO_USERSPACE) {
385 /* Copy file-system type from userspace. */
386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
387 } else {
388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
389 KASSERT(error == 0);
390 }
391
392 if (error) {
393 /*
394 * Historically, filesystem types were identified by numbers.
395 * If we get an integer for the filesystem type instead of a
396 * string, we check to see if it matches one of the historic
397 * filesystem types.
398 */
399 u_long fsindex = (u_long)fstype;
400 if (fsindex >= nmountcompatnames ||
401 mountcompatnames[fsindex] == NULL)
402 return ENODEV;
403 strlcpy(fstypename, mountcompatnames[fsindex],
404 sizeof(fstypename));
405 }
406
407 /* Accept `ufs' as an alias for `ffs', for compatibility. */
408 if (strcmp(fstypename, "ufs") == 0)
409 fstypename[0] = 'f';
410
411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
412 return 0;
413
414 /* If we can autoload a vfs module, try again */
415 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
416
417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
418 return 0;
419
420 return ENODEV;
421 }
422
423 static int
424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
425 void *data, size_t *data_len)
426 {
427 struct mount *mp;
428 int error;
429
430 /* If MNT_GETARGS is specified, it should be the only flag. */
431 if (flags & ~MNT_GETARGS)
432 return EINVAL;
433
434 mp = vp->v_mount;
435
436 /* XXX: probably some notion of "can see" here if we want isolation. */
437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
439 if (error)
440 return error;
441
442 if ((vp->v_vflag & VV_ROOT) == 0)
443 return EINVAL;
444
445 if (vfs_busy(mp))
446 return EPERM;
447
448 mutex_enter(&mp->mnt_updating);
449 mp->mnt_flag &= ~MNT_OP_FLAGS;
450 mp->mnt_flag |= MNT_GETARGS;
451 error = VFS_MOUNT(mp, path, data, data_len);
452 mp->mnt_flag &= ~MNT_OP_FLAGS;
453 mutex_exit(&mp->mnt_updating);
454
455 vfs_unbusy(mp);
456 return (error);
457 }
458
459 int
460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
461 {
462 /* {
463 syscallarg(const char *) type;
464 syscallarg(const char *) path;
465 syscallarg(int) flags;
466 syscallarg(void *) data;
467 syscallarg(size_t) data_len;
468 } */
469
470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
472 SCARG(uap, data_len), retval);
473 }
474
475 int
476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
477 const char *path, int flags, void *data, enum uio_seg data_seg,
478 size_t data_len, register_t *retval)
479 {
480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
481 struct vnode *vp;
482 void *data_buf = data;
483 bool vfsopsrele = false;
484 size_t alloc_sz = 0;
485 int error;
486
487 /*
488 * Get vnode to be covered
489 */
490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
491 if (error != 0) {
492 vp = NULL;
493 goto done;
494 }
495
496 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
497 vfsops = vp->v_mount->mnt_op;
498 } else {
499 /* 'type' is userspace */
500 error = mount_get_vfsops(type, type_seg, &vfsops);
501 if (error != 0)
502 goto done;
503 vfsopsrele = true;
504 }
505
506 /*
507 * We allow data to be NULL, even for userspace. Some fs's don't need
508 * it. The others will handle NULL.
509 */
510 if (data != NULL && data_seg == UIO_USERSPACE) {
511 if (data_len == 0) {
512 /* No length supplied, use default for filesystem */
513 data_len = vfsops->vfs_min_mount_data;
514
515 /*
516 * Hopefully a longer buffer won't make copyin() fail.
517 * For compatibility with 3.0 and earlier.
518 */
519 if (flags & MNT_UPDATE
520 && data_len < sizeof (struct mnt_export_args30))
521 data_len = sizeof (struct mnt_export_args30);
522 }
523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
524 error = EINVAL;
525 goto done;
526 }
527 alloc_sz = data_len;
528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
529
530 /* NFS needs the buffer even for mnt_getargs .... */
531 error = copyin(data, data_buf, data_len);
532 if (error != 0)
533 goto done;
534 }
535
536 if (flags & MNT_GETARGS) {
537 if (data_len == 0) {
538 error = EINVAL;
539 goto done;
540 }
541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
542 if (error != 0)
543 goto done;
544 if (data_seg == UIO_USERSPACE)
545 error = copyout(data_buf, data, data_len);
546 *retval = data_len;
547 } else if (flags & MNT_UPDATE) {
548 error = mount_update(l, vp, path, flags, data_buf, &data_len);
549 } else {
550 /* Locking is handled internally in mount_domount(). */
551 KASSERT(vfsopsrele == true);
552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
553 &data_len);
554 vfsopsrele = false;
555 }
556 if (!error)
557 KNOTE(&fs_klist, VQ_MOUNT);
558
559 done:
560 if (vfsopsrele)
561 vfs_delref(vfsops);
562 if (vp != NULL) {
563 vrele(vp);
564 }
565 if (data_buf != data)
566 kmem_free(data_buf, alloc_sz);
567 return (error);
568 }
569
570 /*
571 * Unmount a file system.
572 *
573 * Note: unmount takes a path to the vnode mounted on as argument,
574 * not special file (as before).
575 */
576 /* ARGSUSED */
577 int
578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
579 {
580 /* {
581 syscallarg(const char *) path;
582 syscallarg(int) flags;
583 } */
584 struct vnode *vp;
585 struct mount *mp;
586 int error;
587 struct pathbuf *pb;
588 struct nameidata nd;
589
590 error = pathbuf_copyin(SCARG(uap, path), &pb);
591 if (error) {
592 return error;
593 }
594
595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
596 if ((error = namei(&nd)) != 0) {
597 pathbuf_destroy(pb);
598 return error;
599 }
600 vp = nd.ni_vp;
601 pathbuf_destroy(pb);
602
603 mp = vp->v_mount;
604 vfs_ref(mp);
605 VOP_UNLOCK(vp);
606
607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
609 if (error) {
610 vrele(vp);
611 vfs_rele(mp);
612 return (error);
613 }
614
615 /*
616 * Don't allow unmounting the root file system.
617 */
618 if (mp->mnt_flag & MNT_ROOTFS) {
619 vrele(vp);
620 vfs_rele(mp);
621 return (EINVAL);
622 }
623
624 /*
625 * Must be the root of the filesystem
626 */
627 if ((vp->v_vflag & VV_ROOT) == 0) {
628 vrele(vp);
629 vfs_rele(mp);
630 return (EINVAL);
631 }
632
633 vrele(vp);
634 error = dounmount(mp, SCARG(uap, flags), l);
635 vfs_rele(mp);
636 if (!error)
637 KNOTE(&fs_klist, VQ_UNMOUNT);
638 return error;
639 }
640
641 /*
642 * Sync each mounted filesystem.
643 */
644 #ifdef DEBUG
645 int syncprt = 0;
646 struct ctldebug debug0 = { "syncprt", &syncprt };
647 #endif
648
649 void
650 do_sys_sync(struct lwp *l)
651 {
652 mount_iterator_t *iter;
653 struct mount *mp;
654 int asyncflag;
655
656 mountlist_iterator_init(&iter);
657 while ((mp = mountlist_iterator_next(iter)) != NULL) {
658 mutex_enter(&mp->mnt_updating);
659 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
660 asyncflag = mp->mnt_flag & MNT_ASYNC;
661 mp->mnt_flag &= ~MNT_ASYNC;
662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
663 if (asyncflag)
664 mp->mnt_flag |= MNT_ASYNC;
665 }
666 mutex_exit(&mp->mnt_updating);
667 }
668 mountlist_iterator_destroy(iter);
669 #ifdef DEBUG
670 if (syncprt)
671 vfs_bufstats();
672 #endif /* DEBUG */
673 }
674
675 /* ARGSUSED */
676 int
677 sys_sync(struct lwp *l, const void *v, register_t *retval)
678 {
679 do_sys_sync(l);
680 return (0);
681 }
682
683
684 /*
685 * Access or change filesystem quotas.
686 *
687 * (this is really 14 different calls bundled into one)
688 */
689
690 static int
691 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
692 {
693 struct quotastat info_k;
694 int error;
695
696 /* ensure any padding bytes are cleared */
697 memset(&info_k, 0, sizeof(info_k));
698
699 error = vfs_quotactl_stat(mp, &info_k);
700 if (error) {
701 return error;
702 }
703
704 return copyout(&info_k, info_u, sizeof(info_k));
705 }
706
707 static int
708 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
709 struct quotaidtypestat *info_u)
710 {
711 struct quotaidtypestat info_k;
712 int error;
713
714 /* ensure any padding bytes are cleared */
715 memset(&info_k, 0, sizeof(info_k));
716
717 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
718 if (error) {
719 return error;
720 }
721
722 return copyout(&info_k, info_u, sizeof(info_k));
723 }
724
725 static int
726 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
727 struct quotaobjtypestat *info_u)
728 {
729 struct quotaobjtypestat info_k;
730 int error;
731
732 /* ensure any padding bytes are cleared */
733 memset(&info_k, 0, sizeof(info_k));
734
735 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
736 if (error) {
737 return error;
738 }
739
740 return copyout(&info_k, info_u, sizeof(info_k));
741 }
742
743 static int
744 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
745 struct quotaval *val_u)
746 {
747 struct quotakey key_k;
748 struct quotaval val_k;
749 int error;
750
751 /* ensure any padding bytes are cleared */
752 memset(&val_k, 0, sizeof(val_k));
753
754 error = copyin(key_u, &key_k, sizeof(key_k));
755 if (error) {
756 return error;
757 }
758
759 error = vfs_quotactl_get(mp, &key_k, &val_k);
760 if (error) {
761 return error;
762 }
763
764 return copyout(&val_k, val_u, sizeof(val_k));
765 }
766
767 static int
768 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
769 const struct quotaval *val_u)
770 {
771 struct quotakey key_k;
772 struct quotaval val_k;
773 int error;
774
775 error = copyin(key_u, &key_k, sizeof(key_k));
776 if (error) {
777 return error;
778 }
779
780 error = copyin(val_u, &val_k, sizeof(val_k));
781 if (error) {
782 return error;
783 }
784
785 return vfs_quotactl_put(mp, &key_k, &val_k);
786 }
787
788 static int
789 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
790 {
791 struct quotakey key_k;
792 int error;
793
794 error = copyin(key_u, &key_k, sizeof(key_k));
795 if (error) {
796 return error;
797 }
798
799 return vfs_quotactl_del(mp, &key_k);
800 }
801
802 static int
803 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
804 {
805 struct quotakcursor cursor_k;
806 int error;
807
808 /* ensure any padding bytes are cleared */
809 memset(&cursor_k, 0, sizeof(cursor_k));
810
811 error = vfs_quotactl_cursoropen(mp, &cursor_k);
812 if (error) {
813 return error;
814 }
815
816 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
817 }
818
819 static int
820 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
821 {
822 struct quotakcursor cursor_k;
823 int error;
824
825 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
826 if (error) {
827 return error;
828 }
829
830 return vfs_quotactl_cursorclose(mp, &cursor_k);
831 }
832
833 static int
834 do_sys_quotactl_cursorskipidtype(struct mount *mp,
835 struct quotakcursor *cursor_u, int idtype)
836 {
837 struct quotakcursor cursor_k;
838 int error;
839
840 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
841 if (error) {
842 return error;
843 }
844
845 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
846 if (error) {
847 return error;
848 }
849
850 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
851 }
852
853 static int
854 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
855 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
856 unsigned *ret_u)
857 {
858 #define CGET_STACK_MAX 8
859 struct quotakcursor cursor_k;
860 struct quotakey stackkeys[CGET_STACK_MAX];
861 struct quotaval stackvals[CGET_STACK_MAX];
862 struct quotakey *keys_k;
863 struct quotaval *vals_k;
864 unsigned ret_k;
865 int error;
866
867 if (maxnum > 128) {
868 maxnum = 128;
869 }
870
871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
872 if (error) {
873 return error;
874 }
875
876 if (maxnum <= CGET_STACK_MAX) {
877 keys_k = stackkeys;
878 vals_k = stackvals;
879 /* ensure any padding bytes are cleared */
880 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
881 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
882 } else {
883 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
884 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
885 }
886
887 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
888 &ret_k);
889 if (error) {
890 goto fail;
891 }
892
893 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
894 if (error) {
895 goto fail;
896 }
897
898 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
899 if (error) {
900 goto fail;
901 }
902
903 error = copyout(&ret_k, ret_u, sizeof(ret_k));
904 if (error) {
905 goto fail;
906 }
907
908 /* do last to maximize the chance of being able to recover a failure */
909 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
910
911 fail:
912 if (keys_k != stackkeys) {
913 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
914 }
915 if (vals_k != stackvals) {
916 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
917 }
918 return error;
919 }
920
921 static int
922 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
923 int *ret_u)
924 {
925 struct quotakcursor cursor_k;
926 int ret_k;
927 int error;
928
929 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
930 if (error) {
931 return error;
932 }
933
934 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
935 if (error) {
936 return error;
937 }
938
939 error = copyout(&ret_k, ret_u, sizeof(ret_k));
940 if (error) {
941 return error;
942 }
943
944 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
945 }
946
947 static int
948 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
949 {
950 struct quotakcursor cursor_k;
951 int error;
952
953 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
954 if (error) {
955 return error;
956 }
957
958 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
959 if (error) {
960 return error;
961 }
962
963 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
964 }
965
966 static int
967 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
968 {
969 char *path_k;
970 int error;
971
972 /* XXX this should probably be a struct pathbuf */
973 path_k = PNBUF_GET();
974 error = copyin(path_u, path_k, PATH_MAX);
975 if (error) {
976 PNBUF_PUT(path_k);
977 return error;
978 }
979
980 error = vfs_quotactl_quotaon(mp, idtype, path_k);
981
982 PNBUF_PUT(path_k);
983 return error;
984 }
985
/*
 * QUOTACTL_QUOTAOFF: nothing to copy in or out; forwards straight to
 * vfs_quotactl_quotaoff().
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int error;

	error = vfs_quotactl_quotaoff(mp, idtype);
	return error;
}
991
992 int
993 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
994 {
995 struct mount *mp;
996 struct vnode *vp;
997 int error;
998
999 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
1000 if (error != 0)
1001 return (error);
1002 mp = vp->v_mount;
1003
1004 switch (args->qc_op) {
1005 case QUOTACTL_STAT:
1006 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
1007 break;
1008 case QUOTACTL_IDTYPESTAT:
1009 error = do_sys_quotactl_idtypestat(mp,
1010 args->u.idtypestat.qc_idtype,
1011 args->u.idtypestat.qc_info);
1012 break;
1013 case QUOTACTL_OBJTYPESTAT:
1014 error = do_sys_quotactl_objtypestat(mp,
1015 args->u.objtypestat.qc_objtype,
1016 args->u.objtypestat.qc_info);
1017 break;
1018 case QUOTACTL_GET:
1019 error = do_sys_quotactl_get(mp,
1020 args->u.get.qc_key,
1021 args->u.get.qc_val);
1022 break;
1023 case QUOTACTL_PUT:
1024 error = do_sys_quotactl_put(mp,
1025 args->u.put.qc_key,
1026 args->u.put.qc_val);
1027 break;
1028 case QUOTACTL_DEL:
1029 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1030 break;
1031 case QUOTACTL_CURSOROPEN:
1032 error = do_sys_quotactl_cursoropen(mp,
1033 args->u.cursoropen.qc_cursor);
1034 break;
1035 case QUOTACTL_CURSORCLOSE:
1036 error = do_sys_quotactl_cursorclose(mp,
1037 args->u.cursorclose.qc_cursor);
1038 break;
1039 case QUOTACTL_CURSORSKIPIDTYPE:
1040 error = do_sys_quotactl_cursorskipidtype(mp,
1041 args->u.cursorskipidtype.qc_cursor,
1042 args->u.cursorskipidtype.qc_idtype);
1043 break;
1044 case QUOTACTL_CURSORGET:
1045 error = do_sys_quotactl_cursorget(mp,
1046 args->u.cursorget.qc_cursor,
1047 args->u.cursorget.qc_keys,
1048 args->u.cursorget.qc_vals,
1049 args->u.cursorget.qc_maxnum,
1050 args->u.cursorget.qc_ret);
1051 break;
1052 case QUOTACTL_CURSORATEND:
1053 error = do_sys_quotactl_cursoratend(mp,
1054 args->u.cursoratend.qc_cursor,
1055 args->u.cursoratend.qc_ret);
1056 break;
1057 case QUOTACTL_CURSORREWIND:
1058 error = do_sys_quotactl_cursorrewind(mp,
1059 args->u.cursorrewind.qc_cursor);
1060 break;
1061 case QUOTACTL_QUOTAON:
1062 error = do_sys_quotactl_quotaon(mp,
1063 args->u.quotaon.qc_idtype,
1064 args->u.quotaon.qc_quotafile);
1065 break;
1066 case QUOTACTL_QUOTAOFF:
1067 error = do_sys_quotactl_quotaoff(mp,
1068 args->u.quotaoff.qc_idtype);
1069 break;
1070 default:
1071 error = EINVAL;
1072 break;
1073 }
1074
1075 vrele(vp);
1076 return error;
1077 }
1078
1079 /* ARGSUSED */
1080 int
1081 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1082 register_t *retval)
1083 {
1084 /* {
1085 syscallarg(const char *) path;
1086 syscallarg(struct quotactl_args *) args;
1087 } */
1088 struct quotactl_args args;
1089 int error;
1090
1091 error = copyin(SCARG(uap, args), &args, sizeof(args));
1092 if (error) {
1093 return error;
1094 }
1095
1096 return do_sys_quotactl(SCARG(uap, path), &args);
1097 }
1098
1099 int
1100 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1101 int root)
1102 {
1103 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1104 int error = 0;
1105
1106 /*
1107 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1108 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1109 * overrides MNT_NOWAIT.
1110 */
1111 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1112 (flags != MNT_WAIT && flags != 0)) {
1113 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1114 goto done;
1115 }
1116
1117 /* Get the filesystem stats now */
1118 memset(sp, 0, sizeof(*sp));
1119 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1120 return error;
1121 }
1122
1123 if (cwdi->cwdi_rdir == NULL)
1124 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1125 done:
1126 if (cwdi->cwdi_rdir != NULL) {
1127 size_t len;
1128 char *bp;
1129 char c;
1130 char *path = PNBUF_GET();
1131
1132 bp = path + MAXPATHLEN;
1133 *--bp = '\0';
1134 rw_enter(&cwdi->cwdi_lock, RW_READER);
1135 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1136 MAXPATHLEN / 2, 0, l);
1137 rw_exit(&cwdi->cwdi_lock);
1138 if (error) {
1139 PNBUF_PUT(path);
1140 return error;
1141 }
1142 len = strlen(bp);
1143 if (len != 1) {
1144 /*
1145 * for mount points that are below our root, we can see
1146 * them, so we fix up the pathname and return them. The
1147 * rest we cannot see, so we don't allow viewing the
1148 * data.
1149 */
1150 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1151 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1152 (void)strlcpy(sp->f_mntonname,
1153 c == '\0' ? "/" : &sp->f_mntonname[len],
1154 sizeof(sp->f_mntonname));
1155 } else {
1156 if (root)
1157 (void)strlcpy(sp->f_mntonname, "/",
1158 sizeof(sp->f_mntonname));
1159 else
1160 error = EPERM;
1161 }
1162 }
1163 PNBUF_PUT(path);
1164 }
1165 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1166 return error;
1167 }
1168
1169 /*
1170 * Get filesystem statistics by path.
1171 */
1172 int
1173 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1174 {
1175 struct mount *mp;
1176 int error;
1177 struct vnode *vp;
1178
1179 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1180 if (error != 0)
1181 return error;
1182 mp = vp->v_mount;
1183 error = dostatvfs(mp, sb, l, flags, 1);
1184 vrele(vp);
1185 return error;
1186 }
1187
1188 /* ARGSUSED */
1189 int
1190 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1191 {
1192 /* {
1193 syscallarg(const char *) path;
1194 syscallarg(struct statvfs *) buf;
1195 syscallarg(int) flags;
1196 } */
1197 struct statvfs *sb;
1198 int error;
1199
1200 sb = STATVFSBUF_GET();
1201 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1202 if (error == 0)
1203 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1204 STATVFSBUF_PUT(sb);
1205 return error;
1206 }
1207
1208 /*
1209 * Get filesystem statistics by fd.
1210 */
1211 int
1212 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1213 {
1214 file_t *fp;
1215 struct mount *mp;
1216 int error;
1217
1218 /* fd_getvnode() will use the descriptor for us */
1219 if ((error = fd_getvnode(fd, &fp)) != 0)
1220 return (error);
1221 mp = fp->f_vnode->v_mount;
1222 error = dostatvfs(mp, sb, curlwp, flags, 1);
1223 fd_putfile(fd);
1224 return error;
1225 }
1226
1227 /* ARGSUSED */
1228 int
1229 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1230 {
1231 /* {
1232 syscallarg(int) fd;
1233 syscallarg(struct statvfs *) buf;
1234 syscallarg(int) flags;
1235 } */
1236 struct statvfs *sb;
1237 int error;
1238
1239 sb = STATVFSBUF_GET();
1240 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1241 if (error == 0)
1242 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1243 STATVFSBUF_PUT(sb);
1244 return error;
1245 }
1246
1247
1248 /*
1249 * Get statistics on all filesystems.
1250 */
1251 int
1252 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1253 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1254 register_t *retval)
1255 {
1256 int root = 0;
1257 mount_iterator_t *iter;
1258 struct proc *p = l->l_proc;
1259 struct mount *mp;
1260 struct statvfs *sb;
1261 size_t count, maxcount;
1262 int error = 0;
1263
1264 sb = STATVFSBUF_GET();
1265 maxcount = bufsize / entry_sz;
1266 count = 0;
1267 mountlist_iterator_init(&iter);
1268 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1269 if (sfsp && count < maxcount) {
1270 error = dostatvfs(mp, sb, l, flags, 0);
1271 if (error) {
1272 error = 0;
1273 continue;
1274 }
1275 error = copyfn(sb, sfsp, entry_sz);
1276 if (error)
1277 goto out;
1278 sfsp = (char *)sfsp + entry_sz;
1279 root |= strcmp(sb->f_mntonname, "/") == 0;
1280 }
1281 count++;
1282 }
1283
1284 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1285 /*
1286 * fake a root entry
1287 */
1288 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1289 sb, l, flags, 1);
1290 if (error != 0)
1291 goto out;
1292 if (sfsp) {
1293 error = copyfn(sb, sfsp, entry_sz);
1294 if (error != 0)
1295 goto out;
1296 }
1297 count++;
1298 }
1299 if (sfsp && count > maxcount)
1300 *retval = maxcount;
1301 else
1302 *retval = count;
1303 out:
1304 mountlist_iterator_destroy(iter);
1305 STATVFSBUF_PUT(sb);
1306 return error;
1307 }
1308
1309 int
1310 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1311 {
1312 /* {
1313 syscallarg(struct statvfs *) buf;
1314 syscallarg(size_t) bufsize;
1315 syscallarg(int) flags;
1316 } */
1317
1318 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1319 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1320 }
1321
/*
 * Change current working directory to a given file descriptor.
 *
 * The descriptor must refer to a directory for which VOP_ACCESS()
 * grants VEXEC to the caller.  If something is mounted on that
 * directory, the mount's root (followed through any further stacked
 * mounts) becomes the new working directory instead.
 *
 * Returns 0 on success, ENOTDIR if the vnode is not a directory, EPERM
 * if the process has a root directory and the target is not under it,
 * or the error from fd_getvnode(), VOP_ACCESS(), vfs_busy() or
 * VFS_ROOT().
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error, fd;

	/* fd_getvnode() will use the descriptor for us */
	fd = SCARG(uap, fd);
	if ((error = fd_getvnode(fd, &fp)) != 0)
		return (error);
	vp = fp->f_vnode;

	/*
	 * Take our own reference: on success it is the one stored in
	 * cwdi_cdir, independent of the descriptor.
	 */
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}
	/*
	 * Walk up through anything mounted here.  Each iteration gives
	 * up the covered vnode and continues with the root of the mount
	 * on top of it; the VOP_UNLOCK() after the loop relies on that
	 * root vnode coming back locked from VFS_ROOT().
	 */
	while ((mp = vp->v_mountedhere) != NULL) {
		error = vfs_busy(mp);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp);
		if (error)
			goto out;
		vp = tdp;
	}
	VOP_UNLOCK(vp);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap in the new directory, dropping the old reference. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return (error);
}
1387
1388 /*
1389 * Change this process's notion of the root directory to a given file
1390 * descriptor.
1391 */
1392 int
1393 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1394 {
1395 struct proc *p = l->l_proc;
1396 struct vnode *vp;
1397 file_t *fp;
1398 int error, fd = SCARG(uap, fd);
1399
1400 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1401 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1402 return error;
1403 /* fd_getvnode() will use the descriptor for us */
1404 if ((error = fd_getvnode(fd, &fp)) != 0)
1405 return error;
1406 vp = fp->f_vnode;
1407 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1408 if (vp->v_type != VDIR)
1409 error = ENOTDIR;
1410 else
1411 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1412 VOP_UNLOCK(vp);
1413 if (error)
1414 goto out;
1415 vref(vp);
1416
1417 change_root(p->p_cwdi, vp, l);
1418
1419 out:
1420 fd_putfile(fd);
1421 return (error);
1422 }
1423
1424 /*
1425 * Change current working directory (``.'').
1426 */
1427 /* ARGSUSED */
1428 int
1429 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1430 {
1431 /* {
1432 syscallarg(const char *) path;
1433 } */
1434 struct proc *p = l->l_proc;
1435 struct cwdinfo *cwdi;
1436 int error;
1437 struct vnode *vp;
1438
1439 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1440 &vp, l)) != 0)
1441 return (error);
1442 cwdi = p->p_cwdi;
1443 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1444 vrele(cwdi->cwdi_cdir);
1445 cwdi->cwdi_cdir = vp;
1446 rw_exit(&cwdi->cwdi_lock);
1447 return (0);
1448 }
1449
1450 /*
1451 * Change notion of root (``/'') directory.
1452 */
1453 /* ARGSUSED */
1454 int
1455 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1456 {
1457 /* {
1458 syscallarg(const char *) path;
1459 } */
1460 struct proc *p = l->l_proc;
1461 int error;
1462 struct vnode *vp;
1463
1464 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1465 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1466 return (error);
1467 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1468 &vp, l)) != 0)
1469 return (error);
1470
1471 change_root(p->p_cwdi, vp, l);
1472
1473 return (0);
1474 }
1475
/*
 * Common routine for chroot and fchroot.
 * NB: callers need to properly authorize the change root operation.
 *
 * Installs `vp' as the root directory in `cwdi', releasing any previous
 * root.  The reference the caller holds on `vp' is stored in cwdi_rdir
 * (no extra reference is taken for it here).  If the current working
 * directory is not under the new root, it is replaced by `vp' as well.
 * Finally the process credentials are cloned, passed through
 * kauth_proc_chroot() and installed via proc_crmod_leave().
 */
void
change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
{
	struct proc *p = l->l_proc;
	kauth_cred_t ncred;

	/* Allocated up front, before any lock is taken. */
	ncred = kauth_cred_alloc();

	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir != NULL)
		vrele(cwdi->cwdi_rdir);
	cwdi->cwdi_rdir = vp;

	/*
	 * Prevent escaping from chroot by putting the root under
	 * the working directory.  Silently chdir to / if we aren't
	 * already there.
	 */
	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
		/*
		 * XXX would be more failsafe to change directory to a
		 * deadfs node here instead
		 */
		vrele(cwdi->cwdi_cdir);
		/* cwdi_cdir needs its own reference to vp. */
		vref(vp);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

	/* Get a write lock on the process credential. */
	proc_crmod_enter();

	kauth_cred_clone(p->p_cred, ncred);
	/*
	 * NOTE(review): this passes p->p_cwdi rather than the `cwdi'
	 * argument; the callers visible in this file always pass
	 * p->p_cwdi, so the two are the same there.
	 */
	kauth_proc_chroot(ncred, p->p_cwdi);

	/* Broadcast our credentials to the process and other LWPs. */
	proc_crmod_leave(ncred, p->p_cred, true);
}
1518
1519 /*
1520 * Common routine for chroot and chdir.
1521 * XXX "where" should be enum uio_seg
1522 */
1523 int
1524 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1525 {
1526 struct pathbuf *pb;
1527 struct nameidata nd;
1528 int error;
1529
1530 error = pathbuf_maybe_copyin(path, where, &pb);
1531 if (error) {
1532 return error;
1533 }
1534 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1535 if ((error = namei(&nd)) != 0) {
1536 pathbuf_destroy(pb);
1537 return error;
1538 }
1539 *vpp = nd.ni_vp;
1540 pathbuf_destroy(pb);
1541
1542 if ((*vpp)->v_type != VDIR)
1543 error = ENOTDIR;
1544 else
1545 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1546
1547 if (error)
1548 vput(*vpp);
1549 else
1550 VOP_UNLOCK(*vpp);
1551 return (error);
1552 }
1553
/*
 * Internals of sys_open - path has already been converted into a pathbuf
 * (so we can easily reuse this function from other parts of the kernel,
 * like posix_spawn post-processing).
 *
 *	l		calling lwp
 *	dvp		directory to start the lookup from, or NULL
 *	pb		path to open
 *	open_flags	open(2)-style flags
 *	open_mode	creation mode, masked with the process cmask
 *	fd		receives the new descriptor number on success
 *
 * Returns 0 on success; EINVAL for an invalid flag combination;
 * otherwise the error from fd_allocfile(), vn_open(), fd_dupopen() or
 * open_setfp().  ERESTART from the open path is reported as EINTR.
 */
int
do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
	int open_mode, int *fd)
{
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;
	file_t *fp;
	struct vnode *vp;
	int flags, cmode;
	int indx, error;
	struct nameidata nd;

	/* O_SEARCH is accepted but simply stripped here. */
	if (open_flags & O_SEARCH) {
		open_flags &= ~(int)O_SEARCH;
	}

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE))
		return EINVAL;

	/* After conversion at least one of read/write must be requested. */
	flags = FFLAGS(open_flags);
	if ((flags & (FREAD | FWRITE)) == 0)
		return EINVAL;

	if ((error = fd_allocfile(&fp, &indx)) != 0) {
		return error;
	}

	/* We're going to read cwdi->cwdi_cmask unlocked here. */
	cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
	NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
	if (dvp != NULL)
		NDAT(&nd, dvp);

	/*
	 * Seed l_dupfd with a negative value derived from our slot; the
	 * error path below only goes on to fd_dupopen() if something
	 * during vn_open() replaced it with a non-negative descriptor.
	 */
	l->l_dupfd = -indx - 1;	/* XXX check for fdopen */
	if ((error = vn_open(&nd, flags, cmode)) != 0) {
		fd_abort(p, fp, indx);
		if ((error == EDUPFD || error == EMOVEFD) &&
		    l->l_dupfd >= 0 &&			/* XXX from fdopen */
		    (error =
			fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
			*fd = indx;
			return 0;
		}
		if (error == ERESTART)
			error = EINTR;
		return error;
	}

	l->l_dupfd = 0;
	vp = nd.ni_vp;

	/*
	 * NOTE(review): no fd_abort()/vput() on this failure path, so
	 * open_setfp() presumably cleans up fp and vp itself - confirm.
	 */
	if ((error = open_setfp(l, fp, vp, indx, flags)))
		return error;

	VOP_UNLOCK(vp);
	*fd = indx;
	/* Publish the file in the descriptor table. */
	fd_affix(p, fp, indx);
	return 0;
}
1622
1623 int
1624 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1625 {
1626 struct pathbuf *pb;
1627 int error, oflags;
1628
1629 oflags = FFLAGS(open_flags);
1630 if ((oflags & (FREAD | FWRITE)) == 0)
1631 return EINVAL;
1632
1633 pb = pathbuf_create(path);
1634 if (pb == NULL)
1635 return ENOMEM;
1636
1637 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1638 pathbuf_destroy(pb);
1639
1640 return error;
1641 }
1642
1643 static int
1644 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1645 int mode, int *fd)
1646 {
1647 file_t *dfp = NULL;
1648 struct vnode *dvp = NULL;
1649 struct pathbuf *pb;
1650 const char *pathstring = NULL;
1651 int error;
1652
1653 if (path == NULL) {
1654 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error);
1655 if (error == ENOSYS)
1656 goto no_compat;
1657 if (error)
1658 return error;
1659 } else {
1660 no_compat:
1661 error = pathbuf_copyin(path, &pb);
1662 if (error)
1663 return error;
1664 }
1665
1666 pathstring = pathbuf_stringcopy_get(pb);
1667
1668 /*
1669 * fdat is ignored if:
1670 * 1) if fdat is AT_FDCWD, which means use current directory as base.
1671 * 2) if path is absolute, then fdat is useless.
1672 */
1673 if (fdat != AT_FDCWD && pathstring[0] != '/') {
1674 /* fd_getvnode() will use the descriptor for us */
1675 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1676 goto out;
1677
1678 dvp = dfp->f_vnode;
1679 }
1680
1681 error = do_open(l, dvp, pb, flags, mode, fd);
1682
1683 if (dfp != NULL)
1684 fd_putfile(fdat);
1685 out:
1686 pathbuf_stringcopy_put(pb, pathstring);
1687 pathbuf_destroy(pb);
1688 return error;
1689 }
1690
1691 int
1692 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1693 {
1694 /* {
1695 syscallarg(const char *) path;
1696 syscallarg(int) flags;
1697 syscallarg(int) mode;
1698 } */
1699 int error;
1700 int fd;
1701
1702 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1703 SCARG(uap, flags), SCARG(uap, mode), &fd);
1704
1705 if (error == 0)
1706 *retval = fd;
1707
1708 return error;
1709 }
1710
1711 int
1712 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1713 {
1714 /* {
1715 syscallarg(int) fd;
1716 syscallarg(const char *) path;
1717 syscallarg(int) oflags;
1718 syscallarg(int) mode;
1719 } */
1720 int error;
1721 int fd;
1722
1723 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1724 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1725
1726 if (error == 0)
1727 *retval = fd;
1728
1729 return error;
1730 }
1731
1732 static void
1733 vfs__fhfree(fhandle_t *fhp)
1734 {
1735 size_t fhsize;
1736
1737 fhsize = FHANDLE_SIZE(fhp);
1738 kmem_free(fhp, fhsize);
1739 }
1740
1741 /*
1742 * vfs_composefh: compose a filehandle.
1743 */
1744
1745 int
1746 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1747 {
1748 struct mount *mp;
1749 struct fid *fidp;
1750 int error;
1751 size_t needfhsize;
1752 size_t fidsize;
1753
1754 mp = vp->v_mount;
1755 fidp = NULL;
1756 if (*fh_size < FHANDLE_SIZE_MIN) {
1757 fidsize = 0;
1758 } else {
1759 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1760 if (fhp != NULL) {
1761 memset(fhp, 0, *fh_size);
1762 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1763 fidp = &fhp->fh_fid;
1764 }
1765 }
1766 error = VFS_VPTOFH(vp, fidp, &fidsize);
1767 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1768 if (error == 0 && *fh_size < needfhsize) {
1769 error = E2BIG;
1770 }
1771 *fh_size = needfhsize;
1772 return error;
1773 }
1774
1775 int
1776 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1777 {
1778 struct mount *mp;
1779 fhandle_t *fhp;
1780 size_t fhsize;
1781 size_t fidsize;
1782 int error;
1783
1784 mp = vp->v_mount;
1785 fidsize = 0;
1786 error = VFS_VPTOFH(vp, NULL, &fidsize);
1787 KASSERT(error != 0);
1788 if (error != E2BIG) {
1789 goto out;
1790 }
1791 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1792 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1793 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1794 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1795 if (error == 0) {
1796 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1797 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1798 *fhpp = fhp;
1799 } else {
1800 kmem_free(fhp, fhsize);
1801 }
1802 out:
1803 return error;
1804 }
1805
1806 void
1807 vfs_composefh_free(fhandle_t *fhp)
1808 {
1809
1810 vfs__fhfree(fhp);
1811 }
1812
1813 /*
1814 * vfs_fhtovp: lookup a vnode by a filehandle.
1815 */
1816
1817 int
1818 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1819 {
1820 struct mount *mp;
1821 int error;
1822
1823 *vpp = NULL;
1824 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1825 if (mp == NULL) {
1826 error = ESTALE;
1827 goto out;
1828 }
1829 if (mp->mnt_op->vfs_fhtovp == NULL) {
1830 error = EOPNOTSUPP;
1831 goto out;
1832 }
1833 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1834 out:
1835 return error;
1836 }
1837
1838 /*
1839 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1840 * the needed size.
1841 */
1842
1843 int
1844 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1845 {
1846 fhandle_t *fhp;
1847 int error;
1848
1849 if (fhsize > FHANDLE_SIZE_MAX) {
1850 return EINVAL;
1851 }
1852 if (fhsize < FHANDLE_SIZE_MIN) {
1853 return EINVAL;
1854 }
1855 again:
1856 fhp = kmem_alloc(fhsize, KM_SLEEP);
1857 error = copyin(ufhp, fhp, fhsize);
1858 if (error == 0) {
1859 /* XXX this check shouldn't be here */
1860 if (FHANDLE_SIZE(fhp) == fhsize) {
1861 *fhpp = fhp;
1862 return 0;
1863 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1864 /*
1865 * a kludge for nfsv2 padded handles.
1866 */
1867 size_t sz;
1868
1869 sz = FHANDLE_SIZE(fhp);
1870 kmem_free(fhp, fhsize);
1871 fhsize = sz;
1872 goto again;
1873 } else {
1874 /*
1875 * userland told us wrong size.
1876 */
1877 error = EINVAL;
1878 }
1879 }
1880 kmem_free(fhp, fhsize);
1881 return error;
1882 }
1883
1884 void
1885 vfs_copyinfh_free(fhandle_t *fhp)
1886 {
1887
1888 vfs__fhfree(fhp);
1889 }
1890
1891 /*
1892 * Get file handle system call
1893 */
1894 int
1895 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1896 {
1897 /* {
1898 syscallarg(char *) fname;
1899 syscallarg(fhandle_t *) fhp;
1900 syscallarg(size_t *) fh_size;
1901 } */
1902 struct vnode *vp;
1903 fhandle_t *fh;
1904 int error;
1905 struct pathbuf *pb;
1906 struct nameidata nd;
1907 size_t sz;
1908 size_t usz;
1909
1910 /*
1911 * Must be super user
1912 */
1913 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1914 0, NULL, NULL, NULL);
1915 if (error)
1916 return (error);
1917
1918 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1919 if (error) {
1920 return error;
1921 }
1922 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1923 error = namei(&nd);
1924 if (error) {
1925 pathbuf_destroy(pb);
1926 return error;
1927 }
1928 vp = nd.ni_vp;
1929 pathbuf_destroy(pb);
1930
1931 error = vfs_composefh_alloc(vp, &fh);
1932 vput(vp);
1933 if (error != 0) {
1934 return error;
1935 }
1936 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1937 if (error != 0) {
1938 goto out;
1939 }
1940 sz = FHANDLE_SIZE(fh);
1941 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1942 if (error != 0) {
1943 goto out;
1944 }
1945 if (usz >= sz) {
1946 error = copyout(fh, SCARG(uap, fhp), sz);
1947 } else {
1948 error = E2BIG;
1949 }
1950 out:
1951 vfs_composefh_free(fh);
1952 return (error);
1953 }
1954
/*
 * Open a file given a file handle.
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 *	l	calling lwp (its credentials are used for all checks)
 *	ufhp	user-space address of the file handle
 *	fhsize	size of the handle at ufhp
 *	oflags	open(2)-style flags; O_CREAT is rejected
 *	retval	receives the new descriptor number on success
 *
 * Returns 0 on success, EINVAL for unusable flags, EOPNOTSUPP for
 * sockets, or the error from the authorization, handle copyin/lookup,
 * vn_openchk(), VOP_SETATTR(), VOP_OPEN() or open_setfp().
 */

int
dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
    register_t *retval)
{
	file_t *fp;
	struct vnode *vp = NULL;
	kauth_cred_t cred = l->l_cred;
	file_t *nfp;
	int indx, error;
	struct vattr va;
	fhandle_t *fh;
	int flags;
	proc_t *p;

	p = curproc;

	/*
	 * Must be super user
	 */
	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
	    0, NULL, NULL, NULL)))
		return (error);

	/* O_SEARCH is accepted but simply stripped here. */
	if (oflags & O_SEARCH) {
		oflags &= ~(int)O_SEARCH;
	}

	flags = FFLAGS(oflags);
	if ((flags & (FREAD | FWRITE)) == 0)
		return (EINVAL);
	/* A handle names an existing object; creation makes no sense. */
	if ((flags & O_CREAT))
		return (EINVAL);
	if ((error = fd_allocfile(&nfp, &indx)) != 0)
		return (error);
	fp = nfp;
	error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
	if (error != 0) {
		goto bad;
	}
	/* The handle is only needed for the lookup itself. */
	error = vfs_fhtovp(fh, &vp);
	vfs_copyinfh_free(fh);
	if (error != 0) {
		goto bad;
	}

	/* Now do an effective vn_open */

	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	error = vn_openchk(vp, cred, flags);
	if (error != 0)
		goto bad;
	if (flags & O_TRUNC) {
		VOP_UNLOCK(vp);			/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);   /* XXX */
		/* Truncate by setting only the size attribute to 0. */
		vattr_null(&va);
		va.va_size = 0;
		error = VOP_SETATTR(vp, &va, cred);
		if (error)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, flags, cred)) != 0)
		goto bad;
	if (flags & FWRITE) {
		mutex_enter(vp->v_interlock);
		vp->v_writecount++;
		mutex_exit(vp->v_interlock);
	}

	/* done with modified vn_open, now finish what sys_open does. */
	/*
	 * NOTE(review): no fd_abort()/vput() on this failure path, so
	 * open_setfp() presumably cleans up fp and vp itself - confirm.
	 */
	if ((error = open_setfp(l, fp, vp, indx, flags)))
		return error;

	VOP_UNLOCK(vp);
	*retval = indx;
	fd_affix(p, fp, indx);
	return (0);

bad:
	/* Give back the descriptor slot and, if we got one, the vnode. */
	fd_abort(p, fp, indx);
	if (vp != NULL)
		vput(vp);
	return (error);
}
2048
2049 int
2050 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2051 {
2052 /* {
2053 syscallarg(const void *) fhp;
2054 syscallarg(size_t) fh_size;
2055 syscallarg(int) flags;
2056 } */
2057
2058 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2059 SCARG(uap, flags), retval);
2060 }
2061
2062 int
2063 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2064 {
2065 int error;
2066 fhandle_t *fh;
2067 struct vnode *vp;
2068
2069 /*
2070 * Must be super user
2071 */
2072 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2073 0, NULL, NULL, NULL)))
2074 return (error);
2075
2076 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2077 if (error != 0)
2078 return error;
2079
2080 error = vfs_fhtovp(fh, &vp);
2081 vfs_copyinfh_free(fh);
2082 if (error != 0)
2083 return error;
2084
2085 error = vn_stat(vp, sb);
2086 vput(vp);
2087 return error;
2088 }
2089
2090
2091 /* ARGSUSED */
2092 int
2093 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2094 {
2095 /* {
2096 syscallarg(const void *) fhp;
2097 syscallarg(size_t) fh_size;
2098 syscallarg(struct stat *) sb;
2099 } */
2100 struct stat sb;
2101 int error;
2102
2103 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2104 if (error)
2105 return error;
2106 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2107 }
2108
2109 int
2110 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2111 int flags)
2112 {
2113 fhandle_t *fh;
2114 struct mount *mp;
2115 struct vnode *vp;
2116 int error;
2117
2118 /*
2119 * Must be super user
2120 */
2121 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2122 0, NULL, NULL, NULL)))
2123 return error;
2124
2125 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2126 if (error != 0)
2127 return error;
2128
2129 error = vfs_fhtovp(fh, &vp);
2130 vfs_copyinfh_free(fh);
2131 if (error != 0)
2132 return error;
2133
2134 mp = vp->v_mount;
2135 error = dostatvfs(mp, sb, l, flags, 1);
2136 vput(vp);
2137 return error;
2138 }
2139
2140 /* ARGSUSED */
2141 int
2142 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2143 {
2144 /* {
2145 syscallarg(const void *) fhp;
2146 syscallarg(size_t) fh_size;
2147 syscallarg(struct statvfs *) buf;
2148 syscallarg(int) flags;
2149 } */
2150 struct statvfs *sb = STATVFSBUF_GET();
2151 int error;
2152
2153 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2154 SCARG(uap, flags));
2155 if (error == 0)
2156 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2157 STATVFSBUF_PUT(sb);
2158 return error;
2159 }
2160
2161 int
2162 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2163 dev_t dev)
2164 {
2165
2166 /*
2167 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO
2168 * in mode and dev=0.
2169 *
2170 * In all the other cases it's implementation defined behavior.
2171 */
2172
2173 if ((mode & S_IFIFO) && dev == 0)
2174 return do_sys_mkfifoat(l, fdat, pathname, mode);
2175 else
2176 return do_sys_mknodat(l, fdat, pathname, mode, dev,
2177 UIO_USERSPACE);
2178 }
2179
2180 /*
2181 * Create a special file.
2182 */
2183 /* ARGSUSED */
2184 int
2185 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2186 register_t *retval)
2187 {
2188 /* {
2189 syscallarg(const char *) path;
2190 syscallarg(mode_t) mode;
2191 syscallarg(dev_t) dev;
2192 } */
2193 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path),
2194 SCARG(uap, mode), SCARG(uap, dev));
2195 }
2196
2197 int
2198 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2199 register_t *retval)
2200 {
2201 /* {
2202 syscallarg(int) fd;
2203 syscallarg(const char *) path;
2204 syscallarg(mode_t) mode;
2205 syscallarg(int) pad;
2206 syscallarg(dev_t) dev;
2207 } */
2208
2209 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2210 SCARG(uap, mode), SCARG(uap, dev));
2211 }
2212
2213 int
2214 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2215 enum uio_seg seg)
2216 {
2217 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg);
2218 }
2219
/*
 * Create a filesystem node of the type encoded in `mode'.
 *
 *	l		calling lwp
 *	fdat		directory descriptor handed to fd_nameiat()
 *	pathname	path of the node, in address space `seg'
 *	mode		file type (S_IFMT bits) plus permission bits
 *	dev		device number stored in va_rdev
 *	seg		where pathname lives
 *
 * Requires KAUTH_SYSTEM_MKNOD authorization.  Depending on the type
 * the node is created with VOP_MKNOD (S_IFMT, S_IFCHR, S_IFBLK),
 * VOP_WHITEOUT (S_IFWHT) or VOP_CREATE (S_IFREG); any other type
 * yields EINVAL.  Returns EEXIST if the name already exists, EINVAL
 * if a VOP_MKNOD request carries dev == VNOVAL, or the error from the
 * lookup / VOP call.
 */
int
do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
    dev_t dev, enum uio_seg seg)
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error, optype;
	struct pathbuf *pb;
	struct nameidata nd;
	const char *pathstring;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
	    0, NULL, NULL, NULL)) != 0)
		return (error);

	/* Default operation; the type switch below may override it. */
	optype = VOP_MKNOD_DESCOFFSET;

	error = pathbuf_maybe_copyin(pathname, seg, &pb);
	if (error) {
		return error;
	}
	/* Path string copy, passed to veriexec_openchk() below. */
	pathstring = pathbuf_stringcopy_get(pb);
	if (pathstring == NULL) {
		pathbuf_destroy(pb);
		return ENOMEM;
	}

	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);

	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	if (vp != NULL)
		error = EEXIST;
	else {
		vattr_null(&vattr);
		/* We will read cwdi->cwdi_cmask unlocked. */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
		vattr.va_rdev = dev;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			optype = VOP_WHITEOUT_DESCOFFSET;
			break;
		case S_IFREG:
#if NVERIEXEC > 0
			error = veriexec_openchk(l, nd.ni_vp, pathstring,
			    O_CREAT);
#endif /* NVERIEXEC > 0 */
			vattr.va_type = VREG;
			vattr.va_rdev = VNOVAL;
			optype = VOP_CREATE_DESCOFFSET;
			break;
		default:
			error = EINVAL;
			break;
		}
	}
	/*
	 * A device-style node needs a real device number.  vattr is only
	 * examined when error == 0, i.e. after it was initialized above.
	 */
	if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
	    && vattr.va_rdev == VNOVAL)
		error = EINVAL;
	if (!error) {
		switch (optype) {
		case VOP_WHITEOUT_DESCOFFSET:
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
			if (error)
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			break;

		case VOP_MKNOD_DESCOFFSET:
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* We do not keep the new vnode. */
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;

		case VOP_CREATE_DESCOFFSET:
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
						&nd.ni_cnd, &vattr);
			/* We do not keep the new vnode. */
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;
		}
	} else {
		/*
		 * Abandon the create.  If the existing node is the parent
		 * itself, the parent is released with vrele() rather than
		 * vput(); then the existing node, if any, is released.
		 */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
out:
	pathbuf_stringcopy_put(pb, pathstring);
	pathbuf_destroy(pb);
	return (error);
}
2331
2332 /*
2333 * Create a named pipe.
2334 */
2335 /* ARGSUSED */
2336 int
2337 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2338 {
2339 /* {
2340 syscallarg(const char *) path;
2341 syscallarg(int) mode;
2342 } */
2343 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2344 }
2345
2346 int
2347 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2348 register_t *retval)
2349 {
2350 /* {
2351 syscallarg(int) fd;
2352 syscallarg(const char *) path;
2353 syscallarg(int) mode;
2354 } */
2355
2356 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2357 SCARG(uap, mode));
2358 }
2359
2360 static int
2361 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2362 {
2363 struct proc *p = l->l_proc;
2364 struct vattr vattr;
2365 int error;
2366 struct pathbuf *pb;
2367 struct nameidata nd;
2368
2369 error = pathbuf_copyin(path, &pb);
2370 if (error) {
2371 return error;
2372 }
2373 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2374
2375 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2376 pathbuf_destroy(pb);
2377 return error;
2378 }
2379 if (nd.ni_vp != NULL) {
2380 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2381 if (nd.ni_dvp == nd.ni_vp)
2382 vrele(nd.ni_dvp);
2383 else
2384 vput(nd.ni_dvp);
2385 vrele(nd.ni_vp);
2386 pathbuf_destroy(pb);
2387 return (EEXIST);
2388 }
2389 vattr_null(&vattr);
2390 vattr.va_type = VFIFO;
2391 /* We will read cwdi->cwdi_cmask unlocked. */
2392 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2393 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2394 if (error == 0)
2395 vrele(nd.ni_vp);
2396 vput(nd.ni_dvp);
2397 pathbuf_destroy(pb);
2398 return (error);
2399 }
2400
/*
 * Make a hard file link.
 *
 *	l	calling lwp
 *	fdpath	directory descriptor for resolving `path'
 *	path	user-space path of the existing object
 *	fdlink	directory descriptor for resolving `link'
 *	link	user-space path of the new name
 *	follow	if AT_SYMLINK_FOLLOW is set, `path' is looked up with
 *		NSM_FOLLOW_TRYEMULROOT, else NSM_NOFOLLOW_TRYEMULROOT
 *	retval	not used
 *
 * Returns EEXIST if `link' already exists, EPERM if `path' is a
 * directory, EXDEV if the two are on different mounts, otherwise the
 * error from the lookups or VOP_LINK().
 */
/* ARGSUSED */
int
do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
    const char *link, int follow, register_t *retval)
{
	struct vnode *vp;
	struct pathbuf *linkpb;
	struct nameidata nd;
	namei_simple_flags_t ns_flags;
	int error;

	if (follow & AT_SYMLINK_FOLLOW)
		ns_flags = NSM_FOLLOW_TRYEMULROOT;
	else
		ns_flags = NSM_NOFOLLOW_TRYEMULROOT;

	/* vp: the existing object; released at out1. */
	error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
	if (error != 0)
		return (error);
	error = pathbuf_copyin(link, &linkpb);
	if (error) {
		goto out1;
	}
	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
	if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
		goto out2;
	if (nd.ni_vp) {
		error = EEXIST;
		goto abortop;
	}
	/* Prevent hard links on directories. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto abortop;
	}
	/* Prevent cross-mount operation. */
	if (nd.ni_dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto abortop;
	}
	error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
	/* Unlock and release the parent directory separately. */
	VOP_UNLOCK(nd.ni_dvp);
	vrele(nd.ni_dvp);
out2:
	pathbuf_destroy(linkpb);
out1:
	vrele(vp);
	return (error);
abortop:
	/*
	 * Abandon the create: release the parent (vrele() instead of
	 * vput() when it is the same vnode as the existing target) and
	 * the existing target, then share the normal cleanup.
	 */
	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	if (nd.ni_dvp == nd.ni_vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	if (nd.ni_vp != NULL)
		vrele(nd.ni_vp);
	goto out2;
}
2462
2463 int
2464 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2465 {
2466 /* {
2467 syscallarg(const char *) path;
2468 syscallarg(const char *) link;
2469 } */
2470 const char *path = SCARG(uap, path);
2471 const char *link = SCARG(uap, link);
2472
2473 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2474 AT_SYMLINK_FOLLOW, retval);
2475 }
2476
2477 int
2478 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2479 register_t *retval)
2480 {
2481 /* {
2482 syscallarg(int) fd1;
2483 syscallarg(const char *) name1;
2484 syscallarg(int) fd2;
2485 syscallarg(const char *) name2;
2486 syscallarg(int) flags;
2487 } */
2488 int fd1 = SCARG(uap, fd1);
2489 const char *name1 = SCARG(uap, name1);
2490 int fd2 = SCARG(uap, fd2);
2491 const char *name2 = SCARG(uap, name2);
2492 int follow;
2493
2494 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2495
2496 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2497 }
2498
2499
2500 int
2501 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2502 {
2503 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2504 }
2505
2506 static int
2507 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2508 const char *link, enum uio_seg seg)
2509 {
2510 struct proc *p = curproc;
2511 struct vattr vattr;
2512 char *path;
2513 int error;
2514 size_t len;
2515 struct pathbuf *linkpb;
2516 struct nameidata nd;
2517
2518 KASSERT(l != NULL || fdat == AT_FDCWD);
2519
2520 path = PNBUF_GET();
2521 if (seg == UIO_USERSPACE) {
2522 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0)
2523 goto out1;
2524 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2525 goto out1;
2526 } else {
2527 len = strlen(patharg) + 1;
2528 KASSERT(len <= MAXPATHLEN);
2529 memcpy(path, patharg, len);
2530 linkpb = pathbuf_create(link);
2531 if (linkpb == NULL) {
2532 error = ENOMEM;
2533 goto out1;
2534 }
2535 }
2536 ktrkuser("symlink-target", path, len - 1);
2537
2538 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2539 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2540 goto out2;
2541 if (nd.ni_vp) {
2542 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2543 if (nd.ni_dvp == nd.ni_vp)
2544 vrele(nd.ni_dvp);
2545 else
2546 vput(nd.ni_dvp);
2547 vrele(nd.ni_vp);
2548 error = EEXIST;
2549 goto out2;
2550 }
2551 vattr_null(&vattr);
2552 vattr.va_type = VLNK;
2553 /* We will read cwdi->cwdi_cmask unlocked. */
2554 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2555 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2556 if (error == 0)
2557 vrele(nd.ni_vp);
2558 vput(nd.ni_dvp);
2559 out2:
2560 pathbuf_destroy(linkpb);
2561 out1:
2562 PNBUF_PUT(path);
2563 return (error);
2564 }
2565
2566 /*
2567 * Make a symbolic link.
2568 */
2569 /* ARGSUSED */
2570 int
2571 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2572 {
2573 /* {
2574 syscallarg(const char *) path;
2575 syscallarg(const char *) link;
2576 } */
2577
2578 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2579 UIO_USERSPACE);
2580 }
2581
2582 int
2583 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2584 register_t *retval)
2585 {
2586 /* {
2587 syscallarg(const char *) path1;
2588 syscallarg(int) fd;
2589 syscallarg(const char *) path2;
2590 } */
2591
2592 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2593 SCARG(uap, path2), UIO_USERSPACE);
2594 }
2595
2596 /*
2597 * Delete a whiteout from the filesystem.
2598 */
2599 /* ARGSUSED */
2600 int
2601 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2602 {
2603 /* {
2604 syscallarg(const char *) path;
2605 } */
2606 int error;
2607 struct pathbuf *pb;
2608 struct nameidata nd;
2609
2610 error = pathbuf_copyin(SCARG(uap, path), &pb);
2611 if (error) {
2612 return error;
2613 }
2614
2615 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2616 error = namei(&nd);
2617 if (error) {
2618 pathbuf_destroy(pb);
2619 return (error);
2620 }
2621
2622 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2623 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2624 if (nd.ni_dvp == nd.ni_vp)
2625 vrele(nd.ni_dvp);
2626 else
2627 vput(nd.ni_dvp);
2628 if (nd.ni_vp)
2629 vrele(nd.ni_vp);
2630 pathbuf_destroy(pb);
2631 return (EEXIST);
2632 }
2633 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2634 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2635 vput(nd.ni_dvp);
2636 pathbuf_destroy(pb);
2637 return (error);
2638 }
2639
2640 /*
2641 * Delete a name from the filesystem.
2642 */
2643 /* ARGSUSED */
2644 int
2645 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2646 {
2647 /* {
2648 syscallarg(const char *) path;
2649 } */
2650
2651 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2652 }
2653
2654 int
2655 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2656 register_t *retval)
2657 {
2658 /* {
2659 syscallarg(int) fd;
2660 syscallarg(const char *) path;
2661 syscallarg(int) flag;
2662 } */
2663
2664 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2665 SCARG(uap, flag), UIO_USERSPACE);
2666 }
2667
2668 int
2669 do_sys_unlink(const char *arg, enum uio_seg seg)
2670 {
2671 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2672 }
2673
2674 static int
2675 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2676 enum uio_seg seg)
2677 {
2678 struct vnode *vp;
2679 int error;
2680 struct pathbuf *pb;
2681 struct nameidata nd;
2682 const char *pathstring;
2683
2684 KASSERT(l != NULL || fdat == AT_FDCWD);
2685
2686 error = pathbuf_maybe_copyin(arg, seg, &pb);
2687 if (error) {
2688 return error;
2689 }
2690 pathstring = pathbuf_stringcopy_get(pb);
2691 if (pathstring == NULL) {
2692 pathbuf_destroy(pb);
2693 return ENOMEM;
2694 }
2695
2696 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2697 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2698 goto out;
2699 vp = nd.ni_vp;
2700
2701 /*
2702 * The root of a mounted filesystem cannot be deleted.
2703 */
2704 if ((vp->v_vflag & VV_ROOT) != 0) {
2705 error = EBUSY;
2706 goto abort;
2707 }
2708
2709 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2710 error = EBUSY;
2711 goto abort;
2712 }
2713
2714 /*
2715 * No rmdir "." please.
2716 */
2717 if (nd.ni_dvp == vp) {
2718 error = EINVAL;
2719 goto abort;
2720 }
2721
2722 /*
2723 * AT_REMOVEDIR is required to remove a directory
2724 */
2725 if (vp->v_type == VDIR) {
2726 if (!(flags & AT_REMOVEDIR)) {
2727 error = EPERM;
2728 goto abort;
2729 } else {
2730 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2731 vput(nd.ni_dvp);
2732 goto out;
2733 }
2734 }
2735
2736 /*
2737 * Starting here we only deal with non directories.
2738 */
2739 if (flags & AT_REMOVEDIR) {
2740 error = ENOTDIR;
2741 goto abort;
2742 }
2743
2744 #if NVERIEXEC > 0
2745 /* Handle remove requests for veriexec entries. */
2746 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2747 goto abort;
2748 }
2749 #endif /* NVERIEXEC > 0 */
2750
2751 #ifdef FILEASSOC
2752 (void)fileassoc_file_delete(vp);
2753 #endif /* FILEASSOC */
2754 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2755 vput(nd.ni_dvp);
2756 goto out;
2757
2758 abort:
2759 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2760 if (nd.ni_dvp == vp)
2761 vrele(nd.ni_dvp);
2762 else
2763 vput(nd.ni_dvp);
2764 vput(vp);
2765
2766 out:
2767 pathbuf_stringcopy_put(pb, pathstring);
2768 pathbuf_destroy(pb);
2769 return (error);
2770 }
2771
2772 /*
2773 * Reposition read/write file offset.
2774 */
2775 int
2776 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2777 {
2778 /* {
2779 syscallarg(int) fd;
2780 syscallarg(int) pad;
2781 syscallarg(off_t) offset;
2782 syscallarg(int) whence;
2783 } */
2784 kauth_cred_t cred = l->l_cred;
2785 file_t *fp;
2786 struct vnode *vp;
2787 struct vattr vattr;
2788 off_t newoff;
2789 int error, fd;
2790
2791 fd = SCARG(uap, fd);
2792
2793 if ((fp = fd_getfile(fd)) == NULL)
2794 return (EBADF);
2795
2796 vp = fp->f_vnode;
2797 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2798 error = ESPIPE;
2799 goto out;
2800 }
2801
2802 vn_lock(vp, LK_SHARED | LK_RETRY);
2803
2804 switch (SCARG(uap, whence)) {
2805 case SEEK_CUR:
2806 newoff = fp->f_offset + SCARG(uap, offset);
2807 break;
2808 case SEEK_END:
2809 error = VOP_GETATTR(vp, &vattr, cred);
2810 if (error) {
2811 VOP_UNLOCK(vp);
2812 goto out;
2813 }
2814 newoff = SCARG(uap, offset) + vattr.va_size;
2815 break;
2816 case SEEK_SET:
2817 newoff = SCARG(uap, offset);
2818 break;
2819 default:
2820 error = EINVAL;
2821 VOP_UNLOCK(vp);
2822 goto out;
2823 }
2824 VOP_UNLOCK(vp);
2825 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2826 *(off_t *)retval = fp->f_offset = newoff;
2827 }
2828 out:
2829 fd_putfile(fd);
2830 return (error);
2831 }
2832
2833 /*
2834 * Positional read system call.
2835 */
2836 int
2837 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2838 {
2839 /* {
2840 syscallarg(int) fd;
2841 syscallarg(void *) buf;
2842 syscallarg(size_t) nbyte;
2843 syscallarg(off_t) offset;
2844 } */
2845 file_t *fp;
2846 struct vnode *vp;
2847 off_t offset;
2848 int error, fd = SCARG(uap, fd);
2849
2850 if ((fp = fd_getfile(fd)) == NULL)
2851 return (EBADF);
2852
2853 if ((fp->f_flag & FREAD) == 0) {
2854 fd_putfile(fd);
2855 return (EBADF);
2856 }
2857
2858 vp = fp->f_vnode;
2859 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2860 error = ESPIPE;
2861 goto out;
2862 }
2863
2864 offset = SCARG(uap, offset);
2865
2866 /*
2867 * XXX This works because no file systems actually
2868 * XXX take any action on the seek operation.
2869 */
2870 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2871 goto out;
2872
2873 /* dofileread() will unuse the descriptor for us */
2874 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2875 &offset, 0, retval));
2876
2877 out:
2878 fd_putfile(fd);
2879 return (error);
2880 }
2881
2882 /*
2883 * Positional scatter read system call.
2884 */
2885 int
2886 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2887 {
2888 /* {
2889 syscallarg(int) fd;
2890 syscallarg(const struct iovec *) iovp;
2891 syscallarg(int) iovcnt;
2892 syscallarg(off_t) offset;
2893 } */
2894 off_t offset = SCARG(uap, offset);
2895
2896 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2897 SCARG(uap, iovcnt), &offset, 0, retval);
2898 }
2899
2900 /*
2901 * Positional write system call.
2902 */
2903 int
2904 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2905 {
2906 /* {
2907 syscallarg(int) fd;
2908 syscallarg(const void *) buf;
2909 syscallarg(size_t) nbyte;
2910 syscallarg(off_t) offset;
2911 } */
2912 file_t *fp;
2913 struct vnode *vp;
2914 off_t offset;
2915 int error, fd = SCARG(uap, fd);
2916
2917 if ((fp = fd_getfile(fd)) == NULL)
2918 return (EBADF);
2919
2920 if ((fp->f_flag & FWRITE) == 0) {
2921 fd_putfile(fd);
2922 return (EBADF);
2923 }
2924
2925 vp = fp->f_vnode;
2926 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2927 error = ESPIPE;
2928 goto out;
2929 }
2930
2931 offset = SCARG(uap, offset);
2932
2933 /*
2934 * XXX This works because no file systems actually
2935 * XXX take any action on the seek operation.
2936 */
2937 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2938 goto out;
2939
2940 /* dofilewrite() will unuse the descriptor for us */
2941 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2942 &offset, 0, retval));
2943
2944 out:
2945 fd_putfile(fd);
2946 return (error);
2947 }
2948
2949 /*
2950 * Positional gather write system call.
2951 */
2952 int
2953 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2954 {
2955 /* {
2956 syscallarg(int) fd;
2957 syscallarg(const struct iovec *) iovp;
2958 syscallarg(int) iovcnt;
2959 syscallarg(off_t) offset;
2960 } */
2961 off_t offset = SCARG(uap, offset);
2962
2963 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2964 SCARG(uap, iovcnt), &offset, 0, retval);
2965 }
2966
2967 /*
2968 * Check access permissions.
2969 */
2970 int
2971 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2972 {
2973 /* {
2974 syscallarg(const char *) path;
2975 syscallarg(int) flags;
2976 } */
2977
2978 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2979 SCARG(uap, flags), 0);
2980 }
2981
2982 int
2983 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2984 int mode, int flags)
2985 {
2986 kauth_cred_t cred;
2987 struct vnode *vp;
2988 int error, nd_flag, vmode;
2989 struct pathbuf *pb;
2990 struct nameidata nd;
2991
2992 CTASSERT(F_OK == 0);
2993 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2994 /* nonsense mode */
2995 return EINVAL;
2996 }
2997
2998 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2999 if (flags & AT_SYMLINK_NOFOLLOW)
3000 nd_flag &= ~FOLLOW;
3001
3002 error = pathbuf_copyin(path, &pb);
3003 if (error)
3004 return error;
3005
3006 NDINIT(&nd, LOOKUP, nd_flag, pb);
3007
3008 /* Override default credentials */
3009 cred = kauth_cred_dup(l->l_cred);
3010 if (!(flags & AT_EACCESS)) {
3011 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
3012 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
3013 }
3014 nd.ni_cnd.cn_cred = cred;
3015
3016 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3017 pathbuf_destroy(pb);
3018 goto out;
3019 }
3020 vp = nd.ni_vp;
3021 pathbuf_destroy(pb);
3022
3023 /* Flags == 0 means only check for existence. */
3024 if (mode) {
3025 vmode = 0;
3026 if (mode & R_OK)
3027 vmode |= VREAD;
3028 if (mode & W_OK)
3029 vmode |= VWRITE;
3030 if (mode & X_OK)
3031 vmode |= VEXEC;
3032
3033 error = VOP_ACCESS(vp, vmode, cred);
3034 if (!error && (vmode & VWRITE))
3035 error = vn_writechk(vp);
3036 }
3037 vput(vp);
3038 out:
3039 kauth_cred_free(cred);
3040 return (error);
3041 }
3042
3043 int
3044 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3045 register_t *retval)
3046 {
3047 /* {
3048 syscallarg(int) fd;
3049 syscallarg(const char *) path;
3050 syscallarg(int) amode;
3051 syscallarg(int) flag;
3052 } */
3053
3054 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3055 SCARG(uap, amode), SCARG(uap, flag));
3056 }
3057
3058 /*
3059 * Common code for all sys_stat functions, including compat versions.
3060 */
3061 int
3062 do_sys_stat(const char *userpath, unsigned int nd_flag,
3063 struct stat *sb)
3064 {
3065 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3066 }
3067
3068 int
3069 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3070 unsigned int nd_flag, struct stat *sb)
3071 {
3072 int error;
3073 struct pathbuf *pb;
3074 struct nameidata nd;
3075
3076 KASSERT(l != NULL || fdat == AT_FDCWD);
3077
3078 error = pathbuf_copyin(userpath, &pb);
3079 if (error) {
3080 return error;
3081 }
3082
3083 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3084
3085 error = fd_nameiat(l, fdat, &nd);
3086 if (error != 0) {
3087 pathbuf_destroy(pb);
3088 return error;
3089 }
3090 error = vn_stat(nd.ni_vp, sb);
3091 vput(nd.ni_vp);
3092 pathbuf_destroy(pb);
3093 return error;
3094 }
3095
3096 /*
3097 * Get file status; this version follows links.
3098 */
3099 /* ARGSUSED */
3100 int
3101 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3102 {
3103 /* {
3104 syscallarg(const char *) path;
3105 syscallarg(struct stat *) ub;
3106 } */
3107 struct stat sb;
3108 int error;
3109
3110 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3111 if (error)
3112 return error;
3113 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3114 }
3115
3116 /*
3117 * Get file status; this version does not follow links.
3118 */
3119 /* ARGSUSED */
3120 int
3121 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3122 {
3123 /* {
3124 syscallarg(const char *) path;
3125 syscallarg(struct stat *) ub;
3126 } */
3127 struct stat sb;
3128 int error;
3129
3130 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3131 if (error)
3132 return error;
3133 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3134 }
3135
3136 int
3137 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3138 register_t *retval)
3139 {
3140 /* {
3141 syscallarg(int) fd;
3142 syscallarg(const char *) path;
3143 syscallarg(struct stat *) buf;
3144 syscallarg(int) flag;
3145 } */
3146 unsigned int nd_flag;
3147 struct stat sb;
3148 int error;
3149
3150 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3151 nd_flag = NOFOLLOW;
3152 else
3153 nd_flag = FOLLOW;
3154
3155 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3156 &sb);
3157 if (error)
3158 return error;
3159 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3160 }
3161
3162 /*
3163 * Get configurable pathname variables.
3164 */
3165 /* ARGSUSED */
3166 int
3167 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3168 {
3169 /* {
3170 syscallarg(const char *) path;
3171 syscallarg(int) name;
3172 } */
3173 int error;
3174 struct pathbuf *pb;
3175 struct nameidata nd;
3176
3177 error = pathbuf_copyin(SCARG(uap, path), &pb);
3178 if (error) {
3179 return error;
3180 }
3181 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3182 if ((error = namei(&nd)) != 0) {
3183 pathbuf_destroy(pb);
3184 return (error);
3185 }
3186 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3187 vput(nd.ni_vp);
3188 pathbuf_destroy(pb);
3189 return (error);
3190 }
3191
3192 /*
3193 * Return target name of a symbolic link.
3194 */
3195 /* ARGSUSED */
3196 int
3197 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3198 register_t *retval)
3199 {
3200 /* {
3201 syscallarg(const char *) path;
3202 syscallarg(char *) buf;
3203 syscallarg(size_t) count;
3204 } */
3205 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3206 SCARG(uap, buf), SCARG(uap, count), retval);
3207 }
3208
3209 static int
3210 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3211 size_t count, register_t *retval)
3212 {
3213 struct vnode *vp;
3214 struct iovec aiov;
3215 struct uio auio;
3216 int error;
3217 struct pathbuf *pb;
3218 struct nameidata nd;
3219
3220 error = pathbuf_copyin(path, &pb);
3221 if (error) {
3222 return error;
3223 }
3224 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3225 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3226 pathbuf_destroy(pb);
3227 return error;
3228 }
3229 vp = nd.ni_vp;
3230 pathbuf_destroy(pb);
3231 if (vp->v_type != VLNK)
3232 error = EINVAL;
3233 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3234 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3235 aiov.iov_base = buf;
3236 aiov.iov_len = count;
3237 auio.uio_iov = &aiov;
3238 auio.uio_iovcnt = 1;
3239 auio.uio_offset = 0;
3240 auio.uio_rw = UIO_READ;
3241 KASSERT(l == curlwp);
3242 auio.uio_vmspace = l->l_proc->p_vmspace;
3243 auio.uio_resid = count;
3244 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3245 *retval = count - auio.uio_resid;
3246 }
3247 vput(vp);
3248 return (error);
3249 }
3250
3251 int
3252 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3253 register_t *retval)
3254 {
3255 /* {
3256 syscallarg(int) fd;
3257 syscallarg(const char *) path;
3258 syscallarg(char *) buf;
3259 syscallarg(size_t) bufsize;
3260 } */
3261
3262 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3263 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3264 }
3265
3266 /*
3267 * Change flags of a file given a path name.
3268 */
3269 /* ARGSUSED */
3270 int
3271 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3272 {
3273 /* {
3274 syscallarg(const char *) path;
3275 syscallarg(u_long) flags;
3276 } */
3277 struct vnode *vp;
3278 int error;
3279
3280 error = namei_simple_user(SCARG(uap, path),
3281 NSM_FOLLOW_TRYEMULROOT, &vp);
3282 if (error != 0)
3283 return (error);
3284 error = change_flags(vp, SCARG(uap, flags), l);
3285 vput(vp);
3286 return (error);
3287 }
3288
3289 /*
3290 * Change flags of a file given a file descriptor.
3291 */
3292 /* ARGSUSED */
3293 int
3294 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3295 {
3296 /* {
3297 syscallarg(int) fd;
3298 syscallarg(u_long) flags;
3299 } */
3300 struct vnode *vp;
3301 file_t *fp;
3302 int error;
3303
3304 /* fd_getvnode() will use the descriptor for us */
3305 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3306 return (error);
3307 vp = fp->f_vnode;
3308 error = change_flags(vp, SCARG(uap, flags), l);
3309 VOP_UNLOCK(vp);
3310 fd_putfile(SCARG(uap, fd));
3311 return (error);
3312 }
3313
3314 /*
3315 * Change flags of a file given a path name; this version does
3316 * not follow links.
3317 */
3318 int
3319 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3320 {
3321 /* {
3322 syscallarg(const char *) path;
3323 syscallarg(u_long) flags;
3324 } */
3325 struct vnode *vp;
3326 int error;
3327
3328 error = namei_simple_user(SCARG(uap, path),
3329 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3330 if (error != 0)
3331 return (error);
3332 error = change_flags(vp, SCARG(uap, flags), l);
3333 vput(vp);
3334 return (error);
3335 }
3336
3337 /*
3338 * Common routine to change flags of a file.
3339 */
3340 int
3341 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3342 {
3343 struct vattr vattr;
3344 int error;
3345
3346 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3347
3348 vattr_null(&vattr);
3349 vattr.va_flags = flags;
3350 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3351
3352 return (error);
3353 }
3354
3355 /*
3356 * Change mode of a file given path name; this version follows links.
3357 */
3358 /* ARGSUSED */
3359 int
3360 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3361 {
3362 /* {
3363 syscallarg(const char *) path;
3364 syscallarg(int) mode;
3365 } */
3366 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3367 SCARG(uap, mode), 0);
3368 }
3369
3370 int
3371 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3372 {
3373 int error;
3374 struct vnode *vp;
3375 namei_simple_flags_t ns_flag;
3376
3377 if (flags & AT_SYMLINK_NOFOLLOW)
3378 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3379 else
3380 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3381
3382 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3383 if (error != 0)
3384 return error;
3385
3386 error = change_mode(vp, mode, l);
3387
3388 vrele(vp);
3389
3390 return (error);
3391 }
3392
3393 /*
3394 * Change mode of a file given a file descriptor.
3395 */
3396 /* ARGSUSED */
3397 int
3398 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3399 {
3400 /* {
3401 syscallarg(int) fd;
3402 syscallarg(int) mode;
3403 } */
3404 file_t *fp;
3405 int error;
3406
3407 /* fd_getvnode() will use the descriptor for us */
3408 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3409 return (error);
3410 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3411 fd_putfile(SCARG(uap, fd));
3412 return (error);
3413 }
3414
3415 int
3416 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3417 register_t *retval)
3418 {
3419 /* {
3420 syscallarg(int) fd;
3421 syscallarg(const char *) path;
3422 syscallarg(int) mode;
3423 syscallarg(int) flag;
3424 } */
3425
3426 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3427 SCARG(uap, mode), SCARG(uap, flag));
3428 }
3429
3430 /*
3431 * Change mode of a file given path name; this version does not follow links.
3432 */
3433 /* ARGSUSED */
3434 int
3435 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3436 {
3437 /* {
3438 syscallarg(const char *) path;
3439 syscallarg(int) mode;
3440 } */
3441 int error;
3442 struct vnode *vp;
3443
3444 error = namei_simple_user(SCARG(uap, path),
3445 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3446 if (error != 0)
3447 return (error);
3448
3449 error = change_mode(vp, SCARG(uap, mode), l);
3450
3451 vrele(vp);
3452 return (error);
3453 }
3454
3455 /*
3456 * Common routine to set mode given a vnode.
3457 */
3458 static int
3459 change_mode(struct vnode *vp, int mode, struct lwp *l)
3460 {
3461 struct vattr vattr;
3462 int error;
3463
3464 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3465 vattr_null(&vattr);
3466 vattr.va_mode = mode & ALLPERMS;
3467 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3468 VOP_UNLOCK(vp);
3469 return (error);
3470 }
3471
3472 /*
3473 * Set ownership given a path name; this version follows links.
3474 */
3475 /* ARGSUSED */
3476 int
3477 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3478 {
3479 /* {
3480 syscallarg(const char *) path;
3481 syscallarg(uid_t) uid;
3482 syscallarg(gid_t) gid;
3483 } */
3484 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3485 SCARG(uap, gid), 0);
3486 }
3487
3488 int
3489 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3490 gid_t gid, int flags)
3491 {
3492 int error;
3493 struct vnode *vp;
3494 namei_simple_flags_t ns_flag;
3495
3496 if (flags & AT_SYMLINK_NOFOLLOW)
3497 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3498 else
3499 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3500
3501 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3502 if (error != 0)
3503 return error;
3504
3505 error = change_owner(vp, uid, gid, l, 0);
3506
3507 vrele(vp);
3508
3509 return (error);
3510 }
3511
3512 /*
3513 * Set ownership given a path name; this version follows links.
3514 * Provides POSIX semantics.
3515 */
3516 /* ARGSUSED */
3517 int
3518 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3519 {
3520 /* {
3521 syscallarg(const char *) path;
3522 syscallarg(uid_t) uid;
3523 syscallarg(gid_t) gid;
3524 } */
3525 int error;
3526 struct vnode *vp;
3527
3528 error = namei_simple_user(SCARG(uap, path),
3529 NSM_FOLLOW_TRYEMULROOT, &vp);
3530 if (error != 0)
3531 return (error);
3532
3533 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3534
3535 vrele(vp);
3536 return (error);
3537 }
3538
3539 /*
3540 * Set ownership given a file descriptor.
3541 */
3542 /* ARGSUSED */
3543 int
3544 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3545 {
3546 /* {
3547 syscallarg(int) fd;
3548 syscallarg(uid_t) uid;
3549 syscallarg(gid_t) gid;
3550 } */
3551 int error;
3552 file_t *fp;
3553
3554 /* fd_getvnode() will use the descriptor for us */
3555 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3556 return (error);
3557 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3558 l, 0);
3559 fd_putfile(SCARG(uap, fd));
3560 return (error);
3561 }
3562
3563 int
3564 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3565 register_t *retval)
3566 {
3567 /* {
3568 syscallarg(int) fd;
3569 syscallarg(const char *) path;
3570 syscallarg(uid_t) owner;
3571 syscallarg(gid_t) group;
3572 syscallarg(int) flag;
3573 } */
3574
3575 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3576 SCARG(uap, owner), SCARG(uap, group),
3577 SCARG(uap, flag));
3578 }
3579
3580 /*
3581 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3582 */
3583 /* ARGSUSED */
3584 int
3585 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3586 {
3587 /* {
3588 syscallarg(int) fd;
3589 syscallarg(uid_t) uid;
3590 syscallarg(gid_t) gid;
3591 } */
3592 int error;
3593 file_t *fp;
3594
3595 /* fd_getvnode() will use the descriptor for us */
3596 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3597 return (error);
3598 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3599 l, 1);
3600 fd_putfile(SCARG(uap, fd));
3601 return (error);
3602 }
3603
3604 /*
3605 * Set ownership given a path name; this version does not follow links.
3606 */
3607 /* ARGSUSED */
3608 int
3609 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3610 {
3611 /* {
3612 syscallarg(const char *) path;
3613 syscallarg(uid_t) uid;
3614 syscallarg(gid_t) gid;
3615 } */
3616 int error;
3617 struct vnode *vp;
3618
3619 error = namei_simple_user(SCARG(uap, path),
3620 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3621 if (error != 0)
3622 return (error);
3623
3624 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3625
3626 vrele(vp);
3627 return (error);
3628 }
3629
3630 /*
3631 * Set ownership given a path name; this version does not follow links.
3632 * Provides POSIX/XPG semantics.
3633 */
3634 /* ARGSUSED */
3635 int
3636 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3637 {
3638 /* {
3639 syscallarg(const char *) path;
3640 syscallarg(uid_t) uid;
3641 syscallarg(gid_t) gid;
3642 } */
3643 int error;
3644 struct vnode *vp;
3645
3646 error = namei_simple_user(SCARG(uap, path),
3647 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3648 if (error != 0)
3649 return (error);
3650
3651 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3652
3653 vrele(vp);
3654 return (error);
3655 }
3656
3657 /*
3658 * Common routine to set ownership given a vnode.
3659 */
3660 static int
3661 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3662 int posix_semantics)
3663 {
3664 struct vattr vattr;
3665 mode_t newmode;
3666 int error;
3667
3668 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3669 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3670 goto out;
3671
3672 #define CHANGED(x) ((int)(x) != -1)
3673 newmode = vattr.va_mode;
3674 if (posix_semantics) {
3675 /*
3676 * POSIX/XPG semantics: if the caller is not the super-user,
3677 * clear set-user-id and set-group-id bits. Both POSIX and
3678 * the XPG consider the behaviour for calls by the super-user
3679 * implementation-defined; we leave the set-user-id and set-
3680 * group-id settings intact in that case.
3681 */
3682 if (vattr.va_mode & S_ISUID) {
3683 if (kauth_authorize_vnode(l->l_cred,
3684 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3685 newmode &= ~S_ISUID;
3686 }
3687 if (vattr.va_mode & S_ISGID) {
3688 if (kauth_authorize_vnode(l->l_cred,
3689 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3690 newmode &= ~S_ISGID;
3691 }
3692 } else {
3693 /*
3694 * NetBSD semantics: when changing owner and/or group,
3695 * clear the respective bit(s).
3696 */
3697 if (CHANGED(uid))
3698 newmode &= ~S_ISUID;
3699 if (CHANGED(gid))
3700 newmode &= ~S_ISGID;
3701 }
3702 /* Update va_mode iff altered. */
3703 if (vattr.va_mode == newmode)
3704 newmode = VNOVAL;
3705
3706 vattr_null(&vattr);
3707 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3708 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3709 vattr.va_mode = newmode;
3710 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3711 #undef CHANGED
3712
3713 out:
3714 VOP_UNLOCK(vp);
3715 return (error);
3716 }
3717
3718 /*
3719 * Set the access and modification times given a path name; this
3720 * version follows links.
3721 */
3722 /* ARGSUSED */
3723 int
3724 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3725 register_t *retval)
3726 {
3727 /* {
3728 syscallarg(const char *) path;
3729 syscallarg(const struct timeval *) tptr;
3730 } */
3731
3732 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3733 SCARG(uap, tptr), UIO_USERSPACE);
3734 }
3735
3736 /*
3737 * Set the access and modification times given a file descriptor.
3738 */
3739 /* ARGSUSED */
3740 int
3741 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3742 register_t *retval)
3743 {
3744 /* {
3745 syscallarg(int) fd;
3746 syscallarg(const struct timeval *) tptr;
3747 } */
3748 int error;
3749 file_t *fp;
3750
3751 /* fd_getvnode() will use the descriptor for us */
3752 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3753 return (error);
3754 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3755 UIO_USERSPACE);
3756 fd_putfile(SCARG(uap, fd));
3757 return (error);
3758 }
3759
3760 int
3761 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3762 register_t *retval)
3763 {
3764 /* {
3765 syscallarg(int) fd;
3766 syscallarg(const struct timespec *) tptr;
3767 } */
3768 int error;
3769 file_t *fp;
3770
3771 /* fd_getvnode() will use the descriptor for us */
3772 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3773 return (error);
3774 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3775 SCARG(uap, tptr), UIO_USERSPACE);
3776 fd_putfile(SCARG(uap, fd));
3777 return (error);
3778 }
3779
3780 /*
3781 * Set the access and modification times given a path name; this
3782 * version does not follow links.
3783 */
3784 int
3785 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3786 register_t *retval)
3787 {
3788 /* {
3789 syscallarg(const char *) path;
3790 syscallarg(const struct timeval *) tptr;
3791 } */
3792
3793 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3794 SCARG(uap, tptr), UIO_USERSPACE);
3795 }
3796
3797 int
3798 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3799 register_t *retval)
3800 {
3801 /* {
3802 syscallarg(int) fd;
3803 syscallarg(const char *) path;
3804 syscallarg(const struct timespec *) tptr;
3805 syscallarg(int) flag;
3806 } */
3807 int follow;
3808 const struct timespec *tptr;
3809 int error;
3810
3811 tptr = SCARG(uap, tptr);
3812 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3813
3814 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3815 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3816
3817 return error;
3818 }
3819
3820 /*
3821 * Common routine to set access and modification times given a vnode.
3822 */
3823 int
3824 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3825 const struct timespec *tptr, enum uio_seg seg)
3826 {
3827 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3828 }
3829
3830 int
3831 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3832 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3833 {
3834 struct vattr vattr;
3835 int error, dorele = 0;
3836 namei_simple_flags_t sflags;
3837 bool vanull, setbirthtime;
3838 struct timespec ts[2];
3839
3840 KASSERT(l != NULL || fdat == AT_FDCWD);
3841
3842 /*
3843 * I have checked all callers and they pass either FOLLOW,
3844 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3845 * is 0. More to the point, they don't pass anything else.
3846 * Let's keep it that way at least until the namei interfaces
3847 * are fully sanitized.
3848 */
3849 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3850 sflags = (flag == FOLLOW) ?
3851 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3852
3853 if (tptr == NULL) {
3854 vanull = true;
3855 nanotime(&ts[0]);
3856 ts[1] = ts[0];
3857 } else {
3858 vanull = false;
3859 if (seg != UIO_SYSSPACE) {
3860 error = copyin(tptr, ts, sizeof (ts));
3861 if (error != 0)
3862 return error;
3863 } else {
3864 ts[0] = tptr[0];
3865 ts[1] = tptr[1];
3866 }
3867 }
3868
3869 if (ts[0].tv_nsec == UTIME_NOW) {
3870 nanotime(&ts[0]);
3871 if (ts[1].tv_nsec == UTIME_NOW) {
3872 vanull = true;
3873 ts[1] = ts[0];
3874 }
3875 } else if (ts[1].tv_nsec == UTIME_NOW)
3876 nanotime(&ts[1]);
3877
3878 if (vp == NULL) {
3879 /* note: SEG describes TPTR, not PATH; PATH is always user */
3880 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3881 if (error != 0)
3882 return error;
3883 dorele = 1;
3884 }
3885
3886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3887 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3888 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3889 vattr_null(&vattr);
3890
3891 if (ts[0].tv_nsec != UTIME_OMIT)
3892 vattr.va_atime = ts[0];
3893
3894 if (ts[1].tv_nsec != UTIME_OMIT) {
3895 vattr.va_mtime = ts[1];
3896 if (setbirthtime)
3897 vattr.va_birthtime = ts[1];
3898 }
3899
3900 if (vanull)
3901 vattr.va_vaflags |= VA_UTIMES_NULL;
3902 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3903 VOP_UNLOCK(vp);
3904
3905 if (dorele != 0)
3906 vrele(vp);
3907
3908 return error;
3909 }
3910
3911 int
3912 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3913 const struct timeval *tptr, enum uio_seg seg)
3914 {
3915 struct timespec ts[2];
3916 struct timespec *tsptr = NULL;
3917 int error;
3918
3919 if (tptr != NULL) {
3920 struct timeval tv[2];
3921
3922 if (seg != UIO_SYSSPACE) {
3923 error = copyin(tptr, tv, sizeof(tv));
3924 if (error != 0)
3925 return error;
3926 tptr = tv;
3927 }
3928
3929 if ((tptr[0].tv_usec == UTIME_NOW) ||
3930 (tptr[0].tv_usec == UTIME_OMIT))
3931 ts[0].tv_nsec = tptr[0].tv_usec;
3932 else {
3933 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000)
3934 return EINVAL;
3935
3936 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3937 }
3938
3939 if ((tptr[1].tv_usec == UTIME_NOW) ||
3940 (tptr[1].tv_usec == UTIME_OMIT))
3941 ts[1].tv_nsec = tptr[1].tv_usec;
3942 else {
3943 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000)
3944 return EINVAL;
3945
3946 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3947 }
3948
3949 tsptr = &ts[0];
3950 }
3951
3952 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3953 }
3954
3955 /*
3956 * Truncate a file given its path name.
3957 */
3958 /* ARGSUSED */
3959 int
3960 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3961 {
3962 /* {
3963 syscallarg(const char *) path;
3964 syscallarg(int) pad;
3965 syscallarg(off_t) length;
3966 } */
3967 struct vnode *vp;
3968 struct vattr vattr;
3969 int error;
3970
3971 if (SCARG(uap, length) < 0)
3972 return EINVAL;
3973
3974 error = namei_simple_user(SCARG(uap, path),
3975 NSM_FOLLOW_TRYEMULROOT, &vp);
3976 if (error != 0)
3977 return (error);
3978 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3979 if (vp->v_type == VDIR)
3980 error = EISDIR;
3981 else if ((error = vn_writechk(vp)) == 0 &&
3982 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3983 vattr_null(&vattr);
3984 vattr.va_size = SCARG(uap, length);
3985 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3986 }
3987 vput(vp);
3988 return (error);
3989 }
3990
3991 /*
3992 * Truncate a file given a file descriptor.
3993 */
3994 /* ARGSUSED */
3995 int
3996 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3997 {
3998 /* {
3999 syscallarg(int) fd;
4000 syscallarg(int) pad;
4001 syscallarg(off_t) length;
4002 } */
4003 struct vattr vattr;
4004 struct vnode *vp;
4005 file_t *fp;
4006 int error;
4007
4008 if (SCARG(uap, length) < 0)
4009 return EINVAL;
4010
4011 /* fd_getvnode() will use the descriptor for us */
4012 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4013 return (error);
4014 if ((fp->f_flag & FWRITE) == 0) {
4015 error = EINVAL;
4016 goto out;
4017 }
4018 vp = fp->f_vnode;
4019 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4020 if (vp->v_type == VDIR)
4021 error = EISDIR;
4022 else if ((error = vn_writechk(vp)) == 0) {
4023 vattr_null(&vattr);
4024 vattr.va_size = SCARG(uap, length);
4025 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
4026 }
4027 VOP_UNLOCK(vp);
4028 out:
4029 fd_putfile(SCARG(uap, fd));
4030 return (error);
4031 }
4032
4033 /*
4034 * Sync an open file.
4035 */
4036 /* ARGSUSED */
4037 int
4038 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
4039 {
4040 /* {
4041 syscallarg(int) fd;
4042 } */
4043 struct vnode *vp;
4044 file_t *fp;
4045 int error;
4046
4047 /* fd_getvnode() will use the descriptor for us */
4048 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4049 return (error);
4050 vp = fp->f_vnode;
4051 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4052 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4053 VOP_UNLOCK(vp);
4054 fd_putfile(SCARG(uap, fd));
4055 return (error);
4056 }
4057
4058 /*
4059 * Sync a range of file data. API modeled after that found in AIX.
4060 *
4061 * FDATASYNC indicates that we need only save enough metadata to be able
4062 * to re-read the written data. Note we duplicate AIX's requirement that
4063 * the file be open for writing.
4064 */
4065 /* ARGSUSED */
4066 int
4067 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4068 {
4069 /* {
4070 syscallarg(int) fd;
4071 syscallarg(int) flags;
4072 syscallarg(off_t) start;
4073 syscallarg(off_t) length;
4074 } */
4075 struct vnode *vp;
4076 file_t *fp;
4077 int flags, nflags;
4078 off_t s, e, len;
4079 int error;
4080
4081 /* fd_getvnode() will use the descriptor for us */
4082 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4083 return (error);
4084
4085 if ((fp->f_flag & FWRITE) == 0) {
4086 error = EBADF;
4087 goto out;
4088 }
4089
4090 flags = SCARG(uap, flags);
4091 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4092 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4093 error = EINVAL;
4094 goto out;
4095 }
4096 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4097 if (flags & FDATASYNC)
4098 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4099 else
4100 nflags = FSYNC_WAIT;
4101 if (flags & FDISKSYNC)
4102 nflags |= FSYNC_CACHE;
4103
4104 len = SCARG(uap, length);
4105 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4106 if (len) {
4107 s = SCARG(uap, start);
4108 e = s + len;
4109 if (e < s) {
4110 error = EINVAL;
4111 goto out;
4112 }
4113 } else {
4114 e = 0;
4115 s = 0;
4116 }
4117
4118 vp = fp->f_vnode;
4119 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4120 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4121 VOP_UNLOCK(vp);
4122 out:
4123 fd_putfile(SCARG(uap, fd));
4124 return (error);
4125 }
4126
4127 /*
4128 * Sync the data of an open file.
4129 */
4130 /* ARGSUSED */
4131 int
4132 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4133 {
4134 /* {
4135 syscallarg(int) fd;
4136 } */
4137 struct vnode *vp;
4138 file_t *fp;
4139 int error;
4140
4141 /* fd_getvnode() will use the descriptor for us */
4142 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4143 return (error);
4144 if ((fp->f_flag & FWRITE) == 0) {
4145 fd_putfile(SCARG(uap, fd));
4146 return (EBADF);
4147 }
4148 vp = fp->f_vnode;
4149 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4150 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4151 VOP_UNLOCK(vp);
4152 fd_putfile(SCARG(uap, fd));
4153 return (error);
4154 }
4155
4156 /*
4157 * Rename files, (standard) BSD semantics frontend.
4158 */
4159 /* ARGSUSED */
4160 int
4161 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4162 {
4163 /* {
4164 syscallarg(const char *) from;
4165 syscallarg(const char *) to;
4166 } */
4167
4168 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4169 SCARG(uap, to), UIO_USERSPACE, 0));
4170 }
4171
4172 int
4173 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4174 register_t *retval)
4175 {
4176 /* {
4177 syscallarg(int) fromfd;
4178 syscallarg(const char *) from;
4179 syscallarg(int) tofd;
4180 syscallarg(const char *) to;
4181 } */
4182
4183 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4184 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4185 }
4186
4187 /*
4188 * Rename files, POSIX semantics frontend.
4189 */
4190 /* ARGSUSED */
4191 int
4192 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4193 {
4194 /* {
4195 syscallarg(const char *) from;
4196 syscallarg(const char *) to;
4197 } */
4198
4199 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4200 SCARG(uap, to), UIO_USERSPACE, 1));
4201 }
4202
4203 /*
4204 * Rename files. Source and destination must either both be directories,
4205 * or both not be directories. If target is a directory, it must be empty.
4206 * If `from' and `to' refer to the same object, the value of the `retain'
4207 * argument is used to determine whether `from' will be
4208 *
4209 * (retain == 0) deleted unless `from' and `to' refer to the same
4210 * object in the file system's name space (BSD).
4211 * (retain == 1) always retained (POSIX).
4212 *
4213 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4214 */
4215 int
4216 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4217 {
4218 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4219 }
4220
4221 static int
4222 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
4223 const char *to, enum uio_seg seg, int retain)
4224 {
4225 struct pathbuf *fpb, *tpb;
4226 struct nameidata fnd, tnd;
4227 struct vnode *fdvp, *fvp;
4228 struct vnode *tdvp, *tvp;
4229 struct mount *mp, *tmp;
4230 int error;
4231
4232 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));
4233
4234 error = pathbuf_maybe_copyin(from, seg, &fpb);
4235 if (error)
4236 goto out0;
4237 KASSERT(fpb != NULL);
4238
4239 error = pathbuf_maybe_copyin(to, seg, &tpb);
4240 if (error)
4241 goto out1;
4242 KASSERT(tpb != NULL);
4243
4244 /*
4245 * Lookup from.
4246 *
4247 * XXX LOCKPARENT is wrong because we don't actually want it
4248 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
4249 * insane, so for the time being we need to leave it like this.
4250 */
4251 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
4252 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
4253 goto out2;
4254
4255 /*
4256 * Pull out the important results of the lookup, fdvp and fvp.
4257 * Of course, fvp is bogus because we're about to unlock fdvp.
4258 */
4259 fdvp = fnd.ni_dvp;
4260 fvp = fnd.ni_vp;
4261 mp = fdvp->v_mount;
4262 KASSERT(fdvp != NULL);
4263 KASSERT(fvp != NULL);
4264 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));
4265 /*
4266 * Bracket the operation with fstrans_start()/fstrans_done().
4267 *
4268 * Inside the bracket this file system cannot be unmounted so
4269 * a vnode on this file system cannot change its v_mount.
4270 * A vnode on another file system may still change to dead mount.
4271 */
4272 fstrans_start(mp);
4273
4274 /*
4275 * Make sure neither fdvp nor fvp is locked.
4276 */
4277 if (fdvp != fvp)
4278 VOP_UNLOCK(fdvp);
4279 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4280 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4281
4282 /*
4283 * Reject renaming `.' and `..'. Can't do this until after
4284 * namei because we need namei's parsing to find the final
4285 * component name. (namei should just leave us with the final
4286 * component name and not look it up itself, but anyway...)
4287 *
4288 * This was here before because we used to relookup from
4289 * instead of to and relookup requires the caller to check
4290 * this, but now file systems may depend on this check, so we
4291 * must retain it until the file systems are all rototilled.
4292 */
4293 if (((fnd.ni_cnd.cn_namelen == 1) &&
4294 (fnd.ni_cnd.cn_nameptr[0] == '.')) ||
4295 ((fnd.ni_cnd.cn_namelen == 2) &&
4296 (fnd.ni_cnd.cn_nameptr[0] == '.') &&
4297 (fnd.ni_cnd.cn_nameptr[1] == '.'))) {
4298 error = EINVAL; /* XXX EISDIR? */
4299 goto abort0;
4300 }
4301
4302 /*
4303 * Lookup to.
4304 *
4305 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using
4306 * fvp here to decide whether to add CREATEDIR is a load of
4307 * bollocks because fvp might be the wrong node by now, since
4308 * fdvp is unlocked.
4309 *
4310 * XXX Why not pass CREATEDIR always?
4311 */
4312 NDINIT(&tnd, RENAME,
4313 (LOCKPARENT | NOCACHE | TRYEMULROOT |
4314 ((fvp->v_type == VDIR)? CREATEDIR : 0)),
4315 tpb);
4316 if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
4317 goto abort0;
4318
4319 /*
4320 * Pull out the important results of the lookup, tdvp and tvp.
4321 * Of course, tvp is bogus because we're about to unlock tdvp.
4322 */
4323 tdvp = tnd.ni_dvp;
4324 tvp = tnd.ni_vp;
4325 KASSERT(tdvp != NULL);
4326 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));
4327
4328 /*
4329 * Make sure neither tdvp nor tvp is locked.
4330 */
4331 if (tdvp != tvp)
4332 VOP_UNLOCK(tdvp);
4333 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4334 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4335
4336 /*
4337 * Reject renaming onto `.' or `..'. relookup is unhappy with
4338 * these, which is why we must do this here. Once upon a time
4339 * we relooked up from instead of to, and consequently didn't
4340 * need this check, but now that we relookup to instead of
4341 * from, we need this; and we shall need it forever forward
4342 * until the VOP_RENAME protocol changes, because file systems
4343 * will no doubt begin to depend on this check.
4344 */
4345 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
4346 error = EISDIR;
4347 goto abort1;
4348 }
4349 if ((tnd.ni_cnd.cn_namelen == 2) &&
4350 (tnd.ni_cnd.cn_nameptr[0] == '.') &&
4351 (tnd.ni_cnd.cn_nameptr[1] == '.')) {
4352 error = EINVAL;
4353 goto abort1;
4354 }
4355
4356 /*
4357 * Make sure the mount points match. Although we don't hold
4358 * any vnode locks, the v_mount on fdvp file system are stable.
4359 *
4360 * Unmounting another file system at an inopportune moment may
4361 * cause tdvp to disappear and change its v_mount to dead.
4362 *
4363 * So in either case different v_mount means cross-device rename.
4364 */
4365 KASSERT(mp != NULL);
4366 tmp = tdvp->v_mount;
4367
4368 if (mp != tmp) {
4369 error = EXDEV;
4370 goto abort1;
4371 }
4372
4373 /*
4374 * Take the vfs rename lock to avoid cross-directory screw cases.
4375 * Nothing is locked currently, so taking this lock is safe.
4376 */
4377 error = VFS_RENAMELOCK_ENTER(mp);
4378 if (error)
4379 goto abort1;
4380
4381 /*
4382 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
4383 * and nothing is locked except for the vfs rename lock.
4384 *
4385 * The next step is a little rain dance to conform to the
4386 * insane lock protocol, even though it does nothing to ward
4387 * off race conditions.
4388 *
4389 * We need tdvp and tvp to be locked. However, because we have
4390 * unlocked tdvp in order to hold no locks while we take the
4391 * vfs rename lock, tvp may be wrong here, and we can't safely
4392 * lock it even if the sensible file systems will just unlock
4393 * it straight away. Consequently, we must lock tdvp and then
4394 * relookup tvp to get it locked.
4395 *
4396 * Finally, because the VOP_RENAME protocol is brain-damaged
4397 * and various file systems insanely depend on the semantics of
4398 * this brain damage, the lookup of to must be the last lookup
4399 * before VOP_RENAME.
4400 */
4401 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
4402 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
4403 if (error)
4404 goto abort2;
4405
4406 /*
4407 * Drop the old tvp and pick up the new one -- which might be
4408 * the same, but that doesn't matter to us. After this, tdvp
4409 * and tvp should both be locked.
4410 */
4411 if (tvp != NULL)
4412 vrele(tvp);
4413 tvp = tnd.ni_vp;
4414 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4415 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4416
4417 /*
4418 * The old do_sys_rename had various consistency checks here
4419 * involving fvp and tvp. fvp is bogus already here, and tvp
4420 * will become bogus soon in any sensible file system, so the
4421 * only purpose in putting these checks here is to give lip
4422 * service to these screw cases and to acknowledge that they
4423 * exist, not actually to handle them, but here you go
4424 * anyway...
4425 */
4426
4427 /*
4428 * Acknowledge that directories and non-directories aren't
4429 * suposed to mix.
4430 */
4431 if (tvp != NULL) {
4432 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
4433 error = ENOTDIR;
4434 goto abort3;
4435 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
4436 error = EISDIR;
4437 goto abort3;
4438 }
4439 }
4440
4441 /*
4442 * Acknowledge some random screw case, among the dozens that
4443 * might arise.
4444 */
4445 if (fvp == tdvp) {
4446 error = EINVAL;
4447 goto abort3;
4448 }
4449
4450 /*
4451 * Acknowledge that POSIX has a wacky screw case.
4452 *
4453 * XXX Eventually the retain flag needs to be passed on to
4454 * VOP_RENAME.
4455 */
4456 if (fvp == tvp) {
4457 if (retain) {
4458 error = 0;
4459 goto abort3;
4460 } else if ((fdvp == tdvp) &&
4461 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
4462 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
4463 fnd.ni_cnd.cn_namelen))) {
4464 error = 0;
4465 goto abort3;
4466 }
4467 }
4468
4469 /*
4470 * Make sure veriexec can screw us up. (But a race can screw
4471 * up veriexec, of course -- remember, fvp and (soon) tvp are
4472 * bogus.)
4473 */
4474 #if NVERIEXEC > 0
4475 {
4476 char *f1, *f2;
4477 size_t f1_len;
4478 size_t f2_len;
4479
4480 f1_len = fnd.ni_cnd.cn_namelen + 1;
4481 f1 = kmem_alloc(f1_len, KM_SLEEP);
4482 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);
4483
4484 f2_len = tnd.ni_cnd.cn_namelen + 1;
4485 f2 = kmem_alloc(f2_len, KM_SLEEP);
4486 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);
4487
4488 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4489
4490 kmem_free(f1, f1_len);
4491 kmem_free(f2, f2_len);
4492
4493 if (error)
4494 goto abort3;
4495 }
4496 #endif /* NVERIEXEC > 0 */
4497
4498 /*
4499 * All ready. Incant the rename vop.
4500 */
4501 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4502 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4503 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4504 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4505 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);
4506
4507 /*
4508 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
4509 * tdvp and tvp. But we can't assert any of that.
4510 */
4511 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4512 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4513 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4514 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4515
4516 /*
4517 * So all we have left to do is to drop the rename lock and
4518 * destroy the pathbufs.
4519 */
4520 VFS_RENAMELOCK_EXIT(mp);
4521 fstrans_done(mp);
4522 goto out2;
4523
4524 abort3: if ((tvp != NULL) && (tvp != tdvp))
4525 VOP_UNLOCK(tvp);
4526 abort2: VOP_UNLOCK(tdvp);
4527 VFS_RENAMELOCK_EXIT(mp);
4528 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd);
4529 vrele(tdvp);
4530 if (tvp != NULL)
4531 vrele(tvp);
4532 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd);
4533 vrele(fdvp);
4534 vrele(fvp);
4535 fstrans_done(mp);
4536 out2: pathbuf_destroy(tpb);
4537 out1: pathbuf_destroy(fpb);
4538 out0: return error;
4539 }
4540
4541 /*
4542 * Make a directory file.
4543 */
4544 /* ARGSUSED */
4545 int
4546 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4547 {
4548 /* {
4549 syscallarg(const char *) path;
4550 syscallarg(int) mode;
4551 } */
4552
4553 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4554 SCARG(uap, mode), UIO_USERSPACE);
4555 }
4556
4557 int
4558 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4559 register_t *retval)
4560 {
4561 /* {
4562 syscallarg(int) fd;
4563 syscallarg(const char *) path;
4564 syscallarg(int) mode;
4565 } */
4566
4567 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4568 SCARG(uap, mode), UIO_USERSPACE);
4569 }
4570
4571
4572 int
4573 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4574 {
4575 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg);
4576 }
4577
4578 static int
4579 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4580 enum uio_seg seg)
4581 {
4582 struct proc *p = curlwp->l_proc;
4583 struct vnode *vp;
4584 struct vattr vattr;
4585 int error;
4586 struct pathbuf *pb;
4587 struct nameidata nd;
4588
4589 KASSERT(l != NULL || fdat == AT_FDCWD);
4590
4591 /* XXX bollocks, should pass in a pathbuf */
4592 error = pathbuf_maybe_copyin(path, seg, &pb);
4593 if (error) {
4594 return error;
4595 }
4596
4597 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4598
4599 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4600 pathbuf_destroy(pb);
4601 return (error);
4602 }
4603 vp = nd.ni_vp;
4604 if (vp != NULL) {
4605 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4606 if (nd.ni_dvp == vp)
4607 vrele(nd.ni_dvp);
4608 else
4609 vput(nd.ni_dvp);
4610 vrele(vp);
4611 pathbuf_destroy(pb);
4612 return (EEXIST);
4613 }
4614 vattr_null(&vattr);
4615 vattr.va_type = VDIR;
4616 /* We will read cwdi->cwdi_cmask unlocked. */
4617 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4618 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4619 if (!error)
4620 vrele(nd.ni_vp);
4621 vput(nd.ni_dvp);
4622 pathbuf_destroy(pb);
4623 return (error);
4624 }
4625
4626 /*
4627 * Remove a directory file.
4628 */
4629 /* ARGSUSED */
4630 int
4631 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4632 {
4633 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4634 AT_REMOVEDIR, UIO_USERSPACE);
4635 }
4636
4637 /*
4638 * Read a block of directory entries in a file system independent format.
4639 */
4640 int
4641 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4642 {
4643 /* {
4644 syscallarg(int) fd;
4645 syscallarg(char *) buf;
4646 syscallarg(size_t) count;
4647 } */
4648 file_t *fp;
4649 int error, done;
4650
4651 /* fd_getvnode() will use the descriptor for us */
4652 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4653 return (error);
4654 if ((fp->f_flag & FREAD) == 0) {
4655 error = EBADF;
4656 goto out;
4657 }
4658 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4659 SCARG(uap, count), &done, l, 0, 0);
4660 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4661 *retval = done;
4662 out:
4663 fd_putfile(SCARG(uap, fd));
4664 return (error);
4665 }
4666
4667 /*
4668 * Set the mode mask for creation of filesystem nodes.
4669 */
4670 int
4671 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4672 {
4673 /* {
4674 syscallarg(mode_t) newmask;
4675 } */
4676 struct proc *p = l->l_proc;
4677 struct cwdinfo *cwdi;
4678
4679 /*
4680 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4681 * important is that we serialize changes to the mask. The
4682 * rw_exit() will issue a write memory barrier on our behalf,
4683 * and force the changes out to other CPUs (as it must use an
4684 * atomic operation, draining the local CPU's store buffers).
4685 */
4686 cwdi = p->p_cwdi;
4687 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4688 *retval = cwdi->cwdi_cmask;
4689 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4690 rw_exit(&cwdi->cwdi_lock);
4691
4692 return (0);
4693 }
4694
4695 int
4696 dorevoke(struct vnode *vp, kauth_cred_t cred)
4697 {
4698 struct vattr vattr;
4699 int error, fs_decision;
4700
4701 vn_lock(vp, LK_SHARED | LK_RETRY);
4702 error = VOP_GETATTR(vp, &vattr, cred);
4703 VOP_UNLOCK(vp);
4704 if (error != 0)
4705 return error;
4706 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4707 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4708 fs_decision);
4709 if (!error)
4710 VOP_REVOKE(vp, REVOKEALL);
4711 return (error);
4712 }
4713
4714 /*
4715 * Void all references to file by ripping underlying filesystem
4716 * away from vnode.
4717 */
4718 /* ARGSUSED */
4719 int
4720 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4721 {
4722 /* {
4723 syscallarg(const char *) path;
4724 } */
4725 struct vnode *vp;
4726 int error;
4727
4728 error = namei_simple_user(SCARG(uap, path),
4729 NSM_FOLLOW_TRYEMULROOT, &vp);
4730 if (error != 0)
4731 return (error);
4732 error = dorevoke(vp, l->l_cred);
4733 vrele(vp);
4734 return (error);
4735 }
4736
4737 /*
4738 * Allocate backing store for a file, filling a hole without having to
4739 * explicitly write anything out.
4740 */
4741 /* ARGSUSED */
4742 int
4743 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4744 register_t *retval)
4745 {
4746 /* {
4747 syscallarg(int) fd;
4748 syscallarg(off_t) pos;
4749 syscallarg(off_t) len;
4750 } */
4751 int fd;
4752 off_t pos, len;
4753 struct file *fp;
4754 struct vnode *vp;
4755 int error;
4756
4757 fd = SCARG(uap, fd);
4758 pos = SCARG(uap, pos);
4759 len = SCARG(uap, len);
4760
4761 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4762 *retval = EINVAL;
4763 return 0;
4764 }
4765
4766 error = fd_getvnode(fd, &fp);
4767 if (error) {
4768 *retval = error;
4769 return 0;
4770 }
4771 if ((fp->f_flag & FWRITE) == 0) {
4772 error = EBADF;
4773 goto fail;
4774 }
4775 vp = fp->f_vnode;
4776
4777 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4778 if (vp->v_type == VDIR) {
4779 error = EISDIR;
4780 } else {
4781 error = VOP_FALLOCATE(vp, pos, len);
4782 }
4783 VOP_UNLOCK(vp);
4784
4785 fail:
4786 fd_putfile(fd);
4787 *retval = error;
4788 return 0;
4789 }
4790
4791 /*
4792 * Deallocate backing store for a file, creating a hole. Also used for
4793 * invoking TRIM on disks.
4794 */
4795 /* ARGSUSED */
4796 int
4797 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4798 register_t *retval)
4799 {
4800 /* {
4801 syscallarg(int) fd;
4802 syscallarg(off_t) pos;
4803 syscallarg(off_t) len;
4804 } */
4805 int fd;
4806 off_t pos, len;
4807 struct file *fp;
4808 struct vnode *vp;
4809 int error;
4810
4811 fd = SCARG(uap, fd);
4812 pos = SCARG(uap, pos);
4813 len = SCARG(uap, len);
4814
4815 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4816 return EINVAL;
4817 }
4818
4819 error = fd_getvnode(fd, &fp);
4820 if (error) {
4821 return error;
4822 }
4823 if ((fp->f_flag & FWRITE) == 0) {
4824 error = EBADF;
4825 goto fail;
4826 }
4827 vp = fp->f_vnode;
4828
4829 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4830 if (vp->v_type == VDIR) {
4831 error = EISDIR;
4832 } else {
4833 error = VOP_FDISCARD(vp, pos, len);
4834 }
4835 VOP_UNLOCK(vp);
4836
4837 fail:
4838 fd_putfile(fd);
4839 return error;
4840 }
4841