vfs_syscalls.c revision 1.548 1 /* $NetBSD: vfs_syscalls.c,v 1.548 2020/05/16 18:31:50 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.548 2020/05/16 18:31:50 christos Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/fstrans.h>
91 #include <sys/proc.h>
92 #include <sys/uio.h>
93 #include <sys/kmem.h>
94 #include <sys/dirent.h>
95 #include <sys/sysctl.h>
96 #include <sys/syscallargs.h>
97 #include <sys/vfs_syscalls.h>
98 #include <sys/quota.h>
99 #include <sys/quotactl.h>
100 #include <sys/ktrace.h>
101 #ifdef FILEASSOC
102 #include <sys/fileassoc.h>
103 #endif /* FILEASSOC */
104 #include <sys/extattr.h>
105 #include <sys/verified_exec.h>
106 #include <sys/kauth.h>
107 #include <sys/atomic.h>
108 #include <sys/module.h>
109 #include <sys/buf.h>
110 #include <sys/event.h>
111 #include <sys/compat_stub.h>
112
113 #include <miscfs/genfs/genfs.h>
114 #include <miscfs/specfs/specdev.h>
115
116 #include <nfs/rpcv2.h>
117 #include <nfs/nfsproto.h>
118 #include <nfs/nfs.h>
119 #include <nfs/nfs_var.h>
120
121 /* XXX this shouldn't be here */
122 #ifndef OFF_T_MAX
123 #define OFF_T_MAX __type_max(off_t)
124 #endif
125
126 static int change_flags(struct vnode *, u_long, struct lwp *);
127 static int change_mode(struct vnode *, int, struct lwp *);
128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
131 enum uio_seg);
132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
134 enum uio_seg);
135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
136 enum uio_seg, int);
137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
138 size_t, register_t *);
139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
140
141 static int fd_nameiat(struct lwp *, int, struct nameidata *);
142 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
143 namei_simple_flags_t, struct vnode **);
144
145 /*
146 * This table is used to maintain compatibility with 4.3BSD
147 * and NetBSD 0.9 mount syscalls - and possibly other systems.
148 * Note, the order is important!
149 *
150 * Do not modify this table. It should only contain filesystems
151 * supported by NetBSD 0.9 and 4.3BSD.
152 */
153 const char * const mountcompatnames[] = {
154 NULL, /* 0 = MOUNT_NONE */
155 MOUNT_FFS, /* 1 = MOUNT_UFS */
156 MOUNT_NFS, /* 2 */
157 MOUNT_MFS, /* 3 */
158 MOUNT_MSDOS, /* 4 */
159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
160 MOUNT_FDESC, /* 6 */
161 MOUNT_KERNFS, /* 7 */
162 NULL, /* 8 = MOUNT_DEVFS */
163 MOUNT_AFS, /* 9 */
164 };
165
166 const u_int nmountcompatnames = __arraycount(mountcompatnames);
167
168 static int
169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
170 {
171 file_t *dfp;
172 int error;
173
174 if (fdat != AT_FDCWD) {
175 if ((error = fd_getvnode(fdat, &dfp)) != 0)
176 goto out;
177
178 NDAT(ndp, dfp->f_vnode);
179 }
180
181 error = namei(ndp);
182
183 if (fdat != AT_FDCWD)
184 fd_putfile(fdat);
185 out:
186 return error;
187 }
188
189 static int
190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
191 namei_simple_flags_t sflags, struct vnode **vp_ret)
192 {
193 file_t *dfp;
194 struct vnode *dvp;
195 int error;
196
197 if (fdat != AT_FDCWD) {
198 if ((error = fd_getvnode(fdat, &dfp)) != 0)
199 goto out;
200
201 dvp = dfp->f_vnode;
202 } else {
203 dvp = NULL;
204 }
205
206 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
207
208 if (fdat != AT_FDCWD)
209 fd_putfile(fdat);
210 out:
211 return error;
212 }
213
214 static int
215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
216 {
217 int error;
218
219 fp->f_flag = flags & FMASK;
220 fp->f_type = DTYPE_VNODE;
221 fp->f_ops = &vnops;
222 fp->f_vnode = vp;
223
224 if (flags & (O_EXLOCK | O_SHLOCK)) {
225 struct flock lf;
226 int type;
227
228 lf.l_whence = SEEK_SET;
229 lf.l_start = 0;
230 lf.l_len = 0;
231 if (flags & O_EXLOCK)
232 lf.l_type = F_WRLCK;
233 else
234 lf.l_type = F_RDLCK;
235 type = F_FLOCK;
236 if ((flags & FNONBLOCK) == 0)
237 type |= F_WAIT;
238 VOP_UNLOCK(vp);
239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
240 if (error) {
241 (void) vn_close(vp, fp->f_flag, fp->f_cred);
242 fd_abort(l->l_proc, fp, indx);
243 return error;
244 }
245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
246 atomic_or_uint(&fp->f_flag, FHASLOCK);
247 }
248 if (flags & O_CLOEXEC)
249 fd_set_exclose(l, indx, true);
250 return 0;
251 }
252
253 static int
254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
255 void *data, size_t *data_len)
256 {
257 struct mount *mp;
258 int error = 0, saved_flags;
259
260 mp = vp->v_mount;
261 saved_flags = mp->mnt_flag;
262
263 /* We can operate only on VV_ROOT nodes. */
264 if ((vp->v_vflag & VV_ROOT) == 0) {
265 error = EINVAL;
266 goto out;
267 }
268
269 /*
270 * We only allow the filesystem to be reloaded if it
271 * is currently mounted read-only. Additionally, we
272 * prevent read-write to read-only downgrades.
273 */
274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
275 (mp->mnt_flag & MNT_RDONLY) == 0 &&
276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
277 error = EOPNOTSUPP; /* Needs translation */
278 goto out;
279 }
280
281 /*
282 * Enabling MNT_UNION requires a covered mountpoint and
283 * must not happen on the root mount.
284 */
285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) {
286 error = EOPNOTSUPP;
287 goto out;
288 }
289
290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
292 if (error)
293 goto out;
294
295 error = vfs_suspend(mp, 0);
296 if (error)
297 goto out;
298
299 mutex_enter(mp->mnt_updating);
300
301 mp->mnt_flag &= ~MNT_OP_FLAGS;
302 mp->mnt_flag |= flags & MNT_OP_FLAGS;
303
304 /*
305 * Set the mount level flags.
306 */
307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
308 if ((flags & MNT_RDONLY))
309 mp->mnt_iflag |= IMNT_WANTRDONLY;
310 else
311 mp->mnt_iflag |= IMNT_WANTRDWR;
312 }
313 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
315 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
316 mp->mnt_flag &= ~MNT_RDONLY;
317
318 error = VFS_MOUNT(mp, path, data, data_len);
319
320 if (error && data != NULL) {
321 int error2;
322
323 /*
324 * Update failed; let's try and see if it was an
325 * export request. For compat with 3.0 and earlier.
326 */
327 error2 = vfs_hooks_reexport(mp, path, data);
328
329 /*
330 * Only update error code if the export request was
331 * understood but some problem occurred while
332 * processing it.
333 */
334 if (error2 != EJUSTRETURN)
335 error = error2;
336 }
337
338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
339 mp->mnt_flag |= MNT_RDONLY;
340 if (error)
341 mp->mnt_flag = saved_flags;
342 mp->mnt_flag &= ~MNT_OP_FLAGS;
343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
346 vfs_syncer_add_to_worklist(mp);
347 } else {
348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
349 vfs_syncer_remove_from_worklist(mp);
350 }
351 mutex_exit(mp->mnt_updating);
352 vfs_resume(mp);
353
354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
355 (flags & MNT_EXTATTR)) {
356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
357 NULL, 0, NULL) != 0) {
358 printf("%s: failed to start extattr, error = %d",
359 mp->mnt_stat.f_mntonname, error);
360 mp->mnt_flag &= ~MNT_EXTATTR;
361 }
362 }
363
364 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
365 !(flags & MNT_EXTATTR)) {
366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
367 NULL, 0, NULL) != 0) {
368 printf("%s: failed to stop extattr, error = %d",
369 mp->mnt_stat.f_mntonname, error);
370 mp->mnt_flag |= MNT_RDONLY;
371 }
372 }
373 out:
374 return (error);
375 }
376
377 static int
378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
379 struct vfsops **vfsops)
380 {
381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
382 int error;
383
384 if (type_seg == UIO_USERSPACE) {
385 /* Copy file-system type from userspace. */
386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
387 } else {
388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
389 KASSERT(error == 0);
390 }
391
392 if (error) {
393 /*
394 * Historically, filesystem types were identified by numbers.
395 * If we get an integer for the filesystem type instead of a
396 * string, we check to see if it matches one of the historic
397 * filesystem types.
398 */
399 u_long fsindex = (u_long)fstype;
400 if (fsindex >= nmountcompatnames ||
401 mountcompatnames[fsindex] == NULL)
402 return ENODEV;
403 strlcpy(fstypename, mountcompatnames[fsindex],
404 sizeof(fstypename));
405 }
406
407 /* Accept `ufs' as an alias for `ffs', for compatibility. */
408 if (strcmp(fstypename, "ufs") == 0)
409 fstypename[0] = 'f';
410
411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
412 return 0;
413
414 /* If we can autoload a vfs module, try again */
415 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
416
417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
418 return 0;
419
420 return ENODEV;
421 }
422
423 static int
424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
425 void *data, size_t *data_len)
426 {
427 struct mount *mp;
428 int error;
429
430 /* If MNT_GETARGS is specified, it should be the only flag. */
431 if (flags & ~MNT_GETARGS)
432 return EINVAL;
433
434 mp = vp->v_mount;
435
436 /* XXX: probably some notion of "can see" here if we want isolation. */
437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
439 if (error)
440 return error;
441
442 if ((vp->v_vflag & VV_ROOT) == 0)
443 return EINVAL;
444
445 if (vfs_busy(mp))
446 return EPERM;
447
448 mutex_enter(mp->mnt_updating);
449 mp->mnt_flag &= ~MNT_OP_FLAGS;
450 mp->mnt_flag |= MNT_GETARGS;
451 error = VFS_MOUNT(mp, path, data, data_len);
452 mp->mnt_flag &= ~MNT_OP_FLAGS;
453 mutex_exit(mp->mnt_updating);
454
455 vfs_unbusy(mp);
456 return (error);
457 }
458
459 int
460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
461 {
462 /* {
463 syscallarg(const char *) type;
464 syscallarg(const char *) path;
465 syscallarg(int) flags;
466 syscallarg(void *) data;
467 syscallarg(size_t) data_len;
468 } */
469
470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
472 SCARG(uap, data_len), retval);
473 }
474
475 int
476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
477 const char *path, int flags, void *data, enum uio_seg data_seg,
478 size_t data_len, register_t *retval)
479 {
480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
481 struct vnode *vp;
482 void *data_buf = data;
483 bool vfsopsrele = false;
484 size_t alloc_sz = 0;
485 int error;
486
487 /*
488 * Get vnode to be covered
489 */
490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
491 if (error != 0) {
492 vp = NULL;
493 goto done;
494 }
495
496 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
497 vfsops = vp->v_mount->mnt_op;
498 } else {
499 /* 'type' is userspace */
500 error = mount_get_vfsops(type, type_seg, &vfsops);
501 if (error != 0)
502 goto done;
503 vfsopsrele = true;
504 }
505
506 /*
507 * We allow data to be NULL, even for userspace. Some fs's don't need
508 * it. The others will handle NULL.
509 */
510 if (data != NULL && data_seg == UIO_USERSPACE) {
511 if (data_len == 0) {
512 /* No length supplied, use default for filesystem */
513 data_len = vfsops->vfs_min_mount_data;
514
515 /*
516 * Hopefully a longer buffer won't make copyin() fail.
517 * For compatibility with 3.0 and earlier.
518 */
519 if (flags & MNT_UPDATE
520 && data_len < sizeof (struct mnt_export_args30))
521 data_len = sizeof (struct mnt_export_args30);
522 }
523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
524 error = EINVAL;
525 goto done;
526 }
527 alloc_sz = data_len;
528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
529
530 /* NFS needs the buffer even for mnt_getargs .... */
531 error = copyin(data, data_buf, data_len);
532 if (error != 0)
533 goto done;
534 }
535
536 if (flags & MNT_GETARGS) {
537 if (data_len == 0) {
538 error = EINVAL;
539 goto done;
540 }
541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
542 if (error != 0)
543 goto done;
544 if (data_seg == UIO_USERSPACE)
545 error = copyout(data_buf, data, data_len);
546 *retval = data_len;
547 } else if (flags & MNT_UPDATE) {
548 error = mount_update(l, vp, path, flags, data_buf, &data_len);
549 } else {
550 /* Locking is handled internally in mount_domount(). */
551 KASSERT(vfsopsrele == true);
552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
553 &data_len);
554 vfsopsrele = false;
555 }
556 if (!error)
557 KNOTE(&fs_klist, VQ_MOUNT);
558
559 done:
560 if (vfsopsrele)
561 vfs_delref(vfsops);
562 if (vp != NULL) {
563 vrele(vp);
564 }
565 if (data_buf != data)
566 kmem_free(data_buf, alloc_sz);
567 return (error);
568 }
569
570 /*
571 * Unmount a file system.
572 *
573 * Note: unmount takes a path to the vnode mounted on as argument,
574 * not special file (as before).
575 */
576 /* ARGSUSED */
577 int
578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
579 {
580 /* {
581 syscallarg(const char *) path;
582 syscallarg(int) flags;
583 } */
584 struct vnode *vp;
585 struct mount *mp;
586 int error;
587 struct pathbuf *pb;
588 struct nameidata nd;
589
590 error = pathbuf_copyin(SCARG(uap, path), &pb);
591 if (error) {
592 return error;
593 }
594
595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
596 if ((error = namei(&nd)) != 0) {
597 pathbuf_destroy(pb);
598 return error;
599 }
600 vp = nd.ni_vp;
601 pathbuf_destroy(pb);
602
603 mp = vp->v_mount;
604 vfs_ref(mp);
605 VOP_UNLOCK(vp);
606
607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
609 if (error) {
610 vrele(vp);
611 vfs_rele(mp);
612 return (error);
613 }
614
615 /*
616 * Don't allow unmounting the root file system.
617 */
618 if (mp->mnt_flag & MNT_ROOTFS) {
619 vrele(vp);
620 vfs_rele(mp);
621 return (EINVAL);
622 }
623
624 /*
625 * Must be the root of the filesystem
626 */
627 if ((vp->v_vflag & VV_ROOT) == 0) {
628 vrele(vp);
629 vfs_rele(mp);
630 return (EINVAL);
631 }
632
633 vrele(vp);
634 error = dounmount(mp, SCARG(uap, flags), l);
635 vfs_rele(mp);
636 if (!error)
637 KNOTE(&fs_klist, VQ_UNMOUNT);
638 return error;
639 }
640
641 /*
642 * Sync each mounted filesystem.
643 */
644 #ifdef DEBUG
645 int syncprt = 0;
646 struct ctldebug debug0 = { "syncprt", &syncprt };
647 #endif
648
649 void
650 do_sys_sync(struct lwp *l)
651 {
652 mount_iterator_t *iter;
653 struct mount *mp;
654 int asyncflag;
655
656 mountlist_iterator_init(&iter);
657 while ((mp = mountlist_iterator_next(iter)) != NULL) {
658 mutex_enter(mp->mnt_updating);
659 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
660 asyncflag = mp->mnt_flag & MNT_ASYNC;
661 mp->mnt_flag &= ~MNT_ASYNC;
662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
663 if (asyncflag)
664 mp->mnt_flag |= MNT_ASYNC;
665 }
666 mutex_exit(mp->mnt_updating);
667 }
668 mountlist_iterator_destroy(iter);
669 #ifdef DEBUG
670 if (syncprt)
671 vfs_bufstats();
672 #endif /* DEBUG */
673 }
674
675 static bool
676 sync_vnode_filter(void *cookie, vnode_t *vp)
677 {
678
679 if (vp->v_numoutput > 0) {
680 ++*(int *)cookie;
681 }
682 return false;
683 }
684
685 int
686 vfs_syncwait(void)
687 {
688 int nbusy, nbusy_prev, iter;
689 struct vnode_iterator *vniter;
690 mount_iterator_t *mpiter;
691 struct mount *mp;
692
693 for (nbusy_prev = 0, iter = 0; iter < 20;) {
694 nbusy = 0;
695 mountlist_iterator_init(&mpiter);
696 while ((mp = mountlist_iterator_next(mpiter)) != NULL) {
697 vnode_t *vp __diagused;
698 vfs_vnode_iterator_init(mp, &vniter);
699 vp = vfs_vnode_iterator_next(vniter,
700 sync_vnode_filter, &nbusy);
701 KASSERT(vp == NULL);
702 vfs_vnode_iterator_destroy(vniter);
703 }
704 mountlist_iterator_destroy(mpiter);
705
706 if (nbusy == 0)
707 break;
708 if (nbusy_prev == 0)
709 nbusy_prev = nbusy;
710 printf("%d ", nbusy);
711 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL);
712 if (nbusy >= nbusy_prev) /* we didn't flush anything */
713 iter++;
714 else
715 nbusy_prev = nbusy;
716 }
717
718 if (nbusy) {
719 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
720 printf("giving up\nPrinting vnodes for busy buffers\n");
721 mountlist_iterator_init(&mpiter);
722 while ((mp = mountlist_iterator_next(mpiter)) != NULL) {
723 vnode_t *vp;
724 vfs_vnode_iterator_init(mp, &vniter);
725 vp = vfs_vnode_iterator_next(vniter,
726 NULL, NULL);
727 mutex_enter(vp->v_interlock);
728 if (vp->v_numoutput > 0)
729 vprint(NULL, vp);
730 mutex_exit(vp->v_interlock);
731 vrele(vp);
732 vfs_vnode_iterator_destroy(vniter);
733 }
734 mountlist_iterator_destroy(mpiter);
735 #endif
736 }
737
738 return nbusy;
739 }
740
741 /* ARGSUSED */
742 int
743 sys_sync(struct lwp *l, const void *v, register_t *retval)
744 {
745 do_sys_sync(l);
746 return (0);
747 }
748
749
750 /*
751 * Access or change filesystem quotas.
752 *
753 * (this is really 14 different calls bundled into one)
754 */
755
756 static int
757 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
758 {
759 struct quotastat info_k;
760 int error;
761
762 /* ensure any padding bytes are cleared */
763 memset(&info_k, 0, sizeof(info_k));
764
765 error = vfs_quotactl_stat(mp, &info_k);
766 if (error) {
767 return error;
768 }
769
770 return copyout(&info_k, info_u, sizeof(info_k));
771 }
772
773 static int
774 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
775 struct quotaidtypestat *info_u)
776 {
777 struct quotaidtypestat info_k;
778 int error;
779
780 /* ensure any padding bytes are cleared */
781 memset(&info_k, 0, sizeof(info_k));
782
783 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
784 if (error) {
785 return error;
786 }
787
788 return copyout(&info_k, info_u, sizeof(info_k));
789 }
790
791 static int
792 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
793 struct quotaobjtypestat *info_u)
794 {
795 struct quotaobjtypestat info_k;
796 int error;
797
798 /* ensure any padding bytes are cleared */
799 memset(&info_k, 0, sizeof(info_k));
800
801 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
802 if (error) {
803 return error;
804 }
805
806 return copyout(&info_k, info_u, sizeof(info_k));
807 }
808
809 static int
810 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
811 struct quotaval *val_u)
812 {
813 struct quotakey key_k;
814 struct quotaval val_k;
815 int error;
816
817 /* ensure any padding bytes are cleared */
818 memset(&val_k, 0, sizeof(val_k));
819
820 error = copyin(key_u, &key_k, sizeof(key_k));
821 if (error) {
822 return error;
823 }
824
825 error = vfs_quotactl_get(mp, &key_k, &val_k);
826 if (error) {
827 return error;
828 }
829
830 return copyout(&val_k, val_u, sizeof(val_k));
831 }
832
833 static int
834 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
835 const struct quotaval *val_u)
836 {
837 struct quotakey key_k;
838 struct quotaval val_k;
839 int error;
840
841 error = copyin(key_u, &key_k, sizeof(key_k));
842 if (error) {
843 return error;
844 }
845
846 error = copyin(val_u, &val_k, sizeof(val_k));
847 if (error) {
848 return error;
849 }
850
851 return vfs_quotactl_put(mp, &key_k, &val_k);
852 }
853
854 static int
855 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
856 {
857 struct quotakey key_k;
858 int error;
859
860 error = copyin(key_u, &key_k, sizeof(key_k));
861 if (error) {
862 return error;
863 }
864
865 return vfs_quotactl_del(mp, &key_k);
866 }
867
868 static int
869 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
870 {
871 struct quotakcursor cursor_k;
872 int error;
873
874 /* ensure any padding bytes are cleared */
875 memset(&cursor_k, 0, sizeof(cursor_k));
876
877 error = vfs_quotactl_cursoropen(mp, &cursor_k);
878 if (error) {
879 return error;
880 }
881
882 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
883 }
884
885 static int
886 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
887 {
888 struct quotakcursor cursor_k;
889 int error;
890
891 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
892 if (error) {
893 return error;
894 }
895
896 return vfs_quotactl_cursorclose(mp, &cursor_k);
897 }
898
899 static int
900 do_sys_quotactl_cursorskipidtype(struct mount *mp,
901 struct quotakcursor *cursor_u, int idtype)
902 {
903 struct quotakcursor cursor_k;
904 int error;
905
906 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
907 if (error) {
908 return error;
909 }
910
911 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
912 if (error) {
913 return error;
914 }
915
916 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
917 }
918
919 static int
920 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
921 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
922 unsigned *ret_u)
923 {
924 #define CGET_STACK_MAX 8
925 struct quotakcursor cursor_k;
926 struct quotakey stackkeys[CGET_STACK_MAX];
927 struct quotaval stackvals[CGET_STACK_MAX];
928 struct quotakey *keys_k;
929 struct quotaval *vals_k;
930 unsigned ret_k;
931 int error;
932
933 if (maxnum > 128) {
934 maxnum = 128;
935 }
936
937 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
938 if (error) {
939 return error;
940 }
941
942 if (maxnum <= CGET_STACK_MAX) {
943 keys_k = stackkeys;
944 vals_k = stackvals;
945 /* ensure any padding bytes are cleared */
946 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
947 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
948 } else {
949 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
950 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
951 }
952
953 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
954 &ret_k);
955 if (error) {
956 goto fail;
957 }
958
959 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
960 if (error) {
961 goto fail;
962 }
963
964 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
965 if (error) {
966 goto fail;
967 }
968
969 error = copyout(&ret_k, ret_u, sizeof(ret_k));
970 if (error) {
971 goto fail;
972 }
973
974 /* do last to maximize the chance of being able to recover a failure */
975 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
976
977 fail:
978 if (keys_k != stackkeys) {
979 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
980 }
981 if (vals_k != stackvals) {
982 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
983 }
984 return error;
985 }
986
987 static int
988 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
989 int *ret_u)
990 {
991 struct quotakcursor cursor_k;
992 int ret_k;
993 int error;
994
995 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
996 if (error) {
997 return error;
998 }
999
1000 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
1001 if (error) {
1002 return error;
1003 }
1004
1005 error = copyout(&ret_k, ret_u, sizeof(ret_k));
1006 if (error) {
1007 return error;
1008 }
1009
1010 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1011 }
1012
1013 static int
1014 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
1015 {
1016 struct quotakcursor cursor_k;
1017 int error;
1018
1019 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
1020 if (error) {
1021 return error;
1022 }
1023
1024 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
1025 if (error) {
1026 return error;
1027 }
1028
1029 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1030 }
1031
1032 static int
1033 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
1034 {
1035 char *path_k;
1036 int error;
1037
1038 /* XXX this should probably be a struct pathbuf */
1039 path_k = PNBUF_GET();
1040 error = copyin(path_u, path_k, PATH_MAX);
1041 if (error) {
1042 PNBUF_PUT(path_k);
1043 return error;
1044 }
1045
1046 error = vfs_quotactl_quotaon(mp, idtype, path_k);
1047
1048 PNBUF_PUT(path_k);
1049 return error;
1050 }
1051
/*
 * QUOTACTL_QUOTAOFF: no user data to transfer; forwards to
 * vfs_quotactl_quotaoff() for `idtype' on mp and returns its result.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);

	return rv;
}
1057
1058 int
1059 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
1060 {
1061 struct mount *mp;
1062 struct vnode *vp;
1063 int error;
1064
1065 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
1066 if (error != 0)
1067 return (error);
1068 mp = vp->v_mount;
1069
1070 switch (args->qc_op) {
1071 case QUOTACTL_STAT:
1072 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
1073 break;
1074 case QUOTACTL_IDTYPESTAT:
1075 error = do_sys_quotactl_idtypestat(mp,
1076 args->u.idtypestat.qc_idtype,
1077 args->u.idtypestat.qc_info);
1078 break;
1079 case QUOTACTL_OBJTYPESTAT:
1080 error = do_sys_quotactl_objtypestat(mp,
1081 args->u.objtypestat.qc_objtype,
1082 args->u.objtypestat.qc_info);
1083 break;
1084 case QUOTACTL_GET:
1085 error = do_sys_quotactl_get(mp,
1086 args->u.get.qc_key,
1087 args->u.get.qc_val);
1088 break;
1089 case QUOTACTL_PUT:
1090 error = do_sys_quotactl_put(mp,
1091 args->u.put.qc_key,
1092 args->u.put.qc_val);
1093 break;
1094 case QUOTACTL_DEL:
1095 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1096 break;
1097 case QUOTACTL_CURSOROPEN:
1098 error = do_sys_quotactl_cursoropen(mp,
1099 args->u.cursoropen.qc_cursor);
1100 break;
1101 case QUOTACTL_CURSORCLOSE:
1102 error = do_sys_quotactl_cursorclose(mp,
1103 args->u.cursorclose.qc_cursor);
1104 break;
1105 case QUOTACTL_CURSORSKIPIDTYPE:
1106 error = do_sys_quotactl_cursorskipidtype(mp,
1107 args->u.cursorskipidtype.qc_cursor,
1108 args->u.cursorskipidtype.qc_idtype);
1109 break;
1110 case QUOTACTL_CURSORGET:
1111 error = do_sys_quotactl_cursorget(mp,
1112 args->u.cursorget.qc_cursor,
1113 args->u.cursorget.qc_keys,
1114 args->u.cursorget.qc_vals,
1115 args->u.cursorget.qc_maxnum,
1116 args->u.cursorget.qc_ret);
1117 break;
1118 case QUOTACTL_CURSORATEND:
1119 error = do_sys_quotactl_cursoratend(mp,
1120 args->u.cursoratend.qc_cursor,
1121 args->u.cursoratend.qc_ret);
1122 break;
1123 case QUOTACTL_CURSORREWIND:
1124 error = do_sys_quotactl_cursorrewind(mp,
1125 args->u.cursorrewind.qc_cursor);
1126 break;
1127 case QUOTACTL_QUOTAON:
1128 error = do_sys_quotactl_quotaon(mp,
1129 args->u.quotaon.qc_idtype,
1130 args->u.quotaon.qc_quotafile);
1131 break;
1132 case QUOTACTL_QUOTAOFF:
1133 error = do_sys_quotactl_quotaoff(mp,
1134 args->u.quotaoff.qc_idtype);
1135 break;
1136 default:
1137 error = EINVAL;
1138 break;
1139 }
1140
1141 vrele(vp);
1142 return error;
1143 }
1144
1145 /* ARGSUSED */
1146 int
1147 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1148 register_t *retval)
1149 {
1150 /* {
1151 syscallarg(const char *) path;
1152 syscallarg(struct quotactl_args *) args;
1153 } */
1154 struct quotactl_args args;
1155 int error;
1156
1157 error = copyin(SCARG(uap, args), &args, sizeof(args));
1158 if (error) {
1159 return error;
1160 }
1161
1162 return do_sys_quotactl(SCARG(uap, path), &args);
1163 }
1164
1165 int
1166 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1167 int root)
1168 {
1169 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1170 bool chrooted;
1171 int error = 0;
1172
1173 KASSERT(l == curlwp);
1174
1175 /*
1176 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL,
1177 * since it would imply chroots can be escaped. Just make sure this
1178 * routine is self-consistent.
1179 */
1180 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL);
1181
1182 /*
1183 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1184 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1185 * overrides MNT_NOWAIT.
1186 */
1187 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1188 (flags != MNT_WAIT && flags != 0)) {
1189 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1190 } else {
1191 /* Get the filesystem stats now */
1192 memset(sp, 0, sizeof(*sp));
1193 if ((error = VFS_STATVFS(mp, sp)) != 0)
1194 return error;
1195 if (!chrooted)
1196 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1197 }
1198
1199 if (chrooted) {
1200 size_t len;
1201 char *bp;
1202 char c;
1203 char *path = PNBUF_GET();
1204
1205 bp = path + MAXPATHLEN;
1206 *--bp = '\0';
1207 rw_enter(&cwdi->cwdi_lock, RW_READER);
1208 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1209 MAXPATHLEN / 2, 0, l);
1210 rw_exit(&cwdi->cwdi_lock);
1211 if (error) {
1212 PNBUF_PUT(path);
1213 return error;
1214 }
1215 len = strlen(bp);
1216 if (len != 1) {
1217 /*
1218 * for mount points that are below our root, we can see
1219 * them, so we fix up the pathname and return them. The
1220 * rest we cannot see, so we don't allow viewing the
1221 * data.
1222 */
1223 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1224 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1225 (void)strlcpy(sp->f_mntonname,
1226 c == '\0' ? "/" : &sp->f_mntonname[len],
1227 sizeof(sp->f_mntonname));
1228 } else {
1229 if (root)
1230 (void)strlcpy(sp->f_mntonname, "/",
1231 sizeof(sp->f_mntonname));
1232 else
1233 error = EPERM;
1234 }
1235 }
1236 PNBUF_PUT(path);
1237 }
1238 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1239 return error;
1240 }
1241
1242 /*
1243 * Get filesystem statistics by path.
1244 */
1245 int
1246 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1247 {
1248 struct mount *mp;
1249 int error;
1250 struct vnode *vp;
1251
1252 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1253 if (error != 0)
1254 return error;
1255 mp = vp->v_mount;
1256 error = dostatvfs(mp, sb, l, flags, 1);
1257 vrele(vp);
1258 return error;
1259 }
1260
1261 /* ARGSUSED */
1262 int
1263 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval)
1264 {
1265 /* {
1266 syscallarg(const char *) path;
1267 syscallarg(struct statvfs *) buf;
1268 syscallarg(int) flags;
1269 } */
1270 struct statvfs *sb;
1271 int error;
1272
1273 sb = STATVFSBUF_GET();
1274 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1275 if (error == 0)
1276 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1277 STATVFSBUF_PUT(sb);
1278 return error;
1279 }
1280
1281 /*
1282 * Get filesystem statistics by fd.
1283 */
1284 int
1285 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1286 {
1287 file_t *fp;
1288 struct mount *mp;
1289 int error;
1290
1291 /* fd_getvnode() will use the descriptor for us */
1292 if ((error = fd_getvnode(fd, &fp)) != 0)
1293 return (error);
1294 mp = fp->f_vnode->v_mount;
1295 error = dostatvfs(mp, sb, curlwp, flags, 1);
1296 fd_putfile(fd);
1297 return error;
1298 }
1299
1300 /* ARGSUSED */
1301 int
1302 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval)
1303 {
1304 /* {
1305 syscallarg(int) fd;
1306 syscallarg(struct statvfs *) buf;
1307 syscallarg(int) flags;
1308 } */
1309 struct statvfs *sb;
1310 int error;
1311
1312 sb = STATVFSBUF_GET();
1313 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1314 if (error == 0)
1315 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1316 STATVFSBUF_PUT(sb);
1317 return error;
1318 }
1319
1320
1321 /*
1322 * Get statistics on all filesystems.
1323 */
1324 int
1325 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1326 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1327 register_t *retval)
1328 {
1329 int root = 0;
1330 mount_iterator_t *iter;
1331 struct proc *p = l->l_proc;
1332 struct mount *mp;
1333 struct statvfs *sb;
1334 size_t count, maxcount;
1335 int error = 0;
1336
1337 sb = STATVFSBUF_GET();
1338 maxcount = bufsize / entry_sz;
1339 count = 0;
1340 mountlist_iterator_init(&iter);
1341 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1342 if (sfsp && count < maxcount) {
1343 error = dostatvfs(mp, sb, l, flags, 0);
1344 if (error) {
1345 error = 0;
1346 continue;
1347 }
1348 error = copyfn(sb, sfsp, entry_sz);
1349 if (error)
1350 goto out;
1351 sfsp = (char *)sfsp + entry_sz;
1352 root |= strcmp(sb->f_mntonname, "/") == 0;
1353 }
1354 count++;
1355 }
1356
1357 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1358 /*
1359 * fake a root entry
1360 */
1361 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1362 sb, l, flags, 1);
1363 if (error != 0)
1364 goto out;
1365 if (sfsp) {
1366 error = copyfn(sb, sfsp, entry_sz);
1367 if (error != 0)
1368 goto out;
1369 }
1370 count++;
1371 }
1372 if (sfsp && count > maxcount)
1373 *retval = maxcount;
1374 else
1375 *retval = count;
1376 out:
1377 mountlist_iterator_destroy(iter);
1378 STATVFSBUF_PUT(sb);
1379 return error;
1380 }
1381
1382 int
1383 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap,
1384 register_t *retval)
1385 {
1386 /* {
1387 syscallarg(struct statvfs *) buf;
1388 syscallarg(size_t) bufsize;
1389 syscallarg(int) flags;
1390 } */
1391
1392 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1393 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1394 }
1395
1396 /*
1397 * Change current working directory to a given file descriptor.
1398 */
1399 /* ARGSUSED */
1400 int
1401 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1402 {
1403 /* {
1404 syscallarg(int) fd;
1405 } */
1406 struct proc *p = l->l_proc;
1407 struct cwdinfo *cwdi;
1408 struct vnode *vp, *tdp;
1409 struct mount *mp;
1410 file_t *fp;
1411 int error, fd;
1412
1413 /* fd_getvnode() will use the descriptor for us */
1414 fd = SCARG(uap, fd);
1415 if ((error = fd_getvnode(fd, &fp)) != 0)
1416 return (error);
1417 vp = fp->f_vnode;
1418
1419 vref(vp);
1420 vn_lock(vp, LK_SHARED | LK_RETRY);
1421 if (vp->v_type != VDIR)
1422 error = ENOTDIR;
1423 else
1424 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1425 if (error) {
1426 vput(vp);
1427 goto out;
1428 }
1429 while ((mp = vp->v_mountedhere) != NULL) {
1430 error = vfs_busy(mp);
1431 vput(vp);
1432 if (error != 0)
1433 goto out;
1434 error = VFS_ROOT(mp, LK_SHARED, &tdp);
1435 vfs_unbusy(mp);
1436 if (error)
1437 goto out;
1438 vp = tdp;
1439 }
1440 VOP_UNLOCK(vp);
1441
1442 /*
1443 * Disallow changing to a directory not under the process's
1444 * current root directory (if there is one).
1445 */
1446 cwdi = p->p_cwdi;
1447 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1448 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1449 vrele(vp);
1450 error = EPERM; /* operation not permitted */
1451 } else {
1452 vrele(cwdi->cwdi_cdir);
1453 cwdi->cwdi_cdir = vp;
1454 }
1455 rw_exit(&cwdi->cwdi_lock);
1456
1457 out:
1458 fd_putfile(fd);
1459 return (error);
1460 }
1461
1462 /*
1463 * Change this process's notion of the root directory to a given file
1464 * descriptor.
1465 */
1466 int
1467 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1468 {
1469 struct vnode *vp;
1470 file_t *fp;
1471 int error, fd = SCARG(uap, fd);
1472
1473 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1474 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1475 return error;
1476 /* fd_getvnode() will use the descriptor for us */
1477 if ((error = fd_getvnode(fd, &fp)) != 0)
1478 return error;
1479 vp = fp->f_vnode;
1480 vn_lock(vp, LK_SHARED | LK_RETRY);
1481 if (vp->v_type != VDIR)
1482 error = ENOTDIR;
1483 else
1484 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1485 VOP_UNLOCK(vp);
1486 if (error)
1487 goto out;
1488 vref(vp);
1489 change_root(vp);
1490
1491 out:
1492 fd_putfile(fd);
1493 return (error);
1494 }
1495
1496 /*
1497 * Change current working directory (``.'').
1498 */
1499 /* ARGSUSED */
1500 int
1501 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1502 {
1503 /* {
1504 syscallarg(const char *) path;
1505 } */
1506 struct proc *p = l->l_proc;
1507 struct cwdinfo *cwdi;
1508 int error;
1509 struct vnode *vp;
1510
1511 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1512 &vp, l)) != 0)
1513 return (error);
1514 cwdi = p->p_cwdi;
1515 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1516 vrele(cwdi->cwdi_cdir);
1517 cwdi->cwdi_cdir = vp;
1518 rw_exit(&cwdi->cwdi_lock);
1519 return (0);
1520 }
1521
1522 /*
1523 * Change notion of root (``/'') directory.
1524 */
1525 /* ARGSUSED */
1526 int
1527 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1528 {
1529 /* {
1530 syscallarg(const char *) path;
1531 } */
1532 int error;
1533 struct vnode *vp;
1534
1535 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1536 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1537 return (error);
1538
1539 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l);
1540 if (error == 0)
1541 change_root(vp);
1542 return error;
1543 }
1544
1545 /*
1546 * Common routine for chroot and fchroot.
1547 * NB: callers need to properly authorize the change root operation.
1548 */
1549 void
1550 change_root(struct vnode *vp)
1551 {
1552 kauth_cred_t ncred;
1553 struct lwp *l = curlwp;
1554 struct proc *p = l->l_proc;
1555 struct cwdinfo *cwdi = p->p_cwdi;
1556
1557 ncred = kauth_cred_alloc();
1558
1559 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1560 if (cwdi->cwdi_rdir != NULL)
1561 vrele(cwdi->cwdi_rdir);
1562 cwdi->cwdi_rdir = vp;
1563
1564 /*
1565 * Prevent escaping from chroot by putting the root under
1566 * the working directory. Silently chdir to / if we aren't
1567 * already there.
1568 */
1569 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1570 /*
1571 * XXX would be more failsafe to change directory to a
1572 * deadfs node here instead
1573 */
1574 vrele(cwdi->cwdi_cdir);
1575 vref(vp);
1576 cwdi->cwdi_cdir = vp;
1577 }
1578 rw_exit(&cwdi->cwdi_lock);
1579
1580 /* Get a write lock on the process credential. */
1581 proc_crmod_enter();
1582
1583 kauth_cred_clone(p->p_cred, ncred);
1584 kauth_proc_chroot(ncred, p->p_cwdi);
1585
1586 /* Broadcast our credentials to the process and other LWPs. */
1587 proc_crmod_leave(ncred, p->p_cred, true);
1588 }
1589
1590 /*
1591 * Common routine for chroot and chdir.
1592 * XXX "where" should be enum uio_seg
1593 */
1594 int
1595 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1596 {
1597 struct pathbuf *pb;
1598 struct nameidata nd;
1599 int error;
1600
1601 error = pathbuf_maybe_copyin(path, where, &pb);
1602 if (error) {
1603 return error;
1604 }
1605 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb);
1606 if ((error = namei(&nd)) != 0) {
1607 pathbuf_destroy(pb);
1608 return error;
1609 }
1610 *vpp = nd.ni_vp;
1611 pathbuf_destroy(pb);
1612
1613 if ((*vpp)->v_type != VDIR)
1614 error = ENOTDIR;
1615 else
1616 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1617
1618 if (error)
1619 vput(*vpp);
1620 else
1621 VOP_UNLOCK(*vpp);
1622 return (error);
1623 }
1624
1625 /*
1626 * Internals of sys_open - path has already been converted into a pathbuf
1627 * (so we can easily reuse this function from other parts of the kernel,
1628 * like posix_spawn post-processing).
1629 */
1630 int
1631 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1632 int open_mode, int *fd)
1633 {
1634 struct proc *p = l->l_proc;
1635 struct cwdinfo *cwdi = p->p_cwdi;
1636 file_t *fp;
1637 struct vnode *vp;
1638 int flags, cmode;
1639 int indx, error;
1640 struct nameidata nd;
1641
1642 if (open_flags & O_SEARCH) {
1643 open_flags &= ~(int)O_SEARCH;
1644 }
1645
1646 /*
1647 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
1648 * may be specified.
1649 */
1650 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE))
1651 return EINVAL;
1652
1653 flags = FFLAGS(open_flags);
1654 if ((flags & (FREAD | FWRITE)) == 0)
1655 return EINVAL;
1656
1657 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1658 return error;
1659 }
1660
1661 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1662 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1663 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1664 if (dvp != NULL)
1665 NDAT(&nd, dvp);
1666
1667 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1668 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1669 fd_abort(p, fp, indx);
1670 if ((error == EDUPFD || error == EMOVEFD) &&
1671 l->l_dupfd >= 0 && /* XXX from fdopen */
1672 (error =
1673 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1674 *fd = indx;
1675 return 0;
1676 }
1677 if (error == ERESTART)
1678 error = EINTR;
1679 return error;
1680 }
1681
1682 l->l_dupfd = 0;
1683 vp = nd.ni_vp;
1684
1685 if ((error = open_setfp(l, fp, vp, indx, flags)))
1686 return error;
1687
1688 VOP_UNLOCK(vp);
1689 *fd = indx;
1690 fd_affix(p, fp, indx);
1691 return 0;
1692 }
1693
1694 int
1695 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1696 {
1697 struct pathbuf *pb;
1698 int error, oflags;
1699
1700 oflags = FFLAGS(open_flags);
1701 if ((oflags & (FREAD | FWRITE)) == 0)
1702 return EINVAL;
1703
1704 pb = pathbuf_create(path);
1705 if (pb == NULL)
1706 return ENOMEM;
1707
1708 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1709 pathbuf_destroy(pb);
1710
1711 return error;
1712 }
1713
1714 static int
1715 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1716 int mode, int *fd)
1717 {
1718 file_t *dfp = NULL;
1719 struct vnode *dvp = NULL;
1720 struct pathbuf *pb;
1721 const char *pathstring = NULL;
1722 int error;
1723
1724 if (path == NULL) {
1725 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error);
1726 if (error == ENOSYS)
1727 goto no_compat;
1728 if (error)
1729 return error;
1730 } else {
1731 no_compat:
1732 error = pathbuf_copyin(path, &pb);
1733 if (error)
1734 return error;
1735 }
1736
1737 pathstring = pathbuf_stringcopy_get(pb);
1738
1739 /*
1740 * fdat is ignored if:
1741 * 1) if fdat is AT_FDCWD, which means use current directory as base.
1742 * 2) if path is absolute, then fdat is useless.
1743 */
1744 if (fdat != AT_FDCWD && pathstring[0] != '/') {
1745 /* fd_getvnode() will use the descriptor for us */
1746 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1747 goto out;
1748
1749 dvp = dfp->f_vnode;
1750 }
1751
1752 error = do_open(l, dvp, pb, flags, mode, fd);
1753
1754 if (dfp != NULL)
1755 fd_putfile(fdat);
1756 out:
1757 pathbuf_stringcopy_put(pb, pathstring);
1758 pathbuf_destroy(pb);
1759 return error;
1760 }
1761
1762 int
1763 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1764 {
1765 /* {
1766 syscallarg(const char *) path;
1767 syscallarg(int) flags;
1768 syscallarg(int) mode;
1769 } */
1770 int error;
1771 int fd;
1772
1773 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1774 SCARG(uap, flags), SCARG(uap, mode), &fd);
1775
1776 if (error == 0)
1777 *retval = fd;
1778
1779 return error;
1780 }
1781
1782 int
1783 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1784 {
1785 /* {
1786 syscallarg(int) fd;
1787 syscallarg(const char *) path;
1788 syscallarg(int) oflags;
1789 syscallarg(int) mode;
1790 } */
1791 int error;
1792 int fd;
1793
1794 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1795 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1796
1797 if (error == 0)
1798 *retval = fd;
1799
1800 return error;
1801 }
1802
1803 static void
1804 vfs__fhfree(fhandle_t *fhp)
1805 {
1806 size_t fhsize;
1807
1808 fhsize = FHANDLE_SIZE(fhp);
1809 kmem_free(fhp, fhsize);
1810 }
1811
1812 /*
1813 * vfs_composefh: compose a filehandle.
1814 */
1815
1816 int
1817 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1818 {
1819 struct mount *mp;
1820 struct fid *fidp;
1821 int error;
1822 size_t needfhsize;
1823 size_t fidsize;
1824
1825 mp = vp->v_mount;
1826 fidp = NULL;
1827 if (*fh_size < FHANDLE_SIZE_MIN) {
1828 fidsize = 0;
1829 } else {
1830 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1831 if (fhp != NULL) {
1832 memset(fhp, 0, *fh_size);
1833 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1834 fidp = &fhp->fh_fid;
1835 }
1836 }
1837 error = VFS_VPTOFH(vp, fidp, &fidsize);
1838 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1839 if (error == 0 && *fh_size < needfhsize) {
1840 error = E2BIG;
1841 }
1842 *fh_size = needfhsize;
1843 return error;
1844 }
1845
1846 int
1847 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1848 {
1849 struct mount *mp;
1850 fhandle_t *fhp;
1851 size_t fhsize;
1852 size_t fidsize;
1853 int error;
1854
1855 mp = vp->v_mount;
1856 fidsize = 0;
1857 error = VFS_VPTOFH(vp, NULL, &fidsize);
1858 KASSERT(error != 0);
1859 if (error != E2BIG) {
1860 goto out;
1861 }
1862 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1863 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1864 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1865 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1866 if (error == 0) {
1867 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1868 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1869 *fhpp = fhp;
1870 } else {
1871 kmem_free(fhp, fhsize);
1872 }
1873 out:
1874 return error;
1875 }
1876
1877 void
1878 vfs_composefh_free(fhandle_t *fhp)
1879 {
1880
1881 vfs__fhfree(fhp);
1882 }
1883
1884 /*
1885 * vfs_fhtovp: lookup a vnode by a filehandle.
1886 */
1887
1888 int
1889 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1890 {
1891 struct mount *mp;
1892 int error;
1893
1894 *vpp = NULL;
1895 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1896 if (mp == NULL) {
1897 error = ESTALE;
1898 goto out;
1899 }
1900 if (mp->mnt_op->vfs_fhtovp == NULL) {
1901 error = EOPNOTSUPP;
1902 goto out;
1903 }
1904 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp);
1905 out:
1906 return error;
1907 }
1908
1909 /*
1910 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1911 * the needed size.
1912 */
1913
1914 int
1915 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1916 {
1917 fhandle_t *fhp;
1918 int error;
1919
1920 if (fhsize > FHANDLE_SIZE_MAX) {
1921 return EINVAL;
1922 }
1923 if (fhsize < FHANDLE_SIZE_MIN) {
1924 return EINVAL;
1925 }
1926 again:
1927 fhp = kmem_alloc(fhsize, KM_SLEEP);
1928 error = copyin(ufhp, fhp, fhsize);
1929 if (error == 0) {
1930 /* XXX this check shouldn't be here */
1931 if (FHANDLE_SIZE(fhp) == fhsize) {
1932 *fhpp = fhp;
1933 return 0;
1934 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1935 /*
1936 * a kludge for nfsv2 padded handles.
1937 */
1938 size_t sz;
1939
1940 sz = FHANDLE_SIZE(fhp);
1941 kmem_free(fhp, fhsize);
1942 fhsize = sz;
1943 goto again;
1944 } else {
1945 /*
1946 * userland told us wrong size.
1947 */
1948 error = EINVAL;
1949 }
1950 }
1951 kmem_free(fhp, fhsize);
1952 return error;
1953 }
1954
1955 void
1956 vfs_copyinfh_free(fhandle_t *fhp)
1957 {
1958
1959 vfs__fhfree(fhp);
1960 }
1961
1962 /*
1963 * Get file handle system call
1964 */
1965 int
1966 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1967 {
1968 /* {
1969 syscallarg(char *) fname;
1970 syscallarg(fhandle_t *) fhp;
1971 syscallarg(size_t *) fh_size;
1972 } */
1973 struct vnode *vp;
1974 fhandle_t *fh;
1975 int error;
1976 struct pathbuf *pb;
1977 struct nameidata nd;
1978 size_t sz;
1979 size_t usz;
1980
1981 /*
1982 * Must be super user
1983 */
1984 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1985 0, NULL, NULL, NULL);
1986 if (error)
1987 return (error);
1988
1989 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1990 if (error) {
1991 return error;
1992 }
1993 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1994 error = namei(&nd);
1995 if (error) {
1996 pathbuf_destroy(pb);
1997 return error;
1998 }
1999 vp = nd.ni_vp;
2000 pathbuf_destroy(pb);
2001
2002 error = vfs_composefh_alloc(vp, &fh);
2003 vput(vp);
2004 if (error != 0) {
2005 return error;
2006 }
2007 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
2008 if (error != 0) {
2009 goto out;
2010 }
2011 sz = FHANDLE_SIZE(fh);
2012 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
2013 if (error != 0) {
2014 goto out;
2015 }
2016 if (usz >= sz) {
2017 error = copyout(fh, SCARG(uap, fhp), sz);
2018 } else {
2019 error = E2BIG;
2020 }
2021 out:
2022 vfs_composefh_free(fh);
2023 return (error);
2024 }
2025
2026 /*
2027 * Open a file given a file handle.
2028 *
2029 * Check permissions, allocate an open file structure,
2030 * and call the device open routine if any.
2031 */
2032
2033 int
2034 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
2035 register_t *retval)
2036 {
2037 file_t *fp;
2038 struct vnode *vp = NULL;
2039 kauth_cred_t cred = l->l_cred;
2040 file_t *nfp;
2041 int indx, error;
2042 struct vattr va;
2043 fhandle_t *fh;
2044 int flags;
2045 proc_t *p;
2046
2047 p = curproc;
2048
2049 /*
2050 * Must be super user
2051 */
2052 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2053 0, NULL, NULL, NULL)))
2054 return (error);
2055
2056 if (oflags & O_SEARCH) {
2057 oflags &= ~(int)O_SEARCH;
2058 }
2059
2060 flags = FFLAGS(oflags);
2061 if ((flags & (FREAD | FWRITE)) == 0)
2062 return (EINVAL);
2063 if ((flags & O_CREAT))
2064 return (EINVAL);
2065 if ((error = fd_allocfile(&nfp, &indx)) != 0)
2066 return (error);
2067 fp = nfp;
2068 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2069 if (error != 0) {
2070 goto bad;
2071 }
2072 error = vfs_fhtovp(fh, &vp);
2073 vfs_copyinfh_free(fh);
2074 if (error != 0) {
2075 goto bad;
2076 }
2077
2078 /* Now do an effective vn_open */
2079
2080 if (vp->v_type == VSOCK) {
2081 error = EOPNOTSUPP;
2082 goto bad;
2083 }
2084 error = vn_openchk(vp, cred, flags);
2085 if (error != 0)
2086 goto bad;
2087 if (flags & O_TRUNC) {
2088 VOP_UNLOCK(vp); /* XXX */
2089 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
2090 vattr_null(&va);
2091 va.va_size = 0;
2092 error = VOP_SETATTR(vp, &va, cred);
2093 if (error)
2094 goto bad;
2095 }
2096 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
2097 goto bad;
2098 if (flags & FWRITE) {
2099 mutex_enter(vp->v_interlock);
2100 vp->v_writecount++;
2101 mutex_exit(vp->v_interlock);
2102 }
2103
2104 /* done with modified vn_open, now finish what sys_open does. */
2105 if ((error = open_setfp(l, fp, vp, indx, flags)))
2106 return error;
2107
2108 VOP_UNLOCK(vp);
2109 *retval = indx;
2110 fd_affix(p, fp, indx);
2111 return (0);
2112
2113 bad:
2114 fd_abort(p, fp, indx);
2115 if (vp != NULL)
2116 vput(vp);
2117 return (error);
2118 }
2119
2120 int
2121 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2122 {
2123 /* {
2124 syscallarg(const void *) fhp;
2125 syscallarg(size_t) fh_size;
2126 syscallarg(int) flags;
2127 } */
2128
2129 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2130 SCARG(uap, flags), retval);
2131 }
2132
2133 int
2134 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2135 {
2136 int error;
2137 fhandle_t *fh;
2138 struct vnode *vp;
2139
2140 /*
2141 * Must be super user
2142 */
2143 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2144 0, NULL, NULL, NULL)))
2145 return (error);
2146
2147 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2148 if (error != 0)
2149 return error;
2150
2151 error = vfs_fhtovp(fh, &vp);
2152 vfs_copyinfh_free(fh);
2153 if (error != 0)
2154 return error;
2155
2156 error = vn_stat(vp, sb);
2157 vput(vp);
2158 return error;
2159 }
2160
2161
2162 /* ARGSUSED */
2163 int
2164 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2165 {
2166 /* {
2167 syscallarg(const void *) fhp;
2168 syscallarg(size_t) fh_size;
2169 syscallarg(struct stat *) sb;
2170 } */
2171 struct stat sb;
2172 int error;
2173
2174 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2175 if (error)
2176 return error;
2177 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2178 }
2179
2180 int
2181 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2182 int flags)
2183 {
2184 fhandle_t *fh;
2185 struct mount *mp;
2186 struct vnode *vp;
2187 int error;
2188
2189 /*
2190 * Must be super user
2191 */
2192 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2193 0, NULL, NULL, NULL)))
2194 return error;
2195
2196 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2197 if (error != 0)
2198 return error;
2199
2200 error = vfs_fhtovp(fh, &vp);
2201 vfs_copyinfh_free(fh);
2202 if (error != 0)
2203 return error;
2204
2205 mp = vp->v_mount;
2206 error = dostatvfs(mp, sb, l, flags, 1);
2207 vput(vp);
2208 return error;
2209 }
2210
2211 /* ARGSUSED */
2212 int
2213 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval)
2214 {
2215 /* {
2216 syscallarg(const void *) fhp;
2217 syscallarg(size_t) fh_size;
2218 syscallarg(struct statvfs *) buf;
2219 syscallarg(int) flags;
2220 } */
2221 struct statvfs *sb = STATVFSBUF_GET();
2222 int error;
2223
2224 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2225 SCARG(uap, flags));
2226 if (error == 0)
2227 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2228 STATVFSBUF_PUT(sb);
2229 return error;
2230 }
2231
2232 int
2233 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2234 dev_t dev)
2235 {
2236
2237 /*
2238 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO
2239 * in mode and dev=0.
2240 *
2241 * In all the other cases it's implementation defined behavior.
2242 */
2243
2244 if ((mode & S_IFIFO) && dev == 0)
2245 return do_sys_mkfifoat(l, fdat, pathname, mode);
2246 else
2247 return do_sys_mknodat(l, fdat, pathname, mode, dev,
2248 UIO_USERSPACE);
2249 }
2250
2251 /*
2252 * Create a special file.
2253 */
2254 /* ARGSUSED */
2255 int
2256 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2257 register_t *retval)
2258 {
2259 /* {
2260 syscallarg(const char *) path;
2261 syscallarg(mode_t) mode;
2262 syscallarg(dev_t) dev;
2263 } */
2264 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path),
2265 SCARG(uap, mode), SCARG(uap, dev));
2266 }
2267
2268 int
2269 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2270 register_t *retval)
2271 {
2272 /* {
2273 syscallarg(int) fd;
2274 syscallarg(const char *) path;
2275 syscallarg(mode_t) mode;
2276 syscallarg(int) pad;
2277 syscallarg(dev_t) dev;
2278 } */
2279
2280 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2281 SCARG(uap, mode), SCARG(uap, dev));
2282 }
2283
2284 int
2285 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2286 enum uio_seg seg)
2287 {
2288 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg);
2289 }
2290
2291 int
2292 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2293 dev_t dev, enum uio_seg seg)
2294 {
2295 struct proc *p = l->l_proc;
2296 struct vnode *vp;
2297 struct vattr vattr;
2298 int error, optype;
2299 struct pathbuf *pb;
2300 struct nameidata nd;
2301 const char *pathstring;
2302
2303 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2304 0, NULL, NULL, NULL)) != 0)
2305 return (error);
2306
2307 optype = VOP_MKNOD_DESCOFFSET;
2308
2309 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2310 if (error) {
2311 return error;
2312 }
2313 pathstring = pathbuf_stringcopy_get(pb);
2314 if (pathstring == NULL) {
2315 pathbuf_destroy(pb);
2316 return ENOMEM;
2317 }
2318
2319 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2320
2321 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2322 goto out;
2323 vp = nd.ni_vp;
2324
2325 if (vp != NULL)
2326 error = EEXIST;
2327 else {
2328 vattr_null(&vattr);
2329 /* We will read cwdi->cwdi_cmask unlocked. */
2330 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2331 vattr.va_rdev = dev;
2332
2333 switch (mode & S_IFMT) {
2334 case S_IFMT: /* used by badsect to flag bad sectors */
2335 vattr.va_type = VBAD;
2336 break;
2337 case S_IFCHR:
2338 vattr.va_type = VCHR;
2339 break;
2340 case S_IFBLK:
2341 vattr.va_type = VBLK;
2342 break;
2343 case S_IFWHT:
2344 optype = VOP_WHITEOUT_DESCOFFSET;
2345 break;
2346 case S_IFREG:
2347 #if NVERIEXEC > 0
2348 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2349 O_CREAT);
2350 #endif /* NVERIEXEC > 0 */
2351 vattr.va_type = VREG;
2352 vattr.va_rdev = VNOVAL;
2353 optype = VOP_CREATE_DESCOFFSET;
2354 break;
2355 default:
2356 error = EINVAL;
2357 break;
2358 }
2359
2360 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET &&
2361 vattr.va_rdev == VNOVAL)
2362 error = EINVAL;
2363 }
2364
2365 if (!error) {
2366 switch (optype) {
2367 case VOP_WHITEOUT_DESCOFFSET:
2368 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2369 if (error)
2370 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2371 vput(nd.ni_dvp);
2372 break;
2373
2374 case VOP_MKNOD_DESCOFFSET:
2375 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2376 &nd.ni_cnd, &vattr);
2377 if (error == 0)
2378 vrele(nd.ni_vp);
2379 vput(nd.ni_dvp);
2380 break;
2381
2382 case VOP_CREATE_DESCOFFSET:
2383 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2384 &nd.ni_cnd, &vattr);
2385 if (error == 0)
2386 vrele(nd.ni_vp);
2387 vput(nd.ni_dvp);
2388 break;
2389 }
2390 } else {
2391 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2392 if (nd.ni_dvp == vp)
2393 vrele(nd.ni_dvp);
2394 else
2395 vput(nd.ni_dvp);
2396 if (vp)
2397 vrele(vp);
2398 }
2399 out:
2400 pathbuf_stringcopy_put(pb, pathstring);
2401 pathbuf_destroy(pb);
2402 return (error);
2403 }
2404
2405 /*
2406 * Create a named pipe.
2407 */
2408 /* ARGSUSED */
2409 int
2410 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2411 {
2412 /* {
2413 syscallarg(const char *) path;
2414 syscallarg(int) mode;
2415 } */
2416 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2417 }
2418
2419 int
2420 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2421 register_t *retval)
2422 {
2423 /* {
2424 syscallarg(int) fd;
2425 syscallarg(const char *) path;
2426 syscallarg(int) mode;
2427 } */
2428
2429 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2430 SCARG(uap, mode));
2431 }
2432
2433 static int
2434 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2435 {
2436 struct proc *p = l->l_proc;
2437 struct vattr vattr;
2438 int error;
2439 struct pathbuf *pb;
2440 struct nameidata nd;
2441
2442 error = pathbuf_copyin(path, &pb);
2443 if (error) {
2444 return error;
2445 }
2446 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2447
2448 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2449 pathbuf_destroy(pb);
2450 return error;
2451 }
2452 if (nd.ni_vp != NULL) {
2453 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2454 if (nd.ni_dvp == nd.ni_vp)
2455 vrele(nd.ni_dvp);
2456 else
2457 vput(nd.ni_dvp);
2458 vrele(nd.ni_vp);
2459 pathbuf_destroy(pb);
2460 return (EEXIST);
2461 }
2462 vattr_null(&vattr);
2463 vattr.va_type = VFIFO;
2464 /* We will read cwdi->cwdi_cmask unlocked. */
2465 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2466 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2467 if (error == 0)
2468 vrele(nd.ni_vp);
2469 vput(nd.ni_dvp);
2470 pathbuf_destroy(pb);
2471 return (error);
2472 }
2473
2474 /*
2475 * Make a hard file link.
2476 */
2477 /* ARGSUSED */
2478 int
2479 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2480 const char *link, int follow, register_t *retval)
2481 {
2482 struct vnode *vp;
2483 struct pathbuf *linkpb;
2484 struct nameidata nd;
2485 namei_simple_flags_t ns_flags;
2486 int error;
2487
2488 if (follow & AT_SYMLINK_FOLLOW)
2489 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2490 else
2491 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2492
2493 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2494 if (error != 0)
2495 return (error);
2496 error = pathbuf_copyin(link, &linkpb);
2497 if (error) {
2498 goto out1;
2499 }
2500 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2501 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2502 goto out2;
2503 if (nd.ni_vp) {
2504 error = EEXIST;
2505 goto abortop;
2506 }
2507 /* Prevent hard links on directories. */
2508 if (vp->v_type == VDIR) {
2509 error = EPERM;
2510 goto abortop;
2511 }
2512 /* Prevent cross-mount operation. */
2513 if (nd.ni_dvp->v_mount != vp->v_mount) {
2514 error = EXDEV;
2515 goto abortop;
2516 }
2517 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2518 VOP_UNLOCK(nd.ni_dvp);
2519 vrele(nd.ni_dvp);
2520 out2:
2521 pathbuf_destroy(linkpb);
2522 out1:
2523 vrele(vp);
2524 return (error);
2525 abortop:
2526 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2527 if (nd.ni_dvp == nd.ni_vp)
2528 vrele(nd.ni_dvp);
2529 else
2530 vput(nd.ni_dvp);
2531 if (nd.ni_vp != NULL)
2532 vrele(nd.ni_vp);
2533 goto out2;
2534 }
2535
2536 int
2537 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2538 {
2539 /* {
2540 syscallarg(const char *) path;
2541 syscallarg(const char *) link;
2542 } */
2543 const char *path = SCARG(uap, path);
2544 const char *link = SCARG(uap, link);
2545
2546 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2547 AT_SYMLINK_FOLLOW, retval);
2548 }
2549
2550 int
2551 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2552 register_t *retval)
2553 {
2554 /* {
2555 syscallarg(int) fd1;
2556 syscallarg(const char *) name1;
2557 syscallarg(int) fd2;
2558 syscallarg(const char *) name2;
2559 syscallarg(int) flags;
2560 } */
2561 int fd1 = SCARG(uap, fd1);
2562 const char *name1 = SCARG(uap, name1);
2563 int fd2 = SCARG(uap, fd2);
2564 const char *name2 = SCARG(uap, name2);
2565 int follow;
2566
2567 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2568
2569 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2570 }
2571
2572
2573 int
2574 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2575 {
2576 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2577 }
2578
2579 static int
2580 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2581 const char *link, enum uio_seg seg)
2582 {
2583 struct proc *p = curproc;
2584 struct vattr vattr;
2585 char *path;
2586 int error;
2587 size_t len;
2588 struct pathbuf *linkpb;
2589 struct nameidata nd;
2590
2591 KASSERT(l != NULL || fdat == AT_FDCWD);
2592
2593 path = PNBUF_GET();
2594 if (seg == UIO_USERSPACE) {
2595 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0)
2596 goto out1;
2597 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2598 goto out1;
2599 } else {
2600 len = strlen(patharg) + 1;
2601 KASSERT(len <= MAXPATHLEN);
2602 memcpy(path, patharg, len);
2603 linkpb = pathbuf_create(link);
2604 if (linkpb == NULL) {
2605 error = ENOMEM;
2606 goto out1;
2607 }
2608 }
2609 ktrkuser("symlink-target", path, len - 1);
2610
2611 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2612 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2613 goto out2;
2614 if (nd.ni_vp) {
2615 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2616 if (nd.ni_dvp == nd.ni_vp)
2617 vrele(nd.ni_dvp);
2618 else
2619 vput(nd.ni_dvp);
2620 vrele(nd.ni_vp);
2621 error = EEXIST;
2622 goto out2;
2623 }
2624 vattr_null(&vattr);
2625 vattr.va_type = VLNK;
2626 /* We will read cwdi->cwdi_cmask unlocked. */
2627 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2628 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2629 if (error == 0)
2630 vrele(nd.ni_vp);
2631 vput(nd.ni_dvp);
2632 out2:
2633 pathbuf_destroy(linkpb);
2634 out1:
2635 PNBUF_PUT(path);
2636 return (error);
2637 }
2638
2639 /*
2640 * Make a symbolic link.
2641 */
2642 /* ARGSUSED */
2643 int
2644 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2645 {
2646 /* {
2647 syscallarg(const char *) path;
2648 syscallarg(const char *) link;
2649 } */
2650
2651 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2652 UIO_USERSPACE);
2653 }
2654
2655 int
2656 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2657 register_t *retval)
2658 {
2659 /* {
2660 syscallarg(const char *) path1;
2661 syscallarg(int) fd;
2662 syscallarg(const char *) path2;
2663 } */
2664
2665 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2666 SCARG(uap, path2), UIO_USERSPACE);
2667 }
2668
2669 /*
2670 * Delete a whiteout from the filesystem.
2671 */
2672 /* ARGSUSED */
2673 int
2674 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2675 {
2676 /* {
2677 syscallarg(const char *) path;
2678 } */
2679 int error;
2680 struct pathbuf *pb;
2681 struct nameidata nd;
2682
2683 error = pathbuf_copyin(SCARG(uap, path), &pb);
2684 if (error) {
2685 return error;
2686 }
2687
2688 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2689 error = namei(&nd);
2690 if (error) {
2691 pathbuf_destroy(pb);
2692 return (error);
2693 }
2694
2695 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2696 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2697 if (nd.ni_dvp == nd.ni_vp)
2698 vrele(nd.ni_dvp);
2699 else
2700 vput(nd.ni_dvp);
2701 if (nd.ni_vp)
2702 vrele(nd.ni_vp);
2703 pathbuf_destroy(pb);
2704 return (EEXIST);
2705 }
2706 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2707 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2708 vput(nd.ni_dvp);
2709 pathbuf_destroy(pb);
2710 return (error);
2711 }
2712
2713 /*
2714 * Delete a name from the filesystem.
2715 */
2716 /* ARGSUSED */
2717 int
2718 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2719 {
2720 /* {
2721 syscallarg(const char *) path;
2722 } */
2723
2724 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2725 }
2726
2727 int
2728 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2729 register_t *retval)
2730 {
2731 /* {
2732 syscallarg(int) fd;
2733 syscallarg(const char *) path;
2734 syscallarg(int) flag;
2735 } */
2736
2737 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2738 SCARG(uap, flag), UIO_USERSPACE);
2739 }
2740
2741 int
2742 do_sys_unlink(const char *arg, enum uio_seg seg)
2743 {
2744 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2745 }
2746
2747 static int
2748 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2749 enum uio_seg seg)
2750 {
2751 struct vnode *vp;
2752 int error;
2753 struct pathbuf *pb;
2754 struct nameidata nd;
2755 const char *pathstring;
2756
2757 KASSERT(l != NULL || fdat == AT_FDCWD);
2758
2759 error = pathbuf_maybe_copyin(arg, seg, &pb);
2760 if (error) {
2761 return error;
2762 }
2763 pathstring = pathbuf_stringcopy_get(pb);
2764 if (pathstring == NULL) {
2765 pathbuf_destroy(pb);
2766 return ENOMEM;
2767 }
2768
2769 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2770 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2771 goto out;
2772 vp = nd.ni_vp;
2773
2774 /*
2775 * The root of a mounted filesystem cannot be deleted.
2776 */
2777 if ((vp->v_vflag & VV_ROOT) != 0) {
2778 error = EBUSY;
2779 goto abort;
2780 }
2781
2782 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2783 error = EBUSY;
2784 goto abort;
2785 }
2786
2787 /*
2788 * No rmdir "." please.
2789 */
2790 if (nd.ni_dvp == vp) {
2791 error = EINVAL;
2792 goto abort;
2793 }
2794
2795 /*
2796 * AT_REMOVEDIR is required to remove a directory
2797 */
2798 if (vp->v_type == VDIR) {
2799 if (!(flags & AT_REMOVEDIR)) {
2800 error = EPERM;
2801 goto abort;
2802 } else {
2803 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2804 vput(nd.ni_dvp);
2805 goto out;
2806 }
2807 }
2808
2809 /*
2810 * Starting here we only deal with non directories.
2811 */
2812 if (flags & AT_REMOVEDIR) {
2813 error = ENOTDIR;
2814 goto abort;
2815 }
2816
2817 #if NVERIEXEC > 0
2818 /* Handle remove requests for veriexec entries. */
2819 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2820 goto abort;
2821 }
2822 #endif /* NVERIEXEC > 0 */
2823
2824 #ifdef FILEASSOC
2825 (void)fileassoc_file_delete(vp);
2826 #endif /* FILEASSOC */
2827 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2828 vput(nd.ni_dvp);
2829 goto out;
2830
2831 abort:
2832 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2833 if (nd.ni_dvp == vp)
2834 vrele(nd.ni_dvp);
2835 else
2836 vput(nd.ni_dvp);
2837 vput(vp);
2838
2839 out:
2840 pathbuf_stringcopy_put(pb, pathstring);
2841 pathbuf_destroy(pb);
2842 return (error);
2843 }
2844
2845 /*
2846 * Reposition read/write file offset.
2847 */
2848 int
2849 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2850 {
2851 /* {
2852 syscallarg(int) fd;
2853 syscallarg(int) pad;
2854 syscallarg(off_t) offset;
2855 syscallarg(int) whence;
2856 } */
2857 kauth_cred_t cred = l->l_cred;
2858 file_t *fp;
2859 struct vnode *vp;
2860 struct vattr vattr;
2861 off_t newoff;
2862 int error, fd;
2863
2864 fd = SCARG(uap, fd);
2865
2866 if ((fp = fd_getfile(fd)) == NULL)
2867 return (EBADF);
2868
2869 vp = fp->f_vnode;
2870 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2871 error = ESPIPE;
2872 goto out;
2873 }
2874
2875 vn_lock(vp, LK_SHARED | LK_RETRY);
2876
2877 switch (SCARG(uap, whence)) {
2878 case SEEK_CUR:
2879 newoff = fp->f_offset + SCARG(uap, offset);
2880 break;
2881 case SEEK_END:
2882 error = VOP_GETATTR(vp, &vattr, cred);
2883 if (error) {
2884 VOP_UNLOCK(vp);
2885 goto out;
2886 }
2887 newoff = SCARG(uap, offset) + vattr.va_size;
2888 break;
2889 case SEEK_SET:
2890 newoff = SCARG(uap, offset);
2891 break;
2892 default:
2893 error = EINVAL;
2894 VOP_UNLOCK(vp);
2895 goto out;
2896 }
2897 VOP_UNLOCK(vp);
2898 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2899 *(off_t *)retval = fp->f_offset = newoff;
2900 }
2901 out:
2902 fd_putfile(fd);
2903 return (error);
2904 }
2905
2906 /*
2907 * Positional read system call.
2908 */
2909 int
2910 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2911 {
2912 /* {
2913 syscallarg(int) fd;
2914 syscallarg(void *) buf;
2915 syscallarg(size_t) nbyte;
2916 syscallarg(off_t) offset;
2917 } */
2918 file_t *fp;
2919 struct vnode *vp;
2920 off_t offset;
2921 int error, fd = SCARG(uap, fd);
2922
2923 if ((fp = fd_getfile(fd)) == NULL)
2924 return (EBADF);
2925
2926 if ((fp->f_flag & FREAD) == 0) {
2927 fd_putfile(fd);
2928 return (EBADF);
2929 }
2930
2931 vp = fp->f_vnode;
2932 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2933 error = ESPIPE;
2934 goto out;
2935 }
2936
2937 offset = SCARG(uap, offset);
2938
2939 /*
2940 * XXX This works because no file systems actually
2941 * XXX take any action on the seek operation.
2942 */
2943 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2944 goto out;
2945
2946 /* dofileread() will unuse the descriptor for us */
2947 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2948 &offset, 0, retval));
2949
2950 out:
2951 fd_putfile(fd);
2952 return (error);
2953 }
2954
2955 /*
2956 * Positional scatter read system call.
2957 */
2958 int
2959 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2960 {
2961 /* {
2962 syscallarg(int) fd;
2963 syscallarg(const struct iovec *) iovp;
2964 syscallarg(int) iovcnt;
2965 syscallarg(off_t) offset;
2966 } */
2967 off_t offset = SCARG(uap, offset);
2968
2969 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2970 SCARG(uap, iovcnt), &offset, 0, retval);
2971 }
2972
2973 /*
2974 * Positional write system call.
2975 */
2976 int
2977 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2978 {
2979 /* {
2980 syscallarg(int) fd;
2981 syscallarg(const void *) buf;
2982 syscallarg(size_t) nbyte;
2983 syscallarg(off_t) offset;
2984 } */
2985 file_t *fp;
2986 struct vnode *vp;
2987 off_t offset;
2988 int error, fd = SCARG(uap, fd);
2989
2990 if ((fp = fd_getfile(fd)) == NULL)
2991 return (EBADF);
2992
2993 if ((fp->f_flag & FWRITE) == 0) {
2994 fd_putfile(fd);
2995 return (EBADF);
2996 }
2997
2998 vp = fp->f_vnode;
2999 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
3000 error = ESPIPE;
3001 goto out;
3002 }
3003
3004 offset = SCARG(uap, offset);
3005
3006 /*
3007 * XXX This works because no file systems actually
3008 * XXX take any action on the seek operation.
3009 */
3010 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
3011 goto out;
3012
3013 /* dofilewrite() will unuse the descriptor for us */
3014 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
3015 &offset, 0, retval));
3016
3017 out:
3018 fd_putfile(fd);
3019 return (error);
3020 }
3021
3022 /*
3023 * Positional gather write system call.
3024 */
3025 int
3026 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
3027 {
3028 /* {
3029 syscallarg(int) fd;
3030 syscallarg(const struct iovec *) iovp;
3031 syscallarg(int) iovcnt;
3032 syscallarg(off_t) offset;
3033 } */
3034 off_t offset = SCARG(uap, offset);
3035
3036 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
3037 SCARG(uap, iovcnt), &offset, 0, retval);
3038 }
3039
3040 /*
3041 * Check access permissions.
3042 */
3043 int
3044 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
3045 {
3046 /* {
3047 syscallarg(const char *) path;
3048 syscallarg(int) flags;
3049 } */
3050
3051 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
3052 SCARG(uap, flags), 0);
3053 }
3054
3055 int
3056 do_sys_accessat(struct lwp *l, int fdat, const char *path,
3057 int mode, int flags)
3058 {
3059 kauth_cred_t cred;
3060 struct vnode *vp;
3061 int error, nd_flag, vmode;
3062 struct pathbuf *pb;
3063 struct nameidata nd;
3064
3065 CTASSERT(F_OK == 0);
3066 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
3067 /* nonsense mode */
3068 return EINVAL;
3069 }
3070
3071 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT;
3072 if (flags & AT_SYMLINK_NOFOLLOW)
3073 nd_flag &= ~FOLLOW;
3074
3075 error = pathbuf_copyin(path, &pb);
3076 if (error)
3077 return error;
3078
3079 NDINIT(&nd, LOOKUP, nd_flag, pb);
3080
3081 /* Override default credentials */
3082 cred = kauth_cred_dup(l->l_cred);
3083 if (!(flags & AT_EACCESS)) {
3084 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
3085 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
3086 }
3087 nd.ni_cnd.cn_cred = cred;
3088
3089 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3090 pathbuf_destroy(pb);
3091 goto out;
3092 }
3093 vp = nd.ni_vp;
3094 pathbuf_destroy(pb);
3095
3096 /* Flags == 0 means only check for existence. */
3097 if (mode) {
3098 vmode = 0;
3099 if (mode & R_OK)
3100 vmode |= VREAD;
3101 if (mode & W_OK)
3102 vmode |= VWRITE;
3103 if (mode & X_OK)
3104 vmode |= VEXEC;
3105
3106 error = VOP_ACCESS(vp, vmode, cred);
3107 if (!error && (vmode & VWRITE))
3108 error = vn_writechk(vp);
3109 }
3110 vput(vp);
3111 out:
3112 kauth_cred_free(cred);
3113 return (error);
3114 }
3115
3116 int
3117 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3118 register_t *retval)
3119 {
3120 /* {
3121 syscallarg(int) fd;
3122 syscallarg(const char *) path;
3123 syscallarg(int) amode;
3124 syscallarg(int) flag;
3125 } */
3126
3127 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3128 SCARG(uap, amode), SCARG(uap, flag));
3129 }
3130
3131 /*
3132 * Common code for all sys_stat functions, including compat versions.
3133 */
3134 int
3135 do_sys_stat(const char *userpath, unsigned int nd_flag,
3136 struct stat *sb)
3137 {
3138 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3139 }
3140
3141 int
3142 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3143 unsigned int nd_flag, struct stat *sb)
3144 {
3145 int error;
3146 struct pathbuf *pb;
3147 struct nameidata nd;
3148
3149 KASSERT(l != NULL || fdat == AT_FDCWD);
3150
3151 error = pathbuf_copyin(userpath, &pb);
3152 if (error) {
3153 return error;
3154 }
3155
3156 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3157
3158 error = fd_nameiat(l, fdat, &nd);
3159 if (error != 0) {
3160 pathbuf_destroy(pb);
3161 return error;
3162 }
3163 error = vn_stat(nd.ni_vp, sb);
3164 vput(nd.ni_vp);
3165 pathbuf_destroy(pb);
3166 return error;
3167 }
3168
3169 /*
3170 * Get file status; this version follows links.
3171 */
3172 /* ARGSUSED */
3173 int
3174 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3175 {
3176 /* {
3177 syscallarg(const char *) path;
3178 syscallarg(struct stat *) ub;
3179 } */
3180 struct stat sb;
3181 int error;
3182
3183 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3184 if (error)
3185 return error;
3186 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3187 }
3188
3189 /*
3190 * Get file status; this version does not follow links.
3191 */
3192 /* ARGSUSED */
3193 int
3194 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3195 {
3196 /* {
3197 syscallarg(const char *) path;
3198 syscallarg(struct stat *) ub;
3199 } */
3200 struct stat sb;
3201 int error;
3202
3203 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3204 if (error)
3205 return error;
3206 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3207 }
3208
3209 int
3210 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3211 register_t *retval)
3212 {
3213 /* {
3214 syscallarg(int) fd;
3215 syscallarg(const char *) path;
3216 syscallarg(struct stat *) buf;
3217 syscallarg(int) flag;
3218 } */
3219 unsigned int nd_flag;
3220 struct stat sb;
3221 int error;
3222
3223 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3224 nd_flag = NOFOLLOW;
3225 else
3226 nd_flag = FOLLOW;
3227
3228 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3229 &sb);
3230 if (error)
3231 return error;
3232 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3233 }
3234
3235 static int
3236 kern_pathconf(register_t *retval, const char *path, int name, int flag)
3237 {
3238 int error;
3239 struct pathbuf *pb;
3240 struct nameidata nd;
3241
3242 error = pathbuf_copyin(path, &pb);
3243 if (error) {
3244 return error;
3245 }
3246 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb);
3247 if ((error = namei(&nd)) != 0) {
3248 pathbuf_destroy(pb);
3249 return error;
3250 }
3251 error = VOP_PATHCONF(nd.ni_vp, name, retval);
3252 vput(nd.ni_vp);
3253 pathbuf_destroy(pb);
3254 return error;
3255 }
3256
3257 /*
3258 * Get configurable pathname variables.
3259 */
3260 /* ARGSUSED */
3261 int
3262 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap,
3263 register_t *retval)
3264 {
3265 /* {
3266 syscallarg(const char *) path;
3267 syscallarg(int) name;
3268 } */
3269 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name),
3270 FOLLOW);
3271 }
3272
3273 /* ARGSUSED */
3274 int
3275 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap,
3276 register_t *retval)
3277 {
3278 /* {
3279 syscallarg(const char *) path;
3280 syscallarg(int) name;
3281 } */
3282 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name),
3283 NOFOLLOW);
3284 }
3285
3286 /*
3287 * Return target name of a symbolic link.
3288 */
3289 /* ARGSUSED */
3290 int
3291 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3292 register_t *retval)
3293 {
3294 /* {
3295 syscallarg(const char *) path;
3296 syscallarg(char *) buf;
3297 syscallarg(size_t) count;
3298 } */
3299 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3300 SCARG(uap, buf), SCARG(uap, count), retval);
3301 }
3302
3303 static int
3304 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3305 size_t count, register_t *retval)
3306 {
3307 struct vnode *vp;
3308 struct iovec aiov;
3309 struct uio auio;
3310 int error;
3311 struct pathbuf *pb;
3312 struct nameidata nd;
3313
3314 error = pathbuf_copyin(path, &pb);
3315 if (error) {
3316 return error;
3317 }
3318 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb);
3319 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3320 pathbuf_destroy(pb);
3321 return error;
3322 }
3323 vp = nd.ni_vp;
3324 pathbuf_destroy(pb);
3325 if (vp->v_type != VLNK)
3326 error = EINVAL;
3327 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3328 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3329 aiov.iov_base = buf;
3330 aiov.iov_len = count;
3331 auio.uio_iov = &aiov;
3332 auio.uio_iovcnt = 1;
3333 auio.uio_offset = 0;
3334 auio.uio_rw = UIO_READ;
3335 KASSERT(l == curlwp);
3336 auio.uio_vmspace = l->l_proc->p_vmspace;
3337 auio.uio_resid = count;
3338 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3339 *retval = count - auio.uio_resid;
3340 }
3341 vput(vp);
3342 return (error);
3343 }
3344
3345 int
3346 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3347 register_t *retval)
3348 {
3349 /* {
3350 syscallarg(int) fd;
3351 syscallarg(const char *) path;
3352 syscallarg(char *) buf;
3353 syscallarg(size_t) bufsize;
3354 } */
3355
3356 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3357 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3358 }
3359
3360 /*
3361 * Change flags of a file given a path name.
3362 */
3363 /* ARGSUSED */
3364 int
3365 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3366 {
3367 /* {
3368 syscallarg(const char *) path;
3369 syscallarg(u_long) flags;
3370 } */
3371 struct vnode *vp;
3372 int error;
3373
3374 error = namei_simple_user(SCARG(uap, path),
3375 NSM_FOLLOW_TRYEMULROOT, &vp);
3376 if (error != 0)
3377 return (error);
3378 error = change_flags(vp, SCARG(uap, flags), l);
3379 vput(vp);
3380 return (error);
3381 }
3382
3383 /*
3384 * Change flags of a file given a file descriptor.
3385 */
3386 /* ARGSUSED */
3387 int
3388 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3389 {
3390 /* {
3391 syscallarg(int) fd;
3392 syscallarg(u_long) flags;
3393 } */
3394 struct vnode *vp;
3395 file_t *fp;
3396 int error;
3397
3398 /* fd_getvnode() will use the descriptor for us */
3399 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3400 return (error);
3401 vp = fp->f_vnode;
3402 error = change_flags(vp, SCARG(uap, flags), l);
3403 VOP_UNLOCK(vp);
3404 fd_putfile(SCARG(uap, fd));
3405 return (error);
3406 }
3407
3408 /*
3409 * Change flags of a file given a path name; this version does
3410 * not follow links.
3411 */
3412 int
3413 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3414 {
3415 /* {
3416 syscallarg(const char *) path;
3417 syscallarg(u_long) flags;
3418 } */
3419 struct vnode *vp;
3420 int error;
3421
3422 error = namei_simple_user(SCARG(uap, path),
3423 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3424 if (error != 0)
3425 return (error);
3426 error = change_flags(vp, SCARG(uap, flags), l);
3427 vput(vp);
3428 return (error);
3429 }
3430
3431 /*
3432 * Common routine to change flags of a file.
3433 */
3434 int
3435 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3436 {
3437 struct vattr vattr;
3438 int error;
3439
3440 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3441
3442 vattr_null(&vattr);
3443 vattr.va_flags = flags;
3444 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3445
3446 return (error);
3447 }
3448
3449 /*
3450 * Change mode of a file given path name; this version follows links.
3451 */
3452 /* ARGSUSED */
3453 int
3454 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3455 {
3456 /* {
3457 syscallarg(const char *) path;
3458 syscallarg(int) mode;
3459 } */
3460 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3461 SCARG(uap, mode), 0);
3462 }
3463
3464 int
3465 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3466 {
3467 int error;
3468 struct vnode *vp;
3469 namei_simple_flags_t ns_flag;
3470
3471 if (flags & AT_SYMLINK_NOFOLLOW)
3472 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3473 else
3474 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3475
3476 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3477 if (error != 0)
3478 return error;
3479
3480 error = change_mode(vp, mode, l);
3481
3482 vrele(vp);
3483
3484 return (error);
3485 }
3486
3487 /*
3488 * Change mode of a file given a file descriptor.
3489 */
3490 /* ARGSUSED */
3491 int
3492 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3493 {
3494 /* {
3495 syscallarg(int) fd;
3496 syscallarg(int) mode;
3497 } */
3498 file_t *fp;
3499 int error;
3500
3501 /* fd_getvnode() will use the descriptor for us */
3502 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3503 return (error);
3504 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3505 fd_putfile(SCARG(uap, fd));
3506 return (error);
3507 }
3508
3509 int
3510 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3511 register_t *retval)
3512 {
3513 /* {
3514 syscallarg(int) fd;
3515 syscallarg(const char *) path;
3516 syscallarg(int) mode;
3517 syscallarg(int) flag;
3518 } */
3519
3520 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3521 SCARG(uap, mode), SCARG(uap, flag));
3522 }
3523
3524 /*
3525 * Change mode of a file given path name; this version does not follow links.
3526 */
3527 /* ARGSUSED */
3528 int
3529 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3530 {
3531 /* {
3532 syscallarg(const char *) path;
3533 syscallarg(int) mode;
3534 } */
3535 int error;
3536 struct vnode *vp;
3537
3538 error = namei_simple_user(SCARG(uap, path),
3539 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3540 if (error != 0)
3541 return (error);
3542
3543 error = change_mode(vp, SCARG(uap, mode), l);
3544
3545 vrele(vp);
3546 return (error);
3547 }
3548
3549 /*
3550 * Common routine to set mode given a vnode.
3551 */
3552 static int
3553 change_mode(struct vnode *vp, int mode, struct lwp *l)
3554 {
3555 struct vattr vattr;
3556 int error;
3557
3558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3559 vattr_null(&vattr);
3560 vattr.va_mode = mode & ALLPERMS;
3561 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3562 VOP_UNLOCK(vp);
3563 return (error);
3564 }
3565
3566 /*
3567 * Set ownership given a path name; this version follows links.
3568 */
3569 /* ARGSUSED */
3570 int
3571 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3572 {
3573 /* {
3574 syscallarg(const char *) path;
3575 syscallarg(uid_t) uid;
3576 syscallarg(gid_t) gid;
3577 } */
3578 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3579 SCARG(uap, gid), 0);
3580 }
3581
3582 int
3583 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3584 gid_t gid, int flags)
3585 {
3586 int error;
3587 struct vnode *vp;
3588 namei_simple_flags_t ns_flag;
3589
3590 if (flags & AT_SYMLINK_NOFOLLOW)
3591 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3592 else
3593 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3594
3595 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3596 if (error != 0)
3597 return error;
3598
3599 error = change_owner(vp, uid, gid, l, 0);
3600
3601 vrele(vp);
3602
3603 return (error);
3604 }
3605
3606 /*
3607 * Set ownership given a path name; this version follows links.
3608 * Provides POSIX semantics.
3609 */
3610 /* ARGSUSED */
3611 int
3612 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3613 {
3614 /* {
3615 syscallarg(const char *) path;
3616 syscallarg(uid_t) uid;
3617 syscallarg(gid_t) gid;
3618 } */
3619 int error;
3620 struct vnode *vp;
3621
3622 error = namei_simple_user(SCARG(uap, path),
3623 NSM_FOLLOW_TRYEMULROOT, &vp);
3624 if (error != 0)
3625 return (error);
3626
3627 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3628
3629 vrele(vp);
3630 return (error);
3631 }
3632
3633 /*
3634 * Set ownership given a file descriptor.
3635 */
3636 /* ARGSUSED */
3637 int
3638 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3639 {
3640 /* {
3641 syscallarg(int) fd;
3642 syscallarg(uid_t) uid;
3643 syscallarg(gid_t) gid;
3644 } */
3645 int error;
3646 file_t *fp;
3647
3648 /* fd_getvnode() will use the descriptor for us */
3649 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3650 return (error);
3651 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3652 l, 0);
3653 fd_putfile(SCARG(uap, fd));
3654 return (error);
3655 }
3656
3657 int
3658 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3659 register_t *retval)
3660 {
3661 /* {
3662 syscallarg(int) fd;
3663 syscallarg(const char *) path;
3664 syscallarg(uid_t) owner;
3665 syscallarg(gid_t) group;
3666 syscallarg(int) flag;
3667 } */
3668
3669 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3670 SCARG(uap, owner), SCARG(uap, group),
3671 SCARG(uap, flag));
3672 }
3673
3674 /*
3675 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3676 */
3677 /* ARGSUSED */
3678 int
3679 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3680 {
3681 /* {
3682 syscallarg(int) fd;
3683 syscallarg(uid_t) uid;
3684 syscallarg(gid_t) gid;
3685 } */
3686 int error;
3687 file_t *fp;
3688
3689 /* fd_getvnode() will use the descriptor for us */
3690 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3691 return (error);
3692 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3693 l, 1);
3694 fd_putfile(SCARG(uap, fd));
3695 return (error);
3696 }
3697
3698 /*
3699 * Set ownership given a path name; this version does not follow links.
3700 */
3701 /* ARGSUSED */
3702 int
3703 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3704 {
3705 /* {
3706 syscallarg(const char *) path;
3707 syscallarg(uid_t) uid;
3708 syscallarg(gid_t) gid;
3709 } */
3710 int error;
3711 struct vnode *vp;
3712
3713 error = namei_simple_user(SCARG(uap, path),
3714 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3715 if (error != 0)
3716 return (error);
3717
3718 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3719
3720 vrele(vp);
3721 return (error);
3722 }
3723
3724 /*
3725 * Set ownership given a path name; this version does not follow links.
3726 * Provides POSIX/XPG semantics.
3727 */
3728 /* ARGSUSED */
3729 int
3730 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3731 {
3732 /* {
3733 syscallarg(const char *) path;
3734 syscallarg(uid_t) uid;
3735 syscallarg(gid_t) gid;
3736 } */
3737 int error;
3738 struct vnode *vp;
3739
3740 error = namei_simple_user(SCARG(uap, path),
3741 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3742 if (error != 0)
3743 return (error);
3744
3745 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3746
3747 vrele(vp);
3748 return (error);
3749 }
3750
3751 /*
3752 * Common routine to set ownership given a vnode.
3753 */
3754 static int
3755 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3756 int posix_semantics)
3757 {
3758 struct vattr vattr;
3759 mode_t newmode;
3760 int error;
3761
3762 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3763 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3764 goto out;
3765
3766 #define CHANGED(x) ((int)(x) != -1)
3767 newmode = vattr.va_mode;
3768 if (posix_semantics) {
3769 /*
3770 * POSIX/XPG semantics: if the caller is not the super-user,
3771 * clear set-user-id and set-group-id bits. Both POSIX and
3772 * the XPG consider the behaviour for calls by the super-user
3773 * implementation-defined; we leave the set-user-id and set-
3774 * group-id settings intact in that case.
3775 */
3776 if (vattr.va_mode & S_ISUID) {
3777 if (kauth_authorize_vnode(l->l_cred,
3778 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3779 newmode &= ~S_ISUID;
3780 }
3781 if (vattr.va_mode & S_ISGID) {
3782 if (kauth_authorize_vnode(l->l_cred,
3783 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3784 newmode &= ~S_ISGID;
3785 }
3786 } else {
3787 /*
3788 * NetBSD semantics: when changing owner and/or group,
3789 * clear the respective bit(s).
3790 */
3791 if (CHANGED(uid))
3792 newmode &= ~S_ISUID;
3793 if (CHANGED(gid))
3794 newmode &= ~S_ISGID;
3795 }
3796 /* Update va_mode iff altered. */
3797 if (vattr.va_mode == newmode)
3798 newmode = VNOVAL;
3799
3800 vattr_null(&vattr);
3801 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3802 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3803 vattr.va_mode = newmode;
3804 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3805 #undef CHANGED
3806
3807 out:
3808 VOP_UNLOCK(vp);
3809 return (error);
3810 }
3811
3812 /*
3813 * Set the access and modification times given a path name; this
3814 * version follows links.
3815 */
3816 /* ARGSUSED */
3817 int
3818 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3819 register_t *retval)
3820 {
3821 /* {
3822 syscallarg(const char *) path;
3823 syscallarg(const struct timeval *) tptr;
3824 } */
3825
3826 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3827 SCARG(uap, tptr), UIO_USERSPACE);
3828 }
3829
3830 /*
3831 * Set the access and modification times given a file descriptor.
3832 */
3833 /* ARGSUSED */
3834 int
3835 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3836 register_t *retval)
3837 {
3838 /* {
3839 syscallarg(int) fd;
3840 syscallarg(const struct timeval *) tptr;
3841 } */
3842 int error;
3843 file_t *fp;
3844
3845 /* fd_getvnode() will use the descriptor for us */
3846 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3847 return (error);
3848 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3849 UIO_USERSPACE);
3850 fd_putfile(SCARG(uap, fd));
3851 return (error);
3852 }
3853
3854 int
3855 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3856 register_t *retval)
3857 {
3858 /* {
3859 syscallarg(int) fd;
3860 syscallarg(const struct timespec *) tptr;
3861 } */
3862 int error;
3863 file_t *fp;
3864
3865 /* fd_getvnode() will use the descriptor for us */
3866 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3867 return (error);
3868 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3869 SCARG(uap, tptr), UIO_USERSPACE);
3870 fd_putfile(SCARG(uap, fd));
3871 return (error);
3872 }
3873
3874 /*
3875 * Set the access and modification times given a path name; this
3876 * version does not follow links.
3877 */
3878 int
3879 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3880 register_t *retval)
3881 {
3882 /* {
3883 syscallarg(const char *) path;
3884 syscallarg(const struct timeval *) tptr;
3885 } */
3886
3887 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3888 SCARG(uap, tptr), UIO_USERSPACE);
3889 }
3890
3891 int
3892 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3893 register_t *retval)
3894 {
3895 /* {
3896 syscallarg(int) fd;
3897 syscallarg(const char *) path;
3898 syscallarg(const struct timespec *) tptr;
3899 syscallarg(int) flag;
3900 } */
3901 int follow;
3902 const struct timespec *tptr;
3903 int error;
3904
3905 tptr = SCARG(uap, tptr);
3906 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3907
3908 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3909 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3910
3911 return error;
3912 }
3913
3914 /*
3915 * Common routine to set access and modification times given a vnode.
3916 */
3917 int
3918 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3919 const struct timespec *tptr, enum uio_seg seg)
3920 {
3921 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3922 }
3923
3924 int
3925 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3926 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3927 {
3928 struct vattr vattr;
3929 int error, dorele = 0;
3930 namei_simple_flags_t sflags;
3931 bool vanull, setbirthtime;
3932 struct timespec ts[2];
3933
3934 KASSERT(l != NULL || fdat == AT_FDCWD);
3935
3936 /*
3937 * I have checked all callers and they pass either FOLLOW,
3938 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3939 * is 0. More to the point, they don't pass anything else.
3940 * Let's keep it that way at least until the namei interfaces
3941 * are fully sanitized.
3942 */
3943 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3944 sflags = (flag == FOLLOW) ?
3945 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3946
3947 if (tptr == NULL) {
3948 vanull = true;
3949 nanotime(&ts[0]);
3950 ts[1] = ts[0];
3951 } else {
3952 vanull = false;
3953 if (seg != UIO_SYSSPACE) {
3954 error = copyin(tptr, ts, sizeof (ts));
3955 if (error != 0)
3956 return error;
3957 } else {
3958 ts[0] = tptr[0];
3959 ts[1] = tptr[1];
3960 }
3961 }
3962
3963 if (ts[0].tv_nsec == UTIME_NOW) {
3964 nanotime(&ts[0]);
3965 if (ts[1].tv_nsec == UTIME_NOW) {
3966 vanull = true;
3967 ts[1] = ts[0];
3968 }
3969 } else if (ts[1].tv_nsec == UTIME_NOW)
3970 nanotime(&ts[1]);
3971
3972 if (vp == NULL) {
3973 /* note: SEG describes TPTR, not PATH; PATH is always user */
3974 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3975 if (error != 0)
3976 return error;
3977 dorele = 1;
3978 }
3979
3980 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3981 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3982 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3983 vattr_null(&vattr);
3984
3985 if (ts[0].tv_nsec != UTIME_OMIT)
3986 vattr.va_atime = ts[0];
3987
3988 if (ts[1].tv_nsec != UTIME_OMIT) {
3989 vattr.va_mtime = ts[1];
3990 if (setbirthtime)
3991 vattr.va_birthtime = ts[1];
3992 }
3993
3994 if (vanull)
3995 vattr.va_vaflags |= VA_UTIMES_NULL;
3996 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3997 VOP_UNLOCK(vp);
3998
3999 if (dorele != 0)
4000 vrele(vp);
4001
4002 return error;
4003 }
4004
4005 int
4006 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
4007 const struct timeval *tptr, enum uio_seg seg)
4008 {
4009 struct timespec ts[2];
4010 struct timespec *tsptr = NULL;
4011 int error;
4012
4013 if (tptr != NULL) {
4014 struct timeval tv[2];
4015
4016 if (seg != UIO_SYSSPACE) {
4017 error = copyin(tptr, tv, sizeof(tv));
4018 if (error != 0)
4019 return error;
4020 tptr = tv;
4021 }
4022
4023 if ((tptr[0].tv_usec == UTIME_NOW) ||
4024 (tptr[0].tv_usec == UTIME_OMIT))
4025 ts[0].tv_nsec = tptr[0].tv_usec;
4026 else {
4027 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000)
4028 return EINVAL;
4029
4030 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
4031 }
4032
4033 if ((tptr[1].tv_usec == UTIME_NOW) ||
4034 (tptr[1].tv_usec == UTIME_OMIT))
4035 ts[1].tv_nsec = tptr[1].tv_usec;
4036 else {
4037 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000)
4038 return EINVAL;
4039
4040 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
4041 }
4042
4043 tsptr = &ts[0];
4044 }
4045
4046 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
4047 }
4048
4049 /*
4050 * Truncate a file given its path name.
4051 */
4052 /* ARGSUSED */
4053 int
4054 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
4055 {
4056 /* {
4057 syscallarg(const char *) path;
4058 syscallarg(int) pad;
4059 syscallarg(off_t) length;
4060 } */
4061 struct vnode *vp;
4062 struct vattr vattr;
4063 int error;
4064
4065 if (SCARG(uap, length) < 0)
4066 return EINVAL;
4067
4068 error = namei_simple_user(SCARG(uap, path),
4069 NSM_FOLLOW_TRYEMULROOT, &vp);
4070 if (error != 0)
4071 return (error);
4072 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4073 if (vp->v_type == VDIR)
4074 error = EISDIR;
4075 else if ((error = vn_writechk(vp)) == 0 &&
4076 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
4077 vattr_null(&vattr);
4078 vattr.va_size = SCARG(uap, length);
4079 error = VOP_SETATTR(vp, &vattr, l->l_cred);
4080 }
4081 vput(vp);
4082 return (error);
4083 }
4084
4085 /*
4086 * Truncate a file given a file descriptor.
4087 */
4088 /* ARGSUSED */
4089 int
4090 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
4091 {
4092 /* {
4093 syscallarg(int) fd;
4094 syscallarg(int) pad;
4095 syscallarg(off_t) length;
4096 } */
4097 struct vattr vattr;
4098 struct vnode *vp;
4099 file_t *fp;
4100 int error;
4101
4102 if (SCARG(uap, length) < 0)
4103 return EINVAL;
4104
4105 /* fd_getvnode() will use the descriptor for us */
4106 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4107 return (error);
4108 if ((fp->f_flag & FWRITE) == 0) {
4109 error = EINVAL;
4110 goto out;
4111 }
4112 vp = fp->f_vnode;
4113 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4114 if (vp->v_type == VDIR)
4115 error = EISDIR;
4116 else if ((error = vn_writechk(vp)) == 0) {
4117 vattr_null(&vattr);
4118 vattr.va_size = SCARG(uap, length);
4119 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
4120 }
4121 VOP_UNLOCK(vp);
4122 out:
4123 fd_putfile(SCARG(uap, fd));
4124 return (error);
4125 }
4126
4127 /*
4128 * Sync an open file.
4129 */
4130 /* ARGSUSED */
4131 int
4132 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
4133 {
4134 /* {
4135 syscallarg(int) fd;
4136 } */
4137 struct vnode *vp;
4138 file_t *fp;
4139 int error;
4140
4141 /* fd_getvnode() will use the descriptor for us */
4142 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4143 return (error);
4144 vp = fp->f_vnode;
4145 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4146 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4147 VOP_UNLOCK(vp);
4148 fd_putfile(SCARG(uap, fd));
4149 return (error);
4150 }
4151
4152 /*
4153 * Sync a range of file data. API modeled after that found in AIX.
4154 *
4155 * FDATASYNC indicates that we need only save enough metadata to be able
4156 * to re-read the written data.
4157 */
4158 /* ARGSUSED */
4159 int
4160 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4161 {
4162 /* {
4163 syscallarg(int) fd;
4164 syscallarg(int) flags;
4165 syscallarg(off_t) start;
4166 syscallarg(off_t) length;
4167 } */
4168 struct vnode *vp;
4169 file_t *fp;
4170 int flags, nflags;
4171 off_t s, e, len;
4172 int error;
4173
4174 /* fd_getvnode() will use the descriptor for us */
4175 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4176 return (error);
4177
4178 if ((fp->f_flag & FWRITE) == 0) {
4179 error = EBADF;
4180 goto out;
4181 }
4182
4183 flags = SCARG(uap, flags);
4184 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4185 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4186 error = EINVAL;
4187 goto out;
4188 }
4189 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4190 if (flags & FDATASYNC)
4191 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4192 else
4193 nflags = FSYNC_WAIT;
4194 if (flags & FDISKSYNC)
4195 nflags |= FSYNC_CACHE;
4196
4197 len = SCARG(uap, length);
4198 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4199 if (len) {
4200 s = SCARG(uap, start);
4201 e = s + len;
4202 if (e < s) {
4203 error = EINVAL;
4204 goto out;
4205 }
4206 } else {
4207 e = 0;
4208 s = 0;
4209 }
4210
4211 vp = fp->f_vnode;
4212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4213 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4214 VOP_UNLOCK(vp);
4215 out:
4216 fd_putfile(SCARG(uap, fd));
4217 return (error);
4218 }
4219
4220 /*
4221 * Sync the data of an open file.
4222 */
4223 /* ARGSUSED */
4224 int
4225 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4226 {
4227 /* {
4228 syscallarg(int) fd;
4229 } */
4230 struct vnode *vp;
4231 file_t *fp;
4232 int error;
4233
4234 /* fd_getvnode() will use the descriptor for us */
4235 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4236 return (error);
4237 vp = fp->f_vnode;
4238 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4239 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4240 VOP_UNLOCK(vp);
4241 fd_putfile(SCARG(uap, fd));
4242 return (error);
4243 }
4244
4245 /*
4246 * Rename files, (standard) BSD semantics frontend.
4247 */
4248 /* ARGSUSED */
4249 int
4250 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4251 {
4252 /* {
4253 syscallarg(const char *) from;
4254 syscallarg(const char *) to;
4255 } */
4256
4257 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4258 SCARG(uap, to), UIO_USERSPACE, 0));
4259 }
4260
4261 int
4262 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4263 register_t *retval)
4264 {
4265 /* {
4266 syscallarg(int) fromfd;
4267 syscallarg(const char *) from;
4268 syscallarg(int) tofd;
4269 syscallarg(const char *) to;
4270 } */
4271
4272 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4273 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4274 }
4275
4276 /*
4277 * Rename files, POSIX semantics frontend.
4278 */
4279 /* ARGSUSED */
4280 int
4281 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4282 {
4283 /* {
4284 syscallarg(const char *) from;
4285 syscallarg(const char *) to;
4286 } */
4287
4288 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4289 SCARG(uap, to), UIO_USERSPACE, 1));
4290 }
4291
4292 /*
4293 * Rename files. Source and destination must either both be directories,
4294 * or both not be directories. If target is a directory, it must be empty.
4295 * If `from' and `to' refer to the same object, the value of the `retain'
4296 * argument is used to determine whether `from' will be
4297 *
4298 * (retain == 0) deleted unless `from' and `to' refer to the same
4299 * object in the file system's name space (BSD).
4300 * (retain == 1) always retained (POSIX).
4301 *
4302 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4303 */
4304 int
4305 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4306 {
4307 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4308 }
4309
4310 static int
4311 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
4312 const char *to, enum uio_seg seg, int retain)
4313 {
4314 struct pathbuf *fpb, *tpb;
4315 struct nameidata fnd, tnd;
4316 struct vnode *fdvp, *fvp;
4317 struct vnode *tdvp, *tvp;
4318 struct mount *mp, *tmp;
4319 int error;
4320
4321 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));
4322
4323 error = pathbuf_maybe_copyin(from, seg, &fpb);
4324 if (error)
4325 goto out0;
4326 KASSERT(fpb != NULL);
4327
4328 error = pathbuf_maybe_copyin(to, seg, &tpb);
4329 if (error)
4330 goto out1;
4331 KASSERT(tpb != NULL);
4332
4333 /*
4334 * Lookup from.
4335 *
4336 * XXX LOCKPARENT is wrong because we don't actually want it
4337 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
4338 * insane, so for the time being we need to leave it like this.
4339 */
4340 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
4341 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
4342 goto out2;
4343
4344 /*
4345 * Pull out the important results of the lookup, fdvp and fvp.
4346 * Of course, fvp is bogus because we're about to unlock fdvp.
4347 */
4348 fdvp = fnd.ni_dvp;
4349 fvp = fnd.ni_vp;
4350 mp = fdvp->v_mount;
4351 KASSERT(fdvp != NULL);
4352 KASSERT(fvp != NULL);
4353 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));
4354 /*
4355 * Bracket the operation with fstrans_start()/fstrans_done().
4356 *
4357 * Inside the bracket this file system cannot be unmounted so
4358 * a vnode on this file system cannot change its v_mount.
4359 * A vnode on another file system may still change to dead mount.
4360 */
4361 fstrans_start(mp);
4362
4363 /*
4364 * Make sure neither fdvp nor fvp is locked.
4365 */
4366 if (fdvp != fvp)
4367 VOP_UNLOCK(fdvp);
4368 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4369 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4370
4371 /*
4372 * Reject renaming `.' and `..'. Can't do this until after
4373 * namei because we need namei's parsing to find the final
4374 * component name. (namei should just leave us with the final
4375 * component name and not look it up itself, but anyway...)
4376 *
4377 * This was here before because we used to relookup from
4378 * instead of to and relookup requires the caller to check
4379 * this, but now file systems may depend on this check, so we
4380 * must retain it until the file systems are all rototilled.
4381 */
4382 if (((fnd.ni_cnd.cn_namelen == 1) &&
4383 (fnd.ni_cnd.cn_nameptr[0] == '.')) ||
4384 ((fnd.ni_cnd.cn_namelen == 2) &&
4385 (fnd.ni_cnd.cn_nameptr[0] == '.') &&
4386 (fnd.ni_cnd.cn_nameptr[1] == '.'))) {
4387 error = EINVAL; /* XXX EISDIR? */
4388 goto abort0;
4389 }
4390
4391 /*
4392 * Lookup to.
4393 *
4394 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using
4395 * fvp here to decide whether to add CREATEDIR is a load of
4396 * bollocks because fvp might be the wrong node by now, since
4397 * fdvp is unlocked.
4398 *
4399 * XXX Why not pass CREATEDIR always?
4400 */
4401 NDINIT(&tnd, RENAME,
4402 (LOCKPARENT | NOCACHE | TRYEMULROOT |
4403 ((fvp->v_type == VDIR)? CREATEDIR : 0)),
4404 tpb);
4405 if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
4406 goto abort0;
4407
4408 /*
4409 * Pull out the important results of the lookup, tdvp and tvp.
4410 * Of course, tvp is bogus because we're about to unlock tdvp.
4411 */
4412 tdvp = tnd.ni_dvp;
4413 tvp = tnd.ni_vp;
4414 KASSERT(tdvp != NULL);
4415 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));
4416
4417 if (fvp->v_type == VDIR)
4418 tnd.ni_cnd.cn_flags |= WILLBEDIR;
4419 /*
4420 * Make sure neither tdvp nor tvp is locked.
4421 */
4422 if (tdvp != tvp)
4423 VOP_UNLOCK(tdvp);
4424 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4425 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4426
4427 /*
4428 * Reject renaming onto `.' or `..'. relookup is unhappy with
4429 * these, which is why we must do this here. Once upon a time
4430 * we relooked up from instead of to, and consequently didn't
4431 * need this check, but now that we relookup to instead of
4432 * from, we need this; and we shall need it forever forward
4433 * until the VOP_RENAME protocol changes, because file systems
4434 * will no doubt begin to depend on this check.
4435 */
4436 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
4437 error = EISDIR;
4438 goto abort1;
4439 }
4440 if ((tnd.ni_cnd.cn_namelen == 2) &&
4441 (tnd.ni_cnd.cn_nameptr[0] == '.') &&
4442 (tnd.ni_cnd.cn_nameptr[1] == '.')) {
4443 error = EINVAL;
4444 goto abort1;
4445 }
4446
4447 /*
4448 * Make sure the mount points match. Although we don't hold
4449 * any vnode locks, the v_mount on fdvp file system are stable.
4450 *
4451 * Unmounting another file system at an inopportune moment may
4452 * cause tdvp to disappear and change its v_mount to dead.
4453 *
4454 * So in either case different v_mount means cross-device rename.
4455 */
4456 KASSERT(mp != NULL);
4457 tmp = tdvp->v_mount;
4458
4459 if (mp != tmp) {
4460 error = EXDEV;
4461 goto abort1;
4462 }
4463
4464 /*
4465 * Take the vfs rename lock to avoid cross-directory screw cases.
4466 * Nothing is locked currently, so taking this lock is safe.
4467 */
4468 error = VFS_RENAMELOCK_ENTER(mp);
4469 if (error)
4470 goto abort1;
4471
4472 /*
4473 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
4474 * and nothing is locked except for the vfs rename lock.
4475 *
4476 * The next step is a little rain dance to conform to the
4477 * insane lock protocol, even though it does nothing to ward
4478 * off race conditions.
4479 *
4480 * We need tdvp and tvp to be locked. However, because we have
4481 * unlocked tdvp in order to hold no locks while we take the
4482 * vfs rename lock, tvp may be wrong here, and we can't safely
4483 * lock it even if the sensible file systems will just unlock
4484 * it straight away. Consequently, we must lock tdvp and then
4485 * relookup tvp to get it locked.
4486 *
4487 * Finally, because the VOP_RENAME protocol is brain-damaged
4488 * and various file systems insanely depend on the semantics of
4489 * this brain damage, the lookup of to must be the last lookup
4490 * before VOP_RENAME.
4491 */
4492 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
4493 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
4494 if (error)
4495 goto abort2;
4496
4497 /*
4498 * Drop the old tvp and pick up the new one -- which might be
4499 * the same, but that doesn't matter to us. After this, tdvp
4500 * and tvp should both be locked.
4501 */
4502 if (tvp != NULL)
4503 vrele(tvp);
4504 tvp = tnd.ni_vp;
4505 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4506 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4507
4508 /*
4509 * The old do_sys_rename had various consistency checks here
4510 * involving fvp and tvp. fvp is bogus already here, and tvp
4511 * will become bogus soon in any sensible file system, so the
4512 * only purpose in putting these checks here is to give lip
4513 * service to these screw cases and to acknowledge that they
4514 * exist, not actually to handle them, but here you go
4515 * anyway...
4516 */
4517
4518 /*
4519 * Acknowledge that directories and non-directories aren't
4520 * suposed to mix.
4521 */
4522 if (tvp != NULL) {
4523 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
4524 error = ENOTDIR;
4525 goto abort3;
4526 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
4527 error = EISDIR;
4528 goto abort3;
4529 }
4530 }
4531
4532 /*
4533 * Acknowledge some random screw case, among the dozens that
4534 * might arise.
4535 */
4536 if (fvp == tdvp) {
4537 error = EINVAL;
4538 goto abort3;
4539 }
4540
4541 /*
4542 * Acknowledge that POSIX has a wacky screw case.
4543 *
4544 * XXX Eventually the retain flag needs to be passed on to
4545 * VOP_RENAME.
4546 */
4547 if (fvp == tvp) {
4548 if (retain) {
4549 error = 0;
4550 goto abort3;
4551 } else if ((fdvp == tdvp) &&
4552 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
4553 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
4554 fnd.ni_cnd.cn_namelen))) {
4555 error = 0;
4556 goto abort3;
4557 }
4558 }
4559
4560 /*
4561 * Make sure veriexec can screw us up. (But a race can screw
4562 * up veriexec, of course -- remember, fvp and (soon) tvp are
4563 * bogus.)
4564 */
4565 #if NVERIEXEC > 0
4566 {
4567 char *f1, *f2;
4568 size_t f1_len;
4569 size_t f2_len;
4570
4571 f1_len = fnd.ni_cnd.cn_namelen + 1;
4572 f1 = kmem_alloc(f1_len, KM_SLEEP);
4573 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);
4574
4575 f2_len = tnd.ni_cnd.cn_namelen + 1;
4576 f2 = kmem_alloc(f2_len, KM_SLEEP);
4577 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);
4578
4579 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4580
4581 kmem_free(f1, f1_len);
4582 kmem_free(f2, f2_len);
4583
4584 if (error)
4585 goto abort3;
4586 }
4587 #endif /* NVERIEXEC > 0 */
4588
4589 /*
4590 * All ready. Incant the rename vop.
4591 */
4592 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4593 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4594 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4595 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4596 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);
4597
4598 /*
4599 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
4600 * tdvp and tvp. But we can't assert any of that.
4601 */
4602 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4603 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4604 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4605 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4606
4607 /*
4608 * So all we have left to do is to drop the rename lock and
4609 * destroy the pathbufs.
4610 */
4611 VFS_RENAMELOCK_EXIT(mp);
4612 fstrans_done(mp);
4613 goto out2;
4614
4615 abort3: if ((tvp != NULL) && (tvp != tdvp))
4616 VOP_UNLOCK(tvp);
4617 abort2: VOP_UNLOCK(tdvp);
4618 VFS_RENAMELOCK_EXIT(mp);
4619 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd);
4620 vrele(tdvp);
4621 if (tvp != NULL)
4622 vrele(tvp);
4623 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd);
4624 vrele(fdvp);
4625 vrele(fvp);
4626 fstrans_done(mp);
4627 out2: pathbuf_destroy(tpb);
4628 out1: pathbuf_destroy(fpb);
4629 out0: return error;
4630 }
4631
4632 /*
4633 * Make a directory file.
4634 */
4635 /* ARGSUSED */
4636 int
4637 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4638 {
4639 /* {
4640 syscallarg(const char *) path;
4641 syscallarg(int) mode;
4642 } */
4643
4644 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4645 SCARG(uap, mode), UIO_USERSPACE);
4646 }
4647
4648 int
4649 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4650 register_t *retval)
4651 {
4652 /* {
4653 syscallarg(int) fd;
4654 syscallarg(const char *) path;
4655 syscallarg(int) mode;
4656 } */
4657
4658 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4659 SCARG(uap, mode), UIO_USERSPACE);
4660 }
4661
4662
4663 int
4664 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4665 {
4666 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg);
4667 }
4668
4669 static int
4670 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4671 enum uio_seg seg)
4672 {
4673 struct proc *p = curlwp->l_proc;
4674 struct vnode *vp;
4675 struct vattr vattr;
4676 int error;
4677 struct pathbuf *pb;
4678 struct nameidata nd;
4679
4680 KASSERT(l != NULL || fdat == AT_FDCWD);
4681
4682 /* XXX bollocks, should pass in a pathbuf */
4683 error = pathbuf_maybe_copyin(path, seg, &pb);
4684 if (error) {
4685 return error;
4686 }
4687
4688 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4689
4690 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4691 pathbuf_destroy(pb);
4692 return (error);
4693 }
4694 vp = nd.ni_vp;
4695 if (vp != NULL) {
4696 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4697 if (nd.ni_dvp == vp)
4698 vrele(nd.ni_dvp);
4699 else
4700 vput(nd.ni_dvp);
4701 vrele(vp);
4702 pathbuf_destroy(pb);
4703 return (EEXIST);
4704 }
4705 vattr_null(&vattr);
4706 vattr.va_type = VDIR;
4707 /* We will read cwdi->cwdi_cmask unlocked. */
4708 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4709 nd.ni_cnd.cn_flags |= WILLBEDIR;
4710 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4711 if (!error)
4712 vrele(nd.ni_vp);
4713 vput(nd.ni_dvp);
4714 pathbuf_destroy(pb);
4715 return (error);
4716 }
4717
4718 /*
4719 * Remove a directory file.
4720 */
4721 /* ARGSUSED */
4722 int
4723 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4724 {
4725 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4726 AT_REMOVEDIR, UIO_USERSPACE);
4727 }
4728
4729 /*
4730 * Read a block of directory entries in a file system independent format.
4731 */
4732 int
4733 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4734 {
4735 /* {
4736 syscallarg(int) fd;
4737 syscallarg(char *) buf;
4738 syscallarg(size_t) count;
4739 } */
4740 file_t *fp;
4741 int error, done;
4742
4743 /* fd_getvnode() will use the descriptor for us */
4744 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4745 return (error);
4746 if ((fp->f_flag & FREAD) == 0) {
4747 error = EBADF;
4748 goto out;
4749 }
4750 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4751 SCARG(uap, count), &done, l, 0, 0);
4752 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4753 *retval = done;
4754 out:
4755 fd_putfile(SCARG(uap, fd));
4756 return (error);
4757 }
4758
4759 /*
4760 * Set the mode mask for creation of filesystem nodes.
4761 */
4762 int
4763 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4764 {
4765 /* {
4766 syscallarg(mode_t) newmask;
4767 } */
4768
4769 /*
4770 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of
4771 * serialization with those reads is required. It's important to
4772 * return a coherent answer for the caller of umask() though, and
4773 * the atomic operation accomplishes that.
4774 */
4775 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask,
4776 SCARG(uap, newmask) & ALLPERMS);
4777
4778 return (0);
4779 }
4780
4781 int
4782 dorevoke(struct vnode *vp, kauth_cred_t cred)
4783 {
4784 struct vattr vattr;
4785 int error, fs_decision;
4786
4787 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4788 error = VOP_GETATTR(vp, &vattr, cred);
4789 VOP_UNLOCK(vp);
4790 if (error != 0)
4791 return error;
4792 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4793 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4794 fs_decision);
4795 if (!error)
4796 VOP_REVOKE(vp, REVOKEALL);
4797 return (error);
4798 }
4799
4800 /*
4801 * Void all references to file by ripping underlying filesystem
4802 * away from vnode.
4803 */
4804 /* ARGSUSED */
4805 int
4806 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4807 {
4808 /* {
4809 syscallarg(const char *) path;
4810 } */
4811 struct vnode *vp;
4812 int error;
4813
4814 error = namei_simple_user(SCARG(uap, path),
4815 NSM_FOLLOW_TRYEMULROOT, &vp);
4816 if (error != 0)
4817 return (error);
4818 error = dorevoke(vp, l->l_cred);
4819 vrele(vp);
4820 return (error);
4821 }
4822
4823 /*
4824 * Allocate backing store for a file, filling a hole without having to
4825 * explicitly write anything out.
4826 */
4827 /* ARGSUSED */
4828 int
4829 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4830 register_t *retval)
4831 {
4832 /* {
4833 syscallarg(int) fd;
4834 syscallarg(off_t) pos;
4835 syscallarg(off_t) len;
4836 } */
4837 int fd;
4838 off_t pos, len;
4839 struct file *fp;
4840 struct vnode *vp;
4841 int error;
4842
4843 fd = SCARG(uap, fd);
4844 pos = SCARG(uap, pos);
4845 len = SCARG(uap, len);
4846
4847 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4848 *retval = EINVAL;
4849 return 0;
4850 }
4851
4852 error = fd_getvnode(fd, &fp);
4853 if (error) {
4854 *retval = error;
4855 return 0;
4856 }
4857 if ((fp->f_flag & FWRITE) == 0) {
4858 error = EBADF;
4859 goto fail;
4860 }
4861 vp = fp->f_vnode;
4862
4863 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4864 if (vp->v_type == VDIR) {
4865 error = EISDIR;
4866 } else {
4867 error = VOP_FALLOCATE(vp, pos, len);
4868 }
4869 VOP_UNLOCK(vp);
4870
4871 fail:
4872 fd_putfile(fd);
4873 *retval = error;
4874 return 0;
4875 }
4876
4877 /*
4878 * Deallocate backing store for a file, creating a hole. Also used for
4879 * invoking TRIM on disks.
4880 */
4881 /* ARGSUSED */
4882 int
4883 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4884 register_t *retval)
4885 {
4886 /* {
4887 syscallarg(int) fd;
4888 syscallarg(off_t) pos;
4889 syscallarg(off_t) len;
4890 } */
4891 int fd;
4892 off_t pos, len;
4893 struct file *fp;
4894 struct vnode *vp;
4895 int error;
4896
4897 fd = SCARG(uap, fd);
4898 pos = SCARG(uap, pos);
4899 len = SCARG(uap, len);
4900
4901 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4902 return EINVAL;
4903 }
4904
4905 error = fd_getvnode(fd, &fp);
4906 if (error) {
4907 return error;
4908 }
4909 if ((fp->f_flag & FWRITE) == 0) {
4910 error = EBADF;
4911 goto fail;
4912 }
4913 vp = fp->f_vnode;
4914
4915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4916 if (vp->v_type == VDIR) {
4917 error = EISDIR;
4918 } else {
4919 error = VOP_FDISCARD(vp, pos, len);
4920 }
4921 VOP_UNLOCK(vp);
4922
4923 fail:
4924 fd_putfile(fd);
4925 return error;
4926 }
4927