vfs_syscalls.c revision 1.569 1 /* $NetBSD: vfs_syscalls.c,v 1.569 2024/12/07 02:11:42 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.569 2024/12/07 02:11:42 riastradh Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/types.h>
82
83 #include <sys/atomic.h>
84 #include <sys/buf.h>
85 #include <sys/compat_stub.h>
86 #include <sys/dirent.h>
87 #include <sys/event.h>
88 #include <sys/extattr.h>
89 #include <sys/fcntl.h>
90 #include <sys/file.h>
91 #ifdef FILEASSOC
92 #include <sys/fileassoc.h>
93 #endif /* FILEASSOC */
94 #include <sys/filedesc.h>
95 #include <sys/fstrans.h>
96 #include <sys/kauth.h>
97 #include <sys/kernel.h>
98 #include <sys/kmem.h>
99 #include <sys/ktrace.h>
100 #include <sys/module.h>
101 #include <sys/mount.h>
102 #include <sys/namei.h>
103 #include <sys/proc.h>
104 #include <sys/quota.h>
105 #include <sys/quotactl.h>
106 #include <sys/stat.h>
107 #include <sys/syscallargs.h>
108 #include <sys/sysctl.h>
109 #include <sys/systm.h>
110 #include <sys/uio.h>
111 #include <sys/verified_exec.h>
112 #include <sys/vfs_syscalls.h>
113 #include <sys/vnode.h>
114
115 #include <miscfs/genfs/genfs.h>
116 #include <miscfs/specfs/specdev.h>
117
118 #include <nfs/nfs.h>
119 #include <nfs/nfs_var.h>
120 #include <nfs/nfsproto.h>
121 #include <nfs/rpcv2.h>
122
123 /* XXX this shouldn't be here */
124 #ifndef OFF_T_MAX
125 #define OFF_T_MAX __type_max(off_t)
126 #endif
127
128 static int change_flags(struct vnode *, u_long, struct lwp *);
129 static int change_mode(struct vnode *, int, struct lwp *);
130 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
131 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
132 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
133 enum uio_seg);
134 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
135 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
136 enum uio_seg);
137 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
138 enum uio_seg, int);
139 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
140 size_t, register_t *);
141 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
142
143 static int fd_nameiat(struct lwp *, int, struct nameidata *);
144 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
145 namei_simple_flags_t, struct vnode **);
146
147 /*
148 * This table is used to maintain compatibility with 4.3BSD
149 * and NetBSD 0.9 mount syscalls - and possibly other systems.
150 * Note, the order is important!
151 *
152 * Do not modify this table. It should only contain filesystems
153 * supported by NetBSD 0.9 and 4.3BSD.
154 */
155 const char * const mountcompatnames[] = {
156 NULL, /* 0 = MOUNT_NONE */
157 MOUNT_FFS, /* 1 = MOUNT_UFS */
158 MOUNT_NFS, /* 2 */
159 MOUNT_MFS, /* 3 */
160 MOUNT_MSDOS, /* 4 */
161 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
162 MOUNT_FDESC, /* 6 */
163 MOUNT_KERNFS, /* 7 */
164 NULL, /* 8 = MOUNT_DEVFS */
165 MOUNT_AFS, /* 9 */
166 };
167
168 const u_int nmountcompatnames = __arraycount(mountcompatnames);
169
170 /*
171 * Filter event method for EVFILT_FS.
172 */
173 static struct klist fs_klist;
174 static kmutex_t fs_klist_lock;
175
176 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0);
177 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0);
178
179 void
180 vfs_evfilt_fs_init(void)
181 {
182
183 klist_init(&fs_klist);
184 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE);
185 }
186
187 static int
188 filt_fsattach(struct knote *kn)
189 {
190
191 mutex_enter(&fs_klist_lock);
192 kn->kn_flags |= EV_CLEAR;
193 klist_insert(&fs_klist, kn);
194 mutex_exit(&fs_klist_lock);
195
196 return 0;
197 }
198
199 static void
200 filt_fsdetach(struct knote *kn)
201 {
202
203 mutex_enter(&fs_klist_lock);
204 klist_remove(&fs_klist, kn);
205 mutex_exit(&fs_klist_lock);
206 }
207
208 static int
209 filt_fs(struct knote *kn, long hint)
210 {
211 int rv;
212
213 if (hint & NOTE_SUBMIT) {
214 KASSERT(mutex_owned(&fs_klist_lock));
215 kn->kn_fflags |= hint & ~NOTE_SUBMIT;
216 } else {
217 mutex_enter(&fs_klist_lock);
218 }
219
220 rv = (kn->kn_fflags != 0);
221
222 if ((hint & NOTE_SUBMIT) == 0) {
223 mutex_exit(&fs_klist_lock);
224 }
225
226 return rv;
227 }
228
229 /* referenced in kern_event.c */
230 const struct filterops fs_filtops = {
231 .f_flags = FILTEROP_MPSAFE,
232 .f_attach = filt_fsattach,
233 .f_detach = filt_fsdetach,
234 .f_event = filt_fs,
235 };
236
237 static int
238 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
239 {
240 file_t *dfp;
241 int error;
242 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf);
243
244 if (fdat != AT_FDCWD && path[0] != '/') {
245 if ((error = fd_getvnode(fdat, &dfp)) != 0)
246 goto out;
247
248 NDAT(ndp, dfp->f_vnode);
249 }
250
251 error = namei(ndp);
252
253 if (fdat != AT_FDCWD && path[0] != '/')
254 fd_putfile(fdat);
255 out:
256 pathbuf_stringcopy_put(ndp->ni_pathbuf, path);
257 return error;
258 }
259
260 static int
261 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
262 namei_simple_flags_t sflags, struct vnode **vp_ret)
263 {
264 file_t *dfp;
265 struct vnode *dvp;
266 int error;
267 struct pathbuf *pb;
268 const char *p;
269
270 error = pathbuf_copyin(path, &pb);
271 if (error) {
272 return error;
273 }
274 p = pathbuf_stringcopy_get(pb);
275
276 if (fdat != AT_FDCWD && p[0] != '/') {
277 if ((error = fd_getvnode(fdat, &dfp)) != 0)
278 goto out;
279
280 dvp = dfp->f_vnode;
281 } else {
282 dvp = NULL;
283 }
284
285 error = nameiat_simple(dvp, pb, sflags, vp_ret);
286
287 if (fdat != AT_FDCWD && p[0] != '/')
288 fd_putfile(fdat);
289
290 out:
291 pathbuf_stringcopy_put(pb, p);
292 pathbuf_destroy(pb);
293
294 return error;
295 }
296
297 static int
298 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
299 {
300 int error;
301
302 fp->f_flag = flags & FMASK;
303 fp->f_type = DTYPE_VNODE;
304 fp->f_ops = &vnops;
305 fp->f_vnode = vp;
306
307 if (flags & (O_EXLOCK | O_SHLOCK)) {
308 struct flock lf;
309 int type;
310
311 lf.l_whence = SEEK_SET;
312 lf.l_start = 0;
313 lf.l_len = 0;
314 if (flags & O_EXLOCK)
315 lf.l_type = F_WRLCK;
316 else
317 lf.l_type = F_RDLCK;
318 type = F_FLOCK;
319 if ((flags & FNONBLOCK) == 0)
320 type |= F_WAIT;
321 VOP_UNLOCK(vp);
322 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
323 if (error) {
324 (void) vn_close(vp, fp->f_flag, fp->f_cred);
325 fd_abort(l->l_proc, fp, indx);
326 return error;
327 }
328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
329 atomic_or_uint(&fp->f_flag, FHASLOCK);
330 }
331 if (flags & O_CLOEXEC)
332 fd_set_exclose(l, indx, true);
333 return 0;
334 }
335
336 static int
337 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
338 void *data, size_t *data_len)
339 {
340 struct mount *mp;
341 int error = 0, saved_flags;
342
343 mp = vp->v_mount;
344 saved_flags = mp->mnt_flag;
345
346 /* We can operate only on VV_ROOT nodes. */
347 if ((vp->v_vflag & VV_ROOT) == 0) {
348 error = EINVAL;
349 goto out;
350 }
351
352 /*
353 * We only allow the filesystem to be reloaded if it
354 * is currently mounted read-only. Additionally, we
355 * prevent read-write to read-only downgrades.
356 */
357 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
358 (mp->mnt_flag & MNT_RDONLY) == 0 &&
359 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
360 error = EOPNOTSUPP; /* Needs translation */
361 goto out;
362 }
363
364 /*
365 * Enabling MNT_UNION requires a covered mountpoint and
366 * must not happen on the root mount.
367 */
368 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) {
369 error = EOPNOTSUPP;
370 goto out;
371 }
372
373 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
374 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
375 if (error)
376 goto out;
377
378 error = vfs_suspend(mp, 0);
379 if (error)
380 goto out;
381
382 mutex_enter(mp->mnt_updating);
383
384 mp->mnt_flag &= ~MNT_OP_FLAGS;
385 mp->mnt_flag |= flags & MNT_OP_FLAGS;
386
387 /*
388 * Set the mount level flags.
389 */
390 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
391 if ((flags & MNT_RDONLY))
392 mp->mnt_iflag |= IMNT_WANTRDONLY;
393 else
394 mp->mnt_iflag |= IMNT_WANTRDWR;
395 }
396 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
397 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
398 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
399 mp->mnt_flag &= ~MNT_RDONLY;
400
401 error = VFS_MOUNT(mp, path, data, data_len);
402
403 if (error && data != NULL) {
404 int error2;
405
406 /*
407 * Update failed; let's try and see if it was an
408 * export request. For compat with 3.0 and earlier.
409 */
410 error2 = vfs_hooks_reexport(mp, path, data);
411
412 /*
413 * Only update error code if the export request was
414 * understood but some problem occurred while
415 * processing it.
416 */
417 if (error2 != EJUSTRETURN)
418 error = error2;
419 }
420
421 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
422 mp->mnt_flag |= MNT_RDONLY;
423 if (error)
424 mp->mnt_flag = saved_flags;
425 mp->mnt_flag &= ~MNT_OP_FLAGS;
426 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
427 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
428 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
429 vfs_syncer_add_to_worklist(mp);
430 } else {
431 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
432 vfs_syncer_remove_from_worklist(mp);
433 }
434 mutex_exit(mp->mnt_updating);
435 vfs_resume(mp);
436
437 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
438 (flags & MNT_EXTATTR)) {
439 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
440 NULL, 0, NULL) != 0) {
441 printf("%s: failed to start extattr, error = %d",
442 mp->mnt_stat.f_mntonname, error);
443 mp->mnt_flag &= ~MNT_EXTATTR;
444 }
445 }
446
447 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
448 !(flags & MNT_EXTATTR)) {
449 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
450 NULL, 0, NULL) != 0) {
451 printf("%s: failed to stop extattr, error = %d",
452 mp->mnt_stat.f_mntonname, error);
453 mp->mnt_flag |= MNT_RDONLY;
454 }
455 }
456 out:
457 return (error);
458 }
459
460 static int
461 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
462 struct vfsops **vfsops)
463 {
464 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
465 int error;
466
467 if (type_seg == UIO_USERSPACE) {
468 /* Copy file-system type from userspace. */
469 error = copyinstr(fstype, fstypename, sizeof(fstypename),
470 NULL);
471 } else {
472 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
473 KASSERT(error == 0);
474 }
475
476 if (error) {
477 /*
478 * Historically, filesystem types were identified by numbers.
479 * If we get an integer for the filesystem type instead of a
480 * string, we check to see if it matches one of the historic
481 * filesystem types.
482 */
483 u_long fsindex = (u_long)fstype;
484 if (fsindex >= nmountcompatnames ||
485 mountcompatnames[fsindex] == NULL)
486 return ENODEV;
487 strlcpy(fstypename, mountcompatnames[fsindex],
488 sizeof(fstypename));
489 }
490
491 /* Accept `ufs' as an alias for `ffs', for compatibility. */
492 if (strcmp(fstypename, "ufs") == 0)
493 fstypename[0] = 'f';
494
495 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
496 return 0;
497
498 /* If we can autoload a vfs module, try again */
499 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
500
501 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
502 return 0;
503
504 return ENODEV;
505 }
506
507 static int
508 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
509 void *data, size_t *data_len)
510 {
511 struct mount *mp;
512 int error;
513
514 /* If MNT_GETARGS is specified, it should be the only flag. */
515 if (flags & ~MNT_GETARGS)
516 return EINVAL;
517
518 mp = vp->v_mount;
519
520 /* XXX: probably some notion of "can see" here if we want isolation. */
521 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
522 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
523 if (error)
524 return error;
525
526 if ((vp->v_vflag & VV_ROOT) == 0)
527 return EINVAL;
528
529 if (vfs_busy(mp))
530 return EPERM;
531
532 mutex_enter(mp->mnt_updating);
533 mp->mnt_flag &= ~MNT_OP_FLAGS;
534 mp->mnt_flag |= MNT_GETARGS;
535 error = VFS_MOUNT(mp, path, data, data_len);
536 mp->mnt_flag &= ~MNT_OP_FLAGS;
537 mutex_exit(mp->mnt_updating);
538
539 vfs_unbusy(mp);
540 return (error);
541 }
542
543 int
544 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap,
545 register_t *retval)
546 {
547 /* {
548 syscallarg(const char *) type;
549 syscallarg(const char *) path;
550 syscallarg(int) flags;
551 syscallarg(void *) data;
552 syscallarg(size_t) data_len;
553 } */
554
555 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE,
556 SCARG(uap, path), SCARG(uap, flags),
557 SCARG(uap, data), UIO_USERSPACE, SCARG(uap, data_len),
558 retval);
559 }
560
561 int
562 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
563 const char *path, int flags,
564 void *data, enum uio_seg data_seg, size_t data_len,
565 register_t *retval)
566 {
567 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
568 struct vnode *vp;
569 void *data_buf = data;
570 bool vfsopsrele = false;
571 size_t alloc_sz = 0;
572 int error;
573
574 /*
575 * Get vnode to be covered
576 */
577 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
578 if (error != 0) {
579 vp = NULL;
580 goto done;
581 }
582
583 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
584 vfsops = vp->v_mount->mnt_op;
585 } else {
586 /* 'type' is userspace */
587 error = mount_get_vfsops(type, type_seg, &vfsops);
588 if (error != 0)
589 goto done;
590 vfsopsrele = true;
591 }
592
593 /*
594 * We allow data to be NULL, even for userspace. Some fs's don't need
595 * it. The others will handle NULL.
596 */
597 if (data != NULL && data_seg == UIO_USERSPACE) {
598 if (data_len == 0) {
599 /* No length supplied, use default for filesystem */
600 data_len = vfsops->vfs_min_mount_data;
601
602 /*
603 * Hopefully a longer buffer won't make copyin() fail.
604 * For compatibility with 3.0 and earlier.
605 */
606 if (flags & MNT_UPDATE
607 && data_len < sizeof (struct mnt_export_args30))
608 data_len = sizeof (struct mnt_export_args30);
609 }
610 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
611 error = EINVAL;
612 goto done;
613 }
614 alloc_sz = data_len;
615 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
616
617 /* NFS needs the buffer even for mnt_getargs .... */
618 error = copyin(data, data_buf, data_len);
619 if (error != 0)
620 goto done;
621 }
622
623 if (flags & MNT_GETARGS) {
624 if (data_len == 0) {
625 error = EINVAL;
626 goto done;
627 }
628 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
629 if (error != 0)
630 goto done;
631 if (data_seg == UIO_USERSPACE)
632 error = copyout(data_buf, data, data_len);
633 *retval = data_len;
634 } else if (flags & MNT_UPDATE) {
635 error = mount_update(l, vp, path, flags, data_buf, &data_len);
636 } else {
637 /* Locking is handled internally in mount_domount(). */
638 KASSERT(vfsopsrele == true);
639 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
640 &data_len);
641 vfsopsrele = false;
642 }
643 if (!error) {
644 mutex_enter(&fs_klist_lock);
645 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT);
646 mutex_exit(&fs_klist_lock);
647 }
648
649 done:
650 if (vfsopsrele)
651 vfs_delref(vfsops);
652 if (vp != NULL) {
653 vrele(vp);
654 }
655 if (data_buf != data)
656 kmem_free(data_buf, alloc_sz);
657 return (error);
658 }
659
660 /*
661 * Unmount a file system.
662 *
663 * Note: unmount takes a path to the vnode mounted on as argument,
664 * not special file (as before).
665 */
666 /* ARGSUSED */
667 int
668 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap,
669 register_t *retval)
670 {
671 /* {
672 syscallarg(const char *) path;
673 syscallarg(int) flags;
674 } */
675 struct vnode *vp;
676 struct mount *mp;
677 int error;
678 struct pathbuf *pb;
679 struct nameidata nd;
680
681 error = pathbuf_copyin(SCARG(uap, path), &pb);
682 if (error) {
683 return error;
684 }
685
686 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
687 if ((error = namei(&nd)) != 0) {
688 pathbuf_destroy(pb);
689 return error;
690 }
691 vp = nd.ni_vp;
692 pathbuf_destroy(pb);
693
694 mp = vp->v_mount;
695 vfs_ref(mp);
696 VOP_UNLOCK(vp);
697
698 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
699 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
700 if (error) {
701 vrele(vp);
702 vfs_rele(mp);
703 return (error);
704 }
705
706 /*
707 * Don't allow unmounting the root file system.
708 */
709 if (mp->mnt_flag & MNT_ROOTFS) {
710 vrele(vp);
711 vfs_rele(mp);
712 return (EINVAL);
713 }
714
715 /*
716 * Must be the root of the filesystem
717 */
718 if ((vp->v_vflag & VV_ROOT) == 0) {
719 vrele(vp);
720 vfs_rele(mp);
721 return (EINVAL);
722 }
723
724 vrele(vp);
725 error = dounmount(mp, SCARG(uap, flags), l);
726 vfs_rele(mp);
727 if (!error) {
728 mutex_enter(&fs_klist_lock);
729 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT);
730 mutex_exit(&fs_klist_lock);
731 }
732 return error;
733 }
734
735 /*
736 * Sync each mounted filesystem.
737 */
738 #ifdef DEBUG
739 int syncprt = 0;
740 struct ctldebug debug0 = { "syncprt", &syncprt };
741 #endif
742
743 void
744 do_sys_sync(struct lwp *l)
745 {
746 mount_iterator_t *iter;
747 struct mount *mp;
748 int asyncflag;
749
750 mountlist_iterator_init(&iter);
751 while ((mp = mountlist_iterator_next(iter)) != NULL) {
752 mutex_enter(mp->mnt_updating);
753 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
754 /*
755 * Temporarily clear the MNT_ASYNC flags so that
756 * bwrite() doesnt convert the sync writes to
757 * delayed writes.
758 */
759 asyncflag = mp->mnt_flag & MNT_ASYNC;
760 mp->mnt_flag &= ~MNT_ASYNC;
761 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
762 mp->mnt_flag |= asyncflag;
763 }
764 mutex_exit(mp->mnt_updating);
765 }
766 mountlist_iterator_destroy(iter);
767 #ifdef DEBUG
768 if (syncprt)
769 vfs_bufstats();
770 #endif /* DEBUG */
771 }
772
773 static bool
774 sync_vnode_filter(void *cookie, vnode_t *vp)
775 {
776
777 if (vp->v_numoutput > 0) {
778 ++*(int *)cookie;
779 }
780 return false;
781 }
782
783 int
784 vfs_syncwait(void)
785 {
786 int nbusy, nbusy_prev, iter;
787 struct vnode_iterator *vniter;
788 mount_iterator_t *mpiter;
789 struct mount *mp;
790
791 for (nbusy_prev = 0, iter = 0; iter < 20;) {
792 nbusy = 0;
793 mountlist_iterator_init(&mpiter);
794 while ((mp = mountlist_iterator_next(mpiter)) != NULL) {
795 vnode_t *vp __diagused;
796 vfs_vnode_iterator_init(mp, &vniter);
797 vp = vfs_vnode_iterator_next(vniter,
798 sync_vnode_filter, &nbusy);
799 KASSERT(vp == NULL);
800 vfs_vnode_iterator_destroy(vniter);
801 }
802 mountlist_iterator_destroy(mpiter);
803
804 if (nbusy == 0)
805 break;
806 if (nbusy_prev == 0)
807 nbusy_prev = nbusy;
808 printf("%d ", nbusy);
809 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL);
810 if (nbusy >= nbusy_prev) /* we didn't flush anything */
811 iter++;
812 else
813 nbusy_prev = nbusy;
814 }
815
816 if (nbusy) {
817 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
818 printf("giving up\nPrinting vnodes for busy buffers\n");
819 mountlist_iterator_init(&mpiter);
820 while ((mp = mountlist_iterator_next(mpiter)) != NULL) {
821 vnode_t *vp;
822 vfs_vnode_iterator_init(mp, &vniter);
823 vp = vfs_vnode_iterator_next(vniter,
824 NULL, NULL);
825 mutex_enter(vp->v_interlock);
826 if (vp->v_numoutput > 0)
827 vprint(NULL, vp);
828 mutex_exit(vp->v_interlock);
829 vrele(vp);
830 vfs_vnode_iterator_destroy(vniter);
831 }
832 mountlist_iterator_destroy(mpiter);
833 #endif
834 }
835
836 return nbusy;
837 }
838
839 /* ARGSUSED */
840 int
841 sys_sync(struct lwp *l, const void *v, register_t *retval)
842 {
843
844 do_sys_sync(l);
845 return (0);
846 }
847
848 /*
849 * Access or change filesystem quotas.
850 *
851 * (this is really 14 different calls bundled into one)
852 */
853
854 static int
855 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
856 {
857 struct quotastat info_k;
858 int error;
859
860 /* ensure any padding bytes are cleared */
861 memset(&info_k, 0, sizeof(info_k));
862
863 error = vfs_quotactl_stat(mp, &info_k);
864 if (error) {
865 return error;
866 }
867
868 return copyout(&info_k, info_u, sizeof(info_k));
869 }
870
871 static int
872 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
873 struct quotaidtypestat *info_u)
874 {
875 struct quotaidtypestat info_k;
876 int error;
877
878 /* ensure any padding bytes are cleared */
879 memset(&info_k, 0, sizeof(info_k));
880
881 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
882 if (error) {
883 return error;
884 }
885
886 return copyout(&info_k, info_u, sizeof(info_k));
887 }
888
889 static int
890 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
891 struct quotaobjtypestat *info_u)
892 {
893 struct quotaobjtypestat info_k;
894 int error;
895
896 /* ensure any padding bytes are cleared */
897 memset(&info_k, 0, sizeof(info_k));
898
899 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
900 if (error) {
901 return error;
902 }
903
904 return copyout(&info_k, info_u, sizeof(info_k));
905 }
906
907 static int
908 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
909 struct quotaval *val_u)
910 {
911 struct quotakey key_k;
912 struct quotaval val_k;
913 int error;
914
915 /* ensure any padding bytes are cleared */
916 memset(&val_k, 0, sizeof(val_k));
917
918 error = copyin(key_u, &key_k, sizeof(key_k));
919 if (error) {
920 return error;
921 }
922
923 error = vfs_quotactl_get(mp, &key_k, &val_k);
924 if (error) {
925 return error;
926 }
927
928 return copyout(&val_k, val_u, sizeof(val_k));
929 }
930
931 static int
932 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
933 const struct quotaval *val_u)
934 {
935 struct quotakey key_k;
936 struct quotaval val_k;
937 int error;
938
939 error = copyin(key_u, &key_k, sizeof(key_k));
940 if (error) {
941 return error;
942 }
943
944 error = copyin(val_u, &val_k, sizeof(val_k));
945 if (error) {
946 return error;
947 }
948
949 return vfs_quotactl_put(mp, &key_k, &val_k);
950 }
951
952 static int
953 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
954 {
955 struct quotakey key_k;
956 int error;
957
958 error = copyin(key_u, &key_k, sizeof(key_k));
959 if (error) {
960 return error;
961 }
962
963 return vfs_quotactl_del(mp, &key_k);
964 }
965
966 static int
967 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
968 {
969 struct quotakcursor cursor_k;
970 int error;
971
972 /* ensure any padding bytes are cleared */
973 memset(&cursor_k, 0, sizeof(cursor_k));
974
975 error = vfs_quotactl_cursoropen(mp, &cursor_k);
976 if (error) {
977 return error;
978 }
979
980 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
981 }
982
983 static int
984 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
985 {
986 struct quotakcursor cursor_k;
987 int error;
988
989 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
990 if (error) {
991 return error;
992 }
993
994 return vfs_quotactl_cursorclose(mp, &cursor_k);
995 }
996
997 static int
998 do_sys_quotactl_cursorskipidtype(struct mount *mp,
999 struct quotakcursor *cursor_u, int idtype)
1000 {
1001 struct quotakcursor cursor_k;
1002 int error;
1003
1004 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
1005 if (error) {
1006 return error;
1007 }
1008
1009 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
1010 if (error) {
1011 return error;
1012 }
1013
1014 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1015 }
1016
1017 static int
1018 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
1019 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
1020 unsigned *ret_u)
1021 {
1022 #define CGET_STACK_MAX 8
1023 struct quotakcursor cursor_k;
1024 struct quotakey stackkeys[CGET_STACK_MAX];
1025 struct quotaval stackvals[CGET_STACK_MAX];
1026 struct quotakey *keys_k;
1027 struct quotaval *vals_k;
1028 unsigned ret_k;
1029 int error;
1030
1031 if (maxnum > 128) {
1032 maxnum = 128;
1033 }
1034
1035 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
1036 if (error) {
1037 return error;
1038 }
1039
1040 if (maxnum <= CGET_STACK_MAX) {
1041 keys_k = stackkeys;
1042 vals_k = stackvals;
1043 /* ensure any padding bytes are cleared */
1044 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
1045 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
1046 } else {
1047 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
1048 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
1049 }
1050
1051 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
1052 &ret_k);
1053 if (error) {
1054 goto fail;
1055 }
1056
1057 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
1058 if (error) {
1059 goto fail;
1060 }
1061
1062 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
1063 if (error) {
1064 goto fail;
1065 }
1066
1067 error = copyout(&ret_k, ret_u, sizeof(ret_k));
1068 if (error) {
1069 goto fail;
1070 }
1071
1072 /* do last to maximize the chance of being able to recover a failure */
1073 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1074
1075 fail:
1076 if (keys_k != stackkeys) {
1077 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
1078 }
1079 if (vals_k != stackvals) {
1080 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
1081 }
1082 return error;
1083 }
1084
1085 static int
1086 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
1087 int *ret_u)
1088 {
1089 struct quotakcursor cursor_k;
1090 int ret_k;
1091 int error;
1092
1093 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
1094 if (error) {
1095 return error;
1096 }
1097
1098 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
1099 if (error) {
1100 return error;
1101 }
1102
1103 error = copyout(&ret_k, ret_u, sizeof(ret_k));
1104 if (error) {
1105 return error;
1106 }
1107
1108 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1109 }
1110
1111 static int
1112 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
1113 {
1114 struct quotakcursor cursor_k;
1115 int error;
1116
1117 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
1118 if (error) {
1119 return error;
1120 }
1121
1122 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
1123 if (error) {
1124 return error;
1125 }
1126
1127 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
1128 }
1129
1130 static int
1131 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
1132 {
1133 char *path_k;
1134 int error;
1135
1136 /* XXX this should probably be a struct pathbuf */
1137 path_k = PNBUF_GET();
1138 error = copyin(path_u, path_k, PATH_MAX);
1139 if (error) {
1140 PNBUF_PUT(path_k);
1141 return error;
1142 }
1143
1144 error = vfs_quotactl_quotaon(mp, idtype, path_k);
1145
1146 PNBUF_PUT(path_k);
1147 return error;
1148 }
1149
/*
 * QUOTACTL_QUOTAOFF: forward to vfs_quotactl_quotaoff() for idtype.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int error;

	error = vfs_quotactl_quotaoff(mp, idtype);

	return error;
}
1156
1157 int
1158 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
1159 {
1160 struct mount *mp;
1161 struct vnode *vp;
1162 int error;
1163
1164 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
1165 if (error != 0)
1166 return (error);
1167 mp = vp->v_mount;
1168
1169 switch (args->qc_op) {
1170 case QUOTACTL_STAT:
1171 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
1172 break;
1173 case QUOTACTL_IDTYPESTAT:
1174 error = do_sys_quotactl_idtypestat(mp,
1175 args->u.idtypestat.qc_idtype,
1176 args->u.idtypestat.qc_info);
1177 break;
1178 case QUOTACTL_OBJTYPESTAT:
1179 error = do_sys_quotactl_objtypestat(mp,
1180 args->u.objtypestat.qc_objtype,
1181 args->u.objtypestat.qc_info);
1182 break;
1183 case QUOTACTL_GET:
1184 error = do_sys_quotactl_get(mp,
1185 args->u.get.qc_key,
1186 args->u.get.qc_val);
1187 break;
1188 case QUOTACTL_PUT:
1189 error = do_sys_quotactl_put(mp,
1190 args->u.put.qc_key,
1191 args->u.put.qc_val);
1192 break;
1193 case QUOTACTL_DEL:
1194 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1195 break;
1196 case QUOTACTL_CURSOROPEN:
1197 error = do_sys_quotactl_cursoropen(mp,
1198 args->u.cursoropen.qc_cursor);
1199 break;
1200 case QUOTACTL_CURSORCLOSE:
1201 error = do_sys_quotactl_cursorclose(mp,
1202 args->u.cursorclose.qc_cursor);
1203 break;
1204 case QUOTACTL_CURSORSKIPIDTYPE:
1205 error = do_sys_quotactl_cursorskipidtype(mp,
1206 args->u.cursorskipidtype.qc_cursor,
1207 args->u.cursorskipidtype.qc_idtype);
1208 break;
1209 case QUOTACTL_CURSORGET:
1210 error = do_sys_quotactl_cursorget(mp,
1211 args->u.cursorget.qc_cursor,
1212 args->u.cursorget.qc_keys,
1213 args->u.cursorget.qc_vals,
1214 args->u.cursorget.qc_maxnum,
1215 args->u.cursorget.qc_ret);
1216 break;
1217 case QUOTACTL_CURSORATEND:
1218 error = do_sys_quotactl_cursoratend(mp,
1219 args->u.cursoratend.qc_cursor,
1220 args->u.cursoratend.qc_ret);
1221 break;
1222 case QUOTACTL_CURSORREWIND:
1223 error = do_sys_quotactl_cursorrewind(mp,
1224 args->u.cursorrewind.qc_cursor);
1225 break;
1226 case QUOTACTL_QUOTAON:
1227 error = do_sys_quotactl_quotaon(mp,
1228 args->u.quotaon.qc_idtype,
1229 args->u.quotaon.qc_quotafile);
1230 break;
1231 case QUOTACTL_QUOTAOFF:
1232 error = do_sys_quotactl_quotaoff(mp,
1233 args->u.quotaoff.qc_idtype);
1234 break;
1235 default:
1236 error = EINVAL;
1237 break;
1238 }
1239
1240 vrele(vp);
1241 return error;
1242 }
1243
1244 /* ARGSUSED */
1245 int
1246 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1247 register_t *retval)
1248 {
1249 /* {
1250 syscallarg(const char *) path;
1251 syscallarg(struct quotactl_args *) args;
1252 } */
1253 struct quotactl_args args;
1254 int error;
1255
1256 error = copyin(SCARG(uap, args), &args, sizeof(args));
1257 if (error) {
1258 return error;
1259 }
1260
1261 return do_sys_quotactl(SCARG(uap, path), &args);
1262 }
1263
1264 int
1265 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1266 int root)
1267 {
1268 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1269 bool chrooted;
1270 int error = 0;
1271
1272 KASSERT(l == curlwp);
1273
1274 /*
1275 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL,
1276 * since it would imply chroots can be escaped. Just make sure this
1277 * routine is self-consistent.
1278 */
1279 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL);
1280
1281 /*
1282 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1283 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1284 * overrides MNT_NOWAIT.
1285 */
1286 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1287 (flags != MNT_WAIT && flags != 0)) {
1288 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1289 } else {
1290 /* Get the filesystem stats now */
1291 memset(sp, 0, sizeof(*sp));
1292 if ((error = VFS_STATVFS(mp, sp)) != 0)
1293 return error;
1294 if (!chrooted)
1295 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1296 }
1297
1298 if (chrooted) {
1299 size_t len;
1300 char *bp;
1301 char c;
1302 char *path = PNBUF_GET();
1303
1304 bp = path + MAXPATHLEN;
1305 *--bp = '\0';
1306 rw_enter(&cwdi->cwdi_lock, RW_READER);
1307 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1308 MAXPATHLEN / 2, 0, l);
1309 rw_exit(&cwdi->cwdi_lock);
1310 if (error) {
1311 PNBUF_PUT(path);
1312 return error;
1313 }
1314 len = strlen(bp);
1315 if (len != 1) {
1316 /*
1317 * for mount points that are below our root, we can see
1318 * them, so we fix up the pathname and return them. The
1319 * rest we cannot see, so we don't allow viewing the
1320 * data.
1321 */
1322 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1323 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1324 (void)strlcpy(sp->f_mntonname,
1325 c == '\0' ? "/" : &sp->f_mntonname[len],
1326 sizeof(sp->f_mntonname));
1327 } else {
1328 if (root)
1329 (void)strlcpy(sp->f_mntonname, "/",
1330 sizeof(sp->f_mntonname));
1331 else
1332 error = EPERM;
1333 }
1334 }
1335 PNBUF_PUT(path);
1336 }
1337 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1338 return error;
1339 }
1340
1341 /*
1342 * Get filesystem statistics by path.
1343 */
1344 int
1345 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1346 {
1347 struct mount *mp;
1348 int error;
1349 struct vnode *vp;
1350
1351 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1352 if (error != 0)
1353 return error;
1354 mp = vp->v_mount;
1355 error = dostatvfs(mp, sb, l, flags, 1);
1356 vrele(vp);
1357 return error;
1358 }
1359
1360 /* ARGSUSED */
1361 int
1362 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap,
1363 register_t *retval)
1364 {
1365 /* {
1366 syscallarg(const char *) path;
1367 syscallarg(struct statvfs *) buf;
1368 syscallarg(int) flags;
1369 } */
1370 struct statvfs *sb;
1371 int error;
1372
1373 sb = STATVFSBUF_GET();
1374 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1375 if (error == 0)
1376 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1377 STATVFSBUF_PUT(sb);
1378 return error;
1379 }
1380
1381 /*
1382 * Get filesystem statistics by fd.
1383 */
1384 int
1385 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1386 {
1387 file_t *fp;
1388 struct mount *mp;
1389 int error;
1390
1391 /* fd_getvnode() will use the descriptor for us */
1392 if ((error = fd_getvnode(fd, &fp)) != 0)
1393 return (error);
1394 mp = fp->f_vnode->v_mount;
1395 error = dostatvfs(mp, sb, curlwp, flags, 1);
1396 fd_putfile(fd);
1397 return error;
1398 }
1399
1400 /* ARGSUSED */
1401 int
1402 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap,
1403 register_t *retval)
1404 {
1405 /* {
1406 syscallarg(int) fd;
1407 syscallarg(struct statvfs *) buf;
1408 syscallarg(int) flags;
1409 } */
1410 struct statvfs *sb;
1411 int error;
1412
1413 sb = STATVFSBUF_GET();
1414 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1415 if (error == 0)
1416 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1417 STATVFSBUF_PUT(sb);
1418 return error;
1419 }
1420
1421 /*
1422 * Get statistics on all filesystems.
1423 */
1424 int
1425 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1426 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1427 register_t *retval)
1428 {
1429 int root = 0;
1430 mount_iterator_t *iter;
1431 struct proc *p = l->l_proc;
1432 struct mount *mp;
1433 struct statvfs *sb;
1434 size_t count, maxcount;
1435 int error = 0;
1436
1437 sb = STATVFSBUF_GET();
1438 maxcount = bufsize / entry_sz;
1439 count = 0;
1440 mountlist_iterator_init(&iter);
1441 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1442 if (sfsp && count < maxcount) {
1443 error = dostatvfs(mp, sb, l, flags, 0);
1444 if (error) {
1445 error = 0;
1446 continue;
1447 }
1448 error = copyfn(sb, sfsp, entry_sz);
1449 if (error)
1450 goto out;
1451 sfsp = (char *)sfsp + entry_sz;
1452 root |= strcmp(sb->f_mntonname, "/") == 0;
1453 }
1454 count++;
1455 }
1456
1457 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1458 /*
1459 * fake a root entry
1460 */
1461 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1462 sb, l, flags, 1);
1463 if (error != 0)
1464 goto out;
1465 if (sfsp) {
1466 error = copyfn(sb, sfsp, entry_sz);
1467 if (error != 0)
1468 goto out;
1469 }
1470 count++;
1471 }
1472 if (sfsp && count > maxcount)
1473 *retval = maxcount;
1474 else
1475 *retval = count;
1476 out:
1477 mountlist_iterator_destroy(iter);
1478 STATVFSBUF_PUT(sb);
1479 return error;
1480 }
1481
1482 int
1483 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap,
1484 register_t *retval)
1485 {
1486 /* {
1487 syscallarg(struct statvfs *) buf;
1488 syscallarg(size_t) bufsize;
1489 syscallarg(int) flags;
1490 } */
1491
1492 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1493 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1494 }
1495
1496 /*
1497 * Change current working directory to a given file descriptor.
1498 */
1499 int
1500 do_sys_fchdir(struct lwp *l, int fd, register_t *retval)
1501 {
1502 struct proc *p = l->l_proc;
1503 struct cwdinfo *cwdi;
1504 struct vnode *vp, *tdp;
1505 struct mount *mp;
1506 file_t *fp;
1507 int error;
1508
1509 /* fd_getvnode() will use the descriptor for us */
1510 if ((error = fd_getvnode(fd, &fp)) != 0)
1511 return error;
1512 vp = fp->f_vnode;
1513
1514 vref(vp);
1515 vn_lock(vp, LK_SHARED | LK_RETRY);
1516 if (vp->v_type != VDIR)
1517 error = ENOTDIR;
1518 else
1519 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1520 if (error) {
1521 vput(vp);
1522 goto out;
1523 }
1524 while ((mp = vp->v_mountedhere) != NULL) {
1525 error = vfs_busy(mp);
1526 vput(vp);
1527 if (error != 0)
1528 goto out;
1529 error = VFS_ROOT(mp, LK_SHARED, &tdp);
1530 vfs_unbusy(mp);
1531 if (error)
1532 goto out;
1533 vp = tdp;
1534 }
1535 VOP_UNLOCK(vp);
1536
1537 /*
1538 * Disallow changing to a directory not under the process's
1539 * current root directory (if there is one).
1540 */
1541 cwdi = p->p_cwdi;
1542 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1543 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1544 vrele(vp);
1545 error = EPERM; /* operation not permitted */
1546 } else {
1547 vrele(cwdi->cwdi_cdir);
1548 cwdi->cwdi_cdir = vp;
1549 }
1550 rw_exit(&cwdi->cwdi_lock);
1551
1552 out:
1553 fd_putfile(fd);
1554 return error;
1555 }
1556
1557 /*
1558 * Change current working directory to a given file descriptor.
1559 */
1560 /* ARGSUSED */
1561 int
1562 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap,
1563 register_t *retval)
1564 {
1565 /* {
1566 syscallarg(int) fd;
1567 } */
1568
1569 return do_sys_fchdir(l, SCARG(uap, fd), retval);
1570 }
1571
1572 /*
1573 * Change this process's notion of the root directory to a given file
1574 * descriptor.
1575 */
1576 int
1577 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap,
1578 register_t *retval)
1579 {
1580 struct vnode *vp;
1581 file_t *fp;
1582 int error, fd = SCARG(uap, fd);
1583
1584 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1585 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1586 return error;
1587 /* fd_getvnode() will use the descriptor for us */
1588 if ((error = fd_getvnode(fd, &fp)) != 0)
1589 return error;
1590 vp = fp->f_vnode;
1591 vn_lock(vp, LK_SHARED | LK_RETRY);
1592 if (vp->v_type != VDIR)
1593 error = ENOTDIR;
1594 else
1595 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1596 VOP_UNLOCK(vp);
1597 if (error)
1598 goto out;
1599 vref(vp);
1600 change_root(vp);
1601
1602 out:
1603 fd_putfile(fd);
1604 return (error);
1605 }
1606
1607 /*
1608 * Change current working directory (``.'').
1609 */
1610 int
1611 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg,
1612 register_t *retval)
1613 {
1614 struct proc *p = l->l_proc;
1615 struct cwdinfo * cwdi;
1616 int error;
1617 struct vnode *vp;
1618
1619 if ((error = chdir_lookup(path, seg, &vp, l)) != 0)
1620 return error;
1621 cwdi = p->p_cwdi;
1622 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1623 vrele(cwdi->cwdi_cdir);
1624 cwdi->cwdi_cdir = vp;
1625 rw_exit(&cwdi->cwdi_lock);
1626 return 0;
1627 }
1628
1629 /*
1630 * Change current working directory (``.'').
1631 */
1632 /* ARGSUSED */
1633 int
1634 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1635 {
1636 /* {
1637 syscallarg(const char *) path;
1638 } */
1639
1640 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval);
1641 }
1642
1643 /*
1644 * Change notion of root (``/'') directory.
1645 */
1646 /* ARGSUSED */
1647 int
1648 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap,
1649 register_t *retval)
1650 {
1651 /* {
1652 syscallarg(const char *) path;
1653 } */
1654 int error;
1655 struct vnode *vp;
1656
1657 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1658 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1659 return (error);
1660
1661 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l);
1662 if (error == 0)
1663 change_root(vp);
1664 return error;
1665 }
1666
1667 /*
1668 * Common routine for chroot and fchroot.
1669 * NB: callers need to properly authorize the change root operation.
1670 */
1671 void
1672 change_root(struct vnode *vp)
1673 {
1674 kauth_cred_t ncred;
1675 struct lwp *l = curlwp;
1676 struct proc *p = l->l_proc;
1677 struct cwdinfo *cwdi = p->p_cwdi;
1678
1679 ncred = kauth_cred_alloc();
1680
1681 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1682 if (cwdi->cwdi_rdir != NULL)
1683 vrele(cwdi->cwdi_rdir);
1684 cwdi->cwdi_rdir = vp;
1685
1686 /*
1687 * Prevent escaping from chroot by putting the root under
1688 * the working directory. Silently chdir to / if we aren't
1689 * already there.
1690 */
1691 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1692 /*
1693 * XXX would be more failsafe to change directory to a
1694 * deadfs node here instead
1695 */
1696 vrele(cwdi->cwdi_cdir);
1697 vref(vp);
1698 cwdi->cwdi_cdir = vp;
1699 }
1700 rw_exit(&cwdi->cwdi_lock);
1701
1702 /* Get a write lock on the process credential. */
1703 proc_crmod_enter();
1704
1705 kauth_cred_clone(p->p_cred, ncred);
1706 kauth_proc_chroot(ncred, p->p_cwdi);
1707
1708 /* Broadcast our credentials to the process and other LWPs. */
1709 proc_crmod_leave(ncred, p->p_cred, true);
1710 }
1711
1712 /*
1713 * Common routine for chroot and chdir.
1714 * XXX "where" should be enum uio_seg
1715 */
1716 int
1717 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1718 {
1719 struct pathbuf *pb;
1720 struct nameidata nd;
1721 int error;
1722
1723 error = pathbuf_maybe_copyin(path, where, &pb);
1724 if (error) {
1725 return error;
1726 }
1727 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb);
1728 if ((error = namei(&nd)) != 0) {
1729 pathbuf_destroy(pb);
1730 return error;
1731 }
1732 *vpp = nd.ni_vp;
1733 pathbuf_destroy(pb);
1734
1735 if ((*vpp)->v_type != VDIR)
1736 error = ENOTDIR;
1737 else
1738 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1739
1740 if (error)
1741 vput(*vpp);
1742 else
1743 VOP_UNLOCK(*vpp);
1744 return (error);
1745 }
1746
1747 /*
1748 * Internals of sys_open - path has already been converted into a pathbuf
1749 * (so we can easily reuse this function from other parts of the kernel,
1750 * like posix_spawn post-processing).
1751 */
1752 int
1753 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1754 int open_mode, int *fd)
1755 {
1756 struct proc *p = l->l_proc;
1757 struct cwdinfo *cwdi = p->p_cwdi;
1758 file_t *fp;
1759 struct vnode *vp;
1760 int dupfd;
1761 bool dupfd_move;
1762 int flags, cmode;
1763 int indx, error;
1764
1765 if (open_flags & O_SEARCH) {
1766 open_flags &= ~(int)O_SEARCH;
1767 }
1768
1769 /*
1770 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
1771 * may be specified.
1772 */
1773 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE))
1774 return EINVAL;
1775
1776 flags = FFLAGS(open_flags);
1777 if ((flags & (FREAD | FWRITE)) == 0)
1778 return EINVAL;
1779
1780 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1781 return error;
1782 }
1783
1784 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1785 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1786
1787 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode,
1788 &vp, &dupfd_move, &dupfd);
1789 if (error != 0) {
1790 fd_abort(p, fp, indx);
1791 return error;
1792 }
1793
1794 if (vp == NULL) {
1795 fd_abort(p, fp, indx);
1796 error = fd_dupopen(dupfd, dupfd_move, flags, &indx);
1797 if (error)
1798 return error;
1799 *fd = indx;
1800 } else {
1801 error = open_setfp(l, fp, vp, indx, flags);
1802 if (error)
1803 return error;
1804 VOP_UNLOCK(vp);
1805 *fd = indx;
1806 fd_affix(p, fp, indx);
1807 }
1808
1809 return 0;
1810 }
1811
1812 int
1813 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1814 {
1815 struct pathbuf *pb;
1816 int error, oflags;
1817
1818 oflags = FFLAGS(open_flags);
1819 if ((oflags & (FREAD | FWRITE)) == 0)
1820 return EINVAL;
1821
1822 pb = pathbuf_create(path);
1823 if (pb == NULL)
1824 return ENOMEM;
1825
1826 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1827 pathbuf_destroy(pb);
1828
1829 return error;
1830 }
1831
1832 static int
1833 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1834 int mode, int *fd)
1835 {
1836 file_t *dfp = NULL;
1837 struct vnode *dvp = NULL;
1838 struct pathbuf *pb;
1839 const char *pathstring = NULL;
1840 int error;
1841
1842 if (path == NULL) {
1843 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error);
1844 if (error == ENOSYS)
1845 goto no_compat;
1846 if (error)
1847 return error;
1848 } else {
1849 no_compat:
1850 error = pathbuf_copyin(path, &pb);
1851 if (error)
1852 return error;
1853 }
1854
1855 pathstring = pathbuf_stringcopy_get(pb);
1856
1857 /*
1858 * fdat is ignored if:
1859 * 1) if fdat is AT_FDCWD, which means use current directory as base.
1860 * 2) if path is absolute, then fdat is useless.
1861 */
1862 if (fdat != AT_FDCWD && pathstring[0] != '/') {
1863 /* fd_getvnode() will use the descriptor for us */
1864 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1865 goto out;
1866
1867 dvp = dfp->f_vnode;
1868 }
1869
1870 error = do_open(l, dvp, pb, flags, mode, fd);
1871
1872 if (dfp != NULL)
1873 fd_putfile(fdat);
1874 out:
1875 pathbuf_stringcopy_put(pb, pathstring);
1876 pathbuf_destroy(pb);
1877 return error;
1878 }
1879
1880 int
1881 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1882 {
1883 /* {
1884 syscallarg(const char *) path;
1885 syscallarg(int) flags;
1886 syscallarg(int) mode;
1887 } */
1888 int error;
1889 int fd;
1890
1891 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1892 SCARG(uap, flags), SCARG(uap, mode), &fd);
1893
1894 if (error == 0)
1895 *retval = fd;
1896
1897 return error;
1898 }
1899
1900 int
1901 sys_openat(struct lwp *l, const struct sys_openat_args *uap,
1902 register_t *retval)
1903 {
1904 /* {
1905 syscallarg(int) fd;
1906 syscallarg(const char *) path;
1907 syscallarg(int) oflags;
1908 syscallarg(int) mode;
1909 } */
1910 int error;
1911 int fd;
1912
1913 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1914 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1915
1916 if (error == 0)
1917 *retval = fd;
1918
1919 return error;
1920 }
1921
1922 static void
1923 vfs__fhfree(fhandle_t *fhp)
1924 {
1925 size_t fhsize;
1926
1927 fhsize = FHANDLE_SIZE(fhp);
1928 kmem_free(fhp, fhsize);
1929 }
1930
1931 /*
1932 * vfs_composefh: compose a filehandle.
1933 */
1934
1935 int
1936 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1937 {
1938 struct mount *mp;
1939 struct fid *fidp;
1940 int error;
1941 size_t needfhsize;
1942 size_t fidsize;
1943
1944 mp = vp->v_mount;
1945 fidp = NULL;
1946 if (*fh_size < FHANDLE_SIZE_MIN) {
1947 fidsize = 0;
1948 } else {
1949 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1950 if (fhp != NULL) {
1951 memset(fhp, 0, *fh_size);
1952 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1953 fidp = &fhp->fh_fid;
1954 }
1955 }
1956 error = VFS_VPTOFH(vp, fidp, &fidsize);
1957 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1958 if (error == 0 && *fh_size < needfhsize) {
1959 error = E2BIG;
1960 }
1961 *fh_size = needfhsize;
1962 return error;
1963 }
1964
1965 int
1966 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1967 {
1968 struct mount *mp;
1969 fhandle_t *fhp;
1970 size_t fhsize;
1971 size_t fidsize;
1972 int error;
1973
1974 mp = vp->v_mount;
1975 fidsize = 0;
1976 error = VFS_VPTOFH(vp, NULL, &fidsize);
1977 KASSERT(error != 0);
1978 if (error != E2BIG) {
1979 goto out;
1980 }
1981 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1982 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1983 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1984 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1985 if (error == 0) {
1986 KASSERT(FHANDLE_SIZE(fhp) == fhsize);
1987 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize);
1988 *fhpp = fhp;
1989 } else {
1990 kmem_free(fhp, fhsize);
1991 }
1992 out:
1993 return error;
1994 }
1995
1996 void
1997 vfs_composefh_free(fhandle_t *fhp)
1998 {
1999
2000 vfs__fhfree(fhp);
2001 }
2002
2003 /*
2004 * vfs_fhtovp: lookup a vnode by a filehandle.
2005 */
2006
2007 int
2008 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
2009 {
2010 struct mount *mp;
2011 int error;
2012
2013 *vpp = NULL;
2014 mp = vfs_getvfs(FHANDLE_FSID(fhp));
2015 if (mp == NULL) {
2016 error = ESTALE;
2017 goto out;
2018 }
2019 if (mp->mnt_op->vfs_fhtovp == NULL) {
2020 error = EOPNOTSUPP;
2021 goto out;
2022 }
2023 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp);
2024 out:
2025 return error;
2026 }
2027
2028 /*
2029 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
2030 * the needed size.
2031 */
2032
2033 int
2034 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
2035 {
2036 fhandle_t *fhp;
2037 int error;
2038
2039 if (fhsize > FHANDLE_SIZE_MAX) {
2040 return EINVAL;
2041 }
2042 if (fhsize < FHANDLE_SIZE_MIN) {
2043 return EINVAL;
2044 }
2045 again:
2046 fhp = kmem_alloc(fhsize, KM_SLEEP);
2047 error = copyin(ufhp, fhp, fhsize);
2048 if (error == 0) {
2049 /* XXX this check shouldn't be here */
2050 if (FHANDLE_SIZE(fhp) == fhsize) {
2051 *fhpp = fhp;
2052 return 0;
2053 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
2054 /*
2055 * a kludge for nfsv2 padded handles.
2056 */
2057 size_t sz;
2058
2059 sz = FHANDLE_SIZE(fhp);
2060 kmem_free(fhp, fhsize);
2061 fhsize = sz;
2062 goto again;
2063 } else {
2064 /*
2065 * userland told us wrong size.
2066 */
2067 error = EINVAL;
2068 }
2069 }
2070 kmem_free(fhp, fhsize);
2071 return error;
2072 }
2073
2074 void
2075 vfs_copyinfh_free(fhandle_t *fhp)
2076 {
2077
2078 vfs__fhfree(fhp);
2079 }
2080
2081 /*
2082 * Get file handle system call
2083 */
2084 int
2085 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap,
2086 register_t *retval)
2087 {
2088 /* {
2089 syscallarg(char *) fname;
2090 syscallarg(fhandle_t *) fhp;
2091 syscallarg(size_t *) fh_size;
2092 } */
2093 struct vnode *vp;
2094 fhandle_t *fh;
2095 int error;
2096 struct pathbuf *pb;
2097 struct nameidata nd;
2098 size_t sz;
2099 size_t usz;
2100
2101 /*
2102 * Must be super user
2103 */
2104 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2105 0, NULL, NULL, NULL);
2106 if (error)
2107 return (error);
2108
2109 error = pathbuf_copyin(SCARG(uap, fname), &pb);
2110 if (error) {
2111 return error;
2112 }
2113 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2114 error = namei(&nd);
2115 if (error) {
2116 pathbuf_destroy(pb);
2117 return error;
2118 }
2119 vp = nd.ni_vp;
2120 pathbuf_destroy(pb);
2121
2122 error = vfs_composefh_alloc(vp, &fh);
2123 vput(vp);
2124 if (error != 0) {
2125 return error;
2126 }
2127 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
2128 if (error != 0) {
2129 goto out;
2130 }
2131 sz = FHANDLE_SIZE(fh);
2132 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
2133 if (error != 0) {
2134 goto out;
2135 }
2136 if (usz >= sz) {
2137 error = copyout(fh, SCARG(uap, fhp), sz);
2138 } else {
2139 error = E2BIG;
2140 }
2141 out:
2142 vfs_composefh_free(fh);
2143 return (error);
2144 }
2145
2146 /*
2147 * Open a file given a file handle.
2148 *
2149 * Check permissions, allocate an open file structure,
2150 * and call the device open routine if any.
2151 */
2152
2153 int
2154 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
2155 register_t *retval)
2156 {
2157 file_t *fp;
2158 struct vnode *vp = NULL;
2159 kauth_cred_t cred = l->l_cred;
2160 file_t *nfp;
2161 int indx, error;
2162 struct vattr va;
2163 fhandle_t *fh;
2164 int flags;
2165 proc_t *p;
2166
2167 p = curproc;
2168
2169 /*
2170 * Must be super user
2171 */
2172 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2173 0, NULL, NULL, NULL)))
2174 return (error);
2175
2176 if (oflags & O_SEARCH) {
2177 oflags &= ~(int)O_SEARCH;
2178 }
2179
2180 flags = FFLAGS(oflags);
2181 if ((flags & (FREAD | FWRITE)) == 0)
2182 return (EINVAL);
2183 if ((flags & O_CREAT))
2184 return (EINVAL);
2185 if ((error = fd_allocfile(&nfp, &indx)) != 0)
2186 return (error);
2187 fp = nfp;
2188 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2189 if (error != 0) {
2190 goto bad;
2191 }
2192 error = vfs_fhtovp(fh, &vp);
2193 vfs_copyinfh_free(fh);
2194 if (error != 0) {
2195 goto bad;
2196 }
2197
2198 /* Now do an effective vn_open */
2199
2200 if (vp->v_type == VSOCK) {
2201 error = EOPNOTSUPP;
2202 goto bad;
2203 }
2204 error = vn_openchk(vp, cred, flags);
2205 if (error != 0)
2206 goto bad;
2207 if (flags & O_TRUNC) {
2208 VOP_UNLOCK(vp); /* XXX */
2209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
2210 vattr_null(&va);
2211 va.va_size = 0;
2212 error = VOP_SETATTR(vp, &va, cred);
2213 if (error)
2214 goto bad;
2215 }
2216 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
2217 goto bad;
2218 if (flags & FWRITE) {
2219 mutex_enter(vp->v_interlock);
2220 vp->v_writecount++;
2221 mutex_exit(vp->v_interlock);
2222 }
2223
2224 /* done with modified vn_open, now finish what sys_open does. */
2225 if ((error = open_setfp(l, fp, vp, indx, flags)))
2226 return error;
2227
2228 VOP_UNLOCK(vp);
2229 *retval = indx;
2230 fd_affix(p, fp, indx);
2231 return (0);
2232
2233 bad:
2234 fd_abort(p, fp, indx);
2235 if (vp != NULL)
2236 vput(vp);
2237 if (error == EDUPFD || error == EMOVEFD) {
2238 /* XXX should probably close curlwp->l_dupfd */
2239 error = EOPNOTSUPP;
2240 }
2241 return (error);
2242 }
2243
2244 int
2245 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap,
2246 register_t *retval)
2247 {
2248 /* {
2249 syscallarg(const void *) fhp;
2250 syscallarg(size_t) fh_size;
2251 syscallarg(int) flags;
2252 } */
2253
2254 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2255 SCARG(uap, flags), retval);
2256 }
2257
2258 int
2259 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2260 {
2261 int error;
2262 fhandle_t *fh;
2263 struct vnode *vp;
2264
2265 /*
2266 * Must be super user
2267 */
2268 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2269 0, NULL, NULL, NULL)))
2270 return error;
2271
2272 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2273 if (error != 0)
2274 return error;
2275
2276 error = vfs_fhtovp(fh, &vp);
2277 vfs_copyinfh_free(fh);
2278 if (error != 0)
2279 return error;
2280
2281 error = vn_stat(vp, sb);
2282 vput(vp);
2283 return error;
2284 }
2285
2286 /* ARGSUSED */
2287 int
2288 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap,
2289 register_t *retval)
2290 {
2291 /* {
2292 syscallarg(const void *) fhp;
2293 syscallarg(size_t) fh_size;
2294 syscallarg(struct stat *) sb;
2295 } */
2296 struct stat sb;
2297 int error;
2298
2299 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2300 if (error)
2301 return error;
2302 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2303 }
2304
2305 int
2306 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize,
2307 struct statvfs *sb, int flags)
2308 {
2309 fhandle_t *fh;
2310 struct mount *mp;
2311 struct vnode *vp;
2312 int error;
2313
2314 /*
2315 * Must be super user
2316 */
2317 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2318 0, NULL, NULL, NULL)))
2319 return error;
2320
2321 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2322 if (error != 0)
2323 return error;
2324
2325 error = vfs_fhtovp(fh, &vp);
2326 vfs_copyinfh_free(fh);
2327 if (error != 0)
2328 return error;
2329
2330 mp = vp->v_mount;
2331 error = dostatvfs(mp, sb, l, flags, 1);
2332 vput(vp);
2333 return error;
2334 }
2335
2336 /* ARGSUSED */
2337 int
2338 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap,
2339 register_t *retval)
2340 {
2341 /* {
2342 syscallarg(const void *) fhp;
2343 syscallarg(size_t) fh_size;
2344 syscallarg(struct statvfs *) buf;
2345 syscallarg(int) flags;
2346 } */
2347 struct statvfs *sb = STATVFSBUF_GET();
2348 int error;
2349
2350 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2351 SCARG(uap, flags));
2352 if (error == 0)
2353 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2354 STATVFSBUF_PUT(sb);
2355 return error;
2356 }
2357
2358 int
2359 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2360 dev_t dev)
2361 {
2362
2363 /*
2364 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO
2365 * in mode and dev=0.
2366 *
2367 * In all the other cases it's implementation defined behavior.
2368 */
2369
2370 if ((mode & S_IFIFO) && dev == 0)
2371 return do_sys_mkfifoat(l, fdat, pathname, mode);
2372 else
2373 return do_sys_mknodat(l, fdat, pathname, mode, dev,
2374 UIO_USERSPACE);
2375 }
2376
2377 /*
2378 * Create a special file.
2379 */
2380 /* ARGSUSED */
2381 int
2382 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2383 register_t *retval)
2384 {
2385 /* {
2386 syscallarg(const char *) path;
2387 syscallarg(mode_t) mode;
2388 syscallarg(dev_t) dev;
2389 } */
2390 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path),
2391 SCARG(uap, mode), SCARG(uap, dev));
2392 }
2393
2394 int
2395 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2396 register_t *retval)
2397 {
2398 /* {
2399 syscallarg(int) fd;
2400 syscallarg(const char *) path;
2401 syscallarg(mode_t) mode;
2402 syscallarg(int) pad;
2403 syscallarg(dev_t) dev;
2404 } */
2405
2406 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2407 SCARG(uap, mode), SCARG(uap, dev));
2408 }
2409
2410 int
2411 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2412 enum uio_seg seg)
2413 {
2414 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg);
2415 }
2416
2417 int
2418 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2419 dev_t dev, enum uio_seg seg)
2420 {
2421 struct proc *p = l->l_proc;
2422 struct vnode *vp;
2423 struct vattr vattr;
2424 int error, optype;
2425 struct pathbuf *pb;
2426 struct nameidata nd;
2427 const char *pathstring;
2428
2429 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2430 0, NULL, NULL, NULL)) != 0)
2431 return (error);
2432
2433 optype = VOP_MKNOD_DESCOFFSET;
2434
2435 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2436 if (error) {
2437 return error;
2438 }
2439 pathstring = pathbuf_stringcopy_get(pb);
2440 if (pathstring == NULL) {
2441 pathbuf_destroy(pb);
2442 return ENOMEM;
2443 }
2444
2445 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2446
2447 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2448 goto out;
2449 vp = nd.ni_vp;
2450
2451 if (vp != NULL)
2452 error = EEXIST;
2453 else {
2454 vattr_null(&vattr);
2455 /* We will read cwdi->cwdi_cmask unlocked. */
2456 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2457 vattr.va_rdev = dev;
2458
2459 switch (mode & S_IFMT) {
2460 case S_IFMT: /* used by badsect to flag bad sectors */
2461 vattr.va_type = VBAD;
2462 break;
2463 case S_IFCHR:
2464 vattr.va_type = VCHR;
2465 break;
2466 case S_IFBLK:
2467 vattr.va_type = VBLK;
2468 break;
2469 case S_IFWHT:
2470 optype = VOP_WHITEOUT_DESCOFFSET;
2471 break;
2472 case S_IFREG:
2473 #if NVERIEXEC > 0
2474 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2475 O_CREAT);
2476 #endif /* NVERIEXEC > 0 */
2477 vattr.va_type = VREG;
2478 vattr.va_rdev = VNOVAL;
2479 optype = VOP_CREATE_DESCOFFSET;
2480 break;
2481 default:
2482 error = EINVAL;
2483 break;
2484 }
2485
2486 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET &&
2487 vattr.va_rdev == VNOVAL)
2488 error = EINVAL;
2489 }
2490
2491 if (!error) {
2492 switch (optype) {
2493 case VOP_WHITEOUT_DESCOFFSET:
2494 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2495 if (error)
2496 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2497 vput(nd.ni_dvp);
2498 break;
2499
2500 case VOP_MKNOD_DESCOFFSET:
2501 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2502 &nd.ni_cnd, &vattr);
2503 if (error == 0)
2504 vrele(nd.ni_vp);
2505 vput(nd.ni_dvp);
2506 break;
2507
2508 case VOP_CREATE_DESCOFFSET:
2509 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2510 &nd.ni_cnd, &vattr);
2511 if (error == 0)
2512 vrele(nd.ni_vp);
2513 vput(nd.ni_dvp);
2514 break;
2515 }
2516 } else {
2517 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2518 if (nd.ni_dvp == vp)
2519 vrele(nd.ni_dvp);
2520 else
2521 vput(nd.ni_dvp);
2522 if (vp)
2523 vrele(vp);
2524 }
2525 out:
2526 pathbuf_stringcopy_put(pb, pathstring);
2527 pathbuf_destroy(pb);
2528 return (error);
2529 }
2530
2531 /*
2532 * Create a named pipe.
2533 */
2534 /* ARGSUSED */
2535 int
2536 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap,
2537 register_t *retval)
2538 {
2539 /* {
2540 syscallarg(const char *) path;
2541 syscallarg(int) mode;
2542 } */
2543
2544 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path),
2545 SCARG(uap, mode));
2546 }
2547
2548 int
2549 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2550 register_t *retval)
2551 {
2552 /* {
2553 syscallarg(int) fd;
2554 syscallarg(const char *) path;
2555 syscallarg(int) mode;
2556 } */
2557
2558 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2559 SCARG(uap, mode));
2560 }
2561
2562 static int
2563 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2564 {
2565 struct proc *p = l->l_proc;
2566 struct vattr vattr;
2567 int error;
2568 struct pathbuf *pb;
2569 struct nameidata nd;
2570
2571 error = pathbuf_copyin(path, &pb);
2572 if (error) {
2573 return error;
2574 }
2575 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2576
2577 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2578 pathbuf_destroy(pb);
2579 return error;
2580 }
2581 if (nd.ni_vp != NULL) {
2582 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2583 if (nd.ni_dvp == nd.ni_vp)
2584 vrele(nd.ni_dvp);
2585 else
2586 vput(nd.ni_dvp);
2587 vrele(nd.ni_vp);
2588 pathbuf_destroy(pb);
2589 return (EEXIST);
2590 }
2591 vattr_null(&vattr);
2592 vattr.va_type = VFIFO;
2593 /* We will read cwdi->cwdi_cmask unlocked. */
2594 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2595 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2596 if (error == 0)
2597 vrele(nd.ni_vp);
2598 vput(nd.ni_dvp);
2599 pathbuf_destroy(pb);
2600 return (error);
2601 }
2602
2603 /*
2604 * Make a hard file link.
2605 */
2606 /* ARGSUSED */
2607 int
2608 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2609 const char *link, int follow, register_t *retval)
2610 {
2611 struct vnode *vp;
2612 struct pathbuf *linkpb;
2613 struct nameidata nd;
2614 namei_simple_flags_t ns_flags;
2615 int error;
2616
2617 if (follow & AT_SYMLINK_FOLLOW)
2618 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2619 else
2620 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2621
2622 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2623 if (error != 0)
2624 return (error);
2625 error = pathbuf_copyin(link, &linkpb);
2626 if (error) {
2627 goto out1;
2628 }
2629 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2630 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2631 goto out2;
2632 if (nd.ni_vp) {
2633 error = EEXIST;
2634 goto abortop;
2635 }
2636 /* Prevent hard links on directories. */
2637 if (vp->v_type == VDIR) {
2638 error = EPERM;
2639 goto abortop;
2640 }
2641 /* Prevent cross-mount operation. */
2642 if (nd.ni_dvp->v_mount != vp->v_mount) {
2643 error = EXDEV;
2644 goto abortop;
2645 }
2646 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2647 VOP_UNLOCK(nd.ni_dvp);
2648 vrele(nd.ni_dvp);
2649 out2:
2650 pathbuf_destroy(linkpb);
2651 out1:
2652 vrele(vp);
2653 return (error);
2654 abortop:
2655 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2656 if (nd.ni_dvp == nd.ni_vp)
2657 vrele(nd.ni_dvp);
2658 else
2659 vput(nd.ni_dvp);
2660 if (nd.ni_vp != NULL)
2661 vrele(nd.ni_vp);
2662 goto out2;
2663 }
2664
2665 int
2666 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2667 {
2668 /* {
2669 syscallarg(const char *) path;
2670 syscallarg(const char *) link;
2671 } */
2672 const char *path = SCARG(uap, path);
2673 const char *link = SCARG(uap, link);
2674
2675 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2676 AT_SYMLINK_FOLLOW, retval);
2677 }
2678
2679 int
2680 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2681 register_t *retval)
2682 {
2683 /* {
2684 syscallarg(int) fd1;
2685 syscallarg(const char *) name1;
2686 syscallarg(int) fd2;
2687 syscallarg(const char *) name2;
2688 syscallarg(int) flags;
2689 } */
2690 int fd1 = SCARG(uap, fd1);
2691 const char *name1 = SCARG(uap, name1);
2692 int fd2 = SCARG(uap, fd2);
2693 const char *name2 = SCARG(uap, name2);
2694 int follow;
2695
2696 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2697
2698 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2699 }
2700
2701 int
2702 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2703 {
2704
2705 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2706 }
2707
2708 static int
2709 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2710 const char *link, enum uio_seg seg)
2711 {
2712 struct proc *p = curproc;
2713 struct vattr vattr;
2714 char *path;
2715 int error;
2716 size_t len;
2717 struct pathbuf *linkpb;
2718 struct nameidata nd;
2719
2720 KASSERT(l != NULL || fdat == AT_FDCWD);
2721
2722 path = PNBUF_GET();
2723 if (seg == UIO_USERSPACE) {
2724 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0)
2725 goto out1;
2726 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2727 goto out1;
2728 } else {
2729 len = strlen(patharg) + 1;
2730 KASSERT(len <= MAXPATHLEN);
2731 memcpy(path, patharg, len);
2732 linkpb = pathbuf_create(link);
2733 if (linkpb == NULL) {
2734 error = ENOMEM;
2735 goto out1;
2736 }
2737 }
2738 ktrkuser("symlink-target", path, len - 1);
2739
2740 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2741 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2742 goto out2;
2743 if (nd.ni_vp) {
2744 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2745 if (nd.ni_dvp == nd.ni_vp)
2746 vrele(nd.ni_dvp);
2747 else
2748 vput(nd.ni_dvp);
2749 vrele(nd.ni_vp);
2750 error = EEXIST;
2751 goto out2;
2752 }
2753 vattr_null(&vattr);
2754 vattr.va_type = VLNK;
2755 /* We will read cwdi->cwdi_cmask unlocked. */
2756 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2757 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2758 if (error == 0)
2759 vrele(nd.ni_vp);
2760 vput(nd.ni_dvp);
2761 out2:
2762 pathbuf_destroy(linkpb);
2763 out1:
2764 PNBUF_PUT(path);
2765 return (error);
2766 }
2767
2768 /*
2769 * Make a symbolic link.
2770 */
2771 /* ARGSUSED */
2772 int
2773 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2774 {
2775 /* {
2776 syscallarg(const char *) path;
2777 syscallarg(const char *) link;
2778 } */
2779
2780 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2781 UIO_USERSPACE);
2782 }
2783
2784 int
2785 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2786 register_t *retval)
2787 {
2788 /* {
2789 syscallarg(const char *) path1;
2790 syscallarg(int) fd;
2791 syscallarg(const char *) path2;
2792 } */
2793
2794 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2795 SCARG(uap, path2), UIO_USERSPACE);
2796 }
2797
2798 /*
2799 * Delete a whiteout from the filesystem.
2800 */
2801 /* ARGSUSED */
2802 int
2803 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap,
2804 register_t *retval)
2805 {
2806 /* {
2807 syscallarg(const char *) path;
2808 } */
2809 int error;
2810 struct pathbuf *pb;
2811 struct nameidata nd;
2812
2813 error = pathbuf_copyin(SCARG(uap, path), &pb);
2814 if (error) {
2815 return error;
2816 }
2817
2818 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2819 error = namei(&nd);
2820 if (error) {
2821 pathbuf_destroy(pb);
2822 return (error);
2823 }
2824
2825 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2826 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2827 if (nd.ni_dvp == nd.ni_vp)
2828 vrele(nd.ni_dvp);
2829 else
2830 vput(nd.ni_dvp);
2831 if (nd.ni_vp)
2832 vrele(nd.ni_vp);
2833 pathbuf_destroy(pb);
2834 return (EEXIST);
2835 }
2836 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2837 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2838 vput(nd.ni_dvp);
2839 pathbuf_destroy(pb);
2840 return (error);
2841 }
2842
2843 /*
2844 * Delete a name from the filesystem.
2845 */
2846 /* ARGSUSED */
2847 int
2848 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap,
2849 register_t *retval)
2850 {
2851 /* {
2852 syscallarg(const char *) path;
2853 } */
2854
2855 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0,
2856 UIO_USERSPACE);
2857 }
2858
2859 int
2860 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2861 register_t *retval)
2862 {
2863 /* {
2864 syscallarg(int) fd;
2865 syscallarg(const char *) path;
2866 syscallarg(int) flag;
2867 } */
2868
2869 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2870 SCARG(uap, flag), UIO_USERSPACE);
2871 }
2872
2873 int
2874 do_sys_unlink(const char *arg, enum uio_seg seg)
2875 {
2876
2877 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2878 }
2879
2880 static int
2881 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2882 enum uio_seg seg)
2883 {
2884 struct vnode *vp;
2885 int error;
2886 struct pathbuf *pb;
2887 struct nameidata nd;
2888 const char *pathstring;
2889
2890 KASSERT(l != NULL || fdat == AT_FDCWD);
2891
2892 error = pathbuf_maybe_copyin(arg, seg, &pb);
2893 if (error) {
2894 return error;
2895 }
2896 pathstring = pathbuf_stringcopy_get(pb);
2897 if (pathstring == NULL) {
2898 pathbuf_destroy(pb);
2899 return ENOMEM;
2900 }
2901
2902 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2903 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2904 goto out;
2905 vp = nd.ni_vp;
2906
2907 /*
2908 * The root of a mounted filesystem cannot be deleted.
2909 */
2910 if ((vp->v_vflag & VV_ROOT) != 0) {
2911 error = EBUSY;
2912 goto abort;
2913 }
2914
2915 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2916 error = EBUSY;
2917 goto abort;
2918 }
2919
2920 /*
2921 * No rmdir "." please.
2922 */
2923 if (nd.ni_dvp == vp) {
2924 error = EINVAL;
2925 goto abort;
2926 }
2927
2928 /*
2929 * AT_REMOVEDIR is required to remove a directory
2930 */
2931 if (vp->v_type == VDIR) {
2932 if (!(flags & AT_REMOVEDIR)) {
2933 error = EPERM;
2934 goto abort;
2935 } else {
2936 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2937 vput(nd.ni_dvp);
2938 goto out;
2939 }
2940 }
2941
2942 /*
2943 * Starting here we only deal with non directories.
2944 */
2945 if (flags & AT_REMOVEDIR) {
2946 error = ENOTDIR;
2947 goto abort;
2948 }
2949
2950 #if NVERIEXEC > 0
2951 /* Handle remove requests for veriexec entries. */
2952 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2953 goto abort;
2954 }
2955 #endif /* NVERIEXEC > 0 */
2956
2957 #ifdef FILEASSOC
2958 (void)fileassoc_file_delete(vp);
2959 #endif /* FILEASSOC */
2960 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2961 vput(nd.ni_dvp);
2962 goto out;
2963
2964 abort:
2965 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2966 if (nd.ni_dvp == vp)
2967 vrele(nd.ni_dvp);
2968 else
2969 vput(nd.ni_dvp);
2970 vput(vp);
2971
2972 out:
2973 pathbuf_stringcopy_put(pb, pathstring);
2974 pathbuf_destroy(pb);
2975 return (error);
2976 }
2977
2978 /*
2979 * Reposition read/write file offset.
2980 */
2981 int
2982 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2983 {
2984 /* {
2985 syscallarg(int) fd;
2986 syscallarg(int) pad;
2987 syscallarg(off_t) offset;
2988 syscallarg(int) whence;
2989 } */
2990 file_t *fp;
2991 int error, fd;
2992
2993 switch (SCARG(uap, whence)) {
2994 case SEEK_CUR:
2995 case SEEK_END:
2996 case SEEK_SET:
2997 break;
2998 default:
2999 return EINVAL;
3000 }
3001
3002 fd = SCARG(uap, fd);
3003
3004 if ((fp = fd_getfile(fd)) == NULL)
3005 return (EBADF);
3006
3007 if (fp->f_ops->fo_seek == NULL) {
3008 error = ESPIPE;
3009 goto out;
3010 }
3011
3012 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset),
3013 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET);
3014 out:
3015 fd_putfile(fd);
3016 return (error);
3017 }
3018
3019 /*
3020 * Positional read system call.
3021 */
3022 int
3023 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
3024 {
3025 /* {
3026 syscallarg(int) fd;
3027 syscallarg(void *) buf;
3028 syscallarg(size_t) nbyte;
3029 syscallarg(off_t) offset;
3030 } */
3031 file_t *fp;
3032 off_t offset;
3033 int error, fd = SCARG(uap, fd);
3034
3035 if ((fp = fd_getfile(fd)) == NULL)
3036 return (EBADF);
3037
3038 if ((fp->f_flag & FREAD) == 0) {
3039 fd_putfile(fd);
3040 return (EBADF);
3041 }
3042
3043 if (fp->f_ops->fo_seek == NULL) {
3044 error = ESPIPE;
3045 goto out;
3046 }
3047
3048 offset = SCARG(uap, offset);
3049 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
3050 if (error)
3051 goto out;
3052
3053 /* dofileread() will unuse the descriptor for us */
3054 return dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
3055 &offset, 0, retval);
3056
3057 out:
3058 fd_putfile(fd);
3059 return (error);
3060 }
3061
3062 /*
3063 * Positional scatter read system call.
3064 */
3065 int
3066 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap,
3067 register_t *retval)
3068 {
3069 /* {
3070 syscallarg(int) fd;
3071 syscallarg(const struct iovec *) iovp;
3072 syscallarg(int) iovcnt;
3073 syscallarg(off_t) offset;
3074 } */
3075 off_t offset = SCARG(uap, offset);
3076
3077 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
3078 SCARG(uap, iovcnt), &offset, 0, retval);
3079 }
3080
3081 /*
3082 * Positional write system call.
3083 */
3084 int
3085 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap,
3086 register_t *retval)
3087 {
3088 /* {
3089 syscallarg(int) fd;
3090 syscallarg(const void *) buf;
3091 syscallarg(size_t) nbyte;
3092 syscallarg(off_t) offset;
3093 } */
3094 file_t *fp;
3095 off_t offset;
3096 int error, fd = SCARG(uap, fd);
3097
3098 if ((fp = fd_getfile(fd)) == NULL)
3099 return (EBADF);
3100
3101 if ((fp->f_flag & FWRITE) == 0) {
3102 fd_putfile(fd);
3103 return (EBADF);
3104 }
3105
3106 if (fp->f_ops->fo_seek == NULL) {
3107 error = ESPIPE;
3108 goto out;
3109 }
3110
3111 offset = SCARG(uap, offset);
3112 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
3113 if (error)
3114 goto out;
3115
3116 /* dofilewrite() will unuse the descriptor for us */
3117 return dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
3118 &offset, 0, retval);
3119
3120 out:
3121 fd_putfile(fd);
3122 return (error);
3123 }
3124
3125 /*
3126 * Positional gather write system call.
3127 */
3128 int
3129 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap,
3130 register_t *retval)
3131 {
3132 /* {
3133 syscallarg(int) fd;
3134 syscallarg(const struct iovec *) iovp;
3135 syscallarg(int) iovcnt;
3136 syscallarg(off_t) offset;
3137 } */
3138 off_t offset = SCARG(uap, offset);
3139
3140 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
3141 SCARG(uap, iovcnt), &offset, 0, retval);
3142 }
3143
3144 /*
3145 * Check access permissions.
3146 */
3147 int
3148 sys_access(struct lwp *l, const struct sys_access_args *uap,
3149 register_t *retval)
3150 {
3151 /* {
3152 syscallarg(const char *) path;
3153 syscallarg(int) flags;
3154 } */
3155
3156 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
3157 SCARG(uap, flags), 0);
3158 }
3159
3160 int
3161 do_sys_accessat(struct lwp *l, int fdat, const char *path,
3162 int mode, int flags)
3163 {
3164 kauth_cred_t cred;
3165 struct vnode *vp;
3166 int error, nd_flag, vmode;
3167 struct pathbuf *pb;
3168 struct nameidata nd;
3169
3170 CTASSERT(F_OK == 0);
3171 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
3172 /* nonsense mode */
3173 return EINVAL;
3174 }
3175
3176 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT;
3177 if (flags & AT_SYMLINK_NOFOLLOW)
3178 nd_flag &= ~FOLLOW;
3179
3180 error = pathbuf_copyin(path, &pb);
3181 if (error)
3182 return error;
3183
3184 NDINIT(&nd, LOOKUP, nd_flag, pb);
3185
3186 /* Override default credentials */
3187 if (!(flags & AT_EACCESS)) {
3188 cred = kauth_cred_dup(l->l_cred);
3189 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
3190 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
3191 } else
3192 cred = l->l_cred;
3193 nd.ni_cnd.cn_cred = cred;
3194
3195 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3196 pathbuf_destroy(pb);
3197 goto out;
3198 }
3199 vp = nd.ni_vp;
3200 pathbuf_destroy(pb);
3201
3202 /* Flags == 0 means only check for existence. */
3203 if (mode) {
3204 vmode = 0;
3205 if (mode & R_OK)
3206 vmode |= VREAD;
3207 if (mode & W_OK)
3208 vmode |= VWRITE;
3209 if (mode & X_OK)
3210 vmode |= VEXEC;
3211
3212 error = VOP_ACCESS(vp, vmode, cred);
3213 if (!error && (vmode & VWRITE))
3214 error = vn_writechk(vp);
3215 }
3216 vput(vp);
3217 out:
3218 if (!(flags & AT_EACCESS))
3219 kauth_cred_free(cred);
3220 return (error);
3221 }
3222
3223 int
3224 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3225 register_t *retval)
3226 {
3227 /* {
3228 syscallarg(int) fd;
3229 syscallarg(const char *) path;
3230 syscallarg(int) amode;
3231 syscallarg(int) flag;
3232 } */
3233
3234 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3235 SCARG(uap, amode), SCARG(uap, flag));
3236 }
3237
3238 /*
3239 * Common code for all sys_stat functions, including compat versions.
3240 */
3241 int
3242 do_sys_stat(const char *userpath, unsigned int nd_flag, struct stat *sb)
3243 {
3244
3245 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3246 }
3247
3248 int
3249 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3250 unsigned int nd_flag, struct stat *sb)
3251 {
3252 int error;
3253 struct pathbuf *pb;
3254 struct nameidata nd;
3255
3256 KASSERT(l != NULL || fdat == AT_FDCWD);
3257
3258 error = pathbuf_copyin(userpath, &pb);
3259 if (error) {
3260 return error;
3261 }
3262
3263 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3264
3265 error = fd_nameiat(l, fdat, &nd);
3266 if (error != 0) {
3267 pathbuf_destroy(pb);
3268 return error;
3269 }
3270 error = vn_stat(nd.ni_vp, sb);
3271 vput(nd.ni_vp);
3272 pathbuf_destroy(pb);
3273 return error;
3274 }
3275
3276 /*
3277 * Get file status; this version follows links.
3278 */
3279 /* ARGSUSED */
3280 int
3281 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap,
3282 register_t *retval)
3283 {
3284 /* {
3285 syscallarg(const char *) path;
3286 syscallarg(struct stat *) ub;
3287 } */
3288 struct stat sb;
3289 int error;
3290
3291 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3292 if (error)
3293 return error;
3294 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3295 }
3296
3297 /*
3298 * Get file status; this version does not follow links.
3299 */
3300 /* ARGSUSED */
3301 int
3302 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap,
3303 register_t *retval)
3304 {
3305 /* {
3306 syscallarg(const char *) path;
3307 syscallarg(struct stat *) ub;
3308 } */
3309 struct stat sb;
3310 int error;
3311
3312 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3313 if (error)
3314 return error;
3315 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3316 }
3317
3318 int
3319 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3320 register_t *retval)
3321 {
3322 /* {
3323 syscallarg(int) fd;
3324 syscallarg(const char *) path;
3325 syscallarg(struct stat *) buf;
3326 syscallarg(int) flag;
3327 } */
3328 unsigned int nd_flag;
3329 struct stat sb;
3330 int error;
3331
3332 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3333 nd_flag = NOFOLLOW;
3334 else
3335 nd_flag = FOLLOW;
3336
3337 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3338 &sb);
3339 if (error)
3340 return error;
3341 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3342 }
3343
3344 static int
3345 kern_pathconf(register_t *retval, const char *path, int name, int flag)
3346 {
3347 int error;
3348 struct pathbuf *pb;
3349 struct nameidata nd;
3350
3351 error = pathbuf_copyin(path, &pb);
3352 if (error) {
3353 return error;
3354 }
3355 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb);
3356 if ((error = namei(&nd)) != 0) {
3357 pathbuf_destroy(pb);
3358 return error;
3359 }
3360 error = VOP_PATHCONF(nd.ni_vp, name, retval);
3361 vput(nd.ni_vp);
3362 pathbuf_destroy(pb);
3363 return error;
3364 }
3365
3366 /*
3367 * Get configurable pathname variables.
3368 */
3369 /* ARGSUSED */
3370 int
3371 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap,
3372 register_t *retval)
3373 {
3374 /* {
3375 syscallarg(const char *) path;
3376 syscallarg(int) name;
3377 } */
3378
3379 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name),
3380 FOLLOW);
3381 }
3382
3383 /* ARGSUSED */
3384 int
3385 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap,
3386 register_t *retval)
3387 {
3388 /* {
3389 syscallarg(const char *) path;
3390 syscallarg(int) name;
3391 } */
3392
3393 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name),
3394 NOFOLLOW);
3395 }
3396
3397 /*
3398 * Return target name of a symbolic link.
3399 */
3400 /* ARGSUSED */
3401 int
3402 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3403 register_t *retval)
3404 {
3405 /* {
3406 syscallarg(const char *) path;
3407 syscallarg(char *) buf;
3408 syscallarg(size_t) count;
3409 } */
3410
3411 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3412 SCARG(uap, buf), SCARG(uap, count), retval);
3413 }
3414
3415 static int
3416 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3417 size_t count, register_t *retval)
3418 {
3419 struct vnode *vp;
3420 struct iovec aiov;
3421 struct uio auio;
3422 int error;
3423 struct pathbuf *pb;
3424 struct nameidata nd;
3425
3426 error = pathbuf_copyin(path, &pb);
3427 if (error) {
3428 return error;
3429 }
3430 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT,
3431 pb);
3432 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3433 pathbuf_destroy(pb);
3434 return error;
3435 }
3436 vp = nd.ni_vp;
3437 pathbuf_destroy(pb);
3438 if (vp->v_type != VLNK)
3439 error = EINVAL;
3440 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3441 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3442 aiov.iov_base = buf;
3443 aiov.iov_len = count;
3444 auio.uio_iov = &aiov;
3445 auio.uio_iovcnt = 1;
3446 auio.uio_offset = 0;
3447 auio.uio_rw = UIO_READ;
3448 KASSERT(l == curlwp);
3449 auio.uio_vmspace = l->l_proc->p_vmspace;
3450 auio.uio_resid = count;
3451 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3452 *retval = count - auio.uio_resid;
3453 }
3454 vput(vp);
3455 return (error);
3456 }
3457
3458 int
3459 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3460 register_t *retval)
3461 {
3462 /* {
3463 syscallarg(int) fd;
3464 syscallarg(const char *) path;
3465 syscallarg(char *) buf;
3466 syscallarg(size_t) bufsize;
3467 } */
3468
3469 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3470 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3471 }
3472
3473 /*
3474 * Change flags of a file given a path name.
3475 */
3476 /* ARGSUSED */
3477 int
3478 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap,
3479 register_t *retval)
3480 {
3481 /* {
3482 syscallarg(const char *) path;
3483 syscallarg(u_long) flags;
3484 } */
3485 struct vnode *vp;
3486 int error;
3487
3488 error = namei_simple_user(SCARG(uap, path),
3489 NSM_FOLLOW_TRYEMULROOT, &vp);
3490 if (error != 0)
3491 return (error);
3492 error = change_flags(vp, SCARG(uap, flags), l);
3493 vput(vp);
3494 return (error);
3495 }
3496
3497 /*
3498 * Change flags of a file given a file descriptor.
3499 */
3500 /* ARGSUSED */
3501 int
3502 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap,
3503 register_t *retval)
3504 {
3505 /* {
3506 syscallarg(int) fd;
3507 syscallarg(u_long) flags;
3508 } */
3509 struct vnode *vp;
3510 file_t *fp;
3511 int error;
3512
3513 /* fd_getvnode() will use the descriptor for us */
3514 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3515 return (error);
3516 vp = fp->f_vnode;
3517 error = change_flags(vp, SCARG(uap, flags), l);
3518 VOP_UNLOCK(vp);
3519 fd_putfile(SCARG(uap, fd));
3520 return (error);
3521 }
3522
3523 /*
3524 * Change flags of a file given a path name; this version does
3525 * not follow links.
3526 */
3527 int
3528 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap,
3529 register_t *retval)
3530 {
3531 /* {
3532 syscallarg(const char *) path;
3533 syscallarg(u_long) flags;
3534 } */
3535 struct vnode *vp;
3536 int error;
3537
3538 error = namei_simple_user(SCARG(uap, path),
3539 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3540 if (error != 0)
3541 return (error);
3542 error = change_flags(vp, SCARG(uap, flags), l);
3543 vput(vp);
3544 return (error);
3545 }
3546
3547 /*
3548 * Common routine to change flags of a file.
3549 */
3550 int
3551 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3552 {
3553 struct vattr vattr;
3554 int error;
3555
3556 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3557
3558 vattr_null(&vattr);
3559 vattr.va_flags = flags;
3560 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3561
3562 return (error);
3563 }
3564
3565 /*
3566 * Change mode of a file given path name; this version follows links.
3567 */
3568 /* ARGSUSED */
3569 int
3570 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3571 {
3572 /* {
3573 syscallarg(const char *) path;
3574 syscallarg(int) mode;
3575 } */
3576
3577 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3578 SCARG(uap, mode), 0);
3579 }
3580
3581 int
3582 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3583 {
3584 int error;
3585 struct vnode *vp;
3586 namei_simple_flags_t ns_flag;
3587
3588 if (flags & AT_SYMLINK_NOFOLLOW)
3589 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3590 else
3591 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3592
3593 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3594 if (error != 0)
3595 return error;
3596
3597 error = change_mode(vp, mode, l);
3598
3599 vrele(vp);
3600
3601 return (error);
3602 }
3603
3604 /*
3605 * Change mode of a file given a file descriptor.
3606 */
3607 /* ARGSUSED */
3608 int
3609 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap,
3610 register_t *retval)
3611 {
3612 /* {
3613 syscallarg(int) fd;
3614 syscallarg(int) mode;
3615 } */
3616 file_t *fp;
3617 int error;
3618
3619 /* fd_getvnode() will use the descriptor for us */
3620 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3621 return (error);
3622 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3623 fd_putfile(SCARG(uap, fd));
3624 return (error);
3625 }
3626
3627 int
3628 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3629 register_t *retval)
3630 {
3631 /* {
3632 syscallarg(int) fd;
3633 syscallarg(const char *) path;
3634 syscallarg(int) mode;
3635 syscallarg(int) flag;
3636 } */
3637
3638 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3639 SCARG(uap, mode), SCARG(uap, flag));
3640 }
3641
3642 /*
3643 * Change mode of a file given path name; this version does not follow links.
3644 */
3645 /* ARGSUSED */
3646 int
3647 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap,
3648 register_t *retval)
3649 {
3650 /* {
3651 syscallarg(const char *) path;
3652 syscallarg(int) mode;
3653 } */
3654 int error;
3655 struct vnode *vp;
3656
3657 error = namei_simple_user(SCARG(uap, path),
3658 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3659 if (error != 0)
3660 return (error);
3661
3662 error = change_mode(vp, SCARG(uap, mode), l);
3663
3664 vrele(vp);
3665 return (error);
3666 }
3667
3668 /*
3669 * Common routine to set mode given a vnode.
3670 */
3671 static int
3672 change_mode(struct vnode *vp, int mode, struct lwp *l)
3673 {
3674 struct vattr vattr;
3675 int error;
3676
3677 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3678 vattr_null(&vattr);
3679 vattr.va_mode = mode & ALLPERMS;
3680 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3681 VOP_UNLOCK(vp);
3682 return (error);
3683 }
3684
3685 /*
3686 * Set ownership given a path name; this version follows links.
3687 */
3688 /* ARGSUSED */
3689 int
3690 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3691 {
3692 /* {
3693 syscallarg(const char *) path;
3694 syscallarg(uid_t) uid;
3695 syscallarg(gid_t) gid;
3696 } */
3697 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3698 SCARG(uap, gid), 0);
3699 }
3700
3701 int
3702 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3703 gid_t gid, int flags)
3704 {
3705 int error;
3706 struct vnode *vp;
3707 namei_simple_flags_t ns_flag;
3708
3709 if (flags & AT_SYMLINK_NOFOLLOW)
3710 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3711 else
3712 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3713
3714 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3715 if (error != 0)
3716 return error;
3717
3718 error = change_owner(vp, uid, gid, l, 0);
3719
3720 vrele(vp);
3721
3722 return (error);
3723 }
3724
3725 /*
3726 * Set ownership given a path name; this version follows links.
3727 * Provides POSIX semantics.
3728 */
3729 /* ARGSUSED */
3730 int
3731 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap,
3732 register_t *retval)
3733 {
3734 /* {
3735 syscallarg(const char *) path;
3736 syscallarg(uid_t) uid;
3737 syscallarg(gid_t) gid;
3738 } */
3739 int error;
3740 struct vnode *vp;
3741
3742 error = namei_simple_user(SCARG(uap, path),
3743 NSM_FOLLOW_TRYEMULROOT, &vp);
3744 if (error != 0)
3745 return (error);
3746
3747 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3748
3749 vrele(vp);
3750 return (error);
3751 }
3752
3753 /*
3754 * Set ownership given a file descriptor.
3755 */
3756 /* ARGSUSED */
3757 int
3758 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap,
3759 register_t *retval)
3760 {
3761 /* {
3762 syscallarg(int) fd;
3763 syscallarg(uid_t) uid;
3764 syscallarg(gid_t) gid;
3765 } */
3766 int error;
3767 file_t *fp;
3768
3769 /* fd_getvnode() will use the descriptor for us */
3770 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3771 return (error);
3772 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3773 l, 0);
3774 fd_putfile(SCARG(uap, fd));
3775 return (error);
3776 }
3777
3778 int
3779 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3780 register_t *retval)
3781 {
3782 /* {
3783 syscallarg(int) fd;
3784 syscallarg(const char *) path;
3785 syscallarg(uid_t) owner;
3786 syscallarg(gid_t) group;
3787 syscallarg(int) flag;
3788 } */
3789
3790 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3791 SCARG(uap, owner), SCARG(uap, group),
3792 SCARG(uap, flag));
3793 }
3794
3795 /*
3796 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3797 */
3798 /* ARGSUSED */
3799 int
3800 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap,
3801 register_t *retval)
3802 {
3803 /* {
3804 syscallarg(int) fd;
3805 syscallarg(uid_t) uid;
3806 syscallarg(gid_t) gid;
3807 } */
3808 int error;
3809 file_t *fp;
3810
3811 /* fd_getvnode() will use the descriptor for us */
3812 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3813 return (error);
3814 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3815 l, 1);
3816 fd_putfile(SCARG(uap, fd));
3817 return (error);
3818 }
3819
3820 /*
3821 * Set ownership given a path name; this version does not follow links.
3822 */
3823 /* ARGSUSED */
3824 int
3825 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap,
3826 register_t *retval)
3827 {
3828 /* {
3829 syscallarg(const char *) path;
3830 syscallarg(uid_t) uid;
3831 syscallarg(gid_t) gid;
3832 } */
3833 int error;
3834 struct vnode *vp;
3835
3836 error = namei_simple_user(SCARG(uap, path),
3837 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3838 if (error != 0)
3839 return (error);
3840
3841 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3842
3843 vrele(vp);
3844 return (error);
3845 }
3846
3847 /*
3848 * Set ownership given a path name; this version does not follow links.
3849 * Provides POSIX/XPG semantics.
3850 */
3851 /* ARGSUSED */
3852 int
3853 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap,
3854 register_t *retval)
3855 {
3856 /* {
3857 syscallarg(const char *) path;
3858 syscallarg(uid_t) uid;
3859 syscallarg(gid_t) gid;
3860 } */
3861 int error;
3862 struct vnode *vp;
3863
3864 error = namei_simple_user(SCARG(uap, path),
3865 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3866 if (error != 0)
3867 return (error);
3868
3869 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3870
3871 vrele(vp);
3872 return (error);
3873 }
3874
3875 /*
3876 * Common routine to set ownership given a vnode.
3877 */
3878 static int
3879 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3880 int posix_semantics)
3881 {
3882 struct vattr vattr;
3883 mode_t newmode;
3884 int error;
3885
3886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3887 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3888 goto out;
3889
3890 #define CHANGED(x) ((int)(x) != -1)
3891 newmode = vattr.va_mode;
3892 if (posix_semantics) {
3893 /*
3894 * POSIX/XPG semantics: if the caller is not the super-user,
3895 * clear set-user-id and set-group-id bits. Both POSIX and
3896 * the XPG consider the behaviour for calls by the super-user
3897 * implementation-defined; we leave the set-user-id and set-
3898 * group-id settings intact in that case.
3899 */
3900 if (vattr.va_mode & S_ISUID) {
3901 if (kauth_authorize_vnode(l->l_cred,
3902 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3903 newmode &= ~S_ISUID;
3904 }
3905 if (vattr.va_mode & S_ISGID) {
3906 if (kauth_authorize_vnode(l->l_cred,
3907 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3908 newmode &= ~S_ISGID;
3909 }
3910 } else {
3911 /*
3912 * NetBSD semantics: when changing owner and/or group,
3913 * clear the respective bit(s).
3914 */
3915 if (CHANGED(uid))
3916 newmode &= ~S_ISUID;
3917 if (CHANGED(gid))
3918 newmode &= ~S_ISGID;
3919 }
3920 /* Update va_mode iff altered. */
3921 if (vattr.va_mode == newmode)
3922 newmode = VNOVAL;
3923
3924 vattr_null(&vattr);
3925 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3926 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3927 vattr.va_mode = newmode;
3928 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3929 #undef CHANGED
3930
3931 out:
3932 VOP_UNLOCK(vp);
3933 return (error);
3934 }
3935
3936 /*
3937 * Set the access and modification times given a path name; this
3938 * version follows links.
3939 */
3940 /* ARGSUSED */
3941 int
3942 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3943 register_t *retval)
3944 {
3945 /* {
3946 syscallarg(const char *) path;
3947 syscallarg(const struct timeval *) tptr;
3948 } */
3949
3950 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3951 SCARG(uap, tptr), UIO_USERSPACE);
3952 }
3953
3954 /*
3955 * Set the access and modification times given a file descriptor.
3956 */
3957 /* ARGSUSED */
3958 int
3959 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3960 register_t *retval)
3961 {
3962 /* {
3963 syscallarg(int) fd;
3964 syscallarg(const struct timeval *) tptr;
3965 } */
3966 int error;
3967 file_t *fp;
3968
3969 /* fd_getvnode() will use the descriptor for us */
3970 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3971 return (error);
3972 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3973 UIO_USERSPACE);
3974 fd_putfile(SCARG(uap, fd));
3975 return (error);
3976 }
3977
3978 int
3979 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3980 register_t *retval)
3981 {
3982 /* {
3983 syscallarg(int) fd;
3984 syscallarg(const struct timespec *) tptr;
3985 } */
3986 int error;
3987 file_t *fp;
3988
3989 /* fd_getvnode() will use the descriptor for us */
3990 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3991 return (error);
3992 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3993 SCARG(uap, tptr), UIO_USERSPACE);
3994 fd_putfile(SCARG(uap, fd));
3995 return (error);
3996 }
3997
3998 /*
3999 * Set the access and modification times given a path name; this
4000 * version does not follow links.
4001 */
4002 int
4003 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
4004 register_t *retval)
4005 {
4006 /* {
4007 syscallarg(const char *) path;
4008 syscallarg(const struct timeval *) tptr;
4009 } */
4010
4011 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
4012 SCARG(uap, tptr), UIO_USERSPACE);
4013 }
4014
4015 int
4016 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
4017 register_t *retval)
4018 {
4019 /* {
4020 syscallarg(int) fd;
4021 syscallarg(const char *) path;
4022 syscallarg(const struct timespec *) tptr;
4023 syscallarg(int) flag;
4024 } */
4025 int follow;
4026 const struct timespec *tptr;
4027 int error;
4028
4029 tptr = SCARG(uap, tptr);
4030 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
4031
4032 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
4033 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
4034
4035 return error;
4036 }
4037
4038 /*
4039 * Common routine to set access and modification times given a vnode.
4040 */
4041 int
4042 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
4043 const struct timespec *tptr, enum uio_seg seg)
4044 {
4045
4046 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
4047 }
4048
4049 int
4050 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
4051 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
4052 {
4053 struct vattr vattr;
4054 int error, dorele = 0;
4055 namei_simple_flags_t sflags;
4056 bool vanull, setbirthtime;
4057 struct timespec ts[2];
4058
4059 KASSERT(l != NULL || fdat == AT_FDCWD);
4060
4061 /*
4062 * I have checked all callers and they pass either FOLLOW,
4063 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
4064 * is 0. More to the point, they don't pass anything else.
4065 * Let's keep it that way at least until the namei interfaces
4066 * are fully sanitized.
4067 */
4068 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
4069 sflags = (flag == FOLLOW) ?
4070 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
4071
4072 if (tptr == NULL) {
4073 vanull = true;
4074 nanotime(&ts[0]);
4075 ts[1] = ts[0];
4076 } else {
4077 vanull = false;
4078 if (seg != UIO_SYSSPACE) {
4079 error = copyin(tptr, ts, sizeof (ts));
4080 if (error != 0)
4081 return error;
4082 } else {
4083 ts[0] = tptr[0];
4084 ts[1] = tptr[1];
4085 }
4086 }
4087
4088 if (ts[0].tv_nsec == UTIME_NOW) {
4089 nanotime(&ts[0]);
4090 if (ts[1].tv_nsec == UTIME_NOW) {
4091 vanull = true;
4092 ts[1] = ts[0];
4093 }
4094 } else if (ts[1].tv_nsec == UTIME_NOW)
4095 nanotime(&ts[1]);
4096
4097 if (vp == NULL) {
4098 /* note: SEG describes TPTR, not PATH; PATH is always user */
4099 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
4100 if (error != 0)
4101 return error;
4102 dorele = 1;
4103 }
4104
4105 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4106 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
4107 timespeccmp(&ts[1], &vattr.va_birthtime, <));
4108 vattr_null(&vattr);
4109
4110 if (ts[0].tv_nsec != UTIME_OMIT)
4111 vattr.va_atime = ts[0];
4112
4113 if (ts[1].tv_nsec != UTIME_OMIT) {
4114 vattr.va_mtime = ts[1];
4115 if (setbirthtime)
4116 vattr.va_birthtime = ts[1];
4117 }
4118
4119 if (vanull)
4120 vattr.va_vaflags |= VA_UTIMES_NULL;
4121 error = VOP_SETATTR(vp, &vattr, l->l_cred);
4122 VOP_UNLOCK(vp);
4123
4124 if (dorele != 0)
4125 vrele(vp);
4126
4127 return error;
4128 }
4129
4130 int
4131 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
4132 const struct timeval *tptr, enum uio_seg seg)
4133 {
4134 struct timespec ts[2];
4135 struct timespec *tsptr = NULL;
4136 int error;
4137
4138 if (tptr != NULL) {
4139 struct timeval tv[2];
4140
4141 if (seg != UIO_SYSSPACE) {
4142 error = copyin(tptr, tv, sizeof(tv));
4143 if (error != 0)
4144 return error;
4145 tptr = tv;
4146 }
4147
4148 if ((tptr[0].tv_usec == UTIME_NOW) ||
4149 (tptr[0].tv_usec == UTIME_OMIT))
4150 ts[0].tv_nsec = tptr[0].tv_usec;
4151 else {
4152 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000)
4153 return EINVAL;
4154
4155 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
4156 }
4157
4158 if ((tptr[1].tv_usec == UTIME_NOW) ||
4159 (tptr[1].tv_usec == UTIME_OMIT))
4160 ts[1].tv_nsec = tptr[1].tv_usec;
4161 else {
4162 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000)
4163 return EINVAL;
4164
4165 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
4166 }
4167
4168 tsptr = &ts[0];
4169 }
4170
4171 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
4172 }
4173
4174 /*
4175 * Truncate a file given its path name.
4176 */
4177 /* ARGSUSED */
4178 int
4179 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap,
4180 register_t *retval)
4181 {
4182 /* {
4183 syscallarg(const char *) path;
4184 syscallarg(int) pad;
4185 syscallarg(off_t) length;
4186 } */
4187 struct vnode *vp;
4188 struct vattr vattr;
4189 int error;
4190
4191 if (SCARG(uap, length) < 0)
4192 return EINVAL;
4193
4194 error = namei_simple_user(SCARG(uap, path),
4195 NSM_FOLLOW_TRYEMULROOT, &vp);
4196 if (error != 0)
4197 return (error);
4198 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4199 if (vp->v_type == VDIR)
4200 error = EISDIR;
4201 else if ((error = vn_writechk(vp)) == 0 &&
4202 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
4203 vattr_null(&vattr);
4204 vattr.va_size = SCARG(uap, length);
4205 error = VOP_SETATTR(vp, &vattr, l->l_cred);
4206 }
4207 vput(vp);
4208 return (error);
4209 }
4210
4211 /*
4212 * Truncate a file given a file descriptor.
4213 */
4214 /* ARGSUSED */
4215 int
4216 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap,
4217 register_t *retval)
4218 {
4219 /* {
4220 syscallarg(int) fd;
4221 syscallarg(int) pad;
4222 syscallarg(off_t) length;
4223 } */
4224 file_t *fp;
4225 int error, fd = SCARG(uap, fd);
4226
4227 fp = fd_getfile(fd);
4228 if (fp == NULL)
4229 return EBADF;
4230 if (fp->f_ops->fo_truncate == NULL)
4231 error = EOPNOTSUPP;
4232 else
4233 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length));
4234
4235 fd_putfile(fd);
4236 return error;
4237 }
4238
4239 /*
4240 * Sync an open file.
4241 */
4242 /* ARGSUSED */
4243 int
4244 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
4245 {
4246 /* {
4247 syscallarg(int) fd;
4248 } */
4249 struct vnode *vp;
4250 file_t *fp;
4251 int error;
4252
4253 /* fd_getvnode() will use the descriptor for us */
4254 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4255 return (error);
4256 vp = fp->f_vnode;
4257 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4258 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4259 VOP_UNLOCK(vp);
4260 fd_putfile(SCARG(uap, fd));
4261 return (error);
4262 }
4263
4264 /*
4265 * Sync a range of file data. API modeled after that found in AIX.
4266 *
4267 * FDATASYNC indicates that we need only save enough metadata to be able
4268 * to re-read the written data.
4269 */
4270 /* ARGSUSED */
4271 int
4272 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap,
4273 register_t *retval)
4274 {
4275 /* {
4276 syscallarg(int) fd;
4277 syscallarg(int) flags;
4278 syscallarg(off_t) start;
4279 syscallarg(off_t) length;
4280 } */
4281 struct vnode *vp;
4282 file_t *fp;
4283 int flags, nflags;
4284 off_t s, e, len;
4285 int error;
4286
4287 /* fd_getvnode() will use the descriptor for us */
4288 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4289 return (error);
4290
4291 if ((fp->f_flag & FWRITE) == 0) {
4292 error = EBADF;
4293 goto out;
4294 }
4295
4296 flags = SCARG(uap, flags);
4297 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4298 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4299 error = EINVAL;
4300 goto out;
4301 }
4302 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4303 if (flags & FDATASYNC)
4304 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4305 else
4306 nflags = FSYNC_WAIT;
4307 if (flags & FDISKSYNC)
4308 nflags |= FSYNC_CACHE;
4309
4310 len = SCARG(uap, length);
4311 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4312 if (len) {
4313 s = SCARG(uap, start);
4314 if (s < 0 || len < 0 || len > OFF_T_MAX - s) {
4315 error = EINVAL;
4316 goto out;
4317 }
4318 e = s + len;
4319 KASSERT(s <= e);
4320 } else {
4321 e = 0;
4322 s = 0;
4323 }
4324
4325 vp = fp->f_vnode;
4326 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4327 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4328 VOP_UNLOCK(vp);
4329 out:
4330 fd_putfile(SCARG(uap, fd));
4331 return (error);
4332 }
4333
4334 /*
4335 * Sync the data of an open file.
4336 */
4337 /* ARGSUSED */
4338 int
4339 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap,
4340 register_t *retval)
4341 {
4342 /* {
4343 syscallarg(int) fd;
4344 } */
4345 struct vnode *vp;
4346 file_t *fp;
4347 int error;
4348
4349 /* fd_getvnode() will use the descriptor for us */
4350 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4351 return (error);
4352 vp = fp->f_vnode;
4353 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4354 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4355 VOP_UNLOCK(vp);
4356 fd_putfile(SCARG(uap, fd));
4357 return (error);
4358 }
4359
4360 /*
4361 * Rename files, (standard) BSD semantics frontend.
4362 */
4363 /* ARGSUSED */
4364 int
4365 sys_rename(struct lwp *l, const struct sys_rename_args *uap,
4366 register_t *retval)
4367 {
4368 /* {
4369 syscallarg(const char *) from;
4370 syscallarg(const char *) to;
4371 } */
4372
4373 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4374 SCARG(uap, to), UIO_USERSPACE, 0);
4375 }
4376
4377 int
4378 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4379 register_t *retval)
4380 {
4381 /* {
4382 syscallarg(int) fromfd;
4383 syscallarg(const char *) from;
4384 syscallarg(int) tofd;
4385 syscallarg(const char *) to;
4386 } */
4387
4388 return do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4389 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0);
4390 }
4391
4392 /*
4393 * Rename files, POSIX semantics frontend.
4394 */
4395 /* ARGSUSED */
4396 int
4397 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap,
4398 register_t *retval)
4399 {
4400 /* {
4401 syscallarg(const char *) from;
4402 syscallarg(const char *) to;
4403 } */
4404
4405 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4406 SCARG(uap, to), UIO_USERSPACE, 1);
4407 }
4408
4409 /*
4410 * Rename files. Source and destination must either both be directories,
4411 * or both not be directories. If target is a directory, it must be empty.
4412 * If `from' and `to' refer to the same object, the value of the `retain'
4413 * argument is used to determine whether `from' will be
4414 *
4415 * (retain == 0) deleted unless `from' and `to' refer to the same
4416 * object in the file system's name space (BSD).
4417 * (retain == 1) always retained (POSIX).
4418 *
4419 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4420 */
4421 int
4422 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4423 {
4424
4425 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg,
4426 retain);
4427 }
4428
4429 static int
4430 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
4431 const char *to, enum uio_seg seg, int retain)
4432 {
4433 struct pathbuf *fpb, *tpb;
4434 struct nameidata fnd, tnd;
4435 struct vnode *fdvp, *fvp;
4436 struct vnode *tdvp, *tvp;
4437 struct mount *mp, *tmp;
4438 int error;
4439
4440 KASSERT(l != NULL || fromfd == AT_FDCWD);
4441 KASSERT(l != NULL || tofd == AT_FDCWD);
4442
4443 error = pathbuf_maybe_copyin(from, seg, &fpb);
4444 if (error)
4445 goto out0;
4446 KASSERT(fpb != NULL);
4447
4448 error = pathbuf_maybe_copyin(to, seg, &tpb);
4449 if (error)
4450 goto out1;
4451 KASSERT(tpb != NULL);
4452
4453 /*
4454 * Lookup from.
4455 *
4456 * XXX LOCKPARENT is wrong because we don't actually want it
4457 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
4458 * insane, so for the time being we need to leave it like this.
4459 */
4460 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
4461 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
4462 goto out2;
4463
4464 /*
4465 * Pull out the important results of the lookup, fdvp and fvp.
4466 * Of course, fvp is bogus because we're about to unlock fdvp.
4467 */
4468 fdvp = fnd.ni_dvp;
4469 fvp = fnd.ni_vp;
4470 mp = fdvp->v_mount;
4471 KASSERT(fdvp != NULL);
4472 KASSERT(fvp != NULL);
4473 KASSERT(fdvp == fvp || VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE);
4474 /*
4475 * Bracket the operation with fstrans_start()/fstrans_done().
4476 *
4477 * Inside the bracket this file system cannot be unmounted so
4478 * a vnode on this file system cannot change its v_mount.
4479 * A vnode on another file system may still change to dead mount.
4480 */
4481 fstrans_start(mp);
4482
4483 /*
4484 * Make sure neither fdvp nor fvp is locked.
4485 */
4486 if (fdvp != fvp)
4487 VOP_UNLOCK(fdvp);
4488 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4489 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4490
4491 /*
4492 * Reject renaming `.' and `..'. Can't do this until after
4493 * namei because we need namei's parsing to find the final
4494 * component name. (namei should just leave us with the final
4495 * component name and not look it up itself, but anyway...)
4496 *
4497 * This was here before because we used to relookup from
4498 * instead of to and relookup requires the caller to check
4499 * this, but now file systems may depend on this check, so we
4500 * must retain it until the file systems are all rototilled.
4501 */
4502 if ((fnd.ni_cnd.cn_namelen == 1 &&
4503 fnd.ni_cnd.cn_nameptr[0] == '.') ||
4504 (fnd.ni_cnd.cn_namelen == 2 &&
4505 fnd.ni_cnd.cn_nameptr[0] == '.' &&
4506 fnd.ni_cnd.cn_nameptr[1] == '.')) {
4507 error = EINVAL; /* XXX EISDIR? */
4508 goto abort0;
4509 }
4510
4511 /*
4512 * Lookup to.
4513 *
4514 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using
4515 * fvp here to decide whether to add CREATEDIR is a load of
4516 * bollocks because fvp might be the wrong node by now, since
4517 * fdvp is unlocked.
4518 *
4519 * XXX Why not pass CREATEDIR always?
4520 */
4521 NDINIT(&tnd, RENAME,
4522 (LOCKPARENT | NOCACHE | TRYEMULROOT |
4523 ((fvp->v_type == VDIR)? CREATEDIR : 0)),
4524 tpb);
4525 if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
4526 goto abort0;
4527
4528 /*
4529 * Pull out the important results of the lookup, tdvp and tvp.
4530 * Of course, tvp is bogus because we're about to unlock tdvp.
4531 */
4532 tdvp = tnd.ni_dvp;
4533 tvp = tnd.ni_vp;
4534 KASSERT(tdvp != NULL);
4535 KASSERT(tdvp == tvp || VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4536
4537 if (fvp->v_type == VDIR)
4538 tnd.ni_cnd.cn_flags |= WILLBEDIR;
4539 /*
4540 * Make sure neither tdvp nor tvp is locked.
4541 */
4542 if (tdvp != tvp)
4543 VOP_UNLOCK(tdvp);
4544 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4545 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4546
4547 /*
4548 * Reject renaming onto `.' or `..'. relookup is unhappy with
4549 * these, which is why we must do this here. Once upon a time
4550 * we relooked up from instead of to, and consequently didn't
4551 * need this check, but now that we relookup to instead of
4552 * from, we need this; and we shall need it forever forward
4553 * until the VOP_RENAME protocol changes, because file systems
4554 * will no doubt begin to depend on this check.
4555 */
4556 if (tnd.ni_cnd.cn_namelen == 1 && tnd.ni_cnd.cn_nameptr[0] == '.') {
4557 error = EISDIR;
4558 goto abort1;
4559 }
4560 if (tnd.ni_cnd.cn_namelen == 2 &&
4561 tnd.ni_cnd.cn_nameptr[0] == '.' &&
4562 tnd.ni_cnd.cn_nameptr[1] == '.') {
4563 error = EINVAL;
4564 goto abort1;
4565 }
4566
4567 /*
4568 * Make sure the mount points match. Although we don't hold
4569 * any vnode locks, the v_mount on fdvp file system are stable.
4570 *
4571 * Unmounting another file system at an inopportune moment may
4572 * cause tdvp to disappear and change its v_mount to dead.
4573 *
4574 * So in either case different v_mount means cross-device rename.
4575 */
4576 KASSERT(mp != NULL);
4577 tmp = tdvp->v_mount;
4578
4579 if (mp != tmp) {
4580 error = EXDEV;
4581 goto abort1;
4582 }
4583
4584 /*
4585 * Take the vfs rename lock to avoid cross-directory screw cases.
4586 * Nothing is locked currently, so taking this lock is safe.
4587 */
4588 error = VFS_RENAMELOCK_ENTER(mp);
4589 if (error)
4590 goto abort1;
4591
4592 /*
4593 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
4594 * and nothing is locked except for the vfs rename lock.
4595 *
4596 * The next step is a little rain dance to conform to the
4597 * insane lock protocol, even though it does nothing to ward
4598 * off race conditions.
4599 *
4600 * We need tdvp and tvp to be locked. However, because we have
4601 * unlocked tdvp in order to hold no locks while we take the
4602 * vfs rename lock, tvp may be wrong here, and we can't safely
4603 * lock it even if the sensible file systems will just unlock
4604 * it straight away. Consequently, we must lock tdvp and then
4605 * relookup tvp to get it locked.
4606 *
4607 * Finally, because the VOP_RENAME protocol is brain-damaged
4608 * and various file systems insanely depend on the semantics of
4609 * this brain damage, the lookup of to must be the last lookup
4610 * before VOP_RENAME.
4611 */
4612 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
4613 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
4614 if (error)
4615 goto abort2;
4616
4617 /*
4618 * Drop the old tvp and pick up the new one -- which might be
4619 * the same, but that doesn't matter to us. After this, tdvp
4620 * and tvp should both be locked.
4621 */
4622 if (tvp != NULL)
4623 vrele(tvp);
4624 tvp = tnd.ni_vp;
4625 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4626 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
4627
4628 /*
4629 * The old do_sys_rename had various consistency checks here
4630 * involving fvp and tvp. fvp is bogus already here, and tvp
4631 * will become bogus soon in any sensible file system, so the
4632 * only purpose in putting these checks here is to give lip
4633 * service to these screw cases and to acknowledge that they
4634 * exist, not actually to handle them, but here you go
4635 * anyway...
4636 */
4637
4638 /*
4639 * Acknowledge that directories and non-directories aren't
4640 * supposed to mix.
4641 */
4642 if (tvp != NULL) {
4643 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
4644 error = ENOTDIR;
4645 goto abort3;
4646 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
4647 error = EISDIR;
4648 goto abort3;
4649 }
4650 }
4651
4652 /*
4653 * Acknowledge some random screw case, among the dozens that
4654 * might arise.
4655 */
4656 if (fvp == tdvp) {
4657 error = EINVAL;
4658 goto abort3;
4659 }
4660
4661 /*
4662 * Acknowledge that POSIX has a wacky screw case.
4663 *
4664 * XXX Eventually the retain flag needs to be passed on to
4665 * VOP_RENAME.
4666 */
4667 if (fvp == tvp) {
4668 if (retain) {
4669 error = 0;
4670 goto abort3;
4671 } else if (fdvp == tdvp &&
4672 fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen &&
4673 0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
4674 fnd.ni_cnd.cn_namelen)) {
4675 error = 0;
4676 goto abort3;
4677 }
4678 }
4679
4680 /*
4681 * Make sure veriexec can screw us up. (But a race can screw
4682 * up veriexec, of course -- remember, fvp and (soon) tvp are
4683 * bogus.)
4684 */
4685 #if NVERIEXEC > 0
4686 {
4687 char *f1, *f2;
4688 size_t f1_len;
4689 size_t f2_len;
4690
4691 f1_len = fnd.ni_cnd.cn_namelen + 1;
4692 f1 = kmem_alloc(f1_len, KM_SLEEP);
4693 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);
4694
4695 f2_len = tnd.ni_cnd.cn_namelen + 1;
4696 f2 = kmem_alloc(f2_len, KM_SLEEP);
4697 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);
4698
4699 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4700
4701 kmem_free(f1, f1_len);
4702 kmem_free(f2, f2_len);
4703
4704 if (error)
4705 goto abort3;
4706 }
4707 #endif /* NVERIEXEC > 0 */
4708
4709 /*
4710 * All ready. Incant the rename vop.
4711 */
4712 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4713 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4714 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4715 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
4716 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);
4717
4718 /*
4719 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
4720 * tdvp and tvp. But we can't assert any of that.
4721 */
4722 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4723 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4724 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4725 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4726
4727 /*
4728 * So all we have left to do is to drop the rename lock and
4729 * destroy the pathbufs.
4730 */
4731 VFS_RENAMELOCK_EXIT(mp);
4732 fstrans_done(mp);
4733 goto out2;
4734
4735 abort3: if (tvp != NULL && tvp != tdvp)
4736 VOP_UNLOCK(tvp);
4737 abort2: VOP_UNLOCK(tdvp);
4738 VFS_RENAMELOCK_EXIT(mp);
4739 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd);
4740 vrele(tdvp);
4741 if (tvp != NULL)
4742 vrele(tvp);
4743 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd);
4744 vrele(fdvp);
4745 vrele(fvp);
4746 fstrans_done(mp);
4747 out2: pathbuf_destroy(tpb);
4748 out1: pathbuf_destroy(fpb);
4749 out0: return error;
4750 }
4751
4752 /*
4753 * Make a directory file.
4754 */
4755 /* ARGSUSED */
4756 int
4757 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4758 {
4759 /* {
4760 syscallarg(const char *) path;
4761 syscallarg(int) mode;
4762 } */
4763
4764 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4765 SCARG(uap, mode), UIO_USERSPACE);
4766 }
4767
4768 int
4769 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4770 register_t *retval)
4771 {
4772 /* {
4773 syscallarg(int) fd;
4774 syscallarg(const char *) path;
4775 syscallarg(int) mode;
4776 } */
4777
4778 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4779 SCARG(uap, mode), UIO_USERSPACE);
4780 }
4781
4782 int
4783 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4784 {
4785
4786 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg);
4787 }
4788
4789 static int
4790 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4791 enum uio_seg seg)
4792 {
4793 struct proc *p = curlwp->l_proc;
4794 struct vnode *vp;
4795 struct vattr vattr;
4796 int error;
4797 struct pathbuf *pb;
4798 struct nameidata nd;
4799
4800 KASSERT(l != NULL || fdat == AT_FDCWD);
4801
4802 /* XXX bollocks, should pass in a pathbuf */
4803 error = pathbuf_maybe_copyin(path, seg, &pb);
4804 if (error) {
4805 return error;
4806 }
4807
4808 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4809
4810 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4811 pathbuf_destroy(pb);
4812 return (error);
4813 }
4814 vp = nd.ni_vp;
4815 if (vp != NULL) {
4816 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4817 if (nd.ni_dvp == vp)
4818 vrele(nd.ni_dvp);
4819 else
4820 vput(nd.ni_dvp);
4821 vrele(vp);
4822 pathbuf_destroy(pb);
4823 return (EEXIST);
4824 }
4825 vattr_null(&vattr);
4826 vattr.va_type = VDIR;
4827 /* We will read cwdi->cwdi_cmask unlocked. */
4828 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4829 nd.ni_cnd.cn_flags |= WILLBEDIR;
4830 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4831 if (!error)
4832 vrele(nd.ni_vp);
4833 vput(nd.ni_dvp);
4834 pathbuf_destroy(pb);
4835 return (error);
4836 }
4837
4838 /*
4839 * Remove a directory file.
4840 */
4841 /* ARGSUSED */
4842 int
4843 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4844 {
4845 /* {
4846 syscallarg(char *) path;
4847 } */
4848
4849 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR,
4850 UIO_USERSPACE);
4851 }
4852
4853 /*
4854 * Read a block of directory entries in a file system independent format.
4855 */
4856 int
4857 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap,
4858 register_t *retval)
4859 {
4860 /* {
4861 syscallarg(int) fd;
4862 syscallarg(char *) buf;
4863 syscallarg(size_t) count;
4864 } */
4865 file_t *fp;
4866 int error, done;
4867
4868 /* fd_getvnode() will use the descriptor for us */
4869 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4870 return (error);
4871 if ((fp->f_flag & FREAD) == 0) {
4872 error = EBADF;
4873 goto out;
4874 }
4875 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4876 SCARG(uap, count), &done, l, 0, 0);
4877 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4878 *retval = done;
4879 out:
4880 fd_putfile(SCARG(uap, fd));
4881 return (error);
4882 }
4883
4884 /*
4885 * Set the mode mask for creation of filesystem nodes.
4886 */
4887 int
4888 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4889 {
4890 /* {
4891 syscallarg(mode_t) newmask;
4892 } */
4893
4894 /*
4895 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of
4896 * serialization with those reads is required. It's important to
4897 * return a coherent answer for the caller of umask() though, and
4898 * the atomic operation accomplishes that.
4899 */
4900 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask,
4901 SCARG(uap, newmask) & ALLPERMS);
4902
4903 return (0);
4904 }
4905
4906 int
4907 dorevoke(struct vnode *vp, kauth_cred_t cred)
4908 {
4909 struct vattr vattr;
4910 int error, fs_decision;
4911
4912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4913 error = VOP_GETATTR(vp, &vattr, cred);
4914 VOP_UNLOCK(vp);
4915 if (error != 0)
4916 return error;
4917 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4918 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4919 fs_decision);
4920 if (!error)
4921 VOP_REVOKE(vp, REVOKEALL);
4922 return (error);
4923 }
4924
4925 /*
4926 * Void all references to file by ripping underlying filesystem
4927 * away from vnode.
4928 */
4929 /* ARGSUSED */
4930 int
4931 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap,
4932 register_t *retval)
4933 {
4934 /* {
4935 syscallarg(const char *) path;
4936 } */
4937 struct vnode *vp;
4938 int error;
4939
4940 error = namei_simple_user(SCARG(uap, path), NSM_FOLLOW_TRYEMULROOT,
4941 &vp);
4942 if (error != 0)
4943 return (error);
4944 error = dorevoke(vp, l->l_cred);
4945 vrele(vp);
4946 return (error);
4947 }
4948
4949 /*
4950 * Allocate backing store for a file, filling a hole without having to
4951 * explicitly write anything out.
4952 */
4953 /* ARGSUSED */
4954 int
4955 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4956 register_t *retval)
4957 {
4958 /* {
4959 syscallarg(int) fd;
4960 syscallarg(off_t) pos;
4961 syscallarg(off_t) len;
4962 } */
4963 int fd;
4964 off_t pos, len;
4965 struct file *fp;
4966 struct vnode *vp;
4967 int error;
4968
4969 fd = SCARG(uap, fd);
4970 pos = SCARG(uap, pos);
4971 len = SCARG(uap, len);
4972
4973 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4974 *retval = EINVAL;
4975 return 0;
4976 }
4977
4978 error = fd_getvnode(fd, &fp);
4979 if (error) {
4980 *retval = error;
4981 return 0;
4982 }
4983 if ((fp->f_flag & FWRITE) == 0) {
4984 error = EBADF;
4985 goto fail;
4986 }
4987 vp = fp->f_vnode;
4988
4989 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4990 if (vp->v_type == VDIR) {
4991 error = EISDIR;
4992 } else {
4993 error = VOP_FALLOCATE(vp, pos, len);
4994 }
4995 VOP_UNLOCK(vp);
4996
4997 fail:
4998 fd_putfile(fd);
4999 *retval = error;
5000 return 0;
5001 }
5002
5003 /*
5004 * Deallocate backing store for a file, creating a hole. Also used for
5005 * invoking TRIM on disks.
5006 */
5007 /* ARGSUSED */
5008 int
5009 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
5010 register_t *retval)
5011 {
5012 /* {
5013 syscallarg(int) fd;
5014 syscallarg(off_t) pos;
5015 syscallarg(off_t) len;
5016 } */
5017 int fd;
5018 off_t pos, len;
5019 struct file *fp;
5020 struct vnode *vp;
5021 int error;
5022
5023 fd = SCARG(uap, fd);
5024 pos = SCARG(uap, pos);
5025 len = SCARG(uap, len);
5026
5027 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
5028 return EINVAL;
5029 }
5030
5031 error = fd_getvnode(fd, &fp);
5032 if (error) {
5033 return error;
5034 }
5035 if ((fp->f_flag & FWRITE) == 0) {
5036 error = EBADF;
5037 goto fail;
5038 }
5039 vp = fp->f_vnode;
5040
5041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
5042 if (vp->v_type == VDIR) {
5043 error = EISDIR;
5044 } else {
5045 error = VOP_FDISCARD(vp, pos, len);
5046 }
5047 VOP_UNLOCK(vp);
5048
5049 fail:
5050 fd_putfile(fd);
5051 return error;
5052 }
5053