vfs_syscalls.c revision 1.519 1 /* $NetBSD: vfs_syscalls.c,v 1.519 2019/01/27 02:08:43 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.519 2019/01/27 02:08:43 pgoyette Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/fstrans.h>
91 #include <sys/proc.h>
92 #include <sys/uio.h>
93 #include <sys/kmem.h>
94 #include <sys/dirent.h>
95 #include <sys/sysctl.h>
96 #include <sys/syscallargs.h>
97 #include <sys/vfs_syscalls.h>
98 #include <sys/quota.h>
99 #include <sys/quotactl.h>
100 #include <sys/ktrace.h>
101 #ifdef FILEASSOC
102 #include <sys/fileassoc.h>
103 #endif /* FILEASSOC */
104 #include <sys/extattr.h>
105 #include <sys/verified_exec.h>
106 #include <sys/kauth.h>
107 #include <sys/atomic.h>
108 #include <sys/module.h>
109 #include <sys/buf.h>
110 #include <sys/event.h>
111 #include <sys/compat_stub.h>
112
113 #include <miscfs/genfs/genfs.h>
114 #include <miscfs/specfs/specdev.h>
115
116 #include <nfs/rpcv2.h>
117 #include <nfs/nfsproto.h>
118 #include <nfs/nfs.h>
119 #include <nfs/nfs_var.h>
120
121 /* XXX this shouldn't be here */
122 #ifndef OFF_T_MAX
123 #define OFF_T_MAX __type_max(off_t)
124 #endif
125
126 static int change_flags(struct vnode *, u_long, struct lwp *);
127 static int change_mode(struct vnode *, int, struct lwp *);
128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
131 enum uio_seg);
132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
134 enum uio_seg);
135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
136 enum uio_seg, int);
137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
138 size_t, register_t *);
139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
140
141 static int fd_nameiat(struct lwp *, int, struct nameidata *);
142 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
143 namei_simple_flags_t, struct vnode **);
144
145 /*
146 * This table is used to maintain compatibility with 4.3BSD
147 * and NetBSD 0.9 mount syscalls - and possibly other systems.
148 * Note, the order is important!
149 *
150 * Do not modify this table. It should only contain filesystems
151 * supported by NetBSD 0.9 and 4.3BSD.
152 */
153 const char * const mountcompatnames[] = {
154 NULL, /* 0 = MOUNT_NONE */
155 MOUNT_FFS, /* 1 = MOUNT_UFS */
156 MOUNT_NFS, /* 2 */
157 MOUNT_MFS, /* 3 */
158 MOUNT_MSDOS, /* 4 */
159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
160 MOUNT_FDESC, /* 6 */
161 MOUNT_KERNFS, /* 7 */
162 NULL, /* 8 = MOUNT_DEVFS */
163 MOUNT_AFS, /* 9 */
164 };
165
166 const int nmountcompatnames = __arraycount(mountcompatnames);
167
168 static int
169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
170 {
171 file_t *dfp;
172 int error;
173
174 if (fdat != AT_FDCWD) {
175 if ((error = fd_getvnode(fdat, &dfp)) != 0)
176 goto out;
177
178 NDAT(ndp, dfp->f_vnode);
179 }
180
181 error = namei(ndp);
182
183 if (fdat != AT_FDCWD)
184 fd_putfile(fdat);
185 out:
186 return error;
187 }
188
189 static int
190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
191 namei_simple_flags_t sflags, struct vnode **vp_ret)
192 {
193 file_t *dfp;
194 struct vnode *dvp;
195 int error;
196
197 if (fdat != AT_FDCWD) {
198 if ((error = fd_getvnode(fdat, &dfp)) != 0)
199 goto out;
200
201 dvp = dfp->f_vnode;
202 } else {
203 dvp = NULL;
204 }
205
206 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
207
208 if (fdat != AT_FDCWD)
209 fd_putfile(fdat);
210 out:
211 return error;
212 }
213
214 static int
215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
216 {
217 int error;
218
219 fp->f_flag = flags & FMASK;
220 fp->f_type = DTYPE_VNODE;
221 fp->f_ops = &vnops;
222 fp->f_vnode = vp;
223
224 if (flags & (O_EXLOCK | O_SHLOCK)) {
225 struct flock lf;
226 int type;
227
228 lf.l_whence = SEEK_SET;
229 lf.l_start = 0;
230 lf.l_len = 0;
231 if (flags & O_EXLOCK)
232 lf.l_type = F_WRLCK;
233 else
234 lf.l_type = F_RDLCK;
235 type = F_FLOCK;
236 if ((flags & FNONBLOCK) == 0)
237 type |= F_WAIT;
238 VOP_UNLOCK(vp);
239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
240 if (error) {
241 (void) vn_close(vp, fp->f_flag, fp->f_cred);
242 fd_abort(l->l_proc, fp, indx);
243 return error;
244 }
245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
246 atomic_or_uint(&fp->f_flag, FHASLOCK);
247 }
248 if (flags & O_CLOEXEC)
249 fd_set_exclose(l, indx, true);
250 return 0;
251 }
252
253 static int
254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
255 void *data, size_t *data_len)
256 {
257 struct mount *mp;
258 int error = 0, saved_flags;
259
260 mp = vp->v_mount;
261 saved_flags = mp->mnt_flag;
262
263 /* We can operate only on VV_ROOT nodes. */
264 if ((vp->v_vflag & VV_ROOT) == 0) {
265 error = EINVAL;
266 goto out;
267 }
268
269 /*
270 * We only allow the filesystem to be reloaded if it
271 * is currently mounted read-only. Additionally, we
272 * prevent read-write to read-only downgrades.
273 */
274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
275 (mp->mnt_flag & MNT_RDONLY) == 0 &&
276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
277 error = EOPNOTSUPP; /* Needs translation */
278 goto out;
279 }
280
281 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
282 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
283 if (error)
284 goto out;
285
286 error = vfs_suspend(mp, 0);
287 if (error)
288 goto out;
289
290 mutex_enter(&mp->mnt_updating);
291
292 mp->mnt_flag &= ~MNT_OP_FLAGS;
293 mp->mnt_flag |= flags & MNT_OP_FLAGS;
294
295 /*
296 * Set the mount level flags.
297 */
298 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) {
299 if ((flags & MNT_RDONLY))
300 mp->mnt_iflag |= IMNT_WANTRDONLY;
301 else
302 mp->mnt_iflag |= IMNT_WANTRDWR;
303 }
304 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
305 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
306 if ((mp->mnt_iflag & IMNT_WANTRDONLY))
307 mp->mnt_flag &= ~MNT_RDONLY;
308
309 error = VFS_MOUNT(mp, path, data, data_len);
310
311 if (error && data != NULL) {
312 int error2;
313
314 /*
315 * Update failed; let's try and see if it was an
316 * export request. For compat with 3.0 and earlier.
317 */
318 error2 = vfs_hooks_reexport(mp, path, data);
319
320 /*
321 * Only update error code if the export request was
322 * understood but some problem occurred while
323 * processing it.
324 */
325 if (error2 != EJUSTRETURN)
326 error = error2;
327 }
328
329 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY))
330 mp->mnt_flag |= MNT_RDONLY;
331 if (error)
332 mp->mnt_flag = saved_flags;
333 mp->mnt_flag &= ~MNT_OP_FLAGS;
334 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR);
335 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
336 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
337 vfs_syncer_add_to_worklist(mp);
338 } else {
339 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
340 vfs_syncer_remove_from_worklist(mp);
341 }
342 mutex_exit(&mp->mnt_updating);
343 vfs_resume(mp);
344
345 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
346 (flags & MNT_EXTATTR)) {
347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
348 NULL, 0, NULL) != 0) {
349 printf("%s: failed to start extattr, error = %d",
350 mp->mnt_stat.f_mntonname, error);
351 mp->mnt_flag &= ~MNT_EXTATTR;
352 }
353 }
354
355 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
356 !(flags & MNT_EXTATTR)) {
357 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
358 NULL, 0, NULL) != 0) {
359 printf("%s: failed to stop extattr, error = %d",
360 mp->mnt_stat.f_mntonname, error);
361 mp->mnt_flag |= MNT_RDONLY;
362 }
363 }
364 out:
365 return (error);
366 }
367
368 static int
369 mount_get_vfsops(const char *fstype, enum uio_seg type_seg,
370 struct vfsops **vfsops)
371 {
372 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
373 int error;
374
375 if (type_seg == UIO_USERSPACE) {
376 /* Copy file-system type from userspace. */
377 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
378 } else {
379 error = copystr(fstype, fstypename, sizeof(fstypename), NULL);
380 KASSERT(error == 0);
381 }
382
383 if (error) {
384 /*
385 * Historically, filesystem types were identified by numbers.
386 * If we get an integer for the filesystem type instead of a
387 * string, we check to see if it matches one of the historic
388 * filesystem types.
389 */
390 u_long fsindex = (u_long)fstype;
391 if (fsindex >= nmountcompatnames ||
392 mountcompatnames[fsindex] == NULL)
393 return ENODEV;
394 strlcpy(fstypename, mountcompatnames[fsindex],
395 sizeof(fstypename));
396 }
397
398 /* Accept `ufs' as an alias for `ffs', for compatibility. */
399 if (strcmp(fstypename, "ufs") == 0)
400 fstypename[0] = 'f';
401
402 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
403 return 0;
404
405 /* If we can autoload a vfs module, try again */
406 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
407
408 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
409 return 0;
410
411 return ENODEV;
412 }
413
414 static int
415 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
416 void *data, size_t *data_len)
417 {
418 struct mount *mp;
419 int error;
420
421 /* If MNT_GETARGS is specified, it should be the only flag. */
422 if (flags & ~MNT_GETARGS)
423 return EINVAL;
424
425 mp = vp->v_mount;
426
427 /* XXX: probably some notion of "can see" here if we want isolation. */
428 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
429 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
430 if (error)
431 return error;
432
433 if ((vp->v_vflag & VV_ROOT) == 0)
434 return EINVAL;
435
436 if (vfs_busy(mp))
437 return EPERM;
438
439 mutex_enter(&mp->mnt_updating);
440 mp->mnt_flag &= ~MNT_OP_FLAGS;
441 mp->mnt_flag |= MNT_GETARGS;
442 error = VFS_MOUNT(mp, path, data, data_len);
443 mp->mnt_flag &= ~MNT_OP_FLAGS;
444 mutex_exit(&mp->mnt_updating);
445
446 vfs_unbusy(mp);
447 return (error);
448 }
449
450 int
451 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
452 {
453 /* {
454 syscallarg(const char *) type;
455 syscallarg(const char *) path;
456 syscallarg(int) flags;
457 syscallarg(void *) data;
458 syscallarg(size_t) data_len;
459 } */
460
461 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path),
462 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
463 SCARG(uap, data_len), retval);
464 }
465
466 int
467 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg,
468 const char *path, int flags, void *data, enum uio_seg data_seg,
469 size_t data_len, register_t *retval)
470 {
471 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */
472 struct vnode *vp;
473 void *data_buf = data;
474 bool vfsopsrele = false;
475 size_t alloc_sz = 0;
476 int error;
477
478 /*
479 * Get vnode to be covered
480 */
481 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
482 if (error != 0) {
483 vp = NULL;
484 goto done;
485 }
486
487 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
488 vfsops = vp->v_mount->mnt_op;
489 } else {
490 /* 'type' is userspace */
491 error = mount_get_vfsops(type, type_seg, &vfsops);
492 if (error != 0)
493 goto done;
494 vfsopsrele = true;
495 }
496
497 /*
498 * We allow data to be NULL, even for userspace. Some fs's don't need
499 * it. The others will handle NULL.
500 */
501 if (data != NULL && data_seg == UIO_USERSPACE) {
502 if (data_len == 0) {
503 /* No length supplied, use default for filesystem */
504 data_len = vfsops->vfs_min_mount_data;
505
506 /*
507 * Hopefully a longer buffer won't make copyin() fail.
508 * For compatibility with 3.0 and earlier.
509 */
510 if (flags & MNT_UPDATE
511 && data_len < sizeof (struct mnt_export_args30))
512 data_len = sizeof (struct mnt_export_args30);
513 }
514 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
515 error = EINVAL;
516 goto done;
517 }
518 alloc_sz = data_len;
519 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
520
521 /* NFS needs the buffer even for mnt_getargs .... */
522 error = copyin(data, data_buf, data_len);
523 if (error != 0)
524 goto done;
525 }
526
527 if (flags & MNT_GETARGS) {
528 if (data_len == 0) {
529 error = EINVAL;
530 goto done;
531 }
532 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
533 if (error != 0)
534 goto done;
535 if (data_seg == UIO_USERSPACE)
536 error = copyout(data_buf, data, data_len);
537 *retval = data_len;
538 } else if (flags & MNT_UPDATE) {
539 error = mount_update(l, vp, path, flags, data_buf, &data_len);
540 } else {
541 /* Locking is handled internally in mount_domount(). */
542 KASSERT(vfsopsrele == true);
543 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
544 &data_len);
545 vfsopsrele = false;
546 }
547 if (!error)
548 KNOTE(&fs_klist, VQ_MOUNT);
549
550 done:
551 if (vfsopsrele)
552 vfs_delref(vfsops);
553 if (vp != NULL) {
554 vrele(vp);
555 }
556 if (data_buf != data)
557 kmem_free(data_buf, alloc_sz);
558 return (error);
559 }
560
561 /*
562 * Unmount a file system.
563 *
564 * Note: unmount takes a path to the vnode mounted on as argument,
565 * not special file (as before).
566 */
567 /* ARGSUSED */
568 int
569 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
570 {
571 /* {
572 syscallarg(const char *) path;
573 syscallarg(int) flags;
574 } */
575 struct vnode *vp;
576 struct mount *mp;
577 int error;
578 struct pathbuf *pb;
579 struct nameidata nd;
580
581 error = pathbuf_copyin(SCARG(uap, path), &pb);
582 if (error) {
583 return error;
584 }
585
586 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
587 if ((error = namei(&nd)) != 0) {
588 pathbuf_destroy(pb);
589 return error;
590 }
591 vp = nd.ni_vp;
592 pathbuf_destroy(pb);
593
594 mp = vp->v_mount;
595 vfs_ref(mp);
596 VOP_UNLOCK(vp);
597
598 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
599 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
600 if (error) {
601 vrele(vp);
602 vfs_rele(mp);
603 return (error);
604 }
605
606 /*
607 * Don't allow unmounting the root file system.
608 */
609 if (mp->mnt_flag & MNT_ROOTFS) {
610 vrele(vp);
611 vfs_rele(mp);
612 return (EINVAL);
613 }
614
615 /*
616 * Must be the root of the filesystem
617 */
618 if ((vp->v_vflag & VV_ROOT) == 0) {
619 vrele(vp);
620 vfs_rele(mp);
621 return (EINVAL);
622 }
623
624 vrele(vp);
625 error = dounmount(mp, SCARG(uap, flags), l);
626 vfs_rele(mp);
627 if (!error)
628 KNOTE(&fs_klist, VQ_UNMOUNT);
629 return error;
630 }
631
632 /*
633 * Sync each mounted filesystem.
634 */
635 #ifdef DEBUG
636 int syncprt = 0;
637 struct ctldebug debug0 = { "syncprt", &syncprt };
638 #endif
639
640 void
641 do_sys_sync(struct lwp *l)
642 {
643 mount_iterator_t *iter;
644 struct mount *mp;
645 int asyncflag;
646
647 mountlist_iterator_init(&iter);
648 while ((mp = mountlist_iterator_next(iter)) != NULL) {
649 mutex_enter(&mp->mnt_updating);
650 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
651 asyncflag = mp->mnt_flag & MNT_ASYNC;
652 mp->mnt_flag &= ~MNT_ASYNC;
653 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
654 if (asyncflag)
655 mp->mnt_flag |= MNT_ASYNC;
656 }
657 mutex_exit(&mp->mnt_updating);
658 }
659 mountlist_iterator_destroy(iter);
660 #ifdef DEBUG
661 if (syncprt)
662 vfs_bufstats();
663 #endif /* DEBUG */
664 }
665
666 /* ARGSUSED */
667 int
668 sys_sync(struct lwp *l, const void *v, register_t *retval)
669 {
670 do_sys_sync(l);
671 return (0);
672 }
673
674
675 /*
676 * Access or change filesystem quotas.
677 *
678 * (this is really 14 different calls bundled into one)
679 */
680
681 static int
682 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
683 {
684 struct quotastat info_k;
685 int error;
686
687 /* ensure any padding bytes are cleared */
688 memset(&info_k, 0, sizeof(info_k));
689
690 error = vfs_quotactl_stat(mp, &info_k);
691 if (error) {
692 return error;
693 }
694
695 return copyout(&info_k, info_u, sizeof(info_k));
696 }
697
698 static int
699 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
700 struct quotaidtypestat *info_u)
701 {
702 struct quotaidtypestat info_k;
703 int error;
704
705 /* ensure any padding bytes are cleared */
706 memset(&info_k, 0, sizeof(info_k));
707
708 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
709 if (error) {
710 return error;
711 }
712
713 return copyout(&info_k, info_u, sizeof(info_k));
714 }
715
716 static int
717 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
718 struct quotaobjtypestat *info_u)
719 {
720 struct quotaobjtypestat info_k;
721 int error;
722
723 /* ensure any padding bytes are cleared */
724 memset(&info_k, 0, sizeof(info_k));
725
726 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
727 if (error) {
728 return error;
729 }
730
731 return copyout(&info_k, info_u, sizeof(info_k));
732 }
733
734 static int
735 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
736 struct quotaval *val_u)
737 {
738 struct quotakey key_k;
739 struct quotaval val_k;
740 int error;
741
742 /* ensure any padding bytes are cleared */
743 memset(&val_k, 0, sizeof(val_k));
744
745 error = copyin(key_u, &key_k, sizeof(key_k));
746 if (error) {
747 return error;
748 }
749
750 error = vfs_quotactl_get(mp, &key_k, &val_k);
751 if (error) {
752 return error;
753 }
754
755 return copyout(&val_k, val_u, sizeof(val_k));
756 }
757
758 static int
759 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
760 const struct quotaval *val_u)
761 {
762 struct quotakey key_k;
763 struct quotaval val_k;
764 int error;
765
766 error = copyin(key_u, &key_k, sizeof(key_k));
767 if (error) {
768 return error;
769 }
770
771 error = copyin(val_u, &val_k, sizeof(val_k));
772 if (error) {
773 return error;
774 }
775
776 return vfs_quotactl_put(mp, &key_k, &val_k);
777 }
778
779 static int
780 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
781 {
782 struct quotakey key_k;
783 int error;
784
785 error = copyin(key_u, &key_k, sizeof(key_k));
786 if (error) {
787 return error;
788 }
789
790 return vfs_quotactl_del(mp, &key_k);
791 }
792
793 static int
794 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
795 {
796 struct quotakcursor cursor_k;
797 int error;
798
799 /* ensure any padding bytes are cleared */
800 memset(&cursor_k, 0, sizeof(cursor_k));
801
802 error = vfs_quotactl_cursoropen(mp, &cursor_k);
803 if (error) {
804 return error;
805 }
806
807 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
808 }
809
810 static int
811 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
812 {
813 struct quotakcursor cursor_k;
814 int error;
815
816 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
817 if (error) {
818 return error;
819 }
820
821 return vfs_quotactl_cursorclose(mp, &cursor_k);
822 }
823
824 static int
825 do_sys_quotactl_cursorskipidtype(struct mount *mp,
826 struct quotakcursor *cursor_u, int idtype)
827 {
828 struct quotakcursor cursor_k;
829 int error;
830
831 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
832 if (error) {
833 return error;
834 }
835
836 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
837 if (error) {
838 return error;
839 }
840
841 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
842 }
843
844 static int
845 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
846 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
847 unsigned *ret_u)
848 {
849 #define CGET_STACK_MAX 8
850 struct quotakcursor cursor_k;
851 struct quotakey stackkeys[CGET_STACK_MAX];
852 struct quotaval stackvals[CGET_STACK_MAX];
853 struct quotakey *keys_k;
854 struct quotaval *vals_k;
855 unsigned ret_k;
856 int error;
857
858 if (maxnum > 128) {
859 maxnum = 128;
860 }
861
862 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
863 if (error) {
864 return error;
865 }
866
867 if (maxnum <= CGET_STACK_MAX) {
868 keys_k = stackkeys;
869 vals_k = stackvals;
870 /* ensure any padding bytes are cleared */
871 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
872 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
873 } else {
874 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
875 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
876 }
877
878 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
879 &ret_k);
880 if (error) {
881 goto fail;
882 }
883
884 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
885 if (error) {
886 goto fail;
887 }
888
889 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
890 if (error) {
891 goto fail;
892 }
893
894 error = copyout(&ret_k, ret_u, sizeof(ret_k));
895 if (error) {
896 goto fail;
897 }
898
899 /* do last to maximize the chance of being able to recover a failure */
900 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
901
902 fail:
903 if (keys_k != stackkeys) {
904 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
905 }
906 if (vals_k != stackvals) {
907 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
908 }
909 return error;
910 }
911
912 static int
913 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
914 int *ret_u)
915 {
916 struct quotakcursor cursor_k;
917 int ret_k;
918 int error;
919
920 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
921 if (error) {
922 return error;
923 }
924
925 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
926 if (error) {
927 return error;
928 }
929
930 error = copyout(&ret_k, ret_u, sizeof(ret_k));
931 if (error) {
932 return error;
933 }
934
935 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
936 }
937
938 static int
939 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
940 {
941 struct quotakcursor cursor_k;
942 int error;
943
944 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
945 if (error) {
946 return error;
947 }
948
949 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
950 if (error) {
951 return error;
952 }
953
954 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
955 }
956
957 static int
958 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
959 {
960 char *path_k;
961 int error;
962
963 /* XXX this should probably be a struct pathbuf */
964 path_k = PNBUF_GET();
965 error = copyin(path_u, path_k, PATH_MAX);
966 if (error) {
967 PNBUF_PUT(path_k);
968 return error;
969 }
970
971 error = vfs_quotactl_quotaon(mp, idtype, path_k);
972
973 PNBUF_PUT(path_k);
974 return error;
975 }
976
/*
 * QUOTACTL_QUOTAOFF: nothing to copy in or out; forwards idtype to
 * vfs_quotactl_quotaoff() and returns its result.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);
	return rv;
}
982
983 int
984 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
985 {
986 struct mount *mp;
987 struct vnode *vp;
988 int error;
989
990 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
991 if (error != 0)
992 return (error);
993 mp = vp->v_mount;
994
995 switch (args->qc_op) {
996 case QUOTACTL_STAT:
997 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
998 break;
999 case QUOTACTL_IDTYPESTAT:
1000 error = do_sys_quotactl_idtypestat(mp,
1001 args->u.idtypestat.qc_idtype,
1002 args->u.idtypestat.qc_info);
1003 break;
1004 case QUOTACTL_OBJTYPESTAT:
1005 error = do_sys_quotactl_objtypestat(mp,
1006 args->u.objtypestat.qc_objtype,
1007 args->u.objtypestat.qc_info);
1008 break;
1009 case QUOTACTL_GET:
1010 error = do_sys_quotactl_get(mp,
1011 args->u.get.qc_key,
1012 args->u.get.qc_val);
1013 break;
1014 case QUOTACTL_PUT:
1015 error = do_sys_quotactl_put(mp,
1016 args->u.put.qc_key,
1017 args->u.put.qc_val);
1018 break;
1019 case QUOTACTL_DEL:
1020 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1021 break;
1022 case QUOTACTL_CURSOROPEN:
1023 error = do_sys_quotactl_cursoropen(mp,
1024 args->u.cursoropen.qc_cursor);
1025 break;
1026 case QUOTACTL_CURSORCLOSE:
1027 error = do_sys_quotactl_cursorclose(mp,
1028 args->u.cursorclose.qc_cursor);
1029 break;
1030 case QUOTACTL_CURSORSKIPIDTYPE:
1031 error = do_sys_quotactl_cursorskipidtype(mp,
1032 args->u.cursorskipidtype.qc_cursor,
1033 args->u.cursorskipidtype.qc_idtype);
1034 break;
1035 case QUOTACTL_CURSORGET:
1036 error = do_sys_quotactl_cursorget(mp,
1037 args->u.cursorget.qc_cursor,
1038 args->u.cursorget.qc_keys,
1039 args->u.cursorget.qc_vals,
1040 args->u.cursorget.qc_maxnum,
1041 args->u.cursorget.qc_ret);
1042 break;
1043 case QUOTACTL_CURSORATEND:
1044 error = do_sys_quotactl_cursoratend(mp,
1045 args->u.cursoratend.qc_cursor,
1046 args->u.cursoratend.qc_ret);
1047 break;
1048 case QUOTACTL_CURSORREWIND:
1049 error = do_sys_quotactl_cursorrewind(mp,
1050 args->u.cursorrewind.qc_cursor);
1051 break;
1052 case QUOTACTL_QUOTAON:
1053 error = do_sys_quotactl_quotaon(mp,
1054 args->u.quotaon.qc_idtype,
1055 args->u.quotaon.qc_quotafile);
1056 break;
1057 case QUOTACTL_QUOTAOFF:
1058 error = do_sys_quotactl_quotaoff(mp,
1059 args->u.quotaoff.qc_idtype);
1060 break;
1061 default:
1062 error = EINVAL;
1063 break;
1064 }
1065
1066 vrele(vp);
1067 return error;
1068 }
1069
1070 /* ARGSUSED */
1071 int
1072 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1073 register_t *retval)
1074 {
1075 /* {
1076 syscallarg(const char *) path;
1077 syscallarg(struct quotactl_args *) args;
1078 } */
1079 struct quotactl_args args;
1080 int error;
1081
1082 error = copyin(SCARG(uap, args), &args, sizeof(args));
1083 if (error) {
1084 return error;
1085 }
1086
1087 return do_sys_quotactl(SCARG(uap, path), &args);
1088 }
1089
1090 int
1091 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1092 int root)
1093 {
1094 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1095 int error = 0;
1096
1097 /*
1098 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1099 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1100 * overrides MNT_NOWAIT.
1101 */
1102 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1103 (flags != MNT_WAIT && flags != 0)) {
1104 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1105 goto done;
1106 }
1107
1108 /* Get the filesystem stats now */
1109 memset(sp, 0, sizeof(*sp));
1110 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1111 return error;
1112 }
1113
1114 if (cwdi->cwdi_rdir == NULL)
1115 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1116 done:
1117 if (cwdi->cwdi_rdir != NULL) {
1118 size_t len;
1119 char *bp;
1120 char c;
1121 char *path = PNBUF_GET();
1122
1123 bp = path + MAXPATHLEN;
1124 *--bp = '\0';
1125 rw_enter(&cwdi->cwdi_lock, RW_READER);
1126 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1127 MAXPATHLEN / 2, 0, l);
1128 rw_exit(&cwdi->cwdi_lock);
1129 if (error) {
1130 PNBUF_PUT(path);
1131 return error;
1132 }
1133 len = strlen(bp);
1134 if (len != 1) {
1135 /*
1136 * for mount points that are below our root, we can see
1137 * them, so we fix up the pathname and return them. The
1138 * rest we cannot see, so we don't allow viewing the
1139 * data.
1140 */
1141 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1142 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1143 (void)strlcpy(sp->f_mntonname,
1144 c == '\0' ? "/" : &sp->f_mntonname[len],
1145 sizeof(sp->f_mntonname));
1146 } else {
1147 if (root)
1148 (void)strlcpy(sp->f_mntonname, "/",
1149 sizeof(sp->f_mntonname));
1150 else
1151 error = EPERM;
1152 }
1153 }
1154 PNBUF_PUT(path);
1155 }
1156 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1157 return error;
1158 }
1159
1160 /*
1161 * Get filesystem statistics by path.
1162 */
1163 int
1164 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1165 {
1166 struct mount *mp;
1167 int error;
1168 struct vnode *vp;
1169
1170 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1171 if (error != 0)
1172 return error;
1173 mp = vp->v_mount;
1174 error = dostatvfs(mp, sb, l, flags, 1);
1175 vrele(vp);
1176 return error;
1177 }
1178
1179 /* ARGSUSED */
1180 int
1181 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1182 {
1183 /* {
1184 syscallarg(const char *) path;
1185 syscallarg(struct statvfs *) buf;
1186 syscallarg(int) flags;
1187 } */
1188 struct statvfs *sb;
1189 int error;
1190
1191 sb = STATVFSBUF_GET();
1192 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1193 if (error == 0)
1194 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1195 STATVFSBUF_PUT(sb);
1196 return error;
1197 }
1198
1199 /*
1200 * Get filesystem statistics by fd.
1201 */
1202 int
1203 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1204 {
1205 file_t *fp;
1206 struct mount *mp;
1207 int error;
1208
1209 /* fd_getvnode() will use the descriptor for us */
1210 if ((error = fd_getvnode(fd, &fp)) != 0)
1211 return (error);
1212 mp = fp->f_vnode->v_mount;
1213 error = dostatvfs(mp, sb, curlwp, flags, 1);
1214 fd_putfile(fd);
1215 return error;
1216 }
1217
1218 /* ARGSUSED */
1219 int
1220 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1221 {
1222 /* {
1223 syscallarg(int) fd;
1224 syscallarg(struct statvfs *) buf;
1225 syscallarg(int) flags;
1226 } */
1227 struct statvfs *sb;
1228 int error;
1229
1230 sb = STATVFSBUF_GET();
1231 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1232 if (error == 0)
1233 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1234 STATVFSBUF_PUT(sb);
1235 return error;
1236 }
1237
1238
1239 /*
1240 * Get statistics on all filesystems.
1241 */
1242 int
1243 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1244 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1245 register_t *retval)
1246 {
1247 int root = 0;
1248 mount_iterator_t *iter;
1249 struct proc *p = l->l_proc;
1250 struct mount *mp;
1251 struct statvfs *sb;
1252 size_t count, maxcount;
1253 int error = 0;
1254
1255 sb = STATVFSBUF_GET();
1256 maxcount = bufsize / entry_sz;
1257 count = 0;
1258 mountlist_iterator_init(&iter);
1259 while ((mp = mountlist_iterator_next(iter)) != NULL) {
1260 if (sfsp && count < maxcount) {
1261 error = dostatvfs(mp, sb, l, flags, 0);
1262 if (error) {
1263 error = 0;
1264 continue;
1265 }
1266 error = copyfn(sb, sfsp, entry_sz);
1267 if (error)
1268 goto out;
1269 sfsp = (char *)sfsp + entry_sz;
1270 root |= strcmp(sb->f_mntonname, "/") == 0;
1271 }
1272 count++;
1273 }
1274
1275 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1276 /*
1277 * fake a root entry
1278 */
1279 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1280 sb, l, flags, 1);
1281 if (error != 0)
1282 goto out;
1283 if (sfsp) {
1284 error = copyfn(sb, sfsp, entry_sz);
1285 if (error != 0)
1286 goto out;
1287 }
1288 count++;
1289 }
1290 if (sfsp && count > maxcount)
1291 *retval = maxcount;
1292 else
1293 *retval = count;
1294 out:
1295 mountlist_iterator_destroy(iter);
1296 STATVFSBUF_PUT(sb);
1297 return error;
1298 }
1299
1300 int
1301 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1302 {
1303 /* {
1304 syscallarg(struct statvfs *) buf;
1305 syscallarg(size_t) bufsize;
1306 syscallarg(int) flags;
1307 } */
1308
1309 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1310 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1311 }
1312
1313 /*
1314 * Change current working directory to a given file descriptor.
1315 */
1316 /* ARGSUSED */
1317 int
1318 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1319 {
1320 /* {
1321 syscallarg(int) fd;
1322 } */
1323 struct proc *p = l->l_proc;
1324 struct cwdinfo *cwdi;
1325 struct vnode *vp, *tdp;
1326 struct mount *mp;
1327 file_t *fp;
1328 int error, fd;
1329
1330 /* fd_getvnode() will use the descriptor for us */
1331 fd = SCARG(uap, fd);
1332 if ((error = fd_getvnode(fd, &fp)) != 0)
1333 return (error);
1334 vp = fp->f_vnode;
1335
1336 vref(vp);
1337 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1338 if (vp->v_type != VDIR)
1339 error = ENOTDIR;
1340 else
1341 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1342 if (error) {
1343 vput(vp);
1344 goto out;
1345 }
1346 while ((mp = vp->v_mountedhere) != NULL) {
1347 error = vfs_busy(mp);
1348 vput(vp);
1349 if (error != 0)
1350 goto out;
1351 error = VFS_ROOT(mp, &tdp);
1352 vfs_unbusy(mp);
1353 if (error)
1354 goto out;
1355 vp = tdp;
1356 }
1357 VOP_UNLOCK(vp);
1358
1359 /*
1360 * Disallow changing to a directory not under the process's
1361 * current root directory (if there is one).
1362 */
1363 cwdi = p->p_cwdi;
1364 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1365 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1366 vrele(vp);
1367 error = EPERM; /* operation not permitted */
1368 } else {
1369 vrele(cwdi->cwdi_cdir);
1370 cwdi->cwdi_cdir = vp;
1371 }
1372 rw_exit(&cwdi->cwdi_lock);
1373
1374 out:
1375 fd_putfile(fd);
1376 return (error);
1377 }
1378
1379 /*
1380 * Change this process's notion of the root directory to a given file
1381 * descriptor.
1382 */
1383 int
1384 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1385 {
1386 struct proc *p = l->l_proc;
1387 struct vnode *vp;
1388 file_t *fp;
1389 int error, fd = SCARG(uap, fd);
1390
1391 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1392 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1393 return error;
1394 /* fd_getvnode() will use the descriptor for us */
1395 if ((error = fd_getvnode(fd, &fp)) != 0)
1396 return error;
1397 vp = fp->f_vnode;
1398 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1399 if (vp->v_type != VDIR)
1400 error = ENOTDIR;
1401 else
1402 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1403 VOP_UNLOCK(vp);
1404 if (error)
1405 goto out;
1406 vref(vp);
1407
1408 change_root(p->p_cwdi, vp, l);
1409
1410 out:
1411 fd_putfile(fd);
1412 return (error);
1413 }
1414
1415 /*
1416 * Change current working directory (``.'').
1417 */
1418 /* ARGSUSED */
1419 int
1420 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1421 {
1422 /* {
1423 syscallarg(const char *) path;
1424 } */
1425 struct proc *p = l->l_proc;
1426 struct cwdinfo *cwdi;
1427 int error;
1428 struct vnode *vp;
1429
1430 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1431 &vp, l)) != 0)
1432 return (error);
1433 cwdi = p->p_cwdi;
1434 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1435 vrele(cwdi->cwdi_cdir);
1436 cwdi->cwdi_cdir = vp;
1437 rw_exit(&cwdi->cwdi_lock);
1438 return (0);
1439 }
1440
1441 /*
1442 * Change notion of root (``/'') directory.
1443 */
1444 /* ARGSUSED */
1445 int
1446 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1447 {
1448 /* {
1449 syscallarg(const char *) path;
1450 } */
1451 struct proc *p = l->l_proc;
1452 int error;
1453 struct vnode *vp;
1454
1455 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1456 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1457 return (error);
1458 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1459 &vp, l)) != 0)
1460 return (error);
1461
1462 change_root(p->p_cwdi, vp, l);
1463
1464 return (0);
1465 }
1466
1467 /*
1468 * Common routine for chroot and fchroot.
1469 * NB: callers need to properly authorize the change root operation.
1470 */
1471 void
1472 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1473 {
1474 struct proc *p = l->l_proc;
1475 kauth_cred_t ncred;
1476
1477 ncred = kauth_cred_alloc();
1478
1479 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1480 if (cwdi->cwdi_rdir != NULL)
1481 vrele(cwdi->cwdi_rdir);
1482 cwdi->cwdi_rdir = vp;
1483
1484 /*
1485 * Prevent escaping from chroot by putting the root under
1486 * the working directory. Silently chdir to / if we aren't
1487 * already there.
1488 */
1489 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1490 /*
1491 * XXX would be more failsafe to change directory to a
1492 * deadfs node here instead
1493 */
1494 vrele(cwdi->cwdi_cdir);
1495 vref(vp);
1496 cwdi->cwdi_cdir = vp;
1497 }
1498 rw_exit(&cwdi->cwdi_lock);
1499
1500 /* Get a write lock on the process credential. */
1501 proc_crmod_enter();
1502
1503 kauth_cred_clone(p->p_cred, ncred);
1504 kauth_proc_chroot(ncred, p->p_cwdi);
1505
1506 /* Broadcast our credentials to the process and other LWPs. */
1507 proc_crmod_leave(ncred, p->p_cred, true);
1508 }
1509
1510 /*
1511 * Common routine for chroot and chdir.
1512 * XXX "where" should be enum uio_seg
1513 */
1514 int
1515 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1516 {
1517 struct pathbuf *pb;
1518 struct nameidata nd;
1519 int error;
1520
1521 error = pathbuf_maybe_copyin(path, where, &pb);
1522 if (error) {
1523 return error;
1524 }
1525 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1526 if ((error = namei(&nd)) != 0) {
1527 pathbuf_destroy(pb);
1528 return error;
1529 }
1530 *vpp = nd.ni_vp;
1531 pathbuf_destroy(pb);
1532
1533 if ((*vpp)->v_type != VDIR)
1534 error = ENOTDIR;
1535 else
1536 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1537
1538 if (error)
1539 vput(*vpp);
1540 else
1541 VOP_UNLOCK(*vpp);
1542 return (error);
1543 }
1544
1545 /*
1546 * Internals of sys_open - path has already been converted into a pathbuf
1547 * (so we can easily reuse this function from other parts of the kernel,
1548 * like posix_spawn post-processing).
1549 */
1550 int
1551 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1552 int open_mode, int *fd)
1553 {
1554 struct proc *p = l->l_proc;
1555 struct cwdinfo *cwdi = p->p_cwdi;
1556 file_t *fp;
1557 struct vnode *vp;
1558 int flags, cmode;
1559 int indx, error;
1560 struct nameidata nd;
1561
1562 if (open_flags & O_SEARCH) {
1563 open_flags &= ~(int)O_SEARCH;
1564 }
1565
1566 flags = FFLAGS(open_flags);
1567 if ((flags & (FREAD | FWRITE)) == 0)
1568 return EINVAL;
1569
1570 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1571 return error;
1572 }
1573
1574 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1575 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1576 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1577 if (dvp != NULL)
1578 NDAT(&nd, dvp);
1579
1580 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1581 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1582 fd_abort(p, fp, indx);
1583 if ((error == EDUPFD || error == EMOVEFD) &&
1584 l->l_dupfd >= 0 && /* XXX from fdopen */
1585 (error =
1586 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1587 *fd = indx;
1588 return 0;
1589 }
1590 if (error == ERESTART)
1591 error = EINTR;
1592 return error;
1593 }
1594
1595 l->l_dupfd = 0;
1596 vp = nd.ni_vp;
1597
1598 if ((error = open_setfp(l, fp, vp, indx, flags)))
1599 return error;
1600
1601 VOP_UNLOCK(vp);
1602 *fd = indx;
1603 fd_affix(p, fp, indx);
1604 return 0;
1605 }
1606
1607 int
1608 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1609 {
1610 struct pathbuf *pb;
1611 int error, oflags;
1612
1613 oflags = FFLAGS(open_flags);
1614 if ((oflags & (FREAD | FWRITE)) == 0)
1615 return EINVAL;
1616
1617 pb = pathbuf_create(path);
1618 if (pb == NULL)
1619 return ENOMEM;
1620
1621 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1622 pathbuf_destroy(pb);
1623
1624 return error;
1625 }
1626
1627 static int
1628 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1629 int mode, int *fd)
1630 {
1631 file_t *dfp = NULL;
1632 struct vnode *dvp = NULL;
1633 struct pathbuf *pb;
1634 int error;
1635
1636 if (path == NULL) {
1637 MODULE_CALL_HOOK(compat_10_openat_hook, (&pb),
1638 0, error);
1639 if (error)
1640 return error;
1641 } else {
1642 error = pathbuf_copyin(path, &pb);
1643 if (error)
1644 return error;
1645 }
1646
1647 if (fdat != AT_FDCWD) {
1648 /* fd_getvnode() will use the descriptor for us */
1649 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1650 goto out;
1651
1652 dvp = dfp->f_vnode;
1653 }
1654
1655 error = do_open(l, dvp, pb, flags, mode, fd);
1656
1657 if (dfp != NULL)
1658 fd_putfile(fdat);
1659 out:
1660 pathbuf_destroy(pb);
1661 return error;
1662 }
1663
1664 int
1665 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1666 {
1667 /* {
1668 syscallarg(const char *) path;
1669 syscallarg(int) flags;
1670 syscallarg(int) mode;
1671 } */
1672 int error;
1673 int fd;
1674
1675 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1676 SCARG(uap, flags), SCARG(uap, mode), &fd);
1677
1678 if (error == 0)
1679 *retval = fd;
1680
1681 return error;
1682 }
1683
1684 int
1685 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1686 {
1687 /* {
1688 syscallarg(int) fd;
1689 syscallarg(const char *) path;
1690 syscallarg(int) oflags;
1691 syscallarg(int) mode;
1692 } */
1693 int error;
1694 int fd;
1695
1696 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1697 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1698
1699 if (error == 0)
1700 *retval = fd;
1701
1702 return error;
1703 }
1704
1705 static void
1706 vfs__fhfree(fhandle_t *fhp)
1707 {
1708 size_t fhsize;
1709
1710 fhsize = FHANDLE_SIZE(fhp);
1711 kmem_free(fhp, fhsize);
1712 }
1713
1714 /*
1715 * vfs_composefh: compose a filehandle.
1716 */
1717
1718 int
1719 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1720 {
1721 struct mount *mp;
1722 struct fid *fidp;
1723 int error;
1724 size_t needfhsize;
1725 size_t fidsize;
1726
1727 mp = vp->v_mount;
1728 fidp = NULL;
1729 if (*fh_size < FHANDLE_SIZE_MIN) {
1730 fidsize = 0;
1731 } else {
1732 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1733 if (fhp != NULL) {
1734 memset(fhp, 0, *fh_size);
1735 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1736 fidp = &fhp->fh_fid;
1737 }
1738 }
1739 error = VFS_VPTOFH(vp, fidp, &fidsize);
1740 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1741 if (error == 0 && *fh_size < needfhsize) {
1742 error = E2BIG;
1743 }
1744 *fh_size = needfhsize;
1745 return error;
1746 }
1747
1748 int
1749 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1750 {
1751 struct mount *mp;
1752 fhandle_t *fhp;
1753 size_t fhsize;
1754 size_t fidsize;
1755 int error;
1756
1757 mp = vp->v_mount;
1758 fidsize = 0;
1759 error = VFS_VPTOFH(vp, NULL, &fidsize);
1760 KASSERT(error != 0);
1761 if (error != E2BIG) {
1762 goto out;
1763 }
1764 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1765 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1766 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1767 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1768 if (error == 0) {
1769 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1770 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1771 *fhpp = fhp;
1772 } else {
1773 kmem_free(fhp, fhsize);
1774 }
1775 out:
1776 return error;
1777 }
1778
1779 void
1780 vfs_composefh_free(fhandle_t *fhp)
1781 {
1782
1783 vfs__fhfree(fhp);
1784 }
1785
1786 /*
1787 * vfs_fhtovp: lookup a vnode by a filehandle.
1788 */
1789
1790 int
1791 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1792 {
1793 struct mount *mp;
1794 int error;
1795
1796 *vpp = NULL;
1797 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1798 if (mp == NULL) {
1799 error = ESTALE;
1800 goto out;
1801 }
1802 if (mp->mnt_op->vfs_fhtovp == NULL) {
1803 error = EOPNOTSUPP;
1804 goto out;
1805 }
1806 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1807 out:
1808 return error;
1809 }
1810
1811 /*
1812 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1813 * the needed size.
1814 */
1815
1816 int
1817 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1818 {
1819 fhandle_t *fhp;
1820 int error;
1821
1822 if (fhsize > FHANDLE_SIZE_MAX) {
1823 return EINVAL;
1824 }
1825 if (fhsize < FHANDLE_SIZE_MIN) {
1826 return EINVAL;
1827 }
1828 again:
1829 fhp = kmem_alloc(fhsize, KM_SLEEP);
1830 error = copyin(ufhp, fhp, fhsize);
1831 if (error == 0) {
1832 /* XXX this check shouldn't be here */
1833 if (FHANDLE_SIZE(fhp) == fhsize) {
1834 *fhpp = fhp;
1835 return 0;
1836 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1837 /*
1838 * a kludge for nfsv2 padded handles.
1839 */
1840 size_t sz;
1841
1842 sz = FHANDLE_SIZE(fhp);
1843 kmem_free(fhp, fhsize);
1844 fhsize = sz;
1845 goto again;
1846 } else {
1847 /*
1848 * userland told us wrong size.
1849 */
1850 error = EINVAL;
1851 }
1852 }
1853 kmem_free(fhp, fhsize);
1854 return error;
1855 }
1856
1857 void
1858 vfs_copyinfh_free(fhandle_t *fhp)
1859 {
1860
1861 vfs__fhfree(fhp);
1862 }
1863
1864 /*
1865 * Get file handle system call
1866 */
1867 int
1868 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1869 {
1870 /* {
1871 syscallarg(char *) fname;
1872 syscallarg(fhandle_t *) fhp;
1873 syscallarg(size_t *) fh_size;
1874 } */
1875 struct vnode *vp;
1876 fhandle_t *fh;
1877 int error;
1878 struct pathbuf *pb;
1879 struct nameidata nd;
1880 size_t sz;
1881 size_t usz;
1882
1883 /*
1884 * Must be super user
1885 */
1886 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1887 0, NULL, NULL, NULL);
1888 if (error)
1889 return (error);
1890
1891 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1892 if (error) {
1893 return error;
1894 }
1895 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1896 error = namei(&nd);
1897 if (error) {
1898 pathbuf_destroy(pb);
1899 return error;
1900 }
1901 vp = nd.ni_vp;
1902 pathbuf_destroy(pb);
1903
1904 error = vfs_composefh_alloc(vp, &fh);
1905 vput(vp);
1906 if (error != 0) {
1907 return error;
1908 }
1909 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1910 if (error != 0) {
1911 goto out;
1912 }
1913 sz = FHANDLE_SIZE(fh);
1914 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1915 if (error != 0) {
1916 goto out;
1917 }
1918 if (usz >= sz) {
1919 error = copyout(fh, SCARG(uap, fhp), sz);
1920 } else {
1921 error = E2BIG;
1922 }
1923 out:
1924 vfs_composefh_free(fh);
1925 return (error);
1926 }
1927
1928 /*
1929 * Open a file given a file handle.
1930 *
1931 * Check permissions, allocate an open file structure,
1932 * and call the device open routine if any.
1933 */
1934
1935 int
1936 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1937 register_t *retval)
1938 {
1939 file_t *fp;
1940 struct vnode *vp = NULL;
1941 kauth_cred_t cred = l->l_cred;
1942 file_t *nfp;
1943 int indx, error;
1944 struct vattr va;
1945 fhandle_t *fh;
1946 int flags;
1947 proc_t *p;
1948
1949 p = curproc;
1950
1951 /*
1952 * Must be super user
1953 */
1954 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1955 0, NULL, NULL, NULL)))
1956 return (error);
1957
1958 if (oflags & O_SEARCH) {
1959 oflags &= ~(int)O_SEARCH;
1960 }
1961
1962 flags = FFLAGS(oflags);
1963 if ((flags & (FREAD | FWRITE)) == 0)
1964 return (EINVAL);
1965 if ((flags & O_CREAT))
1966 return (EINVAL);
1967 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1968 return (error);
1969 fp = nfp;
1970 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1971 if (error != 0) {
1972 goto bad;
1973 }
1974 error = vfs_fhtovp(fh, &vp);
1975 vfs_copyinfh_free(fh);
1976 if (error != 0) {
1977 goto bad;
1978 }
1979
1980 /* Now do an effective vn_open */
1981
1982 if (vp->v_type == VSOCK) {
1983 error = EOPNOTSUPP;
1984 goto bad;
1985 }
1986 error = vn_openchk(vp, cred, flags);
1987 if (error != 0)
1988 goto bad;
1989 if (flags & O_TRUNC) {
1990 VOP_UNLOCK(vp); /* XXX */
1991 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1992 vattr_null(&va);
1993 va.va_size = 0;
1994 error = VOP_SETATTR(vp, &va, cred);
1995 if (error)
1996 goto bad;
1997 }
1998 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1999 goto bad;
2000 if (flags & FWRITE) {
2001 mutex_enter(vp->v_interlock);
2002 vp->v_writecount++;
2003 mutex_exit(vp->v_interlock);
2004 }
2005
2006 /* done with modified vn_open, now finish what sys_open does. */
2007 if ((error = open_setfp(l, fp, vp, indx, flags)))
2008 return error;
2009
2010 VOP_UNLOCK(vp);
2011 *retval = indx;
2012 fd_affix(p, fp, indx);
2013 return (0);
2014
2015 bad:
2016 fd_abort(p, fp, indx);
2017 if (vp != NULL)
2018 vput(vp);
2019 return (error);
2020 }
2021
2022 int
2023 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2024 {
2025 /* {
2026 syscallarg(const void *) fhp;
2027 syscallarg(size_t) fh_size;
2028 syscallarg(int) flags;
2029 } */
2030
2031 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2032 SCARG(uap, flags), retval);
2033 }
2034
2035 int
2036 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2037 {
2038 int error;
2039 fhandle_t *fh;
2040 struct vnode *vp;
2041
2042 /*
2043 * Must be super user
2044 */
2045 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2046 0, NULL, NULL, NULL)))
2047 return (error);
2048
2049 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2050 if (error != 0)
2051 return error;
2052
2053 error = vfs_fhtovp(fh, &vp);
2054 vfs_copyinfh_free(fh);
2055 if (error != 0)
2056 return error;
2057
2058 error = vn_stat(vp, sb);
2059 vput(vp);
2060 return error;
2061 }
2062
2063
2064 /* ARGSUSED */
2065 int
2066 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2067 {
2068 /* {
2069 syscallarg(const void *) fhp;
2070 syscallarg(size_t) fh_size;
2071 syscallarg(struct stat *) sb;
2072 } */
2073 struct stat sb;
2074 int error;
2075
2076 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2077 if (error)
2078 return error;
2079 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2080 }
2081
2082 int
2083 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2084 int flags)
2085 {
2086 fhandle_t *fh;
2087 struct mount *mp;
2088 struct vnode *vp;
2089 int error;
2090
2091 /*
2092 * Must be super user
2093 */
2094 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2095 0, NULL, NULL, NULL)))
2096 return error;
2097
2098 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2099 if (error != 0)
2100 return error;
2101
2102 error = vfs_fhtovp(fh, &vp);
2103 vfs_copyinfh_free(fh);
2104 if (error != 0)
2105 return error;
2106
2107 mp = vp->v_mount;
2108 error = dostatvfs(mp, sb, l, flags, 1);
2109 vput(vp);
2110 return error;
2111 }
2112
2113 /* ARGSUSED */
2114 int
2115 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2116 {
2117 /* {
2118 syscallarg(const void *) fhp;
2119 syscallarg(size_t) fh_size;
2120 syscallarg(struct statvfs *) buf;
2121 syscallarg(int) flags;
2122 } */
2123 struct statvfs *sb = STATVFSBUF_GET();
2124 int error;
2125
2126 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2127 SCARG(uap, flags));
2128 if (error == 0)
2129 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2130 STATVFSBUF_PUT(sb);
2131 return error;
2132 }
2133
2134 /*
2135 * Create a special file.
2136 */
2137 /* ARGSUSED */
2138 int
2139 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2140 register_t *retval)
2141 {
2142 /* {
2143 syscallarg(const char *) path;
2144 syscallarg(mode_t) mode;
2145 syscallarg(dev_t) dev;
2146 } */
2147 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2148 SCARG(uap, dev), retval, UIO_USERSPACE);
2149 }
2150
2151 int
2152 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2153 register_t *retval)
2154 {
2155 /* {
2156 syscallarg(int) fd;
2157 syscallarg(const char *) path;
2158 syscallarg(mode_t) mode;
2159 syscallarg(int) pad;
2160 syscallarg(dev_t) dev;
2161 } */
2162
2163 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2164 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2165 }
2166
2167 int
2168 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2169 register_t *retval, enum uio_seg seg)
2170 {
2171 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2172 }
2173
2174 int
2175 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2176 dev_t dev, register_t *retval, enum uio_seg seg)
2177 {
2178 struct proc *p = l->l_proc;
2179 struct vnode *vp;
2180 struct vattr vattr;
2181 int error, optype;
2182 struct pathbuf *pb;
2183 struct nameidata nd;
2184 const char *pathstring;
2185
2186 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2187 0, NULL, NULL, NULL)) != 0)
2188 return (error);
2189
2190 optype = VOP_MKNOD_DESCOFFSET;
2191
2192 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2193 if (error) {
2194 return error;
2195 }
2196 pathstring = pathbuf_stringcopy_get(pb);
2197 if (pathstring == NULL) {
2198 pathbuf_destroy(pb);
2199 return ENOMEM;
2200 }
2201
2202 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2203
2204 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2205 goto out;
2206 vp = nd.ni_vp;
2207
2208 if (vp != NULL)
2209 error = EEXIST;
2210 else {
2211 vattr_null(&vattr);
2212 /* We will read cwdi->cwdi_cmask unlocked. */
2213 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2214 vattr.va_rdev = dev;
2215
2216 switch (mode & S_IFMT) {
2217 case S_IFMT: /* used by badsect to flag bad sectors */
2218 vattr.va_type = VBAD;
2219 break;
2220 case S_IFCHR:
2221 vattr.va_type = VCHR;
2222 break;
2223 case S_IFBLK:
2224 vattr.va_type = VBLK;
2225 break;
2226 case S_IFWHT:
2227 optype = VOP_WHITEOUT_DESCOFFSET;
2228 break;
2229 case S_IFREG:
2230 #if NVERIEXEC > 0
2231 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2232 O_CREAT);
2233 #endif /* NVERIEXEC > 0 */
2234 vattr.va_type = VREG;
2235 vattr.va_rdev = VNOVAL;
2236 optype = VOP_CREATE_DESCOFFSET;
2237 break;
2238 default:
2239 error = EINVAL;
2240 break;
2241 }
2242 }
2243 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2244 && vattr.va_rdev == VNOVAL)
2245 error = EINVAL;
2246 if (!error) {
2247 switch (optype) {
2248 case VOP_WHITEOUT_DESCOFFSET:
2249 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2250 if (error)
2251 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2252 vput(nd.ni_dvp);
2253 break;
2254
2255 case VOP_MKNOD_DESCOFFSET:
2256 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2257 &nd.ni_cnd, &vattr);
2258 if (error == 0)
2259 vrele(nd.ni_vp);
2260 vput(nd.ni_dvp);
2261 break;
2262
2263 case VOP_CREATE_DESCOFFSET:
2264 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2265 &nd.ni_cnd, &vattr);
2266 if (error == 0)
2267 vrele(nd.ni_vp);
2268 vput(nd.ni_dvp);
2269 break;
2270 }
2271 } else {
2272 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2273 if (nd.ni_dvp == vp)
2274 vrele(nd.ni_dvp);
2275 else
2276 vput(nd.ni_dvp);
2277 if (vp)
2278 vrele(vp);
2279 }
2280 out:
2281 pathbuf_stringcopy_put(pb, pathstring);
2282 pathbuf_destroy(pb);
2283 return (error);
2284 }
2285
2286 /*
2287 * Create a named pipe.
2288 */
2289 /* ARGSUSED */
2290 int
2291 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2292 {
2293 /* {
2294 syscallarg(const char *) path;
2295 syscallarg(int) mode;
2296 } */
2297 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2298 }
2299
2300 int
2301 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2302 register_t *retval)
2303 {
2304 /* {
2305 syscallarg(int) fd;
2306 syscallarg(const char *) path;
2307 syscallarg(int) mode;
2308 } */
2309
2310 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2311 SCARG(uap, mode));
2312 }
2313
2314 static int
2315 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2316 {
2317 struct proc *p = l->l_proc;
2318 struct vattr vattr;
2319 int error;
2320 struct pathbuf *pb;
2321 struct nameidata nd;
2322
2323 error = pathbuf_copyin(path, &pb);
2324 if (error) {
2325 return error;
2326 }
2327 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2328
2329 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2330 pathbuf_destroy(pb);
2331 return error;
2332 }
2333 if (nd.ni_vp != NULL) {
2334 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2335 if (nd.ni_dvp == nd.ni_vp)
2336 vrele(nd.ni_dvp);
2337 else
2338 vput(nd.ni_dvp);
2339 vrele(nd.ni_vp);
2340 pathbuf_destroy(pb);
2341 return (EEXIST);
2342 }
2343 vattr_null(&vattr);
2344 vattr.va_type = VFIFO;
2345 /* We will read cwdi->cwdi_cmask unlocked. */
2346 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2347 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2348 if (error == 0)
2349 vrele(nd.ni_vp);
2350 vput(nd.ni_dvp);
2351 pathbuf_destroy(pb);
2352 return (error);
2353 }
2354
2355 /*
2356 * Make a hard file link.
2357 */
2358 /* ARGSUSED */
2359 int
2360 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2361 const char *link, int follow, register_t *retval)
2362 {
2363 struct vnode *vp;
2364 struct pathbuf *linkpb;
2365 struct nameidata nd;
2366 namei_simple_flags_t ns_flags;
2367 int error;
2368
2369 if (follow & AT_SYMLINK_FOLLOW)
2370 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2371 else
2372 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2373
2374 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2375 if (error != 0)
2376 return (error);
2377 error = pathbuf_copyin(link, &linkpb);
2378 if (error) {
2379 goto out1;
2380 }
2381 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2382 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2383 goto out2;
2384 if (nd.ni_vp) {
2385 error = EEXIST;
2386 goto abortop;
2387 }
2388 /* Prevent hard links on directories. */
2389 if (vp->v_type == VDIR) {
2390 error = EPERM;
2391 goto abortop;
2392 }
2393 /* Prevent cross-mount operation. */
2394 if (nd.ni_dvp->v_mount != vp->v_mount) {
2395 error = EXDEV;
2396 goto abortop;
2397 }
2398 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2399 VOP_UNLOCK(nd.ni_dvp);
2400 vrele(nd.ni_dvp);
2401 out2:
2402 pathbuf_destroy(linkpb);
2403 out1:
2404 vrele(vp);
2405 return (error);
2406 abortop:
2407 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2408 if (nd.ni_dvp == nd.ni_vp)
2409 vrele(nd.ni_dvp);
2410 else
2411 vput(nd.ni_dvp);
2412 if (nd.ni_vp != NULL)
2413 vrele(nd.ni_vp);
2414 goto out2;
2415 }
2416
2417 int
2418 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2419 {
2420 /* {
2421 syscallarg(const char *) path;
2422 syscallarg(const char *) link;
2423 } */
2424 const char *path = SCARG(uap, path);
2425 const char *link = SCARG(uap, link);
2426
2427 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2428 AT_SYMLINK_FOLLOW, retval);
2429 }
2430
2431 int
2432 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2433 register_t *retval)
2434 {
2435 /* {
2436 syscallarg(int) fd1;
2437 syscallarg(const char *) name1;
2438 syscallarg(int) fd2;
2439 syscallarg(const char *) name2;
2440 syscallarg(int) flags;
2441 } */
2442 int fd1 = SCARG(uap, fd1);
2443 const char *name1 = SCARG(uap, name1);
2444 int fd2 = SCARG(uap, fd2);
2445 const char *name2 = SCARG(uap, name2);
2446 int follow;
2447
2448 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2449
2450 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2451 }
2452
2453
2454 int
2455 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2456 {
2457 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2458 }
2459
2460 static int
2461 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2462 const char *link, enum uio_seg seg)
2463 {
2464 struct proc *p = curproc;
2465 struct vattr vattr;
2466 char *path;
2467 int error;
2468 size_t len;
2469 struct pathbuf *linkpb;
2470 struct nameidata nd;
2471
2472 KASSERT(l != NULL || fdat == AT_FDCWD);
2473
2474 path = PNBUF_GET();
2475 if (seg == UIO_USERSPACE) {
2476 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0)
2477 goto out1;
2478 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2479 goto out1;
2480 } else {
2481 len = strlen(patharg) + 1;
2482 KASSERT(len <= MAXPATHLEN);
2483 memcpy(path, patharg, len);
2484 linkpb = pathbuf_create(link);
2485 if (linkpb == NULL) {
2486 error = ENOMEM;
2487 goto out1;
2488 }
2489 }
2490 ktrkuser("symlink-target", path, len - 1);
2491
2492 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2493 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2494 goto out2;
2495 if (nd.ni_vp) {
2496 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2497 if (nd.ni_dvp == nd.ni_vp)
2498 vrele(nd.ni_dvp);
2499 else
2500 vput(nd.ni_dvp);
2501 vrele(nd.ni_vp);
2502 error = EEXIST;
2503 goto out2;
2504 }
2505 vattr_null(&vattr);
2506 vattr.va_type = VLNK;
2507 /* We will read cwdi->cwdi_cmask unlocked. */
2508 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2509 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2510 if (error == 0)
2511 vrele(nd.ni_vp);
2512 vput(nd.ni_dvp);
2513 out2:
2514 pathbuf_destroy(linkpb);
2515 out1:
2516 PNBUF_PUT(path);
2517 return (error);
2518 }
2519
2520 /*
2521 * Make a symbolic link.
2522 */
2523 /* ARGSUSED */
2524 int
2525 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2526 {
2527 /* {
2528 syscallarg(const char *) path;
2529 syscallarg(const char *) link;
2530 } */
2531
2532 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2533 UIO_USERSPACE);
2534 }
2535
2536 int
2537 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2538 register_t *retval)
2539 {
2540 /* {
2541 syscallarg(const char *) path1;
2542 syscallarg(int) fd;
2543 syscallarg(const char *) path2;
2544 } */
2545
2546 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2547 SCARG(uap, path2), UIO_USERSPACE);
2548 }
2549
2550 /*
2551 * Delete a whiteout from the filesystem.
2552 */
2553 /* ARGSUSED */
2554 int
2555 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2556 {
2557 /* {
2558 syscallarg(const char *) path;
2559 } */
2560 int error;
2561 struct pathbuf *pb;
2562 struct nameidata nd;
2563
2564 error = pathbuf_copyin(SCARG(uap, path), &pb);
2565 if (error) {
2566 return error;
2567 }
2568
2569 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2570 error = namei(&nd);
2571 if (error) {
2572 pathbuf_destroy(pb);
2573 return (error);
2574 }
2575
2576 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2577 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2578 if (nd.ni_dvp == nd.ni_vp)
2579 vrele(nd.ni_dvp);
2580 else
2581 vput(nd.ni_dvp);
2582 if (nd.ni_vp)
2583 vrele(nd.ni_vp);
2584 pathbuf_destroy(pb);
2585 return (EEXIST);
2586 }
2587 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2588 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2589 vput(nd.ni_dvp);
2590 pathbuf_destroy(pb);
2591 return (error);
2592 }
2593
2594 /*
2595 * Delete a name from the filesystem.
2596 */
2597 /* ARGSUSED */
2598 int
2599 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2600 {
2601 /* {
2602 syscallarg(const char *) path;
2603 } */
2604
2605 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2606 }
2607
2608 int
2609 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2610 register_t *retval)
2611 {
2612 /* {
2613 syscallarg(int) fd;
2614 syscallarg(const char *) path;
2615 syscallarg(int) flag;
2616 } */
2617
2618 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2619 SCARG(uap, flag), UIO_USERSPACE);
2620 }
2621
2622 int
2623 do_sys_unlink(const char *arg, enum uio_seg seg)
2624 {
2625 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2626 }
2627
2628 static int
2629 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2630 enum uio_seg seg)
2631 {
2632 struct vnode *vp;
2633 int error;
2634 struct pathbuf *pb;
2635 struct nameidata nd;
2636 const char *pathstring;
2637
2638 KASSERT(l != NULL || fdat == AT_FDCWD);
2639
2640 error = pathbuf_maybe_copyin(arg, seg, &pb);
2641 if (error) {
2642 return error;
2643 }
2644 pathstring = pathbuf_stringcopy_get(pb);
2645 if (pathstring == NULL) {
2646 pathbuf_destroy(pb);
2647 return ENOMEM;
2648 }
2649
2650 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2651 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2652 goto out;
2653 vp = nd.ni_vp;
2654
2655 /*
2656 * The root of a mounted filesystem cannot be deleted.
2657 */
2658 if ((vp->v_vflag & VV_ROOT) != 0) {
2659 error = EBUSY;
2660 goto abort;
2661 }
2662
2663 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2664 error = EBUSY;
2665 goto abort;
2666 }
2667
2668 /*
2669 * No rmdir "." please.
2670 */
2671 if (nd.ni_dvp == vp) {
2672 error = EINVAL;
2673 goto abort;
2674 }
2675
2676 /*
2677 * AT_REMOVEDIR is required to remove a directory
2678 */
2679 if (vp->v_type == VDIR) {
2680 if (!(flags & AT_REMOVEDIR)) {
2681 error = EPERM;
2682 goto abort;
2683 } else {
2684 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2685 vput(nd.ni_dvp);
2686 goto out;
2687 }
2688 }
2689
2690 /*
2691 * Starting here we only deal with non directories.
2692 */
2693 if (flags & AT_REMOVEDIR) {
2694 error = ENOTDIR;
2695 goto abort;
2696 }
2697
2698 #if NVERIEXEC > 0
2699 /* Handle remove requests for veriexec entries. */
2700 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2701 goto abort;
2702 }
2703 #endif /* NVERIEXEC > 0 */
2704
2705 #ifdef FILEASSOC
2706 (void)fileassoc_file_delete(vp);
2707 #endif /* FILEASSOC */
2708 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2709 vput(nd.ni_dvp);
2710 goto out;
2711
2712 abort:
2713 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2714 if (nd.ni_dvp == vp)
2715 vrele(nd.ni_dvp);
2716 else
2717 vput(nd.ni_dvp);
2718 vput(vp);
2719
2720 out:
2721 pathbuf_stringcopy_put(pb, pathstring);
2722 pathbuf_destroy(pb);
2723 return (error);
2724 }
2725
2726 /*
2727 * Reposition read/write file offset.
2728 */
2729 int
2730 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2731 {
2732 /* {
2733 syscallarg(int) fd;
2734 syscallarg(int) pad;
2735 syscallarg(off_t) offset;
2736 syscallarg(int) whence;
2737 } */
2738 kauth_cred_t cred = l->l_cred;
2739 file_t *fp;
2740 struct vnode *vp;
2741 struct vattr vattr;
2742 off_t newoff;
2743 int error, fd;
2744
2745 fd = SCARG(uap, fd);
2746
2747 if ((fp = fd_getfile(fd)) == NULL)
2748 return (EBADF);
2749
2750 vp = fp->f_vnode;
2751 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2752 error = ESPIPE;
2753 goto out;
2754 }
2755
2756 vn_lock(vp, LK_SHARED | LK_RETRY);
2757
2758 switch (SCARG(uap, whence)) {
2759 case SEEK_CUR:
2760 newoff = fp->f_offset + SCARG(uap, offset);
2761 break;
2762 case SEEK_END:
2763 error = VOP_GETATTR(vp, &vattr, cred);
2764 if (error) {
2765 VOP_UNLOCK(vp);
2766 goto out;
2767 }
2768 newoff = SCARG(uap, offset) + vattr.va_size;
2769 break;
2770 case SEEK_SET:
2771 newoff = SCARG(uap, offset);
2772 break;
2773 default:
2774 error = EINVAL;
2775 VOP_UNLOCK(vp);
2776 goto out;
2777 }
2778 VOP_UNLOCK(vp);
2779 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2780 *(off_t *)retval = fp->f_offset = newoff;
2781 }
2782 out:
2783 fd_putfile(fd);
2784 return (error);
2785 }
2786
2787 /*
2788 * Positional read system call.
2789 */
2790 int
2791 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2792 {
2793 /* {
2794 syscallarg(int) fd;
2795 syscallarg(void *) buf;
2796 syscallarg(size_t) nbyte;
2797 syscallarg(off_t) offset;
2798 } */
2799 file_t *fp;
2800 struct vnode *vp;
2801 off_t offset;
2802 int error, fd = SCARG(uap, fd);
2803
2804 if ((fp = fd_getfile(fd)) == NULL)
2805 return (EBADF);
2806
2807 if ((fp->f_flag & FREAD) == 0) {
2808 fd_putfile(fd);
2809 return (EBADF);
2810 }
2811
2812 vp = fp->f_vnode;
2813 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2814 error = ESPIPE;
2815 goto out;
2816 }
2817
2818 offset = SCARG(uap, offset);
2819
2820 /*
2821 * XXX This works because no file systems actually
2822 * XXX take any action on the seek operation.
2823 */
2824 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2825 goto out;
2826
2827 /* dofileread() will unuse the descriptor for us */
2828 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2829 &offset, 0, retval));
2830
2831 out:
2832 fd_putfile(fd);
2833 return (error);
2834 }
2835
2836 /*
2837 * Positional scatter read system call.
2838 */
2839 int
2840 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2841 {
2842 /* {
2843 syscallarg(int) fd;
2844 syscallarg(const struct iovec *) iovp;
2845 syscallarg(int) iovcnt;
2846 syscallarg(off_t) offset;
2847 } */
2848 off_t offset = SCARG(uap, offset);
2849
2850 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2851 SCARG(uap, iovcnt), &offset, 0, retval);
2852 }
2853
2854 /*
2855 * Positional write system call.
2856 */
2857 int
2858 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2859 {
2860 /* {
2861 syscallarg(int) fd;
2862 syscallarg(const void *) buf;
2863 syscallarg(size_t) nbyte;
2864 syscallarg(off_t) offset;
2865 } */
2866 file_t *fp;
2867 struct vnode *vp;
2868 off_t offset;
2869 int error, fd = SCARG(uap, fd);
2870
2871 if ((fp = fd_getfile(fd)) == NULL)
2872 return (EBADF);
2873
2874 if ((fp->f_flag & FWRITE) == 0) {
2875 fd_putfile(fd);
2876 return (EBADF);
2877 }
2878
2879 vp = fp->f_vnode;
2880 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2881 error = ESPIPE;
2882 goto out;
2883 }
2884
2885 offset = SCARG(uap, offset);
2886
2887 /*
2888 * XXX This works because no file systems actually
2889 * XXX take any action on the seek operation.
2890 */
2891 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2892 goto out;
2893
2894 /* dofilewrite() will unuse the descriptor for us */
2895 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2896 &offset, 0, retval));
2897
2898 out:
2899 fd_putfile(fd);
2900 return (error);
2901 }
2902
2903 /*
2904 * Positional gather write system call.
2905 */
2906 int
2907 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2908 {
2909 /* {
2910 syscallarg(int) fd;
2911 syscallarg(const struct iovec *) iovp;
2912 syscallarg(int) iovcnt;
2913 syscallarg(off_t) offset;
2914 } */
2915 off_t offset = SCARG(uap, offset);
2916
2917 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2918 SCARG(uap, iovcnt), &offset, 0, retval);
2919 }
2920
2921 /*
2922 * Check access permissions.
2923 */
2924 int
2925 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2926 {
2927 /* {
2928 syscallarg(const char *) path;
2929 syscallarg(int) flags;
2930 } */
2931
2932 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2933 SCARG(uap, flags), 0);
2934 }
2935
2936 int
2937 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2938 int mode, int flags)
2939 {
2940 kauth_cred_t cred;
2941 struct vnode *vp;
2942 int error, nd_flag, vmode;
2943 struct pathbuf *pb;
2944 struct nameidata nd;
2945
2946 CTASSERT(F_OK == 0);
2947 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2948 /* nonsense mode */
2949 return EINVAL;
2950 }
2951
2952 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2953 if (flags & AT_SYMLINK_NOFOLLOW)
2954 nd_flag &= ~FOLLOW;
2955
2956 error = pathbuf_copyin(path, &pb);
2957 if (error)
2958 return error;
2959
2960 NDINIT(&nd, LOOKUP, nd_flag, pb);
2961
2962 /* Override default credentials */
2963 cred = kauth_cred_dup(l->l_cred);
2964 if (!(flags & AT_EACCESS)) {
2965 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2966 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2967 }
2968 nd.ni_cnd.cn_cred = cred;
2969
2970 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2971 pathbuf_destroy(pb);
2972 goto out;
2973 }
2974 vp = nd.ni_vp;
2975 pathbuf_destroy(pb);
2976
2977 /* Flags == 0 means only check for existence. */
2978 if (mode) {
2979 vmode = 0;
2980 if (mode & R_OK)
2981 vmode |= VREAD;
2982 if (mode & W_OK)
2983 vmode |= VWRITE;
2984 if (mode & X_OK)
2985 vmode |= VEXEC;
2986
2987 error = VOP_ACCESS(vp, vmode, cred);
2988 if (!error && (vmode & VWRITE))
2989 error = vn_writechk(vp);
2990 }
2991 vput(vp);
2992 out:
2993 kauth_cred_free(cred);
2994 return (error);
2995 }
2996
2997 int
2998 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
2999 register_t *retval)
3000 {
3001 /* {
3002 syscallarg(int) fd;
3003 syscallarg(const char *) path;
3004 syscallarg(int) amode;
3005 syscallarg(int) flag;
3006 } */
3007
3008 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3009 SCARG(uap, amode), SCARG(uap, flag));
3010 }
3011
3012 /*
3013 * Common code for all sys_stat functions, including compat versions.
3014 */
3015 int
3016 do_sys_stat(const char *userpath, unsigned int nd_flag,
3017 struct stat *sb)
3018 {
3019 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3020 }
3021
3022 int
3023 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3024 unsigned int nd_flag, struct stat *sb)
3025 {
3026 int error;
3027 struct pathbuf *pb;
3028 struct nameidata nd;
3029
3030 KASSERT(l != NULL || fdat == AT_FDCWD);
3031
3032 error = pathbuf_copyin(userpath, &pb);
3033 if (error) {
3034 return error;
3035 }
3036
3037 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3038
3039 error = fd_nameiat(l, fdat, &nd);
3040 if (error != 0) {
3041 pathbuf_destroy(pb);
3042 return error;
3043 }
3044 error = vn_stat(nd.ni_vp, sb);
3045 vput(nd.ni_vp);
3046 pathbuf_destroy(pb);
3047 return error;
3048 }
3049
3050 /*
3051 * Get file status; this version follows links.
3052 */
3053 /* ARGSUSED */
3054 int
3055 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3056 {
3057 /* {
3058 syscallarg(const char *) path;
3059 syscallarg(struct stat *) ub;
3060 } */
3061 struct stat sb;
3062 int error;
3063
3064 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3065 if (error)
3066 return error;
3067 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3068 }
3069
3070 /*
3071 * Get file status; this version does not follow links.
3072 */
3073 /* ARGSUSED */
3074 int
3075 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3076 {
3077 /* {
3078 syscallarg(const char *) path;
3079 syscallarg(struct stat *) ub;
3080 } */
3081 struct stat sb;
3082 int error;
3083
3084 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3085 if (error)
3086 return error;
3087 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3088 }
3089
3090 int
3091 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3092 register_t *retval)
3093 {
3094 /* {
3095 syscallarg(int) fd;
3096 syscallarg(const char *) path;
3097 syscallarg(struct stat *) buf;
3098 syscallarg(int) flag;
3099 } */
3100 unsigned int nd_flag;
3101 struct stat sb;
3102 int error;
3103
3104 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3105 nd_flag = NOFOLLOW;
3106 else
3107 nd_flag = FOLLOW;
3108
3109 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3110 &sb);
3111 if (error)
3112 return error;
3113 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3114 }
3115
3116 /*
3117 * Get configurable pathname variables.
3118 */
3119 /* ARGSUSED */
3120 int
3121 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3122 {
3123 /* {
3124 syscallarg(const char *) path;
3125 syscallarg(int) name;
3126 } */
3127 int error;
3128 struct pathbuf *pb;
3129 struct nameidata nd;
3130
3131 error = pathbuf_copyin(SCARG(uap, path), &pb);
3132 if (error) {
3133 return error;
3134 }
3135 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3136 if ((error = namei(&nd)) != 0) {
3137 pathbuf_destroy(pb);
3138 return (error);
3139 }
3140 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3141 vput(nd.ni_vp);
3142 pathbuf_destroy(pb);
3143 return (error);
3144 }
3145
3146 /*
3147 * Return target name of a symbolic link.
3148 */
3149 /* ARGSUSED */
3150 int
3151 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3152 register_t *retval)
3153 {
3154 /* {
3155 syscallarg(const char *) path;
3156 syscallarg(char *) buf;
3157 syscallarg(size_t) count;
3158 } */
3159 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3160 SCARG(uap, buf), SCARG(uap, count), retval);
3161 }
3162
3163 static int
3164 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3165 size_t count, register_t *retval)
3166 {
3167 struct vnode *vp;
3168 struct iovec aiov;
3169 struct uio auio;
3170 int error;
3171 struct pathbuf *pb;
3172 struct nameidata nd;
3173
3174 error = pathbuf_copyin(path, &pb);
3175 if (error) {
3176 return error;
3177 }
3178 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3179 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3180 pathbuf_destroy(pb);
3181 return error;
3182 }
3183 vp = nd.ni_vp;
3184 pathbuf_destroy(pb);
3185 if (vp->v_type != VLNK)
3186 error = EINVAL;
3187 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3188 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3189 aiov.iov_base = buf;
3190 aiov.iov_len = count;
3191 auio.uio_iov = &aiov;
3192 auio.uio_iovcnt = 1;
3193 auio.uio_offset = 0;
3194 auio.uio_rw = UIO_READ;
3195 KASSERT(l == curlwp);
3196 auio.uio_vmspace = l->l_proc->p_vmspace;
3197 auio.uio_resid = count;
3198 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3199 *retval = count - auio.uio_resid;
3200 }
3201 vput(vp);
3202 return (error);
3203 }
3204
3205 int
3206 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3207 register_t *retval)
3208 {
3209 /* {
3210 syscallarg(int) fd;
3211 syscallarg(const char *) path;
3212 syscallarg(char *) buf;
3213 syscallarg(size_t) bufsize;
3214 } */
3215
3216 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3217 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3218 }
3219
3220 /*
3221 * Change flags of a file given a path name.
3222 */
3223 /* ARGSUSED */
3224 int
3225 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3226 {
3227 /* {
3228 syscallarg(const char *) path;
3229 syscallarg(u_long) flags;
3230 } */
3231 struct vnode *vp;
3232 int error;
3233
3234 error = namei_simple_user(SCARG(uap, path),
3235 NSM_FOLLOW_TRYEMULROOT, &vp);
3236 if (error != 0)
3237 return (error);
3238 error = change_flags(vp, SCARG(uap, flags), l);
3239 vput(vp);
3240 return (error);
3241 }
3242
3243 /*
3244 * Change flags of a file given a file descriptor.
3245 */
3246 /* ARGSUSED */
3247 int
3248 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3249 {
3250 /* {
3251 syscallarg(int) fd;
3252 syscallarg(u_long) flags;
3253 } */
3254 struct vnode *vp;
3255 file_t *fp;
3256 int error;
3257
3258 /* fd_getvnode() will use the descriptor for us */
3259 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3260 return (error);
3261 vp = fp->f_vnode;
3262 error = change_flags(vp, SCARG(uap, flags), l);
3263 VOP_UNLOCK(vp);
3264 fd_putfile(SCARG(uap, fd));
3265 return (error);
3266 }
3267
3268 /*
3269 * Change flags of a file given a path name; this version does
3270 * not follow links.
3271 */
3272 int
3273 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3274 {
3275 /* {
3276 syscallarg(const char *) path;
3277 syscallarg(u_long) flags;
3278 } */
3279 struct vnode *vp;
3280 int error;
3281
3282 error = namei_simple_user(SCARG(uap, path),
3283 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3284 if (error != 0)
3285 return (error);
3286 error = change_flags(vp, SCARG(uap, flags), l);
3287 vput(vp);
3288 return (error);
3289 }
3290
3291 /*
3292 * Common routine to change flags of a file.
3293 */
3294 int
3295 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3296 {
3297 struct vattr vattr;
3298 int error;
3299
3300 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3301
3302 vattr_null(&vattr);
3303 vattr.va_flags = flags;
3304 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3305
3306 return (error);
3307 }
3308
3309 /*
3310 * Change mode of a file given path name; this version follows links.
3311 */
3312 /* ARGSUSED */
3313 int
3314 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3315 {
3316 /* {
3317 syscallarg(const char *) path;
3318 syscallarg(int) mode;
3319 } */
3320 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3321 SCARG(uap, mode), 0);
3322 }
3323
3324 int
3325 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3326 {
3327 int error;
3328 struct vnode *vp;
3329 namei_simple_flags_t ns_flag;
3330
3331 if (flags & AT_SYMLINK_NOFOLLOW)
3332 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3333 else
3334 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3335
3336 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3337 if (error != 0)
3338 return error;
3339
3340 error = change_mode(vp, mode, l);
3341
3342 vrele(vp);
3343
3344 return (error);
3345 }
3346
3347 /*
3348 * Change mode of a file given a file descriptor.
3349 */
3350 /* ARGSUSED */
3351 int
3352 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3353 {
3354 /* {
3355 syscallarg(int) fd;
3356 syscallarg(int) mode;
3357 } */
3358 file_t *fp;
3359 int error;
3360
3361 /* fd_getvnode() will use the descriptor for us */
3362 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3363 return (error);
3364 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3365 fd_putfile(SCARG(uap, fd));
3366 return (error);
3367 }
3368
3369 int
3370 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3371 register_t *retval)
3372 {
3373 /* {
3374 syscallarg(int) fd;
3375 syscallarg(const char *) path;
3376 syscallarg(int) mode;
3377 syscallarg(int) flag;
3378 } */
3379
3380 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3381 SCARG(uap, mode), SCARG(uap, flag));
3382 }
3383
3384 /*
3385 * Change mode of a file given path name; this version does not follow links.
3386 */
3387 /* ARGSUSED */
3388 int
3389 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3390 {
3391 /* {
3392 syscallarg(const char *) path;
3393 syscallarg(int) mode;
3394 } */
3395 int error;
3396 struct vnode *vp;
3397
3398 error = namei_simple_user(SCARG(uap, path),
3399 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3400 if (error != 0)
3401 return (error);
3402
3403 error = change_mode(vp, SCARG(uap, mode), l);
3404
3405 vrele(vp);
3406 return (error);
3407 }
3408
3409 /*
3410 * Common routine to set mode given a vnode.
3411 */
3412 static int
3413 change_mode(struct vnode *vp, int mode, struct lwp *l)
3414 {
3415 struct vattr vattr;
3416 int error;
3417
3418 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3419 vattr_null(&vattr);
3420 vattr.va_mode = mode & ALLPERMS;
3421 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3422 VOP_UNLOCK(vp);
3423 return (error);
3424 }
3425
3426 /*
3427 * Set ownership given a path name; this version follows links.
3428 */
3429 /* ARGSUSED */
3430 int
3431 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3432 {
3433 /* {
3434 syscallarg(const char *) path;
3435 syscallarg(uid_t) uid;
3436 syscallarg(gid_t) gid;
3437 } */
3438 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3439 SCARG(uap, gid), 0);
3440 }
3441
3442 int
3443 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3444 gid_t gid, int flags)
3445 {
3446 int error;
3447 struct vnode *vp;
3448 namei_simple_flags_t ns_flag;
3449
3450 if (flags & AT_SYMLINK_NOFOLLOW)
3451 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3452 else
3453 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3454
3455 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3456 if (error != 0)
3457 return error;
3458
3459 error = change_owner(vp, uid, gid, l, 0);
3460
3461 vrele(vp);
3462
3463 return (error);
3464 }
3465
3466 /*
3467 * Set ownership given a path name; this version follows links.
3468 * Provides POSIX semantics.
3469 */
3470 /* ARGSUSED */
3471 int
3472 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3473 {
3474 /* {
3475 syscallarg(const char *) path;
3476 syscallarg(uid_t) uid;
3477 syscallarg(gid_t) gid;
3478 } */
3479 int error;
3480 struct vnode *vp;
3481
3482 error = namei_simple_user(SCARG(uap, path),
3483 NSM_FOLLOW_TRYEMULROOT, &vp);
3484 if (error != 0)
3485 return (error);
3486
3487 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3488
3489 vrele(vp);
3490 return (error);
3491 }
3492
3493 /*
3494 * Set ownership given a file descriptor.
3495 */
3496 /* ARGSUSED */
3497 int
3498 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3499 {
3500 /* {
3501 syscallarg(int) fd;
3502 syscallarg(uid_t) uid;
3503 syscallarg(gid_t) gid;
3504 } */
3505 int error;
3506 file_t *fp;
3507
3508 /* fd_getvnode() will use the descriptor for us */
3509 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3510 return (error);
3511 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3512 l, 0);
3513 fd_putfile(SCARG(uap, fd));
3514 return (error);
3515 }
3516
3517 int
3518 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3519 register_t *retval)
3520 {
3521 /* {
3522 syscallarg(int) fd;
3523 syscallarg(const char *) path;
3524 syscallarg(uid_t) owner;
3525 syscallarg(gid_t) group;
3526 syscallarg(int) flag;
3527 } */
3528
3529 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3530 SCARG(uap, owner), SCARG(uap, group),
3531 SCARG(uap, flag));
3532 }
3533
3534 /*
3535 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3536 */
3537 /* ARGSUSED */
3538 int
3539 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3540 {
3541 /* {
3542 syscallarg(int) fd;
3543 syscallarg(uid_t) uid;
3544 syscallarg(gid_t) gid;
3545 } */
3546 int error;
3547 file_t *fp;
3548
3549 /* fd_getvnode() will use the descriptor for us */
3550 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3551 return (error);
3552 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3553 l, 1);
3554 fd_putfile(SCARG(uap, fd));
3555 return (error);
3556 }
3557
3558 /*
3559 * Set ownership given a path name; this version does not follow links.
3560 */
3561 /* ARGSUSED */
3562 int
3563 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3564 {
3565 /* {
3566 syscallarg(const char *) path;
3567 syscallarg(uid_t) uid;
3568 syscallarg(gid_t) gid;
3569 } */
3570 int error;
3571 struct vnode *vp;
3572
3573 error = namei_simple_user(SCARG(uap, path),
3574 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3575 if (error != 0)
3576 return (error);
3577
3578 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3579
3580 vrele(vp);
3581 return (error);
3582 }
3583
3584 /*
3585 * Set ownership given a path name; this version does not follow links.
3586 * Provides POSIX/XPG semantics.
3587 */
3588 /* ARGSUSED */
3589 int
3590 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3591 {
3592 /* {
3593 syscallarg(const char *) path;
3594 syscallarg(uid_t) uid;
3595 syscallarg(gid_t) gid;
3596 } */
3597 int error;
3598 struct vnode *vp;
3599
3600 error = namei_simple_user(SCARG(uap, path),
3601 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3602 if (error != 0)
3603 return (error);
3604
3605 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3606
3607 vrele(vp);
3608 return (error);
3609 }
3610
3611 /*
3612 * Common routine to set ownership given a vnode.
3613 */
3614 static int
3615 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3616 int posix_semantics)
3617 {
3618 struct vattr vattr;
3619 mode_t newmode;
3620 int error;
3621
3622 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3623 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3624 goto out;
3625
3626 #define CHANGED(x) ((int)(x) != -1)
3627 newmode = vattr.va_mode;
3628 if (posix_semantics) {
3629 /*
3630 * POSIX/XPG semantics: if the caller is not the super-user,
3631 * clear set-user-id and set-group-id bits. Both POSIX and
3632 * the XPG consider the behaviour for calls by the super-user
3633 * implementation-defined; we leave the set-user-id and set-
3634 * group-id settings intact in that case.
3635 */
3636 if (vattr.va_mode & S_ISUID) {
3637 if (kauth_authorize_vnode(l->l_cred,
3638 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3639 newmode &= ~S_ISUID;
3640 }
3641 if (vattr.va_mode & S_ISGID) {
3642 if (kauth_authorize_vnode(l->l_cred,
3643 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3644 newmode &= ~S_ISGID;
3645 }
3646 } else {
3647 /*
3648 * NetBSD semantics: when changing owner and/or group,
3649 * clear the respective bit(s).
3650 */
3651 if (CHANGED(uid))
3652 newmode &= ~S_ISUID;
3653 if (CHANGED(gid))
3654 newmode &= ~S_ISGID;
3655 }
3656 /* Update va_mode iff altered. */
3657 if (vattr.va_mode == newmode)
3658 newmode = VNOVAL;
3659
3660 vattr_null(&vattr);
3661 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3662 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3663 vattr.va_mode = newmode;
3664 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3665 #undef CHANGED
3666
3667 out:
3668 VOP_UNLOCK(vp);
3669 return (error);
3670 }
3671
3672 /*
3673 * Set the access and modification times given a path name; this
3674 * version follows links.
3675 */
3676 /* ARGSUSED */
3677 int
3678 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3679 register_t *retval)
3680 {
3681 /* {
3682 syscallarg(const char *) path;
3683 syscallarg(const struct timeval *) tptr;
3684 } */
3685
3686 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3687 SCARG(uap, tptr), UIO_USERSPACE);
3688 }
3689
3690 /*
3691 * Set the access and modification times given a file descriptor.
3692 */
3693 /* ARGSUSED */
3694 int
3695 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3696 register_t *retval)
3697 {
3698 /* {
3699 syscallarg(int) fd;
3700 syscallarg(const struct timeval *) tptr;
3701 } */
3702 int error;
3703 file_t *fp;
3704
3705 /* fd_getvnode() will use the descriptor for us */
3706 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3707 return (error);
3708 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3709 UIO_USERSPACE);
3710 fd_putfile(SCARG(uap, fd));
3711 return (error);
3712 }
3713
3714 int
3715 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3716 register_t *retval)
3717 {
3718 /* {
3719 syscallarg(int) fd;
3720 syscallarg(const struct timespec *) tptr;
3721 } */
3722 int error;
3723 file_t *fp;
3724
3725 /* fd_getvnode() will use the descriptor for us */
3726 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3727 return (error);
3728 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3729 SCARG(uap, tptr), UIO_USERSPACE);
3730 fd_putfile(SCARG(uap, fd));
3731 return (error);
3732 }
3733
3734 /*
3735 * Set the access and modification times given a path name; this
3736 * version does not follow links.
3737 */
3738 int
3739 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3740 register_t *retval)
3741 {
3742 /* {
3743 syscallarg(const char *) path;
3744 syscallarg(const struct timeval *) tptr;
3745 } */
3746
3747 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3748 SCARG(uap, tptr), UIO_USERSPACE);
3749 }
3750
3751 int
3752 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3753 register_t *retval)
3754 {
3755 /* {
3756 syscallarg(int) fd;
3757 syscallarg(const char *) path;
3758 syscallarg(const struct timespec *) tptr;
3759 syscallarg(int) flag;
3760 } */
3761 int follow;
3762 const struct timespec *tptr;
3763 int error;
3764
3765 tptr = SCARG(uap, tptr);
3766 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3767
3768 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3769 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3770
3771 return error;
3772 }
3773
3774 /*
3775 * Common routine to set access and modification times given a vnode.
3776 */
3777 int
3778 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3779 const struct timespec *tptr, enum uio_seg seg)
3780 {
3781 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3782 }
3783
3784 int
3785 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3786 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3787 {
3788 struct vattr vattr;
3789 int error, dorele = 0;
3790 namei_simple_flags_t sflags;
3791 bool vanull, setbirthtime;
3792 struct timespec ts[2];
3793
3794 KASSERT(l != NULL || fdat == AT_FDCWD);
3795
3796 /*
3797 * I have checked all callers and they pass either FOLLOW,
3798 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3799 * is 0. More to the point, they don't pass anything else.
3800 * Let's keep it that way at least until the namei interfaces
3801 * are fully sanitized.
3802 */
3803 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3804 sflags = (flag == FOLLOW) ?
3805 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3806
3807 if (tptr == NULL) {
3808 vanull = true;
3809 nanotime(&ts[0]);
3810 ts[1] = ts[0];
3811 } else {
3812 vanull = false;
3813 if (seg != UIO_SYSSPACE) {
3814 error = copyin(tptr, ts, sizeof (ts));
3815 if (error != 0)
3816 return error;
3817 } else {
3818 ts[0] = tptr[0];
3819 ts[1] = tptr[1];
3820 }
3821 }
3822
3823 if (ts[0].tv_nsec == UTIME_NOW) {
3824 nanotime(&ts[0]);
3825 if (ts[1].tv_nsec == UTIME_NOW) {
3826 vanull = true;
3827 ts[1] = ts[0];
3828 }
3829 } else if (ts[1].tv_nsec == UTIME_NOW)
3830 nanotime(&ts[1]);
3831
3832 if (vp == NULL) {
3833 /* note: SEG describes TPTR, not PATH; PATH is always user */
3834 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3835 if (error != 0)
3836 return error;
3837 dorele = 1;
3838 }
3839
3840 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3841 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3842 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3843 vattr_null(&vattr);
3844
3845 if (ts[0].tv_nsec != UTIME_OMIT)
3846 vattr.va_atime = ts[0];
3847
3848 if (ts[1].tv_nsec != UTIME_OMIT) {
3849 vattr.va_mtime = ts[1];
3850 if (setbirthtime)
3851 vattr.va_birthtime = ts[1];
3852 }
3853
3854 if (vanull)
3855 vattr.va_vaflags |= VA_UTIMES_NULL;
3856 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3857 VOP_UNLOCK(vp);
3858
3859 if (dorele != 0)
3860 vrele(vp);
3861
3862 return error;
3863 }
3864
3865 int
3866 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3867 const struct timeval *tptr, enum uio_seg seg)
3868 {
3869 struct timespec ts[2];
3870 struct timespec *tsptr = NULL;
3871 int error;
3872
3873 if (tptr != NULL) {
3874 struct timeval tv[2];
3875
3876 if (seg != UIO_SYSSPACE) {
3877 error = copyin(tptr, tv, sizeof (tv));
3878 if (error != 0)
3879 return error;
3880 tptr = tv;
3881 }
3882
3883 if ((tv[0].tv_usec == UTIME_NOW) ||
3884 (tv[0].tv_usec == UTIME_OMIT))
3885 ts[0].tv_nsec = tv[0].tv_usec;
3886 else
3887 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3888
3889 if ((tv[1].tv_usec == UTIME_NOW) ||
3890 (tv[1].tv_usec == UTIME_OMIT))
3891 ts[1].tv_nsec = tv[1].tv_usec;
3892 else
3893 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3894
3895 tsptr = &ts[0];
3896 }
3897
3898 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3899 }
3900
3901 /*
3902 * Truncate a file given its path name.
3903 */
3904 /* ARGSUSED */
3905 int
3906 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3907 {
3908 /* {
3909 syscallarg(const char *) path;
3910 syscallarg(int) pad;
3911 syscallarg(off_t) length;
3912 } */
3913 struct vnode *vp;
3914 struct vattr vattr;
3915 int error;
3916
3917 if (SCARG(uap, length) < 0)
3918 return EINVAL;
3919
3920 error = namei_simple_user(SCARG(uap, path),
3921 NSM_FOLLOW_TRYEMULROOT, &vp);
3922 if (error != 0)
3923 return (error);
3924 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3925 if (vp->v_type == VDIR)
3926 error = EISDIR;
3927 else if ((error = vn_writechk(vp)) == 0 &&
3928 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3929 vattr_null(&vattr);
3930 vattr.va_size = SCARG(uap, length);
3931 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3932 }
3933 vput(vp);
3934 return (error);
3935 }
3936
3937 /*
3938 * Truncate a file given a file descriptor.
3939 */
3940 /* ARGSUSED */
3941 int
3942 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3943 {
3944 /* {
3945 syscallarg(int) fd;
3946 syscallarg(int) pad;
3947 syscallarg(off_t) length;
3948 } */
3949 struct vattr vattr;
3950 struct vnode *vp;
3951 file_t *fp;
3952 int error;
3953
3954 if (SCARG(uap, length) < 0)
3955 return EINVAL;
3956
3957 /* fd_getvnode() will use the descriptor for us */
3958 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3959 return (error);
3960 if ((fp->f_flag & FWRITE) == 0) {
3961 error = EINVAL;
3962 goto out;
3963 }
3964 vp = fp->f_vnode;
3965 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3966 if (vp->v_type == VDIR)
3967 error = EISDIR;
3968 else if ((error = vn_writechk(vp)) == 0) {
3969 vattr_null(&vattr);
3970 vattr.va_size = SCARG(uap, length);
3971 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3972 }
3973 VOP_UNLOCK(vp);
3974 out:
3975 fd_putfile(SCARG(uap, fd));
3976 return (error);
3977 }
3978
3979 /*
3980 * Sync an open file.
3981 */
3982 /* ARGSUSED */
3983 int
3984 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3985 {
3986 /* {
3987 syscallarg(int) fd;
3988 } */
3989 struct vnode *vp;
3990 file_t *fp;
3991 int error;
3992
3993 /* fd_getvnode() will use the descriptor for us */
3994 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3995 return (error);
3996 vp = fp->f_vnode;
3997 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3998 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3999 VOP_UNLOCK(vp);
4000 fd_putfile(SCARG(uap, fd));
4001 return (error);
4002 }
4003
4004 /*
4005 * Sync a range of file data. API modeled after that found in AIX.
4006 *
4007 * FDATASYNC indicates that we need only save enough metadata to be able
4008 * to re-read the written data. Note we duplicate AIX's requirement that
4009 * the file be open for writing.
4010 */
4011 /* ARGSUSED */
4012 int
4013 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4014 {
4015 /* {
4016 syscallarg(int) fd;
4017 syscallarg(int) flags;
4018 syscallarg(off_t) start;
4019 syscallarg(off_t) length;
4020 } */
4021 struct vnode *vp;
4022 file_t *fp;
4023 int flags, nflags;
4024 off_t s, e, len;
4025 int error;
4026
4027 /* fd_getvnode() will use the descriptor for us */
4028 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4029 return (error);
4030
4031 if ((fp->f_flag & FWRITE) == 0) {
4032 error = EBADF;
4033 goto out;
4034 }
4035
4036 flags = SCARG(uap, flags);
4037 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4038 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4039 error = EINVAL;
4040 goto out;
4041 }
4042 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4043 if (flags & FDATASYNC)
4044 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4045 else
4046 nflags = FSYNC_WAIT;
4047 if (flags & FDISKSYNC)
4048 nflags |= FSYNC_CACHE;
4049
4050 len = SCARG(uap, length);
4051 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4052 if (len) {
4053 s = SCARG(uap, start);
4054 e = s + len;
4055 if (e < s) {
4056 error = EINVAL;
4057 goto out;
4058 }
4059 } else {
4060 e = 0;
4061 s = 0;
4062 }
4063
4064 vp = fp->f_vnode;
4065 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4066 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4067 VOP_UNLOCK(vp);
4068 out:
4069 fd_putfile(SCARG(uap, fd));
4070 return (error);
4071 }
4072
4073 /*
4074 * Sync the data of an open file.
4075 */
4076 /* ARGSUSED */
4077 int
4078 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4079 {
4080 /* {
4081 syscallarg(int) fd;
4082 } */
4083 struct vnode *vp;
4084 file_t *fp;
4085 int error;
4086
4087 /* fd_getvnode() will use the descriptor for us */
4088 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4089 return (error);
4090 if ((fp->f_flag & FWRITE) == 0) {
4091 fd_putfile(SCARG(uap, fd));
4092 return (EBADF);
4093 }
4094 vp = fp->f_vnode;
4095 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4096 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4097 VOP_UNLOCK(vp);
4098 fd_putfile(SCARG(uap, fd));
4099 return (error);
4100 }
4101
4102 /*
4103 * Rename files, (standard) BSD semantics frontend.
4104 */
4105 /* ARGSUSED */
4106 int
4107 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4108 {
4109 /* {
4110 syscallarg(const char *) from;
4111 syscallarg(const char *) to;
4112 } */
4113
4114 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4115 SCARG(uap, to), UIO_USERSPACE, 0));
4116 }
4117
4118 int
4119 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4120 register_t *retval)
4121 {
4122 /* {
4123 syscallarg(int) fromfd;
4124 syscallarg(const char *) from;
4125 syscallarg(int) tofd;
4126 syscallarg(const char *) to;
4127 } */
4128
4129 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4130 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4131 }
4132
4133 /*
4134 * Rename files, POSIX semantics frontend.
4135 */
4136 /* ARGSUSED */
4137 int
4138 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4139 {
4140 /* {
4141 syscallarg(const char *) from;
4142 syscallarg(const char *) to;
4143 } */
4144
4145 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4146 SCARG(uap, to), UIO_USERSPACE, 1));
4147 }
4148
4149 /*
4150 * Rename files. Source and destination must either both be directories,
4151 * or both not be directories. If target is a directory, it must be empty.
4152 * If `from' and `to' refer to the same object, the value of the `retain'
4153 * argument is used to determine whether `from' will be
4154 *
4155 * (retain == 0) deleted unless `from' and `to' refer to the same
4156 * object in the file system's name space (BSD).
4157 * (retain == 1) always retained (POSIX).
4158 *
4159 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4160 */
4161 int
4162 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4163 {
4164 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4165 }
4166
/*
 * Common implementation of the rename system calls.
 *
 * l		calling lwp; may be NULL only if both fromfd and tofd are
 *		AT_FDCWD (asserted below).
 * fromfd/from	descriptor and path handed to fd_nameiat() for the source.
 * tofd/to	descriptor and path handed to fd_nameiat() for the target.
 * seg		address space of both path strings, as understood by
 *		pathbuf_maybe_copyin().
 * retain	nonzero: if source and target resolve to the same vnode,
 *		return 0 without calling VOP_RENAME.  Zero: do that only
 *		when the parent directory and final component name match
 *		as well.
 *
 * Returns 0 or an errno value.  Errors generated here: EINVAL (source's
 * final component is `.' or `..', target's is `..', or the source vnode is
 * the target directory), EISDIR (target's final component is `.', or a
 * non-directory would replace a directory), ENOTDIR (a directory would
 * replace a non-directory), ENOENT (either parent's v_mount is NULL) and
 * EXDEV (the parents' mounts differ).  Anything else is passed through
 * from the lookups, VFS_RENAMELOCK_ENTER, relookup, veriexec, or
 * VOP_RENAME.
 *
 * Cleanup is a fall-through ladder of labels; each one undoes one step
 * and continues into the next:
 *	abort3	unlock tvp (if non-NULL and distinct from tdvp)
 *	abort2	unlock tdvp and leave the vfs rename lock
 *	abort1	VOP_ABORTOP on tdvp, release tdvp and (if non-NULL) tvp
 *	abort0	VOP_ABORTOP on fdvp, release fdvp and fvp
 *	out2	destroy tpb
 *	out1	destroy fpb
 *	out0	return error
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR? */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
		error = EISDIR;
		goto abort1;
	}
	if ((tnd.ni_cnd.cn_namelen == 2) &&
	    (tnd.ni_cnd.cn_nameptr[0] == '.') &&
	    (tnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		goto abort1;
	}

	/*
	 * Get the mount point.  If the file system has been unmounted,
	 * which it may be because we're not holding any vnode locks,
	 * then v_mount will be NULL.  We're not really supposed to
	 * read v_mount without holding the vnode lock, but since we
	 * have fdvp referenced, if fdvp->v_mount changes then at worst
	 * it will be set to NULL, not changed to another mount point.
	 * And, of course, since it is up to the file system to
	 * determine the real lock order, we can't lock both fdvp and
	 * tdvp at the same time.
	 */
	mp = fdvp->v_mount;
	if (mp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Again, although we don't
	 * hold any vnode locks, the v_mount fields may change -- but
	 * at worst they will change to NULL, so this will never become
	 * a cross-device rename, because we hold vnode references.
	 *
	 * XXX Because nothing is locked and the compiler may reorder
	 * things here, unmounting the file system at an inopportune
	 * moment may cause rename to fail with EXDEV when it really
	 * should fail with ENOENT.
	 */
	tmp = tdvp->v_mount;
	if (tmp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/* NUL-terminated private copies of both final components. */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	goto out2;

abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}
4497
4498 /*
4499 * Make a directory file.
4500 */
4501 /* ARGSUSED */
4502 int
4503 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4504 {
4505 /* {
4506 syscallarg(const char *) path;
4507 syscallarg(int) mode;
4508 } */
4509
4510 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4511 SCARG(uap, mode), UIO_USERSPACE);
4512 }
4513
4514 int
4515 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4516 register_t *retval)
4517 {
4518 /* {
4519 syscallarg(int) fd;
4520 syscallarg(const char *) path;
4521 syscallarg(int) mode;
4522 } */
4523
4524 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4525 SCARG(uap, mode), UIO_USERSPACE);
4526 }
4527
4528
4529 int
4530 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4531 {
4532 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4533 }
4534
4535 static int
4536 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4537 enum uio_seg seg)
4538 {
4539 struct proc *p = curlwp->l_proc;
4540 struct vnode *vp;
4541 struct vattr vattr;
4542 int error;
4543 struct pathbuf *pb;
4544 struct nameidata nd;
4545
4546 KASSERT(l != NULL || fdat == AT_FDCWD);
4547
4548 /* XXX bollocks, should pass in a pathbuf */
4549 error = pathbuf_maybe_copyin(path, seg, &pb);
4550 if (error) {
4551 return error;
4552 }
4553
4554 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4555
4556 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4557 pathbuf_destroy(pb);
4558 return (error);
4559 }
4560 vp = nd.ni_vp;
4561 if (vp != NULL) {
4562 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4563 if (nd.ni_dvp == vp)
4564 vrele(nd.ni_dvp);
4565 else
4566 vput(nd.ni_dvp);
4567 vrele(vp);
4568 pathbuf_destroy(pb);
4569 return (EEXIST);
4570 }
4571 vattr_null(&vattr);
4572 vattr.va_type = VDIR;
4573 /* We will read cwdi->cwdi_cmask unlocked. */
4574 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4575 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4576 if (!error)
4577 vrele(nd.ni_vp);
4578 vput(nd.ni_dvp);
4579 pathbuf_destroy(pb);
4580 return (error);
4581 }
4582
4583 /*
4584 * Remove a directory file.
4585 */
4586 /* ARGSUSED */
4587 int
4588 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4589 {
4590 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4591 AT_REMOVEDIR, UIO_USERSPACE);
4592 }
4593
4594 /*
4595 * Read a block of directory entries in a file system independent format.
4596 */
4597 int
4598 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4599 {
4600 /* {
4601 syscallarg(int) fd;
4602 syscallarg(char *) buf;
4603 syscallarg(size_t) count;
4604 } */
4605 file_t *fp;
4606 int error, done;
4607
4608 /* fd_getvnode() will use the descriptor for us */
4609 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4610 return (error);
4611 if ((fp->f_flag & FREAD) == 0) {
4612 error = EBADF;
4613 goto out;
4614 }
4615 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4616 SCARG(uap, count), &done, l, 0, 0);
4617 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4618 *retval = done;
4619 out:
4620 fd_putfile(SCARG(uap, fd));
4621 return (error);
4622 }
4623
4624 /*
4625 * Set the mode mask for creation of filesystem nodes.
4626 */
4627 int
4628 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4629 {
4630 /* {
4631 syscallarg(mode_t) newmask;
4632 } */
4633 struct proc *p = l->l_proc;
4634 struct cwdinfo *cwdi;
4635
4636 /*
4637 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4638 * important is that we serialize changes to the mask. The
4639 * rw_exit() will issue a write memory barrier on our behalf,
4640 * and force the changes out to other CPUs (as it must use an
4641 * atomic operation, draining the local CPU's store buffers).
4642 */
4643 cwdi = p->p_cwdi;
4644 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4645 *retval = cwdi->cwdi_cmask;
4646 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4647 rw_exit(&cwdi->cwdi_lock);
4648
4649 return (0);
4650 }
4651
4652 int
4653 dorevoke(struct vnode *vp, kauth_cred_t cred)
4654 {
4655 struct vattr vattr;
4656 int error, fs_decision;
4657
4658 vn_lock(vp, LK_SHARED | LK_RETRY);
4659 error = VOP_GETATTR(vp, &vattr, cred);
4660 VOP_UNLOCK(vp);
4661 if (error != 0)
4662 return error;
4663 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4664 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4665 fs_decision);
4666 if (!error)
4667 VOP_REVOKE(vp, REVOKEALL);
4668 return (error);
4669 }
4670
4671 /*
4672 * Void all references to file by ripping underlying filesystem
4673 * away from vnode.
4674 */
4675 /* ARGSUSED */
4676 int
4677 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4678 {
4679 /* {
4680 syscallarg(const char *) path;
4681 } */
4682 struct vnode *vp;
4683 int error;
4684
4685 error = namei_simple_user(SCARG(uap, path),
4686 NSM_FOLLOW_TRYEMULROOT, &vp);
4687 if (error != 0)
4688 return (error);
4689 error = dorevoke(vp, l->l_cred);
4690 vrele(vp);
4691 return (error);
4692 }
4693
4694 /*
4695 * Allocate backing store for a file, filling a hole without having to
4696 * explicitly write anything out.
4697 */
4698 /* ARGSUSED */
4699 int
4700 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4701 register_t *retval)
4702 {
4703 /* {
4704 syscallarg(int) fd;
4705 syscallarg(off_t) pos;
4706 syscallarg(off_t) len;
4707 } */
4708 int fd;
4709 off_t pos, len;
4710 struct file *fp;
4711 struct vnode *vp;
4712 int error;
4713
4714 fd = SCARG(uap, fd);
4715 pos = SCARG(uap, pos);
4716 len = SCARG(uap, len);
4717
4718 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4719 *retval = EINVAL;
4720 return 0;
4721 }
4722
4723 error = fd_getvnode(fd, &fp);
4724 if (error) {
4725 *retval = error;
4726 return 0;
4727 }
4728 if ((fp->f_flag & FWRITE) == 0) {
4729 error = EBADF;
4730 goto fail;
4731 }
4732 vp = fp->f_vnode;
4733
4734 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4735 if (vp->v_type == VDIR) {
4736 error = EISDIR;
4737 } else {
4738 error = VOP_FALLOCATE(vp, pos, len);
4739 }
4740 VOP_UNLOCK(vp);
4741
4742 fail:
4743 fd_putfile(fd);
4744 *retval = error;
4745 return 0;
4746 }
4747
4748 /*
4749 * Deallocate backing store for a file, creating a hole. Also used for
4750 * invoking TRIM on disks.
4751 */
4752 /* ARGSUSED */
4753 int
4754 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4755 register_t *retval)
4756 {
4757 /* {
4758 syscallarg(int) fd;
4759 syscallarg(off_t) pos;
4760 syscallarg(off_t) len;
4761 } */
4762 int fd;
4763 off_t pos, len;
4764 struct file *fp;
4765 struct vnode *vp;
4766 int error;
4767
4768 fd = SCARG(uap, fd);
4769 pos = SCARG(uap, pos);
4770 len = SCARG(uap, len);
4771
4772 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4773 return EINVAL;
4774 }
4775
4776 error = fd_getvnode(fd, &fp);
4777 if (error) {
4778 return error;
4779 }
4780 if ((fp->f_flag & FWRITE) == 0) {
4781 error = EBADF;
4782 goto fail;
4783 }
4784 vp = fp->f_vnode;
4785
4786 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4787 if (vp->v_type == VDIR) {
4788 error = EISDIR;
4789 } else {
4790 error = VOP_FDISCARD(vp, pos, len);
4791 }
4792 VOP_UNLOCK(vp);
4793
4794 fail:
4795 fd_putfile(fd);
4796 return error;
4797 }
4798