vfs_syscalls.c revision 1.492.2.3 1 /* $NetBSD: vfs_syscalls.c,v 1.492.2.3 2015/09/22 12:06:07 skrll Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.492.2.3 2015/09/22 12:06:07 skrll Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/specfs/specdev.h>
112
113 #include <nfs/rpcv2.h>
114 #include <nfs/nfsproto.h>
115 #include <nfs/nfs.h>
116 #include <nfs/nfs_var.h>
117
118 /* XXX this shouldn't be here */
119 #ifndef OFF_T_MAX
120 #define OFF_T_MAX __type_max(off_t)
121 #endif
122
123 static int change_flags(struct vnode *, u_long, struct lwp *);
124 static int change_mode(struct vnode *, int, struct lwp *);
125 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
126 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
127 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
128 enum uio_seg);
129 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
130 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
131 enum uio_seg);
132 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
133 enum uio_seg, int);
134 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
135 size_t, register_t *);
136 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
137
138 static int fd_nameiat(struct lwp *, int, struct nameidata *);
139 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
140 namei_simple_flags_t, struct vnode **);
141
142
143 /*
144 * This table is used to maintain compatibility with 4.3BSD
145 * and NetBSD 0.9 mount syscalls - and possibly other systems.
146 * Note, the order is important!
147 *
148 * Do not modify this table. It should only contain filesystems
149 * supported by NetBSD 0.9 and 4.3BSD.
150 */
151 const char * const mountcompatnames[] = {
152 NULL, /* 0 = MOUNT_NONE */
153 MOUNT_FFS, /* 1 = MOUNT_UFS */
154 MOUNT_NFS, /* 2 */
155 MOUNT_MFS, /* 3 */
156 MOUNT_MSDOS, /* 4 */
157 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
158 MOUNT_FDESC, /* 6 */
159 MOUNT_KERNFS, /* 7 */
160 NULL, /* 8 = MOUNT_DEVFS */
161 MOUNT_AFS, /* 9 */
162 };
163
164 const int nmountcompatnames = __arraycount(mountcompatnames);
165
166 static int
167 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
168 {
169 file_t *dfp;
170 int error;
171
172 if (fdat != AT_FDCWD) {
173 if ((error = fd_getvnode(fdat, &dfp)) != 0)
174 goto out;
175
176 NDAT(ndp, dfp->f_vnode);
177 }
178
179 error = namei(ndp);
180
181 if (fdat != AT_FDCWD)
182 fd_putfile(fdat);
183 out:
184 return error;
185 }
186
187 static int
188 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
189 namei_simple_flags_t sflags, struct vnode **vp_ret)
190 {
191 file_t *dfp;
192 struct vnode *dvp;
193 int error;
194
195 if (fdat != AT_FDCWD) {
196 if ((error = fd_getvnode(fdat, &dfp)) != 0)
197 goto out;
198
199 dvp = dfp->f_vnode;
200 } else {
201 dvp = NULL;
202 }
203
204 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
205
206 if (fdat != AT_FDCWD)
207 fd_putfile(fdat);
208 out:
209 return error;
210 }
211
212 static int
213 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
214 {
215 int error;
216
217 fp->f_flag = flags & FMASK;
218 fp->f_type = DTYPE_VNODE;
219 fp->f_ops = &vnops;
220 fp->f_vnode = vp;
221
222 if (flags & (O_EXLOCK | O_SHLOCK)) {
223 struct flock lf;
224 int type;
225
226 lf.l_whence = SEEK_SET;
227 lf.l_start = 0;
228 lf.l_len = 0;
229 if (flags & O_EXLOCK)
230 lf.l_type = F_WRLCK;
231 else
232 lf.l_type = F_RDLCK;
233 type = F_FLOCK;
234 if ((flags & FNONBLOCK) == 0)
235 type |= F_WAIT;
236 VOP_UNLOCK(vp);
237 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
238 if (error) {
239 (void) vn_close(vp, fp->f_flag, fp->f_cred);
240 fd_abort(l->l_proc, fp, indx);
241 return error;
242 }
243 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
244 atomic_or_uint(&fp->f_flag, FHASLOCK);
245 }
246 if (flags & O_CLOEXEC)
247 fd_set_exclose(l, indx, true);
248 return 0;
249 }
250
251 static int
252 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
253 void *data, size_t *data_len)
254 {
255 struct mount *mp;
256 int error = 0, saved_flags;
257
258 mp = vp->v_mount;
259 saved_flags = mp->mnt_flag;
260
261 /* We can operate only on VV_ROOT nodes. */
262 if ((vp->v_vflag & VV_ROOT) == 0) {
263 error = EINVAL;
264 goto out;
265 }
266
267 /*
268 * We only allow the filesystem to be reloaded if it
269 * is currently mounted read-only. Additionally, we
270 * prevent read-write to read-only downgrades.
271 */
272 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
273 (mp->mnt_flag & MNT_RDONLY) == 0 &&
274 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
275 error = EOPNOTSUPP; /* Needs translation */
276 goto out;
277 }
278
279 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
280 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
281 if (error)
282 goto out;
283
284 if (vfs_busy(mp, NULL)) {
285 error = EPERM;
286 goto out;
287 }
288
289 mutex_enter(&mp->mnt_updating);
290
291 mp->mnt_flag &= ~MNT_OP_FLAGS;
292 mp->mnt_flag |= flags & MNT_OP_FLAGS;
293
294 /*
295 * Set the mount level flags.
296 */
297 if (flags & MNT_RDONLY)
298 mp->mnt_flag |= MNT_RDONLY;
299 else if (mp->mnt_flag & MNT_RDONLY)
300 mp->mnt_iflag |= IMNT_WANTRDWR;
301 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
302 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
303 error = VFS_MOUNT(mp, path, data, data_len);
304
305 if (error && data != NULL) {
306 int error2;
307
308 /*
309 * Update failed; let's try and see if it was an
310 * export request. For compat with 3.0 and earlier.
311 */
312 error2 = vfs_hooks_reexport(mp, path, data);
313
314 /*
315 * Only update error code if the export request was
316 * understood but some problem occurred while
317 * processing it.
318 */
319 if (error2 != EJUSTRETURN)
320 error = error2;
321 }
322
323 if (mp->mnt_iflag & IMNT_WANTRDWR)
324 mp->mnt_flag &= ~MNT_RDONLY;
325 if (error)
326 mp->mnt_flag = saved_flags;
327 mp->mnt_flag &= ~MNT_OP_FLAGS;
328 mp->mnt_iflag &= ~IMNT_WANTRDWR;
329 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
330 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0)
331 vfs_syncer_add_to_worklist(mp);
332 } else {
333 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0)
334 vfs_syncer_remove_from_worklist(mp);
335 }
336 mutex_exit(&mp->mnt_updating);
337 vfs_unbusy(mp, false, NULL);
338
339 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
340 (flags & MNT_EXTATTR)) {
341 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
342 NULL, 0, NULL) != 0) {
343 printf("%s: failed to start extattr, error = %d",
344 mp->mnt_stat.f_mntonname, error);
345 mp->mnt_flag &= ~MNT_EXTATTR;
346 }
347 }
348
349 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
350 !(flags & MNT_EXTATTR)) {
351 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
352 NULL, 0, NULL) != 0) {
353 printf("%s: failed to stop extattr, error = %d",
354 mp->mnt_stat.f_mntonname, error);
355 mp->mnt_flag |= MNT_RDONLY;
356 }
357 }
358 out:
359 return (error);
360 }
361
362 static int
363 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
364 {
365 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
366 int error;
367
368 /* Copy file-system type from userspace. */
369 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
370 if (error) {
371 /*
372 * Historically, filesystem types were identified by numbers.
373 * If we get an integer for the filesystem type instead of a
374 * string, we check to see if it matches one of the historic
375 * filesystem types.
376 */
377 u_long fsindex = (u_long)fstype;
378 if (fsindex >= nmountcompatnames ||
379 mountcompatnames[fsindex] == NULL)
380 return ENODEV;
381 strlcpy(fstypename, mountcompatnames[fsindex],
382 sizeof(fstypename));
383 }
384
385 /* Accept `ufs' as an alias for `ffs', for compatibility. */
386 if (strcmp(fstypename, "ufs") == 0)
387 fstypename[0] = 'f';
388
389 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
390 return 0;
391
392 /* If we can autoload a vfs module, try again */
393 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
394
395 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
396 return 0;
397
398 return ENODEV;
399 }
400
401 static int
402 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
403 void *data, size_t *data_len)
404 {
405 struct mount *mp;
406 int error;
407
408 /* If MNT_GETARGS is specified, it should be the only flag. */
409 if (flags & ~MNT_GETARGS)
410 return EINVAL;
411
412 mp = vp->v_mount;
413
414 /* XXX: probably some notion of "can see" here if we want isolation. */
415 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
416 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
417 if (error)
418 return error;
419
420 if ((vp->v_vflag & VV_ROOT) == 0)
421 return EINVAL;
422
423 if (vfs_busy(mp, NULL))
424 return EPERM;
425
426 mutex_enter(&mp->mnt_updating);
427 mp->mnt_flag &= ~MNT_OP_FLAGS;
428 mp->mnt_flag |= MNT_GETARGS;
429 error = VFS_MOUNT(mp, path, data, data_len);
430 mp->mnt_flag &= ~MNT_OP_FLAGS;
431 mutex_exit(&mp->mnt_updating);
432
433 vfs_unbusy(mp, false, NULL);
434 return (error);
435 }
436
437 int
438 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
439 {
440 /* {
441 syscallarg(const char *) type;
442 syscallarg(const char *) path;
443 syscallarg(int) flags;
444 syscallarg(void *) data;
445 syscallarg(size_t) data_len;
446 } */
447
448 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
449 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
450 SCARG(uap, data_len), retval);
451 }
452
453 int
454 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
455 const char *path, int flags, void *data, enum uio_seg data_seg,
456 size_t data_len, register_t *retval)
457 {
458 struct vnode *vp;
459 void *data_buf = data;
460 bool vfsopsrele = false;
461 size_t alloc_sz = 0;
462 int error;
463
464 /* XXX: The calling convention of this routine is totally bizarre */
465 if (vfsops)
466 vfsopsrele = true;
467
468 /*
469 * Get vnode to be covered
470 */
471 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
472 if (error != 0) {
473 vp = NULL;
474 goto done;
475 }
476
477 if (vfsops == NULL) {
478 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
479 vfsops = vp->v_mount->mnt_op;
480 } else {
481 /* 'type' is userspace */
482 error = mount_get_vfsops(type, &vfsops);
483 if (error != 0)
484 goto done;
485 vfsopsrele = true;
486 }
487 }
488
489 /*
490 * We allow data to be NULL, even for userspace. Some fs's don't need
491 * it. The others will handle NULL.
492 */
493 if (data != NULL && data_seg == UIO_USERSPACE) {
494 if (data_len == 0) {
495 /* No length supplied, use default for filesystem */
496 data_len = vfsops->vfs_min_mount_data;
497
498 /*
499 * Hopefully a longer buffer won't make copyin() fail.
500 * For compatibility with 3.0 and earlier.
501 */
502 if (flags & MNT_UPDATE
503 && data_len < sizeof (struct mnt_export_args30))
504 data_len = sizeof (struct mnt_export_args30);
505 }
506 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) {
507 error = EINVAL;
508 goto done;
509 }
510 alloc_sz = data_len;
511 data_buf = kmem_alloc(alloc_sz, KM_SLEEP);
512
513 /* NFS needs the buffer even for mnt_getargs .... */
514 error = copyin(data, data_buf, data_len);
515 if (error != 0)
516 goto done;
517 }
518
519 if (flags & MNT_GETARGS) {
520 if (data_len == 0) {
521 error = EINVAL;
522 goto done;
523 }
524 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
525 if (error != 0)
526 goto done;
527 if (data_seg == UIO_USERSPACE)
528 error = copyout(data_buf, data, data_len);
529 *retval = data_len;
530 } else if (flags & MNT_UPDATE) {
531 error = mount_update(l, vp, path, flags, data_buf, &data_len);
532 } else {
533 /* Locking is handled internally in mount_domount(). */
534 KASSERT(vfsopsrele == true);
535 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
536 &data_len);
537 vfsopsrele = false;
538 }
539
540 done:
541 if (vfsopsrele)
542 vfs_delref(vfsops);
543 if (vp != NULL) {
544 vrele(vp);
545 }
546 if (data_buf != data)
547 kmem_free(data_buf, alloc_sz);
548 return (error);
549 }
550
551 /*
552 * Unmount a file system.
553 *
554 * Note: unmount takes a path to the vnode mounted on as argument,
555 * not special file (as before).
556 */
557 /* ARGSUSED */
558 int
559 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
560 {
561 /* {
562 syscallarg(const char *) path;
563 syscallarg(int) flags;
564 } */
565 struct vnode *vp;
566 struct mount *mp;
567 int error;
568 struct pathbuf *pb;
569 struct nameidata nd;
570
571 error = pathbuf_copyin(SCARG(uap, path), &pb);
572 if (error) {
573 return error;
574 }
575
576 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
577 if ((error = namei(&nd)) != 0) {
578 pathbuf_destroy(pb);
579 return error;
580 }
581 vp = nd.ni_vp;
582 pathbuf_destroy(pb);
583
584 mp = vp->v_mount;
585 atomic_inc_uint(&mp->mnt_refcnt);
586 VOP_UNLOCK(vp);
587
588 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
589 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
590 if (error) {
591 vrele(vp);
592 vfs_destroy(mp);
593 return (error);
594 }
595
596 /*
597 * Don't allow unmounting the root file system.
598 */
599 if (mp->mnt_flag & MNT_ROOTFS) {
600 vrele(vp);
601 vfs_destroy(mp);
602 return (EINVAL);
603 }
604
605 /*
606 * Must be the root of the filesystem
607 */
608 if ((vp->v_vflag & VV_ROOT) == 0) {
609 vrele(vp);
610 vfs_destroy(mp);
611 return (EINVAL);
612 }
613
614 vrele(vp);
615 error = dounmount(mp, SCARG(uap, flags), l);
616 vfs_destroy(mp);
617 return error;
618 }
619
620 /*
621 * Sync each mounted filesystem.
622 */
623 #ifdef DEBUG
624 int syncprt = 0;
625 struct ctldebug debug0 = { "syncprt", &syncprt };
626 #endif
627
628 void
629 do_sys_sync(struct lwp *l)
630 {
631 struct mount *mp, *nmp;
632 int asyncflag;
633
634 mutex_enter(&mountlist_lock);
635 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
636 if (vfs_busy(mp, &nmp)) {
637 continue;
638 }
639 mutex_enter(&mp->mnt_updating);
640 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
641 asyncflag = mp->mnt_flag & MNT_ASYNC;
642 mp->mnt_flag &= ~MNT_ASYNC;
643 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
644 if (asyncflag)
645 mp->mnt_flag |= MNT_ASYNC;
646 }
647 mutex_exit(&mp->mnt_updating);
648 vfs_unbusy(mp, false, &nmp);
649 }
650 mutex_exit(&mountlist_lock);
651 #ifdef DEBUG
652 if (syncprt)
653 vfs_bufstats();
654 #endif /* DEBUG */
655 }
656
657 /* ARGSUSED */
658 int
659 sys_sync(struct lwp *l, const void *v, register_t *retval)
660 {
661 do_sys_sync(l);
662 return (0);
663 }
664
665
666 /*
667 * Access or change filesystem quotas.
668 *
669 * (this is really 14 different calls bundled into one)
670 */
671
672 static int
673 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
674 {
675 struct quotastat info_k;
676 int error;
677
678 /* ensure any padding bytes are cleared */
679 memset(&info_k, 0, sizeof(info_k));
680
681 error = vfs_quotactl_stat(mp, &info_k);
682 if (error) {
683 return error;
684 }
685
686 return copyout(&info_k, info_u, sizeof(info_k));
687 }
688
689 static int
690 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
691 struct quotaidtypestat *info_u)
692 {
693 struct quotaidtypestat info_k;
694 int error;
695
696 /* ensure any padding bytes are cleared */
697 memset(&info_k, 0, sizeof(info_k));
698
699 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
700 if (error) {
701 return error;
702 }
703
704 return copyout(&info_k, info_u, sizeof(info_k));
705 }
706
707 static int
708 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
709 struct quotaobjtypestat *info_u)
710 {
711 struct quotaobjtypestat info_k;
712 int error;
713
714 /* ensure any padding bytes are cleared */
715 memset(&info_k, 0, sizeof(info_k));
716
717 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
718 if (error) {
719 return error;
720 }
721
722 return copyout(&info_k, info_u, sizeof(info_k));
723 }
724
725 static int
726 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
727 struct quotaval *val_u)
728 {
729 struct quotakey key_k;
730 struct quotaval val_k;
731 int error;
732
733 /* ensure any padding bytes are cleared */
734 memset(&val_k, 0, sizeof(val_k));
735
736 error = copyin(key_u, &key_k, sizeof(key_k));
737 if (error) {
738 return error;
739 }
740
741 error = vfs_quotactl_get(mp, &key_k, &val_k);
742 if (error) {
743 return error;
744 }
745
746 return copyout(&val_k, val_u, sizeof(val_k));
747 }
748
749 static int
750 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
751 const struct quotaval *val_u)
752 {
753 struct quotakey key_k;
754 struct quotaval val_k;
755 int error;
756
757 error = copyin(key_u, &key_k, sizeof(key_k));
758 if (error) {
759 return error;
760 }
761
762 error = copyin(val_u, &val_k, sizeof(val_k));
763 if (error) {
764 return error;
765 }
766
767 return vfs_quotactl_put(mp, &key_k, &val_k);
768 }
769
770 static int
771 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u)
772 {
773 struct quotakey key_k;
774 int error;
775
776 error = copyin(key_u, &key_k, sizeof(key_k));
777 if (error) {
778 return error;
779 }
780
781 return vfs_quotactl_del(mp, &key_k);
782 }
783
784 static int
785 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
786 {
787 struct quotakcursor cursor_k;
788 int error;
789
790 /* ensure any padding bytes are cleared */
791 memset(&cursor_k, 0, sizeof(cursor_k));
792
793 error = vfs_quotactl_cursoropen(mp, &cursor_k);
794 if (error) {
795 return error;
796 }
797
798 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
799 }
800
801 static int
802 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
803 {
804 struct quotakcursor cursor_k;
805 int error;
806
807 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
808 if (error) {
809 return error;
810 }
811
812 return vfs_quotactl_cursorclose(mp, &cursor_k);
813 }
814
815 static int
816 do_sys_quotactl_cursorskipidtype(struct mount *mp,
817 struct quotakcursor *cursor_u, int idtype)
818 {
819 struct quotakcursor cursor_k;
820 int error;
821
822 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
823 if (error) {
824 return error;
825 }
826
827 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
828 if (error) {
829 return error;
830 }
831
832 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
833 }
834
835 static int
836 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
837 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
838 unsigned *ret_u)
839 {
840 #define CGET_STACK_MAX 8
841 struct quotakcursor cursor_k;
842 struct quotakey stackkeys[CGET_STACK_MAX];
843 struct quotaval stackvals[CGET_STACK_MAX];
844 struct quotakey *keys_k;
845 struct quotaval *vals_k;
846 unsigned ret_k;
847 int error;
848
849 if (maxnum > 128) {
850 maxnum = 128;
851 }
852
853 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
854 if (error) {
855 return error;
856 }
857
858 if (maxnum <= CGET_STACK_MAX) {
859 keys_k = stackkeys;
860 vals_k = stackvals;
861 /* ensure any padding bytes are cleared */
862 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
863 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
864 } else {
865 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
866 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
867 }
868
869 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
870 &ret_k);
871 if (error) {
872 goto fail;
873 }
874
875 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
876 if (error) {
877 goto fail;
878 }
879
880 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
881 if (error) {
882 goto fail;
883 }
884
885 error = copyout(&ret_k, ret_u, sizeof(ret_k));
886 if (error) {
887 goto fail;
888 }
889
890 /* do last to maximize the chance of being able to recover a failure */
891 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
892
893 fail:
894 if (keys_k != stackkeys) {
895 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
896 }
897 if (vals_k != stackvals) {
898 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
899 }
900 return error;
901 }
902
903 static int
904 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
905 int *ret_u)
906 {
907 struct quotakcursor cursor_k;
908 int ret_k;
909 int error;
910
911 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
912 if (error) {
913 return error;
914 }
915
916 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
917 if (error) {
918 return error;
919 }
920
921 error = copyout(&ret_k, ret_u, sizeof(ret_k));
922 if (error) {
923 return error;
924 }
925
926 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
927 }
928
929 static int
930 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
931 {
932 struct quotakcursor cursor_k;
933 int error;
934
935 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
936 if (error) {
937 return error;
938 }
939
940 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
941 if (error) {
942 return error;
943 }
944
945 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
946 }
947
948 static int
949 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
950 {
951 char *path_k;
952 int error;
953
954 /* XXX this should probably be a struct pathbuf */
955 path_k = PNBUF_GET();
956 error = copyin(path_u, path_k, PATH_MAX);
957 if (error) {
958 PNBUF_PUT(path_k);
959 return error;
960 }
961
962 error = vfs_quotactl_quotaon(mp, idtype, path_k);
963
964 PNBUF_PUT(path_k);
965 return error;
966 }
967
/*
 * QUOTACTL_QUOTAOFF: turn quotas off for id type 'idtype' on mp.
 * No user-space data is involved.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);
	return rv;
}
973
974 int
975 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
976 {
977 struct mount *mp;
978 struct vnode *vp;
979 int error;
980
981 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
982 if (error != 0)
983 return (error);
984 mp = vp->v_mount;
985
986 switch (args->qc_op) {
987 case QUOTACTL_STAT:
988 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
989 break;
990 case QUOTACTL_IDTYPESTAT:
991 error = do_sys_quotactl_idtypestat(mp,
992 args->u.idtypestat.qc_idtype,
993 args->u.idtypestat.qc_info);
994 break;
995 case QUOTACTL_OBJTYPESTAT:
996 error = do_sys_quotactl_objtypestat(mp,
997 args->u.objtypestat.qc_objtype,
998 args->u.objtypestat.qc_info);
999 break;
1000 case QUOTACTL_GET:
1001 error = do_sys_quotactl_get(mp,
1002 args->u.get.qc_key,
1003 args->u.get.qc_val);
1004 break;
1005 case QUOTACTL_PUT:
1006 error = do_sys_quotactl_put(mp,
1007 args->u.put.qc_key,
1008 args->u.put.qc_val);
1009 break;
1010 case QUOTACTL_DEL:
1011 error = do_sys_quotactl_del(mp, args->u.del.qc_key);
1012 break;
1013 case QUOTACTL_CURSOROPEN:
1014 error = do_sys_quotactl_cursoropen(mp,
1015 args->u.cursoropen.qc_cursor);
1016 break;
1017 case QUOTACTL_CURSORCLOSE:
1018 error = do_sys_quotactl_cursorclose(mp,
1019 args->u.cursorclose.qc_cursor);
1020 break;
1021 case QUOTACTL_CURSORSKIPIDTYPE:
1022 error = do_sys_quotactl_cursorskipidtype(mp,
1023 args->u.cursorskipidtype.qc_cursor,
1024 args->u.cursorskipidtype.qc_idtype);
1025 break;
1026 case QUOTACTL_CURSORGET:
1027 error = do_sys_quotactl_cursorget(mp,
1028 args->u.cursorget.qc_cursor,
1029 args->u.cursorget.qc_keys,
1030 args->u.cursorget.qc_vals,
1031 args->u.cursorget.qc_maxnum,
1032 args->u.cursorget.qc_ret);
1033 break;
1034 case QUOTACTL_CURSORATEND:
1035 error = do_sys_quotactl_cursoratend(mp,
1036 args->u.cursoratend.qc_cursor,
1037 args->u.cursoratend.qc_ret);
1038 break;
1039 case QUOTACTL_CURSORREWIND:
1040 error = do_sys_quotactl_cursorrewind(mp,
1041 args->u.cursorrewind.qc_cursor);
1042 break;
1043 case QUOTACTL_QUOTAON:
1044 error = do_sys_quotactl_quotaon(mp,
1045 args->u.quotaon.qc_idtype,
1046 args->u.quotaon.qc_quotafile);
1047 break;
1048 case QUOTACTL_QUOTAOFF:
1049 error = do_sys_quotactl_quotaoff(mp,
1050 args->u.quotaoff.qc_idtype);
1051 break;
1052 default:
1053 error = EINVAL;
1054 break;
1055 }
1056
1057 vrele(vp);
1058 return error;
1059 }
1060
1061 /* ARGSUSED */
1062 int
1063 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1064 register_t *retval)
1065 {
1066 /* {
1067 syscallarg(const char *) path;
1068 syscallarg(struct quotactl_args *) args;
1069 } */
1070 struct quotactl_args args;
1071 int error;
1072
1073 error = copyin(SCARG(uap, args), &args, sizeof(args));
1074 if (error) {
1075 return error;
1076 }
1077
1078 return do_sys_quotactl(SCARG(uap, path), &args);
1079 }
1080
1081 int
1082 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1083 int root)
1084 {
1085 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1086 int error = 0;
1087
1088 /*
1089 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1090 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1091 * overrides MNT_NOWAIT.
1092 */
1093 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1094 (flags != MNT_WAIT && flags != 0)) {
1095 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1096 goto done;
1097 }
1098
1099 /* Get the filesystem stats now */
1100 memset(sp, 0, sizeof(*sp));
1101 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1102 return error;
1103 }
1104
1105 if (cwdi->cwdi_rdir == NULL)
1106 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1107 done:
1108 if (cwdi->cwdi_rdir != NULL) {
1109 size_t len;
1110 char *bp;
1111 char c;
1112 char *path = PNBUF_GET();
1113
1114 bp = path + MAXPATHLEN;
1115 *--bp = '\0';
1116 rw_enter(&cwdi->cwdi_lock, RW_READER);
1117 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1118 MAXPATHLEN / 2, 0, l);
1119 rw_exit(&cwdi->cwdi_lock);
1120 if (error) {
1121 PNBUF_PUT(path);
1122 return error;
1123 }
1124 len = strlen(bp);
1125 if (len != 1) {
1126 /*
1127 * for mount points that are below our root, we can see
1128 * them, so we fix up the pathname and return them. The
1129 * rest we cannot see, so we don't allow viewing the
1130 * data.
1131 */
1132 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1133 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1134 (void)strlcpy(sp->f_mntonname,
1135 c == '\0' ? "/" : &sp->f_mntonname[len],
1136 sizeof(sp->f_mntonname));
1137 } else {
1138 if (root)
1139 (void)strlcpy(sp->f_mntonname, "/",
1140 sizeof(sp->f_mntonname));
1141 else
1142 error = EPERM;
1143 }
1144 }
1145 PNBUF_PUT(path);
1146 }
1147 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1148 return error;
1149 }
1150
1151 /*
1152 * Get filesystem statistics by path.
1153 */
1154 int
1155 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1156 {
1157 struct mount *mp;
1158 int error;
1159 struct vnode *vp;
1160
1161 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1162 if (error != 0)
1163 return error;
1164 mp = vp->v_mount;
1165 error = dostatvfs(mp, sb, l, flags, 1);
1166 vrele(vp);
1167 return error;
1168 }
1169
1170 /* ARGSUSED */
1171 int
1172 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1173 {
1174 /* {
1175 syscallarg(const char *) path;
1176 syscallarg(struct statvfs *) buf;
1177 syscallarg(int) flags;
1178 } */
1179 struct statvfs *sb;
1180 int error;
1181
1182 sb = STATVFSBUF_GET();
1183 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1184 if (error == 0)
1185 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1186 STATVFSBUF_PUT(sb);
1187 return error;
1188 }
1189
1190 /*
1191 * Get filesystem statistics by fd.
1192 */
1193 int
1194 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1195 {
1196 file_t *fp;
1197 struct mount *mp;
1198 int error;
1199
1200 /* fd_getvnode() will use the descriptor for us */
1201 if ((error = fd_getvnode(fd, &fp)) != 0)
1202 return (error);
1203 mp = fp->f_vnode->v_mount;
1204 error = dostatvfs(mp, sb, curlwp, flags, 1);
1205 fd_putfile(fd);
1206 return error;
1207 }
1208
1209 /* ARGSUSED */
1210 int
1211 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1212 {
1213 /* {
1214 syscallarg(int) fd;
1215 syscallarg(struct statvfs *) buf;
1216 syscallarg(int) flags;
1217 } */
1218 struct statvfs *sb;
1219 int error;
1220
1221 sb = STATVFSBUF_GET();
1222 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1223 if (error == 0)
1224 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1225 STATVFSBUF_PUT(sb);
1226 return error;
1227 }
1228
1229
1230 /*
1231 * Get statistics on all filesystems.
1232 */
1233 int
1234 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1235 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1236 register_t *retval)
1237 {
1238 int root = 0;
1239 struct proc *p = l->l_proc;
1240 struct mount *mp, *nmp;
1241 struct statvfs *sb;
1242 size_t count, maxcount;
1243 int error = 0;
1244
1245 sb = STATVFSBUF_GET();
1246 maxcount = bufsize / entry_sz;
1247 mutex_enter(&mountlist_lock);
1248 count = 0;
1249 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1250 if (vfs_busy(mp, &nmp)) {
1251 continue;
1252 }
1253 if (sfsp && count < maxcount) {
1254 error = dostatvfs(mp, sb, l, flags, 0);
1255 if (error) {
1256 vfs_unbusy(mp, false, &nmp);
1257 error = 0;
1258 continue;
1259 }
1260 error = copyfn(sb, sfsp, entry_sz);
1261 if (error) {
1262 vfs_unbusy(mp, false, NULL);
1263 goto out;
1264 }
1265 sfsp = (char *)sfsp + entry_sz;
1266 root |= strcmp(sb->f_mntonname, "/") == 0;
1267 }
1268 count++;
1269 vfs_unbusy(mp, false, &nmp);
1270 }
1271 mutex_exit(&mountlist_lock);
1272
1273 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1274 /*
1275 * fake a root entry
1276 */
1277 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1278 sb, l, flags, 1);
1279 if (error != 0)
1280 goto out;
1281 if (sfsp) {
1282 error = copyfn(sb, sfsp, entry_sz);
1283 if (error != 0)
1284 goto out;
1285 }
1286 count++;
1287 }
1288 if (sfsp && count > maxcount)
1289 *retval = maxcount;
1290 else
1291 *retval = count;
1292 out:
1293 STATVFSBUF_PUT(sb);
1294 return error;
1295 }
1296
1297 int
1298 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1299 {
1300 /* {
1301 syscallarg(struct statvfs *) buf;
1302 syscallarg(size_t) bufsize;
1303 syscallarg(int) flags;
1304 } */
1305
1306 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1307 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1308 }
1309
1310 /*
1311 * Change current working directory to a given file descriptor.
1312 */
1313 /* ARGSUSED */
1314 int
1315 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1316 {
1317 /* {
1318 syscallarg(int) fd;
1319 } */
1320 struct proc *p = l->l_proc;
1321 struct cwdinfo *cwdi;
1322 struct vnode *vp, *tdp;
1323 struct mount *mp;
1324 file_t *fp;
1325 int error, fd;
1326
1327 /* fd_getvnode() will use the descriptor for us */
1328 fd = SCARG(uap, fd);
1329 if ((error = fd_getvnode(fd, &fp)) != 0)
1330 return (error);
1331 vp = fp->f_vnode;
1332
1333 vref(vp);
1334 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1335 if (vp->v_type != VDIR)
1336 error = ENOTDIR;
1337 else
1338 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1339 if (error) {
1340 vput(vp);
1341 goto out;
1342 }
1343 while ((mp = vp->v_mountedhere) != NULL) {
1344 error = vfs_busy(mp, NULL);
1345 vput(vp);
1346 if (error != 0)
1347 goto out;
1348 error = VFS_ROOT(mp, &tdp);
1349 vfs_unbusy(mp, false, NULL);
1350 if (error)
1351 goto out;
1352 vp = tdp;
1353 }
1354 VOP_UNLOCK(vp);
1355
1356 /*
1357 * Disallow changing to a directory not under the process's
1358 * current root directory (if there is one).
1359 */
1360 cwdi = p->p_cwdi;
1361 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1362 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1363 vrele(vp);
1364 error = EPERM; /* operation not permitted */
1365 } else {
1366 vrele(cwdi->cwdi_cdir);
1367 cwdi->cwdi_cdir = vp;
1368 }
1369 rw_exit(&cwdi->cwdi_lock);
1370
1371 out:
1372 fd_putfile(fd);
1373 return (error);
1374 }
1375
1376 /*
1377 * Change this process's notion of the root directory to a given file
1378 * descriptor.
1379 */
1380 int
1381 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1382 {
1383 struct proc *p = l->l_proc;
1384 struct vnode *vp;
1385 file_t *fp;
1386 int error, fd = SCARG(uap, fd);
1387
1388 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1389 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1390 return error;
1391 /* fd_getvnode() will use the descriptor for us */
1392 if ((error = fd_getvnode(fd, &fp)) != 0)
1393 return error;
1394 vp = fp->f_vnode;
1395 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1396 if (vp->v_type != VDIR)
1397 error = ENOTDIR;
1398 else
1399 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1400 VOP_UNLOCK(vp);
1401 if (error)
1402 goto out;
1403 vref(vp);
1404
1405 change_root(p->p_cwdi, vp, l);
1406
1407 out:
1408 fd_putfile(fd);
1409 return (error);
1410 }
1411
1412 /*
1413 * Change current working directory (``.'').
1414 */
1415 /* ARGSUSED */
1416 int
1417 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1418 {
1419 /* {
1420 syscallarg(const char *) path;
1421 } */
1422 struct proc *p = l->l_proc;
1423 struct cwdinfo *cwdi;
1424 int error;
1425 struct vnode *vp;
1426
1427 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1428 &vp, l)) != 0)
1429 return (error);
1430 cwdi = p->p_cwdi;
1431 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1432 vrele(cwdi->cwdi_cdir);
1433 cwdi->cwdi_cdir = vp;
1434 rw_exit(&cwdi->cwdi_lock);
1435 return (0);
1436 }
1437
1438 /*
1439 * Change notion of root (``/'') directory.
1440 */
1441 /* ARGSUSED */
1442 int
1443 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1444 {
1445 /* {
1446 syscallarg(const char *) path;
1447 } */
1448 struct proc *p = l->l_proc;
1449 int error;
1450 struct vnode *vp;
1451
1452 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1453 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1454 return (error);
1455 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1456 &vp, l)) != 0)
1457 return (error);
1458
1459 change_root(p->p_cwdi, vp, l);
1460
1461 return (0);
1462 }
1463
1464 /*
1465 * Common routine for chroot and fchroot.
1466 * NB: callers need to properly authorize the change root operation.
1467 */
1468 void
1469 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1470 {
1471 struct proc *p = l->l_proc;
1472 kauth_cred_t ncred;
1473
1474 ncred = kauth_cred_alloc();
1475
1476 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1477 if (cwdi->cwdi_rdir != NULL)
1478 vrele(cwdi->cwdi_rdir);
1479 cwdi->cwdi_rdir = vp;
1480
1481 /*
1482 * Prevent escaping from chroot by putting the root under
1483 * the working directory. Silently chdir to / if we aren't
1484 * already there.
1485 */
1486 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1487 /*
1488 * XXX would be more failsafe to change directory to a
1489 * deadfs node here instead
1490 */
1491 vrele(cwdi->cwdi_cdir);
1492 vref(vp);
1493 cwdi->cwdi_cdir = vp;
1494 }
1495 rw_exit(&cwdi->cwdi_lock);
1496
1497 /* Get a write lock on the process credential. */
1498 proc_crmod_enter();
1499
1500 kauth_cred_clone(p->p_cred, ncred);
1501 kauth_proc_chroot(ncred, p->p_cwdi);
1502
1503 /* Broadcast our credentials to the process and other LWPs. */
1504 proc_crmod_leave(ncred, p->p_cred, true);
1505 }
1506
1507 /*
1508 * Common routine for chroot and chdir.
1509 * XXX "where" should be enum uio_seg
1510 */
1511 int
1512 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1513 {
1514 struct pathbuf *pb;
1515 struct nameidata nd;
1516 int error;
1517
1518 error = pathbuf_maybe_copyin(path, where, &pb);
1519 if (error) {
1520 return error;
1521 }
1522 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1523 if ((error = namei(&nd)) != 0) {
1524 pathbuf_destroy(pb);
1525 return error;
1526 }
1527 *vpp = nd.ni_vp;
1528 pathbuf_destroy(pb);
1529
1530 if ((*vpp)->v_type != VDIR)
1531 error = ENOTDIR;
1532 else
1533 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1534
1535 if (error)
1536 vput(*vpp);
1537 else
1538 VOP_UNLOCK(*vpp);
1539 return (error);
1540 }
1541
1542 /*
1543 * Internals of sys_open - path has already been converted into a pathbuf
1544 * (so we can easily reuse this function from other parts of the kernel,
1545 * like posix_spawn post-processing).
1546 */
1547 int
1548 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1549 int open_mode, int *fd)
1550 {
1551 struct proc *p = l->l_proc;
1552 struct cwdinfo *cwdi = p->p_cwdi;
1553 file_t *fp;
1554 struct vnode *vp;
1555 int flags, cmode;
1556 int indx, error;
1557 struct nameidata nd;
1558
1559 if (open_flags & O_SEARCH) {
1560 open_flags &= ~(int)O_SEARCH;
1561 }
1562
1563 flags = FFLAGS(open_flags);
1564 if ((flags & (FREAD | FWRITE)) == 0)
1565 return EINVAL;
1566
1567 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1568 return error;
1569 }
1570
1571 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1572 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1573 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1574 if (dvp != NULL)
1575 NDAT(&nd, dvp);
1576
1577 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1578 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1579 fd_abort(p, fp, indx);
1580 if ((error == EDUPFD || error == EMOVEFD) &&
1581 l->l_dupfd >= 0 && /* XXX from fdopen */
1582 (error =
1583 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1584 *fd = indx;
1585 return 0;
1586 }
1587 if (error == ERESTART)
1588 error = EINTR;
1589 return error;
1590 }
1591
1592 l->l_dupfd = 0;
1593 vp = nd.ni_vp;
1594
1595 if ((error = open_setfp(l, fp, vp, indx, flags)))
1596 return error;
1597
1598 VOP_UNLOCK(vp);
1599 *fd = indx;
1600 fd_affix(p, fp, indx);
1601 return 0;
1602 }
1603
1604 int
1605 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1606 {
1607 struct pathbuf *pb;
1608 int error, oflags;
1609
1610 oflags = FFLAGS(open_flags);
1611 if ((oflags & (FREAD | FWRITE)) == 0)
1612 return EINVAL;
1613
1614 pb = pathbuf_create(path);
1615 if (pb == NULL)
1616 return ENOMEM;
1617
1618 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1619 pathbuf_destroy(pb);
1620
1621 return error;
1622 }
1623
1624 /*
1625 * Check permissions, allocate an open file structure,
1626 * and call the device open routine if any.
1627 */
1628 static int
1629 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1630 int mode, int *fd)
1631 {
1632 file_t *dfp = NULL;
1633 struct vnode *dvp = NULL;
1634 struct pathbuf *pb;
1635 int error;
1636
1637 #ifdef COMPAT_10 /* XXX: and perhaps later */
1638 if (path == NULL) {
1639 pb = pathbuf_create(".");
1640 if (pb == NULL)
1641 return ENOMEM;
1642 } else
1643 #endif
1644 {
1645 error = pathbuf_copyin(path, &pb);
1646 if (error)
1647 return error;
1648 }
1649
1650 if (fdat != AT_FDCWD) {
1651 /* fd_getvnode() will use the descriptor for us */
1652 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1653 goto out;
1654
1655 dvp = dfp->f_vnode;
1656 }
1657
1658 error = do_open(l, dvp, pb, flags, mode, fd);
1659
1660 if (dfp != NULL)
1661 fd_putfile(fdat);
1662 out:
1663 pathbuf_destroy(pb);
1664 return error;
1665 }
1666
1667 int
1668 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1669 {
1670 /* {
1671 syscallarg(const char *) path;
1672 syscallarg(int) flags;
1673 syscallarg(int) mode;
1674 } */
1675 int error;
1676 int fd;
1677
1678 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1679 SCARG(uap, flags), SCARG(uap, mode), &fd);
1680
1681 if (error == 0)
1682 *retval = fd;
1683
1684 return error;
1685 }
1686
1687 int
1688 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1689 {
1690 /* {
1691 syscallarg(int) fd;
1692 syscallarg(const char *) path;
1693 syscallarg(int) oflags;
1694 syscallarg(int) mode;
1695 } */
1696 int error;
1697 int fd;
1698
1699 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1700 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1701
1702 if (error == 0)
1703 *retval = fd;
1704
1705 return error;
1706 }
1707
1708 static void
1709 vfs__fhfree(fhandle_t *fhp)
1710 {
1711 size_t fhsize;
1712
1713 fhsize = FHANDLE_SIZE(fhp);
1714 kmem_free(fhp, fhsize);
1715 }
1716
1717 /*
1718 * vfs_composefh: compose a filehandle.
1719 */
1720
1721 int
1722 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1723 {
1724 struct mount *mp;
1725 struct fid *fidp;
1726 int error;
1727 size_t needfhsize;
1728 size_t fidsize;
1729
1730 mp = vp->v_mount;
1731 fidp = NULL;
1732 if (*fh_size < FHANDLE_SIZE_MIN) {
1733 fidsize = 0;
1734 } else {
1735 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1736 if (fhp != NULL) {
1737 memset(fhp, 0, *fh_size);
1738 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1739 fidp = &fhp->fh_fid;
1740 }
1741 }
1742 error = VFS_VPTOFH(vp, fidp, &fidsize);
1743 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1744 if (error == 0 && *fh_size < needfhsize) {
1745 error = E2BIG;
1746 }
1747 *fh_size = needfhsize;
1748 return error;
1749 }
1750
1751 int
1752 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1753 {
1754 struct mount *mp;
1755 fhandle_t *fhp;
1756 size_t fhsize;
1757 size_t fidsize;
1758 int error;
1759
1760 mp = vp->v_mount;
1761 fidsize = 0;
1762 error = VFS_VPTOFH(vp, NULL, &fidsize);
1763 KASSERT(error != 0);
1764 if (error != E2BIG) {
1765 goto out;
1766 }
1767 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1768 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1769 if (fhp == NULL) {
1770 error = ENOMEM;
1771 goto out;
1772 }
1773 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1774 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1775 if (error == 0) {
1776 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1777 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1778 *fhpp = fhp;
1779 } else {
1780 kmem_free(fhp, fhsize);
1781 }
1782 out:
1783 return error;
1784 }
1785
1786 void
1787 vfs_composefh_free(fhandle_t *fhp)
1788 {
1789
1790 vfs__fhfree(fhp);
1791 }
1792
1793 /*
1794 * vfs_fhtovp: lookup a vnode by a filehandle.
1795 */
1796
1797 int
1798 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1799 {
1800 struct mount *mp;
1801 int error;
1802
1803 *vpp = NULL;
1804 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1805 if (mp == NULL) {
1806 error = ESTALE;
1807 goto out;
1808 }
1809 if (mp->mnt_op->vfs_fhtovp == NULL) {
1810 error = EOPNOTSUPP;
1811 goto out;
1812 }
1813 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1814 out:
1815 return error;
1816 }
1817
1818 /*
1819 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1820 * the needed size.
1821 */
1822
1823 int
1824 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1825 {
1826 fhandle_t *fhp;
1827 int error;
1828
1829 if (fhsize > FHANDLE_SIZE_MAX) {
1830 return EINVAL;
1831 }
1832 if (fhsize < FHANDLE_SIZE_MIN) {
1833 return EINVAL;
1834 }
1835 again:
1836 fhp = kmem_alloc(fhsize, KM_SLEEP);
1837 if (fhp == NULL) {
1838 return ENOMEM;
1839 }
1840 error = copyin(ufhp, fhp, fhsize);
1841 if (error == 0) {
1842 /* XXX this check shouldn't be here */
1843 if (FHANDLE_SIZE(fhp) == fhsize) {
1844 *fhpp = fhp;
1845 return 0;
1846 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1847 /*
1848 * a kludge for nfsv2 padded handles.
1849 */
1850 size_t sz;
1851
1852 sz = FHANDLE_SIZE(fhp);
1853 kmem_free(fhp, fhsize);
1854 fhsize = sz;
1855 goto again;
1856 } else {
1857 /*
1858 * userland told us wrong size.
1859 */
1860 error = EINVAL;
1861 }
1862 }
1863 kmem_free(fhp, fhsize);
1864 return error;
1865 }
1866
1867 void
1868 vfs_copyinfh_free(fhandle_t *fhp)
1869 {
1870
1871 vfs__fhfree(fhp);
1872 }
1873
1874 /*
1875 * Get file handle system call
1876 */
1877 int
1878 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1879 {
1880 /* {
1881 syscallarg(char *) fname;
1882 syscallarg(fhandle_t *) fhp;
1883 syscallarg(size_t *) fh_size;
1884 } */
1885 struct vnode *vp;
1886 fhandle_t *fh;
1887 int error;
1888 struct pathbuf *pb;
1889 struct nameidata nd;
1890 size_t sz;
1891 size_t usz;
1892
1893 /*
1894 * Must be super user
1895 */
1896 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1897 0, NULL, NULL, NULL);
1898 if (error)
1899 return (error);
1900
1901 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1902 if (error) {
1903 return error;
1904 }
1905 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1906 error = namei(&nd);
1907 if (error) {
1908 pathbuf_destroy(pb);
1909 return error;
1910 }
1911 vp = nd.ni_vp;
1912 pathbuf_destroy(pb);
1913
1914 error = vfs_composefh_alloc(vp, &fh);
1915 vput(vp);
1916 if (error != 0) {
1917 return error;
1918 }
1919 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1920 if (error != 0) {
1921 goto out;
1922 }
1923 sz = FHANDLE_SIZE(fh);
1924 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1925 if (error != 0) {
1926 goto out;
1927 }
1928 if (usz >= sz) {
1929 error = copyout(fh, SCARG(uap, fhp), sz);
1930 } else {
1931 error = E2BIG;
1932 }
1933 out:
1934 vfs_composefh_free(fh);
1935 return (error);
1936 }
1937
1938 /*
1939 * Open a file given a file handle.
1940 *
1941 * Check permissions, allocate an open file structure,
1942 * and call the device open routine if any.
1943 */
1944
1945 int
1946 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1947 register_t *retval)
1948 {
1949 file_t *fp;
1950 struct vnode *vp = NULL;
1951 kauth_cred_t cred = l->l_cred;
1952 file_t *nfp;
1953 int indx, error;
1954 struct vattr va;
1955 fhandle_t *fh;
1956 int flags;
1957 proc_t *p;
1958
1959 p = curproc;
1960
1961 /*
1962 * Must be super user
1963 */
1964 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1965 0, NULL, NULL, NULL)))
1966 return (error);
1967
1968 if (oflags & O_SEARCH) {
1969 oflags &= ~(int)O_SEARCH;
1970 }
1971
1972 flags = FFLAGS(oflags);
1973 if ((flags & (FREAD | FWRITE)) == 0)
1974 return (EINVAL);
1975 if ((flags & O_CREAT))
1976 return (EINVAL);
1977 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1978 return (error);
1979 fp = nfp;
1980 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1981 if (error != 0) {
1982 goto bad;
1983 }
1984 error = vfs_fhtovp(fh, &vp);
1985 vfs_copyinfh_free(fh);
1986 if (error != 0) {
1987 goto bad;
1988 }
1989
1990 /* Now do an effective vn_open */
1991
1992 if (vp->v_type == VSOCK) {
1993 error = EOPNOTSUPP;
1994 goto bad;
1995 }
1996 error = vn_openchk(vp, cred, flags);
1997 if (error != 0)
1998 goto bad;
1999 if (flags & O_TRUNC) {
2000 VOP_UNLOCK(vp); /* XXX */
2001 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
2002 vattr_null(&va);
2003 va.va_size = 0;
2004 error = VOP_SETATTR(vp, &va, cred);
2005 if (error)
2006 goto bad;
2007 }
2008 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
2009 goto bad;
2010 if (flags & FWRITE) {
2011 mutex_enter(vp->v_interlock);
2012 vp->v_writecount++;
2013 mutex_exit(vp->v_interlock);
2014 }
2015
2016 /* done with modified vn_open, now finish what sys_open does. */
2017 if ((error = open_setfp(l, fp, vp, indx, flags)))
2018 return error;
2019
2020 VOP_UNLOCK(vp);
2021 *retval = indx;
2022 fd_affix(p, fp, indx);
2023 return (0);
2024
2025 bad:
2026 fd_abort(p, fp, indx);
2027 if (vp != NULL)
2028 vput(vp);
2029 return (error);
2030 }
2031
2032 int
2033 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2034 {
2035 /* {
2036 syscallarg(const void *) fhp;
2037 syscallarg(size_t) fh_size;
2038 syscallarg(int) flags;
2039 } */
2040
2041 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2042 SCARG(uap, flags), retval);
2043 }
2044
2045 int
2046 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2047 {
2048 int error;
2049 fhandle_t *fh;
2050 struct vnode *vp;
2051
2052 /*
2053 * Must be super user
2054 */
2055 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2056 0, NULL, NULL, NULL)))
2057 return (error);
2058
2059 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2060 if (error != 0)
2061 return error;
2062
2063 error = vfs_fhtovp(fh, &vp);
2064 vfs_copyinfh_free(fh);
2065 if (error != 0)
2066 return error;
2067
2068 error = vn_stat(vp, sb);
2069 vput(vp);
2070 return error;
2071 }
2072
2073
2074 /* ARGSUSED */
2075 int
2076 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2077 {
2078 /* {
2079 syscallarg(const void *) fhp;
2080 syscallarg(size_t) fh_size;
2081 syscallarg(struct stat *) sb;
2082 } */
2083 struct stat sb;
2084 int error;
2085
2086 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2087 if (error)
2088 return error;
2089 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2090 }
2091
2092 int
2093 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2094 int flags)
2095 {
2096 fhandle_t *fh;
2097 struct mount *mp;
2098 struct vnode *vp;
2099 int error;
2100
2101 /*
2102 * Must be super user
2103 */
2104 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2105 0, NULL, NULL, NULL)))
2106 return error;
2107
2108 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2109 if (error != 0)
2110 return error;
2111
2112 error = vfs_fhtovp(fh, &vp);
2113 vfs_copyinfh_free(fh);
2114 if (error != 0)
2115 return error;
2116
2117 mp = vp->v_mount;
2118 error = dostatvfs(mp, sb, l, flags, 1);
2119 vput(vp);
2120 return error;
2121 }
2122
2123 /* ARGSUSED */
2124 int
2125 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2126 {
2127 /* {
2128 syscallarg(const void *) fhp;
2129 syscallarg(size_t) fh_size;
2130 syscallarg(struct statvfs *) buf;
2131 syscallarg(int) flags;
2132 } */
2133 struct statvfs *sb = STATVFSBUF_GET();
2134 int error;
2135
2136 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2137 SCARG(uap, flags));
2138 if (error == 0)
2139 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2140 STATVFSBUF_PUT(sb);
2141 return error;
2142 }
2143
2144 /*
2145 * Create a special file.
2146 */
2147 /* ARGSUSED */
2148 int
2149 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2150 register_t *retval)
2151 {
2152 /* {
2153 syscallarg(const char *) path;
2154 syscallarg(mode_t) mode;
2155 syscallarg(dev_t) dev;
2156 } */
2157 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2158 SCARG(uap, dev), retval, UIO_USERSPACE);
2159 }
2160
2161 int
2162 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2163 register_t *retval)
2164 {
2165 /* {
2166 syscallarg(int) fd;
2167 syscallarg(const char *) path;
2168 syscallarg(mode_t) mode;
2169 syscallarg(int) pad;
2170 syscallarg(dev_t) dev;
2171 } */
2172
2173 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2174 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2175 }
2176
2177 int
2178 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2179 register_t *retval, enum uio_seg seg)
2180 {
2181 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2182 }
2183
2184 int
2185 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2186 dev_t dev, register_t *retval, enum uio_seg seg)
2187 {
2188 struct proc *p = l->l_proc;
2189 struct vnode *vp;
2190 struct vattr vattr;
2191 int error, optype;
2192 struct pathbuf *pb;
2193 struct nameidata nd;
2194 const char *pathstring;
2195
2196 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2197 0, NULL, NULL, NULL)) != 0)
2198 return (error);
2199
2200 optype = VOP_MKNOD_DESCOFFSET;
2201
2202 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2203 if (error) {
2204 return error;
2205 }
2206 pathstring = pathbuf_stringcopy_get(pb);
2207 if (pathstring == NULL) {
2208 pathbuf_destroy(pb);
2209 return ENOMEM;
2210 }
2211
2212 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2213
2214 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2215 goto out;
2216 vp = nd.ni_vp;
2217
2218 if (vp != NULL)
2219 error = EEXIST;
2220 else {
2221 vattr_null(&vattr);
2222 /* We will read cwdi->cwdi_cmask unlocked. */
2223 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2224 vattr.va_rdev = dev;
2225
2226 switch (mode & S_IFMT) {
2227 case S_IFMT: /* used by badsect to flag bad sectors */
2228 vattr.va_type = VBAD;
2229 break;
2230 case S_IFCHR:
2231 vattr.va_type = VCHR;
2232 break;
2233 case S_IFBLK:
2234 vattr.va_type = VBLK;
2235 break;
2236 case S_IFWHT:
2237 optype = VOP_WHITEOUT_DESCOFFSET;
2238 break;
2239 case S_IFREG:
2240 #if NVERIEXEC > 0
2241 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2242 O_CREAT);
2243 #endif /* NVERIEXEC > 0 */
2244 vattr.va_type = VREG;
2245 vattr.va_rdev = VNOVAL;
2246 optype = VOP_CREATE_DESCOFFSET;
2247 break;
2248 default:
2249 error = EINVAL;
2250 break;
2251 }
2252 }
2253 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2254 && vattr.va_rdev == VNOVAL)
2255 error = EINVAL;
2256 if (!error) {
2257 switch (optype) {
2258 case VOP_WHITEOUT_DESCOFFSET:
2259 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2260 if (error)
2261 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2262 vput(nd.ni_dvp);
2263 break;
2264
2265 case VOP_MKNOD_DESCOFFSET:
2266 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2267 &nd.ni_cnd, &vattr);
2268 if (error == 0)
2269 vrele(nd.ni_vp);
2270 vput(nd.ni_dvp);
2271 break;
2272
2273 case VOP_CREATE_DESCOFFSET:
2274 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2275 &nd.ni_cnd, &vattr);
2276 if (error == 0)
2277 vrele(nd.ni_vp);
2278 vput(nd.ni_dvp);
2279 break;
2280 }
2281 } else {
2282 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2283 if (nd.ni_dvp == vp)
2284 vrele(nd.ni_dvp);
2285 else
2286 vput(nd.ni_dvp);
2287 if (vp)
2288 vrele(vp);
2289 }
2290 out:
2291 pathbuf_stringcopy_put(pb, pathstring);
2292 pathbuf_destroy(pb);
2293 return (error);
2294 }
2295
2296 /*
2297 * Create a named pipe.
2298 */
2299 /* ARGSUSED */
2300 int
2301 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2302 {
2303 /* {
2304 syscallarg(const char *) path;
2305 syscallarg(int) mode;
2306 } */
2307 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2308 }
2309
2310 int
2311 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2312 register_t *retval)
2313 {
2314 /* {
2315 syscallarg(int) fd;
2316 syscallarg(const char *) path;
2317 syscallarg(int) mode;
2318 } */
2319
2320 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2321 SCARG(uap, mode));
2322 }
2323
2324 static int
2325 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2326 {
2327 struct proc *p = l->l_proc;
2328 struct vattr vattr;
2329 int error;
2330 struct pathbuf *pb;
2331 struct nameidata nd;
2332
2333 error = pathbuf_copyin(path, &pb);
2334 if (error) {
2335 return error;
2336 }
2337 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2338
2339 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2340 pathbuf_destroy(pb);
2341 return error;
2342 }
2343 if (nd.ni_vp != NULL) {
2344 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2345 if (nd.ni_dvp == nd.ni_vp)
2346 vrele(nd.ni_dvp);
2347 else
2348 vput(nd.ni_dvp);
2349 vrele(nd.ni_vp);
2350 pathbuf_destroy(pb);
2351 return (EEXIST);
2352 }
2353 vattr_null(&vattr);
2354 vattr.va_type = VFIFO;
2355 /* We will read cwdi->cwdi_cmask unlocked. */
2356 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2357 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2358 if (error == 0)
2359 vrele(nd.ni_vp);
2360 vput(nd.ni_dvp);
2361 pathbuf_destroy(pb);
2362 return (error);
2363 }
2364
2365 /*
2366 * Make a hard file link.
2367 */
2368 /* ARGSUSED */
2369 int
2370 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2371 const char *link, int follow, register_t *retval)
2372 {
2373 struct vnode *vp;
2374 struct pathbuf *linkpb;
2375 struct nameidata nd;
2376 namei_simple_flags_t ns_flags;
2377 int error;
2378
2379 if (follow & AT_SYMLINK_FOLLOW)
2380 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2381 else
2382 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2383
2384 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2385 if (error != 0)
2386 return (error);
2387 error = pathbuf_copyin(link, &linkpb);
2388 if (error) {
2389 goto out1;
2390 }
2391 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2392 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2393 goto out2;
2394 if (nd.ni_vp) {
2395 error = EEXIST;
2396 goto abortop;
2397 }
2398 /* Prevent hard links on directories. */
2399 if (vp->v_type == VDIR) {
2400 error = EPERM;
2401 goto abortop;
2402 }
2403 /* Prevent cross-mount operation. */
2404 if (nd.ni_dvp->v_mount != vp->v_mount) {
2405 error = EXDEV;
2406 goto abortop;
2407 }
2408 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2409 VOP_UNLOCK(nd.ni_dvp);
2410 vrele(nd.ni_dvp);
2411 out2:
2412 pathbuf_destroy(linkpb);
2413 out1:
2414 vrele(vp);
2415 return (error);
2416 abortop:
2417 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2418 if (nd.ni_dvp == nd.ni_vp)
2419 vrele(nd.ni_dvp);
2420 else
2421 vput(nd.ni_dvp);
2422 if (nd.ni_vp != NULL)
2423 vrele(nd.ni_vp);
2424 goto out2;
2425 }
2426
2427 int
2428 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2429 {
2430 /* {
2431 syscallarg(const char *) path;
2432 syscallarg(const char *) link;
2433 } */
2434 const char *path = SCARG(uap, path);
2435 const char *link = SCARG(uap, link);
2436
2437 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2438 AT_SYMLINK_FOLLOW, retval);
2439 }
2440
2441 int
2442 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2443 register_t *retval)
2444 {
2445 /* {
2446 syscallarg(int) fd1;
2447 syscallarg(const char *) name1;
2448 syscallarg(int) fd2;
2449 syscallarg(const char *) name2;
2450 syscallarg(int) flags;
2451 } */
2452 int fd1 = SCARG(uap, fd1);
2453 const char *name1 = SCARG(uap, name1);
2454 int fd2 = SCARG(uap, fd2);
2455 const char *name2 = SCARG(uap, name2);
2456 int follow;
2457
2458 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2459
2460 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2461 }
2462
2463
2464 int
2465 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2466 {
2467 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2468 }
2469
2470 static int
2471 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2472 const char *link, enum uio_seg seg)
2473 {
2474 struct proc *p = curproc;
2475 struct vattr vattr;
2476 char *path;
2477 int error;
2478 struct pathbuf *linkpb;
2479 struct nameidata nd;
2480
2481 KASSERT(l != NULL || fdat == AT_FDCWD);
2482
2483 path = PNBUF_GET();
2484 if (seg == UIO_USERSPACE) {
2485 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2486 goto out1;
2487 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2488 goto out1;
2489 } else {
2490 KASSERT(strlen(patharg) < MAXPATHLEN);
2491 strcpy(path, patharg);
2492 linkpb = pathbuf_create(link);
2493 if (linkpb == NULL) {
2494 error = ENOMEM;
2495 goto out1;
2496 }
2497 }
2498 ktrkuser("symlink-target", path, strlen(path));
2499
2500 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2501 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2502 goto out2;
2503 if (nd.ni_vp) {
2504 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2505 if (nd.ni_dvp == nd.ni_vp)
2506 vrele(nd.ni_dvp);
2507 else
2508 vput(nd.ni_dvp);
2509 vrele(nd.ni_vp);
2510 error = EEXIST;
2511 goto out2;
2512 }
2513 vattr_null(&vattr);
2514 vattr.va_type = VLNK;
2515 /* We will read cwdi->cwdi_cmask unlocked. */
2516 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2517 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2518 if (error == 0)
2519 vrele(nd.ni_vp);
2520 vput(nd.ni_dvp);
2521 out2:
2522 pathbuf_destroy(linkpb);
2523 out1:
2524 PNBUF_PUT(path);
2525 return (error);
2526 }
2527
2528 /*
2529 * Make a symbolic link.
2530 */
2531 /* ARGSUSED */
2532 int
2533 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2534 {
2535 /* {
2536 syscallarg(const char *) path;
2537 syscallarg(const char *) link;
2538 } */
2539
2540 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2541 UIO_USERSPACE);
2542 }
2543
2544 int
2545 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2546 register_t *retval)
2547 {
2548 /* {
2549 syscallarg(const char *) path1;
2550 syscallarg(int) fd;
2551 syscallarg(const char *) path2;
2552 } */
2553
2554 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2555 SCARG(uap, path2), UIO_USERSPACE);
2556 }
2557
2558 /*
2559 * Delete a whiteout from the filesystem.
2560 */
2561 /* ARGSUSED */
2562 int
2563 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2564 {
2565 /* {
2566 syscallarg(const char *) path;
2567 } */
2568 int error;
2569 struct pathbuf *pb;
2570 struct nameidata nd;
2571
2572 error = pathbuf_copyin(SCARG(uap, path), &pb);
2573 if (error) {
2574 return error;
2575 }
2576
2577 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2578 error = namei(&nd);
2579 if (error) {
2580 pathbuf_destroy(pb);
2581 return (error);
2582 }
2583
2584 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2585 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2586 if (nd.ni_dvp == nd.ni_vp)
2587 vrele(nd.ni_dvp);
2588 else
2589 vput(nd.ni_dvp);
2590 if (nd.ni_vp)
2591 vrele(nd.ni_vp);
2592 pathbuf_destroy(pb);
2593 return (EEXIST);
2594 }
2595 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2596 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2597 vput(nd.ni_dvp);
2598 pathbuf_destroy(pb);
2599 return (error);
2600 }
2601
2602 /*
2603 * Delete a name from the filesystem.
2604 */
2605 /* ARGSUSED */
2606 int
2607 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2608 {
2609 /* {
2610 syscallarg(const char *) path;
2611 } */
2612
2613 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2614 }
2615
2616 int
2617 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2618 register_t *retval)
2619 {
2620 /* {
2621 syscallarg(int) fd;
2622 syscallarg(const char *) path;
2623 syscallarg(int) flag;
2624 } */
2625
2626 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2627 SCARG(uap, flag), UIO_USERSPACE);
2628 }
2629
2630 int
2631 do_sys_unlink(const char *arg, enum uio_seg seg)
2632 {
2633 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2634 }
2635
2636 static int
2637 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2638 enum uio_seg seg)
2639 {
2640 struct vnode *vp;
2641 int error;
2642 struct pathbuf *pb;
2643 struct nameidata nd;
2644 const char *pathstring;
2645
2646 KASSERT(l != NULL || fdat == AT_FDCWD);
2647
2648 error = pathbuf_maybe_copyin(arg, seg, &pb);
2649 if (error) {
2650 return error;
2651 }
2652 pathstring = pathbuf_stringcopy_get(pb);
2653 if (pathstring == NULL) {
2654 pathbuf_destroy(pb);
2655 return ENOMEM;
2656 }
2657
2658 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2659 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2660 goto out;
2661 vp = nd.ni_vp;
2662
2663 /*
2664 * The root of a mounted filesystem cannot be deleted.
2665 */
2666 if ((vp->v_vflag & VV_ROOT) != 0) {
2667 error = EBUSY;
2668 goto abort;
2669 }
2670
2671 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2672 error = EBUSY;
2673 goto abort;
2674 }
2675
2676 /*
2677 * No rmdir "." please.
2678 */
2679 if (nd.ni_dvp == vp) {
2680 error = EINVAL;
2681 goto abort;
2682 }
2683
2684 /*
2685 * AT_REMOVEDIR is required to remove a directory
2686 */
2687 if (vp->v_type == VDIR) {
2688 if (!(flags & AT_REMOVEDIR)) {
2689 error = EPERM;
2690 goto abort;
2691 } else {
2692 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2693 goto out;
2694 }
2695 }
2696
2697 /*
2698 * Starting here we only deal with non directories.
2699 */
2700 if (flags & AT_REMOVEDIR) {
2701 error = ENOTDIR;
2702 goto abort;
2703 }
2704
2705 #if NVERIEXEC > 0
2706 /* Handle remove requests for veriexec entries. */
2707 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2708 goto abort;
2709 }
2710 #endif /* NVERIEXEC > 0 */
2711
2712 #ifdef FILEASSOC
2713 (void)fileassoc_file_delete(vp);
2714 #endif /* FILEASSOC */
2715 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2716 goto out;
2717
2718 abort:
2719 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2720 if (nd.ni_dvp == vp)
2721 vrele(nd.ni_dvp);
2722 else
2723 vput(nd.ni_dvp);
2724 vput(vp);
2725
2726 out:
2727 pathbuf_stringcopy_put(pb, pathstring);
2728 pathbuf_destroy(pb);
2729 return (error);
2730 }
2731
2732 /*
2733 * Reposition read/write file offset.
2734 */
2735 int
2736 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2737 {
2738 /* {
2739 syscallarg(int) fd;
2740 syscallarg(int) pad;
2741 syscallarg(off_t) offset;
2742 syscallarg(int) whence;
2743 } */
2744 kauth_cred_t cred = l->l_cred;
2745 file_t *fp;
2746 struct vnode *vp;
2747 struct vattr vattr;
2748 off_t newoff;
2749 int error, fd;
2750
2751 fd = SCARG(uap, fd);
2752
2753 if ((fp = fd_getfile(fd)) == NULL)
2754 return (EBADF);
2755
2756 vp = fp->f_vnode;
2757 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2758 error = ESPIPE;
2759 goto out;
2760 }
2761
2762 switch (SCARG(uap, whence)) {
2763 case SEEK_CUR:
2764 newoff = fp->f_offset + SCARG(uap, offset);
2765 break;
2766 case SEEK_END:
2767 vn_lock(vp, LK_SHARED | LK_RETRY);
2768 error = VOP_GETATTR(vp, &vattr, cred);
2769 VOP_UNLOCK(vp);
2770 if (error) {
2771 goto out;
2772 }
2773 newoff = SCARG(uap, offset) + vattr.va_size;
2774 break;
2775 case SEEK_SET:
2776 newoff = SCARG(uap, offset);
2777 break;
2778 default:
2779 error = EINVAL;
2780 goto out;
2781 }
2782 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2783 *(off_t *)retval = fp->f_offset = newoff;
2784 }
2785 out:
2786 fd_putfile(fd);
2787 return (error);
2788 }
2789
2790 /*
2791 * Positional read system call.
2792 */
2793 int
2794 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2795 {
2796 /* {
2797 syscallarg(int) fd;
2798 syscallarg(void *) buf;
2799 syscallarg(size_t) nbyte;
2800 syscallarg(off_t) offset;
2801 } */
2802 file_t *fp;
2803 struct vnode *vp;
2804 off_t offset;
2805 int error, fd = SCARG(uap, fd);
2806
2807 if ((fp = fd_getfile(fd)) == NULL)
2808 return (EBADF);
2809
2810 if ((fp->f_flag & FREAD) == 0) {
2811 fd_putfile(fd);
2812 return (EBADF);
2813 }
2814
2815 vp = fp->f_vnode;
2816 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2817 error = ESPIPE;
2818 goto out;
2819 }
2820
2821 offset = SCARG(uap, offset);
2822
2823 /*
2824 * XXX This works because no file systems actually
2825 * XXX take any action on the seek operation.
2826 */
2827 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2828 goto out;
2829
2830 /* dofileread() will unuse the descriptor for us */
2831 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2832 &offset, 0, retval));
2833
2834 out:
2835 fd_putfile(fd);
2836 return (error);
2837 }
2838
2839 /*
2840 * Positional scatter read system call.
2841 */
2842 int
2843 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2844 {
2845 /* {
2846 syscallarg(int) fd;
2847 syscallarg(const struct iovec *) iovp;
2848 syscallarg(int) iovcnt;
2849 syscallarg(off_t) offset;
2850 } */
2851 off_t offset = SCARG(uap, offset);
2852
2853 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2854 SCARG(uap, iovcnt), &offset, 0, retval);
2855 }
2856
2857 /*
2858 * Positional write system call.
2859 */
2860 int
2861 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2862 {
2863 /* {
2864 syscallarg(int) fd;
2865 syscallarg(const void *) buf;
2866 syscallarg(size_t) nbyte;
2867 syscallarg(off_t) offset;
2868 } */
2869 file_t *fp;
2870 struct vnode *vp;
2871 off_t offset;
2872 int error, fd = SCARG(uap, fd);
2873
2874 if ((fp = fd_getfile(fd)) == NULL)
2875 return (EBADF);
2876
2877 if ((fp->f_flag & FWRITE) == 0) {
2878 fd_putfile(fd);
2879 return (EBADF);
2880 }
2881
2882 vp = fp->f_vnode;
2883 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2884 error = ESPIPE;
2885 goto out;
2886 }
2887
2888 offset = SCARG(uap, offset);
2889
2890 /*
2891 * XXX This works because no file systems actually
2892 * XXX take any action on the seek operation.
2893 */
2894 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2895 goto out;
2896
2897 /* dofilewrite() will unuse the descriptor for us */
2898 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2899 &offset, 0, retval));
2900
2901 out:
2902 fd_putfile(fd);
2903 return (error);
2904 }
2905
2906 /*
2907 * Positional gather write system call.
2908 */
2909 int
2910 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2911 {
2912 /* {
2913 syscallarg(int) fd;
2914 syscallarg(const struct iovec *) iovp;
2915 syscallarg(int) iovcnt;
2916 syscallarg(off_t) offset;
2917 } */
2918 off_t offset = SCARG(uap, offset);
2919
2920 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2921 SCARG(uap, iovcnt), &offset, 0, retval);
2922 }
2923
2924 /*
2925 * Check access permissions.
2926 */
2927 int
2928 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2929 {
2930 /* {
2931 syscallarg(const char *) path;
2932 syscallarg(int) flags;
2933 } */
2934
2935 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2936 SCARG(uap, flags), 0);
2937 }
2938
2939 int
2940 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2941 int mode, int flags)
2942 {
2943 kauth_cred_t cred;
2944 struct vnode *vp;
2945 int error, nd_flag, vmode;
2946 struct pathbuf *pb;
2947 struct nameidata nd;
2948
2949 CTASSERT(F_OK == 0);
2950 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2951 /* nonsense mode */
2952 return EINVAL;
2953 }
2954
2955 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2956 if (flags & AT_SYMLINK_NOFOLLOW)
2957 nd_flag &= ~FOLLOW;
2958
2959 error = pathbuf_copyin(path, &pb);
2960 if (error)
2961 return error;
2962
2963 NDINIT(&nd, LOOKUP, nd_flag, pb);
2964
2965 /* Override default credentials */
2966 cred = kauth_cred_dup(l->l_cred);
2967 if (!(flags & AT_EACCESS)) {
2968 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2969 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2970 }
2971 nd.ni_cnd.cn_cred = cred;
2972
2973 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2974 pathbuf_destroy(pb);
2975 goto out;
2976 }
2977 vp = nd.ni_vp;
2978 pathbuf_destroy(pb);
2979
2980 /* Flags == 0 means only check for existence. */
2981 if (mode) {
2982 vmode = 0;
2983 if (mode & R_OK)
2984 vmode |= VREAD;
2985 if (mode & W_OK)
2986 vmode |= VWRITE;
2987 if (mode & X_OK)
2988 vmode |= VEXEC;
2989
2990 error = VOP_ACCESS(vp, vmode, cred);
2991 if (!error && (vmode & VWRITE))
2992 error = vn_writechk(vp);
2993 }
2994 vput(vp);
2995 out:
2996 kauth_cred_free(cred);
2997 return (error);
2998 }
2999
3000 int
3001 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
3002 register_t *retval)
3003 {
3004 /* {
3005 syscallarg(int) fd;
3006 syscallarg(const char *) path;
3007 syscallarg(int) amode;
3008 syscallarg(int) flag;
3009 } */
3010
3011 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3012 SCARG(uap, amode), SCARG(uap, flag));
3013 }
3014
3015 /*
3016 * Common code for all sys_stat functions, including compat versions.
3017 */
3018 int
3019 do_sys_stat(const char *userpath, unsigned int nd_flag,
3020 struct stat *sb)
3021 {
3022 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3023 }
3024
3025 int
3026 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3027 unsigned int nd_flag, struct stat *sb)
3028 {
3029 int error;
3030 struct pathbuf *pb;
3031 struct nameidata nd;
3032
3033 KASSERT(l != NULL || fdat == AT_FDCWD);
3034
3035 error = pathbuf_copyin(userpath, &pb);
3036 if (error) {
3037 return error;
3038 }
3039
3040 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3041
3042 error = fd_nameiat(l, fdat, &nd);
3043 if (error != 0) {
3044 pathbuf_destroy(pb);
3045 return error;
3046 }
3047 error = vn_stat(nd.ni_vp, sb);
3048 vput(nd.ni_vp);
3049 pathbuf_destroy(pb);
3050 return error;
3051 }
3052
3053 /*
3054 * Get file status; this version follows links.
3055 */
3056 /* ARGSUSED */
3057 int
3058 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3059 {
3060 /* {
3061 syscallarg(const char *) path;
3062 syscallarg(struct stat *) ub;
3063 } */
3064 struct stat sb;
3065 int error;
3066
3067 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3068 if (error)
3069 return error;
3070 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3071 }
3072
3073 /*
3074 * Get file status; this version does not follow links.
3075 */
3076 /* ARGSUSED */
3077 int
3078 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3079 {
3080 /* {
3081 syscallarg(const char *) path;
3082 syscallarg(struct stat *) ub;
3083 } */
3084 struct stat sb;
3085 int error;
3086
3087 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3088 if (error)
3089 return error;
3090 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3091 }
3092
3093 int
3094 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3095 register_t *retval)
3096 {
3097 /* {
3098 syscallarg(int) fd;
3099 syscallarg(const char *) path;
3100 syscallarg(struct stat *) buf;
3101 syscallarg(int) flag;
3102 } */
3103 unsigned int nd_flag;
3104 struct stat sb;
3105 int error;
3106
3107 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3108 nd_flag = NOFOLLOW;
3109 else
3110 nd_flag = FOLLOW;
3111
3112 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3113 &sb);
3114 if (error)
3115 return error;
3116 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3117 }
3118
3119 /*
3120 * Get configurable pathname variables.
3121 */
3122 /* ARGSUSED */
3123 int
3124 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3125 {
3126 /* {
3127 syscallarg(const char *) path;
3128 syscallarg(int) name;
3129 } */
3130 int error;
3131 struct pathbuf *pb;
3132 struct nameidata nd;
3133
3134 error = pathbuf_copyin(SCARG(uap, path), &pb);
3135 if (error) {
3136 return error;
3137 }
3138 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3139 if ((error = namei(&nd)) != 0) {
3140 pathbuf_destroy(pb);
3141 return (error);
3142 }
3143 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3144 vput(nd.ni_vp);
3145 pathbuf_destroy(pb);
3146 return (error);
3147 }
3148
3149 /*
3150 * Return target name of a symbolic link.
3151 */
3152 /* ARGSUSED */
3153 int
3154 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3155 register_t *retval)
3156 {
3157 /* {
3158 syscallarg(const char *) path;
3159 syscallarg(char *) buf;
3160 syscallarg(size_t) count;
3161 } */
3162 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3163 SCARG(uap, buf), SCARG(uap, count), retval);
3164 }
3165
3166 static int
3167 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3168 size_t count, register_t *retval)
3169 {
3170 struct vnode *vp;
3171 struct iovec aiov;
3172 struct uio auio;
3173 int error;
3174 struct pathbuf *pb;
3175 struct nameidata nd;
3176
3177 error = pathbuf_copyin(path, &pb);
3178 if (error) {
3179 return error;
3180 }
3181 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3182 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3183 pathbuf_destroy(pb);
3184 return error;
3185 }
3186 vp = nd.ni_vp;
3187 pathbuf_destroy(pb);
3188 if (vp->v_type != VLNK)
3189 error = EINVAL;
3190 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3191 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3192 aiov.iov_base = buf;
3193 aiov.iov_len = count;
3194 auio.uio_iov = &aiov;
3195 auio.uio_iovcnt = 1;
3196 auio.uio_offset = 0;
3197 auio.uio_rw = UIO_READ;
3198 KASSERT(l == curlwp);
3199 auio.uio_vmspace = l->l_proc->p_vmspace;
3200 auio.uio_resid = count;
3201 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3202 *retval = count - auio.uio_resid;
3203 }
3204 vput(vp);
3205 return (error);
3206 }
3207
3208 int
3209 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3210 register_t *retval)
3211 {
3212 /* {
3213 syscallarg(int) fd;
3214 syscallarg(const char *) path;
3215 syscallarg(char *) buf;
3216 syscallarg(size_t) bufsize;
3217 } */
3218
3219 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3220 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3221 }
3222
3223 /*
3224 * Change flags of a file given a path name.
3225 */
3226 /* ARGSUSED */
3227 int
3228 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3229 {
3230 /* {
3231 syscallarg(const char *) path;
3232 syscallarg(u_long) flags;
3233 } */
3234 struct vnode *vp;
3235 int error;
3236
3237 error = namei_simple_user(SCARG(uap, path),
3238 NSM_FOLLOW_TRYEMULROOT, &vp);
3239 if (error != 0)
3240 return (error);
3241 error = change_flags(vp, SCARG(uap, flags), l);
3242 vput(vp);
3243 return (error);
3244 }
3245
3246 /*
3247 * Change flags of a file given a file descriptor.
3248 */
3249 /* ARGSUSED */
3250 int
3251 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3252 {
3253 /* {
3254 syscallarg(int) fd;
3255 syscallarg(u_long) flags;
3256 } */
3257 struct vnode *vp;
3258 file_t *fp;
3259 int error;
3260
3261 /* fd_getvnode() will use the descriptor for us */
3262 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3263 return (error);
3264 vp = fp->f_vnode;
3265 error = change_flags(vp, SCARG(uap, flags), l);
3266 VOP_UNLOCK(vp);
3267 fd_putfile(SCARG(uap, fd));
3268 return (error);
3269 }
3270
3271 /*
3272 * Change flags of a file given a path name; this version does
3273 * not follow links.
3274 */
3275 int
3276 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3277 {
3278 /* {
3279 syscallarg(const char *) path;
3280 syscallarg(u_long) flags;
3281 } */
3282 struct vnode *vp;
3283 int error;
3284
3285 error = namei_simple_user(SCARG(uap, path),
3286 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3287 if (error != 0)
3288 return (error);
3289 error = change_flags(vp, SCARG(uap, flags), l);
3290 vput(vp);
3291 return (error);
3292 }
3293
3294 /*
3295 * Common routine to change flags of a file.
3296 */
3297 int
3298 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3299 {
3300 struct vattr vattr;
3301 int error;
3302
3303 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3304
3305 vattr_null(&vattr);
3306 vattr.va_flags = flags;
3307 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3308
3309 return (error);
3310 }
3311
3312 /*
3313 * Change mode of a file given path name; this version follows links.
3314 */
3315 /* ARGSUSED */
3316 int
3317 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3318 {
3319 /* {
3320 syscallarg(const char *) path;
3321 syscallarg(int) mode;
3322 } */
3323 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3324 SCARG(uap, mode), 0);
3325 }
3326
3327 int
3328 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3329 {
3330 int error;
3331 struct vnode *vp;
3332 namei_simple_flags_t ns_flag;
3333
3334 if (flags & AT_SYMLINK_NOFOLLOW)
3335 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3336 else
3337 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3338
3339 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3340 if (error != 0)
3341 return error;
3342
3343 error = change_mode(vp, mode, l);
3344
3345 vrele(vp);
3346
3347 return (error);
3348 }
3349
3350 /*
3351 * Change mode of a file given a file descriptor.
3352 */
3353 /* ARGSUSED */
3354 int
3355 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3356 {
3357 /* {
3358 syscallarg(int) fd;
3359 syscallarg(int) mode;
3360 } */
3361 file_t *fp;
3362 int error;
3363
3364 /* fd_getvnode() will use the descriptor for us */
3365 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3366 return (error);
3367 error = change_mode(fp->f_vnode, SCARG(uap, mode), l);
3368 fd_putfile(SCARG(uap, fd));
3369 return (error);
3370 }
3371
3372 int
3373 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3374 register_t *retval)
3375 {
3376 /* {
3377 syscallarg(int) fd;
3378 syscallarg(const char *) path;
3379 syscallarg(int) mode;
3380 syscallarg(int) flag;
3381 } */
3382
3383 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3384 SCARG(uap, mode), SCARG(uap, flag));
3385 }
3386
3387 /*
3388 * Change mode of a file given path name; this version does not follow links.
3389 */
3390 /* ARGSUSED */
3391 int
3392 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3393 {
3394 /* {
3395 syscallarg(const char *) path;
3396 syscallarg(int) mode;
3397 } */
3398 int error;
3399 struct vnode *vp;
3400
3401 error = namei_simple_user(SCARG(uap, path),
3402 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3403 if (error != 0)
3404 return (error);
3405
3406 error = change_mode(vp, SCARG(uap, mode), l);
3407
3408 vrele(vp);
3409 return (error);
3410 }
3411
3412 /*
3413 * Common routine to set mode given a vnode.
3414 */
3415 static int
3416 change_mode(struct vnode *vp, int mode, struct lwp *l)
3417 {
3418 struct vattr vattr;
3419 int error;
3420
3421 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3422 vattr_null(&vattr);
3423 vattr.va_mode = mode & ALLPERMS;
3424 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3425 VOP_UNLOCK(vp);
3426 return (error);
3427 }
3428
3429 /*
3430 * Set ownership given a path name; this version follows links.
3431 */
3432 /* ARGSUSED */
3433 int
3434 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3435 {
3436 /* {
3437 syscallarg(const char *) path;
3438 syscallarg(uid_t) uid;
3439 syscallarg(gid_t) gid;
3440 } */
3441 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3442 SCARG(uap, gid), 0);
3443 }
3444
3445 int
3446 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3447 gid_t gid, int flags)
3448 {
3449 int error;
3450 struct vnode *vp;
3451 namei_simple_flags_t ns_flag;
3452
3453 if (flags & AT_SYMLINK_NOFOLLOW)
3454 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3455 else
3456 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3457
3458 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3459 if (error != 0)
3460 return error;
3461
3462 error = change_owner(vp, uid, gid, l, 0);
3463
3464 vrele(vp);
3465
3466 return (error);
3467 }
3468
3469 /*
3470 * Set ownership given a path name; this version follows links.
3471 * Provides POSIX semantics.
3472 */
3473 /* ARGSUSED */
3474 int
3475 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3476 {
3477 /* {
3478 syscallarg(const char *) path;
3479 syscallarg(uid_t) uid;
3480 syscallarg(gid_t) gid;
3481 } */
3482 int error;
3483 struct vnode *vp;
3484
3485 error = namei_simple_user(SCARG(uap, path),
3486 NSM_FOLLOW_TRYEMULROOT, &vp);
3487 if (error != 0)
3488 return (error);
3489
3490 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3491
3492 vrele(vp);
3493 return (error);
3494 }
3495
3496 /*
3497 * Set ownership given a file descriptor.
3498 */
3499 /* ARGSUSED */
3500 int
3501 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3502 {
3503 /* {
3504 syscallarg(int) fd;
3505 syscallarg(uid_t) uid;
3506 syscallarg(gid_t) gid;
3507 } */
3508 int error;
3509 file_t *fp;
3510
3511 /* fd_getvnode() will use the descriptor for us */
3512 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3513 return (error);
3514 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3515 l, 0);
3516 fd_putfile(SCARG(uap, fd));
3517 return (error);
3518 }
3519
3520 int
3521 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3522 register_t *retval)
3523 {
3524 /* {
3525 syscallarg(int) fd;
3526 syscallarg(const char *) path;
3527 syscallarg(uid_t) owner;
3528 syscallarg(gid_t) group;
3529 syscallarg(int) flag;
3530 } */
3531
3532 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3533 SCARG(uap, owner), SCARG(uap, group),
3534 SCARG(uap, flag));
3535 }
3536
3537 /*
3538 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3539 */
3540 /* ARGSUSED */
3541 int
3542 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3543 {
3544 /* {
3545 syscallarg(int) fd;
3546 syscallarg(uid_t) uid;
3547 syscallarg(gid_t) gid;
3548 } */
3549 int error;
3550 file_t *fp;
3551
3552 /* fd_getvnode() will use the descriptor for us */
3553 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3554 return (error);
3555 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid),
3556 l, 1);
3557 fd_putfile(SCARG(uap, fd));
3558 return (error);
3559 }
3560
3561 /*
3562 * Set ownership given a path name; this version does not follow links.
3563 */
3564 /* ARGSUSED */
3565 int
3566 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3567 {
3568 /* {
3569 syscallarg(const char *) path;
3570 syscallarg(uid_t) uid;
3571 syscallarg(gid_t) gid;
3572 } */
3573 int error;
3574 struct vnode *vp;
3575
3576 error = namei_simple_user(SCARG(uap, path),
3577 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3578 if (error != 0)
3579 return (error);
3580
3581 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3582
3583 vrele(vp);
3584 return (error);
3585 }
3586
3587 /*
3588 * Set ownership given a path name; this version does not follow links.
3589 * Provides POSIX/XPG semantics.
3590 */
3591 /* ARGSUSED */
3592 int
3593 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3594 {
3595 /* {
3596 syscallarg(const char *) path;
3597 syscallarg(uid_t) uid;
3598 syscallarg(gid_t) gid;
3599 } */
3600 int error;
3601 struct vnode *vp;
3602
3603 error = namei_simple_user(SCARG(uap, path),
3604 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3605 if (error != 0)
3606 return (error);
3607
3608 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3609
3610 vrele(vp);
3611 return (error);
3612 }
3613
3614 /*
3615 * Common routine to set ownership given a vnode.
3616 */
3617 static int
3618 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3619 int posix_semantics)
3620 {
3621 struct vattr vattr;
3622 mode_t newmode;
3623 int error;
3624
3625 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3626 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3627 goto out;
3628
3629 #define CHANGED(x) ((int)(x) != -1)
3630 newmode = vattr.va_mode;
3631 if (posix_semantics) {
3632 /*
3633 * POSIX/XPG semantics: if the caller is not the super-user,
3634 * clear set-user-id and set-group-id bits. Both POSIX and
3635 * the XPG consider the behaviour for calls by the super-user
3636 * implementation-defined; we leave the set-user-id and set-
3637 * group-id settings intact in that case.
3638 */
3639 if (vattr.va_mode & S_ISUID) {
3640 if (kauth_authorize_vnode(l->l_cred,
3641 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3642 newmode &= ~S_ISUID;
3643 }
3644 if (vattr.va_mode & S_ISGID) {
3645 if (kauth_authorize_vnode(l->l_cred,
3646 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3647 newmode &= ~S_ISGID;
3648 }
3649 } else {
3650 /*
3651 * NetBSD semantics: when changing owner and/or group,
3652 * clear the respective bit(s).
3653 */
3654 if (CHANGED(uid))
3655 newmode &= ~S_ISUID;
3656 if (CHANGED(gid))
3657 newmode &= ~S_ISGID;
3658 }
3659 /* Update va_mode iff altered. */
3660 if (vattr.va_mode == newmode)
3661 newmode = VNOVAL;
3662
3663 vattr_null(&vattr);
3664 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3665 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3666 vattr.va_mode = newmode;
3667 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3668 #undef CHANGED
3669
3670 out:
3671 VOP_UNLOCK(vp);
3672 return (error);
3673 }
3674
3675 /*
3676 * Set the access and modification times given a path name; this
3677 * version follows links.
3678 */
3679 /* ARGSUSED */
3680 int
3681 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3682 register_t *retval)
3683 {
3684 /* {
3685 syscallarg(const char *) path;
3686 syscallarg(const struct timeval *) tptr;
3687 } */
3688
3689 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3690 SCARG(uap, tptr), UIO_USERSPACE);
3691 }
3692
3693 /*
3694 * Set the access and modification times given a file descriptor.
3695 */
3696 /* ARGSUSED */
3697 int
3698 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3699 register_t *retval)
3700 {
3701 /* {
3702 syscallarg(int) fd;
3703 syscallarg(const struct timeval *) tptr;
3704 } */
3705 int error;
3706 file_t *fp;
3707
3708 /* fd_getvnode() will use the descriptor for us */
3709 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3710 return (error);
3711 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr),
3712 UIO_USERSPACE);
3713 fd_putfile(SCARG(uap, fd));
3714 return (error);
3715 }
3716
3717 int
3718 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3719 register_t *retval)
3720 {
3721 /* {
3722 syscallarg(int) fd;
3723 syscallarg(const struct timespec *) tptr;
3724 } */
3725 int error;
3726 file_t *fp;
3727
3728 /* fd_getvnode() will use the descriptor for us */
3729 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3730 return (error);
3731 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0,
3732 SCARG(uap, tptr), UIO_USERSPACE);
3733 fd_putfile(SCARG(uap, fd));
3734 return (error);
3735 }
3736
3737 /*
3738 * Set the access and modification times given a path name; this
3739 * version does not follow links.
3740 */
3741 int
3742 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3743 register_t *retval)
3744 {
3745 /* {
3746 syscallarg(const char *) path;
3747 syscallarg(const struct timeval *) tptr;
3748 } */
3749
3750 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3751 SCARG(uap, tptr), UIO_USERSPACE);
3752 }
3753
3754 int
3755 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3756 register_t *retval)
3757 {
3758 /* {
3759 syscallarg(int) fd;
3760 syscallarg(const char *) path;
3761 syscallarg(const struct timespec *) tptr;
3762 syscallarg(int) flag;
3763 } */
3764 int follow;
3765 const struct timespec *tptr;
3766 int error;
3767
3768 tptr = SCARG(uap, tptr);
3769 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3770
3771 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3772 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3773
3774 return error;
3775 }
3776
3777 /*
3778 * Common routine to set access and modification times given a vnode.
3779 */
3780 int
3781 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3782 const struct timespec *tptr, enum uio_seg seg)
3783 {
3784 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3785 }
3786
3787 int
3788 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3789 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3790 {
3791 struct vattr vattr;
3792 int error, dorele = 0;
3793 namei_simple_flags_t sflags;
3794 bool vanull, setbirthtime;
3795 struct timespec ts[2];
3796
3797 KASSERT(l != NULL || fdat == AT_FDCWD);
3798
3799 /*
3800 * I have checked all callers and they pass either FOLLOW,
3801 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3802 * is 0. More to the point, they don't pass anything else.
3803 * Let's keep it that way at least until the namei interfaces
3804 * are fully sanitized.
3805 */
3806 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3807 sflags = (flag == FOLLOW) ?
3808 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3809
3810 if (tptr == NULL) {
3811 vanull = true;
3812 nanotime(&ts[0]);
3813 ts[1] = ts[0];
3814 } else {
3815 vanull = false;
3816 if (seg != UIO_SYSSPACE) {
3817 error = copyin(tptr, ts, sizeof (ts));
3818 if (error != 0)
3819 return error;
3820 } else {
3821 ts[0] = tptr[0];
3822 ts[1] = tptr[1];
3823 }
3824 }
3825
3826 if (ts[0].tv_nsec == UTIME_NOW) {
3827 nanotime(&ts[0]);
3828 if (ts[1].tv_nsec == UTIME_NOW) {
3829 vanull = true;
3830 ts[1] = ts[0];
3831 }
3832 } else if (ts[1].tv_nsec == UTIME_NOW)
3833 nanotime(&ts[1]);
3834
3835 if (vp == NULL) {
3836 /* note: SEG describes TPTR, not PATH; PATH is always user */
3837 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3838 if (error != 0)
3839 return error;
3840 dorele = 1;
3841 }
3842
3843 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3844 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3845 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3846 vattr_null(&vattr);
3847
3848 if (ts[0].tv_nsec != UTIME_OMIT)
3849 vattr.va_atime = ts[0];
3850
3851 if (ts[1].tv_nsec != UTIME_OMIT) {
3852 vattr.va_mtime = ts[1];
3853 if (setbirthtime)
3854 vattr.va_birthtime = ts[1];
3855 }
3856
3857 if (vanull)
3858 vattr.va_vaflags |= VA_UTIMES_NULL;
3859 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3860 VOP_UNLOCK(vp);
3861
3862 if (dorele != 0)
3863 vrele(vp);
3864
3865 return error;
3866 }
3867
3868 int
3869 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3870 const struct timeval *tptr, enum uio_seg seg)
3871 {
3872 struct timespec ts[2];
3873 struct timespec *tsptr = NULL;
3874 int error;
3875
3876 if (tptr != NULL) {
3877 struct timeval tv[2];
3878
3879 if (seg != UIO_SYSSPACE) {
3880 error = copyin(tptr, tv, sizeof (tv));
3881 if (error != 0)
3882 return error;
3883 tptr = tv;
3884 }
3885
3886 if ((tv[0].tv_usec == UTIME_NOW) ||
3887 (tv[0].tv_usec == UTIME_OMIT))
3888 ts[0].tv_nsec = tv[0].tv_usec;
3889 else
3890 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3891
3892 if ((tv[1].tv_usec == UTIME_NOW) ||
3893 (tv[1].tv_usec == UTIME_OMIT))
3894 ts[1].tv_nsec = tv[1].tv_usec;
3895 else
3896 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3897
3898 tsptr = &ts[0];
3899 }
3900
3901 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3902 }
3903
3904 /*
3905 * Truncate a file given its path name.
3906 */
3907 /* ARGSUSED */
3908 int
3909 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3910 {
3911 /* {
3912 syscallarg(const char *) path;
3913 syscallarg(int) pad;
3914 syscallarg(off_t) length;
3915 } */
3916 struct vnode *vp;
3917 struct vattr vattr;
3918 int error;
3919
3920 if (SCARG(uap, length) < 0)
3921 return EINVAL;
3922
3923 error = namei_simple_user(SCARG(uap, path),
3924 NSM_FOLLOW_TRYEMULROOT, &vp);
3925 if (error != 0)
3926 return (error);
3927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3928 if (vp->v_type == VDIR)
3929 error = EISDIR;
3930 else if ((error = vn_writechk(vp)) == 0 &&
3931 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3932 vattr_null(&vattr);
3933 vattr.va_size = SCARG(uap, length);
3934 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3935 }
3936 vput(vp);
3937 return (error);
3938 }
3939
3940 /*
3941 * Truncate a file given a file descriptor.
3942 */
3943 /* ARGSUSED */
3944 int
3945 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3946 {
3947 /* {
3948 syscallarg(int) fd;
3949 syscallarg(int) pad;
3950 syscallarg(off_t) length;
3951 } */
3952 struct vattr vattr;
3953 struct vnode *vp;
3954 file_t *fp;
3955 int error;
3956
3957 if (SCARG(uap, length) < 0)
3958 return EINVAL;
3959
3960 /* fd_getvnode() will use the descriptor for us */
3961 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3962 return (error);
3963 if ((fp->f_flag & FWRITE) == 0) {
3964 error = EINVAL;
3965 goto out;
3966 }
3967 vp = fp->f_vnode;
3968 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3969 if (vp->v_type == VDIR)
3970 error = EISDIR;
3971 else if ((error = vn_writechk(vp)) == 0) {
3972 vattr_null(&vattr);
3973 vattr.va_size = SCARG(uap, length);
3974 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3975 }
3976 VOP_UNLOCK(vp);
3977 out:
3978 fd_putfile(SCARG(uap, fd));
3979 return (error);
3980 }
3981
3982 /*
3983 * Sync an open file.
3984 */
3985 /* ARGSUSED */
3986 int
3987 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3988 {
3989 /* {
3990 syscallarg(int) fd;
3991 } */
3992 struct vnode *vp;
3993 file_t *fp;
3994 int error;
3995
3996 /* fd_getvnode() will use the descriptor for us */
3997 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3998 return (error);
3999 vp = fp->f_vnode;
4000 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4001 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
4002 VOP_UNLOCK(vp);
4003 fd_putfile(SCARG(uap, fd));
4004 return (error);
4005 }
4006
4007 /*
4008 * Sync a range of file data. API modeled after that found in AIX.
4009 *
4010 * FDATASYNC indicates that we need only save enough metadata to be able
4011 * to re-read the written data. Note we duplicate AIX's requirement that
4012 * the file be open for writing.
4013 */
4014 /* ARGSUSED */
4015 int
4016 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4017 {
4018 /* {
4019 syscallarg(int) fd;
4020 syscallarg(int) flags;
4021 syscallarg(off_t) start;
4022 syscallarg(off_t) length;
4023 } */
4024 struct vnode *vp;
4025 file_t *fp;
4026 int flags, nflags;
4027 off_t s, e, len;
4028 int error;
4029
4030 /* fd_getvnode() will use the descriptor for us */
4031 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4032 return (error);
4033
4034 if ((fp->f_flag & FWRITE) == 0) {
4035 error = EBADF;
4036 goto out;
4037 }
4038
4039 flags = SCARG(uap, flags);
4040 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4041 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4042 error = EINVAL;
4043 goto out;
4044 }
4045 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4046 if (flags & FDATASYNC)
4047 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4048 else
4049 nflags = FSYNC_WAIT;
4050 if (flags & FDISKSYNC)
4051 nflags |= FSYNC_CACHE;
4052
4053 len = SCARG(uap, length);
4054 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4055 if (len) {
4056 s = SCARG(uap, start);
4057 e = s + len;
4058 if (e < s) {
4059 error = EINVAL;
4060 goto out;
4061 }
4062 } else {
4063 e = 0;
4064 s = 0;
4065 }
4066
4067 vp = fp->f_vnode;
4068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4069 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4070 VOP_UNLOCK(vp);
4071 out:
4072 fd_putfile(SCARG(uap, fd));
4073 return (error);
4074 }
4075
4076 /*
4077 * Sync the data of an open file.
4078 */
4079 /* ARGSUSED */
4080 int
4081 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4082 {
4083 /* {
4084 syscallarg(int) fd;
4085 } */
4086 struct vnode *vp;
4087 file_t *fp;
4088 int error;
4089
4090 /* fd_getvnode() will use the descriptor for us */
4091 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4092 return (error);
4093 if ((fp->f_flag & FWRITE) == 0) {
4094 fd_putfile(SCARG(uap, fd));
4095 return (EBADF);
4096 }
4097 vp = fp->f_vnode;
4098 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4099 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4100 VOP_UNLOCK(vp);
4101 fd_putfile(SCARG(uap, fd));
4102 return (error);
4103 }
4104
4105 /*
4106 * Rename files, (standard) BSD semantics frontend.
4107 */
4108 /* ARGSUSED */
4109 int
4110 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4111 {
4112 /* {
4113 syscallarg(const char *) from;
4114 syscallarg(const char *) to;
4115 } */
4116
4117 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4118 SCARG(uap, to), UIO_USERSPACE, 0));
4119 }
4120
4121 int
4122 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4123 register_t *retval)
4124 {
4125 /* {
4126 syscallarg(int) fromfd;
4127 syscallarg(const char *) from;
4128 syscallarg(int) tofd;
4129 syscallarg(const char *) to;
4130 } */
4131
4132 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4133 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4134 }
4135
4136 /*
4137 * Rename files, POSIX semantics frontend.
4138 */
4139 /* ARGSUSED */
4140 int
4141 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4142 {
4143 /* {
4144 syscallarg(const char *) from;
4145 syscallarg(const char *) to;
4146 } */
4147
4148 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4149 SCARG(uap, to), UIO_USERSPACE, 1));
4150 }
4151
4152 /*
4153 * Rename files. Source and destination must either both be directories,
4154 * or both not be directories. If target is a directory, it must be empty.
4155 * If `from' and `to' refer to the same object, the value of the `retain'
4156 * argument is used to determine whether `from' will be
4157 *
4158 * (retain == 0) deleted unless `from' and `to' refer to the same
4159 * object in the file system's name space (BSD).
4160 * (retain == 1) always retained (POSIX).
4161 *
4162 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4163 */
4164 int
4165 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4166 {
4167 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4168 }
4169
/*
 * Common implementation behind all rename entry points.
 *
 * `from' is looked up relative to fromfd and `to' relative to tofd; seg
 * names the address space both path strings live in.  l may be NULL only
 * when both descriptors are AT_FDCWD (asserted below).  retain selects
 * what happens when both names resolve to the same vnode: nonzero means
 * succeed without doing anything; zero does the same only if the parent
 * directories and final component names are identical as well.
 *
 * Outline:
 *   1. copy in both paths;
 *   2. look up `from', then `to', unlocking each parent afterwards;
 *   3. reject `.' and `..' as the final component on either side;
 *   4. require both parents to be on the same, still present, mount;
 *   5. take the vfs rename lock, lock tdvp and relookup `to';
 *   6. run the consistency checks (and veriexec if configured);
 *   7. call VOP_RENAME().
 *
 * Returns 0 on success or an errno value.
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR? */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 * Unlike fvp, tvp may be NULL here: only tdvp is asserted.
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) {
		error = EISDIR;
		goto abort1;
	}
	if ((tnd.ni_cnd.cn_namelen == 2) &&
	    (tnd.ni_cnd.cn_nameptr[0] == '.') &&
	    (tnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		goto abort1;
	}

	/*
	 * Get the mount point.  If the file system has been unmounted,
	 * which it may be because we're not holding any vnode locks,
	 * then v_mount will be NULL.  We're not really supposed to
	 * read v_mount without holding the vnode lock, but since we
	 * have fdvp referenced, if fdvp->v_mount changes then at worst
	 * it will be set to NULL, not changed to another mount point.
	 * And, of course, since it is up to the file system to
	 * determine the real lock order, we can't lock both fdvp and
	 * tdvp at the same time.
	 */
	mp = fdvp->v_mount;
	if (mp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Again, although we don't
	 * hold any vnode locks, the v_mount fields may change -- but
	 * at worst they will change to NULL, so this will never become
	 * a cross-device rename, because we hold vnode references.
	 *
	 * XXX Because nothing is locked and the compiler may reorder
	 * things here, unmounting the file system at an inopportune
	 * moment may cause rename to fail with EXDEV when it really
	 * should fail with ENOENT.
	 */
	tmp = tdvp->v_mount;
	if (tmp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/*
		 * Hand veriexec private copies of the two final component
		 * names, each sized cn_namelen + 1 for strlcpy().
		 */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		/* curlwp rather than l: l is allowed to be NULL here. */
		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	goto out2;

	/*
	 * Error unwinding; each label falls through to the next one:
	 *   abort3  unlock tvp (when present and distinct from tdvp)
	 *   abort2  unlock tdvp and leave the vfs rename lock
	 *   abort1  abort the `to' operation, release tdvp and tvp
	 *   abort0  abort the `from' operation, release fdvp and fvp
	 *   out2    destroy the `to' pathbuf
	 *   out1    destroy the `from' pathbuf
	 */
abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}
4500
4501 /*
4502 * Make a directory file.
4503 */
4504 /* ARGSUSED */
4505 int
4506 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4507 {
4508 /* {
4509 syscallarg(const char *) path;
4510 syscallarg(int) mode;
4511 } */
4512
4513 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4514 SCARG(uap, mode), UIO_USERSPACE);
4515 }
4516
4517 int
4518 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4519 register_t *retval)
4520 {
4521 /* {
4522 syscallarg(int) fd;
4523 syscallarg(const char *) path;
4524 syscallarg(int) mode;
4525 } */
4526
4527 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4528 SCARG(uap, mode), UIO_USERSPACE);
4529 }
4530
4531
4532 int
4533 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4534 {
4535 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4536 }
4537
4538 static int
4539 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4540 enum uio_seg seg)
4541 {
4542 struct proc *p = curlwp->l_proc;
4543 struct vnode *vp;
4544 struct vattr vattr;
4545 int error;
4546 struct pathbuf *pb;
4547 struct nameidata nd;
4548
4549 KASSERT(l != NULL || fdat == AT_FDCWD);
4550
4551 /* XXX bollocks, should pass in a pathbuf */
4552 error = pathbuf_maybe_copyin(path, seg, &pb);
4553 if (error) {
4554 return error;
4555 }
4556
4557 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4558
4559 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4560 pathbuf_destroy(pb);
4561 return (error);
4562 }
4563 vp = nd.ni_vp;
4564 if (vp != NULL) {
4565 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4566 if (nd.ni_dvp == vp)
4567 vrele(nd.ni_dvp);
4568 else
4569 vput(nd.ni_dvp);
4570 vrele(vp);
4571 pathbuf_destroy(pb);
4572 return (EEXIST);
4573 }
4574 vattr_null(&vattr);
4575 vattr.va_type = VDIR;
4576 /* We will read cwdi->cwdi_cmask unlocked. */
4577 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4578 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4579 if (!error)
4580 vrele(nd.ni_vp);
4581 vput(nd.ni_dvp);
4582 pathbuf_destroy(pb);
4583 return (error);
4584 }
4585
4586 /*
4587 * Remove a directory file.
4588 */
4589 /* ARGSUSED */
4590 int
4591 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4592 {
4593 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4594 AT_REMOVEDIR, UIO_USERSPACE);
4595 }
4596
4597 /*
4598 * Read a block of directory entries in a file system independent format.
4599 */
4600 int
4601 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4602 {
4603 /* {
4604 syscallarg(int) fd;
4605 syscallarg(char *) buf;
4606 syscallarg(size_t) count;
4607 } */
4608 file_t *fp;
4609 int error, done;
4610
4611 /* fd_getvnode() will use the descriptor for us */
4612 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4613 return (error);
4614 if ((fp->f_flag & FREAD) == 0) {
4615 error = EBADF;
4616 goto out;
4617 }
4618 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4619 SCARG(uap, count), &done, l, 0, 0);
4620 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4621 *retval = done;
4622 out:
4623 fd_putfile(SCARG(uap, fd));
4624 return (error);
4625 }
4626
4627 /*
4628 * Set the mode mask for creation of filesystem nodes.
4629 */
4630 int
4631 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4632 {
4633 /* {
4634 syscallarg(mode_t) newmask;
4635 } */
4636 struct proc *p = l->l_proc;
4637 struct cwdinfo *cwdi;
4638
4639 /*
4640 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4641 * important is that we serialize changes to the mask. The
4642 * rw_exit() will issue a write memory barrier on our behalf,
4643 * and force the changes out to other CPUs (as it must use an
4644 * atomic operation, draining the local CPU's store buffers).
4645 */
4646 cwdi = p->p_cwdi;
4647 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4648 *retval = cwdi->cwdi_cmask;
4649 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4650 rw_exit(&cwdi->cwdi_lock);
4651
4652 return (0);
4653 }
4654
4655 int
4656 dorevoke(struct vnode *vp, kauth_cred_t cred)
4657 {
4658 struct vattr vattr;
4659 int error, fs_decision;
4660
4661 vn_lock(vp, LK_SHARED | LK_RETRY);
4662 error = VOP_GETATTR(vp, &vattr, cred);
4663 VOP_UNLOCK(vp);
4664 if (error != 0)
4665 return error;
4666 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4667 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4668 fs_decision);
4669 if (!error)
4670 VOP_REVOKE(vp, REVOKEALL);
4671 return (error);
4672 }
4673
4674 /*
4675 * Void all references to file by ripping underlying filesystem
4676 * away from vnode.
4677 */
4678 /* ARGSUSED */
4679 int
4680 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4681 {
4682 /* {
4683 syscallarg(const char *) path;
4684 } */
4685 struct vnode *vp;
4686 int error;
4687
4688 error = namei_simple_user(SCARG(uap, path),
4689 NSM_FOLLOW_TRYEMULROOT, &vp);
4690 if (error != 0)
4691 return (error);
4692 error = dorevoke(vp, l->l_cred);
4693 vrele(vp);
4694 return (error);
4695 }
4696
4697 /*
4698 * Allocate backing store for a file, filling a hole without having to
4699 * explicitly write anything out.
4700 */
4701 /* ARGSUSED */
4702 int
4703 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap,
4704 register_t *retval)
4705 {
4706 /* {
4707 syscallarg(int) fd;
4708 syscallarg(off_t) pos;
4709 syscallarg(off_t) len;
4710 } */
4711 int fd;
4712 off_t pos, len;
4713 struct file *fp;
4714 struct vnode *vp;
4715 int error;
4716
4717 fd = SCARG(uap, fd);
4718 pos = SCARG(uap, pos);
4719 len = SCARG(uap, len);
4720
4721 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4722 *retval = EINVAL;
4723 return 0;
4724 }
4725
4726 error = fd_getvnode(fd, &fp);
4727 if (error) {
4728 *retval = error;
4729 return 0;
4730 }
4731 if ((fp->f_flag & FWRITE) == 0) {
4732 error = EBADF;
4733 goto fail;
4734 }
4735 vp = fp->f_vnode;
4736
4737 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4738 if (vp->v_type == VDIR) {
4739 error = EISDIR;
4740 } else {
4741 error = VOP_FALLOCATE(vp, pos, len);
4742 }
4743 VOP_UNLOCK(vp);
4744
4745 fail:
4746 fd_putfile(fd);
4747 *retval = error;
4748 return 0;
4749 }
4750
4751 /*
4752 * Deallocate backing store for a file, creating a hole. Also used for
4753 * invoking TRIM on disks.
4754 */
4755 /* ARGSUSED */
4756 int
4757 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap,
4758 register_t *retval)
4759 {
4760 /* {
4761 syscallarg(int) fd;
4762 syscallarg(off_t) pos;
4763 syscallarg(off_t) len;
4764 } */
4765 int fd;
4766 off_t pos, len;
4767 struct file *fp;
4768 struct vnode *vp;
4769 int error;
4770
4771 fd = SCARG(uap, fd);
4772 pos = SCARG(uap, pos);
4773 len = SCARG(uap, len);
4774
4775 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) {
4776 return EINVAL;
4777 }
4778
4779 error = fd_getvnode(fd, &fp);
4780 if (error) {
4781 return error;
4782 }
4783 if ((fp->f_flag & FWRITE) == 0) {
4784 error = EBADF;
4785 goto fail;
4786 }
4787 vp = fp->f_vnode;
4788
4789 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4790 if (vp->v_type == VDIR) {
4791 error = EISDIR;
4792 } else {
4793 error = VOP_FDISCARD(vp, pos, len);
4794 }
4795 VOP_UNLOCK(vp);
4796
4797 fail:
4798 fd_putfile(fd);
4799 return error;
4800 }
4801