vfs_syscalls.c revision 1.474 1 /* $NetBSD: vfs_syscalls.c,v 1.474 2014/01/25 02:28:31 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.474 2014/01/25 02:28:31 christos Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/syncfs/syncfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
119 static int change_flags(struct vnode *, u_long, struct lwp *);
120 static int change_mode(struct vnode *, int, struct lwp *l);
121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
122 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
123 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
124 enum uio_seg);
125 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
126 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
127 enum uio_seg);
128 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
129 enum uio_seg, int);
130 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
131 size_t, register_t *);
132 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
133
134 static int fd_nameiat(struct lwp *, int, struct nameidata *);
135 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
136 namei_simple_flags_t, struct vnode **);
137
138
139 /*
140 * This table is used to maintain compatibility with 4.3BSD
141 * and NetBSD 0.9 mount syscalls - and possibly other systems.
142 * Note, the order is important!
143 *
144 * Do not modify this table. It should only contain filesystems
145 * supported by NetBSD 0.9 and 4.3BSD.
146 */
147 const char * const mountcompatnames[] = {
148 NULL, /* 0 = MOUNT_NONE */
149 MOUNT_FFS, /* 1 = MOUNT_UFS */
150 MOUNT_NFS, /* 2 */
151 MOUNT_MFS, /* 3 */
152 MOUNT_MSDOS, /* 4 */
153 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
154 MOUNT_FDESC, /* 6 */
155 MOUNT_KERNFS, /* 7 */
156 NULL, /* 8 = MOUNT_DEVFS */
157 MOUNT_AFS, /* 9 */
158 };
159
160 const int nmountcompatnames = __arraycount(mountcompatnames);
161
162 static int
163 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
164 {
165 file_t *dfp;
166 int error;
167
168 if (fdat != AT_FDCWD) {
169 if ((error = fd_getvnode(fdat, &dfp)) != 0)
170 goto out;
171
172 NDAT(ndp, dfp->f_data);
173 }
174
175 error = namei(ndp);
176
177 if (fdat != AT_FDCWD)
178 fd_putfile(fdat);
179 out:
180 return error;
181 }
182
183 static int
184 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
185 namei_simple_flags_t sflags, struct vnode **vp_ret)
186 {
187 file_t *dfp;
188 struct vnode *dvp;
189 int error;
190
191 if (fdat != AT_FDCWD) {
192 if ((error = fd_getvnode(fdat, &dfp)) != 0)
193 goto out;
194
195 dvp = dfp->f_data;
196 } else {
197 dvp = NULL;
198 }
199
200 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
201
202 if (fdat != AT_FDCWD)
203 fd_putfile(fdat);
204 out:
205 return error;
206 }
207
208 static int
209 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
210 {
211 int error;
212
213 fp->f_flag = flags & FMASK;
214 fp->f_type = DTYPE_VNODE;
215 fp->f_ops = &vnops;
216 fp->f_data = vp;
217
218 if (flags & (O_EXLOCK | O_SHLOCK)) {
219 struct flock lf;
220 int type;
221
222 lf.l_whence = SEEK_SET;
223 lf.l_start = 0;
224 lf.l_len = 0;
225 if (flags & O_EXLOCK)
226 lf.l_type = F_WRLCK;
227 else
228 lf.l_type = F_RDLCK;
229 type = F_FLOCK;
230 if ((flags & FNONBLOCK) == 0)
231 type |= F_WAIT;
232 VOP_UNLOCK(vp);
233 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
234 if (error) {
235 (void) vn_close(vp, fp->f_flag, fp->f_cred);
236 fd_abort(l->l_proc, fp, indx);
237 return error;
238 }
239 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
240 atomic_or_uint(&fp->f_flag, FHASLOCK);
241 }
242 if (flags & O_CLOEXEC)
243 fd_set_exclose(l, indx, true);
244 return 0;
245 }
246
247 static int
248 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
249 void *data, size_t *data_len)
250 {
251 struct mount *mp;
252 int error = 0, saved_flags;
253
254 mp = vp->v_mount;
255 saved_flags = mp->mnt_flag;
256
257 /* We can operate only on VV_ROOT nodes. */
258 if ((vp->v_vflag & VV_ROOT) == 0) {
259 error = EINVAL;
260 goto out;
261 }
262
263 /*
264 * We only allow the filesystem to be reloaded if it
265 * is currently mounted read-only. Additionally, we
266 * prevent read-write to read-only downgrades.
267 */
268 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
269 (mp->mnt_flag & MNT_RDONLY) == 0 &&
270 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
271 error = EOPNOTSUPP; /* Needs translation */
272 goto out;
273 }
274
275 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
276 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
277 if (error)
278 goto out;
279
280 if (vfs_busy(mp, NULL)) {
281 error = EPERM;
282 goto out;
283 }
284
285 mutex_enter(&mp->mnt_updating);
286
287 mp->mnt_flag &= ~MNT_OP_FLAGS;
288 mp->mnt_flag |= flags & MNT_OP_FLAGS;
289
290 /*
291 * Set the mount level flags.
292 */
293 if (flags & MNT_RDONLY)
294 mp->mnt_flag |= MNT_RDONLY;
295 else if (mp->mnt_flag & MNT_RDONLY)
296 mp->mnt_iflag |= IMNT_WANTRDWR;
297 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
298 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
299 error = VFS_MOUNT(mp, path, data, data_len);
300
301 if (error && data != NULL) {
302 int error2;
303
304 /*
305 * Update failed; let's try and see if it was an
306 * export request. For compat with 3.0 and earlier.
307 */
308 error2 = vfs_hooks_reexport(mp, path, data);
309
310 /*
311 * Only update error code if the export request was
312 * understood but some problem occurred while
313 * processing it.
314 */
315 if (error2 != EJUSTRETURN)
316 error = error2;
317 }
318
319 if (mp->mnt_iflag & IMNT_WANTRDWR)
320 mp->mnt_flag &= ~MNT_RDONLY;
321 if (error)
322 mp->mnt_flag = saved_flags;
323 mp->mnt_flag &= ~MNT_OP_FLAGS;
324 mp->mnt_iflag &= ~IMNT_WANTRDWR;
325 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
326 if (mp->mnt_syncer == NULL)
327 error = vfs_allocate_syncvnode(mp);
328 } else {
329 if (mp->mnt_syncer != NULL)
330 vfs_deallocate_syncvnode(mp);
331 }
332 mutex_exit(&mp->mnt_updating);
333 vfs_unbusy(mp, false, NULL);
334
335 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
336 (flags & MNT_EXTATTR)) {
337 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
338 NULL, 0, NULL) != 0) {
339 printf("%s: failed to start extattr, error = %d",
340 mp->mnt_stat.f_mntonname, error);
341 mp->mnt_flag &= ~MNT_EXTATTR;
342 }
343 }
344
345 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
346 !(flags & MNT_EXTATTR)) {
347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
348 NULL, 0, NULL) != 0) {
349 printf("%s: failed to stop extattr, error = %d",
350 mp->mnt_stat.f_mntonname, error);
351 mp->mnt_flag |= MNT_RDONLY;
352 }
353 }
354 out:
355 return (error);
356 }
357
358 static int
359 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
360 {
361 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
362 int error;
363
364 /* Copy file-system type from userspace. */
365 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
366 if (error) {
367 /*
368 * Historically, filesystem types were identified by numbers.
369 * If we get an integer for the filesystem type instead of a
370 * string, we check to see if it matches one of the historic
371 * filesystem types.
372 */
373 u_long fsindex = (u_long)fstype;
374 if (fsindex >= nmountcompatnames ||
375 mountcompatnames[fsindex] == NULL)
376 return ENODEV;
377 strlcpy(fstypename, mountcompatnames[fsindex],
378 sizeof(fstypename));
379 }
380
381 /* Accept `ufs' as an alias for `ffs', for compatibility. */
382 if (strcmp(fstypename, "ufs") == 0)
383 fstypename[0] = 'f';
384
385 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
386 return 0;
387
388 /* If we can autoload a vfs module, try again */
389 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
390
391 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
392 return 0;
393
394 return ENODEV;
395 }
396
397 static int
398 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
399 void *data, size_t *data_len)
400 {
401 struct mount *mp;
402 int error;
403
404 /* If MNT_GETARGS is specified, it should be the only flag. */
405 if (flags & ~MNT_GETARGS)
406 return EINVAL;
407
408 mp = vp->v_mount;
409
410 /* XXX: probably some notion of "can see" here if we want isolation. */
411 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
412 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
413 if (error)
414 return error;
415
416 if ((vp->v_vflag & VV_ROOT) == 0)
417 return EINVAL;
418
419 if (vfs_busy(mp, NULL))
420 return EPERM;
421
422 mutex_enter(&mp->mnt_updating);
423 mp->mnt_flag &= ~MNT_OP_FLAGS;
424 mp->mnt_flag |= MNT_GETARGS;
425 error = VFS_MOUNT(mp, path, data, data_len);
426 mp->mnt_flag &= ~MNT_OP_FLAGS;
427 mutex_exit(&mp->mnt_updating);
428
429 vfs_unbusy(mp, false, NULL);
430 return (error);
431 }
432
433 int
434 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
435 {
436 /* {
437 syscallarg(const char *) type;
438 syscallarg(const char *) path;
439 syscallarg(int) flags;
440 syscallarg(void *) data;
441 syscallarg(size_t) data_len;
442 } */
443
444 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
445 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
446 SCARG(uap, data_len), retval);
447 }
448
449 int
450 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
451 const char *path, int flags, void *data, enum uio_seg data_seg,
452 size_t data_len, register_t *retval)
453 {
454 struct vnode *vp;
455 void *data_buf = data;
456 bool vfsopsrele = false;
457 int error;
458
459 /* XXX: The calling convention of this routine is totally bizarre */
460 if (vfsops)
461 vfsopsrele = true;
462
463 /*
464 * Get vnode to be covered
465 */
466 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
467 if (error != 0) {
468 vp = NULL;
469 goto done;
470 }
471
472 if (vfsops == NULL) {
473 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
474 vfsops = vp->v_mount->mnt_op;
475 } else {
476 /* 'type' is userspace */
477 error = mount_get_vfsops(type, &vfsops);
478 if (error != 0)
479 goto done;
480 vfsopsrele = true;
481 }
482 }
483
484 if (data != NULL && data_seg == UIO_USERSPACE) {
485 if (data_len == 0) {
486 /* No length supplied, use default for filesystem */
487 data_len = vfsops->vfs_min_mount_data;
488 if (data_len > VFS_MAX_MOUNT_DATA) {
489 error = EINVAL;
490 goto done;
491 }
492 /*
493 * Hopefully a longer buffer won't make copyin() fail.
494 * For compatibility with 3.0 and earlier.
495 */
496 if (flags & MNT_UPDATE
497 && data_len < sizeof (struct mnt_export_args30))
498 data_len = sizeof (struct mnt_export_args30);
499 }
500 data_buf = kmem_alloc(data_len, KM_SLEEP);
501
502 /* NFS needs the buffer even for mnt_getargs .... */
503 error = copyin(data, data_buf, data_len);
504 if (error != 0)
505 goto done;
506 }
507
508 if (flags & MNT_GETARGS) {
509 if (data_len == 0) {
510 error = EINVAL;
511 goto done;
512 }
513 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
514 if (error != 0)
515 goto done;
516 if (data_seg == UIO_USERSPACE)
517 error = copyout(data_buf, data, data_len);
518 *retval = data_len;
519 } else if (flags & MNT_UPDATE) {
520 error = mount_update(l, vp, path, flags, data_buf, &data_len);
521 } else {
522 /* Locking is handled internally in mount_domount(). */
523 KASSERT(vfsopsrele == true);
524 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
525 &data_len);
526 vfsopsrele = false;
527 }
528
529 done:
530 if (vfsopsrele)
531 vfs_delref(vfsops);
532 if (vp != NULL) {
533 vrele(vp);
534 }
535 if (data_buf != data)
536 kmem_free(data_buf, data_len);
537 return (error);
538 }
539
540 /*
541 * Unmount a file system.
542 *
543 * Note: unmount takes a path to the vnode mounted on as argument,
544 * not special file (as before).
545 */
546 /* ARGSUSED */
547 int
548 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
549 {
550 /* {
551 syscallarg(const char *) path;
552 syscallarg(int) flags;
553 } */
554 struct vnode *vp;
555 struct mount *mp;
556 int error;
557 struct pathbuf *pb;
558 struct nameidata nd;
559
560 error = pathbuf_copyin(SCARG(uap, path), &pb);
561 if (error) {
562 return error;
563 }
564
565 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
566 if ((error = namei(&nd)) != 0) {
567 pathbuf_destroy(pb);
568 return error;
569 }
570 vp = nd.ni_vp;
571 pathbuf_destroy(pb);
572
573 mp = vp->v_mount;
574 atomic_inc_uint(&mp->mnt_refcnt);
575 VOP_UNLOCK(vp);
576
577 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
578 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
579 if (error) {
580 vrele(vp);
581 vfs_destroy(mp);
582 return (error);
583 }
584
585 /*
586 * Don't allow unmounting the root file system.
587 */
588 if (mp->mnt_flag & MNT_ROOTFS) {
589 vrele(vp);
590 vfs_destroy(mp);
591 return (EINVAL);
592 }
593
594 /*
595 * Must be the root of the filesystem
596 */
597 if ((vp->v_vflag & VV_ROOT) == 0) {
598 vrele(vp);
599 vfs_destroy(mp);
600 return (EINVAL);
601 }
602
603 vrele(vp);
604 error = dounmount(mp, SCARG(uap, flags), l);
605 vfs_destroy(mp);
606 return error;
607 }
608
609 /*
610 * Sync each mounted filesystem.
611 */
612 #ifdef DEBUG
613 int syncprt = 0;
614 struct ctldebug debug0 = { "syncprt", &syncprt };
615 #endif
616
617 void
618 do_sys_sync(struct lwp *l)
619 {
620 struct mount *mp, *nmp;
621 int asyncflag;
622
623 mutex_enter(&mountlist_lock);
624 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
625 if (vfs_busy(mp, &nmp)) {
626 continue;
627 }
628 mutex_enter(&mp->mnt_updating);
629 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
630 asyncflag = mp->mnt_flag & MNT_ASYNC;
631 mp->mnt_flag &= ~MNT_ASYNC;
632 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
633 if (asyncflag)
634 mp->mnt_flag |= MNT_ASYNC;
635 }
636 mutex_exit(&mp->mnt_updating);
637 vfs_unbusy(mp, false, &nmp);
638 }
639 mutex_exit(&mountlist_lock);
640 #ifdef DEBUG
641 if (syncprt)
642 vfs_bufstats();
643 #endif /* DEBUG */
644 }
645
646 /* ARGSUSED */
647 int
648 sys_sync(struct lwp *l, const void *v, register_t *retval)
649 {
650 do_sys_sync(l);
651 return (0);
652 }
653
654
655 /*
656 * Access or change filesystem quotas.
657 *
658 * (this is really 14 different calls bundled into one)
659 */
660
661 static int
662 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
663 {
664 struct quotastat info_k;
665 int error;
666
667 /* ensure any padding bytes are cleared */
668 memset(&info_k, 0, sizeof(info_k));
669
670 error = vfs_quotactl_stat(mp, &info_k);
671 if (error) {
672 return error;
673 }
674
675 return copyout(&info_k, info_u, sizeof(info_k));
676 }
677
678 static int
679 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
680 struct quotaidtypestat *info_u)
681 {
682 struct quotaidtypestat info_k;
683 int error;
684
685 /* ensure any padding bytes are cleared */
686 memset(&info_k, 0, sizeof(info_k));
687
688 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
689 if (error) {
690 return error;
691 }
692
693 return copyout(&info_k, info_u, sizeof(info_k));
694 }
695
696 static int
697 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
698 struct quotaobjtypestat *info_u)
699 {
700 struct quotaobjtypestat info_k;
701 int error;
702
703 /* ensure any padding bytes are cleared */
704 memset(&info_k, 0, sizeof(info_k));
705
706 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
707 if (error) {
708 return error;
709 }
710
711 return copyout(&info_k, info_u, sizeof(info_k));
712 }
713
714 static int
715 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
716 struct quotaval *val_u)
717 {
718 struct quotakey key_k;
719 struct quotaval val_k;
720 int error;
721
722 /* ensure any padding bytes are cleared */
723 memset(&val_k, 0, sizeof(val_k));
724
725 error = copyin(key_u, &key_k, sizeof(key_k));
726 if (error) {
727 return error;
728 }
729
730 error = vfs_quotactl_get(mp, &key_k, &val_k);
731 if (error) {
732 return error;
733 }
734
735 return copyout(&val_k, val_u, sizeof(val_k));
736 }
737
738 static int
739 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
740 const struct quotaval *val_u)
741 {
742 struct quotakey key_k;
743 struct quotaval val_k;
744 int error;
745
746 error = copyin(key_u, &key_k, sizeof(key_k));
747 if (error) {
748 return error;
749 }
750
751 error = copyin(val_u, &val_k, sizeof(val_k));
752 if (error) {
753 return error;
754 }
755
756 return vfs_quotactl_put(mp, &key_k, &val_k);
757 }
758
759 static int
760 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u)
761 {
762 struct quotakey key_k;
763 int error;
764
765 error = copyin(key_u, &key_k, sizeof(key_k));
766 if (error) {
767 return error;
768 }
769
770 return vfs_quotactl_delete(mp, &key_k);
771 }
772
773 static int
774 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
775 {
776 struct quotakcursor cursor_k;
777 int error;
778
779 /* ensure any padding bytes are cleared */
780 memset(&cursor_k, 0, sizeof(cursor_k));
781
782 error = vfs_quotactl_cursoropen(mp, &cursor_k);
783 if (error) {
784 return error;
785 }
786
787 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
788 }
789
790 static int
791 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
792 {
793 struct quotakcursor cursor_k;
794 int error;
795
796 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
797 if (error) {
798 return error;
799 }
800
801 return vfs_quotactl_cursorclose(mp, &cursor_k);
802 }
803
804 static int
805 do_sys_quotactl_cursorskipidtype(struct mount *mp,
806 struct quotakcursor *cursor_u, int idtype)
807 {
808 struct quotakcursor cursor_k;
809 int error;
810
811 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
812 if (error) {
813 return error;
814 }
815
816 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
817 if (error) {
818 return error;
819 }
820
821 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
822 }
823
824 static int
825 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
826 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
827 unsigned *ret_u)
828 {
829 #define CGET_STACK_MAX 8
830 struct quotakcursor cursor_k;
831 struct quotakey stackkeys[CGET_STACK_MAX];
832 struct quotaval stackvals[CGET_STACK_MAX];
833 struct quotakey *keys_k;
834 struct quotaval *vals_k;
835 unsigned ret_k;
836 int error;
837
838 if (maxnum > 128) {
839 maxnum = 128;
840 }
841
842 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
843 if (error) {
844 return error;
845 }
846
847 if (maxnum <= CGET_STACK_MAX) {
848 keys_k = stackkeys;
849 vals_k = stackvals;
850 /* ensure any padding bytes are cleared */
851 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
852 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
853 } else {
854 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
855 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
856 }
857
858 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
859 &ret_k);
860 if (error) {
861 goto fail;
862 }
863
864 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
865 if (error) {
866 goto fail;
867 }
868
869 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
870 if (error) {
871 goto fail;
872 }
873
874 error = copyout(&ret_k, ret_u, sizeof(ret_k));
875 if (error) {
876 goto fail;
877 }
878
879 /* do last to maximize the chance of being able to recover a failure */
880 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
881
882 fail:
883 if (keys_k != stackkeys) {
884 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
885 }
886 if (vals_k != stackvals) {
887 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
888 }
889 return error;
890 }
891
892 static int
893 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
894 int *ret_u)
895 {
896 struct quotakcursor cursor_k;
897 int ret_k;
898 int error;
899
900 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
901 if (error) {
902 return error;
903 }
904
905 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
906 if (error) {
907 return error;
908 }
909
910 error = copyout(&ret_k, ret_u, sizeof(ret_k));
911 if (error) {
912 return error;
913 }
914
915 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
916 }
917
918 static int
919 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
920 {
921 struct quotakcursor cursor_k;
922 int error;
923
924 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
925 if (error) {
926 return error;
927 }
928
929 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
930 if (error) {
931 return error;
932 }
933
934 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
935 }
936
937 static int
938 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
939 {
940 char *path_k;
941 int error;
942
943 /* XXX this should probably be a struct pathbuf */
944 path_k = PNBUF_GET();
945 error = copyin(path_u, path_k, PATH_MAX);
946 if (error) {
947 PNBUF_PUT(path_k);
948 return error;
949 }
950
951 error = vfs_quotactl_quotaon(mp, idtype, path_k);
952
953 PNBUF_PUT(path_k);
954 return error;
955 }
956
/*
 * QUOTACTL_QUOTAOFF: disable quotas of type idtype on mp.  No user
 * memory is touched; this just forwards to vfs_quotactl_quotaoff().
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);
	return rv;
}
962
963 int
964 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
965 {
966 struct mount *mp;
967 struct vnode *vp;
968 int error;
969
970 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
971 if (error != 0)
972 return (error);
973 mp = vp->v_mount;
974
975 switch (args->qc_op) {
976 case QUOTACTL_STAT:
977 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
978 break;
979 case QUOTACTL_IDTYPESTAT:
980 error = do_sys_quotactl_idtypestat(mp,
981 args->u.idtypestat.qc_idtype,
982 args->u.idtypestat.qc_info);
983 break;
984 case QUOTACTL_OBJTYPESTAT:
985 error = do_sys_quotactl_objtypestat(mp,
986 args->u.objtypestat.qc_objtype,
987 args->u.objtypestat.qc_info);
988 break;
989 case QUOTACTL_GET:
990 error = do_sys_quotactl_get(mp,
991 args->u.get.qc_key,
992 args->u.get.qc_val);
993 break;
994 case QUOTACTL_PUT:
995 error = do_sys_quotactl_put(mp,
996 args->u.put.qc_key,
997 args->u.put.qc_val);
998 break;
999 case QUOTACTL_DELETE:
1000 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key);
1001 break;
1002 case QUOTACTL_CURSOROPEN:
1003 error = do_sys_quotactl_cursoropen(mp,
1004 args->u.cursoropen.qc_cursor);
1005 break;
1006 case QUOTACTL_CURSORCLOSE:
1007 error = do_sys_quotactl_cursorclose(mp,
1008 args->u.cursorclose.qc_cursor);
1009 break;
1010 case QUOTACTL_CURSORSKIPIDTYPE:
1011 error = do_sys_quotactl_cursorskipidtype(mp,
1012 args->u.cursorskipidtype.qc_cursor,
1013 args->u.cursorskipidtype.qc_idtype);
1014 break;
1015 case QUOTACTL_CURSORGET:
1016 error = do_sys_quotactl_cursorget(mp,
1017 args->u.cursorget.qc_cursor,
1018 args->u.cursorget.qc_keys,
1019 args->u.cursorget.qc_vals,
1020 args->u.cursorget.qc_maxnum,
1021 args->u.cursorget.qc_ret);
1022 break;
1023 case QUOTACTL_CURSORATEND:
1024 error = do_sys_quotactl_cursoratend(mp,
1025 args->u.cursoratend.qc_cursor,
1026 args->u.cursoratend.qc_ret);
1027 break;
1028 case QUOTACTL_CURSORREWIND:
1029 error = do_sys_quotactl_cursorrewind(mp,
1030 args->u.cursorrewind.qc_cursor);
1031 break;
1032 case QUOTACTL_QUOTAON:
1033 error = do_sys_quotactl_quotaon(mp,
1034 args->u.quotaon.qc_idtype,
1035 args->u.quotaon.qc_quotafile);
1036 break;
1037 case QUOTACTL_QUOTAOFF:
1038 error = do_sys_quotactl_quotaoff(mp,
1039 args->u.quotaoff.qc_idtype);
1040 break;
1041 default:
1042 error = EINVAL;
1043 break;
1044 }
1045
1046 vrele(vp);
1047 return error;
1048 }
1049
1050 /* ARGSUSED */
1051 int
1052 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1053 register_t *retval)
1054 {
1055 /* {
1056 syscallarg(const char *) path;
1057 syscallarg(struct quotactl_args *) args;
1058 } */
1059 struct quotactl_args args;
1060 int error;
1061
1062 error = copyin(SCARG(uap, args), &args, sizeof(args));
1063 if (error) {
1064 return error;
1065 }
1066
1067 return do_sys_quotactl(SCARG(uap, path), &args);
1068 }
1069
1070 int
1071 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1072 int root)
1073 {
1074 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1075 int error = 0;
1076
1077 /*
1078 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1079 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1080 * overrides MNT_NOWAIT.
1081 */
1082 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1083 (flags != MNT_WAIT && flags != 0)) {
1084 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1085 goto done;
1086 }
1087
1088 /* Get the filesystem stats now */
1089 memset(sp, 0, sizeof(*sp));
1090 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1091 return error;
1092 }
1093
1094 if (cwdi->cwdi_rdir == NULL)
1095 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1096 done:
1097 if (cwdi->cwdi_rdir != NULL) {
1098 size_t len;
1099 char *bp;
1100 char c;
1101 char *path = PNBUF_GET();
1102
1103 bp = path + MAXPATHLEN;
1104 *--bp = '\0';
1105 rw_enter(&cwdi->cwdi_lock, RW_READER);
1106 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1107 MAXPATHLEN / 2, 0, l);
1108 rw_exit(&cwdi->cwdi_lock);
1109 if (error) {
1110 PNBUF_PUT(path);
1111 return error;
1112 }
1113 len = strlen(bp);
1114 if (len != 1) {
1115 /*
1116 * for mount points that are below our root, we can see
1117 * them, so we fix up the pathname and return them. The
1118 * rest we cannot see, so we don't allow viewing the
1119 * data.
1120 */
1121 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1122 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1123 (void)strlcpy(sp->f_mntonname,
1124 c == '\0' ? "/" : &sp->f_mntonname[len],
1125 sizeof(sp->f_mntonname));
1126 } else {
1127 if (root)
1128 (void)strlcpy(sp->f_mntonname, "/",
1129 sizeof(sp->f_mntonname));
1130 else
1131 error = EPERM;
1132 }
1133 }
1134 PNBUF_PUT(path);
1135 }
1136 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1137 return error;
1138 }
1139
1140 /*
1141 * Get filesystem statistics by path.
1142 */
1143 int
1144 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1145 {
1146 struct mount *mp;
1147 int error;
1148 struct vnode *vp;
1149
1150 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1151 if (error != 0)
1152 return error;
1153 mp = vp->v_mount;
1154 error = dostatvfs(mp, sb, l, flags, 1);
1155 vrele(vp);
1156 return error;
1157 }
1158
1159 /* ARGSUSED */
1160 int
1161 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1162 {
1163 /* {
1164 syscallarg(const char *) path;
1165 syscallarg(struct statvfs *) buf;
1166 syscallarg(int) flags;
1167 } */
1168 struct statvfs *sb;
1169 int error;
1170
1171 sb = STATVFSBUF_GET();
1172 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1173 if (error == 0)
1174 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1175 STATVFSBUF_PUT(sb);
1176 return error;
1177 }
1178
1179 /*
1180 * Get filesystem statistics by fd.
1181 */
1182 int
1183 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1184 {
1185 file_t *fp;
1186 struct mount *mp;
1187 int error;
1188
1189 /* fd_getvnode() will use the descriptor for us */
1190 if ((error = fd_getvnode(fd, &fp)) != 0)
1191 return (error);
1192 mp = ((struct vnode *)fp->f_data)->v_mount;
1193 error = dostatvfs(mp, sb, curlwp, flags, 1);
1194 fd_putfile(fd);
1195 return error;
1196 }
1197
1198 /* ARGSUSED */
1199 int
1200 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1201 {
1202 /* {
1203 syscallarg(int) fd;
1204 syscallarg(struct statvfs *) buf;
1205 syscallarg(int) flags;
1206 } */
1207 struct statvfs *sb;
1208 int error;
1209
1210 sb = STATVFSBUF_GET();
1211 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1212 if (error == 0)
1213 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1214 STATVFSBUF_PUT(sb);
1215 return error;
1216 }
1217
1218
1219 /*
1220 * Get statistics on all filesystems.
1221 */
1222 int
1223 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1224 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1225 register_t *retval)
1226 {
1227 int root = 0;
1228 struct proc *p = l->l_proc;
1229 struct mount *mp, *nmp;
1230 struct statvfs *sb;
1231 size_t count, maxcount;
1232 int error = 0;
1233
1234 sb = STATVFSBUF_GET();
1235 maxcount = bufsize / entry_sz;
1236 mutex_enter(&mountlist_lock);
1237 count = 0;
1238 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1239 if (vfs_busy(mp, &nmp)) {
1240 continue;
1241 }
1242 if (sfsp && count < maxcount) {
1243 error = dostatvfs(mp, sb, l, flags, 0);
1244 if (error) {
1245 vfs_unbusy(mp, false, &nmp);
1246 error = 0;
1247 continue;
1248 }
1249 error = copyfn(sb, sfsp, entry_sz);
1250 if (error) {
1251 vfs_unbusy(mp, false, NULL);
1252 goto out;
1253 }
1254 sfsp = (char *)sfsp + entry_sz;
1255 root |= strcmp(sb->f_mntonname, "/") == 0;
1256 }
1257 count++;
1258 vfs_unbusy(mp, false, &nmp);
1259 }
1260 mutex_exit(&mountlist_lock);
1261
1262 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1263 /*
1264 * fake a root entry
1265 */
1266 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1267 sb, l, flags, 1);
1268 if (error != 0)
1269 goto out;
1270 if (sfsp) {
1271 error = copyfn(sb, sfsp, entry_sz);
1272 if (error != 0)
1273 goto out;
1274 }
1275 count++;
1276 }
1277 if (sfsp && count > maxcount)
1278 *retval = maxcount;
1279 else
1280 *retval = count;
1281 out:
1282 STATVFSBUF_PUT(sb);
1283 return error;
1284 }
1285
1286 int
1287 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1288 {
1289 /* {
1290 syscallarg(struct statvfs *) buf;
1291 syscallarg(size_t) bufsize;
1292 syscallarg(int) flags;
1293 } */
1294
1295 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1296 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1297 }
1298
1299 /*
1300 * Change current working directory to a given file descriptor.
1301 */
1302 /* ARGSUSED */
1303 int
1304 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1305 {
1306 /* {
1307 syscallarg(int) fd;
1308 } */
1309 struct proc *p = l->l_proc;
1310 struct cwdinfo *cwdi;
1311 struct vnode *vp, *tdp;
1312 struct mount *mp;
1313 file_t *fp;
1314 int error, fd;
1315
1316 /* fd_getvnode() will use the descriptor for us */
1317 fd = SCARG(uap, fd);
1318 if ((error = fd_getvnode(fd, &fp)) != 0)
1319 return (error);
1320 vp = fp->f_data;
1321
1322 vref(vp);
1323 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1324 if (vp->v_type != VDIR)
1325 error = ENOTDIR;
1326 else
1327 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1328 if (error) {
1329 vput(vp);
1330 goto out;
1331 }
1332 while ((mp = vp->v_mountedhere) != NULL) {
1333 error = vfs_busy(mp, NULL);
1334 vput(vp);
1335 if (error != 0)
1336 goto out;
1337 error = VFS_ROOT(mp, &tdp);
1338 vfs_unbusy(mp, false, NULL);
1339 if (error)
1340 goto out;
1341 vp = tdp;
1342 }
1343 VOP_UNLOCK(vp);
1344
1345 /*
1346 * Disallow changing to a directory not under the process's
1347 * current root directory (if there is one).
1348 */
1349 cwdi = p->p_cwdi;
1350 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1351 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1352 vrele(vp);
1353 error = EPERM; /* operation not permitted */
1354 } else {
1355 vrele(cwdi->cwdi_cdir);
1356 cwdi->cwdi_cdir = vp;
1357 }
1358 rw_exit(&cwdi->cwdi_lock);
1359
1360 out:
1361 fd_putfile(fd);
1362 return (error);
1363 }
1364
1365 /*
1366 * Change this process's notion of the root directory to a given file
1367 * descriptor.
1368 */
1369 int
1370 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1371 {
1372 struct proc *p = l->l_proc;
1373 struct vnode *vp;
1374 file_t *fp;
1375 int error, fd = SCARG(uap, fd);
1376
1377 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1378 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1379 return error;
1380 /* fd_getvnode() will use the descriptor for us */
1381 if ((error = fd_getvnode(fd, &fp)) != 0)
1382 return error;
1383 vp = fp->f_data;
1384 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1385 if (vp->v_type != VDIR)
1386 error = ENOTDIR;
1387 else
1388 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1389 VOP_UNLOCK(vp);
1390 if (error)
1391 goto out;
1392 vref(vp);
1393
1394 change_root(p->p_cwdi, vp, l);
1395
1396 out:
1397 fd_putfile(fd);
1398 return (error);
1399 }
1400
1401 /*
1402 * Change current working directory (``.'').
1403 */
1404 /* ARGSUSED */
1405 int
1406 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1407 {
1408 /* {
1409 syscallarg(const char *) path;
1410 } */
1411 struct proc *p = l->l_proc;
1412 struct cwdinfo *cwdi;
1413 int error;
1414 struct vnode *vp;
1415
1416 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1417 &vp, l)) != 0)
1418 return (error);
1419 cwdi = p->p_cwdi;
1420 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1421 vrele(cwdi->cwdi_cdir);
1422 cwdi->cwdi_cdir = vp;
1423 rw_exit(&cwdi->cwdi_lock);
1424 return (0);
1425 }
1426
1427 /*
1428 * Change notion of root (``/'') directory.
1429 */
1430 /* ARGSUSED */
1431 int
1432 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1433 {
1434 /* {
1435 syscallarg(const char *) path;
1436 } */
1437 struct proc *p = l->l_proc;
1438 int error;
1439 struct vnode *vp;
1440
1441 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1442 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1443 return (error);
1444 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1445 &vp, l)) != 0)
1446 return (error);
1447
1448 change_root(p->p_cwdi, vp, l);
1449
1450 return (0);
1451 }
1452
1453 /*
1454 * Common routine for chroot and fchroot.
1455 * NB: callers need to properly authorize the change root operation.
1456 */
1457 void
1458 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1459 {
1460 struct proc *p = l->l_proc;
1461 kauth_cred_t ncred;
1462
1463 ncred = kauth_cred_alloc();
1464
1465 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1466 if (cwdi->cwdi_rdir != NULL)
1467 vrele(cwdi->cwdi_rdir);
1468 cwdi->cwdi_rdir = vp;
1469
1470 /*
1471 * Prevent escaping from chroot by putting the root under
1472 * the working directory. Silently chdir to / if we aren't
1473 * already there.
1474 */
1475 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1476 /*
1477 * XXX would be more failsafe to change directory to a
1478 * deadfs node here instead
1479 */
1480 vrele(cwdi->cwdi_cdir);
1481 vref(vp);
1482 cwdi->cwdi_cdir = vp;
1483 }
1484 rw_exit(&cwdi->cwdi_lock);
1485
1486 /* Get a write lock on the process credential. */
1487 proc_crmod_enter();
1488
1489 kauth_cred_clone(p->p_cred, ncred);
1490 kauth_proc_chroot(ncred, p->p_cwdi);
1491
1492 /* Broadcast our credentials to the process and other LWPs. */
1493 proc_crmod_leave(ncred, p->p_cred, true);
1494 }
1495
1496 /*
1497 * Common routine for chroot and chdir.
1498 * XXX "where" should be enum uio_seg
1499 */
1500 int
1501 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1502 {
1503 struct pathbuf *pb;
1504 struct nameidata nd;
1505 int error;
1506
1507 error = pathbuf_maybe_copyin(path, where, &pb);
1508 if (error) {
1509 return error;
1510 }
1511 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1512 if ((error = namei(&nd)) != 0) {
1513 pathbuf_destroy(pb);
1514 return error;
1515 }
1516 *vpp = nd.ni_vp;
1517 pathbuf_destroy(pb);
1518
1519 if ((*vpp)->v_type != VDIR)
1520 error = ENOTDIR;
1521 else
1522 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1523
1524 if (error)
1525 vput(*vpp);
1526 else
1527 VOP_UNLOCK(*vpp);
1528 return (error);
1529 }
1530
1531 /*
1532 * Internals of sys_open - path has already been converted into a pathbuf
1533 * (so we can easily reuse this function from other parts of the kernel,
1534 * like posix_spawn post-processing).
1535 */
1536 int
1537 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1538 int open_mode, int *fd)
1539 {
1540 struct proc *p = l->l_proc;
1541 struct cwdinfo *cwdi = p->p_cwdi;
1542 file_t *fp;
1543 struct vnode *vp;
1544 int flags, cmode;
1545 int indx, error;
1546 struct nameidata nd;
1547
1548 if (open_flags & O_SEARCH) {
1549 open_flags &= ~(int)O_SEARCH;
1550 }
1551
1552 flags = FFLAGS(open_flags);
1553 if ((flags & (FREAD | FWRITE)) == 0)
1554 return EINVAL;
1555
1556 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1557 return error;
1558 }
1559
1560 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1561 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1562 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1563 if (dvp != NULL)
1564 NDAT(&nd, dvp);
1565
1566 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1567 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1568 fd_abort(p, fp, indx);
1569 if ((error == EDUPFD || error == EMOVEFD) &&
1570 l->l_dupfd >= 0 && /* XXX from fdopen */
1571 (error =
1572 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1573 *fd = indx;
1574 return 0;
1575 }
1576 if (error == ERESTART)
1577 error = EINTR;
1578 return error;
1579 }
1580
1581 l->l_dupfd = 0;
1582 vp = nd.ni_vp;
1583
1584 if ((error = open_setfp(l, fp, vp, indx, flags)))
1585 return error;
1586
1587 VOP_UNLOCK(vp);
1588 *fd = indx;
1589 fd_affix(p, fp, indx);
1590 return 0;
1591 }
1592
1593 int
1594 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1595 {
1596 struct pathbuf *pb;
1597 int error, oflags;
1598
1599 oflags = FFLAGS(open_flags);
1600 if ((oflags & (FREAD | FWRITE)) == 0)
1601 return EINVAL;
1602
1603 pb = pathbuf_create(path);
1604 if (pb == NULL)
1605 return ENOMEM;
1606
1607 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1608 pathbuf_destroy(pb);
1609
1610 return error;
1611 }
1612
1613 /*
1614 * Check permissions, allocate an open file structure,
1615 * and call the device open routine if any.
1616 */
1617 static int
1618 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1619 int mode, int *fd)
1620 {
1621 file_t *dfp = NULL;
1622 struct vnode *dvp = NULL;
1623 struct pathbuf *pb;
1624 int error;
1625
1626 error = pathbuf_copyin(path, &pb);
1627 if (error)
1628 return error;
1629
1630 if (fdat != AT_FDCWD) {
1631 /* fd_getvnode() will use the descriptor for us */
1632 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1633 goto out;
1634
1635 dvp = dfp->f_data;
1636 }
1637
1638 error = do_open(l, dvp, pb, flags, mode, fd);
1639
1640 if (dfp != NULL)
1641 fd_putfile(fdat);
1642 out:
1643 pathbuf_destroy(pb);
1644 return error;
1645 }
1646
1647 int
1648 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1649 {
1650 /* {
1651 syscallarg(const char *) path;
1652 syscallarg(int) flags;
1653 syscallarg(int) mode;
1654 } */
1655 int error;
1656 int fd;
1657
1658 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1659 SCARG(uap, flags), SCARG(uap, mode), &fd);
1660
1661 if (error == 0)
1662 *retval = fd;
1663
1664 return error;
1665 }
1666
1667 int
1668 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1669 {
1670 /* {
1671 syscallarg(int) fd;
1672 syscallarg(const char *) path;
1673 syscallarg(int) oflags;
1674 syscallarg(int) mode;
1675 } */
1676 int error;
1677 int fd;
1678
1679 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1680 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1681
1682 if (error == 0)
1683 *retval = fd;
1684
1685 return error;
1686 }
1687
1688 static void
1689 vfs__fhfree(fhandle_t *fhp)
1690 {
1691 size_t fhsize;
1692
1693 if (fhp == NULL) {
1694 return;
1695 }
1696 fhsize = FHANDLE_SIZE(fhp);
1697 kmem_free(fhp, fhsize);
1698 }
1699
1700 /*
1701 * vfs_composefh: compose a filehandle.
1702 */
1703
1704 int
1705 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1706 {
1707 struct mount *mp;
1708 struct fid *fidp;
1709 int error;
1710 size_t needfhsize;
1711 size_t fidsize;
1712
1713 mp = vp->v_mount;
1714 fidp = NULL;
1715 if (*fh_size < FHANDLE_SIZE_MIN) {
1716 fidsize = 0;
1717 } else {
1718 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1719 if (fhp != NULL) {
1720 memset(fhp, 0, *fh_size);
1721 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1722 fidp = &fhp->fh_fid;
1723 }
1724 }
1725 error = VFS_VPTOFH(vp, fidp, &fidsize);
1726 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1727 if (error == 0 && *fh_size < needfhsize) {
1728 error = E2BIG;
1729 }
1730 *fh_size = needfhsize;
1731 return error;
1732 }
1733
1734 int
1735 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1736 {
1737 struct mount *mp;
1738 fhandle_t *fhp;
1739 size_t fhsize;
1740 size_t fidsize;
1741 int error;
1742
1743 *fhpp = NULL;
1744 mp = vp->v_mount;
1745 fidsize = 0;
1746 error = VFS_VPTOFH(vp, NULL, &fidsize);
1747 KASSERT(error != 0);
1748 if (error != E2BIG) {
1749 goto out;
1750 }
1751 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1752 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1753 if (fhp == NULL) {
1754 error = ENOMEM;
1755 goto out;
1756 }
1757 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1758 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1759 if (error == 0) {
1760 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1761 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1762 *fhpp = fhp;
1763 } else {
1764 kmem_free(fhp, fhsize);
1765 }
1766 out:
1767 return error;
1768 }
1769
1770 void
1771 vfs_composefh_free(fhandle_t *fhp)
1772 {
1773
1774 vfs__fhfree(fhp);
1775 }
1776
1777 /*
1778 * vfs_fhtovp: lookup a vnode by a filehandle.
1779 */
1780
1781 int
1782 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1783 {
1784 struct mount *mp;
1785 int error;
1786
1787 *vpp = NULL;
1788 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1789 if (mp == NULL) {
1790 error = ESTALE;
1791 goto out;
1792 }
1793 if (mp->mnt_op->vfs_fhtovp == NULL) {
1794 error = EOPNOTSUPP;
1795 goto out;
1796 }
1797 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1798 out:
1799 return error;
1800 }
1801
1802 /*
1803 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1804 * the needed size.
1805 */
1806
1807 int
1808 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1809 {
1810 fhandle_t *fhp;
1811 int error;
1812
1813 *fhpp = NULL;
1814 if (fhsize > FHANDLE_SIZE_MAX) {
1815 return EINVAL;
1816 }
1817 if (fhsize < FHANDLE_SIZE_MIN) {
1818 return EINVAL;
1819 }
1820 again:
1821 fhp = kmem_alloc(fhsize, KM_SLEEP);
1822 if (fhp == NULL) {
1823 return ENOMEM;
1824 }
1825 error = copyin(ufhp, fhp, fhsize);
1826 if (error == 0) {
1827 /* XXX this check shouldn't be here */
1828 if (FHANDLE_SIZE(fhp) == fhsize) {
1829 *fhpp = fhp;
1830 return 0;
1831 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1832 /*
1833 * a kludge for nfsv2 padded handles.
1834 */
1835 size_t sz;
1836
1837 sz = FHANDLE_SIZE(fhp);
1838 kmem_free(fhp, fhsize);
1839 fhsize = sz;
1840 goto again;
1841 } else {
1842 /*
1843 * userland told us wrong size.
1844 */
1845 error = EINVAL;
1846 }
1847 }
1848 kmem_free(fhp, fhsize);
1849 return error;
1850 }
1851
1852 void
1853 vfs_copyinfh_free(fhandle_t *fhp)
1854 {
1855
1856 vfs__fhfree(fhp);
1857 }
1858
1859 /*
1860 * Get file handle system call
1861 */
1862 int
1863 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1864 {
1865 /* {
1866 syscallarg(char *) fname;
1867 syscallarg(fhandle_t *) fhp;
1868 syscallarg(size_t *) fh_size;
1869 } */
1870 struct vnode *vp;
1871 fhandle_t *fh;
1872 int error;
1873 struct pathbuf *pb;
1874 struct nameidata nd;
1875 size_t sz;
1876 size_t usz;
1877
1878 /*
1879 * Must be super user
1880 */
1881 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1882 0, NULL, NULL, NULL);
1883 if (error)
1884 return (error);
1885
1886 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1887 if (error) {
1888 return error;
1889 }
1890 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1891 error = namei(&nd);
1892 if (error) {
1893 pathbuf_destroy(pb);
1894 return error;
1895 }
1896 vp = nd.ni_vp;
1897 pathbuf_destroy(pb);
1898
1899 error = vfs_composefh_alloc(vp, &fh);
1900 vput(vp);
1901 if (error != 0) {
1902 goto out;
1903 }
1904 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1905 if (error != 0) {
1906 goto out;
1907 }
1908 sz = FHANDLE_SIZE(fh);
1909 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1910 if (error != 0) {
1911 goto out;
1912 }
1913 if (usz >= sz) {
1914 error = copyout(fh, SCARG(uap, fhp), sz);
1915 } else {
1916 error = E2BIG;
1917 }
1918 out:
1919 vfs_composefh_free(fh);
1920 return (error);
1921 }
1922
1923 /*
1924 * Open a file given a file handle.
1925 *
1926 * Check permissions, allocate an open file structure,
1927 * and call the device open routine if any.
1928 */
1929
1930 int
1931 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1932 register_t *retval)
1933 {
1934 file_t *fp;
1935 struct vnode *vp = NULL;
1936 kauth_cred_t cred = l->l_cred;
1937 file_t *nfp;
1938 int indx, error = 0;
1939 struct vattr va;
1940 fhandle_t *fh;
1941 int flags;
1942 proc_t *p;
1943
1944 p = curproc;
1945
1946 /*
1947 * Must be super user
1948 */
1949 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1950 0, NULL, NULL, NULL)))
1951 return (error);
1952
1953 if (oflags & O_SEARCH) {
1954 oflags &= ~(int)O_SEARCH;
1955 }
1956
1957 flags = FFLAGS(oflags);
1958 if ((flags & (FREAD | FWRITE)) == 0)
1959 return (EINVAL);
1960 if ((flags & O_CREAT))
1961 return (EINVAL);
1962 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1963 return (error);
1964 fp = nfp;
1965 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1966 if (error != 0) {
1967 goto bad;
1968 }
1969 error = vfs_fhtovp(fh, &vp);
1970 if (error != 0) {
1971 goto bad;
1972 }
1973
1974 /* Now do an effective vn_open */
1975
1976 if (vp->v_type == VSOCK) {
1977 error = EOPNOTSUPP;
1978 goto bad;
1979 }
1980 error = vn_openchk(vp, cred, flags);
1981 if (error != 0)
1982 goto bad;
1983 if (flags & O_TRUNC) {
1984 VOP_UNLOCK(vp); /* XXX */
1985 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1986 vattr_null(&va);
1987 va.va_size = 0;
1988 error = VOP_SETATTR(vp, &va, cred);
1989 if (error)
1990 goto bad;
1991 }
1992 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1993 goto bad;
1994 if (flags & FWRITE) {
1995 mutex_enter(vp->v_interlock);
1996 vp->v_writecount++;
1997 mutex_exit(vp->v_interlock);
1998 }
1999
2000 /* done with modified vn_open, now finish what sys_open does. */
2001 if ((error = open_setfp(l, fp, vp, indx, flags)))
2002 return error;
2003
2004 VOP_UNLOCK(vp);
2005 *retval = indx;
2006 fd_affix(p, fp, indx);
2007 vfs_copyinfh_free(fh);
2008 return (0);
2009
2010 bad:
2011 fd_abort(p, fp, indx);
2012 if (vp != NULL)
2013 vput(vp);
2014 vfs_copyinfh_free(fh);
2015 return (error);
2016 }
2017
2018 int
2019 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2020 {
2021 /* {
2022 syscallarg(const void *) fhp;
2023 syscallarg(size_t) fh_size;
2024 syscallarg(int) flags;
2025 } */
2026
2027 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2028 SCARG(uap, flags), retval);
2029 }
2030
2031 int
2032 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2033 {
2034 int error;
2035 fhandle_t *fh;
2036 struct vnode *vp;
2037
2038 /*
2039 * Must be super user
2040 */
2041 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2042 0, NULL, NULL, NULL)))
2043 return (error);
2044
2045 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2046 if (error != 0)
2047 return error;
2048
2049 error = vfs_fhtovp(fh, &vp);
2050 vfs_copyinfh_free(fh);
2051 if (error != 0)
2052 return error;
2053
2054 error = vn_stat(vp, sb);
2055 vput(vp);
2056 return error;
2057 }
2058
2059
2060 /* ARGSUSED */
2061 int
2062 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2063 {
2064 /* {
2065 syscallarg(const void *) fhp;
2066 syscallarg(size_t) fh_size;
2067 syscallarg(struct stat *) sb;
2068 } */
2069 struct stat sb;
2070 int error;
2071
2072 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2073 if (error)
2074 return error;
2075 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2076 }
2077
2078 int
2079 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2080 int flags)
2081 {
2082 fhandle_t *fh;
2083 struct mount *mp;
2084 struct vnode *vp;
2085 int error;
2086
2087 /*
2088 * Must be super user
2089 */
2090 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2091 0, NULL, NULL, NULL)))
2092 return error;
2093
2094 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2095 if (error != 0)
2096 return error;
2097
2098 error = vfs_fhtovp(fh, &vp);
2099 vfs_copyinfh_free(fh);
2100 if (error != 0)
2101 return error;
2102
2103 mp = vp->v_mount;
2104 error = dostatvfs(mp, sb, l, flags, 1);
2105 vput(vp);
2106 return error;
2107 }
2108
2109 /* ARGSUSED */
2110 int
2111 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2112 {
2113 /* {
2114 syscallarg(const void *) fhp;
2115 syscallarg(size_t) fh_size;
2116 syscallarg(struct statvfs *) buf;
2117 syscallarg(int) flags;
2118 } */
2119 struct statvfs *sb = STATVFSBUF_GET();
2120 int error;
2121
2122 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2123 SCARG(uap, flags));
2124 if (error == 0)
2125 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2126 STATVFSBUF_PUT(sb);
2127 return error;
2128 }
2129
2130 /*
2131 * Create a special file.
2132 */
2133 /* ARGSUSED */
2134 int
2135 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2136 register_t *retval)
2137 {
2138 /* {
2139 syscallarg(const char *) path;
2140 syscallarg(mode_t) mode;
2141 syscallarg(dev_t) dev;
2142 } */
2143 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2144 SCARG(uap, dev), retval, UIO_USERSPACE);
2145 }
2146
2147 int
2148 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2149 register_t *retval)
2150 {
2151 /* {
2152 syscallarg(int) fd;
2153 syscallarg(const char *) path;
2154 syscallarg(mode_t) mode;
2155 syscallarg(int) pad;
2156 syscallarg(dev_t) dev;
2157 } */
2158
2159 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2160 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2161 }
2162
2163 int
2164 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2165 register_t *retval, enum uio_seg seg)
2166 {
2167 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2168 }
2169
2170 int
2171 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2172 dev_t dev, register_t *retval, enum uio_seg seg)
2173 {
2174 struct proc *p = l->l_proc;
2175 struct vnode *vp;
2176 struct vattr vattr;
2177 int error, optype;
2178 struct pathbuf *pb;
2179 struct nameidata nd;
2180 const char *pathstring;
2181
2182 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2183 0, NULL, NULL, NULL)) != 0)
2184 return (error);
2185
2186 optype = VOP_MKNOD_DESCOFFSET;
2187
2188 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2189 if (error) {
2190 return error;
2191 }
2192 pathstring = pathbuf_stringcopy_get(pb);
2193 if (pathstring == NULL) {
2194 pathbuf_destroy(pb);
2195 return ENOMEM;
2196 }
2197
2198 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2199
2200 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2201 goto out;
2202 vp = nd.ni_vp;
2203
2204 if (vp != NULL)
2205 error = EEXIST;
2206 else {
2207 vattr_null(&vattr);
2208 /* We will read cwdi->cwdi_cmask unlocked. */
2209 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2210 vattr.va_rdev = dev;
2211
2212 switch (mode & S_IFMT) {
2213 case S_IFMT: /* used by badsect to flag bad sectors */
2214 vattr.va_type = VBAD;
2215 break;
2216 case S_IFCHR:
2217 vattr.va_type = VCHR;
2218 break;
2219 case S_IFBLK:
2220 vattr.va_type = VBLK;
2221 break;
2222 case S_IFWHT:
2223 optype = VOP_WHITEOUT_DESCOFFSET;
2224 break;
2225 case S_IFREG:
2226 #if NVERIEXEC > 0
2227 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2228 O_CREAT);
2229 #endif /* NVERIEXEC > 0 */
2230 vattr.va_type = VREG;
2231 vattr.va_rdev = VNOVAL;
2232 optype = VOP_CREATE_DESCOFFSET;
2233 break;
2234 default:
2235 error = EINVAL;
2236 break;
2237 }
2238 }
2239 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2240 && vattr.va_rdev == VNOVAL)
2241 error = EINVAL;
2242 if (!error) {
2243 switch (optype) {
2244 case VOP_WHITEOUT_DESCOFFSET:
2245 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2246 if (error)
2247 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2248 vput(nd.ni_dvp);
2249 break;
2250
2251 case VOP_MKNOD_DESCOFFSET:
2252 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2253 &nd.ni_cnd, &vattr);
2254 if (error == 0)
2255 vrele(nd.ni_vp);
2256 vput(nd.ni_dvp);
2257 break;
2258
2259 case VOP_CREATE_DESCOFFSET:
2260 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2261 &nd.ni_cnd, &vattr);
2262 if (error == 0)
2263 vrele(nd.ni_vp);
2264 vput(nd.ni_dvp);
2265 break;
2266 }
2267 } else {
2268 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2269 if (nd.ni_dvp == vp)
2270 vrele(nd.ni_dvp);
2271 else
2272 vput(nd.ni_dvp);
2273 if (vp)
2274 vrele(vp);
2275 }
2276 out:
2277 pathbuf_stringcopy_put(pb, pathstring);
2278 pathbuf_destroy(pb);
2279 return (error);
2280 }
2281
2282 /*
2283 * Create a named pipe.
2284 */
2285 /* ARGSUSED */
2286 int
2287 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2288 {
2289 /* {
2290 syscallarg(const char *) path;
2291 syscallarg(int) mode;
2292 } */
2293 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2294 }
2295
2296 int
2297 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2298 register_t *retval)
2299 {
2300 /* {
2301 syscallarg(int) fd;
2302 syscallarg(const char *) path;
2303 syscallarg(int) mode;
2304 } */
2305
2306 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2307 SCARG(uap, mode));
2308 }
2309
2310 static int
2311 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2312 {
2313 struct proc *p = l->l_proc;
2314 struct vattr vattr;
2315 int error;
2316 struct pathbuf *pb;
2317 struct nameidata nd;
2318
2319 error = pathbuf_copyin(path, &pb);
2320 if (error) {
2321 return error;
2322 }
2323 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2324
2325 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2326 pathbuf_destroy(pb);
2327 return error;
2328 }
2329 if (nd.ni_vp != NULL) {
2330 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2331 if (nd.ni_dvp == nd.ni_vp)
2332 vrele(nd.ni_dvp);
2333 else
2334 vput(nd.ni_dvp);
2335 vrele(nd.ni_vp);
2336 pathbuf_destroy(pb);
2337 return (EEXIST);
2338 }
2339 vattr_null(&vattr);
2340 vattr.va_type = VFIFO;
2341 /* We will read cwdi->cwdi_cmask unlocked. */
2342 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2343 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2344 if (error == 0)
2345 vrele(nd.ni_vp);
2346 vput(nd.ni_dvp);
2347 pathbuf_destroy(pb);
2348 return (error);
2349 }
2350
2351 /*
2352 * Make a hard file link.
2353 */
2354 /* ARGSUSED */
2355 int
2356 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2357 const char *link, int follow, register_t *retval)
2358 {
2359 struct vnode *vp;
2360 struct pathbuf *linkpb;
2361 struct nameidata nd;
2362 namei_simple_flags_t ns_flags;
2363 int error;
2364
2365 if (follow & AT_SYMLINK_FOLLOW)
2366 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2367 else
2368 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2369
2370 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2371 if (error != 0)
2372 return (error);
2373 error = pathbuf_copyin(link, &linkpb);
2374 if (error) {
2375 goto out1;
2376 }
2377 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2378 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2379 goto out2;
2380 if (nd.ni_vp) {
2381 error = EEXIST;
2382 goto abortop;
2383 }
2384 /* Prevent hard links on directories. */
2385 if (vp->v_type == VDIR) {
2386 error = EPERM;
2387 goto abortop;
2388 }
2389 /* Prevent cross-mount operation. */
2390 if (nd.ni_dvp->v_mount != vp->v_mount) {
2391 error = EXDEV;
2392 goto abortop;
2393 }
2394 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2395 out2:
2396 pathbuf_destroy(linkpb);
2397 out1:
2398 vrele(vp);
2399 return (error);
2400 abortop:
2401 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2402 if (nd.ni_dvp == nd.ni_vp)
2403 vrele(nd.ni_dvp);
2404 else
2405 vput(nd.ni_dvp);
2406 if (nd.ni_vp != NULL)
2407 vrele(nd.ni_vp);
2408 goto out2;
2409 }
2410
2411 int
2412 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2413 {
2414 /* {
2415 syscallarg(const char *) path;
2416 syscallarg(const char *) link;
2417 } */
2418 const char *path = SCARG(uap, path);
2419 const char *link = SCARG(uap, link);
2420
2421 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2422 AT_SYMLINK_FOLLOW, retval);
2423 }
2424
2425 int
2426 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2427 register_t *retval)
2428 {
2429 /* {
2430 syscallarg(int) fd1;
2431 syscallarg(const char *) name1;
2432 syscallarg(int) fd2;
2433 syscallarg(const char *) name2;
2434 syscallarg(int) flags;
2435 } */
2436 int fd1 = SCARG(uap, fd1);
2437 const char *name1 = SCARG(uap, name1);
2438 int fd2 = SCARG(uap, fd2);
2439 const char *name2 = SCARG(uap, name2);
2440 int follow;
2441
2442 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2443
2444 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2445 }
2446
2447
2448 int
2449 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2450 {
2451 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2452 }
2453
2454 static int
2455 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2456 const char *link, enum uio_seg seg)
2457 {
2458 struct proc *p = curproc;
2459 struct vattr vattr;
2460 char *path;
2461 int error;
2462 struct pathbuf *linkpb;
2463 struct nameidata nd;
2464
2465 KASSERT(l != NULL || fdat == AT_FDCWD);
2466
2467 path = PNBUF_GET();
2468 if (seg == UIO_USERSPACE) {
2469 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2470 goto out1;
2471 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2472 goto out1;
2473 } else {
2474 KASSERT(strlen(patharg) < MAXPATHLEN);
2475 strcpy(path, patharg);
2476 linkpb = pathbuf_create(link);
2477 if (linkpb == NULL) {
2478 error = ENOMEM;
2479 goto out1;
2480 }
2481 }
2482 ktrkuser("symlink-target", path, strlen(path));
2483
2484 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2485 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2486 goto out2;
2487 if (nd.ni_vp) {
2488 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2489 if (nd.ni_dvp == nd.ni_vp)
2490 vrele(nd.ni_dvp);
2491 else
2492 vput(nd.ni_dvp);
2493 vrele(nd.ni_vp);
2494 error = EEXIST;
2495 goto out2;
2496 }
2497 vattr_null(&vattr);
2498 vattr.va_type = VLNK;
2499 /* We will read cwdi->cwdi_cmask unlocked. */
2500 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2501 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2502 if (error == 0)
2503 vrele(nd.ni_vp);
2504 vput(nd.ni_dvp);
2505 out2:
2506 pathbuf_destroy(linkpb);
2507 out1:
2508 PNBUF_PUT(path);
2509 return (error);
2510 }
2511
2512 /*
2513 * Make a symbolic link.
2514 */
2515 /* ARGSUSED */
2516 int
2517 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2518 {
2519 /* {
2520 syscallarg(const char *) path;
2521 syscallarg(const char *) link;
2522 } */
2523
2524 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2525 UIO_USERSPACE);
2526 }
2527
2528 int
2529 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2530 register_t *retval)
2531 {
2532 /* {
2533 syscallarg(const char *) path1;
2534 syscallarg(int) fd;
2535 syscallarg(const char *) path2;
2536 } */
2537
2538 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2539 SCARG(uap, path2), UIO_USERSPACE);
2540 }
2541
2542 /*
2543 * Delete a whiteout from the filesystem.
2544 */
2545 /* ARGSUSED */
2546 int
2547 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2548 {
2549 /* {
2550 syscallarg(const char *) path;
2551 } */
2552 int error;
2553 struct pathbuf *pb;
2554 struct nameidata nd;
2555
2556 error = pathbuf_copyin(SCARG(uap, path), &pb);
2557 if (error) {
2558 return error;
2559 }
2560
2561 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2562 error = namei(&nd);
2563 if (error) {
2564 pathbuf_destroy(pb);
2565 return (error);
2566 }
2567
2568 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2569 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2570 if (nd.ni_dvp == nd.ni_vp)
2571 vrele(nd.ni_dvp);
2572 else
2573 vput(nd.ni_dvp);
2574 if (nd.ni_vp)
2575 vrele(nd.ni_vp);
2576 pathbuf_destroy(pb);
2577 return (EEXIST);
2578 }
2579 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2580 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2581 vput(nd.ni_dvp);
2582 pathbuf_destroy(pb);
2583 return (error);
2584 }
2585
2586 /*
2587 * Delete a name from the filesystem.
2588 */
2589 /* ARGSUSED */
2590 int
2591 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2592 {
2593 /* {
2594 syscallarg(const char *) path;
2595 } */
2596
2597 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2598 }
2599
2600 int
2601 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2602 register_t *retval)
2603 {
2604 /* {
2605 syscallarg(int) fd;
2606 syscallarg(const char *) path;
2607 syscallarg(int) flag;
2608 } */
2609
2610 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2611 SCARG(uap, flag), UIO_USERSPACE);
2612 }
2613
2614 int
2615 do_sys_unlink(const char *arg, enum uio_seg seg)
2616 {
2617 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2618 }
2619
2620 static int
2621 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2622 enum uio_seg seg)
2623 {
2624 struct vnode *vp;
2625 int error;
2626 struct pathbuf *pb;
2627 struct nameidata nd;
2628 const char *pathstring;
2629
2630 KASSERT(l != NULL || fdat == AT_FDCWD);
2631
2632 error = pathbuf_maybe_copyin(arg, seg, &pb);
2633 if (error) {
2634 return error;
2635 }
2636 pathstring = pathbuf_stringcopy_get(pb);
2637 if (pathstring == NULL) {
2638 pathbuf_destroy(pb);
2639 return ENOMEM;
2640 }
2641
2642 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2643 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2644 goto out;
2645 vp = nd.ni_vp;
2646
2647 /*
2648 * The root of a mounted filesystem cannot be deleted.
2649 */
2650 if ((vp->v_vflag & VV_ROOT) != 0) {
2651 error = EBUSY;
2652 goto abort;
2653 }
2654
2655 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2656 error = EBUSY;
2657 goto abort;
2658 }
2659
2660 /*
2661 * No rmdir "." please.
2662 */
2663 if (nd.ni_dvp == vp) {
2664 error = EINVAL;
2665 goto abort;
2666 }
2667
2668 /*
2669 * AT_REMOVEDIR is required to remove a directory
2670 */
2671 if (vp->v_type == VDIR) {
2672 if (!(flags & AT_REMOVEDIR)) {
2673 error = EPERM;
2674 goto abort;
2675 } else {
2676 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2677 goto out;
2678 }
2679 }
2680
2681 /*
2682 * Starting here we only deal with non directories.
2683 */
2684 if (flags & AT_REMOVEDIR) {
2685 error = ENOTDIR;
2686 goto abort;
2687 }
2688
2689
2690 #if NVERIEXEC > 0
2691 /* Handle remove requests for veriexec entries. */
2692 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2693 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2694 if (nd.ni_dvp == vp)
2695 vrele(nd.ni_dvp);
2696 else
2697 vput(nd.ni_dvp);
2698 vput(vp);
2699 goto out;
2700 }
2701 #endif /* NVERIEXEC > 0 */
2702
2703 #ifdef FILEASSOC
2704 (void)fileassoc_file_delete(vp);
2705 #endif /* FILEASSOC */
2706 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2707 goto out;
2708
2709 abort:
2710 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2711 if (nd.ni_dvp == vp)
2712 vrele(nd.ni_dvp);
2713 else
2714 vput(nd.ni_dvp);
2715 vput(vp);
2716
2717 out:
2718 pathbuf_stringcopy_put(pb, pathstring);
2719 pathbuf_destroy(pb);
2720 return (error);
2721 }
2722
2723 /*
2724 * Reposition read/write file offset.
2725 */
2726 int
2727 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2728 {
2729 /* {
2730 syscallarg(int) fd;
2731 syscallarg(int) pad;
2732 syscallarg(off_t) offset;
2733 syscallarg(int) whence;
2734 } */
2735 kauth_cred_t cred = l->l_cred;
2736 file_t *fp;
2737 struct vnode *vp;
2738 struct vattr vattr;
2739 off_t newoff;
2740 int error, fd;
2741
2742 fd = SCARG(uap, fd);
2743
2744 if ((fp = fd_getfile(fd)) == NULL)
2745 return (EBADF);
2746
2747 vp = fp->f_data;
2748 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2749 error = ESPIPE;
2750 goto out;
2751 }
2752
2753 switch (SCARG(uap, whence)) {
2754 case SEEK_CUR:
2755 newoff = fp->f_offset + SCARG(uap, offset);
2756 break;
2757 case SEEK_END:
2758 vn_lock(vp, LK_SHARED | LK_RETRY);
2759 error = VOP_GETATTR(vp, &vattr, cred);
2760 VOP_UNLOCK(vp);
2761 if (error) {
2762 goto out;
2763 }
2764 newoff = SCARG(uap, offset) + vattr.va_size;
2765 break;
2766 case SEEK_SET:
2767 newoff = SCARG(uap, offset);
2768 break;
2769 default:
2770 error = EINVAL;
2771 goto out;
2772 }
2773 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2774 *(off_t *)retval = fp->f_offset = newoff;
2775 }
2776 out:
2777 fd_putfile(fd);
2778 return (error);
2779 }
2780
2781 /*
2782 * Positional read system call.
2783 */
2784 int
2785 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2786 {
2787 /* {
2788 syscallarg(int) fd;
2789 syscallarg(void *) buf;
2790 syscallarg(size_t) nbyte;
2791 syscallarg(off_t) offset;
2792 } */
2793 file_t *fp;
2794 struct vnode *vp;
2795 off_t offset;
2796 int error, fd = SCARG(uap, fd);
2797
2798 if ((fp = fd_getfile(fd)) == NULL)
2799 return (EBADF);
2800
2801 if ((fp->f_flag & FREAD) == 0) {
2802 fd_putfile(fd);
2803 return (EBADF);
2804 }
2805
2806 vp = fp->f_data;
2807 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2808 error = ESPIPE;
2809 goto out;
2810 }
2811
2812 offset = SCARG(uap, offset);
2813
2814 /*
2815 * XXX This works because no file systems actually
2816 * XXX take any action on the seek operation.
2817 */
2818 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2819 goto out;
2820
2821 /* dofileread() will unuse the descriptor for us */
2822 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2823 &offset, 0, retval));
2824
2825 out:
2826 fd_putfile(fd);
2827 return (error);
2828 }
2829
2830 /*
2831 * Positional scatter read system call.
2832 */
2833 int
2834 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2835 {
2836 /* {
2837 syscallarg(int) fd;
2838 syscallarg(const struct iovec *) iovp;
2839 syscallarg(int) iovcnt;
2840 syscallarg(off_t) offset;
2841 } */
2842 off_t offset = SCARG(uap, offset);
2843
2844 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2845 SCARG(uap, iovcnt), &offset, 0, retval);
2846 }
2847
2848 /*
2849 * Positional write system call.
2850 */
2851 int
2852 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2853 {
2854 /* {
2855 syscallarg(int) fd;
2856 syscallarg(const void *) buf;
2857 syscallarg(size_t) nbyte;
2858 syscallarg(off_t) offset;
2859 } */
2860 file_t *fp;
2861 struct vnode *vp;
2862 off_t offset;
2863 int error, fd = SCARG(uap, fd);
2864
2865 if ((fp = fd_getfile(fd)) == NULL)
2866 return (EBADF);
2867
2868 if ((fp->f_flag & FWRITE) == 0) {
2869 fd_putfile(fd);
2870 return (EBADF);
2871 }
2872
2873 vp = fp->f_data;
2874 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2875 error = ESPIPE;
2876 goto out;
2877 }
2878
2879 offset = SCARG(uap, offset);
2880
2881 /*
2882 * XXX This works because no file systems actually
2883 * XXX take any action on the seek operation.
2884 */
2885 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2886 goto out;
2887
2888 /* dofilewrite() will unuse the descriptor for us */
2889 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2890 &offset, 0, retval));
2891
2892 out:
2893 fd_putfile(fd);
2894 return (error);
2895 }
2896
2897 /*
2898 * Positional gather write system call.
2899 */
2900 int
2901 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2902 {
2903 /* {
2904 syscallarg(int) fd;
2905 syscallarg(const struct iovec *) iovp;
2906 syscallarg(int) iovcnt;
2907 syscallarg(off_t) offset;
2908 } */
2909 off_t offset = SCARG(uap, offset);
2910
2911 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2912 SCARG(uap, iovcnt), &offset, 0, retval);
2913 }
2914
2915 /*
2916 * Check access permissions.
2917 */
2918 int
2919 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2920 {
2921 /* {
2922 syscallarg(const char *) path;
2923 syscallarg(int) flags;
2924 } */
2925
2926 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2927 SCARG(uap, flags), 0);
2928 }
2929
2930 int
2931 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2932 int mode, int flags)
2933 {
2934 kauth_cred_t cred;
2935 struct vnode *vp;
2936 int error, nd_flag, vmode;
2937 struct pathbuf *pb;
2938 struct nameidata nd;
2939
2940 CTASSERT(F_OK == 0);
2941 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2942 /* nonsense mode */
2943 return EINVAL;
2944 }
2945
2946 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2947 if (flags & AT_SYMLINK_NOFOLLOW)
2948 nd_flag &= ~FOLLOW;
2949
2950 error = pathbuf_copyin(path, &pb);
2951 if (error)
2952 return error;
2953
2954 NDINIT(&nd, LOOKUP, nd_flag, pb);
2955
2956 /* Override default credentials */
2957 cred = kauth_cred_dup(l->l_cred);
2958 if (!(flags & AT_EACCESS)) {
2959 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2960 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2961 }
2962 nd.ni_cnd.cn_cred = cred;
2963
2964 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2965 pathbuf_destroy(pb);
2966 goto out;
2967 }
2968 vp = nd.ni_vp;
2969 pathbuf_destroy(pb);
2970
2971 /* Flags == 0 means only check for existence. */
2972 if (mode) {
2973 vmode = 0;
2974 if (mode & R_OK)
2975 vmode |= VREAD;
2976 if (mode & W_OK)
2977 vmode |= VWRITE;
2978 if (mode & X_OK)
2979 vmode |= VEXEC;
2980
2981 error = VOP_ACCESS(vp, vmode, cred);
2982 if (!error && (vmode & VWRITE))
2983 error = vn_writechk(vp);
2984 }
2985 vput(vp);
2986 out:
2987 kauth_cred_free(cred);
2988 return (error);
2989 }
2990
2991 int
2992 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
2993 register_t *retval)
2994 {
2995 /* {
2996 syscallarg(int) fd;
2997 syscallarg(const char *) path;
2998 syscallarg(int) amode;
2999 syscallarg(int) flag;
3000 } */
3001
3002 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3003 SCARG(uap, amode), SCARG(uap, flag));
3004 }
3005
3006 /*
3007 * Common code for all sys_stat functions, including compat versions.
3008 */
3009 int
3010 do_sys_stat(const char *userpath, unsigned int nd_flag,
3011 struct stat *sb)
3012 {
3013 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3014 }
3015
3016 int
3017 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3018 unsigned int nd_flag, struct stat *sb)
3019 {
3020 int error;
3021 struct pathbuf *pb;
3022 struct nameidata nd;
3023
3024 KASSERT(l != NULL || fdat == AT_FDCWD);
3025
3026 error = pathbuf_copyin(userpath, &pb);
3027 if (error) {
3028 return error;
3029 }
3030
3031 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3032
3033 error = fd_nameiat(l, fdat, &nd);
3034 if (error != 0) {
3035 pathbuf_destroy(pb);
3036 return error;
3037 }
3038 error = vn_stat(nd.ni_vp, sb);
3039 vput(nd.ni_vp);
3040 pathbuf_destroy(pb);
3041 return error;
3042 }
3043
3044 /*
3045 * Get file status; this version follows links.
3046 */
3047 /* ARGSUSED */
3048 int
3049 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3050 {
3051 /* {
3052 syscallarg(const char *) path;
3053 syscallarg(struct stat *) ub;
3054 } */
3055 struct stat sb;
3056 int error;
3057
3058 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3059 if (error)
3060 return error;
3061 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3062 }
3063
3064 /*
3065 * Get file status; this version does not follow links.
3066 */
3067 /* ARGSUSED */
3068 int
3069 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3070 {
3071 /* {
3072 syscallarg(const char *) path;
3073 syscallarg(struct stat *) ub;
3074 } */
3075 struct stat sb;
3076 int error;
3077
3078 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3079 if (error)
3080 return error;
3081 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3082 }
3083
3084 int
3085 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3086 register_t *retval)
3087 {
3088 /* {
3089 syscallarg(int) fd;
3090 syscallarg(const char *) path;
3091 syscallarg(struct stat *) buf;
3092 syscallarg(int) flag;
3093 } */
3094 unsigned int nd_flag;
3095 struct stat sb;
3096 int error;
3097
3098 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3099 nd_flag = NOFOLLOW;
3100 else
3101 nd_flag = FOLLOW;
3102
3103 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3104 &sb);
3105 if (error)
3106 return error;
3107 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3108 }
3109
3110 /*
3111 * Get configurable pathname variables.
3112 */
3113 /* ARGSUSED */
3114 int
3115 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3116 {
3117 /* {
3118 syscallarg(const char *) path;
3119 syscallarg(int) name;
3120 } */
3121 int error;
3122 struct pathbuf *pb;
3123 struct nameidata nd;
3124
3125 error = pathbuf_copyin(SCARG(uap, path), &pb);
3126 if (error) {
3127 return error;
3128 }
3129 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3130 if ((error = namei(&nd)) != 0) {
3131 pathbuf_destroy(pb);
3132 return (error);
3133 }
3134 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3135 vput(nd.ni_vp);
3136 pathbuf_destroy(pb);
3137 return (error);
3138 }
3139
3140 /*
3141 * Return target name of a symbolic link.
3142 */
3143 /* ARGSUSED */
3144 int
3145 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3146 register_t *retval)
3147 {
3148 /* {
3149 syscallarg(const char *) path;
3150 syscallarg(char *) buf;
3151 syscallarg(size_t) count;
3152 } */
3153 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3154 SCARG(uap, buf), SCARG(uap, count), retval);
3155 }
3156
3157 static int
3158 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3159 size_t count, register_t *retval)
3160 {
3161 struct vnode *vp;
3162 struct iovec aiov;
3163 struct uio auio;
3164 int error;
3165 struct pathbuf *pb;
3166 struct nameidata nd;
3167
3168 error = pathbuf_copyin(path, &pb);
3169 if (error) {
3170 return error;
3171 }
3172 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3173 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3174 pathbuf_destroy(pb);
3175 return error;
3176 }
3177 vp = nd.ni_vp;
3178 pathbuf_destroy(pb);
3179 if (vp->v_type != VLNK)
3180 error = EINVAL;
3181 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3182 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3183 aiov.iov_base = buf;
3184 aiov.iov_len = count;
3185 auio.uio_iov = &aiov;
3186 auio.uio_iovcnt = 1;
3187 auio.uio_offset = 0;
3188 auio.uio_rw = UIO_READ;
3189 KASSERT(l == curlwp);
3190 auio.uio_vmspace = l->l_proc->p_vmspace;
3191 auio.uio_resid = count;
3192 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3193 *retval = count - auio.uio_resid;
3194 }
3195 vput(vp);
3196 return (error);
3197 }
3198
3199 int
3200 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3201 register_t *retval)
3202 {
3203 /* {
3204 syscallarg(int) fd;
3205 syscallarg(const char *) path;
3206 syscallarg(char *) buf;
3207 syscallarg(size_t) bufsize;
3208 } */
3209
3210 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3211 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3212 }
3213
3214 /*
3215 * Change flags of a file given a path name.
3216 */
3217 /* ARGSUSED */
3218 int
3219 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3220 {
3221 /* {
3222 syscallarg(const char *) path;
3223 syscallarg(u_long) flags;
3224 } */
3225 struct vnode *vp;
3226 int error;
3227
3228 error = namei_simple_user(SCARG(uap, path),
3229 NSM_FOLLOW_TRYEMULROOT, &vp);
3230 if (error != 0)
3231 return (error);
3232 error = change_flags(vp, SCARG(uap, flags), l);
3233 vput(vp);
3234 return (error);
3235 }
3236
3237 /*
3238 * Change flags of a file given a file descriptor.
3239 */
3240 /* ARGSUSED */
3241 int
3242 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3243 {
3244 /* {
3245 syscallarg(int) fd;
3246 syscallarg(u_long) flags;
3247 } */
3248 struct vnode *vp;
3249 file_t *fp;
3250 int error;
3251
3252 /* fd_getvnode() will use the descriptor for us */
3253 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3254 return (error);
3255 vp = fp->f_data;
3256 error = change_flags(vp, SCARG(uap, flags), l);
3257 VOP_UNLOCK(vp);
3258 fd_putfile(SCARG(uap, fd));
3259 return (error);
3260 }
3261
3262 /*
3263 * Change flags of a file given a path name; this version does
3264 * not follow links.
3265 */
3266 int
3267 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3268 {
3269 /* {
3270 syscallarg(const char *) path;
3271 syscallarg(u_long) flags;
3272 } */
3273 struct vnode *vp;
3274 int error;
3275
3276 error = namei_simple_user(SCARG(uap, path),
3277 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3278 if (error != 0)
3279 return (error);
3280 error = change_flags(vp, SCARG(uap, flags), l);
3281 vput(vp);
3282 return (error);
3283 }
3284
3285 /*
3286 * Common routine to change flags of a file.
3287 */
3288 int
3289 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3290 {
3291 struct vattr vattr;
3292 int error;
3293
3294 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3295
3296 vattr_null(&vattr);
3297 vattr.va_flags = flags;
3298 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3299
3300 return (error);
3301 }
3302
3303 /*
3304 * Change mode of a file given path name; this version follows links.
3305 */
3306 /* ARGSUSED */
3307 int
3308 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3309 {
3310 /* {
3311 syscallarg(const char *) path;
3312 syscallarg(int) mode;
3313 } */
3314 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3315 SCARG(uap, mode), 0);
3316 }
3317
3318 int
3319 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3320 {
3321 int error;
3322 struct vnode *vp;
3323 namei_simple_flags_t ns_flag;
3324
3325 if (flags & AT_SYMLINK_NOFOLLOW)
3326 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3327 else
3328 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3329
3330 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3331 if (error != 0)
3332 return error;
3333
3334 error = change_mode(vp, mode, l);
3335
3336 vrele(vp);
3337
3338 return (error);
3339 }
3340
3341 /*
3342 * Change mode of a file given a file descriptor.
3343 */
3344 /* ARGSUSED */
3345 int
3346 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3347 {
3348 /* {
3349 syscallarg(int) fd;
3350 syscallarg(int) mode;
3351 } */
3352 file_t *fp;
3353 int error;
3354
3355 /* fd_getvnode() will use the descriptor for us */
3356 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3357 return (error);
3358 error = change_mode(fp->f_data, SCARG(uap, mode), l);
3359 fd_putfile(SCARG(uap, fd));
3360 return (error);
3361 }
3362
3363 int
3364 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3365 register_t *retval)
3366 {
3367 /* {
3368 syscallarg(int) fd;
3369 syscallarg(const char *) path;
3370 syscallarg(int) mode;
3371 syscallarg(int) flag;
3372 } */
3373
3374 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3375 SCARG(uap, mode), SCARG(uap, flag));
3376 }
3377
3378 /*
3379 * Change mode of a file given path name; this version does not follow links.
3380 */
3381 /* ARGSUSED */
3382 int
3383 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3384 {
3385 /* {
3386 syscallarg(const char *) path;
3387 syscallarg(int) mode;
3388 } */
3389 int error;
3390 struct vnode *vp;
3391
3392 error = namei_simple_user(SCARG(uap, path),
3393 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3394 if (error != 0)
3395 return (error);
3396
3397 error = change_mode(vp, SCARG(uap, mode), l);
3398
3399 vrele(vp);
3400 return (error);
3401 }
3402
3403 /*
3404 * Common routine to set mode given a vnode.
3405 */
3406 static int
3407 change_mode(struct vnode *vp, int mode, struct lwp *l)
3408 {
3409 struct vattr vattr;
3410 int error;
3411
3412 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3413 vattr_null(&vattr);
3414 vattr.va_mode = mode & ALLPERMS;
3415 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3416 VOP_UNLOCK(vp);
3417 return (error);
3418 }
3419
3420 /*
3421 * Set ownership given a path name; this version follows links.
3422 */
3423 /* ARGSUSED */
3424 int
3425 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3426 {
3427 /* {
3428 syscallarg(const char *) path;
3429 syscallarg(uid_t) uid;
3430 syscallarg(gid_t) gid;
3431 } */
3432 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3433 SCARG(uap, gid), 0);
3434 }
3435
3436 int
3437 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3438 gid_t gid, int flags)
3439 {
3440 int error;
3441 struct vnode *vp;
3442 namei_simple_flags_t ns_flag;
3443
3444 if (flags & AT_SYMLINK_NOFOLLOW)
3445 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3446 else
3447 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3448
3449 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3450 if (error != 0)
3451 return error;
3452
3453 error = change_owner(vp, uid, gid, l, 0);
3454
3455 vrele(vp);
3456
3457 return (error);
3458 }
3459
3460 /*
3461 * Set ownership given a path name; this version follows links.
3462 * Provides POSIX semantics.
3463 */
3464 /* ARGSUSED */
3465 int
3466 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3467 {
3468 /* {
3469 syscallarg(const char *) path;
3470 syscallarg(uid_t) uid;
3471 syscallarg(gid_t) gid;
3472 } */
3473 int error;
3474 struct vnode *vp;
3475
3476 error = namei_simple_user(SCARG(uap, path),
3477 NSM_FOLLOW_TRYEMULROOT, &vp);
3478 if (error != 0)
3479 return (error);
3480
3481 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3482
3483 vrele(vp);
3484 return (error);
3485 }
3486
3487 /*
3488 * Set ownership given a file descriptor.
3489 */
3490 /* ARGSUSED */
3491 int
3492 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3493 {
3494 /* {
3495 syscallarg(int) fd;
3496 syscallarg(uid_t) uid;
3497 syscallarg(gid_t) gid;
3498 } */
3499 int error;
3500 file_t *fp;
3501
3502 /* fd_getvnode() will use the descriptor for us */
3503 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3504 return (error);
3505 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3506 l, 0);
3507 fd_putfile(SCARG(uap, fd));
3508 return (error);
3509 }
3510
3511 int
3512 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3513 register_t *retval)
3514 {
3515 /* {
3516 syscallarg(int) fd;
3517 syscallarg(const char *) path;
3518 syscallarg(uid_t) owner;
3519 syscallarg(gid_t) group;
3520 syscallarg(int) flag;
3521 } */
3522
3523 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3524 SCARG(uap, owner), SCARG(uap, group),
3525 SCARG(uap, flag));
3526 }
3527
3528 /*
3529 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3530 */
3531 /* ARGSUSED */
3532 int
3533 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3534 {
3535 /* {
3536 syscallarg(int) fd;
3537 syscallarg(uid_t) uid;
3538 syscallarg(gid_t) gid;
3539 } */
3540 int error;
3541 file_t *fp;
3542
3543 /* fd_getvnode() will use the descriptor for us */
3544 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3545 return (error);
3546 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3547 l, 1);
3548 fd_putfile(SCARG(uap, fd));
3549 return (error);
3550 }
3551
3552 /*
3553 * Set ownership given a path name; this version does not follow links.
3554 */
3555 /* ARGSUSED */
3556 int
3557 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3558 {
3559 /* {
3560 syscallarg(const char *) path;
3561 syscallarg(uid_t) uid;
3562 syscallarg(gid_t) gid;
3563 } */
3564 int error;
3565 struct vnode *vp;
3566
3567 error = namei_simple_user(SCARG(uap, path),
3568 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3569 if (error != 0)
3570 return (error);
3571
3572 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3573
3574 vrele(vp);
3575 return (error);
3576 }
3577
3578 /*
3579 * Set ownership given a path name; this version does not follow links.
3580 * Provides POSIX/XPG semantics.
3581 */
3582 /* ARGSUSED */
3583 int
3584 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3585 {
3586 /* {
3587 syscallarg(const char *) path;
3588 syscallarg(uid_t) uid;
3589 syscallarg(gid_t) gid;
3590 } */
3591 int error;
3592 struct vnode *vp;
3593
3594 error = namei_simple_user(SCARG(uap, path),
3595 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3596 if (error != 0)
3597 return (error);
3598
3599 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3600
3601 vrele(vp);
3602 return (error);
3603 }
3604
3605 /*
3606 * Common routine to set ownership given a vnode.
3607 */
3608 static int
3609 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3610 int posix_semantics)
3611 {
3612 struct vattr vattr;
3613 mode_t newmode;
3614 int error;
3615
3616 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3617 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3618 goto out;
3619
3620 #define CHANGED(x) ((int)(x) != -1)
3621 newmode = vattr.va_mode;
3622 if (posix_semantics) {
3623 /*
3624 * POSIX/XPG semantics: if the caller is not the super-user,
3625 * clear set-user-id and set-group-id bits. Both POSIX and
3626 * the XPG consider the behaviour for calls by the super-user
3627 * implementation-defined; we leave the set-user-id and set-
3628 * group-id settings intact in that case.
3629 */
3630 if (vattr.va_mode & S_ISUID) {
3631 if (kauth_authorize_vnode(l->l_cred,
3632 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3633 newmode &= ~S_ISUID;
3634 }
3635 if (vattr.va_mode & S_ISGID) {
3636 if (kauth_authorize_vnode(l->l_cred,
3637 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3638 newmode &= ~S_ISGID;
3639 }
3640 } else {
3641 /*
3642 * NetBSD semantics: when changing owner and/or group,
3643 * clear the respective bit(s).
3644 */
3645 if (CHANGED(uid))
3646 newmode &= ~S_ISUID;
3647 if (CHANGED(gid))
3648 newmode &= ~S_ISGID;
3649 }
3650 /* Update va_mode iff altered. */
3651 if (vattr.va_mode == newmode)
3652 newmode = VNOVAL;
3653
3654 vattr_null(&vattr);
3655 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3656 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3657 vattr.va_mode = newmode;
3658 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3659 #undef CHANGED
3660
3661 out:
3662 VOP_UNLOCK(vp);
3663 return (error);
3664 }
3665
3666 /*
3667 * Set the access and modification times given a path name; this
3668 * version follows links.
3669 */
3670 /* ARGSUSED */
3671 int
3672 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3673 register_t *retval)
3674 {
3675 /* {
3676 syscallarg(const char *) path;
3677 syscallarg(const struct timeval *) tptr;
3678 } */
3679
3680 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3681 SCARG(uap, tptr), UIO_USERSPACE);
3682 }
3683
3684 /*
3685 * Set the access and modification times given a file descriptor.
3686 */
3687 /* ARGSUSED */
3688 int
3689 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3690 register_t *retval)
3691 {
3692 /* {
3693 syscallarg(int) fd;
3694 syscallarg(const struct timeval *) tptr;
3695 } */
3696 int error;
3697 file_t *fp;
3698
3699 /* fd_getvnode() will use the descriptor for us */
3700 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3701 return (error);
3702 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3703 UIO_USERSPACE);
3704 fd_putfile(SCARG(uap, fd));
3705 return (error);
3706 }
3707
3708 int
3709 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3710 register_t *retval)
3711 {
3712 /* {
3713 syscallarg(int) fd;
3714 syscallarg(const struct timespec *) tptr;
3715 } */
3716 int error;
3717 file_t *fp;
3718
3719 /* fd_getvnode() will use the descriptor for us */
3720 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3721 return (error);
3722 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0,
3723 SCARG(uap, tptr), UIO_USERSPACE);
3724 fd_putfile(SCARG(uap, fd));
3725 return (error);
3726 }
3727
3728 /*
3729 * Set the access and modification times given a path name; this
3730 * version does not follow links.
3731 */
3732 int
3733 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3734 register_t *retval)
3735 {
3736 /* {
3737 syscallarg(const char *) path;
3738 syscallarg(const struct timeval *) tptr;
3739 } */
3740
3741 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3742 SCARG(uap, tptr), UIO_USERSPACE);
3743 }
3744
3745 int
3746 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3747 register_t *retval)
3748 {
3749 /* {
3750 syscallarg(int) fd;
3751 syscallarg(const char *) path;
3752 syscallarg(const struct timespec *) tptr;
3753 syscallarg(int) flag;
3754 } */
3755 int follow;
3756 const struct timespec *tptr;
3757 int error;
3758
3759 tptr = SCARG(uap, tptr);
3760 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3761
3762 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3763 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3764
3765 return error;
3766 }
3767
3768 /*
3769 * Common routine to set access and modification times given a vnode.
3770 */
3771 int
3772 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3773 const struct timespec *tptr, enum uio_seg seg)
3774 {
3775 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3776 }
3777
3778 int
3779 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3780 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3781 {
3782 struct vattr vattr;
3783 int error, dorele = 0;
3784 namei_simple_flags_t sflags;
3785 bool vanull, setbirthtime;
3786 struct timespec ts[2];
3787
3788 KASSERT(l != NULL || fdat == AT_FDCWD);
3789
3790 /*
3791 * I have checked all callers and they pass either FOLLOW,
3792 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3793 * is 0. More to the point, they don't pass anything else.
3794 * Let's keep it that way at least until the namei interfaces
3795 * are fully sanitized.
3796 */
3797 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3798 sflags = (flag == FOLLOW) ?
3799 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3800
3801 if (tptr == NULL) {
3802 vanull = true;
3803 nanotime(&ts[0]);
3804 ts[1] = ts[0];
3805 } else {
3806 vanull = false;
3807 if (seg != UIO_SYSSPACE) {
3808 error = copyin(tptr, ts, sizeof (ts));
3809 if (error != 0)
3810 return error;
3811 } else {
3812 ts[0] = tptr[0];
3813 ts[1] = tptr[1];
3814 }
3815 }
3816
3817 if (ts[0].tv_nsec == UTIME_NOW) {
3818 nanotime(&ts[0]);
3819 if (ts[1].tv_nsec == UTIME_NOW) {
3820 vanull = true;
3821 ts[1] = ts[0];
3822 }
3823 } else if (ts[1].tv_nsec == UTIME_NOW)
3824 nanotime(&ts[1]);
3825
3826 if (vp == NULL) {
3827 /* note: SEG describes TPTR, not PATH; PATH is always user */
3828 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3829 if (error != 0)
3830 return error;
3831 dorele = 1;
3832 }
3833
3834 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3835 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3836 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3837 vattr_null(&vattr);
3838
3839 if (ts[0].tv_nsec != UTIME_OMIT)
3840 vattr.va_atime = ts[0];
3841
3842 if (ts[1].tv_nsec != UTIME_OMIT) {
3843 vattr.va_mtime = ts[1];
3844 if (setbirthtime)
3845 vattr.va_birthtime = ts[1];
3846 }
3847
3848 if (vanull)
3849 vattr.va_vaflags |= VA_UTIMES_NULL;
3850 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3851 VOP_UNLOCK(vp);
3852
3853 if (dorele != 0)
3854 vrele(vp);
3855
3856 return error;
3857 }
3858
3859 int
3860 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3861 const struct timeval *tptr, enum uio_seg seg)
3862 {
3863 struct timespec ts[2];
3864 struct timespec *tsptr = NULL;
3865 int error;
3866
3867 if (tptr != NULL) {
3868 struct timeval tv[2];
3869
3870 if (seg != UIO_SYSSPACE) {
3871 error = copyin(tptr, tv, sizeof (tv));
3872 if (error != 0)
3873 return error;
3874 tptr = tv;
3875 }
3876
3877 if ((tv[0].tv_usec == UTIME_NOW) ||
3878 (tv[0].tv_usec == UTIME_OMIT))
3879 ts[0].tv_nsec = tv[0].tv_usec;
3880 else
3881 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3882
3883 if ((tv[1].tv_usec == UTIME_NOW) ||
3884 (tv[1].tv_usec == UTIME_OMIT))
3885 ts[1].tv_nsec = tv[1].tv_usec;
3886 else
3887 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3888
3889 tsptr = &ts[0];
3890 }
3891
3892 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3893 }
3894
3895 /*
3896 * Truncate a file given its path name.
3897 */
3898 /* ARGSUSED */
3899 int
3900 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3901 {
3902 /* {
3903 syscallarg(const char *) path;
3904 syscallarg(int) pad;
3905 syscallarg(off_t) length;
3906 } */
3907 struct vnode *vp;
3908 struct vattr vattr;
3909 int error;
3910
3911 error = namei_simple_user(SCARG(uap, path),
3912 NSM_FOLLOW_TRYEMULROOT, &vp);
3913 if (error != 0)
3914 return (error);
3915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3916 if (vp->v_type == VDIR)
3917 error = EISDIR;
3918 else if ((error = vn_writechk(vp)) == 0 &&
3919 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3920 vattr_null(&vattr);
3921 vattr.va_size = SCARG(uap, length);
3922 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3923 }
3924 vput(vp);
3925 return (error);
3926 }
3927
3928 /*
3929 * Truncate a file given a file descriptor.
3930 */
3931 /* ARGSUSED */
3932 int
3933 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3934 {
3935 /* {
3936 syscallarg(int) fd;
3937 syscallarg(int) pad;
3938 syscallarg(off_t) length;
3939 } */
3940 struct vattr vattr;
3941 struct vnode *vp;
3942 file_t *fp;
3943 int error;
3944
3945 /* fd_getvnode() will use the descriptor for us */
3946 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3947 return (error);
3948 if ((fp->f_flag & FWRITE) == 0) {
3949 error = EINVAL;
3950 goto out;
3951 }
3952 vp = fp->f_data;
3953 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3954 if (vp->v_type == VDIR)
3955 error = EISDIR;
3956 else if ((error = vn_writechk(vp)) == 0) {
3957 vattr_null(&vattr);
3958 vattr.va_size = SCARG(uap, length);
3959 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3960 }
3961 VOP_UNLOCK(vp);
3962 out:
3963 fd_putfile(SCARG(uap, fd));
3964 return (error);
3965 }
3966
3967 /*
3968 * Sync an open file.
3969 */
3970 /* ARGSUSED */
3971 int
3972 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3973 {
3974 /* {
3975 syscallarg(int) fd;
3976 } */
3977 struct vnode *vp;
3978 file_t *fp;
3979 int error;
3980
3981 /* fd_getvnode() will use the descriptor for us */
3982 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3983 return (error);
3984 vp = fp->f_data;
3985 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3986 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3987 VOP_UNLOCK(vp);
3988 fd_putfile(SCARG(uap, fd));
3989 return (error);
3990 }
3991
3992 /*
3993 * Sync a range of file data. API modeled after that found in AIX.
3994 *
3995 * FDATASYNC indicates that we need only save enough metadata to be able
3996 * to re-read the written data. Note we duplicate AIX's requirement that
3997 * the file be open for writing.
3998 */
3999 /* ARGSUSED */
4000 int
4001 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4002 {
4003 /* {
4004 syscallarg(int) fd;
4005 syscallarg(int) flags;
4006 syscallarg(off_t) start;
4007 syscallarg(off_t) length;
4008 } */
4009 struct vnode *vp;
4010 file_t *fp;
4011 int flags, nflags;
4012 off_t s, e, len;
4013 int error;
4014
4015 /* fd_getvnode() will use the descriptor for us */
4016 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4017 return (error);
4018
4019 if ((fp->f_flag & FWRITE) == 0) {
4020 error = EBADF;
4021 goto out;
4022 }
4023
4024 flags = SCARG(uap, flags);
4025 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4026 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4027 error = EINVAL;
4028 goto out;
4029 }
4030 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4031 if (flags & FDATASYNC)
4032 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4033 else
4034 nflags = FSYNC_WAIT;
4035 if (flags & FDISKSYNC)
4036 nflags |= FSYNC_CACHE;
4037
4038 len = SCARG(uap, length);
4039 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4040 if (len) {
4041 s = SCARG(uap, start);
4042 e = s + len;
4043 if (e < s) {
4044 error = EINVAL;
4045 goto out;
4046 }
4047 } else {
4048 e = 0;
4049 s = 0;
4050 }
4051
4052 vp = fp->f_data;
4053 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4054 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4055 VOP_UNLOCK(vp);
4056 out:
4057 fd_putfile(SCARG(uap, fd));
4058 return (error);
4059 }
4060
4061 /*
4062 * Sync the data of an open file.
4063 */
4064 /* ARGSUSED */
4065 int
4066 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4067 {
4068 /* {
4069 syscallarg(int) fd;
4070 } */
4071 struct vnode *vp;
4072 file_t *fp;
4073 int error;
4074
4075 /* fd_getvnode() will use the descriptor for us */
4076 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4077 return (error);
4078 if ((fp->f_flag & FWRITE) == 0) {
4079 fd_putfile(SCARG(uap, fd));
4080 return (EBADF);
4081 }
4082 vp = fp->f_data;
4083 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4084 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4085 VOP_UNLOCK(vp);
4086 fd_putfile(SCARG(uap, fd));
4087 return (error);
4088 }
4089
4090 /*
4091 * Rename files, (standard) BSD semantics frontend.
4092 */
4093 /* ARGSUSED */
4094 int
4095 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4096 {
4097 /* {
4098 syscallarg(const char *) from;
4099 syscallarg(const char *) to;
4100 } */
4101
4102 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4103 SCARG(uap, to), UIO_USERSPACE, 0));
4104 }
4105
4106 int
4107 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4108 register_t *retval)
4109 {
4110 /* {
4111 syscallarg(int) fromfd;
4112 syscallarg(const char *) from;
4113 syscallarg(int) tofd;
4114 syscallarg(const char *) to;
4115 } */
4116
4117 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4118 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4119 }
4120
4121 /*
4122 * Rename files, POSIX semantics frontend.
4123 */
4124 /* ARGSUSED */
4125 int
4126 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4127 {
4128 /* {
4129 syscallarg(const char *) from;
4130 syscallarg(const char *) to;
4131 } */
4132
4133 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4134 SCARG(uap, to), UIO_USERSPACE, 1));
4135 }
4136
4137 /*
4138 * Rename files. Source and destination must either both be directories,
4139 * or both not be directories. If target is a directory, it must be empty.
4140 * If `from' and `to' refer to the same object, the value of the `retain'
4141 * argument is used to determine whether `from' will be
4142 *
4143 * (retain == 0) deleted unless `from' and `to' refer to the same
4144 * object in the file system's name space (BSD).
4145 * (retain == 1) always retained (POSIX).
4146 *
4147 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4148 */
4149 int
4150 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4151 {
4152 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4153 }
4154
/*
 * Common back end for the rename system calls.
 *
 * l       - calling lwp; may be NULL only if both fromfd and tofd are
 *           AT_FDCWD (asserted below).
 * fromfd  - directory descriptor used to look up `from'.
 * from    - source path, in the address space given by seg.
 * tofd    - directory descriptor used to look up `to'.
 * to      - destination path, in the address space given by seg.
 * seg     - address space of both path strings.
 * retain  - nonzero: if `from' and `to' resolve to the same vnode, do
 *           nothing and return 0 (POSIX).  Zero: do nothing only when
 *           they are also the same name in the same directory (BSD).
 *
 * Outline: copy in both paths, look up `from' (DELETE) and `to' (RENAME),
 * reject `.' and `..' as either final component, require both parents to
 * be on the same mount, take the vfs rename lock, lock tdvp and relookup
 * the target, run a few sanity checks, then call VOP_RENAME.
 *
 * Returns 0 on success or an errno value.
 *
 * Cleanup labels unwind in reverse order of acquisition:
 *   abort3 - unlock tvp (if distinct from tdvp)
 *   abort2 - unlock tdvp and leave the vfs rename lock
 *   abort1 - abort the `to' operation, release tdvp and tvp
 *   abort0 - abort the `from' operation, release fdvp and fvp
 *   out2   - destroy the `to' pathbuf
 *   out1   - destroy the `from' pathbuf
 */
static int
do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
    const char *to, enum uio_seg seg, int retain)
{
	struct pathbuf *fpb, *tpb;
	struct nameidata fnd, tnd;
	struct vnode *fdvp, *fvp;
	struct vnode *tdvp, *tvp;
	struct mount *mp, *tmp;
	int error;

	KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));

	error = pathbuf_maybe_copyin(from, seg, &fpb);
	if (error)
		goto out0;
	KASSERT(fpb != NULL);

	error = pathbuf_maybe_copyin(to, seg, &tpb);
	if (error)
		goto out1;
	KASSERT(tpb != NULL);

	/*
	 * Lookup from.
	 *
	 * XXX LOCKPARENT is wrong because we don't actually want it
	 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
	 * insane, so for the time being we need to leave it like this.
	 */
	NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb);
	if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
		goto out2;

	/*
	 * Pull out the important results of the lookup, fdvp and fvp.
	 * Of course, fvp is bogus because we're about to unlock fdvp.
	 */
	fdvp = fnd.ni_dvp;
	fvp = fnd.ni_vp;
	KASSERT(fdvp != NULL);
	KASSERT(fvp != NULL);
	KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither fdvp nor fvp is locked.
	 */
	if (fdvp != fvp)
		VOP_UNLOCK(fdvp);
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */

	/*
	 * Reject renaming `.' and `..'.  Can't do this until after
	 * namei because we need namei's parsing to find the final
	 * component name.  (namei should just leave us with the final
	 * component name and not look it up itself, but anyway...)
	 *
	 * This was here before because we used to relookup from
	 * instead of to and relookup requires the caller to check
	 * this, but now file systems may depend on this check, so we
	 * must retain it until the file systems are all rototilled.
	 */
	if (((fnd.ni_cnd.cn_namelen == 1) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((fnd.ni_cnd.cn_namelen == 2) &&
		(fnd.ni_cnd.cn_nameptr[0] == '.') &&
		(fnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR? */
		goto abort0;
	}

	/*
	 * Lookup to.
	 *
	 * XXX LOCKPARENT is wrong, but...insanity, &c.  Also, using
	 * fvp here to decide whether to add CREATEDIR is a load of
	 * bollocks because fvp might be the wrong node by now, since
	 * fdvp is unlocked.
	 *
	 * XXX Why not pass CREATEDIR always?
	 */
	NDINIT(&tnd, RENAME,
	    (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME |
		((fvp->v_type == VDIR)? CREATEDIR : 0)),
	    tpb);
	if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
		goto abort0;

	/*
	 * Pull out the important results of the lookup, tdvp and tvp.
	 * Of course, tvp is bogus because we're about to unlock tdvp.
	 * Note that tvp may be NULL (the target need not exist).
	 */
	tdvp = tnd.ni_dvp;
	tvp = tnd.ni_vp;
	KASSERT(tdvp != NULL);
	KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));

	/*
	 * Make sure neither tdvp nor tvp is locked.
	 */
	if (tdvp != tvp)
		VOP_UNLOCK(tdvp);
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * Reject renaming onto `.' or `..'.  relookup is unhappy with
	 * these, which is why we must do this here.  Once upon a time
	 * we relooked up from instead of to, and consequently didn't
	 * need this check, but now that we relookup to instead of
	 * from, we need this; and we shall need it forever forward
	 * until the VOP_RENAME protocol changes, because file systems
	 * will no doubt begin to depend on this check.
	 */
	if (((tnd.ni_cnd.cn_namelen == 1) &&
		(tnd.ni_cnd.cn_nameptr[0] == '.')) ||
	    ((tnd.ni_cnd.cn_namelen == 2) &&
		(tnd.ni_cnd.cn_nameptr[0] == '.') &&
		(tnd.ni_cnd.cn_nameptr[1] == '.'))) {
		error = EINVAL;	/* XXX EISDIR? */
		goto abort1;
	}

	/*
	 * Get the mount point.  If the file system has been unmounted,
	 * which it may be because we're not holding any vnode locks,
	 * then v_mount will be NULL.  We're not really supposed to
	 * read v_mount without holding the vnode lock, but since we
	 * have fdvp referenced, if fdvp->v_mount changes then at worst
	 * it will be set to NULL, not changed to another mount point.
	 * And, of course, since it is up to the file system to
	 * determine the real lock order, we can't lock both fdvp and
	 * tdvp at the same time.
	 */
	mp = fdvp->v_mount;
	if (mp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	/*
	 * Make sure the mount points match.  Again, although we don't
	 * hold any vnode locks, the v_mount fields may change -- but
	 * at worst they will change to NULL, so this will never become
	 * a cross-device rename, because we hold vnode references.
	 *
	 * XXX Because nothing is locked and the compiler may reorder
	 * things here, unmounting the file system at an inopportune
	 * moment may cause rename to fail with EXDEV when it really
	 * should fail with ENOENT.
	 */
	tmp = tdvp->v_mount;
	if (tmp == NULL) {
		error = ENOENT;
		goto abort1;
	}

	if (mp != tmp) {
		error = EXDEV;
		goto abort1;
	}

	/*
	 * Take the vfs rename lock to avoid cross-directory screw cases.
	 * Nothing is locked currently, so taking this lock is safe.
	 */
	error = VFS_RENAMELOCK_ENTER(mp);
	if (error)
		goto abort1;

	/*
	 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
	 * and nothing is locked except for the vfs rename lock.
	 *
	 * The next step is a little rain dance to conform to the
	 * insane lock protocol, even though it does nothing to ward
	 * off race conditions.
	 *
	 * We need tdvp and tvp to be locked.  However, because we have
	 * unlocked tdvp in order to hold no locks while we take the
	 * vfs rename lock, tvp may be wrong here, and we can't safely
	 * lock it even if the sensible file systems will just unlock
	 * it straight away.  Consequently, we must lock tdvp and then
	 * relookup tvp to get it locked.
	 *
	 * Finally, because the VOP_RENAME protocol is brain-damaged
	 * and various file systems insanely depend on the semantics of
	 * this brain damage, the lookup of to must be the last lookup
	 * before VOP_RENAME.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
	if (error)
		goto abort2;

	/*
	 * Drop the old tvp and pick up the new one -- which might be
	 * the same, but that doesn't matter to us.  After this, tdvp
	 * and tvp should both be locked.
	 */
	if (tvp != NULL)
		vrele(tvp);
	tvp = tnd.ni_vp;
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));

	/*
	 * The old do_sys_rename had various consistency checks here
	 * involving fvp and tvp.  fvp is bogus already here, and tvp
	 * will become bogus soon in any sensible file system, so the
	 * only purpose in putting these checks here is to give lip
	 * service to these screw cases and to acknowledge that they
	 * exist, not actually to handle them, but here you go
	 * anyway...
	 */

	/*
	 * Acknowledge that directories and non-directories aren't
	 * supposed to mix.
	 */
	if (tvp != NULL) {
		if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
			error = ENOTDIR;
			goto abort3;
		} else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
			error = EISDIR;
			goto abort3;
		}
	}

	/*
	 * Acknowledge some random screw case, among the dozens that
	 * might arise.  (Here: the source is the target's parent
	 * directory.)
	 */
	if (fvp == tdvp) {
		error = EINVAL;
		goto abort3;
	}

	/*
	 * Acknowledge that POSIX has a wacky screw case.
	 *
	 * XXX Eventually the retain flag needs to be passed on to
	 * VOP_RENAME.
	 */
	if (fvp == tvp) {
		if (retain) {
			/* POSIX: same object, nothing to do. */
			error = 0;
			goto abort3;
		} else if ((fdvp == tdvp) &&
		    (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
		    (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
			fnd.ni_cnd.cn_namelen))) {
			/* BSD: same name in the same directory, nothing to do. */
			error = 0;
			goto abort3;
		}
	}

	/*
	 * Make sure veriexec can screw us up.  (But a race can screw
	 * up veriexec, of course -- remember, fvp and (soon) tvp are
	 * bogus.)
	 */
#if NVERIEXEC > 0
	{
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/*
		 * Make NUL-terminated copies of the two final component
		 * names (cn_namelen bytes plus a terminator each).
		 */
		f1_len = fnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tnd.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);

		/* curlwp rather than l: l is allowed to be NULL here. */
		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);

		if (error)
			goto abort3;
	}
#endif /* NVERIEXEC > 0 */

	/*
	 * All ready.  Incant the rename vop.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
	KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
	error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);

	/*
	 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
	 * tdvp and tvp.  But we can't assert any of that.
	 */
	/* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
	/* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */

	/*
	 * So all we have left to do is to drop the rename lock and
	 * destroy the pathbufs.
	 */
	VFS_RENAMELOCK_EXIT(mp);
	goto out2;

	/* Error unwinding: each label falls through to the ones below. */
abort3:	if ((tvp != NULL) && (tvp != tdvp))
		VOP_UNLOCK(tvp);
abort2:	VOP_UNLOCK(tdvp);
	VFS_RENAMELOCK_EXIT(mp);
abort1:	VOP_ABORTOP(tdvp, &tnd.ni_cnd);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);
abort0:	VOP_ABORTOP(fdvp, &fnd.ni_cnd);
	vrele(fdvp);
	vrele(fvp);
out2:	pathbuf_destroy(tpb);
out1:	pathbuf_destroy(fpb);
out0:	return error;
}
4483
4484 /*
4485 * Make a directory file.
4486 */
4487 /* ARGSUSED */
4488 int
4489 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4490 {
4491 /* {
4492 syscallarg(const char *) path;
4493 syscallarg(int) mode;
4494 } */
4495
4496 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4497 SCARG(uap, mode), UIO_USERSPACE);
4498 }
4499
4500 int
4501 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4502 register_t *retval)
4503 {
4504 /* {
4505 syscallarg(int) fd;
4506 syscallarg(const char *) path;
4507 syscallarg(int) mode;
4508 } */
4509
4510 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4511 SCARG(uap, mode), UIO_USERSPACE);
4512 }
4513
4514
4515 int
4516 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4517 {
4518 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4519 }
4520
4521 static int
4522 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4523 enum uio_seg seg)
4524 {
4525 struct proc *p = curlwp->l_proc;
4526 struct vnode *vp;
4527 struct vattr vattr;
4528 int error;
4529 struct pathbuf *pb;
4530 struct nameidata nd;
4531
4532 KASSERT(l != NULL || fdat == AT_FDCWD);
4533
4534 /* XXX bollocks, should pass in a pathbuf */
4535 error = pathbuf_maybe_copyin(path, seg, &pb);
4536 if (error) {
4537 return error;
4538 }
4539
4540 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4541
4542 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4543 pathbuf_destroy(pb);
4544 return (error);
4545 }
4546 vp = nd.ni_vp;
4547 if (vp != NULL) {
4548 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4549 if (nd.ni_dvp == vp)
4550 vrele(nd.ni_dvp);
4551 else
4552 vput(nd.ni_dvp);
4553 vrele(vp);
4554 pathbuf_destroy(pb);
4555 return (EEXIST);
4556 }
4557 vattr_null(&vattr);
4558 vattr.va_type = VDIR;
4559 /* We will read cwdi->cwdi_cmask unlocked. */
4560 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4561 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4562 if (!error)
4563 vrele(nd.ni_vp);
4564 vput(nd.ni_dvp);
4565 pathbuf_destroy(pb);
4566 return (error);
4567 }
4568
4569 /*
4570 * Remove a directory file.
4571 */
4572 /* ARGSUSED */
4573 int
4574 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4575 {
4576 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4577 AT_REMOVEDIR, UIO_USERSPACE);
4578 }
4579
4580 /*
4581 * Read a block of directory entries in a file system independent format.
4582 */
4583 int
4584 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4585 {
4586 /* {
4587 syscallarg(int) fd;
4588 syscallarg(char *) buf;
4589 syscallarg(size_t) count;
4590 } */
4591 file_t *fp;
4592 int error, done;
4593
4594 /* fd_getvnode() will use the descriptor for us */
4595 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4596 return (error);
4597 if ((fp->f_flag & FREAD) == 0) {
4598 error = EBADF;
4599 goto out;
4600 }
4601 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4602 SCARG(uap, count), &done, l, 0, 0);
4603 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4604 *retval = done;
4605 out:
4606 fd_putfile(SCARG(uap, fd));
4607 return (error);
4608 }
4609
4610 /*
4611 * Set the mode mask for creation of filesystem nodes.
4612 */
4613 int
4614 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4615 {
4616 /* {
4617 syscallarg(mode_t) newmask;
4618 } */
4619 struct proc *p = l->l_proc;
4620 struct cwdinfo *cwdi;
4621
4622 /*
4623 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4624 * important is that we serialize changes to the mask. The
4625 * rw_exit() will issue a write memory barrier on our behalf,
4626 * and force the changes out to other CPUs (as it must use an
4627 * atomic operation, draining the local CPU's store buffers).
4628 */
4629 cwdi = p->p_cwdi;
4630 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4631 *retval = cwdi->cwdi_cmask;
4632 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4633 rw_exit(&cwdi->cwdi_lock);
4634
4635 return (0);
4636 }
4637
4638 int
4639 dorevoke(struct vnode *vp, kauth_cred_t cred)
4640 {
4641 struct vattr vattr;
4642 int error, fs_decision;
4643
4644 vn_lock(vp, LK_SHARED | LK_RETRY);
4645 error = VOP_GETATTR(vp, &vattr, cred);
4646 VOP_UNLOCK(vp);
4647 if (error != 0)
4648 return error;
4649 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4650 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4651 fs_decision);
4652 if (!error)
4653 VOP_REVOKE(vp, REVOKEALL);
4654 return (error);
4655 }
4656
4657 /*
4658 * Void all references to file by ripping underlying filesystem
4659 * away from vnode.
4660 */
4661 /* ARGSUSED */
4662 int
4663 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4664 {
4665 /* {
4666 syscallarg(const char *) path;
4667 } */
4668 struct vnode *vp;
4669 int error;
4670
4671 error = namei_simple_user(SCARG(uap, path),
4672 NSM_FOLLOW_TRYEMULROOT, &vp);
4673 if (error != 0)
4674 return (error);
4675 error = dorevoke(vp, l->l_cred);
4676 vrele(vp);
4677 return (error);
4678 }
4679