vfs_syscalls.c revision 1.444 1 /* $NetBSD: vfs_syscalls.c,v 1.444 2012/02/01 05:34:41 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.444 2012/02/01 05:34:41 dholland Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/syncfs/syncfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
/*
 * Forward declarations of file-local helpers.  Their definitions are not
 * part of this section of the file (presumably they follow further down).
 */
static int change_flags(struct vnode *, u_long, struct lwp *);
static int change_mode(struct vnode *, int, struct lwp *l);
static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
122
/*
 * This table is used to maintain compatibility with 4.3BSD
 * and NetBSD 0.9 mount syscalls - and possibly other systems.
 * Note, the order is important: the array index is the historic
 * numeric filesystem type that mount_get_vfsops() falls back to
 * when the type argument cannot be copied in as a string.  A NULL
 * slot makes that lookup fail with ENODEV.
 *
 * Do not modify this table.  It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};

/* Number of slots in mountcompatnames[]; used as the index bound. */
const int nmountcompatnames = __arraycount(mountcompatnames);
145
146 static int
147 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
148 {
149 int error;
150
151 fp->f_flag = flags & FMASK;
152 fp->f_type = DTYPE_VNODE;
153 fp->f_ops = &vnops;
154 fp->f_data = vp;
155
156 if (flags & (O_EXLOCK | O_SHLOCK)) {
157 struct flock lf;
158 int type;
159
160 lf.l_whence = SEEK_SET;
161 lf.l_start = 0;
162 lf.l_len = 0;
163 if (flags & O_EXLOCK)
164 lf.l_type = F_WRLCK;
165 else
166 lf.l_type = F_RDLCK;
167 type = F_FLOCK;
168 if ((flags & FNONBLOCK) == 0)
169 type |= F_WAIT;
170 VOP_UNLOCK(vp);
171 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
172 if (error) {
173 (void) vn_close(vp, fp->f_flag, fp->f_cred);
174 fd_abort(l->l_proc, fp, indx);
175 return error;
176 }
177 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
178 atomic_or_uint(&fp->f_flag, FHASLOCK);
179 }
180 if (flags & O_CLOEXEC)
181 fd_set_exclose(l, indx, true);
182 return 0;
183 }
184
185 static int
186 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
187 void *data, size_t *data_len)
188 {
189 struct mount *mp;
190 int error = 0, saved_flags;
191
192 mp = vp->v_mount;
193 saved_flags = mp->mnt_flag;
194
195 /* We can operate only on VV_ROOT nodes. */
196 if ((vp->v_vflag & VV_ROOT) == 0) {
197 error = EINVAL;
198 goto out;
199 }
200
201 /*
202 * We only allow the filesystem to be reloaded if it
203 * is currently mounted read-only. Additionally, we
204 * prevent read-write to read-only downgrades.
205 */
206 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
207 (mp->mnt_flag & MNT_RDONLY) == 0 &&
208 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
209 error = EOPNOTSUPP; /* Needs translation */
210 goto out;
211 }
212
213 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
214 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
215 if (error)
216 goto out;
217
218 if (vfs_busy(mp, NULL)) {
219 error = EPERM;
220 goto out;
221 }
222
223 mutex_enter(&mp->mnt_updating);
224
225 mp->mnt_flag &= ~MNT_OP_FLAGS;
226 mp->mnt_flag |= flags & MNT_OP_FLAGS;
227
228 /*
229 * Set the mount level flags.
230 */
231 if (flags & MNT_RDONLY)
232 mp->mnt_flag |= MNT_RDONLY;
233 else if (mp->mnt_flag & MNT_RDONLY)
234 mp->mnt_iflag |= IMNT_WANTRDWR;
235 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
236 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
237 error = VFS_MOUNT(mp, path, data, data_len);
238
239 if (error && data != NULL) {
240 int error2;
241
242 /*
243 * Update failed; let's try and see if it was an
244 * export request. For compat with 3.0 and earlier.
245 */
246 error2 = vfs_hooks_reexport(mp, path, data);
247
248 /*
249 * Only update error code if the export request was
250 * understood but some problem occurred while
251 * processing it.
252 */
253 if (error2 != EJUSTRETURN)
254 error = error2;
255 }
256
257 if (mp->mnt_iflag & IMNT_WANTRDWR)
258 mp->mnt_flag &= ~MNT_RDONLY;
259 if (error)
260 mp->mnt_flag = saved_flags;
261 mp->mnt_flag &= ~MNT_OP_FLAGS;
262 mp->mnt_iflag &= ~IMNT_WANTRDWR;
263 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
264 if (mp->mnt_syncer == NULL)
265 error = vfs_allocate_syncvnode(mp);
266 } else {
267 if (mp->mnt_syncer != NULL)
268 vfs_deallocate_syncvnode(mp);
269 }
270 mutex_exit(&mp->mnt_updating);
271 vfs_unbusy(mp, false, NULL);
272
273 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
274 (flags & MNT_EXTATTR)) {
275 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_START,
276 NULL, 0, NULL) != 0) {
277 printf("%s: failed to start extattr, error = %d",
278 vp->v_mount->mnt_stat.f_mntonname, error);
279 mp->mnt_flag &= ~MNT_EXTATTR;
280 }
281 }
282
283 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
284 !(flags & MNT_EXTATTR)) {
285 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_STOP,
286 NULL, 0, NULL) != 0) {
287 printf("%s: failed to stop extattr, error = %d",
288 vp->v_mount->mnt_stat.f_mntonname, error);
289 mp->mnt_flag |= MNT_RDONLY;
290 }
291 }
292 out:
293 return (error);
294 }
295
296 static int
297 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
298 {
299 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
300 int error;
301
302 /* Copy file-system type from userspace. */
303 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
304 if (error) {
305 /*
306 * Historically, filesystem types were identified by numbers.
307 * If we get an integer for the filesystem type instead of a
308 * string, we check to see if it matches one of the historic
309 * filesystem types.
310 */
311 u_long fsindex = (u_long)fstype;
312 if (fsindex >= nmountcompatnames ||
313 mountcompatnames[fsindex] == NULL)
314 return ENODEV;
315 strlcpy(fstypename, mountcompatnames[fsindex],
316 sizeof(fstypename));
317 }
318
319 /* Accept `ufs' as an alias for `ffs', for compatibility. */
320 if (strcmp(fstypename, "ufs") == 0)
321 fstypename[0] = 'f';
322
323 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
324 return 0;
325
326 /* If we can autoload a vfs module, try again */
327 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
328
329 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
330 return 0;
331
332 return ENODEV;
333 }
334
335 static int
336 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
337 void *data, size_t *data_len)
338 {
339 struct mount *mp;
340 int error;
341
342 /* If MNT_GETARGS is specified, it should be the only flag. */
343 if (flags & ~MNT_GETARGS)
344 return EINVAL;
345
346 mp = vp->v_mount;
347
348 /* XXX: probably some notion of "can see" here if we want isolation. */
349 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
350 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
351 if (error)
352 return error;
353
354 if ((vp->v_vflag & VV_ROOT) == 0)
355 return EINVAL;
356
357 if (vfs_busy(mp, NULL))
358 return EPERM;
359
360 mutex_enter(&mp->mnt_updating);
361 mp->mnt_flag &= ~MNT_OP_FLAGS;
362 mp->mnt_flag |= MNT_GETARGS;
363 error = VFS_MOUNT(mp, path, data, data_len);
364 mp->mnt_flag &= ~MNT_OP_FLAGS;
365 mutex_exit(&mp->mnt_updating);
366
367 vfs_unbusy(mp, false, NULL);
368 return (error);
369 }
370
371 int
372 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
373 {
374 /* {
375 syscallarg(const char *) type;
376 syscallarg(const char *) path;
377 syscallarg(int) flags;
378 syscallarg(void *) data;
379 syscallarg(size_t) data_len;
380 } */
381
382 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
383 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
384 SCARG(uap, data_len), retval);
385 }
386
387 int
388 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
389 const char *path, int flags, void *data, enum uio_seg data_seg,
390 size_t data_len, register_t *retval)
391 {
392 struct vnode *vp;
393 void *data_buf = data;
394 bool vfsopsrele = false;
395 int error;
396
397 /* XXX: The calling convention of this routine is totally bizarre */
398 if (vfsops)
399 vfsopsrele = true;
400
401 /*
402 * Get vnode to be covered
403 */
404 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
405 if (error != 0) {
406 vp = NULL;
407 goto done;
408 }
409
410 if (vfsops == NULL) {
411 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
412 vfsops = vp->v_mount->mnt_op;
413 } else {
414 /* 'type' is userspace */
415 error = mount_get_vfsops(type, &vfsops);
416 if (error != 0)
417 goto done;
418 vfsopsrele = true;
419 }
420 }
421
422 if (data != NULL && data_seg == UIO_USERSPACE) {
423 if (data_len == 0) {
424 /* No length supplied, use default for filesystem */
425 data_len = vfsops->vfs_min_mount_data;
426 if (data_len > VFS_MAX_MOUNT_DATA) {
427 error = EINVAL;
428 goto done;
429 }
430 /*
431 * Hopefully a longer buffer won't make copyin() fail.
432 * For compatibility with 3.0 and earlier.
433 */
434 if (flags & MNT_UPDATE
435 && data_len < sizeof (struct mnt_export_args30))
436 data_len = sizeof (struct mnt_export_args30);
437 }
438 data_buf = kmem_alloc(data_len, KM_SLEEP);
439
440 /* NFS needs the buffer even for mnt_getargs .... */
441 error = copyin(data, data_buf, data_len);
442 if (error != 0)
443 goto done;
444 }
445
446 if (flags & MNT_GETARGS) {
447 if (data_len == 0) {
448 error = EINVAL;
449 goto done;
450 }
451 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
452 if (error != 0)
453 goto done;
454 if (data_seg == UIO_USERSPACE)
455 error = copyout(data_buf, data, data_len);
456 *retval = data_len;
457 } else if (flags & MNT_UPDATE) {
458 error = mount_update(l, vp, path, flags, data_buf, &data_len);
459 } else {
460 /* Locking is handled internally in mount_domount(). */
461 KASSERT(vfsopsrele == true);
462 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
463 &data_len);
464 vfsopsrele = false;
465
466 if ((error == 0) && (flags & MNT_EXTATTR)) {
467 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_START,
468 NULL, 0, NULL) != 0)
469 printf("%s: failed to start extattr",
470 vp->v_mount->mnt_stat.f_mntonname);
471 /* XXX remove flag */
472 }
473 }
474
475 done:
476 if (vfsopsrele)
477 vfs_delref(vfsops);
478 if (vp != NULL) {
479 vrele(vp);
480 }
481 if (data_buf != data)
482 kmem_free(data_buf, data_len);
483 return (error);
484 }
485
486 /*
487 * Unmount a file system.
488 *
489 * Note: unmount takes a path to the vnode mounted on as argument,
490 * not special file (as before).
491 */
492 /* ARGSUSED */
493 int
494 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
495 {
496 /* {
497 syscallarg(const char *) path;
498 syscallarg(int) flags;
499 } */
500 struct vnode *vp;
501 struct mount *mp;
502 int error;
503 struct pathbuf *pb;
504 struct nameidata nd;
505
506 error = pathbuf_copyin(SCARG(uap, path), &pb);
507 if (error) {
508 return error;
509 }
510
511 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
512 if ((error = namei(&nd)) != 0) {
513 pathbuf_destroy(pb);
514 return error;
515 }
516 vp = nd.ni_vp;
517 pathbuf_destroy(pb);
518
519 mp = vp->v_mount;
520 atomic_inc_uint(&mp->mnt_refcnt);
521 VOP_UNLOCK(vp);
522
523 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
524 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
525 if (error) {
526 vrele(vp);
527 vfs_destroy(mp);
528 return (error);
529 }
530
531 /*
532 * Don't allow unmounting the root file system.
533 */
534 if (mp->mnt_flag & MNT_ROOTFS) {
535 vrele(vp);
536 vfs_destroy(mp);
537 return (EINVAL);
538 }
539
540 /*
541 * Must be the root of the filesystem
542 */
543 if ((vp->v_vflag & VV_ROOT) == 0) {
544 vrele(vp);
545 vfs_destroy(mp);
546 return (EINVAL);
547 }
548
549 vrele(vp);
550 error = dounmount(mp, SCARG(uap, flags), l);
551 vfs_destroy(mp);
552 return error;
553 }
554
/*
 * Sync each mounted filesystem.
 */
#ifdef DEBUG
/* When non-zero, do_sys_sync() calls vfs_bufstats() after syncing. */
int syncprt = 0;
struct ctldebug debug0 = { "syncprt", &syncprt };
#endif

/*
 * Walk the mount list and start a non-waiting sync (MNT_NOWAIT) with the
 * credentials of `l' on every filesystem that is not mounted read-only.
 * Results of VFS_SYNC() are ignored.
 */
void
do_sys_sync(struct lwp *l)
{
	struct mount *mp, *nmp;
	int asyncflag;

	mutex_enter(&mountlist_lock);
	/*
	 * NOTE(review): the loop advances through nmp, so vfs_busy() and
	 * vfs_unbusy() are presumably responsible for storing the next
	 * mount there (and for any mountlist_lock hand-off while the body
	 * runs) -- confirm against their definitions.
	 */
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, &nmp)) {
			continue;
		}
		mutex_enter(&mp->mnt_updating);
		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
			/*
			 * MNT_ASYNC is cleared around the VFS_SYNC() call
			 * and put back afterwards if it was set.
			 */
			asyncflag = mp->mnt_flag & MNT_ASYNC;
			mp->mnt_flag &= ~MNT_ASYNC;
			VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
			if (asyncflag)
				mp->mnt_flag |= MNT_ASYNC;
		}
		mutex_exit(&mp->mnt_updating);
		vfs_unbusy(mp, false, &nmp);
	}
	mutex_exit(&mountlist_lock);
#ifdef DEBUG
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
}
592
593 /* ARGSUSED */
594 int
595 sys_sync(struct lwp *l, const void *v, register_t *retval)
596 {
597 do_sys_sync(l);
598 return (0);
599 }
600
601
602 /*
603 * Access or change filesystem quotas.
604 *
605 * (this is really 14 different calls bundled into one)
606 */
607
608 static int
609 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
610 {
611 struct quotastat info_k;
612 int error;
613
614 /* ensure any padding bytes are cleared */
615 memset(&info_k, 0, sizeof(info_k));
616
617 error = vfs_quotactl_stat(mp, &info_k);
618 if (error) {
619 return error;
620 }
621
622 return copyout(&info_k, info_u, sizeof(info_k));
623 }
624
625 static int
626 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
627 struct quotaidtypestat *info_u)
628 {
629 struct quotaidtypestat info_k;
630 int error;
631
632 /* ensure any padding bytes are cleared */
633 memset(&info_k, 0, sizeof(info_k));
634
635 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
636 if (error) {
637 return error;
638 }
639
640 return copyout(&info_k, info_u, sizeof(info_k));
641 }
642
643 static int
644 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
645 struct quotaobjtypestat *info_u)
646 {
647 struct quotaobjtypestat info_k;
648 int error;
649
650 /* ensure any padding bytes are cleared */
651 memset(&info_k, 0, sizeof(info_k));
652
653 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
654 if (error) {
655 return error;
656 }
657
658 return copyout(&info_k, info_u, sizeof(info_k));
659 }
660
661 static int
662 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
663 struct quotaval *val_u)
664 {
665 struct quotakey key_k;
666 struct quotaval val_k;
667 int error;
668
669 /* ensure any padding bytes are cleared */
670 memset(&val_k, 0, sizeof(val_k));
671
672 error = copyin(key_u, &key_k, sizeof(key_k));
673 if (error) {
674 return error;
675 }
676
677 error = vfs_quotactl_get(mp, &key_k, &val_k);
678 if (error) {
679 return error;
680 }
681
682 return copyout(&val_k, val_u, sizeof(val_k));
683 }
684
685 static int
686 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
687 const struct quotaval *val_u)
688 {
689 struct quotakey key_k;
690 struct quotaval val_k;
691 int error;
692
693 error = copyin(key_u, &key_k, sizeof(key_k));
694 if (error) {
695 return error;
696 }
697
698 error = copyin(val_u, &val_k, sizeof(val_k));
699 if (error) {
700 return error;
701 }
702
703 return vfs_quotactl_put(mp, &key_k, &val_k);
704 }
705
706 static int
707 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u)
708 {
709 struct quotakey key_k;
710 int error;
711
712 error = copyin(key_u, &key_k, sizeof(key_k));
713 if (error) {
714 return error;
715 }
716
717 return vfs_quotactl_delete(mp, &key_k);
718 }
719
720 static int
721 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
722 {
723 struct quotakcursor cursor_k;
724 int error;
725
726 /* ensure any padding bytes are cleared */
727 memset(&cursor_k, 0, sizeof(cursor_k));
728
729 error = vfs_quotactl_cursoropen(mp, &cursor_k);
730 if (error) {
731 return error;
732 }
733
734 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
735 }
736
737 static int
738 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
739 {
740 struct quotakcursor cursor_k;
741 int error;
742
743 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
744 if (error) {
745 return error;
746 }
747
748 return vfs_quotactl_cursorclose(mp, &cursor_k);
749 }
750
751 static int
752 do_sys_quotactl_cursorskipidtype(struct mount *mp,
753 struct quotakcursor *cursor_u, int idtype)
754 {
755 struct quotakcursor cursor_k;
756 int error;
757
758 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
759 if (error) {
760 return error;
761 }
762
763 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
764 if (error) {
765 return error;
766 }
767
768 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
769 }
770
771 static int
772 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
773 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
774 int *ret_u)
775 {
776 #define CGET_STACK_MAX 8
777 struct quotakcursor cursor_k;
778 struct quotakey stackkeys[CGET_STACK_MAX];
779 struct quotaval stackvals[CGET_STACK_MAX];
780 struct quotakey *keys_k;
781 struct quotaval *vals_k;
782 int ret_k;
783 int error;
784
785 if (maxnum > 128) {
786 maxnum = 128;
787 }
788
789 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
790 if (error) {
791 return error;
792 }
793
794 if (maxnum <= CGET_STACK_MAX) {
795 keys_k = stackkeys;
796 vals_k = stackvals;
797 /* ensure any padding bytes are cleared */
798 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
799 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
800 } else {
801 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
802 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
803 }
804
805 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
806 &ret_k);
807 if (error) {
808 goto fail;
809 }
810
811 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
812 if (error) {
813 goto fail;
814 }
815
816 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
817 if (error) {
818 goto fail;
819 }
820
821 error = copyout(&ret_k, ret_u, sizeof(ret_k));
822 if (error) {
823 goto fail;
824 }
825
826 /* do last to maximize the chance of being able to recover a failure */
827 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
828
829 fail:
830 if (keys_k != stackkeys) {
831 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
832 }
833 if (vals_k != stackvals) {
834 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
835 }
836 return error;
837 }
838
839 static int
840 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
841 int *ret_u)
842 {
843 struct quotakcursor cursor_k;
844 int ret_k;
845 int error;
846
847 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
848 if (error) {
849 return error;
850 }
851
852 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
853 if (error) {
854 return error;
855 }
856
857 error = copyout(&ret_k, ret_u, sizeof(ret_k));
858 if (error) {
859 return error;
860 }
861
862 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
863 }
864
865 static int
866 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
867 {
868 struct quotakcursor cursor_k;
869 int error;
870
871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
872 if (error) {
873 return error;
874 }
875
876 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
877 if (error) {
878 return error;
879 }
880
881 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
882 }
883
884 static int
885 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
886 {
887 char *path_k;
888 int error;
889
890 /* XXX this should probably be a struct pathbuf */
891 path_k = PNBUF_GET();
892 error = copyin(path_u, path_k, PATH_MAX);
893 if (error) {
894 PNBUF_PUT(path_k);
895 return error;
896 }
897
898 error = vfs_quotactl_quotaon(mp, idtype, path_k);
899
900 PNBUF_PUT(path_k);
901 return error;
902 }
903
/*
 * QUOTACTL_QUOTAOFF: no user data is involved, so the request goes
 * straight to vfs_quotactl_quotaoff().
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int error;

	error = vfs_quotactl_quotaoff(mp, idtype);
	return error;
}
909
/*
 * quotactl system call entry point.
 *
 * Looks up `path' to find the mount it lives on, copies in the
 * struct quotactl_args from user space and dispatches on its qc_op field
 * to the matching do_sys_quotactl_*() helper, passing the members of the
 * corresponding union arm.  An unrecognized qc_op yields EINVAL.  The
 * pointers inside `args' are handed to the helpers unchanged; the
 * helpers copy the data they point to in or out.
 *
 * Returns 0 or an errno from the lookup, the copyin or the helper.
 */
/* ARGSUSED */
int
sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(struct quotactl_args *) args;
	} */
	struct mount *mp;
	struct vnode *vp;
	struct quotactl_args args;
	int error;

	error = namei_simple_user(SCARG(uap, path),
	    NSM_FOLLOW_TRYEMULROOT, &vp);
	if (error != 0)
		return (error);
	mp = vp->v_mount;

	error = copyin(SCARG(uap, args), &args, sizeof(args));
	if (error) {
		goto fail;
	}

	switch (args.qc_op) {
	    case QUOTACTL_STAT:
		error = do_sys_quotactl_stat(mp, args.u.stat.qc_ret);
		break;
	    case QUOTACTL_IDTYPESTAT:
		error = do_sys_quotactl_idtypestat(mp,
				args.u.idtypestat.qc_idtype,
				args.u.idtypestat.qc_info);
		break;
	    case QUOTACTL_OBJTYPESTAT:
		error = do_sys_quotactl_objtypestat(mp,
				args.u.objtypestat.qc_objtype,
				args.u.objtypestat.qc_info);
		break;
	    case QUOTACTL_GET:
		error = do_sys_quotactl_get(mp,
				args.u.get.qc_key,
				args.u.get.qc_ret);
		break;
	    case QUOTACTL_PUT:
		error = do_sys_quotactl_put(mp,
				args.u.put.qc_key,
				args.u.put.qc_val);
		break;
	    case QUOTACTL_DELETE:
		error = do_sys_quotactl_delete(mp, args.u.delete.qc_key);
		break;
	    case QUOTACTL_CURSOROPEN:
		error = do_sys_quotactl_cursoropen(mp,
				args.u.cursoropen.qc_cursor);
		break;
	    case QUOTACTL_CURSORCLOSE:
		error = do_sys_quotactl_cursorclose(mp,
				args.u.cursorclose.qc_cursor);
		break;
	    case QUOTACTL_CURSORSKIPIDTYPE:
		error = do_sys_quotactl_cursorskipidtype(mp,
				args.u.cursorskipidtype.qc_cursor,
				args.u.cursorskipidtype.qc_idtype);
		break;
	    case QUOTACTL_CURSORGET:
		error = do_sys_quotactl_cursorget(mp,
				args.u.cursorget.qc_cursor,
				args.u.cursorget.qc_keys,
				args.u.cursorget.qc_vals,
				args.u.cursorget.qc_maxnum,
				args.u.cursorget.qc_ret);
		break;
	    case QUOTACTL_CURSORATEND:
		error = do_sys_quotactl_cursoratend(mp,
				args.u.cursoratend.qc_cursor,
				args.u.cursoratend.qc_ret);
		break;
	    case QUOTACTL_CURSORREWIND:
		error = do_sys_quotactl_cursorrewind(mp,
				args.u.cursorrewind.qc_cursor);
		break;
	    case QUOTACTL_QUOTAON:
		error = do_sys_quotactl_quotaon(mp,
				args.u.quotaon.qc_idtype,
				args.u.quotaon.qc_quotafile);
		break;
	    case QUOTACTL_QUOTAOFF:
		error = do_sys_quotactl_quotaoff(mp,
				args.u.quotaoff.qc_idtype);
		break;
	    default:
		error = EINVAL;
		break;
	}

	/* Reached on success as well: drop the vnode from the lookup. */
 fail:
	vrele(vp);
	return error;
}
1010
/*
 * Fill `sp' with the statistics of mount `mp' as seen by lwp `l'.
 *
 *	flags	MNT_WAIT or 0 asks the filesystem (VFS_STATVFS()); any
 *		other value returns the copy cached in mp->mnt_stat
 *	root	only matters for a process with a changed root
 *		directory: when the mount point is not below that root,
 *		non-zero reports the mount as "/", zero yields EPERM
 *
 * Returns 0 or an errno from VFS_STATVFS() / getcwd_common(), or EPERM
 * as described above.
 */
int
dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
    int root)
{
	struct cwdinfo *cwdi = l->l_proc->p_cwdi;
	int error = 0;

	/*
	 * If MNT_NOWAIT or MNT_LAZY is specified -- or, more generally,
	 * any value other than MNT_WAIT or 0 -- do not refresh the
	 * fsstat cache; hand back the cached copy instead.
	 */
	if (flags == MNT_NOWAIT	|| flags == MNT_LAZY ||
	    (flags != MNT_WAIT && flags != 0)) {
		memcpy(sp, &mp->mnt_stat, sizeof(*sp));
		goto done;
	}

	/* Get the filesystem stats now */
	memset(sp, 0, sizeof(*sp));
	if ((error = VFS_STATVFS(mp, sp)) != 0) {
		return error;
	}

	/*
	 * The cache is only refreshed by processes without a changed
	 * root directory.
	 */
	if (cwdi->cwdi_rdir == NULL)
		(void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
done:
	if (cwdi->cwdi_rdir != NULL) {
		size_t len;
		char *bp;
		char c;
		char *path = PNBUF_GET();

		/*
		 * getcwd_common() is given the end of the buffer and
		 * moves bp; afterwards bp is used as the path of the
		 * process root relative to rootvnode.
		 */
		bp = path + MAXPATHLEN;
		*--bp = '\0';
		rw_enter(&cwdi->cwdi_lock, RW_READER);
		error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
		    MAXPATHLEN / 2, 0, l);
		rw_exit(&cwdi->cwdi_lock);
		if (error) {
			PNBUF_PUT(path);
			return error;
		}
		len = strlen(bp);
		/* A one-character root path needs no rewriting. */
		if (len != 1) {
			/*
			 * for mount points that are below our root, we can see
			 * them, so we fix up the pathname and return them. The
			 * rest we cannot see, so we don't allow viewing the
			 * data.
			 */
			/*
			 * NOTE(review): the strlcpy() source
			 * &sp->f_mntonname[len] lies inside the destination
			 * buffer sp->f_mntonname -- confirm that the
			 * overlapping copy is acceptable here.
			 */
			if (strncmp(bp, sp->f_mntonname, len) == 0 &&
			    ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
				(void)strlcpy(sp->f_mntonname,
				    c == '\0' ? "/" : &sp->f_mntonname[len],
				    sizeof(sp->f_mntonname));
			} else {
				if (root)
					(void)strlcpy(sp->f_mntonname, "/",
					    sizeof(sp->f_mntonname));
				else
					error = EPERM;
			}
		}
		PNBUF_PUT(path);
	}
	/* Only the flags in MNT_VISFLAGMASK are reported. */
	sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
	return error;
}
1080
1081 /*
1082 * Get filesystem statistics by path.
1083 */
1084 int
1085 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1086 {
1087 struct mount *mp;
1088 int error;
1089 struct vnode *vp;
1090
1091 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1092 if (error != 0)
1093 return error;
1094 mp = vp->v_mount;
1095 error = dostatvfs(mp, sb, l, flags, 1);
1096 vrele(vp);
1097 return error;
1098 }
1099
1100 /* ARGSUSED */
1101 int
1102 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1103 {
1104 /* {
1105 syscallarg(const char *) path;
1106 syscallarg(struct statvfs *) buf;
1107 syscallarg(int) flags;
1108 } */
1109 struct statvfs *sb;
1110 int error;
1111
1112 sb = STATVFSBUF_GET();
1113 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1114 if (error == 0)
1115 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1116 STATVFSBUF_PUT(sb);
1117 return error;
1118 }
1119
1120 /*
1121 * Get filesystem statistics by fd.
1122 */
1123 int
1124 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1125 {
1126 file_t *fp;
1127 struct mount *mp;
1128 int error;
1129
1130 /* fd_getvnode() will use the descriptor for us */
1131 if ((error = fd_getvnode(fd, &fp)) != 0)
1132 return (error);
1133 mp = ((struct vnode *)fp->f_data)->v_mount;
1134 error = dostatvfs(mp, sb, curlwp, flags, 1);
1135 fd_putfile(fd);
1136 return error;
1137 }
1138
1139 /* ARGSUSED */
1140 int
1141 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1142 {
1143 /* {
1144 syscallarg(int) fd;
1145 syscallarg(struct statvfs *) buf;
1146 syscallarg(int) flags;
1147 } */
1148 struct statvfs *sb;
1149 int error;
1150
1151 sb = STATVFSBUF_GET();
1152 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1153 if (error == 0)
1154 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1155 STATVFSBUF_PUT(sb);
1156 return error;
1157 }
1158
1159
1160 /*
1161 * Get statistics on all filesystems.
1162 */
1163 int
1164 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1165 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1166 register_t *retval)
1167 {
1168 int root = 0;
1169 struct proc *p = l->l_proc;
1170 struct mount *mp, *nmp;
1171 struct statvfs *sb;
1172 size_t count, maxcount;
1173 int error = 0;
1174
1175 sb = STATVFSBUF_GET();
1176 maxcount = bufsize / entry_sz;
1177 mutex_enter(&mountlist_lock);
1178 count = 0;
1179 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1180 mp = nmp) {
1181 if (vfs_busy(mp, &nmp)) {
1182 continue;
1183 }
1184 if (sfsp && count < maxcount) {
1185 error = dostatvfs(mp, sb, l, flags, 0);
1186 if (error) {
1187 vfs_unbusy(mp, false, &nmp);
1188 error = 0;
1189 continue;
1190 }
1191 error = copyfn(sb, sfsp, entry_sz);
1192 if (error) {
1193 vfs_unbusy(mp, false, NULL);
1194 goto out;
1195 }
1196 sfsp = (char *)sfsp + entry_sz;
1197 root |= strcmp(sb->f_mntonname, "/") == 0;
1198 }
1199 count++;
1200 vfs_unbusy(mp, false, &nmp);
1201 }
1202 mutex_exit(&mountlist_lock);
1203
1204 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1205 /*
1206 * fake a root entry
1207 */
1208 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1209 sb, l, flags, 1);
1210 if (error != 0)
1211 goto out;
1212 if (sfsp) {
1213 error = copyfn(sb, sfsp, entry_sz);
1214 if (error != 0)
1215 goto out;
1216 }
1217 count++;
1218 }
1219 if (sfsp && count > maxcount)
1220 *retval = maxcount;
1221 else
1222 *retval = count;
1223 out:
1224 STATVFSBUF_PUT(sb);
1225 return error;
1226 }
1227
1228 int
1229 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1230 {
1231 /* {
1232 syscallarg(struct statvfs *) buf;
1233 syscallarg(size_t) bufsize;
1234 syscallarg(int) flags;
1235 } */
1236
1237 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1238 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1239 }
1240
1241 /*
1242 * Change current working directory to a given file descriptor.
1243 */
1244 /* ARGSUSED */
1245 int
1246 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1247 {
1248 /* {
1249 syscallarg(int) fd;
1250 } */
1251 struct proc *p = l->l_proc;
1252 struct cwdinfo *cwdi;
1253 struct vnode *vp, *tdp;
1254 struct mount *mp;
1255 file_t *fp;
1256 int error, fd;
1257
1258 /* fd_getvnode() will use the descriptor for us */
1259 fd = SCARG(uap, fd);
1260 if ((error = fd_getvnode(fd, &fp)) != 0)
1261 return (error);
1262 vp = fp->f_data;
1263
1264 vref(vp);
1265 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1266 if (vp->v_type != VDIR)
1267 error = ENOTDIR;
1268 else
1269 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1270 if (error) {
1271 vput(vp);
1272 goto out;
1273 }
1274 while ((mp = vp->v_mountedhere) != NULL) {
1275 error = vfs_busy(mp, NULL);
1276 vput(vp);
1277 if (error != 0)
1278 goto out;
1279 error = VFS_ROOT(mp, &tdp);
1280 vfs_unbusy(mp, false, NULL);
1281 if (error)
1282 goto out;
1283 vp = tdp;
1284 }
1285 VOP_UNLOCK(vp);
1286
1287 /*
1288 * Disallow changing to a directory not under the process's
1289 * current root directory (if there is one).
1290 */
1291 cwdi = p->p_cwdi;
1292 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1293 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1294 vrele(vp);
1295 error = EPERM; /* operation not permitted */
1296 } else {
1297 vrele(cwdi->cwdi_cdir);
1298 cwdi->cwdi_cdir = vp;
1299 }
1300 rw_exit(&cwdi->cwdi_lock);
1301
1302 out:
1303 fd_putfile(fd);
1304 return (error);
1305 }
1306
1307 /*
1308 * Change this process's notion of the root directory to a given file
1309 * descriptor.
1310 */
1311 int
1312 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1313 {
1314 struct proc *p = l->l_proc;
1315 struct vnode *vp;
1316 file_t *fp;
1317 int error, fd = SCARG(uap, fd);
1318
1319 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1320 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1321 return error;
1322 /* fd_getvnode() will use the descriptor for us */
1323 if ((error = fd_getvnode(fd, &fp)) != 0)
1324 return error;
1325 vp = fp->f_data;
1326 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1327 if (vp->v_type != VDIR)
1328 error = ENOTDIR;
1329 else
1330 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1331 VOP_UNLOCK(vp);
1332 if (error)
1333 goto out;
1334 vref(vp);
1335
1336 change_root(p->p_cwdi, vp, l);
1337
1338 out:
1339 fd_putfile(fd);
1340 return (error);
1341 }
1342
1343 /*
1344 * Change current working directory (``.'').
1345 */
1346 /* ARGSUSED */
1347 int
1348 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1349 {
1350 /* {
1351 syscallarg(const char *) path;
1352 } */
1353 struct proc *p = l->l_proc;
1354 struct cwdinfo *cwdi;
1355 int error;
1356 struct vnode *vp;
1357
1358 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1359 &vp, l)) != 0)
1360 return (error);
1361 cwdi = p->p_cwdi;
1362 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1363 vrele(cwdi->cwdi_cdir);
1364 cwdi->cwdi_cdir = vp;
1365 rw_exit(&cwdi->cwdi_lock);
1366 return (0);
1367 }
1368
1369 /*
1370 * Change notion of root (``/'') directory.
1371 */
1372 /* ARGSUSED */
1373 int
1374 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1375 {
1376 /* {
1377 syscallarg(const char *) path;
1378 } */
1379 struct proc *p = l->l_proc;
1380 int error;
1381 struct vnode *vp;
1382
1383 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1384 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1385 return (error);
1386 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1387 &vp, l)) != 0)
1388 return (error);
1389
1390 change_root(p->p_cwdi, vp, l);
1391
1392 return (0);
1393 }
1394
1395 /*
1396 * Common routine for chroot and fchroot.
1397 * NB: callers need to properly authorize the change root operation.
1398 */
1399 void
1400 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1401 {
1402
1403 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1404 if (cwdi->cwdi_rdir != NULL)
1405 vrele(cwdi->cwdi_rdir);
1406 cwdi->cwdi_rdir = vp;
1407
1408 /*
1409 * Prevent escaping from chroot by putting the root under
1410 * the working directory. Silently chdir to / if we aren't
1411 * already there.
1412 */
1413 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1414 /*
1415 * XXX would be more failsafe to change directory to a
1416 * deadfs node here instead
1417 */
1418 vrele(cwdi->cwdi_cdir);
1419 vref(vp);
1420 cwdi->cwdi_cdir = vp;
1421 }
1422 rw_exit(&cwdi->cwdi_lock);
1423 }
1424
1425 /*
1426 * Common routine for chroot and chdir.
1427 * XXX "where" should be enum uio_seg
1428 */
1429 int
1430 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1431 {
1432 struct pathbuf *pb;
1433 struct nameidata nd;
1434 int error;
1435
1436 error = pathbuf_maybe_copyin(path, where, &pb);
1437 if (error) {
1438 return error;
1439 }
1440 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1441 if ((error = namei(&nd)) != 0) {
1442 pathbuf_destroy(pb);
1443 return error;
1444 }
1445 *vpp = nd.ni_vp;
1446 pathbuf_destroy(pb);
1447
1448 if ((*vpp)->v_type != VDIR)
1449 error = ENOTDIR;
1450 else
1451 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1452
1453 if (error)
1454 vput(*vpp);
1455 else
1456 VOP_UNLOCK(*vpp);
1457 return (error);
1458 }
1459
1460 /*
1461 * Check permissions, allocate an open file structure,
1462 * and call the device open routine if any.
1463 */
1464 int
1465 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1466 {
1467 /* {
1468 syscallarg(const char *) path;
1469 syscallarg(int) flags;
1470 syscallarg(int) mode;
1471 } */
1472 struct proc *p = l->l_proc;
1473 struct cwdinfo *cwdi = p->p_cwdi;
1474 file_t *fp;
1475 struct vnode *vp;
1476 int flags, cmode;
1477 int indx, error;
1478 struct pathbuf *pb;
1479 struct nameidata nd;
1480
1481 flags = FFLAGS(SCARG(uap, flags));
1482 if ((flags & (FREAD | FWRITE)) == 0)
1483 return (EINVAL);
1484
1485 error = pathbuf_copyin(SCARG(uap, path), &pb);
1486 if (error) {
1487 return error;
1488 }
1489
1490 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1491 pathbuf_destroy(pb);
1492 return error;
1493 }
1494 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1495 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1496 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1497 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1498 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1499 fd_abort(p, fp, indx);
1500 if ((error == EDUPFD || error == EMOVEFD) &&
1501 l->l_dupfd >= 0 && /* XXX from fdopen */
1502 (error =
1503 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1504 *retval = indx;
1505 pathbuf_destroy(pb);
1506 return (0);
1507 }
1508 if (error == ERESTART)
1509 error = EINTR;
1510 pathbuf_destroy(pb);
1511 return (error);
1512 }
1513
1514 l->l_dupfd = 0;
1515 vp = nd.ni_vp;
1516 pathbuf_destroy(pb);
1517
1518 if ((error = open_setfp(l, fp, vp, indx, flags)))
1519 return error;
1520
1521 VOP_UNLOCK(vp);
1522 *retval = indx;
1523 fd_affix(p, fp, indx);
1524 return (0);
1525 }
1526
1527 int
1528 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1529 {
1530 /* {
1531 syscallarg(int) fd;
1532 syscallarg(const char *) path;
1533 syscallarg(int) flags;
1534 syscallarg(int) mode;
1535 } */
1536
1537 return ENOSYS;
1538 }
1539
1540 static void
1541 vfs__fhfree(fhandle_t *fhp)
1542 {
1543 size_t fhsize;
1544
1545 if (fhp == NULL) {
1546 return;
1547 }
1548 fhsize = FHANDLE_SIZE(fhp);
1549 kmem_free(fhp, fhsize);
1550 }
1551
1552 /*
1553 * vfs_composefh: compose a filehandle.
1554 */
1555
1556 int
1557 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1558 {
1559 struct mount *mp;
1560 struct fid *fidp;
1561 int error;
1562 size_t needfhsize;
1563 size_t fidsize;
1564
1565 mp = vp->v_mount;
1566 fidp = NULL;
1567 if (*fh_size < FHANDLE_SIZE_MIN) {
1568 fidsize = 0;
1569 } else {
1570 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1571 if (fhp != NULL) {
1572 memset(fhp, 0, *fh_size);
1573 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1574 fidp = &fhp->fh_fid;
1575 }
1576 }
1577 error = VFS_VPTOFH(vp, fidp, &fidsize);
1578 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1579 if (error == 0 && *fh_size < needfhsize) {
1580 error = E2BIG;
1581 }
1582 *fh_size = needfhsize;
1583 return error;
1584 }
1585
1586 int
1587 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1588 {
1589 struct mount *mp;
1590 fhandle_t *fhp;
1591 size_t fhsize;
1592 size_t fidsize;
1593 int error;
1594
1595 *fhpp = NULL;
1596 mp = vp->v_mount;
1597 fidsize = 0;
1598 error = VFS_VPTOFH(vp, NULL, &fidsize);
1599 KASSERT(error != 0);
1600 if (error != E2BIG) {
1601 goto out;
1602 }
1603 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1604 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1605 if (fhp == NULL) {
1606 error = ENOMEM;
1607 goto out;
1608 }
1609 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1610 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1611 if (error == 0) {
1612 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1613 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1614 *fhpp = fhp;
1615 } else {
1616 kmem_free(fhp, fhsize);
1617 }
1618 out:
1619 return error;
1620 }
1621
1622 void
1623 vfs_composefh_free(fhandle_t *fhp)
1624 {
1625
1626 vfs__fhfree(fhp);
1627 }
1628
1629 /*
1630 * vfs_fhtovp: lookup a vnode by a filehandle.
1631 */
1632
1633 int
1634 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1635 {
1636 struct mount *mp;
1637 int error;
1638
1639 *vpp = NULL;
1640 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1641 if (mp == NULL) {
1642 error = ESTALE;
1643 goto out;
1644 }
1645 if (mp->mnt_op->vfs_fhtovp == NULL) {
1646 error = EOPNOTSUPP;
1647 goto out;
1648 }
1649 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1650 out:
1651 return error;
1652 }
1653
1654 /*
1655 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1656 * the needed size.
1657 */
1658
1659 int
1660 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1661 {
1662 fhandle_t *fhp;
1663 int error;
1664
1665 *fhpp = NULL;
1666 if (fhsize > FHANDLE_SIZE_MAX) {
1667 return EINVAL;
1668 }
1669 if (fhsize < FHANDLE_SIZE_MIN) {
1670 return EINVAL;
1671 }
1672 again:
1673 fhp = kmem_alloc(fhsize, KM_SLEEP);
1674 if (fhp == NULL) {
1675 return ENOMEM;
1676 }
1677 error = copyin(ufhp, fhp, fhsize);
1678 if (error == 0) {
1679 /* XXX this check shouldn't be here */
1680 if (FHANDLE_SIZE(fhp) == fhsize) {
1681 *fhpp = fhp;
1682 return 0;
1683 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1684 /*
1685 * a kludge for nfsv2 padded handles.
1686 */
1687 size_t sz;
1688
1689 sz = FHANDLE_SIZE(fhp);
1690 kmem_free(fhp, fhsize);
1691 fhsize = sz;
1692 goto again;
1693 } else {
1694 /*
1695 * userland told us wrong size.
1696 */
1697 error = EINVAL;
1698 }
1699 }
1700 kmem_free(fhp, fhsize);
1701 return error;
1702 }
1703
1704 void
1705 vfs_copyinfh_free(fhandle_t *fhp)
1706 {
1707
1708 vfs__fhfree(fhp);
1709 }
1710
1711 /*
1712 * Get file handle system call
1713 */
1714 int
1715 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1716 {
1717 /* {
1718 syscallarg(char *) fname;
1719 syscallarg(fhandle_t *) fhp;
1720 syscallarg(size_t *) fh_size;
1721 } */
1722 struct vnode *vp;
1723 fhandle_t *fh;
1724 int error;
1725 struct pathbuf *pb;
1726 struct nameidata nd;
1727 size_t sz;
1728 size_t usz;
1729
1730 /*
1731 * Must be super user
1732 */
1733 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1734 0, NULL, NULL, NULL);
1735 if (error)
1736 return (error);
1737
1738 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1739 if (error) {
1740 return error;
1741 }
1742 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1743 error = namei(&nd);
1744 if (error) {
1745 pathbuf_destroy(pb);
1746 return error;
1747 }
1748 vp = nd.ni_vp;
1749 pathbuf_destroy(pb);
1750
1751 error = vfs_composefh_alloc(vp, &fh);
1752 vput(vp);
1753 if (error != 0) {
1754 goto out;
1755 }
1756 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1757 if (error != 0) {
1758 goto out;
1759 }
1760 sz = FHANDLE_SIZE(fh);
1761 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1762 if (error != 0) {
1763 goto out;
1764 }
1765 if (usz >= sz) {
1766 error = copyout(fh, SCARG(uap, fhp), sz);
1767 } else {
1768 error = E2BIG;
1769 }
1770 out:
1771 vfs_composefh_free(fh);
1772 return (error);
1773 }
1774
1775 /*
1776 * Open a file given a file handle.
1777 *
1778 * Check permissions, allocate an open file structure,
1779 * and call the device open routine if any.
1780 */
1781
1782 int
1783 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1784 register_t *retval)
1785 {
1786 file_t *fp;
1787 struct vnode *vp = NULL;
1788 kauth_cred_t cred = l->l_cred;
1789 file_t *nfp;
1790 int indx, error = 0;
1791 struct vattr va;
1792 fhandle_t *fh;
1793 int flags;
1794 proc_t *p;
1795
1796 p = curproc;
1797
1798 /*
1799 * Must be super user
1800 */
1801 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1802 0, NULL, NULL, NULL)))
1803 return (error);
1804
1805 flags = FFLAGS(oflags);
1806 if ((flags & (FREAD | FWRITE)) == 0)
1807 return (EINVAL);
1808 if ((flags & O_CREAT))
1809 return (EINVAL);
1810 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1811 return (error);
1812 fp = nfp;
1813 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1814 if (error != 0) {
1815 goto bad;
1816 }
1817 error = vfs_fhtovp(fh, &vp);
1818 if (error != 0) {
1819 goto bad;
1820 }
1821
1822 /* Now do an effective vn_open */
1823
1824 if (vp->v_type == VSOCK) {
1825 error = EOPNOTSUPP;
1826 goto bad;
1827 }
1828 error = vn_openchk(vp, cred, flags);
1829 if (error != 0)
1830 goto bad;
1831 if (flags & O_TRUNC) {
1832 VOP_UNLOCK(vp); /* XXX */
1833 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1834 vattr_null(&va);
1835 va.va_size = 0;
1836 error = VOP_SETATTR(vp, &va, cred);
1837 if (error)
1838 goto bad;
1839 }
1840 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1841 goto bad;
1842 if (flags & FWRITE) {
1843 mutex_enter(vp->v_interlock);
1844 vp->v_writecount++;
1845 mutex_exit(vp->v_interlock);
1846 }
1847
1848 /* done with modified vn_open, now finish what sys_open does. */
1849 if ((error = open_setfp(l, fp, vp, indx, flags)))
1850 return error;
1851
1852 VOP_UNLOCK(vp);
1853 *retval = indx;
1854 fd_affix(p, fp, indx);
1855 vfs_copyinfh_free(fh);
1856 return (0);
1857
1858 bad:
1859 fd_abort(p, fp, indx);
1860 if (vp != NULL)
1861 vput(vp);
1862 vfs_copyinfh_free(fh);
1863 return (error);
1864 }
1865
1866 int
1867 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1868 {
1869 /* {
1870 syscallarg(const void *) fhp;
1871 syscallarg(size_t) fh_size;
1872 syscallarg(int) flags;
1873 } */
1874
1875 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1876 SCARG(uap, flags), retval);
1877 }
1878
1879 int
1880 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1881 {
1882 int error;
1883 fhandle_t *fh;
1884 struct vnode *vp;
1885
1886 /*
1887 * Must be super user
1888 */
1889 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1890 0, NULL, NULL, NULL)))
1891 return (error);
1892
1893 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1894 if (error != 0)
1895 return error;
1896
1897 error = vfs_fhtovp(fh, &vp);
1898 vfs_copyinfh_free(fh);
1899 if (error != 0)
1900 return error;
1901
1902 error = vn_stat(vp, sb);
1903 vput(vp);
1904 return error;
1905 }
1906
1907
1908 /* ARGSUSED */
1909 int
1910 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
1911 {
1912 /* {
1913 syscallarg(const void *) fhp;
1914 syscallarg(size_t) fh_size;
1915 syscallarg(struct stat *) sb;
1916 } */
1917 struct stat sb;
1918 int error;
1919
1920 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1921 if (error)
1922 return error;
1923 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1924 }
1925
1926 int
1927 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1928 int flags)
1929 {
1930 fhandle_t *fh;
1931 struct mount *mp;
1932 struct vnode *vp;
1933 int error;
1934
1935 /*
1936 * Must be super user
1937 */
1938 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1939 0, NULL, NULL, NULL)))
1940 return error;
1941
1942 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1943 if (error != 0)
1944 return error;
1945
1946 error = vfs_fhtovp(fh, &vp);
1947 vfs_copyinfh_free(fh);
1948 if (error != 0)
1949 return error;
1950
1951 mp = vp->v_mount;
1952 error = dostatvfs(mp, sb, l, flags, 1);
1953 vput(vp);
1954 return error;
1955 }
1956
1957 /* ARGSUSED */
1958 int
1959 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
1960 {
1961 /* {
1962 syscallarg(const void *) fhp;
1963 syscallarg(size_t) fh_size;
1964 syscallarg(struct statvfs *) buf;
1965 syscallarg(int) flags;
1966 } */
1967 struct statvfs *sb = STATVFSBUF_GET();
1968 int error;
1969
1970 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
1971 SCARG(uap, flags));
1972 if (error == 0)
1973 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1974 STATVFSBUF_PUT(sb);
1975 return error;
1976 }
1977
1978 /*
1979 * Create a special file.
1980 */
1981 /* ARGSUSED */
1982 int
1983 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
1984 register_t *retval)
1985 {
1986 /* {
1987 syscallarg(const char *) path;
1988 syscallarg(mode_t) mode;
1989 syscallarg(dev_t) dev;
1990 } */
1991 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
1992 SCARG(uap, dev), retval, UIO_USERSPACE);
1993 }
1994
1995 int
1996 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
1997 register_t *retval)
1998 {
1999 /* {
2000 syscallarg(int) fd;
2001 syscallarg(const char *) path;
2002 syscallarg(mode_t) mode;
2003 syscallarg(uint32_t) dev;
2004 } */
2005
2006 return ENOSYS;
2007 }
2008
2009 int
2010 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2011 register_t *retval, enum uio_seg seg)
2012 {
2013 struct proc *p = l->l_proc;
2014 struct vnode *vp;
2015 struct vattr vattr;
2016 int error, optype;
2017 struct pathbuf *pb;
2018 struct nameidata nd;
2019 const char *pathstring;
2020
2021 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2022 0, NULL, NULL, NULL)) != 0)
2023 return (error);
2024
2025 optype = VOP_MKNOD_DESCOFFSET;
2026
2027 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2028 if (error) {
2029 return error;
2030 }
2031 pathstring = pathbuf_stringcopy_get(pb);
2032 if (pathstring == NULL) {
2033 pathbuf_destroy(pb);
2034 return ENOMEM;
2035 }
2036
2037 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2038 if ((error = namei(&nd)) != 0)
2039 goto out;
2040 vp = nd.ni_vp;
2041
2042 if (vp != NULL)
2043 error = EEXIST;
2044 else {
2045 vattr_null(&vattr);
2046 /* We will read cwdi->cwdi_cmask unlocked. */
2047 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2048 vattr.va_rdev = dev;
2049
2050 switch (mode & S_IFMT) {
2051 case S_IFMT: /* used by badsect to flag bad sectors */
2052 vattr.va_type = VBAD;
2053 break;
2054 case S_IFCHR:
2055 vattr.va_type = VCHR;
2056 break;
2057 case S_IFBLK:
2058 vattr.va_type = VBLK;
2059 break;
2060 case S_IFWHT:
2061 optype = VOP_WHITEOUT_DESCOFFSET;
2062 break;
2063 case S_IFREG:
2064 #if NVERIEXEC > 0
2065 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2066 O_CREAT);
2067 #endif /* NVERIEXEC > 0 */
2068 vattr.va_type = VREG;
2069 vattr.va_rdev = VNOVAL;
2070 optype = VOP_CREATE_DESCOFFSET;
2071 break;
2072 default:
2073 error = EINVAL;
2074 break;
2075 }
2076 }
2077 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2078 && vattr.va_rdev == VNOVAL)
2079 error = EINVAL;
2080 if (!error) {
2081 switch (optype) {
2082 case VOP_WHITEOUT_DESCOFFSET:
2083 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2084 if (error)
2085 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2086 vput(nd.ni_dvp);
2087 break;
2088
2089 case VOP_MKNOD_DESCOFFSET:
2090 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2091 &nd.ni_cnd, &vattr);
2092 if (error == 0)
2093 vput(nd.ni_vp);
2094 break;
2095
2096 case VOP_CREATE_DESCOFFSET:
2097 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2098 &nd.ni_cnd, &vattr);
2099 if (error == 0)
2100 vput(nd.ni_vp);
2101 break;
2102 }
2103 } else {
2104 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2105 if (nd.ni_dvp == vp)
2106 vrele(nd.ni_dvp);
2107 else
2108 vput(nd.ni_dvp);
2109 if (vp)
2110 vrele(vp);
2111 }
2112 out:
2113 pathbuf_stringcopy_put(pb, pathstring);
2114 pathbuf_destroy(pb);
2115 return (error);
2116 }
2117
2118 /*
2119 * Create a named pipe.
2120 */
2121 /* ARGSUSED */
2122 int
2123 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2124 {
2125 /* {
2126 syscallarg(const char *) path;
2127 syscallarg(int) mode;
2128 } */
2129 struct proc *p = l->l_proc;
2130 struct vattr vattr;
2131 int error;
2132 struct pathbuf *pb;
2133 struct nameidata nd;
2134
2135 error = pathbuf_copyin(SCARG(uap, path), &pb);
2136 if (error) {
2137 return error;
2138 }
2139 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2140 if ((error = namei(&nd)) != 0) {
2141 pathbuf_destroy(pb);
2142 return error;
2143 }
2144 if (nd.ni_vp != NULL) {
2145 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2146 if (nd.ni_dvp == nd.ni_vp)
2147 vrele(nd.ni_dvp);
2148 else
2149 vput(nd.ni_dvp);
2150 vrele(nd.ni_vp);
2151 pathbuf_destroy(pb);
2152 return (EEXIST);
2153 }
2154 vattr_null(&vattr);
2155 vattr.va_type = VFIFO;
2156 /* We will read cwdi->cwdi_cmask unlocked. */
2157 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2158 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2159 if (error == 0)
2160 vput(nd.ni_vp);
2161 pathbuf_destroy(pb);
2162 return (error);
2163 }
2164
2165 int
2166 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2167 register_t *retval)
2168 {
2169 /* {
2170 syscallarg(int) fd;
2171 syscallarg(const char *) path;
2172 syscallarg(int) mode;
2173 } */
2174
2175 return ENOSYS;
2176 }
2177 /*
2178 * Make a hard file link.
2179 */
2180 /* ARGSUSED */
2181 static int
2182 do_sys_link(struct lwp *l, const char *path, const char *link,
2183 int follow, register_t *retval)
2184 {
2185 struct vnode *vp;
2186 struct pathbuf *linkpb;
2187 struct nameidata nd;
2188 namei_simple_flags_t namei_simple_flags;
2189 int error;
2190
2191 if (follow)
2192 namei_simple_flags = NSM_FOLLOW_TRYEMULROOT;
2193 else
2194 namei_simple_flags = NSM_NOFOLLOW_TRYEMULROOT;
2195
2196 error = namei_simple_user(path, namei_simple_flags, &vp);
2197 if (error != 0)
2198 return (error);
2199 error = pathbuf_copyin(link, &linkpb);
2200 if (error) {
2201 goto out1;
2202 }
2203 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2204 if ((error = namei(&nd)) != 0)
2205 goto out2;
2206 if (nd.ni_vp) {
2207 error = EEXIST;
2208 goto abortop;
2209 }
2210 /* Prevent hard links on directories. */
2211 if (vp->v_type == VDIR) {
2212 error = EPERM;
2213 goto abortop;
2214 }
2215 /* Prevent cross-mount operation. */
2216 if (nd.ni_dvp->v_mount != vp->v_mount) {
2217 error = EXDEV;
2218 goto abortop;
2219 }
2220 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2221 out2:
2222 pathbuf_destroy(linkpb);
2223 out1:
2224 vrele(vp);
2225 return (error);
2226 abortop:
2227 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2228 if (nd.ni_dvp == nd.ni_vp)
2229 vrele(nd.ni_dvp);
2230 else
2231 vput(nd.ni_dvp);
2232 if (nd.ni_vp != NULL)
2233 vrele(nd.ni_vp);
2234 goto out2;
2235 }
2236
2237 int
2238 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2239 {
2240 /* {
2241 syscallarg(const char *) path;
2242 syscallarg(const char *) link;
2243 } */
2244 const char *path = SCARG(uap, path);
2245 const char *link = SCARG(uap, link);
2246
2247 return do_sys_link(l, path, link, 1, retval);
2248 }
2249
2250 int
2251 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2252 register_t *retval)
2253 {
2254 /* {
2255 syscallarg(int) fd1;
2256 syscallarg(const char *) name1;
2257 syscallarg(int) fd2;
2258 syscallarg(const char *) name2;
2259 syscallarg(int) flags;
2260 } */
2261 const char *name1 = SCARG(uap, name1);
2262 const char *name2 = SCARG(uap, name2);
2263 int follow;
2264
2265 /*
2266 * Specified fd1 and fd2 are not yet implemented
2267 */
2268 if ((SCARG(uap, fd1) != AT_FDCWD) || (SCARG(uap, fd2) != AT_FDCWD))
2269 return ENOSYS;
2270
2271 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2272
2273 return do_sys_link(l, name1, name2, follow, retval);
2274 }
2275
2276
2277 int
2278 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2279 {
2280 struct proc *p = curproc;
2281 struct vattr vattr;
2282 char *path;
2283 int error;
2284 struct pathbuf *linkpb;
2285 struct nameidata nd;
2286
2287 path = PNBUF_GET();
2288 if (seg == UIO_USERSPACE) {
2289 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2290 goto out1;
2291 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2292 goto out1;
2293 } else {
2294 KASSERT(strlen(patharg) < MAXPATHLEN);
2295 strcpy(path, patharg);
2296 linkpb = pathbuf_create(link);
2297 if (linkpb == NULL) {
2298 error = ENOMEM;
2299 goto out1;
2300 }
2301 }
2302 ktrkuser("symlink-target", path, strlen(path));
2303
2304 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2305 if ((error = namei(&nd)) != 0)
2306 goto out2;
2307 if (nd.ni_vp) {
2308 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2309 if (nd.ni_dvp == nd.ni_vp)
2310 vrele(nd.ni_dvp);
2311 else
2312 vput(nd.ni_dvp);
2313 vrele(nd.ni_vp);
2314 error = EEXIST;
2315 goto out2;
2316 }
2317 vattr_null(&vattr);
2318 vattr.va_type = VLNK;
2319 /* We will read cwdi->cwdi_cmask unlocked. */
2320 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2321 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2322 if (error == 0)
2323 vput(nd.ni_vp);
2324 out2:
2325 pathbuf_destroy(linkpb);
2326 out1:
2327 PNBUF_PUT(path);
2328 return (error);
2329 }
2330
2331 /*
2332 * Make a symbolic link.
2333 */
2334 /* ARGSUSED */
2335 int
2336 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2337 {
2338 /* {
2339 syscallarg(const char *) path;
2340 syscallarg(const char *) link;
2341 } */
2342
2343 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link),
2344 UIO_USERSPACE);
2345 }
2346
2347 int
2348 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2349 register_t *retval)
2350 {
2351 /* {
2352 syscallarg(int) fd;
2353 syscallarg(const char *) path;
2354 syscallarg(const char *) link;
2355 } */
2356
2357 return ENOSYS;
2358 }
2359
2360 /*
2361 * Delete a whiteout from the filesystem.
2362 */
2363 /* ARGSUSED */
2364 int
2365 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2366 {
2367 /* {
2368 syscallarg(const char *) path;
2369 } */
2370 int error;
2371 struct pathbuf *pb;
2372 struct nameidata nd;
2373
2374 error = pathbuf_copyin(SCARG(uap, path), &pb);
2375 if (error) {
2376 return error;
2377 }
2378
2379 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2380 error = namei(&nd);
2381 if (error) {
2382 pathbuf_destroy(pb);
2383 return (error);
2384 }
2385
2386 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2387 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2388 if (nd.ni_dvp == nd.ni_vp)
2389 vrele(nd.ni_dvp);
2390 else
2391 vput(nd.ni_dvp);
2392 if (nd.ni_vp)
2393 vrele(nd.ni_vp);
2394 pathbuf_destroy(pb);
2395 return (EEXIST);
2396 }
2397 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2398 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2399 vput(nd.ni_dvp);
2400 pathbuf_destroy(pb);
2401 return (error);
2402 }
2403
2404 /*
2405 * Delete a name from the filesystem.
2406 */
2407 /* ARGSUSED */
2408 int
2409 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2410 {
2411 /* {
2412 syscallarg(const char *) path;
2413 } */
2414
2415 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2416 }
2417
2418 int
2419 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2420 register_t *retval)
2421 {
2422 /* {
2423 syscallarg(int) fd;
2424 syscallarg(const char *) path;
2425 } */
2426
2427 return ENOSYS;
2428 }
2429
2430 int
2431 do_sys_unlink(const char *arg, enum uio_seg seg)
2432 {
2433 struct vnode *vp;
2434 int error;
2435 struct pathbuf *pb;
2436 struct nameidata nd;
2437 const char *pathstring;
2438
2439 error = pathbuf_maybe_copyin(arg, seg, &pb);
2440 if (error) {
2441 return error;
2442 }
2443 pathstring = pathbuf_stringcopy_get(pb);
2444 if (pathstring == NULL) {
2445 pathbuf_destroy(pb);
2446 return ENOMEM;
2447 }
2448
2449 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2450 if ((error = namei(&nd)) != 0)
2451 goto out;
2452 vp = nd.ni_vp;
2453
2454 /*
2455 * The root of a mounted filesystem cannot be deleted.
2456 */
2457 if (vp->v_vflag & VV_ROOT) {
2458 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2459 if (nd.ni_dvp == vp)
2460 vrele(nd.ni_dvp);
2461 else
2462 vput(nd.ni_dvp);
2463 vput(vp);
2464 error = EBUSY;
2465 goto out;
2466 }
2467
2468 #if NVERIEXEC > 0
2469 /* Handle remove requests for veriexec entries. */
2470 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2471 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2472 if (nd.ni_dvp == vp)
2473 vrele(nd.ni_dvp);
2474 else
2475 vput(nd.ni_dvp);
2476 vput(vp);
2477 goto out;
2478 }
2479 #endif /* NVERIEXEC > 0 */
2480
2481 #ifdef FILEASSOC
2482 (void)fileassoc_file_delete(vp);
2483 #endif /* FILEASSOC */
2484 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2485 out:
2486 pathbuf_stringcopy_put(pb, pathstring);
2487 pathbuf_destroy(pb);
2488 return (error);
2489 }
2490
2491 /*
2492 * Reposition read/write file offset.
2493 */
2494 int
2495 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2496 {
2497 /* {
2498 syscallarg(int) fd;
2499 syscallarg(int) pad;
2500 syscallarg(off_t) offset;
2501 syscallarg(int) whence;
2502 } */
2503 kauth_cred_t cred = l->l_cred;
2504 file_t *fp;
2505 struct vnode *vp;
2506 struct vattr vattr;
2507 off_t newoff;
2508 int error, fd;
2509
2510 fd = SCARG(uap, fd);
2511
2512 if ((fp = fd_getfile(fd)) == NULL)
2513 return (EBADF);
2514
2515 vp = fp->f_data;
2516 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2517 error = ESPIPE;
2518 goto out;
2519 }
2520
2521 switch (SCARG(uap, whence)) {
2522 case SEEK_CUR:
2523 newoff = fp->f_offset + SCARG(uap, offset);
2524 break;
2525 case SEEK_END:
2526 vn_lock(vp, LK_SHARED | LK_RETRY);
2527 error = VOP_GETATTR(vp, &vattr, cred);
2528 VOP_UNLOCK(vp);
2529 if (error) {
2530 goto out;
2531 }
2532 newoff = SCARG(uap, offset) + vattr.va_size;
2533 break;
2534 case SEEK_SET:
2535 newoff = SCARG(uap, offset);
2536 break;
2537 default:
2538 error = EINVAL;
2539 goto out;
2540 }
2541 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2542 *(off_t *)retval = fp->f_offset = newoff;
2543 }
2544 out:
2545 fd_putfile(fd);
2546 return (error);
2547 }
2548
2549 /*
2550 * Positional read system call.
2551 */
2552 int
2553 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2554 {
2555 /* {
2556 syscallarg(int) fd;
2557 syscallarg(void *) buf;
2558 syscallarg(size_t) nbyte;
2559 syscallarg(off_t) offset;
2560 } */
2561 file_t *fp;
2562 struct vnode *vp;
2563 off_t offset;
2564 int error, fd = SCARG(uap, fd);
2565
2566 if ((fp = fd_getfile(fd)) == NULL)
2567 return (EBADF);
2568
2569 if ((fp->f_flag & FREAD) == 0) {
2570 fd_putfile(fd);
2571 return (EBADF);
2572 }
2573
2574 vp = fp->f_data;
2575 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2576 error = ESPIPE;
2577 goto out;
2578 }
2579
2580 offset = SCARG(uap, offset);
2581
2582 /*
2583 * XXX This works because no file systems actually
2584 * XXX take any action on the seek operation.
2585 */
2586 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2587 goto out;
2588
2589 /* dofileread() will unuse the descriptor for us */
2590 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2591 &offset, 0, retval));
2592
2593 out:
2594 fd_putfile(fd);
2595 return (error);
2596 }
2597
2598 /*
2599 * Positional scatter read system call.
2600 */
2601 int
2602 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2603 {
2604 /* {
2605 syscallarg(int) fd;
2606 syscallarg(const struct iovec *) iovp;
2607 syscallarg(int) iovcnt;
2608 syscallarg(off_t) offset;
2609 } */
2610 off_t offset = SCARG(uap, offset);
2611
2612 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2613 SCARG(uap, iovcnt), &offset, 0, retval);
2614 }
2615
2616 /*
2617 * Positional write system call.
2618 */
2619 int
2620 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2621 {
2622 /* {
2623 syscallarg(int) fd;
2624 syscallarg(const void *) buf;
2625 syscallarg(size_t) nbyte;
2626 syscallarg(off_t) offset;
2627 } */
2628 file_t *fp;
2629 struct vnode *vp;
2630 off_t offset;
2631 int error, fd = SCARG(uap, fd);
2632
2633 if ((fp = fd_getfile(fd)) == NULL)
2634 return (EBADF);
2635
2636 if ((fp->f_flag & FWRITE) == 0) {
2637 fd_putfile(fd);
2638 return (EBADF);
2639 }
2640
2641 vp = fp->f_data;
2642 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2643 error = ESPIPE;
2644 goto out;
2645 }
2646
2647 offset = SCARG(uap, offset);
2648
2649 /*
2650 * XXX This works because no file systems actually
2651 * XXX take any action on the seek operation.
2652 */
2653 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2654 goto out;
2655
2656 /* dofilewrite() will unuse the descriptor for us */
2657 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2658 &offset, 0, retval));
2659
2660 out:
2661 fd_putfile(fd);
2662 return (error);
2663 }
2664
2665 /*
2666 * Positional gather write system call.
2667 */
2668 int
2669 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2670 {
2671 /* {
2672 syscallarg(int) fd;
2673 syscallarg(const struct iovec *) iovp;
2674 syscallarg(int) iovcnt;
2675 syscallarg(off_t) offset;
2676 } */
2677 off_t offset = SCARG(uap, offset);
2678
2679 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2680 SCARG(uap, iovcnt), &offset, 0, retval);
2681 }
2682
2683 /*
2684 * Check access permissions.
2685 */
2686 int
2687 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2688 {
2689 /* {
2690 syscallarg(const char *) path;
2691 syscallarg(int) flags;
2692 } */
2693 kauth_cred_t cred;
2694 struct vnode *vp;
2695 int error, flags;
2696 struct pathbuf *pb;
2697 struct nameidata nd;
2698
2699 CTASSERT(F_OK == 0);
2700 if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) {
2701 /* nonsense flags */
2702 return EINVAL;
2703 }
2704
2705 error = pathbuf_copyin(SCARG(uap, path), &pb);
2706 if (error) {
2707 return error;
2708 }
2709 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2710
2711 /* Override default credentials */
2712 cred = kauth_cred_dup(l->l_cred);
2713 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2714 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2715 nd.ni_cnd.cn_cred = cred;
2716
2717 if ((error = namei(&nd)) != 0) {
2718 pathbuf_destroy(pb);
2719 goto out;
2720 }
2721 vp = nd.ni_vp;
2722 pathbuf_destroy(pb);
2723
2724 /* Flags == 0 means only check for existence. */
2725 if (SCARG(uap, flags)) {
2726 flags = 0;
2727 if (SCARG(uap, flags) & R_OK)
2728 flags |= VREAD;
2729 if (SCARG(uap, flags) & W_OK)
2730 flags |= VWRITE;
2731 if (SCARG(uap, flags) & X_OK)
2732 flags |= VEXEC;
2733
2734 error = VOP_ACCESS(vp, flags, cred);
2735 if (!error && (flags & VWRITE))
2736 error = vn_writechk(vp);
2737 }
2738 vput(vp);
2739 out:
2740 kauth_cred_free(cred);
2741 return (error);
2742 }
2743
2744 int
2745 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
2746 register_t *retval)
2747 {
2748 /* {
2749 syscallarg(int) fd;
2750 syscallarg(const char *) path;
2751 syscallarg(int) amode;
2752 syscallarg(int) flag;
2753 } */
2754
2755 return ENOSYS;
2756 }
2757
2758 /*
2759 * Common code for all sys_stat functions, including compat versions.
2760 */
2761 int
2762 do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb)
2763 {
2764 int error;
2765 struct pathbuf *pb;
2766 struct nameidata nd;
2767
2768 error = pathbuf_copyin(userpath, &pb);
2769 if (error) {
2770 return error;
2771 }
2772 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb);
2773 error = namei(&nd);
2774 if (error != 0) {
2775 pathbuf_destroy(pb);
2776 return error;
2777 }
2778 error = vn_stat(nd.ni_vp, sb);
2779 vput(nd.ni_vp);
2780 pathbuf_destroy(pb);
2781 return error;
2782 }
2783
2784 /*
2785 * Get file status; this version follows links.
2786 */
2787 /* ARGSUSED */
2788 int
2789 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
2790 {
2791 /* {
2792 syscallarg(const char *) path;
2793 syscallarg(struct stat *) ub;
2794 } */
2795 struct stat sb;
2796 int error;
2797
2798 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2799 if (error)
2800 return error;
2801 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2802 }
2803
2804 /*
2805 * Get file status; this version does not follow links.
2806 */
2807 /* ARGSUSED */
2808 int
2809 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
2810 {
2811 /* {
2812 syscallarg(const char *) path;
2813 syscallarg(struct stat *) ub;
2814 } */
2815 struct stat sb;
2816 int error;
2817
2818 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2819 if (error)
2820 return error;
2821 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2822 }
2823
2824 int
2825 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
2826 register_t *retval)
2827 {
2828 /* {
2829 syscallarg(int) fd;
2830 syscallarg(const char *) path;
2831 syscallarg(struct stat *) ub;
2832 syscallarg(int) flag;
2833 } */
2834
2835 return ENOSYS;
2836 }
2837 /*
2838 * Get configurable pathname variables.
2839 */
2840 /* ARGSUSED */
2841 int
2842 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2843 {
2844 /* {
2845 syscallarg(const char *) path;
2846 syscallarg(int) name;
2847 } */
2848 int error;
2849 struct pathbuf *pb;
2850 struct nameidata nd;
2851
2852 error = pathbuf_copyin(SCARG(uap, path), &pb);
2853 if (error) {
2854 return error;
2855 }
2856 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2857 if ((error = namei(&nd)) != 0) {
2858 pathbuf_destroy(pb);
2859 return (error);
2860 }
2861 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2862 vput(nd.ni_vp);
2863 pathbuf_destroy(pb);
2864 return (error);
2865 }
2866
2867 /*
2868 * Return target name of a symbolic link.
2869 */
2870 /* ARGSUSED */
2871 int
2872 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2873 {
2874 /* {
2875 syscallarg(const char *) path;
2876 syscallarg(char *) buf;
2877 syscallarg(size_t) count;
2878 } */
2879 struct vnode *vp;
2880 struct iovec aiov;
2881 struct uio auio;
2882 int error;
2883 struct pathbuf *pb;
2884 struct nameidata nd;
2885
2886 error = pathbuf_copyin(SCARG(uap, path), &pb);
2887 if (error) {
2888 return error;
2889 }
2890 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2891 if ((error = namei(&nd)) != 0) {
2892 pathbuf_destroy(pb);
2893 return error;
2894 }
2895 vp = nd.ni_vp;
2896 pathbuf_destroy(pb);
2897 if (vp->v_type != VLNK)
2898 error = EINVAL;
2899 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2900 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2901 aiov.iov_base = SCARG(uap, buf);
2902 aiov.iov_len = SCARG(uap, count);
2903 auio.uio_iov = &aiov;
2904 auio.uio_iovcnt = 1;
2905 auio.uio_offset = 0;
2906 auio.uio_rw = UIO_READ;
2907 KASSERT(l == curlwp);
2908 auio.uio_vmspace = l->l_proc->p_vmspace;
2909 auio.uio_resid = SCARG(uap, count);
2910 error = VOP_READLINK(vp, &auio, l->l_cred);
2911 }
2912 vput(vp);
2913 *retval = SCARG(uap, count) - auio.uio_resid;
2914 return (error);
2915 }
2916
2917 int
2918 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
2919 register_t *retval)
2920 {
2921 /* {
2922 syscallarg(int) fd;
2923 syscallarg(const char *) path;
2924 syscallarg(char *) buf;
2925 syscallarg(size_t) count;
2926 } */
2927
2928 return ENOSYS;
2929 }
2930
2931 /*
2932 * Change flags of a file given a path name.
2933 */
2934 /* ARGSUSED */
2935 int
2936 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2937 {
2938 /* {
2939 syscallarg(const char *) path;
2940 syscallarg(u_long) flags;
2941 } */
2942 struct vnode *vp;
2943 int error;
2944
2945 error = namei_simple_user(SCARG(uap, path),
2946 NSM_FOLLOW_TRYEMULROOT, &vp);
2947 if (error != 0)
2948 return (error);
2949 error = change_flags(vp, SCARG(uap, flags), l);
2950 vput(vp);
2951 return (error);
2952 }
2953
2954 /*
2955 * Change flags of a file given a file descriptor.
2956 */
2957 /* ARGSUSED */
2958 int
2959 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
2960 {
2961 /* {
2962 syscallarg(int) fd;
2963 syscallarg(u_long) flags;
2964 } */
2965 struct vnode *vp;
2966 file_t *fp;
2967 int error;
2968
2969 /* fd_getvnode() will use the descriptor for us */
2970 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
2971 return (error);
2972 vp = fp->f_data;
2973 error = change_flags(vp, SCARG(uap, flags), l);
2974 VOP_UNLOCK(vp);
2975 fd_putfile(SCARG(uap, fd));
2976 return (error);
2977 }
2978
2979 /*
2980 * Change flags of a file given a path name; this version does
2981 * not follow links.
2982 */
2983 int
2984 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
2985 {
2986 /* {
2987 syscallarg(const char *) path;
2988 syscallarg(u_long) flags;
2989 } */
2990 struct vnode *vp;
2991 int error;
2992
2993 error = namei_simple_user(SCARG(uap, path),
2994 NSM_NOFOLLOW_TRYEMULROOT, &vp);
2995 if (error != 0)
2996 return (error);
2997 error = change_flags(vp, SCARG(uap, flags), l);
2998 vput(vp);
2999 return (error);
3000 }
3001
3002 /*
3003 * Common routine to change flags of a file.
3004 */
3005 int
3006 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3007 {
3008 struct vattr vattr;
3009 int error;
3010
3011 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3012 /*
3013 * Non-superusers cannot change the flags on devices, even if they
3014 * own them.
3015 */
3016 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) {
3017 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3018 goto out;
3019 if (vattr.va_type == VCHR || vattr.va_type == VBLK) {
3020 error = EINVAL;
3021 goto out;
3022 }
3023 }
3024 vattr_null(&vattr);
3025 vattr.va_flags = flags;
3026 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3027 out:
3028 return (error);
3029 }
3030
3031 /*
3032 * Change mode of a file given path name; this version follows links.
3033 */
3034 /* ARGSUSED */
3035 int
3036 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3037 {
3038 /* {
3039 syscallarg(const char *) path;
3040 syscallarg(int) mode;
3041 } */
3042 int error;
3043 struct vnode *vp;
3044
3045 error = namei_simple_user(SCARG(uap, path),
3046 NSM_FOLLOW_TRYEMULROOT, &vp);
3047 if (error != 0)
3048 return (error);
3049
3050 error = change_mode(vp, SCARG(uap, mode), l);
3051
3052 vrele(vp);
3053 return (error);
3054 }
3055
3056 /*
3057 * Change mode of a file given a file descriptor.
3058 */
3059 /* ARGSUSED */
3060 int
3061 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3062 {
3063 /* {
3064 syscallarg(int) fd;
3065 syscallarg(int) mode;
3066 } */
3067 file_t *fp;
3068 int error;
3069
3070 /* fd_getvnode() will use the descriptor for us */
3071 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3072 return (error);
3073 error = change_mode(fp->f_data, SCARG(uap, mode), l);
3074 fd_putfile(SCARG(uap, fd));
3075 return (error);
3076 }
3077
3078 int
3079 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3080 register_t *retval)
3081 {
3082 /* {
3083 syscallarg(int) fd;
3084 syscallarg(const char *) path;
3085 syscallarg(int) mode;
3086 syscallarg(int) flag;
3087 } */
3088
3089 return ENOSYS;
3090 }
3091
3092 /*
3093 * Change mode of a file given path name; this version does not follow links.
3094 */
3095 /* ARGSUSED */
3096 int
3097 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3098 {
3099 /* {
3100 syscallarg(const char *) path;
3101 syscallarg(int) mode;
3102 } */
3103 int error;
3104 struct vnode *vp;
3105
3106 error = namei_simple_user(SCARG(uap, path),
3107 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3108 if (error != 0)
3109 return (error);
3110
3111 error = change_mode(vp, SCARG(uap, mode), l);
3112
3113 vrele(vp);
3114 return (error);
3115 }
3116
3117 /*
3118 * Common routine to set mode given a vnode.
3119 */
3120 static int
3121 change_mode(struct vnode *vp, int mode, struct lwp *l)
3122 {
3123 struct vattr vattr;
3124 int error;
3125
3126 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3127 vattr_null(&vattr);
3128 vattr.va_mode = mode & ALLPERMS;
3129 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3130 VOP_UNLOCK(vp);
3131 return (error);
3132 }
3133
3134 /*
3135 * Set ownership given a path name; this version follows links.
3136 */
3137 /* ARGSUSED */
3138 int
3139 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3140 {
3141 /* {
3142 syscallarg(const char *) path;
3143 syscallarg(uid_t) uid;
3144 syscallarg(gid_t) gid;
3145 } */
3146 int error;
3147 struct vnode *vp;
3148
3149 error = namei_simple_user(SCARG(uap, path),
3150 NSM_FOLLOW_TRYEMULROOT, &vp);
3151 if (error != 0)
3152 return (error);
3153
3154 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3155
3156 vrele(vp);
3157 return (error);
3158 }
3159
3160 /*
3161 * Set ownership given a path name; this version follows links.
3162 * Provides POSIX semantics.
3163 */
3164 /* ARGSUSED */
3165 int
3166 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3167 {
3168 /* {
3169 syscallarg(const char *) path;
3170 syscallarg(uid_t) uid;
3171 syscallarg(gid_t) gid;
3172 } */
3173 int error;
3174 struct vnode *vp;
3175
3176 error = namei_simple_user(SCARG(uap, path),
3177 NSM_FOLLOW_TRYEMULROOT, &vp);
3178 if (error != 0)
3179 return (error);
3180
3181 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3182
3183 vrele(vp);
3184 return (error);
3185 }
3186
3187 /*
3188 * Set ownership given a file descriptor.
3189 */
3190 /* ARGSUSED */
3191 int
3192 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3193 {
3194 /* {
3195 syscallarg(int) fd;
3196 syscallarg(uid_t) uid;
3197 syscallarg(gid_t) gid;
3198 } */
3199 int error;
3200 file_t *fp;
3201
3202 /* fd_getvnode() will use the descriptor for us */
3203 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3204 return (error);
3205 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3206 l, 0);
3207 fd_putfile(SCARG(uap, fd));
3208 return (error);
3209 }
3210
3211 int
3212 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3213 register_t *retval)
3214 {
3215 /* {
3216 syscallarg(int) fd;
3217 syscallarg(const char *) path;
3218 syscallarg(uid_t) uid;
3219 syscallarg(gid_t) gid;
3220 syscallarg(int) flag;
3221 } */
3222
3223 return ENOSYS;
3224 }
3225
3226 /*
3227 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3228 */
3229 /* ARGSUSED */
3230 int
3231 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3232 {
3233 /* {
3234 syscallarg(int) fd;
3235 syscallarg(uid_t) uid;
3236 syscallarg(gid_t) gid;
3237 } */
3238 int error;
3239 file_t *fp;
3240
3241 /* fd_getvnode() will use the descriptor for us */
3242 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3243 return (error);
3244 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3245 l, 1);
3246 fd_putfile(SCARG(uap, fd));
3247 return (error);
3248 }
3249
3250 /*
3251 * Set ownership given a path name; this version does not follow links.
3252 */
3253 /* ARGSUSED */
3254 int
3255 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3256 {
3257 /* {
3258 syscallarg(const char *) path;
3259 syscallarg(uid_t) uid;
3260 syscallarg(gid_t) gid;
3261 } */
3262 int error;
3263 struct vnode *vp;
3264
3265 error = namei_simple_user(SCARG(uap, path),
3266 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3267 if (error != 0)
3268 return (error);
3269
3270 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3271
3272 vrele(vp);
3273 return (error);
3274 }
3275
3276 /*
3277 * Set ownership given a path name; this version does not follow links.
3278 * Provides POSIX/XPG semantics.
3279 */
3280 /* ARGSUSED */
3281 int
3282 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3283 {
3284 /* {
3285 syscallarg(const char *) path;
3286 syscallarg(uid_t) uid;
3287 syscallarg(gid_t) gid;
3288 } */
3289 int error;
3290 struct vnode *vp;
3291
3292 error = namei_simple_user(SCARG(uap, path),
3293 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3294 if (error != 0)
3295 return (error);
3296
3297 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3298
3299 vrele(vp);
3300 return (error);
3301 }
3302
3303 /*
3304 * Common routine to set ownership given a vnode.
3305 */
3306 static int
3307 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3308 int posix_semantics)
3309 {
3310 struct vattr vattr;
3311 mode_t newmode;
3312 int error;
3313
3314 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3315 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3316 goto out;
3317
3318 #define CHANGED(x) ((int)(x) != -1)
3319 newmode = vattr.va_mode;
3320 if (posix_semantics) {
3321 /*
3322 * POSIX/XPG semantics: if the caller is not the super-user,
3323 * clear set-user-id and set-group-id bits. Both POSIX and
3324 * the XPG consider the behaviour for calls by the super-user
3325 * implementation-defined; we leave the set-user-id and set-
3326 * group-id settings intact in that case.
3327 */
3328 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
3329 NULL) != 0)
3330 newmode &= ~(S_ISUID | S_ISGID);
3331 } else {
3332 /*
3333 * NetBSD semantics: when changing owner and/or group,
3334 * clear the respective bit(s).
3335 */
3336 if (CHANGED(uid))
3337 newmode &= ~S_ISUID;
3338 if (CHANGED(gid))
3339 newmode &= ~S_ISGID;
3340 }
3341 /* Update va_mode iff altered. */
3342 if (vattr.va_mode == newmode)
3343 newmode = VNOVAL;
3344
3345 vattr_null(&vattr);
3346 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3347 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3348 vattr.va_mode = newmode;
3349 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3350 #undef CHANGED
3351
3352 out:
3353 VOP_UNLOCK(vp);
3354 return (error);
3355 }
3356
3357 /*
3358 * Set the access and modification times given a path name; this
3359 * version follows links.
3360 */
3361 /* ARGSUSED */
3362 int
3363 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3364 register_t *retval)
3365 {
3366 /* {
3367 syscallarg(const char *) path;
3368 syscallarg(const struct timeval *) tptr;
3369 } */
3370
3371 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3372 SCARG(uap, tptr), UIO_USERSPACE);
3373 }
3374
3375 /*
3376 * Set the access and modification times given a file descriptor.
3377 */
3378 /* ARGSUSED */
3379 int
3380 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3381 register_t *retval)
3382 {
3383 /* {
3384 syscallarg(int) fd;
3385 syscallarg(const struct timeval *) tptr;
3386 } */
3387 int error;
3388 file_t *fp;
3389
3390 /* fd_getvnode() will use the descriptor for us */
3391 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3392 return (error);
3393 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3394 UIO_USERSPACE);
3395 fd_putfile(SCARG(uap, fd));
3396 return (error);
3397 }
3398
3399 int
3400 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3401 register_t *retval)
3402 {
3403 /* {
3404 syscallarg(int) fd;
3405 syscallarg(const struct timespec *) tptr;
3406 } */
3407 int error;
3408 file_t *fp;
3409
3410 /* fd_getvnode() will use the descriptor for us */
3411 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3412 return (error);
3413 error = do_sys_utimens(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3414 UIO_USERSPACE);
3415 fd_putfile(SCARG(uap, fd));
3416 return (error);
3417 }
3418
3419 /*
3420 * Set the access and modification times given a path name; this
3421 * version does not follow links.
3422 */
3423 int
3424 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3425 register_t *retval)
3426 {
3427 /* {
3428 syscallarg(const char *) path;
3429 syscallarg(const struct timeval *) tptr;
3430 } */
3431
3432 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3433 SCARG(uap, tptr), UIO_USERSPACE);
3434 }
3435
3436 int
3437 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3438 register_t *retval)
3439 {
3440 /* {
3441 syscallarg(int) fd;
3442 syscallarg(const char *) path;
3443 syscallarg(const struct timespec *) tptr;
3444 syscallarg(int) flag;
3445 } */
3446 int follow;
3447 const struct timespec *tptr;
3448
3449 /*
3450 * Specified fd is not yet implemented
3451 */
3452 if (SCARG(uap, fd) != AT_FDCWD)
3453 return ENOSYS;
3454
3455 tptr = SCARG(uap, tptr);
3456 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3457
3458 return do_sys_utimens(l, NULL, SCARG(uap, path), follow,
3459 tptr, UIO_USERSPACE);
3460 }
3461
3462 /*
3463 * Common routine to set access and modification times given a vnode.
3464 */
3465 int
3466 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3467 const struct timespec *tptr, enum uio_seg seg)
3468 {
3469 struct vattr vattr;
3470 int error, dorele = 0;
3471 namei_simple_flags_t sflags;
3472
3473 bool vanull, setbirthtime;
3474 struct timespec ts[2];
3475
3476 /*
3477 * I have checked all callers and they pass either FOLLOW,
3478 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3479 * is 0. More to the point, they don't pass anything else.
3480 * Let's keep it that way at least until the namei interfaces
3481 * are fully sanitized.
3482 */
3483 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3484 sflags = (flag == FOLLOW) ?
3485 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3486
3487 if (tptr == NULL) {
3488 vanull = true;
3489 nanotime(&ts[0]);
3490 ts[1] = ts[0];
3491 } else {
3492 vanull = false;
3493 if (seg != UIO_SYSSPACE) {
3494 error = copyin(tptr, ts, sizeof (ts));
3495 if (error != 0)
3496 return error;
3497 } else {
3498 ts[0] = tptr[0];
3499 ts[1] = tptr[1];
3500 }
3501 }
3502
3503 if (ts[0].tv_nsec == UTIME_NOW) {
3504 nanotime(&ts[0]);
3505 if (ts[1].tv_nsec == UTIME_NOW) {
3506 vanull = true;
3507 ts[1] = ts[0];
3508 }
3509 } else if (ts[1].tv_nsec == UTIME_NOW)
3510 nanotime(&ts[1]);
3511
3512 if (vp == NULL) {
3513 /* note: SEG describes TPTR, not PATH; PATH is always user */
3514 error = namei_simple_user(path, sflags, &vp);
3515 if (error != 0)
3516 return error;
3517 dorele = 1;
3518 }
3519
3520 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3521 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3522 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3523 vattr_null(&vattr);
3524
3525 if (ts[0].tv_nsec != UTIME_OMIT)
3526 vattr.va_atime = ts[0];
3527
3528 if (ts[1].tv_nsec != UTIME_OMIT) {
3529 vattr.va_mtime = ts[1];
3530 if (setbirthtime)
3531 vattr.va_birthtime = ts[1];
3532 }
3533
3534 if (vanull)
3535 vattr.va_vaflags |= VA_UTIMES_NULL;
3536 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3537 VOP_UNLOCK(vp);
3538
3539 if (dorele != 0)
3540 vrele(vp);
3541
3542 return error;
3543 }
3544
3545 int
3546 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3547 const struct timeval *tptr, enum uio_seg seg)
3548 {
3549 struct timespec ts[2];
3550 struct timespec *tsptr = NULL;
3551 int error;
3552
3553 if (tptr != NULL) {
3554 struct timeval tv[2];
3555
3556 if (seg != UIO_SYSSPACE) {
3557 error = copyin(tptr, tv, sizeof (tv));
3558 if (error != 0)
3559 return error;
3560 tptr = tv;
3561 }
3562
3563 if ((tv[0].tv_usec == UTIME_NOW) ||
3564 (tv[0].tv_usec == UTIME_OMIT))
3565 ts[0].tv_nsec = tv[0].tv_usec;
3566 else
3567 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3568
3569 if ((tv[1].tv_usec == UTIME_NOW) ||
3570 (tv[1].tv_usec == UTIME_OMIT))
3571 ts[1].tv_nsec = tv[1].tv_usec;
3572 else
3573 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3574
3575 tsptr = &ts[0];
3576 }
3577
3578 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3579 }
3580
3581 /*
3582 * Truncate a file given its path name.
3583 */
3584 /* ARGSUSED */
3585 int
3586 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3587 {
3588 /* {
3589 syscallarg(const char *) path;
3590 syscallarg(int) pad;
3591 syscallarg(off_t) length;
3592 } */
3593 struct vnode *vp;
3594 struct vattr vattr;
3595 int error;
3596
3597 error = namei_simple_user(SCARG(uap, path),
3598 NSM_FOLLOW_TRYEMULROOT, &vp);
3599 if (error != 0)
3600 return (error);
3601 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3602 if (vp->v_type == VDIR)
3603 error = EISDIR;
3604 else if ((error = vn_writechk(vp)) == 0 &&
3605 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3606 vattr_null(&vattr);
3607 vattr.va_size = SCARG(uap, length);
3608 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3609 }
3610 vput(vp);
3611 return (error);
3612 }
3613
3614 /*
3615 * Truncate a file given a file descriptor.
3616 */
3617 /* ARGSUSED */
3618 int
3619 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3620 {
3621 /* {
3622 syscallarg(int) fd;
3623 syscallarg(int) pad;
3624 syscallarg(off_t) length;
3625 } */
3626 struct vattr vattr;
3627 struct vnode *vp;
3628 file_t *fp;
3629 int error;
3630
3631 /* fd_getvnode() will use the descriptor for us */
3632 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3633 return (error);
3634 if ((fp->f_flag & FWRITE) == 0) {
3635 error = EINVAL;
3636 goto out;
3637 }
3638 vp = fp->f_data;
3639 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3640 if (vp->v_type == VDIR)
3641 error = EISDIR;
3642 else if ((error = vn_writechk(vp)) == 0) {
3643 vattr_null(&vattr);
3644 vattr.va_size = SCARG(uap, length);
3645 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3646 }
3647 VOP_UNLOCK(vp);
3648 out:
3649 fd_putfile(SCARG(uap, fd));
3650 return (error);
3651 }
3652
3653 /*
3654 * Sync an open file.
3655 */
3656 /* ARGSUSED */
3657 int
3658 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3659 {
3660 /* {
3661 syscallarg(int) fd;
3662 } */
3663 struct vnode *vp;
3664 file_t *fp;
3665 int error;
3666
3667 /* fd_getvnode() will use the descriptor for us */
3668 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3669 return (error);
3670 vp = fp->f_data;
3671 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3672 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3673 VOP_UNLOCK(vp);
3674 fd_putfile(SCARG(uap, fd));
3675 return (error);
3676 }
3677
3678 /*
3679 * Sync a range of file data. API modeled after that found in AIX.
3680 *
3681 * FDATASYNC indicates that we need only save enough metadata to be able
3682 * to re-read the written data. Note we duplicate AIX's requirement that
3683 * the file be open for writing.
3684 */
3685 /* ARGSUSED */
3686 int
3687 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3688 {
3689 /* {
3690 syscallarg(int) fd;
3691 syscallarg(int) flags;
3692 syscallarg(off_t) start;
3693 syscallarg(off_t) length;
3694 } */
3695 struct vnode *vp;
3696 file_t *fp;
3697 int flags, nflags;
3698 off_t s, e, len;
3699 int error;
3700
3701 /* fd_getvnode() will use the descriptor for us */
3702 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3703 return (error);
3704
3705 if ((fp->f_flag & FWRITE) == 0) {
3706 error = EBADF;
3707 goto out;
3708 }
3709
3710 flags = SCARG(uap, flags);
3711 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3712 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3713 error = EINVAL;
3714 goto out;
3715 }
3716 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3717 if (flags & FDATASYNC)
3718 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3719 else
3720 nflags = FSYNC_WAIT;
3721 if (flags & FDISKSYNC)
3722 nflags |= FSYNC_CACHE;
3723
3724 len = SCARG(uap, length);
3725 /* If length == 0, we do the whole file, and s = e = 0 will do that */
3726 if (len) {
3727 s = SCARG(uap, start);
3728 e = s + len;
3729 if (e < s) {
3730 error = EINVAL;
3731 goto out;
3732 }
3733 } else {
3734 e = 0;
3735 s = 0;
3736 }
3737
3738 vp = fp->f_data;
3739 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3740 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3741 VOP_UNLOCK(vp);
3742 out:
3743 fd_putfile(SCARG(uap, fd));
3744 return (error);
3745 }
3746
3747 /*
3748 * Sync the data of an open file.
3749 */
3750 /* ARGSUSED */
3751 int
3752 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3753 {
3754 /* {
3755 syscallarg(int) fd;
3756 } */
3757 struct vnode *vp;
3758 file_t *fp;
3759 int error;
3760
3761 /* fd_getvnode() will use the descriptor for us */
3762 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3763 return (error);
3764 if ((fp->f_flag & FWRITE) == 0) {
3765 fd_putfile(SCARG(uap, fd));
3766 return (EBADF);
3767 }
3768 vp = fp->f_data;
3769 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3770 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3771 VOP_UNLOCK(vp);
3772 fd_putfile(SCARG(uap, fd));
3773 return (error);
3774 }
3775
3776 /*
3777 * Rename files, (standard) BSD semantics frontend.
3778 */
3779 /* ARGSUSED */
3780 int
3781 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3782 {
3783 /* {
3784 syscallarg(const char *) from;
3785 syscallarg(const char *) to;
3786 } */
3787
3788 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3789 }
3790
3791 int
3792 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
3793 register_t *retval)
3794 {
3795 /* {
3796 syscallarg(int) fromfd;
3797 syscallarg(const char *) from;
3798 syscallarg(int) tofd;
3799 syscallarg(const char *) to;
3800 } */
3801
3802 return ENOSYS;
3803 }
3804
3805 /*
3806 * Rename files, POSIX semantics frontend.
3807 */
3808 /* ARGSUSED */
3809 int
3810 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3811 {
3812 /* {
3813 syscallarg(const char *) from;
3814 syscallarg(const char *) to;
3815 } */
3816
3817 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3818 }
3819
3820 /*
3821 * Rename files. Source and destination must either both be directories,
3822 * or both not be directories. If target is a directory, it must be empty.
3823 * If `from' and `to' refer to the same object, the value of the `retain'
3824 * argument is used to determine whether `from' will be
3825 *
3826 * (retain == 0) deleted unless `from' and `to' refer to the same
3827 * object in the file system's name space (BSD).
3828 * (retain == 1) always retained (POSIX).
3829 */
3830 int
3831 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3832 {
3833 struct vnode *tvp, *fvp, *tdvp;
3834 struct pathbuf *frompb, *topb;
3835 struct nameidata fromnd, tond;
3836 struct mount *fs;
3837 int error;
3838
3839 error = pathbuf_maybe_copyin(from, seg, &frompb);
3840 if (error) {
3841 return error;
3842 }
3843 error = pathbuf_maybe_copyin(to, seg, &topb);
3844 if (error) {
3845 pathbuf_destroy(frompb);
3846 return error;
3847 }
3848
3849 NDINIT(&fromnd, DELETE, LOCKPARENT | TRYEMULROOT | INRENAME,
3850 frompb);
3851 if ((error = namei(&fromnd)) != 0) {
3852 pathbuf_destroy(frompb);
3853 pathbuf_destroy(topb);
3854 return (error);
3855 }
3856 if (fromnd.ni_dvp != fromnd.ni_vp)
3857 VOP_UNLOCK(fromnd.ni_dvp);
3858 fvp = fromnd.ni_vp;
3859
3860 fs = fvp->v_mount;
3861 error = VFS_RENAMELOCK_ENTER(fs);
3862 if (error) {
3863 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3864 vrele(fromnd.ni_dvp);
3865 vrele(fvp);
3866 goto out1;
3867 }
3868
3869 /*
3870 * close, partially, yet another race - ideally we should only
3871 * go as far as getting fromnd.ni_dvp before getting the per-fs
3872 * lock, and then continue to get fromnd.ni_vp, but we can't do
3873 * that with namei as it stands.
3874 *
3875 * This still won't prevent rmdir from nuking fromnd.ni_vp
3876 * under us. The real fix is to get the locks in the right
3877 * order and do the lookups in the right places, but that's a
3878 * major rototill.
3879 *
3880 * Note: this logic (as well as this whole function) is cloned
3881 * in nfs_serv.c. Proceed accordingly.
3882 */
3883 vrele(fvp);
3884 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3885 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3886 (fromnd.ni_cnd.cn_namelen == 2 &&
3887 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3888 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3889 error = EINVAL;
3890 VFS_RENAMELOCK_EXIT(fs);
3891 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3892 vrele(fromnd.ni_dvp);
3893 goto out1;
3894 }
3895 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3896 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0);
3897 if (error) {
3898 VOP_UNLOCK(fromnd.ni_dvp);
3899 VFS_RENAMELOCK_EXIT(fs);
3900 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3901 vrele(fromnd.ni_dvp);
3902 goto out1;
3903 }
3904 VOP_UNLOCK(fromnd.ni_vp);
3905 if (fromnd.ni_dvp != fromnd.ni_vp)
3906 VOP_UNLOCK(fromnd.ni_dvp);
3907 fvp = fromnd.ni_vp;
3908
3909 NDINIT(&tond, RENAME,
3910 LOCKPARENT | LOCKLEAF | NOCACHE | TRYEMULROOT
3911 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0),
3912 topb);
3913 if ((error = namei(&tond)) != 0) {
3914 VFS_RENAMELOCK_EXIT(fs);
3915 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3916 vrele(fromnd.ni_dvp);
3917 vrele(fvp);
3918 goto out1;
3919 }
3920 tdvp = tond.ni_dvp;
3921 tvp = tond.ni_vp;
3922
3923 if (tvp != NULL) {
3924 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3925 error = ENOTDIR;
3926 goto out;
3927 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3928 error = EISDIR;
3929 goto out;
3930 }
3931 }
3932
3933 if (fvp == tdvp)
3934 error = EINVAL;
3935
3936 /*
3937 * Source and destination refer to the same object.
3938 */
3939 if (fvp == tvp) {
3940 if (retain)
3941 error = -1;
3942 else if (fromnd.ni_dvp == tdvp &&
3943 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3944 !memcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3945 fromnd.ni_cnd.cn_namelen))
3946 error = -1;
3947 }
3948 /*
3949 * Prevent cross-mount operation.
3950 */
3951 if (error == 0) {
3952 if (tond.ni_dvp->v_mount != fromnd.ni_dvp->v_mount) {
3953 error = EXDEV;
3954 }
3955 }
3956 #if NVERIEXEC > 0
3957 if (!error) {
3958 char *f1, *f2;
3959 size_t f1_len;
3960 size_t f2_len;
3961
3962 f1_len = fromnd.ni_cnd.cn_namelen + 1;
3963 f1 = kmem_alloc(f1_len, KM_SLEEP);
3964 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);
3965
3966 f2_len = tond.ni_cnd.cn_namelen + 1;
3967 f2 = kmem_alloc(f2_len, KM_SLEEP);
3968 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);
3969
3970 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
3971
3972 kmem_free(f1, f1_len);
3973 kmem_free(f2, f2_len);
3974 }
3975 #endif /* NVERIEXEC > 0 */
3976
3977 out:
3978 if (!error) {
3979 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
3980 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
3981 VFS_RENAMELOCK_EXIT(fs);
3982 } else {
3983 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
3984 if (tdvp == tvp)
3985 vrele(tdvp);
3986 else
3987 vput(tdvp);
3988 if (tvp)
3989 vput(tvp);
3990 VFS_RENAMELOCK_EXIT(fs);
3991 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3992 vrele(fromnd.ni_dvp);
3993 vrele(fvp);
3994 }
3995 out1:
3996 pathbuf_destroy(frompb);
3997 pathbuf_destroy(topb);
3998 return (error == -1 ? 0 : error);
3999 }
4000
4001 /*
4002 * Make a directory file.
4003 */
4004 /* ARGSUSED */
4005 int
4006 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4007 {
4008 /* {
4009 syscallarg(const char *) path;
4010 syscallarg(int) mode;
4011 } */
4012
4013 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE);
4014 }
4015
4016 int
4017 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4018 register_t *retval)
4019 {
4020 /* {
4021 syscallarg(int) fd;
4022 syscallarg(const char *) path;
4023 syscallarg(int) mode;
4024 } */
4025
4026 return ENOSYS;
4027 }
4028
4029
4030 int
4031 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4032 {
4033 struct proc *p = curlwp->l_proc;
4034 struct vnode *vp;
4035 struct vattr vattr;
4036 int error;
4037 struct pathbuf *pb;
4038 struct nameidata nd;
4039
4040 /* XXX bollocks, should pass in a pathbuf */
4041 error = pathbuf_maybe_copyin(path, seg, &pb);
4042 if (error) {
4043 return error;
4044 }
4045
4046 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4047 if ((error = namei(&nd)) != 0) {
4048 pathbuf_destroy(pb);
4049 return (error);
4050 }
4051 vp = nd.ni_vp;
4052 if (vp != NULL) {
4053 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4054 if (nd.ni_dvp == vp)
4055 vrele(nd.ni_dvp);
4056 else
4057 vput(nd.ni_dvp);
4058 vrele(vp);
4059 pathbuf_destroy(pb);
4060 return (EEXIST);
4061 }
4062 vattr_null(&vattr);
4063 vattr.va_type = VDIR;
4064 /* We will read cwdi->cwdi_cmask unlocked. */
4065 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4066 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4067 if (!error)
4068 vput(nd.ni_vp);
4069 pathbuf_destroy(pb);
4070 return (error);
4071 }
4072
4073 /*
4074 * Remove a directory file.
4075 */
4076 /* ARGSUSED */
4077 int
4078 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4079 {
4080 /* {
4081 syscallarg(const char *) path;
4082 } */
4083 struct vnode *vp;
4084 int error;
4085 struct pathbuf *pb;
4086 struct nameidata nd;
4087
4088 error = pathbuf_copyin(SCARG(uap, path), &pb);
4089 if (error) {
4090 return error;
4091 }
4092 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
4093 if ((error = namei(&nd)) != 0) {
4094 pathbuf_destroy(pb);
4095 return error;
4096 }
4097 vp = nd.ni_vp;
4098 if (vp->v_type != VDIR) {
4099 error = ENOTDIR;
4100 goto out;
4101 }
4102 /*
4103 * No rmdir "." please.
4104 */
4105 if (nd.ni_dvp == vp) {
4106 error = EINVAL;
4107 goto out;
4108 }
4109 /*
4110 * The root of a mounted filesystem cannot be deleted.
4111 */
4112 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
4113 error = EBUSY;
4114 goto out;
4115 }
4116 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
4117 pathbuf_destroy(pb);
4118 return (error);
4119
4120 out:
4121 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4122 if (nd.ni_dvp == vp)
4123 vrele(nd.ni_dvp);
4124 else
4125 vput(nd.ni_dvp);
4126 vput(vp);
4127 pathbuf_destroy(pb);
4128 return (error);
4129 }
4130
4131 /*
4132 * Read a block of directory entries in a file system independent format.
4133 */
4134 int
4135 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4136 {
4137 /* {
4138 syscallarg(int) fd;
4139 syscallarg(char *) buf;
4140 syscallarg(size_t) count;
4141 } */
4142 file_t *fp;
4143 int error, done;
4144
4145 /* fd_getvnode() will use the descriptor for us */
4146 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4147 return (error);
4148 if ((fp->f_flag & FREAD) == 0) {
4149 error = EBADF;
4150 goto out;
4151 }
4152 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4153 SCARG(uap, count), &done, l, 0, 0);
4154 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4155 *retval = done;
4156 out:
4157 fd_putfile(SCARG(uap, fd));
4158 return (error);
4159 }
4160
4161 /*
4162 * Set the mode mask for creation of filesystem nodes.
4163 */
4164 int
4165 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4166 {
4167 /* {
4168 syscallarg(mode_t) newmask;
4169 } */
4170 struct proc *p = l->l_proc;
4171 struct cwdinfo *cwdi;
4172
4173 /*
4174 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4175 * important is that we serialize changes to the mask. The
4176 * rw_exit() will issue a write memory barrier on our behalf,
4177 * and force the changes out to other CPUs (as it must use an
4178 * atomic operation, draining the local CPU's store buffers).
4179 */
4180 cwdi = p->p_cwdi;
4181 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4182 *retval = cwdi->cwdi_cmask;
4183 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4184 rw_exit(&cwdi->cwdi_lock);
4185
4186 return (0);
4187 }
4188
4189 int
4190 dorevoke(struct vnode *vp, kauth_cred_t cred)
4191 {
4192 struct vattr vattr;
4193 int error;
4194
4195 vn_lock(vp, LK_SHARED | LK_RETRY);
4196 error = VOP_GETATTR(vp, &vattr, cred);
4197 VOP_UNLOCK(vp);
4198 if (error != 0)
4199 return error;
4200 if (kauth_cred_geteuid(cred) == vattr.va_uid ||
4201 (error = kauth_authorize_generic(cred,
4202 KAUTH_GENERIC_ISSUSER, NULL)) == 0)
4203 VOP_REVOKE(vp, REVOKEALL);
4204 return (error);
4205 }
4206
4207 /*
4208 * Void all references to file by ripping underlying filesystem
4209 * away from vnode.
4210 */
4211 /* ARGSUSED */
4212 int
4213 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4214 {
4215 /* {
4216 syscallarg(const char *) path;
4217 } */
4218 struct vnode *vp;
4219 int error;
4220
4221 error = namei_simple_user(SCARG(uap, path),
4222 NSM_FOLLOW_TRYEMULROOT, &vp);
4223 if (error != 0)
4224 return (error);
4225 error = dorevoke(vp, l->l_cred);
4226 vrele(vp);
4227 return (error);
4228 }
4229