vfs_syscalls.c revision 1.453 1 /* $NetBSD: vfs_syscalls.c,v 1.453 2012/04/30 03:51:10 manu Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.453 2012/04/30 03:51:10 manu Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/syncfs/syncfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
119 static int change_flags(struct vnode *, u_long, struct lwp *);
120 static int change_mode(struct vnode *, int, struct lwp *l);
121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
122 static int do_open(lwp_t *, struct pathbuf *, int, int, int *);
123
124 /*
125 * This table is used to maintain compatibility with 4.3BSD
126 * and NetBSD 0.9 mount syscalls - and possibly other systems.
127 * Note, the order is important!
128 *
129 * Do not modify this table. It should only contain filesystems
130 * supported by NetBSD 0.9 and 4.3BSD.
131 */
132 const char * const mountcompatnames[] = {
133 NULL, /* 0 = MOUNT_NONE */
134 MOUNT_FFS, /* 1 = MOUNT_UFS */
135 MOUNT_NFS, /* 2 */
136 MOUNT_MFS, /* 3 */
137 MOUNT_MSDOS, /* 4 */
138 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
139 MOUNT_FDESC, /* 6 */
140 MOUNT_KERNFS, /* 7 */
141 NULL, /* 8 = MOUNT_DEVFS */
142 MOUNT_AFS, /* 9 */
143 };
144
145 const int nmountcompatnames = __arraycount(mountcompatnames);
146
147 static int
148 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
149 {
150 int error;
151
152 fp->f_flag = flags & FMASK;
153 fp->f_type = DTYPE_VNODE;
154 fp->f_ops = &vnops;
155 fp->f_data = vp;
156
157 if (flags & (O_EXLOCK | O_SHLOCK)) {
158 struct flock lf;
159 int type;
160
161 lf.l_whence = SEEK_SET;
162 lf.l_start = 0;
163 lf.l_len = 0;
164 if (flags & O_EXLOCK)
165 lf.l_type = F_WRLCK;
166 else
167 lf.l_type = F_RDLCK;
168 type = F_FLOCK;
169 if ((flags & FNONBLOCK) == 0)
170 type |= F_WAIT;
171 VOP_UNLOCK(vp);
172 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
173 if (error) {
174 (void) vn_close(vp, fp->f_flag, fp->f_cred);
175 fd_abort(l->l_proc, fp, indx);
176 return error;
177 }
178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
179 atomic_or_uint(&fp->f_flag, FHASLOCK);
180 }
181 if (flags & O_CLOEXEC)
182 fd_set_exclose(l, indx, true);
183 return 0;
184 }
185
186 static int
187 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
188 void *data, size_t *data_len)
189 {
190 struct mount *mp;
191 int error = 0, saved_flags;
192
193 mp = vp->v_mount;
194 saved_flags = mp->mnt_flag;
195
196 /* We can operate only on VV_ROOT nodes. */
197 if ((vp->v_vflag & VV_ROOT) == 0) {
198 error = EINVAL;
199 goto out;
200 }
201
202 /*
203 * We only allow the filesystem to be reloaded if it
204 * is currently mounted read-only. Additionally, we
205 * prevent read-write to read-only downgrades.
206 */
207 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
208 (mp->mnt_flag & MNT_RDONLY) == 0 &&
209 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
210 error = EOPNOTSUPP; /* Needs translation */
211 goto out;
212 }
213
214 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
215 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
216 if (error)
217 goto out;
218
219 if (vfs_busy(mp, NULL)) {
220 error = EPERM;
221 goto out;
222 }
223
224 mutex_enter(&mp->mnt_updating);
225
226 mp->mnt_flag &= ~MNT_OP_FLAGS;
227 mp->mnt_flag |= flags & MNT_OP_FLAGS;
228
229 /*
230 * Set the mount level flags.
231 */
232 if (flags & MNT_RDONLY)
233 mp->mnt_flag |= MNT_RDONLY;
234 else if (mp->mnt_flag & MNT_RDONLY)
235 mp->mnt_iflag |= IMNT_WANTRDWR;
236 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
237 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
238 error = VFS_MOUNT(mp, path, data, data_len);
239
240 if (error && data != NULL) {
241 int error2;
242
243 /*
244 * Update failed; let's try and see if it was an
245 * export request. For compat with 3.0 and earlier.
246 */
247 error2 = vfs_hooks_reexport(mp, path, data);
248
249 /*
250 * Only update error code if the export request was
251 * understood but some problem occurred while
252 * processing it.
253 */
254 if (error2 != EJUSTRETURN)
255 error = error2;
256 }
257
258 if (mp->mnt_iflag & IMNT_WANTRDWR)
259 mp->mnt_flag &= ~MNT_RDONLY;
260 if (error)
261 mp->mnt_flag = saved_flags;
262 mp->mnt_flag &= ~MNT_OP_FLAGS;
263 mp->mnt_iflag &= ~IMNT_WANTRDWR;
264 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
265 if (mp->mnt_syncer == NULL)
266 error = vfs_allocate_syncvnode(mp);
267 } else {
268 if (mp->mnt_syncer != NULL)
269 vfs_deallocate_syncvnode(mp);
270 }
271 mutex_exit(&mp->mnt_updating);
272 vfs_unbusy(mp, false, NULL);
273
274 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
275 (flags & MNT_EXTATTR)) {
276 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
277 NULL, 0, NULL) != 0) {
278 printf("%s: failed to start extattr, error = %d",
279 mp->mnt_stat.f_mntonname, error);
280 mp->mnt_flag &= ~MNT_EXTATTR;
281 }
282 }
283
284 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
285 !(flags & MNT_EXTATTR)) {
286 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
287 NULL, 0, NULL) != 0) {
288 printf("%s: failed to stop extattr, error = %d",
289 mp->mnt_stat.f_mntonname, error);
290 mp->mnt_flag |= MNT_RDONLY;
291 }
292 }
293 out:
294 return (error);
295 }
296
297 static int
298 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
299 {
300 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
301 int error;
302
303 /* Copy file-system type from userspace. */
304 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
305 if (error) {
306 /*
307 * Historically, filesystem types were identified by numbers.
308 * If we get an integer for the filesystem type instead of a
309 * string, we check to see if it matches one of the historic
310 * filesystem types.
311 */
312 u_long fsindex = (u_long)fstype;
313 if (fsindex >= nmountcompatnames ||
314 mountcompatnames[fsindex] == NULL)
315 return ENODEV;
316 strlcpy(fstypename, mountcompatnames[fsindex],
317 sizeof(fstypename));
318 }
319
320 /* Accept `ufs' as an alias for `ffs', for compatibility. */
321 if (strcmp(fstypename, "ufs") == 0)
322 fstypename[0] = 'f';
323
324 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
325 return 0;
326
327 /* If we can autoload a vfs module, try again */
328 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
329
330 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
331 return 0;
332
333 return ENODEV;
334 }
335
336 static int
337 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
338 void *data, size_t *data_len)
339 {
340 struct mount *mp;
341 int error;
342
343 /* If MNT_GETARGS is specified, it should be the only flag. */
344 if (flags & ~MNT_GETARGS)
345 return EINVAL;
346
347 mp = vp->v_mount;
348
349 /* XXX: probably some notion of "can see" here if we want isolation. */
350 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
351 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
352 if (error)
353 return error;
354
355 if ((vp->v_vflag & VV_ROOT) == 0)
356 return EINVAL;
357
358 if (vfs_busy(mp, NULL))
359 return EPERM;
360
361 mutex_enter(&mp->mnt_updating);
362 mp->mnt_flag &= ~MNT_OP_FLAGS;
363 mp->mnt_flag |= MNT_GETARGS;
364 error = VFS_MOUNT(mp, path, data, data_len);
365 mp->mnt_flag &= ~MNT_OP_FLAGS;
366 mutex_exit(&mp->mnt_updating);
367
368 vfs_unbusy(mp, false, NULL);
369 return (error);
370 }
371
372 int
373 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
374 {
375 /* {
376 syscallarg(const char *) type;
377 syscallarg(const char *) path;
378 syscallarg(int) flags;
379 syscallarg(void *) data;
380 syscallarg(size_t) data_len;
381 } */
382
383 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
384 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
385 SCARG(uap, data_len), retval);
386 }
387
388 int
389 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
390 const char *path, int flags, void *data, enum uio_seg data_seg,
391 size_t data_len, register_t *retval)
392 {
393 struct vnode *vp;
394 void *data_buf = data;
395 bool vfsopsrele = false;
396 int error;
397
398 /* XXX: The calling convention of this routine is totally bizarre */
399 if (vfsops)
400 vfsopsrele = true;
401
402 /*
403 * Get vnode to be covered
404 */
405 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
406 if (error != 0) {
407 vp = NULL;
408 goto done;
409 }
410
411 if (vfsops == NULL) {
412 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
413 vfsops = vp->v_mount->mnt_op;
414 } else {
415 /* 'type' is userspace */
416 error = mount_get_vfsops(type, &vfsops);
417 if (error != 0)
418 goto done;
419 vfsopsrele = true;
420 }
421 }
422
423 if (data != NULL && data_seg == UIO_USERSPACE) {
424 if (data_len == 0) {
425 /* No length supplied, use default for filesystem */
426 data_len = vfsops->vfs_min_mount_data;
427 if (data_len > VFS_MAX_MOUNT_DATA) {
428 error = EINVAL;
429 goto done;
430 }
431 /*
432 * Hopefully a longer buffer won't make copyin() fail.
433 * For compatibility with 3.0 and earlier.
434 */
435 if (flags & MNT_UPDATE
436 && data_len < sizeof (struct mnt_export_args30))
437 data_len = sizeof (struct mnt_export_args30);
438 }
439 data_buf = kmem_alloc(data_len, KM_SLEEP);
440
441 /* NFS needs the buffer even for mnt_getargs .... */
442 error = copyin(data, data_buf, data_len);
443 if (error != 0)
444 goto done;
445 }
446
447 if (flags & MNT_GETARGS) {
448 if (data_len == 0) {
449 error = EINVAL;
450 goto done;
451 }
452 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
453 if (error != 0)
454 goto done;
455 if (data_seg == UIO_USERSPACE)
456 error = copyout(data_buf, data, data_len);
457 *retval = data_len;
458 } else if (flags & MNT_UPDATE) {
459 error = mount_update(l, vp, path, flags, data_buf, &data_len);
460 } else {
461 /* Locking is handled internally in mount_domount(). */
462 KASSERT(vfsopsrele == true);
463 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
464 &data_len);
465 vfsopsrele = false;
466
467 if ((error == 0) && (flags & MNT_EXTATTR)) {
468 if ((error = namei_simple_user(path,
469 NSM_FOLLOW_TRYEMULROOT, &vp)) != 0)
470 goto done;
471
472 if (vp->v_mountedhere == NULL)
473 goto done;
474
475 if (VFS_EXTATTRCTL(vp->v_mountedhere,
476 EXTATTR_CMD_START,
477 NULL, 0, NULL) != 0)
478 printf("%s: failed to start extattr",
479 vp->v_mountedhere->mnt_stat.f_mntonname);
480 /* XXX remove flag */
481 }
482 }
483
484 done:
485 if (vfsopsrele)
486 vfs_delref(vfsops);
487 if (vp != NULL) {
488 vrele(vp);
489 }
490 if (data_buf != data)
491 kmem_free(data_buf, data_len);
492 return (error);
493 }
494
495 /*
496 * Unmount a file system.
497 *
498 * Note: unmount takes a path to the vnode mounted on as argument,
499 * not special file (as before).
500 */
501 /* ARGSUSED */
502 int
503 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
504 {
505 /* {
506 syscallarg(const char *) path;
507 syscallarg(int) flags;
508 } */
509 struct vnode *vp;
510 struct mount *mp;
511 int error;
512 struct pathbuf *pb;
513 struct nameidata nd;
514
515 error = pathbuf_copyin(SCARG(uap, path), &pb);
516 if (error) {
517 return error;
518 }
519
520 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
521 if ((error = namei(&nd)) != 0) {
522 pathbuf_destroy(pb);
523 return error;
524 }
525 vp = nd.ni_vp;
526 pathbuf_destroy(pb);
527
528 mp = vp->v_mount;
529 atomic_inc_uint(&mp->mnt_refcnt);
530 VOP_UNLOCK(vp);
531
532 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
533 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
534 if (error) {
535 vrele(vp);
536 vfs_destroy(mp);
537 return (error);
538 }
539
540 /*
541 * Don't allow unmounting the root file system.
542 */
543 if (mp->mnt_flag & MNT_ROOTFS) {
544 vrele(vp);
545 vfs_destroy(mp);
546 return (EINVAL);
547 }
548
549 /*
550 * Must be the root of the filesystem
551 */
552 if ((vp->v_vflag & VV_ROOT) == 0) {
553 vrele(vp);
554 vfs_destroy(mp);
555 return (EINVAL);
556 }
557
558 vrele(vp);
559 error = dounmount(mp, SCARG(uap, flags), l);
560 vfs_destroy(mp);
561 return error;
562 }
563
564 /*
565 * Sync each mounted filesystem.
566 */
567 #ifdef DEBUG
568 int syncprt = 0;
569 struct ctldebug debug0 = { "syncprt", &syncprt };
570 #endif
571
572 void
573 do_sys_sync(struct lwp *l)
574 {
575 struct mount *mp, *nmp;
576 int asyncflag;
577
578 mutex_enter(&mountlist_lock);
579 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
580 mp = nmp) {
581 if (vfs_busy(mp, &nmp)) {
582 continue;
583 }
584 mutex_enter(&mp->mnt_updating);
585 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
586 asyncflag = mp->mnt_flag & MNT_ASYNC;
587 mp->mnt_flag &= ~MNT_ASYNC;
588 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
589 if (asyncflag)
590 mp->mnt_flag |= MNT_ASYNC;
591 }
592 mutex_exit(&mp->mnt_updating);
593 vfs_unbusy(mp, false, &nmp);
594 }
595 mutex_exit(&mountlist_lock);
596 #ifdef DEBUG
597 if (syncprt)
598 vfs_bufstats();
599 #endif /* DEBUG */
600 }
601
602 /* ARGSUSED */
603 int
604 sys_sync(struct lwp *l, const void *v, register_t *retval)
605 {
606 do_sys_sync(l);
607 return (0);
608 }
609
610
611 /*
612 * Access or change filesystem quotas.
613 *
614 * (this is really 14 different calls bundled into one)
615 */
616
617 static int
618 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
619 {
620 struct quotastat info_k;
621 int error;
622
623 /* ensure any padding bytes are cleared */
624 memset(&info_k, 0, sizeof(info_k));
625
626 error = vfs_quotactl_stat(mp, &info_k);
627 if (error) {
628 return error;
629 }
630
631 return copyout(&info_k, info_u, sizeof(info_k));
632 }
633
634 static int
635 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
636 struct quotaidtypestat *info_u)
637 {
638 struct quotaidtypestat info_k;
639 int error;
640
641 /* ensure any padding bytes are cleared */
642 memset(&info_k, 0, sizeof(info_k));
643
644 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
645 if (error) {
646 return error;
647 }
648
649 return copyout(&info_k, info_u, sizeof(info_k));
650 }
651
652 static int
653 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
654 struct quotaobjtypestat *info_u)
655 {
656 struct quotaobjtypestat info_k;
657 int error;
658
659 /* ensure any padding bytes are cleared */
660 memset(&info_k, 0, sizeof(info_k));
661
662 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
663 if (error) {
664 return error;
665 }
666
667 return copyout(&info_k, info_u, sizeof(info_k));
668 }
669
670 static int
671 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
672 struct quotaval *val_u)
673 {
674 struct quotakey key_k;
675 struct quotaval val_k;
676 int error;
677
678 /* ensure any padding bytes are cleared */
679 memset(&val_k, 0, sizeof(val_k));
680
681 error = copyin(key_u, &key_k, sizeof(key_k));
682 if (error) {
683 return error;
684 }
685
686 error = vfs_quotactl_get(mp, &key_k, &val_k);
687 if (error) {
688 return error;
689 }
690
691 return copyout(&val_k, val_u, sizeof(val_k));
692 }
693
694 static int
695 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
696 const struct quotaval *val_u)
697 {
698 struct quotakey key_k;
699 struct quotaval val_k;
700 int error;
701
702 error = copyin(key_u, &key_k, sizeof(key_k));
703 if (error) {
704 return error;
705 }
706
707 error = copyin(val_u, &val_k, sizeof(val_k));
708 if (error) {
709 return error;
710 }
711
712 return vfs_quotactl_put(mp, &key_k, &val_k);
713 }
714
715 static int
716 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u)
717 {
718 struct quotakey key_k;
719 int error;
720
721 error = copyin(key_u, &key_k, sizeof(key_k));
722 if (error) {
723 return error;
724 }
725
726 return vfs_quotactl_delete(mp, &key_k);
727 }
728
729 static int
730 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
731 {
732 struct quotakcursor cursor_k;
733 int error;
734
735 /* ensure any padding bytes are cleared */
736 memset(&cursor_k, 0, sizeof(cursor_k));
737
738 error = vfs_quotactl_cursoropen(mp, &cursor_k);
739 if (error) {
740 return error;
741 }
742
743 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
744 }
745
746 static int
747 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
748 {
749 struct quotakcursor cursor_k;
750 int error;
751
752 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
753 if (error) {
754 return error;
755 }
756
757 return vfs_quotactl_cursorclose(mp, &cursor_k);
758 }
759
760 static int
761 do_sys_quotactl_cursorskipidtype(struct mount *mp,
762 struct quotakcursor *cursor_u, int idtype)
763 {
764 struct quotakcursor cursor_k;
765 int error;
766
767 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
768 if (error) {
769 return error;
770 }
771
772 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
773 if (error) {
774 return error;
775 }
776
777 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
778 }
779
780 static int
781 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
782 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
783 unsigned *ret_u)
784 {
785 #define CGET_STACK_MAX 8
786 struct quotakcursor cursor_k;
787 struct quotakey stackkeys[CGET_STACK_MAX];
788 struct quotaval stackvals[CGET_STACK_MAX];
789 struct quotakey *keys_k;
790 struct quotaval *vals_k;
791 unsigned ret_k;
792 int error;
793
794 if (maxnum > 128) {
795 maxnum = 128;
796 }
797
798 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
799 if (error) {
800 return error;
801 }
802
803 if (maxnum <= CGET_STACK_MAX) {
804 keys_k = stackkeys;
805 vals_k = stackvals;
806 /* ensure any padding bytes are cleared */
807 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
808 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
809 } else {
810 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
811 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
812 }
813
814 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
815 &ret_k);
816 if (error) {
817 goto fail;
818 }
819
820 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
821 if (error) {
822 goto fail;
823 }
824
825 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
826 if (error) {
827 goto fail;
828 }
829
830 error = copyout(&ret_k, ret_u, sizeof(ret_k));
831 if (error) {
832 goto fail;
833 }
834
835 /* do last to maximize the chance of being able to recover a failure */
836 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
837
838 fail:
839 if (keys_k != stackkeys) {
840 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
841 }
842 if (vals_k != stackvals) {
843 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
844 }
845 return error;
846 }
847
848 static int
849 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
850 int *ret_u)
851 {
852 struct quotakcursor cursor_k;
853 int ret_k;
854 int error;
855
856 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
857 if (error) {
858 return error;
859 }
860
861 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
862 if (error) {
863 return error;
864 }
865
866 error = copyout(&ret_k, ret_u, sizeof(ret_k));
867 if (error) {
868 return error;
869 }
870
871 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
872 }
873
874 static int
875 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
876 {
877 struct quotakcursor cursor_k;
878 int error;
879
880 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
881 if (error) {
882 return error;
883 }
884
885 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
886 if (error) {
887 return error;
888 }
889
890 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
891 }
892
893 static int
894 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
895 {
896 char *path_k;
897 int error;
898
899 /* XXX this should probably be a struct pathbuf */
900 path_k = PNBUF_GET();
901 error = copyin(path_u, path_k, PATH_MAX);
902 if (error) {
903 PNBUF_PUT(path_k);
904 return error;
905 }
906
907 error = vfs_quotactl_quotaon(mp, idtype, path_k);
908
909 PNBUF_PUT(path_k);
910 return error;
911 }
912
/*
 * QUOTACTL_QUOTAOFF: turn quotas off for idtype on mp.
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int error;

	error = vfs_quotactl_quotaoff(mp, idtype);
	return error;
}
918
919 int
920 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
921 {
922 struct mount *mp;
923 struct vnode *vp;
924 int error;
925
926 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
927 if (error != 0)
928 return (error);
929 mp = vp->v_mount;
930
931 switch (args->qc_op) {
932 case QUOTACTL_STAT:
933 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
934 break;
935 case QUOTACTL_IDTYPESTAT:
936 error = do_sys_quotactl_idtypestat(mp,
937 args->u.idtypestat.qc_idtype,
938 args->u.idtypestat.qc_info);
939 break;
940 case QUOTACTL_OBJTYPESTAT:
941 error = do_sys_quotactl_objtypestat(mp,
942 args->u.objtypestat.qc_objtype,
943 args->u.objtypestat.qc_info);
944 break;
945 case QUOTACTL_GET:
946 error = do_sys_quotactl_get(mp,
947 args->u.get.qc_key,
948 args->u.get.qc_val);
949 break;
950 case QUOTACTL_PUT:
951 error = do_sys_quotactl_put(mp,
952 args->u.put.qc_key,
953 args->u.put.qc_val);
954 break;
955 case QUOTACTL_DELETE:
956 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key);
957 break;
958 case QUOTACTL_CURSOROPEN:
959 error = do_sys_quotactl_cursoropen(mp,
960 args->u.cursoropen.qc_cursor);
961 break;
962 case QUOTACTL_CURSORCLOSE:
963 error = do_sys_quotactl_cursorclose(mp,
964 args->u.cursorclose.qc_cursor);
965 break;
966 case QUOTACTL_CURSORSKIPIDTYPE:
967 error = do_sys_quotactl_cursorskipidtype(mp,
968 args->u.cursorskipidtype.qc_cursor,
969 args->u.cursorskipidtype.qc_idtype);
970 break;
971 case QUOTACTL_CURSORGET:
972 error = do_sys_quotactl_cursorget(mp,
973 args->u.cursorget.qc_cursor,
974 args->u.cursorget.qc_keys,
975 args->u.cursorget.qc_vals,
976 args->u.cursorget.qc_maxnum,
977 args->u.cursorget.qc_ret);
978 break;
979 case QUOTACTL_CURSORATEND:
980 error = do_sys_quotactl_cursoratend(mp,
981 args->u.cursoratend.qc_cursor,
982 args->u.cursoratend.qc_ret);
983 break;
984 case QUOTACTL_CURSORREWIND:
985 error = do_sys_quotactl_cursorrewind(mp,
986 args->u.cursorrewind.qc_cursor);
987 break;
988 case QUOTACTL_QUOTAON:
989 error = do_sys_quotactl_quotaon(mp,
990 args->u.quotaon.qc_idtype,
991 args->u.quotaon.qc_quotafile);
992 break;
993 case QUOTACTL_QUOTAOFF:
994 error = do_sys_quotactl_quotaoff(mp,
995 args->u.quotaoff.qc_idtype);
996 break;
997 default:
998 error = EINVAL;
999 break;
1000 }
1001
1002 vrele(vp);
1003 return error;
1004 }
1005
1006 /* ARGSUSED */
1007 int
1008 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1009 register_t *retval)
1010 {
1011 /* {
1012 syscallarg(const char *) path;
1013 syscallarg(struct quotactl_args *) args;
1014 } */
1015 struct quotactl_args args;
1016 int error;
1017
1018 error = copyin(SCARG(uap, args), &args, sizeof(args));
1019 if (error) {
1020 return error;
1021 }
1022
1023 return do_sys_quotactl(SCARG(uap, path), &args);
1024 }
1025
1026 int
1027 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1028 int root)
1029 {
1030 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1031 int error = 0;
1032
1033 /*
1034 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1035 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1036 * overrides MNT_NOWAIT.
1037 */
1038 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1039 (flags != MNT_WAIT && flags != 0)) {
1040 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1041 goto done;
1042 }
1043
1044 /* Get the filesystem stats now */
1045 memset(sp, 0, sizeof(*sp));
1046 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1047 return error;
1048 }
1049
1050 if (cwdi->cwdi_rdir == NULL)
1051 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1052 done:
1053 if (cwdi->cwdi_rdir != NULL) {
1054 size_t len;
1055 char *bp;
1056 char c;
1057 char *path = PNBUF_GET();
1058
1059 bp = path + MAXPATHLEN;
1060 *--bp = '\0';
1061 rw_enter(&cwdi->cwdi_lock, RW_READER);
1062 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1063 MAXPATHLEN / 2, 0, l);
1064 rw_exit(&cwdi->cwdi_lock);
1065 if (error) {
1066 PNBUF_PUT(path);
1067 return error;
1068 }
1069 len = strlen(bp);
1070 if (len != 1) {
1071 /*
1072 * for mount points that are below our root, we can see
1073 * them, so we fix up the pathname and return them. The
1074 * rest we cannot see, so we don't allow viewing the
1075 * data.
1076 */
1077 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1078 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1079 (void)strlcpy(sp->f_mntonname,
1080 c == '\0' ? "/" : &sp->f_mntonname[len],
1081 sizeof(sp->f_mntonname));
1082 } else {
1083 if (root)
1084 (void)strlcpy(sp->f_mntonname, "/",
1085 sizeof(sp->f_mntonname));
1086 else
1087 error = EPERM;
1088 }
1089 }
1090 PNBUF_PUT(path);
1091 }
1092 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1093 return error;
1094 }
1095
1096 /*
1097 * Get filesystem statistics by path.
1098 */
1099 int
1100 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1101 {
1102 struct mount *mp;
1103 int error;
1104 struct vnode *vp;
1105
1106 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1107 if (error != 0)
1108 return error;
1109 mp = vp->v_mount;
1110 error = dostatvfs(mp, sb, l, flags, 1);
1111 vrele(vp);
1112 return error;
1113 }
1114
1115 /* ARGSUSED */
1116 int
1117 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1118 {
1119 /* {
1120 syscallarg(const char *) path;
1121 syscallarg(struct statvfs *) buf;
1122 syscallarg(int) flags;
1123 } */
1124 struct statvfs *sb;
1125 int error;
1126
1127 sb = STATVFSBUF_GET();
1128 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1129 if (error == 0)
1130 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1131 STATVFSBUF_PUT(sb);
1132 return error;
1133 }
1134
1135 /*
1136 * Get filesystem statistics by fd.
1137 */
1138 int
1139 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1140 {
1141 file_t *fp;
1142 struct mount *mp;
1143 int error;
1144
1145 /* fd_getvnode() will use the descriptor for us */
1146 if ((error = fd_getvnode(fd, &fp)) != 0)
1147 return (error);
1148 mp = ((struct vnode *)fp->f_data)->v_mount;
1149 error = dostatvfs(mp, sb, curlwp, flags, 1);
1150 fd_putfile(fd);
1151 return error;
1152 }
1153
1154 /* ARGSUSED */
1155 int
1156 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1157 {
1158 /* {
1159 syscallarg(int) fd;
1160 syscallarg(struct statvfs *) buf;
1161 syscallarg(int) flags;
1162 } */
1163 struct statvfs *sb;
1164 int error;
1165
1166 sb = STATVFSBUF_GET();
1167 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1168 if (error == 0)
1169 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1170 STATVFSBUF_PUT(sb);
1171 return error;
1172 }
1173
1174
1175 /*
1176 * Get statistics on all filesystems.
1177 */
1178 int
1179 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1180 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1181 register_t *retval)
1182 {
1183 int root = 0;
1184 struct proc *p = l->l_proc;
1185 struct mount *mp, *nmp;
1186 struct statvfs *sb;
1187 size_t count, maxcount;
1188 int error = 0;
1189
1190 sb = STATVFSBUF_GET();
1191 maxcount = bufsize / entry_sz;
1192 mutex_enter(&mountlist_lock);
1193 count = 0;
1194 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
1195 mp = nmp) {
1196 if (vfs_busy(mp, &nmp)) {
1197 continue;
1198 }
1199 if (sfsp && count < maxcount) {
1200 error = dostatvfs(mp, sb, l, flags, 0);
1201 if (error) {
1202 vfs_unbusy(mp, false, &nmp);
1203 error = 0;
1204 continue;
1205 }
1206 error = copyfn(sb, sfsp, entry_sz);
1207 if (error) {
1208 vfs_unbusy(mp, false, NULL);
1209 goto out;
1210 }
1211 sfsp = (char *)sfsp + entry_sz;
1212 root |= strcmp(sb->f_mntonname, "/") == 0;
1213 }
1214 count++;
1215 vfs_unbusy(mp, false, &nmp);
1216 }
1217 mutex_exit(&mountlist_lock);
1218
1219 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1220 /*
1221 * fake a root entry
1222 */
1223 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1224 sb, l, flags, 1);
1225 if (error != 0)
1226 goto out;
1227 if (sfsp) {
1228 error = copyfn(sb, sfsp, entry_sz);
1229 if (error != 0)
1230 goto out;
1231 }
1232 count++;
1233 }
1234 if (sfsp && count > maxcount)
1235 *retval = maxcount;
1236 else
1237 *retval = count;
1238 out:
1239 STATVFSBUF_PUT(sb);
1240 return error;
1241 }
1242
1243 int
1244 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1245 {
1246 /* {
1247 syscallarg(struct statvfs *) buf;
1248 syscallarg(size_t) bufsize;
1249 syscallarg(int) flags;
1250 } */
1251
1252 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1253 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1254 }
1255
1256 /*
1257 * Change current working directory to a given file descriptor.
1258 */
1259 /* ARGSUSED */
1260 int
1261 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1262 {
1263 /* {
1264 syscallarg(int) fd;
1265 } */
1266 struct proc *p = l->l_proc;
1267 struct cwdinfo *cwdi;
1268 struct vnode *vp, *tdp;
1269 struct mount *mp;
1270 file_t *fp;
1271 int error, fd;
1272
1273 /* fd_getvnode() will use the descriptor for us */
1274 fd = SCARG(uap, fd);
1275 if ((error = fd_getvnode(fd, &fp)) != 0)
1276 return (error);
1277 vp = fp->f_data;
1278
1279 vref(vp);
1280 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1281 if (vp->v_type != VDIR)
1282 error = ENOTDIR;
1283 else
1284 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1285 if (error) {
1286 vput(vp);
1287 goto out;
1288 }
1289 while ((mp = vp->v_mountedhere) != NULL) {
1290 error = vfs_busy(mp, NULL);
1291 vput(vp);
1292 if (error != 0)
1293 goto out;
1294 error = VFS_ROOT(mp, &tdp);
1295 vfs_unbusy(mp, false, NULL);
1296 if (error)
1297 goto out;
1298 vp = tdp;
1299 }
1300 VOP_UNLOCK(vp);
1301
1302 /*
1303 * Disallow changing to a directory not under the process's
1304 * current root directory (if there is one).
1305 */
1306 cwdi = p->p_cwdi;
1307 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1308 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1309 vrele(vp);
1310 error = EPERM; /* operation not permitted */
1311 } else {
1312 vrele(cwdi->cwdi_cdir);
1313 cwdi->cwdi_cdir = vp;
1314 }
1315 rw_exit(&cwdi->cwdi_lock);
1316
1317 out:
1318 fd_putfile(fd);
1319 return (error);
1320 }
1321
1322 /*
1323 * Change this process's notion of the root directory to a given file
1324 * descriptor.
1325 */
1326 int
1327 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1328 {
1329 struct proc *p = l->l_proc;
1330 struct vnode *vp;
1331 file_t *fp;
1332 int error, fd = SCARG(uap, fd);
1333
1334 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1335 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1336 return error;
1337 /* fd_getvnode() will use the descriptor for us */
1338 if ((error = fd_getvnode(fd, &fp)) != 0)
1339 return error;
1340 vp = fp->f_data;
1341 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1342 if (vp->v_type != VDIR)
1343 error = ENOTDIR;
1344 else
1345 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1346 VOP_UNLOCK(vp);
1347 if (error)
1348 goto out;
1349 vref(vp);
1350
1351 change_root(p->p_cwdi, vp, l);
1352
1353 out:
1354 fd_putfile(fd);
1355 return (error);
1356 }
1357
1358 /*
1359 * Change current working directory (``.'').
1360 */
1361 /* ARGSUSED */
1362 int
1363 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1364 {
1365 /* {
1366 syscallarg(const char *) path;
1367 } */
1368 struct proc *p = l->l_proc;
1369 struct cwdinfo *cwdi;
1370 int error;
1371 struct vnode *vp;
1372
1373 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1374 &vp, l)) != 0)
1375 return (error);
1376 cwdi = p->p_cwdi;
1377 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1378 vrele(cwdi->cwdi_cdir);
1379 cwdi->cwdi_cdir = vp;
1380 rw_exit(&cwdi->cwdi_lock);
1381 return (0);
1382 }
1383
1384 /*
1385 * Change notion of root (``/'') directory.
1386 */
1387 /* ARGSUSED */
1388 int
1389 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1390 {
1391 /* {
1392 syscallarg(const char *) path;
1393 } */
1394 struct proc *p = l->l_proc;
1395 int error;
1396 struct vnode *vp;
1397
1398 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1399 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1400 return (error);
1401 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1402 &vp, l)) != 0)
1403 return (error);
1404
1405 change_root(p->p_cwdi, vp, l);
1406
1407 return (0);
1408 }
1409
1410 /*
1411 * Common routine for chroot and fchroot.
1412 * NB: callers need to properly authorize the change root operation.
1413 */
1414 void
1415 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1416 {
1417
1418 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1419 if (cwdi->cwdi_rdir != NULL)
1420 vrele(cwdi->cwdi_rdir);
1421 cwdi->cwdi_rdir = vp;
1422
1423 /*
1424 * Prevent escaping from chroot by putting the root under
1425 * the working directory. Silently chdir to / if we aren't
1426 * already there.
1427 */
1428 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1429 /*
1430 * XXX would be more failsafe to change directory to a
1431 * deadfs node here instead
1432 */
1433 vrele(cwdi->cwdi_cdir);
1434 vref(vp);
1435 cwdi->cwdi_cdir = vp;
1436 }
1437 rw_exit(&cwdi->cwdi_lock);
1438 }
1439
1440 /*
1441 * Common routine for chroot and chdir.
1442 * XXX "where" should be enum uio_seg
1443 */
1444 int
1445 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1446 {
1447 struct pathbuf *pb;
1448 struct nameidata nd;
1449 int error;
1450
1451 error = pathbuf_maybe_copyin(path, where, &pb);
1452 if (error) {
1453 return error;
1454 }
1455 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1456 if ((error = namei(&nd)) != 0) {
1457 pathbuf_destroy(pb);
1458 return error;
1459 }
1460 *vpp = nd.ni_vp;
1461 pathbuf_destroy(pb);
1462
1463 if ((*vpp)->v_type != VDIR)
1464 error = ENOTDIR;
1465 else
1466 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1467
1468 if (error)
1469 vput(*vpp);
1470 else
1471 VOP_UNLOCK(*vpp);
1472 return (error);
1473 }
1474
1475 /*
1476 * Internals of sys_open - path has already been converted into a pathbuf
1477 * (so we can easily reuse this function from other parts of the kernel,
1478 * like posix_spawn post-processing).
1479 */
1480 static int
1481 do_open(lwp_t *l, struct pathbuf *pb, int open_flags, int open_mode, int *fd)
1482 {
1483 struct proc *p = l->l_proc;
1484 struct cwdinfo *cwdi = p->p_cwdi;
1485 file_t *fp;
1486 struct vnode *vp;
1487 int flags, cmode;
1488 int indx, error;
1489 struct nameidata nd;
1490
1491 flags = FFLAGS(open_flags);
1492 if ((flags & (FREAD | FWRITE)) == 0)
1493 return EINVAL;
1494
1495 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1496 pathbuf_destroy(pb);
1497 return error;
1498 }
1499 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1500 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1501 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1502 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1503 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1504 fd_abort(p, fp, indx);
1505 if ((error == EDUPFD || error == EMOVEFD) &&
1506 l->l_dupfd >= 0 && /* XXX from fdopen */
1507 (error =
1508 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1509 *fd = indx;
1510 pathbuf_destroy(pb);
1511 return (0);
1512 }
1513 if (error == ERESTART)
1514 error = EINTR;
1515 pathbuf_destroy(pb);
1516 return error;
1517 }
1518
1519 l->l_dupfd = 0;
1520 vp = nd.ni_vp;
1521 pathbuf_destroy(pb);
1522
1523 if ((error = open_setfp(l, fp, vp, indx, flags)))
1524 return error;
1525
1526 VOP_UNLOCK(vp);
1527 *fd = indx;
1528 fd_affix(p, fp, indx);
1529 return 0;
1530 }
1531
1532 int
1533 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1534 {
1535 struct pathbuf *pb;
1536 int oflags;
1537
1538 oflags = FFLAGS(open_flags);
1539 if ((oflags & (FREAD | FWRITE)) == 0)
1540 return EINVAL;
1541
1542 pb = pathbuf_create(path);
1543 if (pb == NULL)
1544 return ENOMEM;
1545
1546 return do_open(curlwp, pb, open_flags, open_mode, fd);
1547 }
1548
1549 /*
1550 * Check permissions, allocate an open file structure,
1551 * and call the device open routine if any.
1552 */
1553 int
1554 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1555 {
1556 /* {
1557 syscallarg(const char *) path;
1558 syscallarg(int) flags;
1559 syscallarg(int) mode;
1560 } */
1561 struct pathbuf *pb;
1562 int result, flags, error;
1563
1564 flags = FFLAGS(SCARG(uap, flags));
1565 if ((flags & (FREAD | FWRITE)) == 0)
1566 return (EINVAL);
1567
1568 error = pathbuf_copyin(SCARG(uap, path), &pb);
1569 if (error)
1570 return error;
1571
1572 error = do_open(l, pb, SCARG(uap, flags), SCARG(uap, mode), &result);
1573 if (error)
1574 return error;
1575
1576 *retval = result;
1577 return 0;
1578 }
1579
1580 int
1581 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1582 {
1583 /* {
1584 syscallarg(int) fd;
1585 syscallarg(const char *) path;
1586 syscallarg(int) flags;
1587 syscallarg(int) mode;
1588 } */
1589
1590 return ENOSYS;
1591 }
1592
1593 static void
1594 vfs__fhfree(fhandle_t *fhp)
1595 {
1596 size_t fhsize;
1597
1598 if (fhp == NULL) {
1599 return;
1600 }
1601 fhsize = FHANDLE_SIZE(fhp);
1602 kmem_free(fhp, fhsize);
1603 }
1604
1605 /*
1606 * vfs_composefh: compose a filehandle.
1607 */
1608
1609 int
1610 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1611 {
1612 struct mount *mp;
1613 struct fid *fidp;
1614 int error;
1615 size_t needfhsize;
1616 size_t fidsize;
1617
1618 mp = vp->v_mount;
1619 fidp = NULL;
1620 if (*fh_size < FHANDLE_SIZE_MIN) {
1621 fidsize = 0;
1622 } else {
1623 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1624 if (fhp != NULL) {
1625 memset(fhp, 0, *fh_size);
1626 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1627 fidp = &fhp->fh_fid;
1628 }
1629 }
1630 error = VFS_VPTOFH(vp, fidp, &fidsize);
1631 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1632 if (error == 0 && *fh_size < needfhsize) {
1633 error = E2BIG;
1634 }
1635 *fh_size = needfhsize;
1636 return error;
1637 }
1638
1639 int
1640 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1641 {
1642 struct mount *mp;
1643 fhandle_t *fhp;
1644 size_t fhsize;
1645 size_t fidsize;
1646 int error;
1647
1648 *fhpp = NULL;
1649 mp = vp->v_mount;
1650 fidsize = 0;
1651 error = VFS_VPTOFH(vp, NULL, &fidsize);
1652 KASSERT(error != 0);
1653 if (error != E2BIG) {
1654 goto out;
1655 }
1656 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1657 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1658 if (fhp == NULL) {
1659 error = ENOMEM;
1660 goto out;
1661 }
1662 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1663 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1664 if (error == 0) {
1665 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1666 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1667 *fhpp = fhp;
1668 } else {
1669 kmem_free(fhp, fhsize);
1670 }
1671 out:
1672 return error;
1673 }
1674
1675 void
1676 vfs_composefh_free(fhandle_t *fhp)
1677 {
1678
1679 vfs__fhfree(fhp);
1680 }
1681
1682 /*
1683 * vfs_fhtovp: lookup a vnode by a filehandle.
1684 */
1685
1686 int
1687 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1688 {
1689 struct mount *mp;
1690 int error;
1691
1692 *vpp = NULL;
1693 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1694 if (mp == NULL) {
1695 error = ESTALE;
1696 goto out;
1697 }
1698 if (mp->mnt_op->vfs_fhtovp == NULL) {
1699 error = EOPNOTSUPP;
1700 goto out;
1701 }
1702 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1703 out:
1704 return error;
1705 }
1706
1707 /*
1708 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1709 * the needed size.
1710 */
1711
1712 int
1713 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1714 {
1715 fhandle_t *fhp;
1716 int error;
1717
1718 *fhpp = NULL;
1719 if (fhsize > FHANDLE_SIZE_MAX) {
1720 return EINVAL;
1721 }
1722 if (fhsize < FHANDLE_SIZE_MIN) {
1723 return EINVAL;
1724 }
1725 again:
1726 fhp = kmem_alloc(fhsize, KM_SLEEP);
1727 if (fhp == NULL) {
1728 return ENOMEM;
1729 }
1730 error = copyin(ufhp, fhp, fhsize);
1731 if (error == 0) {
1732 /* XXX this check shouldn't be here */
1733 if (FHANDLE_SIZE(fhp) == fhsize) {
1734 *fhpp = fhp;
1735 return 0;
1736 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1737 /*
1738 * a kludge for nfsv2 padded handles.
1739 */
1740 size_t sz;
1741
1742 sz = FHANDLE_SIZE(fhp);
1743 kmem_free(fhp, fhsize);
1744 fhsize = sz;
1745 goto again;
1746 } else {
1747 /*
1748 * userland told us wrong size.
1749 */
1750 error = EINVAL;
1751 }
1752 }
1753 kmem_free(fhp, fhsize);
1754 return error;
1755 }
1756
1757 void
1758 vfs_copyinfh_free(fhandle_t *fhp)
1759 {
1760
1761 vfs__fhfree(fhp);
1762 }
1763
1764 /*
1765 * Get file handle system call
1766 */
1767 int
1768 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1769 {
1770 /* {
1771 syscallarg(char *) fname;
1772 syscallarg(fhandle_t *) fhp;
1773 syscallarg(size_t *) fh_size;
1774 } */
1775 struct vnode *vp;
1776 fhandle_t *fh;
1777 int error;
1778 struct pathbuf *pb;
1779 struct nameidata nd;
1780 size_t sz;
1781 size_t usz;
1782
1783 /*
1784 * Must be super user
1785 */
1786 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1787 0, NULL, NULL, NULL);
1788 if (error)
1789 return (error);
1790
1791 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1792 if (error) {
1793 return error;
1794 }
1795 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1796 error = namei(&nd);
1797 if (error) {
1798 pathbuf_destroy(pb);
1799 return error;
1800 }
1801 vp = nd.ni_vp;
1802 pathbuf_destroy(pb);
1803
1804 error = vfs_composefh_alloc(vp, &fh);
1805 vput(vp);
1806 if (error != 0) {
1807 goto out;
1808 }
1809 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1810 if (error != 0) {
1811 goto out;
1812 }
1813 sz = FHANDLE_SIZE(fh);
1814 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1815 if (error != 0) {
1816 goto out;
1817 }
1818 if (usz >= sz) {
1819 error = copyout(fh, SCARG(uap, fhp), sz);
1820 } else {
1821 error = E2BIG;
1822 }
1823 out:
1824 vfs_composefh_free(fh);
1825 return (error);
1826 }
1827
1828 /*
1829 * Open a file given a file handle.
1830 *
1831 * Check permissions, allocate an open file structure,
1832 * and call the device open routine if any.
1833 */
1834
1835 int
1836 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1837 register_t *retval)
1838 {
1839 file_t *fp;
1840 struct vnode *vp = NULL;
1841 kauth_cred_t cred = l->l_cred;
1842 file_t *nfp;
1843 int indx, error = 0;
1844 struct vattr va;
1845 fhandle_t *fh;
1846 int flags;
1847 proc_t *p;
1848
1849 p = curproc;
1850
1851 /*
1852 * Must be super user
1853 */
1854 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1855 0, NULL, NULL, NULL)))
1856 return (error);
1857
1858 flags = FFLAGS(oflags);
1859 if ((flags & (FREAD | FWRITE)) == 0)
1860 return (EINVAL);
1861 if ((flags & O_CREAT))
1862 return (EINVAL);
1863 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1864 return (error);
1865 fp = nfp;
1866 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1867 if (error != 0) {
1868 goto bad;
1869 }
1870 error = vfs_fhtovp(fh, &vp);
1871 if (error != 0) {
1872 goto bad;
1873 }
1874
1875 /* Now do an effective vn_open */
1876
1877 if (vp->v_type == VSOCK) {
1878 error = EOPNOTSUPP;
1879 goto bad;
1880 }
1881 error = vn_openchk(vp, cred, flags);
1882 if (error != 0)
1883 goto bad;
1884 if (flags & O_TRUNC) {
1885 VOP_UNLOCK(vp); /* XXX */
1886 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1887 vattr_null(&va);
1888 va.va_size = 0;
1889 error = VOP_SETATTR(vp, &va, cred);
1890 if (error)
1891 goto bad;
1892 }
1893 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1894 goto bad;
1895 if (flags & FWRITE) {
1896 mutex_enter(vp->v_interlock);
1897 vp->v_writecount++;
1898 mutex_exit(vp->v_interlock);
1899 }
1900
1901 /* done with modified vn_open, now finish what sys_open does. */
1902 if ((error = open_setfp(l, fp, vp, indx, flags)))
1903 return error;
1904
1905 VOP_UNLOCK(vp);
1906 *retval = indx;
1907 fd_affix(p, fp, indx);
1908 vfs_copyinfh_free(fh);
1909 return (0);
1910
1911 bad:
1912 fd_abort(p, fp, indx);
1913 if (vp != NULL)
1914 vput(vp);
1915 vfs_copyinfh_free(fh);
1916 return (error);
1917 }
1918
1919 int
1920 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
1921 {
1922 /* {
1923 syscallarg(const void *) fhp;
1924 syscallarg(size_t) fh_size;
1925 syscallarg(int) flags;
1926 } */
1927
1928 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
1929 SCARG(uap, flags), retval);
1930 }
1931
1932 int
1933 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
1934 {
1935 int error;
1936 fhandle_t *fh;
1937 struct vnode *vp;
1938
1939 /*
1940 * Must be super user
1941 */
1942 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1943 0, NULL, NULL, NULL)))
1944 return (error);
1945
1946 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1947 if (error != 0)
1948 return error;
1949
1950 error = vfs_fhtovp(fh, &vp);
1951 vfs_copyinfh_free(fh);
1952 if (error != 0)
1953 return error;
1954
1955 error = vn_stat(vp, sb);
1956 vput(vp);
1957 return error;
1958 }
1959
1960
1961 /* ARGSUSED */
1962 int
1963 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
1964 {
1965 /* {
1966 syscallarg(const void *) fhp;
1967 syscallarg(size_t) fh_size;
1968 syscallarg(struct stat *) sb;
1969 } */
1970 struct stat sb;
1971 int error;
1972
1973 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
1974 if (error)
1975 return error;
1976 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
1977 }
1978
1979 int
1980 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
1981 int flags)
1982 {
1983 fhandle_t *fh;
1984 struct mount *mp;
1985 struct vnode *vp;
1986 int error;
1987
1988 /*
1989 * Must be super user
1990 */
1991 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1992 0, NULL, NULL, NULL)))
1993 return error;
1994
1995 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1996 if (error != 0)
1997 return error;
1998
1999 error = vfs_fhtovp(fh, &vp);
2000 vfs_copyinfh_free(fh);
2001 if (error != 0)
2002 return error;
2003
2004 mp = vp->v_mount;
2005 error = dostatvfs(mp, sb, l, flags, 1);
2006 vput(vp);
2007 return error;
2008 }
2009
2010 /* ARGSUSED */
2011 int
2012 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2013 {
2014 /* {
2015 syscallarg(const void *) fhp;
2016 syscallarg(size_t) fh_size;
2017 syscallarg(struct statvfs *) buf;
2018 syscallarg(int) flags;
2019 } */
2020 struct statvfs *sb = STATVFSBUF_GET();
2021 int error;
2022
2023 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2024 SCARG(uap, flags));
2025 if (error == 0)
2026 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2027 STATVFSBUF_PUT(sb);
2028 return error;
2029 }
2030
2031 /*
2032 * Create a special file.
2033 */
2034 /* ARGSUSED */
2035 int
2036 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2037 register_t *retval)
2038 {
2039 /* {
2040 syscallarg(const char *) path;
2041 syscallarg(mode_t) mode;
2042 syscallarg(dev_t) dev;
2043 } */
2044 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode),
2045 SCARG(uap, dev), retval, UIO_USERSPACE);
2046 }
2047
2048 int
2049 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2050 register_t *retval)
2051 {
2052 /* {
2053 syscallarg(int) fd;
2054 syscallarg(const char *) path;
2055 syscallarg(mode_t) mode;
2056 syscallarg(uint32_t) dev;
2057 } */
2058
2059 return ENOSYS;
2060 }
2061
2062 int
2063 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2064 register_t *retval, enum uio_seg seg)
2065 {
2066 struct proc *p = l->l_proc;
2067 struct vnode *vp;
2068 struct vattr vattr;
2069 int error, optype;
2070 struct pathbuf *pb;
2071 struct nameidata nd;
2072 const char *pathstring;
2073
2074 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2075 0, NULL, NULL, NULL)) != 0)
2076 return (error);
2077
2078 optype = VOP_MKNOD_DESCOFFSET;
2079
2080 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2081 if (error) {
2082 return error;
2083 }
2084 pathstring = pathbuf_stringcopy_get(pb);
2085 if (pathstring == NULL) {
2086 pathbuf_destroy(pb);
2087 return ENOMEM;
2088 }
2089
2090 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2091 if ((error = namei(&nd)) != 0)
2092 goto out;
2093 vp = nd.ni_vp;
2094
2095 if (vp != NULL)
2096 error = EEXIST;
2097 else {
2098 vattr_null(&vattr);
2099 /* We will read cwdi->cwdi_cmask unlocked. */
2100 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2101 vattr.va_rdev = dev;
2102
2103 switch (mode & S_IFMT) {
2104 case S_IFMT: /* used by badsect to flag bad sectors */
2105 vattr.va_type = VBAD;
2106 break;
2107 case S_IFCHR:
2108 vattr.va_type = VCHR;
2109 break;
2110 case S_IFBLK:
2111 vattr.va_type = VBLK;
2112 break;
2113 case S_IFWHT:
2114 optype = VOP_WHITEOUT_DESCOFFSET;
2115 break;
2116 case S_IFREG:
2117 #if NVERIEXEC > 0
2118 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2119 O_CREAT);
2120 #endif /* NVERIEXEC > 0 */
2121 vattr.va_type = VREG;
2122 vattr.va_rdev = VNOVAL;
2123 optype = VOP_CREATE_DESCOFFSET;
2124 break;
2125 default:
2126 error = EINVAL;
2127 break;
2128 }
2129 }
2130 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2131 && vattr.va_rdev == VNOVAL)
2132 error = EINVAL;
2133 if (!error) {
2134 switch (optype) {
2135 case VOP_WHITEOUT_DESCOFFSET:
2136 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2137 if (error)
2138 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2139 vput(nd.ni_dvp);
2140 break;
2141
2142 case VOP_MKNOD_DESCOFFSET:
2143 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2144 &nd.ni_cnd, &vattr);
2145 if (error == 0)
2146 vput(nd.ni_vp);
2147 break;
2148
2149 case VOP_CREATE_DESCOFFSET:
2150 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2151 &nd.ni_cnd, &vattr);
2152 if (error == 0)
2153 vput(nd.ni_vp);
2154 break;
2155 }
2156 } else {
2157 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2158 if (nd.ni_dvp == vp)
2159 vrele(nd.ni_dvp);
2160 else
2161 vput(nd.ni_dvp);
2162 if (vp)
2163 vrele(vp);
2164 }
2165 out:
2166 pathbuf_stringcopy_put(pb, pathstring);
2167 pathbuf_destroy(pb);
2168 return (error);
2169 }
2170
2171 /*
2172 * Create a named pipe.
2173 */
2174 /* ARGSUSED */
2175 int
2176 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2177 {
2178 /* {
2179 syscallarg(const char *) path;
2180 syscallarg(int) mode;
2181 } */
2182 struct proc *p = l->l_proc;
2183 struct vattr vattr;
2184 int error;
2185 struct pathbuf *pb;
2186 struct nameidata nd;
2187
2188 error = pathbuf_copyin(SCARG(uap, path), &pb);
2189 if (error) {
2190 return error;
2191 }
2192 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2193 if ((error = namei(&nd)) != 0) {
2194 pathbuf_destroy(pb);
2195 return error;
2196 }
2197 if (nd.ni_vp != NULL) {
2198 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2199 if (nd.ni_dvp == nd.ni_vp)
2200 vrele(nd.ni_dvp);
2201 else
2202 vput(nd.ni_dvp);
2203 vrele(nd.ni_vp);
2204 pathbuf_destroy(pb);
2205 return (EEXIST);
2206 }
2207 vattr_null(&vattr);
2208 vattr.va_type = VFIFO;
2209 /* We will read cwdi->cwdi_cmask unlocked. */
2210 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2211 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2212 if (error == 0)
2213 vput(nd.ni_vp);
2214 pathbuf_destroy(pb);
2215 return (error);
2216 }
2217
2218 int
2219 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2220 register_t *retval)
2221 {
2222 /* {
2223 syscallarg(int) fd;
2224 syscallarg(const char *) path;
2225 syscallarg(int) mode;
2226 } */
2227
2228 return ENOSYS;
2229 }
2230 /*
2231 * Make a hard file link.
2232 */
2233 /* ARGSUSED */
2234 static int
2235 do_sys_link(struct lwp *l, const char *path, const char *link,
2236 int follow, register_t *retval)
2237 {
2238 struct vnode *vp;
2239 struct pathbuf *linkpb;
2240 struct nameidata nd;
2241 namei_simple_flags_t namei_simple_flags;
2242 int error;
2243
2244 if (follow)
2245 namei_simple_flags = NSM_FOLLOW_TRYEMULROOT;
2246 else
2247 namei_simple_flags = NSM_NOFOLLOW_TRYEMULROOT;
2248
2249 error = namei_simple_user(path, namei_simple_flags, &vp);
2250 if (error != 0)
2251 return (error);
2252 error = pathbuf_copyin(link, &linkpb);
2253 if (error) {
2254 goto out1;
2255 }
2256 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2257 if ((error = namei(&nd)) != 0)
2258 goto out2;
2259 if (nd.ni_vp) {
2260 error = EEXIST;
2261 goto abortop;
2262 }
2263 /* Prevent hard links on directories. */
2264 if (vp->v_type == VDIR) {
2265 error = EPERM;
2266 goto abortop;
2267 }
2268 /* Prevent cross-mount operation. */
2269 if (nd.ni_dvp->v_mount != vp->v_mount) {
2270 error = EXDEV;
2271 goto abortop;
2272 }
2273 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2274 out2:
2275 pathbuf_destroy(linkpb);
2276 out1:
2277 vrele(vp);
2278 return (error);
2279 abortop:
2280 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2281 if (nd.ni_dvp == nd.ni_vp)
2282 vrele(nd.ni_dvp);
2283 else
2284 vput(nd.ni_dvp);
2285 if (nd.ni_vp != NULL)
2286 vrele(nd.ni_vp);
2287 goto out2;
2288 }
2289
2290 int
2291 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2292 {
2293 /* {
2294 syscallarg(const char *) path;
2295 syscallarg(const char *) link;
2296 } */
2297 const char *path = SCARG(uap, path);
2298 const char *link = SCARG(uap, link);
2299
2300 return do_sys_link(l, path, link, 1, retval);
2301 }
2302
2303 int
2304 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2305 register_t *retval)
2306 {
2307 /* {
2308 syscallarg(int) fd1;
2309 syscallarg(const char *) name1;
2310 syscallarg(int) fd2;
2311 syscallarg(const char *) name2;
2312 syscallarg(int) flags;
2313 } */
2314 const char *name1 = SCARG(uap, name1);
2315 const char *name2 = SCARG(uap, name2);
2316 int follow;
2317
2318 /*
2319 * Specified fd1 and fd2 are not yet implemented
2320 */
2321 if ((SCARG(uap, fd1) != AT_FDCWD) || (SCARG(uap, fd2) != AT_FDCWD))
2322 return ENOSYS;
2323
2324 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2325
2326 return do_sys_link(l, name1, name2, follow, retval);
2327 }
2328
2329
2330 int
2331 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2332 {
2333 struct proc *p = curproc;
2334 struct vattr vattr;
2335 char *path;
2336 int error;
2337 struct pathbuf *linkpb;
2338 struct nameidata nd;
2339
2340 path = PNBUF_GET();
2341 if (seg == UIO_USERSPACE) {
2342 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2343 goto out1;
2344 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2345 goto out1;
2346 } else {
2347 KASSERT(strlen(patharg) < MAXPATHLEN);
2348 strcpy(path, patharg);
2349 linkpb = pathbuf_create(link);
2350 if (linkpb == NULL) {
2351 error = ENOMEM;
2352 goto out1;
2353 }
2354 }
2355 ktrkuser("symlink-target", path, strlen(path));
2356
2357 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2358 if ((error = namei(&nd)) != 0)
2359 goto out2;
2360 if (nd.ni_vp) {
2361 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2362 if (nd.ni_dvp == nd.ni_vp)
2363 vrele(nd.ni_dvp);
2364 else
2365 vput(nd.ni_dvp);
2366 vrele(nd.ni_vp);
2367 error = EEXIST;
2368 goto out2;
2369 }
2370 vattr_null(&vattr);
2371 vattr.va_type = VLNK;
2372 /* We will read cwdi->cwdi_cmask unlocked. */
2373 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2374 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2375 if (error == 0)
2376 vput(nd.ni_vp);
2377 out2:
2378 pathbuf_destroy(linkpb);
2379 out1:
2380 PNBUF_PUT(path);
2381 return (error);
2382 }
2383
2384 /*
2385 * Make a symbolic link.
2386 */
2387 /* ARGSUSED */
2388 int
2389 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2390 {
2391 /* {
2392 syscallarg(const char *) path;
2393 syscallarg(const char *) link;
2394 } */
2395
2396 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link),
2397 UIO_USERSPACE);
2398 }
2399
2400 int
2401 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2402 register_t *retval)
2403 {
2404 /* {
2405 syscallarg(int) fd;
2406 syscallarg(const char *) path;
2407 syscallarg(const char *) link;
2408 } */
2409
2410 return ENOSYS;
2411 }
2412
2413 /*
2414 * Delete a whiteout from the filesystem.
2415 */
2416 /* ARGSUSED */
2417 int
2418 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2419 {
2420 /* {
2421 syscallarg(const char *) path;
2422 } */
2423 int error;
2424 struct pathbuf *pb;
2425 struct nameidata nd;
2426
2427 error = pathbuf_copyin(SCARG(uap, path), &pb);
2428 if (error) {
2429 return error;
2430 }
2431
2432 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2433 error = namei(&nd);
2434 if (error) {
2435 pathbuf_destroy(pb);
2436 return (error);
2437 }
2438
2439 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2440 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2441 if (nd.ni_dvp == nd.ni_vp)
2442 vrele(nd.ni_dvp);
2443 else
2444 vput(nd.ni_dvp);
2445 if (nd.ni_vp)
2446 vrele(nd.ni_vp);
2447 pathbuf_destroy(pb);
2448 return (EEXIST);
2449 }
2450 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2451 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2452 vput(nd.ni_dvp);
2453 pathbuf_destroy(pb);
2454 return (error);
2455 }
2456
2457 /*
2458 * Delete a name from the filesystem.
2459 */
2460 /* ARGSUSED */
2461 int
2462 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2463 {
2464 /* {
2465 syscallarg(const char *) path;
2466 } */
2467
2468 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE);
2469 }
2470
2471 int
2472 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2473 register_t *retval)
2474 {
2475 /* {
2476 syscallarg(int) fd;
2477 syscallarg(const char *) path;
2478 } */
2479
2480 return ENOSYS;
2481 }
2482
2483 int
2484 do_sys_unlink(const char *arg, enum uio_seg seg)
2485 {
2486 struct vnode *vp;
2487 int error;
2488 struct pathbuf *pb;
2489 struct nameidata nd;
2490 const char *pathstring;
2491
2492 error = pathbuf_maybe_copyin(arg, seg, &pb);
2493 if (error) {
2494 return error;
2495 }
2496 pathstring = pathbuf_stringcopy_get(pb);
2497 if (pathstring == NULL) {
2498 pathbuf_destroy(pb);
2499 return ENOMEM;
2500 }
2501
2502 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2503 if ((error = namei(&nd)) != 0)
2504 goto out;
2505 vp = nd.ni_vp;
2506
2507 /*
2508 * The root of a mounted filesystem cannot be deleted.
2509 */
2510 if (vp->v_vflag & VV_ROOT) {
2511 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2512 if (nd.ni_dvp == vp)
2513 vrele(nd.ni_dvp);
2514 else
2515 vput(nd.ni_dvp);
2516 vput(vp);
2517 error = EBUSY;
2518 goto out;
2519 }
2520
2521 #if NVERIEXEC > 0
2522 /* Handle remove requests for veriexec entries. */
2523 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2524 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2525 if (nd.ni_dvp == vp)
2526 vrele(nd.ni_dvp);
2527 else
2528 vput(nd.ni_dvp);
2529 vput(vp);
2530 goto out;
2531 }
2532 #endif /* NVERIEXEC > 0 */
2533
2534 #ifdef FILEASSOC
2535 (void)fileassoc_file_delete(vp);
2536 #endif /* FILEASSOC */
2537 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2538 out:
2539 pathbuf_stringcopy_put(pb, pathstring);
2540 pathbuf_destroy(pb);
2541 return (error);
2542 }
2543
2544 /*
2545 * Reposition read/write file offset.
2546 */
2547 int
2548 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2549 {
2550 /* {
2551 syscallarg(int) fd;
2552 syscallarg(int) pad;
2553 syscallarg(off_t) offset;
2554 syscallarg(int) whence;
2555 } */
2556 kauth_cred_t cred = l->l_cred;
2557 file_t *fp;
2558 struct vnode *vp;
2559 struct vattr vattr;
2560 off_t newoff;
2561 int error, fd;
2562
2563 fd = SCARG(uap, fd);
2564
2565 if ((fp = fd_getfile(fd)) == NULL)
2566 return (EBADF);
2567
2568 vp = fp->f_data;
2569 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2570 error = ESPIPE;
2571 goto out;
2572 }
2573
2574 switch (SCARG(uap, whence)) {
2575 case SEEK_CUR:
2576 newoff = fp->f_offset + SCARG(uap, offset);
2577 break;
2578 case SEEK_END:
2579 vn_lock(vp, LK_SHARED | LK_RETRY);
2580 error = VOP_GETATTR(vp, &vattr, cred);
2581 VOP_UNLOCK(vp);
2582 if (error) {
2583 goto out;
2584 }
2585 newoff = SCARG(uap, offset) + vattr.va_size;
2586 break;
2587 case SEEK_SET:
2588 newoff = SCARG(uap, offset);
2589 break;
2590 default:
2591 error = EINVAL;
2592 goto out;
2593 }
2594 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2595 *(off_t *)retval = fp->f_offset = newoff;
2596 }
2597 out:
2598 fd_putfile(fd);
2599 return (error);
2600 }
2601
2602 /*
2603 * Positional read system call.
2604 */
2605 int
2606 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2607 {
2608 /* {
2609 syscallarg(int) fd;
2610 syscallarg(void *) buf;
2611 syscallarg(size_t) nbyte;
2612 syscallarg(off_t) offset;
2613 } */
2614 file_t *fp;
2615 struct vnode *vp;
2616 off_t offset;
2617 int error, fd = SCARG(uap, fd);
2618
2619 if ((fp = fd_getfile(fd)) == NULL)
2620 return (EBADF);
2621
2622 if ((fp->f_flag & FREAD) == 0) {
2623 fd_putfile(fd);
2624 return (EBADF);
2625 }
2626
2627 vp = fp->f_data;
2628 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2629 error = ESPIPE;
2630 goto out;
2631 }
2632
2633 offset = SCARG(uap, offset);
2634
2635 /*
2636 * XXX This works because no file systems actually
2637 * XXX take any action on the seek operation.
2638 */
2639 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2640 goto out;
2641
2642 /* dofileread() will unuse the descriptor for us */
2643 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2644 &offset, 0, retval));
2645
2646 out:
2647 fd_putfile(fd);
2648 return (error);
2649 }
2650
2651 /*
2652 * Positional scatter read system call.
2653 */
2654 int
2655 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2656 {
2657 /* {
2658 syscallarg(int) fd;
2659 syscallarg(const struct iovec *) iovp;
2660 syscallarg(int) iovcnt;
2661 syscallarg(off_t) offset;
2662 } */
2663 off_t offset = SCARG(uap, offset);
2664
2665 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2666 SCARG(uap, iovcnt), &offset, 0, retval);
2667 }
2668
2669 /*
2670 * Positional write system call.
2671 */
2672 int
2673 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2674 {
2675 /* {
2676 syscallarg(int) fd;
2677 syscallarg(const void *) buf;
2678 syscallarg(size_t) nbyte;
2679 syscallarg(off_t) offset;
2680 } */
2681 file_t *fp;
2682 struct vnode *vp;
2683 off_t offset;
2684 int error, fd = SCARG(uap, fd);
2685
2686 if ((fp = fd_getfile(fd)) == NULL)
2687 return (EBADF);
2688
2689 if ((fp->f_flag & FWRITE) == 0) {
2690 fd_putfile(fd);
2691 return (EBADF);
2692 }
2693
2694 vp = fp->f_data;
2695 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2696 error = ESPIPE;
2697 goto out;
2698 }
2699
2700 offset = SCARG(uap, offset);
2701
2702 /*
2703 * XXX This works because no file systems actually
2704 * XXX take any action on the seek operation.
2705 */
2706 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2707 goto out;
2708
2709 /* dofilewrite() will unuse the descriptor for us */
2710 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2711 &offset, 0, retval));
2712
2713 out:
2714 fd_putfile(fd);
2715 return (error);
2716 }
2717
2718 /*
2719 * Positional gather write system call.
2720 */
2721 int
2722 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2723 {
2724 /* {
2725 syscallarg(int) fd;
2726 syscallarg(const struct iovec *) iovp;
2727 syscallarg(int) iovcnt;
2728 syscallarg(off_t) offset;
2729 } */
2730 off_t offset = SCARG(uap, offset);
2731
2732 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2733 SCARG(uap, iovcnt), &offset, 0, retval);
2734 }
2735
2736 /*
2737 * Check access permissions.
2738 */
2739 int
2740 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2741 {
2742 /* {
2743 syscallarg(const char *) path;
2744 syscallarg(int) flags;
2745 } */
2746 kauth_cred_t cred;
2747 struct vnode *vp;
2748 int error, flags;
2749 struct pathbuf *pb;
2750 struct nameidata nd;
2751
2752 CTASSERT(F_OK == 0);
2753 if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) {
2754 /* nonsense flags */
2755 return EINVAL;
2756 }
2757
2758 error = pathbuf_copyin(SCARG(uap, path), &pb);
2759 if (error) {
2760 return error;
2761 }
2762 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2763
2764 /* Override default credentials */
2765 cred = kauth_cred_dup(l->l_cred);
2766 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2767 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2768 nd.ni_cnd.cn_cred = cred;
2769
2770 if ((error = namei(&nd)) != 0) {
2771 pathbuf_destroy(pb);
2772 goto out;
2773 }
2774 vp = nd.ni_vp;
2775 pathbuf_destroy(pb);
2776
2777 /* Flags == 0 means only check for existence. */
2778 if (SCARG(uap, flags)) {
2779 flags = 0;
2780 if (SCARG(uap, flags) & R_OK)
2781 flags |= VREAD;
2782 if (SCARG(uap, flags) & W_OK)
2783 flags |= VWRITE;
2784 if (SCARG(uap, flags) & X_OK)
2785 flags |= VEXEC;
2786
2787 error = VOP_ACCESS(vp, flags, cred);
2788 if (!error && (flags & VWRITE))
2789 error = vn_writechk(vp);
2790 }
2791 vput(vp);
2792 out:
2793 kauth_cred_free(cred);
2794 return (error);
2795 }
2796
2797 int
2798 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
2799 register_t *retval)
2800 {
2801 /* {
2802 syscallarg(int) fd;
2803 syscallarg(const char *) path;
2804 syscallarg(int) amode;
2805 syscallarg(int) flag;
2806 } */
2807
2808 return ENOSYS;
2809 }
2810
2811 /*
2812 * Common code for all sys_stat functions, including compat versions.
2813 */
2814 int
2815 do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb)
2816 {
2817 int error;
2818 struct pathbuf *pb;
2819 struct nameidata nd;
2820
2821 error = pathbuf_copyin(userpath, &pb);
2822 if (error) {
2823 return error;
2824 }
2825 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb);
2826 error = namei(&nd);
2827 if (error != 0) {
2828 pathbuf_destroy(pb);
2829 return error;
2830 }
2831 error = vn_stat(nd.ni_vp, sb);
2832 vput(nd.ni_vp);
2833 pathbuf_destroy(pb);
2834 return error;
2835 }
2836
2837 /*
2838 * Get file status; this version follows links.
2839 */
2840 /* ARGSUSED */
2841 int
2842 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
2843 {
2844 /* {
2845 syscallarg(const char *) path;
2846 syscallarg(struct stat *) ub;
2847 } */
2848 struct stat sb;
2849 int error;
2850
2851 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb);
2852 if (error)
2853 return error;
2854 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2855 }
2856
2857 /*
2858 * Get file status; this version does not follow links.
2859 */
2860 /* ARGSUSED */
2861 int
2862 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
2863 {
2864 /* {
2865 syscallarg(const char *) path;
2866 syscallarg(struct stat *) ub;
2867 } */
2868 struct stat sb;
2869 int error;
2870
2871 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb);
2872 if (error)
2873 return error;
2874 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
2875 }
2876
2877 int
2878 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
2879 register_t *retval)
2880 {
2881 /* {
2882 syscallarg(int) fd;
2883 syscallarg(const char *) path;
2884 syscallarg(struct stat *) ub;
2885 syscallarg(int) flag;
2886 } */
2887
2888 return ENOSYS;
2889 }
2890 /*
2891 * Get configurable pathname variables.
2892 */
2893 /* ARGSUSED */
2894 int
2895 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
2896 {
2897 /* {
2898 syscallarg(const char *) path;
2899 syscallarg(int) name;
2900 } */
2901 int error;
2902 struct pathbuf *pb;
2903 struct nameidata nd;
2904
2905 error = pathbuf_copyin(SCARG(uap, path), &pb);
2906 if (error) {
2907 return error;
2908 }
2909 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2910 if ((error = namei(&nd)) != 0) {
2911 pathbuf_destroy(pb);
2912 return (error);
2913 }
2914 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
2915 vput(nd.ni_vp);
2916 pathbuf_destroy(pb);
2917 return (error);
2918 }
2919
2920 /*
2921 * Return target name of a symbolic link.
2922 */
2923 /* ARGSUSED */
2924 int
2925 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval)
2926 {
2927 /* {
2928 syscallarg(const char *) path;
2929 syscallarg(char *) buf;
2930 syscallarg(size_t) count;
2931 } */
2932 struct vnode *vp;
2933 struct iovec aiov;
2934 struct uio auio;
2935 int error;
2936 struct pathbuf *pb;
2937 struct nameidata nd;
2938
2939 error = pathbuf_copyin(SCARG(uap, path), &pb);
2940 if (error) {
2941 return error;
2942 }
2943 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
2944 if ((error = namei(&nd)) != 0) {
2945 pathbuf_destroy(pb);
2946 return error;
2947 }
2948 vp = nd.ni_vp;
2949 pathbuf_destroy(pb);
2950 if (vp->v_type != VLNK)
2951 error = EINVAL;
2952 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
2953 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
2954 aiov.iov_base = SCARG(uap, buf);
2955 aiov.iov_len = SCARG(uap, count);
2956 auio.uio_iov = &aiov;
2957 auio.uio_iovcnt = 1;
2958 auio.uio_offset = 0;
2959 auio.uio_rw = UIO_READ;
2960 KASSERT(l == curlwp);
2961 auio.uio_vmspace = l->l_proc->p_vmspace;
2962 auio.uio_resid = SCARG(uap, count);
2963 error = VOP_READLINK(vp, &auio, l->l_cred);
2964 }
2965 vput(vp);
2966 *retval = SCARG(uap, count) - auio.uio_resid;
2967 return (error);
2968 }
2969
2970 int
2971 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
2972 register_t *retval)
2973 {
2974 /* {
2975 syscallarg(int) fd;
2976 syscallarg(const char *) path;
2977 syscallarg(char *) buf;
2978 syscallarg(size_t) count;
2979 } */
2980
2981 return ENOSYS;
2982 }
2983
2984 /*
2985 * Change flags of a file given a path name.
2986 */
2987 /* ARGSUSED */
2988 int
2989 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
2990 {
2991 /* {
2992 syscallarg(const char *) path;
2993 syscallarg(u_long) flags;
2994 } */
2995 struct vnode *vp;
2996 int error;
2997
2998 error = namei_simple_user(SCARG(uap, path),
2999 NSM_FOLLOW_TRYEMULROOT, &vp);
3000 if (error != 0)
3001 return (error);
3002 error = change_flags(vp, SCARG(uap, flags), l);
3003 vput(vp);
3004 return (error);
3005 }
3006
3007 /*
3008 * Change flags of a file given a file descriptor.
3009 */
3010 /* ARGSUSED */
3011 int
3012 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3013 {
3014 /* {
3015 syscallarg(int) fd;
3016 syscallarg(u_long) flags;
3017 } */
3018 struct vnode *vp;
3019 file_t *fp;
3020 int error;
3021
3022 /* fd_getvnode() will use the descriptor for us */
3023 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3024 return (error);
3025 vp = fp->f_data;
3026 error = change_flags(vp, SCARG(uap, flags), l);
3027 VOP_UNLOCK(vp);
3028 fd_putfile(SCARG(uap, fd));
3029 return (error);
3030 }
3031
3032 /*
3033 * Change flags of a file given a path name; this version does
3034 * not follow links.
3035 */
3036 int
3037 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3038 {
3039 /* {
3040 syscallarg(const char *) path;
3041 syscallarg(u_long) flags;
3042 } */
3043 struct vnode *vp;
3044 int error;
3045
3046 error = namei_simple_user(SCARG(uap, path),
3047 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3048 if (error != 0)
3049 return (error);
3050 error = change_flags(vp, SCARG(uap, flags), l);
3051 vput(vp);
3052 return (error);
3053 }
3054
3055 /*
3056 * Common routine to change flags of a file.
3057 */
3058 int
3059 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3060 {
3061 struct vattr vattr;
3062 int error;
3063
3064 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3065
3066 vattr_null(&vattr);
3067 vattr.va_flags = flags;
3068 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3069
3070 return (error);
3071 }
3072
3073 /*
3074 * Change mode of a file given path name; this version follows links.
3075 */
3076 /* ARGSUSED */
3077 int
3078 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3079 {
3080 /* {
3081 syscallarg(const char *) path;
3082 syscallarg(int) mode;
3083 } */
3084 int error;
3085 struct vnode *vp;
3086
3087 error = namei_simple_user(SCARG(uap, path),
3088 NSM_FOLLOW_TRYEMULROOT, &vp);
3089 if (error != 0)
3090 return (error);
3091
3092 error = change_mode(vp, SCARG(uap, mode), l);
3093
3094 vrele(vp);
3095 return (error);
3096 }
3097
3098 /*
3099 * Change mode of a file given a file descriptor.
3100 */
3101 /* ARGSUSED */
3102 int
3103 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3104 {
3105 /* {
3106 syscallarg(int) fd;
3107 syscallarg(int) mode;
3108 } */
3109 file_t *fp;
3110 int error;
3111
3112 /* fd_getvnode() will use the descriptor for us */
3113 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3114 return (error);
3115 error = change_mode(fp->f_data, SCARG(uap, mode), l);
3116 fd_putfile(SCARG(uap, fd));
3117 return (error);
3118 }
3119
3120 int
3121 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3122 register_t *retval)
3123 {
3124 /* {
3125 syscallarg(int) fd;
3126 syscallarg(const char *) path;
3127 syscallarg(int) mode;
3128 syscallarg(int) flag;
3129 } */
3130
3131 return ENOSYS;
3132 }
3133
3134 /*
3135 * Change mode of a file given path name; this version does not follow links.
3136 */
3137 /* ARGSUSED */
3138 int
3139 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3140 {
3141 /* {
3142 syscallarg(const char *) path;
3143 syscallarg(int) mode;
3144 } */
3145 int error;
3146 struct vnode *vp;
3147
3148 error = namei_simple_user(SCARG(uap, path),
3149 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3150 if (error != 0)
3151 return (error);
3152
3153 error = change_mode(vp, SCARG(uap, mode), l);
3154
3155 vrele(vp);
3156 return (error);
3157 }
3158
3159 /*
3160 * Common routine to set mode given a vnode.
3161 */
3162 static int
3163 change_mode(struct vnode *vp, int mode, struct lwp *l)
3164 {
3165 struct vattr vattr;
3166 int error;
3167
3168 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3169 vattr_null(&vattr);
3170 vattr.va_mode = mode & ALLPERMS;
3171 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3172 VOP_UNLOCK(vp);
3173 return (error);
3174 }
3175
3176 /*
3177 * Set ownership given a path name; this version follows links.
3178 */
3179 /* ARGSUSED */
3180 int
3181 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3182 {
3183 /* {
3184 syscallarg(const char *) path;
3185 syscallarg(uid_t) uid;
3186 syscallarg(gid_t) gid;
3187 } */
3188 int error;
3189 struct vnode *vp;
3190
3191 error = namei_simple_user(SCARG(uap, path),
3192 NSM_FOLLOW_TRYEMULROOT, &vp);
3193 if (error != 0)
3194 return (error);
3195
3196 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3197
3198 vrele(vp);
3199 return (error);
3200 }
3201
3202 /*
3203 * Set ownership given a path name; this version follows links.
3204 * Provides POSIX semantics.
3205 */
3206 /* ARGSUSED */
3207 int
3208 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3209 {
3210 /* {
3211 syscallarg(const char *) path;
3212 syscallarg(uid_t) uid;
3213 syscallarg(gid_t) gid;
3214 } */
3215 int error;
3216 struct vnode *vp;
3217
3218 error = namei_simple_user(SCARG(uap, path),
3219 NSM_FOLLOW_TRYEMULROOT, &vp);
3220 if (error != 0)
3221 return (error);
3222
3223 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3224
3225 vrele(vp);
3226 return (error);
3227 }
3228
3229 /*
3230 * Set ownership given a file descriptor.
3231 */
3232 /* ARGSUSED */
3233 int
3234 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3235 {
3236 /* {
3237 syscallarg(int) fd;
3238 syscallarg(uid_t) uid;
3239 syscallarg(gid_t) gid;
3240 } */
3241 int error;
3242 file_t *fp;
3243
3244 /* fd_getvnode() will use the descriptor for us */
3245 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3246 return (error);
3247 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3248 l, 0);
3249 fd_putfile(SCARG(uap, fd));
3250 return (error);
3251 }
3252
3253 int
3254 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3255 register_t *retval)
3256 {
3257 /* {
3258 syscallarg(int) fd;
3259 syscallarg(const char *) path;
3260 syscallarg(uid_t) uid;
3261 syscallarg(gid_t) gid;
3262 syscallarg(int) flag;
3263 } */
3264
3265 return ENOSYS;
3266 }
3267
3268 /*
3269 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3270 */
3271 /* ARGSUSED */
3272 int
3273 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3274 {
3275 /* {
3276 syscallarg(int) fd;
3277 syscallarg(uid_t) uid;
3278 syscallarg(gid_t) gid;
3279 } */
3280 int error;
3281 file_t *fp;
3282
3283 /* fd_getvnode() will use the descriptor for us */
3284 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3285 return (error);
3286 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3287 l, 1);
3288 fd_putfile(SCARG(uap, fd));
3289 return (error);
3290 }
3291
3292 /*
3293 * Set ownership given a path name; this version does not follow links.
3294 */
3295 /* ARGSUSED */
3296 int
3297 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3298 {
3299 /* {
3300 syscallarg(const char *) path;
3301 syscallarg(uid_t) uid;
3302 syscallarg(gid_t) gid;
3303 } */
3304 int error;
3305 struct vnode *vp;
3306
3307 error = namei_simple_user(SCARG(uap, path),
3308 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3309 if (error != 0)
3310 return (error);
3311
3312 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3313
3314 vrele(vp);
3315 return (error);
3316 }
3317
3318 /*
3319 * Set ownership given a path name; this version does not follow links.
3320 * Provides POSIX/XPG semantics.
3321 */
3322 /* ARGSUSED */
3323 int
3324 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3325 {
3326 /* {
3327 syscallarg(const char *) path;
3328 syscallarg(uid_t) uid;
3329 syscallarg(gid_t) gid;
3330 } */
3331 int error;
3332 struct vnode *vp;
3333
3334 error = namei_simple_user(SCARG(uap, path),
3335 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3336 if (error != 0)
3337 return (error);
3338
3339 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3340
3341 vrele(vp);
3342 return (error);
3343 }
3344
3345 /*
3346 * Common routine to set ownership given a vnode.
3347 */
3348 static int
3349 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3350 int posix_semantics)
3351 {
3352 struct vattr vattr;
3353 mode_t newmode;
3354 int error;
3355
3356 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3357 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3358 goto out;
3359
3360 #define CHANGED(x) ((int)(x) != -1)
3361 newmode = vattr.va_mode;
3362 if (posix_semantics) {
3363 /*
3364 * POSIX/XPG semantics: if the caller is not the super-user,
3365 * clear set-user-id and set-group-id bits. Both POSIX and
3366 * the XPG consider the behaviour for calls by the super-user
3367 * implementation-defined; we leave the set-user-id and set-
3368 * group-id settings intact in that case.
3369 */
3370 if (vattr.va_mode & S_ISUID) {
3371 if (kauth_authorize_vnode(l->l_cred,
3372 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3373 newmode &= ~S_ISUID;
3374 }
3375 if (vattr.va_mode & S_ISGID) {
3376 if (kauth_authorize_vnode(l->l_cred,
3377 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3378 newmode &= ~S_ISGID;
3379 }
3380 } else {
3381 /*
3382 * NetBSD semantics: when changing owner and/or group,
3383 * clear the respective bit(s).
3384 */
3385 if (CHANGED(uid))
3386 newmode &= ~S_ISUID;
3387 if (CHANGED(gid))
3388 newmode &= ~S_ISGID;
3389 }
3390 /* Update va_mode iff altered. */
3391 if (vattr.va_mode == newmode)
3392 newmode = VNOVAL;
3393
3394 vattr_null(&vattr);
3395 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3396 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3397 vattr.va_mode = newmode;
3398 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3399 #undef CHANGED
3400
3401 out:
3402 VOP_UNLOCK(vp);
3403 return (error);
3404 }
3405
3406 /*
3407 * Set the access and modification times given a path name; this
3408 * version follows links.
3409 */
3410 /* ARGSUSED */
3411 int
3412 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3413 register_t *retval)
3414 {
3415 /* {
3416 syscallarg(const char *) path;
3417 syscallarg(const struct timeval *) tptr;
3418 } */
3419
3420 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3421 SCARG(uap, tptr), UIO_USERSPACE);
3422 }
3423
3424 /*
3425 * Set the access and modification times given a file descriptor.
3426 */
3427 /* ARGSUSED */
3428 int
3429 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3430 register_t *retval)
3431 {
3432 /* {
3433 syscallarg(int) fd;
3434 syscallarg(const struct timeval *) tptr;
3435 } */
3436 int error;
3437 file_t *fp;
3438
3439 /* fd_getvnode() will use the descriptor for us */
3440 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3441 return (error);
3442 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3443 UIO_USERSPACE);
3444 fd_putfile(SCARG(uap, fd));
3445 return (error);
3446 }
3447
3448 int
3449 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3450 register_t *retval)
3451 {
3452 /* {
3453 syscallarg(int) fd;
3454 syscallarg(const struct timespec *) tptr;
3455 } */
3456 int error;
3457 file_t *fp;
3458
3459 /* fd_getvnode() will use the descriptor for us */
3460 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3461 return (error);
3462 error = do_sys_utimens(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3463 UIO_USERSPACE);
3464 fd_putfile(SCARG(uap, fd));
3465 return (error);
3466 }
3467
3468 /*
3469 * Set the access and modification times given a path name; this
3470 * version does not follow links.
3471 */
3472 int
3473 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3474 register_t *retval)
3475 {
3476 /* {
3477 syscallarg(const char *) path;
3478 syscallarg(const struct timeval *) tptr;
3479 } */
3480
3481 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3482 SCARG(uap, tptr), UIO_USERSPACE);
3483 }
3484
3485 int
3486 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3487 register_t *retval)
3488 {
3489 /* {
3490 syscallarg(int) fd;
3491 syscallarg(const char *) path;
3492 syscallarg(const struct timespec *) tptr;
3493 syscallarg(int) flag;
3494 } */
3495 int follow;
3496 const struct timespec *tptr;
3497
3498 /*
3499 * Specified fd is not yet implemented
3500 */
3501 if (SCARG(uap, fd) != AT_FDCWD)
3502 return ENOSYS;
3503
3504 tptr = SCARG(uap, tptr);
3505 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3506
3507 return do_sys_utimens(l, NULL, SCARG(uap, path), follow,
3508 tptr, UIO_USERSPACE);
3509 }
3510
3511 /*
3512 * Common routine to set access and modification times given a vnode.
3513 */
3514 int
3515 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3516 const struct timespec *tptr, enum uio_seg seg)
3517 {
3518 struct vattr vattr;
3519 int error, dorele = 0;
3520 namei_simple_flags_t sflags;
3521
3522 bool vanull, setbirthtime;
3523 struct timespec ts[2];
3524
3525 /*
3526 * I have checked all callers and they pass either FOLLOW,
3527 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3528 * is 0. More to the point, they don't pass anything else.
3529 * Let's keep it that way at least until the namei interfaces
3530 * are fully sanitized.
3531 */
3532 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3533 sflags = (flag == FOLLOW) ?
3534 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3535
3536 if (tptr == NULL) {
3537 vanull = true;
3538 nanotime(&ts[0]);
3539 ts[1] = ts[0];
3540 } else {
3541 vanull = false;
3542 if (seg != UIO_SYSSPACE) {
3543 error = copyin(tptr, ts, sizeof (ts));
3544 if (error != 0)
3545 return error;
3546 } else {
3547 ts[0] = tptr[0];
3548 ts[1] = tptr[1];
3549 }
3550 }
3551
3552 if (ts[0].tv_nsec == UTIME_NOW) {
3553 nanotime(&ts[0]);
3554 if (ts[1].tv_nsec == UTIME_NOW) {
3555 vanull = true;
3556 ts[1] = ts[0];
3557 }
3558 } else if (ts[1].tv_nsec == UTIME_NOW)
3559 nanotime(&ts[1]);
3560
3561 if (vp == NULL) {
3562 /* note: SEG describes TPTR, not PATH; PATH is always user */
3563 error = namei_simple_user(path, sflags, &vp);
3564 if (error != 0)
3565 return error;
3566 dorele = 1;
3567 }
3568
3569 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3570 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3571 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3572 vattr_null(&vattr);
3573
3574 if (ts[0].tv_nsec != UTIME_OMIT)
3575 vattr.va_atime = ts[0];
3576
3577 if (ts[1].tv_nsec != UTIME_OMIT) {
3578 vattr.va_mtime = ts[1];
3579 if (setbirthtime)
3580 vattr.va_birthtime = ts[1];
3581 }
3582
3583 if (vanull)
3584 vattr.va_vaflags |= VA_UTIMES_NULL;
3585 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3586 VOP_UNLOCK(vp);
3587
3588 if (dorele != 0)
3589 vrele(vp);
3590
3591 return error;
3592 }
3593
3594 int
3595 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3596 const struct timeval *tptr, enum uio_seg seg)
3597 {
3598 struct timespec ts[2];
3599 struct timespec *tsptr = NULL;
3600 int error;
3601
3602 if (tptr != NULL) {
3603 struct timeval tv[2];
3604
3605 if (seg != UIO_SYSSPACE) {
3606 error = copyin(tptr, tv, sizeof (tv));
3607 if (error != 0)
3608 return error;
3609 tptr = tv;
3610 }
3611
3612 if ((tv[0].tv_usec == UTIME_NOW) ||
3613 (tv[0].tv_usec == UTIME_OMIT))
3614 ts[0].tv_nsec = tv[0].tv_usec;
3615 else
3616 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3617
3618 if ((tv[1].tv_usec == UTIME_NOW) ||
3619 (tv[1].tv_usec == UTIME_OMIT))
3620 ts[1].tv_nsec = tv[1].tv_usec;
3621 else
3622 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3623
3624 tsptr = &ts[0];
3625 }
3626
3627 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3628 }
3629
3630 /*
3631 * Truncate a file given its path name.
3632 */
3633 /* ARGSUSED */
3634 int
3635 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3636 {
3637 /* {
3638 syscallarg(const char *) path;
3639 syscallarg(int) pad;
3640 syscallarg(off_t) length;
3641 } */
3642 struct vnode *vp;
3643 struct vattr vattr;
3644 int error;
3645
3646 error = namei_simple_user(SCARG(uap, path),
3647 NSM_FOLLOW_TRYEMULROOT, &vp);
3648 if (error != 0)
3649 return (error);
3650 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3651 if (vp->v_type == VDIR)
3652 error = EISDIR;
3653 else if ((error = vn_writechk(vp)) == 0 &&
3654 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3655 vattr_null(&vattr);
3656 vattr.va_size = SCARG(uap, length);
3657 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3658 }
3659 vput(vp);
3660 return (error);
3661 }
3662
3663 /*
3664 * Truncate a file given a file descriptor.
3665 */
3666 /* ARGSUSED */
3667 int
3668 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3669 {
3670 /* {
3671 syscallarg(int) fd;
3672 syscallarg(int) pad;
3673 syscallarg(off_t) length;
3674 } */
3675 struct vattr vattr;
3676 struct vnode *vp;
3677 file_t *fp;
3678 int error;
3679
3680 /* fd_getvnode() will use the descriptor for us */
3681 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3682 return (error);
3683 if ((fp->f_flag & FWRITE) == 0) {
3684 error = EINVAL;
3685 goto out;
3686 }
3687 vp = fp->f_data;
3688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3689 if (vp->v_type == VDIR)
3690 error = EISDIR;
3691 else if ((error = vn_writechk(vp)) == 0) {
3692 vattr_null(&vattr);
3693 vattr.va_size = SCARG(uap, length);
3694 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3695 }
3696 VOP_UNLOCK(vp);
3697 out:
3698 fd_putfile(SCARG(uap, fd));
3699 return (error);
3700 }
3701
3702 /*
3703 * Sync an open file.
3704 */
3705 /* ARGSUSED */
3706 int
3707 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3708 {
3709 /* {
3710 syscallarg(int) fd;
3711 } */
3712 struct vnode *vp;
3713 file_t *fp;
3714 int error;
3715
3716 /* fd_getvnode() will use the descriptor for us */
3717 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3718 return (error);
3719 vp = fp->f_data;
3720 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3721 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3722 VOP_UNLOCK(vp);
3723 fd_putfile(SCARG(uap, fd));
3724 return (error);
3725 }
3726
3727 /*
3728 * Sync a range of file data. API modeled after that found in AIX.
3729 *
3730 * FDATASYNC indicates that we need only save enough metadata to be able
3731 * to re-read the written data. Note we duplicate AIX's requirement that
3732 * the file be open for writing.
3733 */
3734 /* ARGSUSED */
3735 int
3736 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
3737 {
3738 /* {
3739 syscallarg(int) fd;
3740 syscallarg(int) flags;
3741 syscallarg(off_t) start;
3742 syscallarg(off_t) length;
3743 } */
3744 struct vnode *vp;
3745 file_t *fp;
3746 int flags, nflags;
3747 off_t s, e, len;
3748 int error;
3749
3750 /* fd_getvnode() will use the descriptor for us */
3751 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3752 return (error);
3753
3754 if ((fp->f_flag & FWRITE) == 0) {
3755 error = EBADF;
3756 goto out;
3757 }
3758
3759 flags = SCARG(uap, flags);
3760 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
3761 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
3762 error = EINVAL;
3763 goto out;
3764 }
3765 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
3766 if (flags & FDATASYNC)
3767 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
3768 else
3769 nflags = FSYNC_WAIT;
3770 if (flags & FDISKSYNC)
3771 nflags |= FSYNC_CACHE;
3772
3773 len = SCARG(uap, length);
3774 /* If length == 0, we do the whole file, and s = e = 0 will do that */
3775 if (len) {
3776 s = SCARG(uap, start);
3777 e = s + len;
3778 if (e < s) {
3779 error = EINVAL;
3780 goto out;
3781 }
3782 } else {
3783 e = 0;
3784 s = 0;
3785 }
3786
3787 vp = fp->f_data;
3788 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3789 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
3790 VOP_UNLOCK(vp);
3791 out:
3792 fd_putfile(SCARG(uap, fd));
3793 return (error);
3794 }
3795
3796 /*
3797 * Sync the data of an open file.
3798 */
3799 /* ARGSUSED */
3800 int
3801 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
3802 {
3803 /* {
3804 syscallarg(int) fd;
3805 } */
3806 struct vnode *vp;
3807 file_t *fp;
3808 int error;
3809
3810 /* fd_getvnode() will use the descriptor for us */
3811 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3812 return (error);
3813 if ((fp->f_flag & FWRITE) == 0) {
3814 fd_putfile(SCARG(uap, fd));
3815 return (EBADF);
3816 }
3817 vp = fp->f_data;
3818 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3819 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
3820 VOP_UNLOCK(vp);
3821 fd_putfile(SCARG(uap, fd));
3822 return (error);
3823 }
3824
3825 /*
3826 * Rename files, (standard) BSD semantics frontend.
3827 */
3828 /* ARGSUSED */
3829 int
3830 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
3831 {
3832 /* {
3833 syscallarg(const char *) from;
3834 syscallarg(const char *) to;
3835 } */
3836
3837 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0));
3838 }
3839
3840 int
3841 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
3842 register_t *retval)
3843 {
3844 /* {
3845 syscallarg(int) fromfd;
3846 syscallarg(const char *) from;
3847 syscallarg(int) tofd;
3848 syscallarg(const char *) to;
3849 } */
3850
3851 return ENOSYS;
3852 }
3853
3854 /*
3855 * Rename files, POSIX semantics frontend.
3856 */
3857 /* ARGSUSED */
3858 int
3859 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
3860 {
3861 /* {
3862 syscallarg(const char *) from;
3863 syscallarg(const char *) to;
3864 } */
3865
3866 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1));
3867 }
3868
3869 /*
3870 * Rename files. Source and destination must either both be directories,
3871 * or both not be directories. If target is a directory, it must be empty.
3872 * If `from' and `to' refer to the same object, the value of the `retain'
3873 * argument is used to determine whether `from' will be
3874 *
3875 * (retain == 0) deleted unless `from' and `to' refer to the same
3876 * object in the file system's name space (BSD).
3877 * (retain == 1) always retained (POSIX).
3878 */
3879 int
3880 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
3881 {
3882 struct vnode *tvp, *fvp, *tdvp;
3883 struct pathbuf *frompb, *topb;
3884 struct nameidata fromnd, tond;
3885 struct mount *fs;
3886 int error;
3887
3888 error = pathbuf_maybe_copyin(from, seg, &frompb);
3889 if (error) {
3890 return error;
3891 }
3892 error = pathbuf_maybe_copyin(to, seg, &topb);
3893 if (error) {
3894 pathbuf_destroy(frompb);
3895 return error;
3896 }
3897
3898 NDINIT(&fromnd, DELETE, LOCKPARENT | TRYEMULROOT | INRENAME,
3899 frompb);
3900 if ((error = namei(&fromnd)) != 0) {
3901 pathbuf_destroy(frompb);
3902 pathbuf_destroy(topb);
3903 return (error);
3904 }
3905 if (fromnd.ni_dvp != fromnd.ni_vp)
3906 VOP_UNLOCK(fromnd.ni_dvp);
3907 fvp = fromnd.ni_vp;
3908
3909 fs = fvp->v_mount;
3910 error = VFS_RENAMELOCK_ENTER(fs);
3911 if (error) {
3912 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3913 vrele(fromnd.ni_dvp);
3914 vrele(fvp);
3915 goto out1;
3916 }
3917
3918 /*
3919 * close, partially, yet another race - ideally we should only
3920 * go as far as getting fromnd.ni_dvp before getting the per-fs
3921 * lock, and then continue to get fromnd.ni_vp, but we can't do
3922 * that with namei as it stands.
3923 *
3924 * This still won't prevent rmdir from nuking fromnd.ni_vp
3925 * under us. The real fix is to get the locks in the right
3926 * order and do the lookups in the right places, but that's a
3927 * major rototill.
3928 *
3929 * Note: this logic (as well as this whole function) is cloned
3930 * in nfs_serv.c. Proceed accordingly.
3931 */
3932 vrele(fvp);
3933 if ((fromnd.ni_cnd.cn_namelen == 1 &&
3934 fromnd.ni_cnd.cn_nameptr[0] == '.') ||
3935 (fromnd.ni_cnd.cn_namelen == 2 &&
3936 fromnd.ni_cnd.cn_nameptr[0] == '.' &&
3937 fromnd.ni_cnd.cn_nameptr[1] == '.')) {
3938 error = EINVAL;
3939 VFS_RENAMELOCK_EXIT(fs);
3940 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3941 vrele(fromnd.ni_dvp);
3942 goto out1;
3943 }
3944 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
3945 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0);
3946 if (error) {
3947 VOP_UNLOCK(fromnd.ni_dvp);
3948 VFS_RENAMELOCK_EXIT(fs);
3949 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3950 vrele(fromnd.ni_dvp);
3951 goto out1;
3952 }
3953 VOP_UNLOCK(fromnd.ni_vp);
3954 if (fromnd.ni_dvp != fromnd.ni_vp)
3955 VOP_UNLOCK(fromnd.ni_dvp);
3956 fvp = fromnd.ni_vp;
3957
3958 NDINIT(&tond, RENAME,
3959 LOCKPARENT | LOCKLEAF | NOCACHE | TRYEMULROOT
3960 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0),
3961 topb);
3962 if ((error = namei(&tond)) != 0) {
3963 VFS_RENAMELOCK_EXIT(fs);
3964 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
3965 vrele(fromnd.ni_dvp);
3966 vrele(fvp);
3967 goto out1;
3968 }
3969 tdvp = tond.ni_dvp;
3970 tvp = tond.ni_vp;
3971
3972 if (tvp != NULL) {
3973 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
3974 error = ENOTDIR;
3975 goto out;
3976 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
3977 error = EISDIR;
3978 goto out;
3979 }
3980 }
3981
3982 if (fvp == tdvp)
3983 error = EINVAL;
3984
3985 /*
3986 * Source and destination refer to the same object.
3987 */
3988 if (fvp == tvp) {
3989 if (retain)
3990 error = -1;
3991 else if (fromnd.ni_dvp == tdvp &&
3992 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
3993 !memcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
3994 fromnd.ni_cnd.cn_namelen))
3995 error = -1;
3996 }
3997 /*
3998 * Prevent cross-mount operation.
3999 */
4000 if (error == 0) {
4001 if (tond.ni_dvp->v_mount != fromnd.ni_dvp->v_mount) {
4002 error = EXDEV;
4003 }
4004 }
4005 #if NVERIEXEC > 0
4006 if (!error) {
4007 char *f1, *f2;
4008 size_t f1_len;
4009 size_t f2_len;
4010
4011 f1_len = fromnd.ni_cnd.cn_namelen + 1;
4012 f1 = kmem_alloc(f1_len, KM_SLEEP);
4013 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);
4014
4015 f2_len = tond.ni_cnd.cn_namelen + 1;
4016 f2 = kmem_alloc(f2_len, KM_SLEEP);
4017 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);
4018
4019 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4020
4021 kmem_free(f1, f1_len);
4022 kmem_free(f2, f2_len);
4023 }
4024 #endif /* NVERIEXEC > 0 */
4025
4026 out:
4027 if (!error) {
4028 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
4029 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
4030 VFS_RENAMELOCK_EXIT(fs);
4031 } else {
4032 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
4033 if (tdvp == tvp)
4034 vrele(tdvp);
4035 else
4036 vput(tdvp);
4037 if (tvp)
4038 vput(tvp);
4039 VFS_RENAMELOCK_EXIT(fs);
4040 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
4041 vrele(fromnd.ni_dvp);
4042 vrele(fvp);
4043 }
4044 out1:
4045 pathbuf_destroy(frompb);
4046 pathbuf_destroy(topb);
4047 return (error == -1 ? 0 : error);
4048 }
4049
4050 /*
4051 * Make a directory file.
4052 */
4053 /* ARGSUSED */
4054 int
4055 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4056 {
4057 /* {
4058 syscallarg(const char *) path;
4059 syscallarg(int) mode;
4060 } */
4061
4062 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE);
4063 }
4064
4065 int
4066 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4067 register_t *retval)
4068 {
4069 /* {
4070 syscallarg(int) fd;
4071 syscallarg(const char *) path;
4072 syscallarg(int) mode;
4073 } */
4074
4075 return ENOSYS;
4076 }
4077
4078
4079 int
4080 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4081 {
4082 struct proc *p = curlwp->l_proc;
4083 struct vnode *vp;
4084 struct vattr vattr;
4085 int error;
4086 struct pathbuf *pb;
4087 struct nameidata nd;
4088
4089 /* XXX bollocks, should pass in a pathbuf */
4090 error = pathbuf_maybe_copyin(path, seg, &pb);
4091 if (error) {
4092 return error;
4093 }
4094
4095 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4096 if ((error = namei(&nd)) != 0) {
4097 pathbuf_destroy(pb);
4098 return (error);
4099 }
4100 vp = nd.ni_vp;
4101 if (vp != NULL) {
4102 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4103 if (nd.ni_dvp == vp)
4104 vrele(nd.ni_dvp);
4105 else
4106 vput(nd.ni_dvp);
4107 vrele(vp);
4108 pathbuf_destroy(pb);
4109 return (EEXIST);
4110 }
4111 vattr_null(&vattr);
4112 vattr.va_type = VDIR;
4113 /* We will read cwdi->cwdi_cmask unlocked. */
4114 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4115 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4116 if (!error)
4117 vput(nd.ni_vp);
4118 pathbuf_destroy(pb);
4119 return (error);
4120 }
4121
4122 /*
4123 * Remove a directory file.
4124 */
4125 /* ARGSUSED */
4126 int
4127 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4128 {
4129 /* {
4130 syscallarg(const char *) path;
4131 } */
4132 struct vnode *vp;
4133 int error;
4134 struct pathbuf *pb;
4135 struct nameidata nd;
4136
4137 error = pathbuf_copyin(SCARG(uap, path), &pb);
4138 if (error) {
4139 return error;
4140 }
4141 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
4142 if ((error = namei(&nd)) != 0) {
4143 pathbuf_destroy(pb);
4144 return error;
4145 }
4146 vp = nd.ni_vp;
4147 if (vp->v_type != VDIR) {
4148 error = ENOTDIR;
4149 goto out;
4150 }
4151 /*
4152 * No rmdir "." please.
4153 */
4154 if (nd.ni_dvp == vp) {
4155 error = EINVAL;
4156 goto out;
4157 }
4158 /*
4159 * The root of a mounted filesystem cannot be deleted.
4160 */
4161 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) {
4162 error = EBUSY;
4163 goto out;
4164 }
4165 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
4166 pathbuf_destroy(pb);
4167 return (error);
4168
4169 out:
4170 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4171 if (nd.ni_dvp == vp)
4172 vrele(nd.ni_dvp);
4173 else
4174 vput(nd.ni_dvp);
4175 vput(vp);
4176 pathbuf_destroy(pb);
4177 return (error);
4178 }
4179
4180 /*
4181 * Read a block of directory entries in a file system independent format.
4182 */
4183 int
4184 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4185 {
4186 /* {
4187 syscallarg(int) fd;
4188 syscallarg(char *) buf;
4189 syscallarg(size_t) count;
4190 } */
4191 file_t *fp;
4192 int error, done;
4193
4194 /* fd_getvnode() will use the descriptor for us */
4195 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4196 return (error);
4197 if ((fp->f_flag & FREAD) == 0) {
4198 error = EBADF;
4199 goto out;
4200 }
4201 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4202 SCARG(uap, count), &done, l, 0, 0);
4203 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4204 *retval = done;
4205 out:
4206 fd_putfile(SCARG(uap, fd));
4207 return (error);
4208 }
4209
4210 /*
4211 * Set the mode mask for creation of filesystem nodes.
4212 */
4213 int
4214 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4215 {
4216 /* {
4217 syscallarg(mode_t) newmask;
4218 } */
4219 struct proc *p = l->l_proc;
4220 struct cwdinfo *cwdi;
4221
4222 /*
4223 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4224 * important is that we serialize changes to the mask. The
4225 * rw_exit() will issue a write memory barrier on our behalf,
4226 * and force the changes out to other CPUs (as it must use an
4227 * atomic operation, draining the local CPU's store buffers).
4228 */
4229 cwdi = p->p_cwdi;
4230 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4231 *retval = cwdi->cwdi_cmask;
4232 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4233 rw_exit(&cwdi->cwdi_lock);
4234
4235 return (0);
4236 }
4237
4238 int
4239 dorevoke(struct vnode *vp, kauth_cred_t cred)
4240 {
4241 struct vattr vattr;
4242 int error, fs_decision;
4243
4244 vn_lock(vp, LK_SHARED | LK_RETRY);
4245 error = VOP_GETATTR(vp, &vattr, cred);
4246 VOP_UNLOCK(vp);
4247 if (error != 0)
4248 return error;
4249 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4250 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4251 fs_decision);
4252 if (!error)
4253 VOP_REVOKE(vp, REVOKEALL);
4254 return (error);
4255 }
4256
4257 /*
4258 * Void all references to file by ripping underlying filesystem
4259 * away from vnode.
4260 */
4261 /* ARGSUSED */
4262 int
4263 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4264 {
4265 /* {
4266 syscallarg(const char *) path;
4267 } */
4268 struct vnode *vp;
4269 int error;
4270
4271 error = namei_simple_user(SCARG(uap, path),
4272 NSM_FOLLOW_TRYEMULROOT, &vp);
4273 if (error != 0)
4274 return (error);
4275 error = dorevoke(vp, l->l_cred);
4276 vrele(vp);
4277 return (error);
4278 }
4279