vfs_syscalls.c revision 1.473 1 /* $NetBSD: vfs_syscalls.c,v 1.473 2014/01/23 10:13:57 hannken Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95
66 */
67
68 /*
69 * Virtual File System System Calls
70 */
71
72 #include <sys/cdefs.h>
73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.473 2014/01/23 10:13:57 hannken Exp $");
74
75 #ifdef _KERNEL_OPT
76 #include "opt_fileassoc.h"
77 #include "veriexec.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/namei.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/file.h>
86 #include <sys/fcntl.h>
87 #include <sys/stat.h>
88 #include <sys/vnode.h>
89 #include <sys/mount.h>
90 #include <sys/proc.h>
91 #include <sys/uio.h>
92 #include <sys/kmem.h>
93 #include <sys/dirent.h>
94 #include <sys/sysctl.h>
95 #include <sys/syscallargs.h>
96 #include <sys/vfs_syscalls.h>
97 #include <sys/quota.h>
98 #include <sys/quotactl.h>
99 #include <sys/ktrace.h>
100 #ifdef FILEASSOC
101 #include <sys/fileassoc.h>
102 #endif /* FILEASSOC */
103 #include <sys/extattr.h>
104 #include <sys/verified_exec.h>
105 #include <sys/kauth.h>
106 #include <sys/atomic.h>
107 #include <sys/module.h>
108 #include <sys/buf.h>
109
110 #include <miscfs/genfs/genfs.h>
111 #include <miscfs/syncfs/syncfs.h>
112 #include <miscfs/specfs/specdev.h>
113
114 #include <nfs/rpcv2.h>
115 #include <nfs/nfsproto.h>
116 #include <nfs/nfs.h>
117 #include <nfs/nfs_var.h>
118
119 static int change_flags(struct vnode *, u_long, struct lwp *);
120 static int change_mode(struct vnode *, int, struct lwp *l);
121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int);
122 static int do_open(lwp_t *, struct vnode *, struct pathbuf *, int, int, int *);
123 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *);
124 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t,
125 enum uio_seg);
126 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t);
127 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *,
128 enum uio_seg);
129 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
130 enum uio_seg, int);
131 static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
132 size_t, register_t *);
133 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);
134
135 static int fd_nameiat(struct lwp *, int, struct nameidata *);
136 static int fd_nameiat_simple_user(struct lwp *, int, const char *,
137 namei_simple_flags_t, struct vnode **);
138
139
140 /*
141 * This table is used to maintain compatibility with 4.3BSD
142 * and NetBSD 0.9 mount syscalls - and possibly other systems.
143 * Note, the order is important!
144 *
145 * Do not modify this table. It should only contain filesystems
146 * supported by NetBSD 0.9 and 4.3BSD.
147 */
148 const char * const mountcompatnames[] = {
149 NULL, /* 0 = MOUNT_NONE */
150 MOUNT_FFS, /* 1 = MOUNT_UFS */
151 MOUNT_NFS, /* 2 */
152 MOUNT_MFS, /* 3 */
153 MOUNT_MSDOS, /* 4 */
154 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */
155 MOUNT_FDESC, /* 6 */
156 MOUNT_KERNFS, /* 7 */
157 NULL, /* 8 = MOUNT_DEVFS */
158 MOUNT_AFS, /* 9 */
159 };
160
161 const int nmountcompatnames = __arraycount(mountcompatnames);
162
163 static int
164 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp)
165 {
166 file_t *dfp;
167 int error;
168
169 if (fdat != AT_FDCWD) {
170 if ((error = fd_getvnode(fdat, &dfp)) != 0)
171 goto out;
172
173 NDAT(ndp, dfp->f_data);
174 }
175
176 error = namei(ndp);
177
178 if (fdat != AT_FDCWD)
179 fd_putfile(fdat);
180 out:
181 return error;
182 }
183
184 static int
185 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path,
186 namei_simple_flags_t sflags, struct vnode **vp_ret)
187 {
188 file_t *dfp;
189 struct vnode *dvp;
190 int error;
191
192 if (fdat != AT_FDCWD) {
193 if ((error = fd_getvnode(fdat, &dfp)) != 0)
194 goto out;
195
196 dvp = dfp->f_data;
197 } else {
198 dvp = NULL;
199 }
200
201 error = nameiat_simple_user(dvp, path, sflags, vp_ret);
202
203 if (fdat != AT_FDCWD)
204 fd_putfile(fdat);
205 out:
206 return error;
207 }
208
209 static int
210 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags)
211 {
212 int error;
213
214 fp->f_flag = flags & FMASK;
215 fp->f_type = DTYPE_VNODE;
216 fp->f_ops = &vnops;
217 fp->f_data = vp;
218
219 if (flags & (O_EXLOCK | O_SHLOCK)) {
220 struct flock lf;
221 int type;
222
223 lf.l_whence = SEEK_SET;
224 lf.l_start = 0;
225 lf.l_len = 0;
226 if (flags & O_EXLOCK)
227 lf.l_type = F_WRLCK;
228 else
229 lf.l_type = F_RDLCK;
230 type = F_FLOCK;
231 if ((flags & FNONBLOCK) == 0)
232 type |= F_WAIT;
233 VOP_UNLOCK(vp);
234 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type);
235 if (error) {
236 (void) vn_close(vp, fp->f_flag, fp->f_cred);
237 fd_abort(l->l_proc, fp, indx);
238 return error;
239 }
240 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
241 atomic_or_uint(&fp->f_flag, FHASLOCK);
242 }
243 if (flags & O_CLOEXEC)
244 fd_set_exclose(l, indx, true);
245 return 0;
246 }
247
248 static int
249 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags,
250 void *data, size_t *data_len)
251 {
252 struct mount *mp;
253 int error = 0, saved_flags;
254
255 mp = vp->v_mount;
256 saved_flags = mp->mnt_flag;
257
258 /* We can operate only on VV_ROOT nodes. */
259 if ((vp->v_vflag & VV_ROOT) == 0) {
260 error = EINVAL;
261 goto out;
262 }
263
264 /*
265 * We only allow the filesystem to be reloaded if it
266 * is currently mounted read-only. Additionally, we
267 * prevent read-write to read-only downgrades.
268 */
269 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 &&
270 (mp->mnt_flag & MNT_RDONLY) == 0 &&
271 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) {
272 error = EOPNOTSUPP; /* Needs translation */
273 goto out;
274 }
275
276 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
277 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data);
278 if (error)
279 goto out;
280
281 if (vfs_busy(mp, NULL)) {
282 error = EPERM;
283 goto out;
284 }
285
286 mutex_enter(&mp->mnt_updating);
287
288 mp->mnt_flag &= ~MNT_OP_FLAGS;
289 mp->mnt_flag |= flags & MNT_OP_FLAGS;
290
291 /*
292 * Set the mount level flags.
293 */
294 if (flags & MNT_RDONLY)
295 mp->mnt_flag |= MNT_RDONLY;
296 else if (mp->mnt_flag & MNT_RDONLY)
297 mp->mnt_iflag |= IMNT_WANTRDWR;
298 mp->mnt_flag &= ~MNT_BASIC_FLAGS;
299 mp->mnt_flag |= flags & MNT_BASIC_FLAGS;
300 error = VFS_MOUNT(mp, path, data, data_len);
301
302 if (error && data != NULL) {
303 int error2;
304
305 /*
306 * Update failed; let's try and see if it was an
307 * export request. For compat with 3.0 and earlier.
308 */
309 error2 = vfs_hooks_reexport(mp, path, data);
310
311 /*
312 * Only update error code if the export request was
313 * understood but some problem occurred while
314 * processing it.
315 */
316 if (error2 != EJUSTRETURN)
317 error = error2;
318 }
319
320 if (mp->mnt_iflag & IMNT_WANTRDWR)
321 mp->mnt_flag &= ~MNT_RDONLY;
322 if (error)
323 mp->mnt_flag = saved_flags;
324 mp->mnt_flag &= ~MNT_OP_FLAGS;
325 mp->mnt_iflag &= ~IMNT_WANTRDWR;
326 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) {
327 if (mp->mnt_syncer == NULL)
328 error = vfs_allocate_syncvnode(mp);
329 } else {
330 if (mp->mnt_syncer != NULL)
331 vfs_deallocate_syncvnode(mp);
332 }
333 mutex_exit(&mp->mnt_updating);
334 vfs_unbusy(mp, false, NULL);
335
336 if ((error == 0) && !(saved_flags & MNT_EXTATTR) &&
337 (flags & MNT_EXTATTR)) {
338 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START,
339 NULL, 0, NULL) != 0) {
340 printf("%s: failed to start extattr, error = %d",
341 mp->mnt_stat.f_mntonname, error);
342 mp->mnt_flag &= ~MNT_EXTATTR;
343 }
344 }
345
346 if ((error == 0) && (saved_flags & MNT_EXTATTR) &&
347 !(flags & MNT_EXTATTR)) {
348 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP,
349 NULL, 0, NULL) != 0) {
350 printf("%s: failed to stop extattr, error = %d",
351 mp->mnt_stat.f_mntonname, error);
352 mp->mnt_flag |= MNT_RDONLY;
353 }
354 }
355 out:
356 return (error);
357 }
358
359 static int
360 mount_get_vfsops(const char *fstype, struct vfsops **vfsops)
361 {
362 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)];
363 int error;
364
365 /* Copy file-system type from userspace. */
366 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL);
367 if (error) {
368 /*
369 * Historically, filesystem types were identified by numbers.
370 * If we get an integer for the filesystem type instead of a
371 * string, we check to see if it matches one of the historic
372 * filesystem types.
373 */
374 u_long fsindex = (u_long)fstype;
375 if (fsindex >= nmountcompatnames ||
376 mountcompatnames[fsindex] == NULL)
377 return ENODEV;
378 strlcpy(fstypename, mountcompatnames[fsindex],
379 sizeof(fstypename));
380 }
381
382 /* Accept `ufs' as an alias for `ffs', for compatibility. */
383 if (strcmp(fstypename, "ufs") == 0)
384 fstypename[0] = 'f';
385
386 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
387 return 0;
388
389 /* If we can autoload a vfs module, try again */
390 (void)module_autoload(fstypename, MODULE_CLASS_VFS);
391
392 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL)
393 return 0;
394
395 return ENODEV;
396 }
397
398 static int
399 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags,
400 void *data, size_t *data_len)
401 {
402 struct mount *mp;
403 int error;
404
405 /* If MNT_GETARGS is specified, it should be the only flag. */
406 if (flags & ~MNT_GETARGS)
407 return EINVAL;
408
409 mp = vp->v_mount;
410
411 /* XXX: probably some notion of "can see" here if we want isolation. */
412 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
413 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL);
414 if (error)
415 return error;
416
417 if ((vp->v_vflag & VV_ROOT) == 0)
418 return EINVAL;
419
420 if (vfs_busy(mp, NULL))
421 return EPERM;
422
423 mutex_enter(&mp->mnt_updating);
424 mp->mnt_flag &= ~MNT_OP_FLAGS;
425 mp->mnt_flag |= MNT_GETARGS;
426 error = VFS_MOUNT(mp, path, data, data_len);
427 mp->mnt_flag &= ~MNT_OP_FLAGS;
428 mutex_exit(&mp->mnt_updating);
429
430 vfs_unbusy(mp, false, NULL);
431 return (error);
432 }
433
434 int
435 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval)
436 {
437 /* {
438 syscallarg(const char *) type;
439 syscallarg(const char *) path;
440 syscallarg(int) flags;
441 syscallarg(void *) data;
442 syscallarg(size_t) data_len;
443 } */
444
445 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path),
446 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE,
447 SCARG(uap, data_len), retval);
448 }
449
450 int
451 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type,
452 const char *path, int flags, void *data, enum uio_seg data_seg,
453 size_t data_len, register_t *retval)
454 {
455 struct vnode *vp;
456 void *data_buf = data;
457 bool vfsopsrele = false;
458 int error;
459
460 /* XXX: The calling convention of this routine is totally bizarre */
461 if (vfsops)
462 vfsopsrele = true;
463
464 /*
465 * Get vnode to be covered
466 */
467 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
468 if (error != 0) {
469 vp = NULL;
470 goto done;
471 }
472
473 if (vfsops == NULL) {
474 if (flags & (MNT_GETARGS | MNT_UPDATE)) {
475 vfsops = vp->v_mount->mnt_op;
476 } else {
477 /* 'type' is userspace */
478 error = mount_get_vfsops(type, &vfsops);
479 if (error != 0)
480 goto done;
481 vfsopsrele = true;
482 }
483 }
484
485 if (data != NULL && data_seg == UIO_USERSPACE) {
486 if (data_len == 0) {
487 /* No length supplied, use default for filesystem */
488 data_len = vfsops->vfs_min_mount_data;
489 if (data_len > VFS_MAX_MOUNT_DATA) {
490 error = EINVAL;
491 goto done;
492 }
493 /*
494 * Hopefully a longer buffer won't make copyin() fail.
495 * For compatibility with 3.0 and earlier.
496 */
497 if (flags & MNT_UPDATE
498 && data_len < sizeof (struct mnt_export_args30))
499 data_len = sizeof (struct mnt_export_args30);
500 }
501 data_buf = kmem_alloc(data_len, KM_SLEEP);
502
503 /* NFS needs the buffer even for mnt_getargs .... */
504 error = copyin(data, data_buf, data_len);
505 if (error != 0)
506 goto done;
507 }
508
509 if (flags & MNT_GETARGS) {
510 if (data_len == 0) {
511 error = EINVAL;
512 goto done;
513 }
514 error = mount_getargs(l, vp, path, flags, data_buf, &data_len);
515 if (error != 0)
516 goto done;
517 if (data_seg == UIO_USERSPACE)
518 error = copyout(data_buf, data, data_len);
519 *retval = data_len;
520 } else if (flags & MNT_UPDATE) {
521 error = mount_update(l, vp, path, flags, data_buf, &data_len);
522 } else {
523 /* Locking is handled internally in mount_domount(). */
524 KASSERT(vfsopsrele == true);
525 error = mount_domount(l, &vp, vfsops, path, flags, data_buf,
526 &data_len);
527 vfsopsrele = false;
528 }
529
530 done:
531 if (vfsopsrele)
532 vfs_delref(vfsops);
533 if (vp != NULL) {
534 vrele(vp);
535 }
536 if (data_buf != data)
537 kmem_free(data_buf, data_len);
538 return (error);
539 }
540
541 /*
542 * Unmount a file system.
543 *
544 * Note: unmount takes a path to the vnode mounted on as argument,
545 * not special file (as before).
546 */
547 /* ARGSUSED */
548 int
549 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval)
550 {
551 /* {
552 syscallarg(const char *) path;
553 syscallarg(int) flags;
554 } */
555 struct vnode *vp;
556 struct mount *mp;
557 int error;
558 struct pathbuf *pb;
559 struct nameidata nd;
560
561 error = pathbuf_copyin(SCARG(uap, path), &pb);
562 if (error) {
563 return error;
564 }
565
566 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
567 if ((error = namei(&nd)) != 0) {
568 pathbuf_destroy(pb);
569 return error;
570 }
571 vp = nd.ni_vp;
572 pathbuf_destroy(pb);
573
574 mp = vp->v_mount;
575 atomic_inc_uint(&mp->mnt_refcnt);
576 VOP_UNLOCK(vp);
577
578 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
579 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL);
580 if (error) {
581 vrele(vp);
582 vfs_destroy(mp);
583 return (error);
584 }
585
586 /*
587 * Don't allow unmounting the root file system.
588 */
589 if (mp->mnt_flag & MNT_ROOTFS) {
590 vrele(vp);
591 vfs_destroy(mp);
592 return (EINVAL);
593 }
594
595 /*
596 * Must be the root of the filesystem
597 */
598 if ((vp->v_vflag & VV_ROOT) == 0) {
599 vrele(vp);
600 vfs_destroy(mp);
601 return (EINVAL);
602 }
603
604 vrele(vp);
605 error = dounmount(mp, SCARG(uap, flags), l);
606 vfs_destroy(mp);
607 return error;
608 }
609
610 /*
611 * Sync each mounted filesystem.
612 */
613 #ifdef DEBUG
614 int syncprt = 0;
615 struct ctldebug debug0 = { "syncprt", &syncprt };
616 #endif
617
618 void
619 do_sys_sync(struct lwp *l)
620 {
621 struct mount *mp, *nmp;
622 int asyncflag;
623
624 mutex_enter(&mountlist_lock);
625 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
626 if (vfs_busy(mp, &nmp)) {
627 continue;
628 }
629 mutex_enter(&mp->mnt_updating);
630 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
631 asyncflag = mp->mnt_flag & MNT_ASYNC;
632 mp->mnt_flag &= ~MNT_ASYNC;
633 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred);
634 if (asyncflag)
635 mp->mnt_flag |= MNT_ASYNC;
636 }
637 mutex_exit(&mp->mnt_updating);
638 vfs_unbusy(mp, false, &nmp);
639 }
640 mutex_exit(&mountlist_lock);
641 #ifdef DEBUG
642 if (syncprt)
643 vfs_bufstats();
644 #endif /* DEBUG */
645 }
646
647 /* ARGSUSED */
648 int
649 sys_sync(struct lwp *l, const void *v, register_t *retval)
650 {
651 do_sys_sync(l);
652 return (0);
653 }
654
655
656 /*
657 * Access or change filesystem quotas.
658 *
659 * (this is really 14 different calls bundled into one)
660 */
661
662 static int
663 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u)
664 {
665 struct quotastat info_k;
666 int error;
667
668 /* ensure any padding bytes are cleared */
669 memset(&info_k, 0, sizeof(info_k));
670
671 error = vfs_quotactl_stat(mp, &info_k);
672 if (error) {
673 return error;
674 }
675
676 return copyout(&info_k, info_u, sizeof(info_k));
677 }
678
679 static int
680 do_sys_quotactl_idtypestat(struct mount *mp, int idtype,
681 struct quotaidtypestat *info_u)
682 {
683 struct quotaidtypestat info_k;
684 int error;
685
686 /* ensure any padding bytes are cleared */
687 memset(&info_k, 0, sizeof(info_k));
688
689 error = vfs_quotactl_idtypestat(mp, idtype, &info_k);
690 if (error) {
691 return error;
692 }
693
694 return copyout(&info_k, info_u, sizeof(info_k));
695 }
696
697 static int
698 do_sys_quotactl_objtypestat(struct mount *mp, int objtype,
699 struct quotaobjtypestat *info_u)
700 {
701 struct quotaobjtypestat info_k;
702 int error;
703
704 /* ensure any padding bytes are cleared */
705 memset(&info_k, 0, sizeof(info_k));
706
707 error = vfs_quotactl_objtypestat(mp, objtype, &info_k);
708 if (error) {
709 return error;
710 }
711
712 return copyout(&info_k, info_u, sizeof(info_k));
713 }
714
715 static int
716 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u,
717 struct quotaval *val_u)
718 {
719 struct quotakey key_k;
720 struct quotaval val_k;
721 int error;
722
723 /* ensure any padding bytes are cleared */
724 memset(&val_k, 0, sizeof(val_k));
725
726 error = copyin(key_u, &key_k, sizeof(key_k));
727 if (error) {
728 return error;
729 }
730
731 error = vfs_quotactl_get(mp, &key_k, &val_k);
732 if (error) {
733 return error;
734 }
735
736 return copyout(&val_k, val_u, sizeof(val_k));
737 }
738
739 static int
740 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u,
741 const struct quotaval *val_u)
742 {
743 struct quotakey key_k;
744 struct quotaval val_k;
745 int error;
746
747 error = copyin(key_u, &key_k, sizeof(key_k));
748 if (error) {
749 return error;
750 }
751
752 error = copyin(val_u, &val_k, sizeof(val_k));
753 if (error) {
754 return error;
755 }
756
757 return vfs_quotactl_put(mp, &key_k, &val_k);
758 }
759
760 static int
761 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u)
762 {
763 struct quotakey key_k;
764 int error;
765
766 error = copyin(key_u, &key_k, sizeof(key_k));
767 if (error) {
768 return error;
769 }
770
771 return vfs_quotactl_delete(mp, &key_k);
772 }
773
774 static int
775 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u)
776 {
777 struct quotakcursor cursor_k;
778 int error;
779
780 /* ensure any padding bytes are cleared */
781 memset(&cursor_k, 0, sizeof(cursor_k));
782
783 error = vfs_quotactl_cursoropen(mp, &cursor_k);
784 if (error) {
785 return error;
786 }
787
788 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
789 }
790
791 static int
792 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u)
793 {
794 struct quotakcursor cursor_k;
795 int error;
796
797 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
798 if (error) {
799 return error;
800 }
801
802 return vfs_quotactl_cursorclose(mp, &cursor_k);
803 }
804
805 static int
806 do_sys_quotactl_cursorskipidtype(struct mount *mp,
807 struct quotakcursor *cursor_u, int idtype)
808 {
809 struct quotakcursor cursor_k;
810 int error;
811
812 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
813 if (error) {
814 return error;
815 }
816
817 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype);
818 if (error) {
819 return error;
820 }
821
822 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
823 }
824
825 static int
826 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u,
827 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum,
828 unsigned *ret_u)
829 {
830 #define CGET_STACK_MAX 8
831 struct quotakcursor cursor_k;
832 struct quotakey stackkeys[CGET_STACK_MAX];
833 struct quotaval stackvals[CGET_STACK_MAX];
834 struct quotakey *keys_k;
835 struct quotaval *vals_k;
836 unsigned ret_k;
837 int error;
838
839 if (maxnum > 128) {
840 maxnum = 128;
841 }
842
843 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
844 if (error) {
845 return error;
846 }
847
848 if (maxnum <= CGET_STACK_MAX) {
849 keys_k = stackkeys;
850 vals_k = stackvals;
851 /* ensure any padding bytes are cleared */
852 memset(keys_k, 0, maxnum * sizeof(keys_k[0]));
853 memset(vals_k, 0, maxnum * sizeof(vals_k[0]));
854 } else {
855 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP);
856 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP);
857 }
858
859 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum,
860 &ret_k);
861 if (error) {
862 goto fail;
863 }
864
865 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0]));
866 if (error) {
867 goto fail;
868 }
869
870 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0]));
871 if (error) {
872 goto fail;
873 }
874
875 error = copyout(&ret_k, ret_u, sizeof(ret_k));
876 if (error) {
877 goto fail;
878 }
879
880 /* do last to maximize the chance of being able to recover a failure */
881 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k));
882
883 fail:
884 if (keys_k != stackkeys) {
885 kmem_free(keys_k, maxnum * sizeof(keys_k[0]));
886 }
887 if (vals_k != stackvals) {
888 kmem_free(vals_k, maxnum * sizeof(vals_k[0]));
889 }
890 return error;
891 }
892
893 static int
894 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u,
895 int *ret_u)
896 {
897 struct quotakcursor cursor_k;
898 int ret_k;
899 int error;
900
901 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
902 if (error) {
903 return error;
904 }
905
906 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k);
907 if (error) {
908 return error;
909 }
910
911 error = copyout(&ret_k, ret_u, sizeof(ret_k));
912 if (error) {
913 return error;
914 }
915
916 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
917 }
918
919 static int
920 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u)
921 {
922 struct quotakcursor cursor_k;
923 int error;
924
925 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k));
926 if (error) {
927 return error;
928 }
929
930 error = vfs_quotactl_cursorrewind(mp, &cursor_k);
931 if (error) {
932 return error;
933 }
934
935 return copyout(&cursor_k, cursor_u, sizeof(cursor_k));
936 }
937
938 static int
939 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u)
940 {
941 char *path_k;
942 int error;
943
944 /* XXX this should probably be a struct pathbuf */
945 path_k = PNBUF_GET();
946 error = copyin(path_u, path_k, PATH_MAX);
947 if (error) {
948 PNBUF_PUT(path_k);
949 return error;
950 }
951
952 error = vfs_quotactl_quotaon(mp, idtype, path_k);
953
954 PNBUF_PUT(path_k);
955 return error;
956 }
957
/*
 * QUOTACTL_QUOTAOFF: no user-space data to transfer; forward the
 * request straight to vfs_quotactl_quotaoff().
 */
static int
do_sys_quotactl_quotaoff(struct mount *mp, int idtype)
{
	int rv;

	rv = vfs_quotactl_quotaoff(mp, idtype);
	return rv;
}
963
964 int
965 do_sys_quotactl(const char *path_u, const struct quotactl_args *args)
966 {
967 struct mount *mp;
968 struct vnode *vp;
969 int error;
970
971 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp);
972 if (error != 0)
973 return (error);
974 mp = vp->v_mount;
975
976 switch (args->qc_op) {
977 case QUOTACTL_STAT:
978 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info);
979 break;
980 case QUOTACTL_IDTYPESTAT:
981 error = do_sys_quotactl_idtypestat(mp,
982 args->u.idtypestat.qc_idtype,
983 args->u.idtypestat.qc_info);
984 break;
985 case QUOTACTL_OBJTYPESTAT:
986 error = do_sys_quotactl_objtypestat(mp,
987 args->u.objtypestat.qc_objtype,
988 args->u.objtypestat.qc_info);
989 break;
990 case QUOTACTL_GET:
991 error = do_sys_quotactl_get(mp,
992 args->u.get.qc_key,
993 args->u.get.qc_val);
994 break;
995 case QUOTACTL_PUT:
996 error = do_sys_quotactl_put(mp,
997 args->u.put.qc_key,
998 args->u.put.qc_val);
999 break;
1000 case QUOTACTL_DELETE:
1001 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key);
1002 break;
1003 case QUOTACTL_CURSOROPEN:
1004 error = do_sys_quotactl_cursoropen(mp,
1005 args->u.cursoropen.qc_cursor);
1006 break;
1007 case QUOTACTL_CURSORCLOSE:
1008 error = do_sys_quotactl_cursorclose(mp,
1009 args->u.cursorclose.qc_cursor);
1010 break;
1011 case QUOTACTL_CURSORSKIPIDTYPE:
1012 error = do_sys_quotactl_cursorskipidtype(mp,
1013 args->u.cursorskipidtype.qc_cursor,
1014 args->u.cursorskipidtype.qc_idtype);
1015 break;
1016 case QUOTACTL_CURSORGET:
1017 error = do_sys_quotactl_cursorget(mp,
1018 args->u.cursorget.qc_cursor,
1019 args->u.cursorget.qc_keys,
1020 args->u.cursorget.qc_vals,
1021 args->u.cursorget.qc_maxnum,
1022 args->u.cursorget.qc_ret);
1023 break;
1024 case QUOTACTL_CURSORATEND:
1025 error = do_sys_quotactl_cursoratend(mp,
1026 args->u.cursoratend.qc_cursor,
1027 args->u.cursoratend.qc_ret);
1028 break;
1029 case QUOTACTL_CURSORREWIND:
1030 error = do_sys_quotactl_cursorrewind(mp,
1031 args->u.cursorrewind.qc_cursor);
1032 break;
1033 case QUOTACTL_QUOTAON:
1034 error = do_sys_quotactl_quotaon(mp,
1035 args->u.quotaon.qc_idtype,
1036 args->u.quotaon.qc_quotafile);
1037 break;
1038 case QUOTACTL_QUOTAOFF:
1039 error = do_sys_quotactl_quotaoff(mp,
1040 args->u.quotaoff.qc_idtype);
1041 break;
1042 default:
1043 error = EINVAL;
1044 break;
1045 }
1046
1047 vrele(vp);
1048 return error;
1049 }
1050
1051 /* ARGSUSED */
1052 int
1053 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap,
1054 register_t *retval)
1055 {
1056 /* {
1057 syscallarg(const char *) path;
1058 syscallarg(struct quotactl_args *) args;
1059 } */
1060 struct quotactl_args args;
1061 int error;
1062
1063 error = copyin(SCARG(uap, args), &args, sizeof(args));
1064 if (error) {
1065 return error;
1066 }
1067
1068 return do_sys_quotactl(SCARG(uap, path), &args);
1069 }
1070
1071 int
1072 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
1073 int root)
1074 {
1075 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
1076 int error = 0;
1077
1078 /*
1079 * If MNT_NOWAIT or MNT_LAZY is specified, do not
1080 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY
1081 * overrides MNT_NOWAIT.
1082 */
1083 if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
1084 (flags != MNT_WAIT && flags != 0)) {
1085 memcpy(sp, &mp->mnt_stat, sizeof(*sp));
1086 goto done;
1087 }
1088
1089 /* Get the filesystem stats now */
1090 memset(sp, 0, sizeof(*sp));
1091 if ((error = VFS_STATVFS(mp, sp)) != 0) {
1092 return error;
1093 }
1094
1095 if (cwdi->cwdi_rdir == NULL)
1096 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
1097 done:
1098 if (cwdi->cwdi_rdir != NULL) {
1099 size_t len;
1100 char *bp;
1101 char c;
1102 char *path = PNBUF_GET();
1103
1104 bp = path + MAXPATHLEN;
1105 *--bp = '\0';
1106 rw_enter(&cwdi->cwdi_lock, RW_READER);
1107 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
1108 MAXPATHLEN / 2, 0, l);
1109 rw_exit(&cwdi->cwdi_lock);
1110 if (error) {
1111 PNBUF_PUT(path);
1112 return error;
1113 }
1114 len = strlen(bp);
1115 if (len != 1) {
1116 /*
1117 * for mount points that are below our root, we can see
1118 * them, so we fix up the pathname and return them. The
1119 * rest we cannot see, so we don't allow viewing the
1120 * data.
1121 */
1122 if (strncmp(bp, sp->f_mntonname, len) == 0 &&
1123 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) {
1124 (void)strlcpy(sp->f_mntonname,
1125 c == '\0' ? "/" : &sp->f_mntonname[len],
1126 sizeof(sp->f_mntonname));
1127 } else {
1128 if (root)
1129 (void)strlcpy(sp->f_mntonname, "/",
1130 sizeof(sp->f_mntonname));
1131 else
1132 error = EPERM;
1133 }
1134 }
1135 PNBUF_PUT(path);
1136 }
1137 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
1138 return error;
1139 }
1140
1141 /*
1142 * Get filesystem statistics by path.
1143 */
1144 int
1145 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb)
1146 {
1147 struct mount *mp;
1148 int error;
1149 struct vnode *vp;
1150
1151 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp);
1152 if (error != 0)
1153 return error;
1154 mp = vp->v_mount;
1155 error = dostatvfs(mp, sb, l, flags, 1);
1156 vrele(vp);
1157 return error;
1158 }
1159
1160 /* ARGSUSED */
1161 int
1162 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval)
1163 {
1164 /* {
1165 syscallarg(const char *) path;
1166 syscallarg(struct statvfs *) buf;
1167 syscallarg(int) flags;
1168 } */
1169 struct statvfs *sb;
1170 int error;
1171
1172 sb = STATVFSBUF_GET();
1173 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb);
1174 if (error == 0)
1175 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1176 STATVFSBUF_PUT(sb);
1177 return error;
1178 }
1179
1180 /*
1181 * Get filesystem statistics by fd.
1182 */
1183 int
1184 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb)
1185 {
1186 file_t *fp;
1187 struct mount *mp;
1188 int error;
1189
1190 /* fd_getvnode() will use the descriptor for us */
1191 if ((error = fd_getvnode(fd, &fp)) != 0)
1192 return (error);
1193 mp = ((struct vnode *)fp->f_data)->v_mount;
1194 error = dostatvfs(mp, sb, curlwp, flags, 1);
1195 fd_putfile(fd);
1196 return error;
1197 }
1198
1199 /* ARGSUSED */
1200 int
1201 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval)
1202 {
1203 /* {
1204 syscallarg(int) fd;
1205 syscallarg(struct statvfs *) buf;
1206 syscallarg(int) flags;
1207 } */
1208 struct statvfs *sb;
1209 int error;
1210
1211 sb = STATVFSBUF_GET();
1212 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb);
1213 if (error == 0)
1214 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
1215 STATVFSBUF_PUT(sb);
1216 return error;
1217 }
1218
1219
1220 /*
1221 * Get statistics on all filesystems.
1222 */
1223 int
1224 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags,
1225 int (*copyfn)(const void *, void *, size_t), size_t entry_sz,
1226 register_t *retval)
1227 {
1228 int root = 0;
1229 struct proc *p = l->l_proc;
1230 struct mount *mp, *nmp;
1231 struct statvfs *sb;
1232 size_t count, maxcount;
1233 int error = 0;
1234
1235 sb = STATVFSBUF_GET();
1236 maxcount = bufsize / entry_sz;
1237 mutex_enter(&mountlist_lock);
1238 count = 0;
1239 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
1240 if (vfs_busy(mp, &nmp)) {
1241 continue;
1242 }
1243 if (sfsp && count < maxcount) {
1244 error = dostatvfs(mp, sb, l, flags, 0);
1245 if (error) {
1246 vfs_unbusy(mp, false, &nmp);
1247 error = 0;
1248 continue;
1249 }
1250 error = copyfn(sb, sfsp, entry_sz);
1251 if (error) {
1252 vfs_unbusy(mp, false, NULL);
1253 goto out;
1254 }
1255 sfsp = (char *)sfsp + entry_sz;
1256 root |= strcmp(sb->f_mntonname, "/") == 0;
1257 }
1258 count++;
1259 vfs_unbusy(mp, false, &nmp);
1260 }
1261 mutex_exit(&mountlist_lock);
1262
1263 if (root == 0 && p->p_cwdi->cwdi_rdir) {
1264 /*
1265 * fake a root entry
1266 */
1267 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount,
1268 sb, l, flags, 1);
1269 if (error != 0)
1270 goto out;
1271 if (sfsp) {
1272 error = copyfn(sb, sfsp, entry_sz);
1273 if (error != 0)
1274 goto out;
1275 }
1276 count++;
1277 }
1278 if (sfsp && count > maxcount)
1279 *retval = maxcount;
1280 else
1281 *retval = count;
1282 out:
1283 STATVFSBUF_PUT(sb);
1284 return error;
1285 }
1286
1287 int
1288 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval)
1289 {
1290 /* {
1291 syscallarg(struct statvfs *) buf;
1292 syscallarg(size_t) bufsize;
1293 syscallarg(int) flags;
1294 } */
1295
1296 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize),
1297 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval);
1298 }
1299
1300 /*
1301 * Change current working directory to a given file descriptor.
1302 */
1303 /* ARGSUSED */
1304 int
1305 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
1306 {
1307 /* {
1308 syscallarg(int) fd;
1309 } */
1310 struct proc *p = l->l_proc;
1311 struct cwdinfo *cwdi;
1312 struct vnode *vp, *tdp;
1313 struct mount *mp;
1314 file_t *fp;
1315 int error, fd;
1316
1317 /* fd_getvnode() will use the descriptor for us */
1318 fd = SCARG(uap, fd);
1319 if ((error = fd_getvnode(fd, &fp)) != 0)
1320 return (error);
1321 vp = fp->f_data;
1322
1323 vref(vp);
1324 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1325 if (vp->v_type != VDIR)
1326 error = ENOTDIR;
1327 else
1328 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1329 if (error) {
1330 vput(vp);
1331 goto out;
1332 }
1333 while ((mp = vp->v_mountedhere) != NULL) {
1334 error = vfs_busy(mp, NULL);
1335 vput(vp);
1336 if (error != 0)
1337 goto out;
1338 error = VFS_ROOT(mp, &tdp);
1339 vfs_unbusy(mp, false, NULL);
1340 if (error)
1341 goto out;
1342 vp = tdp;
1343 }
1344 VOP_UNLOCK(vp);
1345
1346 /*
1347 * Disallow changing to a directory not under the process's
1348 * current root directory (if there is one).
1349 */
1350 cwdi = p->p_cwdi;
1351 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1352 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
1353 vrele(vp);
1354 error = EPERM; /* operation not permitted */
1355 } else {
1356 vrele(cwdi->cwdi_cdir);
1357 cwdi->cwdi_cdir = vp;
1358 }
1359 rw_exit(&cwdi->cwdi_lock);
1360
1361 out:
1362 fd_putfile(fd);
1363 return (error);
1364 }
1365
1366 /*
1367 * Change this process's notion of the root directory to a given file
1368 * descriptor.
1369 */
1370 int
1371 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
1372 {
1373 struct proc *p = l->l_proc;
1374 struct vnode *vp;
1375 file_t *fp;
1376 int error, fd = SCARG(uap, fd);
1377
1378 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1379 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0)
1380 return error;
1381 /* fd_getvnode() will use the descriptor for us */
1382 if ((error = fd_getvnode(fd, &fp)) != 0)
1383 return error;
1384 vp = fp->f_data;
1385 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1386 if (vp->v_type != VDIR)
1387 error = ENOTDIR;
1388 else
1389 error = VOP_ACCESS(vp, VEXEC, l->l_cred);
1390 VOP_UNLOCK(vp);
1391 if (error)
1392 goto out;
1393 vref(vp);
1394
1395 change_root(p->p_cwdi, vp, l);
1396
1397 out:
1398 fd_putfile(fd);
1399 return (error);
1400 }
1401
1402 /*
1403 * Change current working directory (``.'').
1404 */
1405 /* ARGSUSED */
1406 int
1407 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval)
1408 {
1409 /* {
1410 syscallarg(const char *) path;
1411 } */
1412 struct proc *p = l->l_proc;
1413 struct cwdinfo *cwdi;
1414 int error;
1415 struct vnode *vp;
1416
1417 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1418 &vp, l)) != 0)
1419 return (error);
1420 cwdi = p->p_cwdi;
1421 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1422 vrele(cwdi->cwdi_cdir);
1423 cwdi->cwdi_cdir = vp;
1424 rw_exit(&cwdi->cwdi_lock);
1425 return (0);
1426 }
1427
1428 /*
1429 * Change notion of root (``/'') directory.
1430 */
1431 /* ARGSUSED */
1432 int
1433 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval)
1434 {
1435 /* {
1436 syscallarg(const char *) path;
1437 } */
1438 struct proc *p = l->l_proc;
1439 int error;
1440 struct vnode *vp;
1441
1442 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
1443 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
1444 return (error);
1445 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
1446 &vp, l)) != 0)
1447 return (error);
1448
1449 change_root(p->p_cwdi, vp, l);
1450
1451 return (0);
1452 }
1453
1454 /*
1455 * Common routine for chroot and fchroot.
1456 * NB: callers need to properly authorize the change root operation.
1457 */
1458 void
1459 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
1460 {
1461 struct proc *p = l->l_proc;
1462 kauth_cred_t ncred;
1463
1464 ncred = kauth_cred_alloc();
1465
1466 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
1467 if (cwdi->cwdi_rdir != NULL)
1468 vrele(cwdi->cwdi_rdir);
1469 cwdi->cwdi_rdir = vp;
1470
1471 /*
1472 * Prevent escaping from chroot by putting the root under
1473 * the working directory. Silently chdir to / if we aren't
1474 * already there.
1475 */
1476 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
1477 /*
1478 * XXX would be more failsafe to change directory to a
1479 * deadfs node here instead
1480 */
1481 vrele(cwdi->cwdi_cdir);
1482 vref(vp);
1483 cwdi->cwdi_cdir = vp;
1484 }
1485 rw_exit(&cwdi->cwdi_lock);
1486
1487 /* Get a write lock on the process credential. */
1488 proc_crmod_enter();
1489
1490 kauth_cred_clone(p->p_cred, ncred);
1491 kauth_proc_chroot(ncred, p->p_cwdi);
1492
1493 /* Broadcast our credentials to the process and other LWPs. */
1494 proc_crmod_leave(ncred, p->p_cred, true);
1495 }
1496
1497 /*
1498 * Common routine for chroot and chdir.
1499 * XXX "where" should be enum uio_seg
1500 */
1501 int
1502 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l)
1503 {
1504 struct pathbuf *pb;
1505 struct nameidata nd;
1506 int error;
1507
1508 error = pathbuf_maybe_copyin(path, where, &pb);
1509 if (error) {
1510 return error;
1511 }
1512 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1513 if ((error = namei(&nd)) != 0) {
1514 pathbuf_destroy(pb);
1515 return error;
1516 }
1517 *vpp = nd.ni_vp;
1518 pathbuf_destroy(pb);
1519
1520 if ((*vpp)->v_type != VDIR)
1521 error = ENOTDIR;
1522 else
1523 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred);
1524
1525 if (error)
1526 vput(*vpp);
1527 else
1528 VOP_UNLOCK(*vpp);
1529 return (error);
1530 }
1531
1532 /*
1533 * Internals of sys_open - path has already been converted into a pathbuf
1534 * (so we can easily reuse this function from other parts of the kernel,
1535 * like posix_spawn post-processing).
1536 */
1537 static int
1538 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags,
1539 int open_mode, int *fd)
1540 {
1541 struct proc *p = l->l_proc;
1542 struct cwdinfo *cwdi = p->p_cwdi;
1543 file_t *fp;
1544 struct vnode *vp;
1545 int flags, cmode;
1546 int indx, error;
1547 struct nameidata nd;
1548
1549 if (open_flags & O_SEARCH) {
1550 open_flags &= ~(int)O_SEARCH;
1551 }
1552
1553 flags = FFLAGS(open_flags);
1554 if ((flags & (FREAD | FWRITE)) == 0)
1555 return EINVAL;
1556
1557 if ((error = fd_allocfile(&fp, &indx)) != 0) {
1558 return error;
1559 }
1560
1561 /* We're going to read cwdi->cwdi_cmask unlocked here. */
1562 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT;
1563 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb);
1564 if (dvp != NULL)
1565 NDAT(&nd, dvp);
1566
1567 l->l_dupfd = -indx - 1; /* XXX check for fdopen */
1568 if ((error = vn_open(&nd, flags, cmode)) != 0) {
1569 fd_abort(p, fp, indx);
1570 if ((error == EDUPFD || error == EMOVEFD) &&
1571 l->l_dupfd >= 0 && /* XXX from fdopen */
1572 (error =
1573 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) {
1574 *fd = indx;
1575 return 0;
1576 }
1577 if (error == ERESTART)
1578 error = EINTR;
1579 return error;
1580 }
1581
1582 l->l_dupfd = 0;
1583 vp = nd.ni_vp;
1584
1585 if ((error = open_setfp(l, fp, vp, indx, flags)))
1586 return error;
1587
1588 VOP_UNLOCK(vp);
1589 *fd = indx;
1590 fd_affix(p, fp, indx);
1591 return 0;
1592 }
1593
1594 int
1595 fd_open(const char *path, int open_flags, int open_mode, int *fd)
1596 {
1597 struct pathbuf *pb;
1598 int error, oflags;
1599
1600 oflags = FFLAGS(open_flags);
1601 if ((oflags & (FREAD | FWRITE)) == 0)
1602 return EINVAL;
1603
1604 pb = pathbuf_create(path);
1605 if (pb == NULL)
1606 return ENOMEM;
1607
1608 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd);
1609 pathbuf_destroy(pb);
1610
1611 return error;
1612 }
1613
1614 /*
1615 * Check permissions, allocate an open file structure,
1616 * and call the device open routine if any.
1617 */
1618 static int
1619 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags,
1620 int mode, int *fd)
1621 {
1622 file_t *dfp = NULL;
1623 struct vnode *dvp = NULL;
1624 struct pathbuf *pb;
1625 int error;
1626
1627 error = pathbuf_copyin(path, &pb);
1628 if (error)
1629 return error;
1630
1631 if (fdat != AT_FDCWD) {
1632 /* fd_getvnode() will use the descriptor for us */
1633 if ((error = fd_getvnode(fdat, &dfp)) != 0)
1634 goto out;
1635
1636 dvp = dfp->f_data;
1637 }
1638
1639 error = do_open(l, dvp, pb, flags, mode, fd);
1640
1641 if (dfp != NULL)
1642 fd_putfile(fdat);
1643 out:
1644 pathbuf_destroy(pb);
1645 return error;
1646 }
1647
1648 int
1649 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval)
1650 {
1651 /* {
1652 syscallarg(const char *) path;
1653 syscallarg(int) flags;
1654 syscallarg(int) mode;
1655 } */
1656 int error;
1657 int fd;
1658
1659 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path),
1660 SCARG(uap, flags), SCARG(uap, mode), &fd);
1661
1662 if (error == 0)
1663 *retval = fd;
1664
1665 return error;
1666 }
1667
1668 int
1669 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval)
1670 {
1671 /* {
1672 syscallarg(int) fd;
1673 syscallarg(const char *) path;
1674 syscallarg(int) oflags;
1675 syscallarg(int) mode;
1676 } */
1677 int error;
1678 int fd;
1679
1680 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path),
1681 SCARG(uap, oflags), SCARG(uap, mode), &fd);
1682
1683 if (error == 0)
1684 *retval = fd;
1685
1686 return error;
1687 }
1688
1689 static void
1690 vfs__fhfree(fhandle_t *fhp)
1691 {
1692 size_t fhsize;
1693
1694 if (fhp == NULL) {
1695 return;
1696 }
1697 fhsize = FHANDLE_SIZE(fhp);
1698 kmem_free(fhp, fhsize);
1699 }
1700
1701 /*
1702 * vfs_composefh: compose a filehandle.
1703 */
1704
1705 int
1706 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size)
1707 {
1708 struct mount *mp;
1709 struct fid *fidp;
1710 int error;
1711 size_t needfhsize;
1712 size_t fidsize;
1713
1714 mp = vp->v_mount;
1715 fidp = NULL;
1716 if (*fh_size < FHANDLE_SIZE_MIN) {
1717 fidsize = 0;
1718 } else {
1719 fidsize = *fh_size - offsetof(fhandle_t, fh_fid);
1720 if (fhp != NULL) {
1721 memset(fhp, 0, *fh_size);
1722 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1723 fidp = &fhp->fh_fid;
1724 }
1725 }
1726 error = VFS_VPTOFH(vp, fidp, &fidsize);
1727 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1728 if (error == 0 && *fh_size < needfhsize) {
1729 error = E2BIG;
1730 }
1731 *fh_size = needfhsize;
1732 return error;
1733 }
1734
1735 int
1736 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp)
1737 {
1738 struct mount *mp;
1739 fhandle_t *fhp;
1740 size_t fhsize;
1741 size_t fidsize;
1742 int error;
1743
1744 *fhpp = NULL;
1745 mp = vp->v_mount;
1746 fidsize = 0;
1747 error = VFS_VPTOFH(vp, NULL, &fidsize);
1748 KASSERT(error != 0);
1749 if (error != E2BIG) {
1750 goto out;
1751 }
1752 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize);
1753 fhp = kmem_zalloc(fhsize, KM_SLEEP);
1754 if (fhp == NULL) {
1755 error = ENOMEM;
1756 goto out;
1757 }
1758 fhp->fh_fsid = mp->mnt_stat.f_fsidx;
1759 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize);
1760 if (error == 0) {
1761 KASSERT((FHANDLE_SIZE(fhp) == fhsize &&
1762 FHANDLE_FILEID(fhp)->fid_len == fidsize));
1763 *fhpp = fhp;
1764 } else {
1765 kmem_free(fhp, fhsize);
1766 }
1767 out:
1768 return error;
1769 }
1770
1771 void
1772 vfs_composefh_free(fhandle_t *fhp)
1773 {
1774
1775 vfs__fhfree(fhp);
1776 }
1777
1778 /*
1779 * vfs_fhtovp: lookup a vnode by a filehandle.
1780 */
1781
1782 int
1783 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp)
1784 {
1785 struct mount *mp;
1786 int error;
1787
1788 *vpp = NULL;
1789 mp = vfs_getvfs(FHANDLE_FSID(fhp));
1790 if (mp == NULL) {
1791 error = ESTALE;
1792 goto out;
1793 }
1794 if (mp->mnt_op->vfs_fhtovp == NULL) {
1795 error = EOPNOTSUPP;
1796 goto out;
1797 }
1798 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp);
1799 out:
1800 return error;
1801 }
1802
1803 /*
1804 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given
1805 * the needed size.
1806 */
1807
1808 int
1809 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp)
1810 {
1811 fhandle_t *fhp;
1812 int error;
1813
1814 *fhpp = NULL;
1815 if (fhsize > FHANDLE_SIZE_MAX) {
1816 return EINVAL;
1817 }
1818 if (fhsize < FHANDLE_SIZE_MIN) {
1819 return EINVAL;
1820 }
1821 again:
1822 fhp = kmem_alloc(fhsize, KM_SLEEP);
1823 if (fhp == NULL) {
1824 return ENOMEM;
1825 }
1826 error = copyin(ufhp, fhp, fhsize);
1827 if (error == 0) {
1828 /* XXX this check shouldn't be here */
1829 if (FHANDLE_SIZE(fhp) == fhsize) {
1830 *fhpp = fhp;
1831 return 0;
1832 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) {
1833 /*
1834 * a kludge for nfsv2 padded handles.
1835 */
1836 size_t sz;
1837
1838 sz = FHANDLE_SIZE(fhp);
1839 kmem_free(fhp, fhsize);
1840 fhsize = sz;
1841 goto again;
1842 } else {
1843 /*
1844 * userland told us wrong size.
1845 */
1846 error = EINVAL;
1847 }
1848 }
1849 kmem_free(fhp, fhsize);
1850 return error;
1851 }
1852
1853 void
1854 vfs_copyinfh_free(fhandle_t *fhp)
1855 {
1856
1857 vfs__fhfree(fhp);
1858 }
1859
1860 /*
1861 * Get file handle system call
1862 */
1863 int
1864 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval)
1865 {
1866 /* {
1867 syscallarg(char *) fname;
1868 syscallarg(fhandle_t *) fhp;
1869 syscallarg(size_t *) fh_size;
1870 } */
1871 struct vnode *vp;
1872 fhandle_t *fh;
1873 int error;
1874 struct pathbuf *pb;
1875 struct nameidata nd;
1876 size_t sz;
1877 size_t usz;
1878
1879 /*
1880 * Must be super user
1881 */
1882 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1883 0, NULL, NULL, NULL);
1884 if (error)
1885 return (error);
1886
1887 error = pathbuf_copyin(SCARG(uap, fname), &pb);
1888 if (error) {
1889 return error;
1890 }
1891 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
1892 error = namei(&nd);
1893 if (error) {
1894 pathbuf_destroy(pb);
1895 return error;
1896 }
1897 vp = nd.ni_vp;
1898 pathbuf_destroy(pb);
1899
1900 error = vfs_composefh_alloc(vp, &fh);
1901 vput(vp);
1902 if (error != 0) {
1903 goto out;
1904 }
1905 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t));
1906 if (error != 0) {
1907 goto out;
1908 }
1909 sz = FHANDLE_SIZE(fh);
1910 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t));
1911 if (error != 0) {
1912 goto out;
1913 }
1914 if (usz >= sz) {
1915 error = copyout(fh, SCARG(uap, fhp), sz);
1916 } else {
1917 error = E2BIG;
1918 }
1919 out:
1920 vfs_composefh_free(fh);
1921 return (error);
1922 }
1923
1924 /*
1925 * Open a file given a file handle.
1926 *
1927 * Check permissions, allocate an open file structure,
1928 * and call the device open routine if any.
1929 */
1930
1931 int
1932 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags,
1933 register_t *retval)
1934 {
1935 file_t *fp;
1936 struct vnode *vp = NULL;
1937 kauth_cred_t cred = l->l_cred;
1938 file_t *nfp;
1939 int indx, error = 0;
1940 struct vattr va;
1941 fhandle_t *fh;
1942 int flags;
1943 proc_t *p;
1944
1945 p = curproc;
1946
1947 /*
1948 * Must be super user
1949 */
1950 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
1951 0, NULL, NULL, NULL)))
1952 return (error);
1953
1954 if (oflags & O_SEARCH) {
1955 oflags &= ~(int)O_SEARCH;
1956 }
1957
1958 flags = FFLAGS(oflags);
1959 if ((flags & (FREAD | FWRITE)) == 0)
1960 return (EINVAL);
1961 if ((flags & O_CREAT))
1962 return (EINVAL);
1963 if ((error = fd_allocfile(&nfp, &indx)) != 0)
1964 return (error);
1965 fp = nfp;
1966 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
1967 if (error != 0) {
1968 goto bad;
1969 }
1970 error = vfs_fhtovp(fh, &vp);
1971 if (error != 0) {
1972 goto bad;
1973 }
1974
1975 /* Now do an effective vn_open */
1976
1977 if (vp->v_type == VSOCK) {
1978 error = EOPNOTSUPP;
1979 goto bad;
1980 }
1981 error = vn_openchk(vp, cred, flags);
1982 if (error != 0)
1983 goto bad;
1984 if (flags & O_TRUNC) {
1985 VOP_UNLOCK(vp); /* XXX */
1986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
1987 vattr_null(&va);
1988 va.va_size = 0;
1989 error = VOP_SETATTR(vp, &va, cred);
1990 if (error)
1991 goto bad;
1992 }
1993 if ((error = VOP_OPEN(vp, flags, cred)) != 0)
1994 goto bad;
1995 if (flags & FWRITE) {
1996 mutex_enter(vp->v_interlock);
1997 vp->v_writecount++;
1998 mutex_exit(vp->v_interlock);
1999 }
2000
2001 /* done with modified vn_open, now finish what sys_open does. */
2002 if ((error = open_setfp(l, fp, vp, indx, flags)))
2003 return error;
2004
2005 VOP_UNLOCK(vp);
2006 *retval = indx;
2007 fd_affix(p, fp, indx);
2008 vfs_copyinfh_free(fh);
2009 return (0);
2010
2011 bad:
2012 fd_abort(p, fp, indx);
2013 if (vp != NULL)
2014 vput(vp);
2015 vfs_copyinfh_free(fh);
2016 return (error);
2017 }
2018
2019 int
2020 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval)
2021 {
2022 /* {
2023 syscallarg(const void *) fhp;
2024 syscallarg(size_t) fh_size;
2025 syscallarg(int) flags;
2026 } */
2027
2028 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size),
2029 SCARG(uap, flags), retval);
2030 }
2031
2032 int
2033 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb)
2034 {
2035 int error;
2036 fhandle_t *fh;
2037 struct vnode *vp;
2038
2039 /*
2040 * Must be super user
2041 */
2042 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2043 0, NULL, NULL, NULL)))
2044 return (error);
2045
2046 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2047 if (error != 0)
2048 return error;
2049
2050 error = vfs_fhtovp(fh, &vp);
2051 vfs_copyinfh_free(fh);
2052 if (error != 0)
2053 return error;
2054
2055 error = vn_stat(vp, sb);
2056 vput(vp);
2057 return error;
2058 }
2059
2060
2061 /* ARGSUSED */
2062 int
2063 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval)
2064 {
2065 /* {
2066 syscallarg(const void *) fhp;
2067 syscallarg(size_t) fh_size;
2068 syscallarg(struct stat *) sb;
2069 } */
2070 struct stat sb;
2071 int error;
2072
2073 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb);
2074 if (error)
2075 return error;
2076 return copyout(&sb, SCARG(uap, sb), sizeof(sb));
2077 }
2078
2079 int
2080 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb,
2081 int flags)
2082 {
2083 fhandle_t *fh;
2084 struct mount *mp;
2085 struct vnode *vp;
2086 int error;
2087
2088 /*
2089 * Must be super user
2090 */
2091 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE,
2092 0, NULL, NULL, NULL)))
2093 return error;
2094
2095 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh);
2096 if (error != 0)
2097 return error;
2098
2099 error = vfs_fhtovp(fh, &vp);
2100 vfs_copyinfh_free(fh);
2101 if (error != 0)
2102 return error;
2103
2104 mp = vp->v_mount;
2105 error = dostatvfs(mp, sb, l, flags, 1);
2106 vput(vp);
2107 return error;
2108 }
2109
2110 /* ARGSUSED */
2111 int
2112 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval)
2113 {
2114 /* {
2115 syscallarg(const void *) fhp;
2116 syscallarg(size_t) fh_size;
2117 syscallarg(struct statvfs *) buf;
2118 syscallarg(int) flags;
2119 } */
2120 struct statvfs *sb = STATVFSBUF_GET();
2121 int error;
2122
2123 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb,
2124 SCARG(uap, flags));
2125 if (error == 0)
2126 error = copyout(sb, SCARG(uap, buf), sizeof(*sb));
2127 STATVFSBUF_PUT(sb);
2128 return error;
2129 }
2130
2131 /*
2132 * Create a special file.
2133 */
2134 /* ARGSUSED */
2135 int
2136 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
2137 register_t *retval)
2138 {
2139 /* {
2140 syscallarg(const char *) path;
2141 syscallarg(mode_t) mode;
2142 syscallarg(dev_t) dev;
2143 } */
2144 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode),
2145 SCARG(uap, dev), retval, UIO_USERSPACE);
2146 }
2147
2148 int
2149 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
2150 register_t *retval)
2151 {
2152 /* {
2153 syscallarg(int) fd;
2154 syscallarg(const char *) path;
2155 syscallarg(mode_t) mode;
2156 syscallarg(int) pad;
2157 syscallarg(dev_t) dev;
2158 } */
2159
2160 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
2161 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE);
2162 }
2163
2164 int
2165 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
2166 register_t *retval, enum uio_seg seg)
2167 {
2168 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg);
2169 }
2170
2171 int
2172 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
2173 dev_t dev, register_t *retval, enum uio_seg seg)
2174 {
2175 struct proc *p = l->l_proc;
2176 struct vnode *vp;
2177 struct vattr vattr;
2178 int error, optype;
2179 struct pathbuf *pb;
2180 struct nameidata nd;
2181 const char *pathstring;
2182
2183 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
2184 0, NULL, NULL, NULL)) != 0)
2185 return (error);
2186
2187 optype = VOP_MKNOD_DESCOFFSET;
2188
2189 error = pathbuf_maybe_copyin(pathname, seg, &pb);
2190 if (error) {
2191 return error;
2192 }
2193 pathstring = pathbuf_stringcopy_get(pb);
2194 if (pathstring == NULL) {
2195 pathbuf_destroy(pb);
2196 return ENOMEM;
2197 }
2198
2199 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2200
2201 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2202 goto out;
2203 vp = nd.ni_vp;
2204
2205 if (vp != NULL)
2206 error = EEXIST;
2207 else {
2208 vattr_null(&vattr);
2209 /* We will read cwdi->cwdi_cmask unlocked. */
2210 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2211 vattr.va_rdev = dev;
2212
2213 switch (mode & S_IFMT) {
2214 case S_IFMT: /* used by badsect to flag bad sectors */
2215 vattr.va_type = VBAD;
2216 break;
2217 case S_IFCHR:
2218 vattr.va_type = VCHR;
2219 break;
2220 case S_IFBLK:
2221 vattr.va_type = VBLK;
2222 break;
2223 case S_IFWHT:
2224 optype = VOP_WHITEOUT_DESCOFFSET;
2225 break;
2226 case S_IFREG:
2227 #if NVERIEXEC > 0
2228 error = veriexec_openchk(l, nd.ni_vp, pathstring,
2229 O_CREAT);
2230 #endif /* NVERIEXEC > 0 */
2231 vattr.va_type = VREG;
2232 vattr.va_rdev = VNOVAL;
2233 optype = VOP_CREATE_DESCOFFSET;
2234 break;
2235 default:
2236 error = EINVAL;
2237 break;
2238 }
2239 }
2240 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET
2241 && vattr.va_rdev == VNOVAL)
2242 error = EINVAL;
2243 if (!error) {
2244 switch (optype) {
2245 case VOP_WHITEOUT_DESCOFFSET:
2246 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
2247 if (error)
2248 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2249 vput(nd.ni_dvp);
2250 break;
2251
2252 case VOP_MKNOD_DESCOFFSET:
2253 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
2254 &nd.ni_cnd, &vattr);
2255 if (error == 0)
2256 vrele(nd.ni_vp);
2257 vput(nd.ni_dvp);
2258 break;
2259
2260 case VOP_CREATE_DESCOFFSET:
2261 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
2262 &nd.ni_cnd, &vattr);
2263 if (error == 0)
2264 vrele(nd.ni_vp);
2265 vput(nd.ni_dvp);
2266 break;
2267 }
2268 } else {
2269 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2270 if (nd.ni_dvp == vp)
2271 vrele(nd.ni_dvp);
2272 else
2273 vput(nd.ni_dvp);
2274 if (vp)
2275 vrele(vp);
2276 }
2277 out:
2278 pathbuf_stringcopy_put(pb, pathstring);
2279 pathbuf_destroy(pb);
2280 return (error);
2281 }
2282
2283 /*
2284 * Create a named pipe.
2285 */
2286 /* ARGSUSED */
2287 int
2288 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval)
2289 {
2290 /* {
2291 syscallarg(const char *) path;
2292 syscallarg(int) mode;
2293 } */
2294 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode));
2295 }
2296
2297 int
2298 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap,
2299 register_t *retval)
2300 {
2301 /* {
2302 syscallarg(int) fd;
2303 syscallarg(const char *) path;
2304 syscallarg(int) mode;
2305 } */
2306
2307 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path),
2308 SCARG(uap, mode));
2309 }
2310
2311 static int
2312 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode)
2313 {
2314 struct proc *p = l->l_proc;
2315 struct vattr vattr;
2316 int error;
2317 struct pathbuf *pb;
2318 struct nameidata nd;
2319
2320 error = pathbuf_copyin(path, &pb);
2321 if (error) {
2322 return error;
2323 }
2324 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);
2325
2326 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2327 pathbuf_destroy(pb);
2328 return error;
2329 }
2330 if (nd.ni_vp != NULL) {
2331 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2332 if (nd.ni_dvp == nd.ni_vp)
2333 vrele(nd.ni_dvp);
2334 else
2335 vput(nd.ni_dvp);
2336 vrele(nd.ni_vp);
2337 pathbuf_destroy(pb);
2338 return (EEXIST);
2339 }
2340 vattr_null(&vattr);
2341 vattr.va_type = VFIFO;
2342 /* We will read cwdi->cwdi_cmask unlocked. */
2343 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
2344 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
2345 if (error == 0)
2346 vrele(nd.ni_vp);
2347 vput(nd.ni_dvp);
2348 pathbuf_destroy(pb);
2349 return (error);
2350 }
2351
2352 /*
2353 * Make a hard file link.
2354 */
2355 /* ARGSUSED */
2356 int
2357 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink,
2358 const char *link, int follow, register_t *retval)
2359 {
2360 struct vnode *vp;
2361 struct pathbuf *linkpb;
2362 struct nameidata nd;
2363 namei_simple_flags_t ns_flags;
2364 int error;
2365
2366 if (follow & AT_SYMLINK_FOLLOW)
2367 ns_flags = NSM_FOLLOW_TRYEMULROOT;
2368 else
2369 ns_flags = NSM_NOFOLLOW_TRYEMULROOT;
2370
2371 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp);
2372 if (error != 0)
2373 return (error);
2374 error = pathbuf_copyin(link, &linkpb);
2375 if (error) {
2376 goto out1;
2377 }
2378 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2379 if ((error = fd_nameiat(l, fdlink, &nd)) != 0)
2380 goto out2;
2381 if (nd.ni_vp) {
2382 error = EEXIST;
2383 goto abortop;
2384 }
2385 /* Prevent hard links on directories. */
2386 if (vp->v_type == VDIR) {
2387 error = EPERM;
2388 goto abortop;
2389 }
2390 /* Prevent cross-mount operation. */
2391 if (nd.ni_dvp->v_mount != vp->v_mount) {
2392 error = EXDEV;
2393 goto abortop;
2394 }
2395 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
2396 out2:
2397 pathbuf_destroy(linkpb);
2398 out1:
2399 vrele(vp);
2400 return (error);
2401 abortop:
2402 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2403 if (nd.ni_dvp == nd.ni_vp)
2404 vrele(nd.ni_dvp);
2405 else
2406 vput(nd.ni_dvp);
2407 if (nd.ni_vp != NULL)
2408 vrele(nd.ni_vp);
2409 goto out2;
2410 }
2411
2412 int
2413 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval)
2414 {
2415 /* {
2416 syscallarg(const char *) path;
2417 syscallarg(const char *) link;
2418 } */
2419 const char *path = SCARG(uap, path);
2420 const char *link = SCARG(uap, link);
2421
2422 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link,
2423 AT_SYMLINK_FOLLOW, retval);
2424 }
2425
2426 int
2427 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap,
2428 register_t *retval)
2429 {
2430 /* {
2431 syscallarg(int) fd1;
2432 syscallarg(const char *) name1;
2433 syscallarg(int) fd2;
2434 syscallarg(const char *) name2;
2435 syscallarg(int) flags;
2436 } */
2437 int fd1 = SCARG(uap, fd1);
2438 const char *name1 = SCARG(uap, name1);
2439 int fd2 = SCARG(uap, fd2);
2440 const char *name2 = SCARG(uap, name2);
2441 int follow;
2442
2443 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW;
2444
2445 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval);
2446 }
2447
2448
2449 int
2450 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg)
2451 {
2452 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg);
2453 }
2454
2455 static int
2456 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat,
2457 const char *link, enum uio_seg seg)
2458 {
2459 struct proc *p = curproc;
2460 struct vattr vattr;
2461 char *path;
2462 int error;
2463 struct pathbuf *linkpb;
2464 struct nameidata nd;
2465
2466 KASSERT(l != NULL || fdat == AT_FDCWD);
2467
2468 path = PNBUF_GET();
2469 if (seg == UIO_USERSPACE) {
2470 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0)
2471 goto out1;
2472 if ((error = pathbuf_copyin(link, &linkpb)) != 0)
2473 goto out1;
2474 } else {
2475 KASSERT(strlen(patharg) < MAXPATHLEN);
2476 strcpy(path, patharg);
2477 linkpb = pathbuf_create(link);
2478 if (linkpb == NULL) {
2479 error = ENOMEM;
2480 goto out1;
2481 }
2482 }
2483 ktrkuser("symlink-target", path, strlen(path));
2484
2485 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb);
2486 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2487 goto out2;
2488 if (nd.ni_vp) {
2489 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2490 if (nd.ni_dvp == nd.ni_vp)
2491 vrele(nd.ni_dvp);
2492 else
2493 vput(nd.ni_dvp);
2494 vrele(nd.ni_vp);
2495 error = EEXIST;
2496 goto out2;
2497 }
2498 vattr_null(&vattr);
2499 vattr.va_type = VLNK;
2500 /* We will read cwdi->cwdi_cmask unlocked. */
2501 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask;
2502 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
2503 if (error == 0)
2504 vrele(nd.ni_vp);
2505 vput(nd.ni_dvp);
2506 out2:
2507 pathbuf_destroy(linkpb);
2508 out1:
2509 PNBUF_PUT(path);
2510 return (error);
2511 }
2512
2513 /*
2514 * Make a symbolic link.
2515 */
2516 /* ARGSUSED */
2517 int
2518 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval)
2519 {
2520 /* {
2521 syscallarg(const char *) path;
2522 syscallarg(const char *) link;
2523 } */
2524
2525 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link),
2526 UIO_USERSPACE);
2527 }
2528
2529 int
2530 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap,
2531 register_t *retval)
2532 {
2533 /* {
2534 syscallarg(const char *) path1;
2535 syscallarg(int) fd;
2536 syscallarg(const char *) path2;
2537 } */
2538
2539 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd),
2540 SCARG(uap, path2), UIO_USERSPACE);
2541 }
2542
2543 /*
2544 * Delete a whiteout from the filesystem.
2545 */
2546 /* ARGSUSED */
2547 int
2548 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval)
2549 {
2550 /* {
2551 syscallarg(const char *) path;
2552 } */
2553 int error;
2554 struct pathbuf *pb;
2555 struct nameidata nd;
2556
2557 error = pathbuf_copyin(SCARG(uap, path), &pb);
2558 if (error) {
2559 return error;
2560 }
2561
2562 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb);
2563 error = namei(&nd);
2564 if (error) {
2565 pathbuf_destroy(pb);
2566 return (error);
2567 }
2568
2569 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
2570 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2571 if (nd.ni_dvp == nd.ni_vp)
2572 vrele(nd.ni_dvp);
2573 else
2574 vput(nd.ni_dvp);
2575 if (nd.ni_vp)
2576 vrele(nd.ni_vp);
2577 pathbuf_destroy(pb);
2578 return (EEXIST);
2579 }
2580 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0)
2581 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2582 vput(nd.ni_dvp);
2583 pathbuf_destroy(pb);
2584 return (error);
2585 }
2586
2587 /*
2588 * Delete a name from the filesystem.
2589 */
2590 /* ARGSUSED */
2591 int
2592 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval)
2593 {
2594 /* {
2595 syscallarg(const char *) path;
2596 } */
2597
2598 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE);
2599 }
2600
2601 int
2602 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
2603 register_t *retval)
2604 {
2605 /* {
2606 syscallarg(int) fd;
2607 syscallarg(const char *) path;
2608 syscallarg(int) flag;
2609 } */
2610
2611 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
2612 SCARG(uap, flag), UIO_USERSPACE);
2613 }
2614
2615 int
2616 do_sys_unlink(const char *arg, enum uio_seg seg)
2617 {
2618 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
2619 }
2620
2621 static int
2622 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
2623 enum uio_seg seg)
2624 {
2625 struct vnode *vp;
2626 int error;
2627 struct pathbuf *pb;
2628 struct nameidata nd;
2629 const char *pathstring;
2630
2631 KASSERT(l != NULL || fdat == AT_FDCWD);
2632
2633 error = pathbuf_maybe_copyin(arg, seg, &pb);
2634 if (error) {
2635 return error;
2636 }
2637 pathstring = pathbuf_stringcopy_get(pb);
2638 if (pathstring == NULL) {
2639 pathbuf_destroy(pb);
2640 return ENOMEM;
2641 }
2642
2643 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
2644 if ((error = fd_nameiat(l, fdat, &nd)) != 0)
2645 goto out;
2646 vp = nd.ni_vp;
2647
2648 /*
2649 * The root of a mounted filesystem cannot be deleted.
2650 */
2651 if ((vp->v_vflag & VV_ROOT) != 0) {
2652 error = EBUSY;
2653 goto abort;
2654 }
2655
2656 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
2657 error = EBUSY;
2658 goto abort;
2659 }
2660
2661 /*
2662 * No rmdir "." please.
2663 */
2664 if (nd.ni_dvp == vp) {
2665 error = EINVAL;
2666 goto abort;
2667 }
2668
2669 /*
2670 * AT_REMOVEDIR is required to remove a directory
2671 */
2672 if (vp->v_type == VDIR) {
2673 if (!(flags & AT_REMOVEDIR)) {
2674 error = EPERM;
2675 goto abort;
2676 } else {
2677 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2678 goto out;
2679 }
2680 }
2681
2682 /*
2683 * Starting here we only deal with non directories.
2684 */
2685 if (flags & AT_REMOVEDIR) {
2686 error = ENOTDIR;
2687 goto abort;
2688 }
2689
2690
2691 #if NVERIEXEC > 0
2692 /* Handle remove requests for veriexec entries. */
2693 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
2694 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2695 if (nd.ni_dvp == vp)
2696 vrele(nd.ni_dvp);
2697 else
2698 vput(nd.ni_dvp);
2699 vput(vp);
2700 goto out;
2701 }
2702 #endif /* NVERIEXEC > 0 */
2703
2704 #ifdef FILEASSOC
2705 (void)fileassoc_file_delete(vp);
2706 #endif /* FILEASSOC */
2707 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
2708 goto out;
2709
2710 abort:
2711 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
2712 if (nd.ni_dvp == vp)
2713 vrele(nd.ni_dvp);
2714 else
2715 vput(nd.ni_dvp);
2716 vput(vp);
2717
2718 out:
2719 pathbuf_stringcopy_put(pb, pathstring);
2720 pathbuf_destroy(pb);
2721 return (error);
2722 }
2723
2724 /*
2725 * Reposition read/write file offset.
2726 */
2727 int
2728 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval)
2729 {
2730 /* {
2731 syscallarg(int) fd;
2732 syscallarg(int) pad;
2733 syscallarg(off_t) offset;
2734 syscallarg(int) whence;
2735 } */
2736 kauth_cred_t cred = l->l_cred;
2737 file_t *fp;
2738 struct vnode *vp;
2739 struct vattr vattr;
2740 off_t newoff;
2741 int error, fd;
2742
2743 fd = SCARG(uap, fd);
2744
2745 if ((fp = fd_getfile(fd)) == NULL)
2746 return (EBADF);
2747
2748 vp = fp->f_data;
2749 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2750 error = ESPIPE;
2751 goto out;
2752 }
2753
2754 switch (SCARG(uap, whence)) {
2755 case SEEK_CUR:
2756 newoff = fp->f_offset + SCARG(uap, offset);
2757 break;
2758 case SEEK_END:
2759 vn_lock(vp, LK_SHARED | LK_RETRY);
2760 error = VOP_GETATTR(vp, &vattr, cred);
2761 VOP_UNLOCK(vp);
2762 if (error) {
2763 goto out;
2764 }
2765 newoff = SCARG(uap, offset) + vattr.va_size;
2766 break;
2767 case SEEK_SET:
2768 newoff = SCARG(uap, offset);
2769 break;
2770 default:
2771 error = EINVAL;
2772 goto out;
2773 }
2774 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
2775 *(off_t *)retval = fp->f_offset = newoff;
2776 }
2777 out:
2778 fd_putfile(fd);
2779 return (error);
2780 }
2781
2782 /*
2783 * Positional read system call.
2784 */
2785 int
2786 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval)
2787 {
2788 /* {
2789 syscallarg(int) fd;
2790 syscallarg(void *) buf;
2791 syscallarg(size_t) nbyte;
2792 syscallarg(off_t) offset;
2793 } */
2794 file_t *fp;
2795 struct vnode *vp;
2796 off_t offset;
2797 int error, fd = SCARG(uap, fd);
2798
2799 if ((fp = fd_getfile(fd)) == NULL)
2800 return (EBADF);
2801
2802 if ((fp->f_flag & FREAD) == 0) {
2803 fd_putfile(fd);
2804 return (EBADF);
2805 }
2806
2807 vp = fp->f_data;
2808 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2809 error = ESPIPE;
2810 goto out;
2811 }
2812
2813 offset = SCARG(uap, offset);
2814
2815 /*
2816 * XXX This works because no file systems actually
2817 * XXX take any action on the seek operation.
2818 */
2819 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2820 goto out;
2821
2822 /* dofileread() will unuse the descriptor for us */
2823 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2824 &offset, 0, retval));
2825
2826 out:
2827 fd_putfile(fd);
2828 return (error);
2829 }
2830
2831 /*
2832 * Positional scatter read system call.
2833 */
2834 int
2835 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval)
2836 {
2837 /* {
2838 syscallarg(int) fd;
2839 syscallarg(const struct iovec *) iovp;
2840 syscallarg(int) iovcnt;
2841 syscallarg(off_t) offset;
2842 } */
2843 off_t offset = SCARG(uap, offset);
2844
2845 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp),
2846 SCARG(uap, iovcnt), &offset, 0, retval);
2847 }
2848
2849 /*
2850 * Positional write system call.
2851 */
2852 int
2853 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval)
2854 {
2855 /* {
2856 syscallarg(int) fd;
2857 syscallarg(const void *) buf;
2858 syscallarg(size_t) nbyte;
2859 syscallarg(off_t) offset;
2860 } */
2861 file_t *fp;
2862 struct vnode *vp;
2863 off_t offset;
2864 int error, fd = SCARG(uap, fd);
2865
2866 if ((fp = fd_getfile(fd)) == NULL)
2867 return (EBADF);
2868
2869 if ((fp->f_flag & FWRITE) == 0) {
2870 fd_putfile(fd);
2871 return (EBADF);
2872 }
2873
2874 vp = fp->f_data;
2875 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
2876 error = ESPIPE;
2877 goto out;
2878 }
2879
2880 offset = SCARG(uap, offset);
2881
2882 /*
2883 * XXX This works because no file systems actually
2884 * XXX take any action on the seek operation.
2885 */
2886 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
2887 goto out;
2888
2889 /* dofilewrite() will unuse the descriptor for us */
2890 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
2891 &offset, 0, retval));
2892
2893 out:
2894 fd_putfile(fd);
2895 return (error);
2896 }
2897
2898 /*
2899 * Positional gather write system call.
2900 */
2901 int
2902 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval)
2903 {
2904 /* {
2905 syscallarg(int) fd;
2906 syscallarg(const struct iovec *) iovp;
2907 syscallarg(int) iovcnt;
2908 syscallarg(off_t) offset;
2909 } */
2910 off_t offset = SCARG(uap, offset);
2911
2912 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp),
2913 SCARG(uap, iovcnt), &offset, 0, retval);
2914 }
2915
2916 /*
2917 * Check access permissions.
2918 */
2919 int
2920 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval)
2921 {
2922 /* {
2923 syscallarg(const char *) path;
2924 syscallarg(int) flags;
2925 } */
2926
2927 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path),
2928 SCARG(uap, flags), 0);
2929 }
2930
2931 int
2932 do_sys_accessat(struct lwp *l, int fdat, const char *path,
2933 int mode, int flags)
2934 {
2935 kauth_cred_t cred;
2936 struct vnode *vp;
2937 int error, nd_flag, vmode;
2938 struct pathbuf *pb;
2939 struct nameidata nd;
2940
2941 CTASSERT(F_OK == 0);
2942 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) {
2943 /* nonsense mode */
2944 return EINVAL;
2945 }
2946
2947 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT;
2948 if (flags & AT_SYMLINK_NOFOLLOW)
2949 nd_flag &= ~FOLLOW;
2950
2951 error = pathbuf_copyin(path, &pb);
2952 if (error)
2953 return error;
2954
2955 NDINIT(&nd, LOOKUP, nd_flag, pb);
2956
2957 /* Override default credentials */
2958 cred = kauth_cred_dup(l->l_cred);
2959 if (!(flags & AT_EACCESS)) {
2960 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred));
2961 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred));
2962 }
2963 nd.ni_cnd.cn_cred = cred;
2964
2965 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
2966 pathbuf_destroy(pb);
2967 goto out;
2968 }
2969 vp = nd.ni_vp;
2970 pathbuf_destroy(pb);
2971
2972 /* Flags == 0 means only check for existence. */
2973 if (mode) {
2974 vmode = 0;
2975 if (mode & R_OK)
2976 vmode |= VREAD;
2977 if (mode & W_OK)
2978 vmode |= VWRITE;
2979 if (mode & X_OK)
2980 vmode |= VEXEC;
2981
2982 error = VOP_ACCESS(vp, vmode, cred);
2983 if (!error && (vmode & VWRITE))
2984 error = vn_writechk(vp);
2985 }
2986 vput(vp);
2987 out:
2988 kauth_cred_free(cred);
2989 return (error);
2990 }
2991
2992 int
2993 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap,
2994 register_t *retval)
2995 {
2996 /* {
2997 syscallarg(int) fd;
2998 syscallarg(const char *) path;
2999 syscallarg(int) amode;
3000 syscallarg(int) flag;
3001 } */
3002
3003 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path),
3004 SCARG(uap, amode), SCARG(uap, flag));
3005 }
3006
3007 /*
3008 * Common code for all sys_stat functions, including compat versions.
3009 */
3010 int
3011 do_sys_stat(const char *userpath, unsigned int nd_flag,
3012 struct stat *sb)
3013 {
3014 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb);
3015 }
3016
3017 int
3018 do_sys_statat(struct lwp *l, int fdat, const char *userpath,
3019 unsigned int nd_flag, struct stat *sb)
3020 {
3021 int error;
3022 struct pathbuf *pb;
3023 struct nameidata nd;
3024
3025 KASSERT(l != NULL || fdat == AT_FDCWD);
3026
3027 error = pathbuf_copyin(userpath, &pb);
3028 if (error) {
3029 return error;
3030 }
3031
3032 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb);
3033
3034 error = fd_nameiat(l, fdat, &nd);
3035 if (error != 0) {
3036 pathbuf_destroy(pb);
3037 return error;
3038 }
3039 error = vn_stat(nd.ni_vp, sb);
3040 vput(nd.ni_vp);
3041 pathbuf_destroy(pb);
3042 return error;
3043 }
3044
3045 /*
3046 * Get file status; this version follows links.
3047 */
3048 /* ARGSUSED */
3049 int
3050 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval)
3051 {
3052 /* {
3053 syscallarg(const char *) path;
3054 syscallarg(struct stat *) ub;
3055 } */
3056 struct stat sb;
3057 int error;
3058
3059 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb);
3060 if (error)
3061 return error;
3062 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3063 }
3064
3065 /*
3066 * Get file status; this version does not follow links.
3067 */
3068 /* ARGSUSED */
3069 int
3070 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval)
3071 {
3072 /* {
3073 syscallarg(const char *) path;
3074 syscallarg(struct stat *) ub;
3075 } */
3076 struct stat sb;
3077 int error;
3078
3079 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb);
3080 if (error)
3081 return error;
3082 return copyout(&sb, SCARG(uap, ub), sizeof(sb));
3083 }
3084
3085 int
3086 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap,
3087 register_t *retval)
3088 {
3089 /* {
3090 syscallarg(int) fd;
3091 syscallarg(const char *) path;
3092 syscallarg(struct stat *) buf;
3093 syscallarg(int) flag;
3094 } */
3095 unsigned int nd_flag;
3096 struct stat sb;
3097 int error;
3098
3099 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW)
3100 nd_flag = NOFOLLOW;
3101 else
3102 nd_flag = FOLLOW;
3103
3104 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag,
3105 &sb);
3106 if (error)
3107 return error;
3108 return copyout(&sb, SCARG(uap, buf), sizeof(sb));
3109 }
3110
3111 /*
3112 * Get configurable pathname variables.
3113 */
3114 /* ARGSUSED */
3115 int
3116 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval)
3117 {
3118 /* {
3119 syscallarg(const char *) path;
3120 syscallarg(int) name;
3121 } */
3122 int error;
3123 struct pathbuf *pb;
3124 struct nameidata nd;
3125
3126 error = pathbuf_copyin(SCARG(uap, path), &pb);
3127 if (error) {
3128 return error;
3129 }
3130 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3131 if ((error = namei(&nd)) != 0) {
3132 pathbuf_destroy(pb);
3133 return (error);
3134 }
3135 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval);
3136 vput(nd.ni_vp);
3137 pathbuf_destroy(pb);
3138 return (error);
3139 }
3140
3141 /*
3142 * Return target name of a symbolic link.
3143 */
3144 /* ARGSUSED */
3145 int
3146 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap,
3147 register_t *retval)
3148 {
3149 /* {
3150 syscallarg(const char *) path;
3151 syscallarg(char *) buf;
3152 syscallarg(size_t) count;
3153 } */
3154 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path),
3155 SCARG(uap, buf), SCARG(uap, count), retval);
3156 }
3157
3158 static int
3159 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf,
3160 size_t count, register_t *retval)
3161 {
3162 struct vnode *vp;
3163 struct iovec aiov;
3164 struct uio auio;
3165 int error;
3166 struct pathbuf *pb;
3167 struct nameidata nd;
3168
3169 error = pathbuf_copyin(path, &pb);
3170 if (error) {
3171 return error;
3172 }
3173 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb);
3174 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
3175 pathbuf_destroy(pb);
3176 return error;
3177 }
3178 vp = nd.ni_vp;
3179 pathbuf_destroy(pb);
3180 if (vp->v_type != VLNK)
3181 error = EINVAL;
3182 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) ||
3183 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) {
3184 aiov.iov_base = buf;
3185 aiov.iov_len = count;
3186 auio.uio_iov = &aiov;
3187 auio.uio_iovcnt = 1;
3188 auio.uio_offset = 0;
3189 auio.uio_rw = UIO_READ;
3190 KASSERT(l == curlwp);
3191 auio.uio_vmspace = l->l_proc->p_vmspace;
3192 auio.uio_resid = count;
3193 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0)
3194 *retval = count - auio.uio_resid;
3195 }
3196 vput(vp);
3197 return (error);
3198 }
3199
3200 int
3201 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap,
3202 register_t *retval)
3203 {
3204 /* {
3205 syscallarg(int) fd;
3206 syscallarg(const char *) path;
3207 syscallarg(char *) buf;
3208 syscallarg(size_t) bufsize;
3209 } */
3210
3211 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path),
3212 SCARG(uap, buf), SCARG(uap, bufsize), retval);
3213 }
3214
3215 /*
3216 * Change flags of a file given a path name.
3217 */
3218 /* ARGSUSED */
3219 int
3220 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval)
3221 {
3222 /* {
3223 syscallarg(const char *) path;
3224 syscallarg(u_long) flags;
3225 } */
3226 struct vnode *vp;
3227 int error;
3228
3229 error = namei_simple_user(SCARG(uap, path),
3230 NSM_FOLLOW_TRYEMULROOT, &vp);
3231 if (error != 0)
3232 return (error);
3233 error = change_flags(vp, SCARG(uap, flags), l);
3234 vput(vp);
3235 return (error);
3236 }
3237
3238 /*
3239 * Change flags of a file given a file descriptor.
3240 */
3241 /* ARGSUSED */
3242 int
3243 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval)
3244 {
3245 /* {
3246 syscallarg(int) fd;
3247 syscallarg(u_long) flags;
3248 } */
3249 struct vnode *vp;
3250 file_t *fp;
3251 int error;
3252
3253 /* fd_getvnode() will use the descriptor for us */
3254 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3255 return (error);
3256 vp = fp->f_data;
3257 error = change_flags(vp, SCARG(uap, flags), l);
3258 VOP_UNLOCK(vp);
3259 fd_putfile(SCARG(uap, fd));
3260 return (error);
3261 }
3262
3263 /*
3264 * Change flags of a file given a path name; this version does
3265 * not follow links.
3266 */
3267 int
3268 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval)
3269 {
3270 /* {
3271 syscallarg(const char *) path;
3272 syscallarg(u_long) flags;
3273 } */
3274 struct vnode *vp;
3275 int error;
3276
3277 error = namei_simple_user(SCARG(uap, path),
3278 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3279 if (error != 0)
3280 return (error);
3281 error = change_flags(vp, SCARG(uap, flags), l);
3282 vput(vp);
3283 return (error);
3284 }
3285
3286 /*
3287 * Common routine to change flags of a file.
3288 */
3289 int
3290 change_flags(struct vnode *vp, u_long flags, struct lwp *l)
3291 {
3292 struct vattr vattr;
3293 int error;
3294
3295 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3296
3297 vattr_null(&vattr);
3298 vattr.va_flags = flags;
3299 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3300
3301 return (error);
3302 }
3303
3304 /*
3305 * Change mode of a file given path name; this version follows links.
3306 */
3307 /* ARGSUSED */
3308 int
3309 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval)
3310 {
3311 /* {
3312 syscallarg(const char *) path;
3313 syscallarg(int) mode;
3314 } */
3315 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path),
3316 SCARG(uap, mode), 0);
3317 }
3318
3319 int
3320 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags)
3321 {
3322 int error;
3323 struct vnode *vp;
3324 namei_simple_flags_t ns_flag;
3325
3326 if (flags & AT_SYMLINK_NOFOLLOW)
3327 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3328 else
3329 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3330
3331 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3332 if (error != 0)
3333 return error;
3334
3335 error = change_mode(vp, mode, l);
3336
3337 vrele(vp);
3338
3339 return (error);
3340 }
3341
3342 /*
3343 * Change mode of a file given a file descriptor.
3344 */
3345 /* ARGSUSED */
3346 int
3347 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval)
3348 {
3349 /* {
3350 syscallarg(int) fd;
3351 syscallarg(int) mode;
3352 } */
3353 file_t *fp;
3354 int error;
3355
3356 /* fd_getvnode() will use the descriptor for us */
3357 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3358 return (error);
3359 error = change_mode(fp->f_data, SCARG(uap, mode), l);
3360 fd_putfile(SCARG(uap, fd));
3361 return (error);
3362 }
3363
3364 int
3365 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap,
3366 register_t *retval)
3367 {
3368 /* {
3369 syscallarg(int) fd;
3370 syscallarg(const char *) path;
3371 syscallarg(int) mode;
3372 syscallarg(int) flag;
3373 } */
3374
3375 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path),
3376 SCARG(uap, mode), SCARG(uap, flag));
3377 }
3378
3379 /*
3380 * Change mode of a file given path name; this version does not follow links.
3381 */
3382 /* ARGSUSED */
3383 int
3384 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval)
3385 {
3386 /* {
3387 syscallarg(const char *) path;
3388 syscallarg(int) mode;
3389 } */
3390 int error;
3391 struct vnode *vp;
3392
3393 error = namei_simple_user(SCARG(uap, path),
3394 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3395 if (error != 0)
3396 return (error);
3397
3398 error = change_mode(vp, SCARG(uap, mode), l);
3399
3400 vrele(vp);
3401 return (error);
3402 }
3403
3404 /*
3405 * Common routine to set mode given a vnode.
3406 */
3407 static int
3408 change_mode(struct vnode *vp, int mode, struct lwp *l)
3409 {
3410 struct vattr vattr;
3411 int error;
3412
3413 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3414 vattr_null(&vattr);
3415 vattr.va_mode = mode & ALLPERMS;
3416 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3417 VOP_UNLOCK(vp);
3418 return (error);
3419 }
3420
3421 /*
3422 * Set ownership given a path name; this version follows links.
3423 */
3424 /* ARGSUSED */
3425 int
3426 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval)
3427 {
3428 /* {
3429 syscallarg(const char *) path;
3430 syscallarg(uid_t) uid;
3431 syscallarg(gid_t) gid;
3432 } */
3433 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid),
3434 SCARG(uap, gid), 0);
3435 }
3436
3437 int
3438 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid,
3439 gid_t gid, int flags)
3440 {
3441 int error;
3442 struct vnode *vp;
3443 namei_simple_flags_t ns_flag;
3444
3445 if (flags & AT_SYMLINK_NOFOLLOW)
3446 ns_flag = NSM_NOFOLLOW_TRYEMULROOT;
3447 else
3448 ns_flag = NSM_FOLLOW_TRYEMULROOT;
3449
3450 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp);
3451 if (error != 0)
3452 return error;
3453
3454 error = change_owner(vp, uid, gid, l, 0);
3455
3456 vrele(vp);
3457
3458 return (error);
3459 }
3460
3461 /*
3462 * Set ownership given a path name; this version follows links.
3463 * Provides POSIX semantics.
3464 */
3465 /* ARGSUSED */
3466 int
3467 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval)
3468 {
3469 /* {
3470 syscallarg(const char *) path;
3471 syscallarg(uid_t) uid;
3472 syscallarg(gid_t) gid;
3473 } */
3474 int error;
3475 struct vnode *vp;
3476
3477 error = namei_simple_user(SCARG(uap, path),
3478 NSM_FOLLOW_TRYEMULROOT, &vp);
3479 if (error != 0)
3480 return (error);
3481
3482 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3483
3484 vrele(vp);
3485 return (error);
3486 }
3487
3488 /*
3489 * Set ownership given a file descriptor.
3490 */
3491 /* ARGSUSED */
3492 int
3493 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval)
3494 {
3495 /* {
3496 syscallarg(int) fd;
3497 syscallarg(uid_t) uid;
3498 syscallarg(gid_t) gid;
3499 } */
3500 int error;
3501 file_t *fp;
3502
3503 /* fd_getvnode() will use the descriptor for us */
3504 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3505 return (error);
3506 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3507 l, 0);
3508 fd_putfile(SCARG(uap, fd));
3509 return (error);
3510 }
3511
3512 int
3513 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap,
3514 register_t *retval)
3515 {
3516 /* {
3517 syscallarg(int) fd;
3518 syscallarg(const char *) path;
3519 syscallarg(uid_t) owner;
3520 syscallarg(gid_t) group;
3521 syscallarg(int) flag;
3522 } */
3523
3524 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path),
3525 SCARG(uap, owner), SCARG(uap, group),
3526 SCARG(uap, flag));
3527 }
3528
3529 /*
3530 * Set ownership given a file descriptor, providing POSIX/XPG semantics.
3531 */
3532 /* ARGSUSED */
3533 int
3534 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval)
3535 {
3536 /* {
3537 syscallarg(int) fd;
3538 syscallarg(uid_t) uid;
3539 syscallarg(gid_t) gid;
3540 } */
3541 int error;
3542 file_t *fp;
3543
3544 /* fd_getvnode() will use the descriptor for us */
3545 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3546 return (error);
3547 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid),
3548 l, 1);
3549 fd_putfile(SCARG(uap, fd));
3550 return (error);
3551 }
3552
3553 /*
3554 * Set ownership given a path name; this version does not follow links.
3555 */
3556 /* ARGSUSED */
3557 int
3558 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval)
3559 {
3560 /* {
3561 syscallarg(const char *) path;
3562 syscallarg(uid_t) uid;
3563 syscallarg(gid_t) gid;
3564 } */
3565 int error;
3566 struct vnode *vp;
3567
3568 error = namei_simple_user(SCARG(uap, path),
3569 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3570 if (error != 0)
3571 return (error);
3572
3573 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0);
3574
3575 vrele(vp);
3576 return (error);
3577 }
3578
3579 /*
3580 * Set ownership given a path name; this version does not follow links.
3581 * Provides POSIX/XPG semantics.
3582 */
3583 /* ARGSUSED */
3584 int
3585 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval)
3586 {
3587 /* {
3588 syscallarg(const char *) path;
3589 syscallarg(uid_t) uid;
3590 syscallarg(gid_t) gid;
3591 } */
3592 int error;
3593 struct vnode *vp;
3594
3595 error = namei_simple_user(SCARG(uap, path),
3596 NSM_NOFOLLOW_TRYEMULROOT, &vp);
3597 if (error != 0)
3598 return (error);
3599
3600 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1);
3601
3602 vrele(vp);
3603 return (error);
3604 }
3605
3606 /*
3607 * Common routine to set ownership given a vnode.
3608 */
3609 static int
3610 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
3611 int posix_semantics)
3612 {
3613 struct vattr vattr;
3614 mode_t newmode;
3615 int error;
3616
3617 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3618 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
3619 goto out;
3620
3621 #define CHANGED(x) ((int)(x) != -1)
3622 newmode = vattr.va_mode;
3623 if (posix_semantics) {
3624 /*
3625 * POSIX/XPG semantics: if the caller is not the super-user,
3626 * clear set-user-id and set-group-id bits. Both POSIX and
3627 * the XPG consider the behaviour for calls by the super-user
3628 * implementation-defined; we leave the set-user-id and set-
3629 * group-id settings intact in that case.
3630 */
3631 if (vattr.va_mode & S_ISUID) {
3632 if (kauth_authorize_vnode(l->l_cred,
3633 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
3634 newmode &= ~S_ISUID;
3635 }
3636 if (vattr.va_mode & S_ISGID) {
3637 if (kauth_authorize_vnode(l->l_cred,
3638 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
3639 newmode &= ~S_ISGID;
3640 }
3641 } else {
3642 /*
3643 * NetBSD semantics: when changing owner and/or group,
3644 * clear the respective bit(s).
3645 */
3646 if (CHANGED(uid))
3647 newmode &= ~S_ISUID;
3648 if (CHANGED(gid))
3649 newmode &= ~S_ISGID;
3650 }
3651 /* Update va_mode iff altered. */
3652 if (vattr.va_mode == newmode)
3653 newmode = VNOVAL;
3654
3655 vattr_null(&vattr);
3656 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
3657 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
3658 vattr.va_mode = newmode;
3659 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3660 #undef CHANGED
3661
3662 out:
3663 VOP_UNLOCK(vp);
3664 return (error);
3665 }
3666
3667 /*
3668 * Set the access and modification times given a path name; this
3669 * version follows links.
3670 */
3671 /* ARGSUSED */
3672 int
3673 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap,
3674 register_t *retval)
3675 {
3676 /* {
3677 syscallarg(const char *) path;
3678 syscallarg(const struct timeval *) tptr;
3679 } */
3680
3681 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW,
3682 SCARG(uap, tptr), UIO_USERSPACE);
3683 }
3684
3685 /*
3686 * Set the access and modification times given a file descriptor.
3687 */
3688 /* ARGSUSED */
3689 int
3690 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap,
3691 register_t *retval)
3692 {
3693 /* {
3694 syscallarg(int) fd;
3695 syscallarg(const struct timeval *) tptr;
3696 } */
3697 int error;
3698 file_t *fp;
3699
3700 /* fd_getvnode() will use the descriptor for us */
3701 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3702 return (error);
3703 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr),
3704 UIO_USERSPACE);
3705 fd_putfile(SCARG(uap, fd));
3706 return (error);
3707 }
3708
3709 int
3710 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap,
3711 register_t *retval)
3712 {
3713 /* {
3714 syscallarg(int) fd;
3715 syscallarg(const struct timespec *) tptr;
3716 } */
3717 int error;
3718 file_t *fp;
3719
3720 /* fd_getvnode() will use the descriptor for us */
3721 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3722 return (error);
3723 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0,
3724 SCARG(uap, tptr), UIO_USERSPACE);
3725 fd_putfile(SCARG(uap, fd));
3726 return (error);
3727 }
3728
3729 /*
3730 * Set the access and modification times given a path name; this
3731 * version does not follow links.
3732 */
3733 int
3734 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap,
3735 register_t *retval)
3736 {
3737 /* {
3738 syscallarg(const char *) path;
3739 syscallarg(const struct timeval *) tptr;
3740 } */
3741
3742 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW,
3743 SCARG(uap, tptr), UIO_USERSPACE);
3744 }
3745
3746 int
3747 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap,
3748 register_t *retval)
3749 {
3750 /* {
3751 syscallarg(int) fd;
3752 syscallarg(const char *) path;
3753 syscallarg(const struct timespec *) tptr;
3754 syscallarg(int) flag;
3755 } */
3756 int follow;
3757 const struct timespec *tptr;
3758 int error;
3759
3760 tptr = SCARG(uap, tptr);
3761 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
3762
3763 error = do_sys_utimensat(l, SCARG(uap, fd), NULL,
3764 SCARG(uap, path), follow, tptr, UIO_USERSPACE);
3765
3766 return error;
3767 }
3768
3769 /*
3770 * Common routine to set access and modification times given a vnode.
3771 */
3772 int
3773 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag,
3774 const struct timespec *tptr, enum uio_seg seg)
3775 {
3776 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg);
3777 }
3778
3779 int
3780 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp,
3781 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg)
3782 {
3783 struct vattr vattr;
3784 int error, dorele = 0;
3785 namei_simple_flags_t sflags;
3786 bool vanull, setbirthtime;
3787 struct timespec ts[2];
3788
3789 KASSERT(l != NULL || fdat == AT_FDCWD);
3790
3791 /*
3792 * I have checked all callers and they pass either FOLLOW,
3793 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW
3794 * is 0. More to the point, they don't pass anything else.
3795 * Let's keep it that way at least until the namei interfaces
3796 * are fully sanitized.
3797 */
3798 KASSERT(flag == NOFOLLOW || flag == FOLLOW);
3799 sflags = (flag == FOLLOW) ?
3800 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT;
3801
3802 if (tptr == NULL) {
3803 vanull = true;
3804 nanotime(&ts[0]);
3805 ts[1] = ts[0];
3806 } else {
3807 vanull = false;
3808 if (seg != UIO_SYSSPACE) {
3809 error = copyin(tptr, ts, sizeof (ts));
3810 if (error != 0)
3811 return error;
3812 } else {
3813 ts[0] = tptr[0];
3814 ts[1] = tptr[1];
3815 }
3816 }
3817
3818 if (ts[0].tv_nsec == UTIME_NOW) {
3819 nanotime(&ts[0]);
3820 if (ts[1].tv_nsec == UTIME_NOW) {
3821 vanull = true;
3822 ts[1] = ts[0];
3823 }
3824 } else if (ts[1].tv_nsec == UTIME_NOW)
3825 nanotime(&ts[1]);
3826
3827 if (vp == NULL) {
3828 /* note: SEG describes TPTR, not PATH; PATH is always user */
3829 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp);
3830 if (error != 0)
3831 return error;
3832 dorele = 1;
3833 }
3834
3835 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3836 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 &&
3837 timespeccmp(&ts[1], &vattr.va_birthtime, <));
3838 vattr_null(&vattr);
3839
3840 if (ts[0].tv_nsec != UTIME_OMIT)
3841 vattr.va_atime = ts[0];
3842
3843 if (ts[1].tv_nsec != UTIME_OMIT) {
3844 vattr.va_mtime = ts[1];
3845 if (setbirthtime)
3846 vattr.va_birthtime = ts[1];
3847 }
3848
3849 if (vanull)
3850 vattr.va_vaflags |= VA_UTIMES_NULL;
3851 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3852 VOP_UNLOCK(vp);
3853
3854 if (dorele != 0)
3855 vrele(vp);
3856
3857 return error;
3858 }
3859
3860 int
3861 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag,
3862 const struct timeval *tptr, enum uio_seg seg)
3863 {
3864 struct timespec ts[2];
3865 struct timespec *tsptr = NULL;
3866 int error;
3867
3868 if (tptr != NULL) {
3869 struct timeval tv[2];
3870
3871 if (seg != UIO_SYSSPACE) {
3872 error = copyin(tptr, tv, sizeof (tv));
3873 if (error != 0)
3874 return error;
3875 tptr = tv;
3876 }
3877
3878 if ((tv[0].tv_usec == UTIME_NOW) ||
3879 (tv[0].tv_usec == UTIME_OMIT))
3880 ts[0].tv_nsec = tv[0].tv_usec;
3881 else
3882 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]);
3883
3884 if ((tv[1].tv_usec == UTIME_NOW) ||
3885 (tv[1].tv_usec == UTIME_OMIT))
3886 ts[1].tv_nsec = tv[1].tv_usec;
3887 else
3888 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]);
3889
3890 tsptr = &ts[0];
3891 }
3892
3893 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE);
3894 }
3895
3896 /*
3897 * Truncate a file given its path name.
3898 */
3899 /* ARGSUSED */
3900 int
3901 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval)
3902 {
3903 /* {
3904 syscallarg(const char *) path;
3905 syscallarg(int) pad;
3906 syscallarg(off_t) length;
3907 } */
3908 struct vnode *vp;
3909 struct vattr vattr;
3910 int error;
3911
3912 error = namei_simple_user(SCARG(uap, path),
3913 NSM_FOLLOW_TRYEMULROOT, &vp);
3914 if (error != 0)
3915 return (error);
3916 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3917 if (vp->v_type == VDIR)
3918 error = EISDIR;
3919 else if ((error = vn_writechk(vp)) == 0 &&
3920 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) {
3921 vattr_null(&vattr);
3922 vattr.va_size = SCARG(uap, length);
3923 error = VOP_SETATTR(vp, &vattr, l->l_cred);
3924 }
3925 vput(vp);
3926 return (error);
3927 }
3928
3929 /*
3930 * Truncate a file given a file descriptor.
3931 */
3932 /* ARGSUSED */
3933 int
3934 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval)
3935 {
3936 /* {
3937 syscallarg(int) fd;
3938 syscallarg(int) pad;
3939 syscallarg(off_t) length;
3940 } */
3941 struct vattr vattr;
3942 struct vnode *vp;
3943 file_t *fp;
3944 int error;
3945
3946 /* fd_getvnode() will use the descriptor for us */
3947 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3948 return (error);
3949 if ((fp->f_flag & FWRITE) == 0) {
3950 error = EINVAL;
3951 goto out;
3952 }
3953 vp = fp->f_data;
3954 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3955 if (vp->v_type == VDIR)
3956 error = EISDIR;
3957 else if ((error = vn_writechk(vp)) == 0) {
3958 vattr_null(&vattr);
3959 vattr.va_size = SCARG(uap, length);
3960 error = VOP_SETATTR(vp, &vattr, fp->f_cred);
3961 }
3962 VOP_UNLOCK(vp);
3963 out:
3964 fd_putfile(SCARG(uap, fd));
3965 return (error);
3966 }
3967
3968 /*
3969 * Sync an open file.
3970 */
3971 /* ARGSUSED */
3972 int
3973 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval)
3974 {
3975 /* {
3976 syscallarg(int) fd;
3977 } */
3978 struct vnode *vp;
3979 file_t *fp;
3980 int error;
3981
3982 /* fd_getvnode() will use the descriptor for us */
3983 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
3984 return (error);
3985 vp = fp->f_data;
3986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3987 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0);
3988 VOP_UNLOCK(vp);
3989 fd_putfile(SCARG(uap, fd));
3990 return (error);
3991 }
3992
3993 /*
3994 * Sync a range of file data. API modeled after that found in AIX.
3995 *
3996 * FDATASYNC indicates that we need only save enough metadata to be able
3997 * to re-read the written data. Note we duplicate AIX's requirement that
3998 * the file be open for writing.
3999 */
4000 /* ARGSUSED */
4001 int
4002 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval)
4003 {
4004 /* {
4005 syscallarg(int) fd;
4006 syscallarg(int) flags;
4007 syscallarg(off_t) start;
4008 syscallarg(off_t) length;
4009 } */
4010 struct vnode *vp;
4011 file_t *fp;
4012 int flags, nflags;
4013 off_t s, e, len;
4014 int error;
4015
4016 /* fd_getvnode() will use the descriptor for us */
4017 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4018 return (error);
4019
4020 if ((fp->f_flag & FWRITE) == 0) {
4021 error = EBADF;
4022 goto out;
4023 }
4024
4025 flags = SCARG(uap, flags);
4026 if (((flags & (FDATASYNC | FFILESYNC)) == 0) ||
4027 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) {
4028 error = EINVAL;
4029 goto out;
4030 }
4031 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */
4032 if (flags & FDATASYNC)
4033 nflags = FSYNC_DATAONLY | FSYNC_WAIT;
4034 else
4035 nflags = FSYNC_WAIT;
4036 if (flags & FDISKSYNC)
4037 nflags |= FSYNC_CACHE;
4038
4039 len = SCARG(uap, length);
4040 /* If length == 0, we do the whole file, and s = e = 0 will do that */
4041 if (len) {
4042 s = SCARG(uap, start);
4043 e = s + len;
4044 if (e < s) {
4045 error = EINVAL;
4046 goto out;
4047 }
4048 } else {
4049 e = 0;
4050 s = 0;
4051 }
4052
4053 vp = fp->f_data;
4054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4055 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e);
4056 VOP_UNLOCK(vp);
4057 out:
4058 fd_putfile(SCARG(uap, fd));
4059 return (error);
4060 }
4061
4062 /*
4063 * Sync the data of an open file.
4064 */
4065 /* ARGSUSED */
4066 int
4067 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval)
4068 {
4069 /* {
4070 syscallarg(int) fd;
4071 } */
4072 struct vnode *vp;
4073 file_t *fp;
4074 int error;
4075
4076 /* fd_getvnode() will use the descriptor for us */
4077 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4078 return (error);
4079 if ((fp->f_flag & FWRITE) == 0) {
4080 fd_putfile(SCARG(uap, fd));
4081 return (EBADF);
4082 }
4083 vp = fp->f_data;
4084 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
4085 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0);
4086 VOP_UNLOCK(vp);
4087 fd_putfile(SCARG(uap, fd));
4088 return (error);
4089 }
4090
4091 /*
4092 * Rename files, (standard) BSD semantics frontend.
4093 */
4094 /* ARGSUSED */
4095 int
4096 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval)
4097 {
4098 /* {
4099 syscallarg(const char *) from;
4100 syscallarg(const char *) to;
4101 } */
4102
4103 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4104 SCARG(uap, to), UIO_USERSPACE, 0));
4105 }
4106
4107 int
4108 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap,
4109 register_t *retval)
4110 {
4111 /* {
4112 syscallarg(int) fromfd;
4113 syscallarg(const char *) from;
4114 syscallarg(int) tofd;
4115 syscallarg(const char *) to;
4116 } */
4117
4118 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from),
4119 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0));
4120 }
4121
4122 /*
4123 * Rename files, POSIX semantics frontend.
4124 */
4125 /* ARGSUSED */
4126 int
4127 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval)
4128 {
4129 /* {
4130 syscallarg(const char *) from;
4131 syscallarg(const char *) to;
4132 } */
4133
4134 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD,
4135 SCARG(uap, to), UIO_USERSPACE, 1));
4136 }
4137
4138 /*
4139 * Rename files. Source and destination must either both be directories,
4140 * or both not be directories. If target is a directory, it must be empty.
4141 * If `from' and `to' refer to the same object, the value of the `retain'
4142 * argument is used to determine whether `from' will be
4143 *
4144 * (retain == 0) deleted unless `from' and `to' refer to the same
4145 * object in the file system's name space (BSD).
4146 * (retain == 1) always retained (POSIX).
4147 *
4148 * XXX Synchronize with nfsrv_rename in nfs_serv.c.
4149 */
4150 int
4151 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
4152 {
4153 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain);
4154 }
4155
/*
 * Common implementation behind the rename entry points.
 *
 * Looks up `from' relative to fromfd and `to' relative to tofd, rejects
 * `.' and `..' as the final component of either path, checks that both
 * parent directories are on the same mount, takes the VFS rename lock,
 * re-looks-up the target with tdvp locked, and calls VOP_RENAME.
 *
 * l      - calling lwp; may be NULL only if both descriptors are
 *          AT_FDCWD (asserted below).
 * seg    - address space of the two path strings, as given to
 *          pathbuf_maybe_copyin().
 * retain - when from and to turn out to be the same vnode, non-zero
 *          makes the call succeed without renaming anything; zero does
 *          so only if the parent directory and final names match too.
 *
 * Returns 0 on success or an errno value.
 */
4156 static int
4157 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd,
4158 const char *to, enum uio_seg seg, int retain)
4159 {
4160 struct pathbuf *fpb, *tpb;
4161 struct nameidata fnd, tnd;
4162 struct vnode *fdvp, *fvp;
4163 struct vnode *tdvp, *tvp;
4164 struct mount *mp, *tmp;
4165 int error;
4166
4167 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD));
4168
4169 error = pathbuf_maybe_copyin(from, seg, &fpb);
4170 if (error)
4171 goto out0;
4172 KASSERT(fpb != NULL);
4173
4174 error = pathbuf_maybe_copyin(to, seg, &tpb);
4175 if (error)
4176 goto out1;
4177 KASSERT(tpb != NULL);
4178
4179 /*
4180 * Lookup from.
4181 *
4182 * XXX LOCKPARENT is wrong because we don't actually want it
4183 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is
4184 * insane, so for the time being we need to leave it like this.
4185 */
4186 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb);
4187 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0)
4188 goto out2;
4189
4190 /*
4191 * Pull out the important results of the lookup, fdvp and fvp.
4192 * Of course, fvp is bogus because we're about to unlock fdvp.
4193 */
4194 fdvp = fnd.ni_dvp;
4195 fvp = fnd.ni_vp;
4196 KASSERT(fdvp != NULL);
4197 KASSERT(fvp != NULL);
4198 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE));
4199
4200 /*
4201 * Make sure neither fdvp nor fvp is locked.
4202 */
4203 if (fdvp != fvp)
4204 VOP_UNLOCK(fdvp);
4205 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4206 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4207
4208 /*
4209 * Reject renaming `.' and `..'. Can't do this until after
4210 * namei because we need namei's parsing to find the final
4211 * component name. (namei should just leave us with the final
4212 * component name and not look it up itself, but anyway...)
4213 *
4214 * This was here before because we used to relookup from
4215 * instead of to and relookup requires the caller to check
4216 * this, but now file systems may depend on this check, so we
4217 * must retain it until the file systems are all rototilled.
4218 */
4219 if (((fnd.ni_cnd.cn_namelen == 1) &&
4220 (fnd.ni_cnd.cn_nameptr[0] == '.')) ||
4221 ((fnd.ni_cnd.cn_namelen == 2) &&
4222 (fnd.ni_cnd.cn_nameptr[0] == '.') &&
4223 (fnd.ni_cnd.cn_nameptr[1] == '.'))) {
4224 error = EINVAL; /* XXX EISDIR? */
4225 goto abort0;
4226 }
4227
4228 /*
4229 * Lookup to.
4230 *
4231 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using
4232 * fvp here to decide whether to add CREATEDIR is a load of
4233 * bollocks because fvp might be the wrong node by now, since
4234 * fdvp is unlocked.
4235 *
4236 * XXX Why not pass CREATEDIR always?
4237 */
4238 NDINIT(&tnd, RENAME,
4239 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME |
4240 ((fvp->v_type == VDIR)? CREATEDIR : 0)),
4241 tpb);
4242 if ((error = fd_nameiat(l, tofd, &tnd)) != 0)
4243 goto abort0;
4244
4245 /*
4246 * Pull out the important results of the lookup, tdvp and tvp.
4247 * Of course, tvp is bogus because we're about to unlock tdvp.
4248 */
4249 tdvp = tnd.ni_dvp;
4250 tvp = tnd.ni_vp;
4251 KASSERT(tdvp != NULL);
4252 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE));
4253
4254 /*
4255 * Make sure neither tdvp nor tvp is locked.
4256 */
4257 if (tdvp != tvp)
4258 VOP_UNLOCK(tdvp);
4259 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4260 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4261
4262 /*
4263 * Reject renaming onto `.' or `..'. relookup is unhappy with
4264 * these, which is why we must do this here. Once upon a time
4265 * we relooked up from instead of to, and consequently didn't
4266 * need this check, but now that we relookup to instead of
4267 * from, we need this; and we shall need it forever forward
4268 * until the VOP_RENAME protocol changes, because file systems
4269 * will no doubt begin to depend on this check.
4270 */
4271 if (((tnd.ni_cnd.cn_namelen == 1) &&
4272 (tnd.ni_cnd.cn_nameptr[0] == '.')) ||
4273 ((tnd.ni_cnd.cn_namelen == 2) &&
4274 (tnd.ni_cnd.cn_nameptr[0] == '.') &&
4275 (tnd.ni_cnd.cn_nameptr[1] == '.'))) {
4276 error = EINVAL; /* XXX EISDIR? */
4277 goto abort1;
4278 }
4279
4280 /*
4281 * Get the mount point. If the file system has been unmounted,
4282 * which it may be because we're not holding any vnode locks,
4283 * then v_mount will be NULL. We're not really supposed to
4284 * read v_mount without holding the vnode lock, but since we
4285 * have fdvp referenced, if fdvp->v_mount changes then at worst
4286 * it will be set to NULL, not changed to another mount point.
4287 * And, of course, since it is up to the file system to
4288 * determine the real lock order, we can't lock both fdvp and
4289 * tdvp at the same time.
4290 */
4291 mp = fdvp->v_mount;
4292 if (mp == NULL) {
4293 error = ENOENT;
4294 goto abort1;
4295 }
4296
4297 /*
4298 * Make sure the mount points match. Again, although we don't
4299 * hold any vnode locks, the v_mount fields may change -- but
4300 * at worst they will change to NULL, so this will never become
4301 * a cross-device rename, because we hold vnode references.
4302 *
4303 * XXX Because nothing is locked and the compiler may reorder
4304 * things here, unmounting the file system at an inopportune
4305 * moment may cause rename to fail with EXDEV when it really
4306 * should fail with ENOENT.
4307 */
4308 tmp = tdvp->v_mount;
4309 if (tmp == NULL) {
4310 error = ENOENT;
4311 goto abort1;
4312 }
4313
4314 if (mp != tmp) {
4315 error = EXDEV;
4316 goto abort1;
4317 }
4318
4319 /*
4320 * Take the vfs rename lock to avoid cross-directory screw cases.
4321 * Nothing is locked currently, so taking this lock is safe.
4322 */
4323 error = VFS_RENAMELOCK_ENTER(mp);
4324 if (error)
4325 goto abort1;
4326
4327 /*
4328 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced,
4329 * and nothing is locked except for the vfs rename lock.
4330 *
4331 * The next step is a little rain dance to conform to the
4332 * insane lock protocol, even though it does nothing to ward
4333 * off race conditions.
4334 *
4335 * We need tdvp and tvp to be locked. However, because we have
4336 * unlocked tdvp in order to hold no locks while we take the
4337 * vfs rename lock, tvp may be wrong here, and we can't safely
4338 * lock it even if the sensible file systems will just unlock
4339 * it straight away. Consequently, we must lock tdvp and then
4340 * relookup tvp to get it locked.
4341 *
4342 * Finally, because the VOP_RENAME protocol is brain-damaged
4343 * and various file systems insanely depend on the semantics of
4344 * this brain damage, the lookup of to must be the last lookup
4345 * before VOP_RENAME.
4346 */
4347 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
4348 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0);
4349 if (error)
4350 goto abort2;
4351
4352 /*
4353 * Drop the old tvp and pick up the new one -- which might be
4354 * the same, but that doesn't matter to us. After this, tdvp
4355 * and tvp should both be locked.
4356 */
4357 if (tvp != NULL)
4358 vrele(tvp);
4359 tvp = tnd.ni_vp;
4360 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4361 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4362
4363 /*
4364 * The old do_sys_rename had various consistency checks here
4365 * involving fvp and tvp. fvp is bogus already here, and tvp
4366 * will become bogus soon in any sensible file system, so the
4367 * only purpose in putting these checks here is to give lip
4368 * service to these screw cases and to acknowledge that they
4369 * exist, not actually to handle them, but here you go
4370 * anyway...
4371 */
4372
4373 /*
4374 * Acknowledge that directories and non-directories aren't
4375 * supposed to mix.
4376 */
4377 if (tvp != NULL) {
4378 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) {
4379 error = ENOTDIR;
4380 goto abort3;
4381 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) {
4382 error = EISDIR;
4383 goto abort3;
4384 }
4385 }
4386
4387 /*
4388 * Acknowledge some random screw case, among the dozens that
4389 * might arise.
4390 */
4391 if (fvp == tdvp) {
4392 error = EINVAL;
4393 goto abort3;
4394 }
4395
4396 /*
4397 * Acknowledge that POSIX has a wacky screw case.
4398 *
4399 * XXX Eventually the retain flag needs to be passed on to
4400 * VOP_RENAME.
4401 */
4402 if (fvp == tvp) {
4403 if (retain) {
4404 error = 0;
4405 goto abort3;
4406 } else if ((fdvp == tdvp) &&
4407 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) &&
4408 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr,
4409 fnd.ni_cnd.cn_namelen))) {
4410 error = 0;
4411 goto abort3;
4412 }
4413 }
4414
4415 /*
4416 * Make sure veriexec can screw us up. (But a race can screw
4417 * up veriexec, of course -- remember, fvp and (soon) tvp are
4418 * bogus.)
4419 */
4420 #if NVERIEXEC > 0
4421 {
4422 char *f1, *f2;
4423 size_t f1_len;
4424 size_t f2_len;
4425
4426 f1_len = fnd.ni_cnd.cn_namelen + 1;
4427 f1 = kmem_alloc(f1_len, KM_SLEEP);
4428 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len);
4429
4430 f2_len = tnd.ni_cnd.cn_namelen + 1;
4431 f2 = kmem_alloc(f2_len, KM_SLEEP);
4432 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len);
4433
4434 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);
4435
4436 kmem_free(f1, f1_len);
4437 kmem_free(f2, f2_len);
4438
4439 if (error)
4440 goto abort3;
4441 }
4442 #endif /* NVERIEXEC > 0 */
4443
4444 /*
4445 * All ready. Incant the rename vop.
4446 */
4447 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4448 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4449 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
4450 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE));
4451 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd);
4452
4453 /*
4454 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks
4455 * tdvp and tvp. But we can't assert any of that.
4456 */
4457 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */
4458 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */
4459 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */
4460 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */
4461
4462 /*
4463 * So all we have left to do is to drop the rename lock and
4464 * destroy the pathbufs.
4465 */
4466 VFS_RENAMELOCK_EXIT(mp);
4467 goto out2;
4468
/*
 * Error unwinding.  The labels fall through in reverse order of
 * acquisition: abort3 unlocks tvp (when it is distinct from tdvp),
 * abort2 unlocks tdvp and drops the vfs rename lock, abort1 aborts the
 * `to' operation and releases tdvp and tvp, abort0 aborts the `from'
 * operation and releases fdvp and fvp, and out2/out1 destroy the two
 * pathbufs.
 */
4469 abort3: if ((tvp != NULL) && (tvp != tdvp))
4470 VOP_UNLOCK(tvp);
4471 abort2: VOP_UNLOCK(tdvp);
4472 VFS_RENAMELOCK_EXIT(mp);
4473 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd);
4474 vrele(tdvp);
4475 if (tvp != NULL)
4476 vrele(tvp);
4477 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd);
4478 vrele(fdvp);
4479 vrele(fvp);
4480 out2: pathbuf_destroy(tpb);
4481 out1: pathbuf_destroy(fpb);
4482 out0: return error;
4483 }
4484
4485 /*
4486 * Make a directory file.
4487 */
4488 /* ARGSUSED */
4489 int
4490 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval)
4491 {
4492 /* {
4493 syscallarg(const char *) path;
4494 syscallarg(int) mode;
4495 } */
4496
4497 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path),
4498 SCARG(uap, mode), UIO_USERSPACE);
4499 }
4500
4501 int
4502 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap,
4503 register_t *retval)
4504 {
4505 /* {
4506 syscallarg(int) fd;
4507 syscallarg(const char *) path;
4508 syscallarg(int) mode;
4509 } */
4510
4511 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path),
4512 SCARG(uap, mode), UIO_USERSPACE);
4513 }
4514
4515
4516 int
4517 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg)
4518 {
4519 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE);
4520 }
4521
4522 static int
4523 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode,
4524 enum uio_seg seg)
4525 {
4526 struct proc *p = curlwp->l_proc;
4527 struct vnode *vp;
4528 struct vattr vattr;
4529 int error;
4530 struct pathbuf *pb;
4531 struct nameidata nd;
4532
4533 KASSERT(l != NULL || fdat == AT_FDCWD);
4534
4535 /* XXX bollocks, should pass in a pathbuf */
4536 error = pathbuf_maybe_copyin(path, seg, &pb);
4537 if (error) {
4538 return error;
4539 }
4540
4541 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb);
4542
4543 if ((error = fd_nameiat(l, fdat, &nd)) != 0) {
4544 pathbuf_destroy(pb);
4545 return (error);
4546 }
4547 vp = nd.ni_vp;
4548 if (vp != NULL) {
4549 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
4550 if (nd.ni_dvp == vp)
4551 vrele(nd.ni_dvp);
4552 else
4553 vput(nd.ni_dvp);
4554 vrele(vp);
4555 pathbuf_destroy(pb);
4556 return (EEXIST);
4557 }
4558 vattr_null(&vattr);
4559 vattr.va_type = VDIR;
4560 /* We will read cwdi->cwdi_cmask unlocked. */
4561 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask;
4562 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
4563 if (!error)
4564 vrele(nd.ni_vp);
4565 vput(nd.ni_dvp);
4566 pathbuf_destroy(pb);
4567 return (error);
4568 }
4569
4570 /*
4571 * Remove a directory file.
4572 */
4573 /* ARGSUSED */
4574 int
4575 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval)
4576 {
4577 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path),
4578 AT_REMOVEDIR, UIO_USERSPACE);
4579 }
4580
4581 /*
4582 * Read a block of directory entries in a file system independent format.
4583 */
4584 int
4585 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval)
4586 {
4587 /* {
4588 syscallarg(int) fd;
4589 syscallarg(char *) buf;
4590 syscallarg(size_t) count;
4591 } */
4592 file_t *fp;
4593 int error, done;
4594
4595 /* fd_getvnode() will use the descriptor for us */
4596 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
4597 return (error);
4598 if ((fp->f_flag & FREAD) == 0) {
4599 error = EBADF;
4600 goto out;
4601 }
4602 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE,
4603 SCARG(uap, count), &done, l, 0, 0);
4604 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error);
4605 *retval = done;
4606 out:
4607 fd_putfile(SCARG(uap, fd));
4608 return (error);
4609 }
4610
4611 /*
4612 * Set the mode mask for creation of filesystem nodes.
4613 */
4614 int
4615 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval)
4616 {
4617 /* {
4618 syscallarg(mode_t) newmask;
4619 } */
4620 struct proc *p = l->l_proc;
4621 struct cwdinfo *cwdi;
4622
4623 /*
4624 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
4625 * important is that we serialize changes to the mask. The
4626 * rw_exit() will issue a write memory barrier on our behalf,
4627 * and force the changes out to other CPUs (as it must use an
4628 * atomic operation, draining the local CPU's store buffers).
4629 */
4630 cwdi = p->p_cwdi;
4631 rw_enter(&cwdi->cwdi_lock, RW_WRITER);
4632 *retval = cwdi->cwdi_cmask;
4633 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
4634 rw_exit(&cwdi->cwdi_lock);
4635
4636 return (0);
4637 }
4638
4639 int
4640 dorevoke(struct vnode *vp, kauth_cred_t cred)
4641 {
4642 struct vattr vattr;
4643 int error, fs_decision;
4644
4645 vn_lock(vp, LK_SHARED | LK_RETRY);
4646 error = VOP_GETATTR(vp, &vattr, cred);
4647 VOP_UNLOCK(vp);
4648 if (error != 0)
4649 return error;
4650 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM;
4651 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL,
4652 fs_decision);
4653 if (!error)
4654 VOP_REVOKE(vp, REVOKEALL);
4655 return (error);
4656 }
4657
4658 /*
4659 * Void all references to file by ripping underlying filesystem
4660 * away from vnode.
4661 */
4662 /* ARGSUSED */
4663 int
4664 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval)
4665 {
4666 /* {
4667 syscallarg(const char *) path;
4668 } */
4669 struct vnode *vp;
4670 int error;
4671
4672 error = namei_simple_user(SCARG(uap, path),
4673 NSM_FOLLOW_TRYEMULROOT, &vp);
4674 if (error != 0)
4675 return (error);
4676 error = dorevoke(vp, l->l_cred);
4677 vrele(vp);
4678 return (error);
4679 }
4680