1 /* $NetBSD: vfs_syscalls.c,v 1.571 2025/07/16 19:14:13 kre Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.571 2025/07/16 19:14:13 kre Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/types.h> 82 83 #include <sys/atomic.h> 84 #include <sys/buf.h> 85 #include <sys/compat_stub.h> 86 #include <sys/dirent.h> 87 #include <sys/event.h> 88 #include <sys/extattr.h> 89 #include <sys/fcntl.h> 90 #include <sys/file.h> 91 #ifdef FILEASSOC 92 #include <sys/fileassoc.h> 93 #endif /* FILEASSOC */ 94 #include <sys/filedesc.h> 95 #include <sys/fstrans.h> 96 #include <sys/kauth.h> 97 #include <sys/kernel.h> 98 #include <sys/kmem.h> 99 #include <sys/ktrace.h> 100 #include <sys/module.h> 101 #include <sys/mount.h> 102 #include <sys/namei.h> 103 #include <sys/proc.h> 104 #include <sys/quota.h> 105 #include <sys/quotactl.h> 106 #include <sys/stat.h> 107 #include <sys/syscallargs.h> 108 #include <sys/sysctl.h> 109 #include <sys/systm.h> 110 #include <sys/uio.h> 111 #include <sys/verified_exec.h> 112 #include <sys/vfs_syscalls.h> 113 #include <sys/vnode.h> 114 115 #include <miscfs/genfs/genfs.h> 116 #include <miscfs/specfs/specdev.h> 117 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 #include <nfs/nfsproto.h> 121 #include <nfs/rpcv2.h> 122 123 /* XXX this shouldn't be here */ 124 #ifndef OFF_T_MAX 125 #define OFF_T_MAX __type_max(off_t) 126 #endif 127 128 static int change_flags(struct vnode *, u_long, struct lwp *); 129 static int change_mode(struct vnode *, int, struct lwp *); 130 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 131 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 132 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 133 enum uio_seg); 134 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 135 static 
int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 136 enum uio_seg); 137 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 138 enum uio_seg, int); 139 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 140 size_t, register_t *); 141 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 142 143 static int fd_nameiat(struct lwp *, int, struct nameidata *); 144 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 145 namei_simple_flags_t, struct vnode **); 146 147 /* 148 * This table is used to maintain compatibility with 4.3BSD 149 * and NetBSD 0.9 mount syscalls - and possibly other systems. 150 * Note, the order is important! 151 * 152 * Do not modify this table. It should only contain filesystems 153 * supported by NetBSD 0.9 and 4.3BSD. 154 */ 155 const char * const mountcompatnames[] = { 156 NULL, /* 0 = MOUNT_NONE */ 157 MOUNT_FFS, /* 1 = MOUNT_UFS */ 158 MOUNT_NFS, /* 2 */ 159 MOUNT_MFS, /* 3 */ 160 MOUNT_MSDOS, /* 4 */ 161 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 162 MOUNT_FDESC, /* 6 */ 163 MOUNT_KERNFS, /* 7 */ 164 NULL, /* 8 = MOUNT_DEVFS */ 165 MOUNT_AFS, /* 9 */ 166 }; 167 168 const u_int nmountcompatnames = __arraycount(mountcompatnames); 169 170 /* 171 * Filter event method for EVFILT_FS. 
172 */ 173 static struct klist fs_klist; 174 static kmutex_t fs_klist_lock; 175 176 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 177 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 178 179 void 180 vfs_evfilt_fs_init(void) 181 { 182 183 klist_init(&fs_klist); 184 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 185 } 186 187 static int 188 filt_fsattach(struct knote *kn) 189 { 190 191 mutex_enter(&fs_klist_lock); 192 kn->kn_flags |= EV_CLEAR; 193 klist_insert(&fs_klist, kn); 194 mutex_exit(&fs_klist_lock); 195 196 return 0; 197 } 198 199 static void 200 filt_fsdetach(struct knote *kn) 201 { 202 203 mutex_enter(&fs_klist_lock); 204 klist_remove(&fs_klist, kn); 205 mutex_exit(&fs_klist_lock); 206 } 207 208 static int 209 filt_fs(struct knote *kn, long hint) 210 { 211 int rv; 212 213 if (hint & NOTE_SUBMIT) { 214 KASSERT(mutex_owned(&fs_klist_lock)); 215 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 216 } else { 217 mutex_enter(&fs_klist_lock); 218 } 219 220 rv = (kn->kn_fflags != 0); 221 222 if ((hint & NOTE_SUBMIT) == 0) { 223 mutex_exit(&fs_klist_lock); 224 } 225 226 return rv; 227 } 228 229 /* referenced in kern_event.c */ 230 const struct filterops fs_filtops = { 231 .f_flags = FILTEROP_MPSAFE, 232 .f_attach = filt_fsattach, 233 .f_detach = filt_fsdetach, 234 .f_event = filt_fs, 235 }; 236 237 static int 238 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 239 { 240 file_t *dfp; 241 int error; 242 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 243 244 if (fdat != AT_FDCWD && path[0] != '/') { 245 if ((error = fd_getvnode(fdat, &dfp)) != 0) 246 goto out; 247 248 NDAT(ndp, dfp->f_vnode); 249 } 250 251 error = namei(ndp); 252 253 if (fdat != AT_FDCWD && path[0] != '/') 254 fd_putfile(fdat); 255 out: 256 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 257 return error; 258 } 259 260 static int 261 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 262 namei_simple_flags_t sflags, struct vnode **vp_ret) 263 { 264 file_t *dfp; 265 struct vnode 
*dvp; 266 int error; 267 struct pathbuf *pb; 268 const char *p; 269 270 error = pathbuf_copyin(path, &pb); 271 if (error) { 272 return error; 273 } 274 p = pathbuf_stringcopy_get(pb); 275 276 if (fdat != AT_FDCWD && p[0] != '/') { 277 if ((error = fd_getvnode(fdat, &dfp)) != 0) 278 goto out; 279 280 dvp = dfp->f_vnode; 281 } else { 282 dvp = NULL; 283 } 284 285 error = nameiat_simple(dvp, pb, sflags, vp_ret); 286 287 if (fdat != AT_FDCWD && p[0] != '/') 288 fd_putfile(fdat); 289 290 out: 291 pathbuf_stringcopy_put(pb, p); 292 pathbuf_destroy(pb); 293 294 return error; 295 } 296 297 static int 298 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 299 { 300 int error; 301 302 fp->f_flag = flags & FMASK; 303 fp->f_type = DTYPE_VNODE; 304 fp->f_ops = &vnops; 305 fp->f_vnode = vp; 306 307 if (flags & (O_EXLOCK | O_SHLOCK)) { 308 struct flock lf; 309 int type; 310 311 lf.l_whence = SEEK_SET; 312 lf.l_start = 0; 313 lf.l_len = 0; 314 if (flags & O_EXLOCK) 315 lf.l_type = F_WRLCK; 316 else 317 lf.l_type = F_RDLCK; 318 type = F_FLOCK; 319 if ((flags & FNONBLOCK) == 0) 320 type |= F_WAIT; 321 VOP_UNLOCK(vp); 322 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 323 if (error) { 324 (void) vn_close(vp, fp->f_flag, fp->f_cred); 325 fd_abort(l->l_proc, fp, indx); 326 return error; 327 } 328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 329 atomic_or_uint(&fp->f_flag, FHASLOCK); 330 } 331 if (flags & O_CLOEXEC) 332 fd_set_exclose(l, indx, true); 333 if (flags & O_CLOFORK) 334 fd_set_foclose(l, indx, true); 335 return 0; 336 } 337 338 static int 339 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 340 void *data, size_t *data_len) 341 { 342 struct mount *mp; 343 int error = 0, saved_flags; 344 345 mp = vp->v_mount; 346 saved_flags = mp->mnt_flag; 347 348 /* We can operate only on VV_ROOT nodes. 
*/ 349 if ((vp->v_vflag & VV_ROOT) == 0) { 350 error = EINVAL; 351 goto out; 352 } 353 354 /* 355 * We only allow the filesystem to be reloaded if it 356 * is currently mounted read-only. Additionally, we 357 * prevent read-write to read-only downgrades. 358 */ 359 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 360 (mp->mnt_flag & MNT_RDONLY) == 0 && 361 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 362 error = EOPNOTSUPP; /* Needs translation */ 363 goto out; 364 } 365 366 /* 367 * Enabling MNT_UNION requires a covered mountpoint and 368 * must not happen on the root mount. 369 */ 370 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 371 error = EOPNOTSUPP; 372 goto out; 373 } 374 375 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 376 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 377 if (error) 378 goto out; 379 380 error = vfs_suspend(mp, 0); 381 if (error) 382 goto out; 383 384 mutex_enter(mp->mnt_updating); 385 386 mp->mnt_flag &= ~MNT_OP_FLAGS; 387 mp->mnt_flag |= flags & MNT_OP_FLAGS; 388 389 /* 390 * Set the mount level flags. 391 */ 392 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 393 if ((flags & MNT_RDONLY)) 394 mp->mnt_iflag |= IMNT_WANTRDONLY; 395 else 396 mp->mnt_iflag |= IMNT_WANTRDWR; 397 } 398 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 399 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 400 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 401 mp->mnt_flag &= ~MNT_RDONLY; 402 403 error = VFS_MOUNT(mp, path, data, data_len); 404 405 if (error && data != NULL) { 406 int error2; 407 408 /* 409 * Update failed; let's try and see if it was an 410 * export request. For compat with 3.0 and earlier. 411 */ 412 error2 = vfs_hooks_reexport(mp, path, data); 413 414 /* 415 * Only update error code if the export request was 416 * understood but some problem occurred while 417 * processing it. 
418 */ 419 if (error2 != EJUSTRETURN) 420 error = error2; 421 } 422 423 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 424 mp->mnt_flag |= MNT_RDONLY; 425 if (error) 426 mp->mnt_flag = saved_flags; 427 mp->mnt_flag &= ~MNT_OP_FLAGS; 428 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 429 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 430 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 431 vfs_syncer_add_to_worklist(mp); 432 } else { 433 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 434 vfs_syncer_remove_from_worklist(mp); 435 } 436 mutex_exit(mp->mnt_updating); 437 vfs_resume(mp); 438 439 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 440 (flags & MNT_EXTATTR)) { 441 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 442 NULL, 0, NULL) != 0) { 443 printf("%s: failed to start extattr, error = %d", 444 mp->mnt_stat.f_mntonname, error); 445 mp->mnt_flag &= ~MNT_EXTATTR; 446 } 447 } 448 449 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 450 !(flags & MNT_EXTATTR)) { 451 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 452 NULL, 0, NULL) != 0) { 453 printf("%s: failed to stop extattr, error = %d", 454 mp->mnt_stat.f_mntonname, error); 455 mp->mnt_flag |= MNT_RDONLY; 456 } 457 } 458 out: 459 return (error); 460 } 461 462 static int 463 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 464 struct vfsops **vfsops) 465 { 466 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 467 int error; 468 469 if (type_seg == UIO_USERSPACE) { 470 /* Copy file-system type from userspace. */ 471 error = copyinstr(fstype, fstypename, sizeof(fstypename), 472 NULL); 473 } else { 474 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 475 KASSERT(error == 0); 476 } 477 478 if (error) { 479 /* 480 * Historically, filesystem types were identified by numbers. 481 * If we get an integer for the filesystem type instead of a 482 * string, we check to see if it matches one of the historic 483 * filesystem types. 
484 */ 485 u_long fsindex = (u_long)fstype; 486 if (fsindex >= nmountcompatnames || 487 mountcompatnames[fsindex] == NULL) 488 return ENODEV; 489 strlcpy(fstypename, mountcompatnames[fsindex], 490 sizeof(fstypename)); 491 } 492 493 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 494 if (strcmp(fstypename, "ufs") == 0) 495 fstypename[0] = 'f'; 496 497 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 498 return 0; 499 500 /* If we can autoload a vfs module, try again */ 501 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 502 503 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 504 return 0; 505 506 return ENODEV; 507 } 508 509 static int 510 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 511 void *data, size_t *data_len) 512 { 513 struct mount *mp; 514 int error; 515 516 /* If MNT_GETARGS is specified, it should be the only flag. */ 517 if (flags & ~MNT_GETARGS) 518 return EINVAL; 519 520 mp = vp->v_mount; 521 522 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 523 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 524 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 525 if (error) 526 return error; 527 528 if ((vp->v_vflag & VV_ROOT) == 0) 529 return EINVAL; 530 531 if (vfs_busy(mp)) 532 return EPERM; 533 534 mutex_enter(mp->mnt_updating); 535 mp->mnt_flag &= ~MNT_OP_FLAGS; 536 mp->mnt_flag |= MNT_GETARGS; 537 error = VFS_MOUNT(mp, path, data, data_len); 538 mp->mnt_flag &= ~MNT_OP_FLAGS; 539 mutex_exit(mp->mnt_updating); 540 541 vfs_unbusy(mp); 542 return (error); 543 } 544 545 int 546 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, 547 register_t *retval) 548 { 549 /* { 550 syscallarg(const char *) type; 551 syscallarg(const char *) path; 552 syscallarg(int) flags; 553 syscallarg(void *) data; 554 syscallarg(size_t) data_len; 555 } */ 556 557 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, 558 SCARG(uap, path), SCARG(uap, flags), 559 SCARG(uap, data), UIO_USERSPACE, SCARG(uap, data_len), 560 retval); 561 } 562 563 int 564 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 565 const char *path, int flags, 566 void *data, enum uio_seg data_seg, size_t data_len, 567 register_t *retval) 568 { 569 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 570 struct vnode *vp; 571 void *data_buf = data; 572 bool vfsopsrele = false; 573 size_t alloc_sz = 0; 574 int error; 575 576 /* 577 * Get vnode to be covered 578 */ 579 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 580 if (error != 0) { 581 vp = NULL; 582 goto done; 583 } 584 585 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 586 vfsops = vp->v_mount->mnt_op; 587 } else { 588 /* 'type' is userspace */ 589 error = mount_get_vfsops(type, type_seg, &vfsops); 590 if (error != 0) 591 goto done; 592 vfsopsrele = true; 593 } 594 595 /* 596 * We allow data to be NULL, even for userspace. Some fs's don't need 597 * it. The others will handle NULL. 
598 */ 599 if (data != NULL && data_seg == UIO_USERSPACE) { 600 if (data_len == 0) { 601 /* No length supplied, use default for filesystem */ 602 data_len = vfsops->vfs_min_mount_data; 603 604 /* 605 * Hopefully a longer buffer won't make copyin() fail. 606 * For compatibility with 3.0 and earlier. 607 */ 608 if (flags & MNT_UPDATE 609 && data_len < sizeof (struct mnt_export_args30)) 610 data_len = sizeof (struct mnt_export_args30); 611 } 612 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 613 error = EINVAL; 614 goto done; 615 } 616 alloc_sz = data_len; 617 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 618 619 /* NFS needs the buffer even for mnt_getargs .... */ 620 error = copyin(data, data_buf, data_len); 621 if (error != 0) 622 goto done; 623 } 624 625 if (flags & MNT_GETARGS) { 626 if (data_len == 0) { 627 error = EINVAL; 628 goto done; 629 } 630 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 631 if (error != 0) 632 goto done; 633 if (data_seg == UIO_USERSPACE) 634 error = copyout(data_buf, data, data_len); 635 *retval = data_len; 636 } else if (flags & MNT_UPDATE) { 637 error = mount_update(l, vp, path, flags, data_buf, &data_len); 638 } else { 639 /* Locking is handled internally in mount_domount(). */ 640 KASSERT(vfsopsrele == true); 641 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 642 &data_len); 643 vfsopsrele = false; 644 } 645 if (!error) { 646 mutex_enter(&fs_klist_lock); 647 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 648 mutex_exit(&fs_klist_lock); 649 } 650 651 done: 652 if (vfsopsrele) 653 vfs_delref(vfsops); 654 if (vp != NULL) { 655 vrele(vp); 656 } 657 if (data_buf != data) 658 kmem_free(data_buf, alloc_sz); 659 return (error); 660 } 661 662 /* 663 * Unmount a file system. 664 * 665 * Note: unmount takes a path to the vnode mounted on as argument, 666 * not special file (as before). 
667 */ 668 /* ARGSUSED */ 669 int 670 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, 671 register_t *retval) 672 { 673 /* { 674 syscallarg(const char *) path; 675 syscallarg(int) flags; 676 } */ 677 struct vnode *vp; 678 struct mount *mp; 679 int error; 680 struct pathbuf *pb; 681 struct nameidata nd; 682 683 error = pathbuf_copyin(SCARG(uap, path), &pb); 684 if (error) { 685 return error; 686 } 687 688 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 689 if ((error = namei(&nd)) != 0) { 690 pathbuf_destroy(pb); 691 return error; 692 } 693 vp = nd.ni_vp; 694 pathbuf_destroy(pb); 695 696 mp = vp->v_mount; 697 vfs_ref(mp); 698 VOP_UNLOCK(vp); 699 700 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 701 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 702 if (error) { 703 vrele(vp); 704 vfs_rele(mp); 705 return (error); 706 } 707 708 /* 709 * Don't allow unmounting the root file system. 710 */ 711 if (mp->mnt_flag & MNT_ROOTFS) { 712 vrele(vp); 713 vfs_rele(mp); 714 return (EINVAL); 715 } 716 717 /* 718 * Must be the root of the filesystem 719 */ 720 if ((vp->v_vflag & VV_ROOT) == 0) { 721 vrele(vp); 722 vfs_rele(mp); 723 return (EINVAL); 724 } 725 726 vrele(vp); 727 error = dounmount(mp, SCARG(uap, flags), l); 728 vfs_rele(mp); 729 if (!error) { 730 mutex_enter(&fs_klist_lock); 731 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 732 mutex_exit(&fs_klist_lock); 733 } 734 return error; 735 } 736 737 /* 738 * Sync each mounted filesystem. 
739 */ 740 #ifdef DEBUG 741 int syncprt = 0; 742 struct ctldebug debug0 = { "syncprt", &syncprt }; 743 #endif 744 745 void 746 do_sys_sync(struct lwp *l) 747 { 748 mount_iterator_t *iter; 749 struct mount *mp; 750 int asyncflag; 751 752 mountlist_iterator_init(&iter); 753 while ((mp = mountlist_iterator_next(iter)) != NULL) { 754 mutex_enter(mp->mnt_updating); 755 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 756 /* 757 * Temporarily clear the MNT_ASYNC flags so that 758 * bwrite() doesnt convert the sync writes to 759 * delayed writes. 760 */ 761 asyncflag = mp->mnt_flag & MNT_ASYNC; 762 mp->mnt_flag &= ~MNT_ASYNC; 763 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 764 mp->mnt_flag |= asyncflag; 765 } 766 mutex_exit(mp->mnt_updating); 767 } 768 mountlist_iterator_destroy(iter); 769 #ifdef DEBUG 770 if (syncprt) 771 vfs_bufstats(); 772 #endif /* DEBUG */ 773 } 774 775 static bool 776 sync_vnode_filter(void *cookie, vnode_t *vp) 777 { 778 779 if (vp->v_numoutput > 0) { 780 ++*(int *)cookie; 781 } 782 return false; 783 } 784 785 int 786 vfs_syncwait(void) 787 { 788 int nbusy, nbusy_prev, iter; 789 struct vnode_iterator *vniter; 790 mount_iterator_t *mpiter; 791 struct mount *mp; 792 793 for (nbusy_prev = 0, iter = 0; iter < 20;) { 794 nbusy = 0; 795 mountlist_iterator_init(&mpiter); 796 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 797 vnode_t *vp __diagused; 798 vfs_vnode_iterator_init(mp, &vniter); 799 vp = vfs_vnode_iterator_next(vniter, 800 sync_vnode_filter, &nbusy); 801 KASSERT(vp == NULL); 802 vfs_vnode_iterator_destroy(vniter); 803 } 804 mountlist_iterator_destroy(mpiter); 805 806 if (nbusy == 0) 807 break; 808 if (nbusy_prev == 0) 809 nbusy_prev = nbusy; 810 printf("%d ", nbusy); 811 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 812 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 813 iter++; 814 else 815 nbusy_prev = nbusy; 816 } 817 818 if (nbusy) { 819 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 820 printf("giving up\nPrinting vnodes for 
busy buffers\n"); 821 mountlist_iterator_init(&mpiter); 822 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 823 vnode_t *vp; 824 vfs_vnode_iterator_init(mp, &vniter); 825 vp = vfs_vnode_iterator_next(vniter, 826 NULL, NULL); 827 mutex_enter(vp->v_interlock); 828 if (vp->v_numoutput > 0) 829 vprint(NULL, vp); 830 mutex_exit(vp->v_interlock); 831 vrele(vp); 832 vfs_vnode_iterator_destroy(vniter); 833 } 834 mountlist_iterator_destroy(mpiter); 835 #endif 836 } 837 838 return nbusy; 839 } 840 841 /* ARGSUSED */ 842 int 843 sys_sync(struct lwp *l, const void *v, register_t *retval) 844 { 845 846 do_sys_sync(l); 847 return (0); 848 } 849 850 /* 851 * Access or change filesystem quotas. 852 * 853 * (this is really 14 different calls bundled into one) 854 */ 855 856 static int 857 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 858 { 859 struct quotastat info_k; 860 int error; 861 862 /* ensure any padding bytes are cleared */ 863 memset(&info_k, 0, sizeof(info_k)); 864 865 error = vfs_quotactl_stat(mp, &info_k); 866 if (error) { 867 return error; 868 } 869 870 return copyout(&info_k, info_u, sizeof(info_k)); 871 } 872 873 static int 874 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 875 struct quotaidtypestat *info_u) 876 { 877 struct quotaidtypestat info_k; 878 int error; 879 880 /* ensure any padding bytes are cleared */ 881 memset(&info_k, 0, sizeof(info_k)); 882 883 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 884 if (error) { 885 return error; 886 } 887 888 return copyout(&info_k, info_u, sizeof(info_k)); 889 } 890 891 static int 892 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 893 struct quotaobjtypestat *info_u) 894 { 895 struct quotaobjtypestat info_k; 896 int error; 897 898 /* ensure any padding bytes are cleared */ 899 memset(&info_k, 0, sizeof(info_k)); 900 901 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 902 if (error) { 903 return error; 904 } 905 906 return copyout(&info_k, info_u, 
sizeof(info_k)); 907 } 908 909 static int 910 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 911 struct quotaval *val_u) 912 { 913 struct quotakey key_k; 914 struct quotaval val_k; 915 int error; 916 917 /* ensure any padding bytes are cleared */ 918 memset(&val_k, 0, sizeof(val_k)); 919 920 error = copyin(key_u, &key_k, sizeof(key_k)); 921 if (error) { 922 return error; 923 } 924 925 error = vfs_quotactl_get(mp, &key_k, &val_k); 926 if (error) { 927 return error; 928 } 929 930 return copyout(&val_k, val_u, sizeof(val_k)); 931 } 932 933 static int 934 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 935 const struct quotaval *val_u) 936 { 937 struct quotakey key_k; 938 struct quotaval val_k; 939 int error; 940 941 error = copyin(key_u, &key_k, sizeof(key_k)); 942 if (error) { 943 return error; 944 } 945 946 error = copyin(val_u, &val_k, sizeof(val_k)); 947 if (error) { 948 return error; 949 } 950 951 return vfs_quotactl_put(mp, &key_k, &val_k); 952 } 953 954 static int 955 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 956 { 957 struct quotakey key_k; 958 int error; 959 960 error = copyin(key_u, &key_k, sizeof(key_k)); 961 if (error) { 962 return error; 963 } 964 965 return vfs_quotactl_del(mp, &key_k); 966 } 967 968 static int 969 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 970 { 971 struct quotakcursor cursor_k; 972 int error; 973 974 /* ensure any padding bytes are cleared */ 975 memset(&cursor_k, 0, sizeof(cursor_k)); 976 977 error = vfs_quotactl_cursoropen(mp, &cursor_k); 978 if (error) { 979 return error; 980 } 981 982 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 983 } 984 985 static int 986 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 987 { 988 struct quotakcursor cursor_k; 989 int error; 990 991 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 992 if (error) { 993 return error; 994 } 995 996 return 
vfs_quotactl_cursorclose(mp, &cursor_k); 997 } 998 999 static int 1000 do_sys_quotactl_cursorskipidtype(struct mount *mp, 1001 struct quotakcursor *cursor_u, int idtype) 1002 { 1003 struct quotakcursor cursor_k; 1004 int error; 1005 1006 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1007 if (error) { 1008 return error; 1009 } 1010 1011 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 1012 if (error) { 1013 return error; 1014 } 1015 1016 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1017 } 1018 1019 static int 1020 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 1021 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 1022 unsigned *ret_u) 1023 { 1024 #define CGET_STACK_MAX 8 1025 struct quotakcursor cursor_k; 1026 struct quotakey stackkeys[CGET_STACK_MAX]; 1027 struct quotaval stackvals[CGET_STACK_MAX]; 1028 struct quotakey *keys_k; 1029 struct quotaval *vals_k; 1030 unsigned ret_k; 1031 int error; 1032 1033 if (maxnum > 128) { 1034 maxnum = 128; 1035 } 1036 1037 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1038 if (error) { 1039 return error; 1040 } 1041 1042 if (maxnum <= CGET_STACK_MAX) { 1043 keys_k = stackkeys; 1044 vals_k = stackvals; 1045 /* ensure any padding bytes are cleared */ 1046 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1047 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1048 } else { 1049 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1050 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1051 } 1052 1053 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1054 &ret_k); 1055 if (error) { 1056 goto fail; 1057 } 1058 1059 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1060 if (error) { 1061 goto fail; 1062 } 1063 1064 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1065 if (error) { 1066 goto fail; 1067 } 1068 1069 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1070 if (error) { 1071 goto fail; 1072 } 
1073 1074 /* do last to maximize the chance of being able to recover a failure */ 1075 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1076 1077 fail: 1078 if (keys_k != stackkeys) { 1079 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1080 } 1081 if (vals_k != stackvals) { 1082 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1083 } 1084 return error; 1085 } 1086 1087 static int 1088 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1089 int *ret_u) 1090 { 1091 struct quotakcursor cursor_k; 1092 int ret_k; 1093 int error; 1094 1095 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1096 if (error) { 1097 return error; 1098 } 1099 1100 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1101 if (error) { 1102 return error; 1103 } 1104 1105 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1106 if (error) { 1107 return error; 1108 } 1109 1110 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1111 } 1112 1113 static int 1114 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1115 { 1116 struct quotakcursor cursor_k; 1117 int error; 1118 1119 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1120 if (error) { 1121 return error; 1122 } 1123 1124 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1125 if (error) { 1126 return error; 1127 } 1128 1129 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1130 } 1131 1132 static int 1133 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1134 { 1135 char *path_k; 1136 int error; 1137 1138 /* XXX this should probably be a struct pathbuf */ 1139 path_k = PNBUF_GET(); 1140 error = copyin(path_u, path_k, PATH_MAX); 1141 if (error) { 1142 PNBUF_PUT(path_k); 1143 return error; 1144 } 1145 1146 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1147 1148 PNBUF_PUT(path_k); 1149 return error; 1150 } 1151 1152 static int 1153 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1154 { 1155 1156 return vfs_quotactl_quotaoff(mp, 
idtype); 1157 } 1158 1159 int 1160 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1161 { 1162 struct mount *mp; 1163 struct vnode *vp; 1164 int error; 1165 1166 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1167 if (error != 0) 1168 return (error); 1169 mp = vp->v_mount; 1170 1171 switch (args->qc_op) { 1172 case QUOTACTL_STAT: 1173 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1174 break; 1175 case QUOTACTL_IDTYPESTAT: 1176 error = do_sys_quotactl_idtypestat(mp, 1177 args->u.idtypestat.qc_idtype, 1178 args->u.idtypestat.qc_info); 1179 break; 1180 case QUOTACTL_OBJTYPESTAT: 1181 error = do_sys_quotactl_objtypestat(mp, 1182 args->u.objtypestat.qc_objtype, 1183 args->u.objtypestat.qc_info); 1184 break; 1185 case QUOTACTL_GET: 1186 error = do_sys_quotactl_get(mp, 1187 args->u.get.qc_key, 1188 args->u.get.qc_val); 1189 break; 1190 case QUOTACTL_PUT: 1191 error = do_sys_quotactl_put(mp, 1192 args->u.put.qc_key, 1193 args->u.put.qc_val); 1194 break; 1195 case QUOTACTL_DEL: 1196 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1197 break; 1198 case QUOTACTL_CURSOROPEN: 1199 error = do_sys_quotactl_cursoropen(mp, 1200 args->u.cursoropen.qc_cursor); 1201 break; 1202 case QUOTACTL_CURSORCLOSE: 1203 error = do_sys_quotactl_cursorclose(mp, 1204 args->u.cursorclose.qc_cursor); 1205 break; 1206 case QUOTACTL_CURSORSKIPIDTYPE: 1207 error = do_sys_quotactl_cursorskipidtype(mp, 1208 args->u.cursorskipidtype.qc_cursor, 1209 args->u.cursorskipidtype.qc_idtype); 1210 break; 1211 case QUOTACTL_CURSORGET: 1212 error = do_sys_quotactl_cursorget(mp, 1213 args->u.cursorget.qc_cursor, 1214 args->u.cursorget.qc_keys, 1215 args->u.cursorget.qc_vals, 1216 args->u.cursorget.qc_maxnum, 1217 args->u.cursorget.qc_ret); 1218 break; 1219 case QUOTACTL_CURSORATEND: 1220 error = do_sys_quotactl_cursoratend(mp, 1221 args->u.cursoratend.qc_cursor, 1222 args->u.cursoratend.qc_ret); 1223 break; 1224 case QUOTACTL_CURSORREWIND: 1225 error = 
do_sys_quotactl_cursorrewind(mp, 1226 args->u.cursorrewind.qc_cursor); 1227 break; 1228 case QUOTACTL_QUOTAON: 1229 error = do_sys_quotactl_quotaon(mp, 1230 args->u.quotaon.qc_idtype, 1231 args->u.quotaon.qc_quotafile); 1232 break; 1233 case QUOTACTL_QUOTAOFF: 1234 error = do_sys_quotactl_quotaoff(mp, 1235 args->u.quotaoff.qc_idtype); 1236 break; 1237 default: 1238 error = EINVAL; 1239 break; 1240 } 1241 1242 vrele(vp); 1243 return error; 1244 } 1245 1246 /* ARGSUSED */ 1247 int 1248 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1249 register_t *retval) 1250 { 1251 /* { 1252 syscallarg(const char *) path; 1253 syscallarg(struct quotactl_args *) args; 1254 } */ 1255 struct quotactl_args args; 1256 int error; 1257 1258 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1259 if (error) { 1260 return error; 1261 } 1262 1263 return do_sys_quotactl(SCARG(uap, path), &args); 1264 } 1265 1266 int 1267 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1268 int root) 1269 { 1270 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1271 bool chrooted; 1272 int error = 0; 1273 1274 KASSERT(l == curlwp); 1275 1276 /* 1277 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1278 * since it would imply chroots can be escaped. Just make sure this 1279 * routine is self-consistent. 1280 */ 1281 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1282 1283 /* 1284 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1285 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1286 * overrides MNT_NOWAIT. 
1287 */ 1288 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1289 (flags != MNT_WAIT && flags != 0)) { 1290 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1291 } else { 1292 /* Get the filesystem stats now */ 1293 memset(sp, 0, sizeof(*sp)); 1294 if ((error = VFS_STATVFS(mp, sp)) != 0) 1295 return error; 1296 if (!chrooted) 1297 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1298 } 1299 1300 if (chrooted) { 1301 size_t len; 1302 char *bp; 1303 char c; 1304 char *path = PNBUF_GET(); 1305 1306 bp = path + MAXPATHLEN; 1307 *--bp = '\0'; 1308 rw_enter(&cwdi->cwdi_lock, RW_READER); 1309 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1310 MAXPATHLEN / 2, 0, l); 1311 rw_exit(&cwdi->cwdi_lock); 1312 if (error) { 1313 PNBUF_PUT(path); 1314 return error; 1315 } 1316 len = strlen(bp); 1317 if (len != 1) { 1318 /* 1319 * for mount points that are below our root, we can see 1320 * them, so we fix up the pathname and return them. The 1321 * rest we cannot see, so we don't allow viewing the 1322 * data. 1323 */ 1324 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1325 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1326 (void)strlcpy(sp->f_mntonname, 1327 c == '\0' ? "/" : &sp->f_mntonname[len], 1328 sizeof(sp->f_mntonname)); 1329 } else { 1330 if (root) 1331 (void)strlcpy(sp->f_mntonname, "/", 1332 sizeof(sp->f_mntonname)); 1333 else 1334 error = EPERM; 1335 } 1336 } 1337 PNBUF_PUT(path); 1338 } 1339 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1340 return error; 1341 } 1342 1343 /* 1344 * Get filesystem statistics by path. 
1345 */ 1346 int 1347 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1348 { 1349 struct mount *mp; 1350 int error; 1351 struct vnode *vp; 1352 1353 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1354 if (error != 0) 1355 return error; 1356 mp = vp->v_mount; 1357 error = dostatvfs(mp, sb, l, flags, 1); 1358 vrele(vp); 1359 return error; 1360 } 1361 1362 /* ARGSUSED */ 1363 int 1364 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, 1365 register_t *retval) 1366 { 1367 /* { 1368 syscallarg(const char *) path; 1369 syscallarg(struct statvfs *) buf; 1370 syscallarg(int) flags; 1371 } */ 1372 struct statvfs *sb; 1373 int error; 1374 1375 sb = STATVFSBUF_GET(); 1376 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1377 if (error == 0) 1378 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1379 STATVFSBUF_PUT(sb); 1380 return error; 1381 } 1382 1383 /* 1384 * Get filesystem statistics by fd. 1385 */ 1386 int 1387 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1388 { 1389 file_t *fp; 1390 struct mount *mp; 1391 int error; 1392 1393 /* fd_getvnode() will use the descriptor for us */ 1394 if ((error = fd_getvnode(fd, &fp)) != 0) 1395 return (error); 1396 mp = fp->f_vnode->v_mount; 1397 error = dostatvfs(mp, sb, curlwp, flags, 1); 1398 fd_putfile(fd); 1399 return error; 1400 } 1401 1402 /* ARGSUSED */ 1403 int 1404 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, 1405 register_t *retval) 1406 { 1407 /* { 1408 syscallarg(int) fd; 1409 syscallarg(struct statvfs *) buf; 1410 syscallarg(int) flags; 1411 } */ 1412 struct statvfs *sb; 1413 int error; 1414 1415 sb = STATVFSBUF_GET(); 1416 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1417 if (error == 0) 1418 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1419 STATVFSBUF_PUT(sb); 1420 return error; 1421 } 1422 1423 /* 1424 * Get statistics on all filesystems. 
1425 */ 1426 int 1427 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1428 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1429 register_t *retval) 1430 { 1431 int root = 0; 1432 mount_iterator_t *iter; 1433 struct proc *p = l->l_proc; 1434 struct mount *mp; 1435 struct statvfs *sb; 1436 size_t count, maxcount; 1437 int error = 0; 1438 1439 sb = STATVFSBUF_GET(); 1440 maxcount = bufsize / entry_sz; 1441 count = 0; 1442 mountlist_iterator_init(&iter); 1443 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1444 if (sfsp && count < maxcount) { 1445 error = dostatvfs(mp, sb, l, flags, 0); 1446 if (error) { 1447 error = 0; 1448 continue; 1449 } 1450 error = copyfn(sb, sfsp, entry_sz); 1451 if (error) 1452 goto out; 1453 sfsp = (char *)sfsp + entry_sz; 1454 root |= strcmp(sb->f_mntonname, "/") == 0; 1455 } 1456 count++; 1457 } 1458 1459 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1460 /* 1461 * fake a root entry 1462 */ 1463 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1464 sb, l, flags, 1); 1465 if (error != 0) 1466 goto out; 1467 if (sfsp) { 1468 error = copyfn(sb, sfsp, entry_sz); 1469 if (error != 0) 1470 goto out; 1471 } 1472 count++; 1473 } 1474 if (sfsp && count > maxcount) 1475 *retval = maxcount; 1476 else 1477 *retval = count; 1478 out: 1479 mountlist_iterator_destroy(iter); 1480 STATVFSBUF_PUT(sb); 1481 return error; 1482 } 1483 1484 int 1485 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1486 register_t *retval) 1487 { 1488 /* { 1489 syscallarg(struct statvfs *) buf; 1490 syscallarg(size_t) bufsize; 1491 syscallarg(int) flags; 1492 } */ 1493 1494 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1495 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1496 } 1497 1498 /* 1499 * Change current working directory to a given file descriptor. 
 */
/*
 * The descriptor must refer to a directory the caller may search
 * (VEXEC).  If something is mounted on that directory, the root of the
 * mounted file system is used instead, repeating while the result is
 * itself covered.  If the process has a root directory set, the target
 * must pass vn_isunder(), otherwise EPERM is returned.
 *
 * retval is not used.  Returns 0 or an errno value.
 */
int
do_sys_fchdir(struct lwp *l, int fd, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error;

	/* fd_getvnode() will use the descriptor for us */
	if ((error = fd_getvnode(fd, &fp)) != 0)
		return error;
	vp = fp->f_vnode;

	/* Take our own reference; it ends up in cwdi_cdir on success. */
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}
	/* Step onto the root of whatever is mounted here, repeatedly. */
	while ((mp = vp->v_mountedhere) != NULL) {
		error = vfs_busy(mp);
		/* The covered vnode is dropped whether or not busy worked. */
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, LK_SHARED, &tdp);
		vfs_unbusy(mp);
		if (error)
			goto out;
		vp = tdp;
	}
	/* Keep the reference, drop only the lock. */
	VOP_UNLOCK(vp);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = p->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap in the new directory, releasing the old one. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return error;
}

/*
 * Change current working directory to a given file descriptor.
 * System call entry point; all work is done by do_sys_fchdir().
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */

	return do_sys_fchdir(l, SCARG(uap, fd), retval);
}

/*
 * Change this process's notion of the root directory to a given file
 * descriptor.
1577 */ 1578 int 1579 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, 1580 register_t *retval) 1581 { 1582 struct vnode *vp; 1583 file_t *fp; 1584 int error, fd = SCARG(uap, fd); 1585 1586 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1587 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1588 return error; 1589 /* fd_getvnode() will use the descriptor for us */ 1590 if ((error = fd_getvnode(fd, &fp)) != 0) 1591 return error; 1592 vp = fp->f_vnode; 1593 vn_lock(vp, LK_SHARED | LK_RETRY); 1594 if (vp->v_type != VDIR) 1595 error = ENOTDIR; 1596 else 1597 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1598 VOP_UNLOCK(vp); 1599 if (error) 1600 goto out; 1601 vref(vp); 1602 change_root(vp); 1603 1604 out: 1605 fd_putfile(fd); 1606 return (error); 1607 } 1608 1609 /* 1610 * Change current working directory (``.''). 1611 */ 1612 int 1613 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1614 register_t *retval) 1615 { 1616 struct proc *p = l->l_proc; 1617 struct cwdinfo * cwdi; 1618 int error; 1619 struct vnode *vp; 1620 1621 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1622 return error; 1623 cwdi = p->p_cwdi; 1624 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1625 vrele(cwdi->cwdi_cdir); 1626 cwdi->cwdi_cdir = vp; 1627 rw_exit(&cwdi->cwdi_lock); 1628 return 0; 1629 } 1630 1631 /* 1632 * Change current working directory (``.''). 1633 */ 1634 /* ARGSUSED */ 1635 int 1636 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1637 { 1638 /* { 1639 syscallarg(const char *) path; 1640 } */ 1641 1642 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1643 } 1644 1645 /* 1646 * Change notion of root (``/'') directory. 
 */
/*
 * System call entry point: authorize the request, look the path up
 * with chdir_lookup() and, on success, pass the vnode to change_root().
 */
/* ARGSUSED */
int
sys_chroot(struct lwp *l, const struct sys_chroot_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */
	int error;
	struct vnode *vp;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
	    KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
		return (error);

	error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l);
	if (error == 0)
		change_root(vp);
	return error;
}

/*
 * Common routine for chroot and fchroot.
 * NB: callers need to properly authorize the change root operation.
 *
 * vp becomes the root directory of the current process; it is stored
 * in cwdi_rdir without taking an additional reference here.  The
 * previous root, if any, is released.  Afterwards the process
 * credentials are cloned, updated via kauth_proc_chroot() and
 * installed.
 */
void
change_root(struct vnode *vp)
{
	kauth_cred_t ncred;
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct cwdinfo *cwdi = p->p_cwdi;

	/* Allocate the new credential before taking any locks. */
	ncred = kauth_cred_alloc();

	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir != NULL)
		vrele(cwdi->cwdi_rdir);
	cwdi->cwdi_rdir = vp;

	/*
	 * Prevent escaping from chroot by putting the root under
	 * the working directory.  Silently chdir to / if we aren't
	 * already there.
	 */
	if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) {
		/*
		 * XXX would be more failsafe to change directory to a
		 * deadfs node here instead
		 */
		vrele(cwdi->cwdi_cdir);
		/* The cwd slot needs its own reference to vp. */
		vref(vp);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

	/* Get a write lock on the process credential. */
	proc_crmod_enter();

	kauth_cred_clone(p->p_cred, ncred);
	kauth_proc_chroot(ncred, p->p_cwdi);

	/* Broadcast our credentials to the process and other LWPs. */
	proc_crmod_leave(ncred, p->p_cred, true);
}

/*
 * Common routine for chroot and chdir.
1716 * XXX "where" should be enum uio_seg 1717 */ 1718 int 1719 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1720 { 1721 struct pathbuf *pb; 1722 struct nameidata nd; 1723 int error; 1724 1725 error = pathbuf_maybe_copyin(path, where, &pb); 1726 if (error) { 1727 return error; 1728 } 1729 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1730 if ((error = namei(&nd)) != 0) { 1731 pathbuf_destroy(pb); 1732 return error; 1733 } 1734 *vpp = nd.ni_vp; 1735 pathbuf_destroy(pb); 1736 1737 if ((*vpp)->v_type != VDIR) 1738 error = ENOTDIR; 1739 else 1740 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1741 1742 if (error) 1743 vput(*vpp); 1744 else 1745 VOP_UNLOCK(*vpp); 1746 return (error); 1747 } 1748 1749 /* 1750 * Internals of sys_open - path has already been converted into a pathbuf 1751 * (so we can easily reuse this function from other parts of the kernel, 1752 * like posix_spawn post-processing). 1753 */ 1754 int 1755 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1756 int open_mode, int *fd) 1757 { 1758 struct proc *p = l->l_proc; 1759 struct cwdinfo *cwdi = p->p_cwdi; 1760 file_t *fp; 1761 struct vnode *vp; 1762 int dupfd; 1763 bool dupfd_move; 1764 int flags, cmode; 1765 int indx, error; 1766 1767 if (open_flags & O_SEARCH) { 1768 open_flags &= ~(int)O_SEARCH; 1769 } 1770 1771 /* 1772 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1773 * may be specified. 1774 */ 1775 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1776 return EINVAL; 1777 1778 flags = FFLAGS(open_flags); 1779 if ((flags & (FREAD | FWRITE)) == 0) 1780 return EINVAL; 1781 1782 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1783 return error; 1784 } 1785 1786 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1787 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1788 1789 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1790 &vp, &dupfd_move, &dupfd); 1791 if (error != 0) { 1792 fd_abort(p, fp, indx); 1793 return error; 1794 } 1795 1796 if (vp == NULL) { 1797 fd_abort(p, fp, indx); 1798 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1799 if (error) 1800 return error; 1801 *fd = indx; 1802 } else { 1803 error = open_setfp(l, fp, vp, indx, flags); 1804 if (error) 1805 return error; 1806 VOP_UNLOCK(vp); 1807 *fd = indx; 1808 fd_affix(p, fp, indx); 1809 } 1810 1811 return 0; 1812 } 1813 1814 int 1815 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1816 { 1817 struct pathbuf *pb; 1818 int error, oflags; 1819 1820 oflags = FFLAGS(open_flags); 1821 if ((oflags & (FREAD | FWRITE)) == 0) 1822 return EINVAL; 1823 1824 pb = pathbuf_create(path); 1825 if (pb == NULL) 1826 return ENOMEM; 1827 1828 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1829 pathbuf_destroy(pb); 1830 1831 return error; 1832 } 1833 1834 static int 1835 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1836 int mode, int *fd) 1837 { 1838 file_t *dfp = NULL; 1839 struct vnode *dvp = NULL; 1840 struct pathbuf *pb; 1841 const char *pathstring = NULL; 1842 int error; 1843 1844 if (path == NULL) { 1845 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1846 if (error == ENOSYS) 1847 goto no_compat; 1848 if (error) 1849 return error; 1850 } else { 1851 no_compat: 1852 error = pathbuf_copyin(path, &pb); 1853 if (error) 1854 return error; 1855 } 1856 1857 pathstring = pathbuf_stringcopy_get(pb); 1858 1859 /* 1860 * fdat is ignored if: 1861 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1862 * 2) if path is absolute, then fdat is useless. 
1863 */ 1864 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1865 /* fd_getvnode() will use the descriptor for us */ 1866 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1867 goto out; 1868 1869 dvp = dfp->f_vnode; 1870 } 1871 1872 error = do_open(l, dvp, pb, flags, mode, fd); 1873 1874 if (dfp != NULL) 1875 fd_putfile(fdat); 1876 out: 1877 pathbuf_stringcopy_put(pb, pathstring); 1878 pathbuf_destroy(pb); 1879 return error; 1880 } 1881 1882 int 1883 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1884 { 1885 /* { 1886 syscallarg(const char *) path; 1887 syscallarg(int) flags; 1888 syscallarg(int) mode; 1889 } */ 1890 int error; 1891 int fd; 1892 1893 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1894 SCARG(uap, flags), SCARG(uap, mode), &fd); 1895 1896 if (error == 0) 1897 *retval = fd; 1898 1899 return error; 1900 } 1901 1902 int 1903 sys_openat(struct lwp *l, const struct sys_openat_args *uap, 1904 register_t *retval) 1905 { 1906 /* { 1907 syscallarg(int) fd; 1908 syscallarg(const char *) path; 1909 syscallarg(int) oflags; 1910 syscallarg(int) mode; 1911 } */ 1912 int error; 1913 int fd; 1914 1915 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1916 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1917 1918 if (error == 0) 1919 *retval = fd; 1920 1921 return error; 1922 } 1923 1924 static void 1925 vfs__fhfree(fhandle_t *fhp) 1926 { 1927 size_t fhsize; 1928 1929 fhsize = FHANDLE_SIZE(fhp); 1930 kmem_free(fhp, fhsize); 1931 } 1932 1933 /* 1934 * vfs_composefh: compose a filehandle. 
1935 */ 1936 1937 int 1938 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1939 { 1940 struct mount *mp; 1941 struct fid *fidp; 1942 int error; 1943 size_t needfhsize; 1944 size_t fidsize; 1945 1946 mp = vp->v_mount; 1947 fidp = NULL; 1948 if (*fh_size < FHANDLE_SIZE_MIN) { 1949 fidsize = 0; 1950 } else { 1951 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1952 if (fhp != NULL) { 1953 memset(fhp, 0, *fh_size); 1954 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1955 fidp = &fhp->fh_fid; 1956 } 1957 } 1958 error = VFS_VPTOFH(vp, fidp, &fidsize); 1959 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1960 if (error == 0 && *fh_size < needfhsize) { 1961 error = E2BIG; 1962 } 1963 *fh_size = needfhsize; 1964 return error; 1965 } 1966 1967 int 1968 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1969 { 1970 struct mount *mp; 1971 fhandle_t *fhp; 1972 size_t fhsize; 1973 size_t fidsize; 1974 int error; 1975 1976 mp = vp->v_mount; 1977 fidsize = 0; 1978 error = VFS_VPTOFH(vp, NULL, &fidsize); 1979 KASSERT(error != 0); 1980 if (error != E2BIG) { 1981 goto out; 1982 } 1983 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1984 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1985 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1986 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1987 if (error == 0) { 1988 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1989 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1990 *fhpp = fhp; 1991 } else { 1992 kmem_free(fhp, fhsize); 1993 } 1994 out: 1995 return error; 1996 } 1997 1998 void 1999 vfs_composefh_free(fhandle_t *fhp) 2000 { 2001 2002 vfs__fhfree(fhp); 2003 } 2004 2005 /* 2006 * vfs_fhtovp: lookup a vnode by a filehandle. 
2007 */ 2008 2009 int 2010 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 2011 { 2012 struct mount *mp; 2013 int error; 2014 2015 *vpp = NULL; 2016 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 2017 if (mp == NULL) { 2018 error = ESTALE; 2019 goto out; 2020 } 2021 if (mp->mnt_op->vfs_fhtovp == NULL) { 2022 error = EOPNOTSUPP; 2023 goto out; 2024 } 2025 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 2026 out: 2027 return error; 2028 } 2029 2030 /* 2031 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 2032 * the needed size. 2033 */ 2034 2035 int 2036 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2037 { 2038 fhandle_t *fhp; 2039 int error; 2040 2041 if (fhsize > FHANDLE_SIZE_MAX) { 2042 return EINVAL; 2043 } 2044 if (fhsize < FHANDLE_SIZE_MIN) { 2045 return EINVAL; 2046 } 2047 again: 2048 fhp = kmem_alloc(fhsize, KM_SLEEP); 2049 error = copyin(ufhp, fhp, fhsize); 2050 if (error == 0) { 2051 /* XXX this check shouldn't be here */ 2052 if (FHANDLE_SIZE(fhp) == fhsize) { 2053 *fhpp = fhp; 2054 return 0; 2055 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2056 /* 2057 * a kludge for nfsv2 padded handles. 2058 */ 2059 size_t sz; 2060 2061 sz = FHANDLE_SIZE(fhp); 2062 kmem_free(fhp, fhsize); 2063 fhsize = sz; 2064 goto again; 2065 } else { 2066 /* 2067 * userland told us wrong size. 
2068 */ 2069 error = EINVAL; 2070 } 2071 } 2072 kmem_free(fhp, fhsize); 2073 return error; 2074 } 2075 2076 void 2077 vfs_copyinfh_free(fhandle_t *fhp) 2078 { 2079 2080 vfs__fhfree(fhp); 2081 } 2082 2083 /* 2084 * Get file handle system call 2085 */ 2086 int 2087 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, 2088 register_t *retval) 2089 { 2090 /* { 2091 syscallarg(char *) fname; 2092 syscallarg(fhandle_t *) fhp; 2093 syscallarg(size_t *) fh_size; 2094 } */ 2095 struct vnode *vp; 2096 fhandle_t *fh; 2097 int error; 2098 struct pathbuf *pb; 2099 struct nameidata nd; 2100 size_t sz; 2101 size_t usz; 2102 2103 /* 2104 * Must be super user 2105 */ 2106 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2107 0, NULL, NULL, NULL); 2108 if (error) 2109 return (error); 2110 2111 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2112 if (error) { 2113 return error; 2114 } 2115 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2116 error = namei(&nd); 2117 if (error) { 2118 pathbuf_destroy(pb); 2119 return error; 2120 } 2121 vp = nd.ni_vp; 2122 pathbuf_destroy(pb); 2123 2124 error = vfs_composefh_alloc(vp, &fh); 2125 vput(vp); 2126 if (error != 0) { 2127 return error; 2128 } 2129 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2130 if (error != 0) { 2131 goto out; 2132 } 2133 sz = FHANDLE_SIZE(fh); 2134 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2135 if (error != 0) { 2136 goto out; 2137 } 2138 if (usz >= sz) { 2139 error = copyout(fh, SCARG(uap, fhp), sz); 2140 } else { 2141 error = E2BIG; 2142 } 2143 out: 2144 vfs_composefh_free(fh); 2145 return (error); 2146 } 2147 2148 /* 2149 * Open a file given a file handle. 2150 * 2151 * Check permissions, allocate an open file structure, 2152 * and call the device open routine if any. 
2153 */ 2154 2155 int 2156 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2157 register_t *retval) 2158 { 2159 file_t *fp; 2160 struct vnode *vp = NULL; 2161 kauth_cred_t cred = l->l_cred; 2162 file_t *nfp; 2163 int indx, error; 2164 struct vattr va; 2165 fhandle_t *fh; 2166 int flags; 2167 proc_t *p; 2168 2169 p = curproc; 2170 2171 /* 2172 * Must be super user 2173 */ 2174 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2175 0, NULL, NULL, NULL))) 2176 return (error); 2177 2178 if (oflags & O_SEARCH) { 2179 oflags &= ~(int)O_SEARCH; 2180 } 2181 2182 flags = FFLAGS(oflags); 2183 if ((flags & (FREAD | FWRITE)) == 0) 2184 return (EINVAL); 2185 if ((flags & O_CREAT)) 2186 return (EINVAL); 2187 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2188 return (error); 2189 fp = nfp; 2190 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2191 if (error != 0) { 2192 goto bad; 2193 } 2194 error = vfs_fhtovp(fh, &vp); 2195 vfs_copyinfh_free(fh); 2196 if (error != 0) { 2197 goto bad; 2198 } 2199 2200 /* Now do an effective vn_open */ 2201 2202 if (vp->v_type == VSOCK) { 2203 error = EOPNOTSUPP; 2204 goto bad; 2205 } 2206 error = vn_openchk(vp, cred, flags); 2207 if (error != 0) 2208 goto bad; 2209 if (flags & O_TRUNC) { 2210 VOP_UNLOCK(vp); /* XXX */ 2211 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2212 vattr_null(&va); 2213 va.va_size = 0; 2214 error = VOP_SETATTR(vp, &va, cred); 2215 if (error) 2216 goto bad; 2217 } 2218 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2219 goto bad; 2220 if (flags & FWRITE) { 2221 mutex_enter(vp->v_interlock); 2222 vp->v_writecount++; 2223 mutex_exit(vp->v_interlock); 2224 } 2225 2226 /* done with modified vn_open, now finish what sys_open does. 
*/ 2227 if ((error = open_setfp(l, fp, vp, indx, flags))) 2228 return error; 2229 2230 VOP_UNLOCK(vp); 2231 *retval = indx; 2232 fd_affix(p, fp, indx); 2233 return (0); 2234 2235 bad: 2236 fd_abort(p, fp, indx); 2237 if (vp != NULL) 2238 vput(vp); 2239 if (error == EDUPFD || error == EMOVEFD) { 2240 /* XXX should probably close curlwp->l_dupfd */ 2241 error = EOPNOTSUPP; 2242 } 2243 return (error); 2244 } 2245 2246 int 2247 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, 2248 register_t *retval) 2249 { 2250 /* { 2251 syscallarg(const void *) fhp; 2252 syscallarg(size_t) fh_size; 2253 syscallarg(int) flags; 2254 } */ 2255 2256 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2257 SCARG(uap, flags), retval); 2258 } 2259 2260 int 2261 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2262 { 2263 int error; 2264 fhandle_t *fh; 2265 struct vnode *vp; 2266 2267 /* 2268 * Must be super user 2269 */ 2270 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2271 0, NULL, NULL, NULL))) 2272 return error; 2273 2274 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2275 if (error != 0) 2276 return error; 2277 2278 error = vfs_fhtovp(fh, &vp); 2279 vfs_copyinfh_free(fh); 2280 if (error != 0) 2281 return error; 2282 2283 error = vn_stat(vp, sb); 2284 vput(vp); 2285 return error; 2286 } 2287 2288 /* ARGSUSED */ 2289 int 2290 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, 2291 register_t *retval) 2292 { 2293 /* { 2294 syscallarg(const void *) fhp; 2295 syscallarg(size_t) fh_size; 2296 syscallarg(struct stat *) sb; 2297 } */ 2298 struct stat sb; 2299 int error; 2300 2301 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2302 if (error) 2303 return error; 2304 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2305 } 2306 2307 int 2308 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, 2309 struct statvfs *sb, int flags) 2310 { 2311 fhandle_t *fh; 2312 struct mount *mp; 
2313 struct vnode *vp; 2314 int error; 2315 2316 /* 2317 * Must be super user 2318 */ 2319 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2320 0, NULL, NULL, NULL))) 2321 return error; 2322 2323 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2324 if (error != 0) 2325 return error; 2326 2327 error = vfs_fhtovp(fh, &vp); 2328 vfs_copyinfh_free(fh); 2329 if (error != 0) 2330 return error; 2331 2332 mp = vp->v_mount; 2333 error = dostatvfs(mp, sb, l, flags, 1); 2334 vput(vp); 2335 return error; 2336 } 2337 2338 /* ARGSUSED */ 2339 int 2340 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, 2341 register_t *retval) 2342 { 2343 /* { 2344 syscallarg(const void *) fhp; 2345 syscallarg(size_t) fh_size; 2346 syscallarg(struct statvfs *) buf; 2347 syscallarg(int) flags; 2348 } */ 2349 struct statvfs *sb = STATVFSBUF_GET(); 2350 int error; 2351 2352 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2353 SCARG(uap, flags)); 2354 if (error == 0) 2355 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2356 STATVFSBUF_PUT(sb); 2357 return error; 2358 } 2359 2360 int 2361 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2362 dev_t dev) 2363 { 2364 2365 /* 2366 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2367 * in mode and dev=0. 2368 * 2369 * In all the other cases it's implementation defined behavior. 2370 */ 2371 2372 if ((mode & S_IFIFO) && dev == 0) 2373 return do_sys_mkfifoat(l, fdat, pathname, mode); 2374 else 2375 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2376 UIO_USERSPACE); 2377 } 2378 2379 /* 2380 * Create a special file. 
 */
/* ARGSUSED */
int
sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
		syscallarg(mode_t) mode;
		syscallarg(dev_t) dev;
	} */
	/* Same as mknodat relative to AT_FDCWD. */
	return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path),
	    SCARG(uap, mode), SCARG(uap, dev));
}

/*
 * mknodat system call entry point; the pad argument is not used.
 */
int
sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const char *) path;
		syscallarg(mode_t) mode;
		syscallarg(int) pad;
		syscallarg(dev_t) dev;
	} */

	return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path),
	    SCARG(uap, mode), SCARG(uap, dev));
}

/*
 * Convenience wrapper: do_sys_mknodat() relative to AT_FDCWD.
 */
int
do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev,
    enum uio_seg seg)
{
	return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg);
}

/*
 * Create the node named by pathname (in address space seg, relative to
 * fdat) with the type encoded in mode & S_IFMT.
 *
 * The type selects one of three vnode operations: VOP_MKNOD (S_IFMT,
 * S_IFCHR, S_IFBLK), VOP_WHITEOUT (S_IFWHT) or VOP_CREATE (S_IFREG).
 * Any other type is EINVAL, and an existing target is EEXIST.
 * Requires KAUTH_SYSTEM_MKNOD authorization.  Returns 0 or an errno.
 */
int
do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode,
    dev_t dev, enum uio_seg seg)
{
	struct proc *p = l->l_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error, optype;
	struct pathbuf *pb;
	struct nameidata nd;
	const char *pathstring;

	if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD,
	    0, NULL, NULL, NULL)) != 0)
		return (error);

	/* Default operation; the type switch below may override it. */
	optype = VOP_MKNOD_DESCOFFSET;

	error = pathbuf_maybe_copyin(pathname, seg, &pb);
	if (error) {
		return error;
	}
	pathstring = pathbuf_stringcopy_get(pb);
	if (pathstring == NULL) {
		pathbuf_destroy(pb);
		return ENOMEM;
	}

	NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb);

	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	if (vp != NULL)
		error = EEXIST;
	else {
		vattr_null(&vattr);
		/* We will read cwdi->cwdi_cmask unlocked. */
		vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask;
		vattr.va_rdev = dev;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			optype = VOP_WHITEOUT_DESCOFFSET;
			break;
		case S_IFREG:
#if NVERIEXEC > 0
			error = veriexec_openchk(l, nd.ni_vp, pathstring,
			    O_CREAT);
#endif /* NVERIEXEC > 0 */
			vattr.va_type = VREG;
			vattr.va_rdev = VNOVAL;
			optype = VOP_CREATE_DESCOFFSET;
			break;
		default:
			error = EINVAL;
			break;
		}

		/* A device node without a device number makes no sense. */
		if (error == 0 && optype == VOP_MKNOD_DESCOFFSET &&
		    vattr.va_rdev == VNOVAL)
			error = EINVAL;
	}

	if (!error) {
		switch (optype) {
		case VOP_WHITEOUT_DESCOFFSET:
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
			if (error)
				VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			break;

		case VOP_MKNOD_DESCOFFSET:
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode itself is not needed here. */
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;

		case VOP_CREATE_DESCOFFSET:
			error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode itself is not needed here. */
			if (error == 0)
				vrele(nd.ni_vp);
			vput(nd.ni_dvp);
			break;
		}
	} else {
		/* Back out of the lookup: abort the pending create. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		/* When parent and target are the same vnode, only vrele. */
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp)
			vrele(vp);
	}
out:
	pathbuf_stringcopy_put(pb, pathstring);
	pathbuf_destroy(pb);
	return (error);
}

/*
 * Create a named pipe.
2535 */ 2536 /* ARGSUSED */ 2537 int 2538 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, 2539 register_t *retval) 2540 { 2541 /* { 2542 syscallarg(const char *) path; 2543 syscallarg(int) mode; 2544 } */ 2545 2546 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), 2547 SCARG(uap, mode)); 2548 } 2549 2550 int 2551 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2552 register_t *retval) 2553 { 2554 /* { 2555 syscallarg(int) fd; 2556 syscallarg(const char *) path; 2557 syscallarg(int) mode; 2558 } */ 2559 2560 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2561 SCARG(uap, mode)); 2562 } 2563 2564 static int 2565 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2566 { 2567 struct proc *p = l->l_proc; 2568 struct vattr vattr; 2569 int error; 2570 struct pathbuf *pb; 2571 struct nameidata nd; 2572 2573 error = pathbuf_copyin(path, &pb); 2574 if (error) { 2575 return error; 2576 } 2577 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2578 2579 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2580 pathbuf_destroy(pb); 2581 return error; 2582 } 2583 if (nd.ni_vp != NULL) { 2584 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2585 if (nd.ni_dvp == nd.ni_vp) 2586 vrele(nd.ni_dvp); 2587 else 2588 vput(nd.ni_dvp); 2589 vrele(nd.ni_vp); 2590 pathbuf_destroy(pb); 2591 return (EEXIST); 2592 } 2593 vattr_null(&vattr); 2594 vattr.va_type = VFIFO; 2595 /* We will read cwdi->cwdi_cmask unlocked. */ 2596 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2597 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2598 if (error == 0) 2599 vrele(nd.ni_vp); 2600 vput(nd.ni_dvp); 2601 pathbuf_destroy(pb); 2602 return (error); 2603 } 2604 2605 /* 2606 * Make a hard file link. 
2607 */ 2608 /* ARGSUSED */ 2609 int 2610 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2611 const char *link, int follow, register_t *retval) 2612 { 2613 struct vnode *vp; 2614 struct pathbuf *linkpb; 2615 struct nameidata nd; 2616 namei_simple_flags_t ns_flags; 2617 int error; 2618 2619 if (follow & AT_SYMLINK_FOLLOW) 2620 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2621 else 2622 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2623 2624 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2625 if (error != 0) 2626 return (error); 2627 error = pathbuf_copyin(link, &linkpb); 2628 if (error) { 2629 goto out1; 2630 } 2631 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2632 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2633 goto out2; 2634 if (nd.ni_vp) { 2635 error = EEXIST; 2636 goto abortop; 2637 } 2638 /* Prevent hard links on directories. */ 2639 if (vp->v_type == VDIR) { 2640 error = EPERM; 2641 goto abortop; 2642 } 2643 /* Prevent cross-mount operation. */ 2644 if (nd.ni_dvp->v_mount != vp->v_mount) { 2645 error = EXDEV; 2646 goto abortop; 2647 } 2648 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2649 VOP_UNLOCK(nd.ni_dvp); 2650 vrele(nd.ni_dvp); 2651 out2: 2652 pathbuf_destroy(linkpb); 2653 out1: 2654 vrele(vp); 2655 return (error); 2656 abortop: 2657 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2658 if (nd.ni_dvp == nd.ni_vp) 2659 vrele(nd.ni_dvp); 2660 else 2661 vput(nd.ni_dvp); 2662 if (nd.ni_vp != NULL) 2663 vrele(nd.ni_vp); 2664 goto out2; 2665 } 2666 2667 int 2668 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2669 { 2670 /* { 2671 syscallarg(const char *) path; 2672 syscallarg(const char *) link; 2673 } */ 2674 const char *path = SCARG(uap, path); 2675 const char *link = SCARG(uap, link); 2676 2677 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2678 AT_SYMLINK_FOLLOW, retval); 2679 } 2680 2681 int 2682 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2683 register_t *retval) 2684 { 2685 
/* { 2686 syscallarg(int) fd1; 2687 syscallarg(const char *) name1; 2688 syscallarg(int) fd2; 2689 syscallarg(const char *) name2; 2690 syscallarg(int) flags; 2691 } */ 2692 int fd1 = SCARG(uap, fd1); 2693 const char *name1 = SCARG(uap, name1); 2694 int fd2 = SCARG(uap, fd2); 2695 const char *name2 = SCARG(uap, name2); 2696 int follow; 2697 2698 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2699 2700 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2701 } 2702 2703 int 2704 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2705 { 2706 2707 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2708 } 2709 2710 static int 2711 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2712 const char *link, enum uio_seg seg) 2713 { 2714 struct proc *p = curproc; 2715 struct vattr vattr; 2716 char *path; 2717 int error; 2718 size_t len; 2719 struct pathbuf *linkpb; 2720 struct nameidata nd; 2721 2722 KASSERT(l != NULL || fdat == AT_FDCWD); 2723 2724 path = PNBUF_GET(); 2725 if (seg == UIO_USERSPACE) { 2726 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2727 goto out1; 2728 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2729 goto out1; 2730 } else { 2731 len = strlen(patharg) + 1; 2732 KASSERT(len <= MAXPATHLEN); 2733 memcpy(path, patharg, len); 2734 linkpb = pathbuf_create(link); 2735 if (linkpb == NULL) { 2736 error = ENOMEM; 2737 goto out1; 2738 } 2739 } 2740 ktrkuser("symlink-target", path, len - 1); 2741 2742 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2743 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2744 goto out2; 2745 if (nd.ni_vp) { 2746 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2747 if (nd.ni_dvp == nd.ni_vp) 2748 vrele(nd.ni_dvp); 2749 else 2750 vput(nd.ni_dvp); 2751 vrele(nd.ni_vp); 2752 error = EEXIST; 2753 goto out2; 2754 } 2755 vattr_null(&vattr); 2756 vattr.va_type = VLNK; 2757 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2758 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2759 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2760 if (error == 0) 2761 vrele(nd.ni_vp); 2762 vput(nd.ni_dvp); 2763 out2: 2764 pathbuf_destroy(linkpb); 2765 out1: 2766 PNBUF_PUT(path); 2767 return (error); 2768 } 2769 2770 /* 2771 * Make a symbolic link. 2772 */ 2773 /* ARGSUSED */ 2774 int 2775 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2776 { 2777 /* { 2778 syscallarg(const char *) path; 2779 syscallarg(const char *) link; 2780 } */ 2781 2782 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2783 UIO_USERSPACE); 2784 } 2785 2786 int 2787 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2788 register_t *retval) 2789 { 2790 /* { 2791 syscallarg(const char *) path1; 2792 syscallarg(int) fd; 2793 syscallarg(const char *) path2; 2794 } */ 2795 2796 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2797 SCARG(uap, path2), UIO_USERSPACE); 2798 } 2799 2800 /* 2801 * Delete a whiteout from the filesystem. 
2802 */ 2803 /* ARGSUSED */ 2804 int 2805 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, 2806 register_t *retval) 2807 { 2808 /* { 2809 syscallarg(const char *) path; 2810 } */ 2811 int error; 2812 struct pathbuf *pb; 2813 struct nameidata nd; 2814 2815 error = pathbuf_copyin(SCARG(uap, path), &pb); 2816 if (error) { 2817 return error; 2818 } 2819 2820 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2821 error = namei(&nd); 2822 if (error) { 2823 pathbuf_destroy(pb); 2824 return (error); 2825 } 2826 2827 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2828 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2829 if (nd.ni_dvp == nd.ni_vp) 2830 vrele(nd.ni_dvp); 2831 else 2832 vput(nd.ni_dvp); 2833 if (nd.ni_vp) 2834 vrele(nd.ni_vp); 2835 pathbuf_destroy(pb); 2836 return (EEXIST); 2837 } 2838 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2839 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2840 vput(nd.ni_dvp); 2841 pathbuf_destroy(pb); 2842 return (error); 2843 } 2844 2845 /* 2846 * Delete a name from the filesystem. 
2847 */ 2848 /* ARGSUSED */ 2849 int 2850 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, 2851 register_t *retval) 2852 { 2853 /* { 2854 syscallarg(const char *) path; 2855 } */ 2856 2857 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, 2858 UIO_USERSPACE); 2859 } 2860 2861 int 2862 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2863 register_t *retval) 2864 { 2865 /* { 2866 syscallarg(int) fd; 2867 syscallarg(const char *) path; 2868 syscallarg(int) flag; 2869 } */ 2870 2871 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2872 SCARG(uap, flag), UIO_USERSPACE); 2873 } 2874 2875 int 2876 do_sys_unlink(const char *arg, enum uio_seg seg) 2877 { 2878 2879 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2880 } 2881 2882 static int 2883 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2884 enum uio_seg seg) 2885 { 2886 struct vnode *vp; 2887 int error; 2888 struct pathbuf *pb; 2889 struct nameidata nd; 2890 const char *pathstring; 2891 2892 KASSERT(l != NULL || fdat == AT_FDCWD); 2893 2894 error = pathbuf_maybe_copyin(arg, seg, &pb); 2895 if (error) { 2896 return error; 2897 } 2898 pathstring = pathbuf_stringcopy_get(pb); 2899 if (pathstring == NULL) { 2900 pathbuf_destroy(pb); 2901 return ENOMEM; 2902 } 2903 2904 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2905 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2906 goto out; 2907 vp = nd.ni_vp; 2908 2909 /* 2910 * The root of a mounted filesystem cannot be deleted. 2911 */ 2912 if ((vp->v_vflag & VV_ROOT) != 0) { 2913 error = EBUSY; 2914 goto abort; 2915 } 2916 2917 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2918 error = EBUSY; 2919 goto abort; 2920 } 2921 2922 /* 2923 * No rmdir "." please. 
2924 */ 2925 if (nd.ni_dvp == vp) { 2926 error = EINVAL; 2927 goto abort; 2928 } 2929 2930 /* 2931 * AT_REMOVEDIR is required to remove a directory 2932 */ 2933 if (vp->v_type == VDIR) { 2934 if (!(flags & AT_REMOVEDIR)) { 2935 error = EPERM; 2936 goto abort; 2937 } else { 2938 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2939 vput(nd.ni_dvp); 2940 goto out; 2941 } 2942 } 2943 2944 /* 2945 * Starting here we only deal with non directories. 2946 */ 2947 if (flags & AT_REMOVEDIR) { 2948 error = ENOTDIR; 2949 goto abort; 2950 } 2951 2952 #if NVERIEXEC > 0 2953 /* Handle remove requests for veriexec entries. */ 2954 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2955 goto abort; 2956 } 2957 #endif /* NVERIEXEC > 0 */ 2958 2959 #ifdef FILEASSOC 2960 (void)fileassoc_file_delete(vp); 2961 #endif /* FILEASSOC */ 2962 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2963 vput(nd.ni_dvp); 2964 goto out; 2965 2966 abort: 2967 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2968 if (nd.ni_dvp == vp) 2969 vrele(nd.ni_dvp); 2970 else 2971 vput(nd.ni_dvp); 2972 vput(vp); 2973 2974 out: 2975 pathbuf_stringcopy_put(pb, pathstring); 2976 pathbuf_destroy(pb); 2977 return (error); 2978 } 2979 2980 /* 2981 * Reposition read/write file offset. 
2982 */ 2983 int 2984 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2985 { 2986 /* { 2987 syscallarg(int) fd; 2988 syscallarg(int) pad; 2989 syscallarg(off_t) offset; 2990 syscallarg(int) whence; 2991 } */ 2992 file_t *fp; 2993 int error, fd; 2994 2995 switch (SCARG(uap, whence)) { 2996 case SEEK_CUR: 2997 case SEEK_END: 2998 case SEEK_SET: 2999 break; 3000 default: 3001 return EINVAL; 3002 } 3003 3004 fd = SCARG(uap, fd); 3005 3006 if ((fp = fd_getfile(fd)) == NULL) 3007 return (EBADF); 3008 3009 if (fp->f_ops->fo_seek == NULL) { 3010 error = ESPIPE; 3011 goto out; 3012 } 3013 3014 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 3015 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 3016 out: 3017 fd_putfile(fd); 3018 return (error); 3019 } 3020 3021 /* 3022 * Positional read system call. 3023 */ 3024 int 3025 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 3026 { 3027 /* { 3028 syscallarg(int) fd; 3029 syscallarg(void *) buf; 3030 syscallarg(size_t) nbyte; 3031 syscallarg(off_t) offset; 3032 } */ 3033 file_t *fp; 3034 off_t offset; 3035 int error, fd = SCARG(uap, fd); 3036 3037 if ((fp = fd_getfile(fd)) == NULL) 3038 return (EBADF); 3039 3040 if ((fp->f_flag & FREAD) == 0) { 3041 fd_putfile(fd); 3042 return (EBADF); 3043 } 3044 3045 if (fp->f_ops->fo_seek == NULL) { 3046 error = ESPIPE; 3047 goto out; 3048 } 3049 3050 offset = SCARG(uap, offset); 3051 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3052 if (error) 3053 goto out; 3054 3055 /* dofileread() will unuse the descriptor for us */ 3056 return dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3057 &offset, 0, retval); 3058 3059 out: 3060 fd_putfile(fd); 3061 return (error); 3062 } 3063 3064 /* 3065 * Positional scatter read system call. 
3066 */ 3067 int 3068 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, 3069 register_t *retval) 3070 { 3071 /* { 3072 syscallarg(int) fd; 3073 syscallarg(const struct iovec *) iovp; 3074 syscallarg(int) iovcnt; 3075 syscallarg(off_t) offset; 3076 } */ 3077 off_t offset = SCARG(uap, offset); 3078 3079 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3080 SCARG(uap, iovcnt), &offset, 0, retval); 3081 } 3082 3083 /* 3084 * Positional write system call. 3085 */ 3086 int 3087 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, 3088 register_t *retval) 3089 { 3090 /* { 3091 syscallarg(int) fd; 3092 syscallarg(const void *) buf; 3093 syscallarg(size_t) nbyte; 3094 syscallarg(off_t) offset; 3095 } */ 3096 file_t *fp; 3097 off_t offset; 3098 int error, fd = SCARG(uap, fd); 3099 3100 if ((fp = fd_getfile(fd)) == NULL) 3101 return (EBADF); 3102 3103 if ((fp->f_flag & FWRITE) == 0) { 3104 fd_putfile(fd); 3105 return (EBADF); 3106 } 3107 3108 if (fp->f_ops->fo_seek == NULL) { 3109 error = ESPIPE; 3110 goto out; 3111 } 3112 3113 offset = SCARG(uap, offset); 3114 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3115 if (error) 3116 goto out; 3117 3118 /* dofilewrite() will unuse the descriptor for us */ 3119 return dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3120 &offset, 0, retval); 3121 3122 out: 3123 fd_putfile(fd); 3124 return (error); 3125 } 3126 3127 /* 3128 * Positional gather write system call. 3129 */ 3130 int 3131 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, 3132 register_t *retval) 3133 { 3134 /* { 3135 syscallarg(int) fd; 3136 syscallarg(const struct iovec *) iovp; 3137 syscallarg(int) iovcnt; 3138 syscallarg(off_t) offset; 3139 } */ 3140 off_t offset = SCARG(uap, offset); 3141 3142 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3143 SCARG(uap, iovcnt), &offset, 0, retval); 3144 } 3145 3146 /* 3147 * Check access permissions. 
3148 */ 3149 int 3150 sys_access(struct lwp *l, const struct sys_access_args *uap, 3151 register_t *retval) 3152 { 3153 /* { 3154 syscallarg(const char *) path; 3155 syscallarg(int) flags; 3156 } */ 3157 3158 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3159 SCARG(uap, flags), 0); 3160 } 3161 3162 int 3163 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3164 int mode, int flags) 3165 { 3166 kauth_cred_t cred; 3167 struct vnode *vp; 3168 int error, nd_flag, vmode; 3169 struct pathbuf *pb; 3170 struct nameidata nd; 3171 3172 CTASSERT(F_OK == 0); 3173 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3174 /* nonsense mode */ 3175 return EINVAL; 3176 } 3177 3178 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3179 if (flags & AT_SYMLINK_NOFOLLOW) 3180 nd_flag &= ~FOLLOW; 3181 3182 error = pathbuf_copyin(path, &pb); 3183 if (error) 3184 return error; 3185 3186 NDINIT(&nd, LOOKUP, nd_flag, pb); 3187 3188 /* Override default credentials */ 3189 if (!(flags & AT_EACCESS)) { 3190 cred = kauth_cred_dup(l->l_cred); 3191 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3192 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3193 } else 3194 cred = l->l_cred; 3195 nd.ni_cnd.cn_cred = cred; 3196 3197 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3198 pathbuf_destroy(pb); 3199 goto out; 3200 } 3201 vp = nd.ni_vp; 3202 pathbuf_destroy(pb); 3203 3204 /* Flags == 0 means only check for existence. 
*/ 3205 if (mode) { 3206 vmode = 0; 3207 if (mode & R_OK) 3208 vmode |= VREAD; 3209 if (mode & W_OK) 3210 vmode |= VWRITE; 3211 if (mode & X_OK) 3212 vmode |= VEXEC; 3213 3214 error = VOP_ACCESS(vp, vmode, cred); 3215 if (!error && (vmode & VWRITE)) 3216 error = vn_writechk(vp); 3217 } 3218 vput(vp); 3219 out: 3220 if (!(flags & AT_EACCESS)) 3221 kauth_cred_free(cred); 3222 return (error); 3223 } 3224 3225 int 3226 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3227 register_t *retval) 3228 { 3229 /* { 3230 syscallarg(int) fd; 3231 syscallarg(const char *) path; 3232 syscallarg(int) amode; 3233 syscallarg(int) flag; 3234 } */ 3235 3236 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3237 SCARG(uap, amode), SCARG(uap, flag)); 3238 } 3239 3240 /* 3241 * Common code for all sys_stat functions, including compat versions. 3242 */ 3243 int 3244 do_sys_stat(const char *userpath, unsigned int nd_flag, struct stat *sb) 3245 { 3246 3247 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3248 } 3249 3250 int 3251 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3252 unsigned int nd_flag, struct stat *sb) 3253 { 3254 int error; 3255 struct pathbuf *pb; 3256 struct nameidata nd; 3257 3258 KASSERT(l != NULL || fdat == AT_FDCWD); 3259 3260 error = pathbuf_copyin(userpath, &pb); 3261 if (error) { 3262 return error; 3263 } 3264 3265 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3266 3267 error = fd_nameiat(l, fdat, &nd); 3268 if (error != 0) { 3269 pathbuf_destroy(pb); 3270 return error; 3271 } 3272 error = vn_stat(nd.ni_vp, sb); 3273 vput(nd.ni_vp); 3274 pathbuf_destroy(pb); 3275 return error; 3276 } 3277 3278 /* 3279 * Get file status; this version follows links. 
3280 */ 3281 /* ARGSUSED */ 3282 int 3283 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, 3284 register_t *retval) 3285 { 3286 /* { 3287 syscallarg(const char *) path; 3288 syscallarg(struct stat *) ub; 3289 } */ 3290 struct stat sb; 3291 int error; 3292 3293 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3294 if (error) 3295 return error; 3296 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3297 } 3298 3299 /* 3300 * Get file status; this version does not follow links. 3301 */ 3302 /* ARGSUSED */ 3303 int 3304 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, 3305 register_t *retval) 3306 { 3307 /* { 3308 syscallarg(const char *) path; 3309 syscallarg(struct stat *) ub; 3310 } */ 3311 struct stat sb; 3312 int error; 3313 3314 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3315 if (error) 3316 return error; 3317 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3318 } 3319 3320 int 3321 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3322 register_t *retval) 3323 { 3324 /* { 3325 syscallarg(int) fd; 3326 syscallarg(const char *) path; 3327 syscallarg(struct stat *) buf; 3328 syscallarg(int) flag; 3329 } */ 3330 unsigned int nd_flag; 3331 struct stat sb; 3332 int error; 3333 3334 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3335 nd_flag = NOFOLLOW; 3336 else 3337 nd_flag = FOLLOW; 3338 3339 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3340 &sb); 3341 if (error) 3342 return error; 3343 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3344 } 3345 3346 static int 3347 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3348 { 3349 int error; 3350 struct pathbuf *pb; 3351 struct nameidata nd; 3352 3353 error = pathbuf_copyin(path, &pb); 3354 if (error) { 3355 return error; 3356 } 3357 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3358 if ((error = namei(&nd)) != 0) { 3359 pathbuf_destroy(pb); 3360 return error; 3361 } 3362 error 
= VOP_PATHCONF(nd.ni_vp, name, retval); 3363 vput(nd.ni_vp); 3364 pathbuf_destroy(pb); 3365 return error; 3366 } 3367 3368 /* 3369 * Get configurable pathname variables. 3370 */ 3371 /* ARGSUSED */ 3372 int 3373 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3374 register_t *retval) 3375 { 3376 /* { 3377 syscallarg(const char *) path; 3378 syscallarg(int) name; 3379 } */ 3380 3381 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3382 FOLLOW); 3383 } 3384 3385 /* ARGSUSED */ 3386 int 3387 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3388 register_t *retval) 3389 { 3390 /* { 3391 syscallarg(const char *) path; 3392 syscallarg(int) name; 3393 } */ 3394 3395 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3396 NOFOLLOW); 3397 } 3398 3399 /* 3400 * Return target name of a symbolic link. 3401 */ 3402 /* ARGSUSED */ 3403 int 3404 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3405 register_t *retval) 3406 { 3407 /* { 3408 syscallarg(const char *) path; 3409 syscallarg(char *) buf; 3410 syscallarg(size_t) count; 3411 } */ 3412 3413 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3414 SCARG(uap, buf), SCARG(uap, count), retval); 3415 } 3416 3417 static int 3418 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3419 size_t count, register_t *retval) 3420 { 3421 struct vnode *vp; 3422 struct iovec aiov; 3423 struct uio auio; 3424 int error; 3425 struct pathbuf *pb; 3426 struct nameidata nd; 3427 3428 error = pathbuf_copyin(path, &pb); 3429 if (error) { 3430 return error; 3431 } 3432 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, 3433 pb); 3434 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3435 pathbuf_destroy(pb); 3436 return error; 3437 } 3438 vp = nd.ni_vp; 3439 pathbuf_destroy(pb); 3440 if (vp->v_type != VLNK) 3441 error = EINVAL; 3442 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3443 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) 
== 0) { 3444 aiov.iov_base = buf; 3445 aiov.iov_len = count; 3446 auio.uio_iov = &aiov; 3447 auio.uio_iovcnt = 1; 3448 auio.uio_offset = 0; 3449 auio.uio_rw = UIO_READ; 3450 KASSERT(l == curlwp); 3451 auio.uio_vmspace = l->l_proc->p_vmspace; 3452 auio.uio_resid = count; 3453 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3454 *retval = count - auio.uio_resid; 3455 } 3456 vput(vp); 3457 return (error); 3458 } 3459 3460 int 3461 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3462 register_t *retval) 3463 { 3464 /* { 3465 syscallarg(int) fd; 3466 syscallarg(const char *) path; 3467 syscallarg(char *) buf; 3468 syscallarg(size_t) bufsize; 3469 } */ 3470 3471 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3472 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3473 } 3474 3475 /* 3476 * Change flags of a file given a path name. 3477 */ 3478 /* ARGSUSED */ 3479 int 3480 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, 3481 register_t *retval) 3482 { 3483 /* { 3484 syscallarg(const char *) path; 3485 syscallarg(u_long) flags; 3486 } */ 3487 struct vnode *vp; 3488 int error; 3489 3490 error = namei_simple_user(SCARG(uap, path), 3491 NSM_FOLLOW_TRYEMULROOT, &vp); 3492 if (error != 0) 3493 return (error); 3494 error = change_flags(vp, SCARG(uap, flags), l); 3495 vput(vp); 3496 return (error); 3497 } 3498 3499 /* 3500 * Change flags of a file given a file descriptor. 
3501 */ 3502 /* ARGSUSED */ 3503 int 3504 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, 3505 register_t *retval) 3506 { 3507 /* { 3508 syscallarg(int) fd; 3509 syscallarg(u_long) flags; 3510 } */ 3511 struct vnode *vp; 3512 file_t *fp; 3513 int error; 3514 3515 /* fd_getvnode() will use the descriptor for us */ 3516 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3517 return (error); 3518 vp = fp->f_vnode; 3519 error = change_flags(vp, SCARG(uap, flags), l); 3520 VOP_UNLOCK(vp); 3521 fd_putfile(SCARG(uap, fd)); 3522 return (error); 3523 } 3524 3525 /* 3526 * Change flags of a file given a path name; this version does 3527 * not follow links. 3528 */ 3529 int 3530 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, 3531 register_t *retval) 3532 { 3533 /* { 3534 syscallarg(const char *) path; 3535 syscallarg(u_long) flags; 3536 } */ 3537 struct vnode *vp; 3538 int error; 3539 3540 error = namei_simple_user(SCARG(uap, path), 3541 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3542 if (error != 0) 3543 return (error); 3544 error = change_flags(vp, SCARG(uap, flags), l); 3545 vput(vp); 3546 return (error); 3547 } 3548 3549 /* 3550 * Common routine to change flags of a file. 3551 */ 3552 int 3553 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3554 { 3555 struct vattr vattr; 3556 int error; 3557 3558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3559 3560 vattr_null(&vattr); 3561 vattr.va_flags = flags; 3562 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3563 3564 return (error); 3565 } 3566 3567 /* 3568 * Change mode of a file given path name; this version follows links. 
3569 */ 3570 /* ARGSUSED */ 3571 int 3572 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3573 { 3574 /* { 3575 syscallarg(const char *) path; 3576 syscallarg(int) mode; 3577 } */ 3578 3579 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3580 SCARG(uap, mode), 0); 3581 } 3582 3583 int 3584 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3585 { 3586 int error; 3587 struct vnode *vp; 3588 namei_simple_flags_t ns_flag; 3589 3590 if (flags & AT_SYMLINK_NOFOLLOW) 3591 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3592 else 3593 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3594 3595 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3596 if (error != 0) 3597 return error; 3598 3599 error = change_mode(vp, mode, l); 3600 3601 vrele(vp); 3602 3603 return (error); 3604 } 3605 3606 /* 3607 * Change mode of a file given a file descriptor. 3608 */ 3609 /* ARGSUSED */ 3610 int 3611 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, 3612 register_t *retval) 3613 { 3614 /* { 3615 syscallarg(int) fd; 3616 syscallarg(int) mode; 3617 } */ 3618 file_t *fp; 3619 int error; 3620 3621 /* fd_getvnode() will use the descriptor for us */ 3622 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3623 return (error); 3624 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3625 fd_putfile(SCARG(uap, fd)); 3626 return (error); 3627 } 3628 3629 int 3630 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3631 register_t *retval) 3632 { 3633 /* { 3634 syscallarg(int) fd; 3635 syscallarg(const char *) path; 3636 syscallarg(int) mode; 3637 syscallarg(int) flag; 3638 } */ 3639 3640 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3641 SCARG(uap, mode), SCARG(uap, flag)); 3642 } 3643 3644 /* 3645 * Change mode of a file given path name; this version does not follow links. 
3646 */ 3647 /* ARGSUSED */ 3648 int 3649 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, 3650 register_t *retval) 3651 { 3652 /* { 3653 syscallarg(const char *) path; 3654 syscallarg(int) mode; 3655 } */ 3656 int error; 3657 struct vnode *vp; 3658 3659 error = namei_simple_user(SCARG(uap, path), 3660 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3661 if (error != 0) 3662 return (error); 3663 3664 error = change_mode(vp, SCARG(uap, mode), l); 3665 3666 vrele(vp); 3667 return (error); 3668 } 3669 3670 /* 3671 * Common routine to set mode given a vnode. 3672 */ 3673 static int 3674 change_mode(struct vnode *vp, int mode, struct lwp *l) 3675 { 3676 struct vattr vattr; 3677 int error; 3678 3679 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3680 vattr_null(&vattr); 3681 vattr.va_mode = mode & ALLPERMS; 3682 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3683 VOP_UNLOCK(vp); 3684 return (error); 3685 } 3686 3687 /* 3688 * Set ownership given a path name; this version follows links. 3689 */ 3690 /* ARGSUSED */ 3691 int 3692 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3693 { 3694 /* { 3695 syscallarg(const char *) path; 3696 syscallarg(uid_t) uid; 3697 syscallarg(gid_t) gid; 3698 } */ 3699 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3700 SCARG(uap, gid), 0); 3701 } 3702 3703 int 3704 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3705 gid_t gid, int flags) 3706 { 3707 int error; 3708 struct vnode *vp; 3709 namei_simple_flags_t ns_flag; 3710 3711 if (flags & AT_SYMLINK_NOFOLLOW) 3712 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3713 else 3714 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3715 3716 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3717 if (error != 0) 3718 return error; 3719 3720 error = change_owner(vp, uid, gid, l, 0); 3721 3722 vrele(vp); 3723 3724 return (error); 3725 } 3726 3727 /* 3728 * Set ownership given a path name; this version follows links. 3729 * Provides POSIX semantics. 
3730 */ 3731 /* ARGSUSED */ 3732 int 3733 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, 3734 register_t *retval) 3735 { 3736 /* { 3737 syscallarg(const char *) path; 3738 syscallarg(uid_t) uid; 3739 syscallarg(gid_t) gid; 3740 } */ 3741 int error; 3742 struct vnode *vp; 3743 3744 error = namei_simple_user(SCARG(uap, path), 3745 NSM_FOLLOW_TRYEMULROOT, &vp); 3746 if (error != 0) 3747 return (error); 3748 3749 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3750 3751 vrele(vp); 3752 return (error); 3753 } 3754 3755 /* 3756 * Set ownership given a file descriptor. 3757 */ 3758 /* ARGSUSED */ 3759 int 3760 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, 3761 register_t *retval) 3762 { 3763 /* { 3764 syscallarg(int) fd; 3765 syscallarg(uid_t) uid; 3766 syscallarg(gid_t) gid; 3767 } */ 3768 int error; 3769 file_t *fp; 3770 3771 /* fd_getvnode() will use the descriptor for us */ 3772 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3773 return (error); 3774 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3775 l, 0); 3776 fd_putfile(SCARG(uap, fd)); 3777 return (error); 3778 } 3779 3780 int 3781 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3782 register_t *retval) 3783 { 3784 /* { 3785 syscallarg(int) fd; 3786 syscallarg(const char *) path; 3787 syscallarg(uid_t) owner; 3788 syscallarg(gid_t) group; 3789 syscallarg(int) flag; 3790 } */ 3791 3792 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3793 SCARG(uap, owner), SCARG(uap, group), 3794 SCARG(uap, flag)); 3795 } 3796 3797 /* 3798 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 
3799 */ 3800 /* ARGSUSED */ 3801 int 3802 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, 3803 register_t *retval) 3804 { 3805 /* { 3806 syscallarg(int) fd; 3807 syscallarg(uid_t) uid; 3808 syscallarg(gid_t) gid; 3809 } */ 3810 int error; 3811 file_t *fp; 3812 3813 /* fd_getvnode() will use the descriptor for us */ 3814 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3815 return (error); 3816 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3817 l, 1); 3818 fd_putfile(SCARG(uap, fd)); 3819 return (error); 3820 } 3821 3822 /* 3823 * Set ownership given a path name; this version does not follow links. 3824 */ 3825 /* ARGSUSED */ 3826 int 3827 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, 3828 register_t *retval) 3829 { 3830 /* { 3831 syscallarg(const char *) path; 3832 syscallarg(uid_t) uid; 3833 syscallarg(gid_t) gid; 3834 } */ 3835 int error; 3836 struct vnode *vp; 3837 3838 error = namei_simple_user(SCARG(uap, path), 3839 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3840 if (error != 0) 3841 return (error); 3842 3843 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3844 3845 vrele(vp); 3846 return (error); 3847 } 3848 3849 /* 3850 * Set ownership given a path name; this version does not follow links. 3851 * Provides POSIX/XPG semantics. 3852 */ 3853 /* ARGSUSED */ 3854 int 3855 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, 3856 register_t *retval) 3857 { 3858 /* { 3859 syscallarg(const char *) path; 3860 syscallarg(uid_t) uid; 3861 syscallarg(gid_t) gid; 3862 } */ 3863 int error; 3864 struct vnode *vp; 3865 3866 error = namei_simple_user(SCARG(uap, path), 3867 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3868 if (error != 0) 3869 return (error); 3870 3871 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3872 3873 vrele(vp); 3874 return (error); 3875 } 3876 3877 /* 3878 * Common routine to set ownership given a vnode. 
3879 */ 3880 static int 3881 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3882 int posix_semantics) 3883 { 3884 struct vattr vattr; 3885 mode_t newmode; 3886 int error; 3887 3888 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3889 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3890 goto out; 3891 3892 #define CHANGED(x) ((int)(x) != -1) 3893 newmode = vattr.va_mode; 3894 if (posix_semantics) { 3895 /* 3896 * POSIX/XPG semantics: if the caller is not the super-user, 3897 * clear set-user-id and set-group-id bits. Both POSIX and 3898 * the XPG consider the behaviour for calls by the super-user 3899 * implementation-defined; we leave the set-user-id and set- 3900 * group-id settings intact in that case. 3901 */ 3902 if (vattr.va_mode & S_ISUID) { 3903 if (kauth_authorize_vnode(l->l_cred, 3904 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3905 newmode &= ~S_ISUID; 3906 } 3907 if (vattr.va_mode & S_ISGID) { 3908 if (kauth_authorize_vnode(l->l_cred, 3909 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3910 newmode &= ~S_ISGID; 3911 } 3912 } else { 3913 /* 3914 * NetBSD semantics: when changing owner and/or group, 3915 * clear the respective bit(s). 3916 */ 3917 if (CHANGED(uid)) 3918 newmode &= ~S_ISUID; 3919 if (CHANGED(gid)) 3920 newmode &= ~S_ISGID; 3921 } 3922 /* Update va_mode iff altered. */ 3923 if (vattr.va_mode == newmode) 3924 newmode = VNOVAL; 3925 3926 vattr_null(&vattr); 3927 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3928 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3929 vattr.va_mode = newmode; 3930 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3931 #undef CHANGED 3932 3933 out: 3934 VOP_UNLOCK(vp); 3935 return (error); 3936 } 3937 3938 /* 3939 * Set the access and modification times given a path name; this 3940 * version follows links. 
3941 */ 3942 /* ARGSUSED */ 3943 int 3944 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3945 register_t *retval) 3946 { 3947 /* { 3948 syscallarg(const char *) path; 3949 syscallarg(const struct timeval *) tptr; 3950 } */ 3951 3952 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3953 SCARG(uap, tptr), UIO_USERSPACE); 3954 } 3955 3956 /* 3957 * Set the access and modification times given a file descriptor. 3958 */ 3959 /* ARGSUSED */ 3960 int 3961 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3962 register_t *retval) 3963 { 3964 /* { 3965 syscallarg(int) fd; 3966 syscallarg(const struct timeval *) tptr; 3967 } */ 3968 int error; 3969 file_t *fp; 3970 3971 /* fd_getvnode() will use the descriptor for us */ 3972 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3973 return (error); 3974 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3975 UIO_USERSPACE); 3976 fd_putfile(SCARG(uap, fd)); 3977 return (error); 3978 } 3979 3980 int 3981 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3982 register_t *retval) 3983 { 3984 /* { 3985 syscallarg(int) fd; 3986 syscallarg(const struct timespec *) tptr; 3987 } */ 3988 int error; 3989 file_t *fp; 3990 3991 /* fd_getvnode() will use the descriptor for us */ 3992 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3993 return (error); 3994 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3995 SCARG(uap, tptr), UIO_USERSPACE); 3996 fd_putfile(SCARG(uap, fd)); 3997 return (error); 3998 } 3999 4000 /* 4001 * Set the access and modification times given a path name; this 4002 * version does not follow links. 
4003 */ 4004 int 4005 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 4006 register_t *retval) 4007 { 4008 /* { 4009 syscallarg(const char *) path; 4010 syscallarg(const struct timeval *) tptr; 4011 } */ 4012 4013 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 4014 SCARG(uap, tptr), UIO_USERSPACE); 4015 } 4016 4017 int 4018 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 4019 register_t *retval) 4020 { 4021 /* { 4022 syscallarg(int) fd; 4023 syscallarg(const char *) path; 4024 syscallarg(const struct timespec *) tptr; 4025 syscallarg(int) flag; 4026 } */ 4027 int follow; 4028 const struct timespec *tptr; 4029 int error; 4030 4031 tptr = SCARG(uap, tptr); 4032 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 4033 4034 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 4035 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 4036 4037 return error; 4038 } 4039 4040 /* 4041 * Common routine to set access and modification times given a vnode. 4042 */ 4043 int 4044 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 4045 const struct timespec *tptr, enum uio_seg seg) 4046 { 4047 4048 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 4049 } 4050 4051 int 4052 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 4053 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 4054 { 4055 struct vattr vattr; 4056 int error, dorele = 0; 4057 namei_simple_flags_t sflags; 4058 bool vanull, setbirthtime; 4059 struct timespec ts[2]; 4060 4061 KASSERT(l != NULL || fdat == AT_FDCWD); 4062 4063 /* 4064 * I have checked all callers and they pass either FOLLOW, 4065 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 4066 * is 0. More to the point, they don't pass anything else. 4067 * Let's keep it that way at least until the namei interfaces 4068 * are fully sanitized. 
4069 */ 4070 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4071 sflags = (flag == FOLLOW) ? 4072 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4073 4074 if (tptr == NULL) { 4075 vanull = true; 4076 nanotime(&ts[0]); 4077 ts[1] = ts[0]; 4078 } else { 4079 vanull = false; 4080 if (seg != UIO_SYSSPACE) { 4081 error = copyin(tptr, ts, sizeof (ts)); 4082 if (error != 0) 4083 return error; 4084 } else { 4085 ts[0] = tptr[0]; 4086 ts[1] = tptr[1]; 4087 } 4088 } 4089 4090 if (ts[0].tv_nsec == UTIME_NOW) { 4091 nanotime(&ts[0]); 4092 if (ts[1].tv_nsec == UTIME_NOW) { 4093 vanull = true; 4094 ts[1] = ts[0]; 4095 } 4096 } else if (ts[1].tv_nsec == UTIME_NOW) 4097 nanotime(&ts[1]); 4098 4099 if (vp == NULL) { 4100 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4101 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4102 if (error != 0) 4103 return error; 4104 dorele = 1; 4105 } 4106 4107 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4108 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4109 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4110 vattr_null(&vattr); 4111 4112 if (ts[0].tv_nsec != UTIME_OMIT) 4113 vattr.va_atime = ts[0]; 4114 4115 if (ts[1].tv_nsec != UTIME_OMIT) { 4116 vattr.va_mtime = ts[1]; 4117 if (setbirthtime) 4118 vattr.va_birthtime = ts[1]; 4119 } 4120 4121 if (vanull) 4122 vattr.va_vaflags |= VA_UTIMES_NULL; 4123 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4124 VOP_UNLOCK(vp); 4125 4126 if (dorele != 0) 4127 vrele(vp); 4128 4129 return error; 4130 } 4131 4132 int 4133 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4134 const struct timeval *tptr, enum uio_seg seg) 4135 { 4136 struct timespec ts[2]; 4137 struct timespec *tsptr = NULL; 4138 int error; 4139 4140 if (tptr != NULL) { 4141 struct timeval tv[2]; 4142 4143 if (seg != UIO_SYSSPACE) { 4144 error = copyin(tptr, tv, sizeof(tv)); 4145 if (error != 0) 4146 return error; 4147 tptr = tv; 4148 } 4149 4150 if ((tptr[0].tv_usec == UTIME_NOW) || 4151 
(tptr[0].tv_usec == UTIME_OMIT)) 4152 ts[0].tv_nsec = tptr[0].tv_usec; 4153 else { 4154 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4155 return EINVAL; 4156 4157 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4158 } 4159 4160 if ((tptr[1].tv_usec == UTIME_NOW) || 4161 (tptr[1].tv_usec == UTIME_OMIT)) 4162 ts[1].tv_nsec = tptr[1].tv_usec; 4163 else { 4164 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4165 return EINVAL; 4166 4167 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4168 } 4169 4170 tsptr = &ts[0]; 4171 } 4172 4173 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4174 } 4175 4176 /* 4177 * Truncate a file given its path name. 4178 */ 4179 /* ARGSUSED */ 4180 int 4181 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, 4182 register_t *retval) 4183 { 4184 /* { 4185 syscallarg(const char *) path; 4186 syscallarg(int) pad; 4187 syscallarg(off_t) length; 4188 } */ 4189 struct vnode *vp; 4190 struct vattr vattr; 4191 int error; 4192 4193 if (SCARG(uap, length) < 0) 4194 return EINVAL; 4195 4196 error = namei_simple_user(SCARG(uap, path), 4197 NSM_FOLLOW_TRYEMULROOT, &vp); 4198 if (error != 0) 4199 return (error); 4200 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4201 if (vp->v_type == VDIR) 4202 error = EISDIR; 4203 else if ((error = vn_writechk(vp)) == 0 && 4204 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4205 vattr_null(&vattr); 4206 vattr.va_size = SCARG(uap, length); 4207 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4208 } 4209 vput(vp); 4210 return (error); 4211 } 4212 4213 /* 4214 * Truncate a file given a file descriptor. 
4215 */ 4216 /* ARGSUSED */ 4217 int 4218 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, 4219 register_t *retval) 4220 { 4221 /* { 4222 syscallarg(int) fd; 4223 syscallarg(int) pad; 4224 syscallarg(off_t) length; 4225 } */ 4226 file_t *fp; 4227 int error, fd = SCARG(uap, fd); 4228 4229 fp = fd_getfile(fd); 4230 if (fp == NULL) 4231 return EBADF; 4232 if (fp->f_ops->fo_truncate == NULL) 4233 error = EOPNOTSUPP; 4234 else 4235 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4236 4237 fd_putfile(fd); 4238 return error; 4239 } 4240 4241 /* 4242 * Sync an open file. 4243 */ 4244 /* ARGSUSED */ 4245 int 4246 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4247 { 4248 /* { 4249 syscallarg(int) fd; 4250 } */ 4251 struct vnode *vp; 4252 file_t *fp; 4253 int error; 4254 4255 /* fd_getvnode() will use the descriptor for us */ 4256 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4257 return (error); 4258 vp = fp->f_vnode; 4259 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4260 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4261 VOP_UNLOCK(vp); 4262 fd_putfile(SCARG(uap, fd)); 4263 return (error); 4264 } 4265 4266 /* 4267 * Sync a range of file data. API modeled after that found in AIX. 4268 * 4269 * FDATASYNC indicates that we need only save enough metadata to be able 4270 * to re-read the written data. 
4271 */ 4272 /* ARGSUSED */ 4273 int 4274 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, 4275 register_t *retval) 4276 { 4277 /* { 4278 syscallarg(int) fd; 4279 syscallarg(int) flags; 4280 syscallarg(off_t) start; 4281 syscallarg(off_t) length; 4282 } */ 4283 struct vnode *vp; 4284 file_t *fp; 4285 int flags, nflags; 4286 off_t s, e, len; 4287 int error; 4288 4289 /* fd_getvnode() will use the descriptor for us */ 4290 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4291 return (error); 4292 4293 if ((fp->f_flag & FWRITE) == 0) { 4294 error = EBADF; 4295 goto out; 4296 } 4297 4298 flags = SCARG(uap, flags); 4299 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4300 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4301 error = EINVAL; 4302 goto out; 4303 } 4304 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4305 if (flags & FDATASYNC) 4306 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4307 else 4308 nflags = FSYNC_WAIT; 4309 if (flags & FDISKSYNC) 4310 nflags |= FSYNC_CACHE; 4311 4312 len = SCARG(uap, length); 4313 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4314 if (len) { 4315 s = SCARG(uap, start); 4316 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4317 error = EINVAL; 4318 goto out; 4319 } 4320 e = s + len; 4321 KASSERT(s <= e); 4322 } else { 4323 e = 0; 4324 s = 0; 4325 } 4326 4327 vp = fp->f_vnode; 4328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4329 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4330 VOP_UNLOCK(vp); 4331 out: 4332 fd_putfile(SCARG(uap, fd)); 4333 return (error); 4334 } 4335 4336 /* 4337 * Sync the data of an open file. 
4338 */ 4339 /* ARGSUSED */ 4340 int 4341 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, 4342 register_t *retval) 4343 { 4344 /* { 4345 syscallarg(int) fd; 4346 } */ 4347 struct vnode *vp; 4348 file_t *fp; 4349 int error; 4350 4351 /* fd_getvnode() will use the descriptor for us */ 4352 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4353 return (error); 4354 vp = fp->f_vnode; 4355 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4356 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4357 VOP_UNLOCK(vp); 4358 fd_putfile(SCARG(uap, fd)); 4359 return (error); 4360 } 4361 4362 /* 4363 * Rename files, (standard) BSD semantics frontend. 4364 */ 4365 /* ARGSUSED */ 4366 int 4367 sys_rename(struct lwp *l, const struct sys_rename_args *uap, 4368 register_t *retval) 4369 { 4370 /* { 4371 syscallarg(const char *) from; 4372 syscallarg(const char *) to; 4373 } */ 4374 4375 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4376 SCARG(uap, to), UIO_USERSPACE, 0); 4377 } 4378 4379 int 4380 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4381 register_t *retval) 4382 { 4383 /* { 4384 syscallarg(int) fromfd; 4385 syscallarg(const char *) from; 4386 syscallarg(int) tofd; 4387 syscallarg(const char *) to; 4388 } */ 4389 4390 return do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4391 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0); 4392 } 4393 4394 /* 4395 * Rename files, POSIX semantics frontend. 4396 */ 4397 /* ARGSUSED */ 4398 int 4399 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, 4400 register_t *retval) 4401 { 4402 /* { 4403 syscallarg(const char *) from; 4404 syscallarg(const char *) to; 4405 } */ 4406 4407 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4408 SCARG(uap, to), UIO_USERSPACE, 1); 4409 } 4410 4411 /* 4412 * Rename files. Source and destination must either both be directories, 4413 * or both not be directories. 
If target is a directory, it must be empty. 4414 * If `from' and `to' refer to the same object, the value of the `retain' 4415 * argument is used to determine whether `from' will be 4416 * 4417 * (retain == 0) deleted unless `from' and `to' refer to the same 4418 * object in the file system's name space (BSD). 4419 * (retain == 1) always retained (POSIX). 4420 * 4421 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4422 */ 4423 int 4424 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4425 { 4426 4427 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, 4428 retain); 4429 } 4430 4431 static int 4432 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4433 const char *to, enum uio_seg seg, int retain) 4434 { 4435 struct pathbuf *fpb, *tpb; 4436 struct nameidata fnd, tnd; 4437 struct vnode *fdvp, *fvp; 4438 struct vnode *tdvp, *tvp; 4439 struct mount *mp, *tmp; 4440 int error; 4441 4442 KASSERT(l != NULL || fromfd == AT_FDCWD); 4443 KASSERT(l != NULL || tofd == AT_FDCWD); 4444 4445 error = pathbuf_maybe_copyin(from, seg, &fpb); 4446 if (error) 4447 goto out0; 4448 KASSERT(fpb != NULL); 4449 4450 error = pathbuf_maybe_copyin(to, seg, &tpb); 4451 if (error) 4452 goto out1; 4453 KASSERT(tpb != NULL); 4454 4455 /* 4456 * Lookup from. 4457 * 4458 * XXX LOCKPARENT is wrong because we don't actually want it 4459 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4460 * insane, so for the time being we need to leave it like this. 4461 */ 4462 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4463 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4464 goto out2; 4465 4466 /* 4467 * Pull out the important results of the lookup, fdvp and fvp. 4468 * Of course, fvp is bogus because we're about to unlock fdvp. 
4469 */ 4470 fdvp = fnd.ni_dvp; 4471 fvp = fnd.ni_vp; 4472 mp = fdvp->v_mount; 4473 KASSERT(fdvp != NULL); 4474 KASSERT(fvp != NULL); 4475 KASSERT(fdvp == fvp || VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); 4476 /* 4477 * Bracket the operation with fstrans_start()/fstrans_done(). 4478 * 4479 * Inside the bracket this file system cannot be unmounted so 4480 * a vnode on this file system cannot change its v_mount. 4481 * A vnode on another file system may still change to dead mount. 4482 */ 4483 fstrans_start(mp); 4484 4485 /* 4486 * Make sure neither fdvp nor fvp is locked. 4487 */ 4488 if (fdvp != fvp) 4489 VOP_UNLOCK(fdvp); 4490 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4491 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4492 4493 /* 4494 * Reject renaming `.' and `..'. Can't do this until after 4495 * namei because we need namei's parsing to find the final 4496 * component name. (namei should just leave us with the final 4497 * component name and not look it up itself, but anyway...) 4498 * 4499 * This was here before because we used to relookup from 4500 * instead of to and relookup requires the caller to check 4501 * this, but now file systems may depend on this check, so we 4502 * must retain it until the file systems are all rototilled. 4503 */ 4504 if ((fnd.ni_cnd.cn_namelen == 1 && 4505 fnd.ni_cnd.cn_nameptr[0] == '.') || 4506 (fnd.ni_cnd.cn_namelen == 2 && 4507 fnd.ni_cnd.cn_nameptr[0] == '.' && 4508 fnd.ni_cnd.cn_nameptr[1] == '.')) { 4509 error = EINVAL; /* XXX EISDIR? */ 4510 goto abort0; 4511 } 4512 4513 /* 4514 * Lookup to. 4515 * 4516 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4517 * fvp here to decide whether to add CREATEDIR is a load of 4518 * bollocks because fvp might be the wrong node by now, since 4519 * fdvp is unlocked. 4520 * 4521 * XXX Why not pass CREATEDIR always? 4522 */ 4523 NDINIT(&tnd, RENAME, 4524 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4525 ((fvp->v_type == VDIR)? 
CREATEDIR : 0)), 4526 tpb); 4527 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4528 goto abort0; 4529 4530 /* 4531 * Pull out the important results of the lookup, tdvp and tvp. 4532 * Of course, tvp is bogus because we're about to unlock tdvp. 4533 */ 4534 tdvp = tnd.ni_dvp; 4535 tvp = tnd.ni_vp; 4536 KASSERT(tdvp != NULL); 4537 KASSERT(tdvp == tvp || VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4538 4539 if (fvp->v_type == VDIR) 4540 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4541 /* 4542 * Make sure neither tdvp nor tvp is locked. 4543 */ 4544 if (tdvp != tvp) 4545 VOP_UNLOCK(tdvp); 4546 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4547 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4548 4549 /* 4550 * Reject renaming onto `.' or `..'. relookup is unhappy with 4551 * these, which is why we must do this here. Once upon a time 4552 * we relooked up from instead of to, and consequently didn't 4553 * need this check, but now that we relookup to instead of 4554 * from, we need this; and we shall need it forever forward 4555 * until the VOP_RENAME protocol changes, because file systems 4556 * will no doubt begin to depend on this check. 4557 */ 4558 if (tnd.ni_cnd.cn_namelen == 1 && tnd.ni_cnd.cn_nameptr[0] == '.') { 4559 error = EISDIR; 4560 goto abort1; 4561 } 4562 if (tnd.ni_cnd.cn_namelen == 2 && 4563 tnd.ni_cnd.cn_nameptr[0] == '.' && 4564 tnd.ni_cnd.cn_nameptr[1] == '.') { 4565 error = EINVAL; 4566 goto abort1; 4567 } 4568 4569 /* 4570 * Make sure the mount points match. Although we don't hold 4571 * any vnode locks, the v_mount on fdvp file system are stable. 4572 * 4573 * Unmounting another file system at an inopportune moment may 4574 * cause tdvp to disappear and change its v_mount to dead. 4575 * 4576 * So in either case different v_mount means cross-device rename. 
4577 */ 4578 KASSERT(mp != NULL); 4579 tmp = tdvp->v_mount; 4580 4581 if (mp != tmp) { 4582 error = EXDEV; 4583 goto abort1; 4584 } 4585 4586 /* 4587 * Take the vfs rename lock to avoid cross-directory screw cases. 4588 * Nothing is locked currently, so taking this lock is safe. 4589 */ 4590 error = VFS_RENAMELOCK_ENTER(mp); 4591 if (error) 4592 goto abort1; 4593 4594 /* 4595 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4596 * and nothing is locked except for the vfs rename lock. 4597 * 4598 * The next step is a little rain dance to conform to the 4599 * insane lock protocol, even though it does nothing to ward 4600 * off race conditions. 4601 * 4602 * We need tdvp and tvp to be locked. However, because we have 4603 * unlocked tdvp in order to hold no locks while we take the 4604 * vfs rename lock, tvp may be wrong here, and we can't safely 4605 * lock it even if the sensible file systems will just unlock 4606 * it straight away. Consequently, we must lock tdvp and then 4607 * relookup tvp to get it locked. 4608 * 4609 * Finally, because the VOP_RENAME protocol is brain-damaged 4610 * and various file systems insanely depend on the semantics of 4611 * this brain damage, the lookup of to must be the last lookup 4612 * before VOP_RENAME. 4613 */ 4614 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4615 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4616 if (error) 4617 goto abort2; 4618 4619 /* 4620 * Drop the old tvp and pick up the new one -- which might be 4621 * the same, but that doesn't matter to us. After this, tdvp 4622 * and tvp should both be locked. 4623 */ 4624 if (tvp != NULL) 4625 vrele(tvp); 4626 tvp = tnd.ni_vp; 4627 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4628 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4629 4630 /* 4631 * The old do_sys_rename had various consistency checks here 4632 * involving fvp and tvp. 
fvp is bogus already here, and tvp 4633 * will become bogus soon in any sensible file system, so the 4634 * only purpose in putting these checks here is to give lip 4635 * service to these screw cases and to acknowledge that they 4636 * exist, not actually to handle them, but here you go 4637 * anyway... 4638 */ 4639 4640 /* 4641 * Acknowledge that directories and non-directories aren't 4642 * supposed to mix. 4643 */ 4644 if (tvp != NULL) { 4645 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 4646 error = ENOTDIR; 4647 goto abort3; 4648 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 4649 error = EISDIR; 4650 goto abort3; 4651 } 4652 } 4653 4654 /* 4655 * Acknowledge some random screw case, among the dozens that 4656 * might arise. 4657 */ 4658 if (fvp == tdvp) { 4659 error = EINVAL; 4660 goto abort3; 4661 } 4662 4663 /* 4664 * Acknowledge that POSIX has a wacky screw case. 4665 * 4666 * XXX Eventually the retain flag needs to be passed on to 4667 * VOP_RENAME. 4668 */ 4669 if (fvp == tvp) { 4670 if (retain) { 4671 error = 0; 4672 goto abort3; 4673 } else if (fdvp == tdvp && 4674 fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen && 4675 0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4676 fnd.ni_cnd.cn_namelen)) { 4677 error = 0; 4678 goto abort3; 4679 } 4680 } 4681 4682 /* 4683 * Make sure veriexec can screw us up. (But a race can screw 4684 * up veriexec, of course -- remember, fvp and (soon) tvp are 4685 * bogus.) 
4686 */ 4687 #if NVERIEXEC > 0 4688 { 4689 char *f1, *f2; 4690 size_t f1_len; 4691 size_t f2_len; 4692 4693 f1_len = fnd.ni_cnd.cn_namelen + 1; 4694 f1 = kmem_alloc(f1_len, KM_SLEEP); 4695 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4696 4697 f2_len = tnd.ni_cnd.cn_namelen + 1; 4698 f2 = kmem_alloc(f2_len, KM_SLEEP); 4699 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4700 4701 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4702 4703 kmem_free(f1, f1_len); 4704 kmem_free(f2, f2_len); 4705 4706 if (error) 4707 goto abort3; 4708 } 4709 #endif /* NVERIEXEC > 0 */ 4710 4711 /* 4712 * All ready. Incant the rename vop. 4713 */ 4714 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4715 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4716 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4717 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4718 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4719 4720 /* 4721 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4722 * tdvp and tvp. But we can't assert any of that. 4723 */ 4724 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4725 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4726 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4727 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4728 4729 /* 4730 * So all we have left to do is to drop the rename lock and 4731 * destroy the pathbufs. 4732 */ 4733 VFS_RENAMELOCK_EXIT(mp); 4734 fstrans_done(mp); 4735 goto out2; 4736 4737 abort3: if (tvp != NULL && tvp != tdvp) 4738 VOP_UNLOCK(tvp); 4739 abort2: VOP_UNLOCK(tdvp); 4740 VFS_RENAMELOCK_EXIT(mp); 4741 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4742 vrele(tdvp); 4743 if (tvp != NULL) 4744 vrele(tvp); 4745 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4746 vrele(fdvp); 4747 vrele(fvp); 4748 fstrans_done(mp); 4749 out2: pathbuf_destroy(tpb); 4750 out1: pathbuf_destroy(fpb); 4751 out0: return error; 4752 } 4753 4754 /* 4755 * Make a directory file. 
4756 */ 4757 /* ARGSUSED */ 4758 int 4759 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4760 { 4761 /* { 4762 syscallarg(const char *) path; 4763 syscallarg(int) mode; 4764 } */ 4765 4766 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4767 SCARG(uap, mode), UIO_USERSPACE); 4768 } 4769 4770 int 4771 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4772 register_t *retval) 4773 { 4774 /* { 4775 syscallarg(int) fd; 4776 syscallarg(const char *) path; 4777 syscallarg(int) mode; 4778 } */ 4779 4780 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4781 SCARG(uap, mode), UIO_USERSPACE); 4782 } 4783 4784 int 4785 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4786 { 4787 4788 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4789 } 4790 4791 static int 4792 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4793 enum uio_seg seg) 4794 { 4795 struct proc *p = curlwp->l_proc; 4796 struct vnode *vp; 4797 struct vattr vattr; 4798 int error; 4799 struct pathbuf *pb; 4800 struct nameidata nd; 4801 4802 KASSERT(l != NULL || fdat == AT_FDCWD); 4803 4804 /* XXX bollocks, should pass in a pathbuf */ 4805 error = pathbuf_maybe_copyin(path, seg, &pb); 4806 if (error) { 4807 return error; 4808 } 4809 4810 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4811 4812 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4813 pathbuf_destroy(pb); 4814 return (error); 4815 } 4816 vp = nd.ni_vp; 4817 if (vp != NULL) { 4818 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4819 if (nd.ni_dvp == vp) 4820 vrele(nd.ni_dvp); 4821 else 4822 vput(nd.ni_dvp); 4823 vrele(vp); 4824 pathbuf_destroy(pb); 4825 return (EEXIST); 4826 } 4827 vattr_null(&vattr); 4828 vattr.va_type = VDIR; 4829 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 4830 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4831 nd.ni_cnd.cn_flags |= WILLBEDIR; 4832 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4833 if (!error) 4834 vrele(nd.ni_vp); 4835 vput(nd.ni_dvp); 4836 pathbuf_destroy(pb); 4837 return (error); 4838 } 4839 4840 /* 4841 * Remove a directory file. 4842 */ 4843 /* ARGSUSED */ 4844 int 4845 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4846 { 4847 /* { 4848 syscallarg(char *) path; 4849 } */ 4850 4851 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR, 4852 UIO_USERSPACE); 4853 } 4854 4855 /* 4856 * Read a block of directory entries in a file system independent format. 4857 */ 4858 int 4859 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, 4860 register_t *retval) 4861 { 4862 /* { 4863 syscallarg(int) fd; 4864 syscallarg(char *) buf; 4865 syscallarg(size_t) count; 4866 } */ 4867 file_t *fp; 4868 int error, done; 4869 4870 /* fd_getvnode() will use the descriptor for us */ 4871 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4872 return (error); 4873 if ((fp->f_flag & FREAD) == 0) { 4874 error = EBADF; 4875 goto out; 4876 } 4877 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4878 SCARG(uap, count), &done, l, 0, 0); 4879 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4880 *retval = done; 4881 out: 4882 fd_putfile(SCARG(uap, fd)); 4883 return (error); 4884 } 4885 4886 /* 4887 * Set the mode mask for creation of filesystem nodes. 4888 */ 4889 int 4890 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4891 { 4892 /* { 4893 syscallarg(mode_t) newmask; 4894 } */ 4895 4896 /* 4897 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4898 * serialization with those reads is required. It's important to 4899 * return a coherent answer for the caller of umask() though, and 4900 * the atomic operation accomplishes that. 
4901 */ 4902 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4903 SCARG(uap, newmask) & ALLPERMS); 4904 4905 return (0); 4906 } 4907 4908 int 4909 dorevoke(struct vnode *vp, kauth_cred_t cred) 4910 { 4911 struct vattr vattr; 4912 int error, fs_decision; 4913 4914 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4915 error = VOP_GETATTR(vp, &vattr, cred); 4916 VOP_UNLOCK(vp); 4917 if (error != 0) 4918 return error; 4919 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4920 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4921 fs_decision); 4922 if (!error) 4923 VOP_REVOKE(vp, REVOKEALL); 4924 return (error); 4925 } 4926 4927 /* 4928 * Void all references to file by ripping underlying filesystem 4929 * away from vnode. 4930 */ 4931 /* ARGSUSED */ 4932 int 4933 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, 4934 register_t *retval) 4935 { 4936 /* { 4937 syscallarg(const char *) path; 4938 } */ 4939 struct vnode *vp; 4940 int error; 4941 4942 error = namei_simple_user(SCARG(uap, path), NSM_FOLLOW_TRYEMULROOT, 4943 &vp); 4944 if (error != 0) 4945 return (error); 4946 error = dorevoke(vp, l->l_cred); 4947 vrele(vp); 4948 return (error); 4949 } 4950 4951 /* 4952 * Allocate backing store for a file, filling a hole without having to 4953 * explicitly write anything out. 
4954 */ 4955 /* ARGSUSED */ 4956 int 4957 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4958 register_t *retval) 4959 { 4960 /* { 4961 syscallarg(int) fd; 4962 syscallarg(off_t) pos; 4963 syscallarg(off_t) len; 4964 } */ 4965 int fd; 4966 off_t pos, len; 4967 struct file *fp; 4968 struct vnode *vp; 4969 int error; 4970 4971 fd = SCARG(uap, fd); 4972 pos = SCARG(uap, pos); 4973 len = SCARG(uap, len); 4974 4975 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4976 *retval = EINVAL; 4977 return 0; 4978 } 4979 4980 error = fd_getvnode(fd, &fp); 4981 if (error) { 4982 *retval = error; 4983 return 0; 4984 } 4985 if ((fp->f_flag & FWRITE) == 0) { 4986 error = EBADF; 4987 goto fail; 4988 } 4989 vp = fp->f_vnode; 4990 4991 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4992 if (vp->v_type == VDIR) { 4993 error = EISDIR; 4994 } else { 4995 error = VOP_FALLOCATE(vp, pos, len); 4996 } 4997 VOP_UNLOCK(vp); 4998 4999 fail: 5000 fd_putfile(fd); 5001 *retval = error; 5002 return 0; 5003 } 5004 5005 /* 5006 * Deallocate backing store for a file, creating a hole. Also used for 5007 * invoking TRIM on disks. 
5008 */ 5009 /* ARGSUSED */ 5010 int 5011 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 5012 register_t *retval) 5013 { 5014 /* { 5015 syscallarg(int) fd; 5016 syscallarg(off_t) pos; 5017 syscallarg(off_t) len; 5018 } */ 5019 int fd; 5020 off_t pos, len; 5021 struct file *fp; 5022 struct vnode *vp; 5023 int error; 5024 5025 fd = SCARG(uap, fd); 5026 pos = SCARG(uap, pos); 5027 len = SCARG(uap, len); 5028 5029 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 5030 return EINVAL; 5031 } 5032 5033 error = fd_getvnode(fd, &fp); 5034 if (error) { 5035 return error; 5036 } 5037 if ((fp->f_flag & FWRITE) == 0) { 5038 error = EBADF; 5039 goto fail; 5040 } 5041 vp = fp->f_vnode; 5042 5043 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 5044 if (vp->v_type == VDIR) { 5045 error = EISDIR; 5046 } else { 5047 error = VOP_FDISCARD(vp, pos, len); 5048 } 5049 VOP_UNLOCK(vp); 5050 5051 fail: 5052 fd_putfile(fd); 5053 return error; 5054 } 5055