1 /* $NetBSD: vfs_syscalls.c,v 1.572 2026/01/03 23:08:53 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.572 2026/01/03 23:08:53 riastradh Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/types.h> 82 83 #include <sys/atomic.h> 84 #include <sys/buf.h> 85 #include <sys/compat_stub.h> 86 #include <sys/dirent.h> 87 #include <sys/event.h> 88 #include <sys/extattr.h> 89 #include <sys/fcntl.h> 90 #include <sys/file.h> 91 #ifdef FILEASSOC 92 #include <sys/fileassoc.h> 93 #endif /* FILEASSOC */ 94 #include <sys/filedesc.h> 95 #include <sys/fstrans.h> 96 #include <sys/kauth.h> 97 #include <sys/kernel.h> 98 #include <sys/kmem.h> 99 #include <sys/ktrace.h> 100 #include <sys/module.h> 101 #include <sys/mount.h> 102 #include <sys/namei.h> 103 #include <sys/proc.h> 104 #include <sys/quota.h> 105 #include <sys/quotactl.h> 106 #include <sys/sdt.h> 107 #include <sys/stat.h> 108 #include <sys/syscallargs.h> 109 #include <sys/sysctl.h> 110 #include <sys/systm.h> 111 #include <sys/uio.h> 112 #include <sys/verified_exec.h> 113 #include <sys/vfs_syscalls.h> 114 #include <sys/vnode.h> 115 116 #include <miscfs/genfs/genfs.h> 117 #include <miscfs/specfs/specdev.h> 118 119 #include <nfs/nfs.h> 120 #include <nfs/nfs_var.h> 121 #include <nfs/nfsproto.h> 122 #include <nfs/rpcv2.h> 123 124 /* XXX this shouldn't be here */ 125 #ifndef OFF_T_MAX 126 #define OFF_T_MAX __type_max(off_t) 127 #endif 128 129 static int change_flags(struct vnode *, u_long, struct lwp *); 130 static int change_mode(struct vnode *, int, struct lwp *); 131 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 132 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 133 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 134 enum uio_seg); 135 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 136 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 137 enum uio_seg); 138 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 139 enum uio_seg, int); 140 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 141 size_t, register_t *); 142 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 143 144 static int fd_nameiat(struct lwp *, int, struct nameidata *); 145 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 146 namei_simple_flags_t, struct vnode **); 147 148 /* 149 * This table is used to maintain compatibility with 4.3BSD 150 * and NetBSD 0.9 mount syscalls - and possibly other systems. 151 * Note, the order is important! 152 * 153 * Do not modify this table. It should only contain filesystems 154 * supported by NetBSD 0.9 and 4.3BSD. 155 */ 156 const char * const mountcompatnames[] = { 157 NULL, /* 0 = MOUNT_NONE */ 158 MOUNT_FFS, /* 1 = MOUNT_UFS */ 159 MOUNT_NFS, /* 2 */ 160 MOUNT_MFS, /* 3 */ 161 MOUNT_MSDOS, /* 4 */ 162 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 163 MOUNT_FDESC, /* 6 */ 164 MOUNT_KERNFS, /* 7 */ 165 NULL, /* 8 = MOUNT_DEVFS */ 166 MOUNT_AFS, /* 9 */ 167 }; 168 169 const u_int nmountcompatnames = __arraycount(mountcompatnames); 170 171 /* 172 * Filter event method for EVFILT_FS. 173 */ 174 static struct klist fs_klist; 175 static kmutex_t fs_klist_lock; 176 177 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 178 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 179 180 void 181 vfs_evfilt_fs_init(void) 182 { 183 184 klist_init(&fs_klist); 185 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 186 } 187 188 static int 189 filt_fsattach(struct knote *kn) 190 { 191 192 mutex_enter(&fs_klist_lock); 193 kn->kn_flags |= EV_CLEAR; 194 klist_insert(&fs_klist, kn); 195 mutex_exit(&fs_klist_lock); 196 197 return 0; 198 } 199 200 static void 201 filt_fsdetach(struct knote *kn) 202 { 203 204 mutex_enter(&fs_klist_lock); 205 klist_remove(&fs_klist, kn); 206 mutex_exit(&fs_klist_lock); 207 } 208 209 static int 210 filt_fs(struct knote *kn, long hint) 211 { 212 int rv; 213 214 if (hint & NOTE_SUBMIT) { 215 KASSERT(mutex_owned(&fs_klist_lock)); 216 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 217 } else { 218 mutex_enter(&fs_klist_lock); 219 } 220 221 rv = (kn->kn_fflags != 0); 222 223 if ((hint & NOTE_SUBMIT) == 0) { 224 mutex_exit(&fs_klist_lock); 225 } 226 227 return rv; 228 } 229 230 /* referenced in kern_event.c */ 231 const struct filterops fs_filtops = { 232 .f_flags = FILTEROP_MPSAFE, 233 .f_attach = filt_fsattach, 234 .f_detach = filt_fsdetach, 235 .f_event = filt_fs, 236 }; 237 238 static int 239 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 240 { 241 file_t *dfp; 242 int error; 243 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 244 245 if (fdat != AT_FDCWD && path[0] != '/') { 246 if ((error = fd_getvnode(fdat, &dfp)) != 0) 247 goto out; 248 249 NDAT(ndp, dfp->f_vnode); 250 } 251 252 error = namei(ndp); 253 254 if (fdat != AT_FDCWD && path[0] != '/') 255 fd_putfile(fdat); 256 out: 257 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 258 return error; 259 } 260 261 static int 262 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 263 namei_simple_flags_t sflags, struct vnode **vp_ret) 264 { 265 file_t *dfp; 266 struct vnode *dvp; 267 int error; 268 struct pathbuf *pb; 269 const char *p; 270 271 error = pathbuf_copyin(path, &pb); 272 if (error) { 273 return error; 274 } 275 p = pathbuf_stringcopy_get(pb); 276 277 if (fdat != AT_FDCWD && p[0] != '/') { 278 if ((error = fd_getvnode(fdat, &dfp)) != 0) 279 goto out; 280 281 dvp = dfp->f_vnode; 282 } else { 283 dvp = NULL; 284 } 285 286 error = nameiat_simple(dvp, pb, sflags, vp_ret); 287 288 if (fdat != AT_FDCWD && p[0] != '/') 289 fd_putfile(fdat); 290 291 out: 292 pathbuf_stringcopy_put(pb, p); 293 pathbuf_destroy(pb); 294 295 return error; 296 } 297 298 static int 299 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 300 { 301 int error; 302 303 fp->f_flag = flags & FMASK; 304 fp->f_type = DTYPE_VNODE; 305 fp->f_ops = &vnops; 306 fp->f_vnode = vp; 307 308 if (flags & (O_EXLOCK | O_SHLOCK)) { 309 struct flock lf; 310 int type; 311 312 lf.l_whence = SEEK_SET; 313 lf.l_start = 0; 314 lf.l_len = 0; 315 if (flags & O_EXLOCK) 316 lf.l_type = F_WRLCK; 317 else 318 lf.l_type = F_RDLCK; 319 type = F_FLOCK; 320 if ((flags & FNONBLOCK) == 0) 321 type |= F_WAIT; 322 VOP_UNLOCK(vp); 323 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 324 if (error) { 325 (void) vn_close(vp, fp->f_flag, fp->f_cred); 326 fd_abort(l->l_proc, fp, indx); 327 return error; 328 } 329 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 330 atomic_or_uint(&fp->f_flag, FHASLOCK); 331 } 332 if (flags & O_CLOEXEC) 333 fd_set_exclose(l, indx, true); 334 if (flags & O_CLOFORK) 335 fd_set_foclose(l, indx, true); 336 return 0; 337 } 338 339 static int 340 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 341 void *data, size_t *data_len) 342 { 343 struct mount *mp; 344 int error = 0, saved_flags; 345 346 mp = vp->v_mount; 347 saved_flags = mp->mnt_flag; 348 349 /* We can operate only on VV_ROOT nodes. */ 350 if ((vp->v_vflag & VV_ROOT) == 0) { 351 error = SET_ERROR(EINVAL); 352 goto out; 353 } 354 355 /* 356 * We only allow the filesystem to be reloaded if it 357 * is currently mounted read-only. Additionally, we 358 * prevent read-write to read-only downgrades. 359 */ 360 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 361 (mp->mnt_flag & MNT_RDONLY) == 0 && 362 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 363 error = SET_ERROR(EOPNOTSUPP); /* Needs translation */ 364 goto out; 365 } 366 367 /* 368 * Enabling MNT_UNION requires a covered mountpoint and 369 * must not happen on the root mount. 370 */ 371 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 372 error = SET_ERROR(EOPNOTSUPP); 373 goto out; 374 } 375 376 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 377 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 378 if (error) 379 goto out; 380 381 error = vfs_suspend(mp, 0); 382 if (error) 383 goto out; 384 385 mutex_enter(mp->mnt_updating); 386 387 mp->mnt_flag &= ~MNT_OP_FLAGS; 388 mp->mnt_flag |= flags & MNT_OP_FLAGS; 389 390 /* 391 * Set the mount level flags. 392 */ 393 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 394 if ((flags & MNT_RDONLY)) 395 mp->mnt_iflag |= IMNT_WANTRDONLY; 396 else 397 mp->mnt_iflag |= IMNT_WANTRDWR; 398 } 399 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 400 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 401 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 402 mp->mnt_flag &= ~MNT_RDONLY; 403 404 error = VFS_MOUNT(mp, path, data, data_len); 405 406 if (error && data != NULL) { 407 int error2; 408 409 /* 410 * Update failed; let's try and see if it was an 411 * export request. For compat with 3.0 and earlier. 412 */ 413 error2 = vfs_hooks_reexport(mp, path, data); 414 415 /* 416 * Only update error code if the export request was 417 * understood but some problem occurred while 418 * processing it. 419 */ 420 if (error2 != EJUSTRETURN) 421 error = error2; 422 } 423 424 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 425 mp->mnt_flag |= MNT_RDONLY; 426 if (error) 427 mp->mnt_flag = saved_flags; 428 mp->mnt_flag &= ~MNT_OP_FLAGS; 429 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 430 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 431 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 432 vfs_syncer_add_to_worklist(mp); 433 } else { 434 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 435 vfs_syncer_remove_from_worklist(mp); 436 } 437 mutex_exit(mp->mnt_updating); 438 vfs_resume(mp); 439 440 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 441 (flags & MNT_EXTATTR)) { 442 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 443 NULL, 0, NULL) != 0) { 444 printf("%s: failed to start extattr, error = %d", 445 mp->mnt_stat.f_mntonname, error); 446 mp->mnt_flag &= ~MNT_EXTATTR; 447 } 448 } 449 450 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 451 !(flags & MNT_EXTATTR)) { 452 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 453 NULL, 0, NULL) != 0) { 454 printf("%s: failed to stop extattr, error = %d", 455 mp->mnt_stat.f_mntonname, error); 456 mp->mnt_flag |= MNT_RDONLY; 457 } 458 } 459 out: 460 return (error); 461 } 462 463 static int 464 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 465 struct vfsops **vfsops) 466 { 467 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 468 int error; 469 470 if (type_seg == UIO_USERSPACE) { 471 /* Copy file-system type from userspace. */ 472 error = copyinstr(fstype, fstypename, sizeof(fstypename), 473 NULL); 474 } else { 475 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 476 KASSERT(error == 0); 477 } 478 479 if (error) { 480 /* 481 * Historically, filesystem types were identified by numbers. 482 * If we get an integer for the filesystem type instead of a 483 * string, we check to see if it matches one of the historic 484 * filesystem types. 485 */ 486 u_long fsindex = (u_long)fstype; 487 if (fsindex >= nmountcompatnames || 488 mountcompatnames[fsindex] == NULL) 489 return SET_ERROR(ENODEV); 490 strlcpy(fstypename, mountcompatnames[fsindex], 491 sizeof(fstypename)); 492 } 493 494 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 495 if (strcmp(fstypename, "ufs") == 0) 496 fstypename[0] = 'f'; 497 498 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 499 return 0; 500 501 /* If we can autoload a vfs module, try again */ 502 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 503 504 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 505 return 0; 506 507 return SET_ERROR(ENODEV); 508 } 509 510 static int 511 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 512 void *data, size_t *data_len) 513 { 514 struct mount *mp; 515 int error; 516 517 /* If MNT_GETARGS is specified, it should be the only flag. */ 518 if (flags & ~MNT_GETARGS) 519 return SET_ERROR(EINVAL); 520 521 mp = vp->v_mount; 522 523 /* XXX: probably some notion of "can see" here if we want isolation. */ 524 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 525 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 526 if (error) 527 return error; 528 529 if ((vp->v_vflag & VV_ROOT) == 0) 530 return SET_ERROR(EINVAL); 531 532 if (vfs_busy(mp)) 533 return SET_ERROR(EPERM); 534 535 mutex_enter(mp->mnt_updating); 536 mp->mnt_flag &= ~MNT_OP_FLAGS; 537 mp->mnt_flag |= MNT_GETARGS; 538 error = VFS_MOUNT(mp, path, data, data_len); 539 mp->mnt_flag &= ~MNT_OP_FLAGS; 540 mutex_exit(mp->mnt_updating); 541 542 vfs_unbusy(mp); 543 return (error); 544 } 545 546 int 547 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, 548 register_t *retval) 549 { 550 /* { 551 syscallarg(const char *) type; 552 syscallarg(const char *) path; 553 syscallarg(int) flags; 554 syscallarg(void *) data; 555 syscallarg(size_t) data_len; 556 } */ 557 558 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, 559 SCARG(uap, path), SCARG(uap, flags), 560 SCARG(uap, data), UIO_USERSPACE, SCARG(uap, data_len), 561 retval); 562 } 563 564 int 565 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 566 const char *path, int flags, 567 void *data, enum uio_seg data_seg, size_t data_len, 568 register_t *retval) 569 { 570 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 571 struct vnode *vp; 572 void *data_buf = data; 573 bool vfsopsrele = false; 574 size_t alloc_sz = 0; 575 int error; 576 577 /* 578 * Get vnode to be covered 579 */ 580 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 581 if (error != 0) { 582 vp = NULL; 583 goto done; 584 } 585 586 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 587 vfsops = vp->v_mount->mnt_op; 588 } else { 589 /* 'type' is userspace */ 590 error = mount_get_vfsops(type, type_seg, &vfsops); 591 if (error != 0) 592 goto done; 593 vfsopsrele = true; 594 } 595 596 /* 597 * We allow data to be NULL, even for userspace. Some fs's don't need 598 * it. The others will handle NULL. 599 */ 600 if (data != NULL && data_seg == UIO_USERSPACE) { 601 if (data_len == 0) { 602 /* No length supplied, use default for filesystem */ 603 data_len = vfsops->vfs_min_mount_data; 604 605 /* 606 * Hopefully a longer buffer won't make copyin() fail. 607 * For compatibility with 3.0 and earlier. 608 */ 609 if (flags & MNT_UPDATE 610 && data_len < sizeof (struct mnt_export_args30)) 611 data_len = sizeof (struct mnt_export_args30); 612 } 613 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 614 error = SET_ERROR(EINVAL); 615 goto done; 616 } 617 alloc_sz = data_len; 618 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 619 620 /* NFS needs the buffer even for mnt_getargs .... */ 621 error = copyin(data, data_buf, data_len); 622 if (error != 0) 623 goto done; 624 } 625 626 if (flags & MNT_GETARGS) { 627 if (data_len == 0) { 628 error = SET_ERROR(EINVAL); 629 goto done; 630 } 631 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 632 if (error != 0) 633 goto done; 634 if (data_seg == UIO_USERSPACE) 635 error = copyout(data_buf, data, data_len); 636 *retval = data_len; 637 } else if (flags & MNT_UPDATE) { 638 error = mount_update(l, vp, path, flags, data_buf, &data_len); 639 } else { 640 /* Locking is handled internally in mount_domount(). */ 641 KASSERT(vfsopsrele == true); 642 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 643 &data_len); 644 vfsopsrele = false; 645 } 646 if (!error) { 647 mutex_enter(&fs_klist_lock); 648 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 649 mutex_exit(&fs_klist_lock); 650 } 651 652 done: 653 if (vfsopsrele) 654 vfs_delref(vfsops); 655 if (vp != NULL) { 656 vrele(vp); 657 } 658 if (data_buf != data) 659 kmem_free(data_buf, alloc_sz); 660 return (error); 661 } 662 663 /* 664 * Unmount a file system. 665 * 666 * Note: unmount takes a path to the vnode mounted on as argument, 667 * not special file (as before). 668 */ 669 /* ARGSUSED */ 670 int 671 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, 672 register_t *retval) 673 { 674 /* { 675 syscallarg(const char *) path; 676 syscallarg(int) flags; 677 } */ 678 struct vnode *vp; 679 struct mount *mp; 680 int error; 681 struct pathbuf *pb; 682 struct nameidata nd; 683 684 error = pathbuf_copyin(SCARG(uap, path), &pb); 685 if (error) { 686 return error; 687 } 688 689 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 690 if ((error = namei(&nd)) != 0) { 691 pathbuf_destroy(pb); 692 return error; 693 } 694 vp = nd.ni_vp; 695 pathbuf_destroy(pb); 696 697 mp = vp->v_mount; 698 vfs_ref(mp); 699 VOP_UNLOCK(vp); 700 701 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 702 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 703 if (error) { 704 vrele(vp); 705 vfs_rele(mp); 706 return (error); 707 } 708 709 /* 710 * Don't allow unmounting the root file system. 711 */ 712 if (mp->mnt_flag & MNT_ROOTFS) { 713 vrele(vp); 714 vfs_rele(mp); 715 return SET_ERROR(EINVAL); 716 } 717 718 /* 719 * Must be the root of the filesystem 720 */ 721 if ((vp->v_vflag & VV_ROOT) == 0) { 722 vrele(vp); 723 vfs_rele(mp); 724 return SET_ERROR(EINVAL); 725 } 726 727 vrele(vp); 728 error = dounmount(mp, SCARG(uap, flags), l); 729 vfs_rele(mp); 730 if (!error) { 731 mutex_enter(&fs_klist_lock); 732 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 733 mutex_exit(&fs_klist_lock); 734 } 735 return error; 736 } 737 738 /* 739 * Sync each mounted filesystem. 740 */ 741 #ifdef DEBUG 742 int syncprt = 0; 743 struct ctldebug debug0 = { "syncprt", &syncprt }; 744 #endif 745 746 void 747 do_sys_sync(struct lwp *l) 748 { 749 mount_iterator_t *iter; 750 struct mount *mp; 751 int asyncflag; 752 753 mountlist_iterator_init(&iter); 754 while ((mp = mountlist_iterator_next(iter)) != NULL) { 755 mutex_enter(mp->mnt_updating); 756 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 757 /* 758 * Temporarily clear the MNT_ASYNC flags so that 759 * bwrite() doesnt convert the sync writes to 760 * delayed writes. 761 */ 762 asyncflag = mp->mnt_flag & MNT_ASYNC; 763 mp->mnt_flag &= ~MNT_ASYNC; 764 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 765 mp->mnt_flag |= asyncflag; 766 } 767 mutex_exit(mp->mnt_updating); 768 } 769 mountlist_iterator_destroy(iter); 770 #ifdef DEBUG 771 if (syncprt) 772 vfs_bufstats(); 773 #endif /* DEBUG */ 774 } 775 776 static bool 777 sync_vnode_filter(void *cookie, vnode_t *vp) 778 { 779 780 if (vp->v_numoutput > 0) { 781 ++*(int *)cookie; 782 } 783 return false; 784 } 785 786 int 787 vfs_syncwait(void) 788 { 789 int nbusy, nbusy_prev, iter; 790 struct vnode_iterator *vniter; 791 mount_iterator_t *mpiter; 792 struct mount *mp; 793 794 for (nbusy_prev = 0, iter = 0; iter < 20;) { 795 nbusy = 0; 796 mountlist_iterator_init(&mpiter); 797 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 798 vnode_t *vp __diagused; 799 vfs_vnode_iterator_init(mp, &vniter); 800 vp = vfs_vnode_iterator_next(vniter, 801 sync_vnode_filter, &nbusy); 802 KASSERT(vp == NULL); 803 vfs_vnode_iterator_destroy(vniter); 804 } 805 mountlist_iterator_destroy(mpiter); 806 807 if (nbusy == 0) 808 break; 809 if (nbusy_prev == 0) 810 nbusy_prev = nbusy; 811 printf("%d ", nbusy); 812 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 813 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 814 iter++; 815 else 816 nbusy_prev = nbusy; 817 } 818 819 if (nbusy) { 820 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 821 printf("giving up\nPrinting vnodes for busy buffers\n"); 822 mountlist_iterator_init(&mpiter); 823 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 824 vnode_t *vp; 825 vfs_vnode_iterator_init(mp, &vniter); 826 vp = vfs_vnode_iterator_next(vniter, 827 NULL, NULL); 828 mutex_enter(vp->v_interlock); 829 if (vp->v_numoutput > 0) 830 vprint(NULL, vp); 831 mutex_exit(vp->v_interlock); 832 vrele(vp); 833 vfs_vnode_iterator_destroy(vniter); 834 } 835 mountlist_iterator_destroy(mpiter); 836 #endif 837 } 838 839 return nbusy; 840 } 841 842 /* ARGSUSED */ 843 int 844 sys_sync(struct lwp *l, const void *v, register_t *retval) 845 { 846 847 do_sys_sync(l); 848 return (0); 849 } 850 851 /* 852 * Access or change filesystem quotas. 853 * 854 * (this is really 14 different calls bundled into one) 855 */ 856 857 static int 858 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 859 { 860 struct quotastat info_k; 861 int error; 862 863 /* ensure any padding bytes are cleared */ 864 memset(&info_k, 0, sizeof(info_k)); 865 866 error = vfs_quotactl_stat(mp, &info_k); 867 if (error) { 868 return error; 869 } 870 871 return copyout(&info_k, info_u, sizeof(info_k)); 872 } 873 874 static int 875 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 876 struct quotaidtypestat *info_u) 877 { 878 struct quotaidtypestat info_k; 879 int error; 880 881 /* ensure any padding bytes are cleared */ 882 memset(&info_k, 0, sizeof(info_k)); 883 884 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 885 if (error) { 886 return error; 887 } 888 889 return copyout(&info_k, info_u, sizeof(info_k)); 890 } 891 892 static int 893 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 894 struct quotaobjtypestat *info_u) 895 { 896 struct quotaobjtypestat info_k; 897 int error; 898 899 /* ensure any padding bytes are cleared */ 900 memset(&info_k, 0, sizeof(info_k)); 901 902 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 903 if (error) { 904 return error; 905 } 906 907 return copyout(&info_k, info_u, sizeof(info_k)); 908 } 909 910 static int 911 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 912 struct quotaval *val_u) 913 { 914 struct quotakey key_k; 915 struct quotaval val_k; 916 int error; 917 918 /* ensure any padding bytes are cleared */ 919 memset(&val_k, 0, sizeof(val_k)); 920 921 error = copyin(key_u, &key_k, sizeof(key_k)); 922 if (error) { 923 return error; 924 } 925 926 error = vfs_quotactl_get(mp, &key_k, &val_k); 927 if (error) { 928 return error; 929 } 930 931 return copyout(&val_k, val_u, sizeof(val_k)); 932 } 933 934 static int 935 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 936 const struct quotaval *val_u) 937 { 938 struct quotakey key_k; 939 struct quotaval val_k; 940 int error; 941 942 error = copyin(key_u, &key_k, sizeof(key_k)); 943 if (error) { 944 return error; 945 } 946 947 error = copyin(val_u, &val_k, sizeof(val_k)); 948 if (error) { 949 return error; 950 } 951 952 return vfs_quotactl_put(mp, &key_k, &val_k); 953 } 954 955 static int 956 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 957 { 958 struct quotakey key_k; 959 int error; 960 961 error = copyin(key_u, &key_k, sizeof(key_k)); 962 if (error) { 963 return error; 964 } 965 966 return vfs_quotactl_del(mp, &key_k); 967 } 968 969 static int 970 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 971 { 972 struct quotakcursor cursor_k; 973 int error; 974 975 /* ensure any padding bytes are cleared */ 976 memset(&cursor_k, 0, sizeof(cursor_k)); 977 978 error = vfs_quotactl_cursoropen(mp, &cursor_k); 979 if (error) { 980 return error; 981 } 982 983 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 984 } 985 986 static int 987 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 988 { 989 struct quotakcursor cursor_k; 990 int error; 991 992 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 993 if (error) { 994 return error; 995 } 996 997 return vfs_quotactl_cursorclose(mp, &cursor_k); 998 } 999 1000 static int 1001 do_sys_quotactl_cursorskipidtype(struct mount *mp, 1002 struct quotakcursor *cursor_u, int idtype) 1003 { 1004 struct quotakcursor cursor_k; 1005 int error; 1006 1007 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1008 if (error) { 1009 return error; 1010 } 1011 1012 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 1013 if (error) { 1014 return error; 1015 } 1016 1017 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1018 } 1019 1020 static int 1021 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 1022 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 1023 unsigned *ret_u) 1024 { 1025 #define CGET_STACK_MAX 8 1026 struct quotakcursor cursor_k; 1027 struct quotakey stackkeys[CGET_STACK_MAX]; 1028 struct quotaval stackvals[CGET_STACK_MAX]; 1029 struct quotakey *keys_k; 1030 struct quotaval *vals_k; 1031 unsigned ret_k; 1032 int error; 1033 1034 if (maxnum > 128) { 1035 maxnum = 128; 1036 } 1037 1038 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1039 if (error) { 1040 return error; 1041 } 1042 1043 if (maxnum <= CGET_STACK_MAX) { 1044 keys_k = stackkeys; 1045 vals_k = stackvals; 1046 /* ensure any padding bytes are cleared */ 1047 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1048 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1049 } else { 1050 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1051 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1052 } 1053 1054 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1055 &ret_k); 1056 if (error) { 1057 goto fail; 1058 } 1059 1060 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1061 if (error) { 1062 goto fail; 1063 } 1064 1065 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1066 if (error) { 1067 goto fail; 1068 } 1069 1070 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1071 if (error) { 1072 goto fail; 1073 } 1074 1075 /* do last to maximize the chance of being able to recover a failure */ 1076 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1077 1078 fail: 1079 if (keys_k != stackkeys) { 1080 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1081 } 1082 if (vals_k != stackvals) { 1083 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1084 } 1085 return error; 1086 } 1087 1088 static int 1089 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1090 int *ret_u) 1091 { 1092 struct quotakcursor cursor_k; 1093 int ret_k; 1094 int error; 1095 1096 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1097 if (error) { 1098 return error; 1099 } 1100 1101 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1102 if (error) { 1103 return error; 1104 } 1105 1106 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1107 if (error) { 1108 return error; 1109 } 1110 1111 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1112 } 1113 1114 static int 1115 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1116 { 1117 struct quotakcursor cursor_k; 1118 int error; 1119 1120 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1121 if (error) { 1122 return error; 1123 } 1124 1125 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1126 if (error) { 1127 return error; 1128 } 1129 1130 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1131 } 1132 1133 static int 1134 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1135 { 1136 char *path_k; 1137 int error; 1138 1139 /* XXX this should probably be a struct pathbuf */ 1140 path_k = PNBUF_GET(); 1141 error = copyin(path_u, path_k, PATH_MAX); 1142 if (error) { 1143 PNBUF_PUT(path_k); 1144 return error; 1145 } 1146 1147 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1148 1149 PNBUF_PUT(path_k); 1150 return error; 1151 } 1152 1153 static int 1154 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1155 { 1156 1157 return vfs_quotactl_quotaoff(mp, idtype); 1158 } 1159 1160 int 1161 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1162 { 1163 struct mount *mp; 1164 struct vnode *vp; 1165 int error; 1166 1167 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1168 if (error != 0) 1169 return (error); 1170 mp = vp->v_mount; 1171 1172 switch (args->qc_op) { 1173 case QUOTACTL_STAT: 1174 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1175 break; 1176 case QUOTACTL_IDTYPESTAT: 1177 error = do_sys_quotactl_idtypestat(mp, 1178 args->u.idtypestat.qc_idtype, 1179 args->u.idtypestat.qc_info); 1180 break; 1181 case QUOTACTL_OBJTYPESTAT: 1182 error = do_sys_quotactl_objtypestat(mp, 1183 args->u.objtypestat.qc_objtype, 1184 args->u.objtypestat.qc_info); 1185 break; 1186 case QUOTACTL_GET: 1187 error = do_sys_quotactl_get(mp, 1188 args->u.get.qc_key, 1189 args->u.get.qc_val); 1190 break; 1191 case QUOTACTL_PUT: 1192 error = do_sys_quotactl_put(mp, 1193 args->u.put.qc_key, 1194 args->u.put.qc_val); 1195 break; 1196 case QUOTACTL_DEL: 1197 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1198 break; 1199 case QUOTACTL_CURSOROPEN: 1200 error = do_sys_quotactl_cursoropen(mp, 1201 args->u.cursoropen.qc_cursor); 1202 break; 1203 case QUOTACTL_CURSORCLOSE: 1204 error = do_sys_quotactl_cursorclose(mp, 1205 args->u.cursorclose.qc_cursor); 1206 break; 1207 case QUOTACTL_CURSORSKIPIDTYPE: 1208 error = do_sys_quotactl_cursorskipidtype(mp, 1209 args->u.cursorskipidtype.qc_cursor, 1210 args->u.cursorskipidtype.qc_idtype); 1211 break; 1212 case QUOTACTL_CURSORGET: 1213 error = do_sys_quotactl_cursorget(mp, 1214 args->u.cursorget.qc_cursor, 1215 args->u.cursorget.qc_keys, 1216 args->u.cursorget.qc_vals, 1217 args->u.cursorget.qc_maxnum, 1218 args->u.cursorget.qc_ret); 1219 break; 1220 case QUOTACTL_CURSORATEND: 1221 error = do_sys_quotactl_cursoratend(mp, 1222 args->u.cursoratend.qc_cursor, 1223 args->u.cursoratend.qc_ret); 1224 break; 1225 case QUOTACTL_CURSORREWIND: 1226 error = do_sys_quotactl_cursorrewind(mp, 1227 args->u.cursorrewind.qc_cursor); 1228 break; 1229 case QUOTACTL_QUOTAON: 1230 error = do_sys_quotactl_quotaon(mp, 1231 args->u.quotaon.qc_idtype, 1232 args->u.quotaon.qc_quotafile); 1233 break; 1234 case QUOTACTL_QUOTAOFF: 1235 error = do_sys_quotactl_quotaoff(mp, 1236 args->u.quotaoff.qc_idtype); 1237 break; 1238 default: 1239 error = SET_ERROR(EINVAL); 1240 break; 1241 } 1242 1243 vrele(vp); 1244 return error; 1245 } 1246 1247 /* ARGSUSED */ 1248 int 1249 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1250 register_t *retval) 1251 { 1252 /* { 1253 syscallarg(const char *) path; 1254 syscallarg(struct quotactl_args *) args; 1255 } */ 1256 struct quotactl_args args; 1257 int error; 1258 1259 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1260 if (error) { 1261 return error; 1262 } 1263 1264 return do_sys_quotactl(SCARG(uap, path), &args); 1265 } 1266 1267 int 1268 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1269 int root) 1270 { 1271 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1272 bool chrooted; 1273 int error = 0; 1274 1275 KASSERT(l == curlwp); 1276 1277 /* 1278 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1279 * since it would imply chroots can be escaped. Just make sure this 1280 * routine is self-consistent. 1281 */ 1282 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1283 1284 /* 1285 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1286 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1287 * overrides MNT_NOWAIT. 1288 */ 1289 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1290 (flags != MNT_WAIT && flags != 0)) { 1291 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1292 } else { 1293 /* Get the filesystem stats now */ 1294 memset(sp, 0, sizeof(*sp)); 1295 if ((error = VFS_STATVFS(mp, sp)) != 0) 1296 return error; 1297 if (!chrooted) 1298 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1299 } 1300 1301 if (chrooted) { 1302 size_t len; 1303 char *bp; 1304 char c; 1305 char *path = PNBUF_GET(); 1306 1307 bp = path + MAXPATHLEN; 1308 *--bp = '\0'; 1309 rw_enter(&cwdi->cwdi_lock, RW_READER); 1310 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1311 MAXPATHLEN / 2, 0, l); 1312 rw_exit(&cwdi->cwdi_lock); 1313 if (error) { 1314 PNBUF_PUT(path); 1315 return error; 1316 } 1317 len = strlen(bp); 1318 if (len != 1) { 1319 /* 1320 * for mount points that are below our root, we can see 1321 * them, so we fix up the pathname and return them. The 1322 * rest we cannot see, so we don't allow viewing the 1323 * data. 1324 */ 1325 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1326 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1327 (void)strlcpy(sp->f_mntonname, 1328 c == '\0' ? "/" : &sp->f_mntonname[len], 1329 sizeof(sp->f_mntonname)); 1330 } else { 1331 if (root) 1332 (void)strlcpy(sp->f_mntonname, "/", 1333 sizeof(sp->f_mntonname)); 1334 else 1335 error = SET_ERROR(EPERM); 1336 } 1337 } 1338 PNBUF_PUT(path); 1339 } 1340 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1341 return error; 1342 } 1343 1344 /* 1345 * Get filesystem statistics by path. 1346 */ 1347 int 1348 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1349 { 1350 struct mount *mp; 1351 int error; 1352 struct vnode *vp; 1353 1354 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1355 if (error != 0) 1356 return error; 1357 mp = vp->v_mount; 1358 error = dostatvfs(mp, sb, l, flags, 1); 1359 vrele(vp); 1360 return error; 1361 } 1362 1363 /* ARGSUSED */ 1364 int 1365 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, 1366 register_t *retval) 1367 { 1368 /* { 1369 syscallarg(const char *) path; 1370 syscallarg(struct statvfs *) buf; 1371 syscallarg(int) flags; 1372 } */ 1373 struct statvfs *sb; 1374 int error; 1375 1376 sb = STATVFSBUF_GET(); 1377 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1378 if (error == 0) 1379 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1380 STATVFSBUF_PUT(sb); 1381 return error; 1382 } 1383 1384 /* 1385 * Get filesystem statistics by fd. 1386 */ 1387 int 1388 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1389 { 1390 file_t *fp; 1391 struct mount *mp; 1392 int error; 1393 1394 /* fd_getvnode() will use the descriptor for us */ 1395 if ((error = fd_getvnode(fd, &fp)) != 0) 1396 return (error); 1397 mp = fp->f_vnode->v_mount; 1398 error = dostatvfs(mp, sb, curlwp, flags, 1); 1399 fd_putfile(fd); 1400 return error; 1401 } 1402 1403 /* ARGSUSED */ 1404 int 1405 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, 1406 register_t *retval) 1407 { 1408 /* { 1409 syscallarg(int) fd; 1410 syscallarg(struct statvfs *) buf; 1411 syscallarg(int) flags; 1412 } */ 1413 struct statvfs *sb; 1414 int error; 1415 1416 sb = STATVFSBUF_GET(); 1417 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1418 if (error == 0) 1419 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1420 STATVFSBUF_PUT(sb); 1421 return error; 1422 } 1423 1424 /* 1425 * Get statistics on all filesystems. 1426 */ 1427 int 1428 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1429 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1430 register_t *retval) 1431 { 1432 int root = 0; 1433 mount_iterator_t *iter; 1434 struct proc *p = l->l_proc; 1435 struct mount *mp; 1436 struct statvfs *sb; 1437 size_t count, maxcount; 1438 int error = 0; 1439 1440 sb = STATVFSBUF_GET(); 1441 maxcount = bufsize / entry_sz; 1442 count = 0; 1443 mountlist_iterator_init(&iter); 1444 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1445 if (sfsp && count < maxcount) { 1446 error = dostatvfs(mp, sb, l, flags, 0); 1447 if (error) { 1448 error = 0; 1449 continue; 1450 } 1451 error = copyfn(sb, sfsp, entry_sz); 1452 if (error) 1453 goto out; 1454 sfsp = (char *)sfsp + entry_sz; 1455 root |= strcmp(sb->f_mntonname, "/") == 0; 1456 } 1457 count++; 1458 } 1459 1460 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1461 /* 1462 * fake a root entry 1463 */ 1464 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1465 sb, l, flags, 1); 1466 if (error != 0) 1467 goto out; 1468 if (sfsp) { 1469 error = copyfn(sb, sfsp, entry_sz); 1470 if (error != 0) 1471 goto out; 1472 } 1473 count++; 1474 } 1475 if (sfsp && count > maxcount) 1476 *retval = maxcount; 1477 else 1478 *retval = count; 1479 out: 1480 mountlist_iterator_destroy(iter); 1481 STATVFSBUF_PUT(sb); 1482 return error; 1483 } 1484 1485 int 1486 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1487 register_t *retval) 1488 { 1489 /* { 1490 syscallarg(struct statvfs *) buf; 1491 syscallarg(size_t) bufsize; 1492 syscallarg(int) flags; 1493 } */ 1494 1495 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1496 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1497 } 1498 1499 /* 1500 * Change current working directory to a given file descriptor. 1501 */ 1502 int 1503 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1504 { 1505 struct proc *p = l->l_proc; 1506 struct cwdinfo *cwdi; 1507 struct vnode *vp, *tdp; 1508 struct mount *mp; 1509 file_t *fp; 1510 int error; 1511 1512 /* fd_getvnode() will use the descriptor for us */ 1513 if ((error = fd_getvnode(fd, &fp)) != 0) 1514 return error; 1515 vp = fp->f_vnode; 1516 1517 vref(vp); 1518 vn_lock(vp, LK_SHARED | LK_RETRY); 1519 if (vp->v_type != VDIR) 1520 error = SET_ERROR(ENOTDIR); 1521 else 1522 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1523 if (error) { 1524 vput(vp); 1525 goto out; 1526 } 1527 while ((mp = vp->v_mountedhere) != NULL) { 1528 error = vfs_busy(mp); 1529 vput(vp); 1530 if (error != 0) 1531 goto out; 1532 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1533 vfs_unbusy(mp); 1534 if (error) 1535 goto out; 1536 vp = tdp; 1537 } 1538 VOP_UNLOCK(vp); 1539 1540 /* 1541 * Disallow changing to a directory not under the process's 1542 * current root directory (if there is one). 1543 */ 1544 cwdi = p->p_cwdi; 1545 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1546 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1547 vrele(vp); 1548 error = SET_ERROR(EPERM); /* operation not permitted */ 1549 } else { 1550 vrele(cwdi->cwdi_cdir); 1551 cwdi->cwdi_cdir = vp; 1552 } 1553 rw_exit(&cwdi->cwdi_lock); 1554 1555 out: 1556 fd_putfile(fd); 1557 return error; 1558 } 1559 1560 /* 1561 * Change current working directory to a given file descriptor. 1562 */ 1563 /* ARGSUSED */ 1564 int 1565 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, 1566 register_t *retval) 1567 { 1568 /* { 1569 syscallarg(int) fd; 1570 } */ 1571 1572 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1573 } 1574 1575 /* 1576 * Change this process's notion of the root directory to a given file 1577 * descriptor. 1578 */ 1579 int 1580 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, 1581 register_t *retval) 1582 { 1583 struct vnode *vp; 1584 file_t *fp; 1585 int error, fd = SCARG(uap, fd); 1586 1587 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1588 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1589 return error; 1590 /* fd_getvnode() will use the descriptor for us */ 1591 if ((error = fd_getvnode(fd, &fp)) != 0) 1592 return error; 1593 vp = fp->f_vnode; 1594 vn_lock(vp, LK_SHARED | LK_RETRY); 1595 if (vp->v_type != VDIR) 1596 error = SET_ERROR(ENOTDIR); 1597 else 1598 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1599 VOP_UNLOCK(vp); 1600 if (error) 1601 goto out; 1602 vref(vp); 1603 change_root(vp); 1604 1605 out: 1606 fd_putfile(fd); 1607 return (error); 1608 } 1609 1610 /* 1611 * Change current working directory (``.''). 1612 */ 1613 int 1614 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1615 register_t *retval) 1616 { 1617 struct proc *p = l->l_proc; 1618 struct cwdinfo * cwdi; 1619 int error; 1620 struct vnode *vp; 1621 1622 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1623 return error; 1624 cwdi = p->p_cwdi; 1625 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1626 vrele(cwdi->cwdi_cdir); 1627 cwdi->cwdi_cdir = vp; 1628 rw_exit(&cwdi->cwdi_lock); 1629 return 0; 1630 } 1631 1632 /* 1633 * Change current working directory (``.''). 1634 */ 1635 /* ARGSUSED */ 1636 int 1637 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1638 { 1639 /* { 1640 syscallarg(const char *) path; 1641 } */ 1642 1643 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1644 } 1645 1646 /* 1647 * Change notion of root (``/'') directory. 1648 */ 1649 /* ARGSUSED */ 1650 int 1651 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, 1652 register_t *retval) 1653 { 1654 /* { 1655 syscallarg(const char *) path; 1656 } */ 1657 int error; 1658 struct vnode *vp; 1659 1660 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1661 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1662 return (error); 1663 1664 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1665 if (error == 0) 1666 change_root(vp); 1667 return error; 1668 } 1669 1670 /* 1671 * Common routine for chroot and fchroot. 1672 * NB: callers need to properly authorize the change root operation. 1673 */ 1674 void 1675 change_root(struct vnode *vp) 1676 { 1677 kauth_cred_t ncred; 1678 struct lwp *l = curlwp; 1679 struct proc *p = l->l_proc; 1680 struct cwdinfo *cwdi = p->p_cwdi; 1681 1682 ncred = kauth_cred_alloc(); 1683 1684 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1685 if (cwdi->cwdi_rdir != NULL) 1686 vrele(cwdi->cwdi_rdir); 1687 cwdi->cwdi_rdir = vp; 1688 1689 /* 1690 * Prevent escaping from chroot by putting the root under 1691 * the working directory. Silently chdir to / if we aren't 1692 * already there. 1693 */ 1694 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1695 /* 1696 * XXX would be more failsafe to change directory to a 1697 * deadfs node here instead 1698 */ 1699 vrele(cwdi->cwdi_cdir); 1700 vref(vp); 1701 cwdi->cwdi_cdir = vp; 1702 } 1703 rw_exit(&cwdi->cwdi_lock); 1704 1705 /* Get a write lock on the process credential. */ 1706 proc_crmod_enter(); 1707 1708 kauth_cred_clone(p->p_cred, ncred); 1709 kauth_proc_chroot(ncred, p->p_cwdi); 1710 1711 /* Broadcast our credentials to the process and other LWPs. */ 1712 proc_crmod_leave(ncred, p->p_cred, true); 1713 } 1714 1715 /* 1716 * Common routine for chroot and chdir. 1717 * XXX "where" should be enum uio_seg 1718 */ 1719 int 1720 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1721 { 1722 struct pathbuf *pb; 1723 struct nameidata nd; 1724 int error; 1725 1726 error = pathbuf_maybe_copyin(path, where, &pb); 1727 if (error) { 1728 return error; 1729 } 1730 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1731 if ((error = namei(&nd)) != 0) { 1732 pathbuf_destroy(pb); 1733 return error; 1734 } 1735 *vpp = nd.ni_vp; 1736 pathbuf_destroy(pb); 1737 1738 if ((*vpp)->v_type != VDIR) 1739 error = SET_ERROR(ENOTDIR); 1740 else 1741 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1742 1743 if (error) 1744 vput(*vpp); 1745 else 1746 VOP_UNLOCK(*vpp); 1747 return (error); 1748 } 1749 1750 /* 1751 * Internals of sys_open - path has already been converted into a pathbuf 1752 * (so we can easily reuse this function from other parts of the kernel, 1753 * like posix_spawn post-processing). 1754 */ 1755 int 1756 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1757 int open_mode, int *fd) 1758 { 1759 struct proc *p = l->l_proc; 1760 struct cwdinfo *cwdi = p->p_cwdi; 1761 file_t *fp; 1762 struct vnode *vp; 1763 int dupfd; 1764 bool dupfd_move; 1765 int flags, cmode; 1766 int indx, error; 1767 1768 if (open_flags & O_SEARCH) { 1769 open_flags &= ~(int)O_SEARCH; 1770 } 1771 1772 /* 1773 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1774 * may be specified. 1775 */ 1776 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1777 return SET_ERROR(EINVAL); 1778 1779 flags = FFLAGS(open_flags); 1780 if ((flags & (FREAD | FWRITE)) == 0) 1781 return SET_ERROR(EINVAL); 1782 1783 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1784 return error; 1785 } 1786 1787 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1788 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1789 1790 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1791 &vp, &dupfd_move, &dupfd); 1792 if (error != 0) { 1793 fd_abort(p, fp, indx); 1794 return error; 1795 } 1796 1797 if (vp == NULL) { 1798 fd_abort(p, fp, indx); 1799 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1800 if (error) 1801 return error; 1802 *fd = indx; 1803 } else { 1804 error = open_setfp(l, fp, vp, indx, flags); 1805 if (error) 1806 return error; 1807 VOP_UNLOCK(vp); 1808 *fd = indx; 1809 fd_affix(p, fp, indx); 1810 } 1811 1812 return 0; 1813 } 1814 1815 int 1816 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1817 { 1818 struct pathbuf *pb; 1819 int error, oflags; 1820 1821 oflags = FFLAGS(open_flags); 1822 if ((oflags & (FREAD | FWRITE)) == 0) 1823 return SET_ERROR(EINVAL); 1824 1825 pb = pathbuf_create(path); 1826 if (pb == NULL) 1827 return SET_ERROR(ENOMEM); 1828 1829 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1830 pathbuf_destroy(pb); 1831 1832 return error; 1833 } 1834 1835 static int 1836 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1837 int mode, int *fd) 1838 { 1839 file_t *dfp = NULL; 1840 struct vnode *dvp = NULL; 1841 struct pathbuf *pb; 1842 const char *pathstring = NULL; 1843 int error; 1844 1845 if (path == NULL) { 1846 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1847 if (error == ENOSYS) 1848 goto no_compat; 1849 if (error) 1850 return error; 1851 } else { 1852 no_compat: 1853 error = pathbuf_copyin(path, &pb); 1854 if (error) 1855 return error; 1856 } 1857 1858 pathstring = pathbuf_stringcopy_get(pb); 1859 1860 /* 1861 * fdat is ignored if: 1862 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1863 * 2) if path is absolute, then fdat is useless. 1864 */ 1865 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1866 /* fd_getvnode() will use the descriptor for us */ 1867 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1868 goto out; 1869 1870 dvp = dfp->f_vnode; 1871 } 1872 1873 error = do_open(l, dvp, pb, flags, mode, fd); 1874 1875 if (dfp != NULL) 1876 fd_putfile(fdat); 1877 out: 1878 pathbuf_stringcopy_put(pb, pathstring); 1879 pathbuf_destroy(pb); 1880 return error; 1881 } 1882 1883 int 1884 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1885 { 1886 /* { 1887 syscallarg(const char *) path; 1888 syscallarg(int) flags; 1889 syscallarg(int) mode; 1890 } */ 1891 int error; 1892 int fd; 1893 1894 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1895 SCARG(uap, flags), SCARG(uap, mode), &fd); 1896 1897 if (error == 0) 1898 *retval = fd; 1899 1900 return error; 1901 } 1902 1903 int 1904 sys_openat(struct lwp *l, const struct sys_openat_args *uap, 1905 register_t *retval) 1906 { 1907 /* { 1908 syscallarg(int) fd; 1909 syscallarg(const char *) path; 1910 syscallarg(int) oflags; 1911 syscallarg(int) mode; 1912 } */ 1913 int error; 1914 int fd; 1915 1916 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1917 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1918 1919 if (error == 0) 1920 *retval = fd; 1921 1922 return error; 1923 } 1924 1925 static void 1926 vfs__fhfree(fhandle_t *fhp) 1927 { 1928 size_t fhsize; 1929 1930 fhsize = FHANDLE_SIZE(fhp); 1931 kmem_free(fhp, fhsize); 1932 } 1933 1934 /* 1935 * vfs_composefh: compose a filehandle. 1936 */ 1937 1938 int 1939 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1940 { 1941 struct mount *mp; 1942 struct fid *fidp; 1943 int error; 1944 size_t needfhsize; 1945 size_t fidsize; 1946 1947 mp = vp->v_mount; 1948 fidp = NULL; 1949 if (*fh_size < FHANDLE_SIZE_MIN) { 1950 fidsize = 0; 1951 } else { 1952 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1953 if (fhp != NULL) { 1954 memset(fhp, 0, *fh_size); 1955 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1956 fidp = &fhp->fh_fid; 1957 } 1958 } 1959 error = VFS_VPTOFH(vp, fidp, &fidsize); 1960 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1961 if (error == 0 && *fh_size < needfhsize) { 1962 error = SET_ERROR(E2BIG); 1963 } 1964 *fh_size = needfhsize; 1965 return error; 1966 } 1967 1968 int 1969 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1970 { 1971 struct mount *mp; 1972 fhandle_t *fhp; 1973 size_t fhsize; 1974 size_t fidsize; 1975 int error; 1976 1977 mp = vp->v_mount; 1978 fidsize = 0; 1979 error = VFS_VPTOFH(vp, NULL, &fidsize); 1980 KASSERT(error != 0); 1981 if (error != E2BIG) { 1982 goto out; 1983 } 1984 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1985 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1986 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1987 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1988 if (error == 0) { 1989 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1990 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1991 *fhpp = fhp; 1992 } else { 1993 kmem_free(fhp, fhsize); 1994 } 1995 out: 1996 return error; 1997 } 1998 1999 void 2000 vfs_composefh_free(fhandle_t *fhp) 2001 { 2002 2003 vfs__fhfree(fhp); 2004 } 2005 2006 /* 2007 * vfs_fhtovp: lookup a vnode by a filehandle. 2008 */ 2009 2010 int 2011 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 2012 { 2013 struct mount *mp; 2014 int error; 2015 2016 *vpp = NULL; 2017 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 2018 if (mp == NULL) { 2019 error = SET_ERROR(ESTALE); 2020 goto out; 2021 } 2022 if (mp->mnt_op->vfs_fhtovp == NULL) { 2023 error = SET_ERROR(EOPNOTSUPP); 2024 goto out; 2025 } 2026 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 2027 out: 2028 return error; 2029 } 2030 2031 /* 2032 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 2033 * the needed size. 2034 */ 2035 2036 int 2037 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2038 { 2039 fhandle_t *fhp; 2040 int error; 2041 2042 if (fhsize > FHANDLE_SIZE_MAX) { 2043 return SET_ERROR(EINVAL); 2044 } 2045 if (fhsize < FHANDLE_SIZE_MIN) { 2046 return SET_ERROR(EINVAL); 2047 } 2048 again: 2049 fhp = kmem_alloc(fhsize, KM_SLEEP); 2050 error = copyin(ufhp, fhp, fhsize); 2051 if (error == 0) { 2052 /* XXX this check shouldn't be here */ 2053 if (FHANDLE_SIZE(fhp) == fhsize) { 2054 *fhpp = fhp; 2055 return 0; 2056 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2057 /* 2058 * a kludge for nfsv2 padded handles. 2059 */ 2060 size_t sz; 2061 2062 sz = FHANDLE_SIZE(fhp); 2063 kmem_free(fhp, fhsize); 2064 fhsize = sz; 2065 goto again; 2066 } else { 2067 /* 2068 * userland told us wrong size. 2069 */ 2070 error = SET_ERROR(EINVAL); 2071 } 2072 } 2073 kmem_free(fhp, fhsize); 2074 return error; 2075 } 2076 2077 void 2078 vfs_copyinfh_free(fhandle_t *fhp) 2079 { 2080 2081 vfs__fhfree(fhp); 2082 } 2083 2084 /* 2085 * Get file handle system call 2086 */ 2087 int 2088 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, 2089 register_t *retval) 2090 { 2091 /* { 2092 syscallarg(char *) fname; 2093 syscallarg(fhandle_t *) fhp; 2094 syscallarg(size_t *) fh_size; 2095 } */ 2096 struct vnode *vp; 2097 fhandle_t *fh; 2098 int error; 2099 struct pathbuf *pb; 2100 struct nameidata nd; 2101 size_t sz; 2102 size_t usz; 2103 2104 /* 2105 * Must be super user 2106 */ 2107 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2108 0, NULL, NULL, NULL); 2109 if (error) 2110 return (error); 2111 2112 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2113 if (error) { 2114 return error; 2115 } 2116 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2117 error = namei(&nd); 2118 if (error) { 2119 pathbuf_destroy(pb); 2120 return error; 2121 } 2122 vp = nd.ni_vp; 2123 pathbuf_destroy(pb); 2124 2125 error = vfs_composefh_alloc(vp, &fh); 2126 vput(vp); 2127 if (error != 0) { 2128 return error; 2129 } 2130 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2131 if (error != 0) { 2132 goto out; 2133 } 2134 sz = FHANDLE_SIZE(fh); 2135 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2136 if (error != 0) { 2137 goto out; 2138 } 2139 if (usz >= sz) { 2140 error = copyout(fh, SCARG(uap, fhp), sz); 2141 } else { 2142 error = SET_ERROR(E2BIG); 2143 } 2144 out: 2145 vfs_composefh_free(fh); 2146 return (error); 2147 } 2148 2149 /* 2150 * Open a file given a file handle. 2151 * 2152 * Check permissions, allocate an open file structure, 2153 * and call the device open routine if any. 2154 */ 2155 2156 int 2157 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2158 register_t *retval) 2159 { 2160 file_t *fp; 2161 struct vnode *vp = NULL; 2162 kauth_cred_t cred = l->l_cred; 2163 file_t *nfp; 2164 int indx, error; 2165 struct vattr va; 2166 fhandle_t *fh; 2167 int flags; 2168 proc_t *p; 2169 2170 p = curproc; 2171 2172 /* 2173 * Must be super user 2174 */ 2175 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2176 0, NULL, NULL, NULL))) 2177 return (error); 2178 2179 if (oflags & O_SEARCH) { 2180 oflags &= ~(int)O_SEARCH; 2181 } 2182 2183 flags = FFLAGS(oflags); 2184 if ((flags & (FREAD | FWRITE)) == 0) 2185 return SET_ERROR(EINVAL); 2186 if ((flags & O_CREAT)) 2187 return SET_ERROR(EINVAL); 2188 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2189 return (error); 2190 fp = nfp; 2191 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2192 if (error != 0) { 2193 goto bad; 2194 } 2195 error = vfs_fhtovp(fh, &vp); 2196 vfs_copyinfh_free(fh); 2197 if (error != 0) { 2198 goto bad; 2199 } 2200 2201 /* Now do an effective vn_open */ 2202 2203 if (vp->v_type == VSOCK) { 2204 error = SET_ERROR(EOPNOTSUPP); 2205 goto bad; 2206 } 2207 error = vn_openchk(vp, cred, flags); 2208 if (error != 0) 2209 goto bad; 2210 if (flags & O_TRUNC) { 2211 VOP_UNLOCK(vp); /* XXX */ 2212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2213 vattr_null(&va); 2214 va.va_size = 0; 2215 error = VOP_SETATTR(vp, &va, cred); 2216 if (error) 2217 goto bad; 2218 } 2219 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2220 goto bad; 2221 if (flags & FWRITE) { 2222 mutex_enter(vp->v_interlock); 2223 vp->v_writecount++; 2224 mutex_exit(vp->v_interlock); 2225 } 2226 2227 /* done with modified vn_open, now finish what sys_open does. */ 2228 if ((error = open_setfp(l, fp, vp, indx, flags))) 2229 return error; 2230 2231 VOP_UNLOCK(vp); 2232 *retval = indx; 2233 fd_affix(p, fp, indx); 2234 return (0); 2235 2236 bad: 2237 fd_abort(p, fp, indx); 2238 if (vp != NULL) 2239 vput(vp); 2240 if (error == EDUPFD || error == EMOVEFD) { 2241 /* XXX should probably close curlwp->l_dupfd */ 2242 error = SET_ERROR(EOPNOTSUPP); 2243 } 2244 return (error); 2245 } 2246 2247 int 2248 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, 2249 register_t *retval) 2250 { 2251 /* { 2252 syscallarg(const void *) fhp; 2253 syscallarg(size_t) fh_size; 2254 syscallarg(int) flags; 2255 } */ 2256 2257 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2258 SCARG(uap, flags), retval); 2259 } 2260 2261 int 2262 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2263 { 2264 int error; 2265 fhandle_t *fh; 2266 struct vnode *vp; 2267 2268 /* 2269 * Must be super user 2270 */ 2271 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2272 0, NULL, NULL, NULL))) 2273 return error; 2274 2275 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2276 if (error != 0) 2277 return error; 2278 2279 error = vfs_fhtovp(fh, &vp); 2280 vfs_copyinfh_free(fh); 2281 if (error != 0) 2282 return error; 2283 2284 error = vn_stat(vp, sb); 2285 vput(vp); 2286 return error; 2287 } 2288 2289 /* ARGSUSED */ 2290 int 2291 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, 2292 register_t *retval) 2293 { 2294 /* { 2295 syscallarg(const void *) fhp; 2296 syscallarg(size_t) fh_size; 2297 syscallarg(struct stat *) sb; 2298 } */ 2299 struct stat sb; 2300 int error; 2301 2302 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2303 if (error) 2304 return error; 2305 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2306 } 2307 2308 int 2309 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, 2310 struct statvfs *sb, int flags) 2311 { 2312 fhandle_t *fh; 2313 struct mount *mp; 2314 struct vnode *vp; 2315 int error; 2316 2317 /* 2318 * Must be super user 2319 */ 2320 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2321 0, NULL, NULL, NULL))) 2322 return error; 2323 2324 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2325 if (error != 0) 2326 return error; 2327 2328 error = vfs_fhtovp(fh, &vp); 2329 vfs_copyinfh_free(fh); 2330 if (error != 0) 2331 return error; 2332 2333 mp = vp->v_mount; 2334 error = dostatvfs(mp, sb, l, flags, 1); 2335 vput(vp); 2336 return error; 2337 } 2338 2339 /* ARGSUSED */ 2340 int 2341 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, 2342 register_t *retval) 2343 { 2344 /* { 2345 syscallarg(const void *) fhp; 2346 syscallarg(size_t) fh_size; 2347 syscallarg(struct statvfs *) buf; 2348 syscallarg(int) flags; 2349 } */ 2350 struct statvfs *sb = STATVFSBUF_GET(); 2351 int error; 2352 2353 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2354 SCARG(uap, flags)); 2355 if (error == 0) 2356 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2357 STATVFSBUF_PUT(sb); 2358 return error; 2359 } 2360 2361 int 2362 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2363 dev_t dev) 2364 { 2365 2366 /* 2367 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2368 * in mode and dev=0. 2369 * 2370 * In all the other cases it's implementation defined behavior. 2371 */ 2372 2373 if ((mode & S_IFIFO) && dev == 0) 2374 return do_sys_mkfifoat(l, fdat, pathname, mode); 2375 else 2376 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2377 UIO_USERSPACE); 2378 } 2379 2380 /* 2381 * Create a special file. 2382 */ 2383 /* ARGSUSED */ 2384 int 2385 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2386 register_t *retval) 2387 { 2388 /* { 2389 syscallarg(const char *) path; 2390 syscallarg(mode_t) mode; 2391 syscallarg(dev_t) dev; 2392 } */ 2393 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2394 SCARG(uap, mode), SCARG(uap, dev)); 2395 } 2396 2397 int 2398 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2399 register_t *retval) 2400 { 2401 /* { 2402 syscallarg(int) fd; 2403 syscallarg(const char *) path; 2404 syscallarg(mode_t) mode; 2405 syscallarg(int) pad; 2406 syscallarg(dev_t) dev; 2407 } */ 2408 2409 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2410 SCARG(uap, mode), SCARG(uap, dev)); 2411 } 2412 2413 int 2414 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2415 enum uio_seg seg) 2416 { 2417 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2418 } 2419 2420 int 2421 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2422 dev_t dev, enum uio_seg seg) 2423 { 2424 struct proc *p = l->l_proc; 2425 struct vnode *vp; 2426 struct vattr vattr; 2427 int error, optype; 2428 struct pathbuf *pb; 2429 struct nameidata nd; 2430 const char *pathstring; 2431 2432 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2433 0, NULL, NULL, NULL)) != 0) 2434 return (error); 2435 2436 optype = VOP_MKNOD_DESCOFFSET; 2437 2438 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2439 if (error) { 2440 return error; 2441 } 2442 pathstring = pathbuf_stringcopy_get(pb); 2443 if (pathstring == NULL) { 2444 pathbuf_destroy(pb); 2445 return SET_ERROR(ENOMEM); 2446 } 2447 2448 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2449 2450 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2451 goto out; 2452 vp = nd.ni_vp; 2453 2454 if (vp != NULL) 2455 error = SET_ERROR(EEXIST); 2456 else { 2457 vattr_null(&vattr); 2458 /* We will read cwdi->cwdi_cmask unlocked. */ 2459 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2460 vattr.va_rdev = dev; 2461 2462 switch (mode & S_IFMT) { 2463 case S_IFMT: /* used by badsect to flag bad sectors */ 2464 vattr.va_type = VBAD; 2465 break; 2466 case S_IFCHR: 2467 vattr.va_type = VCHR; 2468 break; 2469 case S_IFBLK: 2470 vattr.va_type = VBLK; 2471 break; 2472 case S_IFWHT: 2473 optype = VOP_WHITEOUT_DESCOFFSET; 2474 break; 2475 case S_IFREG: 2476 #if NVERIEXEC > 0 2477 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2478 O_CREAT); 2479 #endif /* NVERIEXEC > 0 */ 2480 vattr.va_type = VREG; 2481 vattr.va_rdev = VNOVAL; 2482 optype = VOP_CREATE_DESCOFFSET; 2483 break; 2484 default: 2485 error = SET_ERROR(EINVAL); 2486 break; 2487 } 2488 2489 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2490 vattr.va_rdev == VNOVAL) 2491 error = SET_ERROR(EINVAL); 2492 } 2493 2494 if (!error) { 2495 switch (optype) { 2496 case VOP_WHITEOUT_DESCOFFSET: 2497 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2498 if (error) 2499 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2500 vput(nd.ni_dvp); 2501 break; 2502 2503 case VOP_MKNOD_DESCOFFSET: 2504 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2505 &nd.ni_cnd, &vattr); 2506 if (error == 0) 2507 vrele(nd.ni_vp); 2508 vput(nd.ni_dvp); 2509 break; 2510 2511 case VOP_CREATE_DESCOFFSET: 2512 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2513 &nd.ni_cnd, &vattr); 2514 if (error == 0) 2515 vrele(nd.ni_vp); 2516 vput(nd.ni_dvp); 2517 break; 2518 } 2519 } else { 2520 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2521 if (nd.ni_dvp == vp) 2522 vrele(nd.ni_dvp); 2523 else 2524 vput(nd.ni_dvp); 2525 if (vp) 2526 vrele(vp); 2527 } 2528 out: 2529 pathbuf_stringcopy_put(pb, pathstring); 2530 pathbuf_destroy(pb); 2531 return (error); 2532 } 2533 2534 /* 2535 * Create a named pipe. 2536 */ 2537 /* ARGSUSED */ 2538 int 2539 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, 2540 register_t *retval) 2541 { 2542 /* { 2543 syscallarg(const char *) path; 2544 syscallarg(int) mode; 2545 } */ 2546 2547 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), 2548 SCARG(uap, mode)); 2549 } 2550 2551 int 2552 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2553 register_t *retval) 2554 { 2555 /* { 2556 syscallarg(int) fd; 2557 syscallarg(const char *) path; 2558 syscallarg(int) mode; 2559 } */ 2560 2561 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2562 SCARG(uap, mode)); 2563 } 2564 2565 static int 2566 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2567 { 2568 struct proc *p = l->l_proc; 2569 struct vattr vattr; 2570 int error; 2571 struct pathbuf *pb; 2572 struct nameidata nd; 2573 2574 error = pathbuf_copyin(path, &pb); 2575 if (error) { 2576 return error; 2577 } 2578 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2579 2580 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2581 pathbuf_destroy(pb); 2582 return error; 2583 } 2584 if (nd.ni_vp != NULL) { 2585 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2586 if (nd.ni_dvp == nd.ni_vp) 2587 vrele(nd.ni_dvp); 2588 else 2589 vput(nd.ni_dvp); 2590 vrele(nd.ni_vp); 2591 pathbuf_destroy(pb); 2592 return SET_ERROR(EEXIST); 2593 } 2594 vattr_null(&vattr); 2595 vattr.va_type = VFIFO; 2596 /* We will read cwdi->cwdi_cmask unlocked. */ 2597 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2598 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2599 if (error == 0) 2600 vrele(nd.ni_vp); 2601 vput(nd.ni_dvp); 2602 pathbuf_destroy(pb); 2603 return (error); 2604 } 2605 2606 /* 2607 * Make a hard file link. 2608 */ 2609 /* ARGSUSED */ 2610 int 2611 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2612 const char *link, int follow, register_t *retval) 2613 { 2614 struct vnode *vp; 2615 struct pathbuf *linkpb; 2616 struct nameidata nd; 2617 namei_simple_flags_t ns_flags; 2618 int error; 2619 2620 if (follow & AT_SYMLINK_FOLLOW) 2621 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2622 else 2623 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2624 2625 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2626 if (error != 0) 2627 return (error); 2628 error = pathbuf_copyin(link, &linkpb); 2629 if (error) { 2630 goto out1; 2631 } 2632 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2633 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2634 goto out2; 2635 if (nd.ni_vp) { 2636 error = SET_ERROR(EEXIST); 2637 goto abortop; 2638 } 2639 /* Prevent hard links on directories. */ 2640 if (vp->v_type == VDIR) { 2641 error = SET_ERROR(EPERM); 2642 goto abortop; 2643 } 2644 /* Prevent cross-mount operation. */ 2645 if (nd.ni_dvp->v_mount != vp->v_mount) { 2646 error = SET_ERROR(EXDEV); 2647 goto abortop; 2648 } 2649 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2650 VOP_UNLOCK(nd.ni_dvp); 2651 vrele(nd.ni_dvp); 2652 out2: 2653 pathbuf_destroy(linkpb); 2654 out1: 2655 vrele(vp); 2656 return (error); 2657 abortop: 2658 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2659 if (nd.ni_dvp == nd.ni_vp) 2660 vrele(nd.ni_dvp); 2661 else 2662 vput(nd.ni_dvp); 2663 if (nd.ni_vp != NULL) 2664 vrele(nd.ni_vp); 2665 goto out2; 2666 } 2667 2668 int 2669 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2670 { 2671 /* { 2672 syscallarg(const char *) path; 2673 syscallarg(const char *) link; 2674 } */ 2675 const char *path = SCARG(uap, path); 2676 const char *link = SCARG(uap, link); 2677 2678 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2679 AT_SYMLINK_FOLLOW, retval); 2680 } 2681 2682 int 2683 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2684 register_t *retval) 2685 { 2686 /* { 2687 syscallarg(int) fd1; 2688 syscallarg(const char *) name1; 2689 syscallarg(int) fd2; 2690 syscallarg(const char *) name2; 2691 syscallarg(int) flags; 2692 } */ 2693 int fd1 = SCARG(uap, fd1); 2694 const char *name1 = SCARG(uap, name1); 2695 int fd2 = SCARG(uap, fd2); 2696 const char *name2 = SCARG(uap, name2); 2697 int follow; 2698 2699 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2700 2701 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2702 } 2703 2704 int 2705 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2706 { 2707 2708 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2709 } 2710 2711 static int 2712 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2713 const char *link, enum uio_seg seg) 2714 { 2715 struct proc *p = curproc; 2716 struct vattr vattr; 2717 char *path; 2718 int error; 2719 size_t len; 2720 struct pathbuf *linkpb; 2721 struct nameidata nd; 2722 2723 KASSERT(l != NULL || fdat == AT_FDCWD); 2724 2725 path = PNBUF_GET(); 2726 if (seg == UIO_USERSPACE) { 2727 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2728 goto out1; 2729 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2730 goto out1; 2731 } else { 2732 len = strlen(patharg) + 1; 2733 KASSERT(len <= MAXPATHLEN); 2734 memcpy(path, patharg, len); 2735 linkpb = pathbuf_create(link); 2736 if (linkpb == NULL) { 2737 error = SET_ERROR(ENOMEM); 2738 goto out1; 2739 } 2740 } 2741 ktrkuser("symlink-target", path, len - 1); 2742 2743 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2744 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2745 goto out2; 2746 if (nd.ni_vp) { 2747 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2748 if (nd.ni_dvp == nd.ni_vp) 2749 vrele(nd.ni_dvp); 2750 else 2751 vput(nd.ni_dvp); 2752 vrele(nd.ni_vp); 2753 error = SET_ERROR(EEXIST); 2754 goto out2; 2755 } 2756 vattr_null(&vattr); 2757 vattr.va_type = VLNK; 2758 /* We will read cwdi->cwdi_cmask unlocked. */ 2759 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2760 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2761 if (error == 0) 2762 vrele(nd.ni_vp); 2763 vput(nd.ni_dvp); 2764 out2: 2765 pathbuf_destroy(linkpb); 2766 out1: 2767 PNBUF_PUT(path); 2768 return (error); 2769 } 2770 2771 /* 2772 * Make a symbolic link. 2773 */ 2774 /* ARGSUSED */ 2775 int 2776 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2777 { 2778 /* { 2779 syscallarg(const char *) path; 2780 syscallarg(const char *) link; 2781 } */ 2782 2783 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2784 UIO_USERSPACE); 2785 } 2786 2787 int 2788 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2789 register_t *retval) 2790 { 2791 /* { 2792 syscallarg(const char *) path1; 2793 syscallarg(int) fd; 2794 syscallarg(const char *) path2; 2795 } */ 2796 2797 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2798 SCARG(uap, path2), UIO_USERSPACE); 2799 } 2800 2801 /* 2802 * Delete a whiteout from the filesystem. 2803 */ 2804 /* ARGSUSED */ 2805 int 2806 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, 2807 register_t *retval) 2808 { 2809 /* { 2810 syscallarg(const char *) path; 2811 } */ 2812 int error; 2813 struct pathbuf *pb; 2814 struct nameidata nd; 2815 2816 error = pathbuf_copyin(SCARG(uap, path), &pb); 2817 if (error) { 2818 return error; 2819 } 2820 2821 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2822 error = namei(&nd); 2823 if (error) { 2824 pathbuf_destroy(pb); 2825 return (error); 2826 } 2827 2828 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2829 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2830 if (nd.ni_dvp == nd.ni_vp) 2831 vrele(nd.ni_dvp); 2832 else 2833 vput(nd.ni_dvp); 2834 if (nd.ni_vp) 2835 vrele(nd.ni_vp); 2836 pathbuf_destroy(pb); 2837 return SET_ERROR(EEXIST); 2838 } 2839 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2840 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2841 vput(nd.ni_dvp); 2842 pathbuf_destroy(pb); 2843 return (error); 2844 } 2845 2846 /* 2847 * Delete a name from the filesystem. 2848 */ 2849 /* ARGSUSED */ 2850 int 2851 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, 2852 register_t *retval) 2853 { 2854 /* { 2855 syscallarg(const char *) path; 2856 } */ 2857 2858 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, 2859 UIO_USERSPACE); 2860 } 2861 2862 int 2863 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2864 register_t *retval) 2865 { 2866 /* { 2867 syscallarg(int) fd; 2868 syscallarg(const char *) path; 2869 syscallarg(int) flag; 2870 } */ 2871 2872 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2873 SCARG(uap, flag), UIO_USERSPACE); 2874 } 2875 2876 int 2877 do_sys_unlink(const char *arg, enum uio_seg seg) 2878 { 2879 2880 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2881 } 2882 2883 static int 2884 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2885 enum uio_seg seg) 2886 { 2887 struct vnode *vp; 2888 int error; 2889 struct pathbuf *pb; 2890 struct nameidata nd; 2891 const char *pathstring; 2892 2893 KASSERT(l != NULL || fdat == AT_FDCWD); 2894 2895 error = pathbuf_maybe_copyin(arg, seg, &pb); 2896 if (error) { 2897 return error; 2898 } 2899 pathstring = pathbuf_stringcopy_get(pb); 2900 if (pathstring == NULL) { 2901 pathbuf_destroy(pb); 2902 return SET_ERROR(ENOMEM); 2903 } 2904 2905 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2906 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2907 goto out; 2908 vp = nd.ni_vp; 2909 2910 /* 2911 * The root of a mounted filesystem cannot be deleted. 2912 */ 2913 if ((vp->v_vflag & VV_ROOT) != 0) { 2914 error = SET_ERROR(EBUSY); 2915 goto abort; 2916 } 2917 2918 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2919 error = SET_ERROR(EBUSY); 2920 goto abort; 2921 } 2922 2923 /* 2924 * No rmdir "." please. 2925 */ 2926 if (nd.ni_dvp == vp) { 2927 error = SET_ERROR(EINVAL); 2928 goto abort; 2929 } 2930 2931 /* 2932 * AT_REMOVEDIR is required to remove a directory 2933 */ 2934 if (vp->v_type == VDIR) { 2935 if (!(flags & AT_REMOVEDIR)) { 2936 error = SET_ERROR(EPERM); 2937 goto abort; 2938 } else { 2939 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2940 vput(nd.ni_dvp); 2941 goto out; 2942 } 2943 } 2944 2945 /* 2946 * Starting here we only deal with non directories. 2947 */ 2948 if (flags & AT_REMOVEDIR) { 2949 error = SET_ERROR(ENOTDIR); 2950 goto abort; 2951 } 2952 2953 #if NVERIEXEC > 0 2954 /* Handle remove requests for veriexec entries. */ 2955 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2956 goto abort; 2957 } 2958 #endif /* NVERIEXEC > 0 */ 2959 2960 #ifdef FILEASSOC 2961 (void)fileassoc_file_delete(vp); 2962 #endif /* FILEASSOC */ 2963 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2964 vput(nd.ni_dvp); 2965 goto out; 2966 2967 abort: 2968 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2969 if (nd.ni_dvp == vp) 2970 vrele(nd.ni_dvp); 2971 else 2972 vput(nd.ni_dvp); 2973 vput(vp); 2974 2975 out: 2976 pathbuf_stringcopy_put(pb, pathstring); 2977 pathbuf_destroy(pb); 2978 return (error); 2979 } 2980 2981 /* 2982 * Reposition read/write file offset. 2983 */ 2984 int 2985 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2986 { 2987 /* { 2988 syscallarg(int) fd; 2989 syscallarg(int) pad; 2990 syscallarg(off_t) offset; 2991 syscallarg(int) whence; 2992 } */ 2993 file_t *fp; 2994 int error, fd; 2995 2996 switch (SCARG(uap, whence)) { 2997 case SEEK_CUR: 2998 case SEEK_END: 2999 case SEEK_SET: 3000 break; 3001 default: 3002 return SET_ERROR(EINVAL); 3003 } 3004 3005 fd = SCARG(uap, fd); 3006 3007 if ((fp = fd_getfile(fd)) == NULL) 3008 return SET_ERROR(EBADF); 3009 3010 if (fp->f_ops->fo_seek == NULL) { 3011 error = SET_ERROR(ESPIPE); 3012 goto out; 3013 } 3014 3015 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 3016 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 3017 out: 3018 fd_putfile(fd); 3019 return (error); 3020 } 3021 3022 /* 3023 * Positional read system call. 3024 */ 3025 int 3026 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 3027 { 3028 /* { 3029 syscallarg(int) fd; 3030 syscallarg(void *) buf; 3031 syscallarg(size_t) nbyte; 3032 syscallarg(off_t) offset; 3033 } */ 3034 file_t *fp; 3035 off_t offset; 3036 int error, fd = SCARG(uap, fd); 3037 3038 if ((fp = fd_getfile(fd)) == NULL) 3039 return SET_ERROR(EBADF); 3040 3041 if ((fp->f_flag & FREAD) == 0) { 3042 fd_putfile(fd); 3043 return SET_ERROR(EBADF); 3044 } 3045 3046 if (fp->f_ops->fo_seek == NULL) { 3047 error = SET_ERROR(ESPIPE); 3048 goto out; 3049 } 3050 3051 offset = SCARG(uap, offset); 3052 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3053 if (error) 3054 goto out; 3055 3056 /* dofileread() will unuse the descriptor for us */ 3057 return dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3058 &offset, 0, retval); 3059 3060 out: 3061 fd_putfile(fd); 3062 return (error); 3063 } 3064 3065 /* 3066 * Positional scatter read system call. 3067 */ 3068 int 3069 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, 3070 register_t *retval) 3071 { 3072 /* { 3073 syscallarg(int) fd; 3074 syscallarg(const struct iovec *) iovp; 3075 syscallarg(int) iovcnt; 3076 syscallarg(off_t) offset; 3077 } */ 3078 off_t offset = SCARG(uap, offset); 3079 3080 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3081 SCARG(uap, iovcnt), &offset, 0, retval); 3082 } 3083 3084 /* 3085 * Positional write system call. 3086 */ 3087 int 3088 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, 3089 register_t *retval) 3090 { 3091 /* { 3092 syscallarg(int) fd; 3093 syscallarg(const void *) buf; 3094 syscallarg(size_t) nbyte; 3095 syscallarg(off_t) offset; 3096 } */ 3097 file_t *fp; 3098 off_t offset; 3099 int error, fd = SCARG(uap, fd); 3100 3101 if ((fp = fd_getfile(fd)) == NULL) 3102 return SET_ERROR(EBADF); 3103 3104 if ((fp->f_flag & FWRITE) == 0) { 3105 fd_putfile(fd); 3106 return SET_ERROR(EBADF); 3107 } 3108 3109 if (fp->f_ops->fo_seek == NULL) { 3110 error = SET_ERROR(ESPIPE); 3111 goto out; 3112 } 3113 3114 offset = SCARG(uap, offset); 3115 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3116 if (error) 3117 goto out; 3118 3119 /* dofilewrite() will unuse the descriptor for us */ 3120 return dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3121 &offset, 0, retval); 3122 3123 out: 3124 fd_putfile(fd); 3125 return (error); 3126 } 3127 3128 /* 3129 * Positional gather write system call. 3130 */ 3131 int 3132 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, 3133 register_t *retval) 3134 { 3135 /* { 3136 syscallarg(int) fd; 3137 syscallarg(const struct iovec *) iovp; 3138 syscallarg(int) iovcnt; 3139 syscallarg(off_t) offset; 3140 } */ 3141 off_t offset = SCARG(uap, offset); 3142 3143 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3144 SCARG(uap, iovcnt), &offset, 0, retval); 3145 } 3146 3147 /* 3148 * Check access permissions. 3149 */ 3150 int 3151 sys_access(struct lwp *l, const struct sys_access_args *uap, 3152 register_t *retval) 3153 { 3154 /* { 3155 syscallarg(const char *) path; 3156 syscallarg(int) flags; 3157 } */ 3158 3159 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3160 SCARG(uap, flags), 0); 3161 } 3162 3163 int 3164 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3165 int mode, int flags) 3166 { 3167 kauth_cred_t cred; 3168 struct vnode *vp; 3169 int error, nd_flag, vmode; 3170 struct pathbuf *pb; 3171 struct nameidata nd; 3172 3173 CTASSERT(F_OK == 0); 3174 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3175 /* nonsense mode */ 3176 return SET_ERROR(EINVAL); 3177 } 3178 3179 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3180 if (flags & AT_SYMLINK_NOFOLLOW) 3181 nd_flag &= ~FOLLOW; 3182 3183 error = pathbuf_copyin(path, &pb); 3184 if (error) 3185 return error; 3186 3187 NDINIT(&nd, LOOKUP, nd_flag, pb); 3188 3189 /* Override default credentials */ 3190 if (!(flags & AT_EACCESS)) { 3191 cred = kauth_cred_dup(l->l_cred); 3192 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3193 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3194 } else 3195 cred = l->l_cred; 3196 nd.ni_cnd.cn_cred = cred; 3197 3198 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3199 pathbuf_destroy(pb); 3200 goto out; 3201 } 3202 vp = nd.ni_vp; 3203 pathbuf_destroy(pb); 3204 3205 /* Flags == 0 means only check for existence. */ 3206 if (mode) { 3207 vmode = 0; 3208 if (mode & R_OK) 3209 vmode |= VREAD; 3210 if (mode & W_OK) 3211 vmode |= VWRITE; 3212 if (mode & X_OK) 3213 vmode |= VEXEC; 3214 3215 error = VOP_ACCESS(vp, vmode, cred); 3216 if (!error && (vmode & VWRITE)) 3217 error = vn_writechk(vp); 3218 } 3219 vput(vp); 3220 out: 3221 if (!(flags & AT_EACCESS)) 3222 kauth_cred_free(cred); 3223 return (error); 3224 } 3225 3226 int 3227 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3228 register_t *retval) 3229 { 3230 /* { 3231 syscallarg(int) fd; 3232 syscallarg(const char *) path; 3233 syscallarg(int) amode; 3234 syscallarg(int) flag; 3235 } */ 3236 3237 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3238 SCARG(uap, amode), SCARG(uap, flag)); 3239 } 3240 3241 /* 3242 * Common code for all sys_stat functions, including compat versions. 3243 */ 3244 int 3245 do_sys_stat(const char *userpath, unsigned int nd_flag, struct stat *sb) 3246 { 3247 3248 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3249 } 3250 3251 int 3252 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3253 unsigned int nd_flag, struct stat *sb) 3254 { 3255 int error; 3256 struct pathbuf *pb; 3257 struct nameidata nd; 3258 3259 KASSERT(l != NULL || fdat == AT_FDCWD); 3260 3261 error = pathbuf_copyin(userpath, &pb); 3262 if (error) { 3263 return error; 3264 } 3265 3266 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3267 3268 error = fd_nameiat(l, fdat, &nd); 3269 if (error != 0) { 3270 pathbuf_destroy(pb); 3271 return error; 3272 } 3273 error = vn_stat(nd.ni_vp, sb); 3274 vput(nd.ni_vp); 3275 pathbuf_destroy(pb); 3276 return error; 3277 } 3278 3279 /* 3280 * Get file status; this version follows links. 3281 */ 3282 /* ARGSUSED */ 3283 int 3284 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, 3285 register_t *retval) 3286 { 3287 /* { 3288 syscallarg(const char *) path; 3289 syscallarg(struct stat *) ub; 3290 } */ 3291 struct stat sb; 3292 int error; 3293 3294 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3295 if (error) 3296 return error; 3297 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3298 } 3299 3300 /* 3301 * Get file status; this version does not follow links. 3302 */ 3303 /* ARGSUSED */ 3304 int 3305 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, 3306 register_t *retval) 3307 { 3308 /* { 3309 syscallarg(const char *) path; 3310 syscallarg(struct stat *) ub; 3311 } */ 3312 struct stat sb; 3313 int error; 3314 3315 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3316 if (error) 3317 return error; 3318 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3319 } 3320 3321 int 3322 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3323 register_t *retval) 3324 { 3325 /* { 3326 syscallarg(int) fd; 3327 syscallarg(const char *) path; 3328 syscallarg(struct stat *) buf; 3329 syscallarg(int) flag; 3330 } */ 3331 unsigned int nd_flag; 3332 struct stat sb; 3333 int error; 3334 3335 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3336 nd_flag = NOFOLLOW; 3337 else 3338 nd_flag = FOLLOW; 3339 3340 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3341 &sb); 3342 if (error) 3343 return error; 3344 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3345 } 3346 3347 static int 3348 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3349 { 3350 int error; 3351 struct pathbuf *pb; 3352 struct nameidata nd; 3353 3354 error = pathbuf_copyin(path, &pb); 3355 if (error) { 3356 return error; 3357 } 3358 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3359 if ((error = namei(&nd)) != 0) { 3360 pathbuf_destroy(pb); 3361 return error; 3362 } 3363 error = VOP_PATHCONF(nd.ni_vp, name, retval); 3364 vput(nd.ni_vp); 3365 pathbuf_destroy(pb); 3366 return error; 3367 } 3368 3369 /* 3370 * Get configurable pathname variables. 3371 */ 3372 /* ARGSUSED */ 3373 int 3374 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3375 register_t *retval) 3376 { 3377 /* { 3378 syscallarg(const char *) path; 3379 syscallarg(int) name; 3380 } */ 3381 3382 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3383 FOLLOW); 3384 } 3385 3386 /* ARGSUSED */ 3387 int 3388 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3389 register_t *retval) 3390 { 3391 /* { 3392 syscallarg(const char *) path; 3393 syscallarg(int) name; 3394 } */ 3395 3396 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3397 NOFOLLOW); 3398 } 3399 3400 /* 3401 * Return target name of a symbolic link. 3402 */ 3403 /* ARGSUSED */ 3404 int 3405 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3406 register_t *retval) 3407 { 3408 /* { 3409 syscallarg(const char *) path; 3410 syscallarg(char *) buf; 3411 syscallarg(size_t) count; 3412 } */ 3413 3414 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3415 SCARG(uap, buf), SCARG(uap, count), retval); 3416 } 3417 3418 static int 3419 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3420 size_t count, register_t *retval) 3421 { 3422 struct vnode *vp; 3423 struct iovec aiov; 3424 struct uio auio; 3425 int error; 3426 struct pathbuf *pb; 3427 struct nameidata nd; 3428 3429 error = pathbuf_copyin(path, &pb); 3430 if (error) { 3431 return error; 3432 } 3433 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, 3434 pb); 3435 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3436 pathbuf_destroy(pb); 3437 return error; 3438 } 3439 vp = nd.ni_vp; 3440 pathbuf_destroy(pb); 3441 if (vp->v_type != VLNK) 3442 error = SET_ERROR(EINVAL); 3443 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3444 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3445 aiov.iov_base = buf; 3446 aiov.iov_len = count; 3447 auio.uio_iov = &aiov; 3448 auio.uio_iovcnt = 1; 3449 auio.uio_offset = 0; 3450 auio.uio_rw = UIO_READ; 3451 KASSERT(l == curlwp); 3452 auio.uio_vmspace = l->l_proc->p_vmspace; 3453 auio.uio_resid = count; 3454 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3455 *retval = count - auio.uio_resid; 3456 } 3457 vput(vp); 3458 return (error); 3459 } 3460 3461 int 3462 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3463 register_t *retval) 3464 { 3465 /* { 3466 syscallarg(int) fd; 3467 syscallarg(const char *) path; 3468 syscallarg(char *) buf; 3469 syscallarg(size_t) bufsize; 3470 } */ 3471 3472 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3473 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3474 } 3475 3476 /* 3477 * Change flags of a file given a path name. 3478 */ 3479 /* ARGSUSED */ 3480 int 3481 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, 3482 register_t *retval) 3483 { 3484 /* { 3485 syscallarg(const char *) path; 3486 syscallarg(u_long) flags; 3487 } */ 3488 struct vnode *vp; 3489 int error; 3490 3491 error = namei_simple_user(SCARG(uap, path), 3492 NSM_FOLLOW_TRYEMULROOT, &vp); 3493 if (error != 0) 3494 return (error); 3495 error = change_flags(vp, SCARG(uap, flags), l); 3496 vput(vp); 3497 return (error); 3498 } 3499 3500 /* 3501 * Change flags of a file given a file descriptor. 3502 */ 3503 /* ARGSUSED */ 3504 int 3505 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, 3506 register_t *retval) 3507 { 3508 /* { 3509 syscallarg(int) fd; 3510 syscallarg(u_long) flags; 3511 } */ 3512 struct vnode *vp; 3513 file_t *fp; 3514 int error; 3515 3516 /* fd_getvnode() will use the descriptor for us */ 3517 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3518 return (error); 3519 vp = fp->f_vnode; 3520 error = change_flags(vp, SCARG(uap, flags), l); 3521 VOP_UNLOCK(vp); 3522 fd_putfile(SCARG(uap, fd)); 3523 return (error); 3524 } 3525 3526 /* 3527 * Change flags of a file given a path name; this version does 3528 * not follow links. 3529 */ 3530 int 3531 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, 3532 register_t *retval) 3533 { 3534 /* { 3535 syscallarg(const char *) path; 3536 syscallarg(u_long) flags; 3537 } */ 3538 struct vnode *vp; 3539 int error; 3540 3541 error = namei_simple_user(SCARG(uap, path), 3542 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3543 if (error != 0) 3544 return (error); 3545 error = change_flags(vp, SCARG(uap, flags), l); 3546 vput(vp); 3547 return (error); 3548 } 3549 3550 /* 3551 * Common routine to change flags of a file. 3552 */ 3553 int 3554 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3555 { 3556 struct vattr vattr; 3557 int error; 3558 3559 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3560 3561 vattr_null(&vattr); 3562 vattr.va_flags = flags; 3563 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3564 3565 return (error); 3566 } 3567 3568 /* 3569 * Change mode of a file given path name; this version follows links. 3570 */ 3571 /* ARGSUSED */ 3572 int 3573 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3574 { 3575 /* { 3576 syscallarg(const char *) path; 3577 syscallarg(int) mode; 3578 } */ 3579 3580 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3581 SCARG(uap, mode), 0); 3582 } 3583 3584 int 3585 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3586 { 3587 int error; 3588 struct vnode *vp; 3589 namei_simple_flags_t ns_flag; 3590 3591 if (flags & AT_SYMLINK_NOFOLLOW) 3592 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3593 else 3594 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3595 3596 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3597 if (error != 0) 3598 return error; 3599 3600 error = change_mode(vp, mode, l); 3601 3602 vrele(vp); 3603 3604 return (error); 3605 } 3606 3607 /* 3608 * Change mode of a file given a file descriptor. 3609 */ 3610 /* ARGSUSED */ 3611 int 3612 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, 3613 register_t *retval) 3614 { 3615 /* { 3616 syscallarg(int) fd; 3617 syscallarg(int) mode; 3618 } */ 3619 file_t *fp; 3620 int error; 3621 3622 /* fd_getvnode() will use the descriptor for us */ 3623 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3624 return (error); 3625 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3626 fd_putfile(SCARG(uap, fd)); 3627 return (error); 3628 } 3629 3630 int 3631 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3632 register_t *retval) 3633 { 3634 /* { 3635 syscallarg(int) fd; 3636 syscallarg(const char *) path; 3637 syscallarg(int) mode; 3638 syscallarg(int) flag; 3639 } */ 3640 3641 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3642 SCARG(uap, mode), SCARG(uap, flag)); 3643 } 3644 3645 /* 3646 * Change mode of a file given path name; this version does not follow links. 3647 */ 3648 /* ARGSUSED */ 3649 int 3650 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, 3651 register_t *retval) 3652 { 3653 /* { 3654 syscallarg(const char *) path; 3655 syscallarg(int) mode; 3656 } */ 3657 int error; 3658 struct vnode *vp; 3659 3660 error = namei_simple_user(SCARG(uap, path), 3661 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3662 if (error != 0) 3663 return (error); 3664 3665 error = change_mode(vp, SCARG(uap, mode), l); 3666 3667 vrele(vp); 3668 return (error); 3669 } 3670 3671 /* 3672 * Common routine to set mode given a vnode. 3673 */ 3674 static int 3675 change_mode(struct vnode *vp, int mode, struct lwp *l) 3676 { 3677 struct vattr vattr; 3678 int error; 3679 3680 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3681 vattr_null(&vattr); 3682 vattr.va_mode = mode & ALLPERMS; 3683 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3684 VOP_UNLOCK(vp); 3685 return (error); 3686 } 3687 3688 /* 3689 * Set ownership given a path name; this version follows links. 3690 */ 3691 /* ARGSUSED */ 3692 int 3693 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3694 { 3695 /* { 3696 syscallarg(const char *) path; 3697 syscallarg(uid_t) uid; 3698 syscallarg(gid_t) gid; 3699 } */ 3700 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3701 SCARG(uap, gid), 0); 3702 } 3703 3704 int 3705 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3706 gid_t gid, int flags) 3707 { 3708 int error; 3709 struct vnode *vp; 3710 namei_simple_flags_t ns_flag; 3711 3712 if (flags & AT_SYMLINK_NOFOLLOW) 3713 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3714 else 3715 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3716 3717 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3718 if (error != 0) 3719 return error; 3720 3721 error = change_owner(vp, uid, gid, l, 0); 3722 3723 vrele(vp); 3724 3725 return (error); 3726 } 3727 3728 /* 3729 * Set ownership given a path name; this version follows links. 3730 * Provides POSIX semantics. 3731 */ 3732 /* ARGSUSED */ 3733 int 3734 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, 3735 register_t *retval) 3736 { 3737 /* { 3738 syscallarg(const char *) path; 3739 syscallarg(uid_t) uid; 3740 syscallarg(gid_t) gid; 3741 } */ 3742 int error; 3743 struct vnode *vp; 3744 3745 error = namei_simple_user(SCARG(uap, path), 3746 NSM_FOLLOW_TRYEMULROOT, &vp); 3747 if (error != 0) 3748 return (error); 3749 3750 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3751 3752 vrele(vp); 3753 return (error); 3754 } 3755 3756 /* 3757 * Set ownership given a file descriptor. 3758 */ 3759 /* ARGSUSED */ 3760 int 3761 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, 3762 register_t *retval) 3763 { 3764 /* { 3765 syscallarg(int) fd; 3766 syscallarg(uid_t) uid; 3767 syscallarg(gid_t) gid; 3768 } */ 3769 int error; 3770 file_t *fp; 3771 3772 /* fd_getvnode() will use the descriptor for us */ 3773 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3774 return (error); 3775 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3776 l, 0); 3777 fd_putfile(SCARG(uap, fd)); 3778 return (error); 3779 } 3780 3781 int 3782 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3783 register_t *retval) 3784 { 3785 /* { 3786 syscallarg(int) fd; 3787 syscallarg(const char *) path; 3788 syscallarg(uid_t) owner; 3789 syscallarg(gid_t) group; 3790 syscallarg(int) flag; 3791 } */ 3792 3793 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3794 SCARG(uap, owner), SCARG(uap, group), 3795 SCARG(uap, flag)); 3796 } 3797 3798 /* 3799 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3800 */ 3801 /* ARGSUSED */ 3802 int 3803 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, 3804 register_t *retval) 3805 { 3806 /* { 3807 syscallarg(int) fd; 3808 syscallarg(uid_t) uid; 3809 syscallarg(gid_t) gid; 3810 } */ 3811 int error; 3812 file_t *fp; 3813 3814 /* fd_getvnode() will use the descriptor for us */ 3815 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3816 return (error); 3817 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3818 l, 1); 3819 fd_putfile(SCARG(uap, fd)); 3820 return (error); 3821 } 3822 3823 /* 3824 * Set ownership given a path name; this version does not follow links. 3825 */ 3826 /* ARGSUSED */ 3827 int 3828 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, 3829 register_t *retval) 3830 { 3831 /* { 3832 syscallarg(const char *) path; 3833 syscallarg(uid_t) uid; 3834 syscallarg(gid_t) gid; 3835 } */ 3836 int error; 3837 struct vnode *vp; 3838 3839 error = namei_simple_user(SCARG(uap, path), 3840 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3841 if (error != 0) 3842 return (error); 3843 3844 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3845 3846 vrele(vp); 3847 return (error); 3848 } 3849 3850 /* 3851 * Set ownership given a path name; this version does not follow links. 3852 * Provides POSIX/XPG semantics. 3853 */ 3854 /* ARGSUSED */ 3855 int 3856 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, 3857 register_t *retval) 3858 { 3859 /* { 3860 syscallarg(const char *) path; 3861 syscallarg(uid_t) uid; 3862 syscallarg(gid_t) gid; 3863 } */ 3864 int error; 3865 struct vnode *vp; 3866 3867 error = namei_simple_user(SCARG(uap, path), 3868 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3869 if (error != 0) 3870 return (error); 3871 3872 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3873 3874 vrele(vp); 3875 return (error); 3876 } 3877 3878 /* 3879 * Common routine to set ownership given a vnode. 3880 */ 3881 static int 3882 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3883 int posix_semantics) 3884 { 3885 struct vattr vattr; 3886 mode_t newmode; 3887 int error; 3888 3889 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3890 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3891 goto out; 3892 3893 #define CHANGED(x) ((int)(x) != -1) 3894 newmode = vattr.va_mode; 3895 if (posix_semantics) { 3896 /* 3897 * POSIX/XPG semantics: if the caller is not the super-user, 3898 * clear set-user-id and set-group-id bits. Both POSIX and 3899 * the XPG consider the behaviour for calls by the super-user 3900 * implementation-defined; we leave the set-user-id and set- 3901 * group-id settings intact in that case. 3902 */ 3903 if (vattr.va_mode & S_ISUID) { 3904 if (kauth_authorize_vnode(l->l_cred, 3905 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3906 newmode &= ~S_ISUID; 3907 } 3908 if (vattr.va_mode & S_ISGID) { 3909 if (kauth_authorize_vnode(l->l_cred, 3910 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3911 newmode &= ~S_ISGID; 3912 } 3913 } else { 3914 /* 3915 * NetBSD semantics: when changing owner and/or group, 3916 * clear the respective bit(s). 3917 */ 3918 if (CHANGED(uid)) 3919 newmode &= ~S_ISUID; 3920 if (CHANGED(gid)) 3921 newmode &= ~S_ISGID; 3922 } 3923 /* Update va_mode iff altered. */ 3924 if (vattr.va_mode == newmode) 3925 newmode = VNOVAL; 3926 3927 vattr_null(&vattr); 3928 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3929 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3930 vattr.va_mode = newmode; 3931 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3932 #undef CHANGED 3933 3934 out: 3935 VOP_UNLOCK(vp); 3936 return (error); 3937 } 3938 3939 /* 3940 * Set the access and modification times given a path name; this 3941 * version follows links. 3942 */ 3943 /* ARGSUSED */ 3944 int 3945 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3946 register_t *retval) 3947 { 3948 /* { 3949 syscallarg(const char *) path; 3950 syscallarg(const struct timeval *) tptr; 3951 } */ 3952 3953 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3954 SCARG(uap, tptr), UIO_USERSPACE); 3955 } 3956 3957 /* 3958 * Set the access and modification times given a file descriptor. 3959 */ 3960 /* ARGSUSED */ 3961 int 3962 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3963 register_t *retval) 3964 { 3965 /* { 3966 syscallarg(int) fd; 3967 syscallarg(const struct timeval *) tptr; 3968 } */ 3969 int error; 3970 file_t *fp; 3971 3972 /* fd_getvnode() will use the descriptor for us */ 3973 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3974 return (error); 3975 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3976 UIO_USERSPACE); 3977 fd_putfile(SCARG(uap, fd)); 3978 return (error); 3979 } 3980 3981 int 3982 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3983 register_t *retval) 3984 { 3985 /* { 3986 syscallarg(int) fd; 3987 syscallarg(const struct timespec *) tptr; 3988 } */ 3989 int error; 3990 file_t *fp; 3991 3992 /* fd_getvnode() will use the descriptor for us */ 3993 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3994 return (error); 3995 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3996 SCARG(uap, tptr), UIO_USERSPACE); 3997 fd_putfile(SCARG(uap, fd)); 3998 return (error); 3999 } 4000 4001 /* 4002 * Set the access and modification times given a path name; this 4003 * version does not follow links. 4004 */ 4005 int 4006 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 4007 register_t *retval) 4008 { 4009 /* { 4010 syscallarg(const char *) path; 4011 syscallarg(const struct timeval *) tptr; 4012 } */ 4013 4014 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 4015 SCARG(uap, tptr), UIO_USERSPACE); 4016 } 4017 4018 int 4019 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 4020 register_t *retval) 4021 { 4022 /* { 4023 syscallarg(int) fd; 4024 syscallarg(const char *) path; 4025 syscallarg(const struct timespec *) tptr; 4026 syscallarg(int) flag; 4027 } */ 4028 int follow; 4029 const struct timespec *tptr; 4030 int error; 4031 4032 tptr = SCARG(uap, tptr); 4033 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 4034 4035 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 4036 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 4037 4038 return error; 4039 } 4040 4041 /* 4042 * Common routine to set access and modification times given a vnode. 4043 */ 4044 int 4045 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 4046 const struct timespec *tptr, enum uio_seg seg) 4047 { 4048 4049 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 4050 } 4051 4052 int 4053 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 4054 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 4055 { 4056 struct vattr vattr; 4057 int error, dorele = 0; 4058 namei_simple_flags_t sflags; 4059 bool vanull, setbirthtime; 4060 struct timespec ts[2]; 4061 4062 KASSERT(l != NULL || fdat == AT_FDCWD); 4063 4064 /* 4065 * I have checked all callers and they pass either FOLLOW, 4066 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 4067 * is 0. More to the point, they don't pass anything else. 4068 * Let's keep it that way at least until the namei interfaces 4069 * are fully sanitized. 4070 */ 4071 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4072 sflags = (flag == FOLLOW) ? 4073 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4074 4075 if (tptr == NULL) { 4076 vanull = true; 4077 nanotime(&ts[0]); 4078 ts[1] = ts[0]; 4079 } else { 4080 vanull = false; 4081 if (seg != UIO_SYSSPACE) { 4082 error = copyin(tptr, ts, sizeof (ts)); 4083 if (error != 0) 4084 return error; 4085 } else { 4086 ts[0] = tptr[0]; 4087 ts[1] = tptr[1]; 4088 } 4089 } 4090 4091 if (ts[0].tv_nsec == UTIME_NOW) { 4092 nanotime(&ts[0]); 4093 if (ts[1].tv_nsec == UTIME_NOW) { 4094 vanull = true; 4095 ts[1] = ts[0]; 4096 } 4097 } else if (ts[1].tv_nsec == UTIME_NOW) 4098 nanotime(&ts[1]); 4099 4100 if (vp == NULL) { 4101 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4102 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4103 if (error != 0) 4104 return error; 4105 dorele = 1; 4106 } 4107 4108 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4109 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4110 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4111 vattr_null(&vattr); 4112 4113 if (ts[0].tv_nsec != UTIME_OMIT) 4114 vattr.va_atime = ts[0]; 4115 4116 if (ts[1].tv_nsec != UTIME_OMIT) { 4117 vattr.va_mtime = ts[1]; 4118 if (setbirthtime) 4119 vattr.va_birthtime = ts[1]; 4120 } 4121 4122 if (vanull) 4123 vattr.va_vaflags |= VA_UTIMES_NULL; 4124 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4125 VOP_UNLOCK(vp); 4126 4127 if (dorele != 0) 4128 vrele(vp); 4129 4130 return error; 4131 } 4132 4133 int 4134 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4135 const struct timeval *tptr, enum uio_seg seg) 4136 { 4137 struct timespec ts[2]; 4138 struct timespec *tsptr = NULL; 4139 int error; 4140 4141 if (tptr != NULL) { 4142 struct timeval tv[2]; 4143 4144 if (seg != UIO_SYSSPACE) { 4145 error = copyin(tptr, tv, sizeof(tv)); 4146 if (error != 0) 4147 return error; 4148 tptr = tv; 4149 } 4150 4151 if ((tptr[0].tv_usec == UTIME_NOW) || 4152 (tptr[0].tv_usec == UTIME_OMIT)) 4153 ts[0].tv_nsec = tptr[0].tv_usec; 4154 else { 4155 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4156 return SET_ERROR(EINVAL); 4157 4158 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4159 } 4160 4161 if ((tptr[1].tv_usec == UTIME_NOW) || 4162 (tptr[1].tv_usec == UTIME_OMIT)) 4163 ts[1].tv_nsec = tptr[1].tv_usec; 4164 else { 4165 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4166 return SET_ERROR(EINVAL); 4167 4168 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4169 } 4170 4171 tsptr = &ts[0]; 4172 } 4173 4174 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4175 } 4176 4177 /* 4178 * Truncate a file given its path name. 4179 */ 4180 /* ARGSUSED */ 4181 int 4182 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, 4183 register_t *retval) 4184 { 4185 /* { 4186 syscallarg(const char *) path; 4187 syscallarg(int) pad; 4188 syscallarg(off_t) length; 4189 } */ 4190 struct vnode *vp; 4191 struct vattr vattr; 4192 int error; 4193 4194 if (SCARG(uap, length) < 0) 4195 return SET_ERROR(EINVAL); 4196 4197 error = namei_simple_user(SCARG(uap, path), 4198 NSM_FOLLOW_TRYEMULROOT, &vp); 4199 if (error != 0) 4200 return (error); 4201 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4202 if (vp->v_type == VDIR) 4203 error = SET_ERROR(EISDIR); 4204 else if ((error = vn_writechk(vp)) == 0 && 4205 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4206 vattr_null(&vattr); 4207 vattr.va_size = SCARG(uap, length); 4208 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4209 } 4210 vput(vp); 4211 return (error); 4212 } 4213 4214 /* 4215 * Truncate a file given a file descriptor. 4216 */ 4217 /* ARGSUSED */ 4218 int 4219 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, 4220 register_t *retval) 4221 { 4222 /* { 4223 syscallarg(int) fd; 4224 syscallarg(int) pad; 4225 syscallarg(off_t) length; 4226 } */ 4227 file_t *fp; 4228 int error, fd = SCARG(uap, fd); 4229 4230 fp = fd_getfile(fd); 4231 if (fp == NULL) 4232 return SET_ERROR(EBADF); 4233 if (fp->f_ops->fo_truncate == NULL) 4234 error = SET_ERROR(EOPNOTSUPP); 4235 else 4236 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4237 4238 fd_putfile(fd); 4239 return error; 4240 } 4241 4242 /* 4243 * Sync an open file. 4244 */ 4245 /* ARGSUSED */ 4246 int 4247 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4248 { 4249 /* { 4250 syscallarg(int) fd; 4251 } */ 4252 struct vnode *vp; 4253 file_t *fp; 4254 int error; 4255 4256 /* fd_getvnode() will use the descriptor for us */ 4257 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4258 return (error); 4259 vp = fp->f_vnode; 4260 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4261 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4262 VOP_UNLOCK(vp); 4263 fd_putfile(SCARG(uap, fd)); 4264 return (error); 4265 } 4266 4267 /* 4268 * Sync a range of file data. API modeled after that found in AIX. 4269 * 4270 * FDATASYNC indicates that we need only save enough metadata to be able 4271 * to re-read the written data. 4272 */ 4273 /* ARGSUSED */ 4274 int 4275 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, 4276 register_t *retval) 4277 { 4278 /* { 4279 syscallarg(int) fd; 4280 syscallarg(int) flags; 4281 syscallarg(off_t) start; 4282 syscallarg(off_t) length; 4283 } */ 4284 struct vnode *vp; 4285 file_t *fp; 4286 int flags, nflags; 4287 off_t s, e, len; 4288 int error; 4289 4290 /* fd_getvnode() will use the descriptor for us */ 4291 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4292 return (error); 4293 4294 if ((fp->f_flag & FWRITE) == 0) { 4295 error = SET_ERROR(EBADF); 4296 goto out; 4297 } 4298 4299 flags = SCARG(uap, flags); 4300 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4301 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4302 error = SET_ERROR(EINVAL); 4303 goto out; 4304 } 4305 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4306 if (flags & FDATASYNC) 4307 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4308 else 4309 nflags = FSYNC_WAIT; 4310 if (flags & FDISKSYNC) 4311 nflags |= FSYNC_CACHE; 4312 4313 len = SCARG(uap, length); 4314 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4315 if (len) { 4316 s = SCARG(uap, start); 4317 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4318 error = SET_ERROR(EINVAL); 4319 goto out; 4320 } 4321 e = s + len; 4322 KASSERT(s <= e); 4323 } else { 4324 e = 0; 4325 s = 0; 4326 } 4327 4328 vp = fp->f_vnode; 4329 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4330 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4331 VOP_UNLOCK(vp); 4332 out: 4333 fd_putfile(SCARG(uap, fd)); 4334 return (error); 4335 } 4336 4337 /* 4338 * Sync the data of an open file. 4339 */ 4340 /* ARGSUSED */ 4341 int 4342 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, 4343 register_t *retval) 4344 { 4345 /* { 4346 syscallarg(int) fd; 4347 } */ 4348 struct vnode *vp; 4349 file_t *fp; 4350 int error; 4351 4352 /* fd_getvnode() will use the descriptor for us */ 4353 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4354 return (error); 4355 vp = fp->f_vnode; 4356 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4357 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4358 VOP_UNLOCK(vp); 4359 fd_putfile(SCARG(uap, fd)); 4360 return (error); 4361 } 4362 4363 /* 4364 * Rename files, (standard) BSD semantics frontend. 4365 */ 4366 /* ARGSUSED */ 4367 int 4368 sys_rename(struct lwp *l, const struct sys_rename_args *uap, 4369 register_t *retval) 4370 { 4371 /* { 4372 syscallarg(const char *) from; 4373 syscallarg(const char *) to; 4374 } */ 4375 4376 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4377 SCARG(uap, to), UIO_USERSPACE, 0); 4378 } 4379 4380 int 4381 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4382 register_t *retval) 4383 { 4384 /* { 4385 syscallarg(int) fromfd; 4386 syscallarg(const char *) from; 4387 syscallarg(int) tofd; 4388 syscallarg(const char *) to; 4389 } */ 4390 4391 return do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4392 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0); 4393 } 4394 4395 /* 4396 * Rename files, POSIX semantics frontend. 4397 */ 4398 /* ARGSUSED */ 4399 int 4400 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, 4401 register_t *retval) 4402 { 4403 /* { 4404 syscallarg(const char *) from; 4405 syscallarg(const char *) to; 4406 } */ 4407 4408 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4409 SCARG(uap, to), UIO_USERSPACE, 1); 4410 } 4411 4412 /* 4413 * Rename files. Source and destination must either both be directories, 4414 * or both not be directories. If target is a directory, it must be empty. 4415 * If `from' and `to' refer to the same object, the value of the `retain' 4416 * argument is used to determine whether `from' will be 4417 * 4418 * (retain == 0) deleted unless `from' and `to' refer to the same 4419 * object in the file system's name space (BSD). 4420 * (retain == 1) always retained (POSIX). 4421 * 4422 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4423 */ 4424 int 4425 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4426 { 4427 4428 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, 4429 retain); 4430 } 4431 4432 static int 4433 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4434 const char *to, enum uio_seg seg, int retain) 4435 { 4436 struct pathbuf *fpb, *tpb; 4437 struct nameidata fnd, tnd; 4438 struct vnode *fdvp, *fvp; 4439 struct vnode *tdvp, *tvp; 4440 struct mount *mp, *tmp; 4441 int error; 4442 4443 KASSERT(l != NULL || fromfd == AT_FDCWD); 4444 KASSERT(l != NULL || tofd == AT_FDCWD); 4445 4446 error = pathbuf_maybe_copyin(from, seg, &fpb); 4447 if (error) 4448 goto out0; 4449 KASSERT(fpb != NULL); 4450 4451 error = pathbuf_maybe_copyin(to, seg, &tpb); 4452 if (error) 4453 goto out1; 4454 KASSERT(tpb != NULL); 4455 4456 /* 4457 * Lookup from. 4458 * 4459 * XXX LOCKPARENT is wrong because we don't actually want it 4460 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4461 * insane, so for the time being we need to leave it like this. 4462 */ 4463 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4464 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4465 goto out2; 4466 4467 /* 4468 * Pull out the important results of the lookup, fdvp and fvp. 4469 * Of course, fvp is bogus because we're about to unlock fdvp. 4470 */ 4471 fdvp = fnd.ni_dvp; 4472 fvp = fnd.ni_vp; 4473 mp = fdvp->v_mount; 4474 KASSERT(fdvp != NULL); 4475 KASSERT(fvp != NULL); 4476 KASSERT(fdvp == fvp || VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); 4477 /* 4478 * Bracket the operation with fstrans_start()/fstrans_done(). 4479 * 4480 * Inside the bracket this file system cannot be unmounted so 4481 * a vnode on this file system cannot change its v_mount. 4482 * A vnode on another file system may still change to dead mount. 4483 */ 4484 fstrans_start(mp); 4485 4486 /* 4487 * Make sure neither fdvp nor fvp is locked. 4488 */ 4489 if (fdvp != fvp) 4490 VOP_UNLOCK(fdvp); 4491 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4492 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4493 4494 /* 4495 * Reject renaming `.' and `..'. Can't do this until after 4496 * namei because we need namei's parsing to find the final 4497 * component name. (namei should just leave us with the final 4498 * component name and not look it up itself, but anyway...) 4499 * 4500 * This was here before because we used to relookup from 4501 * instead of to and relookup requires the caller to check 4502 * this, but now file systems may depend on this check, so we 4503 * must retain it until the file systems are all rototilled. 4504 */ 4505 if ((fnd.ni_cnd.cn_namelen == 1 && 4506 fnd.ni_cnd.cn_nameptr[0] == '.') || 4507 (fnd.ni_cnd.cn_namelen == 2 && 4508 fnd.ni_cnd.cn_nameptr[0] == '.' && 4509 fnd.ni_cnd.cn_nameptr[1] == '.')) { 4510 error = SET_ERROR(EINVAL); /* XXX EISDIR? */ 4511 goto abort0; 4512 } 4513 4514 /* 4515 * Lookup to. 4516 * 4517 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4518 * fvp here to decide whether to add CREATEDIR is a load of 4519 * bollocks because fvp might be the wrong node by now, since 4520 * fdvp is unlocked. 4521 * 4522 * XXX Why not pass CREATEDIR always? 4523 */ 4524 NDINIT(&tnd, RENAME, 4525 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4526 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4527 tpb); 4528 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4529 goto abort0; 4530 4531 /* 4532 * Pull out the important results of the lookup, tdvp and tvp. 4533 * Of course, tvp is bogus because we're about to unlock tdvp. 4534 */ 4535 tdvp = tnd.ni_dvp; 4536 tvp = tnd.ni_vp; 4537 KASSERT(tdvp != NULL); 4538 KASSERT(tdvp == tvp || VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4539 4540 if (fvp->v_type == VDIR) 4541 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4542 /* 4543 * Make sure neither tdvp nor tvp is locked. 4544 */ 4545 if (tdvp != tvp) 4546 VOP_UNLOCK(tdvp); 4547 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4548 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4549 4550 /* 4551 * Reject renaming onto `.' or `..'. relookup is unhappy with 4552 * these, which is why we must do this here. Once upon a time 4553 * we relooked up from instead of to, and consequently didn't 4554 * need this check, but now that we relookup to instead of 4555 * from, we need this; and we shall need it forever forward 4556 * until the VOP_RENAME protocol changes, because file systems 4557 * will no doubt begin to depend on this check. 4558 */ 4559 if (tnd.ni_cnd.cn_namelen == 1 && tnd.ni_cnd.cn_nameptr[0] == '.') { 4560 error = SET_ERROR(EISDIR); 4561 goto abort1; 4562 } 4563 if (tnd.ni_cnd.cn_namelen == 2 && 4564 tnd.ni_cnd.cn_nameptr[0] == '.' && 4565 tnd.ni_cnd.cn_nameptr[1] == '.') { 4566 error = SET_ERROR(EINVAL); 4567 goto abort1; 4568 } 4569 4570 /* 4571 * Make sure the mount points match. Although we don't hold 4572 * any vnode locks, the v_mount on fdvp file system are stable. 4573 * 4574 * Unmounting another file system at an inopportune moment may 4575 * cause tdvp to disappear and change its v_mount to dead. 4576 * 4577 * So in either case different v_mount means cross-device rename. 4578 */ 4579 KASSERT(mp != NULL); 4580 tmp = tdvp->v_mount; 4581 4582 if (mp != tmp) { 4583 error = SET_ERROR(EXDEV); 4584 goto abort1; 4585 } 4586 4587 /* 4588 * Take the vfs rename lock to avoid cross-directory screw cases. 4589 * Nothing is locked currently, so taking this lock is safe. 4590 */ 4591 error = VFS_RENAMELOCK_ENTER(mp); 4592 if (error) 4593 goto abort1; 4594 4595 /* 4596 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4597 * and nothing is locked except for the vfs rename lock. 4598 * 4599 * The next step is a little rain dance to conform to the 4600 * insane lock protocol, even though it does nothing to ward 4601 * off race conditions. 4602 * 4603 * We need tdvp and tvp to be locked. However, because we have 4604 * unlocked tdvp in order to hold no locks while we take the 4605 * vfs rename lock, tvp may be wrong here, and we can't safely 4606 * lock it even if the sensible file systems will just unlock 4607 * it straight away. Consequently, we must lock tdvp and then 4608 * relookup tvp to get it locked. 4609 * 4610 * Finally, because the VOP_RENAME protocol is brain-damaged 4611 * and various file systems insanely depend on the semantics of 4612 * this brain damage, the lookup of to must be the last lookup 4613 * before VOP_RENAME. 4614 */ 4615 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4616 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4617 if (error) 4618 goto abort2; 4619 4620 /* 4621 * Drop the old tvp and pick up the new one -- which might be 4622 * the same, but that doesn't matter to us. After this, tdvp 4623 * and tvp should both be locked. 4624 */ 4625 if (tvp != NULL) 4626 vrele(tvp); 4627 tvp = tnd.ni_vp; 4628 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4629 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4630 4631 /* 4632 * The old do_sys_rename had various consistency checks here 4633 * involving fvp and tvp. fvp is bogus already here, and tvp 4634 * will become bogus soon in any sensible file system, so the 4635 * only purpose in putting these checks here is to give lip 4636 * service to these screw cases and to acknowledge that they 4637 * exist, not actually to handle them, but here you go 4638 * anyway... 4639 */ 4640 4641 /* 4642 * Acknowledge that directories and non-directories aren't 4643 * supposed to mix. 4644 */ 4645 if (tvp != NULL) { 4646 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 4647 error = SET_ERROR(ENOTDIR); 4648 goto abort3; 4649 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 4650 error = SET_ERROR(EISDIR); 4651 goto abort3; 4652 } 4653 } 4654 4655 /* 4656 * Acknowledge some random screw case, among the dozens that 4657 * might arise. 4658 */ 4659 if (fvp == tdvp) { 4660 error = SET_ERROR(EINVAL); 4661 goto abort3; 4662 } 4663 4664 /* 4665 * Acknowledge that POSIX has a wacky screw case. 4666 * 4667 * XXX Eventually the retain flag needs to be passed on to 4668 * VOP_RENAME. 4669 */ 4670 if (fvp == tvp) { 4671 if (retain) { 4672 error = 0; 4673 goto abort3; 4674 } else if (fdvp == tdvp && 4675 fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen && 4676 0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4677 fnd.ni_cnd.cn_namelen)) { 4678 error = 0; 4679 goto abort3; 4680 } 4681 } 4682 4683 /* 4684 * Make sure veriexec can screw us up. (But a race can screw 4685 * up veriexec, of course -- remember, fvp and (soon) tvp are 4686 * bogus.) 4687 */ 4688 #if NVERIEXEC > 0 4689 { 4690 char *f1, *f2; 4691 size_t f1_len; 4692 size_t f2_len; 4693 4694 f1_len = fnd.ni_cnd.cn_namelen + 1; 4695 f1 = kmem_alloc(f1_len, KM_SLEEP); 4696 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4697 4698 f2_len = tnd.ni_cnd.cn_namelen + 1; 4699 f2 = kmem_alloc(f2_len, KM_SLEEP); 4700 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4701 4702 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4703 4704 kmem_free(f1, f1_len); 4705 kmem_free(f2, f2_len); 4706 4707 if (error) 4708 goto abort3; 4709 } 4710 #endif /* NVERIEXEC > 0 */ 4711 4712 /* 4713 * All ready. Incant the rename vop. 4714 */ 4715 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4716 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4717 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4718 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4719 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4720 4721 /* 4722 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4723 * tdvp and tvp. But we can't assert any of that. 4724 */ 4725 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4726 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4727 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4728 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4729 4730 /* 4731 * So all we have left to do is to drop the rename lock and 4732 * destroy the pathbufs. 4733 */ 4734 VFS_RENAMELOCK_EXIT(mp); 4735 fstrans_done(mp); 4736 goto out2; 4737 4738 abort3: if (tvp != NULL && tvp != tdvp) 4739 VOP_UNLOCK(tvp); 4740 abort2: VOP_UNLOCK(tdvp); 4741 VFS_RENAMELOCK_EXIT(mp); 4742 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4743 vrele(tdvp); 4744 if (tvp != NULL) 4745 vrele(tvp); 4746 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4747 vrele(fdvp); 4748 vrele(fvp); 4749 fstrans_done(mp); 4750 out2: pathbuf_destroy(tpb); 4751 out1: pathbuf_destroy(fpb); 4752 out0: return error; 4753 } 4754 4755 /* 4756 * Make a directory file. 4757 */ 4758 /* ARGSUSED */ 4759 int 4760 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4761 { 4762 /* { 4763 syscallarg(const char *) path; 4764 syscallarg(int) mode; 4765 } */ 4766 4767 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4768 SCARG(uap, mode), UIO_USERSPACE); 4769 } 4770 4771 int 4772 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4773 register_t *retval) 4774 { 4775 /* { 4776 syscallarg(int) fd; 4777 syscallarg(const char *) path; 4778 syscallarg(int) mode; 4779 } */ 4780 4781 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4782 SCARG(uap, mode), UIO_USERSPACE); 4783 } 4784 4785 int 4786 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4787 { 4788 4789 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4790 } 4791 4792 static int 4793 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4794 enum uio_seg seg) 4795 { 4796 struct proc *p = curlwp->l_proc; 4797 struct vnode *vp; 4798 struct vattr vattr; 4799 int error; 4800 struct pathbuf *pb; 4801 struct nameidata nd; 4802 4803 KASSERT(l != NULL || fdat == AT_FDCWD); 4804 4805 /* XXX bollocks, should pass in a pathbuf */ 4806 error = pathbuf_maybe_copyin(path, seg, &pb); 4807 if (error) { 4808 return error; 4809 } 4810 4811 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4812 4813 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4814 pathbuf_destroy(pb); 4815 return (error); 4816 } 4817 vp = nd.ni_vp; 4818 if (vp != NULL) { 4819 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4820 if (nd.ni_dvp == vp) 4821 vrele(nd.ni_dvp); 4822 else 4823 vput(nd.ni_dvp); 4824 vrele(vp); 4825 pathbuf_destroy(pb); 4826 return SET_ERROR(EEXIST); 4827 } 4828 vattr_null(&vattr); 4829 vattr.va_type = VDIR; 4830 /* We will read cwdi->cwdi_cmask unlocked. */ 4831 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4832 nd.ni_cnd.cn_flags |= WILLBEDIR; 4833 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4834 if (!error) 4835 vrele(nd.ni_vp); 4836 vput(nd.ni_dvp); 4837 pathbuf_destroy(pb); 4838 return (error); 4839 } 4840 4841 /* 4842 * Remove a directory file. 4843 */ 4844 /* ARGSUSED */ 4845 int 4846 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4847 { 4848 /* { 4849 syscallarg(char *) path; 4850 } */ 4851 4852 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR, 4853 UIO_USERSPACE); 4854 } 4855 4856 /* 4857 * Read a block of directory entries in a file system independent format. 4858 */ 4859 int 4860 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, 4861 register_t *retval) 4862 { 4863 /* { 4864 syscallarg(int) fd; 4865 syscallarg(char *) buf; 4866 syscallarg(size_t) count; 4867 } */ 4868 file_t *fp; 4869 int error, done; 4870 4871 /* fd_getvnode() will use the descriptor for us */ 4872 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4873 return (error); 4874 if ((fp->f_flag & FREAD) == 0) { 4875 error = SET_ERROR(EBADF); 4876 goto out; 4877 } 4878 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4879 SCARG(uap, count), &done, l, 0, 0); 4880 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4881 *retval = done; 4882 out: 4883 fd_putfile(SCARG(uap, fd)); 4884 return (error); 4885 } 4886 4887 /* 4888 * Set the mode mask for creation of filesystem nodes. 4889 */ 4890 int 4891 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4892 { 4893 /* { 4894 syscallarg(mode_t) newmask; 4895 } */ 4896 4897 /* 4898 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4899 * serialization with those reads is required. It's important to 4900 * return a coherent answer for the caller of umask() though, and 4901 * the atomic operation accomplishes that. 4902 */ 4903 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4904 SCARG(uap, newmask) & ALLPERMS); 4905 4906 return (0); 4907 } 4908 4909 int 4910 dorevoke(struct vnode *vp, kauth_cred_t cred) 4911 { 4912 struct vattr vattr; 4913 int error, fs_decision; 4914 4915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4916 error = VOP_GETATTR(vp, &vattr, cred); 4917 VOP_UNLOCK(vp); 4918 if (error != 0) 4919 return error; 4920 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 4921 : SET_ERROR(EPERM); 4922 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4923 fs_decision); 4924 if (!error) 4925 VOP_REVOKE(vp, REVOKEALL); 4926 return (error); 4927 } 4928 4929 /* 4930 * Void all references to file by ripping underlying filesystem 4931 * away from vnode. 4932 */ 4933 /* ARGSUSED */ 4934 int 4935 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, 4936 register_t *retval) 4937 { 4938 /* { 4939 syscallarg(const char *) path; 4940 } */ 4941 struct vnode *vp; 4942 int error; 4943 4944 error = namei_simple_user(SCARG(uap, path), NSM_FOLLOW_TRYEMULROOT, 4945 &vp); 4946 if (error != 0) 4947 return (error); 4948 error = dorevoke(vp, l->l_cred); 4949 vrele(vp); 4950 return (error); 4951 } 4952 4953 /* 4954 * Allocate backing store for a file, filling a hole without having to 4955 * explicitly write anything out. 4956 */ 4957 /* ARGSUSED */ 4958 int 4959 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4960 register_t *retval) 4961 { 4962 /* { 4963 syscallarg(int) fd; 4964 syscallarg(off_t) pos; 4965 syscallarg(off_t) len; 4966 } */ 4967 int fd; 4968 off_t pos, len; 4969 struct file *fp; 4970 struct vnode *vp; 4971 int error; 4972 4973 fd = SCARG(uap, fd); 4974 pos = SCARG(uap, pos); 4975 len = SCARG(uap, len); 4976 4977 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4978 *retval = SET_ERROR(EINVAL); 4979 return 0; 4980 } 4981 4982 error = fd_getvnode(fd, &fp); 4983 if (error) { 4984 *retval = error; 4985 return 0; 4986 } 4987 if ((fp->f_flag & FWRITE) == 0) { 4988 error = SET_ERROR(EBADF); 4989 goto fail; 4990 } 4991 vp = fp->f_vnode; 4992 4993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4994 if (vp->v_type == VDIR) { 4995 error = SET_ERROR(EISDIR); 4996 } else { 4997 error = VOP_FALLOCATE(vp, pos, len); 4998 } 4999 VOP_UNLOCK(vp); 5000 5001 fail: 5002 fd_putfile(fd); 5003 *retval = error; 5004 return 0; 5005 } 5006 5007 /* 5008 * Deallocate backing store for a file, creating a hole. Also used for 5009 * invoking TRIM on disks. 5010 */ 5011 /* ARGSUSED */ 5012 int 5013 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 5014 register_t *retval) 5015 { 5016 /* { 5017 syscallarg(int) fd; 5018 syscallarg(off_t) pos; 5019 syscallarg(off_t) len; 5020 } */ 5021 int fd; 5022 off_t pos, len; 5023 struct file *fp; 5024 struct vnode *vp; 5025 int error; 5026 5027 fd = SCARG(uap, fd); 5028 pos = SCARG(uap, pos); 5029 len = SCARG(uap, len); 5030 5031 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 5032 return SET_ERROR(EINVAL); 5033 } 5034 5035 error = fd_getvnode(fd, &fp); 5036 if (error) { 5037 return error; 5038 } 5039 if ((fp->f_flag & FWRITE) == 0) { 5040 error = SET_ERROR(EBADF); 5041 goto fail; 5042 } 5043 vp = fp->f_vnode; 5044 5045 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 5046 if (vp->v_type == VDIR) { 5047 error = SET_ERROR(EISDIR); 5048 } else { 5049 error = VOP_FDISCARD(vp, pos, len); 5050 } 5051 VOP_UNLOCK(vp); 5052 5053 fail: 5054 fd_putfile(fd); 5055 return error; 5056 } 5057