/*	$NetBSD: lfs_vnops.c,v 1.348 2025/11/06 15:45:32 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_vnops.c	8.13 (Berkeley) 6/10/95
 */

/* from NetBSD: ufs_vnops.c,v 1.232 2016/05/19 18:32:03 riastradh Exp */
/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_vnops.c	8.28 (Berkeley) 7/31/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.348 2025/11/06 15:45:32 perseant Exp $");

#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/signalvar.h>
#include <sys/kauth.h>
#include <sys/syslog.h>

#include <miscfs/fifofs/fifo.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_bswap.h>
#include <ufs/lfs/ulfs_extern.h>

#include <uvm/uvm_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

extern kcondvar_t lfs_writerd_cv;
int lfs_ignore_lazy_sync = 1;

static int lfs_openextattr(void *v);
static int lfs_closeextattr(void *v);
static int lfs_getextattr(void *v);
static int lfs_setextattr(void *v);
static int lfs_listextattr(void *v);
static int lfs_deleteextattr(void *v);

static int lfs_makeinode(struct vattr *vap, struct vnode *,
			 const struct ulfs_lookup_results *,
			 struct vnode **, struct componentname *);
static int lfs_filestats(struct lfs *, ino_t, struct lfs_filestats *);

/* Global vfs data structures for lfs. */
int (**lfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, genfs_parsepath },	/* parsepath */
	{ &vop_lookup_desc, ulfs_lookup },		/* lookup */
	{ &vop_create_desc, lfs_create },		/* create */
	{ &vop_whiteout_desc, ulfs_whiteout },		/* whiteout */
	{ &vop_mknod_desc, lfs_mknod },			/* mknod */
	{ &vop_open_desc, ulfs_open },			/* open */
	{ &vop_close_desc, lfs_close },			/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, lfs_read },			/* read */
	{ &vop_write_desc, lfs_write },			/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, genfs_enoioctl },		/* ioctl */
	{ &vop_fcntl_desc, lfs_fcntl },			/* fcntl */
	{ &vop_poll_desc, genfs_poll },			/* poll */
	{ &vop_kqfilter_desc, genfs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
	{ &vop_mmap_desc, lfs_mmap },			/* mmap */
	{ &vop_fsync_desc, lfs_fsync },			/* fsync */
	{ &vop_seek_desc, genfs_seek },			/* seek */
	{ &vop_remove_desc, lfs_remove },		/* remove */
	{ &vop_link_desc, lfs_link },			/* link */
	{ &vop_rename_desc, lfs_rename },		/* rename */
	{ &vop_mkdir_desc, lfs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, lfs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, lfs_symlink },		/* symlink */
	{ &vop_readdir_desc, ulfs_readdir },		/* readdir */
	{ &vop_readlink_desc, ulfs_readlink },		/* readlink */
	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_bmap_desc, ulfs_bmap },			/* bmap */
	{ &vop_strategy_desc, lfs_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, ulfs_pathconf },		/* pathconf */
	{ &vop_advlock_desc, ulfs_advlock },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_getpages_desc, lfs_getpages },		/* getpages */
	{ &vop_putpages_desc, lfs_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_vnodeop_opv_desc =
	{ &lfs_vnodeop_p, lfs_vnodeop_entries };

int (**lfs_specop_p)(void *);
const struct vnodeopv_entry_desc lfs_specop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	GENFS_SPECOP_ENTRIES,
	{ &vop_close_desc, lfsspec_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsspec_read },		/* read */
	{ &vop_write_desc, ulfsspec_write },		/* write */
	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_specop_opv_desc =
	{ &lfs_specop_p, lfs_specop_entries };

int (**lfs_fifoop_p)(void *);
const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	GENFS_FIFOOP_ENTRIES,
	{ &vop_close_desc, lfsfifo_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsfifo_read },		/* read */
	{ &vop_write_desc, ulfsfifo_write },		/* write */
	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
	{ &vop_fsync_desc, vn_fifo_bypass },		/* fsync */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_fifoop_opv_desc =
	{ &lfs_fifoop_p, lfs_fifoop_entries };
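
/*
 * Editor's note (schematic sketch, not part of the original source):
 * how a vnode operation dispatches through the tables above.  A call
 * such as VOP_CLOSE(vp, FNONBLOCK, cred) packs its arguments into the
 * matching args structure and indirects through the operations vector
 * the vnode was created with, so an LFS vnode lands in lfs_close().
 * The real wrappers live in the generated vnode_if.c; this expansion
 * is illustrative only.
 */
#if 0
	struct vop_close_args a;

	a.a_desc = VDESC(vop_close);	/* &vop_close_desc */
	a.a_vp = vp;
	a.a_fflag = FNONBLOCK;
	a.a_cred = cred;
	error = VCALL(vp, VOFFSET(vop_close), &a);
#endif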

#include <ufs/lfs/ulfs_readwrite.c>

/*
 * Allocate a new inode.
 */
static int
lfs_makeinode(struct vattr *vap, struct vnode *dvp,
	      const struct ulfs_lookup_results *ulr,
	      struct vnode **vpp, struct componentname *cnp)
{
	struct inode *ip;
	struct vnode *tvp;
	int error;

	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL, &tvp);
	if (error)
		return error;
	error = vn_lock(tvp, LK_EXCLUSIVE);
	if (error) {
		vrele(tvp);
		return error;
	}
	MARK_VNODE(tvp);
	*vpp = tvp;
	ip = VTOI(tvp);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_nlink = 1;
	DIP_ASSIGN(ip, nlink, 1);

	/* Authorize setting SGID if needed. */
	if (ip->i_mode & ISGID) {
		error = kauth_authorize_vnode(cnp->cn_cred,
		    KAUTH_VNODE_WRITE_SECURITY,
		    tvp, NULL, genfs_can_chmod(tvp, cnp->cn_cred, ip->i_uid,
		    ip->i_gid, MAKEIMODE(vap->va_type, vap->va_mode)));
		if (error) {
			ip->i_mode &= ~ISGID;
			DIP_ASSIGN(ip, mode, ip->i_mode);
		}
	}

	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;
	error = ulfs_direnter(dvp, ulr, tvp,
	    cnp, ip->i_number, LFS_IFTODT(ip->i_mode), NULL);
	if (error)
		goto bad;
	*vpp = tvp;
	cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);
	return (0);

 bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	ip->i_nlink = 0;
	DIP_ASSIGN(ip, nlink, 0);
	ip->i_state |= IN_CHANGE;
	/* If IN_ADIROP, account for it */
	UNMARK_VNODE(tvp);
	vput(tvp);
	return (error);
}

/*
 * Synch an open file.
 */
/* ARGSUSED */
int
lfs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	int error = 0;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	/* If we're mounted read-only, don't try to sync. */
	if (fs->lfs_ronly)
		goto out;

	/* If a removed vnode is being cleaned, no need to sync here. */
	if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
		goto out;

	/*
	 * Trickle sync simply adds this vnode to the pager list, as if
	 * the pagedaemon had requested a pageout.
	 */
	if (ap->a_flags & FSYNC_LAZY) {
		if (lfs_ignore_lazy_sync == 0) {
			mutex_enter(&lfs_lock);
			if (!(ip->i_state & IN_PAGING)) {
				ip->i_state |= IN_PAGING;
				TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
				    i_lfs_pchain);
			}
			cv_broadcast(&lfs_writerd_cv);
			mutex_exit(&lfs_lock);
		}
		goto out;
	}

	KASSERT(!(ap->a_flags & FSYNC_RECLAIM && ip->i_state & IN_CLEANING));

	wait = (ap->a_flags & FSYNC_WAIT);
	do {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
		    round_page(ap->a_offhi),
		    PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
		if (error == EAGAIN) {
			mutex_enter(&lfs_lock);
			mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
			    "lfs_fsync", hz / 100 + 1, &lfs_lock);
			mutex_exit(&lfs_lock);
		}
	} while (error == EAGAIN);
	if (error)
		goto out;

	if ((ap->a_flags & FSYNC_DATAONLY) == 0)
		error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
		    curlwp->l_cred);
	}
	if (wait && !VPISEMPTY(vp))
		LFS_SET_UINO(ip, IN_MODIFIED);

 out:
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	return error;
}
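
/*
 * Editor's note (sketch, not original source): from userland the
 * a_offlo/a_offhi range above is driven by fsync_range(2), while
 * plain fsync(2) covers the whole file; FDISKSYNC is what sets
 * FSYNC_CACHE and triggers the DIOCCACHESYNC ioctl seen above.
 */
#if 0
	/* Userland: flush a byte range, including the drive's cache. */
	if (fsync_range(fd, FFILESYNC | FDISKSYNC, offset, length) == -1)
		err(EXIT_FAILURE, "fsync_range");
#endif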

/*
 * Take IN_ADIROP off, then call ulfs_inactive.
 */
int
lfs_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct inode *ip;

	ip = VTOI(ap->a_vp);
	KASSERT(VOP_ISLOCKED(ap->a_vp) == LK_EXCLUSIVE);
	KASSERT(!(ip->i_state & IN_CLEANING));

	UNMARK_VNODE(ap->a_vp);

	/*
	 * The Ifile is only ever inactivated on unmount.
	 * Streamline this process by not giving it more dirty blocks.
	 */
	if (ip->i_number == LFS_IFILE_INUM) {
		mutex_enter(&lfs_lock);
		LFS_CLR_UINO(ip, IN_ALLMOD);
		mutex_exit(&lfs_lock);
		return 0;
	}

#ifdef DEBUG
	/*
	 * This might happen on unmount.
	 * XXX If it happens at any other time, it should be a panic.
	 */
	if (ap->a_vp->v_uflag & VU_DIROP) {
		printf("lfs_inactive: inactivating VU_DIROP? ino = %llu\n",
		    (unsigned long long) ip->i_number);
	}
#endif /* DEBUG */

	return ulfs_inactive(v);
}

int
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
	struct lfs *fs;
	int error;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vp == NULL || VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	fs = VTOI(dvp)->i_lfs;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * LFS_NRESERVE calculates direct and indirect blocks as well
	 * as an inode block; an overestimate in most cases.
	 */
	if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
		return (error);

 restart:
	mutex_enter(&lfs_lock);
	if (fs->lfs_dirops == 0) {
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
		mutex_enter(&lfs_lock);
	}
	while (fs->lfs_writer) {
		error = cv_wait_sig(&fs->lfs_diropscv, &lfs_lock);
		if (error == EINTR) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
	}
	if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
		cv_broadcast(&lfs_writerd_cv);
		mutex_exit(&lfs_lock);
		preempt();
		goto restart;
	}

	if (lfs_dirvcount > LFS_MAX_DIROP) {
		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
		    "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
		/* PNORELOCK: mtsleep releases lfs_lock before returning. */
		if ((error = mtsleep(&lfs_dirvcount,
		    PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
		    &lfs_lock)) != 0) {
			goto unreserve;
		}
		goto restart;
	}

	++fs->lfs_dirops;
	/* fs->lfs_doifile = 1; */ /* XXX why? --ks */
	mutex_exit(&lfs_lock);

	/* Hold a reference so SET_ENDOP will be happy */
	vref(dvp);
	if (vp) {
		vref(vp);
		MARK_VNODE(vp);
	}

	MARK_VNODE(dvp);
	return 0;

 unreserve:
	lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
	return error;
}

/*
 * Opposite of lfs_set_dirop... mostly.  For now at least must call
 * UNMARK_VNODE(dvp) explicitly first.  (XXX: clean that up)
 */
void
lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
{
	mutex_enter(&lfs_lock);
	--fs->lfs_dirops;
	if (!fs->lfs_dirops) {
		if (fs->lfs_nadirop) {
			panic("lfs_unset_dirop: %s: no dirops but "
			    "nadirop=%d", str,
			    fs->lfs_nadirop);
		}
		wakeup(&fs->lfs_writer);
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
	} else {
		mutex_exit(&lfs_lock);
	}
	lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
}

void
lfs_mark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	mutex_enter(&lfs_lock);
	if (!(ip->i_state & IN_ADIROP)) {
		if (!(vp->v_uflag & VU_DIROP)) {
			mutex_exit(&lfs_lock);
			vref(vp);
			mutex_enter(&lfs_lock);
			++lfs_dirvcount;
			++fs->lfs_dirvcount;
			TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			vp->v_uflag |= VU_DIROP;
		}
		++fs->lfs_nadirop;
		ip->i_state &= ~IN_CDIROP;
		ip->i_state |= IN_ADIROP;
	} else
		KASSERT(vp->v_uflag & VU_DIROP);
	mutex_exit(&lfs_lock);
}

void
lfs_unmark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);

	mutex_enter(&lfs_lock);
	if (ip && (ip->i_state & IN_ADIROP)) {
		KASSERT(vp->v_uflag & VU_DIROP);
		--ip->i_lfs->lfs_nadirop;
		ip->i_state &= ~IN_ADIROP;
	}
	mutex_exit(&lfs_lock);
}
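
/*
 * Editor's note (distilled pattern, not original source): every
 * directory-modifying operation below brackets its work between
 * lfs_set_dirop() and lfs_unset_dirop(), and, as the comment above
 * warns, UNMARK_VNODE() must be called explicitly first.
 * Schematically:
 */
#if 0
	if ((error = lfs_set_dirop(dvp, NULL)) != 0)
		return error;
	error = op();			/* the directory operation proper */
	UNMARK_VNODE(dvp);
	lfs_unset_dirop(fs, dvp, "op");
	vrele(dvp);			/* drop the ref lfs_set_dirop took */
#endif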

int
lfs_symlink(void *v)
{
	struct vop_symlink_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct inode *ip;
	struct ulfs_lookup_results *ulr;
	ssize_t len; /* XXX should be size_t */
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);
	KASSERT(ap->a_vap->va_type == VLNK);

	/* XXX should handle this material another way */
	ulr = &VTOI(ap->a_dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		goto out;
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);

	ip = VTOI(*vpp);

	/*
	 * This test is off by one. um_maxsymlinklen contains the
	 * number of bytes available, and we aren't storing a \0, so
	 * the test should properly be <=. However, it cannot be
	 * changed as this would break compatibility with existing fs
	 * images -- see the way ulfs_readlink() works.
	 */
	len = strlen(ap->a_target);
	if (len < ip->i_lfs->um_maxsymlinklen) {
		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
		ip->i_size = len;
		DIP_ASSIGN(ip, size, len);
		uvm_vnp_setsize(*vpp, ip->i_size);
		ip->i_state |= IN_CHANGE | IN_UPDATE;
		if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
			ip->i_state |= IN_ACCESS;
	} else {
		error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
		    IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
		    NULL);
	}

	VOP_UNLOCK(*vpp);
	if (error)
		vrele(*vpp);

 out:
	UNMARK_VNODE(dvp);
	/* XXX: is it even possible for the symlink to get MARK'd? */
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "symlink");

	vrele(dvp);
	return (error);
}
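
/*
 * Editor's note (worked example, assuming the usual ULFS geometry):
 * um_maxsymlinklen is the space occupied by the direct and indirect
 * block pointers in the on-disk inode, (ULFS_NDADDR + ULFS_NIADDR)
 * times the pointer size: (12 + 3) * 4 = 60 bytes on a 32-bit inode
 * filesystem, 120 on a 64-bit one.  Because of the off-by-one test
 * above, a 32-bit filesystem stores targets of up to 59 bytes in the
 * inode itself ("short" symlinks) at the cost of no data block;
 * anything longer is written out through ulfs_bufio() like file data.
 */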

int
lfs_mknod(void *v)
{
	struct vop_mknod_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct inode *ip;
	int error;
	ino_t ino;
	struct ulfs_lookup_results *ulr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);

	/* Either way we're done with the dirop at this point */
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	lfs_unset_dirop(fs, dvp, "mknod");

	if (error) {
		vrele(dvp);
		*vpp = NULL;
		return (error);
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);

	ip = VTOI(*vpp);
	ino = ip->i_number;
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;

	/*
	 * Call fsync to write the vnode so that we don't have to deal with
	 * flushing it when it's marked VU_DIROP or reclaiming.
	 *
	 * XXX KS - If we can't flush we also can't call vgone(), so must
	 * return.  But, that leaves this vnode in limbo, also not good.
	 * Can this ever happen (barring hardware failure)?
	 */
	if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
		panic("lfs_mknod: couldn't fsync (ino %llu)",
		    (unsigned long long) ino);
		/* return (error); */
	}

	vrele(dvp);
	KASSERT(error == 0);
	VOP_UNLOCK(*vpp);
	return (0);
}

/*
 * Create a regular file
 */
int
lfs_create(void *v)
{
	struct vop_create_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		goto out;
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);
	VOP_UNLOCK(*vpp);

 out:
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "create");

	vrele(dvp);
	return (error);
}

int
lfs_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, *tvp, **vpp;
	struct inode *dp, *ip;
	struct componentname *cnp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	struct buf *bp;
	LFS_DIRHEADER *dirp;
	int dirblksiz;
	int error;

	dvp = ap->a_dvp;
	tvp = NULL;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);

	dp = VTOI(dvp);
	ip = NULL;

	KASSERT(vap->va_type == VDIR);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	ULFS_CHECK_CRAPCOUNTER(dp);

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
		return EMLINK;
	}

	dirblksiz = fs->um_dirblksiz;
	/* XXX dholland 20150911 I believe this to be true, but... */
	//KASSERT(dirblksiz == LFS_DIRBLKSIZ);

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	/*
	 * Must simulate part of lfs_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory.  The entry is
	 * made later after writing "." and ".." entries.
	 */
	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL,
	    ap->a_vpp);
	if (error)
		goto out;

	error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
	if (error) {
		vrele(*ap->a_vpp);
		*ap->a_vpp = NULL;
		goto out;
	}

	tvp = *ap->a_vpp;
	MARK_VNODE(tvp);
	ip = VTOI(tvp);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_nlink = 2;
	DIP_ASSIGN(ip, nlink, 2);
	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Bump link count in parent directory to reflect work done below.
	 */
	dp->i_nlink++;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_state |= IN_CHANGE;
	if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;

	/*
	 * Initialize directory with "." and "..".  This used to use a
	 * static template but that adds moving parts for very little
	 * benefit.
	 */
	if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
	    B_CLRBUF, &bp)) != 0)
		goto bad;
	ip->i_size = dirblksiz;
	DIP_ASSIGN(ip, size, dirblksiz);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	uvm_vnp_setsize(tvp, ip->i_size);
	dirp = bp->b_data;

	/* . */
	lfs_dir_setino(fs, dirp, ip->i_number);
	lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 1);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1,
	    LFS_DIRECTSIZ(fs, 1));
	dirp = LFS_NEXTDIR(fs, dirp);
	/* .. */
	lfs_dir_setino(fs, dirp, dp->i_number);
	lfs_dir_setreclen(fs, dirp, dirblksiz - LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 2);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2,
	    dirblksiz - LFS_DIRECTSIZ(fs, 1));

	/*
	 * Directory set up; now install its entry in the parent directory.
	 */
	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
		goto bad;
	if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}
	error = ulfs_direnter(dvp, ulr, tvp,
	    cnp, ip->i_number, LFS_IFTODT(ip->i_mode), bp);
 bad:
	if (error == 0) {
		VOP_UNLOCK(tvp);
	} else {
		dp->i_nlink--;
		DIP_ASSIGN(dp, nlink, dp->i_nlink);
		dp->i_state |= IN_CHANGE;
		/*
		 * No need to do an explicit lfs_truncate here, vrele will
		 * do this for us because we set the link count to 0.
		 */
		ip->i_nlink = 0;
		DIP_ASSIGN(ip, nlink, 0);
		ip->i_state |= IN_CHANGE;
		/* If IN_ADIROP, account for it */
		UNMARK_VNODE(tvp);
		vput(tvp);
	}

 out:
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "mkdir");

	vrele(dvp);
	return (error);
}

int
lfs_remove(void *v)
{
	struct vop_remove_v3_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
		nlink_t ctx_vp_new_nlink;
	} */ *ap = v;
	struct vnode *dvp, *vp;
	struct inode *ip;
	int error;

	dvp = ap->a_dvp;
	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if ((error = lfs_set_dirop(dvp, vp)) != 0) {
		if (dvp == vp)
			vrele(vp);
		else
			vput(vp);
		return error;
	}
	error = ulfs_remove(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, vp);

	UNMARK_VNODE(dvp);
	if (ap->a_vp) {
		UNMARK_VNODE(ap->a_vp);
	}
	lfs_unset_dirop(ip->i_lfs, dvp, "remove");
	vrele(dvp);
	if (ap->a_vp) {
		vrele(ap->a_vp);
	}

	return (error);
}

int
lfs_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	int error;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(ap->a_dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
		if (ap->a_dvp == vp)
			vrele(vp);
		else
			vput(vp);
		return error;
	}
	error = ulfs_rmdir(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, vp);

	UNMARK_VNODE(ap->a_dvp);
	if (ap->a_vp) {
		UNMARK_VNODE(ap->a_vp);
	}
	lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
	vrele(ap->a_dvp);
	if (ap->a_vp) {
		vrele(ap->a_vp);
	}

	return (error);
}

int
lfs_link(void *v)
{
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, *vp;
	int error;

	dvp = ap->a_dvp;
	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = vn_lock(vp, LK_EXCLUSIVE);
	if (error)
		return error;
	error = lfs_set_dirop(dvp, vp);
	VOP_UNLOCK(vp);
	if (error)
		return error;

	error = ulfs_link(ap);

	UNMARK_VNODE(vp);
	UNMARK_VNODE(dvp);
	lfs_unset_dirop(fs, dvp, "link");
	vrele(vp);
	vrele(dvp);

	return (error);
}

/* XXX hack to avoid calling ITIMES in getattr */
int
lfs_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct vattr *vap = ap->a_vap;
	struct lfs *fs;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	fs = ip->i_lfs;

	/*
	 * Copy from inode table
	 */
	vap->va_fsid = ip->i_dev;
	vap->va_fileid = ip->i_number;
	vap->va_mode = ip->i_mode & ~LFS_IFMT;
	vap->va_nlink = ip->i_nlink;
	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	switch (vp->v_type) {
	case VBLK:
	case VCHR:
		vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din);
		break;
	default:
		vap->va_rdev = NODEV;
		break;
	}
	vap->va_size = vp->v_size;
	vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din);
	vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din);
	vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din);
	vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din);
	vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din);
	vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din);
	vap->va_flags = ip->i_flags;
	vap->va_gen = ip->i_gen;
	/* this doesn't belong here */
	if (vp->v_type == VBLK)
		vap->va_blocksize = BLKDEV_IOSIZE;
	else if (vp->v_type == VCHR)
		vap->va_blocksize = MAXBSIZE;
	else
		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
	vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
	vap->va_type = vp->v_type;
	vap->va_filerev = ip->i_modrev;
	return (0);
}

/*
 * Check to make sure the inode blocks won't choke the buffer
 * cache, then call ulfs_setattr as usual.
 */
int
lfs_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	lfs_check(vp, LFS_UNUSED_LBN, 0);
	return ulfs_setattr(v);
}

/*
 * Release the block we hold on lfs_newseg wrapping.  Called on file close,
 * or explicitly from LFCNWRAPGO.  Called with the interlock held.
 */
static int
lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
{
	if (fs->lfs_stoplwp != curlwp)
		return EBUSY;

	fs->lfs_stoplwp = NULL;
	cv_signal(&fs->lfs_stopcv);

	KASSERT(fs->lfs_nowrap > 0);
	if (fs->lfs_nowrap <= 0) {
		return 0;
	}

	if (--fs->lfs_nowrap == 0) {
		log(LOG_NOTICE, "%s: re-enabled log wrap\n",
		    lfs_sb_getfsmnt(fs));
		wakeup(&fs->lfs_wrappass);
		lfs_wakeup_cleaner(fs);
	}
	if (waitfor) {
		cv_wait_sig(&fs->lfs_nextsegsleep, &lfs_lock);
	}

	return 0;
}
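
/*
 * Editor's note (sketch, not original source): lfs_wrapgo() is the
 * kernel half of the wrap-control protocol described at LFCNWRAPSTOP
 * below.  A userland agent such as dump(8) or a regression test might
 * drive it roughly as follows (fd is an open descriptor on the fs
 * root; error handling omitted):
 */
#if 0
	int wait = 0;

	fcntl(fd, LFCNWRAPSTOP, &wait);	/* hold the log at this segment */
	/* ... examine the frozen filesystem ... */
	fcntl(fd, LFCNWRAPGO, &wait);	/* let the writer wrap again */
#endif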

/*
 * Close called.
 *
 * Update the times on the inode.
 */
/* ARGSUSED */
int
lfs_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct lfs *fs;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	fs = ip->i_lfs;

	if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
	    fs->lfs_stoplwp == curlwp) {
		mutex_enter(&lfs_lock);
		log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
		lfs_wrapgo(fs, ip, 0);
		mutex_exit(&lfs_lock);
	}

	/* When closing an anonymous file, maybe mark it IN_DEAD */
	if (ip->i_nlink == 0)
		lfs_orphan(fs, vp);

	if (vp == ip->i_lfs->lfs_ivnode &&
	    vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
		return 0;

	if (vrefcnt(vp) > 1 && vp != ip->i_lfs->lfs_ivnode) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (0);
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
lfsspec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if (vrefcnt(vp) > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the inode then do device close.
 */
int
lfsfifo_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if (vrefcnt(ap->a_vp) > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
int
lfs_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct lfs *fs;
	int error;

	VOP_UNLOCK(vp);

	ip = VTOI(vp);
	fs = ip->i_lfs;

	KASSERT(!(ip->i_state & IN_CLEANING));

	/*
	 * The inode must be freed and updated before being removed
	 * from its hash chain.  Other threads trying to gain a hold
	 * or lock on the inode will be stalled.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
		lfs_vfree(vp, ip->i_number, ip->i_omode);

	mutex_enter(&lfs_lock);
	LFS_CLR_UINO(ip, IN_ALLMOD);
	mutex_exit(&lfs_lock);
	if ((error = ulfs_reclaim(vp)))
		return (error);

	/*
	 * Take us off the paging and/or dirop queues if we were on them.
	 * We shouldn't be on them.
	 */
	mutex_enter(&lfs_lock);
	if (ip->i_state & IN_PAGING) {
		log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
		    lfs_sb_getfsmnt(fs));
		ip->i_state &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
	}
	if (vp->v_uflag & VU_DIROP)
		panic("reclaimed vnode is VU_DIROP");
	mutex_exit(&lfs_lock);

	pool_put(&lfs_dinode_pool, ip->i_din);
	lfs_deregister_all(vp);
	pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
	ip->inode_ext.lfs = NULL;
	genfs_node_destroy(vp);
	pool_put(&lfs_inode_pool, vp->v_data);
	vp->v_data = NULL;
	return (0);
}

/*
 * Read a block from a storage device.
 *
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to avoid reading blocks that are in the process of being
 * written by the cleaner---and hence are not mutexed by the normal
 * buffer cache / page cache mechanisms---check for collisions before
 * reading.
 *
 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
 * the active cleaner test.
 *
 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
 */
int
lfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct inode *ip;
	daddr_t tbn;
#define MAXLOOP 25
	int i, sn, error, slept, loopcount;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* lfs uses its strategy routine only for read */
	KASSERT(bp->b_flags & B_READ);

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("lfs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL);
		if (error) {
			bp->b_error = error;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if ((long)bp->b_blkno < 0) { /* block is not on disk */
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	slept = 1;
	loopcount = 0;
	mutex_enter(&lfs_lock);
	while (slept && fs->lfs_seglock) {
		mutex_exit(&lfs_lock);
		/*
		 * Look through list of intervals.
		 * There will only be intervals to look through
		 * if the cleaner holds the seglock.
		 * Since the cleaner is synchronous, we can trust
		 * the list of intervals to be current.
		 */
		tbn = LFS_DBTOFSB(fs, bp->b_blkno);
		sn = lfs_dtosn(fs, tbn);
		slept = 0;
		for (i = 0; i < fs->lfs_cleanind; i++) {
			if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
			    tbn >= fs->lfs_cleanint[i]) {
				DLOG((DLOG_CLEAN,
				    "lfs_strategy: ino %llu lbn %" PRId64
				    " ind %d sn %d fsb %" PRIx64
				    " given sn %d fsb %" PRIx64 "\n",
				    (unsigned long long) ip->i_number,
				    bp->b_lblkno, i,
				    lfs_dtosn(fs, fs->lfs_cleanint[i]),
				    fs->lfs_cleanint[i], sn, tbn));
				DLOG((DLOG_CLEAN,
				    "lfs_strategy: sleeping on ino %llu lbn %"
				    PRId64 "\n",
				    (unsigned long long) ip->i_number,
				    bp->b_lblkno));
				mutex_enter(&lfs_lock);
				if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
					/*
					 * Cleaner can't wait for itself.
					 * Instead, wait for the blocks
					 * to be written to disk.
					 * XXX we need pribio in the test
					 * XXX here.
					 */
					mtsleep(&fs->lfs_iocount,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean2", hz/10 + 1,
					    &lfs_lock);
					slept = 1;
					++loopcount;
					break;
				} else if (fs->lfs_seglock) {
					mtsleep(&fs->lfs_seglock,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean1", 0,
					    &lfs_lock);
					slept = 1;
					break;
				}
				mutex_exit(&lfs_lock);
			}
		}
		mutex_enter(&lfs_lock);
		if (loopcount > MAXLOOP) {
			printf("lfs_strategy: breaking out of clean2 loop\n");
			break;
		}
	}
	mutex_exit(&lfs_lock);

	vp = ip->i_devvp;
	return VOP_STRATEGY(vp, bp);
}
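
/*
 * Editor's note (distilled sketch, not original source): the
 * collision test above, in isolation.  A block is suspect iff it lies
 * in the same segment as some active cleaning interval and at or
 * beyond that interval's starting fsb address.
 */
#if 0
static int
lfs_cleaning_collision(struct lfs *fs, daddr_t tbn)
{
	int i, sn = lfs_dtosn(fs, tbn);

	for (i = 0; i < fs->lfs_cleanind; i++)
		if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
		    tbn >= fs->lfs_cleanint[i])
			return 1;
	return 0;
}
#endif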

/*
 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
 * Technically this is a checkpoint (the on-disk state is valid)
 * even though we are leaving out all the file data.
 */
int
lfs_flush_dirops(struct lfs *fs)
{
	struct inode *ip, *marker;
	struct vnode *vp;
	extern int lfs_dostats; /* XXX this does not belong here */
	struct segment *sp;
	SEGSUM *ssp;
	int flags = 0;
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(fs->lfs_nadirop == 0); /* stable during lfs_writer */
	KASSERT(fs->lfs_dirops == 0);  /* stable during lfs_writer */

	if (fs->lfs_ronly)
		return EROFS;

	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	marker = pool_get(&lfs_inode_pool, PR_WAITOK);
	memset(marker, 0, sizeof(*marker));
	marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
	memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs));
	marker->i_state = IN_MARKER;

	lfs_imtime(fs);
	lfs_seglock(fs, flags);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to get dirops out of the way.
	 * Only write dirops, and don't flush files' pages, only
	 * blocks from the directories.
	 *
	 * We don't need to vref these files because they are
	 * dirops and so hold an extra reference until the
	 * segunlock clears them of that status.
	 *
	 * We don't need to check for IN_ADIROP because we know that
	 * no dirops are active.
	 *
	 */
	mutex_enter(&lfs_lock);
	KASSERT(fs->lfs_writer);
	TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) {
		TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
		TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker,
		    i_lfs_dchain);
		if (ip->i_state & IN_MARKER)
			continue;
		vp = ITOV(ip);

		/*
		 * Prevent the vnode from going away if it's just been
		 * put out in the segment and lfs_unmark_dirop is about
		 * to release it.  While it is on the list it is always
		 * referenced, so it cannot be reclaimed until we
		 * release it.
		 */
		vref(vp);

		/*
		 * Since we hold lfs_writer, the node can't be in an
		 * active dirop.  Since it's on the list and we hold a
		 * reference to it, it can't be reclaimed now.
		 */
		KASSERT((ip->i_state & IN_ADIROP) == 0);
		KASSERT(vp->v_uflag & VU_DIROP);

		/*
		 * After we release lfs_lock, if we were in the middle
		 * of writing a segment, lfs_unmark_dirop may end up
		 * clearing VU_DIROP, and we have no way to stop it.
		 * That should be OK -- we'll just have less to do
		 * here.
		 */
		mutex_exit(&lfs_lock);

		/*
		 * All writes to directories come from dirops; all
		 * writes to files' direct blocks go through the page
		 * cache, which we're not touching.  Reads to files
		 * and/or directories will not be affected by writing
		 * directory blocks, directory inodes and file inodes.
		 * So we don't really need to lock.
		 */
		if (vp->v_type != VREG &&
		    ((ip->i_state & IN_ALLMOD) || !VPISEMPTY(vp))) {
			error = lfs_writefile(fs, sp, vp);
			if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
			    !(ip->i_state & IN_ALLMOD)) {
				mutex_enter(&lfs_lock);
				LFS_SET_UINO(ip, IN_MODIFIED);
				mutex_exit(&lfs_lock);
			}
			if (error && (sp->seg_flags & SEGM_SINGLE)) {
				vrele(vp);
				mutex_enter(&lfs_lock);
				error = EAGAIN;
				break;
			}
		}
		KASSERT(ip->i_number != LFS_IFILE_INUM);
		error = lfs_writeinode(fs, sp, ip);
		if (error && (sp->seg_flags & SEGM_SINGLE)) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			error = EAGAIN;
			break;
		}

		/*
		 * We might need to update files' inodes again;
		 * for example, if they have data blocks to write.
		 * Make sure that after this flush, they are still
		 * marked IN_MODIFIED so that we don't forget to
		 * write them.
		 */
		mutex_enter(&lfs_lock);
		if (vp->v_type == VREG)
			LFS_SET_UINO(ip, IN_MODIFIED);
		mutex_exit(&lfs_lock);

		vrele(vp);
		mutex_enter(&lfs_lock);
	}
	TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	mutex_exit(&lfs_lock);

	/* We've written all the dirops there are */
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
	lfs_finalize_fs_seguse(fs);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	pool_put(&lfs_inoext_pool, marker->inode_ext.lfs);
	pool_put(&lfs_inode_pool, marker);

	return error;
}
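
/*
 * Editor's note (distilled pattern, not original source): the loop
 * above uses a dummy "marker" inode to keep its place in lfs_dchainhd
 * while lfs_lock is dropped, since real entries may be removed in the
 * meantime.  The skeleton of the technique (is_marker standing in for
 * the IN_MARKER test):
 */
#if 0
	TAILQ_INSERT_HEAD(&q, marker, entries);
	while ((p = TAILQ_NEXT(marker, entries)) != NULL) {
		/* advance the marker past p, then work on p */
		TAILQ_REMOVE(&q, marker, entries);
		TAILQ_INSERT_AFTER(&q, p, marker, entries);
		if (p->is_marker)
			continue;	/* someone else's marker */
		/* ... drop the lock, use p, retake the lock ... */
	}
	TAILQ_REMOVE(&q, marker, entries);
#endif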

/*
 * Flush all vnodes for which the pagedaemon has requested pageouts.
 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
 * has just run, this would be an error).  If we have to skip a vnode
 * for any reason, just skip it; if we have to wait for the cleaner,
 * abort.  The writer daemon will call us again later.
 */
int
lfs_flush_pchain(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int error, error2;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(fs->lfs_writer);

	if (fs->lfs_ronly)
		return EROFS;

	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	/* Get dirops out of the way */
	if ((error = lfs_flush_dirops(fs)) != 0)
		return error;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
	 */
	lfs_imtime(fs);
	lfs_seglock(fs, 0);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to clear pageout requests.
	 * Only write non-dirop files that are in the pageout queue.
	 * We're very conservative about what we write; we want to be
	 * fast and async.
	 */
	mutex_enter(&lfs_lock);
 top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		struct mount *mp = ITOV(ip)->v_mount;
		ino_t ino = ip->i_number;

		nip = TAILQ_NEXT(ip, i_lfs_pchain);

		if (!(ip->i_state & IN_PAGING))
			goto top;

		mutex_exit(&lfs_lock);
		if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
			mutex_enter(&lfs_lock);
			continue;
		}
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			continue;
		}
		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
		    !(ip->i_state & IN_PAGING)) {
			mutex_exit(&lfs_lock);
			vput(vp);
			mutex_enter(&lfs_lock);
			goto top;
		}
		mutex_exit(&lfs_lock);

		error = lfs_writefile(fs, sp, vp);
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_state & IN_ALLMOD)) {
			mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
			mutex_exit(&lfs_lock);
		}
		KASSERT(ip->i_number != LFS_IFILE_INUM);
		error2 = lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		vrele(vp);

		if (error == EAGAIN || error2 == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return 0;
}

/*
 * Conversion for compat.
 */
static void
block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70)
{
	bi->bi_inode = bi70->bi_inode;
	bi->bi_lbn = bi70->bi_lbn;
	bi->bi_daddr = bi70->bi_daddr;
	bi->bi_segcreate = bi70->bi_segcreate;
	bi->bi_version = bi70->bi_version;
	bi->bi_bp = bi70->bi_bp;
	bi->bi_size = bi70->bi_size;
}

static void
block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi)
{
	bi70->bi_inode = bi->bi_inode;
	bi70->bi_lbn = bi->bi_lbn;
	bi70->bi_daddr = bi->bi_daddr;
	bi70->bi_segcreate = bi->bi_segcreate;
	bi70->bi_version = bi->bi_version;
	bi70->bi_bp = bi->bi_bp;
	bi70->bi_size = bi->bi_size;
}
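
/*
 * Editor's note (sketch, not original source): the fcntl interface
 * below is how the userland cleaner drives the filesystem.  A minimal
 * consumer in the style of lfs_cleanerd(8) (fd is an open descriptor
 * on the fs root or Ifile; blkiov is a caller-built BLOCK_INFO array;
 * error handling omitted):
 */
#if 0
	struct timeval tv = { 5, 0 };
	struct lfs_fcntl_markv args;

	fcntl(fd, LFCNSEGWAIT, &tv);	/* wait for a segment write */
	args.blkiov = blkiov;
	args.blkcnt = blkcnt;
	fcntl(fd, LFCNBMAPV, &args);	/* which of these blocks are live? */
	/* ... load the live blocks into blkiov[n].bi_bp ... */
	fcntl(fd, LFCNMARKV, &args);	/* rewrite them in a new segment */
#endif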

/*
 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}.
 */
int
lfs_fcntl(void *v)
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		void * a_data;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct timeval tv;
	struct timeval *tvp;
	struct timeval50 *tvp50;
	BLOCK_INFO *blkiov;
	BLOCK_INFO_70 *blkiov70;
	CLEANERINFO *cip;
	CLEANERINFO64 ci;
	SEGUSE *sup, *sua;
	int blkcnt, i, error;
	size_t fh_size, size;
	struct lfs_fcntl_markv blkvp;
	struct lfs_fcntl_markv_70 blkvp70;
	struct lfs_inode_array inotbl;
	struct lfs_segnum_array snap;
	struct lfs_filestat_req lfr;
	struct lfs_write_stats lws;
	struct lfs_filestats *fss;
	struct lfs_seguse_array suap;
	struct lfs_autoclean_params params;
	struct lwp *l;
	fsid_t *fsidp;
	struct lfs *fs;
	struct buf *bp;
	fhandle_t *fhp;
	daddr_t off;
	int oclean, *sna, direct, offset;
	ino_t *inoa;
	bool scramble;
	ino_t maxino;

	/* Only respect LFS fcntls on fs root or Ifile */
	if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO &&
	    VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) {
		return genfs_fcntl(v);
	}

	/* Avoid locking a draining lock */
	if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) {
		return ESHUTDOWN;
	}

	/* LFS control and monitoring fcntls are available only to root */
	l = curlwp;
	if (((ap->a_command & 0xff00) >> 8) == 'L' &&
	    (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0)
		return (error);

	fs = VTOI(ap->a_vp)->i_lfs;
	fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx;

	maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs))
	    - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs))
	    * lfs_sb_getifpb(fs);

	error = 0;
	switch ((int)ap->a_command) {
	case LFCNSEGWAITALL_COMPAT_50:
	case LFCNSEGWAITALL_COMPAT:
		fsidp = NULL;
		/* FALLTHROUGH */
	case LFCNSEGWAIT_COMPAT_50:
	case LFCNSEGWAIT_COMPAT:
		tvp50 = (struct timeval50 *)ap->a_data;
		timeval50_to_timeval(tvp50, &tv);
		tvp = &tv;
		goto segwait_common;

	case LFCNSEGWAITALL:
		fsidp = NULL;
		/* FALLTHROUGH */
	case LFCNSEGWAIT:
		tvp = (struct timeval *)ap->a_data;
 segwait_common:
		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);

		error = lfs_segwait(fsidp, tvp);

		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			cv_broadcast(&fs->lfs_sleeperscv);
		mutex_exit(&lfs_lock);
		return error;

	case LFCNBMAPV_COMPAT_70:
	case LFCNMARKV_COMPAT_70:
		blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data;

		blkcnt = blkvp70.blkcnt;
		if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
			return (EINVAL);
		blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO),
		    LFS_NB_BLKIOV);
		blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70),
		    LFS_NB_BLKIOV);
		for (i = 0; i < blkcnt; i++) {
			error = copyin(&blkvp70.blkiov[i], blkiov70,
			    sizeof(*blkiov70));
			if (error) {
				lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
				lfs_free(fs, blkiov, LFS_NB_BLKIOV);
				return error;
			}
			block_info_from_70(&blkiov[i], blkiov70);
		}

		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);
		if (ap->a_command == LFCNBMAPV_COMPAT_70)
			error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
		else /* LFCNMARKV_COMPAT_70 */
			error = lfs_markv(l, fsidp, blkiov, blkcnt);
		if (error == 0) {
			for (i = 0; i < blkcnt; i++) {
				block_info_to_70(blkiov70, &blkiov[i]);
				error = copyout(blkiov70, &blkvp70.blkiov[i],
				    sizeof(*blkiov70));
				if (error) {
					break;
				}
			}
		}
		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			cv_broadcast(&fs->lfs_sleeperscv);
		mutex_exit(&lfs_lock);
		lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
		lfs_free(fs, blkiov, LFS_NB_BLKIOV);
		return error;
		if (error == 0) {
			for (i = 0; i < blkcnt; i++) {
				block_info_to_70(blkiov70, &blkiov[i]);
				error = copyout(blkiov70, &blkvp70.blkiov[i],
				    sizeof(*blkiov70));
				if (error) {
					break;
				}
			}
		}
		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			cv_broadcast(&fs->lfs_sleeperscv);
		mutex_exit(&lfs_lock);
		lfs_free(fs, blkiov70, LFS_NB_BLKIOV);
		lfs_free(fs, blkiov, LFS_NB_BLKIOV);
		return error;

	case LFCNBMAPV:
	case LFCNMARKV:
		blkvp = *(struct lfs_fcntl_markv *)ap->a_data;

		blkcnt = blkvp.blkcnt;
		if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
			return (EINVAL);
		blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO),
		    LFS_NB_BLKIOV);
		if ((error = copyin(blkvp.blkiov, blkiov,
		    blkcnt * sizeof(BLOCK_INFO))) != 0) {
			lfs_free(fs, blkiov, LFS_NB_BLKIOV);
			return error;
		}

		mutex_enter(&lfs_lock);
		++fs->lfs_sleepers;
		mutex_exit(&lfs_lock);
		if (ap->a_command == LFCNBMAPV)
			error = lfs_bmapv(l, fsidp, blkiov, blkcnt);
		else /* LFCNMARKV */
			error = lfs_markv(l, fsidp, blkiov, blkcnt);
		if (error == 0)
			error = copyout(blkiov, blkvp.blkiov,
			    blkcnt * sizeof(BLOCK_INFO));
		mutex_enter(&lfs_lock);
		if (--fs->lfs_sleepers == 0)
			cv_broadcast(&fs->lfs_sleeperscv);
		mutex_exit(&lfs_lock);
		lfs_free(fs, blkiov, LFS_NB_BLKIOV);
		return error;

	case LFCNRECLAIM:
		/*
		 * Flush dirops and write Ifile, allowing empty segments
		 * to be immediately reclaimed.
		 */
		lfs_writer_enter(fs, "pndirop");
		off = lfs_sb_getoffset(fs);
		lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP);
		lfs_flush_dirops(fs);
		LFS_CLEANERINFO(cip, fs, bp);
		oclean = lfs_ci_getclean(fs, cip);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
		lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP);
		fs->lfs_sp->seg_flags |= SEGM_PROT;
		/* Copy out write stats */
		if (ap->a_data != NULL) {
			lws.direct = 0;
			lws.offset = lfs_btofsb(fs, fs->lfs_sp->bytes_written);
			*(struct lfs_write_stats *)ap->a_data = lws;
		}
		lfs_segunlock(fs);
		lfs_writer_leave(fs);

#ifdef DEBUG
		LFS_CLEANERINFO(cip, fs, bp);
		DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64
		    " blocks, cleaned %" PRId32 " segments (activesb %d)\n",
		    lfs_sb_getoffset(fs) - off,
		    lfs_ci_getclean(fs, cip) - oclean,
		    fs->lfs_activesb));
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
#else
		__USE(oclean);
		__USE(off);
#endif

		return 0;

	case LFCNIFILEFH_COMPAT:
		/* Return the filehandle of the Ifile */
		if ((error = kauth_authorize_system(l->l_cred,
		    KAUTH_SYSTEM_FILEHANDLE,
		    0, NULL, NULL, NULL)) != 0)
			return (error);
		fhp = (struct fhandle *)ap->a_data;
		fhp->fh_fsid = *fsidp;
		fh_size = 16;	/* former VFS_MAXFIDSIZ */
		return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);

	case LFCNIFILEFH_COMPAT2:
	case LFCNIFILEFH:
		/* Return the filehandle of the Ifile */
		fhp = (struct fhandle *)ap->a_data;
		fhp->fh_fsid = *fsidp;
		fh_size = sizeof(struct lfs_fhandle) -
		    offsetof(fhandle_t, fh_fid);
		return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size);

	case LFCNREWIND:
		/* Move lfs_offset to the lowest-numbered segment */
		return lfs_rewind(fs, *(int *)ap->a_data);

	case LFCNINVAL:
		/* Mark a segment SEGUSE_INVAL */
		return lfs_invalidate(fs, *(int *)ap->a_data);

	case LFCNRESIZE:
		/* Resize the filesystem */
		return lfs_resize_fs(fs, *(int *)ap->a_data);
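
	/*
	 * Illustrative sequence (userland sketch, hypothetical descriptor
	 * "fd" on the fs root): an agent such as dump(8) freezes the log
	 * at a consistent point, does its work, then releases the writer:
	 *
	 *	int wait = 0;
	 *	fcntl(fd, LFCNWRAPSTOP, &wait);	// hold lfs_newseg()
	 *	// ... examine the frozen filesystem ...
	 *	fcntl(fd, LFCNWRAPGO, &wait);	// let the log wrap again
	 *
	 * Only the command names and the int argument are taken from the
	 * handlers below; the surrounding usage is a sketch.
	 */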

	case LFCNWRAPSTOP:
	case LFCNWRAPSTOP_COMPAT:
		/*
		 * Hold lfs_newseg at segment 0; if requested, sleep until
		 * the filesystem wraps around.  This supports external
		 * agents (dump, fsck-based regression tests) that need to
		 * look at a snapshot of the filesystem without requiring
		 * that all fs activity stop.
		 */
		if (fs->lfs_stoplwp == curlwp)
			return EALREADY;

		mutex_enter(&lfs_lock);
		while (fs->lfs_stoplwp != NULL)
			cv_wait(&fs->lfs_stopcv, &lfs_lock);
		fs->lfs_stoplwp = curlwp;
		if (fs->lfs_nowrap == 0)
			log(LOG_NOTICE, "%s: disabled log wrap\n",
			    lfs_sb_getfsmnt(fs));
		++fs->lfs_nowrap;
		if (*(int *)ap->a_data == 1
		    || ap->a_command == LFCNWRAPSTOP_COMPAT) {
			log(LOG_NOTICE, "LFCNWRAPSTOP waiting for log wrap\n");
			error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
			    "segwrap", 0, &lfs_lock);
			log(LOG_NOTICE, "LFCNWRAPSTOP done waiting\n");
			if (error) {
				lfs_wrapgo(fs, VTOI(ap->a_vp), 0);
			}
		}
		mutex_exit(&lfs_lock);
		return 0;

	case LFCNWRAPGO:
	case LFCNWRAPGO_COMPAT:
		/*
		 * Having done its work, the agent wakes up the writer.
		 * If the argument is 1, it sleeps until a new segment
		 * is selected.
		 */
		mutex_enter(&lfs_lock);
		error = lfs_wrapgo(fs, VTOI(ap->a_vp),
		    ap->a_command == LFCNWRAPGO_COMPAT ? 1 :
		    *((int *)ap->a_data));
		mutex_exit(&lfs_lock);
		return error;

	case LFCNWRAPPASS:
		if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT))
			return EALREADY;
		mutex_enter(&lfs_lock);
		if (fs->lfs_stoplwp != curlwp) {
			mutex_exit(&lfs_lock);
			return EALREADY;
		}
		if (fs->lfs_nowrap == 0) {
			mutex_exit(&lfs_lock);
			return EBUSY;
		}
		fs->lfs_wrappass = 1;
		wakeup(&fs->lfs_wrappass);
		/* Wait for the log to wrap, if asked */
		if (*(int *)ap->a_data) {
			vref(ap->a_vp);
			VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT;
			log(LOG_NOTICE, "LFCNWRAPPASS waiting for log wrap\n");
			error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER,
			    "segwrap", 0, &lfs_lock);
			log(LOG_NOTICE, "LFCNWRAPPASS done waiting\n");
			VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT;
			vrele(ap->a_vp);
		}
		mutex_exit(&lfs_lock);
		return error;

	case LFCNWRAPSTATUS:
		mutex_enter(&lfs_lock);
		*(int *)ap->a_data = fs->lfs_wrapstatus;
		mutex_exit(&lfs_lock);
		return 0;

	case LFCNFILESTATS:
		/* Retrieve fragmentation statistics for these inodes */
		lfr = *(struct lfs_filestat_req *)ap->a_data;
		if (lfr.len < 1 || lfr.len > LFS_FILESTATS_MAXCNT)
			return EINVAL;
		if (lfr.ino < LFS_IFILE_INUM || lfr.ino >= maxino
		    || lfr.ino + lfr.len >= maxino)
			return EINVAL;
		fss = lfs_malloc(fs, lfr.len * sizeof(*fss), LFS_NB_BLKIOV);
		if ((error = copyin(lfr.fss, fss,
		    lfr.len * sizeof(*fss))) != 0) {
			lfs_free(fs, fss, LFS_NB_BLKIOV);
			return error;
		}

		for (i = 0; i < lfr.len; ++i) {
			error = lfs_filestats(fs, lfr.ino + i, &fss[i]);
			if (error == ENOENT)
				error = 0;
			if (error)
				break;
		}

		if (error == 0)
			error = copyout(fss, lfr.fss, lfr.len * sizeof(*fss));

		lfs_free(fs, fss, LFS_NB_BLKIOV);
		return error;
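
	/*
	 * A minimal userland sketch of LFCNFILESTATS (field names taken
	 * from struct lfs_filestat_req as used above; the inode range
	 * and descriptor are hypothetical):
	 *
	 *	struct lfs_filestats fss[8];
	 *	struct lfs_filestat_req lfr;
	 *	lfr.ino = 3;		// first inode of interest
	 *	lfr.len = 8;		// number of consecutive inodes
	 *	lfr.fss = fss;		// result array
	 *	fcntl(fd, LFCNFILESTATS, &lfr);
	 */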

	case LFCNREWRITESEGS:
		/* Rewrite (clean) the listed segments */
		snap = *(struct lfs_segnum_array *)ap->a_data;
		if (snap.len > LFS_REWRITE_MAXCNT)
			return EINVAL;
		sna = lfs_malloc(fs, snap.len * sizeof(int), LFS_NB_BLKIOV);
		if ((error = copyin(snap.segments, sna,
		    snap.len * sizeof(int))) != 0) {
			lfs_free(fs, sna, LFS_NB_BLKIOV);
			return error;
		}

		for (i = 0; i < snap.len; i++) {
			if (sna[i] < 0 || sna[i] >= lfs_sb_getnseg(fs)) {
				lfs_free(fs, sna, LFS_NB_BLKIOV);
				return EINVAL;
			}
		}

		direct = offset = 0;
		error = lfs_rewrite_segments(fs, sna, snap.len, &direct,
		    &offset, curlwp);
		lfs_free(fs, sna, LFS_NB_BLKIOV);

		/* Copy out write stats */
		snap.stats.direct = direct;
		snap.stats.offset = offset;
		((struct lfs_segnum_array *)ap->a_data)->stats = snap.stats;
		return error;

	case LFCNREWRITEFILE:
	case LFCNSCRAMBLE:
		/* Rewrite (coalesce) the listed inodes */
		scramble = ((int)ap->a_command == LFCNSCRAMBLE);
		inotbl = *(struct lfs_inode_array *)ap->a_data;
		if (inotbl.len > LFS_REWRITE_MAXCNT)
			return EINVAL;
		inoa = lfs_malloc(fs, inotbl.len * sizeof(ino_t),
		    LFS_NB_BLKIOV);
		if ((error = copyin(inotbl.inodes, inoa,
		    inotbl.len * sizeof(ino_t))) != 0) {
			lfs_free(fs, inoa, LFS_NB_BLKIOV);
			return error;
		}

		for (i = 0; i < inotbl.len; i++) {
			if (inoa[i] <= LFS_IFILE_INUM || inoa[i] >= maxino) {
				lfs_free(fs, inoa, LFS_NB_BLKIOV);
				return EINVAL;
			}
		}

		direct = offset = 0;
		error = lfs_rewrite_file(fs, inoa, inotbl.len, scramble,
		    &direct, &offset);
		lfs_free(fs, inoa, LFS_NB_BLKIOV);

		/* Copy out write stats */
		inotbl.stats.direct = direct;
		inotbl.stats.offset = offset;
		((struct lfs_inode_array *)ap->a_data)->stats = inotbl.stats;

		return error;

	case LFCNCLEANERINFO:
		/*
		 * Get current CLEANERINFO information.
		 */
		memset(&ci, 0, sizeof ci);
		ci.clean = lfs_sb_getnclean(fs);
		ci.dirty = lfs_sb_getnseg(fs) - lfs_sb_getnclean(fs);
		ci.bfree = lfs_sb_getbfree(fs);
		ci.avail = lfs_sb_getavail(fs) - fs->lfs_ravail
		    - fs->lfs_favail;
		ci.flags = (fs->lfs_flags & LFS_MUSTCLEAN)
		    ? LFS_CLEANER_MUST_CLEAN : 0;
		*(CLEANERINFO64 *)ap->a_data = ci;

		return 0;

	case LFCNSEGUSE:
		/*
		 * Retrieve SEGUSE information for one or more segments.
		 */
		if (lfs_sb_getversion(fs) == 1)
			return EINVAL;
		suap = *(struct lfs_seguse_array *)ap->a_data;
		if (suap.start < 0
		    /* || suap.len < 0 */
		    || suap.len > LFS_SEGUSE_MAXCNT
		    || suap.start >= lfs_sb_getnseg(fs))
			return EINVAL;
		if (suap.start + suap.len >= lfs_sb_getnseg(fs)) {
			suap.len = lfs_sb_getnseg(fs) - suap.start;
			*(struct lfs_seguse_array *)ap->a_data = suap;
		}
		sua = lfs_malloc(fs, suap.len * sizeof *sua, LFS_NB_BLKIOV);

		for (i = 0; i < suap.len; i++) {
			LFS_SEGENTRY(sup, fs, suap.start + i, bp);
			memcpy(sua + i, sup, sizeof(*sup));
			brelse(bp, 0);
		}

		error = copyout(sua, suap.seguse, suap.len * sizeof *sua);
		lfs_free(fs, sua, LFS_NB_BLKIOV);
		return error;
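
	/*
	 * Note on the argument convention below: the caller's structure
	 * begins with its own size, which is read first so that shorter
	 * (older) revisions of struct lfs_autoclean_params remain
	 * acceptable; any trailing fields the caller did not supply are
	 * left zeroed by the memset().
	 */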

	case LFCNAUTOCLEAN:
		/*
		 * Control the in-kernel cleaner.
		 */
		size = *(size_t *)ap->a_data;
		if (size > sizeof(params))
			return EINVAL;
		memset(&params, 0, sizeof(params));
		memcpy(&params, (struct lfs_autoclean_params *)ap->a_data,
		    size);
		return lfs_cleanctl(fs, &params);

	default:
		return genfs_fcntl(v);
	}
	return 0;
}

/*
 * Report continuity statistics for this file.  Two measures are provided:
 * the number of discontinuities, and the total length, in fragment units,
 * of all the gaps between contiguously allocated file extents.  Only
 * direct blocks are considered.
 *
 * A single-block file will show zero for both measures, as will any file
 * that fits completely within its partial-segment.  In general, the
 * minimum discontinuity count for any file will be N-1, where N is the
 * number of segments required to store the file, rounded up; and the
 * minimum total gap length will also be N, with only the partial-segment
 * headers breaking up the file data (indirect blocks are written at the
 * end).
 *
 * Some files will be too large to be written in their entirety without
 * a checkpoint in the middle; those will have a higher minimum total gap
 * measure but about the same discontinuity count.
 *
 * The coalescing cleaner will use these statistics to identify files that
 * need to be rewritten to be contiguous on disk.
 */
static int
lfs_filestats(struct lfs *fs, ino_t ino, struct lfs_filestats *lfp)
{
	int error, step, run;
	daddr_t lbn, odaddr, daddr, diff, hiblk;
	struct vnode *vp;
	struct inode *ip;

	memset(lfp, 0, sizeof(*lfp));
	lfp->ino = ino;

	/* Contiguous blocks will be this far apart */
	step = lfs_sb_getbsize(fs) >> DEV_BSHIFT;

	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_SHARED, &vp);
	if (error)
		return error;
	ip = VTOI(vp);

	/* Highest block in this inode */
	hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	lfp->nblk = 0;

	odaddr = 0x0;
	for (lbn = 0; lbn <= hiblk; ++lbn) {
		error = VOP_BMAP(vp, lbn, NULL, &daddr, &run);
		if (error)
			break;

		/* Count all blocks */
		if (daddr > 0)
			lfp->nblk += (run + 1);

		/* Holes and yet-unwritten data only count once */
		if (daddr == odaddr && daddr <= 0)
			continue;

		/* Count any discontinuities */
		if (lbn > 0 && daddr != odaddr + step) {
			++lfp->dc_count;
			diff = daddr - odaddr;
			if (diff < 0)
				diff = -diff;
			lfp->dc_sum += diff;
		}
		lbn += run;
		odaddr = daddr + run * step;
	}
	VOP_UNLOCK(vp);
	vrele(vp);

	return 0;
}
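
/*
 * Worked example (illustrative numbers only): with an 8-sector block
 * (step = 8) and three single-block extents at disk addresses 8, 16
 * and 40, the pair 8 -> 16 is contiguous, while 16 -> 40 is not, so
 * dc_count becomes 1 and dc_sum accumulates |40 - 16| = 24, the
 * distance between the starts of the two discontiguous blocks.
 */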

/*
 * Return the last logical file offset that should be written for this file
 * if we're doing a write that ends at "size".  If writing, we need to know
 * about sizes on disk, i.e. fragments if there are any; if reading, we
 * need to know about entire blocks.
 */
void
lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	daddr_t olbn, nlbn;

	olbn = lfs_lblkno(fs, ip->i_size);
	nlbn = lfs_lblkno(fs, size);
	if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
		*eobp = lfs_fragroundup(fs, size);
	} else {
		*eobp = lfs_blkroundup(fs, size);
	}
}

#ifdef DEBUG
void lfs_dump_vop(void *);

void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;

	struct inode *ip = VTOI(ap->a_vp);
	struct lfs *fs = ip->i_lfs;

#ifdef DDB
	vfs_vnode_print(ap->a_vp, 0, printf);
#endif
	lfs_dump_dinode(fs, ip->i_din);
}
#endif

int
lfs_mmap(void *v)
{
	struct vop_mmap_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;

	/* The Ifile may not be mmap()ed */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
		return EOPNOTSUPP;
	return genfs_mmap(v);
}

static int
lfs_openextattr(void *v)
{
	struct vop_openextattr_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_closeextattr(void *v)
{
	struct vop_closeextattr_args /* {
		struct vnode *a_vp;
		int a_commit;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_getextattr(void *v)
{
	struct vop_getextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_getextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}
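
/*
 * The set/list/delete extattr entry points below follow the same
 * dispatch pattern as lfs_getextattr(): delegate to the matching
 * ulfs_*extattr() routine for ULFS1 when the kernel is built with
 * LFS_EXTATTR, and return EOPNOTSUPP otherwise (extended attributes
 * are not yet implemented for ULFS2).
 */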

static int
lfs_setextattr(void *v)
{
	struct vop_setextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_setextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_listextattr(void *v)
{
	struct vop_listextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_listextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

static int
lfs_deleteextattr(void *v)
{
	struct vop_deleteextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_deleteextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}