1 /* $NetBSD: lfs_vnops.c,v 1.345 2025/10/20 04:20:37 perseant Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 /* 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. 
Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 60 */ 61 62 /* from NetBSD: ufs_vnops.c,v 1.232 2016/05/19 18:32:03 riastradh Exp */ 63 /*- 64 * Copyright (c) 2008 The NetBSD Foundation, Inc. 65 * All rights reserved. 66 * 67 * This code is derived from software contributed to The NetBSD Foundation 68 * by Wasabi Systems, Inc. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. 
Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 79 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 80 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 81 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 82 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 83 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 86 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 87 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 88 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 89 * POSSIBILITY OF SUCH DAMAGE. 90 */ 91 /* 92 * Copyright (c) 1982, 1986, 1989, 1993, 1995 93 * The Regents of the University of California. All rights reserved. 94 * (c) UNIX System Laboratories, Inc. 95 * All or some portions of this file are derived from material licensed 96 * to the University of California by American Telephone and Telegraph 97 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 98 * the permission of UNIX System Laboratories, Inc. 99 * 100 * Redistribution and use in source and binary forms, with or without 101 * modification, are permitted provided that the following conditions 102 * are met: 103 * 1. Redistributions of source code must retain the above copyright 104 * notice, this list of conditions and the following disclaimer. 105 * 2. 
Redistributions in binary form must reproduce the above copyright 106 * notice, this list of conditions and the following disclaimer in the 107 * documentation and/or other materials provided with the distribution. 108 * 3. Neither the name of the University nor the names of its contributors 109 * may be used to endorse or promote products derived from this software 110 * without specific prior written permission. 111 * 112 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 113 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 114 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 115 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 116 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 117 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 118 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 119 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 120 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 121 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 122 * SUCH DAMAGE. 
123 * 124 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95 125 */ 126 127 #include <sys/cdefs.h> 128 __KERNEL_RCSID(0, "$NetBSD: lfs_vnops.c,v 1.345 2025/10/20 04:20:37 perseant Exp $"); 129 130 #ifdef _KERNEL_OPT 131 #include "opt_compat_netbsd.h" 132 #include "opt_uvm_page_trkown.h" 133 #endif 134 135 #include <sys/param.h> 136 #include <sys/systm.h> 137 #include <sys/namei.h> 138 #include <sys/resourcevar.h> 139 #include <sys/kernel.h> 140 #include <sys/file.h> 141 #include <sys/stat.h> 142 #include <sys/buf.h> 143 #include <sys/proc.h> 144 #include <sys/mount.h> 145 #include <sys/vnode.h> 146 #include <sys/pool.h> 147 #include <sys/signalvar.h> 148 #include <sys/kauth.h> 149 #include <sys/syslog.h> 150 151 #include <miscfs/fifofs/fifo.h> 152 #include <miscfs/genfs/genfs.h> 153 #include <miscfs/specfs/specdev.h> 154 155 #include <ufs/lfs/ulfs_inode.h> 156 #include <ufs/lfs/ulfsmount.h> 157 #include <ufs/lfs/ulfs_bswap.h> 158 #include <ufs/lfs/ulfs_extern.h> 159 160 #include <uvm/uvm_extern.h> 161 162 #include <ufs/lfs/lfs.h> 163 #include <ufs/lfs/lfs_accessors.h> 164 #include <ufs/lfs/lfs_kernel.h> 165 #include <ufs/lfs/lfs_extern.h> 166 167 extern kcondvar_t lfs_writerd_cv; 168 int lfs_ignore_lazy_sync = 1; 169 170 static int lfs_openextattr(void *v); 171 static int lfs_closeextattr(void *v); 172 static int lfs_getextattr(void *v); 173 static int lfs_setextattr(void *v); 174 static int lfs_listextattr(void *v); 175 static int lfs_deleteextattr(void *v); 176 177 static int lfs_makeinode(struct vattr *vap, struct vnode *, 178 const struct ulfs_lookup_results *, 179 struct vnode **, struct componentname *); 180 static int lfs_filestats(struct lfs *, ino_t, struct lfs_filestats *); 181 static int lfs_rewrite_file(struct lfs *, ino_t *, int, bool, int *, int *); 182 183 /* Global vfs data structures for lfs. 
 */
int (**lfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc lfs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, genfs_parsepath },	/* parsepath */
	{ &vop_lookup_desc, ulfs_lookup },		/* lookup */
	{ &vop_create_desc, lfs_create },		/* create */
	{ &vop_whiteout_desc, ulfs_whiteout },		/* whiteout */
	{ &vop_mknod_desc, lfs_mknod },			/* mknod */
	{ &vop_open_desc, ulfs_open },			/* open */
	{ &vop_close_desc, lfs_close },			/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, lfs_read },			/* read */
	{ &vop_write_desc, lfs_write },			/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, genfs_enoioctl },		/* ioctl */
	{ &vop_fcntl_desc, lfs_fcntl },			/* fcntl */
	{ &vop_poll_desc, genfs_poll },			/* poll */
	{ &vop_kqfilter_desc, genfs_kqfilter },		/* kqfilter */
	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
	{ &vop_mmap_desc, lfs_mmap },			/* mmap */
	{ &vop_fsync_desc, lfs_fsync },			/* fsync */
	{ &vop_seek_desc, genfs_seek },			/* seek */
	{ &vop_remove_desc, lfs_remove },		/* remove */
	{ &vop_link_desc, lfs_link },			/* link */
	{ &vop_rename_desc, lfs_rename },		/* rename */
	{ &vop_mkdir_desc, lfs_mkdir },			/* mkdir */
	{ &vop_rmdir_desc, lfs_rmdir },			/* rmdir */
	{ &vop_symlink_desc, lfs_symlink },		/* symlink */
	{ &vop_readdir_desc, ulfs_readdir },		/* readdir */
	{ &vop_readlink_desc, ulfs_readlink },		/* readlink */
	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_bmap_desc, ulfs_bmap },			/* bmap */
	{ &vop_strategy_desc, lfs_strategy },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_pathconf_desc, ulfs_pathconf },		/* pathconf */
	{ &vop_advlock_desc, ulfs_advlock },		/* advlock */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_getpages_desc, lfs_getpages },		/* getpages */
	{ &vop_putpages_desc, lfs_putpages },		/* putpages */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_vnodeop_opv_desc =
	{ &lfs_vnodeop_p, lfs_vnodeop_entries };

/*
 * Operations vector for special (device) nodes on an LFS.
 * GENFS_SPECOP_ENTRIES supplies the spec_* defaults; the entries
 * below override the ones LFS (or ULFS) must handle itself.
 */
int (**lfs_specop_p)(void *);
const struct vnodeopv_entry_desc lfs_specop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	GENFS_SPECOP_ENTRIES,
	{ &vop_close_desc, lfsspec_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsspec_read },		/* read */
	{ &vop_write_desc, ulfsspec_write },		/* write */
	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
	{ &vop_fsync_desc, spec_fsync },		/* fsync */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_specop_opv_desc =
	{ &lfs_specop_p, lfs_specop_entries };

/*
 * Operations vector for FIFOs on an LFS.  GENFS_FIFOOP_ENTRIES
 * supplies the fifo defaults; the entries below override them.
 */
int (**lfs_fifoop_p)(void *);
const struct vnodeopv_entry_desc lfs_fifoop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	GENFS_FIFOOP_ENTRIES,
	{ &vop_close_desc, lfsfifo_close },		/* close */
	{ &vop_access_desc, ulfs_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, lfs_getattr },		/* getattr */
	{ &vop_setattr_desc, lfs_setattr },		/* setattr */
	{ &vop_read_desc, ulfsfifo_read },		/* read */
	{ &vop_write_desc, ulfsfifo_write },		/* write */
	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
	{ &vop_fsync_desc, vn_fifo_bypass },		/* fsync */
	{ &vop_inactive_desc, lfs_inactive },		/* inactive */
	{ &vop_reclaim_desc, lfs_reclaim },		/* reclaim */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* strategy */
	{ &vop_print_desc, ulfs_print },		/* print */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_bwrite_desc, lfs_bwrite },		/* bwrite */
	{ &vop_openextattr_desc, lfs_openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, lfs_closeextattr },	/* closeextattr */
	{ &vop_getextattr_desc, lfs_getextattr },	/* getextattr */
	{ &vop_setextattr_desc, lfs_setextattr },	/* setextattr */
	{ &vop_listextattr_desc, lfs_listextattr },	/* listextattr */
	{ &vop_deleteextattr_desc, lfs_deleteextattr },	/* deleteextattr */
	{ NULL, NULL }
};
const struct vnodeopv_desc lfs_fifoop_opv_desc =
	{ &lfs_fifoop_p, lfs_fifoop_entries };

#include <ufs/lfs/ulfs_readwrite.c>

/*
 * Allocate a new inode for the file described by vap, under directory
 * dvp, and enter it into dvp under the name in cnp.
 *
 * On success returns 0 with *vpp holding the new, locked (LK_EXCLUSIVE),
 * referenced vnode, already entered in the namecache.  On failure the
 * partially-created inode is released (nlink forced to 0 so it is
 * deallocated) and an errno is returned.
 *
 * The caller is expected to hold the dirop state (lfs_set_dirop); the
 * new vnode is MARK_VNODE'd here and must be UNMARK'd by the caller.
 */
static int
lfs_makeinode(struct vattr *vap, struct vnode *dvp,
	const struct ulfs_lookup_results *ulr,
	struct vnode **vpp, struct componentname *cnp)
{
	struct inode *ip;
	struct vnode *tvp;
	int error;

	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL, &tvp);
	if (error)
		return error;
	error = vn_lock(tvp, LK_EXCLUSIVE);
	if (error) {
		vrele(tvp);
		return error;
	}
	MARK_VNODE(tvp);
	*vpp = tvp;
	ip = VTOI(tvp);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_nlink = 1;
	DIP_ASSIGN(ip, nlink, 1);

	/* Authorize setting SGID if needed; on denial just strip the bit. */
	if (ip->i_mode & ISGID) {
		error = kauth_authorize_vnode(cnp->cn_cred,
		    KAUTH_VNODE_WRITE_SECURITY,
		    tvp, NULL, genfs_can_chmod(tvp, cnp->cn_cred, ip->i_uid,
		    ip->i_gid, MAKEIMODE(vap->va_type, vap->va_mode)));
		if (error) {
			ip->i_mode &= ~ISGID;
			DIP_ASSIGN(ip, mode, ip->i_mode);
		}
	}

	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;
	error = ulfs_direnter(dvp, ulr, tvp,
	    cnp, ip->i_number, LFS_IFTODT(ip->i_mode), NULL);
	if (error)
		goto bad;
	*vpp = tvp;
	cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);
	return (0);

 bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	ip->i_nlink = 0;
	DIP_ASSIGN(ip, nlink, 0);
	ip->i_state |= IN_CHANGE;
	/* If IN_ADIROP, account for it */
	UNMARK_VNODE(tvp);
	vput(tvp);
	return (error);
}

/*
 * Synch an open file.
 *
 * Flushes the pages of the vnode in [a_offlo, a_offhi) and, unless
 * FSYNC_DATAONLY, writes the inode as well.  FSYNC_LAZY requests are
 * queued for the writer daemon (trickle sync) instead of being done
 * here; FSYNC_CACHE additionally flushes the underlying device cache.
 */
/* ARGSUSED */
int
lfs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t offlo;
		off_t offhi;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	int error = 0;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	/* If we're mounted read-only, don't try to sync. */
	if (fs->lfs_ronly)
		goto out;

	/* If a removed vnode is being cleaned, no need to sync here. */
	if ((ap->a_flags & FSYNC_RECLAIM) != 0 && ip->i_mode == 0)
		goto out;

	/*
	 * Trickle sync simply adds this vnode to the pager list, as if
	 * the pagedaemon had requested a pageout.
	 */
	if (ap->a_flags & FSYNC_LAZY) {
		if (lfs_ignore_lazy_sync == 0) {
			mutex_enter(&lfs_lock);
			if (!(ip->i_state & IN_PAGING)) {
				ip->i_state |= IN_PAGING;
				TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip,
				    i_lfs_pchain);
			}
			cv_broadcast(&lfs_writerd_cv);
			mutex_exit(&lfs_lock);
		}
		goto out;
	}

	KASSERT(!(ap->a_flags & FSYNC_RECLAIM && ip->i_state & IN_CLEANING));

	wait = (ap->a_flags & FSYNC_WAIT);
	/*
	 * Retry the pageout until it no longer fails with EAGAIN,
	 * pausing briefly each time to let segment space become
	 * available.
	 */
	do {
		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
		error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
		    round_page(ap->a_offhi),
		    PGO_CLEANIT | (wait ? PGO_SYNCIO : 0));
		if (error == EAGAIN) {
			mutex_enter(&lfs_lock);
			mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
			    "lfs_fsync", hz / 100 + 1, &lfs_lock);
			mutex_exit(&lfs_lock);
		}
	} while (error == EAGAIN);
	if (error)
		goto out;

	if ((ap->a_flags & FSYNC_DATAONLY) == 0)
		error = lfs_update(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		error = VOP_IOCTL(ip->i_devvp, DIOCCACHESYNC, &l, FWRITE,
		    curlwp->l_cred);
	}
	if (wait && !VPISEMPTY(vp))
		LFS_SET_UINO(ip, IN_MODIFIED);

out:
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	return error;
}

/*
 * Take IN_ADIROP off, then call ulfs_inactive.
 */
int
lfs_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;

	KASSERT(VOP_ISLOCKED(ap->a_vp) == LK_EXCLUSIVE);

	UNMARK_VNODE(ap->a_vp);

	/*
	 * The Ifile is only ever inactivated on unmount.
	 * Streamline this process by not giving it more dirty blocks.
	 */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM) {
		mutex_enter(&lfs_lock);
		LFS_CLR_UINO(VTOI(ap->a_vp), IN_ALLMOD);
		mutex_exit(&lfs_lock);
		return 0;
	}

#ifdef DEBUG
	/*
	 * This might happen on unmount.
	 * XXX If it happens at any other time, it should be a panic.
	 */
	if (ap->a_vp->v_uflag & VU_DIROP) {
		struct inode *ip = VTOI(ap->a_vp);
		printf("lfs_inactive: inactivating VU_DIROP? ino = %llu\n",
		    (unsigned long long) ip->i_number);
	}
#endif /* DEBUG */

	return ulfs_inactive(v);
}

/*
 * Prepare for a directory operation on dvp (and optionally vp):
 * reserve segment space, wait out any active writer, and throttle if
 * too many dirop vnodes are outstanding.  On success returns 0 with
 * fs->lfs_dirops incremented, extra references held on dvp/vp, and
 * both MARK_VNODE'd; the caller must undo this with UNMARK_VNODE and
 * lfs_unset_dirop.  On failure the reservation is released and an
 * errno is returned.
 */
int
lfs_set_dirop(struct vnode *dvp, struct vnode *vp)
{
	struct lfs *fs;
	int error;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vp == NULL || VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	fs = VTOI(dvp)->i_lfs;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * LFS_NRESERVE calculates direct and indirect blocks as well
	 * as an inode block; an overestimate in most cases.
	 */
	if ((error = lfs_reserve(fs, dvp, vp, LFS_NRESERVE(fs))) != 0)
		return (error);

    restart:
	mutex_enter(&lfs_lock);
	if (fs->lfs_dirops == 0) {
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
		mutex_enter(&lfs_lock);
	}
	while (fs->lfs_writer) {
		error = cv_wait_sig(&fs->lfs_diropscv, &lfs_lock);
		if (error == EINTR) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
	}
	if (lfs_dirvcount > LFS_MAX_DIROP && fs->lfs_dirops == 0) {
		cv_broadcast(&lfs_writerd_cv);
		mutex_exit(&lfs_lock);
		preempt();
		goto restart;
	}

	if (lfs_dirvcount > LFS_MAX_DIROP) {
		DLOG((DLOG_DIROP, "lfs_set_dirop: sleeping with dirops=%d, "
		      "dirvcount=%d\n", fs->lfs_dirops, lfs_dirvcount));
		/*
		 * NOTE(review): mtsleep() is called with PNORELOCK, which
		 * normally means lfs_lock is already dropped on return;
		 * the mutex_exit() calls on both paths below look
		 * redundant -- confirm against tsleep/mtsleep(9).
		 */
		if ((error = mtsleep(&lfs_dirvcount,
		    PCATCH | PUSER | PNORELOCK, "lfs_maxdirop", 0,
		    &lfs_lock)) != 0) {
			mutex_exit(&lfs_lock);
			goto unreserve;
		}
		mutex_exit(&lfs_lock);
		goto restart;
	}

	++fs->lfs_dirops;
	/* fs->lfs_doifile = 1; */ /* XXX why? --ks */
	mutex_exit(&lfs_lock);

	/* Hold a reference so SET_ENDOP will be happy */
	vref(dvp);
	if (vp) {
		vref(vp);
		MARK_VNODE(vp);
	}

	MARK_VNODE(dvp);
	return 0;

unreserve:
	lfs_reserve(fs, dvp, vp, -LFS_NRESERVE(fs));
	return error;
}

/*
 * Opposite of lfs_set_dirop... mostly.  For now at least must call
 * UNMARK_VNODE(dvp) explicitly first.
 * (XXX: clean that up)
 *
 * Drops the dirop count taken by lfs_set_dirop, wakes any waiting
 * writer when the count reaches zero, and releases the segment-space
 * reservation.  "str" names the operation for the panic message only.
 */
void
lfs_unset_dirop(struct lfs *fs, struct vnode *dvp, const char *str)
{
	mutex_enter(&lfs_lock);
	--fs->lfs_dirops;
	if (!fs->lfs_dirops) {
		/* Active dirop vnodes with no dirops is inconsistent. */
		if (fs->lfs_nadirop) {
			panic("lfs_unset_dirop: %s: no dirops but "
			      " nadirop=%d", str,
			      fs->lfs_nadirop);
		}
		wakeup(&fs->lfs_writer);
		mutex_exit(&lfs_lock);
		lfs_check(dvp, LFS_UNUSED_LBN, 0);
	} else {
		mutex_exit(&lfs_lock);
	}
	lfs_reserve(fs, dvp, NULL, -LFS_NRESERVE(fs));
}

/*
 * Mark vp as participating in an active directory operation: set
 * IN_ADIROP, and on first marking also set VU_DIROP, take a reference,
 * and put the inode on the per-fs dirop chain.
 */
void
lfs_mark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;

	mutex_enter(&lfs_lock);
	if (!(ip->i_state & IN_ADIROP)) {
		if (!(vp->v_uflag & VU_DIROP)) {
			/* vref() may sleep/lock; drop lfs_lock around it. */
			mutex_exit(&lfs_lock);
			vref(vp);
			mutex_enter(&lfs_lock);
			++lfs_dirvcount;
			++fs->lfs_dirvcount;
			TAILQ_INSERT_TAIL(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			vp->v_uflag |= VU_DIROP;
		}
		++fs->lfs_nadirop;
		ip->i_state &= ~IN_CDIROP;
		ip->i_state |= IN_ADIROP;
	} else
		KASSERT(vp->v_uflag & VU_DIROP);
	mutex_exit(&lfs_lock);
}

/*
 * Clear IN_ADIROP on vp, if set, and drop the active-dirop count.
 * VU_DIROP (and its reference and chain entry) is left in place.
 */
void
lfs_unmark_vnode(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);

	mutex_enter(&lfs_lock);
	if (ip && (ip->i_state & IN_ADIROP)) {
		KASSERT(vp->v_uflag & VU_DIROP);
		--ip->i_lfs->lfs_nadirop;
		ip->i_state &= ~IN_ADIROP;
	}
	mutex_exit(&lfs_lock);
}

/*
 * Create a symbolic link.  Short targets are stored directly in the
 * inode (SHORTLINK); longer ones are written as file data.
 */
int
lfs_symlink(void *v)
{
	struct vop_symlink_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct inode *ip;
	struct ulfs_lookup_results *ulr;
	ssize_t len; /* XXX should be size_t */
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);
	KASSERT(ap->a_vap->va_type == VLNK);

	/* XXX should handle this material another way */
	ulr = &VTOI(ap->a_dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(ap->a_vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		goto out;
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);

	ip = VTOI(*vpp);

	/*
	 * This test is off by one. um_maxsymlinklen contains the
	 * number of bytes available, and we aren't storing a \0, so
	 * the test should properly be <=. However, it cannot be
	 * changed as this would break compatibility with existing fs
	 * images -- see the way ulfs_readlink() works.
	 */
	len = strlen(ap->a_target);
	if (len < ip->i_lfs->um_maxsymlinklen) {
		memcpy((char *)SHORTLINK(ip), ap->a_target, len);
		ip->i_size = len;
		DIP_ASSIGN(ip, size, len);
		uvm_vnp_setsize(*vpp, ip->i_size);
		ip->i_state |= IN_CHANGE | IN_UPDATE;
		if ((*vpp)->v_mount->mnt_flag & MNT_RELATIME)
			ip->i_state |= IN_ACCESS;
	} else {
		error = ulfs_bufio(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
		    IO_NODELOCKED | IO_JOURNALLOCKED, ap->a_cnp->cn_cred, NULL,
		    NULL);
	}

	VOP_UNLOCK(*vpp);
	if (error)
		vrele(*vpp);

out:
	UNMARK_VNODE(dvp);
	/* XXX: is it even possible for the symlink to get MARK'd? */
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "symlink");

	vrele(dvp);
	return (error);
}

/*
 * Create a special file (device node, etc.).  Unlike the other
 * creation ops, the new vnode is fsync'd immediately so it never has
 * to be flushed while marked VU_DIROP or during reclaim.
 */
int
lfs_mknod(void *v)
{
	struct vop_mknod_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct inode *ip;
	int error;
	ino_t ino;
	struct ulfs_lookup_results *ulr;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);

	/* Either way we're done with the dirop at this point */
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	lfs_unset_dirop(fs, dvp, "mknod");

	if (error) {
		vrele(dvp);
		*vpp = NULL;
		return (error);
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);

	ip = VTOI(*vpp);
	ino = ip->i_number;
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;

	/*
	 * Call fsync to write the vnode so that we don't have to deal with
	 * flushing it when it's marked VU_DIROP or reclaiming.
	 *
	 * XXX KS - If we can't flush we also can't call vgone(), so must
	 * return.  But, that leaves this vnode in limbo, also not good.
	 * Can this ever happen (barring hardware failure)?
	 */
	if ((error = VOP_FSYNC(*vpp, NOCRED, FSYNC_WAIT, 0, 0)) != 0) {
		panic("lfs_mknod: couldn't fsync (ino %llu)",
		      (unsigned long long) ino);
		/* return (error); */
	}

	vrele(dvp);
	KASSERT(error == 0);
	VOP_UNLOCK(*vpp);
	return (0);
}

/*
 * Create a regular file
 */
int
lfs_create(void *v)
{
	struct vop_create_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, **vpp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	int error;

	dvp = ap->a_dvp;
	vpp = ap->a_vpp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &VTOI(dvp)->i_crap;
	ULFS_CHECK_CRAPCOUNTER(VTOI(dvp));

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	error = lfs_makeinode(vap, dvp, ulr, vpp, ap->a_cnp);
	if (error) {
		goto out;
	}
	KASSERT(VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE);
	VOP_UNLOCK(*vpp);

out:

	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "create");

	vrele(dvp);
	return (error);
}

/*
 * Create a directory: allocate the inode, write its initial "." and
 * ".." block, then enter it in the parent.  The parent's link count
 * is bumped up front (for the child's "..") and rolled back on error.
 */
int
lfs_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, *tvp, **vpp;
	struct inode *dp, *ip;
	struct componentname *cnp;
	struct vattr *vap;
	struct ulfs_lookup_results *ulr;
	struct buf *bp;
	LFS_DIRHEADER *dirp;
	int dirblksiz;
	int error;

	dvp = ap->a_dvp;
	tvp = NULL;
	vpp = ap->a_vpp;
	cnp = ap->a_cnp;
	vap = ap->a_vap;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);

	dp = VTOI(dvp);
	ip = NULL;

	KASSERT(vap->va_type == VDIR);
	KASSERT(vpp != NULL);
	KASSERT(*vpp == NULL);

	/* XXX should handle this material another way */
	ulr = &dp->i_crap;
	ULFS_CHECK_CRAPCOUNTER(dp);

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
		return EMLINK;
	}

	dirblksiz = fs->um_dirblksiz;
	/* XXX dholland 20150911 I believe this to be true, but... */
	//KASSERT(dirblksiz == LFS_DIRBLKSIZ);

	error = lfs_set_dirop(dvp, NULL);
	if (error)
		return error;

	/*
	 * Must simulate part of lfs_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory. The entry is
	 * made later after writing "." and ".." entries.
	 */
	error = vcache_new(dvp->v_mount, dvp, vap, cnp->cn_cred, NULL,
	    ap->a_vpp);
	if (error)
		goto out;

	error = vn_lock(*ap->a_vpp, LK_EXCLUSIVE);
	if (error) {
		vrele(*ap->a_vpp);
		*ap->a_vpp = NULL;
		goto out;
	}

	tvp = *ap->a_vpp;
	MARK_VNODE(tvp);
	ip = VTOI(tvp);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	/* New directory starts with two links: its own "." and the parent's entry. */
	ip->i_nlink = 2;
	DIP_ASSIGN(ip, nlink, 2);
	if (cnp->cn_flags & ISWHITEOUT) {
		ip->i_flags |= UF_OPAQUE;
		DIP_ASSIGN(ip, flags, ip->i_flags);
	}

	/*
	 * Bump link count in parent directory to reflect work done below.
	 */
	dp->i_nlink++;
	DIP_ASSIGN(dp, nlink, dp->i_nlink);
	dp->i_state |= IN_CHANGE;
	if ((error = lfs_update(dvp, NULL, NULL, UPDATE_DIROP)) != 0)
		goto bad;

	/*
	 * Initialize directory with "." and "..". This used to use a
	 * static template but that adds moving parts for very little
	 * benefit.
	 */
	if ((error = lfs_balloc(tvp, (off_t)0, dirblksiz, cnp->cn_cred,
	    B_CLRBUF, &bp)) != 0)
		goto bad;
	ip->i_size = dirblksiz;
	DIP_ASSIGN(ip, size, dirblksiz);
	ip->i_state |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	uvm_vnp_setsize(tvp, ip->i_size);
	dirp = bp->b_data;

	/* . */
	lfs_dir_setino(fs, dirp, ip->i_number);
	lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 1);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1,
	    LFS_DIRECTSIZ(fs, 1));
	dirp = LFS_NEXTDIR(fs, dirp);
	/* .. -- its reclen covers the rest of the directory block. */
	lfs_dir_setino(fs, dirp, dp->i_number);
	lfs_dir_setreclen(fs, dirp, dirblksiz - LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 2);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2,
	    dirblksiz - LFS_DIRECTSIZ(fs, 1));

	/*
	 * Directory set up; now install its entry in the parent directory.
	 */
	if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0)
		goto bad;
	if ((error = lfs_update(tvp, NULL, NULL, UPDATE_DIROP)) != 0) {
		goto bad;
	}
	error = ulfs_direnter(dvp, ulr, tvp,
	    cnp, ip->i_number, LFS_IFTODT(ip->i_mode), bp);
bad:
	if (error == 0) {
		VOP_UNLOCK(tvp);
	} else {
		/* Roll back the parent's extra link taken above. */
		dp->i_nlink--;
		DIP_ASSIGN(dp, nlink, dp->i_nlink);
		dp->i_state |= IN_CHANGE;
		/*
		 * No need to do an explicit lfs_truncate here, vrele will
		 * do this for us because we set the link count to 0.
		 */
		ip->i_nlink = 0;
		DIP_ASSIGN(ip, nlink, 0);
		ip->i_state |= IN_CHANGE;
		/* If IN_ADIROP, account for it */
		UNMARK_VNODE(tvp);
		vput(tvp);
	}

out:
	UNMARK_VNODE(dvp);
	UNMARK_VNODE(*vpp);
	if (error) {
		*vpp = NULL;
	}
	lfs_unset_dirop(fs, dvp, "mkdir");

	vrele(dvp);
	return (error);
}

/*
 * Remove a file.  Wraps ulfs_remove() in dirop accounting and, if the
 * last link went away, records the inode as an orphan.
 */
int
lfs_remove(void *v)
{
	struct vop_remove_v3_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
		nlink_t ctx_vp_new_nlink;
	} */ *ap = v;
	struct vnode *dvp, *vp;
	struct inode *ip;
	int error;

	dvp = ap->a_dvp;
	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if ((error = lfs_set_dirop(dvp, vp)) != 0) {
		if (dvp == vp)
			vrele(vp);
		else
			vput(vp);
		return error;
	}
	error = ulfs_remove(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, ip->i_number);

	UNMARK_VNODE(dvp);
	if (ap->a_vp) {
		UNMARK_VNODE(ap->a_vp);
	}
	lfs_unset_dirop(ip->i_lfs, dvp, "remove");
	vrele(dvp);
	if (ap->a_vp) {
		vrele(ap->a_vp);
	}

	return (error);
}

/*
 * Remove a directory.  Wraps ulfs_rmdir() in dirop accounting and, if
 * the directory's link count dropped to zero, records it as an orphan.
 */
int
lfs_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	int error;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(ap->a_dvp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if ((error = lfs_set_dirop(ap->a_dvp, ap->a_vp)) != 0) {
		if (ap->a_dvp == vp)
			vrele(vp);
		else
			vput(vp);
		return error;
	}
	error = ulfs_rmdir(ap);
	if (ip->i_nlink == 0)
		lfs_orphan(ip->i_lfs, ip->i_number);

	UNMARK_VNODE(ap->a_dvp);
	if (ap->a_vp) {
		UNMARK_VNODE(ap->a_vp);
	}
	lfs_unset_dirop(ip->i_lfs, ap->a_dvp, "rmdir");
	vrele(ap->a_dvp);
	if (ap->a_vp) {
		vrele(ap->a_vp);
	}

	return (error);
}

/*
 * Create a hard link.
 *
 * Wraps ulfs_link() with LFS dirop accounting; refuses on a read-only
 * filesystem before touching any state.
 */
int
lfs_link(void *v)
{
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	struct lfs *fs;
	struct vnode *dvp, *vp;
	int error;

	dvp = ap->a_dvp;
	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);

	fs = VFSTOULFS(dvp->v_mount)->um_lfs;
	ASSERT_NO_SEGLOCK(fs);
	if (fs->lfs_ronly) {
		return EROFS;
	}

	/* The link target must be locked across lfs_set_dirop(). */
	error = vn_lock(vp, LK_EXCLUSIVE);
	if (error)
		return error;
	error = lfs_set_dirop(dvp, vp);
	VOP_UNLOCK(vp);
	if (error)
		return error;

	error = ulfs_link(ap);

	UNMARK_VNODE(vp);
	UNMARK_VNODE(dvp);
	lfs_unset_dirop(fs, dvp, "link");
	vrele(vp);
	vrele(dvp);

	return (error);
}

/* XXX hack to avoid calling ITIMES in getattr */
int
lfs_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct vattr *vap = ap->a_vap;
	struct lfs *fs;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	fs = ip->i_lfs;

	/*
	 * Copy from inode table
	 */
	vap->va_fsid = ip->i_dev;
	vap->va_fileid = ip->i_number;
	/* Strip the file-type bits; leave only the permission bits. */
	vap->va_mode = ip->i_mode & ~LFS_IFMT;
	vap->va_nlink = ip->i_nlink;
	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	switch (vp->v_type) {
	case VBLK:
	case VCHR:
		vap->va_rdev = (dev_t)lfs_dino_getrdev(fs, ip->i_din);
		break;
	default:
		vap->va_rdev = NODEV;
		break;
	}
	vap->va_size = vp->v_size;
	/* Timestamps come straight from the dinode (see XXX above). */
	vap->va_atime.tv_sec = lfs_dino_getatime(fs, ip->i_din);
	vap->va_atime.tv_nsec = lfs_dino_getatimensec(fs, ip->i_din);
	vap->va_mtime.tv_sec = lfs_dino_getmtime(fs, ip->i_din);
	vap->va_mtime.tv_nsec = lfs_dino_getmtimensec(fs, ip->i_din);
	vap->va_ctime.tv_sec = lfs_dino_getctime(fs, ip->i_din);
	vap->va_ctime.tv_nsec = lfs_dino_getctimensec(fs, ip->i_din);
	vap->va_flags = ip->i_flags;
	vap->va_gen = ip->i_gen;
	/* this doesn't belong here */
	if (vp->v_type == VBLK)
		vap->va_blocksize = BLKDEV_IOSIZE;
	else if (vp->v_type == VCHR)
		vap->va_blocksize = MAXBSIZE;
	else
		vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
	vap->va_bytes = lfs_fsbtob(fs, ip->i_lfs_effnblks);
	vap->va_type = vp->v_type;
	vap->va_filerev = ip->i_modrev;
	return (0);
}

/*
 * Check to make sure the inode blocks won't choke the buffer
 * cache, then call ulfs_setattr as usual.
 */
int
lfs_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	lfs_check(vp, LFS_UNUSED_LBN, 0);
	return ulfs_setattr(v);
}

/*
 * Release the block we hold on lfs_newseg wrapping. Called on file close,
 * or explicitly from LFCNWRAPGO. Called with the interlock held.
 */
static int
lfs_wrapgo(struct lfs *fs, struct inode *ip, int waitfor)
{
	/* Only the lwp that stopped log wrap may restart it. */
	if (fs->lfs_stoplwp != curlwp)
		return EBUSY;

	fs->lfs_stoplwp = NULL;
	cv_signal(&fs->lfs_stopcv);

	KASSERT(fs->lfs_nowrap > 0);
	if (fs->lfs_nowrap <= 0) {
		return 0;
	}

	if (--fs->lfs_nowrap == 0) {
		log(LOG_NOTICE, "%s: re-enabled log wrap\n",
		    lfs_sb_getfsmnt(fs));
		wakeup(&fs->lfs_wrappass);
		lfs_wakeup_cleaner(fs);
	}
	if (waitfor) {
		cv_wait_sig(&fs->lfs_nextsegsleep, &lfs_lock);
	}

	return 0;
}

/*
 * Close called.
 *
 * Update the times on the inode.
 */
/* ARGSUSED */
int
lfs_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct lfs *fs;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	fs = ip->i_lfs;

	/*
	 * If this lwp held the log-wrap stop (LFCNWRAPSTOP) and is
	 * closing the root or the Ifile, release the wrap control.
	 */
	if ((ip->i_number == ULFS_ROOTINO || ip->i_number == LFS_IFILE_INUM) &&
	    fs->lfs_stoplwp == curlwp) {
		mutex_enter(&lfs_lock);
		log(LOG_NOTICE, "lfs_close: releasing log wrap control\n");
		lfs_wrapgo(fs, ip, 0);
		mutex_exit(&lfs_lock);
	}

	/* Skip timestamp work for the Ifile during unmount. */
	if (vp == ip->i_lfs->lfs_ivnode &&
	    vp->v_mount->mnt_iflag & IMNT_UNMOUNT)
		return 0;

	if (vrefcnt(vp) > 1 && vp != ip->i_lfs->lfs_ivnode) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	return (0);
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
lfsspec_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if (vrefcnt(vp) > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	/* Chain to the underlying special-device close routine. */
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Close wrapper for fifo's.
 *
 * Update the times on the inode then do device close.
 */
int
lfsfifo_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	if (vrefcnt(ap->a_vp) > 1) {
		LFS_ITIMES(ip, NULL, NULL, NULL);
	}
	/* Chain to the fifo close routine. */
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */

int
lfs_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct lfs *fs;
	int error;

	VOP_UNLOCK(vp);

	ip = VTOI(vp);
	fs = ip->i_lfs;

	/*
	 * The inode must be freed and updated before being removed
	 * from its hash chain. Other threads trying to gain a hold
	 * or lock on the inode will be stalled.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
		lfs_vfree(vp, ip->i_number, ip->i_omode);

	mutex_enter(&lfs_lock);
	LFS_CLR_UINO(ip, IN_ALLMOD);
	mutex_exit(&lfs_lock);
	if ((error = ulfs_reclaim(vp)))
		return (error);

	/*
	 * Take us off the paging and/or dirop queues if we were on them.
	 * We shouldn't be on them.
	 */
	mutex_enter(&lfs_lock);
	if (ip->i_state & IN_PAGING) {
		log(LOG_WARNING, "%s: reclaimed vnode is IN_PAGING\n",
		    lfs_sb_getfsmnt(fs));
		ip->i_state &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain);
	}
	if (vp->v_uflag & VU_DIROP)
		panic("reclaimed vnode is VU_DIROP");
	mutex_exit(&lfs_lock);

	/* Return all per-inode allocations to their pools. */
	pool_put(&lfs_dinode_pool, ip->i_din);
	lfs_deregister_all(vp);
	pool_put(&lfs_inoext_pool, ip->inode_ext.lfs);
	ip->inode_ext.lfs = NULL;
	genfs_node_destroy(vp);
	pool_put(&lfs_inode_pool, vp->v_data);
	vp->v_data = NULL;
	return (0);
}

/*
 * Read a block from a storage device.
 *
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to avoid reading blocks that are in the process of being
 * written by the cleaner---and hence are not mutexed by the normal
 * buffer cache / page cache mechanisms---check for collisions before
 * reading.
 *
 * We inline ulfs_strategy to make sure that the VOP_BMAP occurs *before*
 * the active cleaner test.
 *
 * XXX This code assumes that lfs_markv makes synchronous checkpoints.
 */
int
lfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct inode *ip;
	daddr_t tbn;
#define MAXLOOP 25
	int i, sn, error, slept, loopcount;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_lfs;

	/* lfs uses its strategy routine only for read */
	KASSERT(bp->b_flags & B_READ);

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("lfs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	/* Translate the logical block to a disk address if not done yet. */
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL);
		if (error) {
			bp->b_error = error;
			bp->b_resid = bp->b_cbcount;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if ((long)bp->b_blkno < 0) { /* block is not on disk */
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/*
	 * Collision avoidance with the cleaner: while the seglock is
	 * held, retry until the target block is not inside any interval
	 * the cleaner is currently rewriting.
	 */
	slept = 1;
	loopcount = 0;
	mutex_enter(&lfs_lock);
	while (slept && fs->lfs_seglock) {
		mutex_exit(&lfs_lock);
		/*
		 * Look through list of intervals.
		 * There will only be intervals to look through
		 * if the cleaner holds the seglock.
		 * Since the cleaner is synchronous, we can trust
		 * the list of intervals to be current.
		 */
		tbn = LFS_DBTOFSB(fs, bp->b_blkno);
		sn = lfs_dtosn(fs, tbn);
		slept = 0;
		for (i = 0; i < fs->lfs_cleanind; i++) {
			if (sn == lfs_dtosn(fs, fs->lfs_cleanint[i]) &&
			    tbn >= fs->lfs_cleanint[i]) {
				DLOG((DLOG_CLEAN,
				    "lfs_strategy: ino %llu lbn %" PRId64
				    " ind %d sn %d fsb %" PRIx64
				    " given sn %d fsb %" PRIx64 "\n",
				    (unsigned long long) ip->i_number,
				    bp->b_lblkno, i,
				    lfs_dtosn(fs, fs->lfs_cleanint[i]),
				    fs->lfs_cleanint[i], sn, tbn));
				DLOG((DLOG_CLEAN,
				    "lfs_strategy: sleeping on ino %llu lbn %"
				    PRId64 "\n",
				    (unsigned long long) ip->i_number,
				    bp->b_lblkno));
				mutex_enter(&lfs_lock);
				if (LFS_SEGLOCK_HELD(fs) && fs->lfs_iocount) {
					/*
					 * Cleaner can't wait for itself.
					 * Instead, wait for the blocks
					 * to be written to disk.
					 * XXX we need pribio in the test
					 * XXX here.
					 */
					mtsleep(&fs->lfs_iocount,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean2", hz/10 + 1,
					    &lfs_lock);
					slept = 1;
					++loopcount;
					break;
				} else if (fs->lfs_seglock) {
					mtsleep(&fs->lfs_seglock,
					    (PRIBIO + 1) | PNORELOCK,
					    "clean1", 0,
					    &lfs_lock);
					slept = 1;
					break;
				}
				mutex_exit(&lfs_lock);
			}
		}
		mutex_enter(&lfs_lock);
		/* Give up after MAXLOOP short sleeps on the cleaner. */
		if (loopcount > MAXLOOP) {
			printf("lfs_strategy: breaking out of clean2 loop\n");
			break;
		}
	}
	mutex_exit(&lfs_lock);

	/* Hand the translated request to the underlying device. */
	vp = ip->i_devvp;
	return VOP_STRATEGY(vp, bp);
}

/*
 * Inline lfs_segwrite/lfs_writevnodes, but just for dirops.
 * Technically this is a checkpoint (the on-disk state is valid)
 * even though we are leaving out all the file data.
 */
int
lfs_flush_dirops(struct lfs *fs)
{
	struct inode *ip, *marker;
	struct vnode *vp;
	extern int lfs_dostats; /* XXX this does not belong here */
	struct segment *sp;
	SEGSUM *ssp;
	int flags = 0;
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(fs->lfs_nadirop == 0); /* stable during lfs_writer */
	KASSERT(fs->lfs_dirops == 0); /* stable during lfs_writer */

	if (fs->lfs_ronly)
		return EROFS;

	/* Nothing queued?  Nothing to do. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_dchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * A marker inode lets us keep our place in lfs_dchainhd while
	 * dropping lfs_lock to write each entry.
	 */
	marker = pool_get(&lfs_inode_pool, PR_WAITOK);
	memset(marker, 0, sizeof(*marker));
	marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
	memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs));
	marker->i_state = IN_MARKER;

	lfs_imtime(fs);
	lfs_seglock(fs, flags);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to get dirops out of the way.
	 * Only write dirops, and don't flush files' pages, only
	 * blocks from the directories.
	 *
	 * We don't need to vref these files because they are
	 * dirops and so hold an extra reference until the
	 * segunlock clears them of that status.
	 *
	 * We don't need to check for IN_ADIROP because we know that
	 * no dirops are active.
	 *
	 */
	mutex_enter(&lfs_lock);
	KASSERT(fs->lfs_writer);
	TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) {
		/* Advance the marker past the entry we are about to write. */
		TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
		TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker,
		    i_lfs_dchain);
		if (ip->i_state & IN_MARKER)
			continue;
		vp = ITOV(ip);

		/*
		 * Prevent the vnode from going away if it's just been
		 * put out in the segment and lfs_unmark_dirop is about
		 * to release it. While it is on the list it is always
		 * referenced, so it cannot be reclaimed until we
		 * release it.
		 */
		vref(vp);

		/*
		 * Since we hold lfs_writer, the node can't be in an
		 * active dirop. Since it's on the list and we hold a
		 * reference to it, it can't be reclaimed now.
		 */
		KASSERT((ip->i_state & IN_ADIROP) == 0);
		KASSERT(vp->v_uflag & VU_DIROP);

		/*
		 * After we release lfs_lock, if we were in the middle
		 * of writing a segment, lfs_unmark_dirop may end up
		 * clearing VU_DIROP, and we have no way to stop it.
		 * That should be OK -- we'll just have less to do
		 * here.
		 */
		mutex_exit(&lfs_lock);

		/*
		 * All writes to directories come from dirops; all
		 * writes to files' direct blocks go through the page
		 * cache, which we're not touching. Reads to files
		 * and/or directories will not be affected by writing
		 * directory blocks inodes and file inodes. So we don't
		 * really need to lock.
		 */
		if (vp->v_type != VREG &&
		    ((ip->i_state & IN_ALLMOD) || !VPISEMPTY(vp))) {
			error = lfs_writefile(fs, sp, vp);
			if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
			    !(ip->i_state & IN_ALLMOD)) {
				mutex_enter(&lfs_lock);
				LFS_SET_UINO(ip, IN_MODIFIED);
				mutex_exit(&lfs_lock);
			}
			if (error && (sp->seg_flags & SEGM_SINGLE)) {
				vrele(vp);
				mutex_enter(&lfs_lock);
				error = EAGAIN;
				break;
			}
		}
		KASSERT(ip->i_number != LFS_IFILE_INUM);
		error = lfs_writeinode(fs, sp, ip);
		if (error && (sp->seg_flags & SEGM_SINGLE)) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			error = EAGAIN;
			break;
		}

		/*
		 * We might need to update files' inodes again;
		 * for example, if they have data blocks to write.
		 * Make sure that after this flush, they are still
		 * marked IN_MODIFIED so that we don't forget to
		 * write them.
		 */
		mutex_enter(&lfs_lock);
		if (vp->v_type == VREG)
			LFS_SET_UINO(ip, IN_MODIFIED);
		mutex_exit(&lfs_lock);

		vrele(vp);
		mutex_enter(&lfs_lock);
	}
	TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	mutex_exit(&lfs_lock);

	/* We've written all the dirops there are */
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
	lfs_finalize_fs_seguse(fs);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	pool_put(&lfs_inoext_pool, marker->inode_ext.lfs);
	pool_put(&lfs_inode_pool, marker);

	return error;
}

/*
 * Flush all vnodes for which the pagedaemon has requested pageouts.
 * Skip over any files that are marked VU_DIROP (since lfs_flush_dirop()
 * has just run, this would be an error). If we have to skip a vnode
 * for any reason, just skip it; if we have to wait for the cleaner,
 * abort. The writer daemon will call us again later.
 */
int
lfs_flush_pchain(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	extern int lfs_dostats;
	struct segment *sp;
	int error, error2;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(fs->lfs_writer);

	if (fs->lfs_ronly)
		return EROFS;

	/* Empty pageout queue: nothing to do. */
	mutex_enter(&lfs_lock);
	if (TAILQ_FIRST(&fs->lfs_pchainhd) == NULL) {
		mutex_exit(&lfs_lock);
		return 0;
	} else
		mutex_exit(&lfs_lock);

	/* Get dirops out of the way */
	if ((error = lfs_flush_dirops(fs)) != 0)
		return error;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	/*
	 * Inline lfs_segwrite/lfs_writevnodes, but just for pageouts.
	 */
	lfs_imtime(fs);
	lfs_seglock(fs, 0);
	sp = fs->lfs_sp;

	/*
	 * lfs_writevnodes, optimized to clear pageout requests.
	 * Only write non-dirop files that are in the pageout queue.
	 * We're very conservative about what we write; we want to be
	 * fast and async.
	 */
	mutex_enter(&lfs_lock);
    top:
	for (ip = TAILQ_FIRST(&fs->lfs_pchainhd); ip != NULL; ip = nip) {
		struct mount *mp = ITOV(ip)->v_mount;
		ino_t ino = ip->i_number;

		nip = TAILQ_NEXT(ip, i_lfs_pchain);

		/* Entry lost IN_PAGING while we slept: restart the scan. */
		if (!(ip->i_state & IN_PAGING))
			goto top;

		mutex_exit(&lfs_lock);
		if (vcache_get(mp, &ino, sizeof(ino), &vp) != 0) {
			mutex_enter(&lfs_lock);
			continue;
		};
		/* Don't wait on a busy vnode; just pass it by. */
		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			vrele(vp);
			mutex_enter(&lfs_lock);
			continue;
		}
		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		/* Re-check eligibility now that we hold the vnode lock. */
		if ((vp->v_uflag & VU_DIROP) != 0 || vp->v_type != VREG ||
		    !(ip->i_state & IN_PAGING)) {
			mutex_exit(&lfs_lock);
			vput(vp);
			mutex_enter(&lfs_lock);
			goto top;
		}
		mutex_exit(&lfs_lock);

		error = lfs_writefile(fs, sp, vp);
		if (!VPISEMPTY(vp) && !WRITEINPROG(vp) &&
		    !(ip->i_state & IN_ALLMOD)) {
			mutex_enter(&lfs_lock);
			LFS_SET_UINO(ip, IN_MODIFIED);
			mutex_exit(&lfs_lock);
		}
		KASSERT(ip->i_number != LFS_IFILE_INUM);
		error2 = lfs_writeinode(fs, sp, ip);

		VOP_UNLOCK(vp);
		vrele(vp);

		/* EAGAIN means the cleaner needs to run: abort the pass. */
		if (error == EAGAIN || error2 == EAGAIN) {
			lfs_writeseg(fs, sp);
			mutex_enter(&lfs_lock);
			break;
		}
		mutex_enter(&lfs_lock);
	}
	mutex_exit(&lfs_lock);
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);

	return 0;
}

/*
 * Conversion for compat.
1815 */ 1816 static void 1817 block_info_from_70(BLOCK_INFO *bi, const BLOCK_INFO_70 *bi70) 1818 { 1819 bi->bi_inode = bi70->bi_inode; 1820 bi->bi_lbn = bi70->bi_lbn; 1821 bi->bi_daddr = bi70->bi_daddr; 1822 bi->bi_segcreate = bi70->bi_segcreate; 1823 bi->bi_version = bi70->bi_version; 1824 bi->bi_bp = bi70->bi_bp; 1825 bi->bi_size = bi70->bi_size; 1826 } 1827 1828 static void 1829 block_info_to_70(BLOCK_INFO_70 *bi70, const BLOCK_INFO *bi) 1830 { 1831 bi70->bi_inode = bi->bi_inode; 1832 bi70->bi_lbn = bi->bi_lbn; 1833 bi70->bi_daddr = bi->bi_daddr; 1834 bi70->bi_segcreate = bi->bi_segcreate; 1835 bi70->bi_version = bi->bi_version; 1836 bi70->bi_bp = bi->bi_bp; 1837 bi70->bi_size = bi->bi_size; 1838 } 1839 1840 /* 1841 * Provide a fcntl interface to sys_lfs_{segwait,bmapv,markv}. 1842 */ 1843 int 1844 lfs_fcntl(void *v) 1845 { 1846 struct vop_fcntl_args /* { 1847 struct vnode *a_vp; 1848 u_int a_command; 1849 void * a_data; 1850 int a_fflag; 1851 kauth_cred_t a_cred; 1852 } */ *ap = v; 1853 struct timeval tv; 1854 struct timeval *tvp; 1855 struct timeval50 *tvp50; 1856 BLOCK_INFO *blkiov; 1857 BLOCK_INFO_70 *blkiov70; 1858 CLEANERINFO *cip; 1859 CLEANERINFO64 ci; 1860 SEGUSE *sup, *sua; 1861 int blkcnt, i, error; 1862 size_t fh_size; 1863 struct lfs_fcntl_markv blkvp; 1864 struct lfs_fcntl_markv_70 blkvp70; 1865 struct lfs_inode_array inotbl; 1866 struct lfs_segnum_array snap; 1867 struct lfs_filestat_req lfr; 1868 struct lfs_write_stats lws; 1869 struct lfs_filestats *fss; 1870 struct lfs_seguse_array suap; 1871 struct lwp *l; 1872 fsid_t *fsidp; 1873 struct lfs *fs; 1874 struct buf *bp; 1875 fhandle_t *fhp; 1876 daddr_t off; 1877 int oclean, *sna, direct, offset; 1878 ino_t *inoa; 1879 bool scramble; 1880 ino_t maxino; 1881 1882 /* Only respect LFS fcntls on fs root or Ifile */ 1883 if (VTOI(ap->a_vp)->i_number != ULFS_ROOTINO && 1884 VTOI(ap->a_vp)->i_number != LFS_IFILE_INUM) { 1885 return genfs_fcntl(v); 1886 } 1887 1888 /* Avoid locking a draining lock */ 
1889 if (ap->a_vp->v_mount->mnt_iflag & IMNT_UNMOUNT) { 1890 return ESHUTDOWN; 1891 } 1892 1893 /* LFS control and monitoring fcntls are available only to root */ 1894 l = curlwp; 1895 if (((ap->a_command & 0xff00) >> 8) == 'L' && 1896 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, 1897 KAUTH_REQ_SYSTEM_LFS_FCNTL, NULL, NULL, NULL)) != 0) 1898 return (error); 1899 1900 fs = VTOI(ap->a_vp)->i_lfs; 1901 fsidp = &ap->a_vp->v_mount->mnt_stat.f_fsidx; 1902 1903 maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs)) 1904 - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) 1905 * lfs_sb_getifpb(fs); 1906 1907 error = 0; 1908 switch ((int)ap->a_command) { 1909 case LFCNSEGWAITALL_COMPAT_50: 1910 case LFCNSEGWAITALL_COMPAT: 1911 fsidp = NULL; 1912 /* FALLTHROUGH */ 1913 case LFCNSEGWAIT_COMPAT_50: 1914 case LFCNSEGWAIT_COMPAT: 1915 tvp50 = (struct timeval50 *)ap->a_data; 1916 timeval50_to_timeval(tvp50, &tv); 1917 tvp = &tv; 1918 goto segwait_common; 1919 1920 case LFCNSEGWAITALL: 1921 fsidp = NULL; 1922 /* FALLTHROUGH */ 1923 case LFCNSEGWAIT: 1924 tvp = (struct timeval *)ap->a_data; 1925 segwait_common: 1926 mutex_enter(&lfs_lock); 1927 ++fs->lfs_sleepers; 1928 mutex_exit(&lfs_lock); 1929 1930 error = lfs_segwait(fsidp, tvp); 1931 1932 mutex_enter(&lfs_lock); 1933 if (--fs->lfs_sleepers == 0) 1934 cv_broadcast(&fs->lfs_sleeperscv); 1935 mutex_exit(&lfs_lock); 1936 return error; 1937 1938 case LFCNBMAPV_COMPAT_70: 1939 case LFCNMARKV_COMPAT_70: 1940 blkvp70 = *(struct lfs_fcntl_markv_70 *)ap->a_data; 1941 1942 blkcnt = blkvp70.blkcnt; 1943 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1944 return (EINVAL); 1945 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1946 blkiov70 = lfs_malloc(fs, sizeof(BLOCK_INFO_70), LFS_NB_BLKIOV); 1947 for (i = 0; i < blkcnt; i++) { 1948 error = copyin(&blkvp70.blkiov[i], blkiov70, 1949 sizeof(*blkiov70)); 1950 if (error) { 1951 lfs_free(fs, blkiov70, LFS_NB_BLKIOV); 1952 lfs_free(fs, blkiov, 
LFS_NB_BLKIOV); 1953 return error; 1954 } 1955 block_info_from_70(&blkiov[i], blkiov70); 1956 } 1957 1958 mutex_enter(&lfs_lock); 1959 ++fs->lfs_sleepers; 1960 mutex_exit(&lfs_lock); 1961 if (ap->a_command == LFCNBMAPV) 1962 error = lfs_bmapv(l, fsidp, blkiov, blkcnt); 1963 else /* LFCNMARKV */ 1964 error = lfs_markv(l, fsidp, blkiov, blkcnt); 1965 if (error == 0) { 1966 for (i = 0; i < blkcnt; i++) { 1967 block_info_to_70(blkiov70, &blkiov[i]); 1968 error = copyout(blkiov70, &blkvp70.blkiov[i], 1969 sizeof(*blkiov70)); 1970 if (error) { 1971 break; 1972 } 1973 } 1974 } 1975 mutex_enter(&lfs_lock); 1976 if (--fs->lfs_sleepers == 0) 1977 cv_broadcast(&fs->lfs_sleeperscv); 1978 mutex_exit(&lfs_lock); 1979 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1980 return error; 1981 1982 case LFCNBMAPV: 1983 case LFCNMARKV: 1984 blkvp = *(struct lfs_fcntl_markv *)ap->a_data; 1985 1986 blkcnt = blkvp.blkcnt; 1987 if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT) 1988 return (EINVAL); 1989 blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); 1990 if ((error = copyin(blkvp.blkiov, blkiov, 1991 blkcnt * sizeof(BLOCK_INFO))) != 0) { 1992 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 1993 return error; 1994 } 1995 1996 mutex_enter(&lfs_lock); 1997 ++fs->lfs_sleepers; 1998 mutex_exit(&lfs_lock); 1999 if (ap->a_command == LFCNBMAPV) 2000 error = lfs_bmapv(l, fsidp, blkiov, blkcnt); 2001 else /* LFCNMARKV */ 2002 error = lfs_markv(l, fsidp, blkiov, blkcnt); 2003 if (error == 0) 2004 error = copyout(blkiov, blkvp.blkiov, 2005 blkcnt * sizeof(BLOCK_INFO)); 2006 mutex_enter(&lfs_lock); 2007 if (--fs->lfs_sleepers == 0) 2008 cv_broadcast(&fs->lfs_sleeperscv); 2009 mutex_exit(&lfs_lock); 2010 lfs_free(fs, blkiov, LFS_NB_BLKIOV); 2011 return error; 2012 2013 case LFCNRECLAIM: 2014 /* 2015 * Flush dirops and write Ifile, allowing empty segments 2016 * to be immediately reclaimed. 
2017 */ 2018 lfs_writer_enter(fs, "pndirop"); 2019 off = lfs_sb_getoffset(fs); 2020 lfs_seglock(fs, SEGM_FORCE_CKP | SEGM_CKP); 2021 lfs_flush_dirops(fs); 2022 LFS_CLEANERINFO(cip, fs, bp); 2023 oclean = lfs_ci_getclean(fs, cip); 2024 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 2025 lfs_segwrite(ap->a_vp->v_mount, SEGM_FORCE_CKP); 2026 fs->lfs_sp->seg_flags |= SEGM_PROT; 2027 /* Copy out write stats */ 2028 lws.direct = 0; 2029 lws.offset = lfs_btofsb(fs, fs->lfs_sp->bytes_written); 2030 *(struct lfs_write_stats *)ap->a_data = lws; 2031 lfs_segunlock(fs); 2032 lfs_writer_leave(fs); 2033 2034 #ifdef DEBUG 2035 LFS_CLEANERINFO(cip, fs, bp); 2036 DLOG((DLOG_CLEAN, "lfs_fcntl: reclaim wrote %" PRId64 2037 " blocks, cleaned %" PRId32 " segments (activesb %d)\n", 2038 lfs_sb_getoffset(fs) - off, 2039 lfs_ci_getclean(fs, cip) - oclean, 2040 fs->lfs_activesb)); 2041 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 2042 #else 2043 __USE(oclean); 2044 __USE(off); 2045 #endif 2046 2047 return 0; 2048 2049 case LFCNIFILEFH_COMPAT: 2050 /* Return the filehandle of the Ifile */ 2051 if ((error = kauth_authorize_system(l->l_cred, 2052 KAUTH_SYSTEM_FILEHANDLE, 2053 0, NULL, NULL, NULL)) != 0) 2054 return (error); 2055 fhp = (struct fhandle *)ap->a_data; 2056 fhp->fh_fsid = *fsidp; 2057 fh_size = 16; /* former VFS_MAXFIDSIZ */ 2058 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 2059 2060 case LFCNIFILEFH_COMPAT2: 2061 case LFCNIFILEFH: 2062 /* Return the filehandle of the Ifile */ 2063 fhp = (struct fhandle *)ap->a_data; 2064 fhp->fh_fsid = *fsidp; 2065 fh_size = sizeof(struct lfs_fhandle) - 2066 offsetof(fhandle_t, fh_fid); 2067 return lfs_vptofh(fs->lfs_ivnode, &(fhp->fh_fid), &fh_size); 2068 2069 case LFCNREWIND: 2070 /* Move lfs_offset to the lowest-numbered segment */ 2071 return lfs_rewind(fs, *(int *)ap->a_data); 2072 2073 case LFCNINVAL: 2074 /* Mark a segment SEGUSE_INVAL */ 2075 return lfs_invalidate(fs, *(int *)ap->a_data); 2076 2077 case LFCNRESIZE: 2078 /* Resize the 
filesystem */ 2079 return lfs_resize_fs(fs, *(int *)ap->a_data); 2080 2081 case LFCNWRAPSTOP: 2082 case LFCNWRAPSTOP_COMPAT: 2083 /* 2084 * Hold lfs_newseg at segment 0; if requested, sleep until 2085 * the filesystem wraps around. To support external agents 2086 * (dump, fsck-based regression test) that need to look at 2087 * a snapshot of the filesystem, without necessarily 2088 * requiring that all fs activity stops. 2089 */ 2090 if (fs->lfs_stoplwp == curlwp) 2091 return EALREADY; 2092 2093 mutex_enter(&lfs_lock); 2094 while (fs->lfs_stoplwp != NULL) 2095 cv_wait(&fs->lfs_stopcv, &lfs_lock); 2096 fs->lfs_stoplwp = curlwp; 2097 if (fs->lfs_nowrap == 0) 2098 log(LOG_NOTICE, "%s: disabled log wrap\n", 2099 lfs_sb_getfsmnt(fs)); 2100 ++fs->lfs_nowrap; 2101 if (*(int *)ap->a_data == 1 2102 || ap->a_command == LFCNWRAPSTOP_COMPAT) { 2103 log(LOG_NOTICE, "LFCNSTOPWRAP waiting for log wrap\n"); 2104 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 2105 "segwrap", 0, &lfs_lock); 2106 log(LOG_NOTICE, "LFCNSTOPWRAP done waiting\n"); 2107 if (error) { 2108 lfs_wrapgo(fs, VTOI(ap->a_vp), 0); 2109 } 2110 } 2111 mutex_exit(&lfs_lock); 2112 return 0; 2113 2114 case LFCNWRAPGO: 2115 case LFCNWRAPGO_COMPAT: 2116 /* 2117 * Having done its work, the agent wakes up the writer. 2118 * If the argument is 1, it sleeps until a new segment 2119 * is selected. 2120 */ 2121 mutex_enter(&lfs_lock); 2122 error = lfs_wrapgo(fs, VTOI(ap->a_vp), 2123 ap->a_command == LFCNWRAPGO_COMPAT ? 
1 : 2124 *((int *)ap->a_data)); 2125 mutex_exit(&lfs_lock); 2126 return error; 2127 2128 case LFCNWRAPPASS: 2129 if ((VTOI(ap->a_vp)->i_lfs_iflags & LFSI_WRAPWAIT)) 2130 return EALREADY; 2131 mutex_enter(&lfs_lock); 2132 if (fs->lfs_stoplwp != curlwp) { 2133 mutex_exit(&lfs_lock); 2134 return EALREADY; 2135 } 2136 if (fs->lfs_nowrap == 0) { 2137 mutex_exit(&lfs_lock); 2138 return EBUSY; 2139 } 2140 fs->lfs_wrappass = 1; 2141 wakeup(&fs->lfs_wrappass); 2142 /* Wait for the log to wrap, if asked */ 2143 if (*(int *)ap->a_data) { 2144 vref(ap->a_vp); 2145 VTOI(ap->a_vp)->i_lfs_iflags |= LFSI_WRAPWAIT; 2146 log(LOG_NOTICE, "LFCNPASS waiting for log wrap\n"); 2147 error = mtsleep(&fs->lfs_nowrap, PCATCH | PUSER, 2148 "segwrap", 0, &lfs_lock); 2149 log(LOG_NOTICE, "LFCNPASS done waiting\n"); 2150 VTOI(ap->a_vp)->i_lfs_iflags &= ~LFSI_WRAPWAIT; 2151 vrele(ap->a_vp); 2152 } 2153 mutex_exit(&lfs_lock); 2154 return error; 2155 2156 case LFCNWRAPSTATUS: 2157 mutex_enter(&lfs_lock); 2158 *(int *)ap->a_data = fs->lfs_wrapstatus; 2159 mutex_exit(&lfs_lock); 2160 return 0; 2161 2162 case LFCNFILESTATS: 2163 /* Retrieve fragmentation statistics from these inodes */ 2164 lfr = *(struct lfs_filestat_req *)ap->a_data; 2165 if (lfr.len < 0 || lfr.len > LFS_FILESTATS_MAXCNT) 2166 return EINVAL; 2167 if (lfr.ino < LFS_IFILE_INUM || lfr.len < 1 2168 || lfr.ino >= maxino || lfr.ino + lfr.len >= maxino) 2169 return EINVAL; 2170 fss = lfs_malloc(fs, lfr.len * sizeof(*fss), LFS_NB_BLKIOV); 2171 if ((error = copyin(lfr.fss, fss, 2172 lfr.len * sizeof(*fss))) != 0) { 2173 lfs_free(fs, fss, LFS_NB_BLKIOV); 2174 return error; 2175 } 2176 2177 for (i = 0; i < lfr.len; ++i) { 2178 error = lfs_filestats(fs, lfr.ino + i, &fss[i]); 2179 if (error == ENOENT) 2180 error = 0; 2181 if (error) 2182 break; 2183 } 2184 2185 if (error == 0) 2186 error = copyout(fss, lfr.fss, lfr.len * sizeof(*fss)); 2187 2188 lfs_free(fs, fss, LFS_NB_BLKIOV); 2189 return error; 2190 2191 case LFCNREWRITESEGS: 2192 /* Rewrite 
(clean) the listed segments */ 2193 snap = *(struct lfs_segnum_array *)ap->a_data; 2194 if (snap.len > LFS_REWRITE_MAXCNT) 2195 return EINVAL; 2196 sna = lfs_malloc(fs, snap.len * sizeof(int), LFS_NB_BLKIOV); 2197 if ((error = copyin(snap.segments, sna, 2198 snap.len * sizeof(int))) != 0) { 2199 lfs_free(fs, sna, LFS_NB_BLKIOV); 2200 return error; 2201 } 2202 2203 for (i = 0; i < snap.len; i++) 2204 if (sna[i] < 0 || sna[i] >= lfs_sb_getnseg(fs)) 2205 return EINVAL; 2206 2207 direct = offset = 0; 2208 error = lfs_rewrite_segments(fs, sna, snap.len, &direct, 2209 &offset, curlwp); 2210 lfs_free(fs, sna, LFS_NB_BLKIOV); 2211 2212 /* Copy out write stats */ 2213 snap.stats.direct = direct; 2214 snap.stats.offset = offset; 2215 *(struct lfs_write_stats *) 2216 &(((struct lfs_segnum_array *)ap->a_data)->stats) 2217 = snap.stats; 2218 return error; 2219 2220 case LFCNREWRITEFILE: 2221 case LFCNSCRAMBLE: 2222 /* Rewrite (coalesce) the listed inodes */ 2223 scramble = ((int)ap->a_command == LFCNSCRAMBLE); 2224 inotbl = *(struct lfs_inode_array *)ap->a_data; 2225 if (inotbl.len > LFS_REWRITE_MAXCNT) 2226 return EINVAL; 2227 inoa = lfs_malloc(fs, inotbl.len * sizeof(ino_t), 2228 LFS_NB_BLKIOV); 2229 if ((error = copyin(inotbl.inodes, inoa, 2230 inotbl.len * sizeof(ino_t))) != 0) { 2231 lfs_free(fs, inoa, LFS_NB_BLKIOV); 2232 return error; 2233 } 2234 2235 for (i = 0; i < inotbl.len; i++) { 2236 if (inoa[i] <= LFS_IFILE_INUM || inoa[i] >= maxino) 2237 return EINVAL; 2238 } 2239 2240 direct = offset = 0; 2241 error = lfs_rewrite_file(fs, inoa, inotbl.len, scramble, 2242 &direct, &offset); 2243 lfs_free(fs, inoa, LFS_NB_BLKIOV); 2244 2245 /* Copy out write stats */ 2246 inotbl.stats.direct = direct; 2247 inotbl.stats.offset = offset; 2248 *(struct lfs_write_stats *) 2249 &(((struct lfs_inode_array *)ap->a_data)->stats) 2250 = inotbl.stats; 2251 2252 return error; 2253 2254 case LFCNCLEANERINFO: 2255 /* 2256 * Get current CLEANERINFO information. 
2257 */ 2258 memset(&ci, 0, sizeof ci); 2259 ci.clean = lfs_sb_getnclean(fs); 2260 ci.dirty = lfs_sb_getnseg(fs) - lfs_sb_getnclean(fs); 2261 ci.bfree = lfs_sb_getbfree(fs); 2262 ci.avail = lfs_sb_getavail(fs) - fs->lfs_ravail 2263 - fs->lfs_favail; 2264 LFS_CLEANERINFO(cip, fs, bp); 2265 ci.flags = lfs_ci_getflags(fs, cip); 2266 brelse(bp, 0); 2267 *(CLEANERINFO64 *)ap->a_data = ci; 2268 2269 return 0; 2270 2271 case LFCNSEGUSE: 2272 /* 2273 * Retrieve SEGUSE information for one or more segments. 2274 */ 2275 if (lfs_sb_getversion(fs) == 1) 2276 return EINVAL; 2277 suap = *(struct lfs_seguse_array *)ap->a_data; 2278 if (suap.start < 0 2279 /* || suap.len < 0 */ 2280 || suap.len > LFS_SEGUSE_MAXCNT 2281 || suap.start >= lfs_sb_getnseg(fs)) 2282 return EINVAL; 2283 if (suap.start + suap.len >= lfs_sb_getnseg(fs)) { 2284 suap.len = lfs_sb_getnseg(fs) - suap.start; 2285 *(struct lfs_seguse_array *)ap->a_data = suap; 2286 } 2287 sua = lfs_malloc(fs, suap.len * sizeof *sua, LFS_NB_BLKIOV); 2288 2289 for (i = 0; i < suap.len; i++) { 2290 LFS_SEGENTRY(sup, fs, suap.start + i, bp); 2291 memcpy(sua + i, sup, sizeof(*sup)); 2292 brelse(bp, 0); 2293 } 2294 2295 error = copyout(sua, suap.seguse, suap.len * sizeof *sua); 2296 lfs_free(fs, sua, LFS_NB_BLKIOV); 2297 return error; 2298 2299 default: 2300 return genfs_fcntl(v); 2301 } 2302 return 0; 2303 } 2304 2305 /* 2306 * Report continuity statistics for this file. Two measures are provided: 2307 * the number of discontinuities, and the total length, in fragment units, 2308 * of all the gaps between contiguously allocated file extents. Only 2309 * direct blocks are considered. 2310 * 2311 * A single-block file will show zero for both measures, as will any file 2312 * that fits completely within its partial-segment. 
In general, the minimum
 * discontinuity count for any files will be N-1, where N is the number
 * of segments required to store the file, rounded up; and the minimum
 * total gap length will also be N, with only the partial-segment headers
 * breaking up the file data (indirect blocks are written at the end).
 *
 * Some files will be too large to be written in their entirety without
 * a checkpoint in the middle; those will have a higher minimum total gap
 * measure but about the same discontinuity count.
 *
 * The coalescing cleaner will use these statistics to identify files that
 * need to be rewritten to be contiguous on disk.
 */
static int
lfs_filestats(struct lfs *fs, ino_t ino, struct lfs_filestats *lfp)
{
	int error, step, run;
	daddr_t lbn, odaddr, daddr, diff, hiblk;
	struct vnode *vp;
	struct inode *ip;

	/* Start from a clean slate; only lfp->ino is meaningful on entry. */
	memset(lfp, 0, sizeof(*lfp));
	lfp->ino = ino;

	/*
	 * Contiguous blocks will be this far apart, in DEV_BSIZE units
	 * (the unit VOP_BMAP reports daddr in).
	 */
	step = lfs_sb_getbsize(fs) >> DEV_BSHIFT;

	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_SHARED, &vp);
	if (error)
		return error;
	ip = VTOI(vp);

	/* Highest block in this inode */
	hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	lfp->nblk = 0;

	odaddr = 0x0;
	for (lbn = 0; lbn <= hiblk; ++lbn) {
		error = VOP_BMAP(vp, lbn, NULL, &daddr, &run);
		if (error)
			break;

		/* Count all blocks (a run of "run" extra blocks follows) */
		if (daddr > 0)
			lfp->nblk += (run + 1);

		/* Holes and yet-unwritten data only count once */
		if (daddr == odaddr && daddr <= 0)
			continue;

		/*
		 * Count any discontinuities: a block that does not sit
		 * exactly "step" device blocks past its predecessor adds
		 * one to the count and the absolute gap to the sum.
		 */
		if (lbn > 0 && daddr != odaddr + step) {
			++lfp->dc_count;
			diff = daddr - odaddr;
			if (diff < 0)
				diff = -diff;
			lfp->dc_sum += diff;
		}
		/* Skip over the contiguous run VOP_BMAP already vouched for. */
		lbn += run;
		odaddr = daddr + run * step;
	}
	VOP_UNLOCK(vp);
	vrele(vp);

	/*
	 * NOTE(review): a VOP_BMAP failure breaks the loop above but is
	 * discarded here, so callers get partial statistics with a zero
	 * return -- confirm this is intentional.
	 */
	return 0;
}

/*
 * Rewrite a file in its entirety.
 *
 * Generally this would be done to coalesce a file that is scattered
 * around the disk; but if the "scramble" flag is set, instead rewrite
 * only the even-numbered blocks, which provides the opposite effect
 * for testing purposes.
 *
 * It is the caller's responsibility to check the bounds of the inode
 * numbers.
 *
 * On return, *directp has been set to the number of fsb directly
 * rewritten here, and *offsetp has been incremented by the total fsb
 * written under the segment lock (including segment-writer overhead).
 * Returns 0 on success; EAGAIN if any listed inode is involved in a
 * directory operation; otherwise an error from VFS_VGET or bread.
 */
static int
lfs_rewrite_file(struct lfs *fs, ino_t *inoa, int len, bool scramble,
    int *directp, int *offsetp)
{
	daddr_t hiblk, lbn;
	struct vnode *vp;
	struct inode *ip;
	struct buf *bp;
	int i, error, flags;

	*directp = 0;
	/* Exclude the cleaner, then take the segment lock for writing. */
	if ((error = lfs_cleanerlock(fs)) != 0)
		return error;
	flags = SEGM_PROT;
	lfs_seglock(fs, flags);
	for (i = 0; i < len; ++i) {
		error = VFS_VGET(fs->lfs_ivnode->v_mount, inoa[i], LK_EXCLUSIVE, &vp);
		if (error)
			goto out;

		ip = VTOI(vp);
		/* Don't touch vnodes participating in a dirop; let caller retry. */
		if ((vp->v_uflag & VU_DIROP) || (ip->i_flags & IN_ADIROP)) {
			VOP_UNLOCK(vp);
			vrele(vp);
			error = EAGAIN;
			goto out;
		}

		/* Highest block in this inode */
		hiblk = lfs_lblkno(fs, ip->i_size + lfs_sb_getbsize(fs) - 1) - 1;

		for (lbn = 0; lbn <= hiblk; ++lbn) {
			/* In scramble mode, rewrite only even-numbered blocks. */
			if (scramble && (lbn & 0x01))
				continue;

			/* Flush accumulated dirty data before we run short. */
			if (lfs_needsflush(fs)) {
				lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
			}

			error = bread(vp, lbn, lfs_blksize(fs, ip, lbn), 0, &bp);
			if (error)
				break;

			/* bp->b_cflags |= BC_INVAL; */
			lfs_bwrite_ext(bp, (flags & SEGM_CLEAN ? BW_CLEAN : 0));
			*directp += lfs_btofsb(fs, bp->b_bcount);
		}

		/* Done with this vnode */
		VOP_UNLOCK(vp);
		vrele(vp);
		/* NOTE(review): a bread failure aborts the remaining inodes. */
		if (error)
			break;
	}
 out:
	/* Push everything we queued, then account for what was written. */
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	*offsetp += lfs_btofsb(fs, fs->lfs_sp->bytes_written);
	lfs_segunlock(fs);
	lfs_cleanerunlock(fs);

	return error;
}

/*
 * Return the last logical file offset that should be written for this file
 * if we're doing a write that ends at "size".  If writing, we need to know
 * about sizes on disk, i.e. fragments if there are any; if reading, we need
 * to know about entire blocks.
 */
void
lfs_gop_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
{
	struct inode *ip = VTOI(vp);
	struct lfs *fs = ip->i_lfs;
	daddr_t olbn, nlbn;

	olbn = lfs_lblkno(fs, ip->i_size);
	nlbn = lfs_lblkno(fs, size);
	/*
	 * Round to a fragment only for on-disk sizes within the direct
	 * block range that do not extend the file past its current last
	 * block; everything else rounds up to a whole block.
	 */
	if (!(flags & GOP_SIZE_MEM) && nlbn < ULFS_NDADDR && olbn <= nlbn) {
		*eobp = lfs_fragroundup(fs, size);
	} else {
		*eobp = lfs_blkroundup(fs, size);
	}
}

#ifdef DEBUG
void lfs_dump_vop(void *);

/*
 * Debug aid: print the vnode (under DDB) and dump the on-disk inode
 * for the vnode in a putpages argument block.
 */
void
lfs_dump_vop(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;

	struct inode *ip = VTOI(ap->a_vp);
	struct lfs *fs = ip->i_lfs;

#ifdef DDB
	vfs_vnode_print(ap->a_vp, 0, printf);
#endif
	lfs_dump_dinode(fs, ip->i_din);
}
#endif

/*
 * lfs_mmap: refuse mapping of the ifile; defer everything else to genfs.
 */
int
lfs_mmap(void *v)
{
	struct vop_mmap_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;

	/* The ifile is written only by the segment writer; don't map it. */
	if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM)
		return EOPNOTSUPP;
	return genfs_mmap(v);
}

/*
 * Extended attributes are not supported on ULFS1 and not yet
 * implemented on ULFS2, so every path below returns EOPNOTSUPP;
 * the fstype check is kept so the two cases stay distinguishable.
 */
static int
lfs_openextattr(void *v)
{
	struct vop_openextattr_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

/*
 * Close an extended-attribute access session; see lfs_openextattr.
 */
static int
lfs_closeextattr(void *v)
{
	struct vop_closeextattr_args /* {
		struct vnode *a_vp;
		int a_commit;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	/* Not supported for ULFS1 file systems. */
	if (ump->um_fstype == ULFS1)
		return (EOPNOTSUPP);

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

/*
 * Retrieve an extended attribute.  On ULFS1 this is delegated to
 * ulfs_getextattr when LFS_EXTATTR is configured.
 */
static int
lfs_getextattr(void *v)
{
	struct vop_getextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_getextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

/*
 * Set an extended attribute.  On ULFS1 this is delegated to
 * ulfs_setextattr when LFS_EXTATTR is configured.
 */
static int
lfs_setextattr(void *v)
{
	struct vop_setextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		const char *a_name;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_setextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

/*
 * List extended attributes.  On ULFS1 this is delegated to
 * ulfs_listextattr when LFS_EXTATTR is configured.
 */
static int
lfs_listextattr(void *v)
{
	struct vop_listextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		struct uio *a_uio;
		size_t *a_size;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_listextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}

/*
 * Delete an extended attribute.  On ULFS1 this is delegated to
 * ulfs_deleteextattr when LFS_EXTATTR is configured.
 */
static int
lfs_deleteextattr(void *v)
{
	struct vop_deleteextattr_args /* {
		struct vnode *a_vp;
		int a_attrnamespace;
		kauth_cred_t a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip;
	struct ulfsmount *ump;
	int error;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	ip = VTOI(vp);
	ump = ip->i_ump;

	if (ump->um_fstype == ULFS1) {
#ifdef LFS_EXTATTR
		error = ulfs_deleteextattr(ap);
#else
		error = EOPNOTSUPP;
#endif
		return error;
	}

	/* XXX Not implemented for ULFS2 file systems. */
	return (EOPNOTSUPP);
}