/*	$NetBSD: segwrite.c,v 1.50 2026/01/05 05:02:47 perseant Exp $	*/
/*-
 * Copyright (c) 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_segment.c	8.10 (Berkeley) 6/10/95
 */

/*
 * Partial segment writer, taken from the kernel and adapted for userland.
63 */ 64 #include <sys/types.h> 65 #include <sys/param.h> 66 #include <sys/time.h> 67 #include <sys/buf.h> 68 #include <sys/mount.h> 69 70 /* Override certain things to make <ufs/lfs/lfs.h> work */ 71 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */ 72 #define vnode uvnode 73 #define buf ubuf 74 #define panic call_panic 75 76 #include <ufs/lfs/lfs.h> 77 #include <ufs/lfs/lfs_accessors.h> 78 #include <ufs/lfs/lfs_inode.h> 79 80 #include <assert.h> 81 #include <stdio.h> 82 #include <stdlib.h> 83 #include <string.h> 84 #include <err.h> 85 #include <errno.h> 86 #include <util.h> 87 88 #include "bufcache.h" 89 #include "extern.h" 90 #include "lfs_user.h" 91 #include "segwrite.h" 92 93 /* Compatibility definitions */ 94 off_t written_bytes = 0; 95 off_t written_data = 0; 96 off_t written_indir = 0; 97 off_t written_dev = 0; 98 int written_inodes = 0; 99 100 /* Global variables */ 101 time_t write_time; 102 103 static void lfs_shellsort(struct lfs *, 104 struct ubuf **, union lfs_blocks *, int, int); 105 106 /* 107 * Logical block number match routines used when traversing the dirty block 108 * chain. 109 */ 110 int 111 lfs_match_data(struct lfs * fs, struct ubuf * bp) 112 { 113 return (bp->b_lblkno >= 0); 114 } 115 116 int 117 lfs_match_indir(struct lfs * fs, struct ubuf * bp) 118 { 119 daddr_t lbn; 120 121 lbn = bp->b_lblkno; 122 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0); 123 } 124 125 int 126 lfs_match_dindir(struct lfs * fs, struct ubuf * bp) 127 { 128 daddr_t lbn; 129 130 lbn = bp->b_lblkno; 131 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1); 132 } 133 134 int 135 lfs_match_tindir(struct lfs * fs, struct ubuf * bp) 136 { 137 daddr_t lbn; 138 139 lbn = bp->b_lblkno; 140 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2); 141 } 142 143 /* 144 * Do a checkpoint. 
145 */ 146 int 147 lfs_segwrite(struct lfs * fs, int flags) 148 { 149 struct inode *ip; 150 struct segment *sp; 151 struct uvnode *vp; 152 SEGSUM *ssp; 153 int redo; 154 155 lfs_seglock(fs, flags | SEGM_CKP); 156 sp = fs->lfs_sp; 157 158 lfs_writevnodes(fs, sp, VN_REG); 159 lfs_writevnodes(fs, sp, VN_DIROP); 160 ssp = (SEGSUM *)sp->segsum; 161 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT)); 162 163 do { 164 vp = fs->lfs_ivnode; 165 fs->lfs_flags &= ~LFS_IFDIRTY; 166 ip = VTOI(vp); 167 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || lfs_sb_getidaddr(fs) <= 0) 168 lfs_writefile(fs, sp, vp); 169 170 redo = lfs_writeinode(fs, sp, ip); 171 redo += lfs_writeseg(fs, sp); 172 redo += (fs->lfs_flags & LFS_IFDIRTY); 173 } while (redo); 174 175 lfs_segunlock(fs); 176 #if 0 177 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n", 178 written_bytes, (ulfs_daddr_t)lfs_btofsb(fs, written_bytes)); 179 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n", 180 written_data, (ulfs_daddr_t)lfs_btofsb(fs, written_data)); 181 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n", 182 written_indir, (ulfs_daddr_t)lfs_btofsb(fs, written_indir)); 183 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n", 184 written_dev, (ulfs_daddr_t)lfs_btofsb(fs, written_dev)); 185 printf("wrote %d inodes (%" PRId32 " fsb)\n", 186 written_inodes, lfs_btofsb(fs, written_inodes * fs->lfs_ibsize)); 187 #endif 188 return 0; 189 } 190 191 /* 192 * Write the dirty blocks associated with a vnode. 
*/
void
lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp)
{
	struct ubuf *bp;
	FINFO *fip;
	struct inode *ip;
	IFILE *ifp;
	SEGSUM *ssp;

	ip = VTOI(vp);

	/*
	 * Flush the partial segment first if it cannot hold one more
	 * block, or its summary cannot hold a FINFO plus one block pointer.
	 */
	if (sp->seg_bytes_left < lfs_sb_getbsize(fs) ||
	    sp->sum_bytes_left < FINFOSIZE(fs) + LFS_BLKPTRSIZE(fs))
		(void) lfs_writeseg(fs, sp);

	/* Reserve a FINFO for this file and count it in the summary. */
	sp->sum_bytes_left -= FINFOSIZE(fs);
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1);

	if (vp->v_uflag & VU_DIROP) {
		lfs_ss_setflags(fs, ssp,
		    lfs_ss_getflags(fs, ssp) | (SS_DIROP | SS_CONT));
	}

	/*
	 * Fill in the FINFO header: no blocks yet, this inode's number,
	 * and the version taken from the inode's ifile entry.
	 */
	fip = sp->fip;
	lfs_fi_setnblocks(fs, fip, 0);
	lfs_fi_setino(fs, fip, ip->i_number);
	LFS_IENTRY(ifp, fs, lfs_fi_getino(fs, fip), bp);
	lfs_fi_setversion(fs, fip, lfs_if_getversion(fs, ifp));
	brelse(bp, 0);

	/* Gather the dirty buffers one class at a time. */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
	lfs_gather(fs, sp, vp, lfs_match_tindir);

	/*
	 * sp->fip is re-read because lfs_gather() may have flushed the
	 * segment.  If the FINFO ended up with blocks, step to the next
	 * one; otherwise give back the space and the count reserved above.
	 */
	fip = sp->fip;
	if (lfs_fi_getnblocks(fs, fip) != 0) {
		sp->fip = NEXT_FINFO(fs, fip);
		lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip);
	} else {
		/* XXX shouldn't this update sp->fip? */
		sp->sum_bytes_left += FINFOSIZE(fs);
		lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) - 1);
	}
}

/*
 * Copy ip's dinode into the inode block of the partial segment under
 * construction, opening a new inode block first when none is open.
 * The inode's new address is then recorded (in the superblock for the
 * ifile, in the inode's ifile entry otherwise) and the dinode's size is
 * subtracted from the usage count of the segment holding the old copy.
 *
 * Returns nonzero only when the inode is the ifile's and the segment
 * usage buffer touched at the end was not marked B_GATHERED; in that
 * case LFS_IFDIRTY is also set in fs->lfs_flags.  Returns 0 otherwise.
 */
int
lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip)
{
	struct ubuf *bp, *ibp;
	union lfs_dinode *cdp;
	IFILE *ifp;
	SEGUSE *sup;
	SEGSUM *ssp;
	daddr_t daddr;
	ino_t ino;
	IINFO *iip;
	int i, fsb = 0;		/* fsb is never changed from 0 here */
	int redo_ifile = 0;
	struct timespec ts;
	int gotblk = 0;		/* set when this call obtained sp->ibp */

	/* Allocate a new inode block if necessary. */
	if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) &&
	    sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < lfs_sb_getibsize(fs) ||
		    sp->sum_bytes_left < LFS_BLKPTRSIZE(fs))
			(void) lfs_writeseg(fs, sp);

		/* Get next inode block. */
		daddr = lfs_sb_getoffset(fs);
		lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
		sp->ibp = *sp->cbpp++ =
		    getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr),
		    lfs_sb_getibsize(fs));
		sp->ibp->b_flags |= B_GATHERED;
		gotblk++;

		/* Zero out inode numbers */
		for (i = 0; i < LFS_INOPB(fs); ++i) {
			union lfs_dinode *tmpdip;

			tmpdip = DINO_IN_BLOCK(fs, sp->ibp->b_data, i);
			lfs_dino_setinumber(fs, tmpdip, 0);
		}

		/* The inode block is not a file block: step start_bpp past it. */
		++sp->start_bpp;
		lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
		/* Set remaining space counters. */
		sp->seg_bytes_left -= lfs_sb_getibsize(fs);
		sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs);

		/* Store the address in the segment summary. */
		iip = NTH_IINFO(fs, sp->segsum, sp->ninodes / LFS_INOPB(fs));
		lfs_ii_setblock(fs, iip, daddr);
	}
	/* Update the inode times and copy the inode onto the inode page. */
	ts.tv_nsec = 0;
	ts.tv_sec = write_time;
	/* XXX kludge --- don't redirty the ifile just to put times on it */
	if (ip->i_number != LFS_IFILE_INUM)
		LFS_ITIMES(ip, &ts, &ts, &ts);

	/*
	 * If this is the Ifile, and we've already written the Ifile in this
	 * partial segment, just overwrite it (it's not on disk yet) and
	 * continue.
	 *
	 * XXX we know that the bp that we get the second time around has
	 * already been gathered.
	 */
	if (ip->i_number == LFS_IFILE_INUM && sp->idp) {
		lfs_copy_dinode(fs, sp->idp, ip->i_din);
		ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din);
		return 0;
	}
	/* Copy the dinode into slot (ninodes % LFS_INOPB) of the inode block. */
	bp = sp->ibp;
	cdp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs));
	lfs_copy_dinode(fs, cdp, ip->i_din);

	/* If all blocks are going to disk, update the "size on disk" */
	ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din);

	/* Remember where the ifile's dinode sits so a later call can overwrite it. */
	if (ip->i_number == LFS_IFILE_INUM)	/* We know sp->idp == NULL */
		sp->idp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs));
	if (gotblk) {
		LFS_LOCK_BUF(bp);
		assert(!(bp->b_flags & B_INVAL));
		brelse(bp, 0);
	}
	/* Increment inode count in segment summary block. */
	ssp = (SEGSUM *)sp->segsum;
	lfs_ss_setninos(fs, ssp, lfs_ss_getninos(fs, ssp) + 1);

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % LFS_INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = lfs_sb_getidaddr(fs);
		lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, bp->b_blkno));
		sbdirty();
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = lfs_if_getdaddr(fs, ifp);
		lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, bp->b_blkno) + fsb);
		(void)LFS_BWRITE_LOG(ibp);	/* Ifile */
	}

	/*
	 * Account the inode: it no longer belongs to its former segment,
	 * though it will not belong to the new segment until that segment
	 * is actually written.
	 */
	if (daddr != LFS_UNUSED_DADDR) {
		u_int32_t oldsn = lfs_dtosn(fs, daddr);
		/* From here on bp is the segment usage buffer, not the inode block. */
		LFS_SEGENTRY(sup, fs, oldsn, bp);
		sup->su_nbytes -= DINOSIZE(fs);
		redo_ifile =
		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		if (redo_ifile)
			fs->lfs_flags |= LFS_IFDIRTY;
		LFS_WRITESEGENTRY(sup, fs, oldsn, bp);	/* Ifile */
	}
	return redo_ifile;
}

/*
 * Add one buffer to the partial segment described by sp.
 *
 * Returns 1, without adding bp, when the summary or the segment lacked
 * room: the pending metadata is updated, the segment is written, and the
 * current file (sp->vp) is re-entered in the fresh summary with its
 * previous FINFO version.  Returns 0 once bp has been appended to the
 * buffer list and its block number(s) to the current FINFO.
 */
int
lfs_gatherblock(struct segment * sp, struct ubuf * bp)
{
	struct lfs *fs;
	SEGSUM *ssp;
	int version;
	int j, blksinblk;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
	fs = sp->fs;
	/* Number of fs-block-sized pieces in bp; each needs a block pointer. */
	blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs));
	if (sp->sum_bytes_left < LFS_BLKPTRSIZE(fs) * blksinblk ||
	    sp->seg_bytes_left < bp->b_bcount) {
		lfs_updatemeta(sp);

		/* Save the version: lfs_writeseg() may start a new summary. */
		version = lfs_fi_getversion(fs, sp->fip);
		(void) lfs_writeseg(fs, sp);

		lfs_fi_setversion(fs, sp->fip, version);
		lfs_fi_setino(fs, sp->fip, VTOI(sp->vp)->i_number);
		/* Add the current file to the segment summary. */
		ssp = (SEGSUM *)sp->segsum;
		lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1);
		sp->sum_bytes_left -= FINFOSIZE(fs);

		return 1;
	}
	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	/* bp->b_flags &= ~B_DONE; */

	*sp->cbpp++ = bp;
	/* Record consecutive logical block numbers, one per piece of bp. */
	for (j = 0; j < blksinblk; j++) {
		unsigned bn;

		bn = lfs_fi_getnblocks(fs, sp->fip);
		lfs_fi_setnblocks(fs, sp->fip, bn + 1);
		lfs_fi_setblock(fs, sp->fip, bn, bp->b_lblkno + j);
	}

	sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs) * blksinblk;
	sp->seg_bytes_left -= bp->b_bcount;
	return 0;
}

/*
 * Gather into sp every buffer on vp's dirty list that match() accepts
 * and that is neither B_BUSY nor already B_GATHERED, then update the
 * metadata for what was gathered.  Returns the number of buffers added.
 */
int
lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *))
{
	struct ubuf *bp, *nbp;
	int count = 0;

	sp->vp = vp;
loop:
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);

		assert(bp->b_flags & B_DELWRI);
		if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) {
			continue;
		}
		/*
		 * A nonzero return means the segment was flushed and bp was
		 * not added; rescan the list from its head.
		 */
		if (lfs_gatherblock(sp, bp)) {
			goto loop;
		}
		count++;
	}

	lfs_updatemeta(sp);
	sp->vp = NULL;
	return count;
}


/*
 * Change the given block's address to ndaddr, finding its previous
 * location using ulfs_bmaparray().
 *
 * Account for this change in the segment table.
448 */ 449 static void 450 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn, 451 daddr_t ndaddr, int size) 452 { 453 SEGUSE *sup; 454 struct ubuf *bp; 455 struct indir a[ULFS_NIADDR + 2], *ap; 456 struct inode *ip; 457 struct uvnode *vp; 458 daddr_t daddr, ooff; 459 int num, error; 460 int osize; 461 int frags, ofrags; 462 463 vp = sp->vp; 464 ip = VTOI(vp); 465 466 error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num); 467 if (error) 468 errx(EXIT_FAILURE, "%s: ulfs_bmaparray returned %d looking up lbn %" 469 PRId64 "", __func__, error, lbn); 470 if (daddr > 0) 471 daddr = LFS_DBTOFSB(fs, daddr); 472 473 frags = lfs_numfrags(fs, size); 474 switch (num) { 475 case 0: 476 ooff = lfs_dino_getdb(fs, ip->i_din, lbn); 477 if (ooff == UNWRITTEN) 478 lfs_dino_setblocks(fs, ip->i_din, 479 lfs_dino_getblocks(fs, ip->i_din) + frags); 480 else { 481 /* possible fragment truncation or extension */ 482 ofrags = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]); 483 lfs_dino_setblocks(fs, ip->i_din, 484 lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags)); 485 } 486 lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr); 487 break; 488 case 1: 489 ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off); 490 if (ooff == UNWRITTEN) 491 lfs_dino_setblocks(fs, ip->i_din, 492 lfs_dino_getblocks(fs, ip->i_din) + frags); 493 lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr); 494 break; 495 default: 496 ap = &a[num - 1]; 497 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp)) 498 errx(EXIT_FAILURE, "%s: bread bno %" PRId64, __func__, 499 ap->in_lbn); 500 501 ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off); 502 if (ooff == UNWRITTEN) 503 lfs_dino_setblocks(fs, ip->i_din, 504 lfs_dino_getblocks(fs, ip->i_din) + frags); 505 lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr); 506 (void) VOP_BWRITE(bp); 507 } 508 509 /* 510 * Update segment usage information, based on old size 511 * and location. 
512 */ 513 if (daddr > 0) { 514 u_int32_t oldsn = lfs_dtosn(fs, daddr); 515 if (lbn >= 0 && lbn < ULFS_NDADDR) 516 osize = ip->i_lfs_fragsize[lbn]; 517 else 518 osize = lfs_sb_getbsize(fs); 519 LFS_SEGENTRY(sup, fs, oldsn, bp); 520 sup->su_nbytes -= osize; 521 if (!(bp->b_flags & B_GATHERED)) 522 fs->lfs_flags |= LFS_IFDIRTY; 523 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 524 } 525 /* 526 * Now that this block has a new address, and its old 527 * segment no longer owns it, we can forget about its 528 * old size. 529 */ 530 if (lbn >= 0 && lbn < ULFS_NDADDR) 531 ip->i_lfs_fragsize[lbn] = size; 532 } 533 534 /* 535 * Update the metadata that points to the blocks listed in the FINFO 536 * array. 537 */ 538 void 539 lfs_updatemeta(struct segment * sp) 540 { 541 struct ubuf *sbp; 542 struct lfs *fs; 543 struct uvnode *vp; 544 daddr_t lbn; 545 int i, nblocks, num; 546 int frags; 547 int bytesleft, size; 548 union lfs_blocks tmpptr; 549 550 fs = sp->fs; 551 vp = sp->vp; 552 553 /* 554 * This code was cutpasted from the kernel. See the 555 * corresponding comment in lfs_segment.c. 556 */ 557 #if 0 558 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 559 #else 560 lfs_blocks_fromvoid(fs, &tmpptr, (void *)NEXT_FINFO(fs, sp->fip)); 561 nblocks = lfs_blocks_sub(fs, &tmpptr, &sp->start_lbp); 562 //nblocks_orig = nblocks; 563 #endif 564 565 if (vp == NULL || nblocks == 0) 566 return; 567 568 /* 569 * This count may be high due to oversize blocks from lfs_gop_write. 570 * Correct for this. (XXX we should be able to keep track of these.) 571 */ 572 for (i = 0; i < nblocks; i++) { 573 if (sp->start_bpp[i] == NULL) { 574 printf("nblocks = %d, not %d\n", i, nblocks); 575 nblocks = i; 576 break; 577 } 578 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs)); 579 nblocks -= num - 1; 580 } 581 582 /* 583 * Sort the blocks. 
584 */ 585 lfs_shellsort(fs, sp->start_bpp, &sp->start_lbp, nblocks, lfs_sb_getbsize(fs)); 586 587 /* 588 * Record the length of the last block in case it's a fragment. 589 * If there are indirect blocks present, they sort last. An 590 * indirect block will be lfs_bsize and its presence indicates 591 * that you cannot have fragments. 592 */ 593 lfs_fi_setlastlength(fs, sp->fip, ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 594 lfs_sb_getbmask(fs)) + 1); 595 596 /* 597 * Assign disk addresses, and update references to the logical 598 * block and the segment usage information. 599 */ 600 for (i = nblocks; i--; ++sp->start_bpp) { 601 sbp = *sp->start_bpp; 602 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 603 604 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs)); 605 606 /* 607 * If we write a frag in the wrong place, the cleaner won't 608 * be able to correctly identify its size later, and the 609 * segment will be uncleanable. (Even worse, it will assume 610 * that the indirect block that actually ends the list 611 * is of a smaller size!) 612 */ 613 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0) 614 errx(EXIT_FAILURE, "%s: fragment is not last block", __func__); 615 616 /* 617 * For each subblock in this possibly oversized block, 618 * update its address on disk. 619 */ 620 for (bytesleft = sbp->b_bcount; bytesleft > 0; 621 bytesleft -= lfs_sb_getbsize(fs)) { 622 size = MIN(bytesleft, lfs_sb_getbsize(fs)); 623 frags = lfs_numfrags(fs, size); 624 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 625 lfs_blocks_inc(fs, &sp->start_lbp); 626 lfs_update_single(fs, sp, lbn, lfs_sb_getoffset(fs), size); 627 lfs_sb_addoffset(fs, frags); 628 } 629 630 } 631 } 632 633 /* 634 * Start a new segment. 635 */ 636 int 637 lfs_initseg(struct lfs * fs) 638 { 639 struct segment *sp; 640 SEGUSE *sup; 641 SEGSUM *ssp; 642 struct ubuf *bp, *sbp; 643 int repeat; 644 645 sp = fs->lfs_sp; 646 647 repeat = 0; 648 649 /* Advance to the next segment. 
*/ 650 if (!LFS_PARTIAL_FITS(fs)) { 651 /* lfs_avail eats the remaining space */ 652 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - 653 lfs_sb_getcurseg(fs))); 654 lfs_newseg(fs); 655 repeat = 1; 656 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs)); 657 658 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 659 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs)); 660 661 /* 662 * If the segment contains a superblock, update the offset 663 * and summary address to skip over it. 664 */ 665 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 666 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 667 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD)); 668 sp->seg_bytes_left -= LFS_SBPAD; 669 } 670 brelse(bp, 0); 671 /* Segment zero could also contain the labelpad */ 672 if (lfs_sb_getversion(fs) > 1 && sp->seg_number == 0 && 673 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) { 674 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs)); 675 sp->seg_bytes_left -= LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs)); 676 } 677 } else { 678 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 679 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) - 680 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs))); 681 } 682 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs)); 683 684 sp->fs = fs; 685 sp->ibp = NULL; 686 sp->idp = NULL; 687 sp->ninodes = 0; 688 689 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 690 sp->cbpp = sp->bpp; 691 sbp = *sp->cbpp = getblk(fs->lfs_devvp, 692 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs)); 693 sp->segsum = sbp->b_data; 694 memset(sp->segsum, 0, lfs_sb_getsumsize(fs)); 695 sp->start_bpp = ++sp->cbpp; 696 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 697 698 /* Set point to SEGSUM, initialize it. 
*/ 699 ssp = sp->segsum; 700 lfs_ss_setnext(fs, ssp, lfs_sb_getnextseg(fs)); 701 lfs_ss_setnfinfo(fs, ssp, 0); 702 lfs_ss_setninos(fs, ssp, 0); 703 lfs_ss_setmagic(fs, ssp, SS_MAGIC); 704 705 /* Set pointer to first FINFO, initialize it. */ 706 sp->fip = SEGSUM_FINFOBASE(fs, ssp); 707 lfs_fi_setnblocks(fs, sp->fip, 0); 708 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip); 709 lfs_fi_setlastlength(fs, sp->fip, 0); 710 711 sp->seg_bytes_left -= lfs_sb_getsumsize(fs); 712 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs); 713 714 LFS_LOCK_BUF(sbp); 715 brelse(sbp, 0); 716 return repeat; 717 } 718 719 /* 720 * Return the next segment to write. 721 */ 722 void 723 lfs_newseg(struct lfs * fs) 724 { 725 CLEANERINFO *cip; 726 SEGUSE *sup; 727 struct ubuf *bp; 728 int curseg, isdirty, sn; 729 730 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 731 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 732 sup->su_nbytes = 0; 733 sup->su_nsums = 0; 734 sup->su_ninos = 0; 735 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 736 737 LFS_CLEANERINFO(cip, fs, bp); 738 lfs_ci_shiftcleantodirty(fs, cip, 1); 739 lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip)); 740 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 741 742 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs)); 743 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs)); 744 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) { 745 sn = (sn + 1) % lfs_sb_getnseg(fs); 746 if (sn == curseg) 747 errx(EXIT_FAILURE, "%s: no clean segments", __func__); 748 LFS_SEGENTRY(sup, fs, sn, bp); 749 isdirty = sup->su_flags & SEGUSE_DIRTY; 750 brelse(bp, 0); 751 752 if (!isdirty) 753 break; 754 } 755 756 ++fs->lfs_nactive; 757 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn)); 758 } 759 760 761 int 762 lfs_writeseg(struct lfs * fs, struct segment * sp) 763 { 764 struct ubuf **bpp, *bp; 765 SEGUSE *sup; 766 SEGSUM *ssp; 767 char *datap, *dp; 768 int i; 769 int do_again, nblocks, byteoffset; 770 size_t 
el_size; 771 u_short ninos; 772 size_t sumstart; 773 struct uvnode *devvp; 774 775 /* 776 * If there are no buffers other than the segment summary to write 777 * and it is not a checkpoint, don't do anything. On a checkpoint, 778 * even if there aren't any buffers, you need to write the superblock. 779 */ 780 nblocks = sp->cbpp - sp->bpp; 781 #if 0 782 printf("write %d blocks at 0x%x\n", 783 nblocks, (int)LFS_DBTOFSB(fs, (*sp->bpp)->b_blkno)); 784 #endif 785 if (nblocks == 1) 786 return 0; 787 788 devvp = fs->lfs_devvp; 789 790 /* Update the segment usage information. */ 791 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 792 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 793 794 /* Loop through all blocks, except the segment summary. */ 795 for (bpp = sp->bpp; ++bpp < sp->cbpp;) { 796 if ((*bpp)->b_vp != devvp) { 797 sup->su_nbytes += (*bpp)->b_bcount; 798 } 799 assert(lfs_dtosn(fs, LFS_DBTOFSB(fs, (*bpp)->b_blkno)) == sp->seg_number); 800 } 801 802 ssp = (SEGSUM *) sp->segsum; 803 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) | SS_RFW); 804 805 ninos = (lfs_ss_getninos(fs, ssp) + LFS_INOPB(fs) - 1) / LFS_INOPB(fs); 806 sup->su_nbytes += lfs_ss_getninos(fs, ssp) * DINOSIZE(fs); 807 808 if (lfs_sb_getversion(fs) == 1) 809 sup->su_olastmod = write_time; 810 else 811 sup->su_lastmod = write_time; 812 sup->su_ninos += ninos; 813 ++sup->su_nsums; 814 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, lfs_sb_getsumsize(fs)) + lfs_btofsb(fs, ninos * 815 lfs_sb_getibsize(fs)))); 816 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 817 818 do_again = !(bp->b_flags & B_GATHERED); 819 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 820 821 /* 822 * Compute checksum across data and then across summary; the first 823 * block (the summary block) is skipped. Set the create time here 824 * so that it's guaranteed to be later than the inode mod times. 
825 */ 826 if (lfs_sb_getversion(fs) == 1) 827 el_size = sizeof(u_long); 828 else 829 el_size = sizeof(u_int32_t); 830 datap = dp = emalloc(nblocks * el_size); 831 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 832 ++bpp; 833 /* Loop through gop_write cluster blocks */ 834 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 835 byteoffset += lfs_sb_getbsize(fs)) { 836 memcpy(dp, (*bpp)->b_data + byteoffset, el_size); 837 dp += el_size; 838 } 839 bremfree(*bpp); 840 (*bpp)->b_flags |= B_BUSY; 841 } 842 if (lfs_sb_getversion(fs) == 1) 843 lfs_ss_setocreate(fs, ssp, write_time); 844 else { 845 lfs_ss_setcreate(fs, ssp, write_time); 846 lfs_sb_addserial(fs, 1); 847 lfs_ss_setserial(fs, ssp, lfs_sb_getserial(fs)); 848 lfs_ss_setident(fs, ssp, lfs_sb_getident(fs)); 849 } 850 /* Set the summary block busy too */ 851 bremfree(*(sp->bpp)); 852 (*(sp->bpp))->b_flags |= B_BUSY; 853 854 lfs_ss_setdatasum(fs, ssp, cksum(datap, (nblocks - 1) * el_size)); 855 sumstart = lfs_ss_getsumstart(fs); 856 lfs_ss_setsumsum(fs, ssp, 857 cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart)); 858 free(datap); 859 datap = dp = NULL; 860 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) + 861 lfs_btofsb(fs, lfs_sb_getsumsize(fs)))); 862 863 if (devvp == NULL) 864 errx(EXIT_FAILURE, "devvp is NULL"); 865 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) { 866 bp = *bpp; 867 #if 0 868 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n", 869 nblocks - i, bp, bp->b_flags, bp->b_blkno); 870 printf(" vp = %p\n", bp->b_vp); 871 if (bp->b_vp != fs->lfs_devvp) 872 printf(" ino = %d lbn = %" PRId64 "\n", 873 VTOI(bp->b_vp)->i_number, bp->b_lblkno); 874 #endif 875 if (bp->b_vp == fs->lfs_devvp) 876 written_dev += bp->b_bcount; 877 else { 878 if (bp->b_lblkno >= 0) 879 written_data += bp->b_bcount; 880 else 881 written_indir += bp->b_bcount; 882 } 883 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR | 884 B_LOCKED); 885 bwrite(bp); 886 written_bytes += bp->b_bcount; 887 
} 888 written_inodes += ninos; 889 890 return (lfs_initseg(fs) || do_again); 891 } 892 893 /* 894 * Our own copy of shellsort. XXX use qsort or heapsort. 895 */ 896 static void 897 lfs_shellsort(struct lfs *fs, 898 struct ubuf ** bp_array, union lfs_blocks *lb_array, int nmemb, int size) 899 { 900 static int __rsshell_increments[] = {4, 1, 0}; 901 int incr, *incrp, t1, t2; 902 struct ubuf *bp_temp; 903 904 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 905 for (t1 = incr; t1 < nmemb; ++t1) 906 for (t2 = t1 - incr; t2 >= 0;) 907 if ((u_int32_t) bp_array[t2]->b_lblkno > 908 (u_int32_t) bp_array[t2 + incr]->b_lblkno) { 909 bp_temp = bp_array[t2]; 910 bp_array[t2] = bp_array[t2 + incr]; 911 bp_array[t2 + incr] = bp_temp; 912 t2 -= incr; 913 } else 914 break; 915 916 /* Reform the list of logical blocks */ 917 incr = 0; 918 for (t1 = 0; t1 < nmemb; t1++) { 919 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 920 lfs_blocks_set(fs, lb_array, incr++, 921 bp_array[t1]->b_lblkno + t2); 922 } 923 } 924 } 925 926 927 /* 928 * lfs_seglock -- 929 * Single thread the segment writer. 930 */ 931 int 932 lfs_seglock(struct lfs * fs, unsigned long flags) 933 { 934 struct segment *sp; 935 size_t allocsize; 936 937 if (fs->lfs_seglock) { 938 ++fs->lfs_seglock; 939 fs->lfs_sp->seg_flags |= flags; 940 return 0; 941 } 942 fs->lfs_seglock = 1; 943 944 sp = fs->lfs_sp = emalloc(sizeof(*sp)); 945 allocsize = lfs_sb_getssize(fs) * sizeof(struct ubuf *); 946 sp->bpp = emalloc(allocsize); 947 if (!sp->bpp) 948 err(!preen, "Could not allocate %zu bytes", allocsize); 949 sp->seg_flags = flags; 950 sp->vp = NULL; 951 sp->seg_iocount = 0; 952 (void) lfs_initseg(fs); 953 954 return 0; 955 } 956 957 /* 958 * lfs_segunlock -- 959 * Single thread the segment writer. 
960 */ 961 void 962 lfs_segunlock(struct lfs * fs) 963 { 964 struct segment *sp; 965 struct ubuf *bp; 966 967 sp = fs->lfs_sp; 968 969 if (fs->lfs_seglock == 1) { 970 if (sp->bpp != sp->cbpp) { 971 /* Free allocated segment summary */ 972 lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 973 bp = *sp->bpp; 974 bremfree(bp); 975 bp->b_flags |= B_DONE | B_INVAL; 976 bp->b_flags &= ~B_DELWRI; 977 reassignbuf(bp, bp->b_vp); 978 bp->b_flags |= B_BUSY; /* XXX */ 979 brelse(bp, 0); 980 } else 981 printf("unlock to 0 with no summary"); 982 983 free(sp->bpp); 984 sp->bpp = NULL; 985 free(sp); 986 fs->lfs_sp = NULL; 987 988 fs->lfs_nactive = 0; 989 990 /* Since we *know* everything's on disk, write both sbs */ 991 lfs_writesuper(fs, lfs_sb_getsboff(fs, 0)); 992 lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); 993 994 --fs->lfs_seglock; 995 } else if (fs->lfs_seglock == 0) { 996 errx(EXIT_FAILURE, "Seglock not held"); 997 } else { 998 --fs->lfs_seglock; 999 } 1000 } 1001 1002 int 1003 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op) 1004 { 1005 struct inode *ip; 1006 struct uvnode *vp; 1007 int inodes_written = 0; 1008 1009 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) { 1010 if (vp->v_bmap_op != lfs_vop_bmap) 1011 continue; 1012 1013 ip = VTOI(vp); 1014 1015 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || 1016 (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) { 1017 continue; 1018 } 1019 /* 1020 * Write the inode/file if dirty and it's not the IFILE. 
1021 */ 1022 if (ip->i_state & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) { 1023 if (ip->i_number != LFS_IFILE_INUM) 1024 lfs_writefile(fs, sp, vp); 1025 (void) lfs_writeinode(fs, sp, ip); 1026 inodes_written++; 1027 } 1028 } 1029 return inodes_written; 1030 } 1031 1032 void 1033 lfs_writesuper(struct lfs *fs, daddr_t daddr) 1034 { 1035 struct ubuf *bp; 1036 1037 /* Set timestamp of this version of the superblock */ 1038 if (lfs_sb_getversion(fs) == 1) 1039 lfs_sb_setotstamp(fs, write_time); 1040 lfs_sb_settstamp(fs, write_time); 1041 1042 __CTASSERT(sizeof(struct dlfs) == sizeof(struct dlfs64)); 1043 1044 /* Checksum the superblock and copy it into a buffer. */ 1045 lfs_sb_setcksum(fs, lfs_sb_cksum(fs)); 1046 assert(daddr > 0); 1047 bp = getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), LFS_SBPAD); 1048 memcpy(bp->b_data, &fs->lfs_dlfs_u, sizeof(struct dlfs)); 1049 memset(bp->b_data + sizeof(struct dlfs), 0, 1050 LFS_SBPAD - sizeof(struct dlfs)); 1051 1052 bwrite(bp); 1053 } 1054