/* $NetBSD: segwrite.c,v 1.42 2015/08/19 20:33:29 dholland Exp $ */
2 /*-
3 * Copyright (c) 2003 The NetBSD Foundation, Inc.
4 * All rights reserved.
5 *
6 * This code is derived from software contributed to The NetBSD Foundation
7 * by Konrad E. Schroder <perseant (at) hhhh.org>.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30 /*
31 * Copyright (c) 1991, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95
59 */
60
61 /*
62 * Partial segment writer, taken from the kernel and adapted for userland.
63 */
64 #include <sys/types.h>
65 #include <sys/param.h>
66 #include <sys/time.h>
67 #include <sys/buf.h>
68 #include <sys/mount.h>
69
70 /* Override certain things to make <ufs/lfs/lfs.h> work */
71 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */
72 #define vnode uvnode
73 #define buf ubuf
74 #define panic call_panic
75
76 #include <ufs/lfs/lfs.h>
77 #include <ufs/lfs/lfs_accessors.h>
78 #include <ufs/lfs/lfs_inode.h>
79
80 #include <assert.h>
81 #include <stdio.h>
82 #include <stdlib.h>
83 #include <string.h>
84 #include <err.h>
85 #include <errno.h>
86 #include <util.h>
87
88 #include "bufcache.h"
89 #include "vnode.h"
90 #include "lfs_user.h"
91 #include "segwrite.h"
92
/*
 * Counters kept for compatibility with the kernel segment writer.
 * The written_* statistics are accumulated by lfs_writeseg().
 */
extern off_t locked_queue_bytes;
int locked_queue_count;
off_t written_bytes = 0;	/* every buffer handed to bwrite() */
off_t written_data = 0;		/* file buffers with lbn >= 0 */
off_t written_indir = 0;	/* file buffers with lbn < 0 */
off_t written_dev = 0;		/* buffers owned by the device vnode */
int written_inodes = 0;		/* inode blocks */

/*
 * Timestamp stamped on inodes, segment usage entries, segment summaries
 * and superblocks written by this file.
 */
time_t write_time;

/* Defined elsewhere. */
extern int preen;
extern u_int32_t cksum(void *, size_t);
extern u_int32_t lfs_sb_cksum(struct lfs *);

static void lfs_shellsort(struct lfs *, struct ubuf **, union lfs_blocks *,
    int, int);
111
112 /*
113 * Logical block number match routines used when traversing the dirty block
114 * chain.
115 */
116 int
117 lfs_match_data(struct lfs * fs, struct ubuf * bp)
118 {
119 return (bp->b_lblkno >= 0);
120 }
121
122 int
123 lfs_match_indir(struct lfs * fs, struct ubuf * bp)
124 {
125 daddr_t lbn;
126
127 lbn = bp->b_lblkno;
128 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0);
129 }
130
131 int
132 lfs_match_dindir(struct lfs * fs, struct ubuf * bp)
133 {
134 daddr_t lbn;
135
136 lbn = bp->b_lblkno;
137 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1);
138 }
139
140 int
141 lfs_match_tindir(struct lfs * fs, struct ubuf * bp)
142 {
143 daddr_t lbn;
144
145 lbn = bp->b_lblkno;
146 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2);
147 }
148
149 /*
150 * Do a checkpoint.
151 */
152 int
153 lfs_segwrite(struct lfs * fs, int flags)
154 {
155 struct inode *ip;
156 struct segment *sp;
157 struct uvnode *vp;
158 SEGSUM *ssp;
159 int redo;
160
161 lfs_seglock(fs, flags | SEGM_CKP);
162 sp = fs->lfs_sp;
163
164 lfs_writevnodes(fs, sp, VN_REG);
165 lfs_writevnodes(fs, sp, VN_DIROP);
166 ssp = (SEGSUM *)sp->segsum;
167 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT));
168
169 do {
170 vp = fs->lfs_ivnode;
171 fs->lfs_flags &= ~LFS_IFDIRTY;
172 ip = VTOI(vp);
173 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || lfs_sb_getidaddr(fs) <= 0)
174 lfs_writefile(fs, sp, vp);
175
176 redo = lfs_writeinode(fs, sp, ip);
177 redo += lfs_writeseg(fs, sp);
178 redo += (fs->lfs_flags & LFS_IFDIRTY);
179 } while (redo);
180
181 lfs_segunlock(fs);
182 #if 0
183 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n",
184 written_bytes, (ulfs_daddr_t)lfs_btofsb(fs, written_bytes));
185 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n",
186 written_data, (ulfs_daddr_t)lfs_btofsb(fs, written_data));
187 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n",
188 written_indir, (ulfs_daddr_t)lfs_btofsb(fs, written_indir));
189 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n",
190 written_dev, (ulfs_daddr_t)lfs_btofsb(fs, written_dev));
191 printf("wrote %d inodes (%" PRId32 " fsb)\n",
192 written_inodes, lfs_btofsb(fs, written_inodes * fs->lfs_ibsize));
193 #endif
194 return 0;
195 }
196
197 /*
198 * Write the dirty blocks associated with a vnode.
199 */
200 void
201 lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp)
202 {
203 struct ubuf *bp;
204 FINFO *fip;
205 struct inode *ip;
206 IFILE *ifp;
207 SEGSUM *ssp;
208
209 ip = VTOI(vp);
210
211 if (sp->seg_bytes_left < lfs_sb_getbsize(fs) ||
212 sp->sum_bytes_left < FINFOSIZE(fs) + LFS_BLKPTRSIZE(fs))
213 (void) lfs_writeseg(fs, sp);
214
215 sp->sum_bytes_left -= FINFOSIZE(fs);
216 ssp = (SEGSUM *)sp->segsum;
217 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1);
218
219 if (vp->v_uflag & VU_DIROP) {
220 lfs_ss_setflags(fs, ssp,
221 lfs_ss_getflags(fs, ssp) | (SS_DIROP | SS_CONT));
222 }
223
224 fip = sp->fip;
225 lfs_fi_setnblocks(fs, fip, 0);
226 lfs_fi_setino(fs, fip, ip->i_number);
227 LFS_IENTRY(ifp, fs, lfs_fi_getino(fs, fip), bp);
228 lfs_fi_setversion(fs, fip, lfs_if_getversion(fs, ifp));
229 brelse(bp, 0);
230
231 lfs_gather(fs, sp, vp, lfs_match_data);
232 lfs_gather(fs, sp, vp, lfs_match_indir);
233 lfs_gather(fs, sp, vp, lfs_match_dindir);
234 lfs_gather(fs, sp, vp, lfs_match_tindir);
235
236 fip = sp->fip;
237 if (lfs_fi_getnblocks(fs, fip) != 0) {
238 sp->fip = NEXT_FINFO(fs, fip);
239 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip);
240 } else {
241 /* XXX shouldn't this update sp->fip? */
242 sp->sum_bytes_left += FINFOSIZE(fs);
243 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) - 1);
244 }
245 }
246
247 int
248 lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip)
249 {
250 struct ubuf *bp, *ibp;
251 union lfs_dinode *cdp;
252 IFILE *ifp;
253 SEGUSE *sup;
254 SEGSUM *ssp;
255 daddr_t daddr;
256 ino_t ino;
257 int i, ndx, fsb = 0;
258 int redo_ifile = 0;
259 struct timespec ts;
260 int gotblk = 0;
261
262 /* Allocate a new inode block if necessary. */
263 if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) &&
264 sp->ibp == NULL) {
265 /* Allocate a new segment if necessary. */
266 if (sp->seg_bytes_left < lfs_sb_getibsize(fs) ||
267 sp->sum_bytes_left < sizeof(ulfs_daddr_t))
268 (void) lfs_writeseg(fs, sp);
269
270 /* Get next inode block. */
271 daddr = lfs_sb_getoffset(fs);
272 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
273 sp->ibp = *sp->cbpp++ =
274 getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr),
275 lfs_sb_getibsize(fs));
276 sp->ibp->b_flags |= B_GATHERED;
277 gotblk++;
278
279 /* Zero out inode numbers */
280 for (i = 0; i < LFS_INOPB(fs); ++i) {
281 union lfs_dinode *tmpdip;
282
283 tmpdip = DINO_IN_BLOCK(fs, sp->ibp->b_data, i);
284 lfs_dino_setinumber(fs, tmpdip, 0);
285 }
286
287 ++sp->start_bpp;
288 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
289 /* Set remaining space counters. */
290 sp->seg_bytes_left -= lfs_sb_getibsize(fs);
291 sp->sum_bytes_left -= sizeof(ulfs_daddr_t);
292 ndx = lfs_sb_getsumsize(fs) / sizeof(ulfs_daddr_t) -
293 sp->ninodes / LFS_INOPB(fs) - 1;
294 /* XXX ondisk32 */
295 ((ulfs_daddr_t *) (sp->segsum))[ndx] = daddr;
296 }
297 /* Update the inode times and copy the inode onto the inode page. */
298 ts.tv_nsec = 0;
299 ts.tv_sec = write_time;
300 /* XXX kludge --- don't redirty the ifile just to put times on it */
301 if (ip->i_number != LFS_IFILE_INUM)
302 LFS_ITIMES(ip, &ts, &ts, &ts);
303
304 /*
305 * If this is the Ifile, and we've already written the Ifile in this
306 * partial segment, just overwrite it (it's not on disk yet) and
307 * continue.
308 *
309 * XXX we know that the bp that we get the second time around has
310 * already been gathered.
311 */
312 if (ip->i_number == LFS_IFILE_INUM && sp->idp) {
313 lfs_copy_dinode(fs, sp->idp, ip->i_din);
314 ip->i_lfs_osize = ip->i_ffs1_size;
315 return 0;
316 }
317 bp = sp->ibp;
318 cdp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs));
319 lfs_copy_dinode(fs, cdp, ip->i_din);
320
321 /* If all blocks are goig to disk, update the "size on disk" */
322 ip->i_lfs_osize = ip->i_ffs1_size;
323
324 if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */
325 sp->idp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs));
326 if (gotblk) {
327 LFS_LOCK_BUF(bp);
328 assert(!(bp->b_flags & B_INVAL));
329 brelse(bp, 0);
330 }
331 /* Increment inode count in segment summary block. */
332 ssp = (SEGSUM *)sp->segsum;
333 lfs_ss_setninos(fs, ssp, lfs_ss_getninos(fs, ssp) + 1);
334
335 /* If this page is full, set flag to allocate a new page. */
336 if (++sp->ninodes % LFS_INOPB(fs) == 0)
337 sp->ibp = NULL;
338
339 /*
340 * If updating the ifile, update the super-block. Update the disk
341 * address for this inode in the ifile.
342 */
343 ino = ip->i_number;
344 if (ino == LFS_IFILE_INUM) {
345 daddr = lfs_sb_getidaddr(fs);
346 lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, bp->b_blkno));
347 sbdirty();
348 } else {
349 LFS_IENTRY(ifp, fs, ino, ibp);
350 daddr = lfs_if_getdaddr(fs, ifp);
351 lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, bp->b_blkno) + fsb);
352 (void)LFS_BWRITE_LOG(ibp); /* Ifile */
353 }
354
355 /*
356 * Account the inode: it no longer belongs to its former segment,
357 * though it will not belong to the new segment until that segment
358 * is actually written.
359 */
360 if (daddr != LFS_UNUSED_DADDR) {
361 u_int32_t oldsn = lfs_dtosn(fs, daddr);
362 LFS_SEGENTRY(sup, fs, oldsn, bp);
363 sup->su_nbytes -= DINOSIZE(fs);
364 redo_ifile =
365 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
366 if (redo_ifile)
367 fs->lfs_flags |= LFS_IFDIRTY;
368 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */
369 }
370 return redo_ifile;
371 }
372
373 int
374 lfs_gatherblock(struct segment * sp, struct ubuf * bp)
375 {
376 struct lfs *fs;
377 SEGSUM *ssp;
378 int version;
379 int j, blksinblk;
380
381 /*
382 * If full, finish this segment. We may be doing I/O, so
383 * release and reacquire the splbio().
384 */
385 fs = sp->fs;
386 blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs));
387 if (sp->sum_bytes_left < sizeof(ulfs_daddr_t) * blksinblk ||
388 sp->seg_bytes_left < bp->b_bcount) {
389 lfs_updatemeta(sp);
390
391 version = lfs_fi_getversion(fs, sp->fip);
392 (void) lfs_writeseg(fs, sp);
393
394 lfs_fi_setversion(fs, sp->fip, version);
395 lfs_fi_setino(fs, sp->fip, VTOI(sp->vp)->i_number);
396 /* Add the current file to the segment summary. */
397 ssp = (SEGSUM *)sp->segsum;
398 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1);
399 sp->sum_bytes_left -= FINFOSIZE(fs);
400
401 return 1;
402 }
403 /* Insert into the buffer list, update the FINFO block. */
404 bp->b_flags |= B_GATHERED;
405 /* bp->b_flags &= ~B_DONE; */
406
407 *sp->cbpp++ = bp;
408 for (j = 0; j < blksinblk; j++) {
409 unsigned bn;
410
411 bn = lfs_fi_getnblocks(fs, sp->fip);
412 lfs_fi_setnblocks(fs, sp->fip, bn + 1);
413 lfs_fi_setblock(fs, sp->fip, bn, bp->b_lblkno + j);;
414 }
415
416 sp->sum_bytes_left -= sizeof(ulfs_daddr_t) * blksinblk;
417 sp->seg_bytes_left -= bp->b_bcount;
418 return 0;
419 }
420
421 int
422 lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *))
423 {
424 struct ubuf *bp, *nbp;
425 int count = 0;
426
427 sp->vp = vp;
428 loop:
429 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
430 nbp = LIST_NEXT(bp, b_vnbufs);
431
432 assert(bp->b_flags & B_DELWRI);
433 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) {
434 continue;
435 }
436 if (lfs_gatherblock(sp, bp)) {
437 goto loop;
438 }
439 count++;
440 }
441
442 lfs_updatemeta(sp);
443 sp->vp = NULL;
444 return count;
445 }
446
447
448 /*
449 * Change the given block's address to ndaddr, finding its previous
450 * location using ulfs_bmaparray().
451 *
452 * Account for this change in the segment table.
453 */
454 void
455 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn,
456 ulfs_daddr_t ndaddr, int size)
457 {
458 SEGUSE *sup;
459 struct ubuf *bp;
460 struct indir a[ULFS_NIADDR + 2], *ap;
461 struct inode *ip;
462 struct uvnode *vp;
463 daddr_t daddr, ooff;
464 int num, error;
465 int osize;
466 int frags, ofrags;
467
468 vp = sp->vp;
469 ip = VTOI(vp);
470
471 error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num);
472 if (error)
473 errx(EXIT_FAILURE, "%s: ulfs_bmaparray returned %d looking up lbn %"
474 PRId64 "", __func__, error, lbn);
475 if (daddr > 0)
476 daddr = LFS_DBTOFSB(fs, daddr);
477
478 frags = lfs_numfrags(fs, size);
479 switch (num) {
480 case 0:
481 ooff = ip->i_ffs1_db[lbn];
482 if (ooff == UNWRITTEN)
483 ip->i_ffs1_blocks += frags;
484 else {
485 /* possible fragment truncation or extension */
486 ofrags = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]);
487 ip->i_ffs1_blocks += (frags - ofrags);
488 }
489 ip->i_ffs1_db[lbn] = ndaddr;
490 break;
491 case 1:
492 ooff = ip->i_ffs1_ib[a[0].in_off];
493 if (ooff == UNWRITTEN)
494 ip->i_ffs1_blocks += frags;
495 ip->i_ffs1_ib[a[0].in_off] = ndaddr;
496 break;
497 default:
498 ap = &a[num - 1];
499 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp))
500 errx(EXIT_FAILURE, "%s: bread bno %" PRId64, __func__,
501 ap->in_lbn);
502
503 ooff = ((ulfs_daddr_t *) bp->b_data)[ap->in_off];
504 if (ooff == UNWRITTEN)
505 ip->i_ffs1_blocks += frags;
506 ((ulfs_daddr_t *) bp->b_data)[ap->in_off] = ndaddr;
507 (void) VOP_BWRITE(bp);
508 }
509
510 /*
511 * Update segment usage information, based on old size
512 * and location.
513 */
514 if (daddr > 0) {
515 u_int32_t oldsn = lfs_dtosn(fs, daddr);
516 if (lbn >= 0 && lbn < ULFS_NDADDR)
517 osize = ip->i_lfs_fragsize[lbn];
518 else
519 osize = lfs_sb_getbsize(fs);
520 LFS_SEGENTRY(sup, fs, oldsn, bp);
521 sup->su_nbytes -= osize;
522 if (!(bp->b_flags & B_GATHERED))
523 fs->lfs_flags |= LFS_IFDIRTY;
524 LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
525 }
526 /*
527 * Now that this block has a new address, and its old
528 * segment no longer owns it, we can forget about its
529 * old size.
530 */
531 if (lbn >= 0 && lbn < ULFS_NDADDR)
532 ip->i_lfs_fragsize[lbn] = size;
533 }
534
535 /*
536 * Update the metadata that points to the blocks listed in the FINFO
537 * array.
538 */
539 void
540 lfs_updatemeta(struct segment * sp)
541 {
542 struct ubuf *sbp;
543 struct lfs *fs;
544 struct uvnode *vp;
545 daddr_t lbn;
546 int i, nblocks, num;
547 int frags;
548 int bytesleft, size;
549 union lfs_blocks tmpptr;
550
551 fs = sp->fs;
552 vp = sp->vp;
553
554 /*
555 * This code was cutpasted from the kernel. See the
556 * corresponding comment in lfs_segment.c.
557 */
558 #if 0
559 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
560 #else
561 lfs_blocks_fromvoid(fs, &tmpptr, (void *)NEXT_FINFO(fs, sp->fip));
562 nblocks = lfs_blocks_sub(fs, &tmpptr, &sp->start_lbp);
563 //nblocks_orig = nblocks;
564 #endif
565
566 if (vp == NULL || nblocks == 0)
567 return;
568
569 /*
570 * This count may be high due to oversize blocks from lfs_gop_write.
571 * Correct for this. (XXX we should be able to keep track of these.)
572 */
573 for (i = 0; i < nblocks; i++) {
574 if (sp->start_bpp[i] == NULL) {
575 printf("nblocks = %d, not %d\n", i, nblocks);
576 nblocks = i;
577 break;
578 }
579 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs));
580 nblocks -= num - 1;
581 }
582
583 /*
584 * Sort the blocks.
585 */
586 lfs_shellsort(fs, sp->start_bpp, &sp->start_lbp, nblocks, lfs_sb_getbsize(fs));
587
588 /*
589 * Record the length of the last block in case it's a fragment.
590 * If there are indirect blocks present, they sort last. An
591 * indirect block will be lfs_bsize and its presence indicates
592 * that you cannot have fragments.
593 */
594 lfs_fi_setlastlength(fs, sp->fip, ((sp->start_bpp[nblocks - 1]->b_bcount - 1) &
595 lfs_sb_getbmask(fs)) + 1);
596
597 /*
598 * Assign disk addresses, and update references to the logical
599 * block and the segment usage information.
600 */
601 for (i = nblocks; i--; ++sp->start_bpp) {
602 sbp = *sp->start_bpp;
603 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0);
604
605 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs));
606
607 /*
608 * If we write a frag in the wrong place, the cleaner won't
609 * be able to correctly identify its size later, and the
610 * segment will be uncleanable. (Even worse, it will assume
611 * that the indirect block that actually ends the list
612 * is of a smaller size!)
613 */
614 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0)
615 errx(EXIT_FAILURE, "%s: fragment is not last block", __func__);
616
617 /*
618 * For each subblock in this possibly oversized block,
619 * update its address on disk.
620 */
621 for (bytesleft = sbp->b_bcount; bytesleft > 0;
622 bytesleft -= lfs_sb_getbsize(fs)) {
623 size = MIN(bytesleft, lfs_sb_getbsize(fs));
624 frags = lfs_numfrags(fs, size);
625 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0);
626 lfs_blocks_inc(fs, &sp->start_lbp);
627 lfs_update_single(fs, sp, lbn, lfs_sb_getoffset(fs), size);
628 lfs_sb_addoffset(fs, frags);
629 }
630
631 }
632 }
633
634 /*
635 * Start a new segment.
636 */
637 int
638 lfs_initseg(struct lfs * fs)
639 {
640 struct segment *sp;
641 SEGUSE *sup;
642 SEGSUM *ssp;
643 struct ubuf *bp, *sbp;
644 int repeat;
645
646 sp = fs->lfs_sp;
647
648 repeat = 0;
649
650 /* Advance to the next segment. */
651 if (!LFS_PARTIAL_FITS(fs)) {
652 /* lfs_avail eats the remaining space */
653 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) -
654 lfs_sb_getcurseg(fs)));
655 lfs_newseg(fs);
656 repeat = 1;
657 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs));
658
659 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs));
660 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs));
661
662 /*
663 * If the segment contains a superblock, update the offset
664 * and summary address to skip over it.
665 */
666 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
667 if (sup->su_flags & SEGUSE_SUPERBLOCK) {
668 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD));
669 sp->seg_bytes_left -= LFS_SBPAD;
670 }
671 brelse(bp, 0);
672 /* Segment zero could also contain the labelpad */
673 if (lfs_sb_getversion(fs) > 1 && sp->seg_number == 0 &&
674 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) {
675 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs));
676 sp->seg_bytes_left -= LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs));
677 }
678 } else {
679 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs));
680 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) -
681 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs)));
682 }
683 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs));
684
685 sp->fs = fs;
686 sp->ibp = NULL;
687 sp->idp = NULL;
688 sp->ninodes = 0;
689 sp->ndupino = 0;
690
691 /* Get a new buffer for SEGSUM and enter it into the buffer list. */
692 sp->cbpp = sp->bpp;
693 sbp = *sp->cbpp = getblk(fs->lfs_devvp,
694 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs));
695 sp->segsum = sbp->b_data;
696 memset(sp->segsum, 0, lfs_sb_getsumsize(fs));
697 sp->start_bpp = ++sp->cbpp;
698 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
699
700 /* Set point to SEGSUM, initialize it. */
701 ssp = sp->segsum;
702 lfs_ss_setnext(fs, ssp, lfs_sb_getnextseg(fs));
703 lfs_ss_setnfinfo(fs, ssp, 0);
704 lfs_ss_setninos(fs, ssp, 0);
705 lfs_ss_setmagic(fs, ssp, SS_MAGIC);
706
707 /* Set pointer to first FINFO, initialize it. */
708 sp->fip = SEGSUM_FINFOBASE(fs, ssp);
709 lfs_fi_setnblocks(fs, sp->fip, 0);
710 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip);
711 lfs_fi_setlastlength(fs, sp->fip, 0);
712
713 sp->seg_bytes_left -= lfs_sb_getsumsize(fs);
714 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs);
715
716 LFS_LOCK_BUF(sbp);
717 brelse(sbp, 0);
718 return repeat;
719 }
720
721 /*
722 * Return the next segment to write.
723 */
724 void
725 lfs_newseg(struct lfs * fs)
726 {
727 CLEANERINFO *cip;
728 SEGUSE *sup;
729 struct ubuf *bp;
730 int curseg, isdirty, sn;
731
732 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp);
733 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
734 sup->su_nbytes = 0;
735 sup->su_nsums = 0;
736 sup->su_ninos = 0;
737 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp);
738
739 LFS_CLEANERINFO(cip, fs, bp);
740 lfs_ci_shiftcleantodirty(fs, cip, 1);
741 lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip));
742 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);
743
744 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs));
745 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs));
746 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) {
747 sn = (sn + 1) % lfs_sb_getnseg(fs);
748 if (sn == curseg)
749 errx(EXIT_FAILURE, "%s: no clean segments", __func__);
750 LFS_SEGENTRY(sup, fs, sn, bp);
751 isdirty = sup->su_flags & SEGUSE_DIRTY;
752 brelse(bp, 0);
753
754 if (!isdirty)
755 break;
756 }
757
758 ++fs->lfs_nactive;
759 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
760 }
761
762
763 int
764 lfs_writeseg(struct lfs * fs, struct segment * sp)
765 {
766 struct ubuf **bpp, *bp;
767 SEGUSE *sup;
768 SEGSUM *ssp;
769 char *datap, *dp;
770 int i;
771 int do_again, nblocks, byteoffset;
772 size_t el_size;
773 u_short ninos;
774 size_t sumstart;
775 struct uvnode *devvp;
776
777 /*
778 * If there are no buffers other than the segment summary to write
779 * and it is not a checkpoint, don't do anything. On a checkpoint,
780 * even if there aren't any buffers, you need to write the superblock.
781 */
782 nblocks = sp->cbpp - sp->bpp;
783 #if 0
784 printf("write %d blocks at 0x%x\n",
785 nblocks, (int)LFS_DBTOFSB(fs, (*sp->bpp)->b_blkno));
786 #endif
787 if (nblocks == 1)
788 return 0;
789
790 devvp = fs->lfs_devvp;
791
792 /* Update the segment usage information. */
793 LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
794 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
795
796 /* Loop through all blocks, except the segment summary. */
797 for (bpp = sp->bpp; ++bpp < sp->cbpp;) {
798 if ((*bpp)->b_vp != devvp) {
799 sup->su_nbytes += (*bpp)->b_bcount;
800 }
801 assert(lfs_dtosn(fs, LFS_DBTOFSB(fs, (*bpp)->b_blkno)) == sp->seg_number);
802 }
803
804 ssp = (SEGSUM *) sp->segsum;
805 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) | SS_RFW);
806
807 ninos = (lfs_ss_getninos(fs, ssp) + LFS_INOPB(fs) - 1) / LFS_INOPB(fs);
808 sup->su_nbytes += lfs_ss_getninos(fs, ssp) * DINOSIZE(fs);
809
810 if (lfs_sb_getversion(fs) == 1)
811 sup->su_olastmod = write_time;
812 else
813 sup->su_lastmod = write_time;
814 sup->su_ninos += ninos;
815 ++sup->su_nsums;
816 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, lfs_sb_getsumsize(fs)) + lfs_btofsb(fs, ninos *
817 lfs_sb_getibsize(fs))));
818 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
819
820 do_again = !(bp->b_flags & B_GATHERED);
821 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */
822
823 /*
824 * Compute checksum across data and then across summary; the first
825 * block (the summary block) is skipped. Set the create time here
826 * so that it's guaranteed to be later than the inode mod times.
827 */
828 if (lfs_sb_getversion(fs) == 1)
829 el_size = sizeof(u_long);
830 else
831 el_size = sizeof(u_int32_t);
832 datap = dp = emalloc(nblocks * el_size);
833 for (bpp = sp->bpp, i = nblocks - 1; i--;) {
834 ++bpp;
835 /* Loop through gop_write cluster blocks */
836 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount;
837 byteoffset += lfs_sb_getbsize(fs)) {
838 memcpy(dp, (*bpp)->b_data + byteoffset, el_size);
839 dp += el_size;
840 }
841 bremfree(*bpp);
842 (*bpp)->b_flags |= B_BUSY;
843 }
844 if (lfs_sb_getversion(fs) == 1)
845 lfs_ss_setocreate(fs, ssp, write_time);
846 else {
847 lfs_ss_setcreate(fs, ssp, write_time);
848 lfs_sb_addserial(fs, 1);
849 lfs_ss_setserial(fs, ssp, lfs_sb_getserial(fs));
850 lfs_ss_setident(fs, ssp, lfs_sb_getident(fs));
851 }
852 /* Set the summary block busy too */
853 bremfree(*(sp->bpp));
854 (*(sp->bpp))->b_flags |= B_BUSY;
855
856 lfs_ss_setdatasum(fs, ssp, cksum(datap, (nblocks - 1) * el_size));
857 sumstart = lfs_ss_getsumstart(fs);
858 lfs_ss_setsumsum(fs, ssp,
859 cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart));
860 free(datap);
861 datap = dp = NULL;
862 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) +
863 lfs_btofsb(fs, lfs_sb_getsumsize(fs))));
864
865 if (devvp == NULL)
866 errx(EXIT_FAILURE, "devvp is NULL");
867 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) {
868 bp = *bpp;
869 #if 0
870 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n",
871 nblocks - i, bp, bp->b_flags, bp->b_blkno);
872 printf(" vp = %p\n", bp->b_vp);
873 if (bp->b_vp != fs->lfs_devvp)
874 printf(" ino = %d lbn = %" PRId64 "\n",
875 VTOI(bp->b_vp)->i_number, bp->b_lblkno);
876 #endif
877 if (bp->b_vp == fs->lfs_devvp)
878 written_dev += bp->b_bcount;
879 else {
880 if (bp->b_lblkno >= 0)
881 written_data += bp->b_bcount;
882 else
883 written_indir += bp->b_bcount;
884 }
885 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR |
886 B_LOCKED);
887 bwrite(bp);
888 written_bytes += bp->b_bcount;
889 }
890 written_inodes += ninos;
891
892 return (lfs_initseg(fs) || do_again);
893 }
894
895 /*
896 * Our own copy of shellsort. XXX use qsort or heapsort.
897 */
898 static void
899 lfs_shellsort(struct lfs *fs,
900 struct ubuf ** bp_array, union lfs_blocks *lb_array, int nmemb, int size)
901 {
902 static int __rsshell_increments[] = {4, 1, 0};
903 int incr, *incrp, t1, t2;
904 struct ubuf *bp_temp;
905
906 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
907 for (t1 = incr; t1 < nmemb; ++t1)
908 for (t2 = t1 - incr; t2 >= 0;)
909 if ((u_int32_t) bp_array[t2]->b_lblkno >
910 (u_int32_t) bp_array[t2 + incr]->b_lblkno) {
911 bp_temp = bp_array[t2];
912 bp_array[t2] = bp_array[t2 + incr];
913 bp_array[t2 + incr] = bp_temp;
914 t2 -= incr;
915 } else
916 break;
917
918 /* Reform the list of logical blocks */
919 incr = 0;
920 for (t1 = 0; t1 < nmemb; t1++) {
921 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) {
922 lfs_blocks_set(fs, lb_array, incr++,
923 bp_array[t1]->b_lblkno + t2);
924 }
925 }
926 }
927
928
929 /*
930 * lfs_seglock --
931 * Single thread the segment writer.
932 */
933 int
934 lfs_seglock(struct lfs * fs, unsigned long flags)
935 {
936 struct segment *sp;
937 size_t allocsize;
938
939 if (fs->lfs_seglock) {
940 ++fs->lfs_seglock;
941 fs->lfs_sp->seg_flags |= flags;
942 return 0;
943 }
944 fs->lfs_seglock = 1;
945
946 sp = fs->lfs_sp = emalloc(sizeof(*sp));
947 allocsize = lfs_sb_getssize(fs) * sizeof(struct ubuf *);
948 sp->bpp = emalloc(allocsize);
949 if (!sp->bpp)
950 err(!preen, "Could not allocate %zu bytes", allocsize);
951 sp->seg_flags = flags;
952 sp->vp = NULL;
953 sp->seg_iocount = 0;
954 (void) lfs_initseg(fs);
955
956 return 0;
957 }
958
959 /*
960 * lfs_segunlock --
961 * Single thread the segment writer.
962 */
963 void
964 lfs_segunlock(struct lfs * fs)
965 {
966 struct segment *sp;
967 struct ubuf *bp;
968
969 sp = fs->lfs_sp;
970
971 if (fs->lfs_seglock == 1) {
972 if (sp->bpp != sp->cbpp) {
973 /* Free allocated segment summary */
974 lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
975 bp = *sp->bpp;
976 bremfree(bp);
977 bp->b_flags |= B_DONE | B_INVAL;
978 bp->b_flags &= ~B_DELWRI;
979 reassignbuf(bp, bp->b_vp);
980 bp->b_flags |= B_BUSY; /* XXX */
981 brelse(bp, 0);
982 } else
983 printf("unlock to 0 with no summary");
984
985 free(sp->bpp);
986 sp->bpp = NULL;
987 free(sp);
988 fs->lfs_sp = NULL;
989
990 fs->lfs_nactive = 0;
991
992 /* Since we *know* everything's on disk, write both sbs */
993 lfs_writesuper(fs, lfs_sb_getsboff(fs, 0));
994 lfs_writesuper(fs, lfs_sb_getsboff(fs, 1));
995
996 --fs->lfs_seglock;
997 fs->lfs_lockpid = 0;
998 } else if (fs->lfs_seglock == 0) {
999 errx(EXIT_FAILURE, "Seglock not held");
1000 } else {
1001 --fs->lfs_seglock;
1002 }
1003 }
1004
1005 int
1006 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op)
1007 {
1008 struct inode *ip;
1009 struct uvnode *vp;
1010 int inodes_written = 0;
1011
1012 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) {
1013 if (vp->v_bmap_op != lfs_vop_bmap)
1014 continue;
1015
1016 ip = VTOI(vp);
1017
1018 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) ||
1019 (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) {
1020 continue;
1021 }
1022 /*
1023 * Write the inode/file if dirty and it's not the IFILE.
1024 */
1025 if (ip->i_flag & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) {
1026 if (ip->i_number != LFS_IFILE_INUM)
1027 lfs_writefile(fs, sp, vp);
1028 (void) lfs_writeinode(fs, sp, ip);
1029 inodes_written++;
1030 }
1031 }
1032 return inodes_written;
1033 }
1034
1035 void
1036 lfs_writesuper(struct lfs *fs, ulfs_daddr_t daddr)
1037 {
1038 struct ubuf *bp;
1039
1040 /* Set timestamp of this version of the superblock */
1041 if (lfs_sb_getversion(fs) == 1)
1042 lfs_sb_setotstamp(fs, write_time);
1043 lfs_sb_settstamp(fs, write_time);
1044
1045 __CTASSERT(sizeof(struct dlfs) == sizeof(struct dlfs64));
1046
1047 /* Checksum the superblock and copy it into a buffer. */
1048 lfs_sb_setcksum(fs, lfs_sb_cksum(fs));
1049 assert(daddr > 0);
1050 bp = getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), LFS_SBPAD);
1051 memcpy(bp->b_data, &fs->lfs_dlfs_u, sizeof(struct dlfs));
1052 memset(bp->b_data + sizeof(struct dlfs), 0,
1053 LFS_SBPAD - sizeof(struct dlfs));
1054
1055 bwrite(bp);
1056 }
1057