/*	$NetBSD: lfs_segment.c,v 1.68.2.1 2001/06/27 03:49:40 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant (at) hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_segment.c	8.10 (Berkeley) 6/10/95
 */

#define ivndebug(vp,str) printf("ino %d: %s\n",VTOI(vp)->i_number,(str))

#if defined(_KERNEL_OPT)
#include "opt_ddb.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

extern int count_lock_queue __P((void));
extern struct simplelock vnode_free_list_slock;		/* XXX */

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

void	 lfs_callback __P((struct buf *));
int	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_iset __P((struct inode *, ufs_daddr_t, time_t));
int	 lfs_match_fake __P((struct lfs *, struct buf *));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, ufs_daddr_t *, int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
int	 lfs_vref __P((struct vnode *));
void	 lfs_vunref __P((struct vnode *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, daddr_t));
int	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
int	lfs_writeindir = 1;		/* whether to flush indir on non-ckp */
int	lfs_clean_vnhead = 0;		/* Allow freeing to head of vn list */
int	lfs_dirvcount = 0;		/* # active dirops */

/* Statistics Counters */
int lfs_dostats = 1;
struct lfs_stats lfs_stats;

extern int locked_queue_count;
extern long locked_queue_bytes;

/* op values to lfs_writevnodes */
#define	VN_REG		0
#define	VN_DIROP	1
#define	VN_EMPTY	2
#define	VN_CLEAN	3

#define	LFS_MAX_ACTIVE	10

/*
 * XXX KS - Set modification time on the Ifile, so the cleaner can
 * read the fs mod time off of it.  We don't set IN_UPDATE here,
 * since we don't really need this to be flushed to disk (and in any
 * case that wouldn't happen to the Ifile until we checkpoint).
 */
void
lfs_imtime(fs)
	struct lfs *fs;
{
	struct timespec ts;
	struct inode *ip;

	TIMEVAL_TO_TIMESPEC(&time, &ts);
	ip = VTOI(fs->lfs_ivnode);
	ip->i_ffs_mtime = ts.tv_sec;
	ip->i_ffs_mtimensec = ts.tv_nsec;
}

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

#define	SET_FLUSHING(fs,vp)	(fs)->lfs_flushvp = (vp)
#define	IS_FLUSHING(fs,vp)	((fs)->lfs_flushvp == (vp))
#define	CLR_FLUSHING(fs,vp)	(fs)->lfs_flushvp = NULL

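/*
 * Write all dirty buffers and the inode of a single vnode to disk in its
 * own partial segment, holding the segment lock for the duration.  If the
 * inode has already been freed (i_ffs_mode == 0), its buffers are simply
 * discarded instead.  Returns 0, or an error from lfs_segwrite().
 */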
int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct buf *bp, *nbp, *tbp, *tnbp;
	int error, s;

	ip = VTOI(vp);
	fs = VFSTOUFS(vp->v_mount)->um_lfs;

	if (ip->i_flag & IN_CLEANING) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/in_cleaning");
#endif
		LFS_CLR_UINO(ip, IN_CLEANING);
		LFS_SET_UINO(ip, IN_MODIFIED);

		/*
		 * Toss any cleaning buffers that have real counterparts
		 * to avoid losing new data
		 */
		s = splbio();
		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (bp->b_flags & B_CALL) {
				for (tbp = vp->v_dirtyblkhd.lh_first; tbp;
				     tbp = tnbp)
				{
					tnbp = tbp->b_vnbufs.le_next;
					if (tbp->b_vp == bp->b_vp
					    && tbp->b_lblkno == bp->b_lblkno
					    && tbp != bp)
					{
						fs->lfs_avail += btodb(bp->b_bcount);
						wakeup(&fs->lfs_avail);
						lfs_freebuf(bp);
						bp = NULL;
						break;
					}
				}
			}
		}
		splx(s);
	}

	/* If the node is being written, wait until that is done */
	if (WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/writeinprog");
#endif
		tsleep(vp, PRIBIO+1, "lfs_vw", 0);
	}

	/* Protect against VXLOCK deadlock in vinvalbuf() */
	lfs_seglock(fs, SEGM_SYNC);

	/* If we're supposed to flush a freed inode, just toss it */
	/* XXX - seglock, so these buffers can't be gathered, right? */
	if (ip->i_ffs_mode == 0) {
		printf("lfs_vflush: ino %d is freed, not flushing\n",
		       ip->i_number);
		s = splbio();
		for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (bp->b_flags & B_DELWRI) { /* XXX always true? */
				fs->lfs_avail += btodb(bp->b_bcount);
				wakeup(&fs->lfs_avail);
			}
			/* Copied from lfs_writeseg */
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				lfs_freebuf(bp);
				bp = NULL;
			} else {
				bremfree(bp);
				LFS_UNLOCK_BUF(bp);
				bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
						 B_GATHERED);
				bp->b_flags |= B_DONE;
				reassignbuf(bp, vp);
				brelse(bp);
			}
		}
		splx(s);
		LFS_CLR_UINO(ip, IN_CLEANING);
		LFS_CLR_UINO(ip, IN_MODIFIED | IN_ACCESSED);
		ip->i_flag &= ~IN_ALLMOD;
		printf("lfs_vflush: done not flushing ino %d\n",
		       ip->i_number);
		lfs_segunlock(fs);
		return 0;
	}

	SET_FLUSHING(fs,vp);
	if (fs->lfs_nactive > LFS_MAX_ACTIVE) {
		error = lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP);
		CLR_FLUSHING(fs,vp);
		lfs_segunlock(fs);
		return error;
	}
	sp = fs->lfs_sp;

	if (vp->v_dirtyblkhd.lh_first == NULL) {
		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);
	} else if ((ip->i_flag & IN_CLEANING) &&
		   (fs->lfs_sp->seg_flags & SEGM_CLEAN)) {
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush/clean");
#endif
		lfs_writevnodes(fs, vp->v_mount, sp, VN_CLEAN);
	} else if (lfs_dostats) {
		if (vp->v_dirtyblkhd.lh_first || (VTOI(vp)->i_flag & IN_ALLMOD))
			++lfs_stats.vflush_invoked;
#ifdef DEBUG_LFS
		ivndebug(vp,"vflush");
#endif
	}

#ifdef DIAGNOSTIC
	/* XXX KS This actually can happen right now, though it shouldn't(?) */
	if (vp->v_flag & VDIROP) {
		printf("lfs_vflush: flushing VDIROP, this shouldn't be\n");
		/* panic("VDIROP being flushed...this can't happen"); */
	}
	if (vp->v_usecount < 0) {
		printf("usecount=%ld\n", (long)vp->v_usecount);
		panic("lfs_vflush: usecount<0");
	}
#endif

	do {
		do {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));
	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

	if (lfs_dostats) {
		++lfs_stats.nwrites;
		if (sp->seg_flags & SEGM_SYNC)
			++lfs_stats.nsync_writes;
		if (sp->seg_flags & SEGM_CKP)
			++lfs_stats.ncheckpoints;
	}
	lfs_segunlock(fs);

	CLR_FLUSHING(fs,vp);
	return (0);
}

#ifdef DEBUG_LFS_VERBOSE
# define vndebug(vp,str) if (VTOI(vp)->i_flag & IN_CLEANING) printf("not writing ino %d because %s (op %d)\n",VTOI(vp)->i_number,(str),op)
#else
# define vndebug(vp,str)
#endif

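/*
 * Walk the list of vnodes attached to this mount point, writing file data
 * and/or inodes as dictated by "op" (VN_REG, VN_DIROP, VN_EMPTY or
 * VN_CLEAN).  Returns the number of inodes written.
 */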
int
lfs_writevnodes(fs, mp, sp, op)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int op;
{
	struct inode *ip;
	struct vnode *vp;
	int inodes_written = 0, only_cleaning;
	int needs_unlock;

#ifndef LFS_NO_BACKVP_HACK
	/* BEGIN HACK */
#define	VN_OFFSET (((caddr_t)&vp->v_mntvnodes.le_next) - (caddr_t)vp)
#define	BACK_VP(VP) ((struct vnode *)(((caddr_t)VP->v_mntvnodes.le_prev) - VN_OFFSET))
#define	BEG_OF_VLIST ((struct vnode *)(((caddr_t)&mp->mnt_vnodelist.lh_first) - VN_OFFSET))

	/* Find last vnode. */
 loop:	for (vp = mp->mnt_vnodelist.lh_first;
	     vp && vp->v_mntvnodes.le_next != NULL;
	     vp = vp->v_mntvnodes.le_next);
	for (; vp && vp != BEG_OF_VLIST; vp = BACK_VP(vp)) {
#else
 loop:
	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {
#endif
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp) {
			printf("lfs_writevnodes: starting over\n");
			goto loop;
		}

		ip = VTOI(vp);
		if ((op == VN_DIROP && !(vp->v_flag & VDIROP)) ||
		    (op != VN_DIROP && op != VN_CLEAN && (vp->v_flag & VDIROP))) {
			vndebug(vp,"dirop");
			continue;
		}

		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first) {
			vndebug(vp,"empty");
			continue;
		}

		if (vp->v_type == VNON) {
			continue;
		}

		if (op == VN_CLEAN && ip->i_number != LFS_IFILE_INUM
		    && vp != fs->lfs_flushvp
		    && !(ip->i_flag & IN_CLEANING)) {
			vndebug(vp,"cleaning");
			continue;
		}

		if (lfs_vref(vp)) {
			vndebug(vp,"vref");
			continue;
		}

		needs_unlock = 0;
		if (VOP_ISLOCKED(vp)) {
			if (vp != fs->lfs_ivnode &&
			    vp->v_lock.lk_lockholder != curproc->p_pid) {
#ifdef DEBUG_LFS
				printf("lfs_writevnodes: not writing ino %d,"
				       " locked by pid %d\n",
				       VTOI(vp)->i_number,
				       vp->v_lock.lk_lockholder);
#endif
				lfs_vunref(vp);
				continue;
			}
		} else if (vp != fs->lfs_ivnode) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			needs_unlock = 1;
		}

		only_cleaning = 0;
		/*
		 * Write the inode/file if dirty and it's not the IFILE.
		 */
		if ((ip->i_flag & IN_ALLMOD) ||
		    (vp->v_dirtyblkhd.lh_first != NULL))
		{
			only_cleaning = ((ip->i_flag & IN_ALLMOD) == IN_CLEANING);

			if (ip->i_number != LFS_IFILE_INUM
			    && vp->v_dirtyblkhd.lh_first != NULL)
			{
				lfs_writefile(fs, sp, vp);
			}
			if (vp->v_dirtyblkhd.lh_first != NULL) {
				if (WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
					ivndebug(vp,"writevnodes/write2");
#endif
				} else if (!(ip->i_flag & IN_ALLMOD)) {
#ifdef DEBUG_LFS
					printf("<%d>",ip->i_number);
#endif
					LFS_SET_UINO(ip, IN_MODIFIED);
				}
			}
			(void) lfs_writeinode(fs, sp, ip);
			inodes_written++;
		}

		if (needs_unlock)
			VOP_UNLOCK(vp, 0);

		if (lfs_clean_vnhead && only_cleaning)
			lfs_vunref_head(vp);
		else
			lfs_vunref(vp);
	}
	return inodes_written;
}

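/*
 * Write one or more partial segments for the whole file system: first the
 * regular vnodes, then (once the dirop count has drained) the dirop
 * vnodes, and finally the Ifile.  If "flags" requests a checkpoint, all
 * segment usage entries except the current segment's are stripped of
 * SEGUSE_ACTIVE, and the write is repeated until the Ifile is clean.
 */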
int
lfs_segwrite(mp, flags)
	struct mount *mp;
	int flags;			/* Do a checkpoint. */
{
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	ufs_daddr_t ibno;
	int do_ckp, did_ckp, error, i;
	int writer_set = 0;
	int dirty;

	fs = VFSTOUFS(mp)->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

	lfs_imtime(fs);

	/* printf("lfs_segwrite: ifile flags are 0x%lx\n",
	       (long)(VTOI(fs->lfs_ivnode)->i_flag)); */

#if 0
	/*
	 * If we are not the cleaner, and there is no space available,
	 * wait until cleaner writes.
	 */
	if (!(flags & SEGM_CLEAN) && !(fs->lfs_seglock && fs->lfs_sp &&
				       (fs->lfs_sp->seg_flags & SEGM_CLEAN)))
	{
		while (fs->lfs_avail <= 0) {
			LFS_CLEANERINFO(cip, fs, bp);
			LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);

			wakeup(&lfs_allclean_wakeup);
			wakeup(&fs->lfs_nextseg);
			error = tsleep(&fs->lfs_avail, PRIBIO + 1, "lfs_av2",
				       0);
			if (error) {
				return (error);
			}
		}
	}
#endif
	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	do_ckp = (flags & SEGM_CKP) || fs->lfs_nactive > LFS_MAX_ACTIVE;
	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
	sp = fs->lfs_sp;

	/*
	 * If lfs_flushvp is non-NULL, we are called from lfs_vflush,
	 * in which case we have to flush *all* buffers off of this vnode.
	 * We don't care about other nodes, but write any non-dirop nodes
	 * anyway in anticipation of another getnewvnode().
	 *
	 * If we're cleaning we only write cleaning and ifile blocks, and
	 * no dirops, since otherwise we'd risk corruption in a crash.
	 */
	if (sp->seg_flags & SEGM_CLEAN)
		lfs_writevnodes(fs, mp, sp, VN_CLEAN);
	else {
		lfs_writevnodes(fs, mp, sp, VN_REG);
		if (!fs->lfs_dirops || !fs->lfs_flushvp) {
			while (fs->lfs_dirops)
				if ((error = tsleep(&fs->lfs_writer, PRIBIO + 1,
						    "lfs writer", 0)))
				{
					/* XXX why not segunlock? */
					free(sp->bpp, M_SEGMENT);
					sp->bpp = NULL;
					free(sp, M_SEGMENT);
					fs->lfs_sp = NULL;
					return (error);
				}
			fs->lfs_writer++;
			writer_set = 1;
			lfs_writevnodes(fs, mp, sp, VN_DIROP);
			((SEGSUM *)(sp->segsum))->ss_flags &= ~(SS_CONT);
		}
	}

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.
	 */
	if (do_ckp) {
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		     --ibno >= fs->lfs_cleansz; ) {
			dirty = 0;
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_segwrite: ifile read");
			segusep = (SEGUSE *)bp->b_data;
			for (i = fs->lfs_sepb; i--;) {
				if (segusep->su_flags & SEGUSE_ACTIVE) {
					segusep->su_flags &= ~SEGUSE_ACTIVE;
					++dirty;
				}
				if (fs->lfs_version > 1)
					++segusep;
				else
					segusep = (SEGUSE *)
						((SEGUSE_V1 *)segusep + 1);
			}

			/* But the current segment is still ACTIVE */
			segusep = (SEGUSE *)bp->b_data;
			if (datosn(fs, fs->lfs_curseg) / fs->lfs_sepb ==
			    (ibno - fs->lfs_cleansz)) {
				if (fs->lfs_version > 1)
					segusep[datosn(fs, fs->lfs_curseg) %
						fs->lfs_sepb].su_flags |=
							SEGUSE_ACTIVE;
				else
					((SEGUSE *)
					 ((SEGUSE_V1 *)(bp->b_data) +
					  (datosn(fs, fs->lfs_curseg) %
					   fs->lfs_sepb)))->su_flags
						|= SEGUSE_ACTIVE;
				--dirty;
			}
			if (dirty)
				error = VOP_BWRITE(bp); /* Ifile */
			else
				brelse(bp);
		}
	}

	did_ckp = 0;
	if (do_ckp || fs->lfs_doifile) {
		do {
			vp = fs->lfs_ivnode;

			vget(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);

			ip = VTOI(vp);
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
			if (ip->i_flag & IN_ALLMOD)
				++did_ckp;
			(void) lfs_writeinode(fs, sp, ip);

			vput(vp);
		} while (lfs_writeseg(fs, sp) && do_ckp);

		/* The ifile should now be all clear */
		LFS_CLR_UINO(ip, IN_ALLMOD);
	} else {
		(void) lfs_writeseg(fs, sp);
	}

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.
	 * At the moment, the user's process hangs around so we can
	 * sleep.
	 */
	fs->lfs_doifile = 0;
	if (writer_set && --fs->lfs_writer == 0)
		wakeup(&fs->lfs_dirops);

	/*
	 * If we didn't write the Ifile, we didn't really do anything.
	 * That means that (1) there is a checkpoint on disk and (2)
	 * nothing has changed since it was written.
	 *
	 * Take the flags off of the segment so that lfs_segunlock
	 * doesn't have to write the superblock either.
	 */
	if (did_ckp == 0) {
		sp->seg_flags &= ~(SEGM_SYNC|SEGM_CKP);
		/* if (do_ckp) printf("lfs_segwrite: no checkpoint\n"); */
	}

	if (lfs_dostats) {
		++lfs_stats.nwrites;
		if (sp->seg_flags & SEGM_SYNC)
			++lfs_stats.nsync_writes;
		if (sp->seg_flags & SEGM_CKP)
			++lfs_stats.ncheckpoints;
	}
	lfs_segunlock(fs);
	return (0);
}

/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo))
		(void) lfs_writeseg(fs, sp);

	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(ufs_daddr_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;

	if (vp->v_flag & VDIROP)
		((SEGSUM *)(sp->segsum))->ss_flags |= (SS_DIROP|SS_CONT);

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	if (sp->seg_flags & SEGM_CLEAN) {
		lfs_gather(fs, sp, vp, lfs_match_fake);
		/*
		 * For a file being flushed, we need to write *all* blocks.
		 * This means writing the cleaning blocks first, and then
		 * immediately following with any non-cleaning blocks.
		 * The same is true of the Ifile since checkpoints assume
		 * that all valid Ifile blocks are written.
		 */
		if (IS_FLUSHING(fs,vp) || VTOI(vp)->i_number == LFS_IFILE_INUM)
			lfs_gather(fs, sp, vp, lfs_match_data);
	} else
		lfs_gather(fs, sp, vp, lfs_match_data);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 *
	 * We have to write them anyway, though, under two conditions: (1) the
	 * vnode is being flushed (for reuse by vinvalbuf); or (2) we are
	 * checkpointing.
	 */
	if (lfs_writeindir
	    || IS_FLUSHING(fs,vp)
	    || (sp->seg_flags & SEGM_CKP))
	{
		lfs_gather(fs, sp, vp, lfs_match_indir);
		lfs_gather(fs, sp, vp, lfs_match_dindir);
		lfs_gather(fs, sp, vp, lfs_match_tindir);
	}
	fip = sp->fip;
	if (fip->fi_nblocks != 0) {
		sp->fip = (FINFO*)((caddr_t)fip + sizeof(struct finfo) +
				   sizeof(ufs_daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		sp->sum_bytes_left += sizeof(FINFO) - sizeof(ufs_daddr_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}
}

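/*
 * Write the dinode for "ip" into the current inode block, allocating a
 * fresh inode block (and, if need be, a fresh segment) first, then update
 * the inode's ifile entry and the segment usage accounting for the
 * inode's old on-disk location.  Returns nonzero if the Ifile inode
 * itself moved and so must be rewritten.
 */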
int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	struct dinode *cdp;
	IFILE *ifp;
	SEGUSE *sup;
	ufs_daddr_t daddr;
	daddr_t *daddrp;
	ino_t ino;
	int error, i, ndx, sec = 0;
	int redo_ifile = 0;
	struct timespec ts;
	int gotblk = 0;

	if (!(ip->i_flag & IN_ALLMOD))
		return (0);

	/* Allocate a new inode block if necessary. */
	if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) && sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(ufs_daddr_t))
			(void) lfs_writeseg(fs, sp);

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += btodb(fs->lfs_ibsize);
		sp->ibp = *sp->cbpp++ =
			getblk(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
			       fs->lfs_ibsize, 0, 0);
		gotblk++;

		/* Zero out inode numbers */
		for (i = 0; i < INOPB(fs); ++i)
			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;

		++sp->start_bpp;
		fs->lfs_avail -= btodb(fs->lfs_ibsize);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_ibsize;
		sp->sum_bytes_left -= sizeof(ufs_daddr_t);
		ndx = fs->lfs_sumsize / sizeof(ufs_daddr_t) -
			sp->ninodes / INOPB(fs) - 1;
		((ufs_daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	TIMEVAL_TO_TIMESPEC(&time, &ts);
	LFS_ITIMES(ip, &ts, &ts, &ts);

	/*
	 * If this is the Ifile, and we've already written the Ifile in this
	 * partial segment, just overwrite it (it's not on disk yet) and
	 * continue.
	 *
	 * XXX we know that the bp that we get the second time around has
	 * already been gathered.
	 */
	if (ip->i_number == LFS_IFILE_INUM && sp->idp) {
		*(sp->idp) = ip->i_din.ffs_din;
		return 0;
	}

	bp = sp->ibp;
	cdp = ((struct dinode *)bp->b_data) + (sp->ninodes % INOPB(fs));
	*cdp = ip->i_din.ffs_din;
	if (fs->lfs_version > 1)
		sec = (sp->ninodes % INOPB(fs)) / INOPS(fs);

	/*
	 * If we are cleaning, ensure that we don't write UNWRITTEN disk
	 * addresses to disk.
	 */
	if (ip->i_lfs_effnblks != ip->i_ffs_blocks) {
#ifdef DEBUG_LFS
		printf("lfs_writeinode: cleansing ino %d (%d != %d)\n",
		       ip->i_number, ip->i_lfs_effnblks, ip->i_ffs_blocks);
#endif
		for (daddrp = cdp->di_db; daddrp < cdp->di_ib + NIADDR;
		     daddrp++) {
			if (*daddrp == UNWRITTEN) {
#ifdef DEBUG_LFS
				printf("lfs_writeinode: wiping UNWRITTEN\n");
#endif
				*daddrp = 0;
			}
		}
	}

	if (ip->i_flag & IN_CLEANING)
		LFS_CLR_UINO(ip, IN_CLEANING);
	else {
		/* XXX IN_ALLMOD */
		LFS_CLR_UINO(ip, IN_ACCESSED | IN_ACCESS | IN_CHANGE |
				 IN_UPDATE);
		if (ip->i_lfs_effnblks == ip->i_ffs_blocks)
			LFS_CLR_UINO(ip, IN_MODIFIED);
#ifdef DEBUG_LFS
		else
			printf("lfs_writeinode: ino %d: real blks=%d, "
			       "eff=%d\n", ip->i_number, ip->i_ffs_blocks,
			       ip->i_lfs_effnblks);
#endif
	}

	if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */
		sp->idp = ((struct dinode *)bp->b_data) +
			(sp->ninodes % INOPB(fs));
	if (gotblk) {
		LFS_LOCK_BUF(bp);
		brelse(bp);
	}

	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno + sec;
#ifdef LFS_DEBUG_NEXTFREE
		if (ino > 3 && ifp->if_nextfree) {
			vprint("lfs_writeinode",ITOV(ip));
			printf("lfs_writeinode: updating free ino %d\n",
			       ip->i_number);
		}
#endif
		error = VOP_BWRITE(ibp); /* Ifile */
	}

	/*
	 * Account the inode: it no longer belongs to its former segment,
	 * though it will not belong to the new segment until that segment
	 * is actually written.
	 */
#ifdef DEBUG
	/*
	 * The inode's last address should not be in the current partial
	 * segment, except under exceptional circumstances (lfs_writevnodes
	 * had to start over, and in the meantime more blocks were written
	 * to a vnode).  Although the previous inode won't be accounted in
	 * su_nbytes until lfs_writeseg, this shouldn't be a problem as we
	 * have more data blocks in the current partial segment.
	 */
	if (daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)
		printf("lfs_writeinode: last inode addr in current pseg "
		       "(ino %d daddr 0x%x)\n", ino, daddr);
#endif
	if (daddr != LFS_UNUSED_DADDR) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < DINODE_SIZE) {
			printf("lfs_writeinode: negative bytes "
			       "(segment %d short by %d)\n",
			       datosn(fs, daddr),
			       (int)DINODE_SIZE - sup->su_nbytes);
			panic("lfs_writeinode: negative bytes");
			sup->su_nbytes = DINODE_SIZE;
		}
#endif
		sup->su_nbytes -= DINODE_SIZE;
		redo_ifile =
			(ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp); /* Ifile */
	}
	return (redo_ifile);
}

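/*
 * Add a single block to the current partial segment, updating the FINFO
 * entry and the space accounting.  If the block will not fit, first flush
 * the segment (dropping and retaking splbio around the I/O via *sptr) and
 * return 1 so that the caller restarts its scan of the dirty list.
 */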
int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(ufs_daddr_t) ||
	    sp->seg_bytes_left < bp->b_bcount) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
			sizeof(struct finfo) - sizeof(ufs_daddr_t);

		if (sptr)
			*sptr = splbio();
		return (1);
	}

#ifdef DEBUG
	if (bp->b_flags & B_GATHERED) {
		printf("lfs_gatherblock: already gathered! Ino %d, lbn %d\n",
		       sp->fip->fi_ino, bp->b_lblkno);
		return (0);
	}
#endif
	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(ufs_daddr_t);
	sp->seg_bytes_left -= bp->b_bcount;
	return (0);
}

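/*
 * Gather into the current partial segment all the dirty blocks on "vp"
 * that satisfy the "match" predicate.  Blocks on block-device vnodes are
 * simply written in place.  Returns the number of blocks processed.
 */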
int
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s, count = 0;

	sp->vp = vp;
	s = splbio();

#ifndef LFS_NO_BACKBUF_HACK
 loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
#else /* LFS_NO_BACKBUF_HACK */
/* This is a hack to see if ordering the blocks in LFS makes a difference. */
# define	BUF_OFFSET (((void *)&bp->b_vnbufs.le_next) - (void *)bp)
# define	BACK_BUF(BP) ((struct buf *)(((void *)BP->b_vnbufs.le_prev) - BUF_OFFSET))
# define	BEG_OF_LIST ((struct buf *)(((void *)&vp->v_dirtyblkhd.lh_first) - BUF_OFFSET))
	/* Find last buffer. */
 loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp && bp->b_vnbufs.le_next != NULL;
	     bp = bp->b_vnbufs.le_next);
	for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
#endif /* LFS_NO_BACKBUF_HACK */
		if ((bp->b_flags & (B_BUSY|B_GATHERED)) || !match(fs, bp))
			continue;
		if (vp->v_type == VBLK) {
			/* For block devices, just write the blocks. */
			/* XXX Do we really need to even do this? */
#ifdef DEBUG_LFS
			if (count == 0)
				printf("BLK(");
			printf(".");
#endif
			/* Get the block before bwrite, so we don't corrupt the free list */
			bp->b_flags |= B_BUSY;
			bremfree(bp);
			bwrite(bp);
		} else {
#ifdef DIAGNOSTIC
			if ((bp->b_flags & (B_CALL|B_INVAL)) == B_INVAL) {
				printf("lfs_gather: lbn %d is B_INVAL\n",
				       bp->b_lblkno);
				VOP_PRINT(bp->b_vp);
			}
			if (!(bp->b_flags & B_DELWRI))
				panic("lfs_gather: bp not B_DELWRI");
			if (!(bp->b_flags & B_LOCKED)) {
				printf("lfs_gather: lbn %d blk %d"
				       " not B_LOCKED\n", bp->b_lblkno,
				       bp->b_blkno);
				VOP_PRINT(bp->b_vp);
				panic("lfs_gather: bp not B_LOCKED");
			}
#endif
			if (lfs_gatherblock(sp, bp, &s)) {
				goto loop;
			}
		}
		count++;
	}
	splx(s);
#ifdef DEBUG_LFS
	if (vp->v_type == VBLK && count)
		printf(")\n");
#endif
	lfs_updatemeta(sp);
	sp->vp = NULL;
	return count;
}

/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct indir a[NIADDR + 2], *ap;
	struct inode *ip;
	ufs_daddr_t daddr, lbn, off;
	daddr_t ooff;
	int error, i, nblocks, num;
	int bb;

	vp = sp->vp;
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (nblocks < 0)
		panic("This is a bad thing\n");
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	/*
	 * XXX KS - We have to sort even if the blocks come from the
	 * cleaner, because there might be other pending blocks on the
	 * same inode...and if we don't sort, and there are fragments
	 * present, blocks may be written in the wrong place.
	 */
	/* if (!(sp->seg_flags & SEGM_CLEAN)) */
	lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);

	/*
	 * Record the length of the last block in case it's a fragment.
	 * If there are indirect blocks present, they sort last.  An
	 * indirect block will be lfs_bsize and its presence indicates
	 * that you cannot have fragments.
	 */
	sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount;

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;

		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		if ((*sp->start_bpp)->b_blkno == (*sp->start_bpp)->b_lblkno) {
			printf("lfs_updatemeta: ino %d blk %d"
			       " has same lbn and daddr\n",
			       VTOI(vp)->i_number, off);
		}
#ifdef DIAGNOSTIC
		if ((*sp->start_bpp)->b_bcount < fs->lfs_bsize && i != 0)
			panic("lfs_updatemeta: fragment is not last block\n");
#endif
		bb = fragstodb(fs, numfrags(fs, (*sp->start_bpp)->b_bcount));
		fs->lfs_offset += bb;
		error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL);
		if (error)
			panic("lfs_updatemeta: ufs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			ooff = ip->i_ffs_db[lbn];
#ifdef DEBUG
			if (ooff == 0) {
				printf("lfs_updatemeta[1]: warning: writing "
				       "ino %d lbn %d at 0x%x, was 0x0\n",
				       ip->i_number, lbn, off);
			}
#endif
			if (ooff == UNWRITTEN)
				ip->i_ffs_blocks += bb;
			ip->i_ffs_db[lbn] = off;
			break;
		case 1:
			ooff = ip->i_ffs_ib[a[0].in_off];
#ifdef DEBUG
			if (ooff == 0) {
				printf("lfs_updatemeta[2]: warning: writing "
				       "ino %d lbn %d at 0x%x, was 0x0\n",
				       ip->i_number, lbn, off);
			}
#endif
			if (ooff == UNWRITTEN)
				ip->i_ffs_blocks += bb;
			ip->i_ffs_ib[a[0].in_off] = off;
			break;
		default:
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				      ap->in_lbn);

			ooff = ((ufs_daddr_t *)bp->b_data)[ap->in_off];
#ifdef DEBUG
			if (ooff == 0) {
				printf("lfs_updatemeta[3]: warning: writing "
				       "ino %d lbn %d at 0x%x, was 0x0\n",
				       ip->i_number, lbn, off);
			}
#endif
			if (ooff == UNWRITTEN)
				ip->i_ffs_blocks += bb;
			((ufs_daddr_t *)bp->b_data)[ap->in_off] = off;
			(void) VOP_BWRITE(bp);
		}
#ifdef DEBUG
		if (daddr >= fs->lfs_lastpseg && daddr <= off) {
			printf("lfs_updatemeta: ino %d, lbn %d, addr = %x "
			       "in same pseg\n", VTOI(sp->vp)->i_number,
			       (*sp->start_bpp)->b_lblkno, daddr);
		}
#endif
		/* Update segment usage information. */
		if (daddr > 0) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < (*sp->start_bpp)->b_bcount) {
				/* XXX -- Change to a panic. */
				printf("lfs_updatemeta: negative bytes "
				       "(segment %d short by %ld)\n",
				       datosn(fs, daddr),
				       (*sp->start_bpp)->b_bcount -
				       sup->su_nbytes);
				printf("lfs_updatemeta: ino %d, lbn %d, "
				       "addr = %x\n", VTOI(sp->vp)->i_number,
				       (*sp->start_bpp)->b_lblkno, daddr);
				panic("lfs_updatemeta: negative bytes");
				sup->su_nbytes = (*sp->start_bpp)->b_bcount;
			}
#endif
			sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
			error = VOP_BWRITE(bp); /* Ifile */
		}
	}
}

/*
 * Start a new segment.
 */
int
lfs_initseg(fs)
	struct lfs *fs;
{
	struct segment *sp;
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	int repeat;

	sp = fs->lfs_sp;

	repeat = 0;
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* lfs_avail eats the remaining space */
		fs->lfs_avail -= fs->lfs_dbpseg - (fs->lfs_offset -
						   fs->lfs_curseg);
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&lfs_allclean_wakeup);
		wakeup(&fs->lfs_nextseg);
		lfs_newseg(fs);
		repeat = 1;
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = dbtob(fs->lfs_dbpseg);
		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += btodb(LFS_SBPAD);
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
		/* Segment zero could also contain the labelpad */
		if (fs->lfs_version > 1 && sp->seg_number == 0 &&
		    fs->lfs_start < btodb(LFS_LABELPAD)) {
			fs->lfs_offset += btodb(LFS_LABELPAD) - fs->lfs_start;
			sp->seg_bytes_left -= LFS_LABELPAD - dbtob(fs->lfs_start);
		}
	} else {
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = dbtob(fs->lfs_dbpseg -
					   (fs->lfs_offset - fs->lfs_curseg));
	}
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->idp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp,
			       fs->lfs_offset, fs->lfs_sumsize);
	sp->segsum = (*sp->cbpp)->b_data;
	bzero(sp->segsum, fs->lfs_sumsize);
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += btodb(fs->lfs_sumsize);

	/* Set point to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;
	ssp->ss_magic = SS_MAGIC;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)((caddr_t)sp->segsum + SEGSUM_SIZE(fs));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];
	sp->fip->fi_lastlength = 0;

	sp->seg_bytes_left -= fs->lfs_sumsize;
	sp->sum_bytes_left = fs->lfs_sumsize - SEGSUM_SIZE(fs);

	return (repeat);
}

/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, isdirty, sn;

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp); /* Ifile */

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	fs->lfs_nclean = cip->clean;
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	for (sn = curseg = datosn(fs, fs->lfs_curseg) + fs->lfs_interleave;;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
	if (lfs_dostats) {
		++lfs_stats.segsused;
	}
}

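/*
 * Write the blocks gathered into the current partial segment to disk:
 * fix up the segment usage accounting, checksum the data and the summary,
 * then copy the buffers into CHUNKSIZE chunks hanging off the device
 * vnode and issue the writes asynchronously.  Returns nonzero if the
 * partial segment must be written again (a new segment was started, or
 * the Ifile block holding our segment entry had already been gathered).
 */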
int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf **bpp, *bp, *cbp, *newbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	char *datap, *dp;
	int do_again, i, nblocks, s, el_size;
#ifdef LFS_TRACK_IOS
	int j;
#endif
	int (*strategy)__P((void *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	struct vnode *devvp;
	char *p;
	struct vnode *vp;
	struct inode *ip;
	daddr_t *daddrp;
	int changed;
#if defined(DEBUG) && defined(LFS_PROPELLER)
	static int propeller;
	char propstring[4] = "-\\|/";

	printf("%c\b",propstring[propeller++]);
	if (propeller == 4)
		propeller = 0;
#endif

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1)
		return (0);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);

	/* Loop through all blocks, except the segment summary. */
	for (bpp = sp->bpp; ++bpp < sp->cbpp; ) {
		if ((*bpp)->b_vp != devvp)
			sup->su_nbytes += (*bpp)->b_bcount;
	}

	ssp = (SEGSUM *)sp->segsum;

	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	sup->su_nbytes += ssp->ss_ninos * DINODE_SIZE;
	/* sup->su_nbytes += fs->lfs_sumsize; */
	if (fs->lfs_version == 1)
		sup->su_olastmod = time.tv_sec;
	else
		sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	fs->lfs_dmeta += (btodb(fs->lfs_sumsize) + btodb(ninos *
							 fs->lfs_ibsize));
	fs->lfs_avail -= btodb(fs->lfs_sumsize);

	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp); /* Ifile */
	/*
	 * Mark blocks B_BUSY, to prevent them from being changed between
	 * the checksum computation and the actual write.
	 *
	 * If we are cleaning, check indirect blocks for UNWRITTEN, and if
	 * there are any, replace them with copies that have UNASSIGNED
	 * instead.
	 */
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		++bpp;
		if ((*bpp)->b_flags & B_CALL)
			continue;
		bp = *bpp;
	again:
		s = splbio();
		if (bp->b_flags & B_BUSY) {
#ifdef DEBUG
			printf("lfs_writeseg: avoiding potential data "
			       "summary corruption for ino %d, lbn %d\n",
			       VTOI(bp->b_vp)->i_number, bp->b_lblkno);
#endif
			bp->b_flags |= B_WANTED;
			tsleep(bp, (PRIBIO + 1), "lfs_writeseg", 0);
			splx(s);
			goto again;
		}
		bp->b_flags |= B_BUSY;
		splx(s);
		/* Check and replace indirect block UNWRITTEN bogosity */
		if (bp->b_lblkno < 0 && bp->b_vp != devvp && bp->b_vp &&
		    VTOI(bp->b_vp)->i_ffs_blocks !=
		    VTOI(bp->b_vp)->i_lfs_effnblks) {
#ifdef DEBUG_LFS
			printf("lfs_writeseg: cleansing ino %d (%d != %d)\n",
			       VTOI(bp->b_vp)->i_number,
			       VTOI(bp->b_vp)->i_lfs_effnblks,
			       VTOI(bp->b_vp)->i_ffs_blocks);
#endif
			/* Make a copy we'll make changes to */
			newbp = lfs_newbuf(fs, bp->b_vp, bp->b_lblkno,
					   bp->b_bcount);
			newbp->b_blkno = bp->b_blkno;
			memcpy(newbp->b_data, bp->b_data,
			       newbp->b_bcount);
			*bpp = newbp;

			changed = 0;
			for (daddrp = (daddr_t *)(newbp->b_data);
			     daddrp < (daddr_t *)(newbp->b_data +
						  newbp->b_bcount); daddrp++) {
				if (*daddrp == UNWRITTEN) {
					++changed;
#ifdef DEBUG_LFS
					printf("lfs_writeseg: replacing UNWRITTEN\n");
#endif
					*daddrp = 0;
				}
			}
			/*
			 * Get rid of the old buffer.  Don't mark it clean,
			 * though, if it still has dirty data on it.
			 */
			if (changed) {
				bp->b_flags &= ~(B_ERROR | B_GATHERED);
				if (bp->b_flags & B_CALL) {
					lfs_freebuf(bp);
					bp = NULL;
				} else {
					/* Still on free list, leave it there */
					s = splbio();
					bp->b_flags &= ~B_BUSY;
					if (bp->b_flags & B_WANTED)
						wakeup(bp);
					splx(s);
					/*
					 * We have to re-decrement lfs_avail
					 * since this block is going to come
					 * back around to us in the next
					 * segment.
					 */
					fs->lfs_avail -= btodb(bp->b_bcount);
				}
			} else {
				bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
						 B_GATHERED);
				LFS_UNLOCK_BUF(bp);
				if (bp->b_flags & B_CALL) {
					lfs_freebuf(bp);
					bp = NULL;
				} else {
					bremfree(bp);
					bp->b_flags |= B_DONE;
					reassignbuf(bp, bp->b_vp);
					brelse(bp);
				}
			}

		}
	}
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	if (fs->lfs_version == 1)
		el_size = sizeof(u_long);
	else
		el_size = sizeof(u_int32_t);
	datap = dp = malloc(nblocks * el_size, M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		if (((*++bpp)->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) {
			if (copyin((*bpp)->b_saveaddr, dp, el_size))
				panic("lfs_writeseg: copyin failed [1]: "
				      "ino %d blk %d",
				      VTOI((*bpp)->b_vp)->i_number,
				      (*bpp)->b_lblkno);
		} else
			memcpy(dp, (*bpp)->b_data, el_size);
		dp += el_size;
	}
	if (fs->lfs_version == 1)
		/* Ident is where timestamp was in v1 */
		ssp->ss_ident = time.tv_sec;
	else {
		ssp->ss_create = time.tv_sec;
		ssp->ss_serial = ++fs->lfs_serial;
		ssp->ss_ident = fs->lfs_ident;
	}
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size);
	ssp->ss_sumsum =
		cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
	datap = dp = NULL;
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + btodb(fs->lfs_sumsize))
		panic("lfs_writeseg: No diskspace for summary");
#endif
	fs->lfs_bfree -= (btodb(ninos * fs->lfs_ibsize) +
			  btodb(fs->lfs_sumsize));

	strategy = devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  CHUNKSIZE is the
	 * largest size I/O devices can handle.
	 * When the data is copied to the chunk, turn off the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */

#define CHUNKSIZE MAXPHYS

	if (devvp == NULL)
		panic("devvp is NULL");
	for (bpp = sp->bpp, i = nblocks; i;) {
		cbp = lfs_newbuf(fs, devvp, (*bpp)->b_blkno, CHUNKSIZE);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;
		cbp->b_bcount = 0;

#ifdef DIAGNOSTIC
		if (datosn(fs, (*bpp)->b_blkno + btodb((*bpp)->b_bcount) - 1) !=
		    datosn(fs, cbp->b_blkno)) {
			panic("lfs_writeseg: Segment overwrite");
		}
#endif

		s = splbio();
		if (fs->lfs_iocount >= LFS_THROTTLE) {
			tsleep(&fs->lfs_iocount, PRIBIO+1, "lfs throttle", 0);
		}
		++fs->lfs_iocount;
#ifdef LFS_TRACK_IOS
		for (j = 0; j < LFS_THROTTLE; j++) {
			if (fs->lfs_pending[j] == LFS_UNUSED_DADDR) {
				fs->lfs_pending[j] = cbp->b_blkno;
				break;
			}
		}
#endif /* LFS_TRACK_IOS */
		for (p = cbp->b_data; i && cbp->b_bcount < CHUNKSIZE; i--) {
			bp = *bpp;

			if (bp->b_bcount > (CHUNKSIZE - cbp->b_bcount))
				break;

			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if ((bp->b_flags & (B_CALL|B_INVAL)) == (B_CALL|B_INVAL)) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed [2]");
			} else
				bcopy(bp->b_data, p, bp->b_bcount);
			p += bp->b_bcount;
			cbp->b_bcount += bp->b_bcount;
			LFS_UNLOCK_BUF(bp);
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
					 B_GATHERED);
			vp = bp->b_vp;
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				lfs_freebuf(bp);
				bp = NULL;
			} else {
				bremfree(bp);
				bp->b_flags |= B_DONE;
				if (vp)
					reassignbuf(bp, vp);
				brelse(bp);
			}

			bpp++;

			/*
			 * If this is the last block for this vnode, but
			 * there are other blocks on its dirty list,
			 * set IN_MODIFIED/IN_CLEANING depending on what
			 * sort of block.  Only do this for our mount point,
			 * not for, e.g., inode blocks that are attached to
			 * the devvp.
			 * XXX KS - Shouldn't we set *both* if both types
			 * of blocks are present (traverse the dirty list?)
			 */
			if ((i == 1 ||
			     (i > 1 && vp && *bpp && (*bpp)->b_vp != vp)) &&
			    (bp = vp->v_dirtyblkhd.lh_first) != NULL &&
			    vp->v_mount == fs->lfs_ivnode->v_mount)
			{
				ip = VTOI(vp);
#ifdef DEBUG_LFS
				printf("lfs_writeseg: marking ino %d\n",
				       ip->i_number);
#endif
				if (bp->b_flags & B_CALL)
					LFS_SET_UINO(ip, IN_CLEANING);
				else
					LFS_SET_UINO(ip, IN_MODIFIED);
			}
			wakeup(vp);
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
#if 1 || defined(DEBUG)
	/*
	 * After doing a big write, we recalculate how many buffers are
	 * really still left on the locked queue.
	 */
	s = splbio();
	lfs_countlocked(&locked_queue_count, &locked_queue_bytes);
	splx(s);
	wakeup(&locked_queue_count);
#endif /* 1 || DEBUG */
	if (lfs_dostats) {
		++lfs_stats.psegwrites;
		lfs_stats.blocktot += nblocks - 1;
		if (fs->lfs_sp->seg_flags & SEGM_SYNC)
			++lfs_stats.psyncwrites;
		if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
			++lfs_stats.pcleanwrites;
			lfs_stats.cleanblocks += nblocks - 1;
		}
	}
	return (lfs_initseg(fs) || do_again);
}

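/*
 * Write one copy of the superblock at the given disk address,
 * asynchronously, serializing against any superblock write already in
 * progress so that a crash cannot leave us without a complete checkpoint.
 */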
void
lfs_writesuper(fs, daddr)
	struct lfs *fs;
	daddr_t daddr;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((void *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	/*
	 * If we can write one superblock while another is in
	 * progress, we risk not having a complete checkpoint if we crash.
	 * So, block here if a superblock write is in progress.
	 */
	s = splbio();
	while (fs->lfs_sbactive) {
		tsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs sb", 0);
	}
	fs->lfs_sbactive = daddr;
	splx(s);
	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Set timestamp of this version of the superblock */
	if (fs->lfs_version == 1)
		fs->lfs_otstamp = time.tv_sec;
	fs->lfs_tstamp = time.tv_sec;

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs));
	bp = lfs_newbuf(fs, VTOI(fs->lfs_ivnode)->i_devvp, daddr, LFS_SBPAD);
	*(struct dlfs *)bp->b_data = fs->lfs_dlfs;

	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	/* XXX KS - same nasty hack as above */
	bp->b_saveaddr = (caddr_t)fs;

	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	++bp->b_vp->v_numoutput;
	++fs->lfs_iocount;
	splx(s);
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
 */
int
lfs_match_fake(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_flags & B_CALL);
}

int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * XXX - The only buffers that are going to hit these functions are the
 * segment write blocks, or the segment summaries, or the superblocks.
 *
 * All of the above are created by lfs_newbuf, and so do not need to be
 * released via brelse.
 */
void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;
#ifdef LFS_TRACK_IOS
	int j;
#endif

	fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount < LFS_THROTTLE)
		wakeup(&fs->lfs_iocount);
#ifdef LFS_TRACK_IOS
	for (j = 0; j < LFS_THROTTLE; j++) {
		if (fs->lfs_pending[j] == bp->b_blkno) {
			fs->lfs_pending[j] = LFS_UNUSED_DADDR;
			wakeup(&(fs->lfs_pending[j]));
			break;
		}
	}
#endif /* LFS_TRACK_IOS */

	lfs_freebuf(bp);
}

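/*
 * I/O completion handler for superblock writes: clear lfs_sbactive so the
 * next superblock write may proceed, drop the I/O count, and free the
 * buffer allocated by lfs_writesuper().
 */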
void
lfs_supercallback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = (struct lfs *)bp->b_saveaddr;
	fs->lfs_sbactive = 0;
	wakeup(&fs->lfs_sbactive);
	if (--fs->lfs_iocount < LFS_THROTTLE)
		wakeup(&fs->lfs_iocount);
	lfs_freebuf(bp);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to an unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	ufs_daddr_t *lb_array;
	int nmemb;
{
	static int __rsshell_increments[] = { 4, 1, 0 };
	int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	u_long lb_temp;

	for (incrp = __rsshell_increments; (incr = *incrp++) != 0;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				if (lb_array[t2] > lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}

/*
 * Check VXLOCK.  Return 1 if the vnode is locked.  Otherwise, vget it.
 */
int
lfs_vref(vp)
	struct vnode *vp;
{
	/*
	 * If we return 1 here during a flush, we risk vinvalbuf() not
	 * being able to flush all of the pages from this vnode, which
	 * will cause it to panic.  So, return 0 if a flush is in progress.
	 */
	if (vp->v_flag & VXLOCK) {
		if (IS_FLUSHING(VTOI(vp)->i_lfs,vp)) {
			return 0;
		}
		return (1);
	}
	return (vget(vp, 0));
}

/*
 * This is vrele except that we do not want to VOP_INACTIVE this vnode.  We
 * inline vrele here to avoid the vn_lock and VOP_INACTIVE call at the end.
 */
void
lfs_vunref(vp)
	struct vnode *vp;
{
	/*
	 * Analogous to lfs_vref, if the node is flushing, fake it.
	 */
	if ((vp->v_flag & VXLOCK) && IS_FLUSHING(VTOI(vp)->i_lfs,vp)) {
		return;
	}

	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount <= 0) {
		printf("lfs_vunref: inum is %d\n", VTOI(vp)->i_number);
		printf("lfs_vunref: flags are 0x%lx\n", (u_long)vp->v_flag);
		printf("lfs_vunref: usecount = %ld\n", (long)vp->v_usecount);
		panic("lfs_vunref: v_usecount<0");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * We use this when we have vnodes that were loaded in solely for cleaning.
 * There is no reason to believe that these vnodes will be referenced again
 * soon, since the cleaning process is unrelated to normal filesystem
 * activity.  Putting cleaned vnodes at the tail of the list has the effect
 * of flushing the vnode LRU.  So, put vnodes that were loaded only for
 * cleaning at the head of the list, instead.
 */
void
lfs_vunref_head(vp)
	struct vnode *vp;
{
	simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		panic("lfs_vunref_head: v_usecount is 0");
	}
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
	/*
	 * insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}