check-all revision 1.4
1#!/usr/pkg/bin/perl
2#
3#	$NetBSD: check-all,v 1.4 2006/07/21 00:29:23 perseant Exp $
4#
5# Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
6# All rights reserved.
7#
8# This code is derived from software contributed to The NetBSD Foundation
9# by Konrad E. Schroder <perseant@hhhh.org>.
10#
11# Redistribution and use in source and binary forms, with or without
12# modification, are permitted provided that the following conditions
13# are met:
14# 1. Redistributions of source code must retain the above copyright
15#    notice, this list of conditions and the following disclaimer.
16# 2. Redistributions in binary form must reproduce the above copyright
17#    notice, this list of conditions and the following disclaimer in the
18#    documentation and/or other materials provided with the distribution.
19# 3. All advertising materials mentioning features or use of this software
20#    must display the following acknowledgement:
21#	This product includes software developed by the NetBSD
22#	Foundation, Inc. and its contributors.
23# 4. Neither the name of The NetBSD Foundation nor the names of its
24#    contributors may be used to endorse or promote products derived
25#    from this software without specific prior written permission.
26#
27# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37# POSSIBILITY OF SUCH DAMAGE.
38#
39
40#
41# Use dumplfs to find all locations of the Ifile inode on a given disk.
42# Order these by serial number and call fsck_lfs on the raw disk for each.
43# If any fsck gives errors (any line of all capital letters, with a few
44# exceptions) print an error code with the daddr of the failing Ifile inode
45# location.
46#
47
$| = 1;			# unbuffered stdout, so progress output appears at once

# Command-line arguments.
$rdev = $ARGV[0];	# raw device: read by dumplfs and used as dd input
$gfile = $ARGV[1];	# image copied to the working file when $sstart != 0
$wfile = $ARGV[2];	# working file: written by dd, checked by fsck_lfs
$sstart = $ARGV[3];	# first partial-segment serial number of interest
$test_rfw = 1; # $ARGV[4];
$rollid = 0;
$ssize = 0;
$fsize = 0;

# Without dumplfs output nothing below can work; stop before any dd run
# touches the working file.
open(DUMPLFS, "dumplfs $rdev |")
	or die "check-all: cannot run dumplfs on $rdev: $!\n";

# Read the leading part of the dumplfs output: keep the first non-zero
# ssize and fsize values, and stop at the first roll_id.  That roll_id is
# remembered so that partial segments carrying a different one (garbage)
# can be skipped later.
while (<DUMPLFS>) {
	if ($ssize == 0 && m/ssize *([0-9]*)/) {
		$ssize = $1;
	}
	if ($fsize == 0 && m/fsize *([0-9]*)/) {
		$fsize = $1;
	}
	if (m/roll_id *([x0-9a-f]*)/) {
		$rollid = $1;
		last;
	}
}
70
# Scan the rest of the dumplfs output.  For every partial-segment serial
# number record the segment it was seen in (%snloc) and the address of the
# most recent segment summary (%sumloc).  On lines that mention inode 1
# version 1 ("1v1"), record a block address from that line as the Ifile
# inode location for the current serial number (%iloc).  The remainder of a
# partial segment whose serial number is below $sstart is skipped.

%iloc = ();
%snloc = ();
%sumloc = ();
print "Reading segments:";
while (<DUMPLFS>) {
	# A roll_id different from the one read earlier: skip forward to
	# the next SEGMENT line.
	if (m/roll_id *([0-9a-f]*)/ && "0x$1" ne $rollid) {
		# print "rollid $1\n";
		print "{skip bad rollid 0x$1}";
		while (<DUMPLFS>) {
			m/SEGMENT/ and last;
		}
		# $_ is now the SEGMENT line (or undefined at end of input)
		# and is examined by the tests below.
	}

	if (m/roll_id.*serial *([0-9]*)/) {
		$serno = $1;
		($snloc{$serno}, $sumloc{$serno}) = ($segnum, $sumloc);
		print "($serno)";
		if ($serno < $sstart) {
			# Too old: skip to the next summary or segment.
			#print "{skip bad serno $serno}";
			while (<DUMPLFS>) {
				last if m/Segment Summary/;
				last if m/SEGMENT/;
			}
			# As above, $_ falls through to the tests below.
		}
	}

	# Remember where the current segment summary lives.
	if (m/Segment Summary Info at 0x([0-9a-f]*)/) {
		$sumloc = $1;
		next;
	}

	if (m/0x([0-9a-f]*)/) {
		# The "1v1" test applies to the whole line, not to the
		# individual address, so evaluate it once.
		my $ifile_line = m/[^0-9]1v1/;
		foreach $ss (split "0x", $_) {
			next unless $ss =~ m/^([0-9a-f][0-9a-f]*)/;
			# print "iblk 0x$1\n";
			$daddr = $1;
			next unless $ifile_line;
			# print "** ifblk 0x$daddr\n";
			$iloc{$serno} = $daddr;
			$lastaddr = $daddr;
		}
	}

	if (m/SEGMENT *([0-9]*)/) {
		$segnum = $1;
		print "[$segnum]";
	}
}
print "\n";
close(DUMPLFS);
129
# Complain about missing partial-segments: every serial number from $sstart
# up to (but not including) the last one read, for which no segment summary
# address was recorded.  Serial number 0 is never reported.
for ($i = $sstart; $i < $serno; ++$i) {
	if (hex($sumloc{$i}) == 0 && $i > 0) {
		print "Oops, couldn't find pseg $i\n";
	}
}

# If there were no checkpoints, print *something*.
# %iloc is a hash, so count its keys.  The array form $#iloc refers to an
# unrelated array @iloc that is never filled, and is therefore always -1.
if (keys(%iloc) == 0) {
	print "0 $sstart 0\n";
	exit 0;
}
142
143#
144# Now fsck each checkpoint in turn, beginning with $sstart.
145# Because the log wraps we will have to reconstruct the filesystem image
146# as it existed at each checkpoint before running fsck.
147#
148# Look for lines containing only caps or "!", but ignore known
149# false positives.
150#
$error = 0;
$lastgood = $sstart - 1;

# The log file is a convenience only; carry on without it if it cannot be
# opened, but say so on stderr.
open(LOG, ">>check-all.log")
	or warn "check-all: cannot append to check-all.log: $!\n";

# List the recorded Ifile inode addresses in ascending serial-number order,
# both on stdout and in the log.  $_ is used for the key so that the
# special sort variables $a and $b are left alone.
{
	my $list = join("", map { " $iloc{$_}" }
	    sort { $a <=> $b } keys %iloc);
	print "Available checkpoints:$list\n";
	print LOG "Available checkpoints:$list\n";
}
163
#
# copypseg(FIRST, LAST)
#
# Copy the partial segments with serial numbers FIRST through LAST from
# the raw device ($rdev) onto the working file ($wfile), using one dd run
# that covers the block range from the first summary address to the end of
# the last partial segment handled.  Block numbers are in units of $fsize
# bytes.
#
# Returns the next partial-segment serial number after the last one
# copied (usually LAST + 1).  The walk stops early at the first serial
# number with no recorded summary address, and that serial number is then
# returned.
#
# Reads the globals %sumloc, $fps, $fsize, $rdev and $wfile.
#
# NOTE(review): a single contiguous range is copied.  If the last partial
# segment handled lies at a lower address than the first one, the block
# count is not positive and nothing is copied.
#
sub copypseg
{
	my ($first, $stop) = @_;
	my ($blstart, $blstop, $segstop, $cmd);
	my ($totalstart, $totalstop) = (0, 0);
	my $serial;

	for ($serial = $first; $serial <= $stop; ++$serial) {
		$blstart = hex $sumloc{$serial};
		last if $blstart <= 0;
		$totalstart = $blstart if $totalstart == 0;

		# A partial segment ends where the next one starts, unless
		# that is past the end of its segment or at a lower
		# address; then it ends at the segment boundary.
		$blstop = hex $sumloc{$serial + 1};
		$segstop = (int($blstart / $fps) + 1) * $fps;
		if ($segstop < $blstop || $blstop < $blstart) {
			#print "Adjusting $blstop -> $segstop\n";
			$blstop = $segstop;
		}
		$totalstop = $blstop;

		# $blstart and $blstop are numbers here, so format them
		# as hex; hex() would misread their decimal digits.
		printf "pseg %s: write blocks 0x%x-0x%x\n",
		    $serial, $blstart, $blstop - 1;
	}

	# Run dd only when there is something to copy.
	if ($totalstop > $totalstart) {
		$cmd = "dd if=$rdev of=$wfile bs=$fsize seek=$totalstart " .
			"skip=$totalstart conv=notrunc count=" .
			($totalstop - $totalstart);
#		print "$cmd\n";
		system("$cmd >/dev/null 2>&1");
	}

	return $serial;
}
200
# Build the starting image in the working file.  When starting from serial
# number 0 there is no earlier image, so fill the file with garbage (a
# byte-swapped, character-set-converted copy of the raw device); otherwise
# start from a copy of $gfile.
print "Recreating filesystem image as of $sstart:\n";
$cmd = ($sstart == 0)
	? "dd if=$rdev of=$wfile bs=1m conv=swab,oldebcdic"	# garbage
	: "dd if=$gfile of=$wfile bs=1m";
print "$cmd\n";
system($cmd . " >/dev/null 2>&1");

# Overlay the first 16k of the raw device without truncating the file.
print "Copying over first superblock\n";
system("dd if=$rdev of=$wfile bs=8k count=2 conv=notrunc >/dev/null 2>&1");
212
#
# test_fsck(ADDR, FLAGS, PRINTIT)
#
# Run fsck_lfs and scan its combined stdout/stderr for error reports.
#
#   ADDR     Ifile inode address as hex digits without "0x"; used in the
#            default flags and in the error record.
#   FLAGS    complete fsck_lfs argument string; when empty,
#            "-n -f -i 0xADDR $wfile" is used.
#   PRINTIT  when true, print the retained fsck output even if no error
#            was found.
#
# Every output line is appended to LOG.  A line counts as an error when,
# after hex numbers are removed, it contains capital letters and no
# lowercase letters, and it is not one of the known false positives.  A
# non-zero wait status from fsck_lfs is also an error.  On error the
# globals $error, $errsn and $errstr are set; $error is never cleared here.
# Reads the globals $k (serial number being checked) and $wfile.
#
sub test_fsck
{
	my ($addr, $flags, $printit) = @_;
	my $output = "";
	my $cmd;
	local $_;	# do not clobber the caller's $_

	$flags = "-n -f -i 0x$addr $wfile" unless $flags;

	$cmd = "fsck_lfs $flags";
	print "$cmd\n";
	print LOG "$cmd\n";
	if (!open(FSCK, "$cmd 2>&1 |")) {
		# An fsck that could not be started must not be taken for
		# a clean one.
		$error = 1;
		$errsn = $k;
		$errstr = "1 $k 0x$addr CANNOT RUN FSCK_LFS: $!";
		return;
	}
	while (<FSCK>) {
		print LOG;
		my $rline = $_;
		chomp;

		# Known false positives (mismatch between sb and ifile,
		# which should be expected given we're using an arbitrarily
		# old version of the ifile)
		next if m/AVAIL GIVEN/ ||
		    m/BFREE GIVEN/ ||
		    m/DMETA GIVEN/ ||
		    m/NCLEAN GIVEN/ ||
		    m/FREE BUT NOT ON FREE LIST/ ||	# UNWRITTEN inodes OK
		    m/FILE SYSTEM WAS MODIFIED/ ||
		    m/FREE LIST HEAD IN SUPERBLOCK/;

		# Fsck reports errors in ALL CAPS
		# But don't count hex numbers as "lowercase".
		my $oline = $_;
		s/0x[0-9a-f]*//g;
		if (m/[A-Z]/ && ! m/[a-z]/) {
			$error = 1;
			$errsn = $k;
			$errstr = "1 $k 0x$addr $oline";
		}

		# Keep everything for later display, except for some
		# things we will see every single time.
		next if m/checkpoint invalid/ ||
		    m/skipping free list check/ ||
		    m/expect discrepancies/;
		$output .= $rline;
	}
	close(FSCK);

	if ($? != 0) {
		$error = 1;
		$errsn = $k;
		# $? is a number, so format it as hex; hex() would misread
		# its decimal digits.
		$errstr = sprintf("1 %s 0x%s <0x%x>", $k, $addr, $?);
	}

	if ($error || $printit) {
		print $output;
	}
	return;
}
276
$blstart = 0;
$fps = $ssize / $fsize;		# segment size in units of $fsize
$oind = $sstart || 1;		# next partial segment to copy

# Walk the checkpoints in serial-number order.  For each one, bring the
# working file up to that checkpoint and fsck it.  The first error ends
# the walk.
BIGLOOP: foreach $k (sort { $a <=> $b } keys %iloc) {
	$a = $iloc{$k};

	if (hex($a) > hex($lastaddr)) {
		print "Skipping out-of-place checkpoint $k at $a\n";
		next BIGLOOP;
	}

	# Optionally exercise the roll-forward agent at every partial
	# segment between the previous checkpoint and this one.
	if ($test_rfw && $iloc{$oind - 1}) {
		my $prev = $iloc{$oind - 1};
		for ($tk = $oind; $tk < $k; $tk++) {
			my $msg = "Test roll-forward agent at non-checkpoint pseg $tk\n";
			print $msg;
			print LOG $msg;
			copypseg($oind, $tk);
			# Add -d flag here for verbose debugging info
			$flags = "-p -f -i 0x$prev $wfile";
			test_fsck($prev, $flags, 1);
			last BIGLOOP if $error;

			# note lack of -i flag, since the roll-forward
			# will have rewritten the superblocks.
			test_fsck($prev, "-n -f $wfile", 0);
			last BIGLOOP if $error;
		}
	}

	print "Recreate fs state at checkpoint pseg $k (from ", $oind - 1,
	      ")\n";
	$oind = copypseg($oind, $k);

	test_fsck($a, "", 0);

	last BIGLOOP if $error;
	$lastgood = $k;	# record last good serial number
}
314
# Print the result line and stop.  On error that is the recorded error
# string.  Otherwise copy the remaining partial segments into the working
# file, save the raw device as the new good image, and report the last
# good serial number together with the last checkpoint address.
# Either way the script exits here, so nothing after this point runs.
if ($errstr) {
	print "$errstr\n";
} else {
	print "Bring filesystem state up to log wrap\n";
	$lastgood = copypseg($oind, 100000000000) - 1;

	print "Copying this good image to $gfile\n";
	system("dd bs=1m if=$rdev of=$gfile >/dev/null 2>&1");
	print "0 $lastgood 0x$a\n";
}
exit 0;
329
330#
331# Ifile write-checking paranoia.
332#
333# If we found an error, try to find which blocks of the Ifile inode changed
334# between the last good checkpoint and this checkpoint; and which blocks
335# *should* have changed.  This means (1) which segments were written; and
336# (2) which inodes were written.  The 0 block of the Ifile should always
337# have changed since lfs_avail is always in flux.
338#
339
# Block 0 of the Ifile is always expected to have changed.
%iblk = (0 => 1);
%iblk_done = ();
%why = ();

# Build a dumplfs command with one -s option for each run of partial
# segments, from the one after the last good serial number through the
# failing one, that share a segment number.
$cmd = "dumplfs";
$oseg = -1;
for ($i = $lastgood + 1; $i <= $errsn; $i++) {
	next if $oseg == $snloc{$i};
	$oseg = 0 + $snloc{$i};
	$cmd .= " -s$oseg";
}
$cmd .= " $rdev";
353
# Run the dumplfs command held in $cmd.  This is the error-reporting path,
# so a failure to start it is only warned about; the error string is still
# printed at the end.
open(DUMPLFS, "$cmd |")
	or warn "check-all: cannot run $cmd: $!\n";

# Leading part of the output, up to the first SEGMENT line: pick up the
# values used to map an inode number to an Ifile block.
while (<DUMPLFS>) {
	$ifpb = $1 if m/ifpb *([0-9]*)/;
	$sepb = $1 if m/sepb *([0-9]*)/;
	$cleansz = $1 if m/cleansz *([0-9]*)/;
	$segtabsz = $1 if m/segtabsz *([0-9]*)/;
	last if m/SEGMENT/;
}

# Segment part: collect the Ifile blocks that should have been written
# (%iblk, with the inode numbers responsible in %why) and the Ifile blocks
# that actually were written (%iblk_done).
while (<DUMPLFS>) {
	chomp;

	# Skip over partial segments outside our range of interest
	if (m/roll_id.*serial *([0-9]*)/) {
		$serno = $1;
		if ($serno <= $lastgood || $serno > $errsn) {
			# Skip the rest of this partial segment
			while (<DUMPLFS>) {
				last if m/Segment Summary/ || m/SEGMENT/;
			}
			next;
		}
	}

	# Look for inodes.  The braces are matched literally and are
	# escaped, because newer Perl versions warn about or reject some
	# unescaped left braces in a pattern.
	if (m/Inode addresses/) {
		s/^[^{]*\{/ /;
		s/\}[^{]*$/ /;
		s/\}[^{]*\{/,/g;
		s/v[0-9]*//g;
		@ilist = split(',');
		foreach $i (@ilist) {
			$i =~ s/ *//g;
			next if $i == 1;	# the Ifile inode itself
			$iaddr = $cleansz + $segtabsz + int($i / $ifpb);
			$iblk{$iaddr} = 1;
			$why{$iaddr} .= " $i";
		}
	}

	# Look for Ifile blocks actually written: the all-numeric lines
	# that follow the FINFO header for inode 1.
	if (m/FINFO for inode: ([0-9]*) version/) {
		$i = $1;
		$inoblkmode = ($i == 1);
	}
	if ($inoblkmode && m/^[-\t 0-9]*$/) {
		s/\t/ /g;
		s/^ *//;
		s/ *$//;
		@bn = split(' ');
		# A lexical loop variable is used because $b is reserved
		# for sort.
		foreach my $blk (@bn) {
			$iblk_done{$blk} = 1;
		}
	}
}
close(DUMPLFS);
417
# Report found and missing Ifile blocks, in ascending block order.  The
# expected blocks are sorted once and then split into written and not
# written.  For each missing block the inode numbers that made it expected
# are shown in parentheses.
my @expected = sort { $a <=> $b } keys %iblk;

print "Ifile blocks found:";
print map { " $_" } grep { $iblk_done{$_} == 1 } @expected;
print "\n";

print "Ifile blocks missing:";
foreach my $blk (grep { $iblk_done{$_} == 0 } @expected) {
	$why{$blk} =~ s/^ *//o;
	print " $blk ($why{$blk})";
}
print "\n";

print "$errstr\n";
exit 0;
438