#!/usr/pkg/bin/perl
#
#	$NetBSD: check-all,v 1.5 2008/04/30 13:10:52 martin Exp $
#
# Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Konrad E. Schroder <perseant@hhhh.org>.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

#
# Use dumplfs to find all locations of the Ifile inode on a given disk.
# Order these by serial number and call fsck_lfs on the raw disk for each.
# If any fsck gives errors (any line of all capital letters, with a few
# exceptions) print an error code with the daddr of the failing Ifile inode
# location.
#

#
# Command-line arguments, in order: the raw device handed to dumplfs, the
# image file read when $sstart is non-zero (and rewritten at the end of a
# clean run), the working image file that fsck_lfs is run against, and the
# first partial-segment serial number of interest.
#
$| = 1;			# unbuffered stdout
$rdev = $ARGV[0];
$gfile = $ARGV[1];
$wfile = $ARGV[2];
$sstart = $ARGV[3];
$test_rfw = 1; # $ARGV[4];
$rollid = 0;
$ssize = 0;
$fsize = 0;
open(DUMPLFS, "dumplfs $rdev |")
	or die "check-all: cannot run dumplfs on $rdev: $!\n";

# Look for "roll_id" so we don't use garbage.  On the way there, pick up
# the first non-zero ssize and fsize values that dumplfs prints.
while (<DUMPLFS>) {
	if ($ssize == 0 && m/ssize *([0-9]*)/) {
		$ssize = $1;
	}
	if ($fsize == 0 && m/fsize *([0-9]*)/) {
		$fsize = $1;
	}
	if (m/roll_id *([x0-9a-f]*)/) {
		$rollid = $1;
		last;
	}
}

# Now look for inodes and segment summaries.  Build a hash table of these
# based on serial number.  Ignore any with serial numbers lower than $sstart.
#
#   %iloc   serial number -> address recorded for the Ifile inode
#   %snloc  serial number -> segment number the partial segment was seen in
#   %sumloc serial number -> address of the segment summary (hex digits)

%iloc = ();
%snloc = ();
%sumloc = ();
print "Reading segments:";
while (<DUMPLFS>) {
	if (m/roll_id *([0-9a-f]*)/) {
		# print "rollid $1\n";
		if ("0x$1" ne $rollid) {
			# Skip the rest of this segment
			print "{skip bad rollid 0x$1}";
			while (<DUMPLFS>) {
				last if m/SEGMENT/;
			}
			# Fall through: $_ now holds the line that ended
			# the skip and is examined by the tests below.
		}
	}
	if (m/roll_id.*serial *([0-9]*)/) {
		$serno = $1;
		$snloc{$serno} = $segnum;
		$sumloc{$serno} = $sumloc;
		print "($serno)";
		if ($serno < $sstart) {
			# Skip the rest of this partial segment
			#print "{skip bad serno $serno}";
			while (<DUMPLFS>) {
				last if m/Segment Summary/ ||
					m/SEGMENT/;
			}
			# Fall through
		}
	}
	if (m/Segment Summary Info at 0x([0-9a-f]*)/) {
		$sumloc = $1;
		next;
	}
	if (m/0x([0-9a-f]*)/) {
		foreach $ss (split "0x", $_) {
			if ($ss =~ m/^([0-9a-f][0-9a-f]*)/) {
				# print "iblk 0x$1\n";
				$daddr = $1;
				# NOTE(review): this tests the whole line, not
				# just $ss, so the last address on a line that
				# mentions "1v1" is the one that is kept.
				if (m/[^0-9]1v1/) {
					# print "** ifblk 0x$daddr\n";
					$iloc{$serno} = $daddr;
					$lastaddr = $daddr;
				}
			}
		}
	}
	if (m/SEGMENT *([0-9]*)/) {
		$segnum = $1;
		print "[$segnum]";
	}
}
print "\n";
close(DUMPLFS);

# Complain about missing partial-segments: every serial number between
# $sstart and the last one seen should have a segment summary address.
for ($i = $sstart; $i < $serno; ++$i) {
	if (hex($sumloc{$i}) == 0 && $i > 0) {
		print "Oops, couldn't find pseg $i\n";
	}
}

# If there were no checkpoints, print *something*.
# The checkpoints live in the hash %iloc; the array @iloc is never used,
# so the emptiness test has to be made on the hash.
if (scalar(keys %iloc) == 0) {
	print "0 $sstart 0\n";
	exit 0;
}

#
# Now fsck each checkpoint in turn, beginning with $sstart.
# Because the log wraps we will have to reconstruct the filesystem image
# as it existed at each checkpoint before running fsck.
#
# Look for lines containing only caps or "!", but ignore known
# false positives.
#
$error = 0;
$lastgood = $sstart - 1;
# Logging is best-effort: keep going without a log file if it cannot be
# opened, but say so.
open(LOG, ">>check-all.log")
	or warn "check-all: cannot append to check-all.log: $!\n";
print "Available checkpoints:";
print LOG "Available checkpoints:";
foreach $k (sort { $a <=> $b } keys %iloc) {
	print " $iloc{$k}";
	print LOG " $iloc{$k}";
}
print "\n";
print LOG "\n";

#
# Copy the partial segments $_[0]--$_[1] from the raw device onto
# the working file.  Return the next partial-segment serial number
# after the last one we copied (usually $_[1] + 1, except in case of
# an error).
#
# Reads the globals %sumloc (serial number -> summary address in hex
# digits), $fps, $fsize, $rdev and $wfile.  Copying stops early at the
# first serial number that has no recorded summary address.  The whole
# range is transferred with a single dd invocation.
#
sub copypseg
{
	my ($first, $last) = @_;
	my ($blstart, $blstop, $segstop, $cmd);
	my $totalstart = 0;
	my $totalstop = 0;
	my $i;

	for ($i = $first; $i <= $last; ++$i) {
		$blstart = hex($sumloc{$i});
		last if $blstart <= 0;
		$totalstart = $blstart if $totalstart == 0;

		# This pseg ends where the next one starts, unless the next
		# one is unknown, lies before this one, or is beyond the end
		# of the current segment; then it ends with the segment.
		$blstop = hex($sumloc{$i + 1});
		$segstop = (int($blstart / $fps) + 1) * $fps;
		if ($segstop < $blstop || $blstop < $blstart) {
			#print "Adjusting $blstop -> $segstop\n";
			$blstop = $segstop;
		}
		$totalstop = $blstop;

		# $blstart and $blstop are numbers by now, so format them
		# as hex rather than running them through hex() again.
		print "pseg $i: write blocks ",
		    sprintf("0x%x-0x%x", $blstart, $blstop - 1), "\n";
	}
	$cmd = "dd if=$rdev of=$wfile bs=$fsize seek=$totalstart " .
		"skip=$totalstart conv=notrunc count=" .
		($totalstop - $totalstart);
#	print "$cmd\n";
	if (system("$cmd >/dev/null 2>&1") != 0) {
		warn "check-all: '$cmd' failed (status $?)\n";
	}

	return $i;
}

# Build the starting image in the working file.  When starting from serial
# number 0 the working file is deliberately filled with a scrambled copy of
# the raw device; otherwise it starts as a copy of $gfile.
print "Recreating filesystem image as of $sstart:\n";
if ($sstart == 0) {
	$cmd = "dd if=$rdev of=$wfile bs=1m conv=swab,oldebcdic"; # garbage
} else {
	$cmd = "dd if=$gfile of=$wfile bs=1m";
}
print "$cmd\n";
system("$cmd >/dev/null 2>&1");

# The first 16k of the working file always comes from the raw device.
print "Copying over first superblock\n";
system("dd if=$rdev of=$wfile bs=8k count=2 conv=notrunc >/dev/null 2>&1");

#
# Run fsck_lfs and look for trouble in its output.
#
#   $_[0]  Ifile inode address (hex digits, no "0x"); used for the default
#          flags and in the error string.
#   $_[1]  complete fsck_lfs argument string; when empty,
#          "-n -f -i 0x<addr> $wfile" is used.
#   $_[2]  if true, print the collected fsck output even without an error.
#
# Every output line is copied to LOG.  A line that contains capital
# letters and no lowercase letters (hex numbers aside) counts as an error,
# except for a list of known false positives.  A non-zero exit status of
# fsck_lfs counts as an error as well.
#
# Results are left in the globals $error, $errsn and $errstr; the serial
# number is taken from the caller's loop variable $k.  No useful return
# value.
#
sub test_fsck
{
	my ($iaddr, $flags, $printit) = @_;
	my $output = "";

	$flags = "-n -f -i 0x$iaddr $wfile" unless $flags;

	my $cmd = "fsck_lfs $flags";
	print "$cmd\n";
	print LOG "$cmd\n";
	if (!open(FSCK, "$cmd 2>&1 |")) {
		# Being unable to start fsck at all is a failure too.
		$error = 1;
		$errsn = $k;
		$errstr = "1 $k 0x$iaddr <cannot run fsck_lfs: $!>";
		return;
	}
	while (my $line = <FSCK>) {
		print LOG $line;
		my $rline = $line;
		chomp $line;

		# Known false positives (mismatch between sb and ifile,
		# which should be expected given we're using an arbitrarily
		# old version of the ifile)
		if ($line =~ m/AVAIL GIVEN/ ||
		    $line =~ m/BFREE GIVEN/ ||
		    $line =~ m/DMETA GIVEN/ ||
		    $line =~ m/NCLEAN GIVEN/ ||
		    $line =~ m/FREE BUT NOT ON FREE LIST/ ||	# UNWRITTEN inodes OK
		    $line =~ m/FILE SYSTEM WAS MODIFIED/ ||
		    $line =~ m/FREE LIST HEAD IN SUPERBLOCK/) {
			next;
		}

		# Fsck reports errors in ALL CAPS
		# But don't count hex numbers as "lowercase".
		my $oline = $line;
		$line =~ s/0x[0-9a-f]*//g;
		if ($line =~ m/[A-Z]/ && $line !~ m/[a-z]/) {
			$error = 1;
			$errsn = $k;
			$errstr = "1 $k 0x$iaddr $oline";
			# last;
		}

		# Log everything we get, except for some things we
		# will see every single time.
		if ($line =~ m/checkpoint invalid/ ||
		    $line =~ m/skipping free list check/ ||
		    $line =~ m/expect discrepancies/) {
			next;
		}
		$output .= $rline;
	}
	close(FSCK);

	if ($? != 0) {
		$error = 1;
		$errsn = $k;
		# $? is a number; print it in hex instead of feeding its
		# decimal digits to hex().
		$errstr = "1 $k 0x$iaddr <" . sprintf("0x%x", $?) . ">";
	}

	if ($error || $printit) {
		print $output;
	}
}

#
# Walk the checkpoints in serial-number order.  Before each one, optionally
# exercise the roll-forward agent at every intermediate partial segment;
# then bring the working image up to the checkpoint and fsck it.
#
$blstart = 0;
die "check-all: no usable fsize found in dumplfs output\n"
	unless $fsize > 0;
$fps = $ssize / $fsize;
$oind = ($sstart ? $sstart : 1);
$ckaddr = undef;
# $k must stay a package variable: test_fsck reads it to fill in the
# serial number of a failing checkpoint.
BIGLOOP: foreach $k (sort { $a <=> $b } keys %iloc) {
	$ckaddr = $iloc{$k};

	if (hex($ckaddr) > hex($lastaddr)) {
		print "Skipping out-of-place checkpoint $k at $ckaddr\n";
		next;
	}

	if ($test_rfw && $iloc{$oind - 1}) {
		for ($tk = $oind; $tk < $k; $tk++) {
			print "Test roll-forward agent at non-checkpoint pseg $tk\n";
			print LOG "Test roll-forward agent at non-checkpoint pseg $tk\n";
			copypseg($oind, $tk);
			# Add -d flag here for verbose debugging info
			$flags = "-p -f -i 0x" . $iloc{$oind - 1} . " $wfile";
			test_fsck($iloc{$oind - 1}, $flags, 1);
			last BIGLOOP if $error;

			# note lack of -i flag, since the roll-forward
			# will have rewritten the superblocks.
			test_fsck($iloc{$oind - 1}, "-n -f $wfile", 0);
			last BIGLOOP if $error;
		}
	}

	print "Recreate fs state at checkpoint pseg $k (from " . ($oind - 1) .
	      ")\n";
	$oind = copypseg($oind, $k);

	test_fsck($ckaddr, "", 0);

	last if $error;
	$lastgood = $k;	# record last good serial number
}

# Result line: "1 <serial> 0x<addr> <detail>" on failure,
# "0 <last good serial> 0x<addr>" on success.
# NOTE(review): both branches below exit, so nothing after this point in
# the file is ever executed.
if ($errstr) {
	print "$errstr\n";
	exit 0;
}

if (!$errstr) {
	print "Bring filesystem state up to log wrap\n";
	$lastgood = copypseg($oind, 100000000000) - 1;

	print "Copying this good image to $gfile\n";
	system("dd bs=1m if=$rdev of=$gfile >/dev/null 2>&1");
	print "0 $lastgood 0x$ckaddr\n";
	exit 0;
}

#
# Ifile write-checking paranoia.
#
# If we found an error, try to find which blocks of the Ifile inode changed
# between the last good checkpoint and this checkpoint; and which blocks
# *should* have changed.  This means (1) which segments were written; and
# (2) which inodes were written.  The 0 block of the Ifile should always
# have changed since lfs_avail is always in flux.
#
# NOTE(review): the result-reporting code above exits on both the error
# and the success path, so this section is currently unreachable.
#

# Ask dumplfs only for the segments holding the partial segments between
# the last good checkpoint and the failing one.
$cmd = "dumplfs";
$oseg = -1;
%iblk = ();		# Ifile blocks that should have been written
%iblk_done = ();	# Ifile blocks that were written
%why = ();		# Ifile block -> inode numbers that require it
$iblk{0} = 1;
for ($i = $lastgood + 1; $i <= $errsn; $i++) {
	if ($oseg != $snloc{$i}) {
		$oseg = 0 + $snloc{$i};
		$cmd .= " -s$oseg";
	}
}
$cmd .= " $rdev";

open(DUMPLFS, "$cmd |");
# Superblock part of the output: collect the layout parameters.
while (<DUMPLFS>) {
	if (m/ifpb *([0-9]*)/) {
		$ifpb = $1;
	}
	if (m/sepb *([0-9]*)/) {
		$sepb = $1;
	}
	if (m/cleansz *([0-9]*)/) {
		$cleansz = $1;
	}
	if (m/segtabsz *([0-9]*)/) {
		$segtabsz = $1;
	}
	last if m/SEGMENT/;
}
while (<DUMPLFS>) {
	chomp;

	# Skip over partial segments outside our range of interest
	if (m/roll_id.*serial *([0-9]*)/) {
		$serno = $1;
		if ($serno <= $lastgood || $serno > $errsn) {
			# Skip the rest of this partial segment
			while (<DUMPLFS>) {
				last if m/Segment Summary/ || m/SEGMENT/;
			}
			next;
		}
	}

	# Look for inodes.  Reduce the line to a comma-separated list of
	# inode numbers: drop everything outside the braces and the
	# "v<version>" suffixes.
	if (m/Inode addresses/) {
		s/^[^{]*\{/ /;
		s/}[^{]*$/ /;
		s/}[^{]*\{/,/g;
		s/v[0-9]*//g;
		@ilist = split(',');
		foreach $i (@ilist) {
			$i =~ s/ *//g;
			next if $i == 1;
			$iaddr = $cleansz + $segtabsz + int ($i / $ifpb);
			$iblk{$iaddr} = 1;
			$why{$iaddr} .= " $i";
		}
	}

	# Look for Ifile blocks actually written: the purely numeric lines
	# that follow the FINFO header for inode 1.
	if (m/FINFO for inode: ([0-9]*) version/) {
		$i = $1;
		$inoblkmode = ($i == 1);
	}
	if ($inoblkmode && m/^[-\t 0-9]*$/) {
		s/\t/ /g;
		s/^ *//;
		s/ *$//;
		@bn = split(' ');
		foreach $blk (@bn) {
			$iblk_done{$blk} = 1;
		}
	}
}
close(DUMPLFS);

# Report found and missing Ifile blocks
print "Ifile blocks found:";
foreach $blk (sort { $a <=> $b } keys %iblk) {
	if ($iblk_done{$blk} == 1) {
		print " $blk";
	}
}
print "\n";

print "Ifile blocks missing:";
foreach $blk (sort { $a <=> $b } keys %iblk) {
	if ($iblk_done{$blk} == 0) {
		$why{$blk} =~ s/^ *//;
		print " $blk ($why{$blk})";
	}
}
print "\n";

print "$errstr\n";
exit 0;