Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.12
      1 #!/usr/bin/env perl
      2 #
      3 # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4 # All rights reserved.
      5 #
      6 # This code is derived from software contributed to The NetBSD Foundation
      7 # by Hubert Feyrer <hubert (at] feyrer.de>.
      8 # 
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 
     18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28 # POSSIBILITY OF SUCH DAMAGE.
     29 
     30 #
     31 # Extract BSD-mandated copyright messages for NetBSD documentation
     32 #
     33 # Usage:
     34 # 1) find /usr/src -type f -print \
     35 #    | perl extract-contrib-string.pl
     36 #    >x
     37 #
     38 # 2) merge text after "--------" in "x" into
     39 #    src/distrib/notes/common/legal.common
     40 #
     41 # Options:
     42 #
     43 #     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
     44 #
     45 # where
     46 #     -d  debug output
     47 #     -h  html output
     48 #     -x  xml/docbook output
     49 #     -?  display help/usage message
     50 
     51 
     52 $ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
     53 $ack_line2="display the following( acknowledge?ment)?";
     54 $ack_endline=
     55       '(\d\.\s*(Neither the name'
     56     .         '|The name of the company nor the name'	# Wasn't my idea
     57     .         '|The name of the author may not'
     58     .         '|The name of .* must not be used to endorse'
     59     .         '|The names? (of )?.* nor the names? of'
     60     .         '|The names? (of )?.* or any of it\'?s members'
     61     .         '|Redistributions of any form whatsoever'
     62     .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
     63     .'|(THIS SOFTWARE IS PROVIDED)'
     64     .'|(The word \'cryptographic\' can be left out if)'
     65     .'|(may be used to endorse)'
     66     .'|(@end cartouche)'
     67     .'|(Redistribution and use in source and binary forms)'
     68     .'|(may not be used to endorse)'
     69     .'|(\.IP 4)'
     70     .'|(ALLOW FREE USE OF)'
     71     .'|(materials provided with the distribution)'
     72     .'|(@InsertRedistribution@)';
     73 
     74 $known_bad_clause_3_wording=
     75       'usr.bin/lex/.*'				# UCB
     76     .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
     77     .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
     78     ;	
     79 
     80 sub warning {
     81     local($fn,$msg) = @_;
     82     print "XXX $fn line $.: $msg\n"
     83 }
     84 
     85 while ($#ARGV >= 0) {
     86     $debug=1 if ($ARGV[0] =~ /-d/i);
     87     $html=1  if ($ARGV[0] =~ /-h/i);
     88     $xml=1  if ($ARGV[0] =~ /-x/i);
     89     $usage=1  if ($ARGV[0] =~ /-\?/);
     90     shift(@ARGV);
     91 }
     92 
     93 if ($usage) {
     94     print "usage: find /usr/src -type f -print |\n" .
     95 	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
     96 	"   where\n" .
     97 	"    -h   output html\n" .
     98 	"    -x   output xml/docbook\n" .
     99 	"    -d   debug\n" .
    100 	"    -?   display this help message\n";
    101     exit(0);
    102 }
    103 
    104 $comments = !$html && !$xml;
    105 
    106 file:
    107 while(<>) {
    108     chomp();
    109     $fn=$_;
    110     
    111     open(F, "$fn") || die "cannot read $fn: $!\n";
    112 
    113   line:
    114     while(<F>) {
    115 	if (0 and /$ack_line2/i){
    116 	    print "?> $_" if $debug;
    117 	    
    118 	    if ($fn !~ m,$known_bad_clause_3_wording,) {
    119 		warning($fn, "clause 3 start not caught");
    120 	    }
    121 	    last line;
    122 	}
    123 	
    124 	print "0> $_" if $debug;
    125 
    126 	# special case perl script generating a license (openssl's
    127 	# mkerr.pl) - ignore the quoted license, there is another one
    128 	# inside:
    129 	if (/^\"\s\*.*$ack_line1.*\\n\"\,/) {
    130 		while(!/$ack_endline/i) {
    131 		    print "S> $_" if $debug;
    132 		    $_ = <F>;
    133 		}
    134 	}
    135 
    136 	if (/$ack_line1/i
    137 	    or (/$ack_line2/ and $fn =~ m,$known_bad_clause_3_wording,)) {
    138 	    
    139 	    print "1> $_" if $debug;
    140 
    141 	    $_=<F>
    142 		unless $fn =~ m,$known_bad_clause_3_wording,;
    143 	    if (/$ack_line2/i or $fn =~ m,$known_bad_clause_3_wording,){
    144 		
    145 		print "2> $_" if $debug;
    146 		
    147 		$msg="";
    148 		$msg = $_ if ($fn =~ m,$known_bad_clause_3_wording, and /``/);
    149 		$cnt=0;
    150 		$_=<F>;
    151 		while(!/$ack_endline/i) {
    152 		    
    153 		    print "C> $_" if $debug;
    154 
    155 		    $msg .= $_;
    156 		    $cnt++;
    157 		    $_ = <F>;
    158 		    if ($cnt > 10) {
    159 			warning($fn,"loooong copyright?");
    160 			last line;
    161 		    }
    162 		}
    163 
    164 		print "E> $_" if $debug;
    165 		
    166 		# post-process
    167 
    168 		if ($fn =~ m,$known_bad_clause_3_wording,) {
    169 			while ($msg !~ /^.*``.*\n/) {
    170 				last if (!$msg);
    171 				$msg =~ s/^.*\n//o;
    172 			}
    173 			$msg =~ s/^.*``//o;
    174 			$msg =~ s/\n.*``//o;
    175 			$msg =~ s/''.*$//o;
    176 		}
    177 
    178 		# *roff
    179 		while ($msg =~ /^\.\\"\s*/) {
    180 			$msg =~ s/^\.\\"\s*//o;
    181 		}
    182 		while ($msg =~ /\n\.\\"\s*/) {
    183 			$msg =~ s/\n\.\\"\s*/\n/o;
    184 		}
    185 		$msg =~ s/\n\.\\"\s*$/\n/g;
    186 
    187 		# C++/C99
    188 		while ($msg =~ /^\s*\/\/\s*/) {
    189 			$msg =~ s/^\s*\/\/\s*//o;
    190 		}
    191 		while ($msg =~ /\n\s*\/\/\s*$/) {
    192 			$msg =~ s/\n\s*\/\/\s*$//o;
    193 		}
    194 		$msg =~ s/\n\s*\/\/\s*/\n/g;
    195 
    196 		# C
    197 		while ($msg =~ /^\s*\*\s*/) {
    198 			$msg =~ s/^\s*\*\s*//o;
    199 		}
    200 		while ($msg =~ /\n\s*\*\s*$/) {
    201 			$msg =~ s/\n\s*\*\s*$//o;
    202 		}
    203 		$msg =~ s/\n\s*\*\s*/\n/g;
    204 
    205 		# texinfo @c
    206 		while ($msg =~ /^\s*\@c\s+/) {
    207 			$msg =~ s/^\s*\@c\s+//o;
    208 		}
    209 		while ($msg =~ /\n\s*\@c\s+$/) {
    210 			$msg =~ s/\n\s*\@c\s+$//o;
    211 		}
    212 		$msg =~ s/\n\s*\@c\s+/\n/g;
    213 
    214 		$msg =~ s/^REM\s*//g;			# BASIC?!?
    215 		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
    216 		$msg =~ s/^dnl\s*//g;			# m4
    217 		$msg =~ s/\dnl\s*/\n/g;			# m4
    218 		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
    219 		$msg =~ s/\n\s+-\s+/ /g;		#
    220 		$msg =~ s/^[#\\\|";]*\s*//g;		# sh etc.
    221 		$msg =~ s/\n[#\\\|";]\s*/\n/g;		# sh etc.
    222 		$msg =~ s/^[ 	*]*//g;      		# C
    223 		$msg =~ s/\n[ 	*]*/\n/g;    		# C
    224 
    225 		$msg =~ s/\@cartouche\n//;              # texinfo
    226 
    227 		$msg =~ s/
//g;
    229 		$msg =~ s/\s*\n/\n/g;
    230 		$msg =~ s/^\s*//;
    231 		$msg =~ s/\\\@/\@/g;
    232 		$msg =~ s/\n\n/\n/g;
    233 	        $msg =~ s/^\s*``//;
    234 	        $msg =~ s/''\s*$//;
    235 		$msg =~ s/^\"//o;
    236 		$msg =~ s/\"$//o;
    237 
    238 		# Split up into separate paragraphs
    239 		#
    240 		$msgs=$msg;
    241 		$msgs=~s/(This (software|product))/|$1/g;
    242 		$msgs=~s,^\|,,;
    243 	      msg:
    244 		foreach $msg (split(/\|/, $msgs)) {
    245 		    while ($msg =~ /[\n\s]+$/) {
    246 			$msg =~ s/[\n\s]+$//o;
    247 		    }
    248 		    next if ($msg eq "");
    249 		    if ($comments) {
    250 			print ".\\\" File $fn:\n";
    251 			print "$msg";
    252 			print "\n\n";
    253 		    }
    254 		    
    255 		    # Figure out if there's a version w/ or w/o trailing dot
    256 		    # 
    257 		    if ($msg =~ /\.$/) {
    258 			# check if there's a version of the same msg
    259 			# w/o a trailing dot
    260 			$msg2=$msg;
    261 			$msg2=~s,\.$,,;
    262 			if ($copyrights{"$msg2"}) {
    263 			    # already there - skip
    264 			    print "already there, w/o dot - skipping!\n"
    265 				if $debug;
    266 			    next msg;
    267 			}
    268 			
    269 			# ... maybe with other case?
    270 			$lc_msg2=lc($msg2);
    271 			if ($lc_copyrights{$lc_msg2}) {
    272 			    print "already there, in different case - skipping\n"
    273 				if $debug;
    274 			    next msg;
    275 			}
    276 		    } else {
    277 			# check if there's a version of the same msg
    278 			# with a trailing dot
    279 			$msg2=$msg;
    280 			$msg2.=".";
    281 			if ($copyrights{"$msg2"}) {
    282 			    # already there - skip
    283 			    print "already there, w/ dot - skipping!\n"
    284 				if $debug;
    285 			    next msg;
    286 			}
    287 			
    288 			# ... maybe with other case?
    289 			$lc_msg2=lc($msg2);
    290 			if ($lc_copyrights{$lc_msg2}) {
    291 			    print "already there, in different case - skipping\n"
    292 				if $debug;
    293 			    next msg;
    294 			}
    295 		    }
    296 
    297 		    $copyrights{$msg} = 1;
    298 		    $lc_copyrights{$lc_msg} = 1;
    299 		}		 
    300 
    301 	    } else {
    302 		print "?> $_" if $debug;
    303 
    304                 if ($fn !~ m,$known_bad_clause_3_wording,) {
    305 		    warning($fn, "bad clause 3?");
    306                 }
    307 		last line;
    308 	    }
    309 	}
    310     }
    311     close(F);
    312 }
    313 
    314 
    315 if ($html) {
    316     print "<ul>\n";
    317     foreach $msg (sort keys %copyrights) {
    318 	print "<li>$msg</li>\n";
    319     }
    320     print "</ul>\n";
    321 } elsif ($xml) {
    322     foreach $msg (sort keys %copyrights) {
    323 	print "<listitem>$msg</listitem>\n";
    324     }
    325 } else {
    326     print "------------------------------------------------------------\n";
    327 
    328     $firsttime=1;
    329     foreach $msg (sort keys %copyrights) {
    330 	if ($firsttime) {
    331 	    $firsttime=0;
    332 	} else {
    333 	    print ".It\n";
    334 	}
    335 	print "$msg\n";
    336     }
    337 }
    338