Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.6
      1 #!/usr/bin/env perl
      2 #
      3 # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4 # All rights reserved.
      5 #
      6 # This code is derived from software contributed to The NetBSD Foundation
      7 # by Hubert Feyrer <hubert@feyrer.de>.
      8 # 
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 
     18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28 # POSSIBILITY OF SUCH DAMAGE.
     29 
     30 #
     31 # Extract BSD-mandated copyright messages for NetBSD documentation
     32 #
     33 # Usage:
     34 # 1) find /usr/src -type f -print \
     35 #    | perl extract-contrib-string.pl \
     36 #    >x
     37 #
     38 # 2) merge text after "--------" in "x" into
     39 #    src/distrib/notes/common/legal.common
     40 #
     41 # Options:
     42 #
     43 #     perl extract-contrib-string.pl [-d] [-h]
     44 #
     45 # where
     46 #     -d  debug output
     47 #     -h  html output
     48 
     49 
# Regular-expression source strings used by the scanner below.  They are
# plain strings (not qr// objects) and are interpolated into matches.

# Matched against a line to detect the start of an advertising clause.
$ack_line1 = '[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?';

# Matched against the line that follows an $ack_line1 hit.
$ack_line2 = 'display the following( acknowledge?ment)?';

# Wordings that may open the numbered clause ("4. Neither the name ...")
# following the acknowledgement text.
my @next_clause_openers = (
    q{Neither the name},
    q{The name of the company nor the name},	# Wasn't my idea
    q{The name of the author may not},
    q{The name of .* must not be used to endorse},
    q{The names? (of )?.* nor the names? of},
    q{The names? (of )?.* or any of it'?s members},
    q{Redistributions of any form whatsoever},
    q{The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used},
);

# Other phrases that also terminate the acknowledgement text; each one
# becomes its own parenthesised alternative.
my @other_end_markers = (
    q{THIS SOFTWARE IS PROVIDED},
    q{The word 'cryptographic' can be left out if},
    q{may be used to endorse},
    q{@end cartouche},
    q{Redistribution and use in source and binary forms},
    q{may not be used to endorse},
    q{\.IP 4},
    q{ALLOW FREE USE OF},
    q{materials provided with the distribution},
    q{@InsertRedistribution@},
);

# A line matching this pattern ends the collection of acknowledgement text.
$ack_endline =
      '(\d\.\s*(' . join('|', @next_clause_openers) . '))'
    . join('', map { "|($_)" } @other_end_markers);

# Files whose path matches this pattern are accepted even though their
# clause wording is not spread over two lines in the usual way.
$known_bad_clause_3_wording = join('|',
    'usr.bin/lex/.*',			# UCB
    'usr.sbin/hilinfo/hilinfo.c',	# CSS @ Utah
);
     76 
# Report a problem found while scanning a file.
#
#   $fn   name of the file the problem was found in
#   $msg  short description of the problem
#
# The report is written to standard output as
#   XXX <file> line <n>: <message>
# where <n> is $., the line number of the most recently read filehandle.
sub warning {
    # Lexical copies: local() would temporarily overwrite the package
    # globals of the same name for the duration of the call.
    my ($fn, $msg) = @_;
    print "XXX $fn line $.: $msg\n";
}
     81 
     82 
# Consume leading command-line switches from @ARGV.
#
#   -d / -D   sets $debug (trace output while scanning)
#   -h / -H   sets $html  (final list is printed as an HTML <ul>)
#
# Switches may appear in any order and are removed from @ARGV; parsing
# stops at the first argument that is not one of them, so whatever is
# left is read by <> as usual.  The patterns are anchored at the start of
# the argument so that an argument which merely contains "-d" or "-h"
# (for example a file name) is not mistaken for a switch.
sub parse_options {
    while (@ARGV) {
        if ($ARGV[0] =~ /^-[dD]/) {
            $debug = 1;
        }
        elsif ($ARGV[0] =~ /^-[hH]/) {
            $html = 1;
        }
        else {
            last;
        }
        shift(@ARGV);
    }
}

parse_options();
     91 
     92 
# Reduce the raw lines collected after an acknowledgement clause to the
# plain statement text.
#
#   $msg     the collected lines, still carrying the comment syntax of
#            the file they were taken from
#   returns  the text with comment leaders, carriage returns, blank
#            lines, surrounding quotes and trailing whitespace removed
sub clean_message {
    my ($msg) = @_;

    # Comment leaders: each kind is removed at the very start of the
    # text and then after every newline.
    $msg =~ s/^\@c\s*//;		# texinfo
    $msg =~ s/\n\@c\s*/\n/g;
    $msg =~ s/^REM\s*//;		# BASIC?!?
    $msg =~ s/\nREM\s*/\n/g;
    $msg =~ s/^dnl\s*//;		# m4
    $msg =~ s/\ndnl\s*/\n/g;		# (was "\dnl": digit + "nl")
    $msg =~ s/^\.\\"\s*//;		# *roff
    $msg =~ s/\n\.\\"\s*/\n/g;
    $msg =~ s/^[#\\\|";]*\s*//;		# sh etc.
    $msg =~ s/\n[#\\\|";]\s*/\n/g;
    $msg =~ s/^[ \t*]*//;		# C
    $msg =~ s/\n[ \t*]*/\n/g;

    # C++/C99: drop "//" at the start, drop empty "//" lines at the end,
    # and join the remaining "//" continuation lines with a space.
    1 while $msg =~ s/^\s*\/\/\s*//;
    1 while $msg =~ s/\n\s*\/\/\s*$//;
    $msg =~ s/\n\s*\/\/\s*/ /g;

    $msg =~ s/\@cartouche\n//;		# texinfo

    $msg =~ s/\r//g;			# carriage returns (DOS line ends)
    $msg =~ s/\s*\n/\n/g;		# trailing blanks and blank lines
    $msg =~ s/^\s*//;
    $msg =~ s/\\\@/\@/g;		# un-escape "\@"
    $msg =~ s/\n\n/\n/g;
    $msg =~ s/^\s*"//;			# surrounding "..." quotes
    $msg =~ s/"\s*$//;
    $msg =~ s/^\s*``//;			# surrounding ``...'' quotes
    $msg =~ s/''\s*$//;
    $msg =~ s/\s+\z//;

    return $msg;
}

# Main scan.  Every input line names a file; each file is searched for
# an advertising clause ($ack_line1 followed by $ack_line2 on the next
# line) and the text between that clause and the next $ack_endline match
# is collected, cleaned up and recorded in %copyrights.
file:
while (defined(my $fn = <>)) {
    chomp($fn);

    # Three-argument open: the name is only ever treated as a file to
    # read, whatever characters it contains.
    open(my $src, '<', $fn) or die "cannot read $fn: $!\n";

    # Files with known-odd wording are accepted on looser terms below.
    my $known_bad = ($fn =~ m,$known_bad_clause_3_wording,);

  line:
    while (defined(my $line = <$src>)) {
        print "0> $line" if $debug;

        next line
            unless $line =~ /$ack_line1/i
                or ($line =~ /$ack_line2/ and $known_bad);

        print "1> $line" if $debug;

        # The second half of the clause is expected on the next line,
        # except in known-bad files where the current line is used.
        $line = <$src> unless $known_bad;
        $line = '' unless defined $line;	# end of file

        unless ($known_bad or $line =~ /$ack_line2/i) {
            print "?> $line" if $debug;
            warning($fn, "bad clause 3?");
            last line;
        }

        print "2> $line" if $debug;

        # Collect the acknowledgement text up to the end marker.
        my $msg = "";
        my $cnt = 0;
        $line = <$src>;
        while (defined $line and $line !~ /$ack_endline/i) {
            print "C> $line" if $debug;

            $msg .= $line;
            $cnt++;
            $line = <$src>;
            if ($cnt > 10) {
                warning($fn, "loooong copyright?");
                last line;
            }
        }
        if (!defined $line) {
            # End of file before any end marker: give up on this file
            # with the same report as for an over-long text, instead of
            # looping on an undefined line until the counter trips.
            warning($fn, "loooong copyright?");
            last line;
        }

        print "E> $line" if $debug;

        $msg = clean_message($msg);

        # Split up into separate paragraphs, one per statement
        #
        my $msgs = $msg;
        $msgs =~ s/(This (software|product))/|$1/g;
        $msgs =~ s,^\|,,;
      msg:
        foreach my $raw (split(/\|/, $msgs)) {
            # All but the last statement still end in the newline that
            # preceded the split point; drop it so that the same
            # statement always yields the same key.
            (my $piece = $raw) =~ s/\s+\z//;

            if (!$html) {
                print ".\\\" File $fn:\n";
                print "$piece";
                print "\n\n";
            }

            # Figure out if the same statement was already recorded
            # with the opposite choice of trailing dot.
            my $variant = $piece;
            my $how;
            if ($variant =~ s/\.\z//) {
                $how = "w/o dot";
            } else {
                $variant .= ".";
                $how = "w/ dot";
            }
            if ($copyrights{$variant}) {
                print "already there, $how - skipping!\n"
                    if $debug;
                next msg;
            }

            # ... maybe with other case?
            if ($lc_copyrights{lc($variant)}) {
                print "already there, in different case - skipping\n"
                    if $debug;
                next msg;
            }

            $copyrights{$piece} = 1;
            $lc_copyrights{lc($piece)} = 1;
        }
    }
    close($src);
}
    253 
    254 
# Escape the characters that are markup in HTML text content, so that a
# statement containing e.g. an e-mail address in angle brackets is shown
# literally instead of being parsed as a tag.
#
#   $text    arbitrary text
#   returns  a copy with "&", "<" and ">" replaced by character entities
sub html_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;	# must come first
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Print every distinct statement recorded in %copyrights, sorted.
if ($html) {
    # HTML: one list item per statement.
    print "<ul>\n";
    foreach my $msg (sort keys %copyrights) {
        print "<li>" . html_escape($msg) . "</li>\n";
    }
    print "</ul>\n";
} else {
    # Plain output: a separator line, then the statements with an ".It"
    # line between consecutive ones (none before the first).
    print "------------------------------------------------------------\n";
    print join(".It\n", map { "$_\n" } sort keys %copyrights);
}
    274