Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.14.62.1
      1 #!/usr/bin/env perl
      2 #
      3 # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4 # All rights reserved.
      5 #
      6 # This code is derived from software contributed to The NetBSD Foundation
      7 # by Hubert Feyrer <hubert (at] feyrer.de>.
      8 # 
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions
     11 # are met:
     12 # 1. Redistributions of source code must retain the above copyright
     13 #    notice, this list of conditions and the following disclaimer.
     14 # 2. Redistributions in binary form must reproduce the above copyright
     15 #    notice, this list of conditions and the following disclaimer in the
     16 #    documentation and/or other materials provided with the distribution.
     17 # 
     18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21 # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28 # POSSIBILITY OF SUCH DAMAGE.
     29 
     30 #
     31 # Extract BSD-mandated copyright messages for NetBSD documentation
     32 #
     33 # Usage:
     34 # 1) find src xsrc -type f -print \
     35 #    | perl extract-contrib-string.pl
     36 #    >x
     37 #
     38 # 2) merge text after "--------" in "x" into
     39 #    src/distrib/notes/common/legal.common
     40 #
     41 # Options:
     42 #
     43 #     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
     44 #
     45 # where
     46 #     -d  debug output
     47 #     -h  html output
     48 #     -x  xml/docbook output
     49 #     -?  display help/usage message
     50 
     51 
     52 $ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
     53     .      '|\d\. Redistributions of any form whatsoever)';
     54 $ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';
     55 $ack_endline=
     56       '(\d\.\s*(Neither the name'
     57     .         '|The name of the company nor the name'	# Wasn't my idea
     58     .         '|The name of the author may not'
     59     .         '|The name of .* must not be used to endorse'
     60     .         '|The names? (of )?.* nor the names? of'
     61     .         '|The names? (of )?.* or any of it\'?s members'
     62     .         '|Redistributions of any form whatsoever'
     63     .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
     64     .         "|Urbana-Champaign Independent Media Center's name"
     65     . '))'
     66     .'|(^Neither the name)'
     67     .'|(THIS SOFTWARE IS PROVIDED)'
     68     .'|(ALL WARRANTIES WITH REGARD)'
     69     .'|(The word \'cryptographic\' can be left out if)'
     70     .'|(may be used to endorse)'
     71     .'|(@end cartouche)'
     72     .'|(</para>)'
     73     .'|(Redistribution and use in source and binary forms)'
     74     .'|(may not be used to endorse)'
     75     .'|(\.IP 4)'
     76     .'|(ALLOW FREE USE OF)'
     77     .'|(materials provided with the distribution)'
     78     .'|(@InsertRedistribution@)';
     79 
     80 $known_bad_clause_3_wording=
     81       'usr.bin/lex/.*'				# UCB
     82     .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
     83     .'|usr.sbin/traceroute/ifaddrlist.h'	#
     84     .'|usr.sbin/traceroute/traceroute.c'	#
     85     .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
     86     ;	
     87 
     88 sub warning {
     89     local($fn,$msg) = @_;
     90     print "XXX $fn line $.: $msg\n"
     91 }
     92 
     93 while ($#ARGV >= 0) {
     94     $debug=1 if ($ARGV[0] =~ /-d/i);
     95     $html=1  if ($ARGV[0] =~ /-h/i);
     96     $xml=1  if ($ARGV[0] =~ /-x/i);
     97     $usage=1  if ($ARGV[0] =~ /-\?/);
     98     shift(@ARGV);
     99 }
    100 
    101 if ($usage) {
    102     print "usage: find /usr/src -type f -print |\n" .
    103 	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
    104 	"   where\n" .
    105 	"    -h   output html\n" .
    106 	"    -x   output xml/docbook\n" .
    107 	"    -d   debug\n" .
    108 	"    -?   display this help message\n";
    109     exit(0);
    110 }
    111 
    112 $comments = !$html && !$xml;
    113 
    114 file:
    115 while(<>) {
    116     chomp();
    117     $fn=$_;
    118     
    119     open(F, "$fn") || die "cannot read $fn: $!\n";
    120 
    121   line:
    122     while(<F>) {
    123 	if (0 and /$ack_line2/in){
    124 	    print "?> $_" if $debug;
    125 	    
    126 	    if ($fn !~ m,$known_bad_clause_3_wording,) {
    127 		warning($fn, "clause 3 start not caught");
    128 	    }
    129 	    last line;
    130 	}
    131 	
    132 	print "0> $_" if $debug;
    133 
    134 	# special case perl script generating a license (openssl's
    135 	# mkerr.pl) - ignore the quoted license, there is another one
    136 	# inside:
    137 	if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
    138 		while(!/$ack_endline/in) {
    139 		    print "S> $_" if $debug;
    140 		    $_ = <F>;
    141 		}
    142 	}
    143 
    144 	if (/$ack_line1/in
    145 	    or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) {
    146 	    
    147 	    print "1> $_" if $debug;
    148 
    149 	    $_=<F>
    150 		unless $fn =~ m,$known_bad_clause_3_wording,;
    151 	    if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){
    152 		
    153 		print "2> $_" if $debug;
    154 		
    155 		$msg="";
    156 
    157 		if ($fn =~ m,$known_bad_clause_3_wording, and /``/) {
    158 		    $msg = $_;
    159 		}
    160 		elsif (/:\s+This product/) {
    161 		    # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
    162 		    # that is not like others, so special case it here
    163 		    $msg = $_;
    164 		    $msg =~ s/^.*:\s+(This product.*)$/$1/;
    165 		}
    166 
    167 		$cnt=0;
    168 		$_=<F>;
    169 		while(!/$ack_endline/in) {
    170 		    
    171 		    print "C> $_" if $debug;
    172 
    173 		    $msg .= $_;
    174 		    $cnt++;
    175 		    $_ = <F>;
    176 		    if ($cnt > 10) {
    177 			warning($fn,"loooong copyright?");
    178 			last line;
    179 		    }
    180 		}
    181 
    182 		print "E> $_" if $debug;
    183 		
    184 		# post-process
    185 
    186 		if ($fn =~ m,$known_bad_clause_3_wording,) {
    187 			while ($msg !~ /^.*``.*\n/) {
    188 				last if (!$msg);
    189 				$msg =~ s/^.*\n//o;
    190 			}
    191 			$msg =~ s/^.*``//o;
    192 			$msg =~ s/\n.*``//o;
    193 			$msg =~ s/''.*$//o;
    194 		}
    195 
    196 		# XXX: pcap &c - add to known_bad_clause_3_wording but
    197 		# that code seems to have problems.  Easier to add a
    198 		# hack here, shouldn't affect good clause 3.
    199 		$msg =~ s/''\s+Neither the name.*$//;
    200 
    201 		# *roff
    202 		while ($msg =~ /^\.\\"\s*/) {
    203 			$msg =~ s/^\.\\"\s*//o;
    204 		}
    205 		while ($msg =~ /\n\.\\"\s*/) {
    206 			$msg =~ s/\n\.\\"\s*/\n/o;
    207 		}
    208 		$msg =~ s/\n\.\\"\s*$/\n/g;
    209 
    210 		# C++/C99
    211 		while ($msg =~ /^\s*\/\/\s*/) {
    212 			$msg =~ s/^\s*\/\/\s*//o;
    213 		}
    214 		while ($msg =~ /\n\s*\/\/\s*$/) {
    215 			$msg =~ s/\n\s*\/\/\s*$//o;
    216 		}
    217 		$msg =~ s/\n\s*\/\/\s*/\n/g;
    218 
    219 		# C
    220 		while ($msg =~ /^\s*\*\s*/) {
    221 			$msg =~ s/^\s*\*\s*//o;
    222 		}
    223 		while ($msg =~ /\n\s*\*\s*$/) {
    224 			$msg =~ s/\n\s*\*\s*$//o;
    225 		}
    226 		$msg =~ s/\n\s*\*\s*/\n/g;
    227 
    228 		# texinfo @c
    229 		while ($msg =~ /^\s*\@c\s+/) {
    230 			$msg =~ s/^\s*\@c\s+//o;
    231 		}
    232 		while ($msg =~ /\n\s*\@c\s+$/) {
    233 			$msg =~ s/\n\s*\@c\s+$//o;
    234 		}
    235 		$msg =~ s/\n\s*\@c\s+/\n/g;
    236 
    237 		$msg =~ s/^REM\s*//g;			# BASIC?!?
    238 		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
    239 		$msg =~ s/^dnl\s*//g;			# m4
    240 		$msg =~ s/\ndnl\s*/\n/g;		# m4
    241 		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
    242 		$msg =~ s/\n\s+-\s+/ /g;		#
    243 		$msg =~ s/^[#\\\|";]+\s*//g;		# sh etc.
    244 		$msg =~ s/\n[#\\\|";]+\s*/\n/g;		# sh etc.
    245 		$msg =~ s/^[ 	*]*//g;      		# C
    246 		$msg =~ s/\n[ 	*]*/\n/g;    		# C
    247 
    248 		$msg =~ s/\@cartouche\n//;              # texinfo
    249 
    250 		$msg =~ s/
//g;
    252 		$msg =~ s/\s*\n/\n/g;
    253 		$msg =~ s/^\s*//;
    254 		$msg =~ s/\\\@/\@/g;
    255 		$msg =~ s/\n\n/\n/g;
    256 	        $msg =~ s/^\s*``//;
    257 	        $msg =~ s/''\s*$//;
    258 		$msg =~ s/^\"//o;
    259 		$msg =~ s/\"$//o;
    260 		$msg =~ s/\"\.$/./o;
    261 
    262 		# Fix ISO-646-SE spelling of Lule\(oa
    263 		$msg =~ s/Lule\}/Lule\\(oa/g;
    264 
    265 		# Collapse multiple spaces between words.  There are a
    266 		# few entries with "by__Name" that affects sorting.
    267 		$msg =~ s/(\w)  +(\w)/$1 $2/g;
    268 
    269 		# Split up into separate paragraphs
    270 		#
    271 		$msgs=$msg;
    272 		$msgs=~s/(This (software|product))/|$1/g;
    273 		$msgs=~s,^\|,,;
    274 	      msg:
    275 		foreach $msg (split(/\|/, $msgs)) {
    276 		    while ($msg =~ /[\n\s]+$/) {
    277 			$msg =~ s/[\n\s]+$//o;
    278 		    }
    279 		    next if ($msg eq "");
    280 		    if ($comments) {
    281 			print ".\\\" File $fn:\n";
    282 			print "$msg";
    283 			print "\n\n";
    284 		    }
    285 
    286 		    my $key = lc($msg);	# ignore difference in case
    287 		    $key =~ s/\n/ /g;	# ignore difference in line breaks
    288 		    $key =~ s/\.$//g;	# drop the final dot
    289 
    290 		    # push organizations ("by the") to the end of the
    291 		    # sorting order
    292 		    $key =~ s/(developed by) the/$1 ~the/;
    293 
    294 		    if (defined $copyrights{$key}) {
    295 			if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
    296 			    print "already there, without dot - overriding!\n"
    297 				if 1 || $debug;
    298 			}
    299 			else {
    300 			    next msg;
    301 			}
    302 		    }
    303 
    304 		    $copyrights{$key} = $msg;
    305 		}
    306 
    307 	    } else {
    308 		print "?> $_" if $debug;
    309 
    310                 if ($fn !~ m,$known_bad_clause_3_wording,) {
    311 		    warning($fn, "bad clause 3?");
    312                 }
    313 		last line;
    314 	    }
    315 	}
    316     }
    317     close(F);
    318 }
    319 
    320 
    321 if ($html) {
    322     print "<ul>\n";
    323     foreach $key (sort keys %copyrights) {
    324 	my $msg = $copyrights{$key};
    325 	print "<li>$msg</li>\n";
    326     }
    327     print "</ul>\n";
    328 } elsif ($xml) {
    329     foreach $key (sort keys %copyrights) {
    330 	my $msg = $copyrights{$key};
    331 	print "<listitem>$msg</listitem>\n";
    332     }
    333 } else {
    334     print "------------------------------------------------------------\n";
    335 
    336     $firsttime=1;
    337     foreach $key (sort keys %copyrights) {
    338 	my $msg = $copyrights{$key};
    339 	if ($firsttime) {
    340 	    $firsttime=0;
    341 	} else {
    342 	    print ".It\n";
    343 	}
    344 	print "$msg\n";
    345     }
    346 }
    347