Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.14.62.1
      1        1.2    lukem #!/usr/bin/env perl
      2        1.1  hubertf #
      3        1.5   martin # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4        1.1  hubertf # All rights reserved.
      5        1.5   martin #
      6        1.5   martin # This code is derived from software contributed to The NetBSD Foundation
      7        1.5   martin # by Hubert Feyrer <hubert (at] feyrer.de>.
      8        1.1  hubertf # 
      9        1.1  hubertf # Redistribution and use in source and binary forms, with or without
     10        1.1  hubertf # modification, are permitted provided that the following conditions
     11        1.1  hubertf # are met:
     12        1.1  hubertf # 1. Redistributions of source code must retain the above copyright
     13        1.1  hubertf #    notice, this list of conditions and the following disclaimer.
     14        1.1  hubertf # 2. Redistributions in binary form must reproduce the above copyright
     15        1.1  hubertf #    notice, this list of conditions and the following disclaimer in the
     16        1.1  hubertf #    documentation and/or other materials provided with the distribution.
     17        1.1  hubertf # 
     18        1.5   martin # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19        1.5   martin # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20        1.5   martin # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21        1.5   martin # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22        1.5   martin # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23        1.5   martin # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24        1.5   martin # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25        1.5   martin # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26        1.5   martin # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27        1.5   martin # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28        1.5   martin # POSSIBILITY OF SUCH DAMAGE.
     29        1.1  hubertf 
     30        1.1  hubertf #
     31        1.1  hubertf # Extract BSD-mandated copyright messages for NetBSD documentation
     32        1.1  hubertf #
     33        1.1  hubertf # Usage:
     34  1.14.62.1   martin # 1) find src xsrc -type f -print \
     35        1.1  hubertf #    | perl extract-contrib-string.pl
     36        1.1  hubertf #    >x
     37        1.6   martin #
     38        1.1  hubertf # 2) merge text after "--------" in "x" into
     39        1.1  hubertf #    src/distrib/notes/common/legal.common
     40        1.1  hubertf #
     41        1.6   martin # Options:
     42        1.6   martin #
     43        1.9   martin #     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
     44        1.6   martin #
     45        1.6   martin # where
     46        1.6   martin #     -d  debug output
     47        1.6   martin #     -h  html output
     48        1.9   martin #     -x  xml/docbook output
     49        1.9   martin #     -?  display help/usage message
     50        1.1  hubertf 
     51        1.1  hubertf 
     52  1.14.62.1   martin $ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
     53  1.14.62.1   martin     .      '|\d\. Redistributions of any form whatsoever)';
     54  1.14.62.1   martin $ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';
     55        1.1  hubertf $ack_endline=
     56        1.1  hubertf       '(\d\.\s*(Neither the name'
     57        1.1  hubertf     .         '|The name of the company nor the name'	# Wasn't my idea
     58        1.1  hubertf     .         '|The name of the author may not'
     59        1.1  hubertf     .         '|The name of .* must not be used to endorse'
     60        1.1  hubertf     .         '|The names? (of )?.* nor the names? of'
     61        1.1  hubertf     .         '|The names? (of )?.* or any of it\'?s members'
     62        1.1  hubertf     .         '|Redistributions of any form whatsoever'
     63  1.14.62.1   martin     .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
     64  1.14.62.1   martin     .         "|Urbana-Champaign Independent Media Center's name"
     65  1.14.62.1   martin     . '))'
     66  1.14.62.1   martin     .'|(^Neither the name)'
     67        1.1  hubertf     .'|(THIS SOFTWARE IS PROVIDED)'
     68  1.14.62.1   martin     .'|(ALL WARRANTIES WITH REGARD)'
     69        1.1  hubertf     .'|(The word \'cryptographic\' can be left out if)'
     70        1.1  hubertf     .'|(may be used to endorse)'
     71        1.1  hubertf     .'|(@end cartouche)'
     72  1.14.62.1   martin     .'|(</para>)'
     73        1.1  hubertf     .'|(Redistribution and use in source and binary forms)'
     74        1.1  hubertf     .'|(may not be used to endorse)'
     75        1.1  hubertf     .'|(\.IP 4)'
     76        1.1  hubertf     .'|(ALLOW FREE USE OF)'
     77        1.1  hubertf     .'|(materials provided with the distribution)'
     78        1.1  hubertf     .'|(@InsertRedistribution@)';
     79        1.1  hubertf 
     80        1.1  hubertf $known_bad_clause_3_wording=
     81        1.4   martin       'usr.bin/lex/.*'				# UCB
     82       1.11   martin     .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
     83       1.13   martin     .'|usr.sbin/traceroute/ifaddrlist.h'	#
     84       1.13   martin     .'|usr.sbin/traceroute/traceroute.c'	#
     85        1.1  hubertf     .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
     86        1.1  hubertf     ;	
     87        1.1  hubertf 
     88        1.1  hubertf sub warning {
     89        1.1  hubertf     local($fn,$msg) = @_;
     90        1.1  hubertf     print "XXX $fn line $.: $msg\n"
     91        1.1  hubertf }
     92        1.1  hubertf 
     93        1.7   martin while ($#ARGV >= 0) {
     94        1.7   martin     $debug=1 if ($ARGV[0] =~ /-d/i);
     95        1.7   martin     $html=1  if ($ARGV[0] =~ /-h/i);
     96        1.7   martin     $xml=1  if ($ARGV[0] =~ /-x/i);
     97        1.9   martin     $usage=1  if ($ARGV[0] =~ /-\?/);
     98        1.6   martin     shift(@ARGV);
     99        1.6   martin }
    100        1.1  hubertf 
    101        1.9   martin if ($usage) {
    102        1.9   martin     print "usage: find /usr/src -type f -print |\n" .
    103        1.9   martin 	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
    104        1.9   martin 	"   where\n" .
    105        1.9   martin 	"    -h   output html\n" .
    106        1.9   martin 	"    -x   output xml/docbook\n" .
    107        1.9   martin 	"    -d   debug\n" .
    108        1.9   martin 	"    -?   display this help message\n";
    109        1.9   martin     exit(0);
    110        1.9   martin }
    111        1.9   martin 
    112        1.7   martin $comments = !$html && !$xml;
    113        1.1  hubertf 
    114        1.1  hubertf file:
    115        1.1  hubertf while(<>) {
    116        1.1  hubertf     chomp();
    117        1.1  hubertf     $fn=$_;
    118        1.1  hubertf     
    119        1.1  hubertf     open(F, "$fn") || die "cannot read $fn: $!\n";
    120        1.1  hubertf 
    121        1.1  hubertf   line:
    122        1.1  hubertf     while(<F>) {
    123  1.14.62.1   martin 	if (0 and /$ack_line2/in){
    124        1.1  hubertf 	    print "?> $_" if $debug;
    125        1.1  hubertf 	    
    126        1.1  hubertf 	    if ($fn !~ m,$known_bad_clause_3_wording,) {
    127        1.1  hubertf 		warning($fn, "clause 3 start not caught");
    128        1.1  hubertf 	    }
    129        1.1  hubertf 	    last line;
    130        1.1  hubertf 	}
    131        1.1  hubertf 	
    132        1.1  hubertf 	print "0> $_" if $debug;
    133        1.1  hubertf 
    134       1.10   martin 	# special case perl script generating a license (openssl's
    135       1.10   martin 	# mkerr.pl) - ignore the quoted license, there is another one
    136       1.10   martin 	# inside:
    137  1.14.62.1   martin 	if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
    138  1.14.62.1   martin 		while(!/$ack_endline/in) {
    139       1.10   martin 		    print "S> $_" if $debug;
    140       1.10   martin 		    $_ = <F>;
    141       1.10   martin 		}
    142       1.10   martin 	}
    143       1.10   martin 
    144  1.14.62.1   martin 	if (/$ack_line1/in
    145  1.14.62.1   martin 	    or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) {
    146        1.1  hubertf 	    
    147        1.1  hubertf 	    print "1> $_" if $debug;
    148        1.1  hubertf 
    149        1.1  hubertf 	    $_=<F>
    150        1.1  hubertf 		unless $fn =~ m,$known_bad_clause_3_wording,;
    151  1.14.62.1   martin 	    if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){
    152        1.1  hubertf 		
    153        1.1  hubertf 		print "2> $_" if $debug;
    154        1.1  hubertf 		
    155        1.1  hubertf 		$msg="";
    156  1.14.62.1   martin 
    157  1.14.62.1   martin 		if ($fn =~ m,$known_bad_clause_3_wording, and /``/) {
    158  1.14.62.1   martin 		    $msg = $_;
    159  1.14.62.1   martin 		}
    160  1.14.62.1   martin 		elsif (/:\s+This product/) {
    161  1.14.62.1   martin 		    # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
    162  1.14.62.1   martin 		    # that is not like others, so special case it here
    163  1.14.62.1   martin 		    $msg = $_;
    164  1.14.62.1   martin 		    $msg =~ s/^.*:\s+(This product.*)$/$1/;
    165  1.14.62.1   martin 		}
    166  1.14.62.1   martin 
    167        1.1  hubertf 		$cnt=0;
    168        1.1  hubertf 		$_=<F>;
    169  1.14.62.1   martin 		while(!/$ack_endline/in) {
    170        1.1  hubertf 		    
    171        1.1  hubertf 		    print "C> $_" if $debug;
    172        1.1  hubertf 
    173        1.1  hubertf 		    $msg .= $_;
    174        1.1  hubertf 		    $cnt++;
    175        1.1  hubertf 		    $_ = <F>;
    176        1.1  hubertf 		    if ($cnt > 10) {
    177        1.1  hubertf 			warning($fn,"loooong copyright?");
    178        1.1  hubertf 			last line;
    179        1.1  hubertf 		    }
    180        1.1  hubertf 		}
    181        1.1  hubertf 
    182        1.1  hubertf 		print "E> $_" if $debug;
    183        1.1  hubertf 		
    184        1.1  hubertf 		# post-process
    185        1.6   martin 
    186       1.11   martin 		if ($fn =~ m,$known_bad_clause_3_wording,) {
    187       1.12   martin 			while ($msg !~ /^.*``.*\n/) {
    188       1.12   martin 				last if (!$msg);
    189       1.11   martin 				$msg =~ s/^.*\n//o;
    190       1.11   martin 			}
    191       1.12   martin 			$msg =~ s/^.*``//o;
    192       1.11   martin 			$msg =~ s/\n.*``//o;
    193       1.11   martin 			$msg =~ s/''.*$//o;
    194       1.11   martin 		}
    195       1.11   martin 
    196  1.14.62.1   martin 		# XXX: pcap &c - add to known_bad_clause_3_wording but
    197  1.14.62.1   martin 		# that code seems to have problems.  Easier to add a
    198  1.14.62.1   martin 		# hack here, shouldn't affect good clause 3.
    199  1.14.62.1   martin 		$msg =~ s/''\s+Neither the name.*$//;
    200  1.14.62.1   martin 
    201        1.9   martin 		# *roff
    202        1.9   martin 		while ($msg =~ /^\.\\"\s*/) {
    203        1.9   martin 			$msg =~ s/^\.\\"\s*//o;
    204        1.9   martin 		}
    205        1.9   martin 		while ($msg =~ /\n\.\\"\s*/) {
    206        1.9   martin 			$msg =~ s/\n\.\\"\s*/\n/o;
    207        1.9   martin 		}
    208        1.9   martin 		$msg =~ s/\n\.\\"\s*$/\n/g;
    209        1.9   martin 
    210        1.6   martin 		# C++/C99
    211        1.6   martin 		while ($msg =~ /^\s*\/\/\s*/) {
    212        1.6   martin 			$msg =~ s/^\s*\/\/\s*//o;
    213        1.6   martin 		}
    214        1.9   martin 		while ($msg =~ /\n\s*\/\/\s*$/) {
    215        1.9   martin 			$msg =~ s/\n\s*\/\/\s*$//o;
    216        1.9   martin 		}
    217        1.9   martin 		$msg =~ s/\n\s*\/\/\s*/\n/g;
    218        1.9   martin 
    219        1.9   martin 		# C
    220        1.9   martin 		while ($msg =~ /^\s*\*\s*/) {
    221        1.9   martin 			$msg =~ s/^\s*\*\s*//o;
    222        1.9   martin 		}
    223        1.9   martin 		while ($msg =~ /\n\s*\*\s*$/) {
    224        1.9   martin 			$msg =~ s/\n\s*\*\s*$//o;
    225        1.6   martin 		}
    226        1.9   martin 		$msg =~ s/\n\s*\*\s*/\n/g;
    227        1.9   martin 
    228        1.9   martin 		# texinfo @c
    229        1.9   martin 		while ($msg =~ /^\s*\@c\s+/) {
    230        1.9   martin 			$msg =~ s/^\s*\@c\s+//o;
    231        1.9   martin 		}
    232        1.9   martin 		while ($msg =~ /\n\s*\@c\s+$/) {
    233        1.9   martin 			$msg =~ s/\n\s*\@c\s+$//o;
    234        1.9   martin 		}
    235        1.9   martin 		$msg =~ s/\n\s*\@c\s+/\n/g;
    236        1.6   martin 
    237       1.10   martin 		$msg =~ s/^REM\s*//g;			# BASIC?!?
    238       1.10   martin 		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
    239       1.10   martin 		$msg =~ s/^dnl\s*//g;			# m4
    240  1.14.62.1   martin 		$msg =~ s/\ndnl\s*/\n/g;		# m4
    241       1.10   martin 		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
    242       1.10   martin 		$msg =~ s/\n\s+-\s+/ /g;		#
    243       1.14   martin 		$msg =~ s/^[#\\\|";]+\s*//g;		# sh etc.
    244       1.14   martin 		$msg =~ s/\n[#\\\|";]+\s*/\n/g;		# sh etc.
    245       1.10   martin 		$msg =~ s/^[ 	*]*//g;      		# C
    246       1.10   martin 		$msg =~ s/\n[ 	*]*/\n/g;    		# C
    247       1.10   martin 
    248        1.1  hubertf 		$msg =~ s/\@cartouche\n//;              # texinfo
    249        1.1  hubertf 
    250        1.1  hubertf 		$msg =~ s/
//g;
    252        1.1  hubertf 		$msg =~ s/\s*\n/\n/g;
    253        1.1  hubertf 		$msg =~ s/^\s*//;
    254        1.1  hubertf 		$msg =~ s/\\\@/\@/g;
    255        1.1  hubertf 		$msg =~ s/\n\n/\n/g;
    256        1.1  hubertf 	        $msg =~ s/^\s*``//;
    257       1.10   martin 	        $msg =~ s/''\s*$//;
    258       1.10   martin 		$msg =~ s/^\"//o;
    259  1.14.62.1   martin 		$msg =~ s/\"$//o;
    260  1.14.62.1   martin 		$msg =~ s/\"\.$/./o;
    261  1.14.62.1   martin 
    262  1.14.62.1   martin 		# Fix ISO-646-SE spelling of Lule\(oa
    263  1.14.62.1   martin 		$msg =~ s/Lule\}/Lule\\(oa/g;
    264  1.14.62.1   martin 
    265  1.14.62.1   martin 		# Collapse multiple spaces between words.  There are a
    266  1.14.62.1   martin 		# few entries with "by__Name" that affects sorting.
    267        1.1  hubertf 		$msg =~ s/(\w)  +(\w)/$1 $2/g;
    268        1.3      wiz 
    269        1.1  hubertf 		# Split up into separate paragraphs
    270        1.1  hubertf 		#
    271        1.1  hubertf 		$msgs=$msg;
    272        1.1  hubertf 		$msgs=~s/(This (software|product))/|$1/g;
    273        1.1  hubertf 		$msgs=~s,^\|,,;
    274        1.1  hubertf 	      msg:
    275        1.9   martin 		foreach $msg (split(/\|/, $msgs)) {
    276        1.9   martin 		    while ($msg =~ /[\n\s]+$/) {
    277        1.9   martin 			$msg =~ s/[\n\s]+$//o;
    278        1.9   martin 		    }
    279        1.7   martin 		    next if ($msg eq "");
    280        1.6   martin 		    if ($comments) {
    281        1.6   martin 			print ".\\\" File $fn:\n";
    282        1.6   martin 			print "$msg";
    283        1.6   martin 			print "\n\n";
    284  1.14.62.1   martin 		    }
    285  1.14.62.1   martin 
    286  1.14.62.1   martin 		    my $key = lc($msg);	# ignore difference in case
    287  1.14.62.1   martin 		    $key =~ s/\n/ /g;	# ignore difference in line breaks
    288  1.14.62.1   martin 		    $key =~ s/\.$//g;	# drop the final dot
    289  1.14.62.1   martin 
    290  1.14.62.1   martin 		    # push organizations ("by the") to the end of the
    291  1.14.62.1   martin 		    # sorting order
    292  1.14.62.1   martin 		    $key =~ s/(developed by) the/$1 ~the/;
    293  1.14.62.1   martin 
    294  1.14.62.1   martin 		    if (defined $copyrights{$key}) {
    295  1.14.62.1   martin 			if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
    296  1.14.62.1   martin 			    print "already there, without dot - overriding!\n"
    297        1.1  hubertf 				if 1 || $debug;
    298  1.14.62.1   martin 			}
    299        1.1  hubertf 			else {
    300        1.1  hubertf 			    next msg;
    301        1.1  hubertf 			}
    302        1.1  hubertf 		    }
    303  1.14.62.1   martin 
    304  1.14.62.1   martin 		    $copyrights{$key} = $msg;
    305        1.1  hubertf 		}
    306        1.1  hubertf 
    307        1.1  hubertf 	    } else {
    308        1.1  hubertf 		print "?> $_" if $debug;
    309        1.1  hubertf 
    310        1.1  hubertf                 if ($fn !~ m,$known_bad_clause_3_wording,) {
    311        1.1  hubertf 		    warning($fn, "bad clause 3?");
    312        1.1  hubertf                 }
    313        1.1  hubertf 		last line;
    314        1.1  hubertf 	    }
    315        1.1  hubertf 	}
    316        1.1  hubertf     }
    317        1.1  hubertf     close(F);
    318        1.1  hubertf }
    319        1.1  hubertf 
    320        1.6   martin 
    321        1.6   martin if ($html) {
    322  1.14.62.1   martin     print "<ul>\n";
    323  1.14.62.1   martin     foreach $key (sort keys %copyrights) {
    324        1.6   martin 	my $msg = $copyrights{$key};
    325        1.6   martin 	print "<li>$msg</li>\n";
    326        1.6   martin     }
    327        1.7   martin     print "</ul>\n";
    328  1.14.62.1   martin } elsif ($xml) {
    329  1.14.62.1   martin     foreach $key (sort keys %copyrights) {
    330        1.7   martin 	my $msg = $copyrights{$key};
    331        1.7   martin 	print "<listitem>$msg</listitem>\n";
    332        1.6   martin     }
    333        1.6   martin } else {
    334        1.6   martin     print "------------------------------------------------------------\n";
    335        1.6   martin 
    336  1.14.62.1   martin     $firsttime=1;
    337  1.14.62.1   martin     foreach $key (sort keys %copyrights) {
    338        1.6   martin 	my $msg = $copyrights{$key};
    339        1.6   martin 	if ($firsttime) {
    340        1.6   martin 	    $firsttime=0;
    341        1.6   martin 	} else {
    342        1.6   martin 	    print ".It\n";
    343        1.6   martin 	}
    344        1.1  hubertf 	print "$msg\n";
    345        1.1  hubertf     }
    346                     }
    347