Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.8
      1  1.2    lukem #!/usr/bin/env perl
      2  1.1  hubertf #
      3  1.5   martin # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4  1.1  hubertf # All rights reserved.
      5  1.5   martin #
      6  1.5   martin # This code is derived from software contributed to The NetBSD Foundation
      7  1.5   martin # by Hubert Feyrer <hubert (at] feyrer.de>.
      8  1.1  hubertf # 
      9  1.1  hubertf # Redistribution and use in source and binary forms, with or without
     10  1.1  hubertf # modification, are permitted provided that the following conditions
     11  1.1  hubertf # are met:
     12  1.1  hubertf # 1. Redistributions of source code must retain the above copyright
     13  1.1  hubertf #    notice, this list of conditions and the following disclaimer.
     14  1.1  hubertf # 2. Redistributions in binary form must reproduce the above copyright
     15  1.1  hubertf #    notice, this list of conditions and the following disclaimer in the
     16  1.1  hubertf #    documentation and/or other materials provided with the distribution.
     17  1.1  hubertf # 
     18  1.5   martin # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19  1.5   martin # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20  1.5   martin # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21  1.5   martin # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22  1.5   martin # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  1.5   martin # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  1.5   martin # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  1.5   martin # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  1.5   martin # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  1.5   martin # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  1.5   martin # POSSIBILITY OF SUCH DAMAGE.
     29  1.1  hubertf 
     30  1.1  hubertf #
     31  1.1  hubertf # Extract BSD-mandated copyright messages for NetBSD documentation
     32  1.1  hubertf #
     33  1.1  hubertf # Usage:
     34  1.1  hubertf # 1) find /usr/src -type f -print \
     35  1.1  hubertf #    | perl extract-contrib-string.pl \
     36  1.1  hubertf #    >x
     37  1.6   martin #
     38  1.1  hubertf # 2) merge text after "--------" in "x" into
     39  1.1  hubertf #    src/distrib/notes/common/legal.common
     40  1.1  hubertf #
     41  1.6   martin # Options:
     42  1.6   martin #
     43  1.6   martin #     perl extract-contrib-string.pl [-d] [-h] [-x]
     44  1.6   martin #
     45  1.6   martin # where
     46  1.6   martin #     -d  debug output
     47  1.6   martin #     -h  html output (<ul>/<li> list)
                      #     -x  xml output (<listitem> elements)
     48  1.1  hubertf 
     49  1.1  hubertf 
                      # Pattern strings used by the scanner below; each is interpolated into
                      # a regular expression.
                      #
                      # $ack_line1: first line of the advertising clause. It is matched
                      # case-insensitively against every line of each scanned file.
     50  1.1  hubertf $ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
                      # $ack_line2: expected on the line right after an $ack_line1 match.
     51  1.1  hubertf $ack_line2="display the following( acknowledge?ment)?";
                      # $ack_endline: alternatives that mark the first line after the
                      # acknowledgement text; lines are collected until one of them matches.
     52  1.1  hubertf $ack_endline=
     53  1.1  hubertf       '(\d\.\s*(Neither the name'
     54  1.1  hubertf     .         '|The name of the company nor the name'	# Wasn't my idea
     55  1.1  hubertf     .         '|The name of the author may not'
     56  1.1  hubertf     .         '|The name of .* must not be used to endorse'
     57  1.1  hubertf     .         '|The names? (of )?.* nor the names? of'
     58  1.1  hubertf     .         '|The names? (of )?.* or any of it\'?s members'
     59  1.1  hubertf     .         '|Redistributions of any form whatsoever'
     60  1.1  hubertf     .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
     61  1.1  hubertf     .'|(THIS SOFTWARE IS PROVIDED)'
     62  1.1  hubertf     .'|(The word \'cryptographic\' can be left out if)'
     63  1.1  hubertf     .'|(may be used to endorse)'
     64  1.1  hubertf     .'|(@end cartouche)'
     65  1.1  hubertf     .'|(Redistribution and use in source and binary forms)'
     66  1.1  hubertf     .'|(may not be used to endorse)'
     67  1.1  hubertf     .'|(\.IP 4)'
     68  1.1  hubertf     .'|(ALLOW FREE USE OF)'
     69  1.1  hubertf     .'|(materials provided with the distribution)'
     70  1.1  hubertf     .'|(@InsertRedistribution@)';
     71  1.1  hubertf 
                      # $known_bad_clause_3_wording: matched against the file name. For these
                      # files a line matching $ack_line2 alone starts the clause, and the
                      # scanner does not read ahead for a second line.
     72  1.1  hubertf $known_bad_clause_3_wording=
     73  1.4   martin       'usr.bin/lex/.*'				# UCB
     74  1.1  hubertf     .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
     75  1.1  hubertf     ;	
     76  1.1  hubertf 
     77  1.1  hubertf sub warning {
     78  1.1  hubertf     local($fn,$msg) = @_;
     79  1.1  hubertf     print "XXX $fn line $.: $msg\n"
     80  1.1  hubertf }
     81  1.1  hubertf 
                      # Parse command-line flags. Every argument is shifted off @ARGV here,
                      # so the <> read further down always takes its file list from standard
                      # input.
                      # NOTE(review): the matches are unanchored and case-insensitive, so any
                      # argument merely containing -d, -h or -x sets the flag; confirm that
                      # this is intended before tightening it.
     82  1.7   martin while ($#ARGV >= 0) {
     83  1.7   martin     $debug=1 if ($ARGV[0] =~ /-d/i);
     84  1.7   martin     $html=1  if ($ARGV[0] =~ /-h/i);
     85  1.7   martin     $xml=1  if ($ARGV[0] =~ /-x/i);
     86  1.6   martin     shift(@ARGV);
     87  1.6   martin }
     88  1.1  hubertf 
                      # Per-file comment output is produced only when neither -h nor -x
                      # was given.
     89  1.7   martin $comments = !$html && !$xml;
     90  1.1  hubertf 
     91  1.1  hubertf file:
     92  1.1  hubertf while(<>) {
     93  1.1  hubertf     chomp();
     94  1.1  hubertf     $fn=$_;
     95  1.1  hubertf     
     96  1.1  hubertf     open(F, "$fn") || die "cannot read $fn: $!\n";
     97  1.1  hubertf 
     98  1.1  hubertf   line:
     99  1.1  hubertf     while(<F>) {
    100  1.1  hubertf 	if (0 and /$ack_line2/i){
    101  1.1  hubertf 	    print "?> $_" if $debug;
    102  1.1  hubertf 	    
    103  1.1  hubertf 	    if ($fn !~ m,$known_bad_clause_3_wording,) {
    104  1.1  hubertf 		warning($fn, "clause 3 start not caught");
    105  1.1  hubertf 	    }
    106  1.1  hubertf 	    last line;
    107  1.1  hubertf 	}
    108  1.1  hubertf 	
    109  1.1  hubertf 	print "0> $_" if $debug;
    110  1.1  hubertf 
    111  1.1  hubertf 	if (/$ack_line1/i
    112  1.1  hubertf 	    or (/$ack_line2/ and $fn =~ m,$known_bad_clause_3_wording,)) {
    113  1.1  hubertf 	    
    114  1.1  hubertf 	    print "1> $_" if $debug;
    115  1.1  hubertf 
    116  1.1  hubertf 	    $_=<F>
    117  1.1  hubertf 		unless $fn =~ m,$known_bad_clause_3_wording,;
    118  1.1  hubertf 	    if (/$ack_line2/i or $fn =~ m,$known_bad_clause_3_wording,){
    119  1.1  hubertf 		
    120  1.1  hubertf 		print "2> $_" if $debug;
    121  1.1  hubertf 		
    122  1.1  hubertf 		$msg="";
    123  1.1  hubertf 		$cnt=0;
    124  1.1  hubertf 		$_=<F>;
    125  1.1  hubertf 		while(!/$ack_endline/i) {
    126  1.1  hubertf 		    
    127  1.1  hubertf 		    print "C> $_" if $debug;
    128  1.1  hubertf 
    129  1.1  hubertf 		    $msg .= $_;
    130  1.1  hubertf 		    $cnt++;
    131  1.1  hubertf 		    $_ = <F>;
    132  1.1  hubertf 		    if ($cnt > 10) {
    133  1.1  hubertf 			warning($fn,"loooong copyright?");
    134  1.1  hubertf 			last line;
    135  1.1  hubertf 		    }
    136  1.1  hubertf 		}
    137  1.1  hubertf 
    138  1.1  hubertf 		print "E> $_" if $debug;
    139  1.1  hubertf 		
    140  1.1  hubertf 		# post-process
    141  1.1  hubertf 		$msg =~ s/^\@c\s*//g;			# texinfo
    142  1.1  hubertf 		$msg =~ s/\n\@c\s*/\n/g;		# texinfo
    143  1.1  hubertf 		$msg =~ s/^REM\s*//g;			# BASIC?!?
    144  1.1  hubertf 		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
    145  1.1  hubertf 		$msg =~ s/^dnl\s*//g;			# m4
    146  1.1  hubertf 		$msg =~ s/\dnl\s*/\n/g;			# m4
    147  1.1  hubertf 		$msg =~ s/^\.\\"\s*//g;			# *roff
    148  1.1  hubertf 		$msg =~ s/\n\.\\"\s*/\n/g;		# *roff
    149  1.1  hubertf 		$msg =~ s/^[#\\\|";]*\s*//g;		# sh etc.
    150  1.1  hubertf 		$msg =~ s/\n[#\\\|";]\s*/\n/g;		# sh etc.
    151  1.1  hubertf 		$msg =~ s/^[ 	*]*//g;      		# C
    152  1.1  hubertf 		$msg =~ s/\n[ 	*]*/\n/g;    		# C
    153  1.6   martin 
    154  1.6   martin 		# C++/C99
    155  1.6   martin 		while ($msg =~ /^\s*\/\/\s*/) {
    156  1.6   martin 			$msg =~ s/^\s*\/\/\s*//o;
    157  1.6   martin 		}
    158  1.6   martin 		while ($msg =~ /\ns*\/\/\s*$/) {
    159  1.6   martin 			$msg =~ s/\ns*\/\/\s*$//o;
    160  1.6   martin 		}
    161  1.6   martin 		$msg =~ s/\ns*\/\/\s*/ /g;
    162  1.6   martin 
    163  1.1  hubertf 		$msg =~ s/\@cartouche\n//;              # texinfo
    164  1.1  hubertf 
    165  1.1  hubertf 		$msg =~ s/
//g;
    167  1.1  hubertf 		$msg =~ s/\s*\n/\n/g;
    168  1.1  hubertf 		$msg =~ s/^\s*//;
    169  1.1  hubertf 		$msg =~ s/\\\@/\@/g;
    170  1.1  hubertf 		$msg =~ s/\n\n/\n/g;
    171  1.1  hubertf 	        $msg =~ s/^\s*"//;
    172  1.1  hubertf 	        $msg =~ s/"\s*$//;
    173  1.1  hubertf 	        $msg =~ s/^\s*``//;
    174  1.6   martin 	        $msg =~ s/''\s*$//;
    175  1.6   martin 		while ($msg =~ /[\n\s]+$/) {
    176  1.6   martin 			$msg =~ s/[\n\s]+$//o;
    177  1.1  hubertf 		}
    178  1.3      wiz 
    179  1.1  hubertf 		# Split up into separate paragraphs
    180  1.1  hubertf 		#
    181  1.1  hubertf 		$msgs=$msg;
    182  1.1  hubertf 		$msgs=~s/(This (software|product))/|$1/g;
    183  1.1  hubertf 		$msgs=~s,^\|,,;
    184  1.1  hubertf 	      msg:
    185  1.7   martin 		foreach $msg (split(/\|/, $msgs)) {
    186  1.6   martin 		    if ($comments) {
    187  1.6   martin 			print ".\\\" File $fn:\n";
    188  1.6   martin 			print "$msg";
    189  1.6   martin 			print "\n\n";
    190  1.1  hubertf 		    }
    191  1.1  hubertf 		    
    192  1.1  hubertf 		    # Figure out if there's a version w/ or w/o trailing dot
    193  1.8   martin 		    # 
    194  1.1  hubertf 		    if ($msg =~ /\.$/) {
    195  1.8   martin 			# check if there's a version of the same msg
    196  1.1  hubertf 			# w/o a trailing dot
    197  1.8   martin 			$msg2=$msg;
    198  1.1  hubertf 			$msg2=~s,\.$,,;
    199  1.1  hubertf 			if ($copyrights{"$msg2"}) {
    200  1.1  hubertf 			    # already there - skip
    201  1.1  hubertf 			    print "already there, w/o dot - skipping!\n"
    202  1.1  hubertf 				if $debug;
    203  1.1  hubertf 			    next msg;
    204  1.1  hubertf 			}
    205  1.1  hubertf 			
    206  1.1  hubertf 			# ... maybe with other case?
    207  1.1  hubertf 			$lc_msg2=lc($msg2);
    208  1.2    lukem 			if ($lc_copyrights{$lc_msg2}) {
    209  1.1  hubertf 			    print "already there, in different case - skipping\n"
    210  1.1  hubertf 				if $debug;
    211  1.1  hubertf 			    next msg;
    212  1.1  hubertf 			}
    213  1.1  hubertf 		    } else {
    214  1.8   martin 			# check if there's a version of the same msg
    215  1.1  hubertf 			# with a trailing dot
    216  1.8   martin 			$msg2=$msg;
    217  1.1  hubertf 			$msg2.=".";
    218  1.1  hubertf 			if ($copyrights{"$msg2"}) {
    219  1.1  hubertf 			    # already there - skip
    220  1.1  hubertf 			    print "already there, w/ dot - skipping!\n"
    221  1.1  hubertf 				if $debug;
    222  1.1  hubertf 			    next msg;
    223  1.1  hubertf 			}
    224  1.1  hubertf 			
    225  1.1  hubertf 			# ... maybe with other case?
    226  1.1  hubertf 			$lc_msg2=lc($msg2);
    227  1.2    lukem 			if ($lc_copyrights{$lc_msg2}) {
    228  1.1  hubertf 			    print "already there, in different case - skipping\n"
    229  1.1  hubertf 				if $debug;
    230  1.1  hubertf 			    next msg;
    231  1.1  hubertf 			}
    232  1.1  hubertf 		    }
    233  1.1  hubertf 
    234  1.1  hubertf 		    $copyrights{$msg} = 1;
    235  1.1  hubertf 		    $lc_copyrights{$lc_msg} = 1;
    236  1.1  hubertf 		}		 
    237  1.1  hubertf 
    238  1.1  hubertf 	    } else {
    239  1.1  hubertf 		print "?> $_" if $debug;
    240  1.1  hubertf 
    241  1.1  hubertf                 if ($fn !~ m,$known_bad_clause_3_wording,) {
    242  1.1  hubertf 		    warning($fn, "bad clause 3?");
    243  1.1  hubertf                 }
    244  1.1  hubertf 		last line;
    245  1.1  hubertf 	    }
    246  1.1  hubertf 	}
    247  1.1  hubertf     }
    248  1.1  hubertf     close(F);
    249  1.1  hubertf }
    250  1.1  hubertf 
    251  1.6   martin 
                      # Emit the collected messages (the keys of %copyrights) in sorted
                      # order, in the format selected on the command line.
                      # NOTE(review): $msg is interpolated into the HTML and XML output
                      # without escaping; confirm that messages never contain <, > or &.
    252  1.6   martin if ($html) {
                          # -h: one <li> per message inside a <ul> list
    253  1.6   martin     print "<ul>\n";
    254  1.6   martin     foreach $msg (sort keys %copyrights) {
    255  1.6   martin 	print "<li>$msg</li>\n";
    256  1.6   martin     }
    257  1.7   martin     print "</ul>\n";
    258  1.7   martin } elsif ($xml) {
                          # -x: one <listitem> per message, with no enclosing element
    259  1.7   martin     foreach $msg (sort keys %copyrights) {
    260  1.7   martin 	print "<listitem>$msg</listitem>\n";
    261  1.6   martin     }
    262  1.6   martin } else {
                          # default: a separator line, then the messages with an .It line
                          # between consecutive entries (none before the first)
    263  1.6   martin     print "------------------------------------------------------------\n";
    264  1.6   martin 
    265  1.6   martin     $firsttime=1;
    266  1.6   martin     foreach $msg (sort keys %copyrights) {
    267  1.6   martin 	if ($firsttime) {
    268  1.6   martin 	    $firsttime=0;
    269  1.6   martin 	} else {
    270  1.6   martin 	    print ".It\n";
    271  1.6   martin 	}
    272  1.1  hubertf 	print "$msg\n";
    273  1.1  hubertf     }
    274               }
    275