Home | History | Annotate | Line # | Download | only in common
extract-contrib-string.pl revision 1.11
      1   1.2    lukem #!/usr/bin/env perl
      2   1.1  hubertf #
      3   1.5   martin # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
      4   1.1  hubertf # All rights reserved.
      5   1.5   martin #
      6   1.5   martin # This code is derived from software contributed to The NetBSD Foundation
      7   1.5   martin # by Hubert Feyrer <hubert (at] feyrer.de>.
      8   1.1  hubertf # 
      9   1.1  hubertf # Redistribution and use in source and binary forms, with or without
     10   1.1  hubertf # modification, are permitted provided that the following conditions
     11   1.1  hubertf # are met:
     12   1.1  hubertf # 1. Redistributions of source code must retain the above copyright
     13   1.1  hubertf #    notice, this list of conditions and the following disclaimer.
     14   1.1  hubertf # 2. Redistributions in binary form must reproduce the above copyright
     15   1.1  hubertf #    notice, this list of conditions and the following disclaimer in the
     16   1.1  hubertf #    documentation and/or other materials provided with the distribution.
     17   1.1  hubertf # 
     18   1.5   martin # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     19   1.5   martin # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     20   1.5   martin # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     21   1.5   martin # PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     22   1.5   martin # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23   1.5   martin # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24   1.5   martin # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25   1.5   martin # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26   1.5   martin # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27   1.5   martin # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28   1.5   martin # POSSIBILITY OF SUCH DAMAGE.
     29   1.1  hubertf 
     30   1.1  hubertf #
     31   1.1  hubertf # Extract BSD-mandated copyright messages for NetBSD documentation
     32   1.1  hubertf #
     33   1.1  hubertf # Usage:
     34   1.1  hubertf # 1) find /usr/src -type f -print \
     35   1.1  hubertf #    | perl extract-contrib-string.pl
     36   1.1  hubertf #    >x
     37   1.6   martin #
     38   1.1  hubertf # 2) merge text after "--------" in "x" into
     39   1.1  hubertf #    src/distrib/notes/common/legal.common
     40   1.1  hubertf #
     41   1.6   martin # Options:
     42   1.6   martin #
     43   1.9   martin #     perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
     44   1.6   martin #
     45   1.6   martin # where
     46   1.6   martin #     -d  debug output
     47   1.6   martin #     -h  html output
     48   1.9   martin #     -x  xml/docbook output
     49   1.9   martin #     -?  display help/usage message
     50   1.1  hubertf 
     51   1.1  hubertf 
     52   1.1  hubertf $ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
     53   1.1  hubertf $ack_line2="display the following( acknowledge?ment)?";
     54   1.1  hubertf $ack_endline=
     55   1.1  hubertf       '(\d\.\s*(Neither the name'
     56   1.1  hubertf     .         '|The name of the company nor the name'	# Wasn't my idea
     57   1.1  hubertf     .         '|The name of the author may not'
     58   1.1  hubertf     .         '|The name of .* must not be used to endorse'
     59   1.1  hubertf     .         '|The names? (of )?.* nor the names? of'
     60   1.1  hubertf     .         '|The names? (of )?.* or any of it\'?s members'
     61   1.1  hubertf     .         '|Redistributions of any form whatsoever'
     62   1.1  hubertf     .         '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
     63   1.1  hubertf     .'|(THIS SOFTWARE IS PROVIDED)'
     64   1.1  hubertf     .'|(The word \'cryptographic\' can be left out if)'
     65   1.1  hubertf     .'|(may be used to endorse)'
     66   1.1  hubertf     .'|(@end cartouche)'
     67   1.1  hubertf     .'|(Redistribution and use in source and binary forms)'
     68   1.1  hubertf     .'|(may not be used to endorse)'
     69   1.1  hubertf     .'|(\.IP 4)'
     70   1.1  hubertf     .'|(ALLOW FREE USE OF)'
     71   1.1  hubertf     .'|(materials provided with the distribution)'
     72   1.1  hubertf     .'|(@InsertRedistribution@)';
     73   1.1  hubertf 
     74   1.1  hubertf $known_bad_clause_3_wording=
     75   1.4   martin       'usr.bin/lex/.*'				# UCB
     76  1.11   martin     .'|dist/bind/contrib/nslint-2.1a3/lbl/.*'	#
     77   1.1  hubertf     .'|usr.sbin/hilinfo/hilinfo.c'	   	# CSS @ Utah
     78   1.1  hubertf     ;	
     79   1.1  hubertf 
     80   1.1  hubertf sub warning {
     81   1.1  hubertf     local($fn,$msg) = @_;
     82   1.1  hubertf     print "XXX $fn line $.: $msg\n"
     83   1.1  hubertf }
     84   1.1  hubertf 
     85   1.7   martin while ($#ARGV >= 0) {
     86   1.7   martin     $debug=1 if ($ARGV[0] =~ /-d/i);
     87   1.7   martin     $html=1  if ($ARGV[0] =~ /-h/i);
     88   1.7   martin     $xml=1  if ($ARGV[0] =~ /-x/i);
     89   1.9   martin     $usage=1  if ($ARGV[0] =~ /-\?/);
     90   1.6   martin     shift(@ARGV);
     91   1.6   martin }
     92   1.1  hubertf 
     93   1.9   martin if ($usage) {
     94   1.9   martin     print "usage: find /usr/src -type f -print |\n" .
     95   1.9   martin 	" perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
     96   1.9   martin 	"   where\n" .
     97   1.9   martin 	"    -h   output html\n" .
     98   1.9   martin 	"    -x   output xml/docbook\n" .
     99   1.9   martin 	"    -d   debug\n" .
    100   1.9   martin 	"    -?   display this help message\n";
    101   1.9   martin     exit(0);
    102   1.9   martin }
    103   1.9   martin 
    104   1.7   martin $comments = !$html && !$xml;
    105   1.1  hubertf 
    106   1.1  hubertf file:
    107   1.1  hubertf while(<>) {
    108   1.1  hubertf     chomp();
    109   1.1  hubertf     $fn=$_;
    110   1.1  hubertf     
    111   1.1  hubertf     open(F, "$fn") || die "cannot read $fn: $!\n";
    112   1.1  hubertf 
    113   1.1  hubertf   line:
    114   1.1  hubertf     while(<F>) {
    115   1.1  hubertf 	if (0 and /$ack_line2/i){
    116   1.1  hubertf 	    print "?> $_" if $debug;
    117   1.1  hubertf 	    
    118   1.1  hubertf 	    if ($fn !~ m,$known_bad_clause_3_wording,) {
    119   1.1  hubertf 		warning($fn, "clause 3 start not caught");
    120   1.1  hubertf 	    }
    121   1.1  hubertf 	    last line;
    122   1.1  hubertf 	}
    123   1.1  hubertf 	
    124   1.1  hubertf 	print "0> $_" if $debug;
    125   1.1  hubertf 
    126  1.10   martin 	# special case perl script generating a license (openssl's
    127  1.10   martin 	# mkerr.pl) - ignore the quoted license, there is another one
    128  1.10   martin 	# inside:
    129  1.10   martin 	if (/^\"\s\*.*$ack_line1.*\\n\"\,/) {
    130  1.10   martin 		while(!/$ack_endline/i) {
    131  1.10   martin 		    print "S> $_" if $debug;
    132  1.10   martin 		    $_ = <F>;
    133  1.10   martin 		}
    134  1.10   martin 	}
    135  1.10   martin 
    136   1.1  hubertf 	if (/$ack_line1/i
    137   1.1  hubertf 	    or (/$ack_line2/ and $fn =~ m,$known_bad_clause_3_wording,)) {
    138   1.1  hubertf 	    
    139   1.1  hubertf 	    print "1> $_" if $debug;
    140   1.1  hubertf 
    141   1.1  hubertf 	    $_=<F>
    142   1.1  hubertf 		unless $fn =~ m,$known_bad_clause_3_wording,;
    143   1.1  hubertf 	    if (/$ack_line2/i or $fn =~ m,$known_bad_clause_3_wording,){
    144   1.1  hubertf 		
    145   1.1  hubertf 		print "2> $_" if $debug;
    146   1.1  hubertf 		
    147   1.1  hubertf 		$msg="";
    148   1.1  hubertf 		$cnt=0;
    149   1.1  hubertf 		$_=<F>;
    150   1.1  hubertf 		while(!/$ack_endline/i) {
    151   1.1  hubertf 		    
    152   1.1  hubertf 		    print "C> $_" if $debug;
    153   1.1  hubertf 
    154   1.1  hubertf 		    $msg .= $_;
    155   1.1  hubertf 		    $cnt++;
    156   1.1  hubertf 		    $_ = <F>;
    157   1.1  hubertf 		    if ($cnt > 10) {
    158   1.1  hubertf 			warning($fn,"loooong copyright?");
    159   1.1  hubertf 			last line;
    160   1.1  hubertf 		    }
    161   1.1  hubertf 		}
    162   1.1  hubertf 
    163   1.1  hubertf 		print "E> $_" if $debug;
    164   1.1  hubertf 		
    165   1.1  hubertf 		# post-process
    166   1.6   martin 
    167  1.11   martin 		if ($fn =~ m,$known_bad_clause_3_wording,) {
    168  1.11   martin 			while ($msg !~ /^ .*``.*\n/) {
    169  1.11   martin 				$msg =~ s/^.*\n//o;
    170  1.11   martin 			}
    171  1.11   martin 			$msg =~ s/\n.*``//o;
    172  1.11   martin 			$msg =~ s/''.*$//o;
    173  1.11   martin 		}
    174  1.11   martin 
    175   1.9   martin 		# *roff
    176   1.9   martin 		while ($msg =~ /^\.\\"\s*/) {
    177   1.9   martin 			$msg =~ s/^\.\\"\s*//o;
    178   1.9   martin 		}
    179   1.9   martin 		while ($msg =~ /\n\.\\"\s*/) {
    180   1.9   martin 			$msg =~ s/\n\.\\"\s*/\n/o;
    181   1.9   martin 		}
    182   1.9   martin 		$msg =~ s/\n\.\\"\s*$/\n/g;
    183   1.9   martin 
    184   1.6   martin 		# C++/C99
    185   1.6   martin 		while ($msg =~ /^\s*\/\/\s*/) {
    186   1.6   martin 			$msg =~ s/^\s*\/\/\s*//o;
    187   1.6   martin 		}
    188   1.9   martin 		while ($msg =~ /\n\s*\/\/\s*$/) {
    189   1.9   martin 			$msg =~ s/\n\s*\/\/\s*$//o;
    190   1.9   martin 		}
    191   1.9   martin 		$msg =~ s/\n\s*\/\/\s*/\n/g;
    192   1.9   martin 
    193   1.9   martin 		# C
    194   1.9   martin 		while ($msg =~ /^\s*\*\s*/) {
    195   1.9   martin 			$msg =~ s/^\s*\*\s*//o;
    196   1.9   martin 		}
    197   1.9   martin 		while ($msg =~ /\n\s*\*\s*$/) {
    198   1.9   martin 			$msg =~ s/\n\s*\*\s*$//o;
    199   1.6   martin 		}
    200   1.9   martin 		$msg =~ s/\n\s*\*\s*/\n/g;
    201   1.9   martin 
    202   1.9   martin 		# texinfo @c
    203   1.9   martin 		while ($msg =~ /^\s*\@c\s+/) {
    204   1.9   martin 			$msg =~ s/^\s*\@c\s+//o;
    205   1.9   martin 		}
    206   1.9   martin 		while ($msg =~ /\n\s*\@c\s+$/) {
    207   1.9   martin 			$msg =~ s/\n\s*\@c\s+$//o;
    208   1.9   martin 		}
    209   1.9   martin 		$msg =~ s/\n\s*\@c\s+/\n/g;
    210   1.6   martin 
    211  1.10   martin 		$msg =~ s/^REM\s*//g;			# BASIC?!?
    212  1.10   martin 		$msg =~ s/\nREM\s*/\n/g;		# BASIC?!?
    213  1.10   martin 		$msg =~ s/^dnl\s*//g;			# m4
    214  1.10   martin 		$msg =~ s/\dnl\s*/\n/g;			# m4
    215  1.10   martin 		$msg =~ s/^\s+-\s+//g;			# seen in docbook files
    216  1.10   martin 		$msg =~ s/\n\s+-\s+/ /g;		#
    217  1.10   martin 		$msg =~ s/^[#\\\|";]*\s*//g;		# sh etc.
    218  1.10   martin 		$msg =~ s/\n[#\\\|";]\s*/\n/g;		# sh etc.
    219  1.10   martin 		$msg =~ s/^[ 	*]*//g;      		# C
    220  1.10   martin 		$msg =~ s/\n[ 	*]*/\n/g;    		# C
    221  1.10   martin 
    222   1.1  hubertf 		$msg =~ s/\@cartouche\n//;              # texinfo
    223   1.1  hubertf 
    224   1.1  hubertf 		$msg =~ s/
//g;
    226   1.1  hubertf 		$msg =~ s/\s*\n/\n/g;
    227   1.1  hubertf 		$msg =~ s/^\s*//;
    228   1.1  hubertf 		$msg =~ s/\\\@/\@/g;
    229   1.1  hubertf 		$msg =~ s/\n\n/\n/g;
    230   1.1  hubertf 	        $msg =~ s/^\s*``//;
    231  1.10   martin 	        $msg =~ s/''\s*$//;
    232  1.10   martin 		$msg =~ s/^\"//o;
    233   1.1  hubertf 		$msg =~ s/\"$//o;
    234   1.3      wiz 
    235   1.1  hubertf 		# Split up into separate paragraphs
    236   1.1  hubertf 		#
    237   1.1  hubertf 		$msgs=$msg;
    238   1.1  hubertf 		$msgs=~s/(This (software|product))/|$1/g;
    239   1.1  hubertf 		$msgs=~s,^\|,,;
    240   1.1  hubertf 	      msg:
    241   1.9   martin 		foreach $msg (split(/\|/, $msgs)) {
    242   1.9   martin 		    while ($msg =~ /[\n\s]+$/) {
    243   1.9   martin 			$msg =~ s/[\n\s]+$//o;
    244   1.9   martin 		    }
    245   1.7   martin 		    next if ($msg eq "");
    246   1.6   martin 		    if ($comments) {
    247   1.6   martin 			print ".\\\" File $fn:\n";
    248   1.6   martin 			print "$msg";
    249   1.6   martin 			print "\n\n";
    250   1.1  hubertf 		    }
    251   1.1  hubertf 		    
    252   1.1  hubertf 		    # Figure out if there's a version w/ or w/o trailing dot
    253   1.8   martin 		    # 
    254   1.1  hubertf 		    if ($msg =~ /\.$/) {
    255   1.8   martin 			# check if there's a version of the same msg
    256   1.1  hubertf 			# w/o a trailing dot
    257   1.8   martin 			$msg2=$msg;
    258   1.1  hubertf 			$msg2=~s,\.$,,;
    259   1.1  hubertf 			if ($copyrights{"$msg2"}) {
    260   1.1  hubertf 			    # already there - skip
    261   1.1  hubertf 			    print "already there, w/o dot - skipping!\n"
    262   1.1  hubertf 				if $debug;
    263   1.1  hubertf 			    next msg;
    264   1.1  hubertf 			}
    265   1.1  hubertf 			
    266   1.1  hubertf 			# ... maybe with other case?
    267   1.1  hubertf 			$lc_msg2=lc($msg2);
    268   1.2    lukem 			if ($lc_copyrights{$lc_msg2}) {
    269   1.1  hubertf 			    print "already there, in different case - skipping\n"
    270   1.1  hubertf 				if $debug;
    271   1.1  hubertf 			    next msg;
    272   1.1  hubertf 			}
    273   1.1  hubertf 		    } else {
    274   1.8   martin 			# check if there's a version of the same msg
    275   1.1  hubertf 			# with a trailing dot
    276   1.8   martin 			$msg2=$msg;
    277   1.1  hubertf 			$msg2.=".";
    278   1.1  hubertf 			if ($copyrights{"$msg2"}) {
    279   1.1  hubertf 			    # already there - skip
    280   1.1  hubertf 			    print "already there, w/ dot - skipping!\n"
    281   1.1  hubertf 				if $debug;
    282   1.1  hubertf 			    next msg;
    283   1.1  hubertf 			}
    284   1.1  hubertf 			
    285   1.1  hubertf 			# ... maybe with other case?
    286   1.1  hubertf 			$lc_msg2=lc($msg2);
    287   1.2    lukem 			if ($lc_copyrights{$lc_msg2}) {
    288   1.1  hubertf 			    print "already there, in different case - skipping\n"
    289   1.1  hubertf 				if $debug;
    290   1.1  hubertf 			    next msg;
    291   1.1  hubertf 			}
    292   1.1  hubertf 		    }
    293   1.1  hubertf 
    294   1.1  hubertf 		    $copyrights{$msg} = 1;
    295   1.1  hubertf 		    $lc_copyrights{$lc_msg} = 1;
    296   1.1  hubertf 		}		 
    297   1.1  hubertf 
    298   1.1  hubertf 	    } else {
    299   1.1  hubertf 		print "?> $_" if $debug;
    300   1.1  hubertf 
    301   1.1  hubertf                 if ($fn !~ m,$known_bad_clause_3_wording,) {
    302   1.1  hubertf 		    warning($fn, "bad clause 3?");
    303   1.1  hubertf                 }
    304   1.1  hubertf 		last line;
    305   1.1  hubertf 	    }
    306   1.1  hubertf 	}
    307   1.1  hubertf     }
    308   1.1  hubertf     close(F);
    309   1.1  hubertf }
    310   1.1  hubertf 
    311   1.6   martin 
    312   1.6   martin if ($html) {
    313   1.6   martin     print "<ul>\n";
    314   1.6   martin     foreach $msg (sort keys %copyrights) {
    315   1.6   martin 	print "<li>$msg</li>\n";
    316   1.6   martin     }
    317   1.7   martin     print "</ul>\n";
    318   1.7   martin } elsif ($xml) {
    319   1.7   martin     foreach $msg (sort keys %copyrights) {
    320   1.7   martin 	print "<listitem>$msg</listitem>\n";
    321   1.6   martin     }
    322   1.6   martin } else {
    323   1.6   martin     print "------------------------------------------------------------\n";
    324   1.6   martin 
    325   1.6   martin     $firsttime=1;
    326   1.6   martin     foreach $msg (sort keys %copyrights) {
    327   1.6   martin 	if ($firsttime) {
    328   1.6   martin 	    $firsttime=0;
    329   1.6   martin 	} else {
    330   1.6   martin 	    print ".It\n";
    331   1.6   martin 	}
    332   1.1  hubertf 	print "$msg\n";
    333   1.1  hubertf     }
    334                }
    335