1 1.2 lukem #!/usr/bin/env perl 2 1.1 hubertf # 3 1.5 martin # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc. 4 1.1 hubertf # All rights reserved. 5 1.5 martin # 6 1.5 martin # This code is derived from software contributed to The NetBSD Foundation 7 1.5 martin # by Hubert Feyrer <hubert (at] feyrer.de>. 8 1.1 hubertf # 9 1.1 hubertf # Redistribution and use in source and binary forms, with or without 10 1.1 hubertf # modification, are permitted provided that the following conditions 11 1.1 hubertf # are met: 12 1.1 hubertf # 1. Redistributions of source code must retain the above copyright 13 1.1 hubertf # notice, this list of conditions and the following disclaimer. 14 1.1 hubertf # 2. Redistributions in binary form must reproduce the above copyright 15 1.1 hubertf # notice, this list of conditions and the following disclaimer in the 16 1.1 hubertf # documentation and/or other materials provided with the distribution. 17 1.1 hubertf # 18 1.5 martin # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 1.5 martin # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 1.5 martin # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 1.5 martin # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 1.5 martin # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 1.5 martin # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 1.5 martin # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 1.5 martin # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 1.5 martin # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 1.5 martin # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 1.5 martin # POSSIBILITY OF SUCH DAMAGE. 29 1.1 hubertf 30 1.1 hubertf # 31 1.1 hubertf # Extract BSD-mandated copyright messages for NetBSD documentation 32 1.1 hubertf # 33 1.1 hubertf # Usage: 34 1.23 uwe # 1) find src xsrc -type f -print \ 35 1.1 hubertf # | perl extract-contrib-string.pl 36 1.1 hubertf # >x 37 1.6 martin # 38 1.1 hubertf # 2) merge text after "--------" in "x" into 39 1.1 hubertf # src/distrib/notes/common/legal.common 40 1.1 hubertf # 41 1.6 martin # Options: 42 1.6 martin # 43 1.9 martin # perl extract-contrib-string.pl [-d] [-h] [-x] [-?] 44 1.6 martin # 45 1.6 martin # where 46 1.6 martin # -d debug output 47 1.6 martin # -h html output 48 1.9 martin # -x xml/docbook output 49 1.9 martin # -? display help/usage message 50 1.1 hubertf 51 1.1 hubertf 52 1.21 uwe $ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?' 53 1.21 uwe . '|\d\. Redistributions of any form whatsoever)'; 54 1.21 uwe $ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)'; 55 1.1 hubertf $ack_endline= 56 1.1 hubertf '(\d\.\s*(Neither the name' 57 1.1 hubertf . '|The name of the company nor the name' # Wasn't my idea 58 1.1 hubertf . '|The name of the author may not' 59 1.1 hubertf . '|The name of .* must not be used to endorse' 60 1.1 hubertf . '|The names? (of )?.* nor the names? of' 61 1.1 hubertf . '|The names? (of )?.* or any of it\'?s members' 62 1.1 hubertf . '|Redistributions of any form whatsoever' 63 1.18 uwe . '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used' 64 1.18 uwe . "|Urbana-Champaign Independent Media Center's name" 65 1.18 uwe . '))' 66 1.18 uwe .'|(^Neither the name)' 67 1.1 hubertf .'|(THIS SOFTWARE IS PROVIDED)' 68 1.21 uwe .'|(ALL WARRANTIES WITH REGARD)' 69 1.1 hubertf .'|(The word \'cryptographic\' can be left out if)' 70 1.1 hubertf .'|(may be used to endorse)' 71 1.1 hubertf .'|(@end cartouche)' 72 1.18 uwe .'|(</para>)' 73 1.1 hubertf .'|(Redistribution and use in source and binary forms)' 74 1.1 hubertf .'|(may not be used to endorse)' 75 1.1 hubertf .'|(\.IP 4)' 76 1.1 hubertf .'|(ALLOW FREE USE OF)' 77 1.1 hubertf .'|(materials provided with the distribution)' 78 1.1 hubertf .'|(@InsertRedistribution@)'; 79 1.1 hubertf 80 1.1 hubertf $known_bad_clause_3_wording= 81 1.4 martin 'usr.bin/lex/.*' # UCB 82 1.11 martin .'|dist/bind/contrib/nslint-2.1a3/lbl/.*' # 83 1.13 martin .'|usr.sbin/traceroute/ifaddrlist.h' # 84 1.13 martin .'|usr.sbin/traceroute/traceroute.c' # 85 1.1 hubertf .'|usr.sbin/hilinfo/hilinfo.c' # CSS @ Utah 86 1.1 hubertf ; 87 1.1 hubertf 88 1.1 hubertf sub warning { 89 1.1 hubertf local($fn,$msg) = @_; 90 1.1 hubertf print "XXX $fn line $.: $msg\n" 91 1.1 hubertf } 92 1.1 hubertf 93 1.7 martin while ($#ARGV >= 0) { 94 1.7 martin $debug=1 if ($ARGV[0] =~ /-d/i); 95 1.7 martin $html=1 if ($ARGV[0] =~ /-h/i); 96 1.7 martin $xml=1 if ($ARGV[0] =~ /-x/i); 97 1.9 martin $usage=1 if ($ARGV[0] =~ /-\?/); 98 1.6 martin shift(@ARGV); 99 1.6 martin } 100 1.1 hubertf 101 1.9 martin if ($usage) { 102 1.9 martin print "usage: find /usr/src -type f -print |\n" . 103 1.9 martin " perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" . 104 1.9 martin " where\n" . 105 1.9 martin " -h output html\n" . 106 1.9 martin " -x output xml/docbook\n" . 107 1.9 martin " -d debug\n" . 108 1.9 martin " -? display this help message\n"; 109 1.9 martin exit(0); 110 1.9 martin } 111 1.9 martin 112 1.7 martin $comments = !$html && !$xml; 113 1.1 hubertf 114 1.1 hubertf file: 115 1.1 hubertf while(<>) { 116 1.1 hubertf chomp(); 117 1.1 hubertf $fn=$_; 118 1.1 hubertf 119 1.1 hubertf open(F, "$fn") || die "cannot read $fn: $!\n"; 120 1.1 hubertf 121 1.1 hubertf line: 122 1.1 hubertf while(<F>) { 123 1.21 uwe if (0 and /$ack_line2/in){ 124 1.1 hubertf print "?> $_" if $debug; 125 1.1 hubertf 126 1.1 hubertf if ($fn !~ m,$known_bad_clause_3_wording,) { 127 1.1 hubertf warning($fn, "clause 3 start not caught"); 128 1.1 hubertf } 129 1.1 hubertf last line; 130 1.1 hubertf } 131 1.1 hubertf 132 1.1 hubertf print "0> $_" if $debug; 133 1.1 hubertf 134 1.10 martin # special case perl script generating a license (openssl's 135 1.10 martin # mkerr.pl) - ignore the quoted license, there is another one 136 1.10 martin # inside: 137 1.21 uwe if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) { 138 1.21 uwe while(!/$ack_endline/in) { 139 1.10 martin print "S> $_" if $debug; 140 1.10 martin $_ = <F>; 141 1.10 martin } 142 1.10 martin } 143 1.10 martin 144 1.21 uwe if (/$ack_line1/in 145 1.21 uwe or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) { 146 1.1 hubertf 147 1.1 hubertf print "1> $_" if $debug; 148 1.1 hubertf 149 1.1 hubertf $_=<F> 150 1.1 hubertf unless $fn =~ m,$known_bad_clause_3_wording,; 151 1.21 uwe if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){ 152 1.1 hubertf 153 1.1 hubertf print "2> $_" if $debug; 154 1.1 hubertf 155 1.1 hubertf $msg=""; 156 1.20 uwe 157 1.20 uwe if ($fn =~ m,$known_bad_clause_3_wording, and /``/) { 158 1.20 uwe $msg = $_; 159 1.20 uwe } 160 1.20 uwe elsif (/:\s+This product/) { 161 1.20 uwe # src/sys/lib/libkern/rngtest.c - bad clause 3 wording 162 1.20 uwe # that is not like others, so special case it here 163 1.20 uwe $msg = $_; 164 1.20 uwe $msg =~ s/^.*:\s+(This product.*)$/$1/; 165 1.20 uwe } 166 1.20 uwe 167 1.1 hubertf $cnt=0; 168 1.1 hubertf $_=<F>; 169 1.21 uwe while(!/$ack_endline/in) { 170 1.1 hubertf 171 1.1 hubertf print "C> $_" if $debug; 172 1.1 hubertf 173 1.1 hubertf $msg .= $_; 174 1.1 hubertf $cnt++; 175 1.1 hubertf $_ = <F>; 176 1.1 hubertf if ($cnt > 10) { 177 1.1 hubertf warning($fn,"loooong copyright?"); 178 1.1 hubertf last line; 179 1.1 hubertf } 180 1.1 hubertf } 181 1.1 hubertf 182 1.1 hubertf print "E> $_" if $debug; 183 1.1 hubertf 184 1.1 hubertf # post-process 185 1.6 martin 186 1.11 martin if ($fn =~ m,$known_bad_clause_3_wording,) { 187 1.12 martin while ($msg !~ /^.*``.*\n/) { 188 1.12 martin last if (!$msg); 189 1.11 martin $msg =~ s/^.*\n//o; 190 1.11 martin } 191 1.12 martin $msg =~ s/^.*``//o; 192 1.11 martin $msg =~ s/\n.*``//o; 193 1.11 martin $msg =~ s/''.*$//o; 194 1.11 martin } 195 1.11 martin 196 1.18 uwe # XXX: pcap &c - add to known_bad_clause_3_wording but 197 1.18 uwe # that code seems to have problems. Easier to add a 198 1.18 uwe # hack here, shouldn't affect good clause 3. 199 1.18 uwe $msg =~ s/''\s+Neither the name.*$//; 200 1.18 uwe 201 1.9 martin # *roff 202 1.9 martin while ($msg =~ /^\.\\"\s*/) { 203 1.9 martin $msg =~ s/^\.\\"\s*//o; 204 1.9 martin } 205 1.9 martin while ($msg =~ /\n\.\\"\s*/) { 206 1.9 martin $msg =~ s/\n\.\\"\s*/\n/o; 207 1.9 martin } 208 1.9 martin $msg =~ s/\n\.\\"\s*$/\n/g; 209 1.9 martin 210 1.6 martin # C++/C99 211 1.6 martin while ($msg =~ /^\s*\/\/\s*/) { 212 1.6 martin $msg =~ s/^\s*\/\/\s*//o; 213 1.6 martin } 214 1.9 martin while ($msg =~ /\n\s*\/\/\s*$/) { 215 1.9 martin $msg =~ s/\n\s*\/\/\s*$//o; 216 1.9 martin } 217 1.9 martin $msg =~ s/\n\s*\/\/\s*/\n/g; 218 1.9 martin 219 1.9 martin # C 220 1.9 martin while ($msg =~ /^\s*\*\s*/) { 221 1.9 martin $msg =~ s/^\s*\*\s*//o; 222 1.9 martin } 223 1.9 martin while ($msg =~ /\n\s*\*\s*$/) { 224 1.9 martin $msg =~ s/\n\s*\*\s*$//o; 225 1.6 martin } 226 1.9 martin $msg =~ s/\n\s*\*\s*/\n/g; 227 1.9 martin 228 1.9 martin # texinfo @c 229 1.9 martin while ($msg =~ /^\s*\@c\s+/) { 230 1.9 martin $msg =~ s/^\s*\@c\s+//o; 231 1.9 martin } 232 1.9 martin while ($msg =~ /\n\s*\@c\s+$/) { 233 1.9 martin $msg =~ s/\n\s*\@c\s+$//o; 234 1.9 martin } 235 1.9 martin $msg =~ s/\n\s*\@c\s+/\n/g; 236 1.6 martin 237 1.10 martin $msg =~ s/^REM\s*//g; # BASIC?!? 238 1.10 martin $msg =~ s/\nREM\s*/\n/g; # BASIC?!? 239 1.10 martin $msg =~ s/^dnl\s*//g; # m4 240 1.19 uwe $msg =~ s/\ndnl\s*/\n/g; # m4 241 1.10 martin $msg =~ s/^\s+-\s+//g; # seen in docbook files 242 1.10 martin $msg =~ s/\n\s+-\s+/ /g; # 243 1.14 martin $msg =~ s/^[#\\\|";]+\s*//g; # sh etc. 244 1.14 martin $msg =~ s/\n[#\\\|";]+\s*/\n/g; # sh etc. 245 1.10 martin $msg =~ s/^[ *]*//g; # C 246 1.10 martin $msg =~ s/\n[ *]*/\n/g; # C 247 1.10 martin 248 1.1 hubertf $msg =~ s/\@cartouche\n//; # texinfo 249 1.1 hubertf 250 1.1 hubertf $msg =~ s/ //g; 252 1.1 hubertf $msg =~ s/\s*\n/\n/g; 253 1.1 hubertf $msg =~ s/^\s*//; 254 1.1 hubertf $msg =~ s/\\\@/\@/g; 255 1.1 hubertf $msg =~ s/\n\n/\n/g; 256 1.1 hubertf $msg =~ s/^\s*``//; 257 1.10 martin $msg =~ s/''\s*$//; 258 1.10 martin $msg =~ s/^\"//o; 259 1.22 uwe $msg =~ s/\"$//o; 260 1.1 hubertf $msg =~ s/\"\.$/./o; 261 1.24 uwe 262 1.24 uwe # Fix ISO-646-SE spelling of Lule\(oa 263 1.16 uwe $msg =~ s/Lule\}/Lule\\(oa/g; 264 1.15 uwe 265 1.15 uwe # Collapse multiple spaces between words. There are a 266 1.15 uwe # few entries with "by__Name" that affects sorting. 267 1.15 uwe $msg =~ s/(\w) +(\w)/$1 $2/g; 268 1.3 wiz 269 1.1 hubertf # Split up into separate paragraphs 270 1.1 hubertf # 271 1.1 hubertf $msgs=$msg; 272 1.1 hubertf $msgs=~s/(This (software|product))/|$1/g; 273 1.1 hubertf $msgs=~s,^\|,,; 274 1.1 hubertf msg: 275 1.9 martin foreach $msg (split(/\|/, $msgs)) { 276 1.9 martin while ($msg =~ /[\n\s]+$/) { 277 1.9 martin $msg =~ s/[\n\s]+$//o; 278 1.9 martin } 279 1.7 martin next if ($msg eq ""); 280 1.6 martin if ($comments) { 281 1.6 martin print ".\\\" File $fn:\n"; 282 1.6 martin print "$msg"; 283 1.6 martin print "\n\n"; 284 1.18 uwe } 285 1.18 uwe 286 1.18 uwe my $key = lc($msg); # ignore difference in case 287 1.18 uwe $key =~ s/\n/ /g; # ignore difference in line breaks 288 1.18 uwe $key =~ s/\.$//g; # drop the final dot 289 1.18 uwe 290 1.18 uwe # push organizations ("by the") to the end of the 291 1.18 uwe # sorting order 292 1.18 uwe $key =~ s/(developed by) the/$1 ~the/; 293 1.18 uwe 294 1.18 uwe if (defined $copyrights{$key}) { 295 1.18 uwe if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) { 296 1.18 uwe print "already there, without dot - overriding!\n" 297 1.1 hubertf if 1 || $debug; 298 1.18 uwe } 299 1.1 hubertf else { 300 1.1 hubertf next msg; 301 1.1 hubertf } 302 1.1 hubertf } 303 1.18 uwe 304 1.18 uwe $copyrights{$key} = $msg; 305 1.1 hubertf } 306 1.1 hubertf 307 1.1 hubertf } else { 308 1.1 hubertf print "?> $_" if $debug; 309 1.1 hubertf 310 1.1 hubertf if ($fn !~ m,$known_bad_clause_3_wording,) { 311 1.1 hubertf warning($fn, "bad clause 3?"); 312 1.1 hubertf } 313 1.1 hubertf last line; 314 1.1 hubertf } 315 1.1 hubertf } 316 1.1 hubertf } 317 1.1 hubertf close(F); 318 1.1 hubertf } 319 1.1 hubertf 320 1.6 martin 321 1.6 martin if ($html) { 322 1.18 uwe print "<ul>\n"; 323 1.18 uwe foreach $key (sort keys %copyrights) { 324 1.6 martin my $msg = $copyrights{$key}; 325 1.6 martin print "<li>$msg</li>\n"; 326 1.6 martin } 327 1.7 martin print "</ul>\n"; 328 1.18 uwe } elsif ($xml) { 329 1.18 uwe foreach $key (sort keys %copyrights) { 330 1.7 martin my $msg = $copyrights{$key}; 331 1.7 martin print "<listitem>$msg</listitem>\n"; 332 1.6 martin } 333 1.6 martin } else { 334 1.6 martin print "------------------------------------------------------------\n"; 335 1.6 martin 336 1.18 uwe $firsttime=1; 337 1.18 uwe foreach $key (sort keys %copyrights) { 338 1.6 martin my $msg = $copyrights{$key}; 339 1.6 martin if ($firsttime) { 340 1.6 martin $firsttime=0; 341 1.6 martin } else { 342 1.6 martin print ".It\n"; 343 1.6 martin } 344 1.1 hubertf print "$msg\n"; 345 1.1 hubertf } 346 } 347