extract-contrib-string.pl revision 1.23 1 1.2 lukem #!/usr/bin/env perl
2 1.1 hubertf #
3 1.5 martin # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
4 1.1 hubertf # All rights reserved.
5 1.5 martin #
6 1.5 martin # This code is derived from software contributed to The NetBSD Foundation
7 1.5 martin # by Hubert Feyrer <hubert (at] feyrer.de>.
8 1.1 hubertf #
9 1.1 hubertf # Redistribution and use in source and binary forms, with or without
10 1.1 hubertf # modification, are permitted provided that the following conditions
11 1.1 hubertf # are met:
12 1.1 hubertf # 1. Redistributions of source code must retain the above copyright
13 1.1 hubertf # notice, this list of conditions and the following disclaimer.
14 1.1 hubertf # 2. Redistributions in binary form must reproduce the above copyright
15 1.1 hubertf # notice, this list of conditions and the following disclaimer in the
16 1.1 hubertf # documentation and/or other materials provided with the distribution.
17 1.1 hubertf #
18 1.5 martin # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 1.5 martin # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 1.5 martin # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 1.5 martin # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 1.5 martin # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 1.5 martin # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 1.5 martin # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 1.5 martin # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 1.5 martin # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 1.5 martin # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 1.5 martin # POSSIBILITY OF SUCH DAMAGE.
29 1.1 hubertf
30 1.1 hubertf #
31 1.1 hubertf # Extract BSD-mandated copyright messages for NetBSD documentation
32 1.1 hubertf #
33 1.1 hubertf # Usage:
34 1.23 uwe # 1) find src xsrc -type f -print \
35 1.1 hubertf # | perl extract-contrib-string.pl
36 1.1 hubertf # >x
37 1.6 martin #
38 1.1 hubertf # 2) merge text after "--------" in "x" into
39 1.1 hubertf # src/distrib/notes/common/legal.common
40 1.1 hubertf #
41 1.6 martin # Options:
42 1.6 martin #
43 1.9 martin # perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
44 1.6 martin #
45 1.6 martin # where
46 1.6 martin # -d debug output
47 1.6 martin # -h html output
48 1.9 martin # -x xml/docbook output
49 1.9 martin # -? display help/usage message
50 1.1 hubertf
51 1.1 hubertf
52 1.21 uwe $ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
53 1.21 uwe . '|\d\. Redistributions of any form whatsoever)';
54 1.21 uwe $ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';
55 1.1 hubertf $ack_endline=
56 1.1 hubertf '(\d\.\s*(Neither the name'
57 1.1 hubertf . '|The name of the company nor the name' # Wasn't my idea
58 1.1 hubertf . '|The name of the author may not'
59 1.1 hubertf . '|The name of .* must not be used to endorse'
60 1.1 hubertf . '|The names? (of )?.* nor the names? of'
61 1.1 hubertf . '|The names? (of )?.* or any of it\'?s members'
62 1.1 hubertf . '|Redistributions of any form whatsoever'
63 1.18 uwe . '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
64 1.18 uwe . "|Urbana-Champaign Independent Media Center's name"
65 1.18 uwe . '))'
66 1.18 uwe .'|(^Neither the name)'
67 1.1 hubertf .'|(THIS SOFTWARE IS PROVIDED)'
68 1.21 uwe .'|(ALL WARRANTIES WITH REGARD)'
69 1.1 hubertf .'|(The word \'cryptographic\' can be left out if)'
70 1.1 hubertf .'|(may be used to endorse)'
71 1.1 hubertf .'|(@end cartouche)'
72 1.18 uwe .'|(</para>)'
73 1.1 hubertf .'|(Redistribution and use in source and binary forms)'
74 1.1 hubertf .'|(may not be used to endorse)'
75 1.1 hubertf .'|(\.IP 4)'
76 1.1 hubertf .'|(ALLOW FREE USE OF)'
77 1.1 hubertf .'|(materials provided with the distribution)'
78 1.1 hubertf .'|(@InsertRedistribution@)';
79 1.1 hubertf
80 1.1 hubertf $known_bad_clause_3_wording=
81 1.4 martin 'usr.bin/lex/.*' # UCB
82 1.11 martin .'|dist/bind/contrib/nslint-2.1a3/lbl/.*' #
83 1.13 martin .'|usr.sbin/traceroute/ifaddrlist.h' #
84 1.13 martin .'|usr.sbin/traceroute/traceroute.c' #
85 1.1 hubertf .'|usr.sbin/hilinfo/hilinfo.c' # CSS @ Utah
86 1.1 hubertf ;
87 1.1 hubertf
88 1.1 hubertf sub warning {
89 1.1 hubertf local($fn,$msg) = @_;
90 1.1 hubertf print "XXX $fn line $.: $msg\n"
91 1.1 hubertf }
92 1.1 hubertf
93 1.7 martin while ($#ARGV >= 0) {
94 1.7 martin $debug=1 if ($ARGV[0] =~ /-d/i);
95 1.7 martin $html=1 if ($ARGV[0] =~ /-h/i);
96 1.7 martin $xml=1 if ($ARGV[0] =~ /-x/i);
97 1.9 martin $usage=1 if ($ARGV[0] =~ /-\?/);
98 1.6 martin shift(@ARGV);
99 1.6 martin }
100 1.1 hubertf
101 1.9 martin if ($usage) {
102 1.9 martin print "usage: find /usr/src -type f -print |\n" .
103 1.9 martin " perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
104 1.9 martin " where\n" .
105 1.9 martin " -h output html\n" .
106 1.9 martin " -x output xml/docbook\n" .
107 1.9 martin " -d debug\n" .
108 1.9 martin " -? display this help message\n";
109 1.9 martin exit(0);
110 1.9 martin }
111 1.9 martin
112 1.7 martin $comments = !$html && !$xml;
113 1.1 hubertf
114 1.1 hubertf file:
115 1.1 hubertf while(<>) {
116 1.1 hubertf chomp();
117 1.1 hubertf $fn=$_;
118 1.1 hubertf
119 1.1 hubertf open(F, "$fn") || die "cannot read $fn: $!\n";
120 1.1 hubertf
121 1.1 hubertf line:
122 1.1 hubertf while(<F>) {
123 1.21 uwe if (0 and /$ack_line2/in){
124 1.1 hubertf print "?> $_" if $debug;
125 1.1 hubertf
126 1.1 hubertf if ($fn !~ m,$known_bad_clause_3_wording,) {
127 1.1 hubertf warning($fn, "clause 3 start not caught");
128 1.1 hubertf }
129 1.1 hubertf last line;
130 1.1 hubertf }
131 1.1 hubertf
132 1.1 hubertf print "0> $_" if $debug;
133 1.1 hubertf
134 1.10 martin # special case perl script generating a license (openssl's
135 1.10 martin # mkerr.pl) - ignore the quoted license, there is another one
136 1.10 martin # inside:
137 1.21 uwe if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
138 1.21 uwe while(!/$ack_endline/in) {
139 1.10 martin print "S> $_" if $debug;
140 1.10 martin $_ = <F>;
141 1.10 martin }
142 1.10 martin }
143 1.10 martin
144 1.21 uwe if (/$ack_line1/in
145 1.21 uwe or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) {
146 1.1 hubertf
147 1.1 hubertf print "1> $_" if $debug;
148 1.1 hubertf
149 1.1 hubertf $_=<F>
150 1.1 hubertf unless $fn =~ m,$known_bad_clause_3_wording,;
151 1.21 uwe if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){
152 1.1 hubertf
153 1.1 hubertf print "2> $_" if $debug;
154 1.1 hubertf
155 1.1 hubertf $msg="";
156 1.20 uwe
157 1.20 uwe if ($fn =~ m,$known_bad_clause_3_wording, and /``/) {
158 1.20 uwe $msg = $_;
159 1.20 uwe }
160 1.20 uwe elsif (/:\s+This product/) {
161 1.20 uwe # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
162 1.20 uwe # that is not like others, so special case it here
163 1.20 uwe $msg = $_;
164 1.20 uwe $msg =~ s/^.*:\s+(This product.*)$/$1/;
165 1.20 uwe }
166 1.20 uwe
167 1.1 hubertf $cnt=0;
168 1.1 hubertf $_=<F>;
169 1.21 uwe while(!/$ack_endline/in) {
170 1.1 hubertf
171 1.1 hubertf print "C> $_" if $debug;
172 1.1 hubertf
173 1.1 hubertf $msg .= $_;
174 1.1 hubertf $cnt++;
175 1.1 hubertf $_ = <F>;
176 1.1 hubertf if ($cnt > 10) {
177 1.1 hubertf warning($fn,"loooong copyright?");
178 1.1 hubertf last line;
179 1.1 hubertf }
180 1.1 hubertf }
181 1.1 hubertf
182 1.1 hubertf print "E> $_" if $debug;
183 1.1 hubertf
184 1.1 hubertf # post-process
185 1.6 martin
186 1.11 martin if ($fn =~ m,$known_bad_clause_3_wording,) {
187 1.12 martin while ($msg !~ /^.*``.*\n/) {
188 1.12 martin last if (!$msg);
189 1.11 martin $msg =~ s/^.*\n//o;
190 1.11 martin }
191 1.12 martin $msg =~ s/^.*``//o;
192 1.11 martin $msg =~ s/\n.*``//o;
193 1.11 martin $msg =~ s/''.*$//o;
194 1.11 martin }
195 1.11 martin
196 1.18 uwe # XXX: pcap &c - add to known_bad_clause_3_wording but
197 1.18 uwe # that code seems to have problems. Easier to add a
198 1.18 uwe # hack here, shouldn't affect good clause 3.
199 1.18 uwe $msg =~ s/''\s+Neither the name.*$//;
200 1.18 uwe
201 1.9 martin # *roff
202 1.9 martin while ($msg =~ /^\.\\"\s*/) {
203 1.9 martin $msg =~ s/^\.\\"\s*//o;
204 1.9 martin }
205 1.9 martin while ($msg =~ /\n\.\\"\s*/) {
206 1.9 martin $msg =~ s/\n\.\\"\s*/\n/o;
207 1.9 martin }
208 1.9 martin $msg =~ s/\n\.\\"\s*$/\n/g;
209 1.9 martin
210 1.6 martin # C++/C99
211 1.6 martin while ($msg =~ /^\s*\/\/\s*/) {
212 1.6 martin $msg =~ s/^\s*\/\/\s*//o;
213 1.6 martin }
214 1.9 martin while ($msg =~ /\n\s*\/\/\s*$/) {
215 1.9 martin $msg =~ s/\n\s*\/\/\s*$//o;
216 1.9 martin }
217 1.9 martin $msg =~ s/\n\s*\/\/\s*/\n/g;
218 1.9 martin
219 1.9 martin # C
220 1.9 martin while ($msg =~ /^\s*\*\s*/) {
221 1.9 martin $msg =~ s/^\s*\*\s*//o;
222 1.9 martin }
223 1.9 martin while ($msg =~ /\n\s*\*\s*$/) {
224 1.9 martin $msg =~ s/\n\s*\*\s*$//o;
225 1.6 martin }
226 1.9 martin $msg =~ s/\n\s*\*\s*/\n/g;
227 1.9 martin
228 1.9 martin # texinfo @c
229 1.9 martin while ($msg =~ /^\s*\@c\s+/) {
230 1.9 martin $msg =~ s/^\s*\@c\s+//o;
231 1.9 martin }
232 1.9 martin while ($msg =~ /\n\s*\@c\s+$/) {
233 1.9 martin $msg =~ s/\n\s*\@c\s+$//o;
234 1.9 martin }
235 1.9 martin $msg =~ s/\n\s*\@c\s+/\n/g;
236 1.6 martin
237 1.10 martin $msg =~ s/^REM\s*//g; # BASIC?!?
238 1.10 martin $msg =~ s/\nREM\s*/\n/g; # BASIC?!?
239 1.10 martin $msg =~ s/^dnl\s*//g; # m4
240 1.19 uwe $msg =~ s/\ndnl\s*/\n/g; # m4
241 1.10 martin $msg =~ s/^\s+-\s+//g; # seen in docbook files
242 1.10 martin $msg =~ s/\n\s+-\s+/ /g; #
243 1.14 martin $msg =~ s/^[#\\\|";]+\s*//g; # sh etc.
244 1.14 martin $msg =~ s/\n[#\\\|";]+\s*/\n/g; # sh etc.
245 1.10 martin $msg =~ s/^[ *]*//g; # C
246 1.10 martin $msg =~ s/\n[ *]*/\n/g; # C
247 1.10 martin
248 1.1 hubertf $msg =~ s/\@cartouche\n//; # texinfo
249 1.1 hubertf
250 1.1 hubertf $msg =~ s/
//g;
252 1.1 hubertf $msg =~ s/\s*\n/\n/g;
253 1.1 hubertf $msg =~ s/^\s*//;
254 1.1 hubertf $msg =~ s/\\\@/\@/g;
255 1.1 hubertf $msg =~ s/\n\n/\n/g;
256 1.1 hubertf $msg =~ s/^\s*``//;
257 1.10 martin $msg =~ s/''\s*$//;
258 1.10 martin $msg =~ s/^\"//o;
259 1.22 uwe $msg =~ s/\"$//o;
260 1.1 hubertf $msg =~ s/\"\.$/./o;
261 1.16 uwe
262 1.16 uwe # Fix ISO-646-SE spelling of Lule\[oa]
263 1.16 uwe $msg =~ s/Lule\}/Lulea/g;
264 1.15 uwe
265 1.15 uwe # Collapse multiple spaces between words. There are a
266 1.15 uwe # few entries with "by__Name" that affects sorting.
267 1.15 uwe $msg =~ s/(\w) +(\w)/$1 $2/g;
268 1.3 wiz
269 1.1 hubertf # Split up into separate paragraphs
270 1.1 hubertf #
271 1.1 hubertf $msgs=$msg;
272 1.1 hubertf $msgs=~s/(This (software|product))/|$1/g;
273 1.1 hubertf $msgs=~s,^\|,,;
274 1.1 hubertf msg:
275 1.9 martin foreach $msg (split(/\|/, $msgs)) {
276 1.9 martin while ($msg =~ /[\n\s]+$/) {
277 1.9 martin $msg =~ s/[\n\s]+$//o;
278 1.9 martin }
279 1.7 martin next if ($msg eq "");
280 1.6 martin if ($comments) {
281 1.6 martin print ".\\\" File $fn:\n";
282 1.6 martin print "$msg";
283 1.6 martin print "\n\n";
284 1.18 uwe }
285 1.18 uwe
286 1.18 uwe my $key = lc($msg); # ignore difference in case
287 1.18 uwe $key =~ s/\n/ /g; # ignore difference in line breaks
288 1.18 uwe $key =~ s/\.$//g; # drop the final dot
289 1.18 uwe
290 1.18 uwe # push organizations ("by the") to the end of the
291 1.18 uwe # sorting order
292 1.18 uwe $key =~ s/(developed by) the/$1 ~the/;
293 1.18 uwe
294 1.18 uwe if (defined $copyrights{$key}) {
295 1.18 uwe if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
296 1.18 uwe print "already there, without dot - overriding!\n"
297 1.1 hubertf if 1 || $debug;
298 1.18 uwe }
299 1.1 hubertf else {
300 1.1 hubertf next msg;
301 1.1 hubertf }
302 1.1 hubertf }
303 1.18 uwe
304 1.18 uwe $copyrights{$key} = $msg;
305 1.1 hubertf }
306 1.1 hubertf
307 1.1 hubertf } else {
308 1.1 hubertf print "?> $_" if $debug;
309 1.1 hubertf
310 1.1 hubertf if ($fn !~ m,$known_bad_clause_3_wording,) {
311 1.1 hubertf warning($fn, "bad clause 3?");
312 1.1 hubertf }
313 1.1 hubertf last line;
314 1.1 hubertf }
315 1.1 hubertf }
316 1.1 hubertf }
317 1.1 hubertf close(F);
318 1.1 hubertf }
319 1.1 hubertf
320 1.6 martin
321 1.6 martin if ($html) {
322 1.18 uwe print "<ul>\n";
323 1.18 uwe foreach $key (sort keys %copyrights) {
324 1.6 martin my $msg = $copyrights{$key};
325 1.6 martin print "<li>$msg</li>\n";
326 1.6 martin }
327 1.7 martin print "</ul>\n";
328 1.18 uwe } elsif ($xml) {
329 1.18 uwe foreach $key (sort keys %copyrights) {
330 1.7 martin my $msg = $copyrights{$key};
331 1.7 martin print "<listitem>$msg</listitem>\n";
332 1.6 martin }
333 1.6 martin } else {
334 1.6 martin print "------------------------------------------------------------\n";
335 1.6 martin
336 1.18 uwe $firsttime=1;
337 1.18 uwe foreach $key (sort keys %copyrights) {
338 1.6 martin my $msg = $copyrights{$key};
339 1.6 martin if ($firsttime) {
340 1.6 martin $firsttime=0;
341 1.6 martin } else {
342 1.6 martin print ".It\n";
343 1.6 martin }
344 1.1 hubertf print "$msg\n";
345 1.1 hubertf }
346 }
347