extract-contrib-string.pl revision 1.22 1 #!/usr/bin/env perl
2 #
3 # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
4 # All rights reserved.
5 #
6 # This code is derived from software contributed to The NetBSD Foundation
7 # by Hubert Feyrer <hubert (at] feyrer.de>.
8 #
9 # Redistribution and use in source and binary forms, with or without
10 # modification, are permitted provided that the following conditions
11 # are met:
12 # 1. Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
29
30 #
31 # Extract BSD-mandated copyright messages for NetBSD documentation
32 #
33 # Usage:
34 # 1) find /usr/src -type f -print \
35 # | perl extract-contrib-string.pl
36 # >x
37 #
38 # 2) merge text after "--------" in "x" into
39 # src/distrib/notes/common/legal.common
40 #
41 # Options:
42 #
43 # perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
44 #
45 # where
46 # -d debug output
47 # -h html output
48 # -x xml/docbook output
49 # -? display help/usage message
50
51
52 $ack_line1='([aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?'
53 . '|\d\. Redistributions of any form whatsoever)';
54 $ack_line2='(display the( following)?( acknowledge?ment)?|acknowledge?ment:$)';
55 $ack_endline=
56 '(\d\.\s*(Neither the name'
57 . '|The name of the company nor the name' # Wasn't my idea
58 . '|The name of the author may not'
59 . '|The name of .* must not be used to endorse'
60 . '|The names? (of )?.* nor the names? of'
61 . '|The names? (of )?.* or any of it\'?s members'
62 . '|Redistributions of any form whatsoever'
63 . '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used'
64 . "|Urbana-Champaign Independent Media Center's name"
65 . '))'
66 .'|(^Neither the name)'
67 .'|(THIS SOFTWARE IS PROVIDED)'
68 .'|(ALL WARRANTIES WITH REGARD)'
69 .'|(The word \'cryptographic\' can be left out if)'
70 .'|(may be used to endorse)'
71 .'|(@end cartouche)'
72 .'|(</para>)'
73 .'|(Redistribution and use in source and binary forms)'
74 .'|(may not be used to endorse)'
75 .'|(\.IP 4)'
76 .'|(ALLOW FREE USE OF)'
77 .'|(materials provided with the distribution)'
78 .'|(@InsertRedistribution@)';
79
80 $known_bad_clause_3_wording=
81 'usr.bin/lex/.*' # UCB
82 .'|dist/bind/contrib/nslint-2.1a3/lbl/.*' #
83 .'|usr.sbin/traceroute/ifaddrlist.h' #
84 .'|usr.sbin/traceroute/traceroute.c' #
85 .'|usr.sbin/hilinfo/hilinfo.c' # CSS @ Utah
86 ;
87
88 sub warning {
89 local($fn,$msg) = @_;
90 print "XXX $fn line $.: $msg\n"
91 }
92
93 while ($#ARGV >= 0) {
94 $debug=1 if ($ARGV[0] =~ /-d/i);
95 $html=1 if ($ARGV[0] =~ /-h/i);
96 $xml=1 if ($ARGV[0] =~ /-x/i);
97 $usage=1 if ($ARGV[0] =~ /-\?/);
98 shift(@ARGV);
99 }
100
101 if ($usage) {
102 print "usage: find /usr/src -type f -print |\n" .
103 " perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
104 " where\n" .
105 " -h output html\n" .
106 " -x output xml/docbook\n" .
107 " -d debug\n" .
108 " -? display this help message\n";
109 exit(0);
110 }
111
112 $comments = !$html && !$xml;
113
114 file:
115 while(<>) {
116 chomp();
117 $fn=$_;
118
119 open(F, "$fn") || die "cannot read $fn: $!\n";
120
121 line:
122 while(<F>) {
123 if (0 and /$ack_line2/in){
124 print "?> $_" if $debug;
125
126 if ($fn !~ m,$known_bad_clause_3_wording,) {
127 warning($fn, "clause 3 start not caught");
128 }
129 last line;
130 }
131
132 print "0> $_" if $debug;
133
134 # special case perl script generating a license (openssl's
135 # mkerr.pl) - ignore the quoted license, there is another one
136 # inside:
137 if (/^\"\s\*.*$ack_line1.*\\n\"\,/n) {
138 while(!/$ack_endline/in) {
139 print "S> $_" if $debug;
140 $_ = <F>;
141 }
142 }
143
144 if (/$ack_line1/in
145 or (/$ack_line2/n and $fn =~ m,$known_bad_clause_3_wording,)) {
146
147 print "1> $_" if $debug;
148
149 $_=<F>
150 unless $fn =~ m,$known_bad_clause_3_wording,;
151 if (/$ack_line2/in or $fn =~ m,$known_bad_clause_3_wording,){
152
153 print "2> $_" if $debug;
154
155 $msg="";
156
157 if ($fn =~ m,$known_bad_clause_3_wording, and /``/) {
158 $msg = $_;
159 }
160 elsif (/:\s+This product/) {
161 # src/sys/lib/libkern/rngtest.c - bad clause 3 wording
162 # that is not like others, so special case it here
163 $msg = $_;
164 $msg =~ s/^.*:\s+(This product.*)$/$1/;
165 }
166
167 $cnt=0;
168 $_=<F>;
169 while(!/$ack_endline/in) {
170
171 print "C> $_" if $debug;
172
173 $msg .= $_;
174 $cnt++;
175 $_ = <F>;
176 if ($cnt > 10) {
177 warning($fn,"loooong copyright?");
178 last line;
179 }
180 }
181
182 print "E> $_" if $debug;
183
184 # post-process
185
186 if ($fn =~ m,$known_bad_clause_3_wording,) {
187 while ($msg !~ /^.*``.*\n/) {
188 last if (!$msg);
189 $msg =~ s/^.*\n//o;
190 }
191 $msg =~ s/^.*``//o;
192 $msg =~ s/\n.*``//o;
193 $msg =~ s/''.*$//o;
194 }
195
196 # XXX: pcap &c - add to known_bad_clause_3_wording but
197 # that code seems to have problems. Easier to add a
198 # hack here, shouldn't affect good clause 3.
199 $msg =~ s/''\s+Neither the name.*$//;
200
201 # *roff
202 while ($msg =~ /^\.\\"\s*/) {
203 $msg =~ s/^\.\\"\s*//o;
204 }
205 while ($msg =~ /\n\.\\"\s*/) {
206 $msg =~ s/\n\.\\"\s*/\n/o;
207 }
208 $msg =~ s/\n\.\\"\s*$/\n/g;
209
210 # C++/C99
211 while ($msg =~ /^\s*\/\/\s*/) {
212 $msg =~ s/^\s*\/\/\s*//o;
213 }
214 while ($msg =~ /\n\s*\/\/\s*$/) {
215 $msg =~ s/\n\s*\/\/\s*$//o;
216 }
217 $msg =~ s/\n\s*\/\/\s*/\n/g;
218
219 # C
220 while ($msg =~ /^\s*\*\s*/) {
221 $msg =~ s/^\s*\*\s*//o;
222 }
223 while ($msg =~ /\n\s*\*\s*$/) {
224 $msg =~ s/\n\s*\*\s*$//o;
225 }
226 $msg =~ s/\n\s*\*\s*/\n/g;
227
228 # texinfo @c
229 while ($msg =~ /^\s*\@c\s+/) {
230 $msg =~ s/^\s*\@c\s+//o;
231 }
232 while ($msg =~ /\n\s*\@c\s+$/) {
233 $msg =~ s/\n\s*\@c\s+$//o;
234 }
235 $msg =~ s/\n\s*\@c\s+/\n/g;
236
237 $msg =~ s/^REM\s*//g; # BASIC?!?
238 $msg =~ s/\nREM\s*/\n/g; # BASIC?!?
239 $msg =~ s/^dnl\s*//g; # m4
240 $msg =~ s/\ndnl\s*/\n/g; # m4
241 $msg =~ s/^\s+-\s+//g; # seen in docbook files
242 $msg =~ s/\n\s+-\s+/ /g; #
243 $msg =~ s/^[#\\\|";]+\s*//g; # sh etc.
244 $msg =~ s/\n[#\\\|";]+\s*/\n/g; # sh etc.
245 $msg =~ s/^[ *]*//g; # C
246 $msg =~ s/\n[ *]*/\n/g; # C
247
248 $msg =~ s/\@cartouche\n//; # texinfo
249
250 $msg =~ s/
//g;
252 $msg =~ s/\s*\n/\n/g;
253 $msg =~ s/^\s*//;
254 $msg =~ s/\\\@/\@/g;
255 $msg =~ s/\n\n/\n/g;
256 $msg =~ s/^\s*``//;
257 $msg =~ s/''\s*$//;
258 $msg =~ s/^\"//o;
259 $msg =~ s/\"$//o;
260 $msg =~ s/\"\.$/./o;
261
262 # Fix ISO-646-SE spelling of Lule\[oa]
263 $msg =~ s/Lule\}/Lulea/g;
264
265 # Collapse multiple spaces between words. There are a
266 # few entries with "by__Name" that affects sorting.
267 $msg =~ s/(\w) +(\w)/$1 $2/g;
268
269 # Split up into separate paragraphs
270 #
271 $msgs=$msg;
272 $msgs=~s/(This (software|product))/|$1/g;
273 $msgs=~s,^\|,,;
274 msg:
275 foreach $msg (split(/\|/, $msgs)) {
276 while ($msg =~ /[\n\s]+$/) {
277 $msg =~ s/[\n\s]+$//o;
278 }
279 next if ($msg eq "");
280 if ($comments) {
281 print ".\\\" File $fn:\n";
282 print "$msg";
283 print "\n\n";
284 }
285
286 my $key = lc($msg); # ignore difference in case
287 $key =~ s/\n/ /g; # ignore difference in line breaks
288 $key =~ s/\.$//g; # drop the final dot
289
290 # push organizations ("by the") to the end of the
291 # sorting order
292 $key =~ s/(developed by) the/$1 ~the/;
293
294 if (defined $copyrights{$key}) {
295 if ($copyrights{$key} !~ /\.$/ && $msg =~ /\.$/) {
296 print "already there, without dot - overriding!\n"
297 if 1 || $debug;
298 }
299 else {
300 next msg;
301 }
302 }
303
304 $copyrights{$key} = $msg;
305 }
306
307 } else {
308 print "?> $_" if $debug;
309
310 if ($fn !~ m,$known_bad_clause_3_wording,) {
311 warning($fn, "bad clause 3?");
312 }
313 last line;
314 }
315 }
316 }
317 close(F);
318 }
319
320
321 if ($html) {
322 print "<ul>\n";
323 foreach $key (sort keys %copyrights) {
324 my $msg = $copyrights{$key};
325 print "<li>$msg</li>\n";
326 }
327 print "</ul>\n";
328 } elsif ($xml) {
329 foreach $key (sort keys %copyrights) {
330 my $msg = $copyrights{$key};
331 print "<listitem>$msg</listitem>\n";
332 }
333 } else {
334 print "------------------------------------------------------------\n";
335
336 $firsttime=1;
337 foreach $key (sort keys %copyrights) {
338 my $msg = $copyrights{$key};
339 if ($firsttime) {
340 $firsttime=0;
341 } else {
342 print ".It\n";
343 }
344 print "$msg\n";
345 }
346 }
347