extract-contrib-string.pl revision 1.17 1 #!/usr/bin/env perl
2 #
3 # Copyright (c) 2004, 2008 The NetBSD Foundation, Inc.
4 # All rights reserved.
5 #
6 # This code is derived from software contributed to The NetBSD Foundation
7 # by Hubert Feyrer <hubert (at] feyrer.de>.
8 #
9 # Redistribution and use in source and binary forms, with or without
10 # modification, are permitted provided that the following conditions
11 # are met:
12 # 1. Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
14 # 2. Redistributions in binary form must reproduce the above copyright
15 # notice, this list of conditions and the following disclaimer in the
16 # documentation and/or other materials provided with the distribution.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
29
30 #
31 # Extract BSD-mandated copyright messages for NetBSD documentation
32 #
33 # Usage:
34 # 1) find /usr/src -type f -print \
35 # | perl extract-contrib-string.pl
36 # >x
37 #
38 # 2) merge text after "--------" in "x" into
39 # src/distrib/notes/common/legal.common
40 #
41 # Options:
42 #
43 # perl extract-contrib-string.pl [-d] [-h] [-x] [-?]
44 #
45 # where
46 # -d debug output
47 # -h html output
48 # -x xml/docbook output
49 # -? display help/usage message
50
51
52 $ack_line1="[aA]ll( commercial)?( marketing or)? advertising materials mentioning( features)?";
53 $ack_line2="display the following( acknowledge?ment)?";
54 $ack_endline=
55 '(\d\.\s*(Neither the name'
56 . '|The name of the company nor the name' # Wasn't my idea
57 . '|The name of the author may not'
58 . '|The name of .* must not be used to endorse'
59 . '|The names? (of )?.* nor the names? of'
60 . '|The names? (of )?.* or any of it\'?s members'
61 . '|Redistributions of any form whatsoever'
62 . '|The names .*"OpenSSL Toolkit.*" and .*"OpenSSL Project.*" must not be used))'
63 .'|(THIS SOFTWARE IS PROVIDED)'
64 .'|(The word \'cryptographic\' can be left out if)'
65 .'|(may be used to endorse)'
66 .'|(@end cartouche)'
67 .'|(Redistribution and use in source and binary forms)'
68 .'|(may not be used to endorse)'
69 .'|(\.IP 4)'
70 .'|(ALLOW FREE USE OF)'
71 .'|(materials provided with the distribution)'
72 .'|(@InsertRedistribution@)';
73
74 $known_bad_clause_3_wording=
75 'usr.bin/lex/.*' # UCB
76 .'|dist/bind/contrib/nslint-2.1a3/lbl/.*' #
77 .'|usr.sbin/traceroute/ifaddrlist.h' #
78 .'|usr.sbin/traceroute/traceroute.c' #
79 .'|usr.sbin/hilinfo/hilinfo.c' # CSS @ Utah
80 ;
81
82 sub warning {
83 local($fn,$msg) = @_;
84 print "XXX $fn line $.: $msg\n"
85 }
86
87 while ($#ARGV >= 0) {
88 $debug=1 if ($ARGV[0] =~ /-d/i);
89 $html=1 if ($ARGV[0] =~ /-h/i);
90 $xml=1 if ($ARGV[0] =~ /-x/i);
91 $usage=1 if ($ARGV[0] =~ /-\?/);
92 shift(@ARGV);
93 }
94
95 if ($usage) {
96 print "usage: find /usr/src -type f -print |\n" .
97 " perl extract-contrib-string.pl [-h] [-x] [-?] [-d]\n" .
98 " where\n" .
99 " -h output html\n" .
100 " -x output xml/docbook\n" .
101 " -d debug\n" .
102 " -? display this help message\n";
103 exit(0);
104 }
105
106 $comments = !$html && !$xml;
107
108 file:
109 while(<>) {
110 chomp();
111 $fn=$_;
112
113 open(F, "$fn") || die "cannot read $fn: $!\n";
114
115 line:
116 while(<F>) {
117 if (0 and /$ack_line2/i){
118 print "?> $_" if $debug;
119
120 if ($fn !~ m,$known_bad_clause_3_wording,) {
121 warning($fn, "clause 3 start not caught");
122 }
123 last line;
124 }
125
126 print "0> $_" if $debug;
127
128 # special case perl script generating a license (openssl's
129 # mkerr.pl) - ignore the quoted license, there is another one
130 # inside:
131 if (/^\"\s\*.*$ack_line1.*\\n\"\,/) {
132 while(!/$ack_endline/i) {
133 print "S> $_" if $debug;
134 $_ = <F>;
135 }
136 }
137
138 if (/$ack_line1/i
139 or (/$ack_line2/ and $fn =~ m,$known_bad_clause_3_wording,)) {
140
141 print "1> $_" if $debug;
142
143 $_=<F>
144 unless $fn =~ m,$known_bad_clause_3_wording,;
145 if (/$ack_line2/i or $fn =~ m,$known_bad_clause_3_wording,){
146
147 print "2> $_" if $debug;
148
149 $msg="";
150 $msg = $_ if ($fn =~ m,$known_bad_clause_3_wording, and /``/);
151 $cnt=0;
152 $_=<F>;
153 while(!/$ack_endline/i) {
154
155 print "C> $_" if $debug;
156
157 $msg .= $_;
158 $cnt++;
159 $_ = <F>;
160 if ($cnt > 10) {
161 warning($fn,"loooong copyright?");
162 last line;
163 }
164 }
165
166 print "E> $_" if $debug;
167
168 # post-process
169
170 if ($fn =~ m,$known_bad_clause_3_wording,) {
171 while ($msg !~ /^.*``.*\n/) {
172 last if (!$msg);
173 $msg =~ s/^.*\n//o;
174 }
175 $msg =~ s/^.*``//o;
176 $msg =~ s/\n.*``//o;
177 $msg =~ s/''.*$//o;
178 }
179
180 # *roff
181 while ($msg =~ /^\.\\"\s*/) {
182 $msg =~ s/^\.\\"\s*//o;
183 }
184 while ($msg =~ /\n\.\\"\s*/) {
185 $msg =~ s/\n\.\\"\s*/\n/o;
186 }
187 $msg =~ s/\n\.\\"\s*$/\n/g;
188
189 # C++/C99
190 while ($msg =~ /^\s*\/\/\s*/) {
191 $msg =~ s/^\s*\/\/\s*//o;
192 }
193 while ($msg =~ /\n\s*\/\/\s*$/) {
194 $msg =~ s/\n\s*\/\/\s*$//o;
195 }
196 $msg =~ s/\n\s*\/\/\s*/\n/g;
197
198 # C
199 while ($msg =~ /^\s*\*\s*/) {
200 $msg =~ s/^\s*\*\s*//o;
201 }
202 while ($msg =~ /\n\s*\*\s*$/) {
203 $msg =~ s/\n\s*\*\s*$//o;
204 }
205 $msg =~ s/\n\s*\*\s*/\n/g;
206
207 # texinfo @c
208 while ($msg =~ /^\s*\@c\s+/) {
209 $msg =~ s/^\s*\@c\s+//o;
210 }
211 while ($msg =~ /\n\s*\@c\s+$/) {
212 $msg =~ s/\n\s*\@c\s+$//o;
213 }
214 $msg =~ s/\n\s*\@c\s+/\n/g;
215
216 $msg =~ s/^REM\s*//g; # BASIC?!?
217 $msg =~ s/\nREM\s*/\n/g; # BASIC?!?
218 $msg =~ s/^dnl\s*//g; # m4
219 $msg =~ s/\dnl\s*/\n/g; # m4
220 $msg =~ s/^\s+-\s+//g; # seen in docbook files
221 $msg =~ s/\n\s+-\s+/ /g; #
222 $msg =~ s/^[#\\\|";]+\s*//g; # sh etc.
223 $msg =~ s/\n[#\\\|";]+\s*/\n/g; # sh etc.
224 $msg =~ s/^[ *]*//g; # C
225 $msg =~ s/\n[ *]*/\n/g; # C
226
227 $msg =~ s/\@cartouche\n//; # texinfo
228
229 $msg =~ s/
//g;
231 $msg =~ s/\s*\n/\n/g;
232 $msg =~ s/^\s*//;
233 $msg =~ s/\\\@/\@/g;
234 $msg =~ s/\n\n/\n/g;
235 $msg =~ s/^\s*``//;
236 $msg =~ s/''\s*$//;
237 $msg =~ s/^\"//o;
238 $msg =~ s/\"$//o;
239
240 # Fix ISO-646-SE spelling of Lule\[oa]
241 $msg =~ s/Lule\}/Lulea/g;
242
243 # Collapse multiple spaces between words. There are a
244 # few entries with "by__Name" that affects sorting.
245 $msg =~ s/(\w) +(\w)/$1 $2/g;
246
247 # Split up into separate paragraphs
248 #
249 $msgs=$msg;
250 $msgs=~s/(This (software|product))/|$1/g;
251 $msgs=~s,^\|,,;
252 msg:
253 foreach $msg (split(/\|/, $msgs)) {
254 while ($msg =~ /[\n\s]+$/) {
255 $msg =~ s/[\n\s]+$//o;
256 }
257 next if ($msg eq "");
258 if ($comments) {
259 print ".\\\" File $fn:\n";
260 print "$msg";
261 print "\n\n";
262 }
263
264 # Figure out if there's a version w/ or w/o trailing dot
265 #
266 if ($msg =~ /\.$/) {
267 # check if there's a version of the same msg
268 # w/o a trailing dot
269 $msg2=$msg;
270 $msg2=~s,\.$,,;
271 if ($copyrights{"$msg2"}) {
272 # already there - skip
273 print "already there, w/o dot - skipping!\n"
274 if $debug;
275 next msg;
276 }
277
278 # ... maybe with other case?
279 $lc_msg2=lc($msg2);
280 if ($lc_copyrights{$lc_msg2}) {
281 print "already there, in different case - skipping\n"
282 if $debug;
283 next msg;
284 }
285 } else {
286 # check if there's a version of the same msg
287 # with a trailing dot
288 $msg2=$msg;
289 $msg2.=".";
290 if ($copyrights{"$msg2"}) {
291 # already there - skip
292 print "already there, w/ dot - skipping!\n"
293 if $debug;
294 next msg;
295 }
296
297 # ... maybe with other case?
298 $lc_msg2=lc($msg2);
299 if ($lc_copyrights{$lc_msg2}) {
300 print "already there, in different case - skipping\n"
301 if $debug;
302 next msg;
303 }
304 }
305
306 $copyrights{$msg} = 1;
307 $lc_copyrights{lc($msg)} = 1;
308 }
309
310 } else {
311 print "?> $_" if $debug;
312
313 if ($fn !~ m,$known_bad_clause_3_wording,) {
314 warning($fn, "bad clause 3?");
315 }
316 last line;
317 }
318 }
319 }
320 close(F);
321 }
322
323
324 if ($html) {
325 print "<ul>\n";
326 foreach $msg (sort keys %copyrights) {
327 print "<li>$msg</li>\n";
328 }
329 print "</ul>\n";
330 } elsif ($xml) {
331 foreach $msg (sort keys %copyrights) {
332 print "<listitem>$msg</listitem>\n";
333 }
334 } else {
335 print "------------------------------------------------------------\n";
336
337 $firsttime=1;
338 foreach $msg (sort keys %copyrights) {
339 if ($firsttime) {
340 $firsttime=0;
341 } else {
342 print ".It\n";
343 }
344 print "$msg\n";
345 }
346 }
347