/* $NetBSD: makewhatis.c,v 1.4.2.1 1999/12/27 18:30:17 wrstuden Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.4.2.1 1999/12/27 18:30:17 wrstuden Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62
/*
 * Node of the binary tree of manual page files that still have to be parsed.
 * addmanpage() orders the tree by mp_inode and allocates each node with extra
 * room behind the structure so that mp_name can hold the complete path.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage *mp_left,*mp_right;	/* subtrees with smaller / larger inode */
	ino_t mp_inode;			/* inode number, used as the tree key */
	char mp_name[1];		/* path of the file; really variable length */
};
69
/*
 * Node of the binary tree of finished whatis lines.  addwhatis() orders the
 * tree with strcmp() on wi_data, so an in-order walk (see dumpwhatis())
 * yields the lines sorted.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis *wi_left,*wi_right;	/* subtrees sorting before / after */
	char *wi_data;			/* the whatis line; pointer is stored, not copied */
};
75
/* Forward declarations of the functions defined below. */
int main (int, char **);
int manpagesection (char *);			/* section index of a file name, or -1 */
int addmanpage (manpage **, ino_t, char *);	/* 1 on success, 0 if out of memory */
int addwhatis (whatis **, char *);		/* 1 on success, 0 if out of memory */
char *replacestring (char *, char *, char *);	/* returns a malloc()ed string */
void catpreprocess (char *);			/* cleans a line in place */
char *parsecatpage (gzFile *);
int manpreprocess (char *);			/* cleans a line in place; 1 for comment lines */
char *parsemanpage (gzFile *, int);
char *getwhatisdata (char *);
void processmanpages (manpage **,whatis **);
int dumpwhatis (FILE *, whatis *);		/* 1 on success, 0 on write error */
88
/* Directories searched by main() when no argument is given (NULL terminated). */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};

/*
 * Characters accepted directly after a '.' in a file name as a section
 * marker.  manpagesection() returns the index into this string; index 0
 * makes getwhatisdata() use the formatted (cat) page parser.
 */
char sectionext[] = "0123456789ln";
/* Name of the output file, created by main() inside the first manual directory. */
char whatisdb[] = "whatis.db";

/* Program name, used by main() as the perror() prefix. */
extern char *__progname;
98
99 int
100 main(int argc,char **argv)
101 {
102 char **manpath;
103 FTS *fts;
104 FTSENT *fe;
105 manpage *source;
106 whatis *dest;
107 FILE *out;
108
109 (void)setlocale(LC_ALL, "");
110
111 manpath = (argc < 2) ? default_manpath : &argv[1];
112
113 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
114 perror(__progname);
115 return EXIT_FAILURE;
116 }
117
118 source = NULL;
119 while ((fe = fts_read(fts)) != NULL) {
120 switch (fe->fts_info) {
121 case FTS_F:
122 if (manpagesection(fe->fts_path) >= 0)
123 if (!addmanpage(&source,
124 fe->fts_statp->st_ino,
125 fe->fts_path))
126 err(EXIT_FAILURE, NULL);
127 case FTS_D:
128 case FTS_DC:
129 case FTS_DEFAULT:
130 case FTS_DP:
131 case FTS_SLNONE:
132 break;
133 default:
134 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
135 strerror(fe->fts_errno));
136 /* NOTREACHED */
137 }
138 }
139
140 (void)fts_close(fts);
141
142 dest = NULL;
143 processmanpages(&source, &dest);
144
145 if (chdir(manpath[0]) < 0)
146 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
147
148 if ((out = fopen(whatisdb, "w")) == NULL)
149 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
150
151 if (!(dumpwhatis(out, dest) ||
152 (fclose(out) < 0)) ||
153 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
154 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
155
156 return EXIT_SUCCESS;
157 }
158
159 int
160 manpagesection(char *name)
161 {
162 char *ptr;
163
164 if ((ptr = strrchr(name, '/')) != NULL)
165 ptr++;
166 else
167 ptr = name;
168
169 while ((ptr = strchr(ptr, '.')) != NULL) {
170 int section;
171
172 ptr++;
173 section=0;
174 while (sectionext[section] != '\0')
175 if (sectionext[section] == *ptr)
176 return section;
177 else
178 section++;
179 }
180
181 return -1;
182 }
183
184 int
185 addmanpage(manpage **tree,ino_t inode,char *name)
186 {
187 manpage *mp;
188
189 while ((mp = *tree) != NULL) {
190 if (mp->mp_inode == inode)
191 return 1;
192 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
193 }
194
195 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
196 return 0;
197
198 mp->mp_left = NULL;
199 mp->mp_right = NULL;
200 mp->mp_inode = inode;
201 (void) strcpy(mp->mp_name, name);
202 *tree = mp;
203
204 return 1;
205 }
206
207 int
208 addwhatis(whatis **tree, char *data)
209 {
210 whatis *wi;
211 int result;
212
213 while ((wi = *tree) != NULL) {
214 result=strcmp(data, wi->wi_data);
215 if (result == 0) return 1;
216 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
217 }
218
219 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
220 return 0;
221
222 wi->wi_left = NULL;
223 wi->wi_right = NULL;
224 wi->wi_data = data;
225 *tree = wi;
226
227 return 1;
228 }
229
/*
 * Clean up one line of a formatted (cat) manual page in place.
 *
 * - leading and trailing white space is removed,
 * - every inner run of white space becomes a single blank,
 * - a character followed by a backspace is dropped together with the
 *   backspace, which undoes overstriking such as "N\bN".
 *
 * The result is never longer than the input.
 */
void
catpreprocess(char *from)
{
	char	*to;

	to = from;
	/* <ctype.h> functions need an unsigned char value, not a plain char. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* Only emit the blank if more text follows. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (*(from + 1) == '\10') {
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
251
/*
 * Return a newly allocated copy of string in which the first occurrence of
 * old has been replaced by new.
 *
 * When new is NULL, or old does not occur in string, the result is a plain
 * copy of string.  The caller owns the result and has to free() it.
 * Returns NULL if memory could not be allocated.
 */
char *
replacestring(char *string, char *old, char *new)

{
	char	*match, *out;
	int	 prefix, oldlen, newlen, total;

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL) {
		/* Nothing to substitute: hand back a duplicate. */
		size_t	bytes = strlen(string) + 1;

		if ((out = malloc(bytes)) != NULL)
			(void) memcpy(out, string, bytes);
		return out;
	}

	oldlen = strlen(old);
	newlen = strlen(new);
	total = strlen(string);
	out = malloc(total - oldlen + newlen + 1);
	if (out == NULL)
		return NULL;

	/* head of string, then the replacement, then the rest after old */
	prefix = match - string;
	(void) memcpy(out, string, prefix);
	(void) memcpy(out + prefix, new, newlen);
	(void) strcpy(out + prefix + newlen, match + oldlen);

	return out;
}
279
280 char *
281 parsecatpage(gzFile *in)
282 {
283 char buffer[8192];
284 char *section, *ptr, *last;
285 int size;
286
287 do {
288 if (gzgets(in, buffer, sizeof(buffer)) == NULL)
289 return NULL;
290 }
291 while (buffer[0] == '\n');
292
293 section = NULL;
294 if ((ptr = strchr(buffer, '(')) != NULL) {
295 if ((last = strchr(ptr + 1, ')')) !=NULL) {
296 int length;
297
298 length = last - ptr + 1;
299 if ((section = malloc(length + 5)) == NULL)
300 return NULL;
301
302 *section = ' ';
303 (void) memcpy(section + 1, ptr, length);
304 (void) strcpy(section + 1 + length, " - ");
305 }
306 }
307
308 for (;;) {
309 if (gzgets(in, buffer, sizeof(buffer)) == NULL) {
310 free(section);
311 return NULL;
312 }
313 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
314 break;
315 }
316
317 ptr = last = buffer;
318 size = sizeof(buffer) - 1;
319 while ((size > 0) && (gzgets(in, ptr, size) != NULL)) {
320 int length;
321
322 catpreprocess(ptr);
323
324 length = strlen(ptr);
325 if (length == 0) {
326 *last = '\0';
327
328 ptr = replacestring(buffer, " - ", section);
329 free(section);
330 return ptr;
331 }
332 if ((length > 1) && (ptr[length - 1] == '-') &&
333 isalpha(ptr[length - 2]))
334 last = &ptr[--length];
335 else {
336 last = &ptr[length++];
337 *last = ' ';
338 }
339
340 ptr += length;
341 size -= length;
342 }
343
344 free(section);
345
346 return NULL;
347 }
348
/*
 * Clean up one line of manual page source in place.
 *
 * A line whose first non-blank characters are .\" is a comment: it is left
 * untouched and 1 is returned.  Otherwise 0 is returned after
 *
 * - removing leading and trailing white space,
 * - collapsing inner white space to one blank (none in front of a ','),
 * - turning "\-" into "-" and dropping any other backslash together with
 *   the character after it,
 * - dropping double quotes,
 * - rewriting ".Xr name section" to "name(section)".
 *
 * The result is never longer than the input.
 */
int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	/* <ctype.h> functions need an unsigned char value, not a plain char. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* keep: copied (or terminates) on the next turn */
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = strchr(from, ' ')) != NULL) {
			int	length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
404
405 char *
406 parsemanpage(gzFile *in, int defaultsection)
407 {
408 char *section, buffer[8192], *ptr;
409
410 section = NULL;
411 do {
412 if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
413 free(section);
414 return NULL;
415 }
416 if (manpreprocess(buffer))
417 continue;
418 if (strncasecmp(buffer, ".Dt", 3) == 0) {
419 char *end;
420
421 ptr = &buffer[3];
422 if (isspace(*ptr))
423 ptr++;
424 if ((ptr = strchr(ptr, ' ')) == NULL)
425 continue;
426
427 if ((end = strchr(++ptr, ' ')) != NULL)
428 *end = '\0';
429
430 free(section);
431 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
432 section[0] = ' ';
433 section[1] = '(';
434 (void) strcpy(§ion[2], ptr);
435 (void) strcat(§ion[2], ") - ");
436 }
437 }
438 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
439
440 do {
441 if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
442 free(section);
443 return NULL;
444 }
445 } while (manpreprocess(buffer));
446
447 if (strncasecmp(buffer, ".Nm", 3) == 0) {
448 int length, offset;
449
450 ptr = &buffer[3];
451 if (isspace(*ptr))
452 ptr++;
453
454 length = strlen(ptr);
455 if ((length > 1) && (ptr[length - 1] == ',') &&
456 isspace(ptr[length - 2])) {
457 ptr[--length] = '\0';
458 ptr[length - 1] = ',';
459 }
460 (void) memmove(buffer, ptr, length + 1);
461
462 offset = length + 3;
463 ptr = &buffer[offset];
464 for (;;) {
465 int more;
466
467 if ((sizeof(buffer) == offset) ||
468 (gzgets(in, ptr, sizeof(buffer) - offset)
469 == NULL)) {
470 free(section);
471 return NULL;
472 }
473 if (manpreprocess(ptr))
474 continue;
475
476 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
477
478 ptr += 3;
479 if (isspace(*ptr))
480 ptr++;
481
482 buffer[length++] = ' ';
483 more = strlen(ptr);
484 if ((more > 1) && (ptr[more - 1] == ',') &&
485 isspace(ptr[more - 2])) {
486 ptr[--more] = '\0';
487 ptr[more - 1] = ',';
488 }
489
490 (void) memmove(&buffer[length], ptr, more + 1);
491 length += more;
492 offset = length + 3;
493
494 ptr = &buffer[offset];
495 }
496
497 if (strncasecmp(ptr, ".Nd", 3) == 0) {
498 (void) strcpy(&buffer[length], " -");
499
500 while (strncasecmp(ptr, ".Sh", 3) != 0) {
501 int more;
502
503 if (*ptr == '.') {
504 char *space;
505
506 if ((space = strchr(ptr, ' ')) == NULL)
507 ptr = "";
508 else {
509 space++;
510 (void) memmove(ptr, space,
511 strlen(space) + 1);
512 }
513 }
514
515 if (*ptr != '\0') {
516 buffer[offset - 1] = ' ';
517 more = strlen(ptr) + 1;
518 offset += more;
519 }
520 ptr = &buffer[offset];
521 if ((sizeof(buffer) == offset) ||
522 (gzgets(in, ptr, sizeof(buffer) - offset)
523 == NULL)) {
524 free(section);
525 return NULL;
526 }
527 if (manpreprocess(ptr))
528 *ptr = '\0';
529 }
530 }
531 }
532 else {
533 int offset;
534
535 if (*buffer == '.') {
536 char *space;
537
538 if ((space = strchr(buffer, ' ')) == NULL) {
539 free(section);
540 return NULL;
541 }
542 space++;
543 (void) memmove(buffer, space, strlen(space));
544 }
545
546 offset = strlen(buffer) + 1;
547 for (;;) {
548 int more;
549
550 ptr = &buffer[offset];
551 if ((sizeof(buffer) == offset) ||
552 (gzgets(in, ptr, sizeof(buffer) - offset)
553 == NULL)) {
554 free(section);
555 return NULL;
556 }
557 if (manpreprocess(ptr) || (*ptr == '\0'))
558 continue;
559
560 if (strncasecmp(ptr, ".Sh", 3) == 0)
561 break;
562
563 if (*ptr == '.') {
564 char *space;
565
566 if ((space = strchr(ptr, ' ')) == NULL)
567 continue;
568 space++;
569 (void) memmove(ptr, space, strlen(space));
570 }
571
572 buffer[offset - 1] = ' ';
573 more = strlen(ptr);
574 if ((more > 1) && (ptr[more - 1] == ',') &&
575 isspace(ptr[more - 2])) {
576 ptr[more - 1] = '\0';
577 ptr[more - 2] = ',';
578 }
579 else more++;
580 offset += more;
581 }
582 }
583
584 if (section == NULL) {
585 char sectionbuffer[24];
586
587 (void) sprintf(sectionbuffer, " (%c) - ",
588 sectionext[defaultsection]);
589 ptr = replacestring(buffer, " - ", sectionbuffer);
590 }
591 else {
592 ptr = replacestring(buffer, " - ", section);
593 free(section);
594 }
595 return ptr;
596 }
597
598 char *
599 getwhatisdata(char *name)
600 {
601 gzFile *in;
602 char *data;
603 int section;
604
605 if ((in = gzopen(name, "r")) == NULL) {
606 errx(EXIT_FAILURE, "%s: %s",
607 name,
608 strerror((errno == 0) ? ENOMEM : errno));
609 /* NOTREACHED */
610 }
611
612 section = manpagesection(name);
613 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
614
615 (void) gzclose(in);
616 return data;
617 }
618
619 void
620 processmanpages(manpage **source, whatis **dest)
621 {
622 manpage *mp;
623
624 mp = *source;
625 *source = NULL;
626
627 while (mp != NULL) {
628 manpage *obsolete;
629 char *data;
630
631 if (mp->mp_left != NULL)
632 processmanpages(&mp->mp_left,dest);
633
634 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
635 if (!addwhatis(dest,data))
636 err(EXIT_FAILURE, NULL);
637 }
638
639 obsolete = mp;
640 mp = mp->mp_right;
641 free(obsolete);
642 }
643 }
644
645 int
646 dumpwhatis (FILE *out, whatis *tree)
647 {
648 while (tree != NULL) {
649 if (tree->wi_left)
650 if (!dumpwhatis(out, tree->wi_left)) return 0;
651
652 if ((fputs(tree->wi_data, out) == EOF) ||
653 (fputc('\n', out) == EOF))
654 return 0;
655
656 tree = tree->wi_right;
657 }
658
659 return 1;
660 }
661