makewhatis.c revision 1.8 1 /* $NetBSD: makewhatis.c,v 1.8 2000/07/09 23:07:14 tron Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.8 2000/07/09 23:07:14 tron Exp $");
47 #endif /* not lint */
48
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/wait.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fts.h>
#include <locale.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>
65
/*
 * Node of the binary search tree of manual page files collected by main().
 * The tree is ordered by inode number; addmanpage() does not insert a
 * second node for an inode that is already present.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file (search key) */
	char	 mp_name[1];	/* path name; addmanpage() allocates the */
				/* node large enough for the whole string */
};
72
/*
 * Node of the binary search tree of whatis lines.  addwhatis() orders the
 * tree with strcmp() on wi_data and dumpwhatis() writes it out in order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* lines comparing less than wi_data */
	whatis	*wi_right;	/* lines comparing greater than wi_data */
	char	*wi_data;	/* the whatis line; not owned by the node */
};
78
79 int main (int, char **);
80 void sigchildhandler(int);
81 char *findwhitespace(char *);
82 char *GetS(gzFile, char *, int);
83 int manpagesection (char *);
84 int addmanpage (manpage **, ino_t, char *);
85 int addwhatis (whatis **, char *);
86 char *replacestring (char *, char *, char *);
87 void catpreprocess (char *);
88 char *parsecatpage (gzFile *);
89 int manpreprocess (char *);
90 char *nroff (gzFile *);
91 char *parsemanpage (gzFile *, int);
92 char *getwhatisdata (char *);
93 void processmanpages (manpage **,whatis **);
94 int dumpwhatis (FILE *, whatis *);
95
/* Manual page trees searched when no directory is given on the command line. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters accepted directly after a '.' in a file name as a section
 * marker; manpagesection() returns the index into this string.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file main() creates in the first manual directory. */
char whatisdb[] = "whatis.db";

/* Program name as provided by the C runtime; used for perror() messages. */
extern char *__progname;
105
106 int
107 main(int argc,char **argv)
108 {
109 char **manpath;
110 FTS *fts;
111 FTSENT *fe;
112 manpage *source;
113 whatis *dest;
114 FILE *out;
115
116 (void)setlocale(LC_ALL, "");
117 (void)signal(SIGCHLD, sigchildhandler);
118
119 manpath = (argc < 2) ? default_manpath : &argv[1];
120
121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
122 perror(__progname);
123 return EXIT_FAILURE;
124 }
125
126 source = NULL;
127 while ((fe = fts_read(fts)) != NULL) {
128 switch (fe->fts_info) {
129 case FTS_F:
130 if (manpagesection(fe->fts_path) >= 0)
131 if (!addmanpage(&source,
132 fe->fts_statp->st_ino,
133 fe->fts_path))
134 err(EXIT_FAILURE, NULL);
135 case FTS_D:
136 case FTS_DC:
137 case FTS_DEFAULT:
138 case FTS_DP:
139 case FTS_SLNONE:
140 break;
141 default:
142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
143 strerror(fe->fts_errno));
144
145 }
146 }
147
148 (void)fts_close(fts);
149
150 dest = NULL;
151 processmanpages(&source, &dest);
152
153 if (chdir(manpath[0]) < 0)
154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
155
156 if ((out = fopen(whatisdb, "w")) == NULL)
157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
158
159 if (!(dumpwhatis(out, dest) ||
160 (fclose(out) < 0)) ||
161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
163
164 return EXIT_SUCCESS;
165 }
166
/*
 * SIGCHLD handler: collect the exit status of every child that has
 * terminated so that no zombie processes are left behind.
 *
 * signum is the delivered signal number; it is not used.
 */
void
sigchildhandler(int signum)
{
	/*
	 * waitpid() sets errno (e.g. ECHILD once nothing is left to reap).
	 * The handler can interrupt code that is about to inspect errno,
	 * so the value is preserved across the handler.
	 */
	int saved_errno = errno;

	(void)signum;
	while (waitpid(-1, NULL, WNOHANG) > 0)
		continue;

	errno = saved_errno;
}
173
/*
 * Locate the first whitespace character in a string.
 *
 * str	NUL terminated string to search.
 *
 * Returns a pointer to the first character for which isspace() is true,
 * or NULL when the end of the string is reached without finding one.
 */
char *
findwhitespace(char *str)
{
	/*
	 * The <ctype.h> functions are only defined for EOF and values
	 * representable as unsigned char, hence the cast.
	 */
	for (; !isspace((unsigned char)*str); str++)
		if (*str == '\0')
			return NULL;

	return str;
}
186
187 char
188 *GetS(gzFile in, char *buffer, int length)
189
190 {
191 char *ptr;
192
193 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
194 ptr = NULL;
195
196 return ptr;
197 }
198
199 int
200 manpagesection(char *name)
201 {
202 char *ptr;
203
204 if ((ptr = strrchr(name, '/')) != NULL)
205 ptr++;
206 else
207 ptr = name;
208
209 while ((ptr = strchr(ptr, '.')) != NULL) {
210 int section;
211
212 ptr++;
213 section=0;
214 while (sectionext[section] != '\0')
215 if (sectionext[section] == *ptr)
216 return section;
217 else
218 section++;
219 }
220
221 return -1;
222 }
223
224 int
225 addmanpage(manpage **tree,ino_t inode,char *name)
226 {
227 manpage *mp;
228
229 while ((mp = *tree) != NULL) {
230 if (mp->mp_inode == inode)
231 return 1;
232 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
233 }
234
235 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
236 return 0;
237
238 mp->mp_left = NULL;
239 mp->mp_right = NULL;
240 mp->mp_inode = inode;
241 (void) strcpy(mp->mp_name, name);
242 *tree = mp;
243
244 return 1;
245 }
246
247 int
248 addwhatis(whatis **tree, char *data)
249 {
250 whatis *wi;
251 int result;
252
253 while (isspace(*data))
254 data++;
255
256 if (*data == '/') {
257 char *ptr;
258
259 ptr = ++data;
260 while ((*ptr != '\0') && !isspace(*ptr))
261 if (*ptr++ == '/')
262 data = ptr;
263 }
264
265 while ((wi = *tree) != NULL) {
266 result=strcmp(data, wi->wi_data);
267 if (result == 0) return 1;
268 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
269 }
270
271 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
272 return 0;
273
274 wi->wi_left = NULL;
275 wi->wi_right = NULL;
276 wi->wi_data = data;
277 *tree = wi;
278
279 return 1;
280 }
281
/*
 * Clean up one line of a formatted ("cat") manual page in place.
 *
 * Leading and trailing whitespace is removed, every other run of
 * whitespace becomes a single blank, and a character that is followed
 * by a backspace is dropped together with the backspace (this undoes
 * overstriking such as "N\bN").
 *
 * from		NUL terminated line; overwritten with the result, which is
 *		never longer than the input
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/* <ctype.h> functions need an unsigned char value, hence the casts. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* No blank for whitespace at the end of the line. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* Character plus backspace: skip both. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
303
/*
 * Create a copy of a string in which the first occurrence of one
 * substring is replaced by another.
 *
 * string	NUL terminated source string; not modified
 * old		substring to look for
 * new		replacement text; when NULL no replacement takes place
 *
 * Returns a newly allocated string that the caller has to free().  It is
 * a plain copy of string when new is NULL or old does not occur.
 * Returns NULL when memory allocation fails.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *result;
	size_t slength, olength, nlength, pos;

	match = (new != NULL) ? strstr(string, old) : NULL;
	slength = strlen(string);

	if (match == NULL) {
		/* Nothing to replace: hand back a private copy. */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	/* Lengths are kept in size_t so long lines cannot overflow an int. */
	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(result + pos, new, nlength);
	/* Copy the remainder including the terminating NUL. */
	(void) memcpy(result + pos + nlength, string + pos + olength,
	    slength - pos - olength + 1);

	return result;
}
331
332 char *
333 parsecatpage(gzFile *in)
334 {
335 char buffer[8192];
336 char *section, *ptr, *last;
337 int size;
338
339 do {
340 if (GetS(in, buffer, sizeof(buffer)) == NULL)
341 return NULL;
342 }
343 while (buffer[0] == '\n');
344
345 section = NULL;
346 if ((ptr = strchr(buffer, '(')) != NULL) {
347 if ((last = strchr(ptr + 1, ')')) !=NULL) {
348 int length;
349
350 length = last - ptr + 1;
351 if ((section = malloc(length + 5)) == NULL)
352 return NULL;
353
354 *section = ' ';
355 (void) memcpy(section + 1, ptr, length);
356 (void) strcpy(section + 1 + length, " - ");
357 }
358 }
359
360 for (;;) {
361 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
362 free(section);
363 return NULL;
364 }
365 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
366 break;
367 }
368
369 ptr = last = buffer;
370 size = sizeof(buffer) - 1;
371 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
372 int length;
373
374 catpreprocess(ptr);
375
376 length = strlen(ptr);
377 if (length == 0) {
378 *last = '\0';
379
380 ptr = replacestring(buffer, " - ", section);
381 free(section);
382 return ptr;
383 }
384 if ((length > 1) && (ptr[length - 1] == '-') &&
385 isalpha(ptr[length - 2]))
386 last = &ptr[--length];
387 else {
388 last = &ptr[length++];
389 *last = ' ';
390 }
391
392 ptr += length;
393 size -= length;
394 }
395
396 free(section);
397
398 return NULL;
399 }
400
/*
 * Clean up one line of manual page source in place.
 *
 * - A line whose first non-blank characters are .\" is a comment; it is
 *   left untouched and 1 is returned.
 * - Leading and trailing whitespace is removed and other whitespace runs
 *   become one blank (none at all in front of a ',').
 * - Double quotes are removed.
 * - "\-" becomes "-", "\s" followed by an optional sign and digits is
 *   removed, and for any other escape the backslash and the character
 *   after it are removed.
 * - A line starting with ".Xr" and carrying two words, e.g. ".Xr ls 1",
 *   is rewritten to "ls(1)".
 *
 * line		NUL terminated line; overwritten with the result, which is
 *		never longer than the input
 *
 * Returns 1 for comment lines and 0 otherwise.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* <ctype.h> functions need an unsigned char value, hence the casts. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the '-' (or stop at the end of line). */
				break;
			case 's':
				/* Step over the 's' before the size itself. */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		/* Find the blank between the page name and the section. */
		for (sect = from; *sect != '\0'; sect++)
			if (isspace((unsigned char)*sect))
				break;

		if (*sect != '\0') {
			size_t length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
462
463 char *
464 nroff(gzFile *in)
465 {
466 char tempname[MAXPATHLEN], buffer[8192], *data;
467 int tempfd, bytes, pipefd[2];
468 pid_t child;
469
470 if (gzrewind(in) < 0) {
471 perror(__progname);
472 return NULL;
473 }
474
475 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
476 if ((tempfd = mkstemp(tempname)) < 0) {
477 perror(__progname);
478 return NULL;
479 }
480
481 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
482 if (write(tempfd, buffer, bytes) != bytes) {
483 bytes = -1;
484 break;
485 }
486
487 if ((bytes < 0) ||
488 (lseek(tempfd, 0, SEEK_SET) < 0) ||
489 (pipe(pipefd) < 0)) {
490 perror(__progname);
491 (void)close(tempfd);
492 (void)unlink(tempname);
493 return NULL;
494 }
495
496 switch (child = vfork()) {
497 case -1:
498 perror(__progname);
499 (void)close(pipefd[1]);
500 (void)close(pipefd[0]);
501 (void)close(tempfd);
502 (void)unlink(tempname);
503 return NULL;
504 /* NOTREACHED */
505 case 0:
506 (void)close(pipefd[0]);
507 if (pipefd[1] != STDOUT_FILENO) {
508 (void)dup2(pipefd[1], STDOUT_FILENO);
509 (void)close(pipefd[1]);
510 }
511 (void)execlp("nroff", "nroff", "-mandoc", tempname, NULL);
512 _exit(EXIT_FAILURE);
513 default:
514 (void)close(pipefd[1]);
515 (void)close(tempfd);
516 /* NOTREACHED */
517 }
518
519 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
520 if (errno == 0)
521 errno = ENOMEM;
522 perror(__progname);
523 (void)close(pipefd[0]); /* Child will be killed by SIGPIPE. */
524 (void)unlink(tempname);
525 return NULL;
526 }
527
528 data = parsecatpage(in);
529
530 (void)gzclose(in); /* Child will be killed by SIGPIPE. */
531 (void)unlink(tempname);
532
533 return data;
534 }
535
536 char *
537 parsemanpage(gzFile *in, int defaultsection)
538 {
539 char *section, buffer[8192], *ptr;
540
541 section = NULL;
542 do {
543 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
544 free(section);
545 return NULL;
546 }
547 if (manpreprocess(buffer))
548 continue;
549 if (strncasecmp(buffer, ".Dt", 3) == 0) {
550 char *end;
551
552 ptr = &buffer[3];
553 if (isspace(*ptr))
554 ptr++;
555 if ((ptr = findwhitespace(ptr)) == NULL)
556 continue;
557
558 if ((end = findwhitespace(++ptr)) != NULL)
559 *end = '\0';
560
561 free(section);
562 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
563 section[0] = ' ';
564 section[1] = '(';
565 (void) strcpy(§ion[2], ptr);
566 (void) strcat(§ion[2], ") - ");
567 }
568 }
569 else if (strncasecmp(buffer, ".Ds", 3) == 0)
570 return nroff(in);
571 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
572
573 do {
574 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
575 free(section);
576 return NULL;
577 }
578 } while (manpreprocess(buffer));
579
580 if (strncasecmp(buffer, ".Nm", 3) == 0) {
581 int length, offset;
582
583 ptr = &buffer[3];
584 while (isspace(*ptr))
585 ptr++;
586
587 length = strlen(ptr);
588 if ((length > 1) && (ptr[length - 1] == ',') &&
589 isspace(ptr[length - 2])) {
590 ptr[--length] = '\0';
591 ptr[length - 1] = ',';
592 }
593 (void) memmove(buffer, ptr, length + 1);
594
595 offset = length + 3;
596 ptr = &buffer[offset];
597 for (;;) {
598 int more;
599
600 if ((sizeof(buffer) == offset) ||
601 (GetS(in, ptr, sizeof(buffer) - offset)
602 == NULL)) {
603 free(section);
604 return NULL;
605 }
606 if (manpreprocess(ptr))
607 continue;
608
609 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
610
611 ptr += 3;
612 if (isspace(*ptr))
613 ptr++;
614
615 buffer[length++] = ' ';
616 more = strlen(ptr);
617 if ((more > 1) && (ptr[more - 1] == ',') &&
618 isspace(ptr[more - 2])) {
619 ptr[--more] = '\0';
620 ptr[more - 1] = ',';
621 }
622
623 (void) memmove(&buffer[length], ptr, more + 1);
624 length += more;
625 offset = length + 3;
626
627 ptr = &buffer[offset];
628 }
629
630 if (strncasecmp(ptr, ".Nd", 3) == 0) {
631 (void) strcpy(&buffer[length], " -");
632
633 while (strncasecmp(ptr, ".Sh", 3) != 0) {
634 int more;
635
636 if (*ptr == '.') {
637 char *space;
638
639 if ((space = findwhitespace(ptr)) == NULL)
640 ptr = "";
641 else {
642 space++;
643 (void) memmove(ptr, space,
644 strlen(space) + 1);
645 }
646 }
647
648 if (*ptr != '\0') {
649 buffer[offset - 1] = ' ';
650 more = strlen(ptr) + 1;
651 offset += more;
652 }
653 ptr = &buffer[offset];
654 if ((sizeof(buffer) == offset) ||
655 (GetS(in, ptr, sizeof(buffer) - offset)
656 == NULL)) {
657 free(section);
658 return NULL;
659 }
660 if (manpreprocess(ptr))
661 *ptr = '\0';
662 }
663 }
664 }
665 else {
666 int offset;
667
668 if (*buffer == '.') {
669 char *space;
670
671 if ((space = findwhitespace(buffer)) == NULL) {
672 free(section);
673 return NULL;
674 }
675 space++;
676 (void) memmove(buffer, space, strlen(space) + 1);
677 }
678
679 offset = strlen(buffer) + 1;
680 for (;;) {
681 int more;
682
683 ptr = &buffer[offset];
684 if ((sizeof(buffer) == offset) ||
685 (GetS(in, ptr, sizeof(buffer) - offset)
686 == NULL)) {
687 free(section);
688 return NULL;
689 }
690 if (manpreprocess(ptr) || (*ptr == '\0'))
691 continue;
692
693 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
694 (strncasecmp(ptr, ".Ss", 3) == 0))
695 break;
696
697 if (*ptr == '.') {
698 char *space;
699
700 if ((space = findwhitespace(ptr)) == NULL) {
701 continue;
702 }
703
704 space++;
705 (void) memmove(ptr, space, strlen(space) + 1);
706 }
707
708 buffer[offset - 1] = ' ';
709 more = strlen(ptr);
710 if ((more > 1) && (ptr[more - 1] == ',') &&
711 isspace(ptr[more - 2])) {
712 ptr[more - 1] = '\0';
713 ptr[more - 2] = ',';
714 }
715 else more++;
716 offset += more;
717 }
718 }
719
720 if (section == NULL) {
721 char sectionbuffer[24];
722
723 (void) sprintf(sectionbuffer, " (%c) - ",
724 sectionext[defaultsection]);
725 ptr = replacestring(buffer, " - ", sectionbuffer);
726 }
727 else {
728 ptr = replacestring(buffer, " - ", section);
729 free(section);
730 }
731 return ptr;
732 }
733
734 char *
735 getwhatisdata(char *name)
736 {
737 gzFile *in;
738 char *data;
739 int section;
740
741 if ((in = gzopen(name, "r")) == NULL) {
742 errx(EXIT_FAILURE, "%s: %s",
743 name,
744 strerror((errno == 0) ? ENOMEM : errno));
745 /* NOTREACHED */
746 }
747
748 section = manpagesection(name);
749 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
750
751 (void) gzclose(in);
752 return data;
753 }
754
755 void
756 processmanpages(manpage **source, whatis **dest)
757 {
758 manpage *mp;
759
760 mp = *source;
761 *source = NULL;
762
763 while (mp != NULL) {
764 manpage *obsolete;
765 char *data;
766
767 if (mp->mp_left != NULL)
768 processmanpages(&mp->mp_left,dest);
769
770 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
771 if (!addwhatis(dest,data))
772 err(EXIT_FAILURE, NULL);
773 }
774
775 obsolete = mp;
776 mp = mp->mp_right;
777 free(obsolete);
778 }
779 }
780
781 int
782 dumpwhatis (FILE *out, whatis *tree)
783 {
784 while (tree != NULL) {
785 if (tree->wi_left)
786 if (!dumpwhatis(out, tree->wi_left)) return 0;
787
788 if ((fputs(tree->wi_data, out) == EOF) ||
789 (fputc('\n', out) == EOF))
790 return 0;
791
792 tree = tree->wi_right;
793 }
794
795 return 1;
796 }
797