makewhatis.c revision 1.16 1 /* $NetBSD: makewhatis.c,v 1.16 2001/04/10 21:00:00 tron Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.16 2001/04/10 21:00:00 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <signal.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <zlib.h>
67
/*
 * Node of the binary search tree that collects the manual page files
 * found while walking the manual directories.  The tree is ordered by
 * inode number, so a file reachable under several names is stored once.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree holding smaller inode numbers */
	manpage	*mp_right;	/* subtree holding larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file; the tree key */
	char	 mp_name[1];	/* path name; nodes are allocated with extra
				 * room so the whole string fits here */
};
74
/*
 * Node of the binary search tree that holds the finished whatis lines.
 * The tree is ordered with strcmp() on wi_data, so an in-order walk
 * yields the entries sorted and without duplicates.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare less than wi_data */
	whatis	*wi_right;	/* entries that compare greater than wi_data */
	char	*wi_data;	/* the whatis line itself */
};
80
81 int main (int, char **);
82 char *findwhitespace (char *);
83 char *strmove (char *,char *);
84 char *GetS (gzFile, char *, int);
85 int manpagesection (char *);
86 char *createsectionstring(char *);
87 int addmanpage (manpage **, ino_t, char *);
88 int addwhatis (whatis **, char *);
89 char *replacestring (char *, char *, char *);
90 void catpreprocess (char *);
91 char *parsecatpage (gzFile *);
92 int manpreprocess (char *);
93 char *nroff (gzFile *);
94 char *parsemanpage (gzFile *, int);
95 char *getwhatisdata (char *);
96 void processmanpages (manpage **,whatis **);
97 int dumpwhatis (FILE *, whatis *);
98
/* Directories searched when no path is given on the command line. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters accepted as a manual section suffix.  The position of a
 * character in this string is the section index returned by
 * manpagesection(); index 0 ('0') denotes preformatted pages.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file that main() writes. */
char whatisdb[] = "whatis.db";
106
107 int
108 main(int argc,char **argv)
109 {
110 char **manpath;
111 FTS *fts;
112 FTSENT *fe;
113 manpage *source;
114 whatis *dest;
115 FILE *out;
116
117 (void)setlocale(LC_ALL, "");
118
119 manpath = (argc < 2) ? default_manpath : &argv[1];
120
121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
122 perror(getprogname());
123 return EXIT_FAILURE;
124 }
125
126 source = NULL;
127 while ((fe = fts_read(fts)) != NULL) {
128 switch (fe->fts_info) {
129 case FTS_F:
130 if (manpagesection(fe->fts_path) >= 0)
131 if (!addmanpage(&source,
132 fe->fts_statp->st_ino,
133 fe->fts_path))
134 err(EXIT_FAILURE, NULL);
135 case FTS_D:
136 case FTS_DC:
137 case FTS_DEFAULT:
138 case FTS_DP:
139 case FTS_SLNONE:
140 break;
141 default:
142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
143 strerror(fe->fts_errno));
144
145 }
146 }
147
148 (void)fts_close(fts);
149
150 dest = NULL;
151 processmanpages(&source, &dest);
152
153 if (chdir(manpath[0]) < 0)
154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
155
156 if ((out = fopen(whatisdb, "w")) == NULL)
157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
158
159 if (!(dumpwhatis(out, dest) ||
160 (fclose(out) < 0)) ||
161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
163
164 return EXIT_SUCCESS;
165 }
166
/*
 * Locate the first whitespace character in a string.
 *
 * str	NUL-terminated string to search.
 *
 * Returns a pointer to the first character for which isspace() is true,
 * or NULL if the string contains none.
 */
char *
findwhitespace(char *str)
{
	for (; *str != '\0'; str++) {
		/* <ctype.h> functions require an unsigned char value. */
		if (isspace((unsigned char)*str))
			return str;
	}

	return NULL;
}
179
/*
 * Copy a NUL-terminated string, terminator included, to a destination
 * that is allowed to overlap the source.
 *
 * Returns dest.
 */
char *
strmove(char *dest, char *src)
{
	size_t nbytes;

	nbytes = strlen(src) + 1;	/* include the terminating NUL */
	(void) memmove(dest, src, nbytes);

	return dest;
}
186
187 char
188 *GetS(gzFile in, char *buffer, int length)
189
190 {
191 char *ptr;
192
193 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
194 ptr = NULL;
195
196 return ptr;
197 }
198
199 int
200 manpagesection(char *name)
201 {
202 char *ptr;
203
204 if ((ptr = strrchr(name, '/')) != NULL)
205 ptr++;
206 else
207 ptr = name;
208
209 while ((ptr = strchr(ptr, '.')) != NULL) {
210 int section;
211
212 ptr++;
213 section=0;
214 while (sectionext[section] != '\0')
215 if (sectionext[section] == *ptr)
216 return section;
217 else
218 section++;
219 }
220
221 return -1;
222 }
223
/*
 * Build the separator that carries the section into a whatis line:
 * " (<section_id>) - ".
 *
 * Returns a malloc()ed string the caller must free, or NULL if the
 * allocation fails.
 */
char *
createsectionstring(char *section_id)
{
	char *section;

	/* 2 bytes for " (", 4 for ") - " and 1 for the terminating NUL. */
	if ((section = malloc(strlen(section_id) + 7)) != NULL) {
		section[0] = ' ';
		section[1] = '(';
		(void) strcat(strcpy(&section[2], section_id), ") - ");
	}
	return section;
}
236
237 int
238 addmanpage(manpage **tree,ino_t inode,char *name)
239 {
240 manpage *mp;
241
242 while ((mp = *tree) != NULL) {
243 if (mp->mp_inode == inode)
244 return 1;
245 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
246 }
247
248 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
249 return 0;
250
251 mp->mp_left = NULL;
252 mp->mp_right = NULL;
253 mp->mp_inode = inode;
254 (void) strcpy(mp->mp_name, name);
255 *tree = mp;
256
257 return 1;
258 }
259
260 int
261 addwhatis(whatis **tree, char *data)
262 {
263 whatis *wi;
264 int result;
265
266 while (isspace(*data))
267 data++;
268
269 if (*data == '/') {
270 char *ptr;
271
272 ptr = ++data;
273 while ((*ptr != '\0') && !isspace(*ptr))
274 if (*ptr++ == '/')
275 data = ptr;
276 }
277
278 while ((wi = *tree) != NULL) {
279 result=strcmp(data, wi->wi_data);
280 if (result == 0) return 1;
281 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
282 }
283
284 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
285 return 0;
286
287 wi->wi_left = NULL;
288 wi->wi_right = NULL;
289 wi->wi_data = data;
290 *tree = wi;
291
292 return 1;
293 }
294
/*
 * Clean up one line of a formatted (cat) manual page in place.
 *
 * Leading and trailing whitespace is removed, every inner run of
 * whitespace becomes a single blank, and overstrike sequences are
 * undone: a character followed by a backspace is dropped together with
 * the backspace, leaving only the character printed over it.
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/* <ctype.h> functions require an unsigned char value. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* No blank for whitespace at the end of the line. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\b')
			from += 2;
		else
			*to++ = *from++;
	}

	*to = '\0';
}
316
/*
 * Return a copy of a string in which the first occurrence of a
 * substring has been replaced.
 *
 * string	text to copy
 * old		substring to look for
 * new		replacement text; NULL means "replace nothing"
 *
 * If new is NULL or old does not occur in string, the result is a plain
 * copy of string.
 *
 * Returns a malloc()ed string the caller must free, or NULL if the
 * allocation fails.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *result;
	size_t slength, olength, nlength, pos;

	slength = strlen(string);
	match = (new != NULL) ? strstr(string, old) : NULL;

	if (match == NULL) {
		/* Nothing to replace: hand back a private copy. */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	/* prefix, replacement, then the remainder including its NUL */
	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	(void) strcpy(&result[pos + nlength], &string[pos + olength]);

	return result;
}
344
345 char *
346 parsecatpage(gzFile *in)
347 {
348 char buffer[8192];
349 char *section, *ptr, *last;
350 int size;
351
352 do {
353 if (GetS(in, buffer, sizeof(buffer)) == NULL)
354 return NULL;
355 }
356 while (buffer[0] == '\n');
357
358 section = NULL;
359 if ((ptr = strchr(buffer, '(')) != NULL) {
360 if ((last = strchr(ptr + 1, ')')) !=NULL) {
361 int length;
362
363 length = last - ptr + 1;
364 if ((section = malloc(length + 5)) == NULL)
365 return NULL;
366
367 *section = ' ';
368 (void) memcpy(section + 1, ptr, length);
369 (void) strcpy(section + 1 + length, " - ");
370 }
371 }
372
373 for (;;) {
374 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
375 free(section);
376 return NULL;
377 }
378 catpreprocess(buffer);
379 if (strncmp(buffer, "NAME", 4) == 0)
380 break;
381 }
382
383 ptr = last = buffer;
384 size = sizeof(buffer) - 1;
385 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
386 int length;
387
388 catpreprocess(ptr);
389
390 length = strlen(ptr);
391 if (length == 0) {
392 *last = '\0';
393
394 ptr = replacestring(buffer, " - ", section);
395 free(section);
396 return ptr;
397 }
398 if ((length > 1) && (ptr[length - 1] == '-') &&
399 isalpha(ptr[length - 2]))
400 last = &ptr[--length];
401 else {
402 last = &ptr[length++];
403 *last = ' ';
404 }
405
406 ptr += length;
407 size -= length;
408 }
409
410 free(section);
411
412 return NULL;
413 }
414
/*
 * Clean up one line of manual page source in place.
 *
 * - A line that is a roff comment (.\") is left untouched and reported.
 * - Leading and trailing whitespace is removed and inner runs become a
 *   single blank; no blank is kept in front of a comma.
 * - Double quotes are removed.
 * - Backslash escapes are removed: "\-" becomes "-", font escapes
 *   (\fX, \f(XX, \f[name]) and size escapes (\s with optional sign and
 *   digits) vanish completely, and any other escape loses the backslash
 *   and the character after it.
 * - A ".Xr name section" request is rewritten to "name(section)".
 *
 * Returns 1 if the line is a comment, 0 otherwise.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* <ctype.h> functions require an unsigned char value. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* "\-": the '-' is copied by the next round. */
				break;
			case 'f':
				/*
				 * The font name is part of the escape, not
				 * text; skip it in each of its three forms.
				 */
				from++;
				if (*from == '(') {
					from++;
					if (*from != '\0')
						from++;
					if (*from != '\0')
						from++;
				} else if (*from == '[') {
					while ((*from != '\0') &&
					    (*from != ']'))
						from++;
					if (*from == ']')
						from++;
				} else if (*from != '\0')
					from++;
				break;
			case 's':
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"')
			from++;
		else
			*to++ = *from++;
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		/*
		 * The pass above turned every whitespace run into a single
		 * blank, so the end of the name is the next blank.
		 */
		if ((sect = strchr(from, ' ')) != NULL) {
			int length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
478
479 char *
480 nroff(gzFile *in)
481 {
482 char tempname[MAXPATHLEN], buffer[65536], *data;
483 int tempfd, bytes, pipefd[2], status;
484 static int devnull = -1;
485 pid_t child;
486
487 if (gzrewind(in) < 0) {
488 perror(getprogname());
489 return NULL;
490 }
491
492 if ((devnull < 0) &&
493 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
494 perror(getprogname());
495 return NULL;
496 }
497
498 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
499 if ((tempfd = mkstemp(tempname)) < 0) {
500 perror(getprogname());
501 return NULL;
502 }
503
504 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
505 if (write(tempfd, buffer, bytes) != bytes) {
506 bytes = -1;
507 break;
508 }
509
510 if ((bytes < 0) ||
511 (lseek(tempfd, 0, SEEK_SET) < 0) ||
512 (pipe(pipefd) < 0)) {
513 perror(getprogname());
514 (void)close(tempfd);
515 (void)unlink(tempname);
516 return NULL;
517 }
518
519 switch (child = vfork()) {
520 case -1:
521 perror(getprogname());
522 (void)close(pipefd[1]);
523 (void)close(pipefd[0]);
524 (void)close(tempfd);
525 (void)unlink(tempname);
526 return NULL;
527 /* NOTREACHED */
528 case 0:
529 (void)close(pipefd[0]);
530 if (tempfd != STDIN_FILENO) {
531 (void)dup2(tempfd, STDIN_FILENO);
532 (void)close(tempfd);
533 }
534 if (pipefd[1] != STDOUT_FILENO) {
535 (void)dup2(pipefd[1], STDOUT_FILENO);
536 (void)close(pipefd[1]);
537 }
538 if (devnull != STDERR_FILENO) {
539 (void)dup2(devnull, STDERR_FILENO);
540 (void)close(devnull);
541 }
542 (void)execlp("nroff", "nroff", "-S", "-man", NULL);
543 _exit(EXIT_FAILURE);
544 default:
545 (void)close(pipefd[1]);
546 (void)close(tempfd);
547 /* NOTREACHED */
548 }
549
550 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
551 if (errno == 0)
552 errno = ENOMEM;
553 perror(getprogname());
554 (void)close(pipefd[0]);
555 (void)kill(child, SIGTERM);
556 while (waitpid(child, NULL, 0) != child);
557 (void)unlink(tempname);
558 return NULL;
559 }
560
561 data = parsecatpage(in);
562 while (gzread(in, buffer, sizeof(buffer)) > 0);
563 (void)gzclose(in);
564
565 while (waitpid(child, &status, 0) != child);
566 if ((data != NULL) &&
567 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
568 free(data);
569 data = NULL;
570 }
571
572 (void)unlink(tempname);
573
574 return data;
575 }
576
577 char *
578 parsemanpage(gzFile *in, int defaultsection)
579 {
580 char *section, buffer[8192], *ptr;
581
582 section = NULL;
583 do {
584 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
585 free(section);
586 return NULL;
587 }
588 if (manpreprocess(buffer))
589 continue;
590 if (strncasecmp(buffer, ".Dt", 3) == 0) {
591 char *end;
592
593 ptr = &buffer[3];
594 if (isspace(*ptr))
595 ptr++;
596 if ((ptr = findwhitespace(ptr)) == NULL)
597 continue;
598
599 if ((end = findwhitespace(++ptr)) != NULL)
600 *end = '\0';
601
602 free(section);
603 section = createsectionstring(ptr);
604 }
605 else if (strncasecmp(buffer, ".TH", 3) == 0) {
606 ptr = &buffer[3];
607 while (isspace(*ptr))
608 ptr++;
609 if ((ptr = findwhitespace(ptr)) != NULL) {
610 char *next;
611
612 while (isspace(*ptr))
613 ptr++;
614 if ((next = findwhitespace(ptr)) != NULL)
615 *next = '\0';
616 free(section);
617 section = createsectionstring(ptr);
618 }
619 }
620 else if (strncasecmp(buffer, ".Ds", 3) == 0) {
621 free(section);
622 return NULL;
623 }
624 } while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
625
626 do {
627 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
628 free(section);
629 return NULL;
630 }
631 } while (manpreprocess(buffer));
632
633 if (strncasecmp(buffer, ".Nm", 3) == 0) {
634 int length, offset;
635
636 ptr = &buffer[3];
637 while (isspace(*ptr))
638 ptr++;
639
640 length = strlen(ptr);
641 if ((length > 1) && (ptr[length - 1] == ',') &&
642 isspace(ptr[length - 2])) {
643 ptr[--length] = '\0';
644 ptr[length - 1] = ',';
645 }
646 (void) memmove(buffer, ptr, length + 1);
647
648 offset = length + 3;
649 ptr = &buffer[offset];
650 for (;;) {
651 int more;
652
653 if ((sizeof(buffer) == offset) ||
654 (GetS(in, ptr, sizeof(buffer) - offset)
655 == NULL)) {
656 free(section);
657 return NULL;
658 }
659 if (manpreprocess(ptr))
660 continue;
661
662 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
663
664 ptr += 3;
665 if (isspace(*ptr))
666 ptr++;
667
668 buffer[length++] = ' ';
669 more = strlen(ptr);
670 if ((more > 1) && (ptr[more - 1] == ',') &&
671 isspace(ptr[more - 2])) {
672 ptr[--more] = '\0';
673 ptr[more - 1] = ',';
674 }
675
676 (void) memmove(&buffer[length], ptr, more + 1);
677 length += more;
678 offset = length + 3;
679
680 ptr = &buffer[offset];
681 }
682
683 if (strncasecmp(ptr, ".Nd", 3) == 0) {
684 (void) strcpy(&buffer[length], " -");
685
686 while (strncasecmp(ptr, ".Sh", 3) != 0) {
687 int more;
688
689 if (*ptr == '.') {
690 char *space;
691
692 if (strncasecmp(ptr, ".Nd", 3) != 0) {
693 free(section);
694 return NULL;
695 }
696 space = findwhitespace(ptr);
697 if (space == NULL)
698 ptr = "";
699 else {
700 space++;
701 (void) strmove(ptr, space);
702 }
703 }
704
705 if (*ptr != '\0') {
706 buffer[offset - 1] = ' ';
707 more = strlen(ptr) + 1;
708 offset += more;
709 }
710 ptr = &buffer[offset];
711 if ((sizeof(buffer) == offset) ||
712 (GetS(in, ptr, sizeof(buffer) - offset)
713 == NULL)) {
714 free(section);
715 return NULL;
716 }
717 if (manpreprocess(ptr))
718 *ptr = '\0';
719 }
720 }
721 }
722 else {
723 int offset;
724
725 if (*buffer == '.') {
726 char *space;
727
728 if ((space = findwhitespace(&buffer[1])) == NULL) {
729 free(section);
730 return NULL;
731 }
732 space++;
733 (void) strmove(buffer, space);
734 }
735
736 offset = strlen(buffer) + 1;
737 for (;;) {
738 int more;
739
740 ptr = &buffer[offset];
741 if ((sizeof(buffer) == offset) ||
742 (GetS(in, ptr, sizeof(buffer) - offset)
743 == NULL)) {
744 free(section);
745 return NULL;
746 }
747 if (manpreprocess(ptr) || (*ptr == '\0'))
748 continue;
749
750 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
751 (strncasecmp(ptr, ".Ss", 3) == 0))
752 break;
753
754 if (*ptr == '.') {
755 char *space;
756
757 if ((space = findwhitespace(ptr)) == NULL) {
758 continue;
759 }
760
761 space++;
762 (void) memmove(ptr, space, strlen(space) + 1);
763 }
764
765 buffer[offset - 1] = ' ';
766 more = strlen(ptr);
767 if ((more > 1) && (ptr[more - 1] == ',') &&
768 isspace(ptr[more - 2])) {
769 ptr[more - 1] = '\0';
770 ptr[more - 2] = ',';
771 }
772 else more++;
773 offset += more;
774 }
775 }
776
777 if (section == NULL) {
778 char sectionbuffer[24];
779
780 (void) sprintf(sectionbuffer, " (%c) - ",
781 sectionext[defaultsection]);
782 ptr = replacestring(buffer, " - ", sectionbuffer);
783 }
784 else {
785 ptr = replacestring(buffer, " - ", section);
786 free(section);
787 }
788 return ptr;
789 }
790
791 char *
792 getwhatisdata(char *name)
793 {
794 gzFile *in;
795 char *data;
796 int section;
797
798 if ((in = gzopen(name, "r")) == NULL) {
799 errx(EXIT_FAILURE, "%s: %s",
800 name,
801 strerror((errno == 0) ? ENOMEM : errno));
802 /* NOTREACHED */
803 }
804
805 section = manpagesection(name);
806 if (section == 0)
807 data = parsecatpage(in);
808 else {
809 data = parsemanpage(in, section);
810 if (data == NULL)
811 data = nroff(in);
812 }
813
814 (void) gzclose(in);
815 return data;
816 }
817
818 void
819 processmanpages(manpage **source, whatis **dest)
820 {
821 manpage *mp;
822
823 mp = *source;
824 *source = NULL;
825
826 while (mp != NULL) {
827 manpage *obsolete;
828 char *data;
829
830 if (mp->mp_left != NULL)
831 processmanpages(&mp->mp_left,dest);
832
833 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
834 if (!addwhatis(dest,data))
835 err(EXIT_FAILURE, NULL);
836 }
837
838 obsolete = mp;
839 mp = mp->mp_right;
840 free(obsolete);
841 }
842 }
843
844 int
845 dumpwhatis (FILE *out, whatis *tree)
846 {
847 while (tree != NULL) {
848 if (tree->wi_left)
849 if (!dumpwhatis(out, tree->wi_left)) return 0;
850
851 if ((fputs(tree->wi_data, out) == EOF) ||
852 (fputc('\n', out) == EOF))
853 return 0;
854
855 tree = tree->wi_right;
856 }
857
858 return 1;
859 }
860