/*	$NetBSD: makewhatis.c,v 1.7.4.3 2001/04/22 18:07:14 he Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.7.4.3 2001/04/22 18:07:14 he Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <signal.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <zlib.h>
67
/*
 * Node of the binary search tree of manual page files, keyed by inode
 * number.  The path name is stored inline after the fixed members:
 * nodes are allocated with sizeof(manpage) + strlen(name) bytes, so the
 * one-element array already accounts for the terminating NUL.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree with smaller inode numbers */
	manpage	*mp_right;	/* subtree with larger inode numbers */
	ino_t	 mp_inode;	/* search key */
	char	 mp_name[1];	/* path name, over-allocated by the creator */
};
74
/*
 * Node of the binary search tree of whatis entries, ordered with
 * strcmp() on the entry text.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries sorting before wi_data */
	whatis	*wi_right;	/* entries sorting after wi_data */
	char	*wi_data;	/* the whatis line (not copied into the node) */
};
80
81 int main (int, char **);
82 char *findwhitespace (char *);
83 char *strmove (char *,char *);
84 char *GetS (gzFile, char *, int);
85 int manpagesection (char *);
86 char *createsectionstring(char *);
87 int addmanpage (manpage **, ino_t, char *);
88 int addwhatis (whatis **, char *);
89 char *replacestring (char *, char *, char *);
90 void catpreprocess (char *);
91 char *parsecatpage (gzFile *);
92 int manpreprocess (char *);
93 char *nroff (gzFile *);
94 char *parsemanpage (gzFile *, int);
95 char *getwhatisdata (char *);
96 void processmanpages (manpage **,whatis **);
97 int dumpwhatis (FILE *, whatis *);
98
/* Directories scanned when no path is given on the command line. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * File name suffix characters that identify a manual page section.
 * manpagesection() returns an index into this string.
 */
char sectionext[] = "0123456789ln";

/* Name of the generated database file. */
char whatisdb[] = "whatis.db";

/* Program name, provided by the C runtime; used for perror() messages. */
extern char *__progname;
108
109 int
110 main(int argc,char **argv)
111 {
112 char **manpath;
113 FTS *fts;
114 FTSENT *fe;
115 manpage *source;
116 whatis *dest;
117 FILE *out;
118
119 (void)setlocale(LC_ALL, "");
120
121 manpath = (argc < 2) ? default_manpath : &argv[1];
122
123 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
124 perror(__progname);
125 return EXIT_FAILURE;
126 }
127
128 source = NULL;
129 while ((fe = fts_read(fts)) != NULL) {
130 switch (fe->fts_info) {
131 case FTS_F:
132 if (manpagesection(fe->fts_path) >= 0)
133 if (!addmanpage(&source,
134 fe->fts_statp->st_ino,
135 fe->fts_path))
136 err(EXIT_FAILURE, NULL);
137 case FTS_D:
138 case FTS_DC:
139 case FTS_DEFAULT:
140 case FTS_DP:
141 case FTS_SLNONE:
142 break;
143 default:
144 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
145 strerror(fe->fts_errno));
146
147 }
148 }
149
150 (void)fts_close(fts);
151
152 dest = NULL;
153 processmanpages(&source, &dest);
154
155 if (chdir(manpath[0]) < 0)
156 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
157
158 if ((out = fopen(whatisdb, "w")) == NULL)
159 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
160
161 if (!(dumpwhatis(out, dest) ||
162 (fclose(out) < 0)) ||
163 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
164 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
165
166 return EXIT_SUCCESS;
167 }
168
/*
 * Return a pointer to the first whitespace character in str, or NULL if
 * the string contains none.
 *
 * The character is converted to unsigned char before it is handed to
 * isspace(); passing a negative plain char is undefined behaviour.
 */
char *
findwhitespace(char *str)
{
	for (; !isspace((unsigned char)*str); str++)
		if (*str == '\0')
			return NULL;

	return str;
}
181
/*
 * Copy the NUL-terminated string src to dest.  The two regions may
 * overlap.  Returns dest.
 */
char *
strmove(char *dest, char *src)
{
	size_t bytes = strlen(src) + 1;	/* include the terminating NUL */

	return memmove(dest, src, bytes);
}
188
189 char
190 *GetS(gzFile in, char *buffer, int length)
191
192 {
193 char *ptr;
194
195 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
196 ptr = NULL;
197
198 return ptr;
199 }
200
201 int
202 manpagesection(char *name)
203 {
204 char *ptr;
205
206 if ((ptr = strrchr(name, '/')) != NULL)
207 ptr++;
208 else
209 ptr = name;
210
211 while ((ptr = strchr(ptr, '.')) != NULL) {
212 int section;
213
214 ptr++;
215 section=0;
216 while (sectionext[section] != '\0')
217 if (sectionext[section] == *ptr)
218 return section;
219 else
220 section++;
221 }
222
223 return -1;
224 }
225
/*
 * Build the separator that is spliced into a whatis line in place of
 * " - ": for section_id "1" the result is " (1) - ".
 *
 * Returns a malloc()ed string that the caller must free(), or NULL if
 * the allocation fails.
 */
char *
createsectionstring(char *section_id)
{
	static const char prefix[] = " (";
	static const char suffix[] = ") - ";
	size_t	 idlen;
	char	*section;

	idlen = strlen(section_id);
	/* sizeof(suffix) includes the NUL that terminates the result. */
	section = malloc((sizeof(prefix) - 1) + idlen + sizeof(suffix));
	if (section == NULL)
		return NULL;

	(void)memcpy(section, prefix, sizeof(prefix) - 1);
	(void)memcpy(&section[sizeof(prefix) - 1], section_id, idlen);
	(void)memcpy(&section[sizeof(prefix) - 1 + idlen], suffix,
	    sizeof(suffix));

	return section;
}
238
239 int
240 addmanpage(manpage **tree,ino_t inode,char *name)
241 {
242 manpage *mp;
243
244 while ((mp = *tree) != NULL) {
245 if (mp->mp_inode == inode)
246 return 1;
247 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
248 }
249
250 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
251 return 0;
252
253 mp->mp_left = NULL;
254 mp->mp_right = NULL;
255 mp->mp_inode = inode;
256 (void) strcpy(mp->mp_name, name);
257 *tree = mp;
258
259 return 1;
260 }
261
262 int
263 addwhatis(whatis **tree, char *data)
264 {
265 whatis *wi;
266 int result;
267
268 while (isspace(*data))
269 data++;
270
271 if (*data == '/') {
272 char *ptr;
273
274 ptr = ++data;
275 while ((*ptr != '\0') && !isspace(*ptr))
276 if (*ptr++ == '/')
277 data = ptr;
278 }
279
280 while ((wi = *tree) != NULL) {
281 result=strcmp(data, wi->wi_data);
282 if (result == 0) return 1;
283 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
284 }
285
286 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
287 return 0;
288
289 wi->wi_left = NULL;
290 wi->wi_right = NULL;
291 wi->wi_data = data;
292 *tree = wi;
293
294 return 1;
295 }
296
/*
 * Clean up one line of a formatted ("cat") manual page in place:
 *  - leading and trailing whitespace is removed,
 *  - every other run of whitespace becomes a single space,
 *  - "X<backspace>" pairs are dropped, so overstruck text such as
 *    "N\bN" is reduced to its final character.
 *
 * Characters are converted to unsigned char before being passed to
 * isspace(); passing a negative plain char is undefined behaviour.
 */
void
catpreprocess(char *from)
{
	char *to = from;

	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			/* Whitespace at the end of the line is discarded. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* Skip the character and the backspace after it. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
318
/*
 * Return a newly allocated copy of string in which the first occurrence
 * of old is replaced by new.  If new is NULL, or old does not occur in
 * string, a plain copy of string is returned.
 *
 * Lengths and offsets are kept in size_t rather than int.
 *
 * The caller must free() the result.  Returns NULL if memory cannot be
 * allocated.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char	*match, *result;
	size_t	 prefixlen, oldlen, newlen, taillen;

	if (new == NULL || (match = strstr(string, old)) == NULL)
		return strdup(string);

	prefixlen = (size_t)(match - string);
	oldlen = strlen(old);
	newlen = strlen(new);
	taillen = strlen(match + oldlen);

	if ((result = malloc(prefixlen + newlen + taillen + 1)) == NULL)
		return NULL;

	(void) memcpy(result, string, prefixlen);
	(void) memcpy(result + prefixlen, new, newlen);
	/* taillen + 1 carries the terminating NUL across. */
	(void) memcpy(result + prefixlen + newlen, match + oldlen,
	    taillen + 1);

	return result;
}
346
347 char *
348 parsecatpage(gzFile *in)
349 {
350 char buffer[8192];
351 char *section, *ptr, *last;
352 int size;
353
354 do {
355 if (GetS(in, buffer, sizeof(buffer)) == NULL)
356 return NULL;
357 }
358 while (buffer[0] == '\n');
359
360 section = NULL;
361 if ((ptr = strchr(buffer, '(')) != NULL) {
362 if ((last = strchr(ptr + 1, ')')) !=NULL) {
363 int length;
364
365 length = last - ptr + 1;
366 if ((section = malloc(length + 5)) == NULL)
367 return NULL;
368
369 *section = ' ';
370 (void) memcpy(section + 1, ptr, length);
371 (void) strcpy(section + 1 + length, " - ");
372 }
373 }
374
375 for (;;) {
376 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
377 free(section);
378 return NULL;
379 }
380 catpreprocess(buffer);
381 if (strncmp(buffer, "NAME", 4) == 0)
382 break;
383 }
384
385 ptr = last = buffer;
386 size = sizeof(buffer) - 1;
387 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
388 int length;
389
390 catpreprocess(ptr);
391
392 length = strlen(ptr);
393 if (length == 0) {
394 *last = '\0';
395
396 ptr = replacestring(buffer, " - ", section);
397 free(section);
398 return ptr;
399 }
400 if ((length > 1) && (ptr[length - 1] == '-') &&
401 isalpha(ptr[length - 2]))
402 last = &ptr[--length];
403 else {
404 last = &ptr[length++];
405 *last = ' ';
406 }
407
408 ptr += length;
409 size -= length;
410 }
411
412 free(section);
413
414 return NULL;
415 }
416
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if it is a roff comment
 * (optional whitespace followed by .\").  Otherwise returns 0 after:
 *  - removing leading and trailing whitespace and collapsing other
 *    whitespace runs to one space (none at all in front of a comma),
 *  - removing double quotes,
 *  - resolving backslash escapes: "\-" becomes "-", font escapes
 *    (\fX, \f(XX, \f[name]) and size escapes (\s followed by an
 *    optional sign and digits) are removed, and any other escape is
 *    dropped together with the character that follows the backslash,
 *  - rewriting a line of the form ".Xr name sect" as "name(sect)".
 *
 * Characters are converted to unsigned char before being passed to the
 * <ctype.h> functions; passing a negative plain char is undefined
 * behaviour.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* "\-" keeps its '-'; a lone '\' ends the line. */
				break;
			case 'f':
				/*
				 * A font is named by one character, by
				 * "(" plus two characters, or by "[name]".
				 * Skip the whole name so that it does not
				 * end up in the text.
				 */
				from++;
				if (*from == '(') {
					int n;

					for (n = 0; n < 3 && *from != '\0'; n++)
						from++;
				} else if (*from == '[') {
					while (*from != '\0' && *from++ != ']')
						continue;
				} else if (*from != '\0')
					from++;
				break;
			case 's':
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		/* Find the whitespace between the name and the section. */
		for (sect = from;
		    *sect != '\0' && !isspace((unsigned char)*sect); sect++)
			continue;

		if (*sect != '\0') {
			size_t length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
480
481 char *
482 nroff(gzFile *in)
483 {
484 char tempname[MAXPATHLEN], buffer[65536], *data;
485 int tempfd, bytes, pipefd[2], status;
486 static int devnull = -1;
487 pid_t child;
488
489 if (gzrewind(in) < 0) {
490 perror(__progname);
491 return NULL;
492 }
493
494 if ((devnull < 0) &&
495 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
496 perror(__progname);
497 return NULL;
498 }
499
500 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
501 if ((tempfd = mkstemp(tempname)) < 0) {
502 perror(__progname);
503 return NULL;
504 }
505
506 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
507 if (write(tempfd, buffer, bytes) != bytes) {
508 bytes = -1;
509 break;
510 }
511
512 if ((bytes < 0) ||
513 (lseek(tempfd, 0, SEEK_SET) < 0) ||
514 (pipe(pipefd) < 0)) {
515 perror(__progname);
516 (void)close(tempfd);
517 (void)unlink(tempname);
518 return NULL;
519 }
520
521 switch (child = vfork()) {
522 case -1:
523 perror(__progname);
524 (void)close(pipefd[1]);
525 (void)close(pipefd[0]);
526 (void)close(tempfd);
527 (void)unlink(tempname);
528 return NULL;
529 /* NOTREACHED */
530 case 0:
531 (void)close(pipefd[0]);
532 if (tempfd != STDIN_FILENO) {
533 (void)dup2(tempfd, STDIN_FILENO);
534 (void)close(tempfd);
535 }
536 if (pipefd[1] != STDOUT_FILENO) {
537 (void)dup2(pipefd[1], STDOUT_FILENO);
538 (void)close(pipefd[1]);
539 }
540 if (devnull != STDERR_FILENO) {
541 (void)dup2(devnull, STDERR_FILENO);
542 (void)close(devnull);
543 }
544 (void)execlp("nroff", "nroff", "-S", "-man", NULL);
545 _exit(EXIT_FAILURE);
546 default:
547 (void)close(pipefd[1]);
548 (void)close(tempfd);
549 /* NOTREACHED */
550 }
551
552 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
553 if (errno == 0)
554 errno = ENOMEM;
555 perror(__progname);
556 (void)close(pipefd[0]);
557 (void)kill(child, SIGTERM);
558 while (waitpid(child, NULL, 0) != child);
559 (void)unlink(tempname);
560 return NULL;
561 }
562
563 data = parsecatpage(in);
564 while (gzread(in, buffer, sizeof(buffer)) > 0);
565 (void)gzclose(in);
566
567 while (waitpid(child, &status, 0) != child);
568 if ((data != NULL) &&
569 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
570 free(data);
571 data = NULL;
572 }
573
574 (void)unlink(tempname);
575
576 return data;
577 }
578
579 char *
580 parsemanpage(gzFile *in, int defaultsection)
581 {
582 char *section, buffer[8192], *ptr;
583
584 section = NULL;
585 do {
586 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
587 free(section);
588 return NULL;
589 }
590 if (manpreprocess(buffer))
591 continue;
592 if (strncasecmp(buffer, ".Dt", 3) == 0) {
593 char *end;
594
595 ptr = &buffer[3];
596 if (isspace(*ptr))
597 ptr++;
598 if ((ptr = findwhitespace(ptr)) == NULL)
599 continue;
600
601 if ((end = findwhitespace(++ptr)) != NULL)
602 *end = '\0';
603
604 free(section);
605 section = createsectionstring(ptr);
606 }
607 else if (strncasecmp(buffer, ".TH", 3) == 0) {
608 ptr = &buffer[3];
609 while (isspace(*ptr))
610 ptr++;
611 if ((ptr = findwhitespace(ptr)) != NULL) {
612 char *next;
613
614 while (isspace(*ptr))
615 ptr++;
616 if ((next = findwhitespace(ptr)) != NULL)
617 *next = '\0';
618 free(section);
619 section = createsectionstring(ptr);
620 }
621 }
622 else if (strncasecmp(buffer, ".Ds", 3) == 0) {
623 free(section);
624 return NULL;
625 }
626 } while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
627
628 do {
629 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
630 free(section);
631 return NULL;
632 }
633 } while (manpreprocess(buffer));
634
635 if (strncasecmp(buffer, ".Nm", 3) == 0) {
636 int length, offset;
637
638 ptr = &buffer[3];
639 while (isspace(*ptr))
640 ptr++;
641
642 length = strlen(ptr);
643 if ((length > 1) && (ptr[length - 1] == ',') &&
644 isspace(ptr[length - 2])) {
645 ptr[--length] = '\0';
646 ptr[length - 1] = ',';
647 }
648 (void) memmove(buffer, ptr, length + 1);
649
650 offset = length + 3;
651 ptr = &buffer[offset];
652 for (;;) {
653 int more;
654
655 if ((sizeof(buffer) == offset) ||
656 (GetS(in, ptr, sizeof(buffer) - offset)
657 == NULL)) {
658 free(section);
659 return NULL;
660 }
661 if (manpreprocess(ptr))
662 continue;
663
664 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
665
666 ptr += 3;
667 if (isspace(*ptr))
668 ptr++;
669
670 buffer[length++] = ' ';
671 more = strlen(ptr);
672 if ((more > 1) && (ptr[more - 1] == ',') &&
673 isspace(ptr[more - 2])) {
674 ptr[--more] = '\0';
675 ptr[more - 1] = ',';
676 }
677
678 (void) memmove(&buffer[length], ptr, more + 1);
679 length += more;
680 offset = length + 3;
681
682 ptr = &buffer[offset];
683 }
684
685 if (strncasecmp(ptr, ".Nd", 3) == 0) {
686 (void) strcpy(&buffer[length], " -");
687
688 while (strncasecmp(ptr, ".Sh", 3) != 0) {
689 int more;
690
691 if (*ptr == '.') {
692 char *space;
693
694 if (strncasecmp(ptr, ".Nd", 3) != 0) {
695 free(section);
696 return NULL;
697 }
698 space = findwhitespace(ptr);
699 if (space == NULL)
700 ptr = "";
701 else {
702 space++;
703 (void) strmove(ptr, space);
704 }
705 }
706
707 if (*ptr != '\0') {
708 buffer[offset - 1] = ' ';
709 more = strlen(ptr) + 1;
710 offset += more;
711 }
712 ptr = &buffer[offset];
713 if ((sizeof(buffer) == offset) ||
714 (GetS(in, ptr, sizeof(buffer) - offset)
715 == NULL)) {
716 free(section);
717 return NULL;
718 }
719 if (manpreprocess(ptr))
720 *ptr = '\0';
721 }
722 }
723 }
724 else {
725 int offset;
726
727 if (*buffer == '.') {
728 char *space;
729
730 if ((space = findwhitespace(&buffer[1])) == NULL) {
731 free(section);
732 return NULL;
733 }
734 space++;
735 (void) strmove(buffer, space);
736 }
737
738 offset = strlen(buffer) + 1;
739 for (;;) {
740 int more;
741
742 ptr = &buffer[offset];
743 if ((sizeof(buffer) == offset) ||
744 (GetS(in, ptr, sizeof(buffer) - offset)
745 == NULL)) {
746 free(section);
747 return NULL;
748 }
749 if (manpreprocess(ptr) || (*ptr == '\0'))
750 continue;
751
752 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
753 (strncasecmp(ptr, ".Ss", 3) == 0))
754 break;
755
756 if (*ptr == '.') {
757 char *space;
758
759 if ((space = findwhitespace(ptr)) == NULL) {
760 continue;
761 }
762
763 space++;
764 (void) memmove(ptr, space, strlen(space) + 1);
765 }
766
767 buffer[offset - 1] = ' ';
768 more = strlen(ptr);
769 if ((more > 1) && (ptr[more - 1] == ',') &&
770 isspace(ptr[more - 2])) {
771 ptr[more - 1] = '\0';
772 ptr[more - 2] = ',';
773 }
774 else more++;
775 offset += more;
776 }
777 }
778
779 if (section == NULL) {
780 char sectionbuffer[24];
781
782 (void) sprintf(sectionbuffer, " (%c) - ",
783 sectionext[defaultsection]);
784 ptr = replacestring(buffer, " - ", sectionbuffer);
785 }
786 else {
787 ptr = replacestring(buffer, " - ", section);
788 free(section);
789 }
790 return ptr;
791 }
792
793 char *
794 getwhatisdata(char *name)
795 {
796 gzFile *in;
797 char *data;
798 int section;
799
800 if ((in = gzopen(name, "r")) == NULL) {
801 errx(EXIT_FAILURE, "%s: %s",
802 name,
803 strerror((errno == 0) ? ENOMEM : errno));
804 /* NOTREACHED */
805 }
806
807 section = manpagesection(name);
808 if (section == 0)
809 data = parsecatpage(in);
810 else {
811 data = parsemanpage(in, section);
812 if (data == NULL)
813 data = nroff(in);
814 }
815
816 (void) gzclose(in);
817 return data;
818 }
819
820 void
821 processmanpages(manpage **source, whatis **dest)
822 {
823 manpage *mp;
824
825 mp = *source;
826 *source = NULL;
827
828 while (mp != NULL) {
829 manpage *obsolete;
830 char *data;
831
832 if (mp->mp_left != NULL)
833 processmanpages(&mp->mp_left,dest);
834
835 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
836 if (!addwhatis(dest,data))
837 err(EXIT_FAILURE, NULL);
838 }
839
840 obsolete = mp;
841 mp = mp->mp_right;
842 free(obsolete);
843 }
844 }
845
846 int
847 dumpwhatis (FILE *out, whatis *tree)
848 {
849 while (tree != NULL) {
850 if (tree->wi_left)
851 if (!dumpwhatis(out, tree->wi_left)) return 0;
852
853 if ((fputs(tree->wi_data, out) == EOF) ||
854 (fputc('\n', out) == EOF))
855 return 0;
856
857 tree = tree->wi_right;
858 }
859
860 return 1;
861 }
862