/*	$NetBSD: makewhatis.c,v 1.14 2001/04/08 14:27:50 tron Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.14 2001/04/08 14:27:50 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <signal.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <zlib.h>
67
/*
 * Node of the binary search tree that collects the manual page files
 * found on disk.  The tree is keyed by inode number, so a file that is
 * reachable under several names is recorded only once.
 *
 * mp_name is declared with one element but nodes are over-allocated so
 * that the whole path name (including its NUL) fits behind the struct;
 * the allocation size is derived from sizeof(manpage), so the array
 * length must stay 1.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* search key */
	char	 mp_name[1];	/* path name of the file (over-allocated) */
};
74
/*
 * Node of the binary search tree that holds the finished whatis lines.
 * The tree is ordered by strcmp() on wi_data, which yields a sorted,
 * duplicate-free database when it is written out in order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that sort before this one */
	whatis	*wi_right;	/* entries that sort after this one */
	char	*wi_data;	/* the whatis line (not copied into the node) */
};
80
81 int main (int, char **);
82 char *findwhitespace (char *);
83 char *strmove (char *,char *);
84 char *GetS (gzFile, char *, int);
85 int manpagesection (char *);
86 char *createsectionstring(char *);
87 int addmanpage (manpage **, ino_t, char *);
88 int addwhatis (whatis **, char *);
89 char *replacestring (char *, char *, char *);
90 void catpreprocess (char *);
91 char *parsecatpage (gzFile *);
92 int manpreprocess (char *);
93 char *nroff (gzFile *);
94 char *parsemanpage (gzFile *, int);
95 char *getwhatisdata (char *);
96 void processmanpages (manpage **,whatis **);
97 int dumpwhatis (FILE *, whatis *);
98
/* File name of the database, created inside the first manual directory. */
char whatisdb[] = "whatis.db";

/*
 * Characters that may follow a '.' in a manual page file name.  The
 * index of the matching character is the section number returned by
 * manpagesection(); index 0 ('0') selects the formatted-page parser.
 */
char sectionext[] = "0123456789ln";

/* NULL-terminated directory list used when none is given on the command line. */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};
106
107 int
108 main(int argc,char **argv)
109 {
110 char **manpath;
111 FTS *fts;
112 FTSENT *fe;
113 manpage *source;
114 whatis *dest;
115 FILE *out;
116
117 (void)setlocale(LC_ALL, "");
118
119 manpath = (argc < 2) ? default_manpath : &argv[1];
120
121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
122 perror(getprogname());
123 return EXIT_FAILURE;
124 }
125
126 source = NULL;
127 while ((fe = fts_read(fts)) != NULL) {
128 switch (fe->fts_info) {
129 case FTS_F:
130 if (manpagesection(fe->fts_path) >= 0)
131 if (!addmanpage(&source,
132 fe->fts_statp->st_ino,
133 fe->fts_path))
134 err(EXIT_FAILURE, NULL);
135 case FTS_D:
136 case FTS_DC:
137 case FTS_DEFAULT:
138 case FTS_DP:
139 case FTS_SLNONE:
140 break;
141 default:
142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
143 strerror(fe->fts_errno));
144
145 }
146 }
147
148 (void)fts_close(fts);
149
150 dest = NULL;
151 processmanpages(&source, &dest);
152
153 if (chdir(manpath[0]) < 0)
154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
155
156 if ((out = fopen(whatisdb, "w")) == NULL)
157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
158
159 if (!(dumpwhatis(out, dest) ||
160 (fclose(out) < 0)) ||
161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
163
164 return EXIT_SUCCESS;
165 }
166
/*
 * Return a pointer to the first whitespace character (as classified by
 * isspace()) in the NUL-terminated string str, or NULL if the string
 * contains none.
 */
char *
findwhitespace(char *str)
{
	for (; *str != '\0'; str++) {
		/*
		 * The <ctype.h> functions are only defined for EOF and
		 * values representable as unsigned char; a plain char
		 * may be negative for bytes >= 0x80.
		 */
		if (isspace((unsigned char)*str))
			return str;
	}

	return NULL;
}
179
/*
 * Copy the NUL-terminated string src, terminator included, to dest.
 * Unlike strcpy() the two regions may overlap.  Returns dest.
 */
char *
strmove(char *dest, char *src)
{
	size_t nbytes;

	nbytes = strlen(src) + 1;	/* the terminating NUL moves too */
	(void) memmove(dest, src, nbytes);

	return dest;
}
186
187 char
188 *GetS(gzFile in, char *buffer, int length)
189
190 {
191 char *ptr;
192
193 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
194 ptr = NULL;
195
196 return ptr;
197 }
198
199 int
200 manpagesection(char *name)
201 {
202 char *ptr;
203
204 if ((ptr = strrchr(name, '/')) != NULL)
205 ptr++;
206 else
207 ptr = name;
208
209 while ((ptr = strchr(ptr, '.')) != NULL) {
210 int section;
211
212 ptr++;
213 section=0;
214 while (sectionext[section] != '\0')
215 if (sectionext[section] == *ptr)
216 return section;
217 else
218 section++;
219 }
220
221 return -1;
222 }
223
/*
 * Build the string " (<section_id>) - " in freshly allocated memory.
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char *
createsectionstring(char *section_id)
{
	static const char prefix[] = " (";
	static const char suffix[] = ") - ";
	size_t idlen;
	char *section;

	idlen = strlen(section_id);

	/* sizeof(suffix) also accounts for the terminating NUL. */
	section = malloc((sizeof(prefix) - 1) + idlen + sizeof(suffix));
	if (section != NULL) {
		(void) memcpy(section, prefix, sizeof(prefix) - 1);
		(void) memcpy(&section[sizeof(prefix) - 1], section_id, idlen);
		(void) memcpy(&section[sizeof(prefix) - 1 + idlen], suffix,
		    sizeof(suffix));
	}

	return section;
}
236
237 int
238 addmanpage(manpage **tree,ino_t inode,char *name)
239 {
240 manpage *mp;
241
242 while ((mp = *tree) != NULL) {
243 if (mp->mp_inode == inode)
244 return 1;
245 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
246 }
247
248 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
249 return 0;
250
251 mp->mp_left = NULL;
252 mp->mp_right = NULL;
253 mp->mp_inode = inode;
254 (void) strcpy(mp->mp_name, name);
255 *tree = mp;
256
257 return 1;
258 }
259
260 int
261 addwhatis(whatis **tree, char *data)
262 {
263 whatis *wi;
264 int result;
265
266 while (isspace(*data))
267 data++;
268
269 if (*data == '/') {
270 char *ptr;
271
272 ptr = ++data;
273 while ((*ptr != '\0') && !isspace(*ptr))
274 if (*ptr++ == '/')
275 data = ptr;
276 }
277
278 while ((wi = *tree) != NULL) {
279 result=strcmp(data, wi->wi_data);
280 if (result == 0) return 1;
281 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
282 }
283
284 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
285 return 0;
286
287 wi->wi_left = NULL;
288 wi->wi_right = NULL;
289 wi->wi_data = data;
290 *tree = wi;
291
292 return 1;
293 }
294
/*
 * Clean up one line of a formatted manual page in place.
 *
 * Leading and trailing whitespace is removed, every inner run of
 * whitespace is replaced by a single blank, and overstrike sequences
 * ("x<backspace>y", as used for bold and underlined text) are reduced
 * to the final character.
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;

	/* The casts keep isspace() defined for bytes >= 0x80. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* Whitespace at the end of the line is dropped. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (*(from + 1) == '\10') {
			/* Skip this character and the backspace after it. */
			from += 2;
		} else
			*to++ = *from++;
	}

	*to = '\0';
}
316
/*
 * Return a newly allocated copy of string in which the FIRST occurrence
 * of old has been replaced by new.
 *
 * If new is NULL, or old does not occur in string, the result is an
 * unmodified copy.  The caller must free() the result.  Returns NULL
 * if memory cannot be allocated.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *result;
	size_t slength, olength, nlength, pos;

	slength = strlen(string);

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL) {
		/* Nothing to substitute: hand back a plain copy. */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	/* Lengths are kept in size_t so the size below cannot go negative. */
	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	(void) strcpy(&result[pos + nlength], &string[pos + olength]);

	return result;
}
344
345 char *
346 parsecatpage(gzFile *in)
347 {
348 char buffer[8192];
349 char *section, *ptr, *last;
350 int size;
351
352 do {
353 if (GetS(in, buffer, sizeof(buffer)) == NULL)
354 return NULL;
355 }
356 while (buffer[0] == '\n');
357
358 section = NULL;
359 if ((ptr = strchr(buffer, '(')) != NULL) {
360 if ((last = strchr(ptr + 1, ')')) !=NULL) {
361 int length;
362
363 length = last - ptr + 1;
364 if ((section = malloc(length + 5)) == NULL)
365 return NULL;
366
367 *section = ' ';
368 (void) memcpy(section + 1, ptr, length);
369 (void) strcpy(section + 1 + length, " - ");
370 }
371 }
372
373 for (;;) {
374 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
375 free(section);
376 return NULL;
377 }
378 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
379 break;
380 }
381
382 ptr = last = buffer;
383 size = sizeof(buffer) - 1;
384 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
385 int length;
386
387 catpreprocess(ptr);
388
389 length = strlen(ptr);
390 if (length == 0) {
391 *last = '\0';
392
393 ptr = replacestring(buffer, " - ", section);
394 free(section);
395 return ptr;
396 }
397 if ((length > 1) && (ptr[length - 1] == '-') &&
398 isalpha(ptr[length - 2]))
399 last = &ptr[--length];
400 else {
401 last = &ptr[length++];
402 *last = ' ';
403 }
404
405 ptr += length;
406 size -= length;
407 }
408
409 free(section);
410
411 return NULL;
412 }
413
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if it is a comment (it starts
 * with `.\"' after optional whitespace).  Otherwise returns 0 after
 *  - removing leading and trailing whitespace and squeezing inner
 *    whitespace to one blank (none at all in front of a ','),
 *  - removing double quotes,
 *  - removing backslash escapes: "\-" becomes "-", "\f" and "\s" are
 *    dropped together with an optional sign and digits, any other
 *    escape is dropped together with the character behind it,
 *  - rewriting ".Xr name section ..." to "name(section ...)".
 *
 * NOTE(review): for "\f" only digits are consumed, so the font letter
 * of escapes such as "\fB" stays in the text -- confirm whether that is
 * intended.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* The casts keep the <ctype.h> calls defined for bytes >= 0x80. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Leave the character for the next round. */
				break;
			case 'f':
			case 's':
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
				break;
			}
		} else if (*from == '"')
			from++;
		else
			*to++ = *from++;
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int length;

			/* Split into name (from) and section (sect). */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
477
478 char *
479 nroff(gzFile *in)
480 {
481 char tempname[MAXPATHLEN], buffer[65536], *data;
482 int tempfd, bytes, pipefd[2], status;
483 static int devnull = -1;
484 pid_t child;
485
486 if (gzrewind(in) < 0) {
487 perror(getprogname());
488 return NULL;
489 }
490
491 if ((devnull < 0) &&
492 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
493 perror(getprogname());
494 return NULL;
495 }
496
497 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
498 if ((tempfd = mkstemp(tempname)) < 0) {
499 perror(getprogname());
500 return NULL;
501 }
502
503 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
504 if (write(tempfd, buffer, bytes) != bytes) {
505 bytes = -1;
506 break;
507 }
508
509 if ((bytes < 0) ||
510 (lseek(tempfd, 0, SEEK_SET) < 0) ||
511 (pipe(pipefd) < 0)) {
512 perror(getprogname());
513 (void)close(tempfd);
514 (void)unlink(tempname);
515 return NULL;
516 }
517
518 switch (child = vfork()) {
519 case -1:
520 perror(getprogname());
521 (void)close(pipefd[1]);
522 (void)close(pipefd[0]);
523 (void)close(tempfd);
524 (void)unlink(tempname);
525 return NULL;
526 /* NOTREACHED */
527 case 0:
528 (void)close(pipefd[0]);
529 if (tempfd != STDIN_FILENO) {
530 (void)dup2(tempfd, STDIN_FILENO);
531 (void)close(tempfd);
532 }
533 if (pipefd[1] != STDOUT_FILENO) {
534 (void)dup2(pipefd[1], STDOUT_FILENO);
535 (void)close(pipefd[1]);
536 }
537 if (devnull != STDERR_FILENO) {
538 (void)dup2(devnull, STDERR_FILENO);
539 (void)close(devnull);
540 }
541 (void)execlp("nroff", "nroff", "-S", "-man", NULL);
542 _exit(EXIT_FAILURE);
543 default:
544 (void)close(pipefd[1]);
545 (void)close(tempfd);
546 /* NOTREACHED */
547 }
548
549 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
550 if (errno == 0)
551 errno = ENOMEM;
552 perror(getprogname());
553 (void)close(pipefd[0]);
554 (void)kill(child, SIGTERM);
555 while (waitpid(child, NULL, 0) != child);
556 (void)unlink(tempname);
557 return NULL;
558 }
559
560 data = parsecatpage(in);
561 while (gzread(in, buffer, sizeof(buffer)) > 0);
562 (void)gzclose(in);
563
564 while (waitpid(child, &status, 0) != child);
565 if ((data != NULL) &&
566 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
567 free(data);
568 data = NULL;
569 }
570
571 (void)unlink(tempname);
572
573 return data;
574 }
575
576 char *
577 parsemanpage(gzFile *in, int defaultsection)
578 {
579 char *section, buffer[8192], *ptr;
580
581 section = NULL;
582 do {
583 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
584 free(section);
585 return NULL;
586 }
587 if (manpreprocess(buffer))
588 continue;
589 if (strncasecmp(buffer, ".Dt", 3) == 0) {
590 char *end;
591
592 ptr = &buffer[3];
593 if (isspace(*ptr))
594 ptr++;
595 if ((ptr = findwhitespace(ptr)) == NULL)
596 continue;
597
598 if ((end = findwhitespace(++ptr)) != NULL)
599 *end = '\0';
600
601 free(section);
602 section = createsectionstring(ptr);
603 }
604 else if (strncasecmp(buffer, ".TH", 3) == 0) {
605 ptr = &buffer[3];
606 while (isspace(*ptr))
607 ptr++;
608 if ((ptr = findwhitespace(ptr)) != NULL) {
609 char *next;
610
611 while (isspace(*ptr))
612 ptr++;
613 if ((next = findwhitespace(ptr)) != NULL)
614 *next = '\0';
615 free(section);
616 section = createsectionstring(ptr);
617 }
618 }
619 else if (strncasecmp(buffer, ".Ds", 3) == 0) {
620 free(section);
621 return NULL;
622 }
623 } while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
624
625 do {
626 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
627 free(section);
628 return NULL;
629 }
630 } while (manpreprocess(buffer));
631
632 if (strncasecmp(buffer, ".Nm", 3) == 0) {
633 int length, offset;
634
635 ptr = &buffer[3];
636 while (isspace(*ptr))
637 ptr++;
638
639 length = strlen(ptr);
640 if ((length > 1) && (ptr[length - 1] == ',') &&
641 isspace(ptr[length - 2])) {
642 ptr[--length] = '\0';
643 ptr[length - 1] = ',';
644 }
645 (void) memmove(buffer, ptr, length + 1);
646
647 offset = length + 3;
648 ptr = &buffer[offset];
649 for (;;) {
650 int more;
651
652 if ((sizeof(buffer) == offset) ||
653 (GetS(in, ptr, sizeof(buffer) - offset)
654 == NULL)) {
655 free(section);
656 return NULL;
657 }
658 if (manpreprocess(ptr))
659 continue;
660
661 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
662
663 ptr += 3;
664 if (isspace(*ptr))
665 ptr++;
666
667 buffer[length++] = ' ';
668 more = strlen(ptr);
669 if ((more > 1) && (ptr[more - 1] == ',') &&
670 isspace(ptr[more - 2])) {
671 ptr[--more] = '\0';
672 ptr[more - 1] = ',';
673 }
674
675 (void) memmove(&buffer[length], ptr, more + 1);
676 length += more;
677 offset = length + 3;
678
679 ptr = &buffer[offset];
680 }
681
682 if (strncasecmp(ptr, ".Nd", 3) == 0) {
683 (void) strcpy(&buffer[length], " -");
684
685 while (strncasecmp(ptr, ".Sh", 3) != 0) {
686 int more;
687
688 if (*ptr == '.') {
689 char *space;
690
691 space = findwhitespace(ptr);
692 if (space == NULL)
693 ptr = "";
694 else {
695 space++;
696 (void) strmove(ptr, space);
697 }
698 }
699
700 if (*ptr != '\0') {
701 buffer[offset - 1] = ' ';
702 more = strlen(ptr) + 1;
703 offset += more;
704 }
705 ptr = &buffer[offset];
706 if ((sizeof(buffer) == offset) ||
707 (GetS(in, ptr, sizeof(buffer) - offset)
708 == NULL)) {
709 free(section);
710 return NULL;
711 }
712 if (manpreprocess(ptr))
713 *ptr = '\0';
714 }
715 }
716 }
717 else {
718 int offset;
719
720 if (*buffer == '.') {
721 char *space;
722
723 if ((space = findwhitespace(&buffer[1])) == NULL) {
724 free(section);
725 return NULL;
726 }
727 space++;
728 (void) strmove(buffer, space);
729 }
730
731 offset = strlen(buffer) + 1;
732 for (;;) {
733 int more;
734
735 ptr = &buffer[offset];
736 if ((sizeof(buffer) == offset) ||
737 (GetS(in, ptr, sizeof(buffer) - offset)
738 == NULL)) {
739 free(section);
740 return NULL;
741 }
742 if (manpreprocess(ptr) || (*ptr == '\0'))
743 continue;
744
745 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
746 (strncasecmp(ptr, ".Ss", 3) == 0))
747 break;
748
749 if (*ptr == '.') {
750 char *space;
751
752 if ((space = findwhitespace(ptr)) == NULL) {
753 continue;
754 }
755
756 space++;
757 (void) memmove(ptr, space, strlen(space) + 1);
758 }
759
760 buffer[offset - 1] = ' ';
761 more = strlen(ptr);
762 if ((more > 1) && (ptr[more - 1] == ',') &&
763 isspace(ptr[more - 2])) {
764 ptr[more - 1] = '\0';
765 ptr[more - 2] = ',';
766 }
767 else more++;
768 offset += more;
769 }
770 }
771
772 if (section == NULL) {
773 char sectionbuffer[24];
774
775 (void) sprintf(sectionbuffer, " (%c) - ",
776 sectionext[defaultsection]);
777 ptr = replacestring(buffer, " - ", sectionbuffer);
778 }
779 else {
780 ptr = replacestring(buffer, " - ", section);
781 free(section);
782 }
783 return ptr;
784 }
785
786 char *
787 getwhatisdata(char *name)
788 {
789 gzFile *in;
790 char *data;
791 int section;
792
793 if ((in = gzopen(name, "r")) == NULL) {
794 errx(EXIT_FAILURE, "%s: %s",
795 name,
796 strerror((errno == 0) ? ENOMEM : errno));
797 /* NOTREACHED */
798 }
799
800 section = manpagesection(name);
801 if (section == 0)
802 data = parsecatpage(in);
803 else {
804 data = parsemanpage(in, section);
805 if (data == NULL)
806 data = nroff(in);
807 }
808
809 (void) gzclose(in);
810 return data;
811 }
812
813 void
814 processmanpages(manpage **source, whatis **dest)
815 {
816 manpage *mp;
817
818 mp = *source;
819 *source = NULL;
820
821 while (mp != NULL) {
822 manpage *obsolete;
823 char *data;
824
825 if (mp->mp_left != NULL)
826 processmanpages(&mp->mp_left,dest);
827
828 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
829 if (!addwhatis(dest,data))
830 err(EXIT_FAILURE, NULL);
831 }
832
833 obsolete = mp;
834 mp = mp->mp_right;
835 free(obsolete);
836 }
837 }
838
839 int
840 dumpwhatis (FILE *out, whatis *tree)
841 {
842 while (tree != NULL) {
843 if (tree->wi_left)
844 if (!dumpwhatis(out, tree->wi_left)) return 0;
845
846 if ((fputs(tree->wi_data, out) == EOF) ||
847 (fputc('\n', out) == EOF))
848 return 0;
849
850 tree = tree->wi_right;
851 }
852
853 return 1;
854 }
855