/*	$NetBSD: makewhatis.c,v 1.11 2000/07/13 06:29:43 tron Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.11 2000/07/13 06:29:43 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <signal.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <zlib.h>
67
/*
 * Node of the binary tree of manual page files found while walking the
 * man path.  addmanpage() keys the tree on the inode number so that a
 * file reachable under several names is recorded only once.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree holding smaller inode numbers */
	manpage	*mp_right;	/* subtree holding larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file (search key) */
	char	 mp_name[1];	/* path name; the node is over-allocated so
				 * the string extends past the struct */
};
74
/*
 * Node of the binary tree of whatis entries.  addwhatis() orders the
 * tree with strcmp() on wi_data, and dumpwhatis() writes it out in
 * that order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare lower */
	whatis	*wi_right;	/* entries that compare higher */
	char	*wi_data;	/* the entry text, one output line */
};
80
81 int main (int, char **);
82 char *findwhitespace(char *);
83 char *GetS(gzFile, char *, int);
84 int manpagesection (char *);
85 int addmanpage (manpage **, ino_t, char *);
86 int addwhatis (whatis **, char *);
87 char *replacestring (char *, char *, char *);
88 void catpreprocess (char *);
89 char *parsecatpage (gzFile *);
90 int manpreprocess (char *);
91 char *nroff (gzFile *);
92 char *parsemanpage (gzFile *, int);
93 char *getwhatisdata (char *);
94 void processmanpages (manpage **,whatis **);
95 int dumpwhatis (FILE *, whatis *);
96
/* Man path used by main() when no directory is given on the command line. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters accepted as a section suffix after a '.' in a file name.
 * manpagesection() returns the index of the matching character.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file main() writes into the first man directory. */
char whatisdb[] = "whatis.db";

/* Program name used in diagnostics; defined outside this file. */
extern char *__progname;
106
107 int
108 main(int argc,char **argv)
109 {
110 char **manpath;
111 FTS *fts;
112 FTSENT *fe;
113 manpage *source;
114 whatis *dest;
115 FILE *out;
116
117 (void)setlocale(LC_ALL, "");
118
119 manpath = (argc < 2) ? default_manpath : &argv[1];
120
121 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
122 perror(__progname);
123 return EXIT_FAILURE;
124 }
125
126 source = NULL;
127 while ((fe = fts_read(fts)) != NULL) {
128 switch (fe->fts_info) {
129 case FTS_F:
130 if (manpagesection(fe->fts_path) >= 0)
131 if (!addmanpage(&source,
132 fe->fts_statp->st_ino,
133 fe->fts_path))
134 err(EXIT_FAILURE, NULL);
135 case FTS_D:
136 case FTS_DC:
137 case FTS_DEFAULT:
138 case FTS_DP:
139 case FTS_SLNONE:
140 break;
141 default:
142 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
143 strerror(fe->fts_errno));
144
145 }
146 }
147
148 (void)fts_close(fts);
149
150 dest = NULL;
151 processmanpages(&source, &dest);
152
153 if (chdir(manpath[0]) < 0)
154 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
155
156 if ((out = fopen(whatisdb, "w")) == NULL)
157 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
158
159 if (!(dumpwhatis(out, dest) ||
160 (fclose(out) < 0)) ||
161 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
162 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
163
164 return EXIT_SUCCESS;
165 }
166
/*
 * Return a pointer to the first whitespace character in str, or NULL
 * if the string contains none.
 */
char *
findwhitespace(char *str)
{
	/*
	 * The cast matters: passing a negative plain char to isspace()
	 * is undefined behaviour.
	 */
	for (; *str != '\0'; str++)
		if (isspace((unsigned char)*str))
			return str;

	return NULL;
}
179
180 char
181 *GetS(gzFile in, char *buffer, int length)
182
183 {
184 char *ptr;
185
186 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
187 ptr = NULL;
188
189 return ptr;
190 }
191
192 int
193 manpagesection(char *name)
194 {
195 char *ptr;
196
197 if ((ptr = strrchr(name, '/')) != NULL)
198 ptr++;
199 else
200 ptr = name;
201
202 while ((ptr = strchr(ptr, '.')) != NULL) {
203 int section;
204
205 ptr++;
206 section=0;
207 while (sectionext[section] != '\0')
208 if (sectionext[section] == *ptr)
209 return section;
210 else
211 section++;
212 }
213
214 return -1;
215 }
216
217 int
218 addmanpage(manpage **tree,ino_t inode,char *name)
219 {
220 manpage *mp;
221
222 while ((mp = *tree) != NULL) {
223 if (mp->mp_inode == inode)
224 return 1;
225 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
226 }
227
228 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
229 return 0;
230
231 mp->mp_left = NULL;
232 mp->mp_right = NULL;
233 mp->mp_inode = inode;
234 (void) strcpy(mp->mp_name, name);
235 *tree = mp;
236
237 return 1;
238 }
239
240 int
241 addwhatis(whatis **tree, char *data)
242 {
243 whatis *wi;
244 int result;
245
246 while (isspace(*data))
247 data++;
248
249 if (*data == '/') {
250 char *ptr;
251
252 ptr = ++data;
253 while ((*ptr != '\0') && !isspace(*ptr))
254 if (*ptr++ == '/')
255 data = ptr;
256 }
257
258 while ((wi = *tree) != NULL) {
259 result=strcmp(data, wi->wi_data);
260 if (result == 0) return 1;
261 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
262 }
263
264 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
265 return 0;
266
267 wi->wi_left = NULL;
268 wi->wi_right = NULL;
269 wi->wi_data = data;
270 *tree = wi;
271
272 return 1;
273 }
274
/*
 * Clean up one line of a formatted ("cat") manual page in place.
 *
 * Leading and trailing whitespace is removed, every inner run of
 * whitespace becomes a single blank, and each "character, backspace"
 * pair (as used for overstriking) is dropped so that only the final
 * character of an overstrike sequence remains.
 */
void
catpreprocess(char *from)
{
	char	*to;

	to = from;
	/* Casts to unsigned char keep isspace() defined for bytes >= 0x80. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			/* Emit a separator only if more text follows. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\b') {
			/* Skip the overstruck character and the backspace. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
296
/*
 * Return a newly allocated copy of string in which the first occurrence
 * of old is replaced by new.
 *
 * If new is NULL, or old does not occur in string, an unmodified copy
 * is returned.  The caller owns the result and must free() it.
 * Returns NULL if memory could not be allocated.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char	*match, *result;
	size_t	 slength, olength, nlength, pos;

	if (new == NULL)
		return strdup(string);

	if ((match = strstr(string, old)) == NULL)
		return strdup(string);

	/* Lengths are kept in size_t; int could truncate strlen()'s result. */
	slength = strlen(string);
	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	/* Copy the tail including its terminating NUL. */
	(void) memcpy(&result[pos + nlength], &string[pos + olength],
	    slength - pos - olength + 1);

	return result;
}
324
325 char *
326 parsecatpage(gzFile *in)
327 {
328 char buffer[8192];
329 char *section, *ptr, *last;
330 int size;
331
332 do {
333 if (GetS(in, buffer, sizeof(buffer)) == NULL)
334 return NULL;
335 }
336 while (buffer[0] == '\n');
337
338 section = NULL;
339 if ((ptr = strchr(buffer, '(')) != NULL) {
340 if ((last = strchr(ptr + 1, ')')) !=NULL) {
341 int length;
342
343 length = last - ptr + 1;
344 if ((section = malloc(length + 5)) == NULL)
345 return NULL;
346
347 *section = ' ';
348 (void) memcpy(section + 1, ptr, length);
349 (void) strcpy(section + 1 + length, " - ");
350 }
351 }
352
353 for (;;) {
354 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
355 free(section);
356 return NULL;
357 }
358 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
359 break;
360 }
361
362 ptr = last = buffer;
363 size = sizeof(buffer) - 1;
364 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
365 int length;
366
367 catpreprocess(ptr);
368
369 length = strlen(ptr);
370 if (length == 0) {
371 *last = '\0';
372
373 ptr = replacestring(buffer, " - ", section);
374 free(section);
375 return ptr;
376 }
377 if ((length > 1) && (ptr[length - 1] == '-') &&
378 isalpha(ptr[length - 2]))
379 last = &ptr[--length];
380 else {
381 last = &ptr[length++];
382 *last = ' ';
383 }
384
385 ptr += length;
386 size -= length;
387 }
388
389 free(section);
390
391 return NULL;
392 }
393
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if its first non-blank text is
 * the comment introducer .\" ; returns 0 otherwise.
 *
 * For other lines: leading and trailing whitespace is removed; inner
 * whitespace runs become one blank, or vanish when a ',' follows;
 * double quotes are dropped; "\-" becomes "-"; "\s" is removed together
 * with an optional sign and digits (as in "\s-1"); any other backslash
 * escape loses the backslash and the character after it.  Finally a
 * line of the form ".Xr name section" is rewritten as "name(section)".
 */
int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Leave '-' (or the end) for the main loop. */
				break;
			case 's':
				/*
				 * Step over the 's' itself first; without
				 * this the sign and digit tests below can
				 * never match and "s-1" ends up in the
				 * output.
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int	length;

			/*
			 * Both parts move towards the start of the line,
			 * so memmove() is required and nothing still
			 * needed is overwritten.
			 */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
455
456 char *
457 nroff(gzFile *in)
458 {
459 char tempname[MAXPATHLEN], buffer[65536], *data;
460 int tempfd, bytes, pipefd[2], status;
461 static int devnull = -1;
462 pid_t child;
463
464 if (gzrewind(in) < 0) {
465 perror(__progname);
466 return NULL;
467 }
468
469 if ((devnull < 0) &&
470 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
471 perror(__progname);
472 return NULL;
473 }
474
475 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
476 if ((tempfd = mkstemp(tempname)) < 0) {
477 perror(__progname);
478 return NULL;
479 }
480
481 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
482 if (write(tempfd, buffer, bytes) != bytes) {
483 bytes = -1;
484 break;
485 }
486
487 if ((bytes < 0) ||
488 (lseek(tempfd, 0, SEEK_SET) < 0) ||
489 (pipe(pipefd) < 0)) {
490 perror(__progname);
491 (void)close(tempfd);
492 (void)unlink(tempname);
493 return NULL;
494 }
495
496 switch (child = vfork()) {
497 case -1:
498 perror(__progname);
499 (void)close(pipefd[1]);
500 (void)close(pipefd[0]);
501 (void)close(tempfd);
502 (void)unlink(tempname);
503 return NULL;
504 /* NOTREACHED */
505 case 0:
506 (void)close(pipefd[0]);
507 if (tempfd != STDIN_FILENO) {
508 (void)dup2(tempfd, STDIN_FILENO);
509 (void)close(tempfd);
510 }
511 if (pipefd[1] != STDOUT_FILENO) {
512 (void)dup2(pipefd[1], STDOUT_FILENO);
513 (void)close(pipefd[1]);
514 }
515 if (devnull != STDERR_FILENO) {
516 (void)dup2(devnull, STDERR_FILENO);
517 (void)close(devnull);
518 }
519 (void)execlp("nroff", "nroff", "-mandoc", NULL);
520 _exit(EXIT_FAILURE);
521 default:
522 (void)close(pipefd[1]);
523 (void)close(tempfd);
524 /* NOTREACHED */
525 }
526
527 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
528 if (errno == 0)
529 errno = ENOMEM;
530 perror(__progname);
531 (void)close(pipefd[0]);
532 (void)kill(child, SIGTERM);
533 while (waitpid(child, NULL, 0) != child);
534 (void)unlink(tempname);
535 return NULL;
536 }
537
538 data = parsecatpage(in);
539 while (gzread(in, buffer, sizeof(buffer)) > 0);
540 (void)gzclose(in);
541
542 while (waitpid(child, &status, 0) != child);
543 if ((data != NULL) &&
544 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
545 free(data);
546 data = NULL;
547 }
548
549 (void)unlink(tempname);
550
551 return data;
552 }
553
554 char *
555 parsemanpage(gzFile *in, int defaultsection)
556 {
557 char *section, buffer[8192], *ptr;
558
559 section = NULL;
560 do {
561 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
562 free(section);
563 return NULL;
564 }
565 if (manpreprocess(buffer))
566 continue;
567 if (strncasecmp(buffer, ".Dt", 3) == 0) {
568 char *end;
569
570 ptr = &buffer[3];
571 if (isspace(*ptr))
572 ptr++;
573 if ((ptr = findwhitespace(ptr)) == NULL)
574 continue;
575
576 if ((end = findwhitespace(++ptr)) != NULL)
577 *end = '\0';
578
579 free(section);
580 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
581 section[0] = ' ';
582 section[1] = '(';
583 (void) strcpy(§ion[2], ptr);
584 (void) strcat(§ion[2], ") - ");
585 }
586 }
587 else if (strncasecmp(buffer, ".Ds", 3) == 0)
588 return nroff(in);
589 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
590
591 do {
592 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
593 free(section);
594 return NULL;
595 }
596 } while (manpreprocess(buffer));
597
598 if (strncasecmp(buffer, ".Nm", 3) == 0) {
599 int length, offset;
600
601 ptr = &buffer[3];
602 while (isspace(*ptr))
603 ptr++;
604
605 length = strlen(ptr);
606 if ((length > 1) && (ptr[length - 1] == ',') &&
607 isspace(ptr[length - 2])) {
608 ptr[--length] = '\0';
609 ptr[length - 1] = ',';
610 }
611 (void) memmove(buffer, ptr, length + 1);
612
613 offset = length + 3;
614 ptr = &buffer[offset];
615 for (;;) {
616 int more;
617
618 if ((sizeof(buffer) == offset) ||
619 (GetS(in, ptr, sizeof(buffer) - offset)
620 == NULL)) {
621 free(section);
622 return NULL;
623 }
624 if (manpreprocess(ptr))
625 continue;
626
627 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
628
629 ptr += 3;
630 if (isspace(*ptr))
631 ptr++;
632
633 buffer[length++] = ' ';
634 more = strlen(ptr);
635 if ((more > 1) && (ptr[more - 1] == ',') &&
636 isspace(ptr[more - 2])) {
637 ptr[--more] = '\0';
638 ptr[more - 1] = ',';
639 }
640
641 (void) memmove(&buffer[length], ptr, more + 1);
642 length += more;
643 offset = length + 3;
644
645 ptr = &buffer[offset];
646 }
647
648 if (strncasecmp(ptr, ".Nd", 3) == 0) {
649 (void) strcpy(&buffer[length], " -");
650
651 while (strncasecmp(ptr, ".Sh", 3) != 0) {
652 int more;
653
654 if (*ptr == '.') {
655 char *space;
656
657 if ((space = findwhitespace(ptr)) == NULL)
658 ptr = "";
659 else {
660 space++;
661 (void) memmove(ptr, space,
662 strlen(space) + 1);
663 }
664 }
665
666 if (*ptr != '\0') {
667 buffer[offset - 1] = ' ';
668 more = strlen(ptr) + 1;
669 offset += more;
670 }
671 ptr = &buffer[offset];
672 if ((sizeof(buffer) == offset) ||
673 (GetS(in, ptr, sizeof(buffer) - offset)
674 == NULL)) {
675 free(section);
676 return NULL;
677 }
678 if (manpreprocess(ptr))
679 *ptr = '\0';
680 }
681 }
682 }
683 else {
684 int offset;
685
686 if (*buffer == '.') {
687 char *space;
688
689 if ((space = findwhitespace(buffer)) == NULL) {
690 free(section);
691 return NULL;
692 }
693 space++;
694 (void) memmove(buffer, space, strlen(space) + 1);
695 }
696
697 offset = strlen(buffer) + 1;
698 for (;;) {
699 int more;
700
701 ptr = &buffer[offset];
702 if ((sizeof(buffer) == offset) ||
703 (GetS(in, ptr, sizeof(buffer) - offset)
704 == NULL)) {
705 free(section);
706 return NULL;
707 }
708 if (manpreprocess(ptr) || (*ptr == '\0'))
709 continue;
710
711 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
712 (strncasecmp(ptr, ".Ss", 3) == 0))
713 break;
714
715 if (*ptr == '.') {
716 char *space;
717
718 if ((space = findwhitespace(ptr)) == NULL) {
719 continue;
720 }
721
722 space++;
723 (void) memmove(ptr, space, strlen(space) + 1);
724 }
725
726 buffer[offset - 1] = ' ';
727 more = strlen(ptr);
728 if ((more > 1) && (ptr[more - 1] == ',') &&
729 isspace(ptr[more - 2])) {
730 ptr[more - 1] = '\0';
731 ptr[more - 2] = ',';
732 }
733 else more++;
734 offset += more;
735 }
736 }
737
738 if (section == NULL) {
739 char sectionbuffer[24];
740
741 (void) sprintf(sectionbuffer, " (%c) - ",
742 sectionext[defaultsection]);
743 ptr = replacestring(buffer, " - ", sectionbuffer);
744 }
745 else {
746 ptr = replacestring(buffer, " - ", section);
747 free(section);
748 }
749 return ptr;
750 }
751
752 char *
753 getwhatisdata(char *name)
754 {
755 gzFile *in;
756 char *data;
757 int section;
758
759 if ((in = gzopen(name, "r")) == NULL) {
760 errx(EXIT_FAILURE, "%s: %s",
761 name,
762 strerror((errno == 0) ? ENOMEM : errno));
763 /* NOTREACHED */
764 }
765
766 section = manpagesection(name);
767 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
768
769 (void) gzclose(in);
770 return data;
771 }
772
773 void
774 processmanpages(manpage **source, whatis **dest)
775 {
776 manpage *mp;
777
778 mp = *source;
779 *source = NULL;
780
781 while (mp != NULL) {
782 manpage *obsolete;
783 char *data;
784
785 if (mp->mp_left != NULL)
786 processmanpages(&mp->mp_left,dest);
787
788 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
789 if (!addwhatis(dest,data))
790 err(EXIT_FAILURE, NULL);
791 }
792
793 obsolete = mp;
794 mp = mp->mp_right;
795 free(obsolete);
796 }
797 }
798
799 int
800 dumpwhatis (FILE *out, whatis *tree)
801 {
802 while (tree != NULL) {
803 if (tree->wi_left)
804 if (!dumpwhatis(out, tree->wi_left)) return 0;
805
806 if ((fputs(tree->wi_data, out) == EOF) ||
807 (fputc('\n', out) == EOF))
808 return 0;
809
810 tree = tree->wi_right;
811 }
812
813 return 1;
814 }
815