makewhatis.c revision 1.9 1 /* $NetBSD: makewhatis.c,v 1.9 2000/07/10 08:11:31 tron Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.9 2000/07/10 08:11:31 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fts.h>
58 #include <locale.h>
59 #include <paths.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <zlib.h>
65
/*
 * Node of the binary search tree of manual page files collected by
 * addmanpage().  Nodes are ordered by inode number.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree holding smaller inode numbers */
	manpage	*mp_right;	/* subtree holding larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file */
	char	 mp_name[1];	/* path name; the node is allocated with
				   extra room so the whole string fits */
};
72
/*
 * Node of the binary search tree of whatis lines built by addwhatis().
 * Nodes are ordered by strcmp() on wi_data.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* subtree with lines that sort before wi_data */
	whatis	*wi_right;	/* subtree with lines that sort after wi_data */
	char	*wi_data;	/* the whatis line itself (not copied) */
};
78
79 int main (int, char **);
80 char *findwhitespace(char *);
81 char *GetS(gzFile, char *, int);
82 int manpagesection (char *);
83 int addmanpage (manpage **, ino_t, char *);
84 int addwhatis (whatis **, char *);
85 char *replacestring (char *, char *, char *);
86 void catpreprocess (char *);
87 char *parsecatpage (gzFile *);
88 int manpreprocess (char *);
89 char *nroff (gzFile *);
90 char *parsemanpage (gzFile *, int);
91 char *getwhatisdata (char *);
92 void processmanpages (manpage **,whatis **);
93 int dumpwhatis (FILE *, whatis *);
94
/* Directory tree scanned by main() when no argument is given. */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};

/*
 * Characters accepted after a '.' as a manual section suffix.
 * manpagesection() returns an index into this string; index 0 ('0')
 * makes getwhatisdata() treat the file as a formatted (cat) page.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file main() writes into the first manpath entry. */
char whatisdb[] = "whatis.db";

/* Program name used for perror() prefixes; defined outside this file. */
extern char *__progname;
104
105 int
106 main(int argc,char **argv)
107 {
108 char **manpath;
109 FTS *fts;
110 FTSENT *fe;
111 manpage *source;
112 whatis *dest;
113 FILE *out;
114
115 (void)setlocale(LC_ALL, "");
116
117 manpath = (argc < 2) ? default_manpath : &argv[1];
118
119 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
120 perror(__progname);
121 return EXIT_FAILURE;
122 }
123
124 source = NULL;
125 while ((fe = fts_read(fts)) != NULL) {
126 switch (fe->fts_info) {
127 case FTS_F:
128 if (manpagesection(fe->fts_path) >= 0)
129 if (!addmanpage(&source,
130 fe->fts_statp->st_ino,
131 fe->fts_path))
132 err(EXIT_FAILURE, NULL);
133 case FTS_D:
134 case FTS_DC:
135 case FTS_DEFAULT:
136 case FTS_DP:
137 case FTS_SLNONE:
138 break;
139 default:
140 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
141 strerror(fe->fts_errno));
142
143 }
144 }
145
146 (void)fts_close(fts);
147
148 dest = NULL;
149 processmanpages(&source, &dest);
150
151 if (chdir(manpath[0]) < 0)
152 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
153
154 if ((out = fopen(whatisdb, "w")) == NULL)
155 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
156
157 if (!(dumpwhatis(out, dest) ||
158 (fclose(out) < 0)) ||
159 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
160 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
161
162 return EXIT_SUCCESS;
163 }
164
/*
 * Return a pointer to the first whitespace character in str, or NULL
 * if the string ends before any whitespace is found.
 */
char
*findwhitespace(char *str)
{
	/*
	 * The cast keeps bytes >= 0x80 from being passed to isspace() as
	 * negative values, which is undefined behaviour.
	 */
	for (; !isspace((unsigned char)*str); str++)
		if (*str == '\0')
			return NULL;

	return str;
}
177
178 char
179 *GetS(gzFile in, char *buffer, int length)
180
181 {
182 char *ptr;
183
184 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
185 ptr = NULL;
186
187 return ptr;
188 }
189
190 int
191 manpagesection(char *name)
192 {
193 char *ptr;
194
195 if ((ptr = strrchr(name, '/')) != NULL)
196 ptr++;
197 else
198 ptr = name;
199
200 while ((ptr = strchr(ptr, '.')) != NULL) {
201 int section;
202
203 ptr++;
204 section=0;
205 while (sectionext[section] != '\0')
206 if (sectionext[section] == *ptr)
207 return section;
208 else
209 section++;
210 }
211
212 return -1;
213 }
214
215 int
216 addmanpage(manpage **tree,ino_t inode,char *name)
217 {
218 manpage *mp;
219
220 while ((mp = *tree) != NULL) {
221 if (mp->mp_inode == inode)
222 return 1;
223 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
224 }
225
226 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
227 return 0;
228
229 mp->mp_left = NULL;
230 mp->mp_right = NULL;
231 mp->mp_inode = inode;
232 (void) strcpy(mp->mp_name, name);
233 *tree = mp;
234
235 return 1;
236 }
237
238 int
239 addwhatis(whatis **tree, char *data)
240 {
241 whatis *wi;
242 int result;
243
244 while (isspace(*data))
245 data++;
246
247 if (*data == '/') {
248 char *ptr;
249
250 ptr = ++data;
251 while ((*ptr != '\0') && !isspace(*ptr))
252 if (*ptr++ == '/')
253 data = ptr;
254 }
255
256 while ((wi = *tree) != NULL) {
257 result=strcmp(data, wi->wi_data);
258 if (result == 0) return 1;
259 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
260 }
261
262 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
263 return 0;
264
265 wi->wi_left = NULL;
266 wi->wi_right = NULL;
267 wi->wi_data = data;
268 *tree = wi;
269
270 return 1;
271 }
272
/*
 * Clean up one line of a formatted (cat) manual page in place:
 *  - leading and trailing whitespace is removed,
 *  - every other run of whitespace becomes a single blank,
 *  - a character followed by a backspace (overstrike sequence) is
 *    dropped together with the backspace.
 */
void
catpreprocess(char *from)
{
	char *to = from;

	/* Casts: ctype functions are undefined for negative char values. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			/* No blank for whitespace at the end of the line. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* "x\b": skip the character and the backspace. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
294
/*
 * Return a newly allocated copy of "string" in which the first
 * occurrence of "old" is replaced by "new".  If "new" is NULL or "old"
 * does not occur, a plain copy of "string" is returned.  Returns NULL
 * if memory allocation fails.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *out;
	int total, oldlen, newlen, prefix;

	if (new == NULL)
		return strdup(string);

	if ((match = strstr(string, old)) == NULL)
		return strdup(string);

	total = strlen(string);
	oldlen = strlen(old);
	newlen = strlen(new);

	out = malloc(total - oldlen + newlen + 1);
	if (out == NULL)
		return NULL;

	/* prefix | replacement | remainder (with its terminating NUL) */
	prefix = match - string;
	(void) memcpy(out, string, prefix);
	(void) memcpy(out + prefix, new, newlen);
	(void) strcpy(out + prefix + newlen, string + prefix + oldlen);

	return out;
}
322
323 char *
324 parsecatpage(gzFile *in)
325 {
326 char buffer[8192];
327 char *section, *ptr, *last;
328 int size;
329
330 do {
331 if (GetS(in, buffer, sizeof(buffer)) == NULL)
332 return NULL;
333 }
334 while (buffer[0] == '\n');
335
336 section = NULL;
337 if ((ptr = strchr(buffer, '(')) != NULL) {
338 if ((last = strchr(ptr + 1, ')')) !=NULL) {
339 int length;
340
341 length = last - ptr + 1;
342 if ((section = malloc(length + 5)) == NULL)
343 return NULL;
344
345 *section = ' ';
346 (void) memcpy(section + 1, ptr, length);
347 (void) strcpy(section + 1 + length, " - ");
348 }
349 }
350
351 for (;;) {
352 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
353 free(section);
354 return NULL;
355 }
356 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
357 break;
358 }
359
360 ptr = last = buffer;
361 size = sizeof(buffer) - 1;
362 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
363 int length;
364
365 catpreprocess(ptr);
366
367 length = strlen(ptr);
368 if (length == 0) {
369 *last = '\0';
370
371 ptr = replacestring(buffer, " - ", section);
372 free(section);
373 return ptr;
374 }
375 if ((length > 1) && (ptr[length - 1] == '-') &&
376 isalpha(ptr[length - 2]))
377 last = &ptr[--length];
378 else {
379 last = &ptr[length++];
380 *last = ' ';
381 }
382
383 ptr += length;
384 size -= length;
385 }
386
387 free(section);
388
389 return NULL;
390 }
391
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1 (leaving the line untouched) if, after leading whitespace,
 * the line starts with the comment introducer .\" ; otherwise returns 0
 * after:
 *  - removing leading/trailing whitespace and collapsing other
 *    whitespace runs to one blank (no blank is emitted before ','),
 *  - removing backslash escapes: "\-" becomes "-", "\s" is removed
 *    together with an optional sign and the digits that follow, any
 *    other "\x" is removed entirely,
 *  - removing double quotes,
 *  - rewriting a line of the form ".Xr name section" to
 *    "name(section)".
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* Casts: ctype functions are undefined for negative char values. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the '-' (or stop at end of line). */
				break;
			case 's':
				/*
				 * Step over the 's' itself first; without
				 * this the sign/digit tests below look at
				 * the 's' and never skip the size argument.
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int length;

			/* Result is shorter than the input, so it fits. */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
453
454 char *
455 nroff(gzFile *in)
456 {
457 char tempname[MAXPATHLEN], buffer[65536], *data;
458 int tempfd, bytes, pipefd[2], status;
459 pid_t child;
460
461 if (gzrewind(in) < 0) {
462 perror(__progname);
463 return NULL;
464 }
465
466 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
467 if ((tempfd = mkstemp(tempname)) < 0) {
468 perror(__progname);
469 return NULL;
470 }
471
472 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
473 if (write(tempfd, buffer, bytes) != bytes) {
474 bytes = -1;
475 break;
476 }
477
478 if ((bytes < 0) ||
479 (lseek(tempfd, 0, SEEK_SET) < 0) ||
480 (pipe(pipefd) < 0)) {
481 perror(__progname);
482 (void)close(tempfd);
483 (void)unlink(tempname);
484 return NULL;
485 }
486
487 switch (child = vfork()) {
488 case -1:
489 perror(__progname);
490 (void)close(pipefd[1]);
491 (void)close(pipefd[0]);
492 (void)close(tempfd);
493 (void)unlink(tempname);
494 return NULL;
495 /* NOTREACHED */
496 case 0:
497 (void)close(pipefd[0]);
498 if (pipefd[1] != STDOUT_FILENO) {
499 (void)dup2(pipefd[1], STDOUT_FILENO);
500 (void)close(pipefd[1]);
501 }
502 (void)execlp("nroff", "nroff", "-mandoc", tempname, NULL);
503 _exit(EXIT_FAILURE);
504 default:
505 (void)close(pipefd[1]);
506 (void)close(tempfd);
507 /* NOTREACHED */
508 }
509
510 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
511 if (errno == 0)
512 errno = ENOMEM;
513 perror(__progname);
514 (void)close(pipefd[0]); /* Child will be killed by SIGPIPE. */
515 (void)unlink(tempname);
516 return NULL;
517 }
518
519 data = parsecatpage(in);
520 while (gzread(in, buffer, sizeof(buffer)) > 0);
521 (void)gzclose(in);
522
523 while (waitpid(child, &status, 0) != child);
524 if ((data != NULL) &&
525 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
526 free(data);
527 data = NULL;
528 }
529
530 (void)unlink(tempname);
531
532 return data;
533 }
534
535 char *
536 parsemanpage(gzFile *in, int defaultsection)
537 {
538 char *section, buffer[8192], *ptr;
539
540 section = NULL;
541 do {
542 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
543 free(section);
544 return NULL;
545 }
546 if (manpreprocess(buffer))
547 continue;
548 if (strncasecmp(buffer, ".Dt", 3) == 0) {
549 char *end;
550
551 ptr = &buffer[3];
552 if (isspace(*ptr))
553 ptr++;
554 if ((ptr = findwhitespace(ptr)) == NULL)
555 continue;
556
557 if ((end = findwhitespace(++ptr)) != NULL)
558 *end = '\0';
559
560 free(section);
561 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
562 section[0] = ' ';
563 section[1] = '(';
564 (void) strcpy(§ion[2], ptr);
565 (void) strcat(§ion[2], ") - ");
566 }
567 }
568 else if (strncasecmp(buffer, ".Ds", 3) == 0)
569 return nroff(in);
570 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
571
572 do {
573 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
574 free(section);
575 return NULL;
576 }
577 } while (manpreprocess(buffer));
578
579 if (strncasecmp(buffer, ".Nm", 3) == 0) {
580 int length, offset;
581
582 ptr = &buffer[3];
583 while (isspace(*ptr))
584 ptr++;
585
586 length = strlen(ptr);
587 if ((length > 1) && (ptr[length - 1] == ',') &&
588 isspace(ptr[length - 2])) {
589 ptr[--length] = '\0';
590 ptr[length - 1] = ',';
591 }
592 (void) memmove(buffer, ptr, length + 1);
593
594 offset = length + 3;
595 ptr = &buffer[offset];
596 for (;;) {
597 int more;
598
599 if ((sizeof(buffer) == offset) ||
600 (GetS(in, ptr, sizeof(buffer) - offset)
601 == NULL)) {
602 free(section);
603 return NULL;
604 }
605 if (manpreprocess(ptr))
606 continue;
607
608 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
609
610 ptr += 3;
611 if (isspace(*ptr))
612 ptr++;
613
614 buffer[length++] = ' ';
615 more = strlen(ptr);
616 if ((more > 1) && (ptr[more - 1] == ',') &&
617 isspace(ptr[more - 2])) {
618 ptr[--more] = '\0';
619 ptr[more - 1] = ',';
620 }
621
622 (void) memmove(&buffer[length], ptr, more + 1);
623 length += more;
624 offset = length + 3;
625
626 ptr = &buffer[offset];
627 }
628
629 if (strncasecmp(ptr, ".Nd", 3) == 0) {
630 (void) strcpy(&buffer[length], " -");
631
632 while (strncasecmp(ptr, ".Sh", 3) != 0) {
633 int more;
634
635 if (*ptr == '.') {
636 char *space;
637
638 if ((space = findwhitespace(ptr)) == NULL)
639 ptr = "";
640 else {
641 space++;
642 (void) memmove(ptr, space,
643 strlen(space) + 1);
644 }
645 }
646
647 if (*ptr != '\0') {
648 buffer[offset - 1] = ' ';
649 more = strlen(ptr) + 1;
650 offset += more;
651 }
652 ptr = &buffer[offset];
653 if ((sizeof(buffer) == offset) ||
654 (GetS(in, ptr, sizeof(buffer) - offset)
655 == NULL)) {
656 free(section);
657 return NULL;
658 }
659 if (manpreprocess(ptr))
660 *ptr = '\0';
661 }
662 }
663 }
664 else {
665 int offset;
666
667 if (*buffer == '.') {
668 char *space;
669
670 if ((space = findwhitespace(buffer)) == NULL) {
671 free(section);
672 return NULL;
673 }
674 space++;
675 (void) memmove(buffer, space, strlen(space) + 1);
676 }
677
678 offset = strlen(buffer) + 1;
679 for (;;) {
680 int more;
681
682 ptr = &buffer[offset];
683 if ((sizeof(buffer) == offset) ||
684 (GetS(in, ptr, sizeof(buffer) - offset)
685 == NULL)) {
686 free(section);
687 return NULL;
688 }
689 if (manpreprocess(ptr) || (*ptr == '\0'))
690 continue;
691
692 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
693 (strncasecmp(ptr, ".Ss", 3) == 0))
694 break;
695
696 if (*ptr == '.') {
697 char *space;
698
699 if ((space = findwhitespace(ptr)) == NULL) {
700 continue;
701 }
702
703 space++;
704 (void) memmove(ptr, space, strlen(space) + 1);
705 }
706
707 buffer[offset - 1] = ' ';
708 more = strlen(ptr);
709 if ((more > 1) && (ptr[more - 1] == ',') &&
710 isspace(ptr[more - 2])) {
711 ptr[more - 1] = '\0';
712 ptr[more - 2] = ',';
713 }
714 else more++;
715 offset += more;
716 }
717 }
718
719 if (section == NULL) {
720 char sectionbuffer[24];
721
722 (void) sprintf(sectionbuffer, " (%c) - ",
723 sectionext[defaultsection]);
724 ptr = replacestring(buffer, " - ", sectionbuffer);
725 }
726 else {
727 ptr = replacestring(buffer, " - ", section);
728 free(section);
729 }
730 return ptr;
731 }
732
733 char *
734 getwhatisdata(char *name)
735 {
736 gzFile *in;
737 char *data;
738 int section;
739
740 if ((in = gzopen(name, "r")) == NULL) {
741 errx(EXIT_FAILURE, "%s: %s",
742 name,
743 strerror((errno == 0) ? ENOMEM : errno));
744 /* NOTREACHED */
745 }
746
747 section = manpagesection(name);
748 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
749
750 (void) gzclose(in);
751 return data;
752 }
753
754 void
755 processmanpages(manpage **source, whatis **dest)
756 {
757 manpage *mp;
758
759 mp = *source;
760 *source = NULL;
761
762 while (mp != NULL) {
763 manpage *obsolete;
764 char *data;
765
766 if (mp->mp_left != NULL)
767 processmanpages(&mp->mp_left,dest);
768
769 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
770 if (!addwhatis(dest,data))
771 err(EXIT_FAILURE, NULL);
772 }
773
774 obsolete = mp;
775 mp = mp->mp_right;
776 free(obsolete);
777 }
778 }
779
780 int
781 dumpwhatis (FILE *out, whatis *tree)
782 {
783 while (tree != NULL) {
784 if (tree->wi_left)
785 if (!dumpwhatis(out, tree->wi_left)) return 0;
786
787 if ((fputs(tree->wi_data, out) == EOF) ||
788 (fputc('\n', out) == EOF))
789 return 0;
790
791 tree = tree->wi_right;
792 }
793
794 return 1;
795 }
796