/*	$NetBSD: makewhatis.c,v 1.10 2000/07/13 06:15:03 tron Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.10 2000/07/13 06:15:03 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <zlib.h>
66
/*
 * Node of the binary tree of manual page files collected by main().
 * addmanpage() orders the nodes by inode number: smaller inodes go to
 * the left, everything else to the right.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* subtree with smaller inode numbers */
	manpage	*mp_right;	/* subtree with larger inode numbers */
	ino_t	 mp_inode;	/* inode number used as the tree key */
	char	 mp_name[1];	/* path name; addmanpage() allocates the
				 * node large enough to hold all of it */
};
73
/*
 * Node of the binary tree of whatis entries.  addwhatis() orders the
 * nodes by strcmp() on wi_data; dumpwhatis() writes them out in order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare lower */
	whatis	*wi_right;	/* entries that compare higher */
	char	*wi_data;	/* text of the entry (one output line) */
};
79
80 int main (int, char **);
81 char *findwhitespace(char *);
82 char *GetS(gzFile, char *, int);
83 int manpagesection (char *);
84 int addmanpage (manpage **, ino_t, char *);
85 int addwhatis (whatis **, char *);
86 char *replacestring (char *, char *, char *);
87 void catpreprocess (char *);
88 char *parsecatpage (gzFile *);
89 int manpreprocess (char *);
90 char *nroff (gzFile *);
91 char *parsemanpage (gzFile *, int);
92 char *getwhatisdata (char *);
93 void processmanpages (manpage **,whatis **);
94 int dumpwhatis (FILE *, whatis *);
95
/* Directories scanned by main() when none are given on the command line. */
char	*default_manpath[] = {
	"/usr/share/man",
	NULL
};

/*
 * Characters accepted as a section suffix after a '.' in a file name.
 * manpagesection() returns an index into this string.
 */
char	 sectionext[] = "0123456789ln";

/* Name of the database file that main() creates. */
char	 whatisdb[] = "whatis.db";

/* Program name, used as the prefix for perror() messages. */
extern char	*__progname;
105
106 int
107 main(int argc,char **argv)
108 {
109 char **manpath;
110 FTS *fts;
111 FTSENT *fe;
112 manpage *source;
113 whatis *dest;
114 FILE *out;
115
116 (void)setlocale(LC_ALL, "");
117
118 manpath = (argc < 2) ? default_manpath : &argv[1];
119
120 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
121 perror(__progname);
122 return EXIT_FAILURE;
123 }
124
125 source = NULL;
126 while ((fe = fts_read(fts)) != NULL) {
127 switch (fe->fts_info) {
128 case FTS_F:
129 if (manpagesection(fe->fts_path) >= 0)
130 if (!addmanpage(&source,
131 fe->fts_statp->st_ino,
132 fe->fts_path))
133 err(EXIT_FAILURE, NULL);
134 case FTS_D:
135 case FTS_DC:
136 case FTS_DEFAULT:
137 case FTS_DP:
138 case FTS_SLNONE:
139 break;
140 default:
141 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
142 strerror(fe->fts_errno));
143
144 }
145 }
146
147 (void)fts_close(fts);
148
149 dest = NULL;
150 processmanpages(&source, &dest);
151
152 if (chdir(manpath[0]) < 0)
153 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
154
155 if ((out = fopen(whatisdb, "w")) == NULL)
156 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
157
158 if (!(dumpwhatis(out, dest) ||
159 (fclose(out) < 0)) ||
160 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
161 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
162
163 return EXIT_SUCCESS;
164 }
165
/*
 * Return a pointer to the first whitespace character in `str', or NULL
 * if the string ends before any whitespace is found.
 */
char *
findwhitespace(char *str)
{
	/*
	 * The cast keeps bytes with the high bit set from being passed
	 * to isspace() as negative values, which is undefined.
	 */
	for (; !isspace((unsigned char)*str); str++)
		if (*str == '\0')
			return NULL;

	return str;
}
178
179 char
180 *GetS(gzFile in, char *buffer, int length)
181
182 {
183 char *ptr;
184
185 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
186 ptr = NULL;
187
188 return ptr;
189 }
190
191 int
192 manpagesection(char *name)
193 {
194 char *ptr;
195
196 if ((ptr = strrchr(name, '/')) != NULL)
197 ptr++;
198 else
199 ptr = name;
200
201 while ((ptr = strchr(ptr, '.')) != NULL) {
202 int section;
203
204 ptr++;
205 section=0;
206 while (sectionext[section] != '\0')
207 if (sectionext[section] == *ptr)
208 return section;
209 else
210 section++;
211 }
212
213 return -1;
214 }
215
216 int
217 addmanpage(manpage **tree,ino_t inode,char *name)
218 {
219 manpage *mp;
220
221 while ((mp = *tree) != NULL) {
222 if (mp->mp_inode == inode)
223 return 1;
224 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
225 }
226
227 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
228 return 0;
229
230 mp->mp_left = NULL;
231 mp->mp_right = NULL;
232 mp->mp_inode = inode;
233 (void) strcpy(mp->mp_name, name);
234 *tree = mp;
235
236 return 1;
237 }
238
239 int
240 addwhatis(whatis **tree, char *data)
241 {
242 whatis *wi;
243 int result;
244
245 while (isspace(*data))
246 data++;
247
248 if (*data == '/') {
249 char *ptr;
250
251 ptr = ++data;
252 while ((*ptr != '\0') && !isspace(*ptr))
253 if (*ptr++ == '/')
254 data = ptr;
255 }
256
257 while ((wi = *tree) != NULL) {
258 result=strcmp(data, wi->wi_data);
259 if (result == 0) return 1;
260 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
261 }
262
263 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
264 return 0;
265
266 wi->wi_left = NULL;
267 wi->wi_right = NULL;
268 wi->wi_data = data;
269 *tree = wi;
270
271 return 1;
272 }
273
/*
 * Clean up one line of formatted (cat page) text in place:
 *  - leading and trailing whitespace is removed,
 *  - every other run of whitespace becomes a single blank,
 *  - a character followed by a backspace (octal 10) is dropped together
 *    with the backspace, which undoes overstriking such as "N\bN".
 */
void
catpreprocess(char *from)
{
	char	*to;

	to = from;
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* Whitespace at the end of the line is discarded. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
295
/*
 * Return a newly allocated copy of `string' in which the first
 * occurrence of `old' has been replaced by `new'.
 *
 * If `new' is NULL or `old' does not occur in `string', the result is
 * an unmodified copy.  The caller owns the returned memory and releases
 * it with free().  Returns NULL if the allocation fails.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char	*match, *result;
	size_t	 slength, olength, nlength, pos;

	slength = strlen(string);

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL) {
		/* Nothing to substitute: hand back a plain copy. */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	olength = strlen(old);
	nlength = strlen(new);
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	/* Prefix, replacement, then the remainder including the NUL. */
	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	(void) strcpy(&result[pos + nlength], &string[pos + olength]);

	return result;
}
323
324 char *
325 parsecatpage(gzFile *in)
326 {
327 char buffer[8192];
328 char *section, *ptr, *last;
329 int size;
330
331 do {
332 if (GetS(in, buffer, sizeof(buffer)) == NULL)
333 return NULL;
334 }
335 while (buffer[0] == '\n');
336
337 section = NULL;
338 if ((ptr = strchr(buffer, '(')) != NULL) {
339 if ((last = strchr(ptr + 1, ')')) !=NULL) {
340 int length;
341
342 length = last - ptr + 1;
343 if ((section = malloc(length + 5)) == NULL)
344 return NULL;
345
346 *section = ' ';
347 (void) memcpy(section + 1, ptr, length);
348 (void) strcpy(section + 1 + length, " - ");
349 }
350 }
351
352 for (;;) {
353 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
354 free(section);
355 return NULL;
356 }
357 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
358 break;
359 }
360
361 ptr = last = buffer;
362 size = sizeof(buffer) - 1;
363 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
364 int length;
365
366 catpreprocess(ptr);
367
368 length = strlen(ptr);
369 if (length == 0) {
370 *last = '\0';
371
372 ptr = replacestring(buffer, " - ", section);
373 free(section);
374 return ptr;
375 }
376 if ((length > 1) && (ptr[length - 1] == '-') &&
377 isalpha(ptr[length - 2]))
378 last = &ptr[--length];
379 else {
380 last = &ptr[length++];
381 *last = ' ';
382 }
383
384 ptr += length;
385 size -= length;
386 }
387
388 free(section);
389
390 return NULL;
391 }
392
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if it is a roff comment (first
 * non-blank text is .\").  Otherwise returns 0 after:
 *  - removing leading and trailing whitespace,
 *  - turning every other run of whitespace into one blank, except in
 *    front of a ',' where it is dropped completely,
 *  - removing double quotes,
 *  - resolving backslash escapes: "\-" becomes "-", a size escape
 *    ("\s" with optional sign and digits) is removed entirely, and for
 *    any other escape the backslash and the following character go,
 *  - rewriting a line of the form ".Xr name sect" as "name(sect)".
 */
int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* '-' is copied by the next iteration. */
				break;
			case 's':
				/*
				 * Step over the 's' first; otherwise the
				 * sign and digit tests below look at the
				 * 's' itself and the escape is kept.
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		/*
		 * The loop above leaves single blanks as the only
		 * whitespace in the line, so the end of the page name
		 * is the next ' '.
		 */
		if ((sect = strchr(from, ' ')) != NULL) {
			int	length;

			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
454
455 char *
456 nroff(gzFile *in)
457 {
458 char tempname[MAXPATHLEN], buffer[65536], *data;
459 int tempfd, bytes, pipefd[2], status;
460 static int devnull = -1;
461 pid_t child;
462
463 if (gzrewind(in) < 0) {
464 perror(__progname);
465 return NULL;
466 }
467
468 if ((devnull < 0) &&
469 ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
470 perror(__progname);
471 return NULL;
472 }
473
474 (void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
475 if ((tempfd = mkstemp(tempname)) < 0) {
476 perror(__progname);
477 return NULL;
478 }
479
480 while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
481 if (write(tempfd, buffer, bytes) != bytes) {
482 bytes = -1;
483 break;
484 }
485
486 if ((bytes < 0) ||
487 (lseek(tempfd, 0, SEEK_SET) < 0) ||
488 (pipe(pipefd) < 0)) {
489 perror(__progname);
490 (void)close(tempfd);
491 (void)unlink(tempname);
492 return NULL;
493 }
494
495 switch (child = vfork()) {
496 case -1:
497 perror(__progname);
498 (void)close(pipefd[1]);
499 (void)close(pipefd[0]);
500 (void)close(tempfd);
501 (void)unlink(tempname);
502 return NULL;
503 /* NOTREACHED */
504 case 0:
505 (void)close(pipefd[0]);
506 if (tempfd != STDIN_FILENO) {
507 (void)dup2(tempfd, STDIN_FILENO);
508 (void)close(tempfd);
509 }
510 if (pipefd[1] != STDOUT_FILENO) {
511 (void)dup2(pipefd[1], STDOUT_FILENO);
512 (void)close(pipefd[1]);
513 }
514 if (devnull != STDERR_FILENO) {
515 (void)dup2(devnull, STDERR_FILENO);
516 (void)close(devnull);
517 }
518 (void)execlp("nroff", "nroff", "-mandoc", NULL);
519 _exit(EXIT_FAILURE);
520 default:
521 (void)close(pipefd[1]);
522 (void)close(tempfd);
523 /* NOTREACHED */
524 }
525
526 if ((in = gzdopen(pipefd[0], "r")) == NULL) {
527 if (errno == 0)
528 errno = ENOMEM;
529 perror(__progname);
530 (void)close(pipefd[0]); /* Child will be killed by SIGPIPE. */
531 (void)unlink(tempname);
532 return NULL;
533 }
534
535 data = parsecatpage(in);
536 while (gzread(in, buffer, sizeof(buffer)) > 0);
537 (void)gzclose(in);
538
539 while (waitpid(child, &status, 0) != child);
540 if ((data != NULL) &&
541 !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
542 free(data);
543 data = NULL;
544 }
545
546 (void)unlink(tempname);
547
548 return data;
549 }
550
551 char *
552 parsemanpage(gzFile *in, int defaultsection)
553 {
554 char *section, buffer[8192], *ptr;
555
556 section = NULL;
557 do {
558 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
559 free(section);
560 return NULL;
561 }
562 if (manpreprocess(buffer))
563 continue;
564 if (strncasecmp(buffer, ".Dt", 3) == 0) {
565 char *end;
566
567 ptr = &buffer[3];
568 if (isspace(*ptr))
569 ptr++;
570 if ((ptr = findwhitespace(ptr)) == NULL)
571 continue;
572
573 if ((end = findwhitespace(++ptr)) != NULL)
574 *end = '\0';
575
576 free(section);
577 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
578 section[0] = ' ';
579 section[1] = '(';
580 (void) strcpy(§ion[2], ptr);
581 (void) strcat(§ion[2], ") - ");
582 }
583 }
584 else if (strncasecmp(buffer, ".Ds", 3) == 0)
585 return nroff(in);
586 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
587
588 do {
589 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
590 free(section);
591 return NULL;
592 }
593 } while (manpreprocess(buffer));
594
595 if (strncasecmp(buffer, ".Nm", 3) == 0) {
596 int length, offset;
597
598 ptr = &buffer[3];
599 while (isspace(*ptr))
600 ptr++;
601
602 length = strlen(ptr);
603 if ((length > 1) && (ptr[length - 1] == ',') &&
604 isspace(ptr[length - 2])) {
605 ptr[--length] = '\0';
606 ptr[length - 1] = ',';
607 }
608 (void) memmove(buffer, ptr, length + 1);
609
610 offset = length + 3;
611 ptr = &buffer[offset];
612 for (;;) {
613 int more;
614
615 if ((sizeof(buffer) == offset) ||
616 (GetS(in, ptr, sizeof(buffer) - offset)
617 == NULL)) {
618 free(section);
619 return NULL;
620 }
621 if (manpreprocess(ptr))
622 continue;
623
624 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
625
626 ptr += 3;
627 if (isspace(*ptr))
628 ptr++;
629
630 buffer[length++] = ' ';
631 more = strlen(ptr);
632 if ((more > 1) && (ptr[more - 1] == ',') &&
633 isspace(ptr[more - 2])) {
634 ptr[--more] = '\0';
635 ptr[more - 1] = ',';
636 }
637
638 (void) memmove(&buffer[length], ptr, more + 1);
639 length += more;
640 offset = length + 3;
641
642 ptr = &buffer[offset];
643 }
644
645 if (strncasecmp(ptr, ".Nd", 3) == 0) {
646 (void) strcpy(&buffer[length], " -");
647
648 while (strncasecmp(ptr, ".Sh", 3) != 0) {
649 int more;
650
651 if (*ptr == '.') {
652 char *space;
653
654 if ((space = findwhitespace(ptr)) == NULL)
655 ptr = "";
656 else {
657 space++;
658 (void) memmove(ptr, space,
659 strlen(space) + 1);
660 }
661 }
662
663 if (*ptr != '\0') {
664 buffer[offset - 1] = ' ';
665 more = strlen(ptr) + 1;
666 offset += more;
667 }
668 ptr = &buffer[offset];
669 if ((sizeof(buffer) == offset) ||
670 (GetS(in, ptr, sizeof(buffer) - offset)
671 == NULL)) {
672 free(section);
673 return NULL;
674 }
675 if (manpreprocess(ptr))
676 *ptr = '\0';
677 }
678 }
679 }
680 else {
681 int offset;
682
683 if (*buffer == '.') {
684 char *space;
685
686 if ((space = findwhitespace(buffer)) == NULL) {
687 free(section);
688 return NULL;
689 }
690 space++;
691 (void) memmove(buffer, space, strlen(space) + 1);
692 }
693
694 offset = strlen(buffer) + 1;
695 for (;;) {
696 int more;
697
698 ptr = &buffer[offset];
699 if ((sizeof(buffer) == offset) ||
700 (GetS(in, ptr, sizeof(buffer) - offset)
701 == NULL)) {
702 free(section);
703 return NULL;
704 }
705 if (manpreprocess(ptr) || (*ptr == '\0'))
706 continue;
707
708 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
709 (strncasecmp(ptr, ".Ss", 3) == 0))
710 break;
711
712 if (*ptr == '.') {
713 char *space;
714
715 if ((space = findwhitespace(ptr)) == NULL) {
716 continue;
717 }
718
719 space++;
720 (void) memmove(ptr, space, strlen(space) + 1);
721 }
722
723 buffer[offset - 1] = ' ';
724 more = strlen(ptr);
725 if ((more > 1) && (ptr[more - 1] == ',') &&
726 isspace(ptr[more - 2])) {
727 ptr[more - 1] = '\0';
728 ptr[more - 2] = ',';
729 }
730 else more++;
731 offset += more;
732 }
733 }
734
735 if (section == NULL) {
736 char sectionbuffer[24];
737
738 (void) sprintf(sectionbuffer, " (%c) - ",
739 sectionext[defaultsection]);
740 ptr = replacestring(buffer, " - ", sectionbuffer);
741 }
742 else {
743 ptr = replacestring(buffer, " - ", section);
744 free(section);
745 }
746 return ptr;
747 }
748
749 char *
750 getwhatisdata(char *name)
751 {
752 gzFile *in;
753 char *data;
754 int section;
755
756 if ((in = gzopen(name, "r")) == NULL) {
757 errx(EXIT_FAILURE, "%s: %s",
758 name,
759 strerror((errno == 0) ? ENOMEM : errno));
760 /* NOTREACHED */
761 }
762
763 section = manpagesection(name);
764 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
765
766 (void) gzclose(in);
767 return data;
768 }
769
770 void
771 processmanpages(manpage **source, whatis **dest)
772 {
773 manpage *mp;
774
775 mp = *source;
776 *source = NULL;
777
778 while (mp != NULL) {
779 manpage *obsolete;
780 char *data;
781
782 if (mp->mp_left != NULL)
783 processmanpages(&mp->mp_left,dest);
784
785 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
786 if (!addwhatis(dest,data))
787 err(EXIT_FAILURE, NULL);
788 }
789
790 obsolete = mp;
791 mp = mp->mp_right;
792 free(obsolete);
793 }
794 }
795
796 int
797 dumpwhatis (FILE *out, whatis *tree)
798 {
799 while (tree != NULL) {
800 if (tree->wi_left)
801 if (!dumpwhatis(out, tree->wi_left)) return 0;
802
803 if ((fputs(tree->wi_data, out) == EOF) ||
804 (fputc('\n', out) == EOF))
805 return 0;
806
807 tree = tree->wi_right;
808 }
809
810 return 1;
811 }
812