makewhatis.c revision 1.13 1 /* $NetBSD: makewhatis.c,v 1.13 2001/02/19 22:46:14 cgd Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.13 2001/02/19 22:46:14 cgd Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/param.h>
51 #include <sys/stat.h>
52 #include <sys/wait.h>
53
54 #include <ctype.h>
55 #include <err.h>
56 #include <errno.h>
57 #include <fcntl.h>
58 #include <fts.h>
59 #include <locale.h>
60 #include <paths.h>
61 #include <signal.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <zlib.h>
67
/*
 * Node of the binary search tree of man page files found by main().
 * addmanpage() keys the tree on mp_inode.  mp_name is only the first byte
 * of the path name: addmanpage() allocates each node large enough for the
 * whole string to follow the struct.
 */
typedef struct manpagestruct manpage;

struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file */
	char	 mp_name[1];	/* path name (extends past the struct) */
};
74
/*
 * Node of the binary search tree of whatis entries.  addwhatis() orders
 * the tree with strcmp() on wi_data and dumpwhatis() writes it out in
 * that order.
 */
typedef struct whatisstruct whatis;

struct whatisstruct {
	whatis	*wi_left;	/* entries that sort before wi_data */
	whatis	*wi_right;	/* entries that sort after wi_data */
	char	*wi_data;	/* text of the entry */
};
80
81 int main (int, char **);
82 char *findwhitespace(char *);
83 char *GetS(gzFile, char *, int);
84 int manpagesection (char *);
85 int addmanpage (manpage **, ino_t, char *);
86 int addwhatis (whatis **, char *);
87 char *replacestring (char *, char *, char *);
88 void catpreprocess (char *);
89 char *parsecatpage (gzFile *);
90 int manpreprocess (char *);
91 char *nroff (gzFile *);
92 char *parsemanpage (gzFile *, int);
93 char *getwhatisdata (char *);
94 void processmanpages (manpage **,whatis **);
95 int dumpwhatis (FILE *, whatis *);
96
/* Hierarchy that main() scans when no directory is given as an argument. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters that may follow a '.' in a file name to mark it as a manual
 * page; manpagesection() returns an index into this string.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file that main() creates. */
char whatisdb[] = "whatis.db";
104
105 int
106 main(int argc,char **argv)
107 {
108 char **manpath;
109 FTS *fts;
110 FTSENT *fe;
111 manpage *source;
112 whatis *dest;
113 FILE *out;
114
115 (void)setlocale(LC_ALL, "");
116
117 manpath = (argc < 2) ? default_manpath : &argv[1];
118
119 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
120 perror(getprogname());
121 return EXIT_FAILURE;
122 }
123
124 source = NULL;
125 while ((fe = fts_read(fts)) != NULL) {
126 switch (fe->fts_info) {
127 case FTS_F:
128 if (manpagesection(fe->fts_path) >= 0)
129 if (!addmanpage(&source,
130 fe->fts_statp->st_ino,
131 fe->fts_path))
132 err(EXIT_FAILURE, NULL);
133 case FTS_D:
134 case FTS_DC:
135 case FTS_DEFAULT:
136 case FTS_DP:
137 case FTS_SLNONE:
138 break;
139 default:
140 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
141 strerror(fe->fts_errno));
142
143 }
144 }
145
146 (void)fts_close(fts);
147
148 dest = NULL;
149 processmanpages(&source, &dest);
150
151 if (chdir(manpath[0]) < 0)
152 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
153
154 if ((out = fopen(whatisdb, "w")) == NULL)
155 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
156
157 if (!(dumpwhatis(out, dest) ||
158 (fclose(out) < 0)) ||
159 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
160 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
161
162 return EXIT_SUCCESS;
163 }
164
/*
 * Return a pointer to the first whitespace character (as classified by
 * isspace()) in str, or NULL when the string contains none.
 */
char
*findwhitespace(char *str)
{
	/*
	 * The cast keeps bytes above 0x7f from reaching isspace() as
	 * negative values, which the <ctype.h> functions do not accept.
	 */
	for (; *str != '\0'; str++)
		if (isspace((unsigned char)*str))
			return str;

	return NULL;
}
177
178 char
179 *GetS(gzFile in, char *buffer, int length)
180
181 {
182 char *ptr;
183
184 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
185 ptr = NULL;
186
187 return ptr;
188 }
189
190 int
191 manpagesection(char *name)
192 {
193 char *ptr;
194
195 if ((ptr = strrchr(name, '/')) != NULL)
196 ptr++;
197 else
198 ptr = name;
199
200 while ((ptr = strchr(ptr, '.')) != NULL) {
201 int section;
202
203 ptr++;
204 section=0;
205 while (sectionext[section] != '\0')
206 if (sectionext[section] == *ptr)
207 return section;
208 else
209 section++;
210 }
211
212 return -1;
213 }
214
215 int
216 addmanpage(manpage **tree,ino_t inode,char *name)
217 {
218 manpage *mp;
219
220 while ((mp = *tree) != NULL) {
221 if (mp->mp_inode == inode)
222 return 1;
223 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
224 }
225
226 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
227 return 0;
228
229 mp->mp_left = NULL;
230 mp->mp_right = NULL;
231 mp->mp_inode = inode;
232 (void) strcpy(mp->mp_name, name);
233 *tree = mp;
234
235 return 1;
236 }
237
238 int
239 addwhatis(whatis **tree, char *data)
240 {
241 whatis *wi;
242 int result;
243
244 while (isspace(*data))
245 data++;
246
247 if (*data == '/') {
248 char *ptr;
249
250 ptr = ++data;
251 while ((*ptr != '\0') && !isspace(*ptr))
252 if (*ptr++ == '/')
253 data = ptr;
254 }
255
256 while ((wi = *tree) != NULL) {
257 result=strcmp(data, wi->wi_data);
258 if (result == 0) return 1;
259 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
260 }
261
262 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
263 return 0;
264
265 wi->wi_left = NULL;
266 wi->wi_right = NULL;
267 wi->wi_data = data;
268 *tree = wi;
269
270 return 1;
271 }
272
/*
 * Clean up one line of a formatted ("cat") manual page in place.
 *
 * Leading and trailing whitespace is removed, every other run of
 * whitespace becomes a single blank, and each "character, backspace"
 * pair is dropped so that overstruck text such as "N\bN" is reduced to
 * the character that follows the backspace.
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/* ctype functions need values in the range of unsigned char. */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0')
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* No blank for whitespace at the end of the line. */
			if (*from != '\0')
				*to++ = ' ';
		}
		else if (*(from + 1) == '\10')
			from += 2;
		else
			*to++ = *from++;

	*to = '\0';
}
294
/*
 * Return a newly allocated copy of `string' in which the first occurrence
 * of `old' has been replaced by `new'.
 *
 * When `new' is NULL or `old' does not occur, the result is a plain copy
 * of `string'.  Returns NULL when memory is not available.  The caller
 * owns the result.
 */
char *
replacestring(char *string, char *old, char *new)

{
	char *match, *out, *cursor;
	int total, oldlen, newlen, headlen;

	/* Nothing is searched for when there is no replacement text. */
	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL)
		return strdup(string);

	total = strlen(string);
	oldlen = strlen(old);
	newlen = strlen(new);

	out = malloc(total - oldlen + newlen + 1);
	if (out == NULL)
		return NULL;

	/* Assemble: text before the match, replacement, remaining text. */
	headlen = match - string;
	cursor = out;
	(void) memcpy(cursor, string, headlen);
	cursor += headlen;
	(void) memcpy(cursor, new, newlen);
	cursor += newlen;
	(void) strcpy(cursor, match + oldlen);

	return out;
}
322
323 char *
324 parsecatpage(gzFile *in)
325 {
326 char buffer[8192];
327 char *section, *ptr, *last;
328 int size;
329
330 do {
331 if (GetS(in, buffer, sizeof(buffer)) == NULL)
332 return NULL;
333 }
334 while (buffer[0] == '\n');
335
336 section = NULL;
337 if ((ptr = strchr(buffer, '(')) != NULL) {
338 if ((last = strchr(ptr + 1, ')')) !=NULL) {
339 int length;
340
341 length = last - ptr + 1;
342 if ((section = malloc(length + 5)) == NULL)
343 return NULL;
344
345 *section = ' ';
346 (void) memcpy(section + 1, ptr, length);
347 (void) strcpy(section + 1 + length, " - ");
348 }
349 }
350
351 for (;;) {
352 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
353 free(section);
354 return NULL;
355 }
356 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
357 break;
358 }
359
360 ptr = last = buffer;
361 size = sizeof(buffer) - 1;
362 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
363 int length;
364
365 catpreprocess(ptr);
366
367 length = strlen(ptr);
368 if (length == 0) {
369 *last = '\0';
370
371 ptr = replacestring(buffer, " - ", section);
372 free(section);
373 return ptr;
374 }
375 if ((length > 1) && (ptr[length - 1] == '-') &&
376 isalpha(ptr[length - 2]))
377 last = &ptr[--length];
378 else {
379 last = &ptr[length++];
380 *last = ' ';
381 }
382
383 ptr += length;
384 size -= length;
385 }
386
387 free(section);
388
389 return NULL;
390 }
391
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, when the line is a comment
 * (starts with .\" after optional whitespace).  Otherwise returns 0 after
 *  - removing leading and trailing whitespace and collapsing every other
 *    run of whitespace to one blank (none at all in front of a comma),
 *  - removing double quotes,
 *  - removing backslash escapes: "\-" becomes "-", "\s" is removed
 *    together with its optional sign and digits, and for any other escape
 *    the backslash and the character after it are removed,
 *  - rewriting a ".Xr name section" line to "name(section)".
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* ctype functions need values in the range of unsigned char. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0')
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		}
		else if (*from == '\\')
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the '-'; stop at the end of line. */
				break;
			case 's':
				/*
				 * Step over the 's' itself first, otherwise
				 * neither the sign nor the digits of the
				 * size change are ever looked at.
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		else
			if (*from == '"')
				from++;
			else
				*to++ = *from++;

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int length;

			/* Split into name and section, then rebuild. */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
453
/*
 * Format a manual page with nroff and extract its whatis line from the
 * formatted output.
 *
 * The stream `in' is rewound and its complete contents are copied to a
 * temporary file.  A child process runs "nroff -S -man" with that file as
 * standard input, a pipe as standard output and the null device as
 * standard error.  The parent reads the pipe with parsecatpage().
 *
 * Returns the string produced by parsecatpage(), or NULL when any step
 * fails or the child does not exit with status 0.  The stream passed by
 * the caller is not closed here.
 */
char *
nroff(gzFile *in)
{
	char tempname[MAXPATHLEN], buffer[65536], *data;
	int tempfd, bytes, pipefd[2], status;
	static int devnull = -1;	/* opened on first use, then kept */
	pid_t child;

	if (gzrewind(in) < 0) {
		perror(getprogname());
		return NULL;
	}

	if ((devnull < 0) &&
	    ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0)) {
		perror(getprogname());
		return NULL;
	}

	(void)strcpy(tempname, _PATH_TMP "makewhatis.XXXXXX");
	if ((tempfd = mkstemp(tempname)) < 0) {
		perror(getprogname());
		return NULL;
	}

	/* Copy the page to the temporary file; a short write is an error. */
	while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
		if (write(tempfd, buffer, bytes) != bytes) {
			bytes = -1;
			break;
		}

	/* Rewind the copy so the child reads it from the start. */
	if ((bytes < 0) ||
	    (lseek(tempfd, 0, SEEK_SET) < 0) ||
	    (pipe(pipefd) < 0)) {
		perror(getprogname());
		(void)close(tempfd);
		(void)unlink(tempname);
		return NULL;
	}

	switch (child = vfork()) {
	case -1:
		perror(getprogname());
		(void)close(pipefd[1]);
		(void)close(pipefd[0]);
		(void)close(tempfd);
		(void)unlink(tempname);
		return NULL;
		/* NOTREACHED */
	case 0:
		/* Child: wire up stdin, stdout and stderr, then exec. */
		(void)close(pipefd[0]);
		if (tempfd != STDIN_FILENO) {
			(void)dup2(tempfd, STDIN_FILENO);
			(void)close(tempfd);
		}
		if (pipefd[1] != STDOUT_FILENO) {
			(void)dup2(pipefd[1], STDOUT_FILENO);
			(void)close(pipefd[1]);
		}
		if (devnull != STDERR_FILENO) {
			(void)dup2(devnull, STDERR_FILENO);
			(void)close(devnull);
		}
		(void)execlp("nroff", "nroff", "-S", "-man", NULL);
		_exit(EXIT_FAILURE);
	default:
		/* Parent: keep only the read end of the pipe. */
		(void)close(pipefd[1]);
		(void)close(tempfd);
		/* Execution continues after the switch. */
	}

	/* From here on `in' refers to the pipe, not the caller's stream. */
	if ((in = gzdopen(pipefd[0], "r")) == NULL) {
		if (errno == 0)
			errno = ENOMEM;
		perror(getprogname());
		(void)close(pipefd[0]);
		(void)kill(child, SIGTERM);
		while (waitpid(child, NULL, 0) != child);
		(void)unlink(tempname);
		return NULL;
	}

	data = parsecatpage(in);
	/* Read the rest of the output so the child can finish writing. */
	while (gzread(in, buffer, sizeof(buffer)) > 0);
	(void)gzclose(in);

	/* A result from a child that did not exit with 0 is discarded. */
	while (waitpid(child, &status, 0) != child);
	if ((data != NULL) &&
	    !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
		free(data);
		data = NULL;
	}

	(void)unlink(tempname);

	return data;
}
551
552 char *
553 parsemanpage(gzFile *in, int defaultsection)
554 {
555 char *section, buffer[8192], *ptr;
556
557 section = NULL;
558 do {
559 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
560 free(section);
561 return NULL;
562 }
563 if (manpreprocess(buffer))
564 continue;
565 if (strncasecmp(buffer, ".Dt", 3) == 0) {
566 char *end;
567
568 ptr = &buffer[3];
569 if (isspace(*ptr))
570 ptr++;
571 if ((ptr = findwhitespace(ptr)) == NULL)
572 continue;
573
574 if ((end = findwhitespace(++ptr)) != NULL)
575 *end = '\0';
576
577 free(section);
578 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
579 section[0] = ' ';
580 section[1] = '(';
581 (void) strcpy(§ion[2], ptr);
582 (void) strcat(§ion[2], ") - ");
583 }
584 }
585 else if (strncasecmp(buffer, ".Ds", 3) == 0)
586 return nroff(in);
587 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
588
589 do {
590 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
591 free(section);
592 return NULL;
593 }
594 } while (manpreprocess(buffer));
595
596 if (strncasecmp(buffer, ".Nm", 3) == 0) {
597 int length, offset;
598
599 ptr = &buffer[3];
600 while (isspace(*ptr))
601 ptr++;
602
603 length = strlen(ptr);
604 if ((length > 1) && (ptr[length - 1] == ',') &&
605 isspace(ptr[length - 2])) {
606 ptr[--length] = '\0';
607 ptr[length - 1] = ',';
608 }
609 (void) memmove(buffer, ptr, length + 1);
610
611 offset = length + 3;
612 ptr = &buffer[offset];
613 for (;;) {
614 int more;
615
616 if ((sizeof(buffer) == offset) ||
617 (GetS(in, ptr, sizeof(buffer) - offset)
618 == NULL)) {
619 free(section);
620 return NULL;
621 }
622 if (manpreprocess(ptr))
623 continue;
624
625 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
626
627 ptr += 3;
628 if (isspace(*ptr))
629 ptr++;
630
631 buffer[length++] = ' ';
632 more = strlen(ptr);
633 if ((more > 1) && (ptr[more - 1] == ',') &&
634 isspace(ptr[more - 2])) {
635 ptr[--more] = '\0';
636 ptr[more - 1] = ',';
637 }
638
639 (void) memmove(&buffer[length], ptr, more + 1);
640 length += more;
641 offset = length + 3;
642
643 ptr = &buffer[offset];
644 }
645
646 if (strncasecmp(ptr, ".Nd", 3) == 0) {
647 (void) strcpy(&buffer[length], " -");
648
649 while (strncasecmp(ptr, ".Sh", 3) != 0) {
650 int more;
651
652 if (*ptr == '.') {
653 char *space;
654
655 if ((space = findwhitespace(ptr)) == NULL)
656 ptr = "";
657 else {
658 space++;
659 (void) memmove(ptr, space,
660 strlen(space) + 1);
661 }
662 }
663
664 if (*ptr != '\0') {
665 buffer[offset - 1] = ' ';
666 more = strlen(ptr) + 1;
667 offset += more;
668 }
669 ptr = &buffer[offset];
670 if ((sizeof(buffer) == offset) ||
671 (GetS(in, ptr, sizeof(buffer) - offset)
672 == NULL)) {
673 free(section);
674 return NULL;
675 }
676 if (manpreprocess(ptr))
677 *ptr = '\0';
678 }
679 }
680 }
681 else {
682 int offset;
683
684 if (*buffer == '.') {
685 char *space;
686
687 if ((space = findwhitespace(buffer)) == NULL) {
688 free(section);
689 return NULL;
690 }
691 space++;
692 (void) memmove(buffer, space, strlen(space) + 1);
693 }
694
695 offset = strlen(buffer) + 1;
696 for (;;) {
697 int more;
698
699 ptr = &buffer[offset];
700 if ((sizeof(buffer) == offset) ||
701 (GetS(in, ptr, sizeof(buffer) - offset)
702 == NULL)) {
703 free(section);
704 return NULL;
705 }
706 if (manpreprocess(ptr) || (*ptr == '\0'))
707 continue;
708
709 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
710 (strncasecmp(ptr, ".Ss", 3) == 0))
711 break;
712
713 if (*ptr == '.') {
714 char *space;
715
716 if ((space = findwhitespace(ptr)) == NULL) {
717 continue;
718 }
719
720 space++;
721 (void) memmove(ptr, space, strlen(space) + 1);
722 }
723
724 buffer[offset - 1] = ' ';
725 more = strlen(ptr);
726 if ((more > 1) && (ptr[more - 1] == ',') &&
727 isspace(ptr[more - 2])) {
728 ptr[more - 1] = '\0';
729 ptr[more - 2] = ',';
730 }
731 else more++;
732 offset += more;
733 }
734 }
735
736 if (section == NULL) {
737 char sectionbuffer[24];
738
739 (void) sprintf(sectionbuffer, " (%c) - ",
740 sectionext[defaultsection]);
741 ptr = replacestring(buffer, " - ", sectionbuffer);
742 }
743 else {
744 ptr = replacestring(buffer, " - ", section);
745 free(section);
746 }
747 return ptr;
748 }
749
750 char *
751 getwhatisdata(char *name)
752 {
753 gzFile *in;
754 char *data;
755 int section;
756
757 if ((in = gzopen(name, "r")) == NULL) {
758 errx(EXIT_FAILURE, "%s: %s",
759 name,
760 strerror((errno == 0) ? ENOMEM : errno));
761 /* NOTREACHED */
762 }
763
764 section = manpagesection(name);
765 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
766
767 (void) gzclose(in);
768 return data;
769 }
770
771 void
772 processmanpages(manpage **source, whatis **dest)
773 {
774 manpage *mp;
775
776 mp = *source;
777 *source = NULL;
778
779 while (mp != NULL) {
780 manpage *obsolete;
781 char *data;
782
783 if (mp->mp_left != NULL)
784 processmanpages(&mp->mp_left,dest);
785
786 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
787 if (!addwhatis(dest,data))
788 err(EXIT_FAILURE, NULL);
789 }
790
791 obsolete = mp;
792 mp = mp->mp_right;
793 free(obsolete);
794 }
795 }
796
797 int
798 dumpwhatis (FILE *out, whatis *tree)
799 {
800 while (tree != NULL) {
801 if (tree->wi_left)
802 if (!dumpwhatis(out, tree->wi_left)) return 0;
803
804 if ((fputs(tree->wi_data, out) == EOF) ||
805 (fputc('\n', out) == EOF))
806 return 0;
807
808 tree = tree->wi_right;
809 }
810
811 return 1;
812 }
813