/*	$NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.7 2000/01/24 23:03:54 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62
/*
 * Node of the binary search tree of manual page files, keyed by inode
 * number.  addmanpage() does not insert a file whose inode number is
 * already present in the tree.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file (tree key) */
	char	 mp_name[1];	/* path name; nodes are allocated with
				 * extra room so the whole string fits */
};
69
/*
 * Node of the binary search tree of whatis entries, ordered by strcmp()
 * on the entry text.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare less than wi_data */
	whatis	*wi_right;	/* entries that compare greater than wi_data */
	char	*wi_data;	/* entry text; written as one output line */
};
75
76 int main (int, char **);
77 char *findwhitespace(char *);
78 char *GetS(gzFile, char *, int);
79 int manpagesection (char *);
80 int addmanpage (manpage **, ino_t, char *);
81 int addwhatis (whatis **, char *);
82 char *replacestring (char *, char *, char *);
83 void catpreprocess (char *);
84 char *parsecatpage (gzFile *);
85 int manpreprocess (char *);
86 char *parsemanpage (gzFile *, int);
87 char *getwhatisdata (char *);
88 void processmanpages (manpage **,whatis **);
89 int dumpwhatis (FILE *, whatis *);
90
/* NULL-terminated list of directories scanned when no argument is given. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters that mark a manual page when they directly follow a '.' in
 * a file name.  manpagesection() returns an index into this string.
 */
char sectionext[] = "0123456789ln";

/* Name of the output file, created in the first directory of the path. */
char whatisdb[] = "whatis.db";

/* Program name used as the perror() prefix; defined outside this file. */
extern char *__progname;
100
101 int
102 main(int argc,char **argv)
103 {
104 char **manpath;
105 FTS *fts;
106 FTSENT *fe;
107 manpage *source;
108 whatis *dest;
109 FILE *out;
110
111 (void)setlocale(LC_ALL, "");
112
113 manpath = (argc < 2) ? default_manpath : &argv[1];
114
115 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
116 perror(__progname);
117 return EXIT_FAILURE;
118 }
119
120 source = NULL;
121 while ((fe = fts_read(fts)) != NULL) {
122 switch (fe->fts_info) {
123 case FTS_F:
124 if (manpagesection(fe->fts_path) >= 0)
125 if (!addmanpage(&source,
126 fe->fts_statp->st_ino,
127 fe->fts_path))
128 err(EXIT_FAILURE, NULL);
129 case FTS_D:
130 case FTS_DC:
131 case FTS_DEFAULT:
132 case FTS_DP:
133 case FTS_SLNONE:
134 break;
135 default:
136 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
137 strerror(fe->fts_errno));
138 /* NOTREACHED */
139 }
140 }
141
142 (void)fts_close(fts);
143
144 dest = NULL;
145 processmanpages(&source, &dest);
146
147 if (chdir(manpath[0]) < 0)
148 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
149
150 if ((out = fopen(whatisdb, "w")) == NULL)
151 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
152
153 if (!(dumpwhatis(out, dest) ||
154 (fclose(out) < 0)) ||
155 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
156 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
157
158 return EXIT_SUCCESS;
159 }
160
/*
 * Return a pointer to the first whitespace character (as defined by
 * isspace()) in `str', or NULL if the string contains none.
 */
char *
findwhitespace(char *str)
{
	for (; *str != '\0'; str++) {
		/*
		 * The cast keeps bytes >= 0x80 from being passed to
		 * isspace() as negative values where char is signed.
		 */
		if (isspace((unsigned char)*str))
			return str;
	}

	return NULL;
}
173
174 char
175 *GetS(gzFile in, char *buffer, int length)
176
177 {
178 char *ptr;
179
180 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
181 ptr = NULL;
182
183 return ptr;
184 }
185
186 int
187 manpagesection(char *name)
188 {
189 char *ptr;
190
191 if ((ptr = strrchr(name, '/')) != NULL)
192 ptr++;
193 else
194 ptr = name;
195
196 while ((ptr = strchr(ptr, '.')) != NULL) {
197 int section;
198
199 ptr++;
200 section=0;
201 while (sectionext[section] != '\0')
202 if (sectionext[section] == *ptr)
203 return section;
204 else
205 section++;
206 }
207
208 return -1;
209 }
210
211 int
212 addmanpage(manpage **tree,ino_t inode,char *name)
213 {
214 manpage *mp;
215
216 while ((mp = *tree) != NULL) {
217 if (mp->mp_inode == inode)
218 return 1;
219 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
220 }
221
222 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
223 return 0;
224
225 mp->mp_left = NULL;
226 mp->mp_right = NULL;
227 mp->mp_inode = inode;
228 (void) strcpy(mp->mp_name, name);
229 *tree = mp;
230
231 return 1;
232 }
233
234 int
235 addwhatis(whatis **tree, char *data)
236 {
237 whatis *wi;
238 int result;
239
240 while (isspace(*data))
241 data++;
242
243 if (*data == '/') {
244 char *ptr;
245
246 ptr = ++data;
247 while ((*ptr != '\0') && !isspace(*ptr))
248 if (*ptr++ == '/')
249 data = ptr;
250 }
251
252 while ((wi = *tree) != NULL) {
253 result=strcmp(data, wi->wi_data);
254 if (result == 0) return 1;
255 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
256 }
257
258 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
259 return 0;
260
261 wi->wi_left = NULL;
262 wi->wi_right = NULL;
263 wi->wi_data = data;
264 *tree = wi;
265
266 return 1;
267 }
268
/*
 * Clean up one line of a formatted ("cat") manual page in place.
 *
 * Leading and trailing whitespace is removed, every other run of
 * whitespace becomes a single blank, and each character that is
 * directly followed by a backspace is dropped together with that
 * backspace, which undoes overstriking such as "N\bN".
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/* Casts keep bytes >= 0x80 valid arguments for isspace(). */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* No blank for whitespace at the end of the line. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (*(from + 1) == '\10') {
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
290
/*
 * Return a newly allocated copy of `string' in which the first
 * occurrence of `old' has been replaced by `new'.
 *
 * If `new' is NULL, or `old' does not occur in `string', the result is
 * a plain duplicate of `string'.  Returns NULL if the allocation of the
 * replaced string fails.  The caller owns the result.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *out, *cursor;
	int total, oldlen, newlen, headlen;

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL)
		return strdup(string);

	total = strlen(string);
	oldlen = strlen(old);
	newlen = strlen(new);

	out = malloc(total - oldlen + newlen + 1);
	if (out == NULL)
		return NULL;

	/* Text before the match, the replacement, then the remainder. */
	headlen = match - string;
	cursor = out;
	(void) memcpy(cursor, string, headlen);
	cursor += headlen;
	(void) memcpy(cursor, new, newlen);
	cursor += newlen;
	(void) strcpy(cursor, match + oldlen);

	return out;
}
318
319 char *
320 parsecatpage(gzFile *in)
321 {
322 char buffer[8192];
323 char *section, *ptr, *last;
324 int size;
325
326 do {
327 if (GetS(in, buffer, sizeof(buffer)) == NULL)
328 return NULL;
329 }
330 while (buffer[0] == '\n');
331
332 section = NULL;
333 if ((ptr = strchr(buffer, '(')) != NULL) {
334 if ((last = strchr(ptr + 1, ')')) !=NULL) {
335 int length;
336
337 length = last - ptr + 1;
338 if ((section = malloc(length + 5)) == NULL)
339 return NULL;
340
341 *section = ' ';
342 (void) memcpy(section + 1, ptr, length);
343 (void) strcpy(section + 1 + length, " - ");
344 }
345 }
346
347 for (;;) {
348 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
349 free(section);
350 return NULL;
351 }
352 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
353 break;
354 }
355
356 ptr = last = buffer;
357 size = sizeof(buffer) - 1;
358 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
359 int length;
360
361 catpreprocess(ptr);
362
363 length = strlen(ptr);
364 if (length == 0) {
365 *last = '\0';
366
367 ptr = replacestring(buffer, " - ", section);
368 free(section);
369 return ptr;
370 }
371 if ((length > 1) && (ptr[length - 1] == '-') &&
372 isalpha(ptr[length - 2]))
373 last = &ptr[--length];
374 else {
375 last = &ptr[length++];
376 *last = ' ';
377 }
378
379 ptr += length;
380 size -= length;
381 }
382
383 free(section);
384
385 return NULL;
386 }
387
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if it is a roff comment (it
 * starts with .\" after optional whitespace).  Otherwise returns 0
 * after:
 *  - removing leading and trailing whitespace and turning every other
 *    run of whitespace into one blank (none in front of a ',');
 *  - removing double quotes;
 *  - resolving backslash escapes: "\-" becomes "-", "\s" is dropped
 *    together with an optional sign and the digits after it, and any
 *    other escape is dropped together with the one character that
 *    follows the backslash;
 *  - rewriting ".Xr name section" as "name(section)".
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* Casts keep bytes >= 0x80 valid arguments for the ctype calls. */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* '-' is copied by the next iteration. */
				break;
			case 's':
				/*
				 * Step over the 's' itself first; without
				 * this the sign and digits below are never
				 * reached and "\s-1" ends up as "s-1".
				 */
				from++;
				if ((*from == '+') || (*from == '-'))
					from++;
				while (isdigit((unsigned char)*from))
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int length;

			/* Split into name and section, then rebuild. */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
449
450 char *
451 parsemanpage(gzFile *in, int defaultsection)
452 {
453 char *section, buffer[8192], *ptr;
454
455 section = NULL;
456 do {
457 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
458 free(section);
459 return NULL;
460 }
461 if (manpreprocess(buffer))
462 continue;
463 if (strncasecmp(buffer, ".Dt", 3) == 0) {
464 char *end;
465
466 ptr = &buffer[3];
467 if (isspace(*ptr))
468 ptr++;
469 if ((ptr = findwhitespace(ptr)) == NULL)
470 continue;
471
472 if ((end = findwhitespace(++ptr)) != NULL)
473 *end = '\0';
474
475 free(section);
476 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
477 section[0] = ' ';
478 section[1] = '(';
479 (void) strcpy(§ion[2], ptr);
480 (void) strcat(§ion[2], ") - ");
481 }
482 }
483 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
484
485 do {
486 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
487 free(section);
488 return NULL;
489 }
490 } while (manpreprocess(buffer));
491
492 if (strncasecmp(buffer, ".Nm", 3) == 0) {
493 int length, offset;
494
495 ptr = &buffer[3];
496 while (isspace(*ptr))
497 ptr++;
498
499 length = strlen(ptr);
500 if ((length > 1) && (ptr[length - 1] == ',') &&
501 isspace(ptr[length - 2])) {
502 ptr[--length] = '\0';
503 ptr[length - 1] = ',';
504 }
505 (void) memmove(buffer, ptr, length + 1);
506
507 offset = length + 3;
508 ptr = &buffer[offset];
509 for (;;) {
510 int more;
511
512 if ((sizeof(buffer) == offset) ||
513 (GetS(in, ptr, sizeof(buffer) - offset)
514 == NULL)) {
515 free(section);
516 return NULL;
517 }
518 if (manpreprocess(ptr))
519 continue;
520
521 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
522
523 ptr += 3;
524 if (isspace(*ptr))
525 ptr++;
526
527 buffer[length++] = ' ';
528 more = strlen(ptr);
529 if ((more > 1) && (ptr[more - 1] == ',') &&
530 isspace(ptr[more - 2])) {
531 ptr[--more] = '\0';
532 ptr[more - 1] = ',';
533 }
534
535 (void) memmove(&buffer[length], ptr, more + 1);
536 length += more;
537 offset = length + 3;
538
539 ptr = &buffer[offset];
540 }
541
542 if (strncasecmp(ptr, ".Nd", 3) == 0) {
543 (void) strcpy(&buffer[length], " -");
544
545 while (strncasecmp(ptr, ".Sh", 3) != 0) {
546 int more;
547
548 if (*ptr == '.') {
549 char *space;
550
551 if ((space = findwhitespace(ptr)) == NULL)
552 ptr = "";
553 else {
554 space++;
555 (void) memmove(ptr, space,
556 strlen(space) + 1);
557 }
558 }
559
560 if (*ptr != '\0') {
561 buffer[offset - 1] = ' ';
562 more = strlen(ptr) + 1;
563 offset += more;
564 }
565 ptr = &buffer[offset];
566 if ((sizeof(buffer) == offset) ||
567 (GetS(in, ptr, sizeof(buffer) - offset)
568 == NULL)) {
569 free(section);
570 return NULL;
571 }
572 if (manpreprocess(ptr))
573 *ptr = '\0';
574 }
575 }
576 }
577 else {
578 int offset;
579
580 if (*buffer == '.') {
581 char *space;
582
583 if ((space = findwhitespace(buffer)) == NULL) {
584 free(section);
585 return NULL;
586 }
587 space++;
588 (void) memmove(buffer, space, strlen(space) + 1);
589 }
590
591 offset = strlen(buffer) + 1;
592 for (;;) {
593 int more;
594
595 ptr = &buffer[offset];
596 if ((sizeof(buffer) == offset) ||
597 (GetS(in, ptr, sizeof(buffer) - offset)
598 == NULL)) {
599 free(section);
600 return NULL;
601 }
602 if (manpreprocess(ptr) || (*ptr == '\0'))
603 continue;
604
605 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
606 (strncasecmp(ptr, ".Ss", 3) == 0))
607 break;
608
609 if (*ptr == '.') {
610 char *space;
611
612 if ((space = findwhitespace(ptr)) == NULL) {
613 continue;
614 }
615
616 space++;
617 (void) memmove(ptr, space, strlen(space) + 1);
618 }
619
620 buffer[offset - 1] = ' ';
621 more = strlen(ptr);
622 if ((more > 1) && (ptr[more - 1] == ',') &&
623 isspace(ptr[more - 2])) {
624 ptr[more - 1] = '\0';
625 ptr[more - 2] = ',';
626 }
627 else more++;
628 offset += more;
629 }
630 }
631
632 if (section == NULL) {
633 char sectionbuffer[24];
634
635 (void) sprintf(sectionbuffer, " (%c) - ",
636 sectionext[defaultsection]);
637 ptr = replacestring(buffer, " - ", sectionbuffer);
638 }
639 else {
640 ptr = replacestring(buffer, " - ", section);
641 free(section);
642 }
643 return ptr;
644 }
645
646 char *
647 getwhatisdata(char *name)
648 {
649 gzFile *in;
650 char *data;
651 int section;
652
653 if ((in = gzopen(name, "r")) == NULL) {
654 errx(EXIT_FAILURE, "%s: %s",
655 name,
656 strerror((errno == 0) ? ENOMEM : errno));
657 /* NOTREACHED */
658 }
659
660 section = manpagesection(name);
661 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
662
663 (void) gzclose(in);
664 return data;
665 }
666
667 void
668 processmanpages(manpage **source, whatis **dest)
669 {
670 manpage *mp;
671
672 mp = *source;
673 *source = NULL;
674
675 while (mp != NULL) {
676 manpage *obsolete;
677 char *data;
678
679 if (mp->mp_left != NULL)
680 processmanpages(&mp->mp_left,dest);
681
682 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
683 if (!addwhatis(dest,data))
684 err(EXIT_FAILURE, NULL);
685 }
686
687 obsolete = mp;
688 mp = mp->mp_right;
689 free(obsolete);
690 }
691 }
692
693 int
694 dumpwhatis (FILE *out, whatis *tree)
695 {
696 while (tree != NULL) {
697 if (tree->wi_left)
698 if (!dumpwhatis(out, tree->wi_left)) return 0;
699
700 if ((fputs(tree->wi_data, out) == EOF) ||
701 (fputc('\n', out) == EOF))
702 return 0;
703
704 tree = tree->wi_right;
705 }
706
707 return 1;
708 }
709