makewhatis.c revision 1.6 1 /* $NetBSD: makewhatis.c,v 1.6 1999/12/31 14:50:16 tron Exp $ */
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.6 1999/12/31 14:50:16 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62
/*
 * Node of the binary search tree of manual page files found on disk.
 * The tree is keyed by inode number so that a file reachable under
 * several names is recorded only once (see addmanpage()).
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file; the tree key */
	char	 mp_name[1];	/* path name; addmanpage() over-allocates
				   the node so the whole string fits */
};
69
/*
 * Node of the binary search tree of whatis entries, ordered by
 * strcmp() on wi_data (see addwhatis()).
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare less than wi_data */
	whatis	*wi_right;	/* entries that compare greater than wi_data */
	char	*wi_data;	/* the whatis line; the pointer handed to
				   addwhatis() is stored, not a copy */
};
75
76 int main (int, char **);
77 char *findwhitespace(char *);
78 char *GetS(gzFile, char *, int);
79 int manpagesection (char *);
80 int addmanpage (manpage **, ino_t, char *);
81 int addwhatis (whatis **, char *);
82 char *replacestring (char *, char *, char *);
83 void catpreprocess (char *);
84 char *parsecatpage (gzFile *);
85 int manpreprocess (char *);
86 char *parsemanpage (gzFile *, int);
87 char *getwhatisdata (char *);
88 void processmanpages (manpage **,whatis **);
89 int dumpwhatis (FILE *, whatis *);
90
/* Directories searched when no manual path is given on the command line. */
char *default_manpath[] = { "/usr/share/man", NULL };

/*
 * Characters accepted directly after a '.' in a file name to mark it as
 * a manual page.  manpagesection() returns the index into this string,
 * so index 0 ('0') is what getwhatisdata() treats as a formatted page.
 */
char sectionext[] = "0123456789ln";

/* Name of the database file main() writes. */
char whatisdb[] = "whatis.db";

/* Program name used by main() for its perror() message; defined elsewhere. */
extern char *__progname;
100
101 int
102 main(int argc,char **argv)
103 {
104 char **manpath;
105 FTS *fts;
106 FTSENT *fe;
107 manpage *source;
108 whatis *dest;
109 FILE *out;
110
111 (void)setlocale(LC_ALL, "");
112
113 manpath = (argc < 2) ? default_manpath : &argv[1];
114
115 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
116 perror(__progname);
117 return EXIT_FAILURE;
118 }
119
120 source = NULL;
121 while ((fe = fts_read(fts)) != NULL) {
122 switch (fe->fts_info) {
123 case FTS_F:
124 if (manpagesection(fe->fts_path) >= 0)
125 if (!addmanpage(&source,
126 fe->fts_statp->st_ino,
127 fe->fts_path))
128 err(EXIT_FAILURE, NULL);
129 case FTS_D:
130 case FTS_DC:
131 case FTS_DEFAULT:
132 case FTS_DP:
133 case FTS_SLNONE:
134 break;
135 default:
136 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
137 strerror(fe->fts_errno));
138 /* NOTREACHED */
139 }
140 }
141
142 (void)fts_close(fts);
143
144 dest = NULL;
145 processmanpages(&source, &dest);
146
147 if (chdir(manpath[0]) < 0)
148 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
149
150 if ((out = fopen(whatisdb, "w")) == NULL)
151 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
152
153 if (!(dumpwhatis(out, dest) ||
154 (fclose(out) < 0)) ||
155 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
156 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
157
158 return EXIT_SUCCESS;
159 }
160
/*
 * Return a pointer to the first whitespace character in str, or NULL
 * when the string contains none.  The terminating NUL is never treated
 * as whitespace.
 */
char *
findwhitespace(char *str)
{
	/*
	 * The cast is required: handing isspace() a negative value (any
	 * byte >= 0x80 where plain char is signed) is undefined behaviour.
	 */
	for (; *str != '\0'; str++)
		if (isspace((unsigned char)*str))
			return str;

	return NULL;
}
173
174 char
175 *GetS(gzFile in, char *buffer, int length)
176
177 {
178 char *ptr;
179
180 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
181 ptr = NULL;
182
183 return ptr;
184 }
185
186 int
187 manpagesection(char *name)
188 {
189 char *ptr;
190
191 if ((ptr = strrchr(name, '/')) != NULL)
192 ptr++;
193 else
194 ptr = name;
195
196 while ((ptr = strchr(ptr, '.')) != NULL) {
197 int section;
198
199 ptr++;
200 section=0;
201 while (sectionext[section] != '\0')
202 if (sectionext[section] == *ptr)
203 return section;
204 else
205 section++;
206 }
207
208 return -1;
209 }
210
211 int
212 addmanpage(manpage **tree,ino_t inode,char *name)
213 {
214 manpage *mp;
215
216 while ((mp = *tree) != NULL) {
217 if (mp->mp_inode == inode)
218 return 1;
219 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
220 }
221
222 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
223 return 0;
224
225 mp->mp_left = NULL;
226 mp->mp_right = NULL;
227 mp->mp_inode = inode;
228 (void) strcpy(mp->mp_name, name);
229 *tree = mp;
230
231 return 1;
232 }
233
234 int
235 addwhatis(whatis **tree, char *data)
236 {
237 whatis *wi;
238 int result;
239
240 while ((wi = *tree) != NULL) {
241 result=strcmp(data, wi->wi_data);
242 if (result == 0) return 1;
243 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
244 }
245
246 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
247 return 0;
248
249 wi->wi_left = NULL;
250 wi->wi_right = NULL;
251 wi->wi_data = data;
252 *tree = wi;
253
254 return 1;
255 }
256
/*
 * Clean up one line of a formatted manual page in place.
 *
 *  - leading and trailing whitespace is removed;
 *  - every other run of whitespace becomes a single blank;
 *  - a character that is directly followed by a backspace is dropped
 *    together with the backspace, which reduces overstruck text such as
 *    "N\bN" to "N".
 *
 * The result is never longer than the input.
 */
void
catpreprocess(char *from)
{
	char *to;

	/*
	 * All <ctype.h> arguments are cast to unsigned char: passing a
	 * negative plain char (bytes >= 0x80) is undefined behaviour.
	 */
	to = from;
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* Emit the separator only if more text follows. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* Skip the overstruck character and the backspace. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
278
/*
 * Return a newly allocated copy of "string" in which the FIRST
 * occurrence of "old" has been replaced by "new".
 *
 * When "new" is NULL, or "old" does not occur in "string", the result
 * is a plain copy of "string".  The caller owns the returned memory.
 * Returns NULL when the allocation fails.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *out;
	size_t headlen, oldlen, newlen, taillen;

	if (new == NULL)
		return strdup(string);

	if ((match = strstr(string, old)) == NULL)
		return strdup(string);

	headlen = (size_t)(match - string);	/* text before the match */
	oldlen = strlen(old);
	newlen = strlen(new);
	taillen = strlen(match + oldlen);	/* text after the match */

	out = malloc(headlen + newlen + taillen + 1);
	if (out == NULL)
		return NULL;

	(void) memcpy(out, string, headlen);
	(void) memcpy(out + headlen, new, newlen);
	/* taillen + 1 also copies the terminating NUL. */
	(void) memcpy(out + headlen + newlen, match + oldlen, taillen + 1);

	return out;
}
306
307 char *
308 parsecatpage(gzFile *in)
309 {
310 char buffer[8192];
311 char *section, *ptr, *last;
312 int size;
313
314 do {
315 if (GetS(in, buffer, sizeof(buffer)) == NULL)
316 return NULL;
317 }
318 while (buffer[0] == '\n');
319
320 section = NULL;
321 if ((ptr = strchr(buffer, '(')) != NULL) {
322 if ((last = strchr(ptr + 1, ')')) !=NULL) {
323 int length;
324
325 length = last - ptr + 1;
326 if ((section = malloc(length + 5)) == NULL)
327 return NULL;
328
329 *section = ' ';
330 (void) memcpy(section + 1, ptr, length);
331 (void) strcpy(section + 1 + length, " - ");
332 }
333 }
334
335 for (;;) {
336 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
337 free(section);
338 return NULL;
339 }
340 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
341 break;
342 }
343
344 ptr = last = buffer;
345 size = sizeof(buffer) - 1;
346 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
347 int length;
348
349 catpreprocess(ptr);
350
351 length = strlen(ptr);
352 if (length == 0) {
353 *last = '\0';
354
355 ptr = replacestring(buffer, " - ", section);
356 free(section);
357 return ptr;
358 }
359 if ((length > 1) && (ptr[length - 1] == '-') &&
360 isalpha(ptr[length - 2]))
361 last = &ptr[--length];
362 else {
363 last = &ptr[length++];
364 *last = ' ';
365 }
366
367 ptr += length;
368 size -= length;
369 }
370
371 free(section);
372
373 return NULL;
374 }
375
/*
 * Clean up one line of manual page source in place.
 *
 * Returns 1, leaving the line untouched, if it is a roff comment
 * (optional whitespace followed by .\").  Otherwise returns 0 after
 *
 *  - removing leading and trailing whitespace and reducing every other
 *    whitespace run to one blank, except that whitespace in front of a
 *    ',' is removed entirely;
 *  - turning "\-" into "-" and deleting every other backslash together
 *    with the single character after it;
 *  - deleting double quotes;
 *  - rewriting a ".Xr name section" request as "name(section)".
 *
 * NOTE(review): escapes longer than two characters (e.g. "\fB") lose
 * only their first two characters here; confirm whether that matters.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	/*
	 * All <ctype.h> arguments are cast to unsigned char: passing a
	 * negative plain char (bytes >= 0x80) is undefined behaviour.
	 */
	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the character; the next pass copies it. */
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			int length;

			/*
			 * Split "name section" and rebuild it at the start
			 * of the line.  The result is shorter than the
			 * request, so the moves never leave the line.
			 */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
431
432 char *
433 parsemanpage(gzFile *in, int defaultsection)
434 {
435 char *section, buffer[8192], *ptr;
436
437 section = NULL;
438 do {
439 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
440 free(section);
441 return NULL;
442 }
443 if (manpreprocess(buffer))
444 continue;
445 if (strncasecmp(buffer, ".Dt", 3) == 0) {
446 char *end;
447
448 ptr = &buffer[3];
449 if (isspace(*ptr))
450 ptr++;
451 if ((ptr = findwhitespace(ptr)) == NULL)
452 continue;
453
454 if ((end = findwhitespace(++ptr)) != NULL)
455 *end = '\0';
456
457 free(section);
458 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
459 section[0] = ' ';
460 section[1] = '(';
461 (void) strcpy(§ion[2], ptr);
462 (void) strcat(§ion[2], ") - ");
463 }
464 }
465 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
466
467 do {
468 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
469 free(section);
470 return NULL;
471 }
472 } while (manpreprocess(buffer));
473
474 if (strncasecmp(buffer, ".Nm", 3) == 0) {
475 int length, offset;
476
477 ptr = &buffer[3];
478 if (isspace(*ptr))
479 ptr++;
480
481 length = strlen(ptr);
482 if ((length > 1) && (ptr[length - 1] == ',') &&
483 isspace(ptr[length - 2])) {
484 ptr[--length] = '\0';
485 ptr[length - 1] = ',';
486 }
487 (void) memmove(buffer, ptr, length + 1);
488
489 offset = length + 3;
490 ptr = &buffer[offset];
491 for (;;) {
492 int more;
493
494 if ((sizeof(buffer) == offset) ||
495 (GetS(in, ptr, sizeof(buffer) - offset)
496 == NULL)) {
497 free(section);
498 return NULL;
499 }
500 if (manpreprocess(ptr))
501 continue;
502
503 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
504
505 ptr += 3;
506 if (isspace(*ptr))
507 ptr++;
508
509 buffer[length++] = ' ';
510 more = strlen(ptr);
511 if ((more > 1) && (ptr[more - 1] == ',') &&
512 isspace(ptr[more - 2])) {
513 ptr[--more] = '\0';
514 ptr[more - 1] = ',';
515 }
516
517 (void) memmove(&buffer[length], ptr, more + 1);
518 length += more;
519 offset = length + 3;
520
521 ptr = &buffer[offset];
522 }
523
524 if (strncasecmp(ptr, ".Nd", 3) == 0) {
525 (void) strcpy(&buffer[length], " -");
526
527 while (strncasecmp(ptr, ".Sh", 3) != 0) {
528 int more;
529
530 if (*ptr == '.') {
531 char *space;
532
533 if ((space = findwhitespace(ptr)) == NULL)
534 ptr = "";
535 else {
536 space++;
537 (void) memmove(ptr, space,
538 strlen(space) + 1);
539 }
540 }
541
542 if (*ptr != '\0') {
543 buffer[offset - 1] = ' ';
544 more = strlen(ptr) + 1;
545 offset += more;
546 }
547 ptr = &buffer[offset];
548 if ((sizeof(buffer) == offset) ||
549 (GetS(in, ptr, sizeof(buffer) - offset)
550 == NULL)) {
551 free(section);
552 return NULL;
553 }
554 if (manpreprocess(ptr))
555 *ptr = '\0';
556 }
557 }
558 }
559 else {
560 int offset;
561
562 if (*buffer == '.') {
563 char *space;
564
565 if ((space = findwhitespace(buffer)) == NULL) {
566 free(section);
567 return NULL;
568 }
569 space++;
570 (void) memmove(buffer, space, strlen(space) + 1);
571 }
572
573 offset = strlen(buffer) + 1;
574 for (;;) {
575 int more;
576
577 ptr = &buffer[offset];
578 if ((sizeof(buffer) == offset) ||
579 (GetS(in, ptr, sizeof(buffer) - offset)
580 == NULL)) {
581 free(section);
582 return NULL;
583 }
584 if (manpreprocess(ptr) || (*ptr == '\0'))
585 continue;
586
587 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
588 (strncasecmp(ptr, ".Ss", 3) == 0))
589 break;
590
591 if (*ptr == '.') {
592 char *space;
593
594 if ((space = findwhitespace(ptr)) == NULL) {
595 continue;
596 }
597
598 space++;
599 (void) memmove(ptr, space, strlen(space) + 1);
600 }
601
602 buffer[offset - 1] = ' ';
603 more = strlen(ptr);
604 if ((more > 1) && (ptr[more - 1] == ',') &&
605 isspace(ptr[more - 2])) {
606 ptr[more - 1] = '\0';
607 ptr[more - 2] = ',';
608 }
609 else more++;
610 offset += more;
611 }
612 }
613
614 if (section == NULL) {
615 char sectionbuffer[24];
616
617 (void) sprintf(sectionbuffer, " (%c) - ",
618 sectionext[defaultsection]);
619 ptr = replacestring(buffer, " - ", sectionbuffer);
620 }
621 else {
622 ptr = replacestring(buffer, " - ", section);
623 free(section);
624 }
625 return ptr;
626 }
627
628 char *
629 getwhatisdata(char *name)
630 {
631 gzFile *in;
632 char *data;
633 int section;
634
635 if ((in = gzopen(name, "r")) == NULL) {
636 errx(EXIT_FAILURE, "%s: %s",
637 name,
638 strerror((errno == 0) ? ENOMEM : errno));
639 /* NOTREACHED */
640 }
641
642 section = manpagesection(name);
643 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
644
645 (void) gzclose(in);
646 return data;
647 }
648
649 void
650 processmanpages(manpage **source, whatis **dest)
651 {
652 manpage *mp;
653
654 mp = *source;
655 *source = NULL;
656
657 while (mp != NULL) {
658 manpage *obsolete;
659 char *data;
660
661 if (mp->mp_left != NULL)
662 processmanpages(&mp->mp_left,dest);
663
664 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
665 if (!addwhatis(dest,data))
666 err(EXIT_FAILURE, NULL);
667 }
668
669 obsolete = mp;
670 mp = mp->mp_right;
671 free(obsolete);
672 }
673 }
674
675 int
676 dumpwhatis (FILE *out, whatis *tree)
677 {
678 while (tree != NULL) {
679 if (tree->wi_left)
680 if (!dumpwhatis(out, tree->wi_left)) return 0;
681
682 if ((fputs(tree->wi_data, out) == EOF) ||
683 (fputc('\n', out) == EOF))
684 return 0;
685
686 tree = tree->wi_right;
687 }
688
689 return 1;
690 }
691