/*	$NetBSD: makewhatis.c,v 1.5 1999/12/31 14:28:03 tron Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matthias Scheler.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.5 1999/12/31 14:28:03 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62
/*
 * One manual page file found while walking the manual hierarchy.
 * Nodes form a binary search tree keyed by inode number.  mp_name is
 * declared with one element but addmanpage() allocates the node large
 * enough to hold the complete path name.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with smaller inode numbers */
	manpage	*mp_right;	/* nodes with larger inode numbers */
	ino_t	 mp_inode;	/* inode number of the file (search key) */
	char	 mp_name[1];	/* path name, NUL terminated */
};
69
/*
 * One line of the whatis database.  Nodes form a binary search tree
 * ordered by strcmp() on wi_data, so an in-order walk (dumpwhatis())
 * emits the entries sorted.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries comparing less than wi_data */
	whatis	*wi_right;	/* entries comparing greater than wi_data */
	char	*wi_data;	/* entry text, without trailing newline */
};
75
76 int main (int, char **);
77 char *GetS(gzFile, char *, int);
78 int manpagesection (char *);
79 int addmanpage (manpage **, ino_t, char *);
80 int addwhatis (whatis **, char *);
81 char *replacestring (char *, char *, char *);
82 void catpreprocess (char *);
83 char *parsecatpage (gzFile *);
84 int manpreprocess (char *);
85 char *parsemanpage (gzFile *, int);
86 char *getwhatisdata (char *);
87 void processmanpages (manpage **,whatis **);
88 int dumpwhatis (FILE *, whatis *);
89
/* Manual hierarchy scanned when no directory is given on the command line. */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};

/*
 * Characters that may follow a '.' in a manual page file name.  The
 * position of a character in this string is the section index returned
 * by manpagesection(); index 0 ('0') selects the preformatted page
 * parser in getwhatisdata().
 */
char sectionext[] = "0123456789ln";

/* Name of the database file created by main(). */
char whatisdb[] = "whatis.db";

/* Program name, used by main() as the perror() prefix. */
extern char *__progname;
99
100 int
101 main(int argc,char **argv)
102 {
103 char **manpath;
104 FTS *fts;
105 FTSENT *fe;
106 manpage *source;
107 whatis *dest;
108 FILE *out;
109
110 (void)setlocale(LC_ALL, "");
111
112 manpath = (argc < 2) ? default_manpath : &argv[1];
113
114 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
115 perror(__progname);
116 return EXIT_FAILURE;
117 }
118
119 source = NULL;
120 while ((fe = fts_read(fts)) != NULL) {
121 switch (fe->fts_info) {
122 case FTS_F:
123 if (manpagesection(fe->fts_path) >= 0)
124 if (!addmanpage(&source,
125 fe->fts_statp->st_ino,
126 fe->fts_path))
127 err(EXIT_FAILURE, NULL);
128 case FTS_D:
129 case FTS_DC:
130 case FTS_DEFAULT:
131 case FTS_DP:
132 case FTS_SLNONE:
133 break;
134 default:
135 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
136 strerror(fe->fts_errno));
137 /* NOTREACHED */
138 }
139 }
140
141 (void)fts_close(fts);
142
143 dest = NULL;
144 processmanpages(&source, &dest);
145
146 if (chdir(manpath[0]) < 0)
147 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
148
149 if ((out = fopen(whatisdb, "w")) == NULL)
150 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
151
152 if (!(dumpwhatis(out, dest) ||
153 (fclose(out) < 0)) ||
154 (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
155 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
156
157 return EXIT_SUCCESS;
158 }
159
160 char
161 *GetS(gzFile in, char *buffer, int length)
162
163 {
164 char *ptr;
165
166 if (((ptr = gzgets(in, buffer, length)) != NULL) && (*ptr == '\0'))
167 ptr = NULL;
168
169 return ptr;
170 }
171
172 int
173 manpagesection(char *name)
174 {
175 char *ptr;
176
177 if ((ptr = strrchr(name, '/')) != NULL)
178 ptr++;
179 else
180 ptr = name;
181
182 while ((ptr = strchr(ptr, '.')) != NULL) {
183 int section;
184
185 ptr++;
186 section=0;
187 while (sectionext[section] != '\0')
188 if (sectionext[section] == *ptr)
189 return section;
190 else
191 section++;
192 }
193
194 return -1;
195 }
196
197 int
198 addmanpage(manpage **tree,ino_t inode,char *name)
199 {
200 manpage *mp;
201
202 while ((mp = *tree) != NULL) {
203 if (mp->mp_inode == inode)
204 return 1;
205 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
206 }
207
208 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
209 return 0;
210
211 mp->mp_left = NULL;
212 mp->mp_right = NULL;
213 mp->mp_inode = inode;
214 (void) strcpy(mp->mp_name, name);
215 *tree = mp;
216
217 return 1;
218 }
219
220 int
221 addwhatis(whatis **tree, char *data)
222 {
223 whatis *wi;
224 int result;
225
226 while ((wi = *tree) != NULL) {
227 result=strcmp(data, wi->wi_data);
228 if (result == 0) return 1;
229 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
230 }
231
232 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
233 return 0;
234
235 wi->wi_left = NULL;
236 wi->wi_right = NULL;
237 wi->wi_data = data;
238 *tree = wi;
239
240 return 1;
241 }
242
/*
 * Clean up one line of a preformatted manual page in place.
 *
 * Leading and trailing white space is removed, every other run of white
 * space is collapsed into a single blank, and overstrike sequences
 * ("x<backspace>y") are reduced to their last character.
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char; plain char may be negative for 8-bit text.
	 */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* Emit a blank only if more text follows. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (*(from + 1) == '\10') {
			/* Character followed by backspace: drop both. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
264
/*
 * Return a newly allocated copy of "string" in which the first
 * occurrence of "old" has been replaced by "new".
 *
 * When "new" is NULL, or "old" does not occur in "string", a plain
 * duplicate of "string" is returned.  The result must be released with
 * free(); NULL is returned when memory cannot be allocated.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *out;
	int total, oldlen, newlen, prefix;

	/* Without a replacement there is nothing to search for. */
	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL)
		return strdup(string);

	total = strlen(string);
	oldlen = strlen(old);
	newlen = strlen(new);

	out = malloc(total - oldlen + newlen + 1);
	if (out == NULL)
		return NULL;

	/* Head, replacement, then the tail including its terminator. */
	prefix = match - string;
	(void) memcpy(out, string, prefix);
	(void) memcpy(out + prefix, new, newlen);
	(void) strcpy(out + prefix + newlen, match + oldlen);

	return out;
}
292
293 char *
294 parsecatpage(gzFile *in)
295 {
296 char buffer[8192];
297 char *section, *ptr, *last;
298 int size;
299
300 do {
301 if (GetS(in, buffer, sizeof(buffer)) == NULL)
302 return NULL;
303 }
304 while (buffer[0] == '\n');
305
306 section = NULL;
307 if ((ptr = strchr(buffer, '(')) != NULL) {
308 if ((last = strchr(ptr + 1, ')')) !=NULL) {
309 int length;
310
311 length = last - ptr + 1;
312 if ((section = malloc(length + 5)) == NULL)
313 return NULL;
314
315 *section = ' ';
316 (void) memcpy(section + 1, ptr, length);
317 (void) strcpy(section + 1 + length, " - ");
318 }
319 }
320
321 for (;;) {
322 if (GetS(in, buffer, sizeof(buffer)) == NULL) {
323 free(section);
324 return NULL;
325 }
326 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
327 break;
328 }
329
330 ptr = last = buffer;
331 size = sizeof(buffer) - 1;
332 while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
333 int length;
334
335 catpreprocess(ptr);
336
337 length = strlen(ptr);
338 if (length == 0) {
339 *last = '\0';
340
341 ptr = replacestring(buffer, " - ", section);
342 free(section);
343 return ptr;
344 }
345 if ((length > 1) && (ptr[length - 1] == '-') &&
346 isalpha(ptr[length - 2]))
347 last = &ptr[--length];
348 else {
349 last = &ptr[length++];
350 *last = ' ';
351 }
352
353 ptr += length;
354 size -= length;
355 }
356
357 free(section);
358
359 return NULL;
360 }
361
/*
 * Clean up one line of an unformatted manual page in place.
 *
 * Returns 1, leaving the line untouched, when the line is a roff
 * comment (its first non-blank characters are .\").  Otherwise returns
 * 0 after rewriting the line as follows:
 *
 *  - leading and trailing white space is removed and every other run of
 *    white space becomes one blank, except that no blank is kept in
 *    front of a comma;
 *  - double quotes are removed;
 *  - "\-" becomes "-";
 *  - font escapes "\fX" and "\f(XX" are removed completely;
 *  - any other "\c" escape is removed together with the character "c";
 *  - a line ".Xr name section" becomes "name(section)".
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char; plain char may be negative for 8-bit text.
	 */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the '-'; never step past the end. */
				break;
			case 'f':
				/*
				 * Font change.  The font name is either one
				 * character or '(' plus two characters and
				 * must not end up in the output.
				 */
				from++;
				if (*from == '(') {
					from++;
					if (*from != '\0')
						from++;
					if (*from != '\0')
						from++;
				} else if (*from != '\0')
					from++;
				break;
			default:
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = strchr(from, ' ')) != NULL) {
			int length;

			/*
			 * The result is shorter than the input, so the name
			 * and section can be moved towards the start of the
			 * line without overwriting unread text.
			 */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
417
418 char *
419 parsemanpage(gzFile *in, int defaultsection)
420 {
421 char *section, buffer[8192], *ptr;
422
423 section = NULL;
424 do {
425 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
426 free(section);
427 return NULL;
428 }
429 if (manpreprocess(buffer))
430 continue;
431 if (strncasecmp(buffer, ".Dt", 3) == 0) {
432 char *end;
433
434 ptr = &buffer[3];
435 if (isspace(*ptr))
436 ptr++;
437 if ((ptr = strchr(ptr, ' ')) == NULL)
438 continue;
439
440 if ((end = strchr(++ptr, ' ')) != NULL)
441 *end = '\0';
442
443 free(section);
444 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
445 section[0] = ' ';
446 section[1] = '(';
447 (void) strcpy(§ion[2], ptr);
448 (void) strcat(§ion[2], ") - ");
449 }
450 }
451 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
452
453 do {
454 if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
455 free(section);
456 return NULL;
457 }
458 } while (manpreprocess(buffer));
459
460 if (strncasecmp(buffer, ".Nm", 3) == 0) {
461 int length, offset;
462
463 ptr = &buffer[3];
464 if (isspace(*ptr))
465 ptr++;
466
467 length = strlen(ptr);
468 if ((length > 1) && (ptr[length - 1] == ',') &&
469 isspace(ptr[length - 2])) {
470 ptr[--length] = '\0';
471 ptr[length - 1] = ',';
472 }
473 (void) memmove(buffer, ptr, length + 1);
474
475 offset = length + 3;
476 ptr = &buffer[offset];
477 for (;;) {
478 int more;
479
480 if ((sizeof(buffer) == offset) ||
481 (GetS(in, ptr, sizeof(buffer) - offset)
482 == NULL)) {
483 free(section);
484 return NULL;
485 }
486 if (manpreprocess(ptr))
487 continue;
488
489 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
490
491 ptr += 3;
492 if (isspace(*ptr))
493 ptr++;
494
495 buffer[length++] = ' ';
496 more = strlen(ptr);
497 if ((more > 1) && (ptr[more - 1] == ',') &&
498 isspace(ptr[more - 2])) {
499 ptr[--more] = '\0';
500 ptr[more - 1] = ',';
501 }
502
503 (void) memmove(&buffer[length], ptr, more + 1);
504 length += more;
505 offset = length + 3;
506
507 ptr = &buffer[offset];
508 }
509
510 if (strncasecmp(ptr, ".Nd", 3) == 0) {
511 (void) strcpy(&buffer[length], " -");
512
513 while (strncasecmp(ptr, ".Sh", 3) != 0) {
514 int more;
515
516 if (*ptr == '.') {
517 char *space;
518
519 if ((space = strchr(ptr, ' ')) == NULL)
520 ptr = "";
521 else {
522 space++;
523 (void) memmove(ptr, space,
524 strlen(space) + 1);
525 }
526 }
527
528 if (*ptr != '\0') {
529 buffer[offset - 1] = ' ';
530 more = strlen(ptr) + 1;
531 offset += more;
532 }
533 ptr = &buffer[offset];
534 if ((sizeof(buffer) == offset) ||
535 (GetS(in, ptr, sizeof(buffer) - offset)
536 == NULL)) {
537 free(section);
538 return NULL;
539 }
540 if (manpreprocess(ptr))
541 *ptr = '\0';
542 }
543 }
544 }
545 else {
546 int offset;
547
548 if (*buffer == '.') {
549 char *space;
550
551 if ((space = strchr(buffer, ' ')) == NULL) {
552 free(section);
553 return NULL;
554 }
555 space++;
556 (void) memmove(buffer, space, strlen(space) + 1);
557 }
558
559 offset = strlen(buffer) + 1;
560 for (;;) {
561 int more;
562
563 ptr = &buffer[offset];
564 if ((sizeof(buffer) == offset) ||
565 (GetS(in, ptr, sizeof(buffer) - offset)
566 == NULL)) {
567 free(section);
568 return NULL;
569 }
570 if (manpreprocess(ptr) || (*ptr == '\0'))
571 continue;
572
573 if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
574 (strncasecmp(ptr, ".Ss", 3) == 0))
575 break;
576
577 if (*ptr == '.') {
578 char *space;
579
580 if ((space = strchr(ptr, ' ')) == NULL) {
581 continue;
582 }
583
584 space++;
585 (void) memmove(ptr, space, strlen(space) + 1);
586 }
587
588 buffer[offset - 1] = ' ';
589 more = strlen(ptr);
590 if ((more > 1) && (ptr[more - 1] == ',') &&
591 isspace(ptr[more - 2])) {
592 ptr[more - 1] = '\0';
593 ptr[more - 2] = ',';
594 }
595 else more++;
596 offset += more;
597 }
598 }
599
600 if (section == NULL) {
601 char sectionbuffer[24];
602
603 (void) sprintf(sectionbuffer, " (%c) - ",
604 sectionext[defaultsection]);
605 ptr = replacestring(buffer, " - ", sectionbuffer);
606 }
607 else {
608 ptr = replacestring(buffer, " - ", section);
609 free(section);
610 }
611 return ptr;
612 }
613
614 char *
615 getwhatisdata(char *name)
616 {
617 gzFile *in;
618 char *data;
619 int section;
620
621 if ((in = gzopen(name, "r")) == NULL) {
622 errx(EXIT_FAILURE, "%s: %s",
623 name,
624 strerror((errno == 0) ? ENOMEM : errno));
625 /* NOTREACHED */
626 }
627
628 section = manpagesection(name);
629 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
630
631 (void) gzclose(in);
632 return data;
633 }
634
635 void
636 processmanpages(manpage **source, whatis **dest)
637 {
638 manpage *mp;
639
640 mp = *source;
641 *source = NULL;
642
643 while (mp != NULL) {
644 manpage *obsolete;
645 char *data;
646
647 if (mp->mp_left != NULL)
648 processmanpages(&mp->mp_left,dest);
649
650 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
651 if (!addwhatis(dest,data))
652 err(EXIT_FAILURE, NULL);
653 }
654
655 obsolete = mp;
656 mp = mp->mp_right;
657 free(obsolete);
658 }
659 }
660
661 int
662 dumpwhatis (FILE *out, whatis *tree)
663 {
664 while (tree != NULL) {
665 if (tree->wi_left)
666 if (!dumpwhatis(out, tree->wi_left)) return 0;
667
668 if ((fputs(tree->wi_data, out) == EOF) ||
669 (fputc('\n', out) == EOF))
670 return 0;
671
672 tree = tree->wi_right;
673 }
674
675 return 1;
676 }
677