/*	$NetBSD: makewhatis.c,v 1.1 1999/09/25 21:17:37 tron Exp $	*/
2
3 /*-
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthias Scheler.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
42 All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 __RCSID("$NetBSD: makewhatis.c,v 1.1 1999/09/25 21:17:37 tron Exp $");
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fts.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61 #include <zlib.h>
62
/*
 * One manual page file found while walking the manual hierarchy.
 * The records form a binary search tree keyed on the inode number, so a
 * file that is reachable under several names is only remembered once.
 */
struct manpagestruct;
typedef struct manpagestruct manpage;

struct manpagestruct {
	manpage	*mp_left;	/* pages with a smaller inode number */
	manpage	*mp_right;	/* pages with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file (search key) */
	char	 mp_name[1];	/* path name; the record is allocated
				 * large enough to hold the whole string */
};
69
/*
 * One line of the whatis database.  The records form a binary search
 * tree ordered by strcmp() on the line text.
 */
struct whatisstruct;
typedef struct whatisstruct whatis;

struct whatisstruct {
	whatis	*wi_left;	/* lines that sort before this one */
	whatis	*wi_right;	/* lines that sort after this one */
	char	*wi_data;	/* text of the whatis line */
};
75
76 int main (int, char **);
77 int manpagesection (char *);
78 int addmanpage (manpage **, ino_t, char *);
79 int addwhatis (whatis **, char *);
80 char *replacestring (char *, char *, char *);
81 void catpreprocess (char *);
82 char *parsecatpage (gzFile *);
83 int manpreprocess (char *);
84 char *parsemanpage (gzFile *, int);
85 char *getwhatisdata (char *);
86 void processmanpages (manpage **,whatis **);
87 int dumpwhatis (FILE *, whatis *);
88
/* NULL-terminated list of directories searched when no argument is given. */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};

/* Name of the output file, created inside the first search directory. */
char whatisdb[] = "whatis.db";

/* Program name, used as the prefix of the fts_open() error message. */
extern char *__progname;
97
98 int
99 main(int argc,char **argv)
100 {
101 char **manpath;
102 FTS *fts;
103 FTSENT *fe;
104 manpage *source;
105 whatis *dest;
106 FILE *out;
107
108 (void)setlocale(LC_ALL, "");
109
110 manpath = (argc < 2) ? default_manpath : &argv[1];
111
112 if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
113 perror(__progname);
114 return EXIT_FAILURE;
115 }
116
117 source = NULL;
118 while ((fe = fts_read(fts)) != NULL) {
119 switch (fe->fts_info) {
120 case FTS_F:
121 if (manpagesection(fe->fts_path) >= 0)
122 if (!addmanpage(&source,
123 fe->fts_statp->st_ino,
124 fe->fts_path))
125 err(EXIT_FAILURE, NULL);
126 case FTS_D:
127 case FTS_DP:
128 break;
129 default:
130 errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
131 strerror(fe->fts_errno));
132 /* NOTREACHED */
133 }
134 }
135
136 (void)fts_close(fts);
137
138 dest = NULL;
139 processmanpages(&source, &dest);
140
141 if (chdir(manpath[0]) < 0)
142 errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
143
144 if ((out = fopen(whatisdb, "w")) == NULL)
145 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
146
147 if (!(dumpwhatis(out, dest) && (fclose(out) == 0)))
148 errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
149
150 return EXIT_SUCCESS;
151 }
152
/*
 * manpagesection --
 *	Derive the manual section from a path name.
 *
 *	Only the last path component (the text after the final '/') is
 *	examined.  It is scanned from left to right for a '.' that is
 *	immediately followed by a decimal digit, and the value of the first
 *	such digit is returned: "ls.1" and "ls.1.gz" give 1, "ls.0" gives 0.
 *
 *	Returns -1 if the component has no ".<digit>" in it.
 */
int
manpagesection(char *name)
{
	char *ptr;

	if ((ptr = strrchr(name, '/')) != NULL)
		ptr++;
	else
		ptr = name;

	while ((ptr = strchr(ptr, '.')) != NULL) {
		ptr++;
		/* <ctype.h> functions are only defined for unsigned char values */
		if (isdigit((unsigned char)*ptr))
			return *ptr - '0';
	}

	return -1;
}
169
170 int
171 addmanpage(manpage **tree,ino_t inode,char *name)
172 {
173 manpage *mp;
174
175 while ((mp = *tree) != NULL) {
176 if (mp->mp_inode == inode)
177 return 1;
178 tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
179 }
180
181 if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
182 return 0;
183
184 mp->mp_left = NULL;
185 mp->mp_right = NULL;
186 mp->mp_inode = inode;
187 (void) strcpy(mp->mp_name, name);
188 *tree = mp;
189
190 return 1;
191 }
192
193 int
194 addwhatis(whatis **tree, char *data)
195 {
196 whatis *wi;
197 int result;
198
199 while ((wi = *tree) != NULL) {
200 result=strcmp(data, wi->wi_data);
201 if (result == 0) return 1;
202 tree = &((result < 0) ? wi->wi_left : wi->wi_right);
203 }
204
205 if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
206 return 0;
207
208 wi->wi_left = NULL;
209 wi->wi_right = NULL;
210 wi->wi_data = data;
211 *tree = wi;
212
213 return 1;
214 }
215
/*
 * catpreprocess --
 *	Clean up one line of a formatted ("cat") manual page in place:
 *
 *	- leading and trailing white space is removed,
 *	- every other run of white space becomes a single blank,
 *	- a character that is followed by a backspace is dropped together
 *	  with the backspace, which undoes overstruck text such as
 *	  "N\bN" (bold) or "_\bN" (underline).
 *
 *	The result is never longer than the input.
 */
void
catpreprocess(char *from)
{
	char *to;

	to = from;
	/* <ctype.h> functions are only defined for unsigned char values */
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			/* no blank for white space at the end of the line */
			if (*from != '\0')
				*to++ = ' ';
		} else if (from[1] == '\10') {
			/* overstrike: skip this character and the backspace */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
237
/*
 * replacestring --
 *	Return a newly allocated copy of `string' in which the first
 *	occurrence of `old' has been replaced by `new'.
 *
 *	If `new' is NULL, or `old' does not occur in `string', the result
 *	is an unmodified copy of `string'.
 *
 *	The caller owns the result and releases it with free().  Returns
 *	NULL if no memory is available.
 */
char *
replacestring(char *string, char *old, char *new)
{
	char *match, *result;
	size_t slength, olength, nlength, pos;

	slength = strlen(string);

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL) {
		/* nothing to replace: hand back a plain copy */
		if ((result = malloc(slength + 1)) != NULL)
			(void) memcpy(result, string, slength + 1);
		return result;
	}

	olength = strlen(old);
	nlength = strlen(new);
	/* `old' occurs in `string', so olength <= slength */
	if ((result = malloc(slength - olength + nlength + 1)) == NULL)
		return NULL;

	pos = (size_t)(match - string);
	(void) memcpy(result, string, pos);
	(void) memcpy(&result[pos], new, nlength);
	/* copy the rest, including the terminating NUL */
	(void) memcpy(&result[pos + nlength], &string[pos + olength],
	    slength - pos - olength + 1);

	return result;
}
265
266 char *
267 parsecatpage(gzFile *in)
268 {
269 char buffer[8192];
270 char *section, *ptr, *last;
271 int size;
272
273 do {
274 if (gzgets(in, buffer, sizeof(buffer)) == NULL)
275 return NULL;
276 }
277 while (buffer[0] == '\n');
278
279 section = NULL;
280 if ((ptr = strchr(buffer, '(')) != NULL) {
281 if ((last = strchr(ptr + 1, ')')) !=NULL) {
282 int length;
283
284 length = last - ptr + 1;
285 if ((section = malloc(length + 5)) == NULL)
286 return NULL;
287
288 *section = ' ';
289 (void) memcpy(section + 1, ptr, length);
290 (void) strcpy(section + 1 + length, " - ");
291 }
292 }
293
294 for (;;) {
295 if (gzgets(in, buffer, sizeof(buffer)) == NULL) {
296 free(section);
297 return NULL;
298 }
299 if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
300 break;
301 }
302
303 ptr = last = buffer;
304 size = sizeof(buffer) - 1;
305 while ((size > 0) && (gzgets(in, ptr, size) != NULL)) {
306 int length;
307
308 catpreprocess(ptr);
309
310 length = strlen(ptr);
311 if (length == 0) {
312 *last = '\0';
313
314 ptr = replacestring(buffer, " - ", section);
315 free(section);
316 return ptr;
317 }
318 if ((length > 1) && (ptr[length - 1] == '-') &&
319 isalpha(ptr[length - 2]))
320 last = &ptr[--length];
321 else {
322 last = &ptr[length++];
323 *last = ' ';
324 }
325
326 ptr += length;
327 size -= length;
328 }
329
330 free(section);
331
332 return NULL;
333 }
334
/*
 * manpreprocess --
 *	Clean up one line of manual page source in place.
 *
 *	A line whose first non-blank characters are .\" is a comment: it is
 *	left unchanged and 1 is returned.  For every other line
 *
 *	- leading and trailing white space is removed,
 *	- every other run of white space becomes a single blank, except
 *	  that no blank is kept in front of a ',',
 *	- "\-" becomes "-"; any other backslash is removed together with
 *	  the one character that follows it,
 *	- double quotes are removed,
 *	- a ".Xr name section" request is rewritten to "name(section)",
 *
 *	and 0 is returned.  The result is never longer than the input.
 */
int
manpreprocess(char *line)
{
	char *from, *to;

	to = from = line;
	/* <ctype.h> functions are only defined for unsigned char values */
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			do {
				from++;
			} while (isspace((unsigned char)*from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* leave NUL or '-' for the next iteration */
				break;
			default:
				from++;
				break;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char *sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = strchr(from, ' ')) != NULL) {
			size_t length;

			/*
			 * Both pieces move towards the start of the line,
			 * so the destination never runs into unread text.
			 */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
390
391 char *
392 parsemanpage(gzFile *in, int defaultsection)
393 {
394 char *section, buffer[8192], *ptr;
395
396 section = NULL;
397 do {
398 if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
399 free(section);
400 return NULL;
401 }
402 if (manpreprocess(buffer))
403 continue;
404 if (strncasecmp(buffer, ".Dt", 3) == 0) {
405 char *end;
406
407 ptr = &buffer[3];
408 if (isspace(*ptr))
409 ptr++;
410 if ((ptr = strchr(ptr, ' ')) == NULL)
411 continue;
412
413 if ((end = strchr(++ptr, ' ')) != NULL)
414 *end = '\0';
415
416 free(section);
417 if ((section = malloc(strlen(ptr) + 7)) != NULL) {
418 section[0] = ' ';
419 section[1] = '(';
420 (void) strcpy(§ion[2], ptr);
421 (void) strcat(§ion[2], ") - ");
422 }
423 }
424 } while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
425
426 do {
427 if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
428 free(section);
429 return NULL;
430 }
431 } while (manpreprocess(buffer));
432
433 if (strncasecmp(buffer, ".Nm", 3) == 0) {
434 int length, offset;
435
436 ptr = &buffer[3];
437 if (isspace(*ptr))
438 ptr++;
439
440 length = strlen(ptr);
441 if ((length > 1) && (ptr[length - 1] == ',') &&
442 isspace(ptr[length - 2])) {
443 ptr[--length] = '\0';
444 ptr[length - 1] = ',';
445 }
446 (void) memmove(buffer, ptr, length + 1);
447
448 offset = length + 3;
449 ptr = &buffer[offset];
450 for (;;) {
451 int more;
452
453 if ((sizeof(buffer) == offset) ||
454 (gzgets(in, ptr, sizeof(buffer) - offset)
455 == NULL)) {
456 free(section);
457 return NULL;
458 }
459 if (manpreprocess(ptr))
460 continue;
461
462 if (strncasecmp(ptr, ".Nm", 3) != 0) break;
463
464 ptr += 3;
465 if (isspace(*ptr))
466 ptr++;
467
468 buffer[length++] = ' ';
469 more = strlen(ptr);
470 if ((more > 1) && (ptr[more - 1] == ',') &&
471 isspace(ptr[more - 2])) {
472 ptr[--more] = '\0';
473 ptr[more - 1] = ',';
474 }
475
476 (void) memmove(&buffer[length], ptr, more + 1);
477 length += more;
478 offset = length + 3;
479
480 ptr = &buffer[offset];
481 }
482
483 if (strncasecmp(ptr, ".Nd", 3) == 0) {
484 (void) strcpy(&buffer[length], " -");
485
486 while (strncasecmp(ptr, ".Sh", 3) != 0) {
487 int more;
488
489 if (*ptr == '.') {
490 char *space;
491
492 if ((space = strchr(ptr, ' ')) == NULL)
493 ptr = "";
494 else {
495 space++;
496 (void) memmove(ptr, space,
497 strlen(space) + 1);
498 }
499 }
500
501 if (*ptr != '\0') {
502 buffer[offset - 1] = ' ';
503 more = strlen(ptr) + 1;
504 offset += more;
505 }
506 ptr = &buffer[offset];
507 if ((sizeof(buffer) == offset) ||
508 (gzgets(in, ptr, sizeof(buffer) - offset)
509 == NULL)) {
510 free(section);
511 return NULL;
512 }
513 if (manpreprocess(ptr))
514 *ptr = '\0';
515 }
516 }
517 }
518 else {
519 int offset;
520
521 if (*buffer == '.') {
522 char *space;
523
524 if ((space = strchr(buffer, ' ')) == NULL) {
525 free(section);
526 return NULL;
527 }
528 space++;
529 (void) memmove(buffer, space, strlen(space));
530 }
531
532 offset = strlen(buffer) + 1;
533 for (;;) {
534 int more;
535
536 ptr = &buffer[offset];
537 if ((sizeof(buffer) == offset) ||
538 (gzgets(in, ptr, sizeof(buffer) - offset)
539 == NULL)) {
540 free(section);
541 return NULL;
542 }
543 if (manpreprocess(ptr) || (*ptr == '\0'))
544 continue;
545
546 if (strncasecmp(ptr, ".Sh", 3) == 0)
547 break;
548
549 if (*ptr == '.') {
550 char *space;
551
552 if ((space = strchr(ptr, ' ')) == NULL)
553 continue;
554 space++;
555 (void) memmove(ptr, space, strlen(space));
556 }
557
558 buffer[offset - 1] = ' ';
559 more = strlen(ptr);
560 if ((more > 1) && (ptr[more - 1] == ',') &&
561 isspace(ptr[more - 2])) {
562 ptr[more - 1] = '\0';
563 ptr[more - 2] = ',';
564 }
565 else more++;
566 offset += more;
567 }
568 }
569
570 if (section == NULL) {
571 char sectionbuffer[24];
572
573 (void) sprintf(sectionbuffer, " (%d) - ", defaultsection);
574 ptr = replacestring(buffer, " - ", sectionbuffer);
575 }
576 else {
577 ptr = replacestring(buffer, " - ", section);
578 free(section);
579 }
580 return ptr;
581 }
582
583 char *
584 getwhatisdata(char *name)
585 {
586 gzFile *in;
587 char *data;
588 int section;
589
590 if ((in = gzopen(name, "r")) == NULL) {
591 errx(EXIT_FAILURE, "%s: %s",
592 name,
593 strerror((errno == 0) ? ENOMEM : errno));
594 /* NOTREACHED */
595 }
596
597 section = manpagesection(name);
598 data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
599
600 (void) gzclose(in);
601 return data;
602 }
603
604 void
605 processmanpages(manpage **source, whatis **dest)
606 {
607 manpage *mp;
608
609 mp = *source;
610 *source = NULL;
611
612 while (mp != NULL) {
613 manpage *obsolete;
614 char *data;
615
616 if (mp->mp_left != NULL)
617 processmanpages(&mp->mp_left,dest);
618
619 if ((data = getwhatisdata(mp->mp_name)) != NULL) {
620 if (!addwhatis(dest,data))
621 err(EXIT_FAILURE, NULL);
622 }
623
624 obsolete = mp;
625 mp = mp->mp_right;
626 free(obsolete);
627 }
628 }
629
630 int
631 dumpwhatis (FILE *out, whatis *tree)
632 {
633 while (tree != NULL) {
634 if (tree->wi_left)
635 if (!dumpwhatis(out, tree->wi_left)) return 0;
636
637 if ((fputs(tree->wi_data, out) == EOF) ||
638 (fputc('\n', out) == EOF))
639 return 0;
640
641 tree = tree->wi_right;
642 }
643
644 return 1;
645 }
646