Home | History | Annotate | Line # | Download | only in makewhatis
makewhatis.c revision 1.4.2.1
      1 /*	$NetBSD: makewhatis.c,v 1.4.2.1 1999/12/27 18:30:17 wrstuden Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matthias Scheler.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 #ifndef lint
     41 __COPYRIGHT("@(#) Copyright (c) 1999 The NetBSD Foundation, Inc.\n\
     42 	All rights reserved.\n");
     43 #endif /* not lint */
     44 
     45 #ifndef lint
     46 __RCSID("$NetBSD: makewhatis.c,v 1.4.2.1 1999/12/27 18:30:17 wrstuden Exp $");
     47 #endif /* not lint */
     48 
     49 #include <sys/types.h>
     50 #include <sys/stat.h>
     51 
     52 #include <ctype.h>
     53 #include <err.h>
     54 #include <errno.h>
     55 #include <fts.h>
     56 #include <locale.h>
     57 #include <stdio.h>
     58 #include <stdlib.h>
     59 #include <string.h>
     60 #include <unistd.h>
     61 #include <zlib.h>
     62 
/*
 * Node of the binary tree of manual page files collected by main().
 * addmanpage() orders the tree by inode number and ignores a file whose
 * inode number is already present.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file (tree key) */
	char	 mp_name[1];	/* path name; addmanpage() allocates the
				   node large enough for the full string */
};
     69 
/*
 * Node of the binary tree of finished whatis entries.  addwhatis()
 * orders the tree with strcmp() on wi_data, so the in-order walk done
 * by dumpwhatis() writes the entries sorted.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries that compare lower */
	whatis	*wi_right;	/* entries that compare higher */
	char	*wi_data;	/* entry text; the pointer is stored as
				   given, the string is not copied */
};
     75 
/*
 * Forward declarations for every function in this file.
 *
 * addmanpage(), addwhatis() and dumpwhatis() return 1 on success and 0
 * on failure; manpagesection() returns an index into sectionext or -1;
 * manpreprocess() returns 1 for a roff comment line and 0 otherwise.
 * The char * returning parsers yield a malloc()ed string or NULL.
 */
int              main (int, char **);
int		 manpagesection (char *);
int		 addmanpage (manpage **, ino_t, char *);
int		 addwhatis (whatis **, char *);
char		*replacestring (char *, char *, char *);
void		 catpreprocess (char *);
/* NOTE(review): gzFile is passed as "gzFile *" here; confirm against the
   zlib version in use, where gzFile may itself be a pointer type. */
char		*parsecatpage (gzFile *);
int		 manpreprocess (char *);
char		*parsemanpage (gzFile *, int);
char		*getwhatisdata (char *);
void		 processmanpages (manpage **,whatis **);
int		 dumpwhatis (FILE *, whatis *);
     88 
/*
 * NULL-terminated list of directories main() scans when no directory is
 * given on the command line.
 */
char *default_manpath[] = {
	"/usr/share/man",
	NULL
};

/*
 * Characters that may follow a '.' in a file name to mark the file as a
 * manual page.  manpagesection() returns the index of the matching
 * character; getwhatisdata() treats index 0 ('0') as a formatted page.
 */
char sectionext[] = "0123456789ln";
/* Name of the output file main() creates in the first manpath entry. */
char whatisdb[]   = "whatis.db";

/* Program name used in main()'s perror() message; defined outside this file. */
extern char *__progname;
     98 
     99 int
    100 main(int argc,char **argv)
    101 {
    102 	char	**manpath;
    103 	FTS	*fts;
    104 	FTSENT	*fe;
    105 	manpage	*source;
    106 	whatis	*dest;
    107 	FILE	*out;
    108 
    109 	(void)setlocale(LC_ALL, "");
    110 
    111 	manpath = (argc < 2) ? default_manpath : &argv[1];
    112 
    113 	if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL) {
    114 		perror(__progname);
    115 		return EXIT_FAILURE;
    116 	}
    117 
    118 	source = NULL;
    119 	while ((fe = fts_read(fts)) != NULL) {
    120 		switch (fe->fts_info) {
    121 		case FTS_F:
    122 			if (manpagesection(fe->fts_path) >= 0)
    123 				if (!addmanpage(&source,
    124 					fe->fts_statp->st_ino,
    125 					fe->fts_path))
    126 					err(EXIT_FAILURE, NULL);
    127 		case FTS_D:
    128 		case FTS_DC:
    129 		case FTS_DEFAULT:
    130 		case FTS_DP:
    131 		case FTS_SLNONE:
    132 			break;
    133 		default:
    134 			errx(EXIT_FAILURE, "%s: %s", fe->fts_path,
    135 			    strerror(fe->fts_errno));
    136 			/* NOTREACHED */
    137 		}
    138 	}
    139 
    140 	(void)fts_close(fts);
    141 
    142 	dest = NULL;
    143 	processmanpages(&source, &dest);
    144 
    145 	if (chdir(manpath[0]) < 0)
    146 		errx(EXIT_FAILURE, "%s: %s", manpath[0], strerror(errno));
    147 
    148 	if ((out = fopen(whatisdb, "w")) == NULL)
    149 		errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
    150 
    151 	if (!(dumpwhatis(out, dest) ||
    152 	    (fclose(out) < 0)) ||
    153 	    (chmod(whatisdb, S_IRUSR|S_IRGRP|S_IROTH) < 0))
    154 		errx(EXIT_FAILURE, "%s: %s", whatisdb, strerror(errno));
    155 
    156 	return EXIT_SUCCESS;
    157 }
    158 
    159 int
    160 manpagesection(char *name)
    161 {
    162 	char	*ptr;
    163 
    164 	if ((ptr = strrchr(name, '/')) != NULL)
    165 		ptr++;
    166 	else
    167 		ptr = name;
    168 
    169 	while ((ptr = strchr(ptr, '.')) != NULL) {
    170 		int section;
    171 
    172 		ptr++;
    173 		section=0;
    174 		while (sectionext[section] != '\0')
    175 			if (sectionext[section] == *ptr)
    176 				return section;
    177 			else
    178 				section++;
    179 	}
    180 
    181 	return -1;
    182 }
    183 
    184 int
    185 addmanpage(manpage **tree,ino_t inode,char *name)
    186 {
    187 	manpage	*mp;
    188 
    189 	while ((mp = *tree) != NULL) {
    190 		if (mp->mp_inode == inode)
    191 			return 1;
    192 		tree = &((inode < mp->mp_inode) ? mp->mp_left : mp->mp_right);
    193 	}
    194 
    195 	if ((mp = malloc(sizeof(manpage) + strlen(name))) == NULL)
    196 		return 0;
    197 
    198 	mp->mp_left = NULL;
    199 	mp->mp_right = NULL;
    200 	mp->mp_inode = inode;
    201 	(void) strcpy(mp->mp_name, name);
    202 	*tree = mp;
    203 
    204 	return 1;
    205 }
    206 
    207 int
    208 addwhatis(whatis **tree, char *data)
    209 {
    210 	whatis *wi;
    211 	int result;
    212 
    213 	while ((wi = *tree) != NULL) {
    214 		result=strcmp(data, wi->wi_data);
    215 		if (result == 0) return 1;
    216 		tree = &((result < 0) ? wi->wi_left : wi->wi_right);
    217 	}
    218 
    219 	if ((wi = malloc(sizeof(whatis) + strlen(data))) == NULL)
    220 		return 0;
    221 
    222 	wi->wi_left = NULL;
    223 	wi->wi_right = NULL;
    224 	wi->wi_data = data;
    225 	*tree = wi;
    226 
    227 	return 1;
    228 }
    229 
/*
 * Clean up one line of a formatted manual page in place.
 *
 *  - leading and trailing white space is removed,
 *  - every inner run of white space becomes a single blank,
 *  - a character that is followed by a backspace ('\10') is dropped
 *    together with the backspace, which undoes overstriking such as
 *    "N\bN" or "_\bx".
 *
 * The result is never longer than the input.
 */
void
catpreprocess(char *from)
{
	char	*to;

	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char; a plain char may be negative for 8-bit text.
	 */
	to = from;
	while (isspace((unsigned char)*from))
		from++;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			/* White space at the end of the line is discarded. */
			if (*from != '\0')
				*to++ = ' ';
		} else if (*(from + 1) == '\10') {
			/* Skip the overstruck character and the backspace. */
			from += 2;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';
}
    251 
/*
 * Return a newly allocated copy of string in which the first occurrence
 * of old has been replaced by new.
 *
 * If new is NULL, or old does not occur in string, the result is an
 * unmodified copy of string.  The caller owns the returned memory.
 *
 * Returns NULL if the memory could not be allocated.
 */
char *
replacestring(char *string, char *old, char *new)

{
	char	*match, *result;
	size_t	 head, nlength, tail;

	match = (new != NULL) ? strstr(string, old) : NULL;
	if (match == NULL) {
		/* Nothing to substitute: hand back a plain duplicate. */
		size_t	 total = strlen(string) + 1;

		if ((result = malloc(total)) != NULL)
			(void) memcpy(result, string, total);
		return result;
	}

	head = (size_t)(match - string);	/* text before the match */
	nlength = strlen(new);
	match += strlen(old);			/* text after the match */
	tail = strlen(match) + 1;		/* includes the NUL */

	if ((result = malloc(head + nlength + tail)) == NULL)
		return NULL;

	(void) memcpy(result, string, head);
	(void) memcpy(&result[head], new, nlength);
	(void) memcpy(&result[head + nlength], match, tail);

	return result;
}
    279 
    280 char *
    281 parsecatpage(gzFile *in)
    282 {
    283 	char 	 buffer[8192];
    284 	char	*section, *ptr, *last;
    285 	int	 size;
    286 
    287 	do {
    288 		if (gzgets(in, buffer, sizeof(buffer)) == NULL)
    289 			return NULL;
    290 	}
    291 	while (buffer[0] == '\n');
    292 
    293 	section = NULL;
    294 	if ((ptr = strchr(buffer, '(')) != NULL) {
    295 		if ((last = strchr(ptr + 1, ')')) !=NULL) {
    296 			int 	length;
    297 
    298 			length = last - ptr + 1;
    299 			if ((section = malloc(length + 5)) == NULL)
    300 				return NULL;
    301 
    302 			*section = ' ';
    303 			(void) memcpy(section + 1, ptr, length);
    304 			(void) strcpy(section + 1 + length, " - ");
    305 		}
    306 	}
    307 
    308 	for (;;) {
    309 		if (gzgets(in, buffer, sizeof(buffer)) == NULL) {
    310 			free(section);
    311 			return NULL;
    312 		}
    313 		if (strncmp(buffer, "N\10NA\10AM\10ME\10E", 12) == 0)
    314 			break;
    315 	}
    316 
    317 	ptr = last = buffer;
    318 	size = sizeof(buffer) - 1;
    319 	while ((size > 0) && (gzgets(in, ptr, size) != NULL)) {
    320 		int	 length;
    321 
    322 		catpreprocess(ptr);
    323 
    324 		length = strlen(ptr);
    325 		if (length == 0) {
    326 			*last = '\0';
    327 
    328 			ptr = replacestring(buffer, " - ", section);
    329 			free(section);
    330 			return ptr;
    331 		}
    332 		if ((length > 1) && (ptr[length - 1] == '-') &&
    333 		    isalpha(ptr[length - 2]))
    334 			last = &ptr[--length];
    335 		else {
    336 			last = &ptr[length++];
    337 			*last = ' ';
    338 		}
    339 
    340 		ptr += length;
    341 		size -= length;
    342 	}
    343 
    344 	free(section);
    345 
    346 	return NULL;
    347 }
    348 
/*
 * Normalise one line of manual page source in place.
 *
 * A line whose first non-blank characters are the roff comment
 * introducer (a dot, a backslash and a double quote) is left untouched
 * and reported with a return value of 1.  For every other line:
 *
 *  - leading and trailing white space is removed and inner runs become
 *    one blank, except that no blank is kept in front of a ',',
 *  - "\-" becomes "-"; for any other backslash sequence the backslash
 *    and the character after it are dropped,
 *  - double quotes are removed,
 *  - a line starting with ".Xr" (any case) of the form ".Xr name sect"
 *    is rewritten to "name(sect)".
 *
 * Returns 1 for a comment line and 0 otherwise.
 */
int
manpreprocess(char *line)
{
	char	*from, *to;

	/*
	 * The <ctype.h> functions require a value representable as
	 * unsigned char; a plain char may be negative for 8-bit text.
	 */
	to = from = line;
	while (isspace((unsigned char)*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (isspace((unsigned char)*from)) {
			while (isspace((unsigned char)*++from))
				continue;
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* Keep the '-'; stop at the end of the line. */
				break;
			default:
				/* Drop the character after the backslash. */
				from++;
			}
		} else if (*from == '"') {
			from++;
		} else {
			*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (isspace((unsigned char)*from))
			from++;

		if ((sect = strchr(from, ' ')) != NULL) {
			int	 length;

			/*
			 * Build "name(sect)" at the start of the line.  The
			 * result is shorter than the original text, so the
			 * copies only ever move data towards the front.
			 */
			*sect++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			(void) strcpy(&to[length], ")");
		}
	}

	return 0;
}
    404 
    405 char *
    406 parsemanpage(gzFile *in, int defaultsection)
    407 {
    408 	char	*section, buffer[8192], *ptr;
    409 
    410 	section = NULL;
    411 	do {
    412 		if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
    413 			free(section);
    414 			return NULL;
    415 		}
    416 		if (manpreprocess(buffer))
    417 			continue;
    418 		if (strncasecmp(buffer, ".Dt", 3) == 0) {
    419 			char	*end;
    420 
    421 			ptr = &buffer[3];
    422 			if (isspace(*ptr))
    423 				ptr++;
    424 			if ((ptr = strchr(ptr, ' ')) == NULL)
    425 				continue;
    426 
    427 			if ((end = strchr(++ptr, ' ')) != NULL)
    428 				*end = '\0';
    429 
    430 			free(section);
    431 			if ((section = malloc(strlen(ptr) + 7)) != NULL) {
    432 				section[0] = ' ';
    433 				section[1] = '(';
    434 				(void) strcpy(&section[2], ptr);
    435 				(void) strcat(&section[2], ") - ");
    436 			}
    437 		}
    438 	} while ((strncasecmp(buffer, ".Sh NAME", 8) != 0));
    439 
    440 	do {
    441 		if (gzgets(in, buffer, sizeof(buffer) - 1) == NULL) {
    442 			free(section);
    443 			return NULL;
    444 		}
    445 	} while (manpreprocess(buffer));
    446 
    447 	if (strncasecmp(buffer, ".Nm", 3) == 0) {
    448 		int	length, offset;
    449 
    450 		ptr = &buffer[3];
    451 		if (isspace(*ptr))
    452 			ptr++;
    453 
    454 		length = strlen(ptr);
    455 		if ((length > 1) && (ptr[length - 1] == ',') &&
    456 		    isspace(ptr[length - 2])) {
    457 			ptr[--length] = '\0';
    458 			ptr[length - 1] = ',';
    459 		}
    460 		(void) memmove(buffer, ptr, length + 1);
    461 
    462 		offset = length + 3;
    463 		ptr = &buffer[offset];
    464 		for (;;) {
    465 			int	 more;
    466 
    467 			if ((sizeof(buffer) == offset) ||
    468 		            (gzgets(in, ptr, sizeof(buffer) - offset)
    469 			       == NULL)) {
    470 				free(section);
    471 				return NULL;
    472 			}
    473 			if (manpreprocess(ptr))
    474 				continue;
    475 
    476 			if (strncasecmp(ptr, ".Nm", 3) != 0) break;
    477 
    478 			ptr += 3;
    479 			if (isspace(*ptr))
    480 				ptr++;
    481 
    482 			buffer[length++] = ' ';
    483 			more = strlen(ptr);
    484 			if ((more > 1) && (ptr[more - 1] == ',') &&
    485 			    isspace(ptr[more - 2])) {
    486 				ptr[--more] = '\0';
    487 				ptr[more - 1] = ',';
    488 			}
    489 
    490 			(void) memmove(&buffer[length], ptr, more + 1);
    491 			length += more;
    492 			offset = length + 3;
    493 
    494 			ptr = &buffer[offset];
    495 		}
    496 
    497 		if (strncasecmp(ptr, ".Nd", 3) == 0) {
    498 			(void) strcpy(&buffer[length], " -");
    499 
    500 			while (strncasecmp(ptr, ".Sh", 3) != 0) {
    501 				int	 more;
    502 
    503 				if (*ptr == '.') {
    504 					char	*space;
    505 
    506 					if ((space = strchr(ptr, ' ')) == NULL)
    507 						ptr = "";
    508 					else {
    509 						space++;
    510 						(void) memmove(ptr, space,
    511 							   strlen(space) + 1);
    512 					}
    513 				}
    514 
    515 				if (*ptr != '\0') {
    516 					buffer[offset - 1] = ' ';
    517 					more = strlen(ptr) + 1;
    518 					offset += more;
    519 				}
    520 				ptr = &buffer[offset];
    521 				if ((sizeof(buffer) == offset) ||
    522 			            (gzgets(in, ptr, sizeof(buffer) - offset)
    523 					== NULL)) {
    524 					free(section);
    525 					return NULL;
    526 				}
    527 				if (manpreprocess(ptr))
    528 					*ptr = '\0';
    529 			}
    530 		}
    531 	}
    532 	else {
    533 		int	 offset;
    534 
    535 		if (*buffer == '.') {
    536 			char	*space;
    537 
    538 			if ((space = strchr(buffer, ' ')) == NULL) {
    539 				free(section);
    540 				return NULL;
    541 			}
    542 			space++;
    543 			(void) memmove(buffer, space, strlen(space));
    544 		}
    545 
    546 		offset = strlen(buffer) + 1;
    547 		for (;;) {
    548 			int	 more;
    549 
    550 			ptr = &buffer[offset];
    551 			if ((sizeof(buffer) == offset) ||
    552 		            (gzgets(in, ptr, sizeof(buffer) - offset)
    553 				== NULL)) {
    554 				free(section);
    555 				return NULL;
    556 			}
    557 			if (manpreprocess(ptr) || (*ptr == '\0'))
    558 				continue;
    559 
    560 			if (strncasecmp(ptr, ".Sh", 3) == 0)
    561 				break;
    562 
    563 			if (*ptr == '.') {
    564 				char	*space;
    565 
    566 				if ((space = strchr(ptr, ' ')) == NULL)
    567 					continue;
    568 				space++;
    569 				(void) memmove(ptr, space, strlen(space));
    570 			}
    571 
    572 			buffer[offset - 1] = ' ';
    573 			more = strlen(ptr);
    574 			if ((more > 1) && (ptr[more - 1] == ',') &&
    575 			    isspace(ptr[more - 2])) {
    576 				ptr[more - 1] = '\0';
    577 				ptr[more - 2] = ',';
    578 			}
    579 			else more++;
    580 			offset += more;
    581 		}
    582 	}
    583 
    584 	if (section == NULL) {
    585 		char sectionbuffer[24];
    586 
    587 		(void) sprintf(sectionbuffer, " (%c) - ",
    588 			sectionext[defaultsection]);
    589 		ptr = replacestring(buffer, " - ", sectionbuffer);
    590 	}
    591 	else {
    592 		ptr = replacestring(buffer, " - ", section);
    593 		free(section);
    594 	}
    595 	return ptr;
    596 }
    597 
    598 char *
    599 getwhatisdata(char *name)
    600 {
    601 	gzFile	*in;
    602 	char	*data;
    603 	int	 section;
    604 
    605 	if ((in = gzopen(name, "r")) == NULL) {
    606 		errx(EXIT_FAILURE, "%s: %s",
    607 		    name,
    608 		    strerror((errno == 0) ? ENOMEM : errno));
    609 		/* NOTREACHED */
    610 	}
    611 
    612 	section = manpagesection(name);
    613 	data = (section == 0) ? parsecatpage(in) : parsemanpage(in, section);
    614 
    615 	(void) gzclose(in);
    616 	return data;
    617 }
    618 
    619 void
    620 processmanpages(manpage **source, whatis **dest)
    621 {
    622 	manpage	*mp;
    623 
    624 	mp = *source;
    625 	*source = NULL;
    626 
    627 	while (mp != NULL) {
    628 		manpage *obsolete;
    629 		char *data;
    630 
    631 		if (mp->mp_left != NULL)
    632 			processmanpages(&mp->mp_left,dest);
    633 
    634 		if ((data = getwhatisdata(mp->mp_name)) != NULL) {
    635 			if (!addwhatis(dest,data))
    636 				err(EXIT_FAILURE, NULL);
    637 		}
    638 
    639 		obsolete = mp;
    640 		mp = mp->mp_right;
    641 		free(obsolete);
    642 	}
    643 }
    644 
    645 int
    646 dumpwhatis (FILE *out, whatis *tree)
    647 {
    648 	while (tree != NULL) {
    649 		if (tree->wi_left)
    650 			if (!dumpwhatis(out, tree->wi_left)) return 0;
    651 
    652 		if ((fputs(tree->wi_data, out) == EOF) ||
    653 		    (fputc('\n', out) == EOF))
    654 			return 0;
    655 
    656 		tree = tree->wi_right;
    657 	}
    658 
    659 	return 1;
    660 }
    661