Home | History | Annotate | Line # | Download | only in makewhatis
      1 /*	$NetBSD: makewhatis.c,v 1.51 2017/10/02 22:14:32 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matthias Scheler.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #if HAVE_NBTOOL_CONFIG_H
     33 #include "nbtool_config.h"
     34 #endif
     35 
     36 #include <sys/cdefs.h>
     37 #if !defined(lint)
     38 __COPYRIGHT("@(#) Copyright (c) 1999\
     39  The NetBSD Foundation, Inc.  All rights reserved.");
     40 __RCSID("$NetBSD: makewhatis.c,v 1.51 2017/10/02 22:14:32 christos Exp $");
     41 #endif /* not lint */
     42 
     43 #include <sys/types.h>
     44 #include <sys/param.h>
     45 #include <sys/queue.h>
     46 #include <sys/stat.h>
     47 #include <sys/wait.h>
     48 
     49 #include <ctype.h>
     50 #include <err.h>
     51 #include <errno.h>
     52 #include <fcntl.h>
     53 #include <fts.h>
     54 #include <glob.h>
     55 #include <locale.h>
     56 #include <paths.h>
     57 #include <signal.h>
     58 #include <stdio.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <time.h>
     62 #include <unistd.h>
     63 #include <zlib.h>
     64 #include <util.h>
     65 
     66 #include <man/manconf.h>
     67 #include <man/pathnames.h>
     68 
     69 #ifndef NROFF
     70 #define NROFF "nroff"
     71 #endif
     72 
/*
 * One manual page file discovered while walking a manual hierarchy.
 * Nodes form a binary search tree keyed on the inode number, so a file
 * reachable under several names is recorded only once.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* nodes with a smaller inode number */
	manpage	*mp_right;	/* nodes with a larger inode number */
	ino_t	 mp_inode;	/* inode number of the file (sort key) */
	size_t	 mp_sdoff;	/* where the subdirectory prefix starts in mp_name */
	size_t	 mp_sdlen;	/* size of that prefix; 0 when there is none */
	char	 mp_name[1];	/* path name; allocated past the end of the struct */
};
     81 
/*
 * One line of the whatis database.  Nodes form a binary search tree
 * ordered by the entry text and then by the prefix.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries sorting before this one */
	whatis	*wi_right;	/* entries sorting after this one */
	char	*wi_data;	/* text of the whatis line */
	char	 wi_prefix[1];	/* directory prefix; allocated past the struct */
};
     88 
     89 int		main(int, char * const *);
     90 static char	*findwhitespace(char *);
     91 static char	*strmove(char *, char *);
     92 static char	*GetS(gzFile, char *, size_t);
     93 static int	pathnamesection(const char *, const char *);
     94 static int	manpagesection(char *);
     95 static char	*createsectionstring(char *);
     96 static void	addmanpage(manpage **, ino_t, char *, size_t, size_t);
     97 static void	addwhatis(whatis **, char *, char *);
     98 static char	*makesection(int);
     99 static char	*makewhatisline(const char *, const char *, const char *);
    100 static void	catpreprocess(char *);
    101 static char	*parsecatpage(const char *, gzFile);
    102 static int	manpreprocess(char *);
    103 static char	*nroff(const char *, gzFile);
    104 static char	*parsemanpage(const char *, gzFile, int);
    105 static char	*getwhatisdata(char *);
    106 static void	processmanpages(manpage **, whatis **);
    107 static void	dumpwhatis(FILE *, whatis *);
    108 static int	makewhatis(char * const *manpath);
    109 
    110 static char * const default_manpath[] = {
    111 	"/usr/share/man",
    112 	NULL
    113 };
    114 
    115 static const char	*sectionext = "0123456789ln";
    116 static const char	*whatisdb   = _PATH_WHATIS;
    117 static const char	*whatisdb_new = _PATH_WHATIS ".new";
    118 static int		dowarn      = 0;
    119 
/*
 * <ctype.h> wrappers that are safe for plain (possibly signed) char:
 * the argument is converted to unsigned char before classification.
 */
#define	ISALPHA(c)	(isalpha((unsigned char)(c)))
#define	ISDIGIT(c)	(isdigit((unsigned char)(c)))
#define	ISSPACE(c)	(isspace((unsigned char)(c)))
    123 
    124 int
    125 main(int argc, char *const *argv)
    126 {
    127 	char * const	*manpath;
    128 	int		c, dofork;
    129 	const char	*conffile;
    130 	ENTRY		*ep;
    131 	TAG		*tp;
    132 	int		rv, jobs, status;
    133 	glob_t		pg;
    134 	char		*paths[2], **p, *sl;
    135 	int		retval;
    136 
    137 	dofork = 1;
    138 	conffile = NULL;
    139 	jobs = 0;
    140 	retval = EXIT_SUCCESS;
    141 
    142 	(void)setlocale(LC_ALL, "");
    143 
    144 	while ((c = getopt(argc, argv, "C:fw")) != -1) {
    145 		switch (c) {
    146 		case 'C':
    147 			conffile = optarg;
    148 			break;
    149 		case 'f':
    150 			/* run all processing on foreground */
    151 			dofork = 0;
    152 			break;
    153 		case 'w':
    154 			dowarn++;
    155 			break;
    156 		default:
    157 			fprintf(stderr, "Usage: %s [-fw] [-C file] [manpath ...]\n",
    158 				getprogname());
    159 			exit(EXIT_FAILURE);
    160 		}
    161 	}
    162 	argc -= optind;
    163 	argv += optind;
    164 
    165 	if (argc >= 1) {
    166 		manpath = &argv[0];
    167 
    168 	    mkwhatis:
    169 		return makewhatis(manpath);
    170 	}
    171 
    172 	/*
    173 	 * Try read config file, fallback to default_manpath[]
    174 	 * if man.conf not available.
    175 	 */
    176 	config(conffile);
    177 	if ((tp = gettag("_whatdb", 0)) == NULL) {
    178 		manpath = default_manpath;
    179 		goto mkwhatis;
    180 	}
    181 
    182 	/* Build individual databases */
    183 	paths[1] = NULL;
    184 	TAILQ_FOREACH(ep, &tp->entrylist, q) {
    185 		if ((rv = glob(ep->s,
    186 		    GLOB_BRACE | GLOB_NOSORT | GLOB_ERR | GLOB_NOCHECK,
    187 		    NULL, &pg)) != 0)
    188 			err(EXIT_FAILURE, "glob('%s')", ep->s);
    189 
    190 		/* We always have something to work with here */
    191 		for (p = pg.gl_pathv; *p; p++) {
    192 			sl = strrchr(*p, '/');
    193 			if (sl == NULL) {
    194 				err(EXIT_FAILURE, "glob: _whatdb entry '%s' "
    195 				    "doesn't contain slash", ep->s);
    196 			}
    197 
    198 			/*
    199 			 * Cut the last component of path, leaving just
    200 			 * the directory. We will use the result as root
    201 			 * for manpage search.
    202 			 * glob malloc()s space for the paths, so it's
    203 			 * okay to change it in-place.
    204 			 */
    205 			*sl = '\0';
    206 			paths[0] = *p;
    207 
    208 			if (!dofork) {
    209 				/* Do not fork child */
    210 				makewhatis(paths);
    211 				continue;
    212 			}
    213 
    214 			switch (fork()) {
    215 			case 0:
    216 				exit(makewhatis(paths));
    217 				break;
    218 			case -1:
    219 				warn("fork");
    220 				makewhatis(paths);
    221 				break;
    222 			default:
    223 				jobs++;
    224 				break;
    225 			}
    226 
    227 		}
    228 
    229 		globfree(&pg);
    230 	}
    231 
    232 	/* Wait for the childern to finish */
    233 	while (jobs > 0) {
    234 		(void)wait(&status);
    235 		if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS)
    236 			retval = EXIT_FAILURE;
    237 		jobs--;
    238 	}
    239 
    240 	return retval;
    241 }
    242 
    243 static int
    244 makewhatis(char * const * manpath)
    245 {
    246 	FTS	*fts;
    247 	FTSENT	*fe;
    248 	manpage *source;
    249 	whatis	*dest;
    250 	FILE	*out;
    251 	size_t	sdoff, sdlen;
    252 	int	outfd;
    253 	struct stat st_before, st_after;
    254 
    255 	if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL)
    256 		err(EXIT_FAILURE, "Cannot open `%s'", *manpath);
    257 
    258 	source = NULL;
    259 	while ((fe = fts_read(fts)) != NULL) {
    260 		switch (fe->fts_info) {
    261 		case FTS_F:
    262 			if (manpagesection(fe->fts_path) >= 0) {
    263 				/*
    264 				 * Get manpage subdirectory prefix. Most
    265 				 * commonly, this is arch-specific subdirectory.
    266 				 */
    267 				if (fe->fts_level >= 3) {
    268 					int		sl;
    269 					const char	*s, *lsl;
    270 
    271 					lsl = NULL;
    272 					s = &fe->fts_path[fe->fts_pathlen - 1];
    273 					for(sl = fe->fts_level - 1; sl > 0;
    274 					    sl--) {
    275 						s--;
    276 						while (s[0] != '/')
    277 							s--;
    278 						if (lsl == NULL)
    279 							lsl = s;
    280 					}
    281 
    282 					/*
    283 					 * Include trailing '/', so we get
    284 					 * 'arch/'.
    285 					 */
    286 					sdoff = s + 1 - fe->fts_path;
    287 					sdlen = lsl - s + 1;
    288 				} else {
    289 					sdoff = 0;
    290 					sdlen = 0;
    291 				}
    292 
    293 				addmanpage(&source, fe->fts_statp->st_ino,
    294 				    fe->fts_path, sdoff, sdlen);
    295 			}
    296 			/*FALLTHROUGH*/
    297 		case FTS_D:
    298 		case FTS_DC:
    299 		case FTS_DEFAULT:
    300 		case FTS_DP:
    301 		case FTS_SL:
    302 		case FTS_DOT:
    303 		case FTS_W:
    304 		case FTS_NSOK:
    305 		case FTS_INIT:
    306 			break;
    307 		case FTS_SLNONE:
    308 			warnx("Symbolic link with no target: `%s'",
    309 			    fe->fts_path);
    310 			break;
    311 		case FTS_DNR:
    312 			warnx("Unreadable directory: `%s'", fe->fts_path);
    313 			break;
    314 		case FTS_NS:
    315 			errno = fe->fts_errno;
    316 			warn("Cannot stat `%s'", fe->fts_path);
    317 			break;
    318 		case FTS_ERR:
    319 			errno = fe->fts_errno;
    320 			warn("Error reading `%s'", fe->fts_path);
    321 			break;
    322 		default:
    323 			errx(EXIT_FAILURE, "Unknown info %d returned from fts "
    324 			    " for path: `%s'", fe->fts_info, fe->fts_path);
    325 		}
    326 	}
    327 
    328 	(void)fts_close(fts);
    329 
    330 	dest = NULL;
    331 	processmanpages(&source, &dest);
    332 
    333 	if (chdir(manpath[0]) == -1)
    334 		err(EXIT_FAILURE, "Cannot change dir to `%s'", manpath[0]);
    335 
    336 	/*
    337 	 * makewhatis runs unattended, so it needs to be able to
    338 	 * recover if the last run crashed out. Therefore, if
    339 	 * whatisdb_new exists and is more than (arbitrarily) sixteen
    340 	 * hours old, nuke it. If it exists but is not so old, refuse
    341 	 * to run until it's cleaned up, in case another makewhatis is
    342 	 * already running. Also, open the output with O_EXCL to make
    343 	 * sure we get our own, in case two copies start exactly at
    344 	 * once. (Unlikely? Maybe, maybe not, if two copies of cron
    345 	 * end up running.)
    346 	 *
    347 	 * Similarly, before renaming the file after we finish writing
    348 	 * to it, make sure it's still the same file we opened. This
    349 	 * can't be completely race-free, but getting caught by it
    350 	 * would require an unexplained sixteen-hour-or-more lag
    351 	 * between the last mtime update when we wrote to it and when
    352 	 * we get to the stat call *and* another makewhatis starting
    353 	 * out to write at exactly the wrong moment. Not impossible,
    354 	 * but not likely enough to worry about.
    355 	 *
    356 	 * This is maybe unnecessarily elaborate, but generating
    357 	 * corrupted output isn't so good either.
    358 	 */
    359 
    360 	if (stat(whatisdb_new, &st_before) == 0) {
    361 		if (st_before.st_mtime - time(NULL) > 16*60*60) {
    362 			/* Don't complain if someone else just removed it. */
    363 			if (unlink(whatisdb_new) == -1 && errno != ENOENT) {
    364 				err(EXIT_FAILURE, "Could not remove `%s'",
    365 				    whatisdb_new);
    366 			} else {
    367 				warnx("Removed stale `%s'", whatisdb_new);
    368 			}
    369 		} else {
    370 			errx(EXIT_FAILURE, "The file `%s' already exists "
    371 			    "-- am I already running?", whatisdb_new);
    372 		}
    373 	} else if (errno != ENOENT) {
    374 		/* Something unexpected happened. */
    375 		err(EXIT_FAILURE, "Cannot stat `%s'", whatisdb_new);
    376 	}
    377 
    378 	outfd = open(whatisdb_new, O_WRONLY|O_CREAT|O_EXCL,
    379 	    S_IRUSR|S_IRGRP|S_IROTH);
    380 	if (outfd < 0)
    381 		err(EXIT_FAILURE, "Cannot open `%s'", whatisdb_new);
    382 
    383 	if (fstat(outfd, &st_before) == -1)
    384 		err(EXIT_FAILURE, "Cannot fstat `%s'", whatisdb_new);
    385 
    386 	if ((out = fdopen(outfd, "w")) == NULL)
    387 		err(EXIT_FAILURE, "Cannot fdopen `%s'", whatisdb_new);
    388 
    389 	dumpwhatis(out, dest);
    390 	if (fchmod(fileno(out), S_IRUSR|S_IRGRP|S_IROTH) == -1)
    391 		err(EXIT_FAILURE, "Cannot chmod `%s'", whatisdb_new);
    392 	if (fclose(out) != 0)
    393 		err(EXIT_FAILURE, "Cannot close `%s'", whatisdb_new);
    394 
    395 	if (stat(whatisdb_new, &st_after) == -1)
    396 		err(EXIT_FAILURE, "Cannot stat `%s' (after writing)",
    397 		    whatisdb_new);
    398 
    399 	if (st_before.st_dev != st_after.st_dev ||
    400 	    st_before.st_ino != st_after.st_ino) {
    401 		errx(EXIT_FAILURE, "The file `%s' changed under me; giving up",
    402 		    whatisdb_new);
    403 	}
    404 
    405 	if (rename(whatisdb_new, whatisdb) == -1)
    406 		err(EXIT_FAILURE, "Could not rename `%s' to `%s'",
    407 		    whatisdb_new, whatisdb);
    408 
    409 	return EXIT_SUCCESS;
    410 }
    411 
/*
 * Return a pointer to the first whitespace character of str, or NULL
 * when the string contains none.
 */
static char *
findwhitespace(char *str)
{
	char	*scan;

	for (scan = str; *scan != '\0'; scan++) {
		if (ISSPACE(*scan))
			return scan;
	}

	return NULL;
}
    423 
/*
 * strcpy() replacement for overlapping strings: copy src, terminating
 * NUL included, to dest and return dest.
 */
static char *
strmove(char *dest, char *src)
{
	size_t	nbytes;

	nbytes = strlen(src) + 1;
	(void)memmove(dest, src, nbytes);
	return dest;
}
    429 
    430 static char *
    431 GetS(gzFile in, char *buffer, size_t length)
    432 {
    433 	char	*ptr;
    434 
    435 	if (((ptr = gzgets(in, buffer, (int)length)) != NULL) && (*ptr == '\0'))
    436 		ptr = NULL;
    437 
    438 	return ptr;
    439 }
    440 
    441 static char *
    442 makesection(int s)
    443 {
    444 	char sectionbuffer[24];
    445 	if (s == -1)
    446 		return NULL;
    447 	(void)snprintf(sectionbuffer, sizeof(sectionbuffer),
    448 		" (%c) - ", sectionext[s]);
    449 	return estrdup(sectionbuffer);
    450 }
    451 
    452 static int
    453 pathnamesection(const char *pat, const char *name)
    454 {
    455 	char *ptr, *ext;
    456 	size_t len = strlen(pat);
    457 
    458 
    459 	while ((ptr = strstr(name, pat)) != NULL) {
    460 		if ((ext = strchr(sectionext, ptr[len])) != NULL) {
    461 			return ext - sectionext;
    462 		}
    463 		name = ptr + 1;
    464 	}
    465 	return -1;
    466 }
    467 
    468 
    469 static int
    470 manpagesection(char *name)
    471 {
    472 	char	*ptr;
    473 
    474 	if ((ptr = strrchr(name, '/')) != NULL)
    475 		ptr++;
    476 	else
    477 		ptr = name;
    478 
    479 	while ((ptr = strchr(ptr, '.')) != NULL) {
    480 		int section;
    481 
    482 		ptr++;
    483 		section = 0;
    484 		while (sectionext[section] != '\0')
    485 			if (sectionext[section] == *ptr)
    486 				return section;
    487 			else
    488 				section++;
    489 	}
    490 	return -1;
    491 }
    492 
/*
 * Build the " (section_id) - " separator text for a section name taken
 * from the page itself.  Returns a string allocated by asprintf();
 * exits when the allocation fails.
 */
static char *
createsectionstring(char *section_id)
{
	char	*text;
	int	 rc;

	rc = asprintf(&text, " (%s) - ", section_id);
	if (rc < 0)
		err(EXIT_FAILURE, "malloc failed");

	return text;
}
    502 
    503 static void
    504 addmanpage(manpage **tree, ino_t inode, char *name, size_t sdoff, size_t sdlen)
    505 {
    506 	manpage *mp;
    507 
    508 	while ((mp = *tree) != NULL) {
    509 		if (mp->mp_inode == inode)
    510 			return;
    511 		tree = inode < mp->mp_inode ? &mp->mp_left : &mp->mp_right;
    512 	}
    513 
    514 	mp = emalloc(sizeof(manpage) + strlen(name));
    515 	mp->mp_left = NULL;
    516 	mp->mp_right = NULL;
    517 	mp->mp_inode = inode;
    518 	mp->mp_sdoff = sdoff;
    519 	mp->mp_sdlen = sdlen;
    520 	(void)strcpy(mp->mp_name, name);
    521 	*tree = mp;
    522 }
    523 
    524 static void
    525 addwhatis(whatis **tree, char *data, char *prefix)
    526 {
    527 	whatis *wi;
    528 	int result;
    529 
    530 	while (ISSPACE(*data))
    531 		data++;
    532 
    533 	if (*data == '/') {
    534 		char *ptr;
    535 
    536 		ptr = ++data;
    537 		while ((*ptr != '\0') && !ISSPACE(*ptr))
    538 			if (*ptr++ == '/')
    539 				data = ptr;
    540 	}
    541 
    542 	while ((wi = *tree) != NULL) {
    543 		result = strcmp(data, wi->wi_data);
    544 		if (result == 0) result = strcmp(prefix, wi->wi_prefix);
    545 		if (result == 0) return;
    546 		tree = result < 0 ? &wi->wi_left : &wi->wi_right;
    547 	}
    548 
    549 	wi = emalloc(sizeof(whatis) + strlen(prefix));
    550 
    551 	wi->wi_left = NULL;
    552 	wi->wi_right = NULL;
    553 	wi->wi_data = data;
    554 	if (prefix[0] != '\0')
    555 		(void) strcpy(wi->wi_prefix, prefix);
    556 	else
    557 		wi->wi_prefix[0] = '\0';
    558 	*tree = wi;
    559 }
    560 
/*
 * Clean up one line of formatted output in place: leading whitespace
 * is removed, every other whitespace run becomes a single space (a run
 * at the end of the line is dropped), and a character that is followed
 * by a backspace is removed together with the backspace.
 */
static void
catpreprocess(char *from)
{
	char	*src, *dst;

	dst = from;
	src = from;
	while (ISSPACE(*src))
		src++;

	while (*src != '\0') {
		if (ISSPACE(*src)) {
			do {
				src++;
			} while (ISSPACE(*src));
			if (*src != '\0')
				*dst++ = ' ';
			continue;
		}
		if (src[1] == '\b') {
			/* overstrike: skip the character and the backspace */
			src += 2;
			continue;
		}
		*dst++ = *src++;
	}

	*dst = '\0';
}
    582 
    583 static char *
    584 makewhatisline(const char *file, const char *line, const char *section)
    585 {
    586 	static const char *del[] = {
    587 		" - ",
    588 		" -- ",
    589 		"- ",
    590 		" -",
    591 		NULL
    592 	};
    593 	size_t i, pos;
    594 	size_t llen, slen, dlen;
    595 	char *result, *ptr;
    596 
    597 	ptr = NULL;
    598 	if (section == NULL) {
    599 		if (dowarn)
    600 			warnx("%s: No section provided for `%s'", file, line);
    601 		return estrdup(line);
    602 	}
    603 
    604 	for (i = 0; del[i]; i++)
    605 		if ((ptr = strstr(line, del[i])) != NULL)
    606 			break;
    607 
    608 	if (del[i] == NULL) {
    609 		if (dowarn)
    610 			warnx("%s: Bad format line `%s'", file, line);
    611 		return estrdup(line);
    612 	}
    613 
    614 	slen = strlen(section);
    615 	llen = strlen(line);
    616 	dlen = strlen(del[i]);
    617 
    618 	result = emalloc(llen - dlen + slen + 1);
    619 	pos = ptr - line;
    620 
    621 	(void)memcpy(result, line, pos);
    622 	(void)memcpy(&result[pos], section, slen);
    623 	(void)strcpy(&result[pos + slen], &line[pos + dlen]);
    624 	return result;
    625 }
    626 
    627 static char *
    628 parsecatpage(const char *name, gzFile in)
    629 {
    630 	char	 buffer[8192];
    631 	char	*section, *ptr, *last;
    632 	size_t	 size;
    633 
    634 	do {
    635 		if (GetS(in, buffer, sizeof(buffer)) == NULL)
    636 			return NULL;
    637 	}
    638 	while (buffer[0] == '\n');
    639 
    640 	section = NULL;
    641 	if ((ptr = strchr(buffer, '(')) != NULL) {
    642 		if ((last = strchr(ptr + 1, ')')) !=NULL) {
    643 			size_t	length;
    644 
    645 			length = last - ptr + 1;
    646 			section = emalloc(length + 5);
    647 			*section = ' ';
    648 			(void) memcpy(section + 1, ptr, length);
    649 			(void) strcpy(section + 1 + length, " - ");
    650 		}
    651 	}
    652 
    653 	for (;;) {
    654 		if (GetS(in, buffer, sizeof(buffer)) == NULL) {
    655 			free(section);
    656 			return NULL;
    657 		}
    658 		catpreprocess(buffer);
    659 		if (strncmp(buffer, "NAME", 4) == 0)
    660 			break;
    661 	}
    662 	if (section == NULL)
    663 		section = makesection(pathnamesection("/cat", name));
    664 
    665 	ptr = last = buffer;
    666 	size = sizeof(buffer) - 1;
    667 	while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
    668 		int	 length;
    669 
    670 		catpreprocess(ptr);
    671 
    672 		length = strlen(ptr);
    673 		if (length == 0) {
    674 			*last = '\0';
    675 
    676 			ptr = makewhatisline(name, buffer, section);
    677 			free(section);
    678 			return ptr;
    679 		}
    680 		if ((length > 1) && (ptr[length - 1] == '-') &&
    681 		    ISALPHA(ptr[length - 2]))
    682 			last = &ptr[--length];
    683 		else {
    684 			last = &ptr[length++];
    685 			*last = ' ';
    686 		}
    687 
    688 		ptr += length;
    689 		size -= length;
    690 	}
    691 
    692 	free(section);
    693 
    694 	return NULL;
    695 }
    696 
/*
 * Simplify one line of manual page source in place.
 *
 * - A line whose first non-blank text is the comment introducer .\" is
 *   left alone and 1 is returned.
 * - Leading whitespace is removed; other whitespace runs become a single
 *   space, which is dropped at the end of the line and before a ','.
 * - Escapes: "\-" becomes "-"; font escapes (\fX, \f(XX, \f[name]) and
 *   size escapes (\s followed by an optional sign and digits) are
 *   removed entirely; for any other escape the backslash and the
 *   character after it are removed.
 * - Double quotes are removed.
 * - A line starting with ".Xr" (any case) is rewritten from
 *   ".Xr name sect [rest]" to "name(sect)[rest]".
 *
 * Returns 1 for a comment line, 0 otherwise.
 */
static int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (ISSPACE(*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0')
		if (ISSPACE(*from)) {
			while (ISSPACE(*++from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			switch (*++from) {
			case '\0':
			case '-':
				/* keep the '-' (or stop at the end of line) */
				break;
			case 'f':
				/*
				 * Font change.  The selector has to go as
				 * well, otherwise "\fBls\fP" would be turned
				 * into "BlsP".
				 */
				from++;
				if (*from == '(') {
					int	n;

					/* '(' and a two-character name */
					for (n = 0; n < 3 && *from != '\0'; n++)
						from++;
				} else if (*from == '[') {
					/* "[name]", closing bracket included */
					while (*from != '\0' && *from++ != ']')
						continue;
				} else if (*from != '\0')
					from++;
				break;
			case 's':
				/* Size change: optional sign, then digits. */
				from++;
				if ((*from=='+') || (*from=='-'))
					from++;
				while (ISDIGIT(*from))
					from++;
				break;
			default:
				from++;
			}
		} else {
			if (*from == '"')
				from++;
			else
				*to++ = *from++;
		}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (ISSPACE(*from))
			from++;

		if ((sect = findwhitespace(from)) != NULL) {
			size_t	length;
			char	*trail;

			/* Split into name, section and optional trailer. */
			*sect++ = '\0';
			if ((trail = findwhitespace(sect)) != NULL)
				*trail++ = '\0';
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			if (trail == NULL) {
				(void) strcpy(&to[length], ")");
			} else {
				to += length;
				*to++ = ')';
				length = strlen(trail);
				(void) memmove(to, trail, length + 1);
			}
		}
	}

	return 0;
}
    771 
    772 static char *
    773 nroff(const char *inname, gzFile in)
    774 {
    775 	char tempname[MAXPATHLEN], buffer[65536], *data;
    776 	int tempfd, bytes, pipefd[2], status;
    777 	static int devnull = -1;
    778 	pid_t child;
    779 
    780 	if (gzrewind(in) < 0)
    781 		err(EXIT_FAILURE, "Cannot rewind pipe");
    782 
    783 	if ((devnull < 0) &&
    784 	    ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0))
    785 		err(EXIT_FAILURE, "Cannot open `/dev/null'");
    786 
    787 	(void)strlcpy(tempname, _PATH_TMP "makewhatis.XXXXXX",
    788 	    sizeof(tempname));
    789 	if ((tempfd = mkstemp(tempname)) == -1)
    790 		err(EXIT_FAILURE, "Cannot create temp file");
    791 
    792 	while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
    793 		if (write(tempfd, buffer, (size_t)bytes) != bytes) {
    794 			bytes = -1;
    795 			break;
    796 		}
    797 
    798 	if (bytes < 0) {
    799 		(void)close(tempfd);
    800 		(void)unlink(tempname);
    801 		err(EXIT_FAILURE, "Read from pipe failed");
    802 	}
    803 	if (lseek(tempfd, (off_t)0, SEEK_SET) == (off_t)-1) {
    804 		(void)close(tempfd);
    805 		(void)unlink(tempname);
    806 		err(EXIT_FAILURE, "Cannot rewind temp file");
    807 	}
    808 	if (pipe(pipefd) == -1) {
    809 		(void)close(tempfd);
    810 		(void)unlink(tempname);
    811 		err(EXIT_FAILURE, "Cannot create pipe");
    812 	}
    813 
    814 	switch (child = vfork()) {
    815 	case -1:
    816 		(void)close(pipefd[1]);
    817 		(void)close(pipefd[0]);
    818 		(void)close(tempfd);
    819 		(void)unlink(tempname);
    820 		err(EXIT_FAILURE, "Fork failed");
    821 		/* NOTREACHED */
    822 	case 0:
    823 		(void)close(pipefd[0]);
    824 		if (tempfd != STDIN_FILENO) {
    825 			(void)dup2(tempfd, STDIN_FILENO);
    826 			(void)close(tempfd);
    827 		}
    828 		if (pipefd[1] != STDOUT_FILENO) {
    829 			(void)dup2(pipefd[1], STDOUT_FILENO);
    830 			(void)close(pipefd[1]);
    831 		}
    832 		if (devnull != STDERR_FILENO) {
    833 			(void)dup2(devnull, STDERR_FILENO);
    834 			(void)close(devnull);
    835 		}
    836 		(void)execlp(NROFF, NROFF, "-S", "-man", NULL);
    837 		_exit(EXIT_FAILURE);
    838 		/*NOTREACHED*/
    839 	default:
    840 		(void)close(pipefd[1]);
    841 		(void)close(tempfd);
    842 		break;
    843 	}
    844 
    845 	if ((in = gzdopen(pipefd[0], "r")) == NULL) {
    846 		if (errno == 0)
    847 			errno = ENOMEM;
    848 		(void)close(pipefd[0]);
    849 		(void)kill(child, SIGTERM);
    850 		while (waitpid(child, NULL, 0) != child);
    851 		(void)unlink(tempname);
    852 		err(EXIT_FAILURE, "Cannot read from pipe");
    853 	}
    854 
    855 	data = parsecatpage(inname, in);
    856 	while (gzread(in, buffer, sizeof(buffer)) > 0);
    857 	(void)gzclose(in);
    858 
    859 	while (waitpid(child, &status, 0) != child);
    860 	if ((data != NULL) &&
    861 	    !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
    862 		free(data);
    863 		errx(EXIT_FAILURE, NROFF " on `%s' exited with %d status",
    864 		    inname, WEXITSTATUS(status));
    865 	}
    866 
    867 	(void)unlink(tempname);
    868 	return data;
    869 }
    870 
/*
 * Extract the whatis line from manual page source.
 *
 * Lines are read from `in' until one starting with ".Sh NAME" (compared
 * case-insensitively, after manpreprocess()) is found.  On the way the
 * section is picked up from a ".Dt" or ".TH" line.  The text following
 * the NAME heading is then collected in buffer[]:
 *  - if it starts with ".Nm", consecutive ".Nm" lines are joined with
 *    spaces and, when a ".Nd" line follows, " -" and the description
 *    lines up to the next ".Sh" are appended;
 *  - otherwise lines are joined with spaces up to the next ".Sh" or
 *    ".Ss", a leading macro name being stripped from each line.
 *
 * When the page gave no section, makesection(defaultsection) is used.
 *
 * Returns the string built by makewhatisline(), or NULL when the input
 * ends early, a ".Ds" line is seen before the NAME heading, the buffer
 * is full, or the ".Nd" text contains a macro other than ".Nd" or a '['.
 */
     871 static char *
     872 parsemanpage(const char *name, gzFile in, int defaultsection)
     873 {
     874 	char	*section, buffer[8192], *ptr;
     875 	static const char POD[] = ".\\\" Automatically generated by Pod";
     876 	static const char IX[] = ".IX TITLE";
     877 
     878 	section = NULL;
	/* Phase 1: scan the header up to the NAME section heading. */
     879 	do {
     880 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
     881 			free(section);
     882 			return NULL;
     883 		}
     884 
     885 		/*
     886 		 * Skip over lines in man pages that have been generated
     887 		 * by Pod, until we find the TITLE.
     888 		 */
     889 		if (strncasecmp(buffer, POD, sizeof(POD) - 1) == 0) {
     890 			do {
     891 				if (GetS(in, buffer, sizeof(buffer) - 1)
     892 				    == NULL) {
     893 					free(section);
     894 					return NULL;
     895 				}
     896 			} while (strncasecmp(buffer, IX, sizeof(IX) - 1) != 0);
     897 		}
     898 
		/* A comment line; `continue' re-tests the loop condition. */
     899 		if (manpreprocess(buffer))
     900 			continue;
     901 		if (strncasecmp(buffer, ".Dt", 3) == 0) {
     902 			char	*end;
     903 
			/* ".Dt title section ...": the second word is the section. */
     904 			ptr = &buffer[3];
     905 			if (ISSPACE(*ptr))
     906 				ptr++;
     907 			if ((ptr = findwhitespace(ptr)) == NULL)
     908 				continue;
     909 
     910 			if ((end = findwhitespace(++ptr)) != NULL)
     911 				*end = '\0';
     912 
     913 			free(section);
     914 			section = createsectionstring(ptr);
     915 		}
     916 		else if (strncasecmp(buffer, ".TH", 3) == 0) {
			/* ".TH title section ...": the second word is the section. */
     917 			ptr = &buffer[3];
     918 			while (ISSPACE(*ptr))
     919 				ptr++;
     920 			if ((ptr = findwhitespace(ptr)) != NULL) {
     921 				char *next;
     922 
     923 				while (ISSPACE(*ptr))
     924 					ptr++;
     925 				if ((next = findwhitespace(ptr)) != NULL)
     926 					*next = '\0';
     927 				free(section);
     928 				section = createsectionstring(ptr);
     929 			}
     930 		}
     931 		else if (strncasecmp(buffer, ".Ds", 3) == 0) {
     932 			free(section);
     933 			return NULL;
     934 		}
     935 	} while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
     936 
	/* Phase 2: fetch the first non-comment line after the heading. */
     937 	do {
     938 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
     939 			free(section);
     940 			return NULL;
     941 		}
     942 	} while (manpreprocess(buffer));
     943 
     944 	if (strncasecmp(buffer, ".Nm", 3) == 0) {
		/*
		 * `length' is the size of the text collected at the start
		 * of buffer[]; `offset' (length + 3) is where the next input
		 * line is read, leaving room for the " -" appended later.
		 */
     945 		size_t	length, offset;
     946 
     947 		ptr = &buffer[3];
     948 		while (ISSPACE(*ptr))
     949 			ptr++;
     950 
     951 		length = strlen(ptr);
		/* Turn a trailing " ," into ",". */
     952 		if ((length > 1) && (ptr[length - 1] == ',') &&
     953 		    ISSPACE(ptr[length - 2])) {
     954 			ptr[--length] = '\0';
     955 			ptr[length - 1] = ',';
     956 		}
     957 		(void) memmove(buffer, ptr, length + 1);
     958 
     959 		offset = length + 3;
     960 		ptr = &buffer[offset];
		/* Append the names of all directly following ".Nm" lines. */
     961 		for (;;) {
     962 			size_t	 more;
     963 
     964 			if ((sizeof(buffer) == offset) ||
     965 			    (GetS(in, ptr, sizeof(buffer) - offset)
     966 			       == NULL)) {
     967 				free(section);
     968 				return NULL;
     969 			}
     970 			if (manpreprocess(ptr))
     971 				continue;
     972 
     973 			if (strncasecmp(ptr, ".Nm", 3) != 0) break;
     974 
     975 			ptr += 3;
     976 			if (ISSPACE(*ptr))
     977 				ptr++;
     978 
     979 			buffer[length++] = ' ';
     980 			more = strlen(ptr);
     981 			if ((more > 1) && (ptr[more - 1] == ',') &&
     982 			    ISSPACE(ptr[more - 2])) {
     983 				ptr[--more] = '\0';
     984 				ptr[more - 1] = ',';
     985 			}
     986 
     987 			(void) memmove(&buffer[length], ptr, more + 1);
     988 			length += more;
     989 			offset = length + 3;
     990 
     991 			ptr = &buffer[offset];
     992 		}
     993 
     994 		if (strncasecmp(ptr, ".Nd", 3) == 0) {
			/* " -" plus its NUL fill buffer[length .. offset - 1]. */
     995 			(void) strlcpy(&buffer[length], " -",
     996 			    sizeof(buffer) - length);
     997 
			/* Append description lines until the next ".Sh". */
     998 			while (strncasecmp(ptr, ".Sh", 3) != 0) {
     999 				int	 more;
    1000 
    1001 				if (*ptr == '.') {
    1002 					char	*space;
    1003 
    1004 					if (strncasecmp(ptr, ".Nd", 3) != 0 ||
    1005 					    strchr(ptr, '[') != NULL) {
    1006 						free(section);
    1007 						return NULL;
    1008 					}
					/* Strip the ".Nd" macro name itself. */
    1009 					space = findwhitespace(ptr);
    1010 					if (space == NULL) {
    1011 						ptr = "";
    1012 					} else {
    1013 						space++;
    1014 						(void) strmove(ptr, space);
    1015 					}
    1016 				}
    1017 
    1018 				if (*ptr != '\0') {
					/* Replace the NUL before this text with a space. */
    1019 					buffer[offset - 1] = ' ';
    1020 					more = strlen(ptr) + 1;
    1021 					offset += more;
    1022 				}
    1023 				ptr = &buffer[offset];
    1024 				if ((sizeof(buffer) == offset) ||
    1025 				    (GetS(in, ptr, sizeof(buffer) - offset)
    1026 					== NULL)) {
    1027 					free(section);
    1028 					return NULL;
    1029 				}
				/* A comment line contributes nothing. */
    1030 				if (manpreprocess(ptr))
    1031 					*ptr = '\0';
    1032 			}
    1033 		}
    1034 	}
    1035 	else {
		/* `offset' is where the next input line is read into buffer[]. */
    1036 		int	 offset;
    1037 
    1038 		if (*buffer == '.') {
    1039 			char	*space;
    1040 
			/* Drop a leading macro name from the first line. */
    1041 			if ((space = findwhitespace(&buffer[1])) == NULL) {
    1042 				free(section);
    1043 				return NULL;
    1044 			}
    1045 			space++;
    1046 			(void) strmove(buffer, space);
    1047 		}
    1048 
    1049 		offset = strlen(buffer) + 1;
    1050 		for (;;) {
    1051 			int	 more;
    1052 
    1053 			ptr = &buffer[offset];
    1054 			if ((sizeof(buffer) == offset) ||
    1055 			    (GetS(in, ptr, sizeof(buffer) - offset)
    1056 				== NULL)) {
    1057 				free(section);
    1058 				return NULL;
    1059 			}
    1060 			if (manpreprocess(ptr) || (*ptr == '\0'))
    1061 				continue;
    1062 
    1063 			if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
    1064 			    (strncasecmp(ptr, ".Ss", 3) == 0))
    1065 				break;
    1066 
    1067 			if (*ptr == '.') {
    1068 				char	*space;
    1069 
				/* A macro line without arguments is skipped. */
    1070 				if ((space = findwhitespace(ptr)) == NULL) {
    1071 					continue;
    1072 				}
    1073 
    1074 				space++;
    1075 				(void) memmove(ptr, space, strlen(space) + 1);
    1076 			}
    1077 
			/* Join with the text so far: the NUL becomes a space. */
    1078 			buffer[offset - 1] = ' ';
    1079 			more = strlen(ptr);
    1080 			if ((more > 1) && (ptr[more - 1] == ',') &&
    1081 			    ISSPACE(ptr[more - 2])) {
    1082 				ptr[more - 1] = '\0';
    1083 				ptr[more - 2] = ',';
    1084 			}
    1085 			else more++;
    1086 			offset += more;
    1087 		}
    1088 	}
    1089 
    1090 	if (section == NULL)
    1091 		section = makesection(defaultsection);
    1092 
    1093 	ptr = makewhatisline(name, buffer, section);
    1094 	free(section);
    1095 	return ptr;
    1096 }
   1097 
   1098 static char *
   1099 getwhatisdata(char *name)
   1100 {
   1101 	gzFile	in;
   1102 	char	*data;
   1103 	int	 section;
   1104 
   1105 	if ((in = gzopen(name, "r")) == NULL) {
   1106 		if (errno == 0)
   1107 			errno = ENOMEM;
   1108 		err(EXIT_FAILURE, "Cannot open `%s'", name);
   1109 		/* NOTREACHED */
   1110 	}
   1111 
   1112 	section = manpagesection(name);
   1113 	if (section == 0) {
   1114 		data = parsecatpage(name, in);
   1115 	} else {
   1116 		data = parsemanpage(name, in, section);
   1117 		if (data == NULL)
   1118 			data = nroff(name, in);
   1119 	}
   1120 
   1121 	(void) gzclose(in);
   1122 	return data;
   1123 }
   1124 
   1125 static void
   1126 processmanpages(manpage **source, whatis **dest)
   1127 {
   1128 	manpage *mp;
   1129 	char sd[128];
   1130 
   1131 	mp = *source;
   1132 	*source = NULL;
   1133 
   1134 	while (mp != NULL) {
   1135 		manpage *obsolete;
   1136 		char *data;
   1137 
   1138 		if (mp->mp_left != NULL)
   1139 			processmanpages(&mp->mp_left, dest);
   1140 
   1141 		if ((data = getwhatisdata(mp->mp_name)) != NULL) {
   1142 			/* Pass eventual directory prefix to addwhatis() */
   1143 			if (mp->mp_sdlen > 0 && mp->mp_sdlen < sizeof(sd)-1)
   1144 				strlcpy(sd, &mp->mp_name[mp->mp_sdoff],
   1145 					mp->mp_sdlen);
   1146 			else
   1147 				sd[0] = '\0';
   1148 
   1149 			addwhatis(dest, data, sd);
   1150 		}
   1151 
   1152 		obsolete = mp;
   1153 		mp = mp->mp_right;
   1154 		free(obsolete);
   1155 	}
   1156 }
   1157 
   1158 static void
   1159 dumpwhatis(FILE *out, whatis *tree)
   1160 {
   1161 	while (tree != NULL) {
   1162 		if (tree->wi_left)
   1163 			dumpwhatis(out, tree->wi_left);
   1164 
   1165 		if ((tree->wi_data[0] && fputs(tree->wi_prefix, out) == EOF) ||
   1166 		    (fputs(tree->wi_data, out) == EOF) ||
   1167 		    (fputc('\n', out) == EOF))
   1168 			err(EXIT_FAILURE, "Write failed");
   1169 
   1170 		tree = tree->wi_right;
   1171 	}
   1172 }
   1173