Home | History | Annotate | Line # | Download | only in makewhatis
makewhatis.c revision 1.47
      1 /*	$NetBSD: makewhatis.c,v 1.47 2009/04/02 21:39:33 apb Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matthias Scheler.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #if HAVE_NBTOOL_CONFIG_H
     33 #include "nbtool_config.h"
     34 #endif
     35 
     36 #include <sys/cdefs.h>
     37 #if !defined(lint)
     38 __COPYRIGHT("@(#) Copyright (c) 1999\
     39  The NetBSD Foundation, Inc.  All rights reserved.");
     40 __RCSID("$NetBSD: makewhatis.c,v 1.47 2009/04/02 21:39:33 apb Exp $");
     41 #endif /* not lint */
     42 
     43 #include <sys/types.h>
     44 #include <sys/param.h>
     45 #include <sys/queue.h>
     46 #include <sys/stat.h>
     47 #include <sys/wait.h>
     48 
     49 #include <ctype.h>
     50 #include <err.h>
     51 #include <errno.h>
     52 #include <fcntl.h>
     53 #include <fts.h>
     54 #include <glob.h>
     55 #include <locale.h>
     56 #include <paths.h>
     57 #include <signal.h>
     58 #include <stdio.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <unistd.h>
     62 #include <zlib.h>
     63 #include <util.h>
     64 
     65 #include <man/manconf.h>
     66 #include <man/pathnames.h>
     67 
     68 #ifndef NROFF
     69 #define NROFF "nroff"
     70 #endif
     71 
/*
 * Node of the binary search tree of manual page files found on disk.
 * addmanpage() orders the tree by inode number and stores the path name
 * inline, directly after the fixed part of the node.
 */
typedef struct manpagestruct manpage;
struct manpagestruct {
	manpage	*mp_left;	/* pages with a smaller inode number */
	manpage	*mp_right;	/* pages with a larger inode number */
	ino_t	 mp_inode;	/* inode of the file; the tree key */
	size_t	 mp_sdoff;	/* offset of the subdirectory prefix in mp_name */
	size_t	 mp_sdlen;	/* size handed to strlcpy() for that prefix, or 0 */
	char	 mp_name[1];	/* path name; the allocation is extended to fit */
};
     80 
/*
 * Node of the binary search tree of whatis lines.  addwhatis() orders the
 * tree by wi_data first and wi_prefix second; dumpwhatis() writes it out
 * in that order.
 */
typedef struct whatisstruct whatis;
struct whatisstruct {
	whatis	*wi_left;	/* entries sorting before this one */
	whatis	*wi_right;	/* entries sorting after this one */
	char	*wi_data;	/* the whatis line itself (not copied) */
	char	 wi_prefix[1];	/* directory prefix; allocation is extended */
};
     87 
     88 int		main(int, char * const *);
     89 static char	*findwhitespace(char *);
     90 static char	*strmove(char *, char *);
     91 static char	*GetS(gzFile, char *, size_t);
     92 static int	pathnamesection(const char *, const char *);
     93 static int	manpagesection(char *);
     94 static char	*createsectionstring(char *);
     95 static void	addmanpage(manpage **, ino_t, char *, size_t, size_t);
     96 static void	addwhatis(whatis **, char *, char *);
     97 static char	*makesection(int);
     98 static char	*makewhatisline(const char *, const char *, const char *);
     99 static void	catpreprocess(char *);
    100 static char	*parsecatpage(const char *, gzFile *);
    101 static int	manpreprocess(char *);
    102 static char	*nroff(const char *, gzFile *);
    103 static char	*parsemanpage(const char *, gzFile *, int);
    104 static char	*getwhatisdata(char *);
    105 static void	processmanpages(manpage **, whatis **);
    106 static void	dumpwhatis(FILE *, whatis *);
    107 static int	makewhatis(char * const *manpath);
    108 
    109 static char * const default_manpath[] = {
    110 	"/usr/share/man",
    111 	NULL
    112 };
    113 
    114 static const char	*sectionext = "0123456789ln";
    115 static const char	*whatisdb   = _PATH_WHATIS;
    116 static const char	*whatisdb_new = _PATH_WHATIS ".new";
    117 static int		dowarn      = 0;
    118 
    119 #define	ISALPHA(c)	isalpha((unsigned char)(c))
    120 #define	ISDIGIT(c)	isdigit((unsigned char)(c))
    121 #define	ISSPACE(c)	isspace((unsigned char)(c))
    122 
    123 int
    124 main(int argc, char *const *argv)
    125 {
    126 	char * const	*manpath;
    127 	int		c, dofork;
    128 	const char	*conffile;
    129 	ENTRY		*ep;
    130 	TAG		*tp;
    131 	int		rv, jobs, status;
    132 	glob_t		pg;
    133 	char		*paths[2], **p, *sl;
    134 	int		retval;
    135 
    136 	dofork = 1;
    137 	conffile = NULL;
    138 	jobs = 0;
    139 	retval = EXIT_SUCCESS;
    140 
    141 	(void)setlocale(LC_ALL, "");
    142 
    143 	while ((c = getopt(argc, argv, "C:fw")) != -1) {
    144 		switch (c) {
    145 		case 'C':
    146 			conffile = optarg;
    147 			break;
    148 		case 'f':
    149 			/* run all processing on foreground */
    150 			dofork = 0;
    151 			break;
    152 		case 'w':
    153 			dowarn++;
    154 			break;
    155 		default:
    156 			fprintf(stderr, "Usage: %s [-fw] [-C file] [manpath ...]\n",
    157 				getprogname());
    158 			exit(EXIT_FAILURE);
    159 		}
    160 	}
    161 	argc -= optind;
    162 	argv += optind;
    163 
    164 	if (argc >= 1) {
    165 		manpath = &argv[0];
    166 
    167 	    mkwhatis:
    168 		return makewhatis(manpath);
    169 	}
    170 
    171 	/*
    172 	 * Try read config file, fallback to default_manpath[]
    173 	 * if man.conf not available.
    174 	 */
    175 	config(conffile);
    176 	if ((tp = gettag("_whatdb", 0)) == NULL) {
    177 		manpath = default_manpath;
    178 		goto mkwhatis;
    179 	}
    180 
    181 	/* Build individual databases */
    182 	paths[1] = NULL;
    183 	TAILQ_FOREACH(ep, &tp->entrylist, q) {
    184 		if ((rv = glob(ep->s,
    185 		    GLOB_BRACE | GLOB_NOSORT | GLOB_ERR | GLOB_NOCHECK,
    186 		    NULL, &pg)) != 0)
    187 			err(EXIT_FAILURE, "glob('%s')", ep->s);
    188 
    189 		/* We always have something to work with here */
    190 		for (p = pg.gl_pathv; *p; p++) {
    191 			sl = strrchr(*p, '/');
    192 			if (sl == NULL) {
    193 				err(EXIT_FAILURE, "glob: _whatdb entry '%s' "
    194 				    "doesn't contain slash", ep->s);
    195 			}
    196 
    197 			/*
    198 			 * Cut the last component of path, leaving just
    199 			 * the directory. We will use the result as root
    200 			 * for manpage search.
    201 			 * glob malloc()s space for the paths, so it's
    202 			 * okay to change it in-place.
    203 			 */
    204 			*sl = '\0';
    205 			paths[0] = *p;
    206 
    207 			if (!dofork) {
    208 				/* Do not fork child */
    209 				makewhatis(paths);
    210 				continue;
    211 			}
    212 
    213 			switch (fork()) {
    214 			case 0:
    215 				exit(makewhatis(paths));
    216 				break;
    217 			case -1:
    218 				warn("fork");
    219 				makewhatis(paths);
    220 				break;
    221 			default:
    222 				jobs++;
    223 				break;
    224 			}
    225 
    226 		}
    227 
    228 		globfree(&pg);
    229 	}
    230 
    231 	/* Wait for the childern to finish */
    232 	while (jobs > 0) {
    233 		(void)wait(&status);
    234 		if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS)
    235 			retval = EXIT_FAILURE;
    236 		jobs--;
    237 	}
    238 
    239 	return retval;
    240 }
    241 
    242 static int
    243 makewhatis(char * const * manpath)
    244 {
    245 	FTS	*fts;
    246 	FTSENT	*fe;
    247 	manpage *source;
    248 	whatis	*dest;
    249 	FILE	*out;
    250 	size_t	sdoff, sdlen;
    251 	int	outfd;
    252 	struct stat st_before, st_after;
    253 
    254 	if ((fts = fts_open(manpath, FTS_LOGICAL, NULL)) == NULL)
    255 		err(EXIT_FAILURE, "Cannot open `%s'", *manpath);
    256 
    257 	source = NULL;
    258 	while ((fe = fts_read(fts)) != NULL) {
    259 		switch (fe->fts_info) {
    260 		case FTS_F:
    261 			if (manpagesection(fe->fts_path) >= 0) {
    262 				/*
    263 				 * Get manpage subdirectory prefix. Most
    264 				 * commonly, this is arch-specific subdirectory.
    265 				 */
    266 				if (fe->fts_level >= 3) {
    267 					int		sl;
    268 					const char	*s, *lsl;
    269 
    270 					lsl = NULL;
    271 					s = &fe->fts_path[fe->fts_pathlen - 1];
    272 					for(sl = fe->fts_level - 1; sl > 0;
    273 					    sl--) {
    274 						s--;
    275 						while (s[0] != '/')
    276 							s--;
    277 						if (lsl == NULL)
    278 							lsl = s;
    279 					}
    280 
    281 					/*
    282 					 * Include trailing '/', so we get
    283 					 * 'arch/'.
    284 					 */
    285 					sdoff = s + 1 - fe->fts_path;
    286 					sdlen = lsl - s + 1;
    287 				} else {
    288 					sdoff = 0;
    289 					sdlen = 0;
    290 				}
    291 
    292 				addmanpage(&source, fe->fts_statp->st_ino,
    293 				    fe->fts_path, sdoff, sdlen);
    294 			}
    295 			/*FALLTHROUGH*/
    296 		case FTS_D:
    297 		case FTS_DC:
    298 		case FTS_DEFAULT:
    299 		case FTS_DP:
    300 		case FTS_SL:
    301 		case FTS_DOT:
    302 		case FTS_W:
    303 		case FTS_NSOK:
    304 		case FTS_INIT:
    305 			break;
    306 		case FTS_SLNONE:
    307 			warnx("Symbolic link with no target: `%s'",
    308 			    fe->fts_path);
    309 			break;
    310 		case FTS_DNR:
    311 			warnx("Unreadable directory: `%s'", fe->fts_path);
    312 			break;
    313 		case FTS_NS:
    314 			errno = fe->fts_errno;
    315 			warn("Cannot stat `%s'", fe->fts_path);
    316 			break;
    317 		case FTS_ERR:
    318 			errno = fe->fts_errno;
    319 			warn("Error reading `%s'", fe->fts_path);
    320 			break;
    321 		default:
    322 			errx(EXIT_FAILURE, "Unknown info %d returned from fts "
    323 			    " for path: `%s'", fe->fts_info, fe->fts_path);
    324 		}
    325 	}
    326 
    327 	(void)fts_close(fts);
    328 
    329 	dest = NULL;
    330 	processmanpages(&source, &dest);
    331 
    332 	if (chdir(manpath[0]) == -1)
    333 		err(EXIT_FAILURE, "Cannot change dir to `%s'", manpath[0]);
    334 
    335 	/*
    336 	 * makewhatis runs unattended, so it needs to be able to
    337 	 * recover if the last run crashed out. Therefore, if
    338 	 * whatisdb_new exists and is more than (arbitrarily) sixteen
    339 	 * hours old, nuke it. If it exists but is not so old, refuse
    340 	 * to run until it's cleaned up, in case another makewhatis is
    341 	 * already running. Also, open the output with O_EXCL to make
    342 	 * sure we get our own, in case two copies start exactly at
    343 	 * once. (Unlikely? Maybe, maybe not, if two copies of cron
    344 	 * end up running.)
    345 	 *
    346 	 * Similarly, before renaming the file after we finish writing
    347 	 * to it, make sure it's still the same file we opened. This
    348 	 * can't be completely race-free, but getting caught by it
    349 	 * would require an unexplained sixteen-hour-or-more lag
    350 	 * between the last mtime update when we wrote to it and when
    351 	 * we get to the stat call *and* another makewhatis starting
    352 	 * out to write at exactly the wrong moment. Not impossible,
    353 	 * but not likely enough to worry about.
    354 	 *
    355 	 * This is maybe unnecessarily elaborate, but generating
    356 	 * corrupted output isn't so good either.
    357 	 */
    358 
    359 	if (stat(whatisdb_new, &st_before) == 0) {
    360 		if (st_before.st_mtime - time(NULL) > 16*60*60) {
    361 			/* Don't complain if someone else just removed it. */
    362 			if (unlink(whatisdb_new) == -1 && errno != ENOENT) {
    363 				err(EXIT_FAILURE, "Could not remove `%s'",
    364 				    whatisdb_new);
    365 			} else {
    366 				warnx("Removed stale `%s'", whatisdb_new);
    367 			}
    368 		} else {
    369 			errx(EXIT_FAILURE, "The file `%s' already exists "
    370 			    "-- am I already running?", whatisdb_new);
    371 		}
    372 	} else if (errno != ENOENT) {
    373 		/* Something unexpected happened. */
    374 		err(EXIT_FAILURE, "Cannot stat `%s'", whatisdb_new);
    375 	}
    376 
    377 	outfd = open(whatisdb_new, O_WRONLY|O_CREAT|O_EXCL,
    378 	    S_IRUSR|S_IRGRP|S_IROTH);
    379 	if (outfd < 0)
    380 		err(EXIT_FAILURE, "Cannot open `%s'", whatisdb_new);
    381 
    382 	if (fstat(outfd, &st_before) == -1)
    383 		err(EXIT_FAILURE, "Cannot fstat `%s'", whatisdb_new);
    384 
    385 	if ((out = fdopen(outfd, "w")) == NULL)
    386 		err(EXIT_FAILURE, "Cannot fdopen `%s'", whatisdb_new);
    387 
    388 	dumpwhatis(out, dest);
    389 	if (fchmod(fileno(out), S_IRUSR|S_IRGRP|S_IROTH) == -1)
    390 		err(EXIT_FAILURE, "Cannot chmod `%s'", whatisdb_new);
    391 	if (fclose(out) != 0)
    392 		err(EXIT_FAILURE, "Cannot close `%s'", whatisdb_new);
    393 
    394 	if (stat(whatisdb_new, &st_after) == -1)
    395 		err(EXIT_FAILURE, "Cannot stat `%s' (after writing)",
    396 		    whatisdb_new);
    397 
    398 	if (st_before.st_dev != st_after.st_dev ||
    399 	    st_before.st_ino != st_after.st_ino) {
    400 		errx(EXIT_FAILURE, "The file `%s' changed under me; giving up",
    401 		    whatisdb_new);
    402 	}
    403 
    404 	if (rename(whatisdb_new, whatisdb) == -1)
    405 		err(EXIT_FAILURE, "Could not rename `%s' to `%s'",
    406 		    whatisdb_new, whatisdb);
    407 
    408 	return EXIT_SUCCESS;
    409 }
    410 
/*
 * Return a pointer to the first whitespace character in `str', or NULL
 * if the string ends before any whitespace is found.
 */
static char *
findwhitespace(char *str)
{
	for (;; str++) {
		if (ISSPACE(*str))
			return str;
		if (*str == '\0')
			return NULL;
	}
}
    422 
/*
 * Copy the string `src', terminator included, to `dest'.  The two
 * regions may overlap.  Returns `dest'.
 */
static char *
strmove(char *dest, char *src)
{
	size_t nbytes = strlen(src) + 1;

	(void)memmove(dest, src, nbytes);
	return dest;
}
    428 
    429 static char *
    430 GetS(gzFile in, char *buffer, size_t length)
    431 {
    432 	char	*ptr;
    433 
    434 	if (((ptr = gzgets(in, buffer, (int)length)) != NULL) && (*ptr == '\0'))
    435 		ptr = NULL;
    436 
    437 	return ptr;
    438 }
    439 
    440 static char *
    441 makesection(int s)
    442 {
    443 	char sectionbuffer[24];
    444 	if (s == -1)
    445 		return NULL;
    446 	(void)snprintf(sectionbuffer, sizeof(sectionbuffer),
    447 		" (%c) - ", sectionext[s]);
    448 	return estrdup(sectionbuffer);
    449 }
    450 
    451 static int
    452 pathnamesection(const char *pat, const char *name)
    453 {
    454 	char *ptr, *ext;
    455 	size_t len = strlen(pat);
    456 
    457 
    458 	while ((ptr = strstr(name, pat)) != NULL) {
    459 		if ((ext = strchr(sectionext, ptr[len])) != NULL) {
    460 			return ext - sectionext;
    461 		}
    462 		name = ptr + 1;
    463 	}
    464 	return -1;
    465 }
    466 
    467 
    468 static int
    469 manpagesection(char *name)
    470 {
    471 	char	*ptr;
    472 
    473 	if ((ptr = strrchr(name, '/')) != NULL)
    474 		ptr++;
    475 	else
    476 		ptr = name;
    477 
    478 	while ((ptr = strchr(ptr, '.')) != NULL) {
    479 		int section;
    480 
    481 		ptr++;
    482 		section = 0;
    483 		while (sectionext[section] != '\0')
    484 			if (sectionext[section] == *ptr)
    485 				return section;
    486 			else
    487 				section++;
    488 	}
    489 	return -1;
    490 }
    491 
/*
 * Return a newly allocated " (section_id) - " string.  Exits the
 * program if the allocation fails.
 */
static char *
createsectionstring(char *section_id)
{
	char	*marker;
	int	 rc;

	rc = asprintf(&marker, " (%s) - ", section_id);
	if (rc < 0)
		err(EXIT_FAILURE, "malloc failed");

	return marker;
}
    501 
    502 static void
    503 addmanpage(manpage **tree, ino_t inode, char *name, size_t sdoff, size_t sdlen)
    504 {
    505 	manpage *mp;
    506 
    507 	while ((mp = *tree) != NULL) {
    508 		if (mp->mp_inode == inode)
    509 			return;
    510 		tree = inode < mp->mp_inode ? &mp->mp_left : &mp->mp_right;
    511 	}
    512 
    513 	mp = emalloc(sizeof(manpage) + strlen(name));
    514 	mp->mp_left = NULL;
    515 	mp->mp_right = NULL;
    516 	mp->mp_inode = inode;
    517 	mp->mp_sdoff = sdoff;
    518 	mp->mp_sdlen = sdlen;
    519 	(void)strcpy(mp->mp_name, name);
    520 	*tree = mp;
    521 }
    522 
    523 static void
    524 addwhatis(whatis **tree, char *data, char *prefix)
    525 {
    526 	whatis *wi;
    527 	int result;
    528 
    529 	while (ISSPACE(*data))
    530 		data++;
    531 
    532 	if (*data == '/') {
    533 		char *ptr;
    534 
    535 		ptr = ++data;
    536 		while ((*ptr != '\0') && !ISSPACE(*ptr))
    537 			if (*ptr++ == '/')
    538 				data = ptr;
    539 	}
    540 
    541 	while ((wi = *tree) != NULL) {
    542 		result = strcmp(data, wi->wi_data);
    543 		if (result == 0) result = strcmp(prefix, wi->wi_prefix);
    544 		if (result == 0) return;
    545 		tree = result < 0 ? &wi->wi_left : &wi->wi_right;
    546 	}
    547 
    548 	wi = emalloc(sizeof(whatis) + strlen(prefix));
    549 
    550 	wi->wi_left = NULL;
    551 	wi->wi_right = NULL;
    552 	wi->wi_data = data;
    553 	if (prefix[0] != '\0')
    554 		(void) strcpy(wi->wi_prefix, prefix);
    555 	else
    556 		wi->wi_prefix[0] = '\0';
    557 	*tree = wi;
    558 }
    559 
/*
 * Clean up one line of formatted (cat page) text in place:
 * leading and trailing whitespace is removed, inner runs of whitespace
 * collapse to a single blank, and any character followed by a backspace
 * is dropped together with that backspace.
 */
static void
catpreprocess(char *from)
{
	char	*src, *dst;

	src = dst = from;
	while (ISSPACE(*src))
		src++;

	while (*src != '\0') {
		if (ISSPACE(*src)) {
			do {
				src++;
			} while (ISSPACE(*src));
			/* no blank is emitted for trailing whitespace */
			if (*src != '\0')
				*dst++ = ' ';
			continue;
		}
		if (src[1] == '\b') {
			src += 2;
			continue;
		}
		*dst++ = *src++;
	}

	*dst = '\0';
}
    581 
    582 static char *
    583 makewhatisline(const char *file, const char *line, const char *section)
    584 {
    585 	static const char *del[] = {
    586 		" - ",
    587 		" -- ",
    588 		"- ",
    589 		" -",
    590 		NULL
    591 	};
    592 	size_t i, pos;
    593 	size_t llen, slen, dlen;
    594 	char *result, *ptr;
    595 
    596 	ptr = NULL;
    597 	if (section == NULL) {
    598 		if (dowarn)
    599 			warnx("%s: No section provided for `%s'", file, line);
    600 		return estrdup(line);
    601 	}
    602 
    603 	for (i = 0; del[i]; i++)
    604 		if ((ptr = strstr(line, del[i])) != NULL)
    605 			break;
    606 
    607 	if (del[i] == NULL) {
    608 		if (dowarn)
    609 			warnx("%s: Bad format line `%s'", file, line);
    610 		return estrdup(line);
    611 	}
    612 
    613 	slen = strlen(section);
    614 	llen = strlen(line);
    615 	dlen = strlen(del[i]);
    616 
    617 	result = emalloc(llen - dlen + slen + 1);
    618 	pos = ptr - line;
    619 
    620 	(void)memcpy(result, line, pos);
    621 	(void)memcpy(&result[pos], section, slen);
    622 	(void)strcpy(&result[pos + slen], &line[pos + dlen]);
    623 	return result;
    624 }
    625 
    626 static char *
    627 parsecatpage(const char *name, gzFile *in)
    628 {
    629 	char	 buffer[8192];
    630 	char	*section, *ptr, *last;
    631 	size_t	 size;
    632 
    633 	do {
    634 		if (GetS(in, buffer, sizeof(buffer)) == NULL)
    635 			return NULL;
    636 	}
    637 	while (buffer[0] == '\n');
    638 
    639 	section = NULL;
    640 	if ((ptr = strchr(buffer, '(')) != NULL) {
    641 		if ((last = strchr(ptr + 1, ')')) !=NULL) {
    642 			size_t	length;
    643 
    644 			length = last - ptr + 1;
    645 			section = emalloc(length + 5);
    646 			*section = ' ';
    647 			(void) memcpy(section + 1, ptr, length);
    648 			(void) strcpy(section + 1 + length, " - ");
    649 		}
    650 	}
    651 
    652 	for (;;) {
    653 		if (GetS(in, buffer, sizeof(buffer)) == NULL) {
    654 			free(section);
    655 			return NULL;
    656 		}
    657 		catpreprocess(buffer);
    658 		if (strncmp(buffer, "NAME", 4) == 0)
    659 			break;
    660 	}
    661 	if (section == NULL)
    662 		section = makesection(pathnamesection("/cat", name));
    663 
    664 	ptr = last = buffer;
    665 	size = sizeof(buffer) - 1;
    666 	while ((size > 0) && (GetS(in, ptr, size) != NULL)) {
    667 		int	 length;
    668 
    669 		catpreprocess(ptr);
    670 
    671 		length = strlen(ptr);
    672 		if (length == 0) {
    673 			*last = '\0';
    674 
    675 			ptr = makewhatisline(name, buffer, section);
    676 			free(section);
    677 			return ptr;
    678 		}
    679 		if ((length > 1) && (ptr[length - 1] == '-') &&
    680 		    ISALPHA(ptr[length - 2]))
    681 			last = &ptr[--length];
    682 		else {
    683 			last = &ptr[length++];
    684 			*last = ' ';
    685 		}
    686 
    687 		ptr += length;
    688 		size -= length;
    689 	}
    690 
    691 	free(section);
    692 
    693 	return NULL;
    694 }
    695 
/*
 * Clean up one line of roff source in place.
 *
 * Returns 1, leaving the line untouched, if it is a comment (first
 * non-blank text is .\").  Otherwise returns 0 after:
 *  - dropping leading and trailing whitespace and collapsing inner runs
 *    of whitespace to one blank (no blank is kept in front of a comma);
 *  - removing double quotes;
 *  - removing escapes: "\-" becomes "-"; "\f" and "\s" are removed
 *    together with a one-letter font name (\fB) or an optional sign and
 *    digits (\s-1, \f1); for any other escape the backslash and the
 *    character after it are removed;
 *  - rewriting ".Xr name section [trail]" to "name(section)trail".
 */
static int
manpreprocess(char *line)
{
	char	*from, *to;

	to = from = line;
	while (ISSPACE(*from))
		from++;
	if (strncmp(from, ".\\\"", 3) == 0)
		return 1;

	while (*from != '\0') {
		if (ISSPACE(*from)) {
			while (ISSPACE(*++from));
			if ((*from != '\0') && (*from != ','))
				*to++ = ' ';
		} else if (*from == '\\') {
			char	esc;

			switch (esc = *++from) {
			case '\0':
			case '-':
				/* keep the '-', or stop at the end of line */
				break;
			case 'f':
			case 's':
				from++;
				if ((esc == 'f') && ISALPHA(*from)) {
					/*
					 * One-letter font name as in \fB or
					 * \fR; without this the letter would
					 * be copied into the output.
					 */
					from++;
					break;
				}
				if ((*from=='+') || (*from=='-'))
					from++;
				while (ISDIGIT(*from))
					from++;
				break;
			default:
				from++;
			}
		} else {
			if (*from == '"')
				from++;
			else
				*to++ = *from++;
		}
	}

	*to = '\0';

	if (strncasecmp(line, ".Xr", 3) == 0) {
		char	*sect;

		from = line + 3;
		if (ISSPACE(*from))
			from++;

		/* `from' is the page name, `sect' the word after it */
		if ((sect = findwhitespace(from)) != NULL) {
			size_t	length;
			char	*trail;

			*sect++ = '\0';
			if ((trail = findwhitespace(sect)) != NULL)
				*trail++ = '\0';
			/*
			 * Rebuild the line front to back.  Each piece moves
			 * towards the start of the buffer, hence memmove().
			 */
			length = strlen(from);
			(void) memmove(line, from, length);
			line[length++] = '(';
			to = &line[length];
			length = strlen(sect);
			(void) memmove(to, sect, length);
			if (trail == NULL) {
				(void) strcpy(&to[length], ")");
			} else {
				to += length;
				*to++ = ')';
				length = strlen(trail);
				(void) memmove(to, trail, length + 1);
			}
		}
	}

	return 0;
}
    770 
    771 static char *
    772 nroff(const char *inname, gzFile *in)
    773 {
    774 	char tempname[MAXPATHLEN], buffer[65536], *data;
    775 	int tempfd, bytes, pipefd[2], status;
    776 	static int devnull = -1;
    777 	pid_t child;
    778 
    779 	if (gzrewind(in) < 0)
    780 		err(EXIT_FAILURE, "Cannot rewind pipe");
    781 
    782 	if ((devnull < 0) &&
    783 	    ((devnull = open(_PATH_DEVNULL, O_WRONLY, 0)) < 0))
    784 		err(EXIT_FAILURE, "Cannot open `/dev/null'");
    785 
    786 	(void)strlcpy(tempname, _PATH_TMP "makewhatis.XXXXXX",
    787 	    sizeof(tempname));
    788 	if ((tempfd = mkstemp(tempname)) == -1)
    789 		err(EXIT_FAILURE, "Cannot create temp file");
    790 
    791 	while ((bytes = gzread(in, buffer, sizeof(buffer))) > 0)
    792 		if (write(tempfd, buffer, (size_t)bytes) != bytes) {
    793 			bytes = -1;
    794 			break;
    795 		}
    796 
    797 	if (bytes < 0) {
    798 		(void)close(tempfd);
    799 		(void)unlink(tempname);
    800 		err(EXIT_FAILURE, "Read from pipe failed");
    801 	}
    802 	if (lseek(tempfd, (off_t)0, SEEK_SET) == (off_t)-1) {
    803 		(void)close(tempfd);
    804 		(void)unlink(tempname);
    805 		err(EXIT_FAILURE, "Cannot rewind temp file");
    806 	}
    807 	if (pipe(pipefd) == -1) {
    808 		(void)close(tempfd);
    809 		(void)unlink(tempname);
    810 		err(EXIT_FAILURE, "Cannot create pipe");
    811 	}
    812 
    813 	switch (child = vfork()) {
    814 	case -1:
    815 		(void)close(pipefd[1]);
    816 		(void)close(pipefd[0]);
    817 		(void)close(tempfd);
    818 		(void)unlink(tempname);
    819 		err(EXIT_FAILURE, "Fork failed");
    820 		/* NOTREACHED */
    821 	case 0:
    822 		(void)close(pipefd[0]);
    823 		if (tempfd != STDIN_FILENO) {
    824 			(void)dup2(tempfd, STDIN_FILENO);
    825 			(void)close(tempfd);
    826 		}
    827 		if (pipefd[1] != STDOUT_FILENO) {
    828 			(void)dup2(pipefd[1], STDOUT_FILENO);
    829 			(void)close(pipefd[1]);
    830 		}
    831 		if (devnull != STDERR_FILENO) {
    832 			(void)dup2(devnull, STDERR_FILENO);
    833 			(void)close(devnull);
    834 		}
    835 		(void)execlp(NROFF, NROFF, "-S", "-man", NULL);
    836 		_exit(EXIT_FAILURE);
    837 		/*NOTREACHED*/
    838 	default:
    839 		(void)close(pipefd[1]);
    840 		(void)close(tempfd);
    841 		break;
    842 	}
    843 
    844 	if ((in = gzdopen(pipefd[0], "r")) == NULL) {
    845 		if (errno == 0)
    846 			errno = ENOMEM;
    847 		(void)close(pipefd[0]);
    848 		(void)kill(child, SIGTERM);
    849 		while (waitpid(child, NULL, 0) != child);
    850 		(void)unlink(tempname);
    851 		err(EXIT_FAILURE, "Cannot read from pipe");
    852 	}
    853 
    854 	data = parsecatpage(inname, in);
    855 	while (gzread(in, buffer, sizeof(buffer)) > 0);
    856 	(void)gzclose(in);
    857 
    858 	while (waitpid(child, &status, 0) != child);
    859 	if ((data != NULL) &&
    860 	    !(WIFEXITED(status) && (WEXITSTATUS(status) == 0))) {
    861 		free(data);
    862 		errx(EXIT_FAILURE, NROFF " on `%s' exited with %d status",
    863 		    inname, WEXITSTATUS(status));
    864 	}
    865 
    866 	(void)unlink(tempname);
    867 	return data;
    868 }
    869 
    870 static char *
    871 parsemanpage(const char *name, gzFile *in, int defaultsection)
    872 {
        	/*
        	 * Extract the whatis line from the roff source of a manual page.
        	 *
        	 * Preprocessed lines are read until one starts with ".Sh NAME"
        	 * (compared case-insensitively), remembering the section named
        	 * by the most recent ".Dt" or ".TH" line.  The text that follows
        	 * is gathered in `buffer', in one of two ways depending on
        	 * whether it starts with ".Nm".  If no section was seen,
        	 * `defaultsection' (a sectionext index) is used.
        	 *
        	 * Returns the line built by makewhatisline(), or NULL when the
        	 * input ends early, a ".Ds" line precedes the NAME section, or
        	 * the text has a form this parser does not handle.
        	 * getwhatisdata() falls back to nroff() on NULL.
        	 */
    873 	char	*section, buffer[8192], *ptr;
    874 
    875 	section = NULL;
        	/* Phase 1: scan the header up to the NAME section. */
    876 	do {
    877 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
    878 			free(section);
    879 			return NULL;
    880 		}
        		/* comment lines are left untouched and skipped */
    881 		if (manpreprocess(buffer))
    882 			continue;
    883 		if (strncasecmp(buffer, ".Dt", 3) == 0) {
    884 			char	*end;
    885 
        			/* the section is the second word after ".Dt" */
    886 			ptr = &buffer[3];
    887 			if (ISSPACE(*ptr))
    888 				ptr++;
    889 			if ((ptr = findwhitespace(ptr)) == NULL)
    890 				continue;
    891 
    892 			if ((end = findwhitespace(++ptr)) != NULL)
    893 				*end = '\0';
    894 
    895 			free(section);
    896 			section = createsectionstring(ptr);
    897 		}
    898 		else if (strncasecmp(buffer, ".TH", 3) == 0) {
        			/* likewise the second word after ".TH" */
    899 			ptr = &buffer[3];
    900 			while (ISSPACE(*ptr))
    901 				ptr++;
    902 			if ((ptr = findwhitespace(ptr)) != NULL) {
    903 				char *next;
    904 
    905 				while (ISSPACE(*ptr))
    906 					ptr++;
    907 				if ((next = findwhitespace(ptr)) != NULL)
    908 					*next = '\0';
    909 				free(section);
    910 				section = createsectionstring(ptr);
    911 			}
    912 		}
    913 		else if (strncasecmp(buffer, ".Ds", 3) == 0) {
        			/* give up on pages with ".Ds" before the NAME section */
    914 			free(section);
    915 			return NULL;
    916 		}
    917 	} while (strncasecmp(buffer, ".Sh NAME", 8) != 0);
    918 
        	/* Phase 2: first non-comment line of the NAME section. */
    919 	do {
    920 		if (GetS(in, buffer, sizeof(buffer) - 1) == NULL) {
    921 			free(section);
    922 			return NULL;
    923 		}
    924 	} while (manpreprocess(buffer));
    925 
    926 	if (strncasecmp(buffer, ".Nm", 3) == 0) {
        		/*
        		 * NAME section written with .Nm/.Nd macros.  `length' is
        		 * the length of the text assembled at the start of buffer;
        		 * `offset' is where the next input line is read.
        		 */
    927 		size_t	length, offset;
    928 
    929 		ptr = &buffer[3];
    930 		while (ISSPACE(*ptr))
    931 			ptr++;
    932 
    933 		length = strlen(ptr);
        		/* turn a trailing " ," into "," */
    934 		if ((length > 1) && (ptr[length - 1] == ',') &&
    935 		    ISSPACE(ptr[length - 2])) {
    936 			ptr[--length] = '\0';
    937 			ptr[length - 1] = ',';
    938 		}
    939 		(void) memmove(buffer, ptr, length + 1);
    940 
        		/*
        		 * Read 3 bytes past the names, which leaves room for the
        		 * " -" and its NUL stored at buffer[length] further down.
        		 */
    941 		offset = length + 3;
    942 		ptr = &buffer[offset];
        		/* collect any further ".Nm" lines, separated by blanks */
    943 		for (;;) {
    944 			size_t	 more;
    945 
    946 			if ((sizeof(buffer) == offset) ||
    947 			    (GetS(in, ptr, sizeof(buffer) - offset)
    948 			       == NULL)) {
    949 				free(section);
    950 				return NULL;
    951 			}
    952 			if (manpreprocess(ptr))
    953 				continue;
    954 
    955 			if (strncasecmp(ptr, ".Nm", 3) != 0) break;
    956 
    957 			ptr += 3;
    958 			if (ISSPACE(*ptr))
    959 				ptr++;
    960 
    961 			buffer[length++] = ' ';
    962 			more = strlen(ptr);
    963 			if ((more > 1) && (ptr[more - 1] == ',') &&
    964 			    ISSPACE(ptr[more - 2])) {
    965 				ptr[--more] = '\0';
    966 				ptr[more - 1] = ',';
    967 			}
    968 
    969 			(void) memmove(&buffer[length], ptr, more + 1);
    970 			length += more;
    971 			offset = length + 3;
    972 
    973 			ptr = &buffer[offset];
    974 		}
    975 
        		/* `ptr' now holds the first line that is not ".Nm" */
    976 		if (strncasecmp(ptr, ".Nd", 3) == 0) {
    977 			(void) strlcpy(&buffer[length], " -",
    978 			    sizeof(buffer) - length);
    979 
        			/* append description lines until the next ".Sh" */
    980 			while (strncasecmp(ptr, ".Sh", 3) != 0) {
    981 				int	 more;
    982 
    983 				if (*ptr == '.') {
    984 					char	*space;
    985 
        					/*
        					 * Fail on any macro other than
        					 * ".Nd", and on ".Nd" lines that
        					 * contain '['.
        					 */
    986 					if (strncasecmp(ptr, ".Nd", 3) != 0 ||
    987 					    strchr(ptr, '[') != NULL) {
    988 						free(section);
    989 						return NULL;
    990 					}
        					/* strip the macro name itself */
    991 					space = findwhitespace(ptr);
    992 					if (space == NULL) {
    993 						ptr = "";
    994 					} else {
    995 						space++;
    996 						(void) strmove(ptr, space);
    997 					}
    998 				}
    999 
   1000 				if (*ptr != '\0') {
        					/*
        					 * Join with the text before it:
        					 * the NUL in front of this piece
        					 * becomes a blank.
        					 */
   1001 					buffer[offset - 1] = ' ';
   1002 					more = strlen(ptr) + 1;
   1003 					offset += more;
   1004 				}
   1005 				ptr = &buffer[offset];
   1006 				if ((sizeof(buffer) == offset) ||
   1007 				    (GetS(in, ptr, sizeof(buffer) - offset)
   1008 					== NULL)) {
   1009 					free(section);
   1010 					return NULL;
   1011 				}
        				/* comment lines contribute nothing */
   1012 				if (manpreprocess(ptr))
   1013 					*ptr = '\0';
   1014 			}
   1015 		}
   1016 	}
   1017 	else {
        		/*
        		 * NAME section not starting with ".Nm": join the lines up
        		 * to the next ".Sh" or ".Ss", dropping the first word of
        		 * every line that starts with '.'.
        		 */
   1018 		int	 offset;
   1019 
   1020 		if (*buffer == '.') {
   1021 			char	*space;
   1022 
   1023 			if ((space = findwhitespace(&buffer[1])) == NULL) {
   1024 				free(section);
   1025 				return NULL;
   1026 			}
   1027 			space++;
   1028 			(void) strmove(buffer, space);
   1029 		}
   1030 
        		/* next line goes right behind the NUL of the first one */
   1031 		offset = strlen(buffer) + 1;
   1032 		for (;;) {
   1033 			int	 more;
   1034 
   1035 			ptr = &buffer[offset];
   1036 			if ((sizeof(buffer) == offset) ||
   1037 			    (GetS(in, ptr, sizeof(buffer) - offset)
   1038 				== NULL)) {
   1039 				free(section);
   1040 				return NULL;
   1041 			}
        			/* skip comment lines and lines that end up empty */
   1042 			if (manpreprocess(ptr) || (*ptr == '\0'))
   1043 				continue;
   1044 
   1045 			if ((strncasecmp(ptr, ".Sh", 3) == 0) ||
   1046 			    (strncasecmp(ptr, ".Ss", 3) == 0))
   1047 				break;
   1048 
   1049 			if (*ptr == '.') {
   1050 				char	*space;
   1051 
        				/* a macro without arguments adds no text */
   1052 				if ((space = findwhitespace(ptr)) == NULL) {
   1053 					continue;
   1054 				}
   1055 
   1056 				space++;
   1057 				(void) memmove(ptr, space, strlen(space) + 1);
   1058 			}
   1059 
        			/* the NUL in front of this piece becomes a blank */
   1060 			buffer[offset - 1] = ' ';
   1061 			more = strlen(ptr);
        			/*
        			 * A trailing " ," becomes ",": the piece gets one
        			 * byte shorter, so `more' already covers its NUL.
        			 * Otherwise count the NUL explicitly.
        			 */
   1062 			if ((more > 1) && (ptr[more - 1] == ',') &&
   1063 			    ISSPACE(ptr[more - 2])) {
   1064 				ptr[more - 1] = '\0';
   1065 				ptr[more - 2] = ',';
   1066 			}
   1067 			else more++;
   1068 			offset += more;
   1069 		}
   1070 	}
   1071 
        	/* no .Dt/.TH section seen: use the one from the file name */
   1072 	if (section == NULL)
   1073 		section = makesection(defaultsection);
   1074 
   1075 	ptr = makewhatisline(name, buffer, section);
   1076 	free(section);
   1077 	return ptr;
   1078 }
   1079 
   1080 static char *
   1081 getwhatisdata(char *name)
   1082 {
   1083 	gzFile	*in;
   1084 	char	*data;
   1085 	int	 section;
   1086 
   1087 	if ((in = gzopen(name, "r")) == NULL) {
   1088 		if (errno == 0)
   1089 			errno = ENOMEM;
   1090 		err(EXIT_FAILURE, "Cannot open `%s'", name);
   1091 		/* NOTREACHED */
   1092 	}
   1093 
   1094 	section = manpagesection(name);
   1095 	if (section == 0) {
   1096 		data = parsecatpage(name, in);
   1097 	} else {
   1098 		data = parsemanpage(name, in, section);
   1099 		if (data == NULL)
   1100 			data = nroff(name, in);
   1101 	}
   1102 
   1103 	(void) gzclose(in);
   1104 	return data;
   1105 }
   1106 
   1107 static void
   1108 processmanpages(manpage **source, whatis **dest)
   1109 {
   1110 	manpage *mp;
   1111 	char sd[128];
   1112 
   1113 	mp = *source;
   1114 	*source = NULL;
   1115 
   1116 	while (mp != NULL) {
   1117 		manpage *obsolete;
   1118 		char *data;
   1119 
   1120 		if (mp->mp_left != NULL)
   1121 			processmanpages(&mp->mp_left, dest);
   1122 
   1123 		if ((data = getwhatisdata(mp->mp_name)) != NULL) {
   1124 			/* Pass eventual directory prefix to addwhatis() */
   1125 			if (mp->mp_sdlen > 0 && mp->mp_sdlen < sizeof(sd)-1)
   1126 				strlcpy(sd, &mp->mp_name[mp->mp_sdoff],
   1127 					mp->mp_sdlen);
   1128 			else
   1129 				sd[0] = '\0';
   1130 
   1131 			addwhatis(dest, data, sd);
   1132 		}
   1133 
   1134 		obsolete = mp;
   1135 		mp = mp->mp_right;
   1136 		free(obsolete);
   1137 	}
   1138 }
   1139 
   1140 static void
   1141 dumpwhatis(FILE *out, whatis *tree)
   1142 {
   1143 	while (tree != NULL) {
   1144 		if (tree->wi_left)
   1145 			dumpwhatis(out, tree->wi_left);
   1146 
   1147 		if ((tree->wi_data[0] && fputs(tree->wi_prefix, out) == EOF) ||
   1148 		    (fputs(tree->wi_data, out) == EOF) ||
   1149 		    (fputc('\n', out) == EOF))
   1150 			err(EXIT_FAILURE, "Write failed");
   1151 
   1152 		tree = tree->wi_right;
   1153 	}
   1154 }
   1155