Home | History | Annotate | Line # | Download | only in spellprog
      1 /*	$NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $	*/
      2 
      3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
      4 
      5 /*
      6  * Copyright (c) 1991, 1993
      7  *	The Regents of the University of California.  All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  *
     33  *	@(#)spell.h	8.1 (Berkeley) 6/6/93
     34  */
     35 /*
     36  * Copyright (C) Caldera International Inc.  2001-2002.
     37  * All rights reserved.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code and documentation must retain the above
     43  *    copyright notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. All advertising materials mentioning features or use of this software
     48  *    must display the following acknowledgement:
     49  *	This product includes software developed or owned by Caldera
     50  *	International, Inc.
     51  * 4. Neither the name of Caldera International, Inc. nor the names of other
     52  *    contributors may be used to endorse or promote products derived from
     53  *    this software without specific prior written permission.
     54  *
     55  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
     56  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
     57  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     58  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     59  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
     60  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     61  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     62  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     64  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     65  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 #include <sys/cdefs.h>
     70 
/*
 * Identification strings.  Both are compiled only when "lint" is not
 * defined.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
/* The historic sccsid string is disabled by the "#if 0" below. */
#if 0
static const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
#else
#endif
/* rcsid sits after the #endif, so it is defined whichever branch is taken. */
static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
     84 
     85 #include <sys/param.h>
     86 #include <sys/mman.h>
     87 #include <sys/stat.h>
     88 
     89 #include <ctype.h>
     90 #include <err.h>
     91 #include <errno.h>
     92 #include <fcntl.h>
     93 #include <limits.h>
     94 #include <locale.h>
     95 #include <stdio.h>
     96 #include <stdlib.h>
     97 #include <string.h>
     98 #include <unistd.h>
     99 #include <util.h>
    100 
    101 #include "extern.h"
    102 
/*
 * Derivation-level step: suffix() adds this to its lev argument on
 * every call, and s() and es() compare lev against it to refuse
 * stripping at deeper levels.
 */
#define DLEV 2

/* Dictionary lookup, prefix/suffix drivers and small helpers. */
static int	 dict(char *, char *);
static int	 trypref(char *, const char *, size_t);
static int	 tryword(char *, char *, size_t);
static int	 suffix(char *, size_t);
static int	 vowel(int);
static const char *lookuppref(char **, char *);
static char	*skipv(char *);
static void	 ise(void);
static void	 print_word(FILE *);
static void	 ztos(char *);
static int	 monosyl(char *, char *);
static void 	 usage(void) __dead;
static void	 getderiv(size_t);

/*
 * Suffix handlers.  They all share the signature of the p1/p2 members
 * of struct suftab: (end of stem, d string, a string, level).
 */
static int	 an(char *, const char *, const char *, size_t);
static int	 bility(char *, const char *, const char *, size_t);
static int	 es(char *, const char *, const char *, size_t);
static int	 i_to_y(char *, const char *, const char *, size_t);
static int	 ily(char *, const char *, const char *, size_t);
static int	 ize(char *, const char *, const char *, size_t);
static int	 metry(char *, const char *, const char *, size_t);
static int	 ncy(char *, const char *, const char *, size_t);
static int	 nop(char *, const char *, const char *, size_t);
static int	 s(char *, const char *, const char *, size_t);
static int	 strip(char *, const char *, const char *, size_t);
static int	 tion(char *, const char *, const char *, size_t);
static int	 y_to_e(char *, const char *, const char *, size_t);
static int	 CCe(char *, const char *, const char *, size_t);
static int	 VCe(char *, const char *, const char *, size_t);
    134 
    135 /*
    136  * This cannot be const because we modify it when we choose british
    137  * spelling.
    138  */
    139 static struct suftab {
    140 	const char *suf;
    141 	int (*p1)(char *, const char *, const char *, size_t);
    142 	int n1;
    143 	const char *d1;
    144 	const char *a1;
    145 	int (*p2)(char *, const char *, const char *, size_t);
    146 	int n2;
    147 	const char *d2;
    148 	const char *a2;
    149 } suftab[] = {
    150 	{ .suf = "ssen",	.p1 = ily,	.n1 = 4,
    151 	  .d1 = "-y+iness", 	.a1 = "+ness" },
    152 	{ .suf = "ssel",	.p1 = ily,	.n1 = 4,
    153 	  .d1 = "-y+i+less", 	.a1 = "+less" },
    154 	{ .suf = "se",		.p1 = s,	.n1 = 1,
    155 	  .d1 = "", 		.a1 = "+s",	.p2 = es,
    156 	  .n2 = 2,		.d2 = "-y+ies",	.a2 = "+es" },
    157 	{ .suf = "s'",		.p1 = s,	.n1 = 2,
    158 	  .d1 = "", 		.a1 = "+'s" },
    159 	{ .suf = "s",		.p1 = s,	.n1 = 1,
    160 	  .d1 = "", 		.a1 = "+s" },
    161 	{ .suf = "ecn",		.p1 = ncy,	.n1 = 1,
    162 	  .d1 = "", 		.a1 = "-t+ce" },
    163 	{ .suf = "ycn",		.p1 = ncy,	.n1 = 1,
    164 	  .d1 = "", 		.a1 = "-cy+t" },
    165 	{ .suf = "ytilb",	.p1 = nop,	.n1 = 0,
    166 	  .d1 = "", 		.a1 = "" },
    167 	{ .suf = "ytilib",	.p1 = bility,	.n1 = 5,
    168 	  .d1 = "-le+ility", 	.a1 = "" },
    169 	{ .suf = "elbaif",	.p1 = i_to_y,	.n1 = 4,
    170 	  .d1 = "-y+iable", 	.a1 = "" },
    171 	{ .suf = "elba",	.p1 = CCe,	.n1 = 4,
    172 	  .d1 = "-e+able", 	.a1 = "+able" },
    173 	{ .suf = "yti",		.p1 = CCe,	.n1 = 3,
    174 	  .d1 = "-e+ity", 	.a1 = "+ity" },
    175 	{ .suf = "ylb",		.p1 = y_to_e,	.n1 = 1,
    176 	  .d1 = "-e+y", 	.a1 = "" },
    177 	{ .suf = "yl",		.p1 = ily,	.n1 = 2,
    178 	  .d1 = "-y+ily", 	.a1 = "+ly" },
    179 	{ .suf = "laci",	.p1 = strip,	.n1 = 2,
    180 	  .d1 = "", 		.a1 = "+al" },
    181 	{ .suf = "latnem",	.p1 = strip,	.n1 = 2,
    182 	  .d1 = "", 		.a1 = "+al" },
    183 	{ .suf = "lanoi",	.p1 = strip,	.n1 = 2,
    184 	  .d1 = "", 		.a1 = "+al" },
    185 	{ .suf = "tnem",	.p1 = strip,	.n1 = 4,
    186 	  .d1 = "", 		.a1 = "+ment" },
    187 	{ .suf = "gni",		.p1 = CCe,	.n1 = 3,
    188 	  .d1 = "-e+ing", 	.a1 = "+ing" },
    189 	{ .suf = "reta",	.p1 = nop,	.n1 = 0,
    190 	  .d1 = "", 		.a1 = "" },
    191 	{ .suf = "re",		.p1 = strip,	.n1 = 1,
    192 	  .d1 = "", 		.a1 = "+r",	.p2 = i_to_y,
    193 	  .n2 = 2,		.d2 = "-y+ier",	.a2 = "+er" },
    194 	{ .suf = "de",		.p1 = strip,	.n1 = 1,
    195 	  .d1 = "", 		.a1 = "+d",	.p2 = i_to_y,
    196 	  .n2 = 2,		.d2 = "-y+ied",	.a2 = "+ed" },
    197 	{ .suf = "citsi",	.p1 = strip,	.n1 = 2,
    198 	  .d1 = "", 		.a1 = "+ic" },
    199 	{ .suf = "cihparg",	.p1 = i_to_y,	.n1 = 1,
    200 	  .d1 = "-y+ic", 	.a1 = "" },
    201 	{ .suf = "tse",		.p1 = strip,	.n1 = 2,
    202 	  .d1 = "", 		.a1 = "+st",	.p2 = i_to_y,
    203 	  .n2 = 3,		.d2 = "-y+iest",.a2 = "+est" },
    204 	{ .suf = "cirtem",	.p1 = i_to_y,	.n1 = 1,
    205 	  .d1 = "-y+ic", 	.a1 = "" },
    206 	{ .suf = "yrtem",	.p1 = metry,	.n1 = 0,
    207 	  .d1 = "-ry+er", 	.a1 = "" },
    208 	{ .suf = "cigol",	.p1 = i_to_y,	.n1 = 1,
    209 	  .d1 = "-y+ic", 	.a1 = "" },
    210 	{ .suf = "tsigol",	.p1 = i_to_y,	.n1 = 2,
    211 	  .d1 = "-y+ist", 	.a1 = "" },
    212 	{ .suf = "tsi",		.p1 = VCe,	.n1 = 3,
    213 	  .d1 = "-e+ist", 	.a1 = "+ist" },
    214 	{ .suf = "msi",		.p1 = VCe,	.n1 = 3,
    215 	  .d1 = "-e+ism", 	.a1 = "+ist" },
    216 	{ .suf = "noitacif",	.p1 = i_to_y,	.n1 = 6,
    217 	  .d1 = "-y+ication", 	.a1 = "" },
    218 	{ .suf = "noitazi",	.p1 = ize,	.n1 = 5,
    219 	  .d1 = "-e+ation", 	.a1 = "" },
    220 	{ .suf = "rota",	.p1 = tion,	.n1 = 2,
    221 	  .d1 = "-e+or", 	.a1 = "" },
    222 	{ .suf = "noit",	.p1 = tion,	.n1 = 3,
    223 	  .d1 = "-e+ion", 	.a1 = "+ion" },
    224 	{ .suf = "naino",	.p1 = an,	.n1 = 3,
    225 	  .d1 = "", 		.a1 = "+ian" },
    226 	{ .suf = "na",		.p1 = an,	.n1 = 1,
    227 	  .d1 = "", 		.a1 = "+n" },
    228 	{ .suf = "evit",	.p1 = tion,	.n1 = 3,
    229 	  .d1 = "-e+ive", 	.a1 = "+ive" },
    230 	{ .suf = "ezi",		.p1 = CCe,	.n1 = 3,
    231 	  .d1 = "-e+ize", 	.a1 = "+ize" },
    232 	{ .suf = "pihs",	.p1 = strip,	.n1 = 4,
    233 	  .d1 = "", 		.a1 = "+ship" },
    234 	{ .suf = "dooh",	.p1 = ily,	.n1 = 4,
    235 	  .d1 = "-y+hood", 	.a1 = "+hood" },
    236 	{ .suf = "ekil",	.p1 = strip,	.n1 = 4,
    237 	  .d1 = "", 		.a1 = "+like" },
    238 	{ .suf = NULL, }
    239 };
    240 
/*
 * Prefixes that lookuppref() is willing to strip, tried in table order
 * and terminated by NULL.  Order matters where one prefix begins with
 * another: "under" must precede "un".
 */
static const char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso", "kilo", "magneto",
	"meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over", "photo", "poly", "pre",
	"pseudo", "re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
    278 
/*
 * One entry per word list named on the command line.  main() fills the
 * array and terminates it with an entry whose fd is -1.
 */
static struct wlist {
	int fd;			/* descriptor returned by open(), -1 = end */
	unsigned char *front;	/* start of the mmap()ed file */
	unsigned char *back;	/* front + file size */
} *wlists;

static int vflag;		/* -v: incremented once per occurrence */
static int xflag;		/* -x: incremented once per occurrence */
static char word[LINE_MAX];	/* current word; edited in place while stemming */
static char original[LINE_MAX];	/* the word as read, this is what gets printed */
static char affix[LINE_MAX];	/* derivation text appended by tryword() under -v */
/* Derivation strings indexed by level; grown on demand by getderiv(). */
static struct {
	const char **buf;	/* array of annotation strings */
	size_t maxlev;		/* number of slots getderiv() last allocated */
} deriv;
    294 
    295 /*
    296  * The spellprog utility accepts a newline-delimited list of words
    297  * on stdin.  For arguments it expects the path to a word list and
    298  * the path to a file in which to store found words.
    299  *
    300  * In normal usage, spell is called twice.  The first time it is
    301  * called with a stop list to flag commonly mispelled words.  The
    302  * remaining words are then passed to spell again, this time with
    303  * the dictionary file as the first (non-flag) argument.
    304  *
    305  * Unlike historic versions of spellprog, this one does not use
    306  * hashed files.  Instead it simply requires that files be sorted
    307  * lexigraphically and uses the same algorithm as the look utility.
    308  *
    309  * Note that spellprog should be called via the spell shell script
    310  * and is not meant to be invoked directly by the user.
    311  */
    312 
    313 int
    314 main(int argc, char **argv)
    315 {
    316 	char *ep, *cp, *dp;
    317 	char *outfile;
    318 	int ch, fold, i;
    319 	struct stat sb;
    320 	FILE *file, *found;
    321 
    322 	setlocale(LC_ALL, "");
    323 
    324 	outfile = NULL;
    325 	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
    326 		switch (ch) {
    327 		case 'b':
    328 			/* Use British dictionary and convert ize -> ise. */
    329 			ise();
    330 			break;
    331 		case 'o':
    332 			outfile = optarg;
    333 			break;
    334 		case 'v':
    335 			/* Also write derivations to "found" file. */
    336 			vflag++;
    337 			break;
    338 		case 'x':
    339 			/* Print plausible stems to stdout. */
    340 			xflag++;
    341 			break;
    342 		default:
    343 			usage();
    344 		}
    345 
    346 	}
    347 	argc -= optind;
    348 	argv += optind;
    349 	if (argc < 1)
    350 		usage();
    351 
    352 	/* Open and mmap the word/stop lists. */
    353 	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
    354 		err(1, "malloc");
    355 
    356 	for (i = 0; argc--; i++) {
    357 		wlists[i].fd = open(argv[i], O_RDONLY, 0);
    358 		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
    359 			err(1, "%s", argv[i]);
    360 		if (sb.st_size > SIZE_T_MAX)
    361 			errx(1, "%s: %s", argv[i], strerror(EFBIG));
    362 		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
    363 		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
    364 		if (wlists[i].front == MAP_FAILED)
    365 			err(1, "%s", argv[i]);
    366 		wlists[i].back = wlists[i].front + (size_t)sb.st_size;
    367 	}
    368 	wlists[i].fd = -1;
    369 
    370 	/* Open file where found words are to be saved. */
    371 	if (outfile == NULL)
    372 		found = NULL;
    373 	else if ((found = fopen(outfile, "w")) == NULL)
    374 		err(1, "cannot open %s", outfile);
    375 
    376 	for (;; print_word(file)) {
    377 		affix[0] = '\0';
    378 		file = found;
    379 		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
    380 			if (ep - word == sizeof(word) - 1) {
    381 				*ep = '\0';
    382 				warnx("word too long (%s)", word);
    383 				while ((ch = getchar()) != '\n')
    384 					;	/* slurp until EOL */
    385 			}
    386 			if (ch == EOF) {
    387 				if (found != NULL)
    388 					fclose(found);
    389 				exit(0);
    390 			}
    391 		}
    392 		for (cp = word, dp = original; cp < ep; )
    393 			*dp++ = *cp++;
    394 		*dp = '\0';
    395 		fold = 0;
    396 		for (cp = word; cp < ep; cp++)
    397 			if (islower((unsigned char)*cp))
    398 				goto lcase;
    399 		if (trypref(ep, ".", 0))
    400 			continue;
    401 		++fold;
    402 		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
    403 			*dp = tolower((unsigned char)*cp);
    404 lcase:
    405 		if (trypref(ep, ".", 0) || suffix(ep, 0))
    406 			continue;
    407 		if (isupper((unsigned char)word[0])) {
    408 			for (cp = original, dp = word; (*dp = *cp++); dp++) {
    409 				if (fold)
    410 					*dp = tolower((unsigned char)*dp);
    411 			}
    412 			word[0] = tolower((unsigned char)word[0]);
    413 			goto lcase;
    414 		}
    415 		file = stdout;
    416 	}
    417 }
    418 
    419 static void
    420 print_word(FILE *f)
    421 {
    422 
    423 	if (f != NULL) {
    424 		if (vflag && affix[0] != '\0' && affix[0] != '.')
    425 			fprintf(f, "%s\t%s\n", affix, original);
    426 		else
    427 			fprintf(f, "%s\n", original);
    428 	}
    429 }
    430 
    431 /*
    432  * For each matching suffix in suftab, call the function associated
    433  * with that suffix (p1 and p2).
    434  */
    435 static int
    436 suffix(char *ep, size_t lev)
    437 {
    438 	const struct suftab *t;
    439 	char *cp;
    440 	const char *sp;
    441 
    442 	lev += DLEV;
    443 	getderiv(lev + 1);
    444 	deriv.buf[lev] = deriv.buf[lev - 1] = 0;
    445 	for (t = suftab; (sp = t->suf) != NULL; t++) {
    446 		cp = ep;
    447 		while (*sp) {
    448 			if (*--cp != *sp++)
    449 				goto next;
    450 		}
    451 		for (sp = cp; --sp >= word && !vowel(*sp);)
    452 			;	/* nothing */
    453 		if (sp < word)
    454 			return 0;
    455 		if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
    456 			return 1;
    457 		if (t->p2 != NULL) {
    458 			deriv.buf[lev] = deriv.buf[lev + 1] = NULL;
    459 			return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
    460 		}
    461 		return 0;
    462 next:		;
    463 	}
    464 	return 0;
    465 }
    466 
/*
 * Suffix handler that rejects unconditionally: every argument is
 * ignored and the result is always 0.
 */
/*ARGSUSED*/
static int
nop(char *ep, const char *d, const char *a, size_t lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return 0;
}
    474 
/*
 * Generic handler: look the stem ending at ep up as it is (with any
 * prefixes removed), and if that fails try to remove a further suffix.
 * The annotation a is recorded for this level; d is not used here.
 * Returns 1 on success, 0 on failure.
 */
/*ARGSUSED*/
static int
strip(char *ep, const char *d, const char *a, size_t lev)
{

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
    482 
    483 static int
    484 s(char *ep, const char *d, const char *a, const size_t lev)
    485 {
    486 
    487 	if (lev > DLEV + 1)
    488 		return 0;
    489 	if (*ep == 's' && ep[-1] == 's')
    490 		return 0;
    491 	return strip(ep, d, a, lev);
    492 }
    493 
    494 static int
    495 /*ARGSUSED*/
    496 an(char *ep, const char *d, const char *a, size_t lev)
    497 {
    498 
    499 	if (!isupper((unsigned char)*word))	/* must be proper name */
    500 		return 0;
    501 	return trypref(ep, a, lev);
    502 }
    503 
/*
 * Handler for "-ization": store an 'e' at ep and continue with strip()
 * on the stem that now ends just after it, recording d as the
 * annotation.  The overwritten character is not restored.
 */
/*ARGSUSED*/
static int
ize(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'e';
	return strip(ep + 1, "", d, lev);
}
    512 
/*
 * Try the stem with an 'e' appended: the character at ep is replaced
 * by 'e' and strip() is run on the lengthened stem with d as the
 * annotation.  On failure the replaced character is put back.
 * Returns 1 on success, 0 on failure.
 */
/*ARGSUSED*/
static int
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		ep[0] = saved;
		return 0;
	}
	return 1;
}
    525 
    526 static int
    527 ily(char *ep, const char *d, const char *a, size_t lev)
    528 {
    529 
    530 	if (ep[-1] == 'i')
    531 		return i_to_y(ep, d, a, lev);
    532 	else
    533 		return strip(ep, d, a, lev);
    534 }
    535 
    536 static int
    537 ncy(char *ep, const char *d, const char *a, size_t lev)
    538 {
    539 
    540 	if (skipv(skipv(ep - 1)) < word)
    541 		return 0;
    542 	ep[-1] = 't';
    543 	return strip(ep, d, a, lev);
    544 }
    545 
/*
 * Handler for "-bility": store an 'l' at ep, then let y_to_e() try the
 * stem with an 'e' placed after it.
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
    553 
/*
 * If the stem ends in 'i', turn that letter into 'y' and use d as the
 * annotation; otherwise leave the stem alone and use a.  Either way
 * continue with strip().
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *note = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		note = d;
	}
	return strip(ep, "", note, lev);
}
    564 
    565 static int
    566 es(char *ep, const char *d, const char *a, size_t lev)
    567 {
    568 
    569 	if (lev > DLEV)
    570 		return 0;
    571 
    572 	switch (ep[-1]) {
    573 	default:
    574 		return 0;
    575 	case 'i':
    576 		return i_to_y(ep, d, a, lev);
    577 	case 's':
    578 	case 'h':
    579 	case 'z':
    580 	case 'x':
    581 		return strip(ep, d, a, lev);
    582 	}
    583 }
    584 
/*
 * Handler for "-metry": overwrite the two characters before ep with
 * "er" and continue with strip().
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return strip(ep, d, a, lev);
}
    593 
/*
 * Handler for "-tion", "-tive" and "-ator".  The decision is taken on
 * the character two places before ep: 'c' or 'r' means a direct
 * lookup, 'a' means try with an 'e' appended, anything else fails.
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char key = ep[-2];

	if (key == 'c' || key == 'r')
		return trypref(ep, a, lev);
	if (key == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
    607 
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two stem characters, either try the stem with
 * an 'e' appended (y_to_e) or fall back to VCe().  Endings not listed
 * below go straight to VCe().
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	int scg, scgvz;

	if (last == 'l') {
		/* consonant + 'l', except "ll", "rl" and "wl": try "+e" */
		if (!vowel(ep[-2]) && ep[-2] != 'l' && ep[-2] != 'r' &&
		    ep[-2] != 'w')
			return y_to_e(ep, d, a, lev);
		return VCe(ep, d, a, lev);
	}

	/* "ss" is handled by VCe() alone. */
	if (last == 's' && ep[-2] == 's')
		return VCe(ep, d, a, lev);

	scg = (last == 's' || last == 'c' || last == 'g');
	scgvz = (scg || last == 'v' || last == 'z');

	/* 's', 'c' or 'g' followed by a suffix starting with 'a': reject */
	if (scg && *ep == 'a')
		return 0;

	/* vowel before 's', 'c', 'g', 'v' or 'z': leave it to VCe() */
	if (scgvz && vowel(ep[-2]))
		return VCe(ep, d, a, lev);

	if (scgvz || last == 'u') {
		if (y_to_e(ep, d, a, lev))
			return 1;
		/* Only an "ng" ending gets a second chance through VCe(). */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return 0;
	}
	return VCe(ep, d, a, lev);
}
    650 
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  When the stem ends in a vowel
 * followed by a non-vowel, it is first tried with an 'e' appended
 * (annotation d); if that fails the overwritten character is restored
 * and the bare stem is tried through strip().
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	char saved;

	if (last == 'e')
		return 0;

	if (!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		ep[0] = saved;
	}
	return strip(ep, d, a, lev);
}
    672 
    673 static const char *
    674 lookuppref(char **wp, char *ep)
    675 {
    676 	const char **sp, *cp;
    677 	char *bp;
    678 
    679 	for (sp = preftab; *sp; sp++) {
    680 		bp = *wp;
    681 		for (cp = *sp; *cp; cp++, bp++) {
    682 			if (tolower((unsigned char)*bp) != *cp)
    683 				goto next;
    684 		}
    685 		for (cp = bp; cp < ep; cp++) {
    686 			if (vowel(*cp)) {
    687 				*wp = bp;
    688 				return *sp;
    689 			}
    690 		}
    691 next:		;
    692 	}
    693 	return 0;
    694 }
    695 
    696 /*
    697  * If the word is not in the dictionary, try stripping off prefixes
    698  * until the word is found or we run out of prefixes to check.
    699  */
    700 static int
    701 trypref(char *ep, const char *a, size_t lev)
    702 {
    703 	const char *cp;
    704 	char *bp;
    705 	char *pp;
    706 	int val = 0;
    707 	char space[20];
    708 
    709 	getderiv(lev + 2);
    710 	deriv.buf[lev] = a;
    711 	if (tryword(word, ep, lev))
    712 		return 1;
    713 	bp = word;
    714 	pp = space;
    715 	deriv.buf[lev + 1] = pp;
    716 	while ((cp = lookuppref(&bp, ep)) != NULL) {
    717 		*pp++ = '+';
    718 		while ((*pp = *cp++))
    719 			pp++;
    720 		if (tryword(bp, ep, lev + 1)) {
    721 			val = 1;
    722 			break;
    723 		}
    724 		if (pp - space >= sizeof(space))
    725 			return 0;
    726 	}
    727 	deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL;
    728 	return val;
    729 }
    730 
    731 static int
    732 tryword(char *bp, char *ep, size_t lev)
    733 {
    734 	size_t i, j;
    735 	char duple[3];
    736 
    737 	if (ep-bp <= 1)
    738 		return 0;
    739 	if (vowel(*ep) && monosyl(bp, ep))
    740 		return 0;
    741 
    742 	i = dict(bp, ep);
    743 	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
    744 	    monosyl(bp, ep - 1)) {
    745 		ep--;
    746 		getderiv(++lev);
    747 		deriv.buf[lev] = duple;
    748 		duple[0] = '+';
    749 		duple[1] = *ep;
    750 		duple[2] = '\0';
    751 		i = dict(bp, ep);
    752 	}
    753 	if (vflag == 0 || i == 0)
    754 		return i;
    755 
    756 	/* Also tack on possible derivations. (XXX - warn on truncation?) */
    757 	for (j = lev; j > 0; j--) {
    758 		if (deriv.buf[j])
    759 			(void)strlcat(affix, deriv.buf[j], sizeof(affix));
    760 	}
    761 	return i;
    762 }
    763 
/*
 * Return 1 when [bp, ep) has the shape "non-vowels, one vowel, one
 * final non-vowel" and that final letter is neither 'x' nor 'w';
 * return 0 otherwise, including when the range is shorter than two
 * characters.
 */
static int
monosyl(char *bp, char *ep)
{
	const char *p;
	char last, prev;

	if (ep - bp < 2)
		return 0;

	last = ep[-1];
	prev = ep[-2];
	if (vowel(last) || !vowel(prev))
		return 0;
	if (last == 'x' || last == 'w')
		return 0;

	/* Nothing in front of that vowel may be a vowel. */
	for (p = ep - 2; p > bp; p--) {
		if (vowel(p[-1]))
			return 0;
	}
	return 1;
}
    777 
    778 static char *
    779 skipv(char *st)
    780 {
    781 
    782 	if (st >= word && vowel(*st))
    783 		st--;
    784 	while (st >= word && !vowel(*st))
    785 		st--;
    786 	return st;
    787 }
    788 
/*
 * Return 1 if c is one of a, e, i, o, u or y in either case, else 0.
 *
 * Callers pass plain char values, which may be negative for bytes
 * above 0x7f; tolower() is only defined for unsigned char values and
 * EOF, so the argument is narrowed to unsigned char first.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
    804 
    805 /*
    806  * Crummy way to Britishise.
    807  */
    808 static void
    809 ise(void)
    810 {
    811 	struct suftab *tab;
    812 	char *cp;
    813 
    814 	for (tab = suftab; tab->suf; tab++) {
    815 		/* Assume that suffix will contain 'z' if a1 or d1 do */
    816 		if (strchr(tab->suf, 'z')) {
    817 			tab->suf = cp = estrdup(tab->suf);
    818 			ztos(cp);
    819 			if (strchr(tab->d1, 'z')) {
    820 				tab->d1 = cp = estrdup(tab->d1);
    821 				ztos(cp);
    822 			}
    823 			if (strchr(tab->a1, 'z')) {
    824 				tab->a1 = cp = estrdup(tab->a1);
    825 				ztos(cp);
    826 			}
    827 		}
    828 	}
    829 }
    830 
/*
 * Replace every 'z' in the NUL-terminated string st with 's', in
 * place.
 */
static void
ztos(char *st)
{

	while (*st != '\0') {
		if (*st == 'z')
			*st = 's';
		st++;
	}
}
    839 
    840 /*
    841  * Look up a word in the dictionary.
    842  * Returns 1 if found, 0 if not.
    843  */
    844 static int
    845 dict(char *bp, char *ep)
    846 {
    847 	char c;
    848 	int i, rval;
    849 
    850 	c = *ep;
    851 	*ep = '\0';
    852 	if (xflag)
    853 		printf("=%s\n", bp);
    854 	for (i = rval = 0; wlists[i].fd != -1; i++) {
    855 		if ((rval = look((unsigned char *)bp, wlists[i].front,
    856 		    wlists[i].back)) == 1)
    857 			break;
    858 	}
    859 	*ep = c;
    860 	return rval;
    861 }
    862 
    863 static void
    864 getderiv(size_t lev)
    865 {
    866 	if (deriv.maxlev < lev) {
    867 		if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0)
    868 			err(1, "Cannot grow array");
    869 		deriv.maxlev = lev;
    870 	}
    871 }
    872 
    873 
/*
 * Print the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n", prog);
	exit(1);
}
    882