Home | History | Annotate | Line # | Download | only in regex
      1 /*	$NetBSD: main.c,v 1.4 2021/02/23 17:13:44 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1993 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <assert.h>
     30 #include <regex.h>
     31 #include <stdio.h>
     32 #include <stdlib.h>
     33 #include <string.h>
     34 #include <unistd.h>
     35 
     36 #include <sys/types.h>
     37 
     38 #include "test_regex.h"
     39 
     40 char *progname;
     41 int debug = 0;
     42 int line = 0;
     43 int status = 0;
     44 
     45 int copts = REG_EXTENDED;
     46 int eopts = 0;
     47 regoff_t startoff = 0;
     48 regoff_t endoff = 0;
     49 
     50 static char empty = '\0';
     51 
     52 static char *eprint(int);
     53 static int efind(char *);
     54 
     55 #ifndef REG_ATOI
     56 #define REG_ATOI 0
     57 #define REG_ITOA 0
     58 #define REG_PEND 0
     59 #define REG_TRACE 0
     60 #define REG_BACKR 0
     61 #define REG_NOSPEC 0
     62 #define REG_LARGE 0
     63 #endif
     64 
     65 /*
     66  * main - do the simple case, hand off to regress() for regression
     67  */
     68 int
     69 main(int argc, char *argv[])
     70 {
     71 	regex_t re;
     72 #	define	NS	10
     73 	regmatch_t subs[NS];
     74 	char erbuf[100];
     75 	int err;
     76 	size_t len;
     77 	int c;
     78 	int errflg = 0;
     79 	int i;
     80 	extern int optind;
     81 	extern char *optarg;
     82 
     83 	progname = argv[0];
     84 
     85 	while ((c = getopt(argc, argv, "c:E:e:S:x")) != -1)
     86 		switch (c) {
     87 		case 'c':	/* compile options */
     88 			copts = options('c', optarg);
     89 			break;
     90 		case 'e':	/* execute options */
     91 			eopts = options('e', optarg);
     92 			break;
     93 		case 'E':	/* end offset */
     94 			endoff = (regoff_t)atoi(optarg);
     95 			break;
     96 		case 'S':	/* start offset */
     97 			startoff = (regoff_t)atoi(optarg);
     98 			break;
     99 		case 'x':	/* Debugging. */
    100 			debug++;
    101 			break;
    102 		case '?':
    103 		default:
    104 			errflg++;
    105 			break;
    106 		}
    107 	if (errflg) {
    108 		fprintf(stderr, "usage: %s ", progname);
    109 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
    110 		exit(2);
    111 	}
    112 
    113 	if (optind >= argc) {
    114 		regress(stdin);
    115 		exit(status);
    116 	}
    117 
    118 	err = regcomp(&re, argv[optind++], copts);
    119 	if (err) {
    120 		len = regerror(err, &re, erbuf, sizeof(erbuf));
    121 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
    122 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
    123 		exit(status);
    124 	}
    125 	regprint(&re, stdout);
    126 
    127 	if (optind >= argc) {
    128 		regfree(&re);
    129 		exit(status);
    130 	}
    131 
    132 	if (eopts&REG_STARTEND) {
    133 		subs[0].rm_so = startoff;
    134 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
    135 	}
    136 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
    137 	if (err) {
    138 		len = regerror(err, &re, erbuf, sizeof(erbuf));
    139 		fprintf(stderr, "error %s, %zd/%zd `%s'\n",
    140 			eprint(err), len, (size_t)sizeof(erbuf), erbuf);
    141 		exit(status);
    142 	}
    143 	if (!(copts&REG_NOSUB)) {
    144 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
    145 		if (subs[0].rm_so != -1) {
    146 			if (len != 0)
    147 				printf("match `%.*s'\n", (int)len,
    148 					argv[optind] + subs[0].rm_so);
    149 			else
    150 				printf("match `'@%.1s\n",
    151 					argv[optind] + subs[0].rm_so);
    152 		}
    153 		for (i = 1; i < NS; i++)
    154 			if (subs[i].rm_so != -1)
    155 				printf("(%d) `%.*s'\n", i,
    156 					(int)(subs[i].rm_eo - subs[i].rm_so),
    157 					argv[optind] + subs[i].rm_so);
    158 	}
    159 	exit(status);
    160 }
    161 
    162 /*
    163  * regress - main loop of regression test
    164  */
    165 void
    166 regress(FILE *in)
    167 {
    168 	char inbuf[1000];
    169 #	define	MAXF	10
    170 	char *f[MAXF];
    171 	int nf;
    172 	int i;
    173 	char erbuf[100];
    174 	size_t ne;
    175 	const char *badpat = "invalid regular expression";
    176 #	define	SHORT	10
    177 	const char *bpname = "REG_BADPAT";
    178 	regex_t re;
    179 
    180 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
    181 		line++;
    182 		if (inbuf[0] == '#' || inbuf[0] == '\n')
    183 			continue;			/* NOTE CONTINUE */
    184 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
    185 		if (debug)
    186 			fprintf(stdout, "%d:\n", line);
    187 		nf = split(inbuf, f, MAXF, "\t\t");
    188 		if (nf < 3) {
    189 			fprintf(stderr, "bad input, line %d\n", line);
    190 			exit(1);
    191 		}
    192 		for (i = 0; i < nf; i++)
    193 			if (strcmp(f[i], "\"\"") == 0)
    194 				f[i] = &empty;
    195 		if (nf <= 3)
    196 			f[3] = NULL;
    197 		if (nf <= 4)
    198 			f[4] = NULL;
    199 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
    200 		if (opt('&', f[1]))	/* try with either type of RE */
    201 			try(f[0], f[1], f[2], f[3], f[4],
    202 					options('c', f[1]) &~ REG_EXTENDED);
    203 	}
    204 
    205 	ne = regerror(REG_BADPAT, NULL, erbuf, sizeof(erbuf));
    206 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
    207 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
    208 							erbuf, badpat);
    209 		status = 1;
    210 	}
    211 	ne = regerror(REG_BADPAT, NULL, erbuf, (size_t)SHORT);
    212 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
    213 						ne != strlen(badpat)+1) {
    214 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
    215 						erbuf, SHORT-1, badpat);
    216 		status = 1;
    217 	}
    218 	ne = regerror(REG_ITOA|REG_BADPAT, NULL, erbuf, sizeof(erbuf));
    219 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
    220 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
    221 						erbuf, bpname);
    222 		status = 1;
    223 	}
    224 #if REG_ATOI
    225 	re.re_endp = bpname;
    226 #endif
    227 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
    228 	if (atoi(erbuf) != (int)REG_BADPAT) {
    229 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
    230 						erbuf, (long)REG_BADPAT);
    231 		status = 1;
    232 	} else if (ne != strlen(erbuf)+1) {
    233 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
    234 						erbuf, (long)REG_BADPAT);
    235 		status = 1;
    236 	}
    237 }
    238 
    239 /*
    240  - try - try it, and report on problems
    241  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
    242  */
    243 void
    244 try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts)
    245 {
    246 	regex_t re;
    247 #	define	NSUBS	10
    248 	regmatch_t subs[NSUBS];
    249 #	define	NSHOULD	15
    250 	char *should[NSHOULD];
    251 	int nshould;
    252 	char erbuf[100];
    253 	int err;
    254 	int len;
    255 	const char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
    256 	int i;
    257 	char *grump;
    258 	char f0copy[1000];
    259 	char f2copy[1000];
    260 
    261 	strcpy(f0copy, f0);
    262 #if REG_ATOI
    263 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
    264 #endif
    265 	fixstr(f0copy);
    266 	err = regcomp(&re, f0copy, opts);
    267 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
    268 		/* unexpected error or wrong error */
    269 		len = regerror(err, &re, erbuf, sizeof(erbuf));
    270 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
    271 					line, type, eprint(err), len,
    272 					(int)sizeof(erbuf), erbuf);
    273 		status = 1;
    274 	} else if (err == 0 && opt('C', f1)) {
    275 		/* unexpected success */
    276 		fprintf(stderr, "%d: %s should have given REG_%s\n",
    277 						line, type, f2);
    278 		status = 1;
    279 		err = 1;	/* so we won't try regexec */
    280 	}
    281 
    282 	if (err != 0) {
    283 		regfree(&re);
    284 		return;
    285 	}
    286 
    287 	strcpy(f2copy, f2);
    288 	fixstr(f2copy);
    289 
    290 	if (options('e', f1)&REG_STARTEND) {
    291 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
    292 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
    293 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
    294 		subs[0].rm_eo = strchr(f2, ')') - f2;
    295 	}
    296 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
    297 
    298 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
    299 		/* unexpected error or wrong error */
    300 		len = regerror(err, &re, erbuf, sizeof(erbuf));
    301 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
    302 					line, type, eprint(err), len,
    303 					(int)sizeof(erbuf), erbuf);
    304 		status = 1;
    305 	} else if (err != 0) {
    306 		/* nothing more to check */
    307 	} else if (f3 == NULL) {
    308 		/* unexpected success */
    309 		fprintf(stderr, "%d: %s exec should have failed\n",
    310 						line, type);
    311 		status = 1;
    312 		err = 1;		/* just on principle */
    313 	} else if (opts&REG_NOSUB) {
    314 		/* nothing more to check */
    315 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
    316 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
    317 		status = 1;
    318 		err = 1;
    319 	}
    320 
    321 	if (err != 0 || f4 == NULL) {
    322 		regfree(&re);
    323 		return;
    324 	}
    325 
    326 	for (i = 1; i < NSHOULD; i++)
    327 		should[i] = NULL;
    328 	nshould = split(f4, &should[1], NSHOULD-1, ",");
    329 	if (nshould == 0) {
    330 		nshould = 1;
    331 		should[1] = &empty;
    332 	}
    333 	for (i = 1; i < NSUBS; i++) {
    334 		grump = check(f2, subs[i], should[i]);
    335 		if (grump != NULL) {
    336 			fprintf(stderr, "%d: %s $%d %s\n", line,
    337 							type, i, grump);
    338 			status = 1;
    339 			err = 1;
    340 		}
    341 	}
    342 
    343 	regfree(&re);
    344 }
    345 
    346 /*
    347  - options - pick options out of a regression-test string
    348  == int options(int type, char *s);
    349  */
    350 int
    351 options(int type, char *s)
    352 {
    353 	char *p;
    354 	int o = (type == 'c') ? copts : eopts;
    355 	const char *legal = (type == 'c') ? "bisnmpg" : "^$#tl";
    356 
    357 	for (p = s; *p != '\0'; p++)
    358 		if (strchr(legal, *p) != NULL)
    359 			switch (*p) {
    360 			case 'b':
    361 				o &= ~REG_EXTENDED;
    362 				break;
    363 			case 'i':
    364 				o |= REG_ICASE;
    365 				break;
    366 			case 's':
    367 				o |= REG_NOSUB;
    368 				break;
    369 			case 'n':
    370 				o |= REG_NEWLINE;
    371 				break;
    372 			case 'm':
    373 				o &= ~REG_EXTENDED;
    374 				o |= REG_NOSPEC;
    375 				break;
    376 			case 'p':
    377 				o |= REG_PEND;
    378 				break;
    379 			case 'g':
    380 				o |= REG_GNU;
    381 				break;
    382 			case '^':
    383 				o |= REG_NOTBOL;
    384 				break;
    385 			case '$':
    386 				o |= REG_NOTEOL;
    387 				break;
    388 			case '#':
    389 				o |= REG_STARTEND;
    390 				break;
    391 			case 't':	/* trace */
    392 				o |= REG_TRACE;
    393 				break;
    394 			case 'l':	/* force long representation */
    395 				o |= REG_LARGE;
    396 				break;
    397 			case 'r':	/* force backref use */
    398 				o |= REG_BACKR;
    399 				break;
    400 			}
    401 	return(o);
    402 }
    403 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character c occurs in s, 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	return hit != NULL;
}
    413 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  A NULL pointer is accepted and ignored.  The scan
 * tests the following character after each replacement, so it carries on
 * past a NUL produced from 'Z' and stops at the first NUL that was already
 * in the string.
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}
    434 
    435 /*
    436  * check - check a substring match
    437  */
    438 char *				/* NULL or complaint */
    439 check(char *str, regmatch_t sub, char *should)
    440 {
    441 	int len;
    442 	int shlen;
    443 	char *p;
    444 	static char grump[500];
    445 	char *at = NULL;
    446 
    447 	if (should != NULL && strcmp(should, "-") == 0)
    448 		should = NULL;
    449 	if (should != NULL && should[0] == '@') {
    450 		at = should + 1;
    451 		should = &empty;
    452 	}
    453 
    454 	/* check rm_so and rm_eo for consistency */
    455 	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
    456 				(sub.rm_so != -1 && sub.rm_eo == -1) ||
    457 				(sub.rm_so != -1 && sub.rm_so < 0) ||
    458 				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
    459 		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
    460 							(long)sub.rm_eo);
    461 		return(grump);
    462 	}
    463 
    464 	/* check for no match */
    465 	if (sub.rm_so == -1) {
    466 		if (should == NULL)
    467 			return(NULL);
    468 		else {
    469 			sprintf(grump, "did not match");
    470 			return(grump);
    471 		}
    472 	}
    473 
    474 	/* check for in range */
    475 	if (sub.rm_eo > (ssize_t)strlen(str)) {
    476 		sprintf(grump, "start %ld end %ld, past end of string",
    477 					(long)sub.rm_so, (long)sub.rm_eo);
    478 		return(grump);
    479 	}
    480 
    481 	len = (int)(sub.rm_eo - sub.rm_so);
    482 	p = str + sub.rm_so;
    483 
    484 	/* check for not supposed to match */
    485 	if (should == NULL) {
    486 		sprintf(grump, "matched `%.*s'", len, p);
    487 		return(grump);
    488 	}
    489 
    490 	/* check for wrong match */
    491 	shlen = (int)strlen(should);
    492 	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
    493 		sprintf(grump, "matched `%.*s' instead", len, p);
    494 		return(grump);
    495 	}
    496 	if (shlen > 0)
    497 		return(NULL);
    498 
    499 	/* check null match in right place */
    500 	if (at == NULL)
    501 		return(NULL);
    502 	shlen = strlen(at);
    503 	if (shlen == 0)
    504 		shlen = 1;	/* force check for end-of-string */
    505 	if (strncmp(p, at, shlen) != 0) {
    506 		sprintf(grump, "matched null at `%.20s'", p);
    507 		return(grump);
    508 	}
    509 	return(NULL);
    510 }
    511 
    512 /*
    513  * eprint - convert error number to name
    514  */
    515 static char *
    516 eprint(int err)
    517 {
    518 	static char epbuf[100];
    519 	size_t len;
    520 
    521 	len = regerror(REG_ITOA|err, NULL, epbuf, sizeof(epbuf));
    522 	assert(len <= sizeof(epbuf));
    523 	return(epbuf);
    524 }
    525 
    526 /*
    527  * efind - convert error name to number
    528  */
    529 static int
    530 efind(char *name)
    531 {
    532 	static char efbuf[100];
    533 	regex_t re;
    534 
    535 	sprintf(efbuf, "REG_%s", name);
    536 	assert(strlen(efbuf) < sizeof(efbuf));
    537 #if REG_ATOI
    538 	re.re_endp = efbuf;
    539 #endif
    540 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
    541 	return(atoi(efbuf));
    542 }
    543