Home | History | Annotate | Line # | Download | only in quiz
rxp.c revision 1.11
      1 /*	$NetBSD: rxp.c,v 1.11 2003/08/07 09:37:34 agc Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
      9  * Commodore Business Machines.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #ifndef lint
     38 #if 0
     39 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
     40 #else
     41 __RCSID("$NetBSD: rxp.c,v 1.11 2003/08/07 09:37:34 agc Exp $");
     42 #endif
     43 #endif /* not lint */
     44 
     45 /*
     46  * regular expression parser
     47  *
     48  * external functions and return values are:
     49  * rxp_compile(s)
     50  *	TRUE	success
     51  *	FALSE	parse failure; error message will be in char rxperr[]
     52  * metas are:
     53  *	{...}	optional pattern, equialent to [...|]
     54  *	|	alternate pattern
     55  *	[...]	pattern delimiters
     56  *
     57  * rxp_match(s)
     58  *	TRUE	string s matches compiled pattern
     59  *	FALSE	match failure or regexp error
     60  *
     61  * rxp_expand()
     62  *	char *	reverse-engineered regular expression string
     63  *	NULL	regexp error
     64  */
     65 
     66 #include <stdio.h>
     67 #include <stdlib.h>
     68 #include <ctype.h>
     69 #include "quiz.h"
     70 					/* regexp tokens,	arg */
     71 #define	LIT	(-1)			/* literal character,	char */
     72 #define	SOT	(-2)			/* start text anchor,	- */
     73 #define	EOT	(-3)			/* end text anchor,	- */
     74 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
     75 #define	GRP_E	(-5)			/* end group,		- */
     76 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
     77 #define	ALT_E	(-7)			/* alternate ends,	- */
     78 #define	END	(-8)			/* end of regexp,	- */
     79 
     80 typedef short Rxp_t;			/* type for regexp tokens */
     81 
     82 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
     83 char rxperr[128];			/* parser error message */
     84 
     85 static int	 rxp__compile __P((const char *, int));
     86 static char	*rxp__expand __P((int));
     87 static int	 rxp__match __P((const char *, int, Rxp_t *, Rxp_t *, const char *));
     88 
     89 int
     90 rxp_compile(s)
     91 	const char *	s;
     92 {
     93 	return (rxp__compile(s, TRUE));
     94 }
     95 
     96 static int
     97 rxp__compile(s, first)
     98 	const char *s;
     99 	int first;
    100 {
    101 	static Rxp_t *rp;
    102 	static const char *sp;
    103 	Rxp_t *grp_ptr;
    104 	Rxp_t *alt_ptr;
    105 	int esc, err;
    106 
    107 	esc = 0;
    108 	if (first) {
    109 		rp = rxpbuf;
    110 		sp = s;
    111 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
    112 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
    113 		*rp++ = 0;
    114 	}
    115 	*rp++ = ALT_S;
    116 	alt_ptr = rp;
    117 	*rp++ = 0;
    118 	for (; *sp; ++sp) {
    119 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
    120 			(void)snprintf(rxperr, sizeof(rxperr),
    121 			    "regular expression too long %s", s);
    122 			return (FALSE);
    123 		}
    124 		if (*sp == ':' && !esc)
    125 			break;
    126 		if (esc) {
    127 			*rp++ = LIT;
    128 			*rp++ = *sp;
    129 			esc = 0;
    130 		}
    131 		else switch (*sp) {
    132 		case '\\':
    133 			esc = 1;
    134 			break;
    135 		case '{':
    136 		case '[':
    137 			*rp++ = GRP_S;
    138 			grp_ptr = rp;
    139 			*rp++ = 0;
    140 			sp++;
    141 			if ((err = rxp__compile(s, FALSE)) != TRUE)
    142 				return (err);
    143 			*rp++ = GRP_E;
    144 			*grp_ptr = rp - rxpbuf;
    145 			break;
    146 		case '}':
    147 		case ']':
    148 		case '|':
    149 			*rp++ = ALT_E;
    150 			*alt_ptr = rp - rxpbuf;
    151 			if (*sp != ']') {
    152 				*rp++ = ALT_S;
    153 				alt_ptr = rp;
    154 				*rp++ = 0;
    155 			}
    156 			if (*sp != '|') {
    157 				if (*sp != ']') {
    158 					*rp++ = ALT_E;
    159 					*alt_ptr = rp - rxpbuf;
    160 				}
    161 				if (first) {
    162 					(void)snprintf(rxperr, sizeof(rxperr),
    163 					    "unmatched alternator in regexp %s",
    164 					     s);
    165 					return (FALSE);
    166 				}
    167 				return (TRUE);
    168 			}
    169 			break;
    170 		default:
    171 			*rp++ = LIT;
    172 			*rp++ = *sp;
    173 			esc = 0;
    174 			break;
    175 		}
    176 	}
    177 	if (!first) {
    178 		(void)snprintf(rxperr, sizeof(rxperr),
    179 		    "unmatched alternator in regexp %s", s);
    180 		return (FALSE);
    181 	}
    182 	*rp++ = ALT_E;
    183 	*alt_ptr = rp - rxpbuf;
    184 	*rp++ = GRP_E;
    185 	*(rxpbuf + 2) = rp - rxpbuf;
    186 	*rp++ = EOT;
    187 	*rp = END;
    188 	return (TRUE);
    189 }
    190 
    191 /*
    192  * match string against compiled regular expression
    193  */
    194 int
    195 rxp_match(s)
    196 	const char *	s;
    197 {
    198 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
    199 }
    200 
    201 static int
    202 rxp__match(s, first, j_succ, j_fail, sp_fail)
    203 	const char *s;
    204 	int first;
    205 	Rxp_t *j_succ;		/* jump here on successful alt match */
    206 	Rxp_t *j_fail;		/* jump here on failed match */
    207 	const char *sp_fail;		/* reset sp to here on failed match */
    208 {
    209 	static Rxp_t *rp;
    210 	static const char *sp;
    211 	int ch;
    212 	Rxp_t *grp_end = NULL;
    213 
    214 	if (first) {
    215 		rp = rxpbuf;
    216 		sp = s;
    217 	}
    218 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    219 		switch(*rp) {
    220 		case LIT:
    221 			rp++;
    222 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
    223 			if (ch != *sp++) {
    224 				rp = j_fail;
    225 				sp = sp_fail;
    226 				return (FALSE);
    227 			}
    228 			rp++;
    229 			break;
    230 		case SOT:
    231 			if (sp != s)
    232 				return (FALSE);
    233 			rp++;
    234 			break;
    235 		case EOT:
    236 			if (*sp != 0)
    237 				return (FALSE);
    238 			rp++;
    239 			break;
    240 		case GRP_S:
    241 			rp++;
    242 			grp_end = rxpbuf + *rp++;
    243 			break;
    244 		case ALT_S:
    245 			rp++;
    246 			rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
    247 			break;
    248 		case ALT_E:
    249 			rp = j_succ;
    250 			return (TRUE);
    251 		case GRP_E:
    252 			rp = j_fail;
    253 			sp = sp_fail;
    254 			return (FALSE);
    255 		default:
    256 			abort();
    257 		}
    258 	return (*rp != END ? FALSE : TRUE);
    259 }
    260 
    261 /*
    262  * Reverse engineer the regular expression, by picking first of all alternates.
    263  */
    264 char *
    265 rxp_expand()
    266 {
    267 	return (rxp__expand(TRUE));
    268 }
    269 
    270 static char *
    271 rxp__expand(first)
    272 	int first;
    273 {
    274 	static char buf[RXP_LINE_SZ/2];
    275 	static Rxp_t *rp;
    276 	static char *bp;
    277 	Rxp_t *grp_ptr;
    278 	char *err;
    279 
    280 	if (first) {
    281 		rp = rxpbuf;
    282 		bp = buf;
    283 	}
    284 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    285 		switch(*rp) {
    286 		case LIT:
    287 			rp++;
    288 			*bp++ = *rp++;
    289 			break;
    290 		case GRP_S:
    291 			rp++;
    292 			grp_ptr = rxpbuf + *rp;
    293 			rp++;
    294 			if ((err = rxp__expand(FALSE)) == NULL)
    295 				return (err);
    296 			rp = grp_ptr;
    297 			break;
    298 		case ALT_E:
    299 			return (buf);
    300 		case ALT_S:
    301 			rp++;
    302 			/* FALLTHROUGH */
    303 		case SOT:
    304 		case EOT:
    305 		case GRP_E:
    306 			rp++;
    307 			break;
    308 		default:
    309 			return (NULL);
    310 		}
    311 	if (first) {
    312 		if (*rp != END)
    313 			return (NULL);
    314 		*bp = '\0';
    315 	}
    316 	return (buf);
    317 }
    318