Home | History | Annotate | Line # | Download | only in quiz
rxp.c revision 1.4
      1 /*-
      2  * Copyright (c) 1991, 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
      7  * Commodore Business Machines.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *	This product includes software developed by the University of
     20  *	California, Berkeley and its contributors.
     21  * 4. Neither the name of the University nor the names of its contributors
     22  *    may be used to endorse or promote products derived from this software
     23  *    without specific prior written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  * SUCH DAMAGE.
     36  */
     37 
     38 #ifndef lint
     39 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
     40 #endif /* not lint */
     41 
     42 /*
     43  * regular expression parser
     44  *
     45  * external functions and return values are:
     46  * rxp_compile(s)
     47  *	TRUE	success
     48  *	FALSE	parse failure; error message will be in char rxperr[]
     49  * metas are:
     50  *	{...}	optional pattern, equialent to [...|]
     51  *	|	alternate pattern
     52  *	[...]	pattern delimiters
     53  *
     54  * rxp_match(s)
     55  *	TRUE	string s matches compiled pattern
     56  *	FALSE	match failure or regexp error
     57  *
     58  * rxp_expand()
     59  *	char *	reverse-engineered regular expression string
     60  *	NULL	regexp error
     61  */
     62 
     63 #include <stdio.h>
     64 #include <ctype.h>
     65 #include "quiz.h"
     66 					/* regexp tokens,	arg */
     67 #define	LIT	(-1)			/* literal character,	char */
     68 #define	SOT	(-2)			/* start text anchor,	- */
     69 #define	EOT	(-3)			/* end text anchor,	- */
     70 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
     71 #define	GRP_E	(-5)			/* end group,		- */
     72 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
     73 #define	ALT_E	(-7)			/* alternate ends,	- */
     74 #define	END	(-8)			/* end of regexp,	- */
     75 
     76 typedef short Rxp_t;			/* type for regexp tokens */
     77 
     78 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
     79 char rxperr[128];			/* parser error message */
     80 
     81 static int	 rxp__compile __P((char *, int));
     82 static char	*rxp__expand __P((int));
     83 static int	 rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
     84 
     85 int
     86 rxp_compile(s)
     87 	register char *	s;
     88 {
     89 	return (rxp__compile(s, TRUE));
     90 }
     91 
     92 static int
     93 rxp__compile(s, first)
     94 	register char *s;
     95 	int first;
     96 {
     97 	static Rxp_t *rp;
     98 	static char *sp;
     99 	Rxp_t *grp_ptr;
    100 	Rxp_t *alt_ptr;
    101 	int esc, err;
    102 
    103 	esc = 0;
    104 	if (first) {
    105 		rp = rxpbuf;
    106 		sp = s;
    107 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
    108 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
    109 		*rp++ = 0;
    110 	}
    111 	*rp++ = ALT_S;
    112 	alt_ptr = rp;
    113 	*rp++ = 0;
    114 	for (; *sp; ++sp) {
    115 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
    116 			(void)snprintf(rxperr, sizeof(rxperr),
    117 			    "regular expression too long %s", s);
    118 			return (FALSE);
    119 		}
    120 		if (*sp == ':' && !esc)
    121 			break;
    122 		if (esc) {
    123 			*rp++ = LIT;
    124 			*rp++ = *sp;
    125 			esc = 0;
    126 		}
    127 		else switch (*sp) {
    128 		case '\\':
    129 			esc = 1;
    130 			break;
    131 		case '{':
    132 		case '[':
    133 			*rp++ = GRP_S;
    134 			grp_ptr = rp;
    135 			*rp++ = 0;
    136 			sp++;
    137 			if ((err = rxp__compile(s, FALSE)) != TRUE)
    138 				return (err);
    139 			*rp++ = GRP_E;
    140 			*grp_ptr = rp - rxpbuf;
    141 			break;
    142 		case '}':
    143 		case ']':
    144 		case '|':
    145 			*rp++ = ALT_E;
    146 			*alt_ptr = rp - rxpbuf;
    147 			if (*sp != ']') {
    148 				*rp++ = ALT_S;
    149 				alt_ptr = rp;
    150 				*rp++ = 0;
    151 			}
    152 			if (*sp != '|') {
    153 				if (*sp != ']') {
    154 					*rp++ = ALT_E;
    155 					*alt_ptr = rp - rxpbuf;
    156 				}
    157 				if (first) {
    158 					(void)snprintf(rxperr, sizeof(rxperr),
    159 					    "unmatched alternator in regexp %s",
    160 					     s);
    161 					return (FALSE);
    162 				}
    163 				return (TRUE);
    164 			}
    165 			break;
    166 		default:
    167 			*rp++ = LIT;
    168 			*rp++ = *sp;
    169 			esc = 0;
    170 			break;
    171 		}
    172 	}
    173 	if (!first) {
    174 		(void)snprintf(rxperr, sizeof(rxperr),
    175 		    "unmatched alternator in regexp %s", s);
    176 		return (FALSE);
    177 	}
    178 	*rp++ = ALT_E;
    179 	*alt_ptr = rp - rxpbuf;
    180 	*rp++ = GRP_E;
    181 	*(rxpbuf + 2) = rp - rxpbuf;
    182 	*rp++ = EOT;
    183 	*rp = END;
    184 	return (TRUE);
    185 }
    186 
    187 /*
    188  * match string against compiled regular expression
    189  */
    190 int
    191 rxp_match(s)
    192 	register char *	s;
    193 {
    194 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
    195 }
    196 
    197 static int
    198 rxp__match(s, first, j_succ, j_fail, sp_fail)
    199 	char *s;
    200 	int first;
    201 	Rxp_t *j_succ;		/* jump here on successful alt match */
    202 	Rxp_t *j_fail;		/* jump here on failed match */
    203 	char *sp_fail;		/* reset sp to here on failed match */
    204 {
    205 	static Rxp_t *rp;
    206 	static char *sp;
    207 	register int ch;
    208 	Rxp_t *grp_end;
    209 	int err;
    210 
    211 	if (first) {
    212 		rp = rxpbuf;
    213 		sp = s;
    214 	}
    215 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    216 		switch(*rp) {
    217 		case LIT:
    218 			rp++;
    219 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
    220 			if (ch != *sp++) {
    221 				rp = j_fail;
    222 				sp = sp_fail;
    223 				return (TRUE);
    224 			}
    225 			rp++;
    226 			break;
    227 		case SOT:
    228 			if (sp != s)
    229 				return (FALSE);
    230 			rp++;
    231 			break;
    232 		case EOT:
    233 			if (*sp != 0)
    234 				return (FALSE);
    235 			rp++;
    236 			break;
    237 		case GRP_S:
    238 			rp++;
    239 			grp_end = rxpbuf + *rp++;
    240 			break;
    241 		case ALT_S:
    242 			rp++;
    243 			if ((err = rxp__match(sp,
    244 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
    245 				return (err);
    246 			break;
    247 		case ALT_E:
    248 			rp = j_succ;
    249 			return (TRUE);
    250 		case GRP_E:
    251 		default:
    252 			return (FALSE);
    253 		}
    254 	return (*rp != END ? FALSE : TRUE);
    255 }
    256 
    257 /*
    258  * Reverse engineer the regular expression, by picking first of all alternates.
    259  */
    260 char *
    261 rxp_expand()
    262 {
    263 	return (rxp__expand(TRUE));
    264 }
    265 
    266 static char *
    267 rxp__expand(first)
    268 	int first;
    269 {
    270 	static char buf[RXP_LINE_SZ/2];
    271 	static Rxp_t *rp;
    272 	static char *bp;
    273 	Rxp_t *grp_ptr;
    274 	char *err;
    275 
    276 	if (first) {
    277 		rp = rxpbuf;
    278 		bp = buf;
    279 	}
    280 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    281 		switch(*rp) {
    282 		case LIT:
    283 			rp++;
    284 			*bp++ = *rp++;
    285 			break;
    286 		case GRP_S:
    287 			rp++;
    288 			grp_ptr = rxpbuf + *rp;
    289 			rp++;
    290 			if ((err = rxp__expand(FALSE)) == NULL)
    291 				return (err);
    292 			rp = grp_ptr;
    293 			break;
    294 		case ALT_E:
    295 			return (buf);
    296 		case ALT_S:
    297 			rp++;
    298 			/* FALLTHROUGH */
    299 		case SOT:
    300 		case EOT:
    301 		case GRP_E:
    302 			rp++;
    303 			break;
    304 		default:
    305 			return (NULL);
    306 		}
    307 	if (first) {
    308 		if (*rp != END)
    309 			return (NULL);
    310 		*bp = '\0';
    311 	}
    312 	return (buf);
    313 }
    314