Home | History | Annotate | Line # | Download | only in quiz
rxp.c revision 1.2
      1 /*-
      2  * Copyright (c) 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Jim R. Oldroyd at The Instruction Set.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  */
     36 
     37 #ifndef lint
     38 static char sccsid[] = "@(#)rxp.c	5.1 (Berkeley) 11/10/91";
     39 #endif /* not lint */
     40 
     41 /*
     42  * regular expression parser
     43  *
     44  * external functions and return values are:
     45  * rxp_compile(s)
     46  *	TRUE	success
     47  *	FALSE	parse failure; error message will be in char rxperr[]
     48  * metas are:
     49  *	{...}	optional pattern, equialent to [...|]
     50  *	|	alternate pattern
     51  *	[...]	pattern delimiters
     52  *
     53  * rxp_match(s)
     54  *	TRUE	string s matches compiled pattern
     55  *	FALSE	match failure or regexp error
     56  *
     57  * rxp_expand()
     58  *	char *	reverse-engineered regular expression string
     59  *	NULL	regexp error
     60  */
     61 
     62 #include <stdio.h>
     63 #include <ctype.h>
     64 #include "quiz.h"
     65 					/* regexp tokens,	arg */
     66 #define	LIT	(-1)			/* literal character,	char */
     67 #define	SOT	(-2)			/* start text anchor,	- */
     68 #define	EOT	(-3)			/* end text anchor,	- */
     69 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
     70 #define	GRP_E	(-5)			/* end group,		- */
     71 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
     72 #define	ALT_E	(-7)			/* alternate ends,	- */
     73 #define	END	(-8)			/* end of regexp,	- */
     74 
     75 typedef short Rxp_t;			/* type for regexp tokens */
     76 
     77 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
     78 char rxperr[128];			/* parser error message */
     79 
     80 static int	 rxp__compile __P((char *, int));
     81 static char	*rxp__expand __P((int));
     82 static int	 rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
     83 
     84 int
     85 rxp_compile(s)
     86 	register char *	s;
     87 {
     88 	return (rxp__compile(s, TRUE));
     89 }
     90 
     91 static int
     92 rxp__compile(s, first)
     93 	register char *s;
     94 	int first;
     95 {
     96 	static Rxp_t *rp;
     97 	static char *sp;
     98 	Rxp_t *grp_ptr;
     99 	Rxp_t *alt_ptr;
    100 	int esc, err;
    101 
    102 	esc = 0;
    103 	if (first) {
    104 		rp = rxpbuf;
    105 		sp = s;
    106 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
    107 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
    108 		*rp++ = 0;
    109 	}
    110 	*rp++ = ALT_S;
    111 	alt_ptr = rp;
    112 	*rp++ = 0;
    113 	for (; *sp; ++sp) {
    114 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
    115 			(void)snprintf(rxperr, sizeof(rxperr),
    116 			    "regular expression too long %s", s);
    117 			return (FALSE);
    118 		}
    119 		if (*sp == ':' && !esc)
    120 			break;
    121 		if (esc) {
    122 			*rp++ = LIT;
    123 			*rp++ = *sp;
    124 			esc = 0;
    125 		}
    126 		else switch (*sp) {
    127 		case '\\':
    128 			esc = 1;
    129 			break;
    130 		case '{':
    131 		case '[':
    132 			*rp++ = GRP_S;
    133 			grp_ptr = rp;
    134 			*rp++ = 0;
    135 			sp++;
    136 			if ((err = rxp__compile(s, FALSE)) != TRUE)
    137 				return (err);
    138 			*rp++ = GRP_E;
    139 			*grp_ptr = rp - rxpbuf;
    140 			break;
    141 		case '}':
    142 		case ']':
    143 		case '|':
    144 			*rp++ = ALT_E;
    145 			*alt_ptr = rp - rxpbuf;
    146 			if (*sp != ']') {
    147 				*rp++ = ALT_S;
    148 				alt_ptr = rp;
    149 				*rp++ = 0;
    150 			}
    151 			if (*sp != '|') {
    152 				if (*sp != ']') {
    153 					*rp++ = ALT_E;
    154 					*alt_ptr = rp - rxpbuf;
    155 				}
    156 				if (first) {
    157 					(void)snprintf(rxperr, sizeof(rxperr),
    158 					    "unmatched alternator in regexp %s",
    159 					     s);
    160 					return (FALSE);
    161 				}
    162 				return (TRUE);
    163 			}
    164 			break;
    165 		default:
    166 			*rp++ = LIT;
    167 			*rp++ = *sp;
    168 			esc = 0;
    169 			break;
    170 		}
    171 	}
    172 	if (!first) {
    173 		(void)snprintf(rxperr, sizeof(rxperr),
    174 		    "unmatched alternator in regexp %s", s);
    175 		return (FALSE);
    176 	}
    177 	*rp++ = ALT_E;
    178 	*alt_ptr = rp - rxpbuf;
    179 	*rp++ = GRP_E;
    180 	*(rxpbuf + 2) = rp - rxpbuf;
    181 	*rp++ = EOT;
    182 	*rp = END;
    183 	return (TRUE);
    184 }
    185 
    186 /*
    187  * match string against compiled regular expression
    188  */
    189 int
    190 rxp_match(s)
    191 	register char *	s;
    192 {
    193 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
    194 }
    195 
    196 static int
    197 rxp__match(s, first, j_succ, j_fail, sp_fail)
    198 	char *s;
    199 	int first;
    200 	Rxp_t *j_succ;		/* jump here on successful alt match */
    201 	Rxp_t *j_fail;		/* jump here on failed match */
    202 	char *sp_fail;		/* reset sp to here on failed match */
    203 {
    204 	static Rxp_t *rp;
    205 	static char *sp;
    206 	register int ch;
    207 	Rxp_t *grp_end;
    208 	int err;
    209 
    210 	if (first) {
    211 		rp = rxpbuf;
    212 		sp = s;
    213 	}
    214 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    215 		switch(*rp) {
    216 		case LIT:
    217 			rp++;
    218 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
    219 			if (ch != *sp++) {
    220 				rp = j_fail;
    221 				sp = sp_fail;
    222 				return (TRUE);
    223 			}
    224 			rp++;
    225 			break;
    226 		case SOT:
    227 			if (sp != s)
    228 				return (FALSE);
    229 			rp++;
    230 			break;
    231 		case EOT:
    232 			if (*sp != 0)
    233 				return (FALSE);
    234 			rp++;
    235 			break;
    236 		case GRP_S:
    237 			rp++;
    238 			grp_end = rxpbuf + *rp++;
    239 			break;
    240 		case ALT_S:
    241 			rp++;
    242 			if ((err = rxp__match(sp,
    243 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
    244 				return (err);
    245 			break;
    246 		case ALT_E:
    247 			rp = j_succ;
    248 			return (TRUE);
    249 		case GRP_E:
    250 		default:
    251 			return (FALSE);
    252 		}
    253 	return (*rp != END ? FALSE : TRUE);
    254 }
    255 
    256 /*
    257  * Reverse engineer the regular expression, by picking first of all alternates.
    258  */
    259 char *
    260 rxp_expand()
    261 {
    262 	return (rxp__expand(TRUE));
    263 }
    264 
    265 static char *
    266 rxp__expand(first)
    267 	int first;
    268 {
    269 	static char buf[RXP_LINE_SZ/2];
    270 	static Rxp_t *rp;
    271 	static char *bp;
    272 	Rxp_t *grp_ptr;
    273 	char *err;
    274 
    275 	if (first) {
    276 		rp = rxpbuf;
    277 		bp = buf;
    278 	}
    279 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    280 		switch(*rp) {
    281 		case LIT:
    282 			rp++;
    283 			*bp++ = *rp++;
    284 			break;
    285 		case GRP_S:
    286 			rp++;
    287 			grp_ptr = rxpbuf + *rp;
    288 			rp++;
    289 			if ((err = rxp__expand(FALSE)) == NULL)
    290 				return (err);
    291 			rp = grp_ptr;
    292 			break;
    293 		case ALT_E:
    294 			return (buf);
    295 		case ALT_S:
    296 			rp++;
    297 			/* FALLTHROUGH */
    298 		case SOT:
    299 		case EOT:
    300 		case GRP_E:
    301 			rp++;
    302 			break;
    303 		default:
    304 			return (NULL);
    305 		}
    306 	if (first) {
    307 		if (*rp != END)
    308 			return (NULL);
    309 		*bp = '\0';
    310 	}
    311 	return (buf);
    312 }
    313