Home | History | Annotate | Line # | Download | only in quiz
rxp.c revision 1.10
      1 /*	$NetBSD: rxp.c,v 1.10 2002/12/06 01:54:55 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
      9  * Commodore Business Machines.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 #ifndef lint
     42 #if 0
     43 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
     44 #else
     45 __RCSID("$NetBSD: rxp.c,v 1.10 2002/12/06 01:54:55 thorpej Exp $");
     46 #endif
     47 #endif /* not lint */
     48 
     49 /*
     50  * regular expression parser
     51  *
     52  * external functions and return values are:
     53  * rxp_compile(s)
     54  *	TRUE	success
     55  *	FALSE	parse failure; error message will be in char rxperr[]
     56  * metas are:
     57  *	{...}	optional pattern, equialent to [...|]
     58  *	|	alternate pattern
     59  *	[...]	pattern delimiters
     60  *
     61  * rxp_match(s)
     62  *	TRUE	string s matches compiled pattern
     63  *	FALSE	match failure or regexp error
     64  *
     65  * rxp_expand()
     66  *	char *	reverse-engineered regular expression string
     67  *	NULL	regexp error
     68  */
     69 
     70 #include <stdio.h>
     71 #include <stdlib.h>
     72 #include <ctype.h>
     73 #include "quiz.h"
     74 					/* regexp tokens,	arg */
     75 #define	LIT	(-1)			/* literal character,	char */
     76 #define	SOT	(-2)			/* start text anchor,	- */
     77 #define	EOT	(-3)			/* end text anchor,	- */
     78 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
     79 #define	GRP_E	(-5)			/* end group,		- */
     80 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
     81 #define	ALT_E	(-7)			/* alternate ends,	- */
     82 #define	END	(-8)			/* end of regexp,	- */
     83 
     84 typedef short Rxp_t;			/* type for regexp tokens */
     85 
     86 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
     87 char rxperr[128];			/* parser error message */
     88 
     89 static int	 rxp__compile __P((const char *, int));
     90 static char	*rxp__expand __P((int));
     91 static int	 rxp__match __P((const char *, int, Rxp_t *, Rxp_t *, const char *));
     92 
     93 int
     94 rxp_compile(s)
     95 	const char *	s;
     96 {
     97 	return (rxp__compile(s, TRUE));
     98 }
     99 
    100 static int
    101 rxp__compile(s, first)
    102 	const char *s;
    103 	int first;
    104 {
    105 	static Rxp_t *rp;
    106 	static const char *sp;
    107 	Rxp_t *grp_ptr;
    108 	Rxp_t *alt_ptr;
    109 	int esc, err;
    110 
    111 	esc = 0;
    112 	if (first) {
    113 		rp = rxpbuf;
    114 		sp = s;
    115 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
    116 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
    117 		*rp++ = 0;
    118 	}
    119 	*rp++ = ALT_S;
    120 	alt_ptr = rp;
    121 	*rp++ = 0;
    122 	for (; *sp; ++sp) {
    123 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
    124 			(void)snprintf(rxperr, sizeof(rxperr),
    125 			    "regular expression too long %s", s);
    126 			return (FALSE);
    127 		}
    128 		if (*sp == ':' && !esc)
    129 			break;
    130 		if (esc) {
    131 			*rp++ = LIT;
    132 			*rp++ = *sp;
    133 			esc = 0;
    134 		}
    135 		else switch (*sp) {
    136 		case '\\':
    137 			esc = 1;
    138 			break;
    139 		case '{':
    140 		case '[':
    141 			*rp++ = GRP_S;
    142 			grp_ptr = rp;
    143 			*rp++ = 0;
    144 			sp++;
    145 			if ((err = rxp__compile(s, FALSE)) != TRUE)
    146 				return (err);
    147 			*rp++ = GRP_E;
    148 			*grp_ptr = rp - rxpbuf;
    149 			break;
    150 		case '}':
    151 		case ']':
    152 		case '|':
    153 			*rp++ = ALT_E;
    154 			*alt_ptr = rp - rxpbuf;
    155 			if (*sp != ']') {
    156 				*rp++ = ALT_S;
    157 				alt_ptr = rp;
    158 				*rp++ = 0;
    159 			}
    160 			if (*sp != '|') {
    161 				if (*sp != ']') {
    162 					*rp++ = ALT_E;
    163 					*alt_ptr = rp - rxpbuf;
    164 				}
    165 				if (first) {
    166 					(void)snprintf(rxperr, sizeof(rxperr),
    167 					    "unmatched alternator in regexp %s",
    168 					     s);
    169 					return (FALSE);
    170 				}
    171 				return (TRUE);
    172 			}
    173 			break;
    174 		default:
    175 			*rp++ = LIT;
    176 			*rp++ = *sp;
    177 			esc = 0;
    178 			break;
    179 		}
    180 	}
    181 	if (!first) {
    182 		(void)snprintf(rxperr, sizeof(rxperr),
    183 		    "unmatched alternator in regexp %s", s);
    184 		return (FALSE);
    185 	}
    186 	*rp++ = ALT_E;
    187 	*alt_ptr = rp - rxpbuf;
    188 	*rp++ = GRP_E;
    189 	*(rxpbuf + 2) = rp - rxpbuf;
    190 	*rp++ = EOT;
    191 	*rp = END;
    192 	return (TRUE);
    193 }
    194 
    195 /*
    196  * match string against compiled regular expression
    197  */
    198 int
    199 rxp_match(s)
    200 	const char *	s;
    201 {
    202 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
    203 }
    204 
    205 static int
    206 rxp__match(s, first, j_succ, j_fail, sp_fail)
    207 	const char *s;
    208 	int first;
    209 	Rxp_t *j_succ;		/* jump here on successful alt match */
    210 	Rxp_t *j_fail;		/* jump here on failed match */
    211 	const char *sp_fail;		/* reset sp to here on failed match */
    212 {
    213 	static Rxp_t *rp;
    214 	static const char *sp;
    215 	int ch;
    216 	Rxp_t *grp_end = NULL;
    217 
    218 	if (first) {
    219 		rp = rxpbuf;
    220 		sp = s;
    221 	}
    222 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    223 		switch(*rp) {
    224 		case LIT:
    225 			rp++;
    226 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
    227 			if (ch != *sp++) {
    228 				rp = j_fail;
    229 				sp = sp_fail;
    230 				return (FALSE);
    231 			}
    232 			rp++;
    233 			break;
    234 		case SOT:
    235 			if (sp != s)
    236 				return (FALSE);
    237 			rp++;
    238 			break;
    239 		case EOT:
    240 			if (*sp != 0)
    241 				return (FALSE);
    242 			rp++;
    243 			break;
    244 		case GRP_S:
    245 			rp++;
    246 			grp_end = rxpbuf + *rp++;
    247 			break;
    248 		case ALT_S:
    249 			rp++;
    250 			rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
    251 			break;
    252 		case ALT_E:
    253 			rp = j_succ;
    254 			return (TRUE);
    255 		case GRP_E:
    256 			rp = j_fail;
    257 			sp = sp_fail;
    258 			return (FALSE);
    259 		default:
    260 			abort();
    261 		}
    262 	return (*rp != END ? FALSE : TRUE);
    263 }
    264 
    265 /*
    266  * Reverse engineer the regular expression, by picking first of all alternates.
    267  */
    268 char *
    269 rxp_expand()
    270 {
    271 	return (rxp__expand(TRUE));
    272 }
    273 
    274 static char *
    275 rxp__expand(first)
    276 	int first;
    277 {
    278 	static char buf[RXP_LINE_SZ/2];
    279 	static Rxp_t *rp;
    280 	static char *bp;
    281 	Rxp_t *grp_ptr;
    282 	char *err;
    283 
    284 	if (first) {
    285 		rp = rxpbuf;
    286 		bp = buf;
    287 	}
    288 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
    289 		switch(*rp) {
    290 		case LIT:
    291 			rp++;
    292 			*bp++ = *rp++;
    293 			break;
    294 		case GRP_S:
    295 			rp++;
    296 			grp_ptr = rxpbuf + *rp;
    297 			rp++;
    298 			if ((err = rxp__expand(FALSE)) == NULL)
    299 				return (err);
    300 			rp = grp_ptr;
    301 			break;
    302 		case ALT_E:
    303 			return (buf);
    304 		case ALT_S:
    305 			rp++;
    306 			/* FALLTHROUGH */
    307 		case SOT:
    308 		case EOT:
    309 		case GRP_E:
    310 			rp++;
    311 			break;
    312 		default:
    313 			return (NULL);
    314 		}
    315 	if (first) {
    316 		if (*rp != END)
    317 			return (NULL);
    318 		*bp = '\0';
    319 	}
    320 	return (buf);
    321 }
    322