rxp.c revision 1.4 1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7 * Commodore Business Machines.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #ifndef lint
39 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93";
40 #endif /* not lint */
41
42 /*
43 * regular expression parser
44 *
45 * external functions and return values are:
46 * rxp_compile(s)
47 * TRUE success
48 * FALSE parse failure; error message will be in char rxperr[]
49 * metas are:
50 * {...} optional pattern, equialent to [...|]
51 * | alternate pattern
52 * [...] pattern delimiters
53 *
54 * rxp_match(s)
55 * TRUE string s matches compiled pattern
56 * FALSE match failure or regexp error
57 *
58 * rxp_expand()
59 * char * reverse-engineered regular expression string
60 * NULL regexp error
61 */
62
63 #include <stdio.h>
64 #include <ctype.h>
65 #include "quiz.h"
66 /* regexp tokens, arg */
67 #define LIT (-1) /* literal character, char */
68 #define SOT (-2) /* start text anchor, - */
69 #define EOT (-3) /* end text anchor, - */
70 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
71 #define GRP_E (-5) /* end group, - */
72 #define ALT_S (-6) /* alternate starts, ptr_to_next */
73 #define ALT_E (-7) /* alternate ends, - */
74 #define END (-8) /* end of regexp, - */
75
76 typedef short Rxp_t; /* type for regexp tokens */
77
78 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
79 char rxperr[128]; /* parser error message */
80
81 static int rxp__compile __P((char *, int));
82 static char *rxp__expand __P((int));
83 static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
84
85 int
86 rxp_compile(s)
87 register char * s;
88 {
89 return (rxp__compile(s, TRUE));
90 }
91
92 static int
93 rxp__compile(s, first)
94 register char *s;
95 int first;
96 {
97 static Rxp_t *rp;
98 static char *sp;
99 Rxp_t *grp_ptr;
100 Rxp_t *alt_ptr;
101 int esc, err;
102
103 esc = 0;
104 if (first) {
105 rp = rxpbuf;
106 sp = s;
107 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
108 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
109 *rp++ = 0;
110 }
111 *rp++ = ALT_S;
112 alt_ptr = rp;
113 *rp++ = 0;
114 for (; *sp; ++sp) {
115 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
116 (void)snprintf(rxperr, sizeof(rxperr),
117 "regular expression too long %s", s);
118 return (FALSE);
119 }
120 if (*sp == ':' && !esc)
121 break;
122 if (esc) {
123 *rp++ = LIT;
124 *rp++ = *sp;
125 esc = 0;
126 }
127 else switch (*sp) {
128 case '\\':
129 esc = 1;
130 break;
131 case '{':
132 case '[':
133 *rp++ = GRP_S;
134 grp_ptr = rp;
135 *rp++ = 0;
136 sp++;
137 if ((err = rxp__compile(s, FALSE)) != TRUE)
138 return (err);
139 *rp++ = GRP_E;
140 *grp_ptr = rp - rxpbuf;
141 break;
142 case '}':
143 case ']':
144 case '|':
145 *rp++ = ALT_E;
146 *alt_ptr = rp - rxpbuf;
147 if (*sp != ']') {
148 *rp++ = ALT_S;
149 alt_ptr = rp;
150 *rp++ = 0;
151 }
152 if (*sp != '|') {
153 if (*sp != ']') {
154 *rp++ = ALT_E;
155 *alt_ptr = rp - rxpbuf;
156 }
157 if (first) {
158 (void)snprintf(rxperr, sizeof(rxperr),
159 "unmatched alternator in regexp %s",
160 s);
161 return (FALSE);
162 }
163 return (TRUE);
164 }
165 break;
166 default:
167 *rp++ = LIT;
168 *rp++ = *sp;
169 esc = 0;
170 break;
171 }
172 }
173 if (!first) {
174 (void)snprintf(rxperr, sizeof(rxperr),
175 "unmatched alternator in regexp %s", s);
176 return (FALSE);
177 }
178 *rp++ = ALT_E;
179 *alt_ptr = rp - rxpbuf;
180 *rp++ = GRP_E;
181 *(rxpbuf + 2) = rp - rxpbuf;
182 *rp++ = EOT;
183 *rp = END;
184 return (TRUE);
185 }
186
187 /*
188 * match string against compiled regular expression
189 */
190 int
191 rxp_match(s)
192 register char * s;
193 {
194 return (rxp__match(s, TRUE, NULL, NULL, NULL));
195 }
196
197 static int
198 rxp__match(s, first, j_succ, j_fail, sp_fail)
199 char *s;
200 int first;
201 Rxp_t *j_succ; /* jump here on successful alt match */
202 Rxp_t *j_fail; /* jump here on failed match */
203 char *sp_fail; /* reset sp to here on failed match */
204 {
205 static Rxp_t *rp;
206 static char *sp;
207 register int ch;
208 Rxp_t *grp_end;
209 int err;
210
211 if (first) {
212 rp = rxpbuf;
213 sp = s;
214 }
215 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
216 switch(*rp) {
217 case LIT:
218 rp++;
219 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
220 if (ch != *sp++) {
221 rp = j_fail;
222 sp = sp_fail;
223 return (TRUE);
224 }
225 rp++;
226 break;
227 case SOT:
228 if (sp != s)
229 return (FALSE);
230 rp++;
231 break;
232 case EOT:
233 if (*sp != 0)
234 return (FALSE);
235 rp++;
236 break;
237 case GRP_S:
238 rp++;
239 grp_end = rxpbuf + *rp++;
240 break;
241 case ALT_S:
242 rp++;
243 if ((err = rxp__match(sp,
244 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
245 return (err);
246 break;
247 case ALT_E:
248 rp = j_succ;
249 return (TRUE);
250 case GRP_E:
251 default:
252 return (FALSE);
253 }
254 return (*rp != END ? FALSE : TRUE);
255 }
256
257 /*
258 * Reverse engineer the regular expression, by picking first of all alternates.
259 */
260 char *
261 rxp_expand()
262 {
263 return (rxp__expand(TRUE));
264 }
265
266 static char *
267 rxp__expand(first)
268 int first;
269 {
270 static char buf[RXP_LINE_SZ/2];
271 static Rxp_t *rp;
272 static char *bp;
273 Rxp_t *grp_ptr;
274 char *err;
275
276 if (first) {
277 rp = rxpbuf;
278 bp = buf;
279 }
280 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
281 switch(*rp) {
282 case LIT:
283 rp++;
284 *bp++ = *rp++;
285 break;
286 case GRP_S:
287 rp++;
288 grp_ptr = rxpbuf + *rp;
289 rp++;
290 if ((err = rxp__expand(FALSE)) == NULL)
291 return (err);
292 rp = grp_ptr;
293 break;
294 case ALT_E:
295 return (buf);
296 case ALT_S:
297 rp++;
298 /* FALLTHROUGH */
299 case SOT:
300 case EOT:
301 case GRP_E:
302 rp++;
303 break;
304 default:
305 return (NULL);
306 }
307 if (first) {
308 if (*rp != END)
309 return (NULL);
310 *bp = '\0';
311 }
312 return (buf);
313 }
314