regexMain.c revision 1.1.1.1.4.2 1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2010 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "regexJIT.h"
28
29 #include <stdio.h>
30
/*
 * S(str) adapts a string literal to the character width this test is built
 * for: when REGEX_USE_8BIT_CHARS is defined the literal is used unchanged,
 * otherwise it is pasted after an L prefix so it becomes a wide literal.
 */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str) L##str
#else
#define S(str) str
#endif
36
#ifdef REGEX_MATCH_VERBOSE
/*
 * Manual debugging aid, only built when REGEX_MATCH_VERBOSE is defined.
 *
 * Compiles `pattern` with the REGEX_MATCH_VERBOSE | REGEX_NEWLINE flags, runs
 * regex_continue_match_debug() over `string` and prints the begin / end / id
 * values reported by regex_get_result().
 *
 * pattern: zero-terminated pattern; its length is measured here.
 * string:  zero-terminated subject text; its length is measured here.
 *
 * Nothing is returned; every outcome, including failures, is reported on
 * stdout. The machine and match objects created here are released before
 * returning on every path that created them.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* Walk to the terminator so the pattern length can be passed explicitly. */
	for (ptr = pattern; *ptr; ptr++)
		;

	/* NOTE(review): %s assumes regex_char_t is char; when S() produces wide
	   literals this specifier would not match - confirm against regexJIT.h. */
	printf("Start test '%s' matches to '%s'\n", pattern, string);
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same length computation for the subject string. */
	for (ptr = string; *ptr; ptr++)
		;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	/* The message used to read "Math returns"; it reports the match result. */
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
82
83 struct test_case {
84 int begin; /* Expected begin. */
85 int end; /* Expected end. */
86 int id; /* Expected id. */
87 int finished; /* -1 : don't care, 0 : false, 1 : true. */
88 int flags; /* REGEX_MATCH_* */
89 const regex_char_t *pattern; /* NULL : use the previous pattern. */
90 const regex_char_t *string; /* NULL : end of tests. */
91 };
92
93 void run_tests(struct test_case* test)
94 {
95 int error;
96 const regex_char_t *ptr;
97 struct regex_machine* machine = NULL;
98 struct regex_match* match;
99 int begin, end, id, finished;
100 int success = 0, fail = 0;
101
102 for ( ; test->string ; test++) {
103 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
104 fail++;
105
106 if (test->pattern) {
107 if (machine)
108 regex_free_machine(machine);
109
110 ptr = test->pattern;
111 while (*ptr)
112 ptr++;
113
114 machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
115
116 if (error) {
117 printf("ABORT: Error %d\n", error);
118 return;
119 }
120 if (!machine) {
121 printf("ABORT: machine must be exists. Report this bug, please\n");
122 return;
123 }
124 }
125 else if (test->flags != 0) {
126 printf("ABORT: flag must be 0 if no pattern\n");
127 return;
128 }
129
130 ptr = test->string;
131 while (*ptr)
132 ptr++;
133
134 match = regex_begin_match(machine);
135 #ifdef REGEX_MATCH_VERBOSE
136 if (!match) {
137 printf("ABORT: Not enough memory for matching\n");
138 regex_free_machine(machine);
139 return;
140 }
141 regex_continue_match_debug(match, test->string, ptr - test->string);
142 begin = regex_get_result(match, &end, &id);
143 finished = regex_is_match_finished(match);
144
145 if (begin != test->begin || end != test->end || id != test->id) {
146 printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
147 continue;
148 }
149 if (test->finished != -1 && test->finished != !!finished) {
150 printf("FAIL A: finish check\n");
151 continue;
152 }
153 #endif
154
155 regex_reset_match(match);
156 regex_continue_match(match, test->string, ptr - test->string);
157 begin = regex_get_result(match, &end, &id);
158 finished = regex_is_match_finished(match);
159 regex_free_match(match);
160
161 if (begin != test->begin || end != test->end || id != test->id) {
162 printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
163 continue;
164 }
165 if (test->finished != -1 && test->finished != !!finished) {
166 printf("FAIL B: finish check\n");
167 continue;
168 }
169
170 printf("SUCCESS\n");
171 fail--;
172 success++;
173 }
174 if (machine)
175 regex_free_machine(machine);
176
177 printf("On %s: ", regex_get_platform_name());
178 if (fail == 0)
179 printf("All tests are passed!\n");
180 else
181 printf("Successful test ratio: %d%%.\n", success * 100 / (success + fail));
182 }
183
184 /* Testing. */
185
186 static struct test_case tests[] = {
187 { 3, 7, 0, -1, 0,
188 S("text"), S("is textile") },
189 { 0, 10, 0, -1, 0,
190 S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
191 { -1, 0, 0, 1, 0,
192 S("^a+"), S("saaaa") },
193 { 3, 6, 0, 0, 0,
194 S("(a+|b+)$"), S("saabbb") },
195 { 1, 6, 0, 0, 0,
196 S("(a+|b+){,2}$"), S("saabbb") },
197 { 1, 6, 0, 1, 0,
198 S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
199 { 1, 6, 0, 1, 0,
200 S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
201 { -1, 0, 0, 1, 0,
202 S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
203 { 0, 3, 1, -1, 0,
204 S("^(ab{001!})?c"), S("abcde") },
205 { 1, 15, 2, -1, 0,
206 S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
207 { 2, 9, 0, -1, 0,
208 NULL, S("cacaadaadaa") },
209 { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
210 S("(((ab?c|d{1})))"), S("ad") },
211 { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
212 S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
213 { 1, 6, 0, 0, REGEX_MATCH_END,
214 S("(a+(bb|cc?)?){4,}"), S("maaaac") },
215 { 3, 12, 1, 0, REGEX_MATCH_END,
216 S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
217 { 1, 2, 3, -1, 0,
218 S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
219 { 1, 4, 2, 1, 0,
220 NULL, S("sxxaxxxaccacca") },
221 { 0, 2, 1, 1, 0,
222 NULL, S("ccdcdcdddddcdccccd") },
223 { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
224 S("^a+a+a+"), S("aaaaaa") },
225 { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
226 S("a+a+a+"), S("bbaaaaaa") },
227 { 1, 4, 0, 1, 0,
228 S("baa|a+"), S("sbaaaaaa") },
229 { 0, 6, 0, 1, 0,
230 S("baaa|baa|sbaaaa"), S("sbaaaaa") },
231 { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
232 S("baaa|baa"), S("xbaaa") },
233 { 0, 0, 3, 1, 0,
234 S("{3!}"), S("xx") },
235 { 0, 0, 1, 1, 0,
236 S("{1!}(a{2!})*"), S("xx") },
237 { 0, 2, 2, 0, 0,
238 NULL, S("aa") },
239 { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
240 S("{1!}(a{2!})*"), S("aaxx") },
241 { 4, 12, 0, 1, 0,
242 S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
243 { 3, 7, 1, 1, 0,
244 S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
245 { 0, 8, 3, 0, 0,
246 S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
247 { 0, 9, 0, 0, 0,
248 NULL, S("x-y[-][]x") },
249 { 2, 8, 0, 1, 0,
250 S("<(/{1!})?[^>]+>"), S(" <html></html> ") },
251 { 2, 9, 1, 1, 0,
252 NULL, S(" </html><html> ") },
253 { 2, 9, 0, 1, 0,
254 S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
255 { 1, 4, 0, 1, 0,
256 S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
257 { 4, 11, 0, 0, 0,
258 NULL, S("ssaymmaa_ccl") },
259 { 3, 6, 0, 1, REGEX_NEWLINE,
260 S(".a[^k]"), S("\na\nxa\ns") },
261 { 0, 2, 0, 1, REGEX_NEWLINE,
262 S("^a+"), S("aa\n") },
263 { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
264 NULL, S("\naaa\n") },
265 { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
266 NULL, S("\n\na\n") },
267 { 0, 2, 0, 1, REGEX_NEWLINE,
268 S("a+$"), S("aa\n") },
269 { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
270 NULL, S("aaa") },
271 { 2, 4, 1, 1, REGEX_NEWLINE,
272 S("^a(a{1!})*$"), S("\n\naa\n\n") },
273 { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
274 NULL, S("a") },
275 { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
276 NULL, S("ab\nba") },
277 { -1, 0, 0, 0, 0,
278 NULL, NULL }
279 };
280
281 int main(int argc, char* argv[])
282 {
283 /* verbose_test("a((b)((c|d))|)c|"); */
284 /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
285 /* verbose_test("{3!}({3})({0!}){,"); */
286 /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
287 /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
288 /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
289
290 run_tests(tests);
291 return 0;
292 }
293
294