regexMain.c revision 1.1.1.2.4.1 1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2010 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* Must be the first one. Must not depend on any other include. */
28 #include "regexJIT.h"
29
30 #include <stdio.h>
31
/*
 * Escape sequences wrapped around parts of the test summary. On Windows the
 * macros expand to nothing, so they can still sit between adjacent string
 * literals without changing the text.
 */
#if !defined(_WIN32) && !defined(_WIN64)
#define COLOR_RED "\33[31m"
#define COLOR_GREEN "\33[32m"
#define COLOR_ARCH "\33[33m"
#define COLOR_DEFAULT "\33[0m"
#else
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#endif

/*
 * S("...") spells a string literal as a narrow literal when
 * REGEX_USE_8BIT_CHARS is defined and as a wide (L"...") literal otherwise.
 */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str) L##str
#else
#define S(str) str
#endif
49
#ifdef REGEX_MATCH_VERBOSE

/*
 * printf conversion for a regex_char_t string. Builds without
 * REGEX_USE_8BIT_CHARS use wide literals (see S()), which "%s" cannot print.
 */
#ifdef REGEX_USE_8BIT_CHARS
#define VERBOSE_TEST_STR "%s"
#else
#define VERBOSE_TEST_STR "%ls"
#endif

/*
 * Debugging aid: compile `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE,
 * feed `string` through regex_continue_match_debug() and print the
 * begin/end/id triple reported by regex_get_result().
 *
 * pattern, string: zero-terminated regex_char_t strings.
 *
 * Nothing is returned; compile errors and allocation failures are reported on
 * stdout and end the test early. The machine and match objects created here
 * are released before returning.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine *machine;
	struct regex_match *match;
	int begin, end, id;

	/* regex_compile() takes an explicit length, so walk to the terminator. */
	for (ptr = pattern; *ptr; ptr++)
		;

	printf("Start test '" VERBOSE_TEST_STR "' matches to '" VERBOSE_TEST_STR "'\n", pattern, string);
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same length computation for the subject string. */
	for (ptr = string; *ptr; ptr++)
		;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
95
96 struct test_case {
97 int begin; /* Expected begin. */
98 int end; /* Expected end. */
99 int id; /* Expected id. */
100 int finished; /* -1 : don't care, 0 : false, 1 : true. */
101 int flags; /* REGEX_MATCH_* */
102 const regex_char_t *pattern; /* NULL : use the previous pattern. */
103 const regex_char_t *string; /* NULL : end of tests. */
104 };
105
106 void run_tests(struct test_case* test, int verbose, int silent)
107 {
108 int error;
109 const regex_char_t *ptr;
110 struct regex_machine* machine = NULL;
111 struct regex_match* match;
112 int begin, end, id, finished;
113 int success = 0, fail = 0;
114
115 if (!verbose && !silent)
116 printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
117
118 for ( ; test->string ; test++) {
119 if (verbose)
120 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
121 fail++;
122
123 if (test->pattern) {
124 if (machine)
125 regex_free_machine(machine);
126
127 ptr = test->pattern;
128 while (*ptr)
129 ptr++;
130
131 machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
132
133 if (error) {
134 if (!verbose)
135 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
136 printf("ABORT: Error %d\n", error);
137 return;
138 }
139 if (!machine) {
140 if (!verbose)
141 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
142 printf("ABORT: machine must be exists. Report this bug, please\n");
143 return;
144 }
145 }
146 else if (test->flags != 0) {
147 if (!verbose)
148 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
149 printf("ABORT: flag must be 0 if no pattern\n");
150 return;
151 }
152
153 ptr = test->string;
154 while (*ptr)
155 ptr++;
156
157 match = regex_begin_match(machine);
158 #ifdef REGEX_MATCH_VERBOSE
159 if (!match) {
160 if (!verbose)
161 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
162 printf("ABORT: Not enough memory for matching\n");
163 regex_free_machine(machine);
164 return;
165 }
166 regex_continue_match_debug(match, test->string, ptr - test->string);
167 begin = regex_get_result(match, &end, &id);
168 finished = regex_is_match_finished(match);
169
170 if (begin != test->begin || end != test->end || id != test->id) {
171 if (!verbose)
172 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
173 printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
174 continue;
175 }
176 if (test->finished != -1 && test->finished != !!finished) {
177 if (!verbose)
178 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
179 printf("FAIL A: finish check\n");
180 continue;
181 }
182 #endif
183
184 regex_reset_match(match);
185 regex_continue_match(match, test->string, ptr - test->string);
186 begin = regex_get_result(match, &end, &id);
187 finished = regex_is_match_finished(match);
188 regex_free_match(match);
189
190 if (begin != test->begin || end != test->end || id != test->id) {
191 if (!verbose)
192 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
193 printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
194 continue;
195 }
196 if (test->finished != -1 && test->finished != !!finished) {
197 if (!verbose)
198 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
199 printf("FAIL B: finish check\n");
200 continue;
201 }
202
203 if (verbose)
204 printf("SUCCESS\n");
205 fail--;
206 success++;
207 }
208 if (machine)
209 regex_free_machine(machine);
210
211 printf("REGEX tests: ");
212 if (fail == 0)
213 printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
214 else
215 printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
216 printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
217 }
218
219 /* Testing. */
220
221 static struct test_case tests[] = {
222 { 3, 7, 0, -1, 0,
223 S("text"), S("is textile") },
224 { 0, 10, 0, -1, 0,
225 S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
226 { -1, 0, 0, 1, 0,
227 S("^a+"), S("saaaa") },
228 { 3, 6, 0, 0, 0,
229 S("(a+|b+)$"), S("saabbb") },
230 { 1, 6, 0, 0, 0,
231 S("(a+|b+){,2}$"), S("saabbb") },
232 { 1, 6, 0, 1, 0,
233 S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
234 { 1, 6, 0, 1, 0,
235 S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
236 { -1, 0, 0, 1, 0,
237 S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
238 { 0, 3, 1, -1, 0,
239 S("^(ab{001!})?c"), S("abcde") },
240 { 1, 15, 2, -1, 0,
241 S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
242 { 2, 9, 0, -1, 0,
243 NULL, S("cacaadaadaa") },
244 { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
245 S("(((ab?c|d{1})))"), S("ad") },
246 { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
247 S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
248 { 1, 6, 0, 0, REGEX_MATCH_END,
249 S("(a+(bb|cc?)?){4,}"), S("maaaac") },
250 { 3, 12, 1, 0, REGEX_MATCH_END,
251 S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
252 { 1, 2, 3, -1, 0,
253 S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
254 { 1, 4, 2, 1, 0,
255 NULL, S("sxxaxxxaccacca") },
256 { 0, 2, 1, 1, 0,
257 NULL, S("ccdcdcdddddcdccccd") },
258 { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
259 S("^a+a+a+"), S("aaaaaa") },
260 { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
261 S("a+a+a+"), S("bbaaaaaa") },
262 { 1, 4, 0, 1, 0,
263 S("baa|a+"), S("sbaaaaaa") },
264 { 0, 6, 0, 1, 0,
265 S("baaa|baa|sbaaaa"), S("sbaaaaa") },
266 { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
267 S("baaa|baa"), S("xbaaa") },
268 { 0, 0, 3, 1, 0,
269 S("{3!}"), S("xx") },
270 { 0, 0, 1, 1, 0,
271 S("{1!}(a{2!})*"), S("xx") },
272 { 0, 2, 2, 0, 0,
273 NULL, S("aa") },
274 { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
275 S("{1!}(a{2!})*"), S("aaxx") },
276 { 4, 12, 0, 1, 0,
277 S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
278 { 3, 7, 1, 1, 0,
279 S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
280 { 0, 8, 3, 0, 0,
281 S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
282 { 0, 9, 0, 0, 0,
283 NULL, S("x-y[-][]x") },
284 { 2, 8, 0, 1, 0,
285 S("<(/{1!})?[^>]+>"), S(" <html></html> ") },
286 { 2, 9, 1, 1, 0,
287 NULL, S(" </html><html> ") },
288 { 2, 9, 0, 1, 0,
289 S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
290 { 1, 4, 0, 1, 0,
291 S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
292 { 4, 11, 0, 0, 0,
293 NULL, S("ssaymmaa_ccl") },
294 { 3, 6, 0, 1, REGEX_NEWLINE,
295 S(".a[^k]"), S("\na\nxa\ns") },
296 { 0, 2, 0, 1, REGEX_NEWLINE,
297 S("^a+"), S("aa\n") },
298 { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
299 NULL, S("\naaa\n") },
300 { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
301 NULL, S("\n\na\n") },
302 { 0, 2, 0, 1, REGEX_NEWLINE,
303 S("a+$"), S("aa\n") },
304 { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
305 NULL, S("aaa") },
306 { 2, 4, 1, 1, REGEX_NEWLINE,
307 S("^a(a{1!})*$"), S("\n\naa\n\n") },
308 { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
309 NULL, S("a") },
310 { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
311 NULL, S("ab\nba") },
312 { -1, 0, 0, 0, 0,
313 NULL, NULL }
314 };
315
316 int main(int argc, char* argv[])
317 {
318 int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
319
320 /* verbose_test("a((b)((c|d))|)c|"); */
321 /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
322 /* verbose_test("{3!}({3})({0!}){,"); */
323 /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
324 /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
325 /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
326
327 run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
328 return 0;
329 }
330