regexMain.c revision 1.1.1.4 1 1.1 alnsn /*
2 1.1 alnsn * Stack-less Just-In-Time compiler
3 1.1 alnsn *
4 1.1.1.4 alnsn * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
5 1.1 alnsn *
6 1.1 alnsn * Redistribution and use in source and binary forms, with or without modification, are
7 1.1 alnsn * permitted provided that the following conditions are met:
8 1.1 alnsn *
9 1.1 alnsn * 1. Redistributions of source code must retain the above copyright notice, this list of
10 1.1 alnsn * conditions and the following disclaimer.
11 1.1 alnsn *
12 1.1 alnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 1.1 alnsn * of conditions and the following disclaimer in the documentation and/or other materials
14 1.1 alnsn * provided with the distribution.
15 1.1 alnsn *
16 1.1 alnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 1.1 alnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 1.1 alnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 1.1 alnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 1.1 alnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 1.1 alnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 1.1 alnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 1.1 alnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 1.1 alnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 1.1 alnsn */
26 1.1 alnsn
27 1.1.1.2 alnsn /* Must be the first one. Must not depend on any other include. */
28 1.1.1.4 alnsn #include "sljitLir.h"
29 1.1 alnsn #include "regexJIT.h"
30 1.1 alnsn
31 1.1 alnsn #include <stdio.h>
32 1.1 alnsn
/*
 * Terminal colour prefixes for the summary line.  On Windows targets the
 * macros expand to nothing, so string-literal concatenation such as
 * "text " COLOR_GREEN "PASSED" COLOR_DEFAULT still compiles and simply
 * prints uncoloured text.
 */
#if !defined(_WIN32) && !defined(_WIN64)
#define COLOR_RED "\33[31m"
#define COLOR_GREEN "\33[32m"
#define COLOR_ARCH "\33[33m"
#define COLOR_DEFAULT "\33[0m"
#else
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#endif
44 1.1.1.2 alnsn
/*
 * S("literal") yields a string literal of the character width selected by
 * REGEX_USE_8BIT_CHARS: a plain literal when it is defined, a wide (L"...")
 * literal otherwise.
 */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str) L##str
#else
#define S(str) str
#endif
50 1.1 alnsn
#ifdef REGEX_MATCH_VERBOSE

/* printf conversion for regex_char_t strings: S() produces wide literals
   unless REGEX_USE_8BIT_CHARS is defined, so "%s" is only valid in the
   8 bit configuration. */
#ifndef REGEX_STR_FMT
#ifdef REGEX_USE_8BIT_CHARS
#define REGEX_STR_FMT "%s"
#else
#define REGEX_STR_FMT "%ls"
#endif
#endif

/*
 * Debugging aid, only built when REGEX_MATCH_VERBOSE is defined.
 *
 * Compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE, feeds the
 * whole of `string` to regex_continue_match_debug() and prints the
 * begin/end/id triple reported by regex_get_result().
 *
 * pattern - zero terminated regular expression.
 * string  - zero terminated subject text.
 *
 * Problems are reported on stdout and the function returns early; the
 * match and the machine are released on every path that created them.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* Hand-rolled length computation: works for both character widths. */
	ptr = pattern;
	while (*ptr)
		ptr++;

	printf("Start test '" REGEX_STR_FMT "' matches to '" REGEX_STR_FMT "'\n", pattern, string);
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
96 1.1 alnsn
97 1.1 alnsn struct test_case {
98 1.1 alnsn int begin; /* Expected begin. */
99 1.1 alnsn int end; /* Expected end. */
100 1.1 alnsn int id; /* Expected id. */
101 1.1 alnsn int finished; /* -1 : don't care, 0 : false, 1 : true. */
102 1.1 alnsn int flags; /* REGEX_MATCH_* */
103 1.1 alnsn const regex_char_t *pattern; /* NULL : use the previous pattern. */
104 1.1 alnsn const regex_char_t *string; /* NULL : end of tests. */
105 1.1 alnsn };
106 1.1 alnsn
107 1.1.1.2 alnsn void run_tests(struct test_case* test, int verbose, int silent)
108 1.1 alnsn {
109 1.1 alnsn int error;
110 1.1 alnsn const regex_char_t *ptr;
111 1.1 alnsn struct regex_machine* machine = NULL;
112 1.1 alnsn struct regex_match* match;
113 1.1 alnsn int begin, end, id, finished;
114 1.1 alnsn int success = 0, fail = 0;
115 1.1 alnsn
116 1.1.1.2 alnsn if (!verbose && !silent)
117 1.1.1.2 alnsn printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
118 1.1.1.2 alnsn
119 1.1 alnsn for ( ; test->string ; test++) {
120 1.1.1.2 alnsn if (verbose)
121 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
122 1.1 alnsn fail++;
123 1.1 alnsn
124 1.1 alnsn if (test->pattern) {
125 1.1 alnsn if (machine)
126 1.1 alnsn regex_free_machine(machine);
127 1.1 alnsn
128 1.1 alnsn ptr = test->pattern;
129 1.1 alnsn while (*ptr)
130 1.1 alnsn ptr++;
131 1.1 alnsn
132 1.1 alnsn machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
133 1.1 alnsn
134 1.1 alnsn if (error) {
135 1.1.1.2 alnsn if (!verbose)
136 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
137 1.1 alnsn printf("ABORT: Error %d\n", error);
138 1.1 alnsn return;
139 1.1 alnsn }
140 1.1 alnsn if (!machine) {
141 1.1.1.2 alnsn if (!verbose)
142 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
143 1.1 alnsn printf("ABORT: machine must be exists. Report this bug, please\n");
144 1.1 alnsn return;
145 1.1 alnsn }
146 1.1 alnsn }
147 1.1 alnsn else if (test->flags != 0) {
148 1.1.1.2 alnsn if (!verbose)
149 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
150 1.1 alnsn printf("ABORT: flag must be 0 if no pattern\n");
151 1.1 alnsn return;
152 1.1 alnsn }
153 1.1 alnsn
154 1.1 alnsn ptr = test->string;
155 1.1 alnsn while (*ptr)
156 1.1 alnsn ptr++;
157 1.1 alnsn
158 1.1 alnsn match = regex_begin_match(machine);
159 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE
160 1.1 alnsn if (!match) {
161 1.1.1.2 alnsn if (!verbose)
162 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
163 1.1 alnsn printf("ABORT: Not enough memory for matching\n");
164 1.1 alnsn regex_free_machine(machine);
165 1.1 alnsn return;
166 1.1 alnsn }
167 1.1 alnsn regex_continue_match_debug(match, test->string, ptr - test->string);
168 1.1 alnsn begin = regex_get_result(match, &end, &id);
169 1.1 alnsn finished = regex_is_match_finished(match);
170 1.1 alnsn
171 1.1 alnsn if (begin != test->begin || end != test->end || id != test->id) {
172 1.1.1.2 alnsn if (!verbose)
173 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
174 1.1 alnsn printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
175 1.1 alnsn continue;
176 1.1 alnsn }
177 1.1 alnsn if (test->finished != -1 && test->finished != !!finished) {
178 1.1.1.2 alnsn if (!verbose)
179 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
180 1.1 alnsn printf("FAIL A: finish check\n");
181 1.1 alnsn continue;
182 1.1 alnsn }
183 1.1 alnsn #endif
184 1.1 alnsn
185 1.1 alnsn regex_reset_match(match);
186 1.1 alnsn regex_continue_match(match, test->string, ptr - test->string);
187 1.1 alnsn begin = regex_get_result(match, &end, &id);
188 1.1 alnsn finished = regex_is_match_finished(match);
189 1.1 alnsn regex_free_match(match);
190 1.1 alnsn
191 1.1 alnsn if (begin != test->begin || end != test->end || id != test->id) {
192 1.1.1.2 alnsn if (!verbose)
193 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
194 1.1 alnsn printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
195 1.1 alnsn continue;
196 1.1 alnsn }
197 1.1 alnsn if (test->finished != -1 && test->finished != !!finished) {
198 1.1.1.2 alnsn if (!verbose)
199 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
200 1.1 alnsn printf("FAIL B: finish check\n");
201 1.1 alnsn continue;
202 1.1 alnsn }
203 1.1 alnsn
204 1.1.1.2 alnsn if (verbose)
205 1.1.1.2 alnsn printf("SUCCESS\n");
206 1.1 alnsn fail--;
207 1.1 alnsn success++;
208 1.1 alnsn }
209 1.1 alnsn if (machine)
210 1.1 alnsn regex_free_machine(machine);
211 1.1 alnsn
212 1.1.1.3 alnsn printf("REGEX tests: ");
213 1.1 alnsn if (fail == 0)
214 1.1.1.3 alnsn printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
215 1.1 alnsn else
216 1.1.1.3 alnsn printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
217 1.1.1.3 alnsn printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
218 1.1 alnsn }
219 1.1 alnsn
220 1.1 alnsn /* Testing. */
221 1.1 alnsn
222 1.1 alnsn static struct test_case tests[] = {
223 1.1 alnsn { 3, 7, 0, -1, 0,
224 1.1 alnsn S("text"), S("is textile") },
225 1.1 alnsn { 0, 10, 0, -1, 0,
226 1.1 alnsn S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
227 1.1 alnsn { -1, 0, 0, 1, 0,
228 1.1 alnsn S("^a+"), S("saaaa") },
229 1.1 alnsn { 3, 6, 0, 0, 0,
230 1.1 alnsn S("(a+|b+)$"), S("saabbb") },
231 1.1 alnsn { 1, 6, 0, 0, 0,
232 1.1 alnsn S("(a+|b+){,2}$"), S("saabbb") },
233 1.1 alnsn { 1, 6, 0, 1, 0,
234 1.1 alnsn S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
235 1.1 alnsn { 1, 6, 0, 1, 0,
236 1.1 alnsn S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
237 1.1 alnsn { -1, 0, 0, 1, 0,
238 1.1 alnsn S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
239 1.1 alnsn { 0, 3, 1, -1, 0,
240 1.1 alnsn S("^(ab{001!})?c"), S("abcde") },
241 1.1 alnsn { 1, 15, 2, -1, 0,
242 1.1 alnsn S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
243 1.1 alnsn { 2, 9, 0, -1, 0,
244 1.1 alnsn NULL, S("cacaadaadaa") },
245 1.1 alnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
246 1.1 alnsn S("(((ab?c|d{1})))"), S("ad") },
247 1.1 alnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
248 1.1 alnsn S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
249 1.1 alnsn { 1, 6, 0, 0, REGEX_MATCH_END,
250 1.1 alnsn S("(a+(bb|cc?)?){4,}"), S("maaaac") },
251 1.1 alnsn { 3, 12, 1, 0, REGEX_MATCH_END,
252 1.1 alnsn S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
253 1.1 alnsn { 1, 2, 3, -1, 0,
254 1.1 alnsn S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
255 1.1 alnsn { 1, 4, 2, 1, 0,
256 1.1 alnsn NULL, S("sxxaxxxaccacca") },
257 1.1 alnsn { 0, 2, 1, 1, 0,
258 1.1 alnsn NULL, S("ccdcdcdddddcdccccd") },
259 1.1 alnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
260 1.1 alnsn S("^a+a+a+"), S("aaaaaa") },
261 1.1 alnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
262 1.1 alnsn S("a+a+a+"), S("bbaaaaaa") },
263 1.1 alnsn { 1, 4, 0, 1, 0,
264 1.1 alnsn S("baa|a+"), S("sbaaaaaa") },
265 1.1 alnsn { 0, 6, 0, 1, 0,
266 1.1 alnsn S("baaa|baa|sbaaaa"), S("sbaaaaa") },
267 1.1 alnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
268 1.1 alnsn S("baaa|baa"), S("xbaaa") },
269 1.1 alnsn { 0, 0, 3, 1, 0,
270 1.1 alnsn S("{3!}"), S("xx") },
271 1.1 alnsn { 0, 0, 1, 1, 0,
272 1.1 alnsn S("{1!}(a{2!})*"), S("xx") },
273 1.1 alnsn { 0, 2, 2, 0, 0,
274 1.1 alnsn NULL, S("aa") },
275 1.1 alnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
276 1.1 alnsn S("{1!}(a{2!})*"), S("aaxx") },
277 1.1 alnsn { 4, 12, 0, 1, 0,
278 1.1 alnsn S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
279 1.1 alnsn { 3, 7, 1, 1, 0,
280 1.1 alnsn S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
281 1.1 alnsn { 0, 8, 3, 0, 0,
282 1.1 alnsn S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
283 1.1 alnsn { 0, 9, 0, 0, 0,
284 1.1 alnsn NULL, S("x-y[-][]x") },
285 1.1 alnsn { 2, 8, 0, 1, 0,
286 1.1 alnsn S("<(/{1!})?[^>]+>"), S(" <html></html> ") },
287 1.1 alnsn { 2, 9, 1, 1, 0,
288 1.1 alnsn NULL, S(" </html><html> ") },
289 1.1 alnsn { 2, 9, 0, 1, 0,
290 1.1 alnsn S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
291 1.1 alnsn { 1, 4, 0, 1, 0,
292 1.1 alnsn S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
293 1.1 alnsn { 4, 11, 0, 0, 0,
294 1.1 alnsn NULL, S("ssaymmaa_ccl") },
295 1.1 alnsn { 3, 6, 0, 1, REGEX_NEWLINE,
296 1.1 alnsn S(".a[^k]"), S("\na\nxa\ns") },
297 1.1 alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
298 1.1 alnsn S("^a+"), S("aa\n") },
299 1.1 alnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
300 1.1 alnsn NULL, S("\naaa\n") },
301 1.1 alnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
302 1.1 alnsn NULL, S("\n\na\n") },
303 1.1 alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
304 1.1 alnsn S("a+$"), S("aa\n") },
305 1.1 alnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
306 1.1 alnsn NULL, S("aaa") },
307 1.1 alnsn { 2, 4, 1, 1, REGEX_NEWLINE,
308 1.1 alnsn S("^a(a{1!})*$"), S("\n\naa\n\n") },
309 1.1 alnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
310 1.1 alnsn NULL, S("a") },
311 1.1 alnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
312 1.1 alnsn NULL, S("ab\nba") },
313 1.1 alnsn { -1, 0, 0, 0, 0,
314 1.1 alnsn NULL, NULL }
315 1.1 alnsn };
316 1.1 alnsn
317 1.1 alnsn int main(int argc, char* argv[])
318 1.1 alnsn {
319 1.1.1.2 alnsn int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
320 1.1.1.2 alnsn
321 1.1 alnsn /* verbose_test("a((b)((c|d))|)c|"); */
322 1.1 alnsn /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
323 1.1 alnsn /* verbose_test("{3!}({3})({0!}){,"); */
324 1.1 alnsn /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
325 1.1 alnsn /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
326 1.1 alnsn /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
327 1.1 alnsn
328 1.1.1.2 alnsn run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
329 1.1.1.4 alnsn
330 1.1.1.4 alnsn sljit_free_unused_memory_exec();
331 1.1.1.4 alnsn
332 1.1 alnsn return 0;
333 1.1 alnsn }
334