1 1.1 alnsn /* 2 1.1 alnsn * Stack-less Just-In-Time compiler 3 1.1 alnsn * 4 1.1.1.4 alnsn * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 5 1.1 alnsn * 6 1.1 alnsn * Redistribution and use in source and binary forms, with or without modification, are 7 1.1 alnsn * permitted provided that the following conditions are met: 8 1.1 alnsn * 9 1.1 alnsn * 1. Redistributions of source code must retain the above copyright notice, this list of 10 1.1 alnsn * conditions and the following disclaimer. 11 1.1 alnsn * 12 1.1 alnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 1.1 alnsn * of conditions and the following disclaimer in the documentation and/or other materials 14 1.1 alnsn * provided with the distribution. 15 1.1 alnsn * 16 1.1 alnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 1.1 alnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 1.1 alnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 1.1 alnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 1.1 alnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 1.1 alnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 1.1 alnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 1.1 alnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 1.1 alnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 1.1 alnsn */ 26 1.1 alnsn 27 1.1.1.2 alnsn /* Must be the first one. Must not depend on any other include. */ 28 1.1.1.4 alnsn #include "sljitLir.h" 29 1.1 alnsn #include "regexJIT.h" 30 1.1 alnsn 31 1.1 alnsn #include <stdio.h> 32 1.1 alnsn 33 1.1.1.2 alnsn #if defined _WIN32 || defined _WIN64 34 1.1.1.2 alnsn #define COLOR_RED 35 1.1.1.2 alnsn #define COLOR_GREEN 36 1.1.1.2 alnsn #define COLOR_ARCH 37 1.1.1.2 alnsn #define COLOR_DEFAULT 38 1.1.1.2 alnsn #else 39 1.1.1.2 alnsn #define COLOR_RED "\33[31m" 40 1.1.1.2 alnsn #define COLOR_GREEN "\33[32m" 41 1.1.1.2 alnsn #define COLOR_ARCH "\33[33m" 42 1.1.1.2 alnsn #define COLOR_DEFAULT "\33[0m" 43 1.1.1.2 alnsn #endif 44 1.1.1.2 alnsn 45 1.1 alnsn #ifdef REGEX_USE_8BIT_CHARS 46 1.1 alnsn #define S(str) str 47 1.1 alnsn #else 48 1.1 alnsn #define S(str) L##str 49 1.1 alnsn #endif 50 1.1 alnsn 51 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 52 1.1 alnsn void verbose_test(regex_char_t *pattern, regex_char_t *string) 53 1.1 alnsn { 54 1.1 alnsn int error; 55 1.1 alnsn regex_char_t *ptr; 56 1.1 alnsn struct regex_machine* machine; 57 1.1 alnsn struct regex_match* match; 58 1.1 alnsn int begin, end, id; 59 1.1 alnsn 60 1.1 alnsn ptr = pattern; 61 1.1 alnsn while (*ptr) 62 1.1 alnsn ptr++; 63 1.1 alnsn 64 1.1 alnsn printf("Start test '%s' matches to '%s'\n", pattern, string); 65 1.1 alnsn machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error); 66 1.1 alnsn 67 1.1 alnsn if (error) { 68 1.1 alnsn printf("WARNING: Error %d\n", error); 69 1.1 alnsn return; 70 1.1 alnsn } 71 1.1 alnsn if (!machine) { 72 1.1 alnsn printf("ERROR: machine must be exists. Report this bug, please\n"); 73 1.1 alnsn return; 74 1.1 alnsn } 75 1.1 alnsn 76 1.1 alnsn match = regex_begin_match(machine); 77 1.1 alnsn if (!match) { 78 1.1 alnsn printf("WARNING: Not enough memory for matching\n"); 79 1.1 alnsn regex_free_machine(machine); 80 1.1 alnsn return; 81 1.1 alnsn } 82 1.1 alnsn 83 1.1 alnsn ptr = string; 84 1.1 alnsn while (*ptr) 85 1.1 alnsn ptr++; 86 1.1 alnsn 87 1.1 alnsn regex_continue_match_debug(match, string, ptr - string); 88 1.1 alnsn 89 1.1 alnsn begin = regex_get_result(match, &end, &id); 90 1.1 alnsn printf("Math returns: %3d->%3d [%3d]\n", begin, end, id); 91 1.1 alnsn 92 1.1 alnsn regex_free_match(match); 93 1.1 alnsn regex_free_machine(machine); 94 1.1 alnsn } 95 1.1 alnsn #endif 96 1.1 alnsn 97 1.1 alnsn struct test_case { 98 1.1 alnsn int begin; /* Expected begin. */ 99 1.1 alnsn int end; /* Expected end. */ 100 1.1 alnsn int id; /* Expected id. */ 101 1.1 alnsn int finished; /* -1 : don't care, 0 : false, 1 : true. */ 102 1.1 alnsn int flags; /* REGEX_MATCH_* */ 103 1.1 alnsn const regex_char_t *pattern; /* NULL : use the previous pattern. */ 104 1.1 alnsn const regex_char_t *string; /* NULL : end of tests. */ 105 1.1 alnsn }; 106 1.1 alnsn 107 1.1.1.2 alnsn void run_tests(struct test_case* test, int verbose, int silent) 108 1.1 alnsn { 109 1.1 alnsn int error; 110 1.1 alnsn const regex_char_t *ptr; 111 1.1 alnsn struct regex_machine* machine = NULL; 112 1.1 alnsn struct regex_match* match; 113 1.1 alnsn int begin, end, id, finished; 114 1.1 alnsn int success = 0, fail = 0; 115 1.1 alnsn 116 1.1.1.2 alnsn if (!verbose && !silent) 117 1.1.1.2 alnsn printf("Pass -v to enable verbose, -s to disable this hint.\n\n"); 118 1.1.1.2 alnsn 119 1.1 alnsn for ( ; test->string ; test++) { 120 1.1.1.2 alnsn if (verbose) 121 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 122 1.1 alnsn fail++; 123 1.1 alnsn 124 1.1 alnsn if (test->pattern) { 125 1.1 alnsn if (machine) 126 1.1 alnsn regex_free_machine(machine); 127 1.1 alnsn 128 1.1 alnsn ptr = test->pattern; 129 1.1 alnsn while (*ptr) 130 1.1 alnsn ptr++; 131 1.1 alnsn 132 1.1 alnsn machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error); 133 1.1 alnsn 134 1.1 alnsn if (error) { 135 1.1.1.2 alnsn if (!verbose) 136 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 137 1.1 alnsn printf("ABORT: Error %d\n", error); 138 1.1 alnsn return; 139 1.1 alnsn } 140 1.1 alnsn if (!machine) { 141 1.1.1.2 alnsn if (!verbose) 142 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 143 1.1 alnsn printf("ABORT: machine must be exists. Report this bug, please\n"); 144 1.1 alnsn return; 145 1.1 alnsn } 146 1.1 alnsn } 147 1.1 alnsn else if (test->flags != 0) { 148 1.1.1.2 alnsn if (!verbose) 149 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 150 1.1 alnsn printf("ABORT: flag must be 0 if no pattern\n"); 151 1.1 alnsn return; 152 1.1 alnsn } 153 1.1 alnsn 154 1.1 alnsn ptr = test->string; 155 1.1 alnsn while (*ptr) 156 1.1 alnsn ptr++; 157 1.1 alnsn 158 1.1 alnsn match = regex_begin_match(machine); 159 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 160 1.1 alnsn if (!match) { 161 1.1.1.2 alnsn if (!verbose) 162 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 163 1.1 alnsn printf("ABORT: Not enough memory for matching\n"); 164 1.1 alnsn regex_free_machine(machine); 165 1.1 alnsn return; 166 1.1 alnsn } 167 1.1 alnsn regex_continue_match_debug(match, test->string, ptr - test->string); 168 1.1 alnsn begin = regex_get_result(match, &end, &id); 169 1.1 alnsn finished = regex_is_match_finished(match); 170 1.1 alnsn 171 1.1 alnsn if (begin != test->begin || end != test->end || id != test->id) { 172 1.1.1.2 alnsn if (!verbose) 173 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 174 1.1 alnsn printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 175 1.1 alnsn continue; 176 1.1 alnsn } 177 1.1 alnsn if (test->finished != -1 && test->finished != !!finished) { 178 1.1.1.2 alnsn if (!verbose) 179 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 180 1.1 alnsn printf("FAIL A: finish check\n"); 181 1.1 alnsn continue; 182 1.1 alnsn } 183 1.1 alnsn #endif 184 1.1 alnsn 185 1.1 alnsn regex_reset_match(match); 186 1.1 alnsn regex_continue_match(match, test->string, ptr - test->string); 187 1.1 alnsn begin = regex_get_result(match, &end, &id); 188 1.1 alnsn finished = regex_is_match_finished(match); 189 1.1 alnsn regex_free_match(match); 190 1.1 alnsn 191 1.1 alnsn if (begin != test->begin || end != test->end || id != test->id) { 192 1.1.1.2 alnsn if (!verbose) 193 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 194 1.1 alnsn printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 195 1.1 alnsn continue; 196 1.1 alnsn } 197 1.1 alnsn if (test->finished != -1 && test->finished != !!finished) { 198 1.1.1.2 alnsn if (!verbose) 199 1.1.1.2 alnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 200 1.1 alnsn printf("FAIL B: finish check\n"); 201 1.1 alnsn continue; 202 1.1 alnsn } 203 1.1 alnsn 204 1.1.1.2 alnsn if (verbose) 205 1.1.1.2 alnsn printf("SUCCESS\n"); 206 1.1 alnsn fail--; 207 1.1 alnsn success++; 208 1.1 alnsn } 209 1.1 alnsn if (machine) 210 1.1 alnsn regex_free_machine(machine); 211 1.1 alnsn 212 1.1.1.3 alnsn printf("REGEX tests: "); 213 1.1 alnsn if (fail == 0) 214 1.1.1.3 alnsn printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); 215 1.1 alnsn else 216 1.1.1.3 alnsn printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail)); 217 1.1.1.3 alnsn printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name()); 218 1.1 alnsn } 219 1.1 alnsn 220 1.1 alnsn /* Testing. */ 221 1.1 alnsn 222 1.1 alnsn static struct test_case tests[] = { 223 1.1 alnsn { 3, 7, 0, -1, 0, 224 1.1 alnsn S("text"), S("is textile") }, 225 1.1 alnsn { 0, 10, 0, -1, 0, 226 1.1 alnsn S("^(ab|c)*?d+(es)?"), S("abccabddeses") }, 227 1.1 alnsn { -1, 0, 0, 1, 0, 228 1.1 alnsn S("^a+"), S("saaaa") }, 229 1.1 alnsn { 3, 6, 0, 0, 0, 230 1.1 alnsn S("(a+|b+)$"), S("saabbb") }, 231 1.1 alnsn { 1, 6, 0, 0, 0, 232 1.1 alnsn S("(a+|b+){,2}$"), S("saabbb") }, 233 1.1 alnsn { 1, 6, 0, 1, 0, 234 1.1 alnsn S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") }, 235 1.1 alnsn { 1, 6, 0, 1, 0, 236 1.1 alnsn S("(abc(aa)?|(cab+){2})"), S("cabcaa") }, 237 1.1 alnsn { -1, 0, 0, 1, 0, 238 1.1 alnsn S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") }, 239 1.1 alnsn { 0, 3, 1, -1, 0, 240 1.1 alnsn S("^(ab{001!})?c"), S("abcde") }, 241 1.1 alnsn { 1, 15, 2, -1, 0, 242 1.1 alnsn S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") }, 243 1.1 alnsn { 2, 9, 0, -1, 0, 244 1.1 alnsn NULL, S("cacaadaadaa") }, 245 1.1 alnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN, 246 1.1 alnsn S("(((ab?c|d{1})))"), S("ad") }, 247 1.1 alnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN, 248 1.1 alnsn S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") }, 249 1.1 alnsn { 1, 6, 0, 0, REGEX_MATCH_END, 250 1.1 alnsn S("(a+(bb|cc?)?){4,}"), S("maaaac") }, 251 1.1 alnsn { 3, 12, 1, 0, REGEX_MATCH_END, 252 1.1 alnsn S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") }, 253 1.1 alnsn { 1, 2, 3, -1, 0, 254 1.1 alnsn S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") }, 255 1.1 alnsn { 1, 4, 2, 1, 0, 256 1.1 alnsn NULL, S("sxxaxxxaccacca") }, 257 1.1 alnsn { 0, 2, 1, 1, 0, 258 1.1 alnsn NULL, S("ccdcdcdddddcdccccd") }, 259 1.1 alnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY, 260 1.1 alnsn S("^a+a+a+"), S("aaaaaa") }, 261 1.1 alnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY, 262 1.1 alnsn S("a+a+a+"), S("bbaaaaaa") }, 263 1.1 alnsn { 1, 4, 0, 1, 0, 264 1.1 alnsn S("baa|a+"), S("sbaaaaaa") }, 265 1.1 alnsn { 0, 6, 0, 1, 0, 266 1.1 alnsn S("baaa|baa|sbaaaa"), S("sbaaaaa") }, 267 1.1 alnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY, 268 1.1 alnsn S("baaa|baa"), S("xbaaa") }, 269 1.1 alnsn { 0, 0, 3, 1, 0, 270 1.1 alnsn S("{3!}"), S("xx") }, 271 1.1 alnsn { 0, 0, 1, 1, 0, 272 1.1 alnsn S("{1!}(a{2!})*"), S("xx") }, 273 1.1 alnsn { 0, 2, 2, 0, 0, 274 1.1 alnsn NULL, S("aa") }, 275 1.1 alnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY, 276 1.1 alnsn S("{1!}(a{2!})*"), S("aaxx") }, 277 1.1 alnsn { 4, 12, 0, 1, 0, 278 1.1 alnsn S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") }, 279 1.1 alnsn { 3, 7, 1, 1, 0, 280 1.1 alnsn S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") }, 281 1.1 alnsn { 0, 8, 3, 0, 0, 282 1.1 alnsn S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") }, 283 1.1 alnsn { 0, 9, 0, 0, 0, 284 1.1 alnsn NULL, S("x-y[-][]x") }, 285 1.1 alnsn { 2, 8, 0, 1, 0, 286 1.1 alnsn S("<(/{1!})?[^>]+>"), S(" <html></html> ") }, 287 1.1 alnsn { 2, 9, 1, 1, 0, 288 1.1 alnsn NULL, S(" </html><html> ") }, 289 1.1 alnsn { 2, 9, 0, 1, 0, 290 1.1 alnsn S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") }, 291 1.1 alnsn { 1, 4, 0, 1, 0, 292 1.1 alnsn S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") }, 293 1.1 alnsn { 4, 11, 0, 0, 0, 294 1.1 alnsn NULL, S("ssaymmaa_ccl") }, 295 1.1 alnsn { 3, 6, 0, 1, REGEX_NEWLINE, 296 1.1 alnsn S(".a[^k]"), S("\na\nxa\ns") }, 297 1.1 alnsn { 0, 2, 0, 1, REGEX_NEWLINE, 298 1.1 alnsn S("^a+"), S("aa\n") }, 299 1.1 alnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */, 300 1.1 alnsn NULL, S("\naaa\n") }, 301 1.1 alnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */, 302 1.1 alnsn NULL, S("\n\na\n") }, 303 1.1 alnsn { 0, 2, 0, 1, REGEX_NEWLINE, 304 1.1 alnsn S("a+$"), S("aa\n") }, 305 1.1 alnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */, 306 1.1 alnsn NULL, S("aaa") }, 307 1.1 alnsn { 2, 4, 1, 1, REGEX_NEWLINE, 308 1.1 alnsn S("^a(a{1!})*$"), S("\n\naa\n\n") }, 309 1.1 alnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */, 310 1.1 alnsn NULL, S("a") }, 311 1.1 alnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */, 312 1.1 alnsn NULL, S("ab\nba") }, 313 1.1 alnsn { -1, 0, 0, 0, 0, 314 1.1 alnsn NULL, NULL } 315 1.1 alnsn }; 316 1.1 alnsn 317 1.1 alnsn int main(int argc, char* argv[]) 318 1.1 alnsn { 319 1.1.1.2 alnsn int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); 320 1.1.1.2 alnsn 321 1.1 alnsn /* verbose_test("a((b)((c|d))|)c|"); */ 322 1.1 alnsn /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */ 323 1.1 alnsn /* verbose_test("{3!}({3})({0!}){,"); */ 324 1.1 alnsn /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */ 325 1.1 alnsn /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */ 326 1.1 alnsn /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */ 327 1.1 alnsn 328 1.1.1.2 alnsn run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's'); 329 1.1.1.4 alnsn 330 1.1.1.4 alnsn sljit_free_unused_memory_exec(); 331 1.1.1.4 alnsn 332 1.1 alnsn return 0; 333 1.1 alnsn } 334