Home | History | Annotate | Line # | Download | only in regex_src
regexMain.c revision 1.1.1.2.4.1
      1          1.1  alnsn /*
      2          1.1  alnsn  *    Stack-less Just-In-Time compiler
      3          1.1  alnsn  *
      4          1.1  alnsn  *    Copyright 2009-2010 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5          1.1  alnsn  *
      6          1.1  alnsn  * Redistribution and use in source and binary forms, with or without modification, are
      7          1.1  alnsn  * permitted provided that the following conditions are met:
      8          1.1  alnsn  *
      9          1.1  alnsn  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10          1.1  alnsn  *      conditions and the following disclaimer.
     11          1.1  alnsn  *
     12          1.1  alnsn  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13          1.1  alnsn  *      of conditions and the following disclaimer in the documentation and/or other materials
     14          1.1  alnsn  *      provided with the distribution.
     15          1.1  alnsn  *
     16          1.1  alnsn  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17          1.1  alnsn  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18          1.1  alnsn  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19          1.1  alnsn  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20          1.1  alnsn  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21          1.1  alnsn  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22          1.1  alnsn  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23          1.1  alnsn  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24          1.1  alnsn  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25          1.1  alnsn  */
     26          1.1  alnsn 
     27      1.1.1.2  alnsn /* Must be the first one. Must not depend on any other include. */
     28          1.1  alnsn #include "regexJIT.h"
     29          1.1  alnsn 
     30          1.1  alnsn #include <stdio.h>
     31          1.1  alnsn 
/* Terminal colour escape sequences used when printing results.
   Windows builds get empty definitions, so the adjacent string
   literals concatenate to plain uncoloured text. */
#if !defined(_WIN32) && !defined(_WIN64)
#define COLOR_RED "\33[31m"
#define COLOR_GREEN "\33[32m"
#define COLOR_ARCH "\33[33m"
#define COLOR_DEFAULT "\33[0m"
#else
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#endif
     43      1.1.1.2  alnsn 
/* S("...") spells a string literal in the character width selected at
   build time: wide (L"...") by default, narrow when
   REGEX_USE_8BIT_CHARS is defined. */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str)	L##str
#else
#define S(str)	str
#endif
     49          1.1  alnsn 
#ifdef REGEX_MATCH_VERBOSE

/* printf conversion for regex_char_t strings: S() produces wide literals
   unless REGEX_USE_8BIT_CHARS is defined, and wide strings need "%ls". */
#ifndef REGEX_STR_FMT
#ifdef REGEX_USE_8BIT_CHARS
#define REGEX_STR_FMT "%s"
#else
#define REGEX_STR_FMT "%ls"
#endif
#endif

/*
 * Debugging aid: compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE,
 * runs regex_continue_match_debug() over `string`, and prints the
 * begin/end/id values reported by regex_get_result().
 *
 * Both arguments are zero-terminated regex_char_t strings. Nothing is
 * returned; compile errors and allocation failures are printed to stdout.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* Find the terminator to get the pattern length. */
	ptr = pattern;
	while (*ptr)
		ptr++;

	printf("Start test '" REGEX_STR_FMT "' matches to '" REGEX_STR_FMT "'\n", pattern, string);
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same terminator scan for the subject string. */
	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
     95          1.1  alnsn 
     96          1.1  alnsn struct test_case {
     97          1.1  alnsn 	int begin;	/* Expected begin. */
     98          1.1  alnsn 	int end;	/* Expected end. */
     99          1.1  alnsn 	int id;		/* Expected id. */
    100          1.1  alnsn 	int finished;	/* -1 : don't care, 0 : false, 1 : true. */
    101          1.1  alnsn 	int flags;	/* REGEX_MATCH_* */
    102          1.1  alnsn 	const regex_char_t *pattern;	/* NULL : use the previous pattern. */
    103          1.1  alnsn 	const regex_char_t *string;	/* NULL : end of tests. */
    104          1.1  alnsn };
    105          1.1  alnsn 
    106      1.1.1.2  alnsn void run_tests(struct test_case* test, int verbose, int silent)
    107          1.1  alnsn {
    108          1.1  alnsn 	int error;
    109          1.1  alnsn 	const regex_char_t *ptr;
    110          1.1  alnsn 	struct regex_machine* machine = NULL;
    111          1.1  alnsn 	struct regex_match* match;
    112          1.1  alnsn 	int begin, end, id, finished;
    113          1.1  alnsn 	int success = 0, fail = 0;
    114          1.1  alnsn 
    115      1.1.1.2  alnsn 	if (!verbose && !silent)
    116      1.1.1.2  alnsn 		printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
    117      1.1.1.2  alnsn 
    118          1.1  alnsn 	for ( ; test->string ; test++) {
    119      1.1.1.2  alnsn 		if (verbose)
    120      1.1.1.2  alnsn 			printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    121          1.1  alnsn 		fail++;
    122          1.1  alnsn 
    123          1.1  alnsn 		if (test->pattern) {
    124          1.1  alnsn 			if (machine)
    125          1.1  alnsn 				regex_free_machine(machine);
    126          1.1  alnsn 
    127          1.1  alnsn 			ptr = test->pattern;
    128          1.1  alnsn 			while (*ptr)
    129          1.1  alnsn 				ptr++;
    130          1.1  alnsn 
    131          1.1  alnsn 			machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
    132          1.1  alnsn 
    133          1.1  alnsn 			if (error) {
    134      1.1.1.2  alnsn 				if (!verbose)
    135      1.1.1.2  alnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    136          1.1  alnsn 				printf("ABORT: Error %d\n", error);
    137          1.1  alnsn 				return;
    138          1.1  alnsn 			}
    139          1.1  alnsn 			if (!machine) {
    140      1.1.1.2  alnsn 				if (!verbose)
    141      1.1.1.2  alnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    142          1.1  alnsn 				printf("ABORT: machine must be exists. Report this bug, please\n");
    143          1.1  alnsn 				return;
    144          1.1  alnsn 			}
    145          1.1  alnsn 		}
    146          1.1  alnsn 		else if (test->flags != 0) {
    147      1.1.1.2  alnsn 			if (!verbose)
    148      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    149          1.1  alnsn 			printf("ABORT: flag must be 0 if no pattern\n");
    150          1.1  alnsn 			return;
    151          1.1  alnsn 		}
    152          1.1  alnsn 
    153          1.1  alnsn 		ptr = test->string;
    154          1.1  alnsn 		while (*ptr)
    155          1.1  alnsn 			ptr++;
    156          1.1  alnsn 
    157          1.1  alnsn 		match = regex_begin_match(machine);
    158          1.1  alnsn #ifdef REGEX_MATCH_VERBOSE
    159          1.1  alnsn 		if (!match) {
    160      1.1.1.2  alnsn 			if (!verbose)
    161      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    162          1.1  alnsn 			printf("ABORT: Not enough memory for matching\n");
    163          1.1  alnsn 			regex_free_machine(machine);
    164          1.1  alnsn 			return;
    165          1.1  alnsn 		}
    166          1.1  alnsn 		regex_continue_match_debug(match, test->string, ptr - test->string);
    167          1.1  alnsn 		begin = regex_get_result(match, &end, &id);
    168          1.1  alnsn 		finished = regex_is_match_finished(match);
    169          1.1  alnsn 
    170          1.1  alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    171      1.1.1.2  alnsn 			if (!verbose)
    172      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    173          1.1  alnsn 			printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    174          1.1  alnsn 			continue;
    175          1.1  alnsn 		}
    176          1.1  alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    177      1.1.1.2  alnsn 			if (!verbose)
    178      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    179          1.1  alnsn 			printf("FAIL A: finish check\n");
    180          1.1  alnsn 			continue;
    181          1.1  alnsn 		}
    182          1.1  alnsn #endif
    183          1.1  alnsn 
    184          1.1  alnsn 		regex_reset_match(match);
    185          1.1  alnsn 		regex_continue_match(match, test->string, ptr - test->string);
    186          1.1  alnsn 		begin = regex_get_result(match, &end, &id);
    187          1.1  alnsn 		finished = regex_is_match_finished(match);
    188          1.1  alnsn 		regex_free_match(match);
    189          1.1  alnsn 
    190          1.1  alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    191      1.1.1.2  alnsn 			if (!verbose)
    192      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    193          1.1  alnsn 			printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    194          1.1  alnsn 			continue;
    195          1.1  alnsn 		}
    196          1.1  alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    197      1.1.1.2  alnsn 			if (!verbose)
    198      1.1.1.2  alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    199          1.1  alnsn 			printf("FAIL B: finish check\n");
    200          1.1  alnsn 			continue;
    201          1.1  alnsn 		}
    202          1.1  alnsn 
    203      1.1.1.2  alnsn 		if (verbose)
    204      1.1.1.2  alnsn 			printf("SUCCESS\n");
    205          1.1  alnsn 		fail--;
    206          1.1  alnsn 		success++;
    207          1.1  alnsn 	}
    208          1.1  alnsn 	if (machine)
    209          1.1  alnsn 		regex_free_machine(machine);
    210          1.1  alnsn 
    211  1.1.1.2.4.1  skrll 	printf("REGEX tests: ");
    212          1.1  alnsn 	if (fail == 0)
    213  1.1.1.2.4.1  skrll 		printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
    214          1.1  alnsn 	else
    215  1.1.1.2.4.1  skrll 		printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
    216  1.1.1.2.4.1  skrll 	printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
    217          1.1  alnsn }
    218          1.1  alnsn 
    219          1.1  alnsn /* Testing. */
    220          1.1  alnsn 
    221          1.1  alnsn static struct test_case tests[] = {
    222          1.1  alnsn { 3, 7, 0, -1, 0,
    223          1.1  alnsn   S("text"), S("is textile") },
    224          1.1  alnsn { 0, 10, 0, -1, 0,
    225          1.1  alnsn   S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
    226          1.1  alnsn { -1, 0, 0, 1, 0,
    227          1.1  alnsn   S("^a+"), S("saaaa") },
    228          1.1  alnsn { 3, 6, 0, 0, 0,
    229          1.1  alnsn   S("(a+|b+)$"), S("saabbb") },
    230          1.1  alnsn { 1, 6, 0, 0, 0,
    231          1.1  alnsn   S("(a+|b+){,2}$"), S("saabbb") },
    232          1.1  alnsn { 1, 6, 0, 1, 0,
    233          1.1  alnsn   S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
    234          1.1  alnsn { 1, 6, 0, 1, 0,
    235          1.1  alnsn   S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
    236          1.1  alnsn { -1, 0, 0, 1, 0,
    237          1.1  alnsn   S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
    238          1.1  alnsn { 0, 3, 1, -1, 0,
    239          1.1  alnsn   S("^(ab{001!})?c"), S("abcde") },
    240          1.1  alnsn { 1, 15, 2, -1, 0,
    241          1.1  alnsn   S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
    242          1.1  alnsn { 2, 9, 0, -1, 0,
    243          1.1  alnsn   NULL, S("cacaadaadaa") },
    244          1.1  alnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
    245          1.1  alnsn   S("(((ab?c|d{1})))"), S("ad") },
    246          1.1  alnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
    247          1.1  alnsn   S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
    248          1.1  alnsn { 1, 6, 0, 0, REGEX_MATCH_END,
    249          1.1  alnsn   S("(a+(bb|cc?)?){4,}"), S("maaaac") },
    250          1.1  alnsn { 3, 12, 1, 0, REGEX_MATCH_END,
    251          1.1  alnsn   S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
    252          1.1  alnsn { 1, 2, 3, -1, 0,
    253          1.1  alnsn   S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
    254          1.1  alnsn { 1, 4, 2, 1, 0,
    255          1.1  alnsn   NULL, S("sxxaxxxaccacca") },
    256          1.1  alnsn { 0, 2, 1, 1, 0,
    257          1.1  alnsn   NULL, S("ccdcdcdddddcdccccd") },
    258          1.1  alnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
    259          1.1  alnsn   S("^a+a+a+"), S("aaaaaa") },
    260          1.1  alnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
    261          1.1  alnsn   S("a+a+a+"), S("bbaaaaaa") },
    262          1.1  alnsn { 1, 4, 0, 1, 0,
    263          1.1  alnsn   S("baa|a+"), S("sbaaaaaa") },
    264          1.1  alnsn { 0, 6, 0, 1, 0,
    265          1.1  alnsn   S("baaa|baa|sbaaaa"), S("sbaaaaa") },
    266          1.1  alnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
    267          1.1  alnsn   S("baaa|baa"), S("xbaaa") },
    268          1.1  alnsn { 0, 0, 3, 1, 0,
    269          1.1  alnsn   S("{3!}"), S("xx") },
    270          1.1  alnsn { 0, 0, 1, 1, 0,
    271          1.1  alnsn   S("{1!}(a{2!})*"), S("xx") },
    272          1.1  alnsn { 0, 2, 2, 0, 0,
    273          1.1  alnsn   NULL, S("aa") },
    274          1.1  alnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
    275          1.1  alnsn   S("{1!}(a{2!})*"), S("aaxx") },
    276          1.1  alnsn { 4, 12, 0, 1, 0,
    277          1.1  alnsn   S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
    278          1.1  alnsn { 3, 7, 1, 1, 0,
    279          1.1  alnsn   S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
    280          1.1  alnsn { 0, 8, 3, 0, 0,
    281          1.1  alnsn   S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
    282          1.1  alnsn { 0, 9, 0, 0, 0,
    283          1.1  alnsn   NULL, S("x-y[-][]x") },
    284          1.1  alnsn { 2, 8, 0, 1, 0,
    285          1.1  alnsn   S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
    286          1.1  alnsn { 2, 9, 1, 1, 0,
    287          1.1  alnsn   NULL, S("  </html><html> ") },
    288          1.1  alnsn { 2, 9, 0, 1, 0,
    289          1.1  alnsn   S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
    290          1.1  alnsn { 1, 4, 0, 1, 0,
    291          1.1  alnsn   S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
    292          1.1  alnsn { 4, 11, 0, 0, 0,
    293          1.1  alnsn   NULL, S("ssaymmaa_ccl") },
    294          1.1  alnsn { 3, 6, 0, 1, REGEX_NEWLINE,
    295          1.1  alnsn   S(".a[^k]"), S("\na\nxa\ns") },
    296          1.1  alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
    297          1.1  alnsn   S("^a+"), S("aa\n") },
    298          1.1  alnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
    299          1.1  alnsn   NULL, S("\naaa\n") },
    300          1.1  alnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
    301          1.1  alnsn   NULL, S("\n\na\n") },
    302          1.1  alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
    303          1.1  alnsn   S("a+$"), S("aa\n") },
    304          1.1  alnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
    305          1.1  alnsn   NULL, S("aaa") },
    306          1.1  alnsn { 2, 4, 1, 1, REGEX_NEWLINE,
    307          1.1  alnsn   S("^a(a{1!})*$"), S("\n\naa\n\n") },
    308          1.1  alnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
    309          1.1  alnsn   NULL, S("a") },
    310          1.1  alnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
    311          1.1  alnsn   NULL, S("ab\nba") },
    312          1.1  alnsn { -1, 0, 0, 0, 0,
    313          1.1  alnsn   NULL, NULL }
    314          1.1  alnsn };
    315          1.1  alnsn 
    316          1.1  alnsn int main(int argc, char* argv[])
    317          1.1  alnsn {
    318      1.1.1.2  alnsn 	int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
    319      1.1.1.2  alnsn 
    320          1.1  alnsn /*	verbose_test("a((b)((c|d))|)c|"); */
    321          1.1  alnsn /*	verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
    322          1.1  alnsn /*	verbose_test("{3!}({3})({0!}){,"); */
    323          1.1  alnsn /*	verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
    324          1.1  alnsn /*	verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
    325          1.1  alnsn /*	verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
    326          1.1  alnsn 
    327      1.1.1.2  alnsn 	run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
    328          1.1  alnsn 	return 0;
    329          1.1  alnsn }
    330