Home | History | Annotate | Line # | Download | only in regex_src
regexMain.c revision 1.1.1.3.16.1
      1           1.1     alnsn /*
      2           1.1     alnsn  *    Stack-less Just-In-Time compiler
      3           1.1     alnsn  *
      4  1.1.1.3.16.1  pgoyette  *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5           1.1     alnsn  *
      6           1.1     alnsn  * Redistribution and use in source and binary forms, with or without modification, are
      7           1.1     alnsn  * permitted provided that the following conditions are met:
      8           1.1     alnsn  *
      9           1.1     alnsn  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10           1.1     alnsn  *      conditions and the following disclaimer.
     11           1.1     alnsn  *
     12           1.1     alnsn  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13           1.1     alnsn  *      of conditions and the following disclaimer in the documentation and/or other materials
     14           1.1     alnsn  *      provided with the distribution.
     15           1.1     alnsn  *
     16           1.1     alnsn  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17           1.1     alnsn  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18           1.1     alnsn  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19           1.1     alnsn  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20           1.1     alnsn  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21           1.1     alnsn  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22           1.1     alnsn  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23           1.1     alnsn  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24           1.1     alnsn  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25           1.1     alnsn  */
     26           1.1     alnsn 
     27       1.1.1.2     alnsn /* Must be the first one. Must not depend on any other include. */
     28  1.1.1.3.16.1  pgoyette #include "sljitLir.h"
     29           1.1     alnsn #include "regexJIT.h"
     30           1.1     alnsn 
     31           1.1     alnsn #include <stdio.h>
     32           1.1     alnsn 
     33       1.1.1.2     alnsn #if defined _WIN32 || defined _WIN64
     34       1.1.1.2     alnsn #define COLOR_RED
     35       1.1.1.2     alnsn #define COLOR_GREEN
     36       1.1.1.2     alnsn #define COLOR_ARCH
     37       1.1.1.2     alnsn #define COLOR_DEFAULT
     38       1.1.1.2     alnsn #else
     39       1.1.1.2     alnsn #define COLOR_RED "\33[31m"
     40       1.1.1.2     alnsn #define COLOR_GREEN "\33[32m"
     41       1.1.1.2     alnsn #define COLOR_ARCH "\33[33m"
     42       1.1.1.2     alnsn #define COLOR_DEFAULT "\33[0m"
     43       1.1.1.2     alnsn #endif
     44       1.1.1.2     alnsn 
     45           1.1     alnsn #ifdef REGEX_USE_8BIT_CHARS
     46           1.1     alnsn #define S(str)	str
     47           1.1     alnsn #else
     48           1.1     alnsn #define S(str)	L##str
     49           1.1     alnsn #endif
     50           1.1     alnsn 
     51           1.1     alnsn #ifdef REGEX_MATCH_VERBOSE
     52           1.1     alnsn void verbose_test(regex_char_t *pattern, regex_char_t *string)
     53           1.1     alnsn {
     54           1.1     alnsn 	int error;
     55           1.1     alnsn 	regex_char_t *ptr;
     56           1.1     alnsn 	struct regex_machine* machine;
     57           1.1     alnsn 	struct regex_match* match;
     58           1.1     alnsn 	int begin, end, id;
     59           1.1     alnsn 
     60           1.1     alnsn 	ptr = pattern;
     61           1.1     alnsn 	while (*ptr)
     62           1.1     alnsn 		ptr++;
     63           1.1     alnsn 
     64           1.1     alnsn 	printf("Start test '%s' matches to '%s'\n", pattern, string);
     65           1.1     alnsn 	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);
     66           1.1     alnsn 
     67           1.1     alnsn 	if (error) {
     68           1.1     alnsn 		printf("WARNING: Error %d\n", error);
     69           1.1     alnsn 		return;
     70           1.1     alnsn 	}
     71           1.1     alnsn 	if (!machine) {
     72           1.1     alnsn 		printf("ERROR: machine must be exists. Report this bug, please\n");
     73           1.1     alnsn 		return;
     74           1.1     alnsn 	}
     75           1.1     alnsn 
     76           1.1     alnsn 	match = regex_begin_match(machine);
     77           1.1     alnsn 	if (!match) {
     78           1.1     alnsn 		printf("WARNING: Not enough memory for matching\n");
     79           1.1     alnsn 		regex_free_machine(machine);
     80           1.1     alnsn 		return;
     81           1.1     alnsn 	}
     82           1.1     alnsn 
     83           1.1     alnsn 	ptr = string;
     84           1.1     alnsn 	while (*ptr)
     85           1.1     alnsn 		ptr++;
     86           1.1     alnsn 
     87           1.1     alnsn 	regex_continue_match_debug(match, string, ptr - string);
     88           1.1     alnsn 
     89           1.1     alnsn 	begin = regex_get_result(match, &end, &id);
     90           1.1     alnsn 	printf("Math returns: %3d->%3d [%3d]\n", begin, end, id);
     91           1.1     alnsn 
     92           1.1     alnsn 	regex_free_match(match);
     93           1.1     alnsn 	regex_free_machine(machine);
     94           1.1     alnsn }
     95           1.1     alnsn #endif
     96           1.1     alnsn 
     97           1.1     alnsn struct test_case {
     98           1.1     alnsn 	int begin;	/* Expected begin. */
     99           1.1     alnsn 	int end;	/* Expected end. */
    100           1.1     alnsn 	int id;		/* Expected id. */
    101           1.1     alnsn 	int finished;	/* -1 : don't care, 0 : false, 1 : true. */
    102           1.1     alnsn 	int flags;	/* REGEX_MATCH_* */
    103           1.1     alnsn 	const regex_char_t *pattern;	/* NULL : use the previous pattern. */
    104           1.1     alnsn 	const regex_char_t *string;	/* NULL : end of tests. */
    105           1.1     alnsn };
    106           1.1     alnsn 
    107       1.1.1.2     alnsn void run_tests(struct test_case* test, int verbose, int silent)
    108           1.1     alnsn {
    109           1.1     alnsn 	int error;
    110           1.1     alnsn 	const regex_char_t *ptr;
    111           1.1     alnsn 	struct regex_machine* machine = NULL;
    112           1.1     alnsn 	struct regex_match* match;
    113           1.1     alnsn 	int begin, end, id, finished;
    114           1.1     alnsn 	int success = 0, fail = 0;
    115           1.1     alnsn 
    116       1.1.1.2     alnsn 	if (!verbose && !silent)
    117       1.1.1.2     alnsn 		printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
    118       1.1.1.2     alnsn 
    119           1.1     alnsn 	for ( ; test->string ; test++) {
    120       1.1.1.2     alnsn 		if (verbose)
    121       1.1.1.2     alnsn 			printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    122           1.1     alnsn 		fail++;
    123           1.1     alnsn 
    124           1.1     alnsn 		if (test->pattern) {
    125           1.1     alnsn 			if (machine)
    126           1.1     alnsn 				regex_free_machine(machine);
    127           1.1     alnsn 
    128           1.1     alnsn 			ptr = test->pattern;
    129           1.1     alnsn 			while (*ptr)
    130           1.1     alnsn 				ptr++;
    131           1.1     alnsn 
    132           1.1     alnsn 			machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
    133           1.1     alnsn 
    134           1.1     alnsn 			if (error) {
    135       1.1.1.2     alnsn 				if (!verbose)
    136       1.1.1.2     alnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    137           1.1     alnsn 				printf("ABORT: Error %d\n", error);
    138           1.1     alnsn 				return;
    139           1.1     alnsn 			}
    140           1.1     alnsn 			if (!machine) {
    141       1.1.1.2     alnsn 				if (!verbose)
    142       1.1.1.2     alnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    143           1.1     alnsn 				printf("ABORT: machine must be exists. Report this bug, please\n");
    144           1.1     alnsn 				return;
    145           1.1     alnsn 			}
    146           1.1     alnsn 		}
    147           1.1     alnsn 		else if (test->flags != 0) {
    148       1.1.1.2     alnsn 			if (!verbose)
    149       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    150           1.1     alnsn 			printf("ABORT: flag must be 0 if no pattern\n");
    151           1.1     alnsn 			return;
    152           1.1     alnsn 		}
    153           1.1     alnsn 
    154           1.1     alnsn 		ptr = test->string;
    155           1.1     alnsn 		while (*ptr)
    156           1.1     alnsn 			ptr++;
    157           1.1     alnsn 
    158           1.1     alnsn 		match = regex_begin_match(machine);
    159           1.1     alnsn #ifdef REGEX_MATCH_VERBOSE
    160           1.1     alnsn 		if (!match) {
    161       1.1.1.2     alnsn 			if (!verbose)
    162       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    163           1.1     alnsn 			printf("ABORT: Not enough memory for matching\n");
    164           1.1     alnsn 			regex_free_machine(machine);
    165           1.1     alnsn 			return;
    166           1.1     alnsn 		}
    167           1.1     alnsn 		regex_continue_match_debug(match, test->string, ptr - test->string);
    168           1.1     alnsn 		begin = regex_get_result(match, &end, &id);
    169           1.1     alnsn 		finished = regex_is_match_finished(match);
    170           1.1     alnsn 
    171           1.1     alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    172       1.1.1.2     alnsn 			if (!verbose)
    173       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    174           1.1     alnsn 			printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    175           1.1     alnsn 			continue;
    176           1.1     alnsn 		}
    177           1.1     alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    178       1.1.1.2     alnsn 			if (!verbose)
    179       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    180           1.1     alnsn 			printf("FAIL A: finish check\n");
    181           1.1     alnsn 			continue;
    182           1.1     alnsn 		}
    183           1.1     alnsn #endif
    184           1.1     alnsn 
    185           1.1     alnsn 		regex_reset_match(match);
    186           1.1     alnsn 		regex_continue_match(match, test->string, ptr - test->string);
    187           1.1     alnsn 		begin = regex_get_result(match, &end, &id);
    188           1.1     alnsn 		finished = regex_is_match_finished(match);
    189           1.1     alnsn 		regex_free_match(match);
    190           1.1     alnsn 
    191           1.1     alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    192       1.1.1.2     alnsn 			if (!verbose)
    193       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    194           1.1     alnsn 			printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    195           1.1     alnsn 			continue;
    196           1.1     alnsn 		}
    197           1.1     alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    198       1.1.1.2     alnsn 			if (!verbose)
    199       1.1.1.2     alnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    200           1.1     alnsn 			printf("FAIL B: finish check\n");
    201           1.1     alnsn 			continue;
    202           1.1     alnsn 		}
    203           1.1     alnsn 
    204       1.1.1.2     alnsn 		if (verbose)
    205       1.1.1.2     alnsn 			printf("SUCCESS\n");
    206           1.1     alnsn 		fail--;
    207           1.1     alnsn 		success++;
    208           1.1     alnsn 	}
    209           1.1     alnsn 	if (machine)
    210           1.1     alnsn 		regex_free_machine(machine);
    211           1.1     alnsn 
    212       1.1.1.3     alnsn 	printf("REGEX tests: ");
    213           1.1     alnsn 	if (fail == 0)
    214       1.1.1.3     alnsn 		printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
    215           1.1     alnsn 	else
    216       1.1.1.3     alnsn 		printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
    217       1.1.1.3     alnsn 	printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
    218           1.1     alnsn }
    219           1.1     alnsn 
    220           1.1     alnsn /* Testing. */
    221           1.1     alnsn 
    222           1.1     alnsn static struct test_case tests[] = {
    223           1.1     alnsn { 3, 7, 0, -1, 0,
    224           1.1     alnsn   S("text"), S("is textile") },
    225           1.1     alnsn { 0, 10, 0, -1, 0,
    226           1.1     alnsn   S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
    227           1.1     alnsn { -1, 0, 0, 1, 0,
    228           1.1     alnsn   S("^a+"), S("saaaa") },
    229           1.1     alnsn { 3, 6, 0, 0, 0,
    230           1.1     alnsn   S("(a+|b+)$"), S("saabbb") },
    231           1.1     alnsn { 1, 6, 0, 0, 0,
    232           1.1     alnsn   S("(a+|b+){,2}$"), S("saabbb") },
    233           1.1     alnsn { 1, 6, 0, 1, 0,
    234           1.1     alnsn   S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
    235           1.1     alnsn { 1, 6, 0, 1, 0,
    236           1.1     alnsn   S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
    237           1.1     alnsn { -1, 0, 0, 1, 0,
    238           1.1     alnsn   S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
    239           1.1     alnsn { 0, 3, 1, -1, 0,
    240           1.1     alnsn   S("^(ab{001!})?c"), S("abcde") },
    241           1.1     alnsn { 1, 15, 2, -1, 0,
    242           1.1     alnsn   S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
    243           1.1     alnsn { 2, 9, 0, -1, 0,
    244           1.1     alnsn   NULL, S("cacaadaadaa") },
    245           1.1     alnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
    246           1.1     alnsn   S("(((ab?c|d{1})))"), S("ad") },
    247           1.1     alnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
    248           1.1     alnsn   S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
    249           1.1     alnsn { 1, 6, 0, 0, REGEX_MATCH_END,
    250           1.1     alnsn   S("(a+(bb|cc?)?){4,}"), S("maaaac") },
    251           1.1     alnsn { 3, 12, 1, 0, REGEX_MATCH_END,
    252           1.1     alnsn   S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
    253           1.1     alnsn { 1, 2, 3, -1, 0,
    254           1.1     alnsn   S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
    255           1.1     alnsn { 1, 4, 2, 1, 0,
    256           1.1     alnsn   NULL, S("sxxaxxxaccacca") },
    257           1.1     alnsn { 0, 2, 1, 1, 0,
    258           1.1     alnsn   NULL, S("ccdcdcdddddcdccccd") },
    259           1.1     alnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
    260           1.1     alnsn   S("^a+a+a+"), S("aaaaaa") },
    261           1.1     alnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
    262           1.1     alnsn   S("a+a+a+"), S("bbaaaaaa") },
    263           1.1     alnsn { 1, 4, 0, 1, 0,
    264           1.1     alnsn   S("baa|a+"), S("sbaaaaaa") },
    265           1.1     alnsn { 0, 6, 0, 1, 0,
    266           1.1     alnsn   S("baaa|baa|sbaaaa"), S("sbaaaaa") },
    267           1.1     alnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
    268           1.1     alnsn   S("baaa|baa"), S("xbaaa") },
    269           1.1     alnsn { 0, 0, 3, 1, 0,
    270           1.1     alnsn   S("{3!}"), S("xx") },
    271           1.1     alnsn { 0, 0, 1, 1, 0,
    272           1.1     alnsn   S("{1!}(a{2!})*"), S("xx") },
    273           1.1     alnsn { 0, 2, 2, 0, 0,
    274           1.1     alnsn   NULL, S("aa") },
    275           1.1     alnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
    276           1.1     alnsn   S("{1!}(a{2!})*"), S("aaxx") },
    277           1.1     alnsn { 4, 12, 0, 1, 0,
    278           1.1     alnsn   S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
    279           1.1     alnsn { 3, 7, 1, 1, 0,
    280           1.1     alnsn   S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
    281           1.1     alnsn { 0, 8, 3, 0, 0,
    282           1.1     alnsn   S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
    283           1.1     alnsn { 0, 9, 0, 0, 0,
    284           1.1     alnsn   NULL, S("x-y[-][]x") },
    285           1.1     alnsn { 2, 8, 0, 1, 0,
    286           1.1     alnsn   S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
    287           1.1     alnsn { 2, 9, 1, 1, 0,
    288           1.1     alnsn   NULL, S("  </html><html> ") },
    289           1.1     alnsn { 2, 9, 0, 1, 0,
    290           1.1     alnsn   S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
    291           1.1     alnsn { 1, 4, 0, 1, 0,
    292           1.1     alnsn   S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
    293           1.1     alnsn { 4, 11, 0, 0, 0,
    294           1.1     alnsn   NULL, S("ssaymmaa_ccl") },
    295           1.1     alnsn { 3, 6, 0, 1, REGEX_NEWLINE,
    296           1.1     alnsn   S(".a[^k]"), S("\na\nxa\ns") },
    297           1.1     alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
    298           1.1     alnsn   S("^a+"), S("aa\n") },
    299           1.1     alnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
    300           1.1     alnsn   NULL, S("\naaa\n") },
    301           1.1     alnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
    302           1.1     alnsn   NULL, S("\n\na\n") },
    303           1.1     alnsn { 0, 2, 0, 1, REGEX_NEWLINE,
    304           1.1     alnsn   S("a+$"), S("aa\n") },
    305           1.1     alnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
    306           1.1     alnsn   NULL, S("aaa") },
    307           1.1     alnsn { 2, 4, 1, 1, REGEX_NEWLINE,
    308           1.1     alnsn   S("^a(a{1!})*$"), S("\n\naa\n\n") },
    309           1.1     alnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
    310           1.1     alnsn   NULL, S("a") },
    311           1.1     alnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
    312           1.1     alnsn   NULL, S("ab\nba") },
    313           1.1     alnsn { -1, 0, 0, 0, 0,
    314           1.1     alnsn   NULL, NULL }
    315           1.1     alnsn };
    316           1.1     alnsn 
    317           1.1     alnsn int main(int argc, char* argv[])
    318           1.1     alnsn {
    319       1.1.1.2     alnsn 	int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
    320       1.1.1.2     alnsn 
    321           1.1     alnsn /*	verbose_test("a((b)((c|d))|)c|"); */
    322           1.1     alnsn /*	verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
    323           1.1     alnsn /*	verbose_test("{3!}({3})({0!}){,"); */
    324           1.1     alnsn /*	verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
    325           1.1     alnsn /*	verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
    326           1.1     alnsn /*	verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
    327           1.1     alnsn 
    328       1.1.1.2     alnsn 	run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
    329  1.1.1.3.16.1  pgoyette 
    330  1.1.1.3.16.1  pgoyette 	sljit_free_unused_memory_exec();
    331  1.1.1.3.16.1  pgoyette 
    332           1.1     alnsn 	return 0;
    333           1.1     alnsn }
    334