Home | History | Annotate | Line # | Download | only in regex_src
regexMain.c revision 1.1
      1  1.1  alnsn /*
      2  1.1  alnsn  *    Stack-less Just-In-Time compiler
      3  1.1  alnsn  *
      4  1.1  alnsn  *    Copyright 2009-2010 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  1.1  alnsn  *
      6  1.1  alnsn  * Redistribution and use in source and binary forms, with or without modification, are
      7  1.1  alnsn  * permitted provided that the following conditions are met:
      8  1.1  alnsn  *
      9  1.1  alnsn  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  1.1  alnsn  *      conditions and the following disclaimer.
     11  1.1  alnsn  *
     12  1.1  alnsn  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  1.1  alnsn  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  1.1  alnsn  *      provided with the distribution.
     15  1.1  alnsn  *
     16  1.1  alnsn  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  1.1  alnsn  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  1.1  alnsn  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  1.1  alnsn  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  1.1  alnsn  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  1.1  alnsn  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  1.1  alnsn  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  1.1  alnsn  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  1.1  alnsn  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  1.1  alnsn  */
     26  1.1  alnsn 
     27  1.1  alnsn #include "regexJIT.h"
     28  1.1  alnsn 
     29  1.1  alnsn #include <stdio.h>
     30  1.1  alnsn 
/*
 * S(str) wraps the string literals used by the tests below. The literal is
 * left untouched when REGEX_USE_8BIT_CHARS is defined; otherwise it receives
 * the wide-literal L prefix.
 * NOTE(review): presumably this mirrors how regex_char_t is selected in
 * regexJIT.h -- confirm against that header.
 */
#ifdef REGEX_USE_8BIT_CHARS
#define S(str)	str
#else
#define S(str)	L##str
#endif
     36  1.1  alnsn 
#ifdef REGEX_MATCH_VERBOSE
/*
 * Debug helper: compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE,
 * runs one debug match over `string` and prints the result to stdout.
 *
 * pattern, string: zero-terminated regex_char_t strings.
 *
 * Every failure (compile error, missing machine, allocation failure) is
 * reported on stdout and the function returns without a status value.
 * Only compiled when REGEX_MATCH_VERBOSE is defined.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* regex_compile() takes an explicit length, so locate the terminator. */
	ptr = pattern;
	while (*ptr)
		ptr++;

	/* The strings are wide unless REGEX_USE_8BIT_CHARS is defined (see the
	   S() macro), so the conversion specifier has to follow that choice. */
#ifdef REGEX_USE_8BIT_CHARS
	printf("Start test '%s' matches to '%s'\n", pattern, string);
#else
	printf("Start test '%ls' matches to '%ls'\n", pattern, string);
#endif
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		/* Do not leak a machine if one was returned together with an error. */
		if (machine)
			regex_free_machine(machine);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same length computation for the subject string. */
	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
     82  1.1  alnsn 
/*
 * One entry of the table consumed by run_tests().
 *
 * begin, end and id are compared with the values produced by
 * regex_get_result(); finished is compared with the truth value of
 * regex_is_match_finished(). The field order matters because the table
 * uses positional initializers.
 */
struct test_case {
	int begin;	/* Expected begin (the value returned by regex_get_result). */
	int end;	/* Expected end. */
	int id;		/* Expected id. */
	int finished;	/* -1 : don't care, 0 : false, 1 : true. */
	int flags;	/* Passed to regex_compile(); must be 0 when pattern is NULL. */
	const regex_char_t *pattern;	/* NULL : use the previous pattern. */
	const regex_char_t *string;	/* NULL : end of tests. */
};
     92  1.1  alnsn 
     93  1.1  alnsn void run_tests(struct test_case* test)
     94  1.1  alnsn {
     95  1.1  alnsn 	int error;
     96  1.1  alnsn 	const regex_char_t *ptr;
     97  1.1  alnsn 	struct regex_machine* machine = NULL;
     98  1.1  alnsn 	struct regex_match* match;
     99  1.1  alnsn 	int begin, end, id, finished;
    100  1.1  alnsn 	int success = 0, fail = 0;
    101  1.1  alnsn 
    102  1.1  alnsn 	for ( ; test->string ; test++) {
    103  1.1  alnsn 		printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    104  1.1  alnsn 		fail++;
    105  1.1  alnsn 
    106  1.1  alnsn 		if (test->pattern) {
    107  1.1  alnsn 			if (machine)
    108  1.1  alnsn 				regex_free_machine(machine);
    109  1.1  alnsn 
    110  1.1  alnsn 			ptr = test->pattern;
    111  1.1  alnsn 			while (*ptr)
    112  1.1  alnsn 				ptr++;
    113  1.1  alnsn 
    114  1.1  alnsn 			machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
    115  1.1  alnsn 
    116  1.1  alnsn 			if (error) {
    117  1.1  alnsn 				printf("ABORT: Error %d\n", error);
    118  1.1  alnsn 				return;
    119  1.1  alnsn 			}
    120  1.1  alnsn 			if (!machine) {
    121  1.1  alnsn 				printf("ABORT: machine must be exists. Report this bug, please\n");
    122  1.1  alnsn 				return;
    123  1.1  alnsn 			}
    124  1.1  alnsn 		}
    125  1.1  alnsn 		else if (test->flags != 0) {
    126  1.1  alnsn 			printf("ABORT: flag must be 0 if no pattern\n");
    127  1.1  alnsn 			return;
    128  1.1  alnsn 		}
    129  1.1  alnsn 
    130  1.1  alnsn 		ptr = test->string;
    131  1.1  alnsn 		while (*ptr)
    132  1.1  alnsn 			ptr++;
    133  1.1  alnsn 
    134  1.1  alnsn 		match = regex_begin_match(machine);
    135  1.1  alnsn #ifdef REGEX_MATCH_VERBOSE
    136  1.1  alnsn 		if (!match) {
    137  1.1  alnsn 			printf("ABORT: Not enough memory for matching\n");
    138  1.1  alnsn 			regex_free_machine(machine);
    139  1.1  alnsn 			return;
    140  1.1  alnsn 		}
    141  1.1  alnsn 		regex_continue_match_debug(match, test->string, ptr - test->string);
    142  1.1  alnsn 		begin = regex_get_result(match, &end, &id);
    143  1.1  alnsn 		finished = regex_is_match_finished(match);
    144  1.1  alnsn 
    145  1.1  alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    146  1.1  alnsn 			printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    147  1.1  alnsn 			continue;
    148  1.1  alnsn 		}
    149  1.1  alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    150  1.1  alnsn 			printf("FAIL A: finish check\n");
    151  1.1  alnsn 			continue;
    152  1.1  alnsn 		}
    153  1.1  alnsn #endif
    154  1.1  alnsn 
    155  1.1  alnsn 		regex_reset_match(match);
    156  1.1  alnsn 		regex_continue_match(match, test->string, ptr - test->string);
    157  1.1  alnsn 		begin = regex_get_result(match, &end, &id);
    158  1.1  alnsn 		finished = regex_is_match_finished(match);
    159  1.1  alnsn 		regex_free_match(match);
    160  1.1  alnsn 
    161  1.1  alnsn 		if (begin != test->begin || end != test->end || id != test->id) {
    162  1.1  alnsn 			printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    163  1.1  alnsn 			continue;
    164  1.1  alnsn 		}
    165  1.1  alnsn 		if (test->finished != -1 && test->finished != !!finished) {
    166  1.1  alnsn 			printf("FAIL B: finish check\n");
    167  1.1  alnsn 			continue;
    168  1.1  alnsn 		}
    169  1.1  alnsn 
    170  1.1  alnsn 		printf("SUCCESS\n");
    171  1.1  alnsn 		fail--;
    172  1.1  alnsn 		success++;
    173  1.1  alnsn 	}
    174  1.1  alnsn 	if (machine)
    175  1.1  alnsn 		regex_free_machine(machine);
    176  1.1  alnsn 
    177  1.1  alnsn 	printf("On %s: ", regex_get_platform_name());
    178  1.1  alnsn 	if (fail == 0)
    179  1.1  alnsn 		printf("All tests are passed!\n");
    180  1.1  alnsn 	else
    181  1.1  alnsn 		printf("Successful test ratio: %d%%.\n", success * 100 / (success + fail));
    182  1.1  alnsn }
    183  1.1  alnsn 
/*
 * Test table consumed by run_tests().
 *
 * Each entry is { begin, end, id, finished, flags, pattern, string } in the
 * field order of struct test_case. A NULL pattern reuses the machine
 * compiled for the previous pattern; such entries must give 0 as flags, and
 * the "=REGEX_NEWLINE" remarks note the flag the reused machine was
 * compiled with. The final entry, whose string is NULL, ends the table.
 */

static struct test_case tests[] = {
{ 3, 7, 0, -1, 0,
  S("text"), S("is textile") },
{ 0, 10, 0, -1, 0,
  S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
{ -1, 0, 0, 1, 0,
  S("^a+"), S("saaaa") },
{ 3, 6, 0, 0, 0,
  S("(a+|b+)$"), S("saabbb") },
{ 1, 6, 0, 0, 0,
  S("(a+|b+){,2}$"), S("saabbb") },
{ 1, 6, 0, 1, 0,
  S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
{ 1, 6, 0, 1, 0,
  S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
{ -1, 0, 0, 1, 0,
  S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
{ 0, 3, 1, -1, 0,
  S("^(ab{001!})?c"), S("abcde") },
{ 1, 15, 2, -1, 0,
  S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
{ 2, 9, 0, -1, 0,
  NULL, S("cacaadaadaa") },
{ -1, 0, 0, -1, REGEX_MATCH_BEGIN,
  S("(((ab?c|d{1})))"), S("ad") },
{ 0, 9, 3, -1, REGEX_MATCH_BEGIN,
  S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
{ 1, 6, 0, 0, REGEX_MATCH_END,
  S("(a+(bb|cc?)?){4,}"), S("maaaac") },
{ 3, 12, 1, 0, REGEX_MATCH_END,
  S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
{ 1, 2, 3, -1, 0,
  S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
{ 1, 4, 2, 1, 0,
  NULL, S("sxxaxxxaccacca") },
{ 0, 2, 1, 1, 0,
  NULL, S("ccdcdcdddddcdccccd") },
{ 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("^a+a+a+"), S("aaaaaa") },
{ 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("a+a+a+"), S("bbaaaaaa") },
{ 1, 4, 0, 1, 0,
  S("baa|a+"), S("sbaaaaaa") },
{ 0, 6, 0, 1, 0,
  S("baaa|baa|sbaaaa"), S("sbaaaaa") },
{ 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
  S("baaa|baa"), S("xbaaa") },
{ 0, 0, 3, 1, 0,
  S("{3!}"), S("xx") },
{ 0, 0, 1, 1, 0,
  S("{1!}(a{2!})*"), S("xx") },
{ 0, 2, 2, 0, 0,
  NULL, S("aa") },
{ 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
  S("{1!}(a{2!})*"), S("aaxx") },
{ 4, 12, 0, 1, 0,
  S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
{ 3, 7, 1, 1, 0,
  S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
{ 0, 8, 3, 0, 0,
  S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
{ 0, 9, 0, 0, 0,
  NULL, S("x-y[-][]x") },
{ 2, 8, 0, 1, 0,
  S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
{ 2, 9, 1, 1, 0,
  NULL, S("  </html><html> ") },
{ 2, 9, 0, 1, 0,
  S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
{ 1, 4, 0, 1, 0,
  S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
{ 4, 11, 0, 0, 0,
  NULL, S("ssaymmaa_ccl") },
{ 3, 6, 0, 1, REGEX_NEWLINE,
  S(".a[^k]"), S("\na\nxa\ns") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("^a+"), S("aa\n") },
{ 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
  NULL, S("\naaa\n") },
{ 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
  NULL, S("\n\na\n") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("a+$"), S("aa\n") },
{ 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
  NULL, S("aaa") },
{ 2, 4, 1, 1, REGEX_NEWLINE,
  S("^a(a{1!})*$"), S("\n\naa\n\n") },
{ 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
  NULL, S("a") },
{ -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
  NULL, S("ab\nba") },
{ -1, 0, 0, 0, 0,
  NULL, NULL }
};
    280  1.1  alnsn 
    281  1.1  alnsn int main(int argc, char* argv[])
    282  1.1  alnsn {
    283  1.1  alnsn /*	verbose_test("a((b)((c|d))|)c|"); */
    284  1.1  alnsn /*	verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
    285  1.1  alnsn /*	verbose_test("{3!}({3})({0!}){,"); */
    286  1.1  alnsn /*	verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
    287  1.1  alnsn /*	verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
    288  1.1  alnsn /*	verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
    289  1.1  alnsn 
    290  1.1  alnsn 	run_tests(tests);
    291  1.1  alnsn 	return 0;
    292  1.1  alnsn }
    293  1.1  alnsn 
    294