Home | History | Annotate | Line # | Download | only in regex_src
regexMain.c revision 1.1.1.1.4.3
      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright 2009-2010 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 /* Must be the first one. Must not depend on any other include. */
     28 #include "regexJIT.h"
     29 
     30 #include <stdio.h>
     31 
/*
 * Colour markers spliced into printf format strings through adjacent string
 * literal concatenation. On Windows they expand to nothing, so the output is
 * printed without colour; elsewhere they expand to ANSI escape sequences.
 */
#if defined _WIN32 || defined _WIN64
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#else
#define COLOR_RED "\33[31m"
#define COLOR_GREEN "\33[32m"
#define COLOR_ARCH "\33[33m"
#define COLOR_DEFAULT "\33[0m"
#endif

/*
 * S("literal") yields a plain string literal when REGEX_USE_8BIT_CHARS is
 * defined and a wide (L"...") literal otherwise. It is used to spell the
 * patterns and subject strings of the test table.
 */
#ifdef REGEX_USE_8BIT_CHARS
#define S(str)	str
#else
#define S(str)	L##str
#endif
     49 
#ifdef REGEX_MATCH_VERBOSE
/*
 * printf conversion for a regex_char_t string: the test literals are wide
 * (L"...") unless REGEX_USE_8BIT_CHARS is defined, so "%s" would be a
 * format/argument mismatch in the wide configuration.
 */
#ifndef TEST_STR_FMT
#ifdef REGEX_USE_8BIT_CHARS
#define TEST_STR_FMT "%s"
#else
#define TEST_STR_FMT "%ls"
#endif
#endif

/*
 * Compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE, runs the debug
 * matcher over `string` and prints the begin/end/id reported by
 * regex_get_result(). Intended for manual debugging of a single pattern.
 *
 * pattern: zero terminated pattern to compile.
 * string:  zero terminated subject string to match against.
 *
 * Prints a diagnostic and returns early when compilation fails or when no
 * match context can be allocated. The machine and the match context are
 * released before returning on every path that created them.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	int error;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* The regex API takes explicit lengths: find the end of the pattern. */
	ptr = pattern;
	while (*ptr)
		ptr++;

	printf("Start test '" TEST_STR_FMT "' matches to '" TEST_STR_FMT "'\n", pattern, string);
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same length computation for the subject string. */
	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
     95 
/*
 * One entry of the regression table consumed by run_tests(). The members are
 * filled by positional initializers, so their order must not change.
 */
struct test_case {
	int begin;	/* Expected return value of regex_get_result(). */
	int end;	/* Expected end reported by regex_get_result(). */
	int id;		/* Expected id reported by regex_get_result(). */
	int finished;	/* Expected regex_is_match_finished(): -1 : don't care, 0 : false, 1 : true. */
	int flags;	/* Flags passed to regex_compile(); must be 0 when pattern is NULL. */
	const regex_char_t *pattern;	/* NULL : reuse the machine compiled for the previous pattern. */
	const regex_char_t *string;	/* Subject string. NULL : end of tests. */
};
    105 
    106 void run_tests(struct test_case* test, int verbose, int silent)
    107 {
    108 	int error;
    109 	const regex_char_t *ptr;
    110 	struct regex_machine* machine = NULL;
    111 	struct regex_match* match;
    112 	int begin, end, id, finished;
    113 	int success = 0, fail = 0;
    114 
    115 	if (!verbose && !silent)
    116 		printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
    117 
    118 	for ( ; test->string ; test++) {
    119 		if (verbose)
    120 			printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    121 		fail++;
    122 
    123 		if (test->pattern) {
    124 			if (machine)
    125 				regex_free_machine(machine);
    126 
    127 			ptr = test->pattern;
    128 			while (*ptr)
    129 				ptr++;
    130 
    131 			machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
    132 
    133 			if (error) {
    134 				if (!verbose)
    135 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    136 				printf("ABORT: Error %d\n", error);
    137 				return;
    138 			}
    139 			if (!machine) {
    140 				if (!verbose)
    141 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    142 				printf("ABORT: machine must be exists. Report this bug, please\n");
    143 				return;
    144 			}
    145 		}
    146 		else if (test->flags != 0) {
    147 			if (!verbose)
    148 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    149 			printf("ABORT: flag must be 0 if no pattern\n");
    150 			return;
    151 		}
    152 
    153 		ptr = test->string;
    154 		while (*ptr)
    155 			ptr++;
    156 
    157 		match = regex_begin_match(machine);
    158 #ifdef REGEX_MATCH_VERBOSE
    159 		if (!match) {
    160 			if (!verbose)
    161 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    162 			printf("ABORT: Not enough memory for matching\n");
    163 			regex_free_machine(machine);
    164 			return;
    165 		}
    166 		regex_continue_match_debug(match, test->string, ptr - test->string);
    167 		begin = regex_get_result(match, &end, &id);
    168 		finished = regex_is_match_finished(match);
    169 
    170 		if (begin != test->begin || end != test->end || id != test->id) {
    171 			if (!verbose)
    172 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    173 			printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    174 			continue;
    175 		}
    176 		if (test->finished != -1 && test->finished != !!finished) {
    177 			if (!verbose)
    178 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    179 			printf("FAIL A: finish check\n");
    180 			continue;
    181 		}
    182 #endif
    183 
    184 		regex_reset_match(match);
    185 		regex_continue_match(match, test->string, ptr - test->string);
    186 		begin = regex_get_result(match, &end, &id);
    187 		finished = regex_is_match_finished(match);
    188 		regex_free_match(match);
    189 
    190 		if (begin != test->begin || end != test->end || id != test->id) {
    191 			if (!verbose)
    192 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    193 			printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
    194 			continue;
    195 		}
    196 		if (test->finished != -1 && test->finished != !!finished) {
    197 			if (!verbose)
    198 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
    199 			printf("FAIL B: finish check\n");
    200 			continue;
    201 		}
    202 
    203 		if (verbose)
    204 			printf("SUCCESS\n");
    205 		fail--;
    206 		success++;
    207 	}
    208 	if (machine)
    209 		regex_free_machine(machine);
    210 
    211 	printf("REGEX tests: On " COLOR_ARCH "%s" COLOR_DEFAULT ": ", regex_get_platform_name());
    212 	if (fail == 0)
    213 		printf("All tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT "!\n");
    214 	else
    215 		printf("Successful test ratio: " COLOR_RED "%d%%" COLOR_DEFAULT ".\n", success * 100 / (success + fail));
    216 }
    217 
/*
 * Regression table for run_tests().
 *
 * Each entry is written on two lines:
 *   { begin, end, id, finished, flags,
 *     pattern, string }
 * A NULL pattern reuses the machine compiled for the closest preceding entry
 * that has a pattern (its flags stay in effect, the entry's own flags must be
 * 0). The entry with a NULL string terminates the table.
 */

static struct test_case tests[] = {
{ 3, 7, 0, -1, 0,
  S("text"), S("is textile") },
{ 0, 10, 0, -1, 0,
  S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
{ -1, 0, 0, 1, 0,
  S("^a+"), S("saaaa") },
{ 3, 6, 0, 0, 0,
  S("(a+|b+)$"), S("saabbb") },
{ 1, 6, 0, 0, 0,
  S("(a+|b+){,2}$"), S("saabbb") },
{ 1, 6, 0, 1, 0,
  S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
{ 1, 6, 0, 1, 0,
  S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
{ -1, 0, 0, 1, 0,
  S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
{ 0, 3, 1, -1, 0,
  S("^(ab{001!})?c"), S("abcde") },
{ 1, 15, 2, -1, 0,
  S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
{ 2, 9, 0, -1, 0,
  NULL, S("cacaadaadaa") },
{ -1, 0, 0, -1, REGEX_MATCH_BEGIN,
  S("(((ab?c|d{1})))"), S("ad") },
{ 0, 9, 3, -1, REGEX_MATCH_BEGIN,
  S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
{ 1, 6, 0, 0, REGEX_MATCH_END,
  S("(a+(bb|cc?)?){4,}"), S("maaaac") },
{ 3, 12, 1, 0, REGEX_MATCH_END,
  S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
{ 1, 2, 3, -1, 0,
  S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
{ 1, 4, 2, 1, 0,
  NULL, S("sxxaxxxaccacca") },
{ 0, 2, 1, 1, 0,
  NULL, S("ccdcdcdddddcdccccd") },
{ 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("^a+a+a+"), S("aaaaaa") },
{ 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
  S("a+a+a+"), S("bbaaaaaa") },
{ 1, 4, 0, 1, 0,
  S("baa|a+"), S("sbaaaaaa") },
{ 0, 6, 0, 1, 0,
  S("baaa|baa|sbaaaa"), S("sbaaaaa") },
{ 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
  S("baaa|baa"), S("xbaaa") },
{ 0, 0, 3, 1, 0,
  S("{3!}"), S("xx") },
{ 0, 0, 1, 1, 0,
  S("{1!}(a{2!})*"), S("xx") },
{ 0, 2, 2, 0, 0,
  NULL, S("aa") },
{ 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
  S("{1!}(a{2!})*"), S("aaxx") },
{ 4, 12, 0, 1, 0,
  S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
{ 3, 7, 1, 1, 0,
  S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
{ 0, 8, 3, 0, 0,
  S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
{ 0, 9, 0, 0, 0,
  NULL, S("x-y[-][]x") },
{ 2, 8, 0, 1, 0,
  S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
{ 2, 9, 1, 1, 0,
  NULL, S("  </html><html> ") },
{ 2, 9, 0, 1, 0,
  S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
{ 1, 4, 0, 1, 0,
  S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
{ 4, 11, 0, 0, 0,
  NULL, S("ssaymmaa_ccl") },
{ 3, 6, 0, 1, REGEX_NEWLINE,
  S(".a[^k]"), S("\na\nxa\ns") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("^a+"), S("aa\n") },
{ 1, 4, 0, 1, 0 /* reused machine was compiled with REGEX_NEWLINE */,
  NULL, S("\naaa\n") },
{ 2, 3, 0, 1, 0 /* reused machine was compiled with REGEX_NEWLINE */,
  NULL, S("\n\na\n") },
{ 0, 2, 0, 1, REGEX_NEWLINE,
  S("a+$"), S("aa\n") },
{ 0, 3, 0, 0, 0 /* reused machine was compiled with REGEX_NEWLINE */,
  NULL, S("aaa") },
{ 2, 4, 1, 1, REGEX_NEWLINE,
  S("^a(a{1!})*$"), S("\n\naa\n\n") },
{ 0, 1, 0, 0, 0 /* reused machine was compiled with REGEX_NEWLINE */,
  NULL, S("a") },
{ -1, 0, 0, -1, 0 /* reused machine was compiled with REGEX_NEWLINE */,
  NULL, S("ab\nba") },
/* Terminator: run_tests() stops at the first entry whose string is NULL. */
{ -1, 0, 0, 0, 0,
  NULL, NULL }
};
    314 
    315 int main(int argc, char* argv[])
    316 {
    317 	int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
    318 
    319 /*	verbose_test("a((b)((c|d))|)c|"); */
    320 /*	verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
    321 /*	verbose_test("{3!}({3})({0!}){,"); */
    322 /*	verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
    323 /*	verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
    324 /*	verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
    325 
    326 	run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
    327 	return 0;
    328 }
    329