1 1.1 alnsn /* 2 1.1 alnsn * Stack-less Just-In-Time compiler 3 1.1 alnsn * 4 1.1.1.4 alnsn * Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved. 5 1.1 alnsn * 6 1.1 alnsn * Redistribution and use in source and binary forms, with or without modification, are 7 1.1 alnsn * permitted provided that the following conditions are met: 8 1.1 alnsn * 9 1.1 alnsn * 1. Redistributions of source code must retain the above copyright notice, this list of 10 1.1 alnsn * conditions and the following disclaimer. 11 1.1 alnsn * 12 1.1 alnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 1.1 alnsn * of conditions and the following disclaimer in the documentation and/or other materials 14 1.1 alnsn * provided with the distribution. 15 1.1 alnsn * 16 1.1 alnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 1.1 alnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 1.1 alnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 1.1 alnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 1.1 alnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 1.1 alnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 1.1 alnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 1.1 alnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 1.1 alnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 1.1 alnsn */ 26 1.1 alnsn 27 1.1 alnsn #include "sljitLir.h" 28 1.1 alnsn #include "regexJIT.h" 29 1.1 alnsn 30 1.1.1.4 alnsn #include <stdlib.h> 31 1.1.1.4 alnsn 32 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 33 1.1 alnsn #include <stdio.h> 34 1.1 alnsn #endif 35 1.1 alnsn 36 1.1 alnsn /* Extra, hidden flags: 37 1.1 alnsn {id!} where id > 0 found in the code. */ 38 1.1 alnsn #define REGEX_ID_CHECK 0x100 39 1.1 alnsn /* When REGEX_NEWLINE && REGEX_MATCH_BEGIN defined, the pattern turn to a normal search, 40 1.1 alnsn which starts with [\r\n] character range. */ 41 1.1 alnsn #define REGEX_FAKE_MATCH_BEGIN 0x200 42 1.1 alnsn /* When REGEX_NEWLINE && REGEX_MATCH_END defined, the pattern turn to a normal search, 43 1.1 alnsn which ends with [\r\n] character range. */ 44 1.1 alnsn #define REGEX_FAKE_MATCH_END 0x400 45 1.1 alnsn 46 1.1 alnsn /* Check match completition after every (FINISH_TEST + 1) steps. */ 47 1.1 alnsn #define FINISH_TEST 0x7 48 1.1 alnsn 49 1.1 alnsn /* --------------------------------------------------------------------- */ 50 1.1 alnsn /* Structures for JIT-ed pattern matching */ 51 1.1 alnsn /* --------------------------------------------------------------------- */ 52 1.1 alnsn 53 1.1 alnsn struct regex_machine 54 1.1 alnsn { 55 1.1 alnsn /* flags. */ 56 1.1 alnsn int flags; 57 1.1 alnsn /* Number of state descriptors for one term. */ 58 1.1.1.2 alnsn sljit_sw no_states; 59 1.1 alnsn /* Total size. */ 60 1.1.1.2 alnsn sljit_sw size; 61 1.1 alnsn 62 1.1 alnsn union { 63 1.1 alnsn void *init_match; 64 1.1.1.2 alnsn sljit_sw (SLJIT_CALL *call_init)(void *next, void* match); 65 1.1 alnsn } u; 66 1.1 alnsn #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) 67 1.1 alnsn struct sljit_function_context context; 68 1.1 alnsn #endif 69 1.1 alnsn 70 1.1 alnsn void *continue_match; 71 1.1 alnsn 72 1.1 alnsn /* Variable sized array to contain the handler addresses. */ 73 1.1 alnsn sljit_uw entry_addrs[1]; 74 1.1 alnsn }; 75 1.1 alnsn 76 1.1 alnsn struct regex_match 77 1.1 alnsn { 78 1.1 alnsn /* Current and next state array. */ 79 1.1.1.2 alnsn sljit_sw *current; 80 1.1.1.2 alnsn sljit_sw *next; 81 1.1 alnsn /* Starting. */ 82 1.1.1.2 alnsn sljit_sw head; 83 1.1 alnsn /* String character index (ever increasing). */ 84 1.1.1.2 alnsn sljit_sw index; 85 1.1 alnsn /* Best match found so far (members in priority order). */ 86 1.1.1.2 alnsn sljit_sw best_begin; 87 1.1.1.2 alnsn sljit_sw best_end; 88 1.1.1.2 alnsn sljit_sw best_id; 89 1.1 alnsn /* Bool flags (encoded as word). */ 90 1.1.1.2 alnsn sljit_sw fast_quit; 91 1.1.1.2 alnsn sljit_sw fast_forward; 92 1.1 alnsn /* Machine. */ 93 1.1 alnsn struct regex_machine *machine; 94 1.1 alnsn 95 1.1 alnsn union { 96 1.1 alnsn void *continue_match; 97 1.1 alnsn void (SLJIT_CALL *call_continue)(struct regex_match *match, const regex_char_t *input_string, int length); 98 1.1 alnsn } u; 99 1.1 alnsn 100 1.1 alnsn /* Variable sized array to contain the state arrays. */ 101 1.1.1.2 alnsn sljit_sw states[1]; 102 1.1 alnsn }; 103 1.1 alnsn 104 1.1 alnsn /* State vector 105 1.1 alnsn ITEM[0] - pointer to the address inside the machine code 106 1.1 alnsn ITEM[1] - next pointer 107 1.1 alnsn ITEM[2] - string started from (optional) 108 1.1 alnsn ITEM[3] - max ID (optional) */ 109 1.1 alnsn 110 1.1 alnsn /* Register allocation. */ 111 1.1 alnsn /* Current state array (loaded & stored: regex_match->current). */ 112 1.1.1.3 alnsn #define R_CURR_STATE SLJIT_S0 113 1.1 alnsn /* Next state array (loaded & stored: regex_match->next). */ 114 1.1.1.3 alnsn #define R_NEXT_STATE SLJIT_S1 115 1.1 alnsn /* Head (loaded & stored: regex_match->head). */ 116 1.1.1.3 alnsn #define R_NEXT_HEAD SLJIT_S2 117 1.1 alnsn /* String fragment pointer. */ 118 1.1.1.3 alnsn #define R_STRING SLJIT_S3 119 1.1 alnsn /* String fragment length. */ 120 1.1.1.3 alnsn #define R_LENGTH SLJIT_S4 121 1.1 alnsn /* 'struct regex_match*' */ 122 1.1.1.3 alnsn #define R_REGEX_MATCH SLJIT_R0 123 1.1 alnsn /* Current character. */ 124 1.1.1.3 alnsn #define R_CURR_CHAR SLJIT_R1 125 1.1 alnsn /* Temporary register. */ 126 1.1.1.3 alnsn #define R_TEMP SLJIT_R2 127 1.1 alnsn /* Caches the regex_match->best_begin. */ 128 1.1.1.3 alnsn #define R_BEST_BEGIN SLJIT_R3 129 1.1 alnsn /* Current character index. */ 130 1.1.1.3 alnsn #define R_CURR_INDEX SLJIT_R4 131 1.1 alnsn 132 1.1 alnsn /* --------------------------------------------------------------------- */ 133 1.1 alnsn /* Stack management */ 134 1.1 alnsn /* --------------------------------------------------------------------- */ 135 1.1 alnsn 136 1.1 alnsn /* Try to allocate 2^n blocks. */ 137 1.1 alnsn #define STACK_FRAGMENT_SIZE (((64 * sizeof(struct stack_item)) - (sizeof(struct stack_fragment_data))) / (sizeof(struct stack_item))) 138 1.1 alnsn 139 1.1 alnsn struct stack_item { 140 1.1 alnsn int type; 141 1.1 alnsn int value; 142 1.1 alnsn }; 143 1.1 alnsn 144 1.1 alnsn struct stack_fragment_data { 145 1.1 alnsn struct stack_fragment *next; 146 1.1 alnsn struct stack_fragment *prev; 147 1.1 alnsn }; 148 1.1 alnsn 149 1.1 alnsn struct stack_fragment { 150 1.1 alnsn struct stack_fragment_data data; 151 1.1 alnsn struct stack_item items[STACK_FRAGMENT_SIZE]; 152 1.1 alnsn }; 153 1.1 alnsn 154 1.1 alnsn struct stack { 155 1.1 alnsn struct stack_fragment *first; 156 1.1 alnsn struct stack_fragment *last; 157 1.1 alnsn int index; 158 1.1 alnsn int count; 159 1.1 alnsn }; 160 1.1 alnsn 161 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 162 1.1 alnsn 163 1.1 alnsn static void stack_check(struct stack *stack) 164 1.1 alnsn { 165 1.1 alnsn struct stack_fragment *curr; 166 1.1 alnsn int found; 167 1.1 alnsn 168 1.1 alnsn if (!stack) 169 1.1 alnsn return; 170 1.1 alnsn 171 1.1 alnsn SLJIT_ASSERT(stack->index >= 0 && stack->index < STACK_FRAGMENT_SIZE); 172 1.1 alnsn 173 1.1 alnsn if (stack->first == NULL) { 174 1.1 alnsn SLJIT_ASSERT(stack->first == NULL && stack->last == NULL); 175 1.1 alnsn SLJIT_ASSERT(stack->index == STACK_FRAGMENT_SIZE - 1 && stack->count == 0); 176 1.1 alnsn return; 177 1.1 alnsn } 178 1.1 alnsn 179 1.1 alnsn found = 0; 180 1.1 alnsn if (stack->last == NULL) { 181 1.1 alnsn SLJIT_ASSERT(stack->index == STACK_FRAGMENT_SIZE - 1 && stack->count == 0); 182 1.1 alnsn found = 1; 183 1.1 alnsn } 184 1.1 alnsn else 185 1.1 alnsn SLJIT_ASSERT(stack->index >= 0 && stack->count >= 0); 186 1.1 alnsn 187 1.1 alnsn SLJIT_ASSERT(stack->first->data.prev == NULL); 188 1.1 alnsn curr = stack->first; 189 1.1 alnsn while (curr) { 190 1.1 alnsn if (curr == stack->last) 191 1.1 alnsn found = 1; 192 1.1 alnsn if (curr->data.next) 193 1.1 alnsn SLJIT_ASSERT(curr->data.next->data.prev == curr); 194 1.1 alnsn curr = curr->data.next; 195 1.1 alnsn } 196 1.1 alnsn SLJIT_ASSERT(found); 197 1.1 alnsn } 198 1.1 alnsn 199 1.1 alnsn #endif 200 1.1 alnsn 201 1.1 alnsn static void stack_init(struct stack *stack) 202 1.1 alnsn { 203 1.1 alnsn stack->first = NULL; 204 1.1 alnsn stack->last = NULL; 205 1.1 alnsn stack->index = STACK_FRAGMENT_SIZE - 1; 206 1.1 alnsn stack->count = 0; 207 1.1 alnsn } 208 1.1 alnsn 209 1.1 alnsn static void stack_destroy(struct stack *stack) 210 1.1 alnsn { 211 1.1 alnsn struct stack_fragment *curr = stack->first; 212 1.1 alnsn struct stack_fragment *prev; 213 1.1 alnsn 214 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 215 1.1 alnsn stack_check(stack); 216 1.1 alnsn #endif 217 1.1 alnsn 218 1.1 alnsn while (curr) { 219 1.1 alnsn prev = curr; 220 1.1 alnsn curr = curr->data.next; 221 1.1.1.3 alnsn SLJIT_FREE(prev, NULL); 222 1.1 alnsn } 223 1.1 alnsn } 224 1.1 alnsn 225 1.1 alnsn static SLJIT_INLINE struct stack_item* stack_top(struct stack *stack) 226 1.1 alnsn { 227 1.1 alnsn SLJIT_ASSERT(stack->last); 228 1.1 alnsn return stack->last->items + stack->index; 229 1.1 alnsn } 230 1.1 alnsn 231 1.1 alnsn static int stack_push(struct stack *stack, int type, int value) 232 1.1 alnsn { 233 1.1 alnsn if (stack->last) { 234 1.1 alnsn stack->index++; 235 1.1 alnsn if (stack->index >= STACK_FRAGMENT_SIZE) { 236 1.1 alnsn stack->index = 0; 237 1.1 alnsn if (!stack->last->data.next) { 238 1.1.1.3 alnsn stack->last->data.next = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); 239 1.1 alnsn if (!stack->last->data.next) 240 1.1 alnsn return 1; 241 1.1 alnsn stack->last->data.next->data.next = NULL; 242 1.1 alnsn stack->last->data.next->data.prev = stack->last; 243 1.1 alnsn } 244 1.1 alnsn stack->last = stack->last->data.next; 245 1.1 alnsn } 246 1.1 alnsn } 247 1.1 alnsn else if (!stack->first) { 248 1.1.1.3 alnsn stack->last = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); 249 1.1 alnsn if (!stack->last) 250 1.1 alnsn return 1; 251 1.1 alnsn stack->last->data.prev = NULL; 252 1.1 alnsn stack->last->data.next = NULL; 253 1.1 alnsn stack->first = stack->last; 254 1.1 alnsn stack->index = 0; 255 1.1 alnsn } 256 1.1 alnsn else { 257 1.1 alnsn stack->last = stack->first; 258 1.1 alnsn stack->index = 0; 259 1.1 alnsn } 260 1.1 alnsn stack->last->items[stack->index].type = type; 261 1.1 alnsn stack->last->items[stack->index].value = value; 262 1.1 alnsn stack->count++; 263 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 264 1.1 alnsn stack_check(stack); 265 1.1 alnsn #endif 266 1.1 alnsn return 0; 267 1.1 alnsn } 268 1.1 alnsn 269 1.1 alnsn static struct stack_item* stack_pop(struct stack *stack) 270 1.1 alnsn { 271 1.1 alnsn struct stack_item *ret = stack_top(stack); 272 1.1 alnsn 273 1.1 alnsn if (stack->index > 0) 274 1.1 alnsn stack->index--; 275 1.1 alnsn else { 276 1.1 alnsn stack->last = stack->last->data.prev; 277 1.1 alnsn stack->index = STACK_FRAGMENT_SIZE - 1; 278 1.1 alnsn } 279 1.1 alnsn 280 1.1 alnsn stack->count--; 281 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 282 1.1 alnsn stack_check(stack); 283 1.1 alnsn #endif 284 1.1 alnsn return ret; 285 1.1 alnsn } 286 1.1 alnsn 287 1.1 alnsn static SLJIT_INLINE void stack_clone(struct stack *src, struct stack *dst) 288 1.1 alnsn { 289 1.1 alnsn *dst = *src; 290 1.1 alnsn } 291 1.1 alnsn 292 1.1 alnsn static int stack_push_copy(struct stack *stack, int items, int length) 293 1.1 alnsn { 294 1.1 alnsn struct stack_fragment *frag1; 295 1.1 alnsn int ind1; 296 1.1 alnsn struct stack_fragment *frag2; 297 1.1 alnsn int ind2; 298 1.1 alnsn int counter; 299 1.1 alnsn 300 1.1 alnsn SLJIT_ASSERT(stack->count >= length && items <= length && items > 0); 301 1.1 alnsn 302 1.1 alnsn /* Allocate the necessary elements. */ 303 1.1 alnsn counter = items; 304 1.1 alnsn frag1 = stack->last; 305 1.1 alnsn ind1 = stack->index; 306 1.1 alnsn while (counter > 0) { 307 1.1 alnsn if (stack->index + counter >= STACK_FRAGMENT_SIZE) { 308 1.1 alnsn counter -= STACK_FRAGMENT_SIZE - stack->index - 1 + 1; 309 1.1 alnsn stack->index = 0; 310 1.1 alnsn if (!stack->last->data.next) { 311 1.1.1.3 alnsn stack->last->data.next = (struct stack_fragment*)SLJIT_MALLOC(sizeof(struct stack_fragment), NULL); 312 1.1 alnsn if (!stack->last->data.next) 313 1.1 alnsn return 1; 314 1.1 alnsn stack->last->data.next->data.next = NULL; 315 1.1 alnsn stack->last->data.next->data.prev = stack->last; 316 1.1 alnsn } 317 1.1 alnsn stack->last = stack->last->data.next; 318 1.1 alnsn } 319 1.1 alnsn else { 320 1.1 alnsn stack->index += counter; 321 1.1 alnsn counter = 0; 322 1.1 alnsn } 323 1.1 alnsn } 324 1.1 alnsn 325 1.1 alnsn frag2 = stack->last; 326 1.1 alnsn ind2 = stack->index; 327 1.1 alnsn while (length > 0) { 328 1.1 alnsn frag2->items[ind2--] = frag1->items[ind1--]; 329 1.1 alnsn if (ind1 < 0) { 330 1.1 alnsn ind1 = STACK_FRAGMENT_SIZE - 1; 331 1.1 alnsn frag1 = frag1->data.prev; 332 1.1 alnsn } 333 1.1 alnsn if (ind2 < 0) { 334 1.1 alnsn ind2 = STACK_FRAGMENT_SIZE - 1; 335 1.1 alnsn frag2 = frag2->data.prev; 336 1.1 alnsn } 337 1.1 alnsn length--; 338 1.1 alnsn } 339 1.1 alnsn 340 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 341 1.1 alnsn stack_check(stack); 342 1.1 alnsn #endif 343 1.1 alnsn stack->count += items; 344 1.1 alnsn return 0; 345 1.1 alnsn } 346 1.1 alnsn 347 1.1 alnsn /* --------------------------------------------------------------------- */ 348 1.1 alnsn /* Parser */ 349 1.1 alnsn /* --------------------------------------------------------------------- */ 350 1.1 alnsn 351 1.1 alnsn enum { 352 1.1 alnsn /* Common. */ 353 1.1 alnsn type_begin, 354 1.1 alnsn type_end, 355 1.1 alnsn type_char, 356 1.1 alnsn type_newline, 357 1.1 alnsn type_id, 358 1.1 alnsn type_rng_start, 359 1.1 alnsn type_rng_end, 360 1.1 alnsn type_rng_char, 361 1.1 alnsn type_rng_left, 362 1.1 alnsn type_rng_right, 363 1.1 alnsn 364 1.1 alnsn /* generator only. */ 365 1.1 alnsn type_branch, 366 1.1 alnsn type_jump, 367 1.1 alnsn 368 1.1 alnsn /* Parser only. */ 369 1.1 alnsn type_open_br, 370 1.1 alnsn type_close_br, 371 1.1 alnsn type_select, 372 1.1 alnsn type_asterisk, 373 1.1 alnsn type_plus_sign, 374 1.1 alnsn type_qestion_mark 375 1.1 alnsn }; 376 1.1 alnsn 377 1.1 alnsn struct compiler_common { 378 1.1 alnsn /* Temporary stacks. */ 379 1.1 alnsn struct stack stack; 380 1.1 alnsn struct stack depth; 381 1.1 alnsn /* REGEX_ flags. */ 382 1.1 alnsn int flags; 383 1.1 alnsn /* Encoded size of the dfa representation. */ 384 1.1.1.2 alnsn sljit_sw dfa_size; 385 1.1 alnsn /* Number of terms. */ 386 1.1.1.2 alnsn sljit_sw terms_size; 387 1.1 alnsn /* Number of state descriptors for one term (same as machine->no_states). */ 388 1.1.1.2 alnsn sljit_sw no_states; 389 1.1 alnsn /* Number of type_rng_(char|left)-s in the longest character range. */ 390 1.1.1.2 alnsn sljit_sw longest_range_size; 391 1.1 alnsn 392 1.1 alnsn /* DFA linear representation (size: dfa_size). */ 393 1.1 alnsn struct stack_item *dfa_transitions; 394 1.1 alnsn /* Term id and search state pairs (size: dfa_size). */ 395 1.1 alnsn struct stack_item *search_states; 396 1.1 alnsn 397 1.1 alnsn /* sljit compiler */ 398 1.1 alnsn struct sljit_compiler *compiler; 399 1.1 alnsn /* Machine data, which must be kept for later use. */ 400 1.1 alnsn struct regex_machine *machine; 401 1.1 alnsn /* Temporary space for jumps (size: longest_range_size). */ 402 1.1 alnsn struct sljit_jump **range_jump_list; 403 1.1 alnsn }; 404 1.1 alnsn 405 1.1 alnsn static const regex_char_t* decode_number(const regex_char_t *regex_string, int length, int *result) 406 1.1 alnsn { 407 1.1 alnsn int value = 0; 408 1.1 alnsn 409 1.1 alnsn SLJIT_ASSERT(length > 0); 410 1.1 alnsn if (*regex_string < '0' || *regex_string > '9') { 411 1.1 alnsn *result = -1; 412 1.1 alnsn return regex_string; 413 1.1 alnsn } 414 1.1 alnsn 415 1.1 alnsn while (length > 0 && *regex_string >= '0' && *regex_string <= '9') { 416 1.1 alnsn value = value * 10 + (*regex_string - '0'); 417 1.1 alnsn length--; 418 1.1 alnsn regex_string++; 419 1.1 alnsn } 420 1.1 alnsn 421 1.1 alnsn *result = value; 422 1.1 alnsn return regex_string; 423 1.1 alnsn } 424 1.1 alnsn 425 1.1 alnsn static int iterate(struct stack *stack, int min, int max) 426 1.1 alnsn { 427 1.1 alnsn struct stack it; 428 1.1 alnsn struct stack_item *item; 429 1.1 alnsn int count = -1; 430 1.1 alnsn int len = 0; 431 1.1 alnsn int depth = 0; 432 1.1 alnsn 433 1.1 alnsn stack_clone(stack, &it); 434 1.1 alnsn 435 1.1 alnsn /* Calculate size. */ 436 1.1 alnsn while (count < 0) { 437 1.1 alnsn item = stack_pop(&it); 438 1.1 alnsn switch (item->type) { 439 1.1 alnsn case type_id: 440 1.1 alnsn case type_rng_end: 441 1.1 alnsn case type_rng_char: 442 1.1 alnsn case type_rng_left: 443 1.1 alnsn case type_rng_right: 444 1.1 alnsn case type_plus_sign: 445 1.1 alnsn case type_qestion_mark: 446 1.1 alnsn len++; 447 1.1 alnsn break; 448 1.1 alnsn 449 1.1 alnsn case type_asterisk: 450 1.1 alnsn len += 2; 451 1.1 alnsn break; 452 1.1 alnsn 453 1.1 alnsn case type_close_br: 454 1.1 alnsn depth++; 455 1.1 alnsn break; 456 1.1 alnsn 457 1.1 alnsn case type_open_br: 458 1.1 alnsn SLJIT_ASSERT(depth > 0); 459 1.1 alnsn depth--; 460 1.1 alnsn if (depth == 0) 461 1.1 alnsn count = it.count; 462 1.1 alnsn break; 463 1.1 alnsn 464 1.1 alnsn case type_select: 465 1.1 alnsn SLJIT_ASSERT(depth > 0); 466 1.1 alnsn len += 2; 467 1.1 alnsn break; 468 1.1 alnsn 469 1.1 alnsn default: 470 1.1 alnsn SLJIT_ASSERT(item->type != type_begin && item->type != type_end); 471 1.1 alnsn if (depth == 0) 472 1.1 alnsn count = it.count; 473 1.1 alnsn len++; 474 1.1 alnsn break; 475 1.1 alnsn } 476 1.1 alnsn } 477 1.1 alnsn 478 1.1 alnsn if (min == 0 && max == 0) { 479 1.1 alnsn /* {0,0} case, not {0,} case: delete subtree. */ 480 1.1 alnsn stack_clone(&it, stack); 481 1.1 alnsn /* and put an empty bracket expression instead of it. */ 482 1.1 alnsn if (stack_push(stack, type_open_br, 0)) 483 1.1 alnsn return REGEX_MEMORY_ERROR; 484 1.1 alnsn if (stack_push(stack, type_close_br, 0)) 485 1.1 alnsn return REGEX_MEMORY_ERROR; 486 1.1 alnsn return len; 487 1.1 alnsn } 488 1.1 alnsn 489 1.1 alnsn count = stack->count - count; 490 1.1 alnsn 491 1.1 alnsn /* Put an open bracket before the sequence. */ 492 1.1 alnsn if (stack_push_copy(stack, 1, count)) 493 1.1 alnsn return -1; 494 1.1 alnsn 495 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 496 1.1 alnsn SLJIT_ASSERT(stack_push(&it, type_open_br, 0) == 0); 497 1.1 alnsn #else 498 1.1 alnsn stack_push(&it, type_open_br, 0); 499 1.1 alnsn #endif 500 1.1 alnsn 501 1.1 alnsn /* Copy the data. */ 502 1.1 alnsn if (max > 0) { 503 1.1 alnsn len = len * (max - 1); 504 1.1 alnsn max -= min; 505 1.1 alnsn /* Insert ? operators. */ 506 1.1 alnsn len += max; 507 1.1 alnsn 508 1.1 alnsn if (min > 0) { 509 1.1 alnsn min--; 510 1.1 alnsn while (min > 0) { 511 1.1 alnsn if (stack_push_copy(stack, count, count)) 512 1.1 alnsn return -1; 513 1.1 alnsn min--; 514 1.1 alnsn } 515 1.1 alnsn if (max > 0) { 516 1.1 alnsn if (stack_push_copy(stack, count, count)) 517 1.1 alnsn return -1; 518 1.1 alnsn if (stack_push(stack, type_qestion_mark, 0)) 519 1.1 alnsn return REGEX_MEMORY_ERROR; 520 1.1 alnsn count++; 521 1.1 alnsn max--; 522 1.1 alnsn } 523 1.1 alnsn } 524 1.1 alnsn else { 525 1.1 alnsn SLJIT_ASSERT(max > 0); 526 1.1 alnsn max--; 527 1.1 alnsn count++; 528 1.1 alnsn if (stack_push(stack, type_qestion_mark, 0)) 529 1.1 alnsn return REGEX_MEMORY_ERROR; 530 1.1 alnsn } 531 1.1 alnsn 532 1.1 alnsn while (max > 0) { 533 1.1 alnsn if (stack_push_copy(stack, count, count)) 534 1.1 alnsn return -1; 535 1.1 alnsn max--; 536 1.1 alnsn } 537 1.1 alnsn } 538 1.1 alnsn else { 539 1.1 alnsn SLJIT_ASSERT(min > 0); 540 1.1 alnsn min--; 541 1.1 alnsn /* Insert + operator. */ 542 1.1 alnsn len = len * min + 1; 543 1.1 alnsn while (min > 0) { 544 1.1 alnsn if (stack_push_copy(stack, count, count)) 545 1.1 alnsn return -1; 546 1.1 alnsn min--; 547 1.1 alnsn } 548 1.1 alnsn 549 1.1 alnsn if (stack_push(stack, type_plus_sign, 0)) 550 1.1 alnsn return REGEX_MEMORY_ERROR; 551 1.1 alnsn } 552 1.1 alnsn 553 1.1 alnsn /* Close the opened bracket. */ 554 1.1 alnsn if (stack_push(stack, type_close_br, 0)) 555 1.1 alnsn return REGEX_MEMORY_ERROR; 556 1.1 alnsn 557 1.1 alnsn return len; 558 1.1 alnsn } 559 1.1 alnsn 560 1.1.1.2 alnsn static int parse_iterator(const regex_char_t *regex_string, int length, struct stack *stack, sljit_sw *dfa_size, int begin) 561 1.1 alnsn { 562 1.1 alnsn /* We only know that *regex_string == { . */ 563 1.1 alnsn int val1, val2; 564 1.1 alnsn const regex_char_t *base_from = regex_string; 565 1.1 alnsn const regex_char_t *from; 566 1.1 alnsn 567 1.1 alnsn length--; 568 1.1 alnsn regex_string++; 569 1.1 alnsn 570 1.1 alnsn /* Decode left value. */ 571 1.1 alnsn val2 = -1; 572 1.1 alnsn if (length == 0) 573 1.1 alnsn return -2; 574 1.1 alnsn if (*regex_string == ',') { 575 1.1 alnsn val1 = 0; 576 1.1 alnsn length--; 577 1.1 alnsn regex_string++; 578 1.1 alnsn } 579 1.1 alnsn else { 580 1.1 alnsn from = regex_string; 581 1.1 alnsn regex_string = decode_number(regex_string, length, &val1); 582 1.1 alnsn if (val1 < 0) 583 1.1 alnsn return -2; 584 1.1 alnsn length -= regex_string - from; 585 1.1 alnsn 586 1.1 alnsn if (length == 0) 587 1.1 alnsn return -2; 588 1.1 alnsn if (*regex_string == '}') { 589 1.1 alnsn val2 = val1; 590 1.1 alnsn if (val1 == 0) 591 1.1 alnsn val1 = -1; 592 1.1 alnsn } 593 1.1 alnsn else if (length >= 2 && *regex_string == '!' && regex_string[1] == '}') { 594 1.1 alnsn /* Non posix extension. */ 595 1.1 alnsn if (stack_push(stack, type_id, val1)) 596 1.1 alnsn return -1; 597 1.1 alnsn (*dfa_size)++; 598 1.1 alnsn return (regex_string - base_from) + 1; 599 1.1 alnsn } 600 1.1 alnsn else { 601 1.1 alnsn if (*regex_string != ',') 602 1.1 alnsn return -2; 603 1.1 alnsn length--; 604 1.1 alnsn regex_string++; 605 1.1 alnsn } 606 1.1 alnsn } 607 1.1 alnsn 608 1.1 alnsn if (begin) 609 1.1 alnsn return -2; 610 1.1 alnsn 611 1.1 alnsn /* Decode right value. */ 612 1.1 alnsn if (val2 == -1) { 613 1.1 alnsn if (length == 0) 614 1.1 alnsn return -2; 615 1.1 alnsn if (*regex_string == '}') 616 1.1 alnsn val2 = 0; 617 1.1 alnsn else { 618 1.1 alnsn from = regex_string; 619 1.1 alnsn regex_string = decode_number(regex_string, length, &val2); 620 1.1 alnsn length -= regex_string - from; 621 1.1 alnsn if (val2 < 0 || length == 0 || *regex_string != '}' || val2 < val1) 622 1.1 alnsn return -2; 623 1.1 alnsn if (val2 == 0) { 624 1.1 alnsn SLJIT_ASSERT(val1 == 0); 625 1.1 alnsn val1 = -1; 626 1.1 alnsn } 627 1.1 alnsn } 628 1.1 alnsn } 629 1.1 alnsn 630 1.1 alnsn /* Fast cases. */ 631 1.1 alnsn if (val1 > 1 || val2 > 1) { 632 1.1 alnsn val1 = iterate(stack, val1, val2); 633 1.1 alnsn if (val1 < 0) 634 1.1 alnsn return -1; 635 1.1 alnsn *dfa_size += val1; 636 1.1 alnsn } 637 1.1 alnsn else if (val1 == 0 && val2 == 0) { 638 1.1 alnsn if (stack_push(stack, type_asterisk, 0)) 639 1.1 alnsn return -1; 640 1.1 alnsn *dfa_size += 2; 641 1.1 alnsn } 642 1.1 alnsn else if (val1 == 1 && val2 == 0) { 643 1.1 alnsn if (stack_push(stack, type_plus_sign, 0)) 644 1.1 alnsn return -1; 645 1.1 alnsn (*dfa_size)++; 646 1.1 alnsn } 647 1.1 alnsn else if (val1 == 0 && val2 == 1) { 648 1.1 alnsn if (stack_push(stack, type_qestion_mark, 0)) 649 1.1 alnsn return -1; 650 1.1 alnsn (*dfa_size)++; 651 1.1 alnsn } 652 1.1 alnsn else if (val1 == -1) { 653 1.1 alnsn val1 = iterate(stack, 0, 0); 654 1.1 alnsn if (val1 < 0) 655 1.1 alnsn return -1; 656 1.1 alnsn *dfa_size -= val1; 657 1.1 alnsn SLJIT_ASSERT(*dfa_size >= 2); 658 1.1 alnsn } 659 1.1 alnsn else { 660 1.1 alnsn /* Ignore. */ 661 1.1 alnsn SLJIT_ASSERT(val1 == 1 && val2 == 1); 662 1.1 alnsn } 663 1.1 alnsn return regex_string - base_from; 664 1.1 alnsn } 665 1.1 alnsn 666 1.1 alnsn static int parse_char_range(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common) 667 1.1 alnsn { 668 1.1 alnsn struct stack* stack = &compiler_common->stack; 669 1.1 alnsn const regex_char_t *base_from = regex_string; 670 1.1 alnsn regex_char_t left_char, right_char, tmp_char; 671 1.1 alnsn 672 1.1 alnsn length--; 673 1.1 alnsn regex_string++; 674 1.1 alnsn 675 1.1 alnsn if (length == 0) 676 1.1 alnsn return -2; 677 1.1 alnsn 678 1.1 alnsn if (*regex_string != '^') { 679 1.1 alnsn if (stack_push(stack, type_rng_start, 0)) 680 1.1 alnsn return -1; 681 1.1 alnsn } 682 1.1 alnsn else { 683 1.1 alnsn length--; 684 1.1 alnsn regex_string++; 685 1.1 alnsn 686 1.1 alnsn if (length == 0) 687 1.1 alnsn return -2; 688 1.1 alnsn 689 1.1 alnsn if (stack_push(stack, type_rng_start, 1)) 690 1.1 alnsn return -1; 691 1.1 alnsn } 692 1.1 alnsn /* For both the type_rng_start & type_rng_end. */ 693 1.1 alnsn compiler_common->dfa_size += 2; 694 1.1 alnsn 695 1.1 alnsn /* Range must be at least 1 character. */ 696 1.1 alnsn if (*regex_string == ']') { 697 1.1 alnsn length--; 698 1.1 alnsn regex_string++; 699 1.1 alnsn if (stack_push(stack, type_rng_char, ']')) 700 1.1 alnsn return -1; 701 1.1 alnsn compiler_common->dfa_size++; 702 1.1 alnsn } 703 1.1 alnsn 704 1.1 alnsn while (1) { 705 1.1 alnsn if (length == 0) 706 1.1 alnsn return -2; 707 1.1 alnsn 708 1.1 alnsn if (*regex_string == ']') 709 1.1 alnsn break; 710 1.1 alnsn 711 1.1 alnsn if (*regex_string != '\\') 712 1.1 alnsn left_char = *regex_string; 713 1.1 alnsn else { 714 1.1 alnsn regex_string++; 715 1.1 alnsn length--; 716 1.1 alnsn if (length == 0) 717 1.1 alnsn return -2; 718 1.1 alnsn left_char = *regex_string; 719 1.1 alnsn } 720 1.1 alnsn regex_string++; 721 1.1 alnsn length--; 722 1.1 alnsn 723 1.1 alnsn /* Is a range here? */ 724 1.1 alnsn if (length >= 3 && *regex_string == '-' && *(regex_string + 1) != ']') { 725 1.1 alnsn regex_string++; 726 1.1 alnsn length--; 727 1.1 alnsn 728 1.1 alnsn if (*regex_string != '\\') 729 1.1 alnsn right_char = *regex_string; 730 1.1 alnsn else { 731 1.1 alnsn regex_string++; 732 1.1 alnsn length--; 733 1.1 alnsn if (length == 0) 734 1.1 alnsn return -2; 735 1.1 alnsn right_char = *regex_string; 736 1.1 alnsn } 737 1.1 alnsn regex_string++; 738 1.1 alnsn length--; 739 1.1 alnsn 740 1.1 alnsn if (left_char > right_char) { 741 1.1 alnsn /* Swap if necessary. */ 742 1.1 alnsn tmp_char = left_char; 743 1.1 alnsn left_char = right_char; 744 1.1 alnsn right_char = tmp_char; 745 1.1 alnsn } 746 1.1 alnsn 747 1.1 alnsn if (stack_push(stack, type_rng_left, left_char)) 748 1.1 alnsn return -1; 749 1.1 alnsn if (stack_push(stack, type_rng_right, right_char)) 750 1.1 alnsn return -1; 751 1.1 alnsn compiler_common->dfa_size += 2; 752 1.1 alnsn } 753 1.1 alnsn else { 754 1.1 alnsn if (stack_push(stack, type_rng_char, left_char)) 755 1.1 alnsn return -1; 756 1.1 alnsn compiler_common->dfa_size++; 757 1.1 alnsn } 758 1.1 alnsn } 759 1.1 alnsn 760 1.1 alnsn if (stack_push(stack, type_rng_end, 0)) 761 1.1 alnsn return -1; 762 1.1 alnsn return regex_string - base_from; 763 1.1 alnsn } 764 1.1 alnsn 765 1.1 alnsn static int parse(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common) 766 1.1 alnsn { 767 1.1 alnsn /* Depth of bracketed expressions. */ 768 1.1 alnsn int depth = 0; 769 1.1 alnsn /* Have we already found a term? '1' if not yet. */ 770 1.1 alnsn int begin = 1; 771 1.1 alnsn /* Cache stack pointer. */ 772 1.1 alnsn struct stack* stack = &compiler_common->stack; 773 1.1 alnsn int tmp; 774 1.1 alnsn 775 1.1 alnsn /* Type_begin and type_end. */ 776 1.1 alnsn compiler_common->dfa_size = 2; 777 1.1 alnsn stack_init(stack); 778 1.1 alnsn if (stack_push(stack, type_begin, 0)) 779 1.1 alnsn return REGEX_MEMORY_ERROR; 780 1.1 alnsn 781 1.1 alnsn if (length > 0 && *regex_string == '^') { 782 1.1 alnsn compiler_common->flags |= REGEX_MATCH_BEGIN; 783 1.1 alnsn length--; 784 1.1 alnsn regex_string++; 785 1.1 alnsn } 786 1.1 alnsn 787 1.1 alnsn if ((compiler_common->flags & (REGEX_MATCH_BEGIN | REGEX_NEWLINE)) == (REGEX_MATCH_BEGIN | REGEX_NEWLINE)) { 788 1.1 alnsn /* Replace REGEX_MATCH_BEGIN flag to REGEX_FAKE_MATCH_BEGIN */ 789 1.1 alnsn compiler_common->flags &= ~REGEX_MATCH_BEGIN; 790 1.1 alnsn compiler_common->flags |= REGEX_FAKE_MATCH_BEGIN; 791 1.1 alnsn /* and append a new-line search. */ 792 1.1 alnsn if (stack_push(stack, type_newline, 0)) 793 1.1 alnsn return REGEX_MEMORY_ERROR; 794 1.1 alnsn compiler_common->dfa_size++; 795 1.1 alnsn /* Begin intentionally kept as 1. */ 796 1.1 alnsn } 797 1.1 alnsn 798 1.1 alnsn while (length > 0) { 799 1.1 alnsn switch (*regex_string) { 800 1.1 alnsn case '\\' : 801 1.1 alnsn length--; 802 1.1 alnsn regex_string++; 803 1.1 alnsn if (length == 0) 804 1.1 alnsn return REGEX_INVALID_REGEX; 805 1.1 alnsn if (stack_push(stack, type_char, *regex_string)) 806 1.1 alnsn return REGEX_MEMORY_ERROR; 807 1.1 alnsn begin = 0; 808 1.1 alnsn compiler_common->dfa_size++; 809 1.1 alnsn break; 810 1.1 alnsn 811 1.1 alnsn case '.' : 812 1.1 alnsn if (stack_push(stack, type_rng_start, 1)) 813 1.1 alnsn return REGEX_MEMORY_ERROR; 814 1.1 alnsn if (compiler_common->flags & REGEX_NEWLINE) { 815 1.1 alnsn if (stack_push(stack, type_rng_char, '\n')) 816 1.1 alnsn return REGEX_MEMORY_ERROR; 817 1.1 alnsn if (stack_push(stack, type_rng_char, '\r')) 818 1.1 alnsn return REGEX_MEMORY_ERROR; 819 1.1 alnsn compiler_common->dfa_size += 2; 820 1.1 alnsn } 821 1.1 alnsn if (stack_push(stack, type_rng_end, 1)) 822 1.1 alnsn return REGEX_MEMORY_ERROR; 823 1.1 alnsn begin = 0; 824 1.1 alnsn compiler_common->dfa_size += 2; 825 1.1 alnsn break; 826 1.1 alnsn 827 1.1 alnsn case '(' : 828 1.1 alnsn depth++; 829 1.1 alnsn if (stack_push(stack, type_open_br, 0)) 830 1.1 alnsn return REGEX_MEMORY_ERROR; 831 1.1 alnsn begin = 1; 832 1.1 alnsn break; 833 1.1 alnsn 834 1.1 alnsn case ')' : 835 1.1 alnsn if (depth == 0) 836 1.1 alnsn return REGEX_INVALID_REGEX; 837 1.1 alnsn depth--; 838 1.1 alnsn if (stack_push(stack, type_close_br, 0)) 839 1.1 alnsn return REGEX_MEMORY_ERROR; 840 1.1 alnsn begin = 0; 841 1.1 alnsn break; 842 1.1 alnsn 843 1.1 alnsn case '|' : 844 1.1 alnsn if (stack_push(stack, type_select, 0)) 845 1.1 alnsn return REGEX_MEMORY_ERROR; 846 1.1 alnsn begin = 1; 847 1.1 alnsn compiler_common->dfa_size += 2; 848 1.1 alnsn break; 849 1.1 alnsn 850 1.1 alnsn case '*' : 851 1.1 alnsn if (begin) 852 1.1 alnsn return REGEX_INVALID_REGEX; 853 1.1 alnsn if (stack_push(stack, type_asterisk, 0)) 854 1.1 alnsn return REGEX_MEMORY_ERROR; 855 1.1 alnsn compiler_common->dfa_size += 2; 856 1.1 alnsn break; 857 1.1 alnsn 858 1.1 alnsn case '?' : 859 1.1 alnsn case '+' : 860 1.1 alnsn if (begin) 861 1.1 alnsn return REGEX_INVALID_REGEX; 862 1.1 alnsn if (stack_push(stack, (*regex_string == '+') ? type_plus_sign : type_qestion_mark, 0)) 863 1.1 alnsn return REGEX_MEMORY_ERROR; 864 1.1 alnsn compiler_common->dfa_size++; 865 1.1 alnsn break; 866 1.1 alnsn 867 1.1 alnsn case '{' : 868 1.1 alnsn tmp = parse_iterator(regex_string, length, stack, &compiler_common->dfa_size, begin); 869 1.1 alnsn 870 1.1 alnsn if (tmp >= 0) { 871 1.1 alnsn length -= tmp; 872 1.1 alnsn regex_string += tmp; 873 1.1 alnsn } 874 1.1 alnsn else if (tmp == -1) 875 1.1 alnsn return REGEX_MEMORY_ERROR; 876 1.1 alnsn else { 877 1.1 alnsn /* Not a valid range expression. */ 878 1.1 alnsn SLJIT_ASSERT(tmp == -2); 879 1.1 alnsn if (stack_push(stack, type_char, '{')) 880 1.1 alnsn return REGEX_MEMORY_ERROR; 881 1.1 alnsn compiler_common->dfa_size++; 882 1.1 alnsn } 883 1.1 alnsn break; 884 1.1 alnsn 885 1.1 alnsn case '[' : 886 1.1 alnsn tmp = parse_char_range(regex_string, length, compiler_common); 887 1.1 alnsn if (tmp >= 0) { 888 1.1 alnsn length -= tmp; 889 1.1 alnsn regex_string += tmp; 890 1.1 alnsn } 891 1.1 alnsn else if (tmp == -1) 892 1.1 alnsn return REGEX_MEMORY_ERROR; 893 1.1 alnsn else { 894 1.1 alnsn SLJIT_ASSERT(tmp == -2); 895 1.1 alnsn return REGEX_INVALID_REGEX; 896 1.1 alnsn } 897 1.1 alnsn begin = 0; 898 1.1 alnsn break; 899 1.1 alnsn 900 1.1 alnsn default: 901 1.1 alnsn if (length == 1 && *regex_string == '$') { 902 1.1 alnsn compiler_common->flags |= REGEX_MATCH_END; 903 1.1 alnsn break; 904 1.1 alnsn } 905 1.1 alnsn if (stack_push(stack, type_char, *regex_string)) 906 1.1 alnsn return REGEX_MEMORY_ERROR; 907 1.1 alnsn begin = 0; 908 1.1 alnsn compiler_common->dfa_size++; 909 1.1 alnsn break; 910 1.1 alnsn } 911 1.1 alnsn length--; 912 1.1 alnsn regex_string++; 913 1.1 alnsn } 914 1.1 alnsn 915 1.1 alnsn if (depth != 0) 916 1.1 alnsn return REGEX_INVALID_REGEX; 917 1.1 alnsn 918 1.1 alnsn if ((compiler_common->flags & (REGEX_MATCH_END | REGEX_NEWLINE)) == (REGEX_MATCH_END | REGEX_NEWLINE)) { 919 1.1 alnsn /* Replace REGEX_MATCH_END flag to REGEX_FAKE_MATCH_END */ 920 1.1 alnsn compiler_common->flags &= ~REGEX_MATCH_END; 921 1.1 alnsn compiler_common->flags |= REGEX_FAKE_MATCH_END; 922 1.1 alnsn /* and append a new-line search. */ 923 1.1 alnsn if (stack_push(stack, type_newline, 1)) 924 1.1 alnsn return REGEX_MEMORY_ERROR; 925 1.1 alnsn compiler_common->dfa_size++; 926 1.1 alnsn /* Begin intentionally kept as 1. */ 927 1.1 alnsn } 928 1.1 alnsn 929 1.1 alnsn if (stack_push(stack, type_end, 0)) 930 1.1 alnsn return REGEX_MEMORY_ERROR; 931 1.1 alnsn 932 1.1 alnsn return REGEX_NO_ERROR; 933 1.1 alnsn } 934 1.1 alnsn 935 1.1 alnsn /* --------------------------------------------------------------------- */ 936 1.1 alnsn /* Generating machine state transitions */ 937 1.1 alnsn /* --------------------------------------------------------------------- */ 938 1.1 alnsn 939 1.1 alnsn #define PUT_TRANSITION(typ, val) \ 940 1.1 alnsn do { \ 941 1.1 alnsn --transitions_ptr; \ 942 1.1 alnsn transitions_ptr->type = typ; \ 943 1.1 alnsn transitions_ptr->value = val; \ 944 1.1 alnsn } while (0) 945 1.1 alnsn 946 1.1 alnsn static struct stack_item* handle_iteratives(struct stack_item *transitions_ptr, struct stack_item *transitions, struct stack *depth) 947 1.1 alnsn { 948 1.1 alnsn struct stack_item *item; 949 1.1 alnsn 950 1.1 alnsn while (1) { 951 1.1 alnsn item = stack_top(depth); 952 1.1 alnsn 953 1.1 alnsn switch (item->type) { 954 1.1 alnsn case type_asterisk: 955 1.1 alnsn SLJIT_ASSERT(transitions[item->value].type == type_branch); 956 1.1 alnsn transitions[item->value].value = transitions_ptr - transitions; 957 1.1 alnsn PUT_TRANSITION(type_branch, item->value + 1); 958 1.1 alnsn break; 959 1.1 alnsn 960 1.1 alnsn case type_plus_sign: 961 1.1 alnsn SLJIT_ASSERT(transitions[item->value].type == type_branch); 962 1.1 alnsn transitions[item->value].value = transitions_ptr - transitions; 963 1.1 alnsn break; 964 1.1 alnsn 965 1.1 alnsn case type_qestion_mark: 966 1.1 alnsn PUT_TRANSITION(type_branch, item->value); 967 1.1 alnsn break; 968 1.1 alnsn 969 1.1 alnsn default: 970 1.1 alnsn return transitions_ptr; 971 1.1 alnsn } 972 1.1 alnsn stack_pop(depth); 973 1.1 alnsn } 974 1.1 alnsn } 975 1.1 alnsn 976 1.1 alnsn static int generate_transitions(struct compiler_common *compiler_common) 977 1.1 alnsn { 978 1.1 alnsn struct stack *stack = &compiler_common->stack; 979 1.1 alnsn struct stack *depth = &compiler_common->depth; 980 1.1 alnsn struct stack_item *transitions_ptr; 981 1.1 alnsn struct stack_item *item; 982 1.1 alnsn 983 1.1 alnsn stack_init(depth); 984 1.1.1.3 alnsn compiler_common->dfa_transitions = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); 985 1.1 alnsn if (!compiler_common->dfa_transitions) 986 1.1 alnsn return REGEX_MEMORY_ERROR; 987 1.1 alnsn 988 1.1 alnsn /* Go through the items of the stack and generate the necessary branches and jumps (edges of DFA). */ 989 1.1 alnsn transitions_ptr = compiler_common->dfa_transitions + compiler_common->dfa_size; 990 1.1 alnsn while (stack->count > 0) { 991 1.1 alnsn item = stack_pop(stack); 992 1.1 alnsn switch (item->type) { 993 1.1 alnsn case type_begin: 994 1.1 alnsn case type_open_br: 995 1.1 alnsn item = stack_pop(depth); 996 1.1 alnsn if (item->type == type_select) 997 1.1 alnsn PUT_TRANSITION(type_branch, item->value + 1); 998 1.1 alnsn else 999 1.1 alnsn SLJIT_ASSERT(item->type == type_close_br); 1000 1.1 alnsn if (stack->count == 0) 1001 1.1 alnsn PUT_TRANSITION(type_begin, 0); 1002 1.1 alnsn else 1003 1.1 alnsn transitions_ptr = handle_iteratives(transitions_ptr, compiler_common->dfa_transitions, depth); 1004 1.1 alnsn break; 1005 1.1 alnsn 1006 1.1 alnsn case type_end: 1007 1.1 alnsn case type_close_br: 1008 1.1 alnsn if (item->type == type_end) 1009 1.1 alnsn *--transitions_ptr = *item; 1010 1.1 alnsn if (stack_push(depth, type_close_br, transitions_ptr - compiler_common->dfa_transitions)) 1011 1.1 alnsn return REGEX_MEMORY_ERROR; 1012 1.1 alnsn break; 1013 1.1 alnsn 1014 1.1 alnsn case type_select: 1015 1.1 alnsn item = stack_top(depth); 1016 1.1 alnsn if (item->type == type_select) { 1017 1.1 alnsn SLJIT_ASSERT(compiler_common->dfa_transitions[item->value].type == type_jump); 1018 1.1 alnsn PUT_TRANSITION(type_branch, item->value + 1); 1019 1.1 alnsn PUT_TRANSITION(type_jump, item->value); 1020 1.1 alnsn item->value = transitions_ptr - compiler_common->dfa_transitions; 1021 1.1 alnsn } 1022 1.1 alnsn else { 1023 1.1 alnsn SLJIT_ASSERT(item->type == type_close_br); 1024 1.1 alnsn item->type = type_select; 1025 1.1 alnsn PUT_TRANSITION(type_jump, item->value); 1026 1.1 alnsn item->value = transitions_ptr - compiler_common->dfa_transitions; 1027 1.1 alnsn } 1028 1.1 alnsn break; 1029 1.1 alnsn 1030 1.1 alnsn case type_asterisk: 1031 1.1 alnsn case type_plus_sign: 1032 1.1 alnsn case type_qestion_mark: 1033 1.1 alnsn if (item->type != type_qestion_mark) 1034 1.1 alnsn PUT_TRANSITION(type_branch, 0); 1035 1.1 alnsn if (stack_push(depth, item->type, transitions_ptr - compiler_common->dfa_transitions)) 1036 1.1 alnsn return REGEX_MEMORY_ERROR; 1037 1.1 alnsn break; 1038 1.1 alnsn 1039 1.1 alnsn case type_char: 1040 1.1 alnsn case type_newline: 1041 1.1 alnsn case type_rng_start: 1042 1.1 alnsn /* Requires handle_iteratives. */ 1043 1.1 alnsn *--transitions_ptr = *item; 1044 1.1 alnsn transitions_ptr = handle_iteratives(transitions_ptr, compiler_common->dfa_transitions, depth); 1045 1.1 alnsn break; 1046 1.1 alnsn 1047 1.1 alnsn default: 1048 1.1 alnsn *--transitions_ptr = *item; 1049 1.1 alnsn break; 1050 1.1 alnsn } 1051 1.1 alnsn } 1052 1.1 alnsn 1053 1.1 alnsn SLJIT_ASSERT(compiler_common->dfa_transitions == transitions_ptr); 1054 1.1 alnsn SLJIT_ASSERT(depth->count == 0); 1055 1.1 alnsn return REGEX_NO_ERROR; 1056 1.1 alnsn } 1057 1.1 alnsn 1058 1.1 alnsn #undef PUT_TRANSITION 1059 1.1 alnsn 1060 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1061 1.1 alnsn 1062 1.1 alnsn static void verbose_transitions(struct compiler_common *compiler_common) 1063 1.1 alnsn { 1064 1.1 alnsn struct stack_item *transitions_ptr = compiler_common->dfa_transitions; 1065 1.1 alnsn struct stack_item *transitions_end = transitions_ptr + compiler_common->dfa_size; 1066 1.1 alnsn struct stack_item *search_states_ptr = compiler_common->search_states; 1067 1.1 alnsn int pos; 1068 1.1 alnsn 1069 1.1 alnsn printf("-----------------\nTransitions\n-----------------\n"); 1070 1.1 alnsn pos = 0; 1071 1.1 alnsn while (transitions_ptr < transitions_end) { 1072 1.1 alnsn printf("[%3d] ", pos++); 1073 1.1 alnsn if (search_states_ptr->type >= 0) 1074 1.1 alnsn printf("(%3d) ", search_states_ptr->type); 1075 1.1 alnsn switch (transitions_ptr->type) { 1076 1.1 alnsn case type_begin: 1077 1.1 alnsn printf("type_begin\n"); 1078 1.1 alnsn break; 1079 1.1 alnsn 1080 1.1 alnsn case type_end: 1081 1.1 alnsn printf("type_end\n"); 1082 1.1 alnsn break; 1083 1.1 alnsn 1084 1.1 alnsn case type_char: 1085 1.1 alnsn if (transitions_ptr->value >= ' ') 1086 1.1 alnsn printf("type_char '%c'\n", transitions_ptr->value); 1087 1.1 alnsn else 1088 1.1 alnsn printf("type_char 0x%x\n", transitions_ptr->value); 1089 1.1 alnsn break; 1090 1.1 alnsn 1091 1.1 alnsn case type_newline: 1092 1.1 alnsn printf("type_newline %s\n", transitions_ptr->value ? "(end)" : "(begin)"); 1093 1.1 alnsn break; 1094 1.1 alnsn 1095 1.1 alnsn case type_id: 1096 1.1 alnsn printf("type_id %d\n", transitions_ptr->value); 1097 1.1 alnsn break; 1098 1.1 alnsn 1099 1.1 alnsn case type_rng_start: 1100 1.1 alnsn printf("type_rng_start %s\n", transitions_ptr->value ? "(invert)" : "(normal)"); 1101 1.1 alnsn break; 1102 1.1 alnsn 1103 1.1 alnsn case type_rng_end: 1104 1.1 alnsn printf("type_rng_end\n"); 1105 1.1 alnsn break; 1106 1.1 alnsn 1107 1.1 alnsn case type_rng_char: 1108 1.1 alnsn if (transitions_ptr->value >= ' ') 1109 1.1 alnsn printf("type_rng_char '%c'\n", transitions_ptr->value); 1110 1.1 alnsn else 1111 1.1 alnsn printf("type_rng_char 0x%x\n", transitions_ptr->value); 1112 1.1 alnsn break; 1113 1.1 alnsn 1114 1.1 alnsn case type_rng_left: 1115 1.1 alnsn if (transitions_ptr->value >= ' ') 1116 1.1 alnsn printf("type_rng_left '%c'\n", transitions_ptr->value); 1117 1.1 alnsn else 1118 1.1 alnsn printf("type_rng_left 0x%x\n", transitions_ptr->value); 1119 1.1 alnsn break; 1120 1.1 alnsn 1121 1.1 alnsn case type_rng_right: 1122 1.1 alnsn if (transitions_ptr->value >= ' ') 1123 1.1 alnsn printf("type_rng_right '%c'\n", transitions_ptr->value); 1124 1.1 alnsn else 1125 1.1 alnsn printf("type_rng_right 0x%x\n", transitions_ptr->value); 1126 1.1 alnsn break; 1127 1.1 alnsn 1128 1.1 alnsn case type_branch: 1129 1.1 alnsn printf("type_branch -> %d\n", transitions_ptr->value); 1130 1.1 alnsn break; 1131 1.1 alnsn 1132 1.1 alnsn case type_jump: 1133 1.1 alnsn printf("type_jump -> %d\n", transitions_ptr->value); 1134 1.1 alnsn break; 1135 1.1 alnsn 1136 1.1 alnsn default: 1137 1.1 alnsn printf("UNEXPECTED TYPE\n"); 1138 1.1 alnsn break; 1139 1.1 alnsn } 1140 1.1 alnsn transitions_ptr++; 1141 1.1 alnsn search_states_ptr++; 1142 1.1 alnsn } 1143 1.1 alnsn printf("flags: "); 1144 1.1 alnsn if (!(compiler_common->flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_NEWLINE | REGEX_ID_CHECK | REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END))) 1145 1.1 alnsn printf("none "); 1146 1.1 alnsn if (compiler_common->flags & REGEX_MATCH_BEGIN) 1147 1.1 alnsn printf("REGEX_MATCH_BEGIN "); 1148 1.1 alnsn if (compiler_common->flags & REGEX_MATCH_END) 1149 1.1 alnsn printf("REGEX_MATCH_END "); 1150 1.1 alnsn if (compiler_common->flags & REGEX_NEWLINE) 1151 1.1 alnsn printf("REGEX_NEWLINE "); 1152 1.1 alnsn if (compiler_common->flags & REGEX_ID_CHECK) 1153 1.1 alnsn printf("REGEX_ID_CHECK "); 1154 1.1 alnsn if (compiler_common->flags & REGEX_FAKE_MATCH_BEGIN) 1155 1.1 alnsn printf("REGEX_FAKE_MATCH_BEGIN "); 1156 1.1 alnsn if (compiler_common->flags & REGEX_FAKE_MATCH_END) 1157 1.1 alnsn printf("REGEX_FAKE_MATCH_END "); 1158 1.1 alnsn if (compiler_common->longest_range_size > 0) 1159 1.1 alnsn printf("(longest range: %ld) ", (long)compiler_common->longest_range_size); 1160 1.1 alnsn printf("\n"); 1161 1.1 alnsn } 1162 1.1 alnsn 1163 1.1 alnsn #endif 1164 1.1 alnsn 1165 1.1 alnsn /* --------------------------------------------------------------------- */ 1166 1.1 alnsn /* Utilities */ 1167 1.1 alnsn /* --------------------------------------------------------------------- */ 1168 1.1 alnsn 1169 1.1 alnsn static int generate_search_states(struct compiler_common *compiler_common) 1170 1.1 alnsn { 1171 1.1 alnsn struct stack_item *transitions_ptr = compiler_common->dfa_transitions; 1172 1.1 alnsn struct stack_item *transitions_end = transitions_ptr + compiler_common->dfa_size; 1173 1.1 alnsn struct stack_item *search_states_ptr; 1174 1.1 alnsn struct stack_item *rng_start = NULL; 1175 1.1 alnsn 1176 1.1 alnsn compiler_common->terms_size = !(compiler_common->flags & REGEX_FAKE_MATCH_END) ? 1 : 2; 1177 1.1 alnsn compiler_common->longest_range_size = 0; 1178 1.1.1.3 alnsn compiler_common->search_states = SLJIT_MALLOC(sizeof(struct stack_item) * compiler_common->dfa_size, NULL); 1179 1.1 alnsn if (!compiler_common->search_states) 1180 1.1 alnsn return REGEX_MEMORY_ERROR; 1181 1.1 alnsn 1182 1.1 alnsn search_states_ptr = compiler_common->search_states; 1183 1.1 alnsn while (transitions_ptr < transitions_end) { 1184 1.1 alnsn switch (transitions_ptr->type) { 1185 1.1 alnsn case type_begin: 1186 1.1 alnsn case type_end: 1187 1.1 alnsn search_states_ptr->type = 0; 1188 1.1 alnsn break; 1189 1.1 alnsn 1190 1.1 alnsn case type_char: 1191 1.1 alnsn search_states_ptr->type = compiler_common->terms_size++; 1192 1.1 alnsn break; 1193 1.1 alnsn 1194 1.1 alnsn case type_newline: 1195 1.1 alnsn if (transitions_ptr->value) 1196 1.1 alnsn search_states_ptr->type = 1; 1197 1.1 alnsn else 1198 1.1 alnsn search_states_ptr->type = compiler_common->terms_size++; 1199 1.1 alnsn SLJIT_ASSERT(search_states_ptr->type == 1 || search_states_ptr->type == 2); 1200 1.1 alnsn break; 1201 1.1 alnsn 1202 1.1 alnsn case type_id: 1203 1.1 alnsn if (transitions_ptr->value > 0) 1204 1.1 alnsn compiler_common->flags |= REGEX_ID_CHECK; 1205 1.1 alnsn search_states_ptr->type = -1; 1206 1.1 alnsn break; 1207 1.1 alnsn 1208 1.1 alnsn case type_rng_start: 1209 1.1 alnsn search_states_ptr->type = compiler_common->terms_size; 1210 1.1 alnsn rng_start = search_states_ptr; 1211 1.1 alnsn break; 1212 1.1 alnsn 1213 1.1 alnsn case type_rng_end: 1214 1.1 alnsn search_states_ptr->type = compiler_common->terms_size++; 1215 1.1 alnsn /* Ok, this is a blunt over estimation :) */ 1216 1.1 alnsn if (compiler_common->longest_range_size < search_states_ptr - rng_start) 1217 1.1 alnsn compiler_common->longest_range_size = search_states_ptr - rng_start; 1218 1.1 alnsn break; 1219 1.1 alnsn 1220 1.1 alnsn default: 1221 1.1 alnsn search_states_ptr->type = -1; 1222 1.1 alnsn break; 1223 1.1 alnsn } 1224 1.1 alnsn search_states_ptr->value = -1; 1225 1.1 alnsn search_states_ptr++; 1226 1.1 alnsn transitions_ptr++; 1227 1.1 alnsn } 1228 1.1 alnsn return REGEX_NO_ERROR; 1229 1.1 alnsn } 1230 1.1 alnsn 1231 1.1 alnsn static int trace_transitions(int from, struct compiler_common *compiler_common) 1232 1.1 alnsn { 1233 1.1 alnsn int id = 0; 1234 1.1 alnsn struct stack *stack = &compiler_common->stack; 1235 1.1 alnsn struct stack *depth = &compiler_common->depth; 1236 1.1 alnsn struct stack_item *dfa_transitions = compiler_common->dfa_transitions; 1237 1.1 alnsn struct stack_item *search_states = compiler_common->search_states; 1238 1.1 alnsn 1239 1.1 alnsn SLJIT_ASSERT(search_states[from].type >= 0); 1240 1.1 alnsn 1241 1.1 alnsn from++; 1242 1.1 alnsn 1243 1.1 alnsn /* Be prepared for any paths (loops, etc). */ 1244 1.1 alnsn while (1) { 1245 1.1 alnsn if (dfa_transitions[from].type == type_id) 1246 1.1 alnsn if (id < dfa_transitions[from].value) 1247 1.1 alnsn id = dfa_transitions[from].value; 1248 1.1 alnsn 1249 1.1 alnsn if (search_states[from].value < id) { 1250 1.1 alnsn /* Forward step. */ 1251 1.1 alnsn if (search_states[from].value == -1) 1252 1.1 alnsn if (stack_push(stack, 0, from)) 1253 1.1 alnsn return REGEX_MEMORY_ERROR; 1254 1.1 alnsn search_states[from].value = id; 1255 1.1 alnsn 1256 1.1 alnsn if (dfa_transitions[from].type == type_branch) { 1257 1.1 alnsn if (stack_push(depth, id, from)) 1258 1.1 alnsn return REGEX_MEMORY_ERROR; 1259 1.1 alnsn from++; 1260 1.1 alnsn continue; 1261 1.1 alnsn } 1262 1.1 alnsn else if (dfa_transitions[from].type == type_jump) { 1263 1.1 alnsn from = dfa_transitions[from].value; 1264 1.1 alnsn continue; 1265 1.1 alnsn } 1266 1.1 alnsn else if (search_states[from].type < 0) { 1267 1.1 alnsn from++; 1268 1.1 alnsn continue; 1269 1.1 alnsn } 1270 1.1 alnsn } 1271 1.1 alnsn 1272 1.1 alnsn /* Back tracking. */ 1273 1.1 alnsn if (depth->count > 0) { 1274 1.1 alnsn id = stack_top(depth)->type; 1275 1.1 alnsn from = dfa_transitions[stack_pop(depth)->value].value; 1276 1.1 alnsn continue; 1277 1.1 alnsn } 1278 1.1 alnsn return 0; 1279 1.1 alnsn } 1280 1.1 alnsn } 1281 1.1 alnsn 1282 1.1 alnsn /* --------------------------------------------------------------------- */ 1283 1.1 alnsn /* Code generator */ 1284 1.1 alnsn /* --------------------------------------------------------------------- */ 1285 1.1 alnsn 1286 1.1.1.2 alnsn #define TERM_OFFSET_OF(index, offs) (((index) * no_states + (offs)) * sizeof(sljit_sw)) 1287 1.1.1.2 alnsn #define TERM_REL_OFFSET_OF(base, offs) ((base) + ((offs) * sizeof(sljit_sw))) 1288 1.1 alnsn 1289 1.1 alnsn #define EMIT_OP1(type, arg1, arg2, arg3, arg4) \ 1290 1.1 alnsn CHECK(sljit_emit_op1(compiler, type, arg1, arg2, arg3, arg4)) 1291 1.1 alnsn 1292 1.1 alnsn #define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \ 1293 1.1 alnsn CHECK(sljit_emit_op2(compiler, type, arg1, arg2, arg3, arg4, arg5, arg6)) 1294 1.1 alnsn 1295 1.1 alnsn #define EMIT_LABEL(label) \ 1296 1.1 alnsn label = sljit_emit_label(compiler); \ 1297 1.1 alnsn CHECK(!label) 1298 1.1 alnsn 1299 1.1 alnsn #define EMIT_JUMP(jump, type) \ 1300 1.1 alnsn jump = sljit_emit_jump(compiler, type); \ 1301 1.1 alnsn CHECK(!jump) 1302 1.1 alnsn 1303 1.1 alnsn #define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \ 1304 1.1 alnsn jump = sljit_emit_cmp(compiler, type, arg1, arg2, arg3, arg4); \ 1305 1.1 alnsn CHECK(!jump) 1306 1.1 alnsn 1307 1.1 alnsn /* CHECK depends on the use case. */ 1308 1.1 alnsn 1309 1.1 alnsn #define CHECK(exp) \ 1310 1.1 alnsn if (SLJIT_UNLIKELY(exp)) \ 1311 1.1 alnsn return REGEX_MEMORY_ERROR 1312 1.1 alnsn 1313 1.1 alnsn static int compile_uncond_tran(struct compiler_common *compiler_common, int reg) 1314 1.1 alnsn { 1315 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1316 1.1 alnsn struct stack *stack = &compiler_common->stack; 1317 1.1 alnsn struct stack_item *search_states = compiler_common->search_states; 1318 1.1 alnsn int flags = compiler_common->flags; 1319 1.1.1.2 alnsn sljit_sw no_states = compiler_common->no_states; 1320 1.1 alnsn sljit_uw head = 0; 1321 1.1.1.2 alnsn sljit_sw offset, value; 1322 1.1 alnsn 1323 1.1 alnsn if (reg != R_CURR_STATE || !(compiler_common->flags & REGEX_FAKE_MATCH_BEGIN)) { 1324 1.1 alnsn CHECK(trace_transitions(0, compiler_common)); 1325 1.1 alnsn } 1326 1.1 alnsn else { 1327 1.1 alnsn CHECK(trace_transitions(1, compiler_common)); 1328 1.1 alnsn } 1329 1.1 alnsn 1330 1.1 alnsn while (stack->count > 0) { 1331 1.1 alnsn value = stack_pop(stack)->value; 1332 1.1 alnsn if (search_states[value].type >= 0) { 1333 1.1 alnsn offset = TERM_OFFSET_OF(search_states[value].type, 0); 1334 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 1), SLJIT_IMM, head); 1335 1.1 alnsn if (offset > 0) 1336 1.1 alnsn head = offset; 1337 1.1 alnsn 1338 1.1 alnsn if (!(flags & REGEX_MATCH_BEGIN)) { 1339 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), R_TEMP, 0); 1340 1.1 alnsn if (flags & REGEX_ID_CHECK) { 1341 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 3), SLJIT_IMM, search_states[value].value); 1342 1.1 alnsn } 1343 1.1 alnsn } 1344 1.1 alnsn else if (flags & REGEX_ID_CHECK) { 1345 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), SLJIT_IMM, search_states[value].value); 1346 1.1 alnsn } 1347 1.1 alnsn } 1348 1.1 alnsn search_states[value].value = -1; 1349 1.1 alnsn } 1350 1.1 alnsn if (reg == R_NEXT_STATE) { 1351 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0); 1352 1.1 alnsn } 1353 1.1 alnsn else if (flags & REGEX_FAKE_MATCH_BEGIN) { 1354 1.1 alnsn SLJIT_ASSERT(compiler_common->dfa_transitions[1].type == type_newline && !compiler_common->dfa_transitions[1].value); 1355 1.1 alnsn offset = TERM_OFFSET_OF(search_states[1].type, 0); 1356 1.1 alnsn 1357 1.1 alnsn SLJIT_ASSERT(!(flags & REGEX_MATCH_BEGIN)); 1358 1.1 alnsn 1359 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 1), SLJIT_IMM, head); 1360 1.1 alnsn head = offset; 1361 1.1 alnsn 1362 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 2), SLJIT_IMM, 1); 1363 1.1 alnsn if (flags & REGEX_ID_CHECK) { 1364 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(reg), TERM_REL_OFFSET_OF(offset, 3), SLJIT_IMM, 0); 1365 1.1 alnsn } 1366 1.1 alnsn } 1367 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, head); 1368 1.1 alnsn return REGEX_NO_ERROR; 1369 1.1 alnsn } 1370 1.1 alnsn 1371 1.1.1.2 alnsn static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw curr_index) 1372 1.1 alnsn { 1373 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1374 1.1 alnsn struct stack *stack = &compiler_common->stack; 1375 1.1 alnsn struct stack_item *search_states = compiler_common->search_states; 1376 1.1.1.2 alnsn sljit_sw offset; 1377 1.1 alnsn int flags; 1378 1.1.1.2 alnsn sljit_sw no_states; 1379 1.1.1.2 alnsn sljit_sw value; 1380 1.1 alnsn struct sljit_jump *jump1; 1381 1.1 alnsn struct sljit_jump *jump2; 1382 1.1 alnsn struct sljit_jump *jump3; 1383 1.1 alnsn struct sljit_jump *jump4; 1384 1.1 alnsn struct sljit_jump *jump5; 1385 1.1 alnsn struct sljit_label *label1; 1386 1.1 alnsn 1387 1.1 alnsn flags = compiler_common->flags; 1388 1.1 alnsn no_states = compiler_common->no_states; 1389 1.1 alnsn 1390 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0); 1391 1.1 alnsn if (!(flags & (REGEX_ID_CHECK | REGEX_MATCH_BEGIN))) { 1392 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2)); 1393 1.1 alnsn } 1394 1.1 alnsn 1395 1.1 alnsn while (stack->count > 0) { 1396 1.1 alnsn value = stack_pop(stack)->value; 1397 1.1 alnsn if (search_states[value].type >= 0) { 1398 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1399 1.1 alnsn if (flags & REGEX_MATCH_VERBOSE) 1400 1.1 alnsn printf("-> (%3d:%3d) ", search_states[value].type, search_states[value].value); 1401 1.1 alnsn #endif 1402 1.1 alnsn offset = TERM_OFFSET_OF(search_states[value].type, 0); 1403 1.1 alnsn 1404 1.1 alnsn if (!(flags & REGEX_ID_CHECK)) { 1405 1.1 alnsn if (!(flags & REGEX_MATCH_BEGIN)) { 1406 1.1 alnsn /* Check whether item is inserted. */ 1407 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1); 1408 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0); 1409 1.1 alnsn if (offset > 0) { 1410 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset); 1411 1.1 alnsn } 1412 1.1 alnsn EMIT_JUMP(jump2, SLJIT_JUMP); 1413 1.1 alnsn 1414 1.1 alnsn /* Check whether old index <= index. */ 1415 1.1 alnsn EMIT_LABEL(label1); 1416 1.1 alnsn sljit_set_label(jump1, label1); 1417 1.1 alnsn 1418 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_LESS_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1419 1.1 alnsn 1420 1.1 alnsn EMIT_LABEL(label1); 1421 1.1 alnsn sljit_set_label(jump2, label1); 1422 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1423 1.1 alnsn 1424 1.1 alnsn EMIT_LABEL(label1); 1425 1.1 alnsn sljit_set_label(jump1, label1); 1426 1.1 alnsn } 1427 1.1 alnsn else { 1428 1.1 alnsn /* Check whether item is inserted. */ 1429 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1); 1430 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0); 1431 1.1 alnsn if (offset > 0) { 1432 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset); 1433 1.1 alnsn } 1434 1.1 alnsn EMIT_LABEL(label1); 1435 1.1 alnsn sljit_set_label(jump1, label1); 1436 1.1 alnsn } 1437 1.1 alnsn } 1438 1.1 alnsn else { 1439 1.1 alnsn if (!(flags & REGEX_MATCH_BEGIN)) { 1440 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2)); 1441 1.1 alnsn 1442 1.1 alnsn /* Check whether item is inserted. */ 1443 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1); 1444 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0); 1445 1.1 alnsn if (offset > 0) { 1446 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset); 1447 1.1 alnsn } 1448 1.1 alnsn EMIT_JUMP(jump2, SLJIT_JUMP); 1449 1.1 alnsn 1450 1.1 alnsn /* Check whether old index != index. */ 1451 1.1 alnsn EMIT_LABEL(label1); 1452 1.1 alnsn sljit_set_label(jump1, label1); 1453 1.1 alnsn 1454 1.1.1.4 alnsn EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1455 1.1.1.3 alnsn EMIT_JUMP(jump1, SLJIT_LESS); 1456 1.1.1.4 alnsn EMIT_JUMP(jump3, SLJIT_NOT_EQUAL); /* Greater. */ 1457 1.1 alnsn 1458 1.1 alnsn /* Old index == index. */ 1459 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 3)); 1460 1.1 alnsn if (search_states[value].value > 0) { 1461 1.1.1.3 alnsn EMIT_CMP(jump4, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1462 1.1 alnsn 1463 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1464 1.1 alnsn EMIT_LABEL(label1); 1465 1.1 alnsn sljit_set_label(jump4, label1); 1466 1.1 alnsn } 1467 1.1 alnsn 1468 1.1.1.4 alnsn EMIT_OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0); 1469 1.1.1.3 alnsn EMIT_JUMP(jump4, SLJIT_GREATER_EQUAL); 1470 1.1 alnsn EMIT_JUMP(jump5, SLJIT_JUMP); 1471 1.1 alnsn 1472 1.1 alnsn /* Overwrite index & id. */ 1473 1.1 alnsn EMIT_LABEL(label1); 1474 1.1 alnsn sljit_set_label(jump3, label1); 1475 1.1 alnsn sljit_set_label(jump2, label1); 1476 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1477 1.1 alnsn 1478 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 3)); 1479 1.1 alnsn if (search_states[value].value > 0) { 1480 1.1.1.3 alnsn EMIT_CMP(jump3, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1481 1.1 alnsn 1482 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1483 1.1 alnsn EMIT_LABEL(label1); 1484 1.1 alnsn sljit_set_label(jump3, label1); 1485 1.1 alnsn } 1486 1.1 alnsn 1487 1.1 alnsn EMIT_LABEL(label1); 1488 1.1 alnsn sljit_set_label(jump5, label1); 1489 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0); 1490 1.1 alnsn 1491 1.1 alnsn /* Exit. */ 1492 1.1 alnsn EMIT_LABEL(label1); 1493 1.1 alnsn sljit_set_label(jump1, label1); 1494 1.1 alnsn sljit_set_label(jump4, label1); 1495 1.1 alnsn } 1496 1.1 alnsn else { 1497 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2)); 1498 1.1 alnsn 1499 1.1 alnsn if (search_states[value].value > 0) { 1500 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_GREATER, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1501 1.1 alnsn 1502 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, search_states[value].value); 1503 1.1 alnsn EMIT_LABEL(label1); 1504 1.1 alnsn sljit_set_label(jump1, label1); 1505 1.1 alnsn } 1506 1.1 alnsn 1507 1.1 alnsn /* Check whether item is inserted. */ 1508 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1); 1509 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0); 1510 1.1 alnsn if (offset > 0) { 1511 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset); 1512 1.1 alnsn } 1513 1.1 alnsn EMIT_JUMP(jump2, SLJIT_JUMP); 1514 1.1 alnsn 1515 1.1 alnsn /* Check whether old id >= id. */ 1516 1.1 alnsn EMIT_LABEL(label1); 1517 1.1 alnsn sljit_set_label(jump1, label1); 1518 1.1 alnsn 1519 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1520 1.1 alnsn 1521 1.1 alnsn EMIT_LABEL(label1); 1522 1.1 alnsn sljit_set_label(jump2, label1); 1523 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0); 1524 1.1 alnsn 1525 1.1 alnsn EMIT_LABEL(label1); 1526 1.1 alnsn sljit_set_label(jump1, label1); 1527 1.1 alnsn } 1528 1.1 alnsn } 1529 1.1 alnsn } 1530 1.1 alnsn search_states[value].value = -1; 1531 1.1 alnsn } 1532 1.1 alnsn 1533 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1534 1.1 alnsn if (flags & REGEX_MATCH_VERBOSE) 1535 1.1 alnsn printf("\n"); 1536 1.1 alnsn #endif 1537 1.1 alnsn return REGEX_NO_ERROR; 1538 1.1 alnsn } 1539 1.1 alnsn 1540 1.1 alnsn static int compile_end_check(struct compiler_common *compiler_common, struct sljit_label *end_check_label) 1541 1.1 alnsn { 1542 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1543 1.1 alnsn struct sljit_jump *jump; 1544 1.1 alnsn struct sljit_jump *clear_states_jump; 1545 1.1 alnsn struct sljit_label *label; 1546 1.1 alnsn struct sljit_label *leave_label; 1547 1.1 alnsn struct sljit_label *begin_loop_label; 1548 1.1 alnsn 1549 1.1 alnsn /* Priority order: best_begin > best_end > best_id. 1550 1.1 alnsn In other words: 1551 1.1 alnsn if (new best_begin > old test_begin) do nothing 1552 1.1 alnsn otherwise we know that new_end > old_end, since R_CURR_INDEX ever increasing 1553 1.1 alnsn therefore we must overwrite all best_* variables (new_id also contains the highest id for this turn). */ 1554 1.1 alnsn 1555 1.1 alnsn /* Both R_CURR_CHAR and R_BEST_BEGIN used as temporary registers. */ 1556 1.1 alnsn 1557 1.1 alnsn if (!(compiler_common->flags & REGEX_MATCH_BEGIN)) { 1558 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2)); 1559 1.1.1.3 alnsn EMIT_CMP(jump, !(compiler_common->flags & REGEX_MATCH_NON_GREEDY) ? SLJIT_LESS : SLJIT_LESS_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), R_CURR_CHAR, 0); 1560 1.1 alnsn sljit_set_label(jump, end_check_label); 1561 1.1 alnsn 1562 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), R_CURR_CHAR, 0); 1563 1.1 alnsn if (!(compiler_common->flags & (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END))) { 1564 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0); 1565 1.1 alnsn } 1566 1.1 alnsn else { 1567 1.1 alnsn if ((compiler_common->flags & (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END)) == (REGEX_FAKE_MATCH_BEGIN | REGEX_FAKE_MATCH_END)) { 1568 1.1 alnsn EMIT_OP2(SLJIT_SUB, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0, SLJIT_IMM, 2); 1569 1.1 alnsn } 1570 1.1 alnsn else { 1571 1.1 alnsn EMIT_OP2(SLJIT_SUB, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0, SLJIT_IMM, 1); 1572 1.1 alnsn } 1573 1.1 alnsn } 1574 1.1 alnsn if (compiler_common->flags & REGEX_ID_CHECK) { 1575 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 3)); 1576 1.1 alnsn } 1577 1.1 alnsn 1578 1.1.1.3 alnsn EMIT_CMP(clear_states_jump, SLJIT_LESS, R_CURR_CHAR, 0, R_BEST_BEGIN, 0); 1579 1.1 alnsn 1580 1.1 alnsn EMIT_LABEL(leave_label); 1581 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, R_CURR_CHAR, 0); 1582 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 1583 1.1 alnsn sljit_set_label(jump, end_check_label); 1584 1.1 alnsn 1585 1.1 alnsn /* A loop to clear all states, which are > (or >=) than R_CURR_CHAR. */ 1586 1.1 alnsn EMIT_LABEL(label); 1587 1.1 alnsn sljit_set_label(clear_states_jump, label); 1588 1.1 alnsn 1589 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0); 1590 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0); 1591 1.1 alnsn 1592 1.1 alnsn /* Begin of the loop. */ 1593 1.1 alnsn EMIT_LABEL(begin_loop_label); 1594 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, R_TEMP, 0, SLJIT_IMM, 0); 1595 1.1 alnsn sljit_set_label(jump, leave_label); 1596 1.1 alnsn 1597 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, R_CURR_STATE, 0); 1598 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw)); 1599 1.1.1.3 alnsn EMIT_CMP(clear_states_jump, !(compiler_common->flags & REGEX_MATCH_NON_GREEDY) ? SLJIT_GREATER : SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_TEMP), 2 * sizeof(sljit_sw), R_CURR_CHAR, 0); 1600 1.1 alnsn 1601 1.1 alnsn /* Case 1: keep this case. */ 1602 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw), R_NEXT_HEAD, 0); 1603 1.1 alnsn EMIT_OP2(SLJIT_SUB, R_NEXT_HEAD, 0, R_TEMP, 0, R_CURR_STATE, 0); 1604 1.1 alnsn 1605 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_BEST_BEGIN, 0); 1606 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 1607 1.1 alnsn sljit_set_label(jump, begin_loop_label); 1608 1.1 alnsn 1609 1.1 alnsn /* Case 2: remove this case. */ 1610 1.1 alnsn EMIT_LABEL(label); 1611 1.1 alnsn sljit_set_label(clear_states_jump, label); 1612 1.1 alnsn 1613 1.1.1.2 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_TEMP), sizeof(sljit_sw), SLJIT_IMM, -1); 1614 1.1 alnsn 1615 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_BEST_BEGIN, 0); 1616 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 1617 1.1 alnsn sljit_set_label(jump, begin_loop_label); 1618 1.1 alnsn } 1619 1.1 alnsn else { 1620 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_IMM, 0); 1621 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0); 1622 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0); 1623 1.1 alnsn if (compiler_common->flags & REGEX_ID_CHECK) { 1624 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2)); 1625 1.1 alnsn } 1626 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 1627 1.1 alnsn sljit_set_label(jump, end_check_label); 1628 1.1 alnsn } 1629 1.1 alnsn return REGEX_NO_ERROR; 1630 1.1 alnsn } 1631 1.1 alnsn 1632 1.1 alnsn static int compile_leave_fast_forward(struct compiler_common *compiler_common, struct sljit_label *fast_forward_label) 1633 1.1 alnsn { 1634 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1635 1.1 alnsn struct stack *stack = &compiler_common->stack; 1636 1.1 alnsn struct stack_item *dfa_transitions = compiler_common->dfa_transitions; 1637 1.1 alnsn struct stack_item *search_states = compiler_common->search_states; 1638 1.1 alnsn int ind; 1639 1.1 alnsn struct sljit_jump *jump; 1640 1.1 alnsn int init_range = 1, prev_value = 0; 1641 1.1 alnsn 1642 1.1 alnsn while (stack->count > 0) { 1643 1.1 alnsn ind = stack_pop(stack)->value; 1644 1.1 alnsn search_states[ind].value = -1; 1645 1.1 alnsn if (search_states[ind].type >= 0) { 1646 1.1 alnsn if (dfa_transitions[ind].type == type_char) { 1647 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value); 1648 1.1 alnsn sljit_set_label(jump, fast_forward_label); 1649 1.1 alnsn } 1650 1.1 alnsn else if (dfa_transitions[ind].type == type_rng_start) { 1651 1.1 alnsn SLJIT_ASSERT(!dfa_transitions[ind].value); 1652 1.1 alnsn ind++; 1653 1.1 alnsn while (dfa_transitions[ind].type != type_rng_end) { 1654 1.1 alnsn if (dfa_transitions[ind].type == type_rng_char) { 1655 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value); 1656 1.1 alnsn sljit_set_label(jump, fast_forward_label); 1657 1.1 alnsn } 1658 1.1 alnsn else { 1659 1.1 alnsn SLJIT_ASSERT(dfa_transitions[ind].type == type_rng_left); 1660 1.1 alnsn if (init_range) { 1661 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_CHAR, 0); 1662 1.1 alnsn init_range = 0; 1663 1.1 alnsn } 1664 1.1 alnsn if (dfa_transitions[ind].value != prev_value) { 1665 1.1 alnsn /* Best compatibility to all archs. */ 1666 1.1 alnsn prev_value -= dfa_transitions[ind].value; 1667 1.1 alnsn if (prev_value < 0) { 1668 1.1 alnsn EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, -prev_value); 1669 1.1 alnsn } 1670 1.1 alnsn else { 1671 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, prev_value); 1672 1.1 alnsn } 1673 1.1 alnsn prev_value = dfa_transitions[ind].value; 1674 1.1 alnsn } 1675 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_LESS_EQUAL, R_TEMP, 0, SLJIT_IMM, dfa_transitions[ind + 1].value - dfa_transitions[ind].value); 1676 1.1 alnsn sljit_set_label(jump, fast_forward_label); 1677 1.1 alnsn ind++; 1678 1.1 alnsn } 1679 1.1 alnsn ind++; 1680 1.1 alnsn } 1681 1.1 alnsn } 1682 1.1 alnsn else { 1683 1.1 alnsn SLJIT_ASSERT(dfa_transitions[ind].type == type_newline); 1684 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\n'); 1685 1.1 alnsn sljit_set_label(jump, fast_forward_label); 1686 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\r'); 1687 1.1 alnsn sljit_set_label(jump, fast_forward_label); 1688 1.1 alnsn } 1689 1.1 alnsn } 1690 1.1 alnsn } 1691 1.1 alnsn return REGEX_NO_ERROR; 1692 1.1 alnsn } 1693 1.1 alnsn 1694 1.1.1.2 alnsn static int compile_newline_check(struct compiler_common *compiler_common, sljit_sw ind) 1695 1.1 alnsn { 1696 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1697 1.1 alnsn struct sljit_jump *jump1; 1698 1.1 alnsn struct sljit_jump *jump2; 1699 1.1 alnsn struct sljit_label *label; 1700 1.1.1.2 alnsn sljit_sw no_states; 1701 1.1.1.2 alnsn sljit_sw offset; 1702 1.1 alnsn 1703 1.1 alnsn /* Check whether a new-line character is found. */ 1704 1.1.1.3 alnsn EMIT_CMP(jump1, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\n'); 1705 1.1.1.3 alnsn EMIT_CMP(jump2, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, '\r'); 1706 1.1 alnsn 1707 1.1 alnsn no_states = compiler_common->no_states; 1708 1.1 alnsn offset = TERM_OFFSET_OF(compiler_common->search_states[ind].type, 1); 1709 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), offset); 1710 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), offset, SLJIT_IMM, -1); 1711 1.1 alnsn CHECK(sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0)); 1712 1.1 alnsn 1713 1.1 alnsn EMIT_LABEL(label); 1714 1.1 alnsn sljit_set_label(jump1, label); 1715 1.1 alnsn sljit_set_label(jump2, label); 1716 1.1 alnsn return REGEX_NO_ERROR; 1717 1.1 alnsn } 1718 1.1 alnsn 1719 1.1 alnsn #undef CHECK 1720 1.1 alnsn 1721 1.1 alnsn #define CHECK(exp) \ 1722 1.1 alnsn if (SLJIT_UNLIKELY(exp)) \ 1723 1.1 alnsn return 0 1724 1.1 alnsn 1725 1.1 alnsn static SLJIT_INLINE void range_set_label(struct sljit_jump **range_jump_list, struct sljit_label *label) 1726 1.1 alnsn { 1727 1.1 alnsn while (*range_jump_list) { 1728 1.1 alnsn sljit_set_label(*range_jump_list, label); 1729 1.1 alnsn range_jump_list++; 1730 1.1 alnsn } 1731 1.1 alnsn } 1732 1.1 alnsn 1733 1.1.1.2 alnsn static sljit_sw compile_range_check(struct compiler_common *compiler_common, sljit_sw ind) 1734 1.1 alnsn { 1735 1.1 alnsn struct sljit_compiler *compiler = compiler_common->compiler; 1736 1.1 alnsn struct stack_item *dfa_transitions = compiler_common->dfa_transitions; 1737 1.1 alnsn struct sljit_jump **range_jump_list = compiler_common->range_jump_list; 1738 1.1 alnsn int invert = dfa_transitions[ind].value; 1739 1.1 alnsn struct sljit_label *label; 1740 1.1.1.2 alnsn sljit_sw no_states; 1741 1.1.1.2 alnsn sljit_sw offset; 1742 1.1 alnsn int init_range = 1, prev_value = 0; 1743 1.1 alnsn 1744 1.1 alnsn ind++; 1745 1.1 alnsn 1746 1.1 alnsn while (dfa_transitions[ind].type != type_rng_end) { 1747 1.1 alnsn if (dfa_transitions[ind].type == type_rng_char) { 1748 1.1.1.3 alnsn EMIT_CMP(*range_jump_list, SLJIT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, dfa_transitions[ind].value); 1749 1.1 alnsn range_jump_list++; 1750 1.1 alnsn } 1751 1.1 alnsn else { 1752 1.1 alnsn SLJIT_ASSERT(dfa_transitions[ind].type == type_rng_left); 1753 1.1 alnsn if (init_range) { 1754 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_CHAR, 0); 1755 1.1 alnsn init_range = 0; 1756 1.1 alnsn } 1757 1.1 alnsn if (dfa_transitions[ind].value != prev_value) { 1758 1.1 alnsn /* Best compatibility to all archs. */ 1759 1.1 alnsn prev_value -= dfa_transitions[ind].value; 1760 1.1 alnsn if (prev_value < 0) { 1761 1.1 alnsn EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, -prev_value); 1762 1.1 alnsn } 1763 1.1 alnsn else { 1764 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, prev_value); 1765 1.1 alnsn } 1766 1.1 alnsn prev_value = dfa_transitions[ind].value; 1767 1.1 alnsn } 1768 1.1.1.3 alnsn EMIT_CMP(*range_jump_list, SLJIT_LESS_EQUAL, R_TEMP, 0, SLJIT_IMM, dfa_transitions[ind + 1].value - dfa_transitions[ind].value); 1769 1.1 alnsn range_jump_list++; 1770 1.1 alnsn ind++; 1771 1.1 alnsn } 1772 1.1 alnsn ind++; 1773 1.1 alnsn } 1774 1.1 alnsn 1775 1.1 alnsn *range_jump_list = NULL; 1776 1.1 alnsn 1777 1.1 alnsn if (!invert) { 1778 1.1 alnsn no_states = compiler_common->no_states; 1779 1.1 alnsn offset = TERM_OFFSET_OF(compiler_common->search_states[ind].type, 1); 1780 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), offset); 1781 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), offset, SLJIT_IMM, -1); 1782 1.1 alnsn CHECK(sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0)); 1783 1.1 alnsn 1784 1.1 alnsn EMIT_LABEL(label); 1785 1.1 alnsn range_set_label(compiler_common->range_jump_list, label); 1786 1.1 alnsn /* Clears the jump list. */ 1787 1.1 alnsn *compiler_common->range_jump_list = NULL; 1788 1.1 alnsn } 1789 1.1 alnsn return ind; 1790 1.1 alnsn } 1791 1.1 alnsn 1792 1.1 alnsn #undef TERM_OFFSET_OF 1793 1.1 alnsn #undef EMIT_OP1 1794 1.1 alnsn #undef EMIT_OP2 1795 1.1 alnsn #undef EMIT_LABEL 1796 1.1 alnsn #undef EMIT_JUMP 1797 1.1 alnsn #undef EMIT_CMP 1798 1.1 alnsn #undef CHECK 1799 1.1 alnsn 1800 1.1 alnsn /* --------------------------------------------------------------------- */ 1801 1.1 alnsn /* Main compiler */ 1802 1.1 alnsn /* --------------------------------------------------------------------- */ 1803 1.1 alnsn 1804 1.1.1.2 alnsn #define TERM_OFFSET_OF(ind, offs) (((ind) * compiler_common.no_states + (offs)) * sizeof(sljit_sw)) 1805 1.1 alnsn 1806 1.1 alnsn #define EMIT_OP1(type, arg1, arg2, arg3, arg4) \ 1807 1.1 alnsn CHECK(sljit_emit_op1(compiler_common.compiler, type, arg1, arg2, arg3, arg4)) 1808 1.1 alnsn 1809 1.1 alnsn #define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \ 1810 1.1 alnsn CHECK(sljit_emit_op2(compiler_common.compiler, type, arg1, arg2, arg3, arg4, arg5, arg6)) 1811 1.1 alnsn 1812 1.1 alnsn #define EMIT_LABEL(label) \ 1813 1.1 alnsn label = sljit_emit_label(compiler_common.compiler); \ 1814 1.1 alnsn CHECK(!label) 1815 1.1 alnsn 1816 1.1 alnsn #define EMIT_JUMP(jump, type) \ 1817 1.1 alnsn jump = sljit_emit_jump(compiler_common.compiler, type); \ 1818 1.1 alnsn CHECK(!jump) 1819 1.1 alnsn 1820 1.1 alnsn #define EMIT_CMP(jump, type, arg1, arg2, arg3, arg4) \ 1821 1.1 alnsn jump = sljit_emit_cmp(compiler_common.compiler, type, arg1, arg2, arg3, arg4); \ 1822 1.1 alnsn CHECK(!jump) 1823 1.1 alnsn 1824 1.1 alnsn /* A do {} while(0) expression helps to avoid goto statements. */ 1825 1.1 alnsn #define BEGIN_GUARD \ 1826 1.1 alnsn do { 1827 1.1 alnsn 1828 1.1 alnsn #define END_GUARD \ 1829 1.1 alnsn } while(0); 1830 1.1 alnsn 1831 1.1 alnsn #define CHECK(exp) \ 1832 1.1 alnsn if (SLJIT_UNLIKELY(exp)) \ 1833 1.1 alnsn break; 1834 1.1 alnsn 1835 1.1 alnsn struct regex_machine* regex_compile(const regex_char_t *regex_string, int length, int re_flags, int *error) 1836 1.1 alnsn { 1837 1.1 alnsn struct compiler_common compiler_common; 1838 1.1.1.2 alnsn sljit_sw ind; 1839 1.1 alnsn int error_code, done, suggest_fast_forward; 1840 1.1 alnsn /* ID of an empty match (-1 if not reachable). */ 1841 1.1 alnsn int empty_match_id; 1842 1.1 alnsn 1843 1.1 alnsn struct sljit_jump *jump; 1844 1.1 alnsn struct sljit_jump *best_match_found_jump; 1845 1.1 alnsn struct sljit_jump *fast_forward_jump = NULL; 1846 1.1 alnsn struct sljit_jump *length_is_zero_jump; 1847 1.1 alnsn struct sljit_jump *end_check_jump = NULL; 1848 1.1 alnsn struct sljit_jump *best_match_check_jump = NULL; 1849 1.1 alnsn struct sljit_jump *non_greedy_end_jump = NULL; 1850 1.1 alnsn struct sljit_label *label; 1851 1.1 alnsn struct sljit_label *end_check_label = NULL; 1852 1.1 alnsn struct sljit_label *start_label; 1853 1.1 alnsn struct sljit_label *fast_forward_label; 1854 1.1 alnsn struct sljit_label *fast_forward_return_label; 1855 1.1 alnsn 1856 1.1 alnsn if (error) 1857 1.1 alnsn *error = REGEX_NO_ERROR; 1858 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1859 1.1 alnsn compiler_common.flags = re_flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_MATCH_NON_GREEDY | REGEX_NEWLINE | REGEX_MATCH_VERBOSE); 1860 1.1 alnsn #else 1861 1.1 alnsn compiler_common.flags = re_flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END | REGEX_MATCH_NON_GREEDY | REGEX_NEWLINE); 1862 1.1 alnsn #endif 1863 1.1 alnsn 1864 1.1 alnsn /* Step 1: parsing (Left->Right). 1865 1.1 alnsn Syntax check and AST generator. */ 1866 1.1 alnsn error_code = parse(regex_string, length, &compiler_common); 1867 1.1 alnsn if (error_code) { 1868 1.1 alnsn stack_destroy(&compiler_common.stack); 1869 1.1 alnsn if (error) 1870 1.1 alnsn *error = error_code; 1871 1.1 alnsn return NULL; 1872 1.1 alnsn } 1873 1.1 alnsn 1874 1.1 alnsn /* Step 2: generating branches (Right->Left). */ 1875 1.1 alnsn error_code = generate_transitions(&compiler_common); 1876 1.1 alnsn stack_destroy(&compiler_common.stack); 1877 1.1 alnsn stack_destroy(&compiler_common.depth); 1878 1.1 alnsn if (error_code) { 1879 1.1 alnsn if (compiler_common.dfa_transitions) 1880 1.1.1.3 alnsn SLJIT_FREE(compiler_common.dfa_transitions, NULL); 1881 1.1 alnsn if (error) 1882 1.1 alnsn *error = error_code; 1883 1.1 alnsn return NULL; 1884 1.1 alnsn } 1885 1.1 alnsn 1886 1.1 alnsn /* Step 3: Generate necessary data for depth-first search (Left->Right). */ 1887 1.1 alnsn error_code = generate_search_states(&compiler_common); 1888 1.1 alnsn if (error_code) { 1889 1.1.1.3 alnsn SLJIT_FREE(compiler_common.dfa_transitions, NULL); 1890 1.1 alnsn if (error) 1891 1.1 alnsn *error = error_code; 1892 1.1 alnsn return NULL; 1893 1.1 alnsn } 1894 1.1 alnsn 1895 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1896 1.1 alnsn if (compiler_common.flags & REGEX_MATCH_VERBOSE) 1897 1.1 alnsn verbose_transitions(&compiler_common); 1898 1.1 alnsn #endif 1899 1.1 alnsn 1900 1.1 alnsn /* Step 4: Left->Right generate code. */ 1901 1.1 alnsn stack_init(&compiler_common.stack); 1902 1.1 alnsn stack_init(&compiler_common.depth); 1903 1.1 alnsn done = 0; 1904 1.1 alnsn compiler_common.machine = NULL; 1905 1.1 alnsn compiler_common.compiler = NULL; 1906 1.1 alnsn compiler_common.range_jump_list = NULL; 1907 1.1 alnsn 1908 1.1 alnsn BEGIN_GUARD 1909 1.1 alnsn 1910 1.1.1.3 alnsn compiler_common.machine = (struct regex_machine*)SLJIT_MALLOC(sizeof(struct regex_machine) + (compiler_common.terms_size - 1) * sizeof(sljit_uw), NULL); 1911 1.1 alnsn CHECK(!compiler_common.machine); 1912 1.1 alnsn 1913 1.1.1.3 alnsn compiler_common.compiler = sljit_create_compiler(NULL); 1914 1.1 alnsn CHECK(!compiler_common.compiler); 1915 1.1 alnsn 1916 1.1 alnsn if (compiler_common.longest_range_size > 0) { 1917 1.1.1.3 alnsn compiler_common.range_jump_list = (struct sljit_jump**)SLJIT_MALLOC(sizeof(struct sljit_jump*) * compiler_common.longest_range_size, NULL); 1918 1.1 alnsn CHECK(!compiler_common.range_jump_list); 1919 1.1 alnsn } 1920 1.1 alnsn 1921 1.1 alnsn if ((compiler_common.flags & REGEX_ID_CHECK) && !(compiler_common.flags & REGEX_MATCH_BEGIN)) 1922 1.1 alnsn compiler_common.no_states = 4; 1923 1.1 alnsn else if (!(compiler_common.flags & REGEX_ID_CHECK) && (compiler_common.flags & REGEX_MATCH_BEGIN)) 1924 1.1 alnsn compiler_common.no_states = 2; 1925 1.1 alnsn else 1926 1.1 alnsn compiler_common.no_states = 3; 1927 1.1 alnsn 1928 1.1 alnsn compiler_common.machine->flags = compiler_common.flags; 1929 1.1 alnsn compiler_common.machine->no_states = compiler_common.no_states; 1930 1.1 alnsn compiler_common.machine->size = compiler_common.machine->no_states * compiler_common.terms_size; 1931 1.1 alnsn 1932 1.1 alnsn /* Study the regular expression. */ 1933 1.1 alnsn empty_match_id = -1; 1934 1.1 alnsn suggest_fast_forward = 1; 1935 1.1 alnsn if (!(compiler_common.flags & REGEX_FAKE_MATCH_BEGIN)) { 1936 1.1 alnsn CHECK(trace_transitions(0, &compiler_common)); 1937 1.1 alnsn while (compiler_common.stack.count > 0) { 1938 1.1 alnsn ind = stack_pop(&compiler_common.stack)->value; 1939 1.1 alnsn if (compiler_common.search_states[ind].type == 0) { 1940 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_end); 1941 1.1 alnsn suggest_fast_forward = 0; 1942 1.1 alnsn empty_match_id = compiler_common.search_states[ind].value; 1943 1.1 alnsn } 1944 1.1 alnsn else if (compiler_common.search_states[ind].type > 0) { 1945 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type != type_end); 1946 1.1 alnsn if (compiler_common.dfa_transitions[ind].type == type_rng_start && compiler_common.dfa_transitions[ind].value) 1947 1.1 alnsn suggest_fast_forward = 0; 1948 1.1 alnsn } 1949 1.1 alnsn compiler_common.search_states[ind].value = -1; 1950 1.1 alnsn } 1951 1.1 alnsn } 1952 1.1 alnsn else { 1953 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[1].type == type_newline); 1954 1.1 alnsn CHECK(trace_transitions(1, &compiler_common)); 1955 1.1 alnsn while (compiler_common.stack.count > 0) { 1956 1.1 alnsn ind = stack_pop(&compiler_common.stack)->value; 1957 1.1 alnsn if (compiler_common.search_states[ind].type == 0) { 1958 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_end); 1959 1.1 alnsn suggest_fast_forward = 0; 1960 1.1 alnsn empty_match_id = compiler_common.search_states[ind].value; 1961 1.1 alnsn } 1962 1.1 alnsn compiler_common.search_states[ind].value = -1; 1963 1.1 alnsn } 1964 1.1 alnsn } 1965 1.1 alnsn 1966 1.1 alnsn /* Step 4.1: Generate entry. */ 1967 1.1.1.3 alnsn CHECK(sljit_emit_enter(compiler_common.compiler, 0, 3, 5, 5, 0, 0, 0)); 1968 1.1 alnsn 1969 1.1 alnsn /* Copy arguments to their place. */ 1970 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, R_REGEX_MATCH, 0, SLJIT_S0, 0); 1971 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, R_STRING, 0, SLJIT_S1, 0); 1972 1.1.1.3 alnsn EMIT_OP2(SLJIT_ADD, R_LENGTH, 0, SLJIT_S2, 0, SLJIT_IMM, 1); 1973 1.1 alnsn 1974 1.1 alnsn /* Init global registers. */ 1975 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current)); 1976 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next)); 1977 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head)); 1978 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_BEST_BEGIN, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin)); 1979 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_INDEX, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index)); 1980 1.1 alnsn 1981 1.1 alnsn /* Check whether the best match has already found in a previous frame. */ 1982 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 0); 1983 1.1 alnsn EMIT_JUMP(best_match_found_jump, SLJIT_JUMP); 1984 1.1 alnsn 1985 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 1986 1.1 alnsn if (compiler_common.flags & REGEX_MATCH_VERBOSE) 1987 1.1 alnsn printf("\n-----------------\nTrace\n-----------------\n"); 1988 1.1 alnsn #endif 1989 1.1 alnsn 1990 1.1 alnsn /* Step 4.2: Generate code for state 0. */ 1991 1.1 alnsn EMIT_LABEL(label); 1992 1.1 alnsn compiler_common.machine->entry_addrs[0] = (sljit_uw)label; 1993 1.1 alnsn 1994 1.1 alnsn /* Swapping current and next. */ 1995 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_STATE, 0); 1996 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, R_NEXT_STATE, 0); 1997 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, R_TEMP, 0); 1998 1.1 alnsn 1999 1.1 alnsn /* Checking whether the best case needs to be updated. */ 2000 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_END)) { 2001 1.1.1.3 alnsn EMIT_CMP(end_check_jump, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM, -1); 2002 1.1 alnsn EMIT_LABEL(end_check_label); 2003 1.1 alnsn } 2004 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), TERM_REL_OFFSET_OF(0, 1), SLJIT_IMM, -1); 2005 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_CURR_INDEX, 0, R_CURR_INDEX, 0, SLJIT_IMM, 1); 2006 1.1 alnsn 2007 1.1 alnsn /* Checking whether best case has already found. */ 2008 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_END) || (compiler_common.flags & REGEX_MATCH_BEGIN)) { 2009 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) { 2010 1.1 alnsn /* We can bail out if no more active states remain and R_BEST_BEGIN != -1. */ 2011 1.1.1.3 alnsn EMIT_CMP(best_match_check_jump, SLJIT_NOT_EQUAL, R_BEST_BEGIN, 0, SLJIT_IMM, -1); 2012 1.1 alnsn } 2013 1.1 alnsn else { 2014 1.1 alnsn /* We can bail out if no more active states remain (regardless of R_BEST_BEGIN). */ 2015 1.1.1.3 alnsn EMIT_CMP(best_match_check_jump, SLJIT_EQUAL, R_NEXT_HEAD, 0, SLJIT_IMM, 0); 2016 1.1 alnsn } 2017 1.1 alnsn } 2018 1.1 alnsn 2019 1.1 alnsn EMIT_LABEL(start_label); 2020 1.1 alnsn sljit_set_label(jump, start_label); 2021 1.1 alnsn 2022 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_BEGIN) && suggest_fast_forward) { 2023 1.1.1.3 alnsn EMIT_CMP(fast_forward_jump, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0); 2024 1.1 alnsn } 2025 1.1 alnsn 2026 1.1 alnsn /* Loading the next character. */ 2027 1.1.1.4 alnsn EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z, R_LENGTH, 0, R_LENGTH, 0, SLJIT_IMM, 1); 2028 1.1.1.3 alnsn EMIT_JUMP(length_is_zero_jump, SLJIT_EQUAL); 2029 1.1 alnsn 2030 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_STRING, 0); 2031 1.1 alnsn #ifdef REGEX_USE_8BIT_CHARS 2032 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV_U8, R_CURR_CHAR, 0, SLJIT_MEM1(R_TEMP), 0); 2033 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, 1); 2034 1.1 alnsn #else 2035 1.1 alnsn EMIT_OP1(SLJIT_MOV_UH, R_CURR_CHAR, 0, SLJIT_MEM1(R_TEMP), 0); 2036 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_TEMP, 0, R_TEMP, 0, SLJIT_IMM, 2); 2037 1.1 alnsn #endif 2038 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_STRING, 0, R_TEMP, 0); 2039 1.1 alnsn 2040 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 2041 1.1 alnsn if (compiler_common.flags & REGEX_MATCH_VERBOSE) { 2042 1.1 alnsn printf("(%3d): ", 0); 2043 1.1 alnsn CHECK(trace_transitions(0, &compiler_common)); 2044 1.1 alnsn while (compiler_common.stack.count > 0) { 2045 1.1 alnsn ind = stack_pop(&compiler_common.stack)->value; 2046 1.1 alnsn if (compiler_common.search_states[ind].type >= 0) 2047 1.1 alnsn printf("-> (%3d:%3d) ", compiler_common.search_states[ind].type, compiler_common.search_states[ind].value); 2048 1.1 alnsn compiler_common.search_states[ind].value = -1; 2049 1.1 alnsn } 2050 1.1 alnsn printf("\n"); 2051 1.1 alnsn } 2052 1.1 alnsn #endif 2053 1.1 alnsn 2054 1.1 alnsn EMIT_LABEL(fast_forward_return_label); 2055 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) { 2056 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 1); 2057 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_END)) { 2058 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_BEST_BEGIN, 0, SLJIT_IMM, -1); 2059 1.1 alnsn } 2060 1.1 alnsn 2061 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_CURR_INDEX, 0); 2062 1.1 alnsn CHECK(compile_uncond_tran(&compiler_common, R_NEXT_STATE)); 2063 1.1 alnsn /* And branching to the first state. */ 2064 1.1 alnsn CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0)); 2065 1.1 alnsn 2066 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_END)) { 2067 1.1 alnsn EMIT_LABEL(label); 2068 1.1 alnsn sljit_set_label(jump, label); 2069 1.1 alnsn } 2070 1.1 alnsn } 2071 1.1 alnsn /* This is the case where we only have to reset the R_NEXT_HEAD. */ 2072 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, R_NEXT_HEAD, 0); 2073 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0); 2074 1.1 alnsn CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0)); 2075 1.1 alnsn 2076 1.1 alnsn /* Fast-forward loop. */ 2077 1.1 alnsn if (fast_forward_jump) { 2078 1.1 alnsn /* Quit from fast-forward loop. */ 2079 1.1 alnsn EMIT_LABEL(fast_forward_label); 2080 1.1 alnsn EMIT_OP2(SLJIT_SUB, R_TEMP, 0, R_NEXT_HEAD, 0, SLJIT_IMM, 1); 2081 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_LENGTH, 0, R_NEXT_STATE, 0); 2082 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_STRING, 0, R_CURR_STATE, 0); 2083 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_INDEX, 0, R_NEXT_HEAD, 0); 2084 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next)); 2085 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current)); 2086 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head)); 2087 1.1 alnsn 2088 1.1 alnsn /* Update the start field of the locations. */ 2089 1.1 alnsn CHECK(trace_transitions(0, &compiler_common)); 2090 1.1 alnsn while (compiler_common.stack.count > 0) { 2091 1.1 alnsn ind = stack_pop(&compiler_common.stack)->value; 2092 1.1 alnsn if (compiler_common.search_states[ind].type >= 0) { 2093 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 2), R_TEMP, 0); 2094 1.1 alnsn } 2095 1.1 alnsn compiler_common.search_states[ind].value = -1; 2096 1.1 alnsn } 2097 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_forward), SLJIT_IMM, 0); 2098 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 2099 1.1 alnsn sljit_set_label(jump, fast_forward_return_label); 2100 1.1 alnsn 2101 1.1 alnsn /* Start fast-forward. */ 2102 1.1 alnsn EMIT_LABEL(label); 2103 1.1 alnsn sljit_set_label(fast_forward_jump, label); 2104 1.1 alnsn 2105 1.1 alnsn /* Moving everything to registers. */ 2106 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next), R_NEXT_STATE, 0); 2107 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current), R_CURR_STATE, 0); 2108 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head), R_NEXT_HEAD, 0); 2109 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_STATE, 0, R_LENGTH, 0); 2110 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_CURR_STATE, 0, R_STRING, 0); 2111 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, R_CURR_INDEX, 0); 2112 1.1 alnsn 2113 1.1 alnsn /* Fast forward mainloop. */ 2114 1.1 alnsn EMIT_LABEL(label); 2115 1.1.1.4 alnsn EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z, R_NEXT_STATE, 0, R_NEXT_STATE, 0, SLJIT_IMM, 1); 2116 1.1.1.3 alnsn EMIT_JUMP(fast_forward_jump, SLJIT_EQUAL); 2117 1.1 alnsn 2118 1.1 alnsn #ifdef REGEX_USE_8BIT_CHARS 2119 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV_U8, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), 0); 2120 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_CURR_STATE, 0, R_CURR_STATE, 0, SLJIT_IMM, 1); 2121 1.1 alnsn #else 2122 1.1 alnsn EMIT_OP1(SLJIT_MOV_UH, R_CURR_CHAR, 0, SLJIT_MEM1(R_CURR_STATE), 0); 2123 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_CURR_STATE, 0, R_CURR_STATE, 0, SLJIT_IMM, 2); 2124 1.1 alnsn #endif 2125 1.1 alnsn 2126 1.1 alnsn CHECK(trace_transitions(0, &compiler_common)); 2127 1.1 alnsn CHECK(compile_leave_fast_forward(&compiler_common, fast_forward_label)); 2128 1.1 alnsn 2129 1.1 alnsn EMIT_OP2(SLJIT_ADD, R_NEXT_HEAD, 0, R_NEXT_HEAD, 0, SLJIT_IMM, 1); 2130 1.1 alnsn EMIT_JUMP(jump, SLJIT_JUMP); 2131 1.1 alnsn sljit_set_label(jump, label); 2132 1.1 alnsn 2133 1.1 alnsn /* String is finished. */ 2134 1.1 alnsn EMIT_LABEL(label); 2135 1.1 alnsn sljit_set_label(fast_forward_jump, label); 2136 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index), R_NEXT_HEAD, 0); 2137 1.1 alnsn EMIT_JUMP(fast_forward_jump, SLJIT_JUMP); 2138 1.1 alnsn } 2139 1.1 alnsn 2140 1.1 alnsn /* End check. */ 2141 1.1 alnsn if (end_check_jump) { 2142 1.1 alnsn EMIT_LABEL(label); 2143 1.1 alnsn sljit_set_label(end_check_jump, label); 2144 1.1 alnsn 2145 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_NON_GREEDY) || !(compiler_common.flags & REGEX_MATCH_BEGIN)) { 2146 1.1 alnsn CHECK(compile_end_check(&compiler_common, end_check_label)); 2147 1.1 alnsn } 2148 1.1 alnsn else { 2149 1.1 alnsn /* Since we leave, we do not need to update the R_BEST_BEGIN. */ 2150 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0); 2151 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_end), R_CURR_INDEX, 0); 2152 1.1 alnsn if (compiler_common.flags & REGEX_ID_CHECK) { 2153 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_MEM1(R_CURR_STATE), TERM_REL_OFFSET_OF(0, 2)); 2154 1.1 alnsn } 2155 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 1); 2156 1.1 alnsn EMIT_JUMP(non_greedy_end_jump, SLJIT_JUMP); 2157 1.1 alnsn } 2158 1.1 alnsn } 2159 1.1 alnsn 2160 1.1 alnsn /* Finish check. */ 2161 1.1 alnsn if (best_match_check_jump) { 2162 1.1 alnsn EMIT_LABEL(label); 2163 1.1 alnsn sljit_set_label(best_match_check_jump, label); 2164 1.1 alnsn 2165 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) { 2166 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_NEXT_HEAD, 0, SLJIT_IMM, 0); 2167 1.1 alnsn sljit_set_label(jump, start_label); 2168 1.1 alnsn } 2169 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, fast_quit), SLJIT_IMM, 1); 2170 1.1 alnsn } 2171 1.1 alnsn 2172 1.1 alnsn /* Leaving matching and storing the necessary values. */ 2173 1.1 alnsn EMIT_LABEL(label); 2174 1.1 alnsn sljit_set_label(length_is_zero_jump, label); 2175 1.1 alnsn if (non_greedy_end_jump) 2176 1.1 alnsn sljit_set_label(non_greedy_end_jump, label); 2177 1.1 alnsn 2178 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, index), R_CURR_INDEX, 0); 2179 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, head), R_NEXT_HEAD, 0); 2180 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, next), R_NEXT_STATE, 0); 2181 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_REGEX_MATCH), SLJIT_OFFSETOF(struct regex_match, current), R_CURR_STATE, 0); 2182 1.1 alnsn 2183 1.1 alnsn /* Exit from JIT. */ 2184 1.1 alnsn EMIT_LABEL(label); 2185 1.1 alnsn sljit_set_label(best_match_found_jump, label); 2186 1.1 alnsn if (fast_forward_jump) 2187 1.1 alnsn sljit_set_label(fast_forward_jump, label); 2188 1.1 alnsn CHECK(sljit_emit_return(compiler_common.compiler, SLJIT_UNUSED, 0, 0)); 2189 1.1 alnsn 2190 1.1.1.3 alnsn for (ind = 1; ind < compiler_common.dfa_size - 1; ind++) { 2191 1.1 alnsn if (compiler_common.search_states[ind].type >= 0) { 2192 1.1 alnsn SLJIT_ASSERT(compiler_common.search_states[ind].type < compiler_common.terms_size); 2193 1.1 alnsn EMIT_LABEL(label); 2194 1.1 alnsn compiler_common.machine->entry_addrs[compiler_common.search_states[ind].type] = (sljit_uw)label; 2195 1.1 alnsn 2196 1.1 alnsn if (compiler_common.dfa_transitions[ind].type == type_char) { 2197 1.1.1.3 alnsn EMIT_CMP(jump, SLJIT_NOT_EQUAL, R_CURR_CHAR, 0, SLJIT_IMM, compiler_common.dfa_transitions[ind].value); 2198 1.1 alnsn } 2199 1.1 alnsn else if (compiler_common.dfa_transitions[ind].type == type_rng_start) { 2200 1.1 alnsn ind = compile_range_check(&compiler_common, ind); 2201 1.1 alnsn CHECK(!ind); 2202 1.1 alnsn } 2203 1.1 alnsn else { 2204 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_newline); 2205 1.1 alnsn CHECK(compile_newline_check(&compiler_common, ind)); 2206 1.1 alnsn } 2207 1.1 alnsn 2208 1.1 alnsn CHECK(trace_transitions(ind, &compiler_common)); 2209 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 2210 1.1 alnsn if (compiler_common.flags & REGEX_MATCH_VERBOSE) 2211 1.1 alnsn printf("(%3d): ", compiler_common.search_states[ind].type); 2212 1.1 alnsn #endif 2213 1.1 alnsn CHECK(compile_cond_tran(&compiler_common, compiler_common.search_states[ind].type)); 2214 1.1 alnsn 2215 1.1 alnsn if (compiler_common.dfa_transitions[ind].type == type_char) { 2216 1.1 alnsn EMIT_LABEL(label); 2217 1.1 alnsn sljit_set_label(jump, label); 2218 1.1 alnsn } 2219 1.1 alnsn else if (compiler_common.dfa_transitions[ind].type == type_rng_end) { 2220 1.1 alnsn EMIT_LABEL(label); 2221 1.1 alnsn range_set_label(compiler_common.range_jump_list, label); 2222 1.1 alnsn } 2223 1.1 alnsn else { 2224 1.1 alnsn SLJIT_ASSERT(compiler_common.dfa_transitions[ind].type == type_newline); 2225 1.1 alnsn } 2226 1.1 alnsn 2227 1.1 alnsn /* Branch to the next item in the list. */ 2228 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 1)); 2229 1.1 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(compiler_common.search_states[ind].type, 1), SLJIT_IMM, -1); 2230 1.1 alnsn CHECK(sljit_emit_ijump(compiler_common.compiler, SLJIT_JUMP, SLJIT_MEM2(R_CURR_STATE, R_TEMP), 0)); 2231 1.1 alnsn } 2232 1.1 alnsn } 2233 1.1 alnsn 2234 1.1 alnsn if (ind == compiler_common.dfa_size - 1) { 2235 1.1 alnsn /* Generate an init stub function. */ 2236 1.1 alnsn EMIT_LABEL(label); 2237 1.1.1.3 alnsn CHECK(sljit_emit_enter(compiler_common.compiler, 0, 2, 3, 3, 0, 0, 0)); 2238 1.1 alnsn 2239 1.1 alnsn if (empty_match_id == -1) { 2240 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, -1); 2241 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_IMM, 0); 2242 1.1 alnsn } 2243 1.1 alnsn else { 2244 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, 0); 2245 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_id), SLJIT_IMM, empty_match_id); 2246 1.1 alnsn } 2247 1.1 alnsn 2248 1.1.1.3 alnsn EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, index), SLJIT_IMM, !(compiler_common.flags & REGEX_FAKE_MATCH_BEGIN) ? 1 : 2); 2249 1.1 alnsn 2250 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_NON_GREEDY) || empty_match_id == -1) { 2251 1.1 alnsn /* The else is a really rare event, so we still generate an empty function instead of a runtime pointer check. */ 2252 1.1.1.3 alnsn SLJIT_ASSERT(R_CURR_STATE == SLJIT_S0); 2253 1.1 alnsn if (!(compiler_common.flags & REGEX_MATCH_BEGIN)) { 2254 1.1 alnsn /* R_CURR_INDEX (put to R_TEMP) is zero. */ 2255 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_IMM, 0); 2256 1.1 alnsn } 2257 1.1 alnsn CHECK(compile_uncond_tran(&compiler_common, R_CURR_STATE)); 2258 1.1 alnsn } 2259 1.1 alnsn else { 2260 1.1 alnsn EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, 0); 2261 1.1 alnsn } 2262 1.1 alnsn CHECK(sljit_emit_return(compiler_common.compiler, SLJIT_MOV, R_NEXT_HEAD, 0)); 2263 1.1 alnsn 2264 1.1 alnsn compiler_common.machine->continue_match = sljit_generate_code(compiler_common.compiler); 2265 1.1 alnsn #ifndef SLJIT_INDIRECT_CALL 2266 1.1.1.2 alnsn compiler_common.machine->u.init_match = (void*)(sljit_sw)sljit_get_label_addr(label); 2267 1.1 alnsn #else 2268 1.1 alnsn sljit_set_function_context(&compiler_common.machine->u.init_match, &compiler_common.machine->context, sljit_get_label_addr(label), regex_compile); 2269 1.1 alnsn #endif 2270 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 2271 1.1 alnsn if (compiler_common.flags & REGEX_MATCH_VERBOSE) 2272 1.1 alnsn printf("Continue match: %p Init match: %p\n\n", compiler_common.machine->continue_match, compiler_common.machine->u.init_match); 2273 1.1 alnsn #endif 2274 1.1 alnsn if (compiler_common.machine->continue_match) { 2275 1.1 alnsn for (ind = 0; ind < compiler_common.terms_size; ++ind) 2276 1.1 alnsn compiler_common.machine->entry_addrs[ind] = sljit_get_label_addr((struct sljit_label*)compiler_common.machine->entry_addrs[ind]); 2277 1.1 alnsn done = 1; 2278 1.1 alnsn } 2279 1.1 alnsn } 2280 1.1 alnsn END_GUARD 2281 1.1 alnsn 2282 1.1 alnsn stack_destroy(&compiler_common.stack); 2283 1.1 alnsn stack_destroy(&compiler_common.depth); 2284 1.1.1.3 alnsn SLJIT_FREE(compiler_common.dfa_transitions, NULL); 2285 1.1.1.3 alnsn SLJIT_FREE(compiler_common.search_states, NULL); 2286 1.1 alnsn if (compiler_common.range_jump_list) 2287 1.1.1.3 alnsn SLJIT_FREE(compiler_common.range_jump_list, NULL); 2288 1.1 alnsn if (compiler_common.compiler) 2289 1.1 alnsn sljit_free_compiler(compiler_common.compiler); 2290 1.1 alnsn if (done) 2291 1.1 alnsn return compiler_common.machine; 2292 1.1 alnsn 2293 1.1 alnsn if (compiler_common.machine) { 2294 1.1.1.3 alnsn SLJIT_FREE(compiler_common.machine, NULL); 2295 1.1 alnsn } 2296 1.1 alnsn if (error) 2297 1.1 alnsn *error = REGEX_MEMORY_ERROR; 2298 1.1 alnsn return NULL; 2299 1.1 alnsn } 2300 1.1 alnsn 2301 1.1 alnsn #undef TERM_OFFSET_OF 2302 1.1 alnsn #undef EMIT_OP1 2303 1.1 alnsn #undef EMIT_OP2 2304 1.1 alnsn #undef EMIT_LABEL 2305 1.1 alnsn #undef EMIT_JUMP 2306 1.1 alnsn #undef EMIT_CMP 2307 1.1 alnsn #undef BEGIN_GUARD 2308 1.1 alnsn #undef END_GUARD 2309 1.1 alnsn #undef CHECK 2310 1.1 alnsn 2311 1.1 alnsn void regex_free_machine(struct regex_machine *machine) 2312 1.1 alnsn { 2313 1.1 alnsn sljit_free_code(machine->continue_match); 2314 1.1.1.3 alnsn SLJIT_FREE(machine, NULL); 2315 1.1 alnsn } 2316 1.1 alnsn 2317 1.1 alnsn const char* regex_get_platform_name(void) 2318 1.1 alnsn { 2319 1.1 alnsn return sljit_get_platform_name(); 2320 1.1 alnsn } 2321 1.1 alnsn 2322 1.1 alnsn /* --------------------------------------------------------------------- */ 2323 1.1 alnsn /* Mathching utilities */ 2324 1.1 alnsn /* --------------------------------------------------------------------- */ 2325 1.1 alnsn 2326 1.1 alnsn struct regex_match* regex_begin_match(struct regex_machine *machine) 2327 1.1 alnsn { 2328 1.1.1.2 alnsn sljit_sw *ptr1; 2329 1.1.1.2 alnsn sljit_sw *ptr2; 2330 1.1.1.2 alnsn sljit_sw *end; 2331 1.1.1.2 alnsn sljit_sw *entry_addrs; 2332 1.1 alnsn 2333 1.1.1.3 alnsn struct regex_match *match = (struct regex_match*)SLJIT_MALLOC(sizeof(struct regex_match) + (machine->size * 2 - 1) * sizeof(sljit_sw), NULL); 2334 1.1 alnsn if (!match) 2335 1.1 alnsn return NULL; 2336 1.1 alnsn 2337 1.1 alnsn ptr1 = match->states; 2338 1.1 alnsn ptr2 = match->states + machine->size; 2339 1.1 alnsn end = ptr2; 2340 1.1.1.2 alnsn entry_addrs = (sljit_sw*)machine->entry_addrs; 2341 1.1 alnsn 2342 1.1 alnsn match->current = ptr1; 2343 1.1 alnsn match->next = ptr2; 2344 1.1 alnsn match->head = 0; 2345 1.1 alnsn match->machine = machine; 2346 1.1 alnsn 2347 1.1 alnsn /* Init machine states. */ 2348 1.1 alnsn switch (machine->no_states) { 2349 1.1 alnsn case 2: 2350 1.1 alnsn while (ptr1 < end) { 2351 1.1 alnsn *ptr1++ = *entry_addrs; 2352 1.1 alnsn *ptr2++ = *entry_addrs++; 2353 1.1 alnsn *ptr1++ = -1; 2354 1.1 alnsn *ptr2++ = -1; 2355 1.1 alnsn } 2356 1.1 alnsn break; 2357 1.1 alnsn 2358 1.1 alnsn case 3: 2359 1.1 alnsn while (ptr1 < end) { 2360 1.1 alnsn *ptr1++ = *entry_addrs; 2361 1.1 alnsn *ptr2++ = *entry_addrs++; 2362 1.1 alnsn *ptr1++ = -1; 2363 1.1 alnsn *ptr2++ = -1; 2364 1.1 alnsn *ptr1++ = 0; 2365 1.1 alnsn *ptr2++ = 0; 2366 1.1 alnsn } 2367 1.1 alnsn break; 2368 1.1 alnsn 2369 1.1 alnsn case 4: 2370 1.1 alnsn while (ptr1 < end) { 2371 1.1 alnsn *ptr1++ = *entry_addrs; 2372 1.1 alnsn *ptr2++ = *entry_addrs++; 2373 1.1 alnsn *ptr1++ = -1; 2374 1.1 alnsn *ptr2++ = -1; 2375 1.1 alnsn *ptr1++ = 0; 2376 1.1 alnsn *ptr2++ = 0; 2377 1.1 alnsn *ptr1++ = 0; 2378 1.1 alnsn *ptr2++ = 0; 2379 1.1 alnsn } 2380 1.1 alnsn break; 2381 1.1 alnsn 2382 1.1 alnsn default: 2383 1.1.1.4 alnsn SLJIT_UNREACHABLE(); 2384 1.1 alnsn break; 2385 1.1 alnsn } 2386 1.1 alnsn 2387 1.1 alnsn SLJIT_ASSERT(ptr1 == end); 2388 1.1 alnsn 2389 1.1 alnsn match->u.continue_match = machine->continue_match; 2390 1.1 alnsn 2391 1.1 alnsn regex_reset_match(match); 2392 1.1 alnsn return match; 2393 1.1 alnsn } 2394 1.1 alnsn 2395 1.1 alnsn void regex_reset_match(struct regex_match *match) 2396 1.1 alnsn { 2397 1.1 alnsn struct regex_machine *machine = match->machine; 2398 1.1.1.2 alnsn sljit_sw current, ind; 2399 1.1.1.2 alnsn sljit_sw *current_ptr; 2400 1.1 alnsn 2401 1.1 alnsn match->best_end = 0; 2402 1.1 alnsn match->fast_quit = 0; 2403 1.1 alnsn match->fast_forward = 0; 2404 1.1 alnsn 2405 1.1 alnsn if (match->head != 0) { 2406 1.1 alnsn /* Clear the current state. */ 2407 1.1 alnsn current = match->head; 2408 1.1 alnsn current_ptr = match->current; 2409 1.1 alnsn do { 2410 1.1.1.2 alnsn ind = (current / sizeof(sljit_sw)) + 1; 2411 1.1 alnsn current = current_ptr[ind]; 2412 1.1 alnsn current_ptr[ind] = -1; 2413 1.1 alnsn } while (current != 0); 2414 1.1 alnsn } 2415 1.1 alnsn match->head = machine->u.call_init(match->current, match); 2416 1.1 alnsn } 2417 1.1 alnsn 2418 1.1 alnsn void regex_free_match(struct regex_match *match) 2419 1.1 alnsn { 2420 1.1.1.3 alnsn SLJIT_FREE(match, NULL); 2421 1.1 alnsn } 2422 1.1 alnsn 2423 1.1 alnsn void regex_continue_match(struct regex_match *match, const regex_char_t *input_string, int length) 2424 1.1 alnsn { 2425 1.1 alnsn match->u.call_continue(match, input_string, length); 2426 1.1 alnsn } 2427 1.1 alnsn 2428 1.1 alnsn int regex_get_result(struct regex_match *match, int *end, int *id) 2429 1.1 alnsn { 2430 1.1 alnsn int flags = match->machine->flags; 2431 1.1.1.2 alnsn sljit_sw no_states; 2432 1.1 alnsn 2433 1.1 alnsn *end = match->best_end; 2434 1.1 alnsn *id = match->best_id; 2435 1.1 alnsn if (!(flags & (REGEX_MATCH_END | REGEX_FAKE_MATCH_END))) 2436 1.1 alnsn return match->best_begin; 2437 1.1 alnsn 2438 1.1 alnsn if (flags & REGEX_FAKE_MATCH_END) { 2439 1.1 alnsn SLJIT_ASSERT(!(flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END))); 2440 1.1 alnsn if (match->best_begin != -1) 2441 1.1 alnsn return match->best_begin; 2442 1.1 alnsn 2443 1.1 alnsn no_states = match->machine->no_states; 2444 1.1 alnsn if (match->current[no_states + 1] == -1) 2445 1.1 alnsn return -1; 2446 1.1 alnsn if (flags & REGEX_ID_CHECK) 2447 1.1 alnsn *id = match->current[no_states + 3]; 2448 1.1 alnsn if (!(flags & REGEX_FAKE_MATCH_BEGIN)) 2449 1.1 alnsn *end = match->index - 1; 2450 1.1 alnsn else 2451 1.1 alnsn *end = match->index - 2; 2452 1.1 alnsn return match->current[no_states + 2]; 2453 1.1 alnsn } 2454 1.1 alnsn else { 2455 1.1 alnsn /* Check the status of the last code. */ 2456 1.1 alnsn if (!(flags & REGEX_MATCH_BEGIN)) { 2457 1.1 alnsn /* No shortcut in this case. */ 2458 1.1 alnsn if (!(flags & REGEX_ID_CHECK)) { 2459 1.1 alnsn if (match->current[1] == -1) 2460 1.1 alnsn return -1; 2461 1.1 alnsn *end = match->index - 1; 2462 1.1 alnsn return match->current[2]; 2463 1.1 alnsn } 2464 1.1 alnsn 2465 1.1 alnsn if (match->current[1] == -1) 2466 1.1 alnsn return -1; 2467 1.1 alnsn *end = match->index - 1; 2468 1.1 alnsn *id = match->current[3]; 2469 1.1 alnsn return match->current[2]; 2470 1.1 alnsn } 2471 1.1 alnsn 2472 1.1 alnsn /* Shortcut is possible in this case. */ 2473 1.1 alnsn if (!(flags & REGEX_ID_CHECK)) { 2474 1.1 alnsn if (match->current[1] == -1 || match->head == -1) 2475 1.1 alnsn return -1; 2476 1.1 alnsn *end = match->index - 1; 2477 1.1 alnsn return 0; 2478 1.1 alnsn } 2479 1.1 alnsn 2480 1.1 alnsn if (match->current[1] == -1 || match->head == -1) 2481 1.1 alnsn return -1; 2482 1.1 alnsn *end = match->index - 1; 2483 1.1 alnsn *id = match->current[2]; 2484 1.1 alnsn return 0; 2485 1.1 alnsn } 2486 1.1 alnsn } 2487 1.1 alnsn 2488 1.1 alnsn int regex_is_match_finished(struct regex_match *match) 2489 1.1 alnsn { 2490 1.1 alnsn return match->fast_quit; 2491 1.1 alnsn } 2492 1.1 alnsn 2493 1.1 alnsn #ifdef REGEX_MATCH_VERBOSE 2494 1.1 alnsn void regex_continue_match_debug(struct regex_match *match, const regex_char_t *input_string, int length) 2495 1.1 alnsn { 2496 1.1.1.2 alnsn sljit_sw *ptr; 2497 1.1.1.2 alnsn sljit_sw *end; 2498 1.1.1.2 alnsn sljit_sw count; 2499 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 2500 1.1.1.2 alnsn sljit_sw current; 2501 1.1 alnsn #endif 2502 1.1.1.2 alnsn sljit_sw no_states = match->machine->no_states; 2503 1.1.1.2 alnsn sljit_sw len = match->machine->size; 2504 1.1 alnsn 2505 1.1 alnsn while (length > 0) { 2506 1.1 alnsn match->u.call_continue(match, input_string, 1); 2507 1.1 alnsn 2508 1.1 alnsn if (match->fast_forward) { 2509 1.1 alnsn if (match->machine->flags & REGEX_MATCH_VERBOSE) 2510 1.1 alnsn printf("fast forward\n"); 2511 1.1 alnsn } 2512 1.1 alnsn 2513 1.1 alnsn /* Verbose (first). */ 2514 1.1 alnsn if (match->machine->flags & REGEX_MATCH_VERBOSE) { 2515 1.1 alnsn ptr = match->current; 2516 1.1 alnsn end = ptr + len; 2517 1.1 alnsn count = 0; 2518 1.1 alnsn printf("'%c' (%3ld->%3ld [%3ld]) ", *input_string, (long)match->best_begin, (long)match->best_end, (long)match->best_id); 2519 1.1 alnsn while (ptr < end) { 2520 1.1 alnsn printf("[%3ld:", (long)count++); 2521 1.1 alnsn switch (no_states) { 2522 1.1 alnsn case 2: 2523 1.1 alnsn if (ptr[1] != -1) 2524 1.1 alnsn printf("+] "); 2525 1.1 alnsn else 2526 1.1 alnsn printf(" ] "); 2527 1.1 alnsn break; 2528 1.1 alnsn 2529 1.1 alnsn case 3: 2530 1.1 alnsn if (ptr[1] != -1) 2531 1.1 alnsn printf("+,%3ld] ", (long)ptr[2]); 2532 1.1 alnsn else 2533 1.1 alnsn printf(" ,XXX] "); 2534 1.1 alnsn break; 2535 1.1 alnsn 2536 1.1 alnsn case 4: 2537 1.1 alnsn if (ptr[1] != -1) 2538 1.1 alnsn printf("+,%3ld,%3ld] ", (long)ptr[2], (long)ptr[3]); 2539 1.1 alnsn else 2540 1.1 alnsn printf(" ,XXX,XXX] "); 2541 1.1 alnsn break; 2542 1.1 alnsn } 2543 1.1 alnsn ptr += no_states; 2544 1.1 alnsn } 2545 1.1 alnsn printf("\n"); 2546 1.1 alnsn } 2547 1.1 alnsn 2548 1.1 alnsn #if (defined SLJIT_DEBUG && SLJIT_DEBUG) 2549 1.1 alnsn /* Sanity check (later). */ 2550 1.1 alnsn ptr = match->next; 2551 1.1 alnsn end = ptr + len; 2552 1.1 alnsn while (ptr < end) { 2553 1.1 alnsn SLJIT_ASSERT(ptr[1] == -1); 2554 1.1 alnsn ptr += no_states; 2555 1.1 alnsn } 2556 1.1 alnsn 2557 1.1 alnsn /* Check number of active elements. */ 2558 1.1 alnsn ptr = match->current + no_states; 2559 1.1 alnsn end = ptr + len - no_states; 2560 1.1 alnsn count = 0; 2561 1.1 alnsn while (ptr < end) { 2562 1.1 alnsn if (ptr[1] != -1) 2563 1.1 alnsn count++; 2564 1.1 alnsn ptr += no_states; 2565 1.1 alnsn } 2566 1.1 alnsn 2567 1.1 alnsn /* Check chain list. */ 2568 1.1 alnsn current = match->head; 2569 1.1 alnsn ptr = match->current; 2570 1.1 alnsn while (current != 0) { 2571 1.1.1.2 alnsn SLJIT_ASSERT(current >= 0 && current < len * sizeof(sljit_sw)); 2572 1.1.1.2 alnsn SLJIT_ASSERT((current % (no_states * sizeof(sljit_sw))) == 0); 2573 1.1 alnsn SLJIT_ASSERT(count > 0); 2574 1.1.1.2 alnsn current = ptr[(current / sizeof(sljit_sw)) + 1]; 2575 1.1 alnsn count--; 2576 1.1 alnsn } 2577 1.1 alnsn SLJIT_ASSERT(count == 0); 2578 1.1 alnsn #endif 2579 1.1 alnsn 2580 1.1 alnsn if (match->fast_quit) { 2581 1.1 alnsn /* the machine has stopped working. */ 2582 1.1 alnsn if (match->machine->flags & REGEX_MATCH_VERBOSE) 2583 1.1 alnsn printf("Best match has found\n"); 2584 1.1 alnsn break; 2585 1.1 alnsn } 2586 1.1 alnsn 2587 1.1 alnsn input_string++; 2588 1.1 alnsn length--; 2589 1.1 alnsn } 2590 1.1 alnsn } 2591 1.1 alnsn #endif 2592