re.c revision 5dfecf96
15dfecf96Smrg/* 25dfecf96Smrg * Copyright (c) 2002 by The XFree86 Project, Inc. 35dfecf96Smrg * 45dfecf96Smrg * Permission is hereby granted, free of charge, to any person obtaining a 55dfecf96Smrg * copy of this software and associated documentation files (the "Software"), 65dfecf96Smrg * to deal in the Software without restriction, including without limitation 75dfecf96Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 85dfecf96Smrg * and/or sell copies of the Software, and to permit persons to whom the 95dfecf96Smrg * Software is furnished to do so, subject to the following conditions: 105dfecf96Smrg * 115dfecf96Smrg * The above copyright notice and this permission notice shall be included in 125dfecf96Smrg * all copies or substantial portions of the Software. 135dfecf96Smrg * 145dfecf96Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 155dfecf96Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 165dfecf96Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 175dfecf96Smrg * THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 185dfecf96Smrg * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 195dfecf96Smrg * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 205dfecf96Smrg * SOFTWARE. 215dfecf96Smrg * 225dfecf96Smrg * Except as contained in this notice, the name of the XFree86 Project shall 235dfecf96Smrg * not be used in advertising or otherwise to promote the sale, use or other 245dfecf96Smrg * dealings in this Software without prior written authorization from the 255dfecf96Smrg * XFree86 Project. 265dfecf96Smrg * 275dfecf96Smrg * Author: Paulo César Pereira de Andrade 285dfecf96Smrg */ 295dfecf96Smrg 305dfecf96Smrg/* $XFree86: xc/programs/xedit/lisp/re/re.c,v 1.8 2002/11/17 07:51:30 paulo Exp $ */ 315dfecf96Smrg 325dfecf96Smrg#include <stdio.h> 335dfecf96Smrg#include "rep.h" 345dfecf96Smrg#define DEBUG 355dfecf96Smrg/* 365dfecf96Smrg * Types 375dfecf96Smrg */ 385dfecf96Smrg 395dfecf96Smrg/* Information used when generating the final form of the compiled re. 405dfecf96Smrg */ 415dfecf96Smrgstruct _re_inf { 425dfecf96Smrg rec_alt *alt; 435dfecf96Smrg unsigned char *cod; 445dfecf96Smrg long len; 455dfecf96Smrg long spc; 465dfecf96Smrg 475dfecf96Smrg /* Start offset of special repetition instruction */ 485dfecf96Smrg long sr[MAX_DEPTH]; 495dfecf96Smrg 505dfecf96Smrg /* Jump offset of special repetition instruction */ 515dfecf96Smrg long sj[MAX_DEPTH]; 525dfecf96Smrg 535dfecf96Smrg /* Just a flag, to know if this nesting is for a special repetition */ 545dfecf96Smrg char sp[MAX_DEPTH]; 555dfecf96Smrg 565dfecf96Smrg int bas; /* Alternatives/repetitions depth */ 575dfecf96Smrg int par; /* Open parenthesis counter */ 585dfecf96Smrg int ref; /* Backreference counter */ 595dfecf96Smrg 605dfecf96Smrg rec_pat *apat; /* Alternatives duplicate patterns 615dfecf96Smrg * if a special repetition is found, 625dfecf96Smrg * this is done to somewhat simplify 635dfecf96Smrg * the bytecode engine and still allow 645dfecf96Smrg * some complex (and time consuming) 655dfecf96Smrg * patterns. */ 665dfecf96Smrg 675dfecf96Smrg int flags; 685dfecf96Smrg int ecode; 695dfecf96Smrg}; 705dfecf96Smrg 715dfecf96Smrg/* This structure is not associated with re_cod as it's data only matters 725dfecf96Smrg * to the current match search. 735dfecf96Smrg */ 745dfecf96Smrgstruct _re_eng { 755dfecf96Smrg unsigned char *bas; /* Base string pointer */ 765dfecf96Smrg unsigned char *str; /* String to search for pattern */ 775dfecf96Smrg unsigned char *end; /* Where to stop searching */ 785dfecf96Smrg unsigned char *cod; /* Pointer in the re_cod structure */ 795dfecf96Smrg long off; /* Number of used entries in so/eo etc */ 805dfecf96Smrg 815dfecf96Smrg /* Match offset/nesting information */ 825dfecf96Smrg long so[MAX_DEPTH]; /* (s)tart of (m)atch */ 835dfecf96Smrg long eo[MAX_DEPTH]; /* (e)nd of (m)atch */ 845dfecf96Smrg long sv[MAX_DEPTH]; /* (s)a(v)e match end offset */ 855dfecf96Smrg long re[MAX_DEPTH]; /* (re)petition count */ 865dfecf96Smrg long ss[MAX_DEPTH]; /* (s)ave (s)tart of match */ 875dfecf96Smrg unsigned char *rcod[MAX_DEPTH]; /* restart position in regex code */ 885dfecf96Smrg unsigned char *rstr[MAX_DEPTH]; /* restart position in string */ 895dfecf96Smrg 905dfecf96Smrg /* Group/backreference information */ 915dfecf96Smrg long goff; 925dfecf96Smrg long gso[9]; 935dfecf96Smrg long geo[9]; 945dfecf96Smrg}; 955dfecf96Smrg 965dfecf96Smrg/* 975dfecf96Smrg * Prototypes 985dfecf96Smrg */ 995dfecf96Smrgstatic void reinit(void); 1005dfecf96Smrgstatic int rec_check(re_inf*, int); 1015dfecf96Smrgstatic int rec_code(re_inf*, ReCode); 1025dfecf96Smrgstatic int rec_byte(re_inf*, int); 1035dfecf96Smrgstatic int rec_byte_byte(re_inf*, int, int); 1045dfecf96Smrgstatic int rec_code_byte(re_inf*, ReCode, int); 1055dfecf96Smrgstatic int rec_length(re_inf*, int); 1065dfecf96Smrgstatic int rec_code_byte_byte(re_inf*, ReCode, int, int); 1075dfecf96Smrgstatic int rec_build_alt(re_inf*, rec_alt*); 1085dfecf96Smrgstatic int rec_build_pat(re_inf*, rec_pat*); 1095dfecf96Smrgstatic int rec_build_rng(re_inf*, rec_rng*); 1105dfecf96Smrgstatic int rec_build_grp(re_inf*, rec_grp*); 1115dfecf96Smrgstatic int rec_build_stl(re_inf*, rec_stl*); 1125dfecf96Smrgstatic int rec_build_rep(re_inf*, rec_rep*); 1135dfecf96Smrgstatic int rec_inc_spc(re_inf*); 1145dfecf96Smrgstatic int rec_dec_spc(re_inf*); 1155dfecf96Smrgstatic int rec_add_spc(re_inf*, int); 1165dfecf96Smrgstatic int rec_off_spc(re_inf*); 1175dfecf96Smrgstatic int rec_alt_spc(re_inf*, int); 1185dfecf96Smrgstatic int rec_rep_spc(re_inf*, int); 1195dfecf96Smrg#ifdef DEBUG 1205dfecf96Smrgstatic void redump(re_cod*); 1215dfecf96Smrg#endif 1225dfecf96Smrg 1235dfecf96Smrg/* 1245dfecf96Smrg * Initialization 1255dfecf96Smrg */ 1265dfecf96Smrgunsigned char re__alnum[256]; 1275dfecf96Smrgunsigned char re__odigit[256]; 1285dfecf96Smrgunsigned char re__ddigit[256]; 1295dfecf96Smrgunsigned char re__xdigit[256]; 1305dfecf96Smrgunsigned char re__control[256]; 1315dfecf96Smrg 1325dfecf96Smrg/* 1335dfecf96Smrg * Implementation 1345dfecf96Smrg */ 1355dfecf96Smrgint 1365dfecf96Smrgrecomp(re_cod *preg, const char *pattern, int flags) 1375dfecf96Smrg{ 1385dfecf96Smrg int i, ecode; 1395dfecf96Smrg re_inf inf; 1405dfecf96Smrg 1415dfecf96Smrg reinit(); 1425dfecf96Smrg 1435dfecf96Smrg preg->cod = NULL; 1445dfecf96Smrg inf.alt = irec_comp(pattern, 1455dfecf96Smrg flags & RE_PEND ? preg->re_endp : 1465dfecf96Smrg pattern + strlen(pattern), 1475dfecf96Smrg flags, &ecode); 1485dfecf96Smrg if (ecode != 0) 1495dfecf96Smrg return (ecode); 1505dfecf96Smrg 1515dfecf96Smrg inf.cod = NULL; 1525dfecf96Smrg inf.len = inf.spc = 0; 1535dfecf96Smrg inf.bas = 0; 1545dfecf96Smrg inf.par = 0; 1555dfecf96Smrg inf.ref = 0; 1565dfecf96Smrg inf.apat = NULL; 1575dfecf96Smrg inf.flags = flags; 1585dfecf96Smrg inf.ecode = 0; 1595dfecf96Smrg for (i = 0; i < MAX_DEPTH; i++) 1605dfecf96Smrg inf.sp[i] = 0; 1615dfecf96Smrg 1625dfecf96Smrg /* First byte is runtime modifier flags */ 1635dfecf96Smrg if (rec_byte(&inf, flags & (RE_NEWLINE | RE_NOSUB)) == 0 && 1645dfecf96Smrg rec_byte(&inf, 0xff) == 0 && 1655dfecf96Smrg rec_build_alt(&inf, inf.alt) == 0 && 1665dfecf96Smrg rec_rep_spc(&inf, 0) == 0 && 1675dfecf96Smrg rec_code(&inf, Re_Done) == 0) { 1685dfecf96Smrg /* Number of possible references, loops will not leave this 1695dfecf96Smrg * value correct, but it is cheap to read it from the second 1705dfecf96Smrg * byte, instead of adding several extra checks in the bytecode. */ 1715dfecf96Smrg if (inf.ref) 1725dfecf96Smrg inf.cod[1] = inf.ref - 1; 1735dfecf96Smrg preg->cod = inf.cod; 1745dfecf96Smrg /* Public structure member */ 1755dfecf96Smrg preg->re_nsub = inf.ref; 1765dfecf96Smrg } 1775dfecf96Smrg 1785dfecf96Smrg irec_free_alt(inf.alt); 1795dfecf96Smrg if (inf.ecode) 1805dfecf96Smrg free(inf.cod); 1815dfecf96Smrg#ifdef DEBUG 1825dfecf96Smrg else if (flags & RE_DUMP) 1835dfecf96Smrg redump(preg); 1845dfecf96Smrg#endif 1855dfecf96Smrg 1865dfecf96Smrg return (inf.ecode); 1875dfecf96Smrg} 1885dfecf96Smrg 1895dfecf96Smrgint 1905dfecf96Smrgreexec(const re_cod *preg, const char *string, 1915dfecf96Smrg int nmatch, re_mat pmat[], int flags) 1925dfecf96Smrg{ 1935dfecf96Smrg unsigned char *ptr, *str, newline, nosub; 1945dfecf96Smrg int len, si, ci, bas, i, j, k, l, m; 1955dfecf96Smrg re_eng eng; 1965dfecf96Smrg 1975dfecf96Smrg if (preg == NULL || preg->cod == NULL || nmatch < 0 || 1985dfecf96Smrg ((flags & RE_STARTEND) && 1995dfecf96Smrg (pmat == NULL || pmat[0].rm_eo < pmat[0].rm_so))) 2005dfecf96Smrg return (RE_INVARG); 2015dfecf96Smrg 2025dfecf96Smrg eng.str = (unsigned char*)string; 2035dfecf96Smrg if (flags & RE_STARTEND) { 2045dfecf96Smrg eng.end = eng.str + pmat[0].rm_eo; 2055dfecf96Smrg eng.str += pmat[0].rm_so; 2065dfecf96Smrg } 2075dfecf96Smrg else 2085dfecf96Smrg eng.end = eng.str + strlen(string); 2095dfecf96Smrg eng.bas = eng.str; 2105dfecf96Smrg nosub = preg->cod[0] & RE_NOSUB; 2115dfecf96Smrg newline = preg->cod[0] & RE_NEWLINE; 2125dfecf96Smrg eng.cod = preg->cod + 2; 2135dfecf96Smrg 2145dfecf96Smrg if (!nosub && preg->cod[1] != 0xff) { 2155dfecf96Smrg for (i = 0; i <= preg->cod[1]; i++) { 2165dfecf96Smrg eng.gso[i] = 0; 2175dfecf96Smrg eng.geo[i] = -1; 2185dfecf96Smrg } 2195dfecf96Smrg } 2205dfecf96Smrg 2215dfecf96Smrg /* Setup to search for start of match from the first character */ 2225dfecf96Smrg eng.so[0] = 0; 2235dfecf96Smrg eng.eo[0] = eng.sv[0] = -1; 2245dfecf96Smrg eng.rcod[0] = eng.cod; 2255dfecf96Smrg eng.rstr[0] = eng.str + 1; 2265dfecf96Smrg eng.off = 0; 2275dfecf96Smrg eng.goff = -1; 2285dfecf96Smrg for (ci = si = 1;;) { 2295dfecf96Smrgreset: 2305dfecf96Smrg switch (*eng.cod) { 2315dfecf96Smrg /**************************************************** 2325dfecf96Smrg * One byte codes * 2335dfecf96Smrg ****************************************************/ 2345dfecf96Smrg case Re_Any: 2355dfecf96Smrg if (eng.str == eng.end || (newline && eng.str[0] == '\n')) 2365dfecf96Smrg goto fail; 2375dfecf96Smrg goto match; 2385dfecf96Smrg case Re_AnyEatAnyTimes: 2395dfecf96Smrg if (newline) { 2405dfecf96Smrg for (ptr = eng.str; ptr < eng.end; ptr++) { 2415dfecf96Smrg if (*ptr == '\n') 2425dfecf96Smrg break; 2435dfecf96Smrg } 2445dfecf96Smrg si = ptr - eng.str; 2455dfecf96Smrg } 2465dfecf96Smrg else 2475dfecf96Smrg si = eng.end - eng.str; 2485dfecf96Smrg goto match; 2495dfecf96Smrg case Re_AnyEatMaybe: 2505dfecf96Smrg si = eng.end > eng.str; 2515dfecf96Smrg if (newline && si && eng.str[0] == '\n') 2525dfecf96Smrg si = 0; 2535dfecf96Smrg goto match; 2545dfecf96Smrg case Re_AnyEatAtLeast: 2555dfecf96Smrg if (newline) { 2565dfecf96Smrg for (ptr = eng.str; ptr < eng.end; ptr++) { 2575dfecf96Smrg if (*ptr == '\n') 2585dfecf96Smrg break; 2595dfecf96Smrg } 2605dfecf96Smrg si = ptr - eng.str; 2615dfecf96Smrg } 2625dfecf96Smrg else 2635dfecf96Smrg si = eng.end - eng.str; 2645dfecf96Smrg if (si == 0) { 2655dfecf96Smrg si = 1; 2665dfecf96Smrg goto fail; 2675dfecf96Smrg } 2685dfecf96Smrg goto match; 2695dfecf96Smrg case Re_Odigit: 2705dfecf96Smrg if (eng.str >= eng.end) 2715dfecf96Smrg goto fail; 2725dfecf96Smrg if (re__odigit[eng.str[0]]) 2735dfecf96Smrg goto match; 2745dfecf96Smrg goto fail; 2755dfecf96Smrg case Re_OdigitNot: 2765dfecf96Smrg if (eng.str >= eng.end || re__odigit[eng.str[0]]) 2775dfecf96Smrg goto fail; 2785dfecf96Smrg goto match; 2795dfecf96Smrg case Re_Digit: 2805dfecf96Smrg if (eng.str >= eng.end) 2815dfecf96Smrg goto fail; 2825dfecf96Smrg if (re__ddigit[eng.str[0]]) 2835dfecf96Smrg goto match; 2845dfecf96Smrg goto fail; 2855dfecf96Smrg case Re_DigitNot: 2865dfecf96Smrg if (eng.str >= eng.end || re__ddigit[eng.str[0]]) 2875dfecf96Smrg goto fail; 2885dfecf96Smrg goto match; 2895dfecf96Smrg case Re_Xdigit: 2905dfecf96Smrg if (eng.str >= eng.end) 2915dfecf96Smrg goto fail; 2925dfecf96Smrg if (re__xdigit[eng.str[0]]) 2935dfecf96Smrg goto match; 2945dfecf96Smrg goto fail; 2955dfecf96Smrg case Re_XdigitNot: 2965dfecf96Smrg if (eng.str >= eng.end || re__xdigit[eng.str[0]]) 2975dfecf96Smrg goto fail; 2985dfecf96Smrg goto match; 2995dfecf96Smrg case Re_Space: 3005dfecf96Smrg if (eng.str >= eng.end) 3015dfecf96Smrg goto fail; 3025dfecf96Smrg if (eng.str[0] == ' ' || eng.str[0] == '\t') 3035dfecf96Smrg goto match; 3045dfecf96Smrg goto fail; 3055dfecf96Smrg case Re_SpaceNot: 3065dfecf96Smrg if (eng.str >= eng.end) 3075dfecf96Smrg goto fail; 3085dfecf96Smrg if (eng.str[0] != ' ' && eng.str[0] != '\t') 3095dfecf96Smrg goto match; 3105dfecf96Smrg goto fail; 3115dfecf96Smrg case Re_Tab: 3125dfecf96Smrg if (eng.str >= eng.end) 3135dfecf96Smrg goto fail; 3145dfecf96Smrg if (eng.str[0] == '\t') 3155dfecf96Smrg goto match; 3165dfecf96Smrg goto fail; 3175dfecf96Smrg case Re_Newline: 3185dfecf96Smrg if (eng.str >= eng.end) 3195dfecf96Smrg goto fail; 3205dfecf96Smrg if (eng.str[0] == '\n') 3215dfecf96Smrg goto match; 3225dfecf96Smrg goto fail; 3235dfecf96Smrg case Re_Lower: 3245dfecf96Smrg if (eng.str >= eng.end) 3255dfecf96Smrg goto fail; 3265dfecf96Smrg if (eng.str[0] >= 'a' && eng.str[0] <= 'z') 3275dfecf96Smrg goto match; 3285dfecf96Smrg goto fail; 3295dfecf96Smrg case Re_Upper: 3305dfecf96Smrg if (eng.str >= eng.end) 3315dfecf96Smrg goto fail; 3325dfecf96Smrg if (eng.str[0] >= 'A' && eng.str[0] <= 'Z') 3335dfecf96Smrg goto match; 3345dfecf96Smrg goto fail; 3355dfecf96Smrg case Re_Alnum: 3365dfecf96Smrg if (eng.str >= eng.end) 3375dfecf96Smrg goto fail; 3385dfecf96Smrg if (re__alnum[eng.str[0]]) 3395dfecf96Smrg goto match; 3405dfecf96Smrg goto fail; 3415dfecf96Smrg case Re_AlnumNot: 3425dfecf96Smrg if (eng.str >= eng.end) 3435dfecf96Smrg goto fail; 3445dfecf96Smrg if (re__alnum[eng.str[0]]) 3455dfecf96Smrg goto fail; 3465dfecf96Smrg goto match; 3475dfecf96Smrg case Re_Control: 3485dfecf96Smrg if (eng.str >= eng.end) 3495dfecf96Smrg goto fail; 3505dfecf96Smrg if (re__control[eng.str[0]]) 3515dfecf96Smrg goto match; 3525dfecf96Smrg goto fail; 3535dfecf96Smrg case Re_ControlNot: 3545dfecf96Smrg if (eng.str >= eng.end || re__control[eng.str[0]]) 3555dfecf96Smrg goto fail; 3565dfecf96Smrg goto match; 3575dfecf96Smrg 3585dfecf96Smrg /**************************************************** 3595dfecf96Smrg * One byte codes, match special emtpy strings * 3605dfecf96Smrg ****************************************************/ 3615dfecf96Smrg case Re_Bol: 3625dfecf96Smrg if (eng.str == eng.bas) { 3635dfecf96Smrg if ((flags & RE_NOTBOL)) { 3645dfecf96Smrg /* String does not start at the beginning of a line */ 3655dfecf96Smrg if (newline) 3665dfecf96Smrg goto fail; 3675dfecf96Smrg goto wont; 3685dfecf96Smrg } 3695dfecf96Smrg si = 0; 3705dfecf96Smrg goto match; 3715dfecf96Smrg } 3725dfecf96Smrg if (newline && eng.str[-1] == '\n') { 3735dfecf96Smrg si = 0; 3745dfecf96Smrg goto match; 3755dfecf96Smrg } 3765dfecf96Smrg goto fail; 3775dfecf96Smrg case Re_Eol: 3785dfecf96Smrg if (eng.str == eng.end) { 3795dfecf96Smrg if (flags & RE_NOTEOL) 3805dfecf96Smrg /* String does not finish at the end of a line */ 3815dfecf96Smrg goto wont; 3825dfecf96Smrg si = 0; 3835dfecf96Smrg goto match; 3845dfecf96Smrg } 3855dfecf96Smrg if (newline && eng.str[0] == '\n') { 3865dfecf96Smrg si = 0; 3875dfecf96Smrg goto match; 3885dfecf96Smrg } 3895dfecf96Smrg goto fail; 3905dfecf96Smrg case Re_Bow: 3915dfecf96Smrg if (eng.str >= eng.end || 3925dfecf96Smrg (eng.str > eng.bas && 3935dfecf96Smrg (re__alnum[eng.str[-1]]))) 3945dfecf96Smrg goto fail; 3955dfecf96Smrg if (re__alnum[eng.str[0]]) { 3965dfecf96Smrg si = 0; 3975dfecf96Smrg goto match; 3985dfecf96Smrg } 3995dfecf96Smrg goto fail; 4005dfecf96Smrg case Re_Eow: 4015dfecf96Smrg if (eng.str == eng.bas || 4025dfecf96Smrg (eng.str < eng.end && 4035dfecf96Smrg re__alnum[eng.str[0]])) 4045dfecf96Smrg goto fail; 4055dfecf96Smrg if (re__alnum[eng.str[-1]]) { 4065dfecf96Smrg si = 0; 4075dfecf96Smrg goto match; 4085dfecf96Smrg } 4095dfecf96Smrg goto fail; 4105dfecf96Smrg 4115dfecf96Smrg /**************************************************** 4125dfecf96Smrg * One byte code, one byte argument * 4135dfecf96Smrg ****************************************************/ 4145dfecf96Smrg case Re_Literal: 4155dfecf96Smrg if (eng.str >= eng.end) 4165dfecf96Smrg goto fail; 4175dfecf96Smrg if (eng.str[0] == eng.cod[1]) { 4185dfecf96Smrg ci = 2; 4195dfecf96Smrg goto match; 4205dfecf96Smrg } 4215dfecf96Smrg goto fail; 4225dfecf96Smrg case Re_LiteralNot: 4235dfecf96Smrg if (eng.str >= eng.end) 4245dfecf96Smrg goto fail; 4255dfecf96Smrg if (eng.str[0] != eng.cod[1]) { 4265dfecf96Smrg ci = 2; 4275dfecf96Smrg goto match; 4285dfecf96Smrg } 4295dfecf96Smrg goto fail; 4305dfecf96Smrg case Re_SearchLiteral: 4315dfecf96Smrg for (str = eng.str; str < eng.end; str++) { 4325dfecf96Smrg if (*str == eng.cod[1]) { 4335dfecf96Smrg ci = 2; 4345dfecf96Smrg eng.str = str; 4355dfecf96Smrg goto match; 4365dfecf96Smrg } 4375dfecf96Smrg } 4385dfecf96Smrg /* This bytecode only happens in the toplevel */ 4395dfecf96Smrg eng.so[0] = str - eng.bas; 4405dfecf96Smrg eng.str = str; 4415dfecf96Smrg goto fail; 4425dfecf96Smrg 4435dfecf96Smrg /**************************************************** 4445dfecf96Smrg * One byte code, two bytes argument * 4455dfecf96Smrg ****************************************************/ 4465dfecf96Smrg case Re_CaseLiteral: 4475dfecf96Smrg if (eng.str >= eng.end) 4485dfecf96Smrg goto fail; 4495dfecf96Smrg if (eng.str[0] == eng.cod[1] || eng.str[0] == eng.cod[2]) { 4505dfecf96Smrg ci = 3; 4515dfecf96Smrg goto match; 4525dfecf96Smrg } 4535dfecf96Smrg goto fail; 4545dfecf96Smrg case Re_CaseLiteralNot: 4555dfecf96Smrg if (eng.str >= eng.end) 4565dfecf96Smrg goto fail; 4575dfecf96Smrg if (eng.str[0] != eng.cod[1] && eng.str[0] != eng.cod[2]) { 4585dfecf96Smrg ci = 3; 4595dfecf96Smrg goto match; 4605dfecf96Smrg } 4615dfecf96Smrg goto fail; 4625dfecf96Smrg case Re_SearchCaseLiteral: 4635dfecf96Smrg for (str = eng.str; str < eng.end; str++) { 4645dfecf96Smrg if (*str == eng.cod[1] || *str == eng.cod[2]) { 4655dfecf96Smrg ci = 3; 4665dfecf96Smrg eng.str = str; 4675dfecf96Smrg goto match; 4685dfecf96Smrg } 4695dfecf96Smrg } 4705dfecf96Smrg eng.so[0] = str - eng.bas; 4715dfecf96Smrg eng.str = str; 4725dfecf96Smrg goto fail; 4735dfecf96Smrg 4745dfecf96Smrg /**************************************************** 4755dfecf96Smrg * One byte codes, two arguments, n bytes * 4765dfecf96Smrg ****************************************************/ 4775dfecf96Smrg case Re_String: 4785dfecf96Smrg len = eng.cod[1]; 4795dfecf96Smrg if (len & 0x80) { 4805dfecf96Smrg i = 3; 4815dfecf96Smrg len = (len & 0x7f) + (eng.cod[2] << 7); 4825dfecf96Smrg } 4835dfecf96Smrg else 4845dfecf96Smrg i = 2; 4855dfecf96Smrg if (eng.end - eng.str < len) 4865dfecf96Smrg goto fail; 4875dfecf96Smrg ptr = eng.cod + i; 4885dfecf96Smrg str = eng.str; 4895dfecf96Smrg for (k = len; k > 0; k--) { 4905dfecf96Smrg if (*ptr++ != *str++) 4915dfecf96Smrg goto fail; 4925dfecf96Smrg } 4935dfecf96Smrg ci = i + len; 4945dfecf96Smrg si = len; 4955dfecf96Smrg goto match; 4965dfecf96Smrg case Re_SearchString: 4975dfecf96Smrg len = eng.cod[1]; 4985dfecf96Smrg if (len & 0x80) { 4995dfecf96Smrg i = 3; 5005dfecf96Smrg len = (len & 0x7f) + (eng.cod[2] << 7); 5015dfecf96Smrg } 5025dfecf96Smrg else 5035dfecf96Smrg i = 2; 5045dfecf96Smrg for (str = eng.str; eng.end - str >= len; str = eng.str++) { 5055dfecf96Smrg for (ptr = eng.cod + i, str = eng.str, k = len; k > 0; k--) 5065dfecf96Smrg if (*ptr++ != *str++) 5075dfecf96Smrg break; 5085dfecf96Smrg if (k == 0) { 5095dfecf96Smrg /* Substring found */ 5105dfecf96Smrg ci = i + len; 5115dfecf96Smrg si = str - eng.str; 5125dfecf96Smrg goto match; 5135dfecf96Smrg } 5145dfecf96Smrg } 5155dfecf96Smrg eng.so[0] = eng.end - eng.bas; 5165dfecf96Smrg eng.str = eng.end; 5175dfecf96Smrg goto fail; 5185dfecf96Smrg 5195dfecf96Smrg case Re_CaseString: 5205dfecf96Smrg len = eng.cod[1]; 5215dfecf96Smrg if (len & 0x80) { 5225dfecf96Smrg i = 3; 5235dfecf96Smrg len = (len & 0x7f) + (eng.cod[2] << 7); 5245dfecf96Smrg } 5255dfecf96Smrg else 5265dfecf96Smrg i = 2; 5275dfecf96Smrg 5285dfecf96Smrg len >>= 1; 5295dfecf96Smrg /* Check if there are at least len/2 bytes left, string 5305dfecf96Smrg * is represented as two bytes, lower and upper case */ 5315dfecf96Smrg if (eng.end - eng.str < len) 5325dfecf96Smrg goto fail; 5335dfecf96Smrg ptr = eng.cod + i; 5345dfecf96Smrg str = eng.str; 5355dfecf96Smrg for (k = len; k > 0; str++, ptr += 2, k--) { 5365dfecf96Smrg if (*str != ptr[0] && *str != ptr[1]) 5375dfecf96Smrg goto fail; 5385dfecf96Smrg } 5395dfecf96Smrg ci = i + (len << 1); 5405dfecf96Smrg si = len; 5415dfecf96Smrg goto match; 5425dfecf96Smrg case Re_SearchCaseString: 5435dfecf96Smrg len = eng.cod[1]; 5445dfecf96Smrg if (len & 0x80) { 5455dfecf96Smrg i = 3; 5465dfecf96Smrg len = (len & 0x7f) + (eng.cod[2] << 7); 5475dfecf96Smrg } 5485dfecf96Smrg else 5495dfecf96Smrg i = 2; 5505dfecf96Smrg len >>= 1; 5515dfecf96Smrg for (str = eng.str; eng.end - str >= len; str = eng.str++) { 5525dfecf96Smrg for (ptr = eng.cod + i, str = eng.str, k = len; 5535dfecf96Smrg k > 0; k--, ptr += 2, str++) 5545dfecf96Smrg if (ptr[0] != str[0] && ptr[1] != str[0]) 5555dfecf96Smrg break; 5565dfecf96Smrg if (k == 0) { 5575dfecf96Smrg /* Substring found */ 5585dfecf96Smrg ci = i + (len << 1); 5595dfecf96Smrg si = str - eng.str; 5605dfecf96Smrg goto match; 5615dfecf96Smrg } 5625dfecf96Smrg } 5635dfecf96Smrg eng.so[0] = eng.end - eng.bas; 5645dfecf96Smrg eng.str = eng.end; 5655dfecf96Smrg goto fail; 5665dfecf96Smrg 5675dfecf96Smrg case Re_StringList: 5685dfecf96Smrg /* Number of strings */ 5695dfecf96Smrg k = eng.cod[1]; 5705dfecf96Smrg 5715dfecf96Smrg /* Where to jump after match */ 5725dfecf96Smrg bas = eng.cod[2] | (eng.cod[3] << 8); 5735dfecf96Smrg 5745dfecf96Smrg str = eng.str; 5755dfecf96Smrg ptr = eng.cod + k + 4; 5765dfecf96Smrg l = eng.end - eng.str; 5775dfecf96Smrg for (j = 0; j < k; j++) { 5785dfecf96Smrg len = eng.cod[j + 4]; 5795dfecf96Smrg if (len <= l) { 5805dfecf96Smrg for (i = 0; i < len; i++) 5815dfecf96Smrg if (ptr[i] != str[i]) 5825dfecf96Smrg goto next_stl; 5835dfecf96Smrg goto stl_match; 5845dfecf96Smrg } 5855dfecf96Smrgnext_stl: 5865dfecf96Smrg ptr += len; 5875dfecf96Smrg } 5885dfecf96Smrg goto fail; 5895dfecf96Smrgstl_match: 5905dfecf96Smrg ci = bas; 5915dfecf96Smrg si = len; 5925dfecf96Smrg goto match; 5935dfecf96Smrg 5945dfecf96Smrg case Re_CaseStringList: 5955dfecf96Smrg /* Number of strings */ 5965dfecf96Smrg k = eng.cod[1]; 5975dfecf96Smrg 5985dfecf96Smrg /* Where to jump after match */ 5995dfecf96Smrg bas = eng.cod[2] | (eng.cod[3] << 8); 6005dfecf96Smrg 6015dfecf96Smrg str = eng.str; 6025dfecf96Smrg ptr = eng.cod + k + 4; 6035dfecf96Smrg l = eng.end - eng.str; 6045dfecf96Smrg for (j = 0; j < k; j++) { 6055dfecf96Smrg len = eng.cod[j + 4]; 6065dfecf96Smrg if ((len >> 1) <= l) { 6075dfecf96Smrg for (i = m = 0; i < len; m++, i += 2) 6085dfecf96Smrg if (ptr[i] != str[m] && ptr[i + 1] != str[m]) 6095dfecf96Smrg goto next_cstl; 6105dfecf96Smrg goto cstl_match; 6115dfecf96Smrg } 6125dfecf96Smrgnext_cstl: 6135dfecf96Smrg ptr += len; 6145dfecf96Smrg } 6155dfecf96Smrg goto fail; 6165dfecf96Smrgcstl_match: 6175dfecf96Smrg ci = bas; 6185dfecf96Smrg si = len >> 1; 6195dfecf96Smrg goto match; 6205dfecf96Smrg 6215dfecf96Smrg 6225dfecf96Smrg case Re_LargeStringList: 6235dfecf96Smrg /* Where to jump after match */ 6245dfecf96Smrg bas = eng.cod[1] | (eng.cod[2] << 8); 6255dfecf96Smrg 6265dfecf96Smrg str = eng.str; 6275dfecf96Smrg 6285dfecf96Smrg /* First entry in index map */ 6295dfecf96Smrg ptr = eng.cod + 3; 6305dfecf96Smrg i = (int)str[0] << 1; 6315dfecf96Smrg j = ptr[i] | (ptr[i + 1] << 8); 6325dfecf96Smrg if (j == 0xffff) 6335dfecf96Smrg /* No entry with this byte */ 6345dfecf96Smrg goto fail; 6355dfecf96Smrg 6365dfecf96Smrg /* Bytes left in input */ 6375dfecf96Smrg l = eng.end - eng.str; 6385dfecf96Smrg 6395dfecf96Smrg /* First entry matching initial byte */ 6405dfecf96Smrg ptr += 512 + j; 6415dfecf96Smrg 6425dfecf96Smrg for (len = ptr[0]; 6435dfecf96Smrg str[0] == ptr[1]; 6445dfecf96Smrg ptr += len + 1, len = ptr[0]) { 6455dfecf96Smrg if (len <= l) { 6465dfecf96Smrg for (i = 1; i < len; i++) { 6475dfecf96Smrg if (ptr[i + 1] != str[i]) 6485dfecf96Smrg goto next_lstl; 6495dfecf96Smrg } 6505dfecf96Smrg ci = bas; 6515dfecf96Smrg si = len; 6525dfecf96Smrg goto match; 6535dfecf96Smrg } 6545dfecf96Smrgnext_lstl:; 6555dfecf96Smrg } 6565dfecf96Smrg goto fail; 6575dfecf96Smrg 6585dfecf96Smrg case Re_LargeCaseStringList: 6595dfecf96Smrg /* Where to jump after match */ 6605dfecf96Smrg bas = eng.cod[1] | (eng.cod[2] << 8); 6615dfecf96Smrg 6625dfecf96Smrg str = eng.str; 6635dfecf96Smrg 6645dfecf96Smrg /* First entry in index map */ 6655dfecf96Smrg ptr = eng.cod + 3; 6665dfecf96Smrg i = (int)str[0] << 1; 6675dfecf96Smrg j = ptr[i] | (ptr[i + 1] << 8); 6685dfecf96Smrg if (j == 0xffff) 6695dfecf96Smrg /* No entry with this byte */ 6705dfecf96Smrg goto fail; 6715dfecf96Smrg 6725dfecf96Smrg /* Bytes left in input */ 6735dfecf96Smrg l = eng.end - eng.str; 6745dfecf96Smrg 6755dfecf96Smrg /* First entry matching initial byte */ 6765dfecf96Smrg ptr += 512 + j; 6775dfecf96Smrg 6785dfecf96Smrg for (len = ptr[0]; 6795dfecf96Smrg str[0] == ptr[1] || str[0] == ptr[2]; 6805dfecf96Smrg ptr += len + 1, len = ptr[0]) { 6815dfecf96Smrg if ((k = (len >> 1)) <= l) { 6825dfecf96Smrg for (i = 2, j = 1; i < len; i += 2, j++) { 6835dfecf96Smrg if (ptr[i + 1] != str[j] && ptr[i + 2] != str[j]) 6845dfecf96Smrg goto next_lcstl; 6855dfecf96Smrg } 6865dfecf96Smrg ci = bas; 6875dfecf96Smrg si = k; 6885dfecf96Smrg goto match; 6895dfecf96Smrg } 6905dfecf96Smrgnext_lcstl:; 6915dfecf96Smrg } 6925dfecf96Smrg goto fail; 6935dfecf96Smrg 6945dfecf96Smrg 6955dfecf96Smrg /**************************************************** 6965dfecf96Smrg * Character range matching * 6975dfecf96Smrg ****************************************************/ 6985dfecf96Smrg case Re_Range: 6995dfecf96Smrg if (eng.str < eng.end && eng.cod[eng.str[0] + 1]) { 7005dfecf96Smrg ci = 257; 7015dfecf96Smrg goto match; 7025dfecf96Smrg } 7035dfecf96Smrg goto fail; 7045dfecf96Smrg case Re_RangeNot: 7055dfecf96Smrg if (eng.str >= eng.end || eng.cod[eng.str[0] + 1]) 7065dfecf96Smrg goto fail; 7075dfecf96Smrg ci = 257; 7085dfecf96Smrg goto match; 7095dfecf96Smrg 7105dfecf96Smrg /**************************************************** 7115dfecf96Smrg * Group handling * 7125dfecf96Smrg ****************************************************/ 7135dfecf96Smrg case Re_Open: 7145dfecf96Smrg if (++eng.goff >= 9) 7155dfecf96Smrg return (RE_ASSERT); 7165dfecf96Smrg eng.gso[eng.goff] = eng.str - eng.bas; 7175dfecf96Smrg ++eng.cod; 7185dfecf96Smrg continue; 7195dfecf96Smrg case Re_Close: 7205dfecf96Smrg eng.geo[eng.goff] = eng.str - eng.bas; 7215dfecf96Smrg ++eng.cod; 7225dfecf96Smrg continue; 7235dfecf96Smrg case Re_Update: 7245dfecf96Smrg bas = eng.cod[1]; 7255dfecf96Smrg eng.geo[eng.goff] = eng.str - eng.bas; 7265dfecf96Smrg eng.cod += 2; /* + Update + bas */ 7275dfecf96Smrg continue; 7285dfecf96Smrg 7295dfecf96Smrg /**************************************************** 7305dfecf96Smrg * Backreference * 7315dfecf96Smrg ****************************************************/ 7325dfecf96Smrg case Re_Backref: 7335dfecf96Smrg i = eng.cod[1]; 7345dfecf96Smrg j = eng.gso[i]; 7355dfecf96Smrg k = eng.geo[i]; 7365dfecf96Smrg len = k - j; 7375dfecf96Smrg if (k < j || eng.end - eng.str < len) 7385dfecf96Smrg goto fail; 7395dfecf96Smrg ptr = eng.bas + j; 7405dfecf96Smrg str = eng.str; 7415dfecf96Smrg for (l = len; l > 0; l--) { 7425dfecf96Smrg if (*ptr++ != *str++) 7435dfecf96Smrg goto fail; 7445dfecf96Smrg } 7455dfecf96Smrg ci = 2; 7465dfecf96Smrg si = len; 7475dfecf96Smrg goto match; 7485dfecf96Smrg 7495dfecf96Smrg /**************************************************** 7505dfecf96Smrg * Alternatives handling * 7515dfecf96Smrg ****************************************************/ 7525dfecf96Smrg case Re_Alt: 7535dfecf96Smrg bas = eng.off; 7545dfecf96Smrg if (++eng.off >= MAX_DEPTH) 7555dfecf96Smrg return (RE_ASSERT); 7565dfecf96Smrg 7575dfecf96Smrg /* Get offset of next alternative */ 7585dfecf96Smrg i = eng.cod[1] | (eng.cod[2] << 8); 7595dfecf96Smrg 7605dfecf96Smrg /* Setup for next alternative if the current fails */ 7615dfecf96Smrg eng.rcod[eng.off] = eng.cod + i + 1; /* + Alt */ 7625dfecf96Smrg 7635dfecf96Smrg /* If fail, test the next alternative in the same string */ 7645dfecf96Smrg eng.rstr[eng.off] = eng.str; 7655dfecf96Smrg 7665dfecf96Smrg /* Setup match offsets */ 7675dfecf96Smrg if (eng.so[bas] <= eng.eo[bas]) 7685dfecf96Smrg eng.so[eng.off] = eng.eo[bas]; 7695dfecf96Smrg else 7705dfecf96Smrg eng.so[eng.off] = eng.so[bas]; 7715dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1; 7725dfecf96Smrg 7735dfecf96Smrg /* Save start of possible previous matches */ 7745dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; 7755dfecf96Smrg 7765dfecf96Smrg /* Skip code */ 7775dfecf96Smrg eng.cod += 3; 7785dfecf96Smrg 7795dfecf96Smrg /* Go try the first alternative */ 7805dfecf96Smrg continue; 7815dfecf96Smrg 7825dfecf96Smrg case Re_AltNext: 7835dfecf96Smrg bas = eng.off - 1; 7845dfecf96Smrg /* Check if matched and if it is a better match */ 7855dfecf96Smrg if (eng.sv[eng.off] - eng.so[eng.off] < 7865dfecf96Smrg eng.eo[eng.off] - eng.so[eng.off]) 7875dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 7885dfecf96Smrg 7895dfecf96Smrg /* Get offset of next alternative */ 7905dfecf96Smrg i = eng.cod[1] | (eng.cod[2] << 8); 7915dfecf96Smrg 7925dfecf96Smrg /* Setup for next alternative if the current fails */ 7935dfecf96Smrg eng.rcod[eng.off] = eng.cod + i + 1; /* + AltNext */ 7945dfecf96Smrg 7955dfecf96Smrg /* Setup match offset */ 7965dfecf96Smrg eng.eo[eng.off] = eng.so[eng.off] - 1; 7975dfecf96Smrg 7985dfecf96Smrg /* Reset string for next alternative */ 7995dfecf96Smrg eng.str = eng.rstr[eng.off]; 8005dfecf96Smrg 8015dfecf96Smrg /* Skip code */ 8025dfecf96Smrg eng.cod += 3; 8035dfecf96Smrg 8045dfecf96Smrg /* Go try the next alternative */ 8055dfecf96Smrg continue; 8065dfecf96Smrg 8075dfecf96Smrg case Re_AltDone: 8085dfecf96Smrg bas = eng.off - 1; 8095dfecf96Smrg /* Check if matched and if it is a better match */ 8105dfecf96Smrg if (eng.sv[eng.off] - eng.so[eng.off] < 8115dfecf96Smrg eng.eo[eng.off] - eng.so[eng.off]) 8125dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 8135dfecf96Smrg 8145dfecf96Smrg /* If there is a match */ 8155dfecf96Smrg if (eng.sv[eng.off] >= eng.so[eng.off]) { 8165dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 8175dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 8185dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 8195dfecf96Smrg 8205dfecf96Smrg /* Pop stack and skip code */ 8215dfecf96Smrg --eng.off; 8225dfecf96Smrg ++eng.cod; 8235dfecf96Smrg 8245dfecf96Smrg /* Go try next regular expression pattern */ 8255dfecf96Smrg continue; 8265dfecf96Smrg } 8275dfecf96Smrg 8285dfecf96Smrg /* Failed, reset string and pop stack */ 8295dfecf96Smrg eng.str = eng.rstr[eng.off]; 8305dfecf96Smrg --eng.off; 8315dfecf96Smrg goto fail; 8325dfecf96Smrg 8335dfecf96Smrg 8345dfecf96Smrg /**************************************************** 8355dfecf96Smrg * Repetition * 8365dfecf96Smrg ****************************************************/ 8375dfecf96Smrg 8385dfecf96Smrg /* Note that the repetition counter is not 8395dfecf96Smrg * updated for <re>*, <re>+ and <re>? 8405dfecf96Smrg * it is easy to updated, but since it is not 8415dfecf96Smrg * really useful, code to do it was removed 8425dfecf96Smrg * to save a few cpu cicles. */ 8435dfecf96Smrg 8445dfecf96Smrg#define REPETITION_SETUP() \ 8455dfecf96Smrg if (++eng.off >= MAX_DEPTH) \ 8465dfecf96Smrg return (RE_ASSERT); \ 8475dfecf96Smrg \ 8485dfecf96Smrg /* Return here for recovery if match fail */ \ 8495dfecf96Smrg eng.rcod[eng.off] = eng.cod; \ 8505dfecf96Smrg \ 8515dfecf96Smrg /* Setup match offsets */ \ 8525dfecf96Smrg if (eng.so[bas] <= eng.eo[bas]) \ 8535dfecf96Smrg eng.so[eng.off] = eng.eo[bas]; \ 8545dfecf96Smrg else \ 8555dfecf96Smrg eng.so[eng.off] = eng.so[bas]; \ 8565dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; \ 8575dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\ 8585dfecf96Smrg \ 8595dfecf96Smrg /* Skip repetition instruction */ \ 8605dfecf96Smrg eng.cod += 4; 8615dfecf96Smrg 8625dfecf96Smrg 8635dfecf96Smrg case Re_AnyTimes: 8645dfecf96Smrg bas = eng.cod[1]; 8655dfecf96Smrg if (eng.off == bas) { 8665dfecf96Smrg /* First iteration */ 8675dfecf96Smrg REPETITION_SETUP(); 8685dfecf96Smrg } 8695dfecf96Smrg else { 8705dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 8715dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 8725dfecf96Smrg /* Update offset of match */ 8735dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 8745dfecf96Smrg 8755dfecf96Smrg /* Skip repetition instruction */ 8765dfecf96Smrg eng.cod += 4; 8775dfecf96Smrg } 8785dfecf96Smrg else { 8795dfecf96Smrg /* Match failed but it is ok */ 8805dfecf96Smrg len = eng.cod[2] | (eng.cod[3] << 8); 8815dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 8825dfecf96Smrg if (eng.sv[eng.off] >= eng.so[eng.off]) 8835dfecf96Smrg /* Something matched earlier, update */ 8845dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 8855dfecf96Smrg else if (eng.eo[bas] < eng.so[bas]) 8865dfecf96Smrg /* Empty match */ 8875dfecf96Smrg eng.eo[bas] = eng.so[bas]; 8885dfecf96Smrg 8895dfecf96Smrg /* Try next pattern at correct offset */ 8905dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 8915dfecf96Smrg 8925dfecf96Smrg /* Pop stack and skip code */ 8935dfecf96Smrg --eng.off; 8945dfecf96Smrg eng.cod += len; 8955dfecf96Smrg } 8965dfecf96Smrg } 8975dfecf96Smrg continue; 8985dfecf96Smrg 8995dfecf96Smrg case Re_Maybe: 9005dfecf96Smrg bas = eng.cod[1]; 9015dfecf96Smrg if (eng.off == bas) { 9025dfecf96Smrg /* First iteration */ 9035dfecf96Smrg REPETITION_SETUP(); 9045dfecf96Smrg } 9055dfecf96Smrg else { 9065dfecf96Smrg /* Matched or first iteration is done */ 9075dfecf96Smrg len = eng.cod[2] | (eng.cod[3] << 8); 9085dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 9095dfecf96Smrg if (eng.eo[eng.off] > eng.so[eng.off]) { 9105dfecf96Smrg /* Something matched earlier, update */ 9115dfecf96Smrg eng.eo[bas] = eng.eo[eng.off]; 9125dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 9135dfecf96Smrg /* Don't need to update counter */ 9145dfecf96Smrg } 9155dfecf96Smrg else { 9165dfecf96Smrg /* Empty match */ 9175dfecf96Smrg if (eng.eo[bas] < eng.so[bas]) 9185dfecf96Smrg eng.eo[bas] = eng.so[bas]; 9195dfecf96Smrg 9205dfecf96Smrg /* Try next pattern at correct offset */ 9215dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 9225dfecf96Smrg } 9235dfecf96Smrg 9245dfecf96Smrg /* Pop stack */ 9255dfecf96Smrg --eng.off; 9265dfecf96Smrg 9275dfecf96Smrg /* Skip code */ 9285dfecf96Smrg eng.cod += len; 9295dfecf96Smrg } 9305dfecf96Smrg continue; 9315dfecf96Smrg 9325dfecf96Smrg case Re_AtLeast: 9335dfecf96Smrg bas = eng.cod[1]; 9345dfecf96Smrg if (eng.off == bas) { 9355dfecf96Smrg /* First iteration */ 9365dfecf96Smrg REPETITION_SETUP(); 9375dfecf96Smrg } 9385dfecf96Smrg else { 9395dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 9405dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 9415dfecf96Smrg /* Update offset of match */ 9425dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 9435dfecf96Smrg 9445dfecf96Smrg /* Skip repetition instruction */ 9455dfecf96Smrg eng.cod += 4; 9465dfecf96Smrg } 9475dfecf96Smrg else { 9485dfecf96Smrg /* Last try failed */ 9495dfecf96Smrg len = eng.cod[2] | (eng.cod[3] << 8); 9505dfecf96Smrg if (eng.sv[eng.off] >= eng.so[eng.off]) { 9515dfecf96Smrg /* Something matched earlier, update */ 9525dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 9535dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 9545dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 9555dfecf96Smrg } 9565dfecf96Smrg else { 9575dfecf96Smrg /* Do it here, so that the fail label does 9585dfecf96Smrg * not need to do too expensive work for 9595dfecf96Smrg * simple patterns. */ 9605dfecf96Smrg eng.so[bas] = eng.str - eng.bas; 9615dfecf96Smrg 9625dfecf96Smrg /* Zero matches, pop stack and restart */ 9635dfecf96Smrg --eng.off; 9645dfecf96Smrg goto fail; 9655dfecf96Smrg } 9665dfecf96Smrg 9675dfecf96Smrg /* Pop stack and skip code */ 9685dfecf96Smrg --eng.off; 9695dfecf96Smrg eng.cod += len; 9705dfecf96Smrg } 9715dfecf96Smrg } 9725dfecf96Smrg continue; 9735dfecf96Smrg 9745dfecf96Smrg 9755dfecf96Smrg /**************************************************** 9765dfecf96Smrg * Repetition with arguments * 9775dfecf96Smrg ****************************************************/ 9785dfecf96Smrg case Re_Exact: 9795dfecf96Smrg#define COMPLEX_REPETITION_SETUP_0() \ 9805dfecf96Smrg i = eng.cod[1]; \ 9815dfecf96Smrg bas = eng.cod[2]; 9825dfecf96Smrg 9835dfecf96Smrg#define COMPLEX_REPETITION_SETUP() \ 9845dfecf96Smrg /* First iteration */ \ 9855dfecf96Smrg if (++eng.off >= MAX_DEPTH) \ 9865dfecf96Smrg return (RE_ASSERT); \ 9875dfecf96Smrg \ 9885dfecf96Smrg /* Remeber number or repetitions */ \ 9895dfecf96Smrg eng.re[eng.off] = 0; \ 9905dfecf96Smrg \ 9915dfecf96Smrg /* Return here for recovery if match fail */ \ 9925dfecf96Smrg eng.rcod[eng.off] = eng.cod; \ 9935dfecf96Smrg \ 9945dfecf96Smrg /* Setup match offsets */ \ 9955dfecf96Smrg if (eng.so[bas] <= eng.eo[bas]) \ 9965dfecf96Smrg eng.so[eng.off] = eng.eo[bas]; \ 9975dfecf96Smrg else \ 9985dfecf96Smrg eng.so[eng.off] = eng.so[bas]; \ 9995dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\ 10005dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; \ 10015dfecf96Smrg \ 10025dfecf96Smrg /* Skip repetition instruction */ \ 10035dfecf96Smrg eng.cod += 5; 10045dfecf96Smrg 10055dfecf96Smrg COMPLEX_REPETITION_SETUP_0(); 10065dfecf96Smrg if (eng.off == bas) { 10075dfecf96Smrg /* First iteration */ 10085dfecf96Smrg COMPLEX_REPETITION_SETUP(); 10095dfecf96Smrg } 10105dfecf96Smrg else { 10115dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 10125dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 10135dfecf96Smrg /* Update offset of match */ 10145dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 10155dfecf96Smrg 10165dfecf96Smrg /* Update repetition counter */ 10175dfecf96Smrg if (++eng.re[eng.off] == i) { 10185dfecf96Smrg /* Matched the required times */ 10195dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 10205dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 10215dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 10225dfecf96Smrg 10235dfecf96Smrg /* Update code */ 10245dfecf96Smrg k = eng.cod[3] | (eng.cod[4] << 8); 10255dfecf96Smrg eng.cod += k; 10265dfecf96Smrg 10275dfecf96Smrg /* Pop stack and go for next pattern */ 10285dfecf96Smrg --eng.off; 10295dfecf96Smrg continue; 10305dfecf96Smrg } 10315dfecf96Smrg 10325dfecf96Smrg /* Skip repetition instruction */ 10335dfecf96Smrg eng.cod += 5; 10345dfecf96Smrg } 10355dfecf96Smrg else { 10365dfecf96Smrg /* Do it here, so that the fail label does 10375dfecf96Smrg * not need to do too expensive work for 10385dfecf96Smrg * simple patterns. */ 10395dfecf96Smrg eng.so[bas] = eng.str - eng.bas; 10405dfecf96Smrg 10415dfecf96Smrg /* Pop stack and restart */ 10425dfecf96Smrg --eng.off; 10435dfecf96Smrg goto fail; 10445dfecf96Smrg } 10455dfecf96Smrg } 10465dfecf96Smrg continue; 10475dfecf96Smrg 10485dfecf96Smrg case Re_Min: 10495dfecf96Smrg COMPLEX_REPETITION_SETUP_0(); 10505dfecf96Smrg if (eng.off == bas) { 10515dfecf96Smrg /* First iteration */ 10525dfecf96Smrg COMPLEX_REPETITION_SETUP(); 10535dfecf96Smrg } 10545dfecf96Smrg else { 10555dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 10565dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 10575dfecf96Smrg /* Update offset of match */ 10585dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 10595dfecf96Smrg 10605dfecf96Smrg /* Update repetition counter */ 10615dfecf96Smrg ++eng.re[eng.off]; 10625dfecf96Smrg 10635dfecf96Smrg /* Skip repetition instruction and try again */ 10645dfecf96Smrg eng.cod += 5; 10655dfecf96Smrg } 10665dfecf96Smrg else { 10675dfecf96Smrg /* Match failed! */ 10685dfecf96Smrg if (eng.re[eng.off] < i) { 10695dfecf96Smrg /* Do it here, so that the fail label does 10705dfecf96Smrg * not need to do too expensive work for 10715dfecf96Smrg * simple patterns. */ 10725dfecf96Smrg eng.so[bas] = eng.str - eng.bas; 10735dfecf96Smrg 10745dfecf96Smrg /* Didn't match required number of times */ 10755dfecf96Smrg --eng.off; 10765dfecf96Smrg goto fail; 10775dfecf96Smrg } 10785dfecf96Smrg else { 10795dfecf96Smrg /* Matched minimum number of times */ 10805dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 10815dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 10825dfecf96Smrg k = eng.cod[3] | (eng.cod[4] << 8); 10835dfecf96Smrg 10845dfecf96Smrg /* Update code and pop stack */ 10855dfecf96Smrg eng.cod += k; 10865dfecf96Smrg --eng.off; 10875dfecf96Smrg } 10885dfecf96Smrg } 10895dfecf96Smrg } 10905dfecf96Smrg continue; 10915dfecf96Smrg 10925dfecf96Smrg case Re_Max: 10935dfecf96Smrg COMPLEX_REPETITION_SETUP_0(); 10945dfecf96Smrg if (eng.off == bas) { 10955dfecf96Smrg /* First iteration */ 10965dfecf96Smrg COMPLEX_REPETITION_SETUP(); 10975dfecf96Smrg } 10985dfecf96Smrg else { 10995dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 11005dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 11015dfecf96Smrg /* Update offset of match */ 11025dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 11035dfecf96Smrg 11045dfecf96Smrg /* Update repetition counter */ 11055dfecf96Smrg if (++eng.re[eng.off] == i) { 11065dfecf96Smrg /* Matched the maximum times */ 11075dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 11085dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 11095dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 11105dfecf96Smrg 11115dfecf96Smrg k = eng.cod[3] | (eng.cod[4] << 8); 11125dfecf96Smrg 11135dfecf96Smrg /* Update code and pop stack */ 11145dfecf96Smrg eng.cod += k; 11155dfecf96Smrg --eng.off; 11165dfecf96Smrg continue; 11175dfecf96Smrg } 11185dfecf96Smrg 11195dfecf96Smrg /* Skip repetition instruction and try again */ 11205dfecf96Smrg eng.cod += 5; 11215dfecf96Smrg } 11225dfecf96Smrg else { 11235dfecf96Smrg /* No matches, but zero matches are ok */ 11245dfecf96Smrg k = eng.cod[3] | (eng.cod[4] << 8); 11255dfecf96Smrg if (eng.sv[eng.off] >= eng.so[eng.off]) { 11265dfecf96Smrg /* Something matched earlier, update */ 11275dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 11285dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 11295dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 11305dfecf96Smrg } 11315dfecf96Smrg else { 11325dfecf96Smrg /* Empty match */ 11335dfecf96Smrg if (eng.eo[bas] < eng.so[bas]) 11345dfecf96Smrg eng.eo[bas] = eng.so[bas]; 11355dfecf96Smrg 11365dfecf96Smrg /* Try next pattern at correct offset */ 11375dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 11385dfecf96Smrg } 11395dfecf96Smrg 11405dfecf96Smrg /* Pop stack and update code */ 11415dfecf96Smrg --eng.off; 11425dfecf96Smrg eng.cod += k; 11435dfecf96Smrg } 11445dfecf96Smrg } 11455dfecf96Smrg continue; 11465dfecf96Smrg 11475dfecf96Smrg case Re_MinMax: 11485dfecf96Smrg bas = eng.cod[3]; 11495dfecf96Smrg if (eng.off == bas) { 11505dfecf96Smrg /* First iteration */ 11515dfecf96Smrg COMPLEX_REPETITION_SETUP(); 11525dfecf96Smrg } 11535dfecf96Smrg else { 11545dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off] && 11555dfecf96Smrg eng.eo[eng.off] > eng.sv[eng.off]) { 11565dfecf96Smrg /* Update offset of match */ 11575dfecf96Smrg eng.sv[eng.off] = eng.eo[eng.off]; 11585dfecf96Smrg 11595dfecf96Smrg /* Update repetition counter */ 11605dfecf96Smrg if (++eng.re[eng.off] == eng.cod[2]) { 11615dfecf96Smrg /* Matched the maximum times */ 11625dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 11635dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 11645dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 11655dfecf96Smrg k = eng.cod[4] | (eng.cod[5] << 8); 11665dfecf96Smrg 11675dfecf96Smrg /* Update code and pop stack */ 11685dfecf96Smrg eng.cod += k; 11695dfecf96Smrg --eng.off; 11705dfecf96Smrg continue; 11715dfecf96Smrg } 11725dfecf96Smrg 11735dfecf96Smrg /* Skip repetition instruction and try again */ 11745dfecf96Smrg eng.cod += 6; 11755dfecf96Smrg } 11765dfecf96Smrg else { 11775dfecf96Smrg /* Match failed! */ 11785dfecf96Smrg if (eng.re[eng.off] < eng.cod[1]) { 11795dfecf96Smrg /* Do it here, so that the fail label does 11805dfecf96Smrg * not need to do too expensive work for 11815dfecf96Smrg * simple patterns. */ 11825dfecf96Smrg eng.so[bas] = eng.str - eng.bas; 11835dfecf96Smrg 11845dfecf96Smrg /* Didn't match required number of times */ 11855dfecf96Smrg --eng.off; 11865dfecf96Smrg goto fail; 11875dfecf96Smrg } 11885dfecf96Smrg else { 11895dfecf96Smrg /* Matched minimum number of times */ 11905dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 11915dfecf96Smrg eng.eo[bas] = eng.sv[eng.off]; 11925dfecf96Smrg eng.str = eng.bas + eng.eo[bas]; 11935dfecf96Smrg k = eng.cod[4] | (eng.cod[5] << 8); 11945dfecf96Smrg 11955dfecf96Smrg /* Update code and pop stack */ 11965dfecf96Smrg eng.cod += k; 11975dfecf96Smrg --eng.off; 11985dfecf96Smrg } 11995dfecf96Smrg } 12005dfecf96Smrg } 12015dfecf96Smrg continue; 12025dfecf96Smrg 12035dfecf96Smrg 12045dfecf96Smrg /**************************************************** 12055dfecf96Smrg * Special repetition handling * 12065dfecf96Smrg ****************************************************/ 12075dfecf96Smrg case Re_AnyAnyTimes: 12085dfecf96Smrg /* code(1) + bas(1) + gbas(1) + jump(2) */ 12095dfecf96Smrg bas = eng.cod[1]; 12105dfecf96Smrg if (eng.off == bas) { 12115dfecf96Smrg /* First iteration */ 12125dfecf96Smrg if (++eng.off >= MAX_DEPTH) 12135dfecf96Smrg return (RE_ASSERT); 12145dfecf96Smrg 12155dfecf96Smrg /* Return here for recovery if match fail */ 12165dfecf96Smrg eng.rcod[eng.off] = eng.cod; 12175dfecf96Smrg 12185dfecf96Smrg /* If fail, test the next pattern at the same point */ 12195dfecf96Smrg eng.rstr[eng.off] = eng.str; 12205dfecf96Smrg 12215dfecf96Smrg /* Setup match offsets */ 12225dfecf96Smrg eng.so[eng.off] = eng.str - eng.bas; 12235dfecf96Smrg eng.eo[eng.off] = eng.so[eng.off] - 1; 12245dfecf96Smrg 12255dfecf96Smrg if (newline) 12265dfecf96Smrg /* Use the repetition counter to store start of 12275dfecf96Smrg * skipped string, to later check if skipping a 12285dfecf96Smrg * newline. */ 12295dfecf96Smrg eng.re[eng.off] = eng.so[eng.off]; 12305dfecf96Smrg 12315dfecf96Smrg /* Save start of possible previous matches */ 12325dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; 12335dfecf96Smrg 12345dfecf96Smrg /* Skip repetition instruction */ 12355dfecf96Smrg eng.cod += 5; 12365dfecf96Smrg } 12375dfecf96Smrg else { 12385dfecf96Smrg /* -1 as an unsigned char */ 12395dfecf96Smrg if (eng.cod[2] != 0xff) 12405dfecf96Smrg eng.goff = eng.cod[2]; 12415dfecf96Smrg else 12425dfecf96Smrg eng.goff = -1; 12435dfecf96Smrg 12445dfecf96Smrg if (newline) { 12455dfecf96Smrg ptr = eng.bas + eng.re[eng.off]; 12465dfecf96Smrg str = eng.bas + eng.so[eng.off]; 12475dfecf96Smrg for (; ptr < str; ptr++) 12485dfecf96Smrg if (*ptr == '\n') { 12495dfecf96Smrg eng.cod = eng.rcod[0]; 12505dfecf96Smrg eng.so[0] = ptr - eng.bas + 1; 12515dfecf96Smrg eng.eo[0] = eng.so[0] - 1; 12525dfecf96Smrg eng.rstr[0] = eng.str = ptr + 1; 12535dfecf96Smrg eng.off = 0; 12545dfecf96Smrg goto reset; 12555dfecf96Smrg } 12565dfecf96Smrg /* If looping, don't do too many noops */ 12575dfecf96Smrg eng.re[eng.off] = ptr - eng.bas; 12585dfecf96Smrg } 12595dfecf96Smrg 12605dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off]) { 12615dfecf96Smrg /* Note that this is only true if all possibly 12625dfecf96Smrg * nested special repetitions also matched. */ 12635dfecf96Smrg 12645dfecf96Smrg if (eng.goff >= 0) { 12655dfecf96Smrg if (eng.cod[5] == Re_Update) 12665dfecf96Smrg eng.gso[eng.goff] = eng.eo[bas] + 12675dfecf96Smrg (eng.so[bas] > eng.eo[bas]); 12685dfecf96Smrg else if (eng.geo[eng.goff] < eng.so[eng.off]) 12695dfecf96Smrg eng.geo[eng.goff] = eng.so[eng.off]; 12705dfecf96Smrg } 12715dfecf96Smrg 12725dfecf96Smrg /* Jump relative offset */ 12735dfecf96Smrg len = eng.cod[3] | (eng.cod[4] << 8); 12745dfecf96Smrg 12755dfecf96Smrg /* Restore offset from where started trying */ 12765dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 12775dfecf96Smrg eng.eo[bas] = eng.eo[eng.off]; 12785dfecf96Smrg 12795dfecf96Smrg /* Pop stack and skip code */ 12805dfecf96Smrg --eng.off; 12815dfecf96Smrg eng.cod += len; 12825dfecf96Smrg } 12835dfecf96Smrg else { 12845dfecf96Smrg /* Only give up if the entire string was scanned */ 12855dfecf96Smrg if (eng.str < eng.end) { 12865dfecf96Smrg /* Update restart point for next pattern */ 12875dfecf96Smrg eng.str = ++eng.rstr[eng.off]; 12885dfecf96Smrg 12895dfecf96Smrg /* Reset start of nested match */ 12905dfecf96Smrg eng.so[eng.off] = eng.str - eng.bas; 12915dfecf96Smrg 12925dfecf96Smrg /* Skip repetition instruction */ 12935dfecf96Smrg eng.cod += 5; 12945dfecf96Smrg } 12955dfecf96Smrg else { 12965dfecf96Smrg /* Entire string scanned and failed */ 12975dfecf96Smrg 12985dfecf96Smrg /* Jump relative offset */ 12995dfecf96Smrg len = eng.cod[3] | (eng.cod[4] << 8); 13005dfecf96Smrg 13015dfecf96Smrg /* Restore offset from where started trying */ 13025dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 13035dfecf96Smrg eng.eo[bas] = eng.ss[eng.off] - 1; 13045dfecf96Smrg 13055dfecf96Smrg /* Pop stack and skip code */ 13065dfecf96Smrg --eng.off; 13075dfecf96Smrg eng.cod += len; 13085dfecf96Smrg } 13095dfecf96Smrg } 13105dfecf96Smrg } 13115dfecf96Smrg continue; 13125dfecf96Smrg 13135dfecf96Smrg /* This is significantly different than matching <re>.*<re> 13145dfecf96Smrg * because it may need to restart several times since it is 13155dfecf96Smrg * possible to find too many false positives, for example: 13165dfecf96Smrg * a.*b => once one "a" is found, scan all 13175dfecf96Smrg * the remaining string searching for a "b" 13185dfecf96Smrg * a.?b => the string may have too many "a"s, but the 13195dfecf96Smrg * first occurrences of "a" may not be followed 13205dfecf96Smrg * by any-character and a "b" or a single "b". 13215dfecf96Smrg */ 13225dfecf96Smrg case Re_AnyMaybe: 13235dfecf96Smrg bas = eng.cod[1]; 13245dfecf96Smrg if (eng.off == bas) { 13255dfecf96Smrg /* First iteration */ 13265dfecf96Smrg if (++eng.off >= MAX_DEPTH) 13275dfecf96Smrg return (RE_ASSERT); 13285dfecf96Smrg 13295dfecf96Smrg /* Return here for recovery if match fail */ 13305dfecf96Smrg eng.rcod[eng.off] = eng.cod; 13315dfecf96Smrg 13325dfecf96Smrg /* First try without eating a byte */ 13335dfecf96Smrg eng.rstr[eng.off] = eng.str; 13345dfecf96Smrg 13355dfecf96Smrg /* Remember this is the first try if match fail */ 13365dfecf96Smrg eng.re[eng.off] = 0; 13375dfecf96Smrg 13385dfecf96Smrg /* Setup match offsets */ 13395dfecf96Smrg eng.so[eng.off] = eng.str - eng.bas; 13405dfecf96Smrg eng.eo[eng.off] = eng.so[eng.off] - 1; 13415dfecf96Smrg 13425dfecf96Smrg /* Save start of possible previous matches */ 13435dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; 13445dfecf96Smrg 13455dfecf96Smrg /* Skip repetition instruction */ 13465dfecf96Smrg eng.cod += 5; 13475dfecf96Smrg } 13485dfecf96Smrg else { 13495dfecf96Smrg /* -1 as an unsigned char */ 13505dfecf96Smrg if (eng.cod[2] != 0xff) 13515dfecf96Smrg eng.goff = eng.cod[2]; 13525dfecf96Smrg else 13535dfecf96Smrg eng.goff = -1; 13545dfecf96Smrg 13555dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off]) { 13565dfecf96Smrg /* Something matched */ 13575dfecf96Smrg 13585dfecf96Smrg if (eng.goff >= 0) { 13595dfecf96Smrg if (eng.cod[5] == Re_Update) 13605dfecf96Smrg eng.gso[eng.goff] = eng.eo[bas] + 13615dfecf96Smrg (eng.so[bas] > eng.eo[bas]); 13625dfecf96Smrg else if (eng.geo[eng.goff] < eng.so[eng.off]) 13635dfecf96Smrg eng.geo[eng.goff] = eng.so[eng.off]; 13645dfecf96Smrg } 13655dfecf96Smrg 13665dfecf96Smrg /* Jump relative offset */ 13675dfecf96Smrg len = eng.cod[3] | (eng.cod[4] << 8); 13685dfecf96Smrg 13695dfecf96Smrg /* Update offset of match */ 13705dfecf96Smrg eng.eo[bas] = eng.eo[eng.off]; 13715dfecf96Smrg 13725dfecf96Smrg /* Pop stack and skip code */ 13735dfecf96Smrg --eng.off; 13745dfecf96Smrg eng.cod += len; 13755dfecf96Smrg } 13765dfecf96Smrg else if (eng.re[eng.off] == 0 && 13775dfecf96Smrg (!newline || eng.rstr[eng.off][1] != '\n')) { 13785dfecf96Smrg /* Try this time skiping a byte */ 13795dfecf96Smrg ++eng.re[eng.off]; 13805dfecf96Smrg 13815dfecf96Smrg /* Reset string, skip code and go try one time more */ 13825dfecf96Smrg eng.str = ++eng.rstr[eng.off]; 13835dfecf96Smrg eng.cod += 5; 13845dfecf96Smrg } 13855dfecf96Smrg else { 13865dfecf96Smrg /* Failed to match */ 13875dfecf96Smrg 13885dfecf96Smrg /* Update offsets */ 13895dfecf96Smrg eng.eo[bas] = eng.ss[eng.off]; 13905dfecf96Smrg eng.so[bas] = eng.eo[bas] + 1; 13915dfecf96Smrg 13925dfecf96Smrg eng.str = eng.rstr[eng.off] + (eng.re[eng.off] == 0); 13935dfecf96Smrg 13945dfecf96Smrg /* Pop stack and return to toplevel code */ 13955dfecf96Smrg --eng.off; 13965dfecf96Smrg if (eng.str >= eng.end) 13975dfecf96Smrg goto wont; 13985dfecf96Smrg eng.cod = eng.rcod[bas]; 13995dfecf96Smrg } 14005dfecf96Smrg } 14015dfecf96Smrg continue; 14025dfecf96Smrg 14035dfecf96Smrg /* .+ almost identical to .* but requires eating at least one byte */ 14045dfecf96Smrg case Re_AnyAtLeast: 14055dfecf96Smrg bas = eng.cod[1]; 14065dfecf96Smrg if (eng.off == bas) { 14075dfecf96Smrg /* First iteration */ 14085dfecf96Smrg if (++eng.off >= MAX_DEPTH) 14095dfecf96Smrg return (RE_ASSERT); 14105dfecf96Smrg 14115dfecf96Smrg /* Return here for recovery if match fail */ 14125dfecf96Smrg eng.rcod[eng.off] = eng.cod; 14135dfecf96Smrg 14145dfecf96Smrg /* Skip one byte for the restart string */ 14155dfecf96Smrg if (newline && eng.str[0] == '\n') { 14165dfecf96Smrg /* Cannot skip newline */ 14175dfecf96Smrg eng.cod = eng.rcod[0]; 14185dfecf96Smrg eng.rstr[0] = ++eng.str; 14195dfecf96Smrg eng.so[0] = eng.str - eng.bas; 14205dfecf96Smrg eng.eo[0] = eng.so[0] - 1; 14215dfecf96Smrg eng.off = 0; 14225dfecf96Smrg goto reset; 14235dfecf96Smrg } 14245dfecf96Smrg eng.rstr[eng.off] = ++eng.str; 14255dfecf96Smrg 14265dfecf96Smrg /* Setup match offsets */ 14275dfecf96Smrg eng.so[eng.off] = eng.str - eng.bas; 14285dfecf96Smrg eng.eo[eng.off] = eng.so[eng.off] - 1; 14295dfecf96Smrg 14305dfecf96Smrg if (newline) 14315dfecf96Smrg /* Use the repetition counter to store start of 14325dfecf96Smrg * skipped string, to later check if skipping a 14335dfecf96Smrg * newline. */ 14345dfecf96Smrg eng.re[eng.off] = eng.so[eng.off]; 14355dfecf96Smrg 14365dfecf96Smrg /* Save start of possible previous matches */ 14375dfecf96Smrg eng.ss[eng.off] = eng.so[bas]; 14385dfecf96Smrg 14395dfecf96Smrg /* Skip repetition instruction */ 14405dfecf96Smrg eng.cod += 5; 14415dfecf96Smrg } 14425dfecf96Smrg else { 14435dfecf96Smrg /* -1 as an unsigned char */ 14445dfecf96Smrg if (eng.cod[2] != 0xff) 14455dfecf96Smrg eng.goff = eng.cod[2]; 14465dfecf96Smrg else 14475dfecf96Smrg eng.goff = -1; 14485dfecf96Smrg 14495dfecf96Smrg if (newline) { 14505dfecf96Smrg ptr = eng.bas + eng.re[eng.off]; 14515dfecf96Smrg str = eng.bas + eng.so[eng.off]; 14525dfecf96Smrg for (; ptr < str; ptr++) 14535dfecf96Smrg if (*ptr == '\n') { 14545dfecf96Smrg eng.cod = eng.rcod[0]; 14555dfecf96Smrg eng.so[0] = ptr - eng.bas + 1; 14565dfecf96Smrg eng.eo[0] = eng.so[0] - 1; 14575dfecf96Smrg eng.rstr[0] = eng.str = ptr + 1; 14585dfecf96Smrg eng.off = 0; 14595dfecf96Smrg goto reset; 14605dfecf96Smrg } 14615dfecf96Smrg /* If looping, don't do too many noops */ 14625dfecf96Smrg eng.re[eng.off] = ptr - eng.bas; 14635dfecf96Smrg } 14645dfecf96Smrg 14655dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off]) { 14665dfecf96Smrg /* Note that this is only true if all possibly 14675dfecf96Smrg * nested special repetitions also matched. */ 14685dfecf96Smrg 14695dfecf96Smrg if (eng.goff >= 0) { 14705dfecf96Smrg if (eng.cod[5] == Re_Update) 14715dfecf96Smrg eng.gso[eng.goff] = eng.eo[bas] + 14725dfecf96Smrg (eng.so[bas] > eng.eo[bas]); 14735dfecf96Smrg else if (eng.geo[eng.goff] < eng.so[eng.off]) 14745dfecf96Smrg eng.geo[eng.goff] = eng.so[eng.off]; 14755dfecf96Smrg } 14765dfecf96Smrg 14775dfecf96Smrg /* Jump relative offset */ 14785dfecf96Smrg len = eng.cod[3] | (eng.cod[4] << 8); 14795dfecf96Smrg 14805dfecf96Smrg /* Restore offset from where started trying */ 14815dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 14825dfecf96Smrg eng.eo[bas] = eng.eo[eng.off]; 14835dfecf96Smrg 14845dfecf96Smrg /* Pop stack and skip code */ 14855dfecf96Smrg --eng.off; 14865dfecf96Smrg eng.cod += len; 14875dfecf96Smrg } 14885dfecf96Smrg else { 14895dfecf96Smrg /* Only give up if the entire string was scanned */ 14905dfecf96Smrg if (eng.str < eng.end) { 14915dfecf96Smrg /* Update restart point for next pattern */ 14925dfecf96Smrg eng.str = ++eng.rstr[eng.off]; 14935dfecf96Smrg 14945dfecf96Smrg /* Reset start of nested match */ 14955dfecf96Smrg eng.so[eng.off] = eng.str - eng.bas; 14965dfecf96Smrg 14975dfecf96Smrg /* Skip repetition instruction */ 14985dfecf96Smrg eng.cod += 5; 14995dfecf96Smrg } 15005dfecf96Smrg else { 15015dfecf96Smrg /* Entire string scanned and failed */ 15025dfecf96Smrg 15035dfecf96Smrg /* Jump relative offset */ 15045dfecf96Smrg len = eng.cod[3] | (eng.cod[4] << 8); 15055dfecf96Smrg 15065dfecf96Smrg /* Restore offset from where started trying */ 15075dfecf96Smrg eng.so[bas] = eng.ss[eng.off]; 15085dfecf96Smrg eng.eo[bas] = eng.ss[eng.off] - 1; 15095dfecf96Smrg 15105dfecf96Smrg /* Pop stack and skip code */ 15115dfecf96Smrg --eng.off; 15125dfecf96Smrg eng.cod += len; 15135dfecf96Smrg } 15145dfecf96Smrg } 15155dfecf96Smrg } 15165dfecf96Smrg continue; 15175dfecf96Smrg 15185dfecf96Smrg 15195dfecf96Smrg /**************************************************** 15205dfecf96Smrg * Repetition matched! * 15215dfecf96Smrg ****************************************************/ 15225dfecf96Smrg case Re_RepJump: 15235dfecf96Smrg /* eng.cod[1] is toplevel offset of repetition */ 15245dfecf96Smrg if (eng.off > eng.cod[1]) 15255dfecf96Smrg /* If still needs to try matches */ 15265dfecf96Smrg eng.cod -= eng.cod[2]; 15275dfecf96Smrg else 15285dfecf96Smrg eng.cod += 3; /* + RepJump + bas + len-size */ 15295dfecf96Smrg continue; 15305dfecf96Smrg 15315dfecf96Smrg case Re_RepLongJump: 15325dfecf96Smrg /* eng.cod[1] is toplevel offset of repetition */ 15335dfecf96Smrg if (eng.off > eng.cod[1]) 15345dfecf96Smrg /* If still needs to try matches */ 15355dfecf96Smrg eng.cod -= eng.cod[2] | (eng.cod[3] << 8); 15365dfecf96Smrg else 15375dfecf96Smrg eng.cod += 4; /* + RepLongJump + bas + len-size */ 15385dfecf96Smrg continue; 15395dfecf96Smrg 15405dfecf96Smrg /**************************************************** 15415dfecf96Smrg * Finished * 15425dfecf96Smrg ****************************************************/ 15435dfecf96Smrg case Re_DoneIf: 15445dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off]) { 15455dfecf96Smrg eng.so[0] = eng.ss[eng.off]; 15465dfecf96Smrg eng.eo[0] = eng.eo[eng.off]; 15475dfecf96Smrg goto done; 15485dfecf96Smrg } 15495dfecf96Smrg ++eng.cod; 15505dfecf96Smrg continue; 15515dfecf96Smrg case Re_MaybeDone: 15525dfecf96Smrg if (eng.eo[eng.off] >= eng.so[eng.off]) { 15535dfecf96Smrg eng.so[0] = eng.ss[eng.off]; 15545dfecf96Smrg eng.eo[0] = eng.eo[eng.off]; 15555dfecf96Smrg goto done; 15565dfecf96Smrg } 15575dfecf96Smrg ++eng.cod; 15585dfecf96Smrg continue; 15595dfecf96Smrg case Re_Done: 15605dfecf96Smrg goto done; 15615dfecf96Smrg 15625dfecf96Smrg default: 15635dfecf96Smrg /* Fatal internal error */ 15645dfecf96Smrg return (RE_ASSERT); 15655dfecf96Smrg } 15665dfecf96Smrg 15675dfecf96Smrg 15685dfecf96Smrgwont: 15695dfecf96Smrg /* Surely won't match */ 15705dfecf96Smrg if (eng.off == 0) { 15715dfecf96Smrg eng.eo[0] = eng.so[0] - 1; 15725dfecf96Smrg break; 15735dfecf96Smrg } 15745dfecf96Smrg 15755dfecf96Smrg 15765dfecf96Smrgfail: 15775dfecf96Smrg if (eng.off == 0) { 15785dfecf96Smrg /* If the entire string scanned */ 15795dfecf96Smrg if (++eng.str > eng.end) { 15805dfecf96Smrg eng.eo[0] = eng.so[0] - 1; 15815dfecf96Smrg break; 15825dfecf96Smrg } 15835dfecf96Smrg eng.goff = -1; 15845dfecf96Smrg /* Update start of possible match after restart */ 15855dfecf96Smrg if (eng.eo[0] >= eng.so[0]) { 15865dfecf96Smrg /* If first fail */ 15875dfecf96Smrg eng.str = eng.rstr[0]; 15885dfecf96Smrg ++eng.rstr[0]; 15895dfecf96Smrg eng.so[0] = eng.str - eng.bas; 15905dfecf96Smrg eng.eo[0] = eng.so[eng.off] - 1; 15915dfecf96Smrg } 15925dfecf96Smrg else 15935dfecf96Smrg /* Just trying at next byte */ 15945dfecf96Smrg ++eng.so[0]; 15955dfecf96Smrg } 15965dfecf96Smrg else 15975dfecf96Smrg /* Remember this match failed */ 15985dfecf96Smrg eng.eo[eng.off] = eng.so[eng.off] - 1; 15995dfecf96Smrg 16005dfecf96Smrg /* Restart code */ 16015dfecf96Smrg eng.cod = eng.rcod[eng.off]; 16025dfecf96Smrg continue; 16035dfecf96Smrg 16045dfecf96Smrg 16055dfecf96Smrgmatch: 16065dfecf96Smrg /* If first match */ 16075dfecf96Smrg if (eng.eo[eng.off] < eng.so[eng.off]) { 16085dfecf96Smrg if (eng.off == 0) 16095dfecf96Smrg eng.rstr[0] = eng.str + 1; 16105dfecf96Smrg eng.so[eng.off] = eng.eo[eng.off] = eng.str - eng.bas; 16115dfecf96Smrg } 16125dfecf96Smrg eng.eo[eng.off] += si; 16135dfecf96Smrg eng.cod += ci; 16145dfecf96Smrg eng.str += si; 16155dfecf96Smrg ci = si = 1; 16165dfecf96Smrg continue; 16175dfecf96Smrg 16185dfecf96Smrgdone: 16195dfecf96Smrg break; 16205dfecf96Smrg } 16215dfecf96Smrg 16225dfecf96Smrg if (nmatch) { 16235dfecf96Smrg if (flags & RE_STARTEND) 16245dfecf96Smrg len = pmat[0].rm_so; 16255dfecf96Smrg else 16265dfecf96Smrg len = 0; 16275dfecf96Smrg if (!nosub) { 16285dfecf96Smrg if (preg->cod[1] != 0xff) 16295dfecf96Smrg eng.goff = preg->cod[1]; 16305dfecf96Smrg pmat[0].rm_so = eng.so[0]; 16315dfecf96Smrg pmat[0].rm_eo = eng.eo[0]; 16325dfecf96Smrg for (i = 1; i < nmatch; i++) { 16335dfecf96Smrg if (i - 1 <= eng.goff) { 16345dfecf96Smrg pmat[i].rm_so = eng.gso[i - 1]; 16355dfecf96Smrg pmat[i].rm_eo = eng.geo[i - 1]; 16365dfecf96Smrg } 16375dfecf96Smrg else { 16385dfecf96Smrg pmat[i].rm_so = 0; 16395dfecf96Smrg pmat[i].rm_eo = -1; 16405dfecf96Smrg } 16415dfecf96Smrg } 16425dfecf96Smrg if (len) { 16435dfecf96Smrg /* Update offsets, since the match was done in a substring */ 16445dfecf96Smrg j = eng.goff + 2 > nmatch ? nmatch : eng.goff + 2; 16455dfecf96Smrg for (i = 0; i < j; i++) { 16465dfecf96Smrg pmat[i].rm_so += len; 16475dfecf96Smrg pmat[i].rm_eo += len; 16485dfecf96Smrg } 16495dfecf96Smrg } 16505dfecf96Smrg } 16515dfecf96Smrg else { 16525dfecf96Smrg /* Already know these values, allow compiling the regex with 16535dfecf96Smrg * RE_NOSUB to use parenthesis only for grouping, but avoiding 16545dfecf96Smrg * the runtime overhead of keeping track of the subexpression 16555dfecf96Smrg * offsets. */ 16565dfecf96Smrg pmat[0].rm_so = eng.so[0] + len; 16575dfecf96Smrg pmat[0].rm_eo = eng.eo[0] + len; 16585dfecf96Smrg } 16595dfecf96Smrg } 16605dfecf96Smrg 16615dfecf96Smrg return (eng.so[0] <= eng.eo[0] ? 0 : RE_NOMATCH); 16625dfecf96Smrg} 16635dfecf96Smrg 16645dfecf96Smrgint 16655dfecf96Smrgreerror(int ecode, const re_cod *preg, char *ebuffer, int ebuffer_size) 16665dfecf96Smrg{ 16675dfecf96Smrg static char *errors[] = { 16685dfecf96Smrg "No error", 16695dfecf96Smrg "Failed to match", /* NOMATCH */ 16705dfecf96Smrg 16715dfecf96Smrg /* Errors not generated */ 16725dfecf96Smrg "Invalid regular expression", /* BADPAT */ 16735dfecf96Smrg "Invalid collating element", /* ECOLLATE */ 16745dfecf96Smrg "Invalid character class", /* ECTYPE */ 16755dfecf96Smrg 16765dfecf96Smrg "`\' applied to unescapable character", /* EESCAPE */ 16775dfecf96Smrg "Invalid backreference number", /* ESUBREG */ 16785dfecf96Smrg "Brackets `[ ]' not balanced", /* EBRACK */ 16795dfecf96Smrg "Parentheses `( )' not balanced", /* EPAREN */ 16805dfecf96Smrg "Braces `{ }' not balanced", /* EBRACE */ 16815dfecf96Smrg "Invalid repetition count(s) in `{ }'", /* BADBR */ 16825dfecf96Smrg "Invalid character range in `[ ]'", /* ERANGE */ 16835dfecf96Smrg "Out of memory", /* ESPACE */ 16845dfecf96Smrg "`?', `*', or `+' operand invalid", /* BADRPT */ 16855dfecf96Smrg "Empty (sub)expression", /* EMPTY */ 16865dfecf96Smrg "Assertion error - you found a bug", /* ASSERT */ 16875dfecf96Smrg "Invalid argument" /* INVARG */ 16885dfecf96Smrg }; 16895dfecf96Smrg char *str; 16905dfecf96Smrg 16915dfecf96Smrg if (ecode >= 0 && ecode < sizeof(errors) / sizeof(errors[0])) 16925dfecf96Smrg str = errors[ecode]; 16935dfecf96Smrg else 16945dfecf96Smrg str = "Unknown error"; 16955dfecf96Smrg 16965dfecf96Smrg return (snprintf(ebuffer, ebuffer_size, "%s", str)); 16975dfecf96Smrg} 16985dfecf96Smrg 16995dfecf96Smrgvoid 17005dfecf96Smrgrefree(re_cod *cod) 17015dfecf96Smrg{ 17025dfecf96Smrg free(cod->cod); 17035dfecf96Smrg cod->cod = NULL; 17045dfecf96Smrg} 17055dfecf96Smrg 17065dfecf96Smrgstatic void 17075dfecf96Smrgreinit(void) 17085dfecf96Smrg{ 17095dfecf96Smrg int i; 17105dfecf96Smrg static int first = 1; 17115dfecf96Smrg 17125dfecf96Smrg if (!first) 17135dfecf96Smrg return; 17145dfecf96Smrg first = 0; 17155dfecf96Smrg 17165dfecf96Smrg re__alnum['_'] = 1; 17175dfecf96Smrg 17185dfecf96Smrg for (i = '0'; i <= '7'; i++) 17195dfecf96Smrg re__alnum[i] = re__odigit[i] = re__ddigit[i] = re__xdigit[i] = 1; 17205dfecf96Smrg 17215dfecf96Smrg for (; i <= '9'; i++) 17225dfecf96Smrg re__alnum[i] = re__ddigit[i] = re__xdigit[i] = 1; 17235dfecf96Smrg 17245dfecf96Smrg for (i = 'a'; i <= 'f'; i++) 17255dfecf96Smrg re__alnum[i] = re__xdigit[i] = 1; 17265dfecf96Smrg for (; i <= 'z'; i++) 17275dfecf96Smrg re__alnum[i] = 1; 17285dfecf96Smrg 17295dfecf96Smrg for (i = 'A'; i <= 'F'; i++) 17305dfecf96Smrg re__alnum[i] = re__xdigit[i] = 1; 17315dfecf96Smrg for (; i <= 'Z'; i++) 17325dfecf96Smrg re__alnum[i] = 1; 17335dfecf96Smrg 17345dfecf96Smrg for (i = 1; i < 32; i++) 17355dfecf96Smrg re__control[i] = 1; 17365dfecf96Smrg re__control[127] = 1; 17375dfecf96Smrg /* Don't show tabs as control characters */ 17385dfecf96Smrg re__control['\t'] = 0; 17395dfecf96Smrg} 17405dfecf96Smrg 17415dfecf96Smrgstatic int 17425dfecf96Smrgrec_check(re_inf *inf, int count) 17435dfecf96Smrg{ 17445dfecf96Smrg if (inf->len + count >= inf->spc) { 17455dfecf96Smrg int spc; 17465dfecf96Smrg unsigned char *cod; 17475dfecf96Smrg 17485dfecf96Smrg if ((spc = (count % 64)) != 0) 17495dfecf96Smrg spc = 64 - spc; 17505dfecf96Smrg spc += count + inf->spc; 17515dfecf96Smrg if ((cod = realloc(inf->cod, spc)) == NULL) 17525dfecf96Smrg return (inf->ecode = RE_ESPACE); 17535dfecf96Smrg inf->cod = cod; 17545dfecf96Smrg inf->spc = spc; 17555dfecf96Smrg } 17565dfecf96Smrg 17575dfecf96Smrg return (inf->ecode); 17585dfecf96Smrg} 17595dfecf96Smrg 17605dfecf96Smrgstatic int 17615dfecf96Smrgrec_code(re_inf *inf, ReCode code) 17625dfecf96Smrg{ 17635dfecf96Smrg if (rec_check(inf, 1) == 0) 17645dfecf96Smrg inf->cod[inf->len++] = code; 17655dfecf96Smrg 17665dfecf96Smrg return (inf->ecode); 17675dfecf96Smrg} 17685dfecf96Smrg 17695dfecf96Smrgstatic int 17705dfecf96Smrgrec_byte(re_inf *inf, int value) 17715dfecf96Smrg{ 17725dfecf96Smrg if (rec_check(inf, 1) == 0) 17735dfecf96Smrg inf->cod[inf->len++] = value; 17745dfecf96Smrg 17755dfecf96Smrg return (inf->ecode); 17765dfecf96Smrg} 17775dfecf96Smrg 17785dfecf96Smrgstatic int 17795dfecf96Smrgrec_code_byte(re_inf *inf, ReCode code, int value) 17805dfecf96Smrg{ 17815dfecf96Smrg if (rec_check(inf, 2) == 0) { 17825dfecf96Smrg inf->cod[inf->len++] = code; 17835dfecf96Smrg inf->cod[inf->len++] = value; 17845dfecf96Smrg } 17855dfecf96Smrg 17865dfecf96Smrg return (inf->ecode); 17875dfecf96Smrg} 17885dfecf96Smrg 17895dfecf96Smrgstatic int 17905dfecf96Smrgrec_length(re_inf *inf, int length) 17915dfecf96Smrg{ 17925dfecf96Smrg int lo, hi, two; 17935dfecf96Smrg 17945dfecf96Smrg if (length >= 16384) 17955dfecf96Smrg return (inf->ecode = RE_ESPACE); 17965dfecf96Smrg 17975dfecf96Smrg lo = length & 0xff; 17985dfecf96Smrg hi = length & 0xff00; 17995dfecf96Smrg two = ((length > 0x7f) != 0) + 1; 18005dfecf96Smrg if (two == 2) { 18015dfecf96Smrg hi <<= 1; 18025dfecf96Smrg hi |= (lo & 0x80) != 0; 18035dfecf96Smrg lo |= 0x80; 18045dfecf96Smrg } 18055dfecf96Smrg 18065dfecf96Smrg if (rec_check(inf, two) == 0) { 18075dfecf96Smrg inf->cod[inf->len++] = lo; 18085dfecf96Smrg if (two == 2) 18095dfecf96Smrg inf->cod[inf->len++] = hi >> 8; 18105dfecf96Smrg } 18115dfecf96Smrg 18125dfecf96Smrg return (inf->ecode); 18135dfecf96Smrg} 18145dfecf96Smrg 18155dfecf96Smrgstatic int 18165dfecf96Smrgrec_byte_byte(re_inf *inf, int value0, int value1) 18175dfecf96Smrg{ 18185dfecf96Smrg if (rec_check(inf, 2) == 0) { 18195dfecf96Smrg inf->cod[inf->len++] = value0; 18205dfecf96Smrg inf->cod[inf->len++] = value1; 18215dfecf96Smrg } 18225dfecf96Smrg 18235dfecf96Smrg return (inf->ecode); 18245dfecf96Smrg} 18255dfecf96Smrg 18265dfecf96Smrgstatic int 18275dfecf96Smrgrec_code_byte_byte(re_inf *inf, ReCode code, int value0, int value1) 18285dfecf96Smrg{ 18295dfecf96Smrg if (rec_check(inf, 3) == 0) { 18305dfecf96Smrg inf->cod[inf->len++] = code; 18315dfecf96Smrg inf->cod[inf->len++] = value0; 18325dfecf96Smrg inf->cod[inf->len++] = value1; 18335dfecf96Smrg } 18345dfecf96Smrg 18355dfecf96Smrg return (inf->ecode); 18365dfecf96Smrg} 18375dfecf96Smrg 18385dfecf96Smrgstatic int 18395dfecf96Smrgrec_build_alt(re_inf *inf, rec_alt *alt) 18405dfecf96Smrg{ 18415dfecf96Smrg int offset, value, bas = inf->bas + 1; 18425dfecf96Smrg 18435dfecf96Smrg if (alt) { 18445dfecf96Smrg if (alt->next) { 18455dfecf96Smrg if (rec_inc_spc(inf)) 18465dfecf96Smrg return (inf->ecode); 18475dfecf96Smrg 18485dfecf96Smrg /* A real a list of alternatives */ 18495dfecf96Smrg rec_code(inf, Re_Alt); 18505dfecf96Smrg 18515dfecf96Smrg offset = inf->len; /* Remember current offset */ 18525dfecf96Smrg rec_byte_byte(inf, 0, 0); /* Reserve two bytes for retry address */ 18535dfecf96Smrg while (alt && inf->ecode == 0) { 18545dfecf96Smrg if (rec_build_pat(inf, alt->pat)) 18555dfecf96Smrg break; 18565dfecf96Smrg alt = alt->next; 18575dfecf96Smrg if (alt && inf->ecode == 0) { 18585dfecf96Smrg /* Handle (hyper)complex repetitions */ 18595dfecf96Smrg if (inf->bas != bas) { 18605dfecf96Smrg /* Duplicate patterns up to end of expression */ 18615dfecf96Smrg rec_build_pat(inf, inf->apat); 18625dfecf96Smrg /* Restore engine state for next alternative(s) */ 18635dfecf96Smrg rec_alt_spc(inf, bas - 1); 18645dfecf96Smrg } 18655dfecf96Smrg 18665dfecf96Smrg /* If the jump would be so long */ 18675dfecf96Smrg if ((value = inf->len - offset) >= 16384) { 18685dfecf96Smrg inf->ecode = RE_ESPACE; 18695dfecf96Smrg break; 18705dfecf96Smrg } 18715dfecf96Smrg inf->cod[offset] = value & 0xff; 18725dfecf96Smrg inf->cod[offset + 1] = (value & 0xff00) >> 8; 18735dfecf96Smrg 18745dfecf96Smrg rec_code(inf, Re_AltNext); 18755dfecf96Smrg offset = inf->len; 18765dfecf96Smrg rec_byte_byte(inf, 0, 0); 18775dfecf96Smrg } 18785dfecf96Smrg } 18795dfecf96Smrg if (inf->ecode == 0) { 18805dfecf96Smrg /* Handle (hyper)complex repetitions */ 18815dfecf96Smrg if (inf->bas != bas) { 18825dfecf96Smrg /* Duplicate patterns up to end of expression */ 18835dfecf96Smrg rec_build_pat(inf, inf->apat); 18845dfecf96Smrg /* Restore engine state for next alternative(s) */ 18855dfecf96Smrg rec_alt_spc(inf, bas - 1); 18865dfecf96Smrg } 18875dfecf96Smrg 18885dfecf96Smrg /* If the jump would be so long */ 18895dfecf96Smrg if ((value = inf->len - offset) >= 16384) 18905dfecf96Smrg return (inf->ecode = RE_ESPACE); 18915dfecf96Smrg inf->cod[offset] = value & 0xff; 18925dfecf96Smrg inf->cod[offset + 1] = (value & 0xff00) >> 8; 18935dfecf96Smrg /* Last jump is here */ 18945dfecf96Smrg rec_code(inf, Re_AltDone); 18955dfecf96Smrg } 18965dfecf96Smrg rec_dec_spc(inf); 18975dfecf96Smrg } 18985dfecf96Smrg else 18995dfecf96Smrg /* Single alternative */ 19005dfecf96Smrg rec_build_pat(inf, alt->pat); 19015dfecf96Smrg } 19025dfecf96Smrg 19035dfecf96Smrg return (inf->ecode); 19045dfecf96Smrg} 19055dfecf96Smrg 19065dfecf96Smrgstatic int 19075dfecf96Smrgrec_build_pat(re_inf *inf, rec_pat *pat) 19085dfecf96Smrg{ 19095dfecf96Smrg rec_pat *apat; 19105dfecf96Smrg int length, offset = 0, distance, jump = 0, bas = 0; 19115dfecf96Smrg 19125dfecf96Smrg while (pat && inf->ecode == 0) { 19135dfecf96Smrg if (pat->rep) { 19145dfecf96Smrg bas = inf->bas; 19155dfecf96Smrg if (pat->type == Rep_Group && !inf->par && rec_code(inf, Re_Open)) 19165dfecf96Smrg return (inf->ecode); 19175dfecf96Smrg if (rec_inc_spc(inf)) 19185dfecf96Smrg return (inf->ecode); 19195dfecf96Smrg offset = inf->len; 19205dfecf96Smrg if (rec_build_rep(inf, pat->rep)) 19215dfecf96Smrg break; 19225dfecf96Smrg /* Reserve space to jump after repetition done */ 19235dfecf96Smrg jump = inf->len; 19245dfecf96Smrg rec_byte_byte(inf, 0, 0); 19255dfecf96Smrg } 19265dfecf96Smrg switch (pat->type) { 19275dfecf96Smrg case Rep_AnyAnyTimes: 19285dfecf96Smrg case Rep_AnyMaybe: 19295dfecf96Smrg case Rep_AnyAtLeast: 19305dfecf96Smrg if (rec_add_spc(inf, pat->type == Rep_AnyMaybe)) 19315dfecf96Smrg return (inf->ecode); 19325dfecf96Smrg if (rec_code(inf, (ReCode)pat->type) == 0 && 19335dfecf96Smrg rec_byte(inf, inf->bas - 1) == 0 && 19345dfecf96Smrg rec_byte(inf, inf->ref - 1) == 0) 19355dfecf96Smrg rec_off_spc(inf); 19365dfecf96Smrg break; 19375dfecf96Smrg case Rep_Literal: 19385dfecf96Smrg case Rep_LiteralNot: 19395dfecf96Smrg case Rep_SearchLiteral: 19405dfecf96Smrg rec_code_byte(inf, (ReCode)pat->type, pat->data.chr); 19415dfecf96Smrg break; 19425dfecf96Smrg case Rep_CaseLiteral: 19435dfecf96Smrg case Rep_CaseLiteralNot: 19445dfecf96Smrg case Rep_SearchCaseLiteral: 19455dfecf96Smrg rec_code_byte_byte(inf, (ReCode)pat->type, 19465dfecf96Smrg pat->data.cse.lower, pat->data.cse.upper); 19475dfecf96Smrg break; 19485dfecf96Smrg case Rep_Range: 19495dfecf96Smrg case Rep_RangeNot: 19505dfecf96Smrg if (rec_code(inf, (ReCode)pat->type) == 0) 19515dfecf96Smrg rec_build_rng(inf, pat->data.rng); 19525dfecf96Smrg break; 19535dfecf96Smrg case Rep_String: 19545dfecf96Smrg case Rep_SearchString: 19555dfecf96Smrg case Rep_CaseString: 19565dfecf96Smrg case Rep_SearchCaseString: 19575dfecf96Smrg rec_code(inf, (ReCode)pat->type); 19585dfecf96Smrg length = strlen((char*)pat->data.str); 19595dfecf96Smrg if (rec_length(inf, length) == 0 && rec_check(inf, length) == 0) { 19605dfecf96Smrg memcpy(inf->cod + inf->len, pat->data.str, length); 19615dfecf96Smrg inf->len += length; 19625dfecf96Smrg } 19635dfecf96Smrg break; 19645dfecf96Smrg case Rep_Any: 19655dfecf96Smrg case Rep_AnyEatAnyTimes: 19665dfecf96Smrg case Rep_AnyEatMaybe: 19675dfecf96Smrg case Rep_AnyEatAtLeast: 19685dfecf96Smrg case Rep_Odigit: 19695dfecf96Smrg case Rep_OdigitNot: 19705dfecf96Smrg case Rep_Digit: 19715dfecf96Smrg case Rep_DigitNot: 19725dfecf96Smrg case Rep_Xdigit: 19735dfecf96Smrg case Rep_XdigitNot: 19745dfecf96Smrg case Rep_Space: 19755dfecf96Smrg case Rep_SpaceNot: 19765dfecf96Smrg case Rep_Tab: 19775dfecf96Smrg case Rep_Newline: 19785dfecf96Smrg case Rep_Lower: 19795dfecf96Smrg case Rep_Upper: 19805dfecf96Smrg case Rep_Alnum: 19815dfecf96Smrg case Rep_AlnumNot: 19825dfecf96Smrg case Rep_Control: 19835dfecf96Smrg case Rep_ControlNot: 19845dfecf96Smrg case Rep_Bol: 19855dfecf96Smrg case Rep_Eol: 19865dfecf96Smrg case Rep_Bow: 19875dfecf96Smrg case Rep_Eow: 19885dfecf96Smrg rec_code(inf, (ReCode)pat->type); 19895dfecf96Smrg break; 19905dfecf96Smrg case Rep_Backref: 19915dfecf96Smrg rec_code_byte(inf, Re_Backref, pat->data.chr); 19925dfecf96Smrg break; 19935dfecf96Smrg case Rep_Group: 19945dfecf96Smrg if (pat->rep == NULL && !inf->par && rec_code(inf, Re_Open)) 19955dfecf96Smrg break; 19965dfecf96Smrg apat = inf->apat; 19975dfecf96Smrg inf->apat = pat->next; 19985dfecf96Smrg rec_build_grp(inf, pat->data.grp); 19995dfecf96Smrg inf->apat = apat; 20005dfecf96Smrg break; 20015dfecf96Smrg case Rep_StringList: 20025dfecf96Smrg rec_build_stl(inf, pat->data.stl); 20035dfecf96Smrg break; 20045dfecf96Smrg } 20055dfecf96Smrg if (pat->rep) { 20065dfecf96Smrg#if 0 20075dfecf96Smrg if (rec_dec_spc(inf)) 20085dfecf96Smrg return (inf->ecode); 20095dfecf96Smrg#else 20105dfecf96Smrg if (rec_rep_spc(inf, bas)) 20115dfecf96Smrg return (inf->ecode); 20125dfecf96Smrg#endif 20135dfecf96Smrg distance = inf->len - offset; 20145dfecf96Smrg if (distance > 255) { 20155dfecf96Smrg if (rec_code(inf, Re_RepLongJump) || 20165dfecf96Smrg rec_byte(inf, inf->bas) || 20175dfecf96Smrg rec_byte(inf, distance & 0xff) || 20185dfecf96Smrg rec_byte(inf, (distance & 0xff00) >> 8)) 20195dfecf96Smrg break; 20205dfecf96Smrg } 20215dfecf96Smrg else if (rec_code(inf, Re_RepJump) || 20225dfecf96Smrg rec_byte(inf, inf->bas) || 20235dfecf96Smrg rec_byte(inf, distance)) 20245dfecf96Smrg break; 20255dfecf96Smrg distance = inf->len - offset; 20265dfecf96Smrg inf->cod[jump] = distance & 0xff; 20275dfecf96Smrg inf->cod[jump + 1] = (distance & 0xff00) >> 8; 20285dfecf96Smrg } 20295dfecf96Smrg pat = pat->next; 20305dfecf96Smrg } 20315dfecf96Smrg 20325dfecf96Smrg return (inf->ecode); 20335dfecf96Smrg} 20345dfecf96Smrg 20355dfecf96Smrgstatic int 20365dfecf96Smrgrec_build_rng(re_inf *inf, rec_rng *rng) 20375dfecf96Smrg{ 20385dfecf96Smrg if (rec_check(inf, sizeof(rng->range)) == 0) { 20395dfecf96Smrg memcpy(inf->cod + inf->len, rng->range, sizeof(rng->range)); 20405dfecf96Smrg inf->len += sizeof(rng->range); 20415dfecf96Smrg } 20425dfecf96Smrg 20435dfecf96Smrg return (inf->ecode); 20445dfecf96Smrg} 20455dfecf96Smrg 20465dfecf96Smrgstatic int 20475dfecf96Smrgrec_build_grp(re_inf *inf, rec_grp *grp) 20485dfecf96Smrg{ 20495dfecf96Smrg int par = inf->par; 20505dfecf96Smrg 20515dfecf96Smrg if (!(inf->flags & RE_NOSUB)) { 20525dfecf96Smrg ++inf->par; 20535dfecf96Smrg if (par == 0) 20545dfecf96Smrg ++inf->ref; 20555dfecf96Smrg if (rec_build_alt(inf, grp->alt) == 0) { 20565dfecf96Smrg if (par == 0) { 20575dfecf96Smrg if (grp->comp) 20585dfecf96Smrg rec_code_byte(inf, Re_Update, inf->ref - 1); 20595dfecf96Smrg else 20605dfecf96Smrg rec_code(inf, Re_Close); 20615dfecf96Smrg } 20625dfecf96Smrg } 20635dfecf96Smrg --inf->par; 20645dfecf96Smrg } 20655dfecf96Smrg else 20665dfecf96Smrg rec_build_alt(inf, grp->alt); 20675dfecf96Smrg 20685dfecf96Smrg return (inf->ecode); 20695dfecf96Smrg} 20705dfecf96Smrg 20715dfecf96Smrgstatic int 20725dfecf96Smrgrec_build_stl(re_inf *inf, rec_stl *stl) 20735dfecf96Smrg{ 20745dfecf96Smrg int i, len, rlen; 20755dfecf96Smrg ReCode code; 20765dfecf96Smrg 20775dfecf96Smrg /* Calculate jump distance information */ 20785dfecf96Smrg rlen = stl->tlen + stl->nstrs + 4; 20795dfecf96Smrg /* + code + nstrs + place-offset + data-length */ 20805dfecf96Smrg 20815dfecf96Smrg if (stl->nstrs >= LARGE_STL_COUNT) { 20825dfecf96Smrg rlen += 511; /* Don't write number of strings */ 20835dfecf96Smrg code = stl->type == Rep_StringList ? 20845dfecf96Smrg Re_LargeStringList : Re_LargeCaseStringList; 20855dfecf96Smrg } 20865dfecf96Smrg else 20875dfecf96Smrg code = (ReCode)stl->type; 20885dfecf96Smrg 20895dfecf96Smrg if (rlen >= 16386) 20905dfecf96Smrg return (inf->ecode = RE_ESPACE); 20915dfecf96Smrg if (rec_check(inf, rlen) || 20925dfecf96Smrg rec_code(inf, code)) 20935dfecf96Smrg return (inf->ecode); 20945dfecf96Smrg 20955dfecf96Smrg /* Space is allocated, just write the data */ 20965dfecf96Smrg if (stl->nstrs < LARGE_STL_COUNT) 20975dfecf96Smrg inf->cod[inf->len++] = stl->nstrs; 20985dfecf96Smrg 20995dfecf96Smrg inf->cod[inf->len++] = rlen & 0xff; 21005dfecf96Smrg inf->cod[inf->len++] = (rlen & 0xff00) >> 8; 21015dfecf96Smrg 21025dfecf96Smrg if (stl->nstrs < LARGE_STL_COUNT) { 21035dfecf96Smrg for (i = 0; i < stl->nstrs; i++) 21045dfecf96Smrg inf->cod[inf->len++] = stl->lens[i]; 21055dfecf96Smrg for (i = 0; i < stl->nstrs; i++) { 21065dfecf96Smrg len = stl->lens[i]; 21075dfecf96Smrg if (len > 2) { 21085dfecf96Smrg memcpy(inf->cod + inf->len, stl->strs[i], len); 21095dfecf96Smrg inf->len += len; 21105dfecf96Smrg } 21115dfecf96Smrg else { 21125dfecf96Smrg if (len == 1) 21135dfecf96Smrg inf->cod[inf->len++] = (long)stl->strs[i]; 21145dfecf96Smrg else { 21155dfecf96Smrg inf->cod[inf->len++] = (long)stl->strs[i] & 0xff; 21165dfecf96Smrg inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8; 21175dfecf96Smrg } 21185dfecf96Smrg } 21195dfecf96Smrg } 21205dfecf96Smrg } 21215dfecf96Smrg else { 21225dfecf96Smrg /* The string length goes before the string itself */ 21235dfecf96Smrg int j, chl, chu; 21245dfecf96Smrg 21255dfecf96Smrg /* Fill everything with an invalid jump address */ 21265dfecf96Smrg memset(inf->cod + inf->len, 0xff, 512); 21275dfecf96Smrg for (i = len = 0, j = -1; i < stl->nstrs; i++) { 21285dfecf96Smrg chl = stl->lens[i] > 2 ? stl->strs[i][0] : (long)stl->strs[i] & 0xff; 21295dfecf96Smrg if (chl != j) { 21305dfecf96Smrg inf->cod[inf->len + (chl << 1)] = len & 0xff; 21315dfecf96Smrg inf->cod[inf->len + (chl << 1) + 1] = (len & 0xff00) >> 8; 21325dfecf96Smrg if (code == Re_LargeCaseStringList) { 21335dfecf96Smrg chu = stl->lens[i] > 2 ? 21345dfecf96Smrg stl->strs[i][1] : ((long)(stl->strs[i]) & 0xff00) >> 8; 21355dfecf96Smrg inf->cod[inf->len + (chu << 1)] = len & 0xff; 21365dfecf96Smrg inf->cod[inf->len + (chu << 1) + 1] = (len & 0xff00) >> 8; 21375dfecf96Smrg } 21385dfecf96Smrg j = chl; 21395dfecf96Smrg } 21405dfecf96Smrg len += stl->lens[i] + 1; 21415dfecf96Smrg } 21425dfecf96Smrg inf->len += 512; 21435dfecf96Smrg 21445dfecf96Smrg for (i = 0; i < stl->nstrs; i++) { 21455dfecf96Smrg len = stl->lens[i]; 21465dfecf96Smrg inf->cod[inf->len++] = len; 21475dfecf96Smrg if (len > 2) { 21485dfecf96Smrg memcpy(inf->cod + inf->len, stl->strs[i], len); 21495dfecf96Smrg inf->len += len; 21505dfecf96Smrg } 21515dfecf96Smrg else { 21525dfecf96Smrg if (len == 1) 21535dfecf96Smrg inf->cod[inf->len++] = (long)stl->strs[i]; 21545dfecf96Smrg else { 21555dfecf96Smrg inf->cod[inf->len++] = (long)stl->strs[i] & 0xff; 21565dfecf96Smrg inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8; 21575dfecf96Smrg } 21585dfecf96Smrg } 21595dfecf96Smrg } 21605dfecf96Smrg } 21615dfecf96Smrg 21625dfecf96Smrg return (inf->ecode); 21635dfecf96Smrg} 21645dfecf96Smrg 21655dfecf96Smrgstatic int 21665dfecf96Smrgrec_build_rep(re_inf *inf, rec_rep *rep) 21675dfecf96Smrg{ 21685dfecf96Smrg if (rep) { 21695dfecf96Smrg switch (rep->type) { 21705dfecf96Smrg case Rer_AnyTimes: 21715dfecf96Smrg case Rer_AtLeast: 21725dfecf96Smrg case Rer_Maybe: 21735dfecf96Smrg rec_code(inf, (ReCode)rep->type); 21745dfecf96Smrg break; 21755dfecf96Smrg case Rer_Exact: 21765dfecf96Smrg if (rec_code(inf, Re_Exact) == 0) 21775dfecf96Smrg rec_byte(inf, rep->mine); 21785dfecf96Smrg break; 21795dfecf96Smrg case Rer_Min: 21805dfecf96Smrg if (rec_code(inf, Re_Min) == 0) 21815dfecf96Smrg rec_byte(inf, rep->mine); 21825dfecf96Smrg break; 21835dfecf96Smrg case Rer_Max: 21845dfecf96Smrg if (rec_code(inf, Re_Max) == 0) 21855dfecf96Smrg rec_byte(inf, rep->maxc); 21865dfecf96Smrg break; 21875dfecf96Smrg case Rer_MinMax: 21885dfecf96Smrg if (rec_code(inf, Re_MinMax) == 0 && 21895dfecf96Smrg rec_byte(inf, rep->mine) == 0) 21905dfecf96Smrg rec_byte(inf, rep->maxc); 21915dfecf96Smrg break; 21925dfecf96Smrg } 21935dfecf96Smrg /* It is incremented in rec_build_pat */ 21945dfecf96Smrg rec_byte(inf, inf->bas - 1); 21955dfecf96Smrg } 21965dfecf96Smrg 21975dfecf96Smrg return (inf->ecode); 21985dfecf96Smrg} 21995dfecf96Smrg 22005dfecf96Smrgstatic int 22015dfecf96Smrgrec_inc_spc(re_inf *inf) 22025dfecf96Smrg{ 22035dfecf96Smrg if (++inf->bas >= MAX_DEPTH) 22045dfecf96Smrg return (inf->ecode = RE_ESPACE); 22055dfecf96Smrg 22065dfecf96Smrg return (inf->ecode); 22075dfecf96Smrg} 22085dfecf96Smrg 22095dfecf96Smrgstatic int 22105dfecf96Smrgrec_dec_spc(re_inf *inf) 22115dfecf96Smrg{ 22125dfecf96Smrg if (--inf->bas < 0) 22135dfecf96Smrg return (inf->ecode = RE_ASSERT); 22145dfecf96Smrg 22155dfecf96Smrg return (inf->ecode); 22165dfecf96Smrg} 22175dfecf96Smrg 22185dfecf96Smrgstatic int 22195dfecf96Smrgrec_add_spc(re_inf *inf, int maybe) 22205dfecf96Smrg{ 22215dfecf96Smrg if (++inf->bas >= MAX_DEPTH) 22225dfecf96Smrg return (inf->ecode = RE_ESPACE); 22235dfecf96Smrg inf->sp[inf->bas] = maybe + 1; 22245dfecf96Smrg 22255dfecf96Smrg return (inf->ecode); 22265dfecf96Smrg} 22275dfecf96Smrg 22285dfecf96Smrg/* Could be joined with rec_rep_spc, code almost identical */ 22295dfecf96Smrgstatic int 22305dfecf96Smrgrec_alt_spc(re_inf *inf, int top) 22315dfecf96Smrg{ 22325dfecf96Smrg int distance, i, bas = inf->bas; 22335dfecf96Smrg 22345dfecf96Smrg while ((inf->bas > top) && inf->sp[inf->bas]) { 22355dfecf96Smrg /* Jump to this repetition for cleanup */ 22365dfecf96Smrg distance = inf->len - inf->sr[inf->bas]; 22375dfecf96Smrg 22385dfecf96Smrg /* This will generate a jump to a jump decision opcode */ 22395dfecf96Smrg inf->sj[inf->bas] = inf->len; 22405dfecf96Smrg 22415dfecf96Smrg if (distance > 255) { 22425dfecf96Smrg if (rec_code(inf, Re_RepLongJump) || 22435dfecf96Smrg rec_byte(inf, inf->bas - 1) || 22445dfecf96Smrg rec_byte(inf, distance & 0xff) || 22455dfecf96Smrg rec_byte(inf, (distance & 0xff00) >> 8)) 22465dfecf96Smrg break; 22475dfecf96Smrg } 22485dfecf96Smrg else if (rec_code(inf, Re_RepJump) || 22495dfecf96Smrg rec_byte(inf, inf->bas - 1) || 22505dfecf96Smrg rec_byte(inf, distance)) 22515dfecf96Smrg break; 22525dfecf96Smrg 22535dfecf96Smrg /* Top of stack value before repetition, or end condition value */ 22545dfecf96Smrg --inf->bas; 22555dfecf96Smrg } 22565dfecf96Smrg 22575dfecf96Smrg i = inf->bas + 1; 22585dfecf96Smrg 22595dfecf96Smrg if (inf->ecode == 0 && i <= bas && inf->sp[i]) { 22605dfecf96Smrg /* Only the repetition at the bottom jump to code after testing 22615dfecf96Smrg * all possibilities */ 22625dfecf96Smrg distance = inf->len - inf->sr[i]; 22635dfecf96Smrg inf->cod[inf->sr[i] + 3] = distance & 0xff; 22645dfecf96Smrg inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 22655dfecf96Smrg 22665dfecf96Smrg /* The bottom jump is here */ 22675dfecf96Smrg if (rec_code(inf, inf->sp[i] == 1 ? Re_DoneIf : Re_MaybeDone)) 22685dfecf96Smrg return (inf->ecode); 22695dfecf96Smrg 22705dfecf96Smrg /* Generate jumps to the previous special repetition */ 22715dfecf96Smrg for (++i; i <= bas; i++) { 22725dfecf96Smrg if (inf->sp[i]) { 22735dfecf96Smrg distance = inf->sj[i] - inf->sr[i]; 22745dfecf96Smrg inf->cod[inf->sr[i] + 3] = distance & 0xff; 22755dfecf96Smrg inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 22765dfecf96Smrg } 22775dfecf96Smrg } 22785dfecf96Smrg } 22795dfecf96Smrg 22805dfecf96Smrg return (inf->ecode); 22815dfecf96Smrg} 22825dfecf96Smrg 22835dfecf96Smrgstatic int 22845dfecf96Smrgrec_rep_spc(re_inf *inf, int top) 22855dfecf96Smrg{ 22865dfecf96Smrg int distance, i, bas = inf->bas; 22875dfecf96Smrg 22885dfecf96Smrg while (inf->bas > top) { 22895dfecf96Smrg if (inf->sp[inf->bas]) { 22905dfecf96Smrg /* Jump to this repetition for cleanup */ 22915dfecf96Smrg distance = inf->len - inf->sr[inf->bas]; 22925dfecf96Smrg 22935dfecf96Smrg /* This will generate a jump to a jump decision opcode */ 22945dfecf96Smrg inf->sj[inf->bas] = inf->len; 22955dfecf96Smrg 22965dfecf96Smrg if (distance > 255) { 22975dfecf96Smrg if (rec_code(inf, Re_RepLongJump) || 22985dfecf96Smrg rec_byte(inf, inf->bas - 1) || 22995dfecf96Smrg rec_byte(inf, distance & 0xff) || 23005dfecf96Smrg rec_byte(inf, (distance & 0xff00) >> 8)) 23015dfecf96Smrg break; 23025dfecf96Smrg } 23035dfecf96Smrg else if (rec_code(inf, Re_RepJump) || 23045dfecf96Smrg rec_byte(inf, inf->bas - 1) || 23055dfecf96Smrg rec_byte(inf, distance)) 23065dfecf96Smrg break; 23075dfecf96Smrg } 23085dfecf96Smrg 23095dfecf96Smrg /* Top of stack value before repetition, or end condition value */ 23105dfecf96Smrg --inf->bas; 23115dfecf96Smrg } 23125dfecf96Smrg 23135dfecf96Smrg /* Find first special repetition offset. XXX This should be a noop */ 23145dfecf96Smrg for (i = 0; i < bas; i++) 23155dfecf96Smrg if (inf->sp[i]) 23165dfecf96Smrg break; 23175dfecf96Smrg 23185dfecf96Smrg if (inf->ecode == 0 && i <= bas && inf->sp[i]) { 23195dfecf96Smrg /* Only the repetition at the bottom jump to code after testing 23205dfecf96Smrg * all possibilities */ 23215dfecf96Smrg distance = inf->len - inf->sr[i]; 23225dfecf96Smrg inf->cod[inf->sr[i] + 3] = distance & 0xff; 23235dfecf96Smrg inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 23245dfecf96Smrg 23255dfecf96Smrg /* Generate jumps to the previous special repetition */ 23265dfecf96Smrg for (++i; i <= bas; i++) { 23275dfecf96Smrg if (inf->sp[i]) { 23285dfecf96Smrg distance = inf->sj[i] - inf->sr[i]; 23295dfecf96Smrg inf->cod[inf->sr[i] + 3] = distance & 0xff; 23305dfecf96Smrg inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8; 23315dfecf96Smrg } 23325dfecf96Smrg } 23335dfecf96Smrg } 23345dfecf96Smrg 23355dfecf96Smrg return (inf->ecode); 23365dfecf96Smrg} 23375dfecf96Smrg 23385dfecf96Smrgstatic int 23395dfecf96Smrgrec_off_spc(re_inf *inf) 23405dfecf96Smrg{ 23415dfecf96Smrg /* The jump address before the three bytes instruction */ 23425dfecf96Smrg inf->sr[inf->bas] = inf->len - 3; 23435dfecf96Smrg /* Don't know yet where to go after done with the special 23445dfecf96Smrg * repetition, just reserve two bytes for the jump address. */ 23455dfecf96Smrg return (rec_byte_byte(inf, 0, 0)); 23465dfecf96Smrg} 23475dfecf96Smrg 23485dfecf96Smrg#ifdef DEBUG 23495dfecf96Smrgstatic void 23505dfecf96Smrgredump(re_cod *code) 23515dfecf96Smrg{ 23525dfecf96Smrg int i, j, k; 23535dfecf96Smrg unsigned char *cod = code->cod, *stl; 23545dfecf96Smrg 23555dfecf96Smrg if (cod[0] & RE_NOSUB) 23565dfecf96Smrg printf("Nosub\n"); 23575dfecf96Smrg if (cod[0] & RE_NEWLINE) 23585dfecf96Smrg printf("Newline\n"); 23595dfecf96Smrg ++cod; 23605dfecf96Smrg if (cod[0] != 0xff) 23615dfecf96Smrg printf("%d backrefs\n", cod[0] + 1); 23625dfecf96Smrg ++cod; 23635dfecf96Smrg for (;;) { 23645dfecf96Smrg switch (*cod++) { 23655dfecf96Smrg case Re_Open: 23665dfecf96Smrg printf("Open"); 23675dfecf96Smrg break; 23685dfecf96Smrg case Re_Close: 23695dfecf96Smrg printf("Close"); 23705dfecf96Smrg break; 23715dfecf96Smrg case Re_Update: 23725dfecf96Smrg printf("Update (%d)", (int)*cod++); 23735dfecf96Smrg break; 23745dfecf96Smrg case Re_Alt: 23755dfecf96Smrg printf("Alt"); 23765dfecf96Smrg i = cod[0] | cod[1]; 23775dfecf96Smrg cod += 2; 23785dfecf96Smrg printf(" %d", i); 23795dfecf96Smrg break; 23805dfecf96Smrg case Re_AltNext: 23815dfecf96Smrg printf("Alt-next"); 23825dfecf96Smrg i = cod[0] | cod[1]; 23835dfecf96Smrg cod += 2; 23845dfecf96Smrg printf(" %d", i); 23855dfecf96Smrg break; 23865dfecf96Smrg case Re_AltDone: 23875dfecf96Smrg printf("Alt-done"); 23885dfecf96Smrg break; 23895dfecf96Smrg case Re_AnyTimes: 23905dfecf96Smrg printf("-> Anytimes %d", (int)*cod++); 23915dfecf96Smrg i = cod[0] | (cod[1] << 8); 23925dfecf96Smrg cod += 2; 23935dfecf96Smrg printf(" /%d", i); 23945dfecf96Smrg break; 23955dfecf96Smrg case Re_AnyEatAnyTimes: 23965dfecf96Smrg printf("Any-eat-anytimes"); 23975dfecf96Smrg break; 23985dfecf96Smrg case Re_AnyAnyTimes: 23995dfecf96Smrg printf("-> Any-anytimes %d", (int)*cod++); 24005dfecf96Smrg printf(" (%d)", (int)*cod++); 24015dfecf96Smrg i = cod[0] | (cod[1] << 8); 24025dfecf96Smrg cod += 2; 24035dfecf96Smrg printf(" /%d", i); 24045dfecf96Smrg break; 24055dfecf96Smrg case Re_AnyEatMaybe: 24065dfecf96Smrg printf("Any-eat-maybe"); 24075dfecf96Smrg break; 24085dfecf96Smrg case Re_AnyMaybe: 24095dfecf96Smrg printf("-> Any-maybe %d", (int)*cod++); 24105dfecf96Smrg printf(" (%d)", (int)*cod++); 24115dfecf96Smrg i = cod[0] | (cod[1] << 8); 24125dfecf96Smrg cod += 2; 24135dfecf96Smrg printf(" /%d", i); 24145dfecf96Smrg break; 24155dfecf96Smrg case Re_AnyAtLeast: 24165dfecf96Smrg printf("-> Any-atleast %d", (int)*cod++); 24175dfecf96Smrg printf(" (%d)", (int)*cod++); 24185dfecf96Smrg i = cod[0] | (cod[1] << 8); 24195dfecf96Smrg cod += 2; 24205dfecf96Smrg printf(" /%d", i); 24215dfecf96Smrg break; 24225dfecf96Smrg case Re_AnyEatAtLeast: 24235dfecf96Smrg printf("Any-eat-atleast"); 24245dfecf96Smrg break; 24255dfecf96Smrg case Re_Maybe: 24265dfecf96Smrg printf("-> Maybe %d", (int)*cod++); 24275dfecf96Smrg i = cod[0] | (cod[1] << 8); 24285dfecf96Smrg cod += 2; 24295dfecf96Smrg printf(" /%d", i); 24305dfecf96Smrg break; 24315dfecf96Smrg case Re_AtLeast: 24325dfecf96Smrg printf("-> Atleast %d", (int)*cod++); 24335dfecf96Smrg i = cod[0] | (cod[1] << 8); 24345dfecf96Smrg cod += 2; 24355dfecf96Smrg printf(" /%d", i); 24365dfecf96Smrg break; 24375dfecf96Smrg case Re_Exact: 24385dfecf96Smrg printf("-> Exact "); 24395dfecf96Smrg i = *cod++; 24405dfecf96Smrg printf("%d", i); 24415dfecf96Smrg printf(" %d", (int)*cod++); 24425dfecf96Smrg i = cod[0] | (cod[1] << 8); 24435dfecf96Smrg cod += 2; 24445dfecf96Smrg printf(" /%d", i); 24455dfecf96Smrg break; 24465dfecf96Smrg case Re_Min: 24475dfecf96Smrg printf("-> Min "); 24485dfecf96Smrg i = *cod++; 24495dfecf96Smrg printf("%d", i); 24505dfecf96Smrg printf(" %d", (int)*cod++); 24515dfecf96Smrg i = cod[0] | (cod[1] << 8); 24525dfecf96Smrg cod += 2; 24535dfecf96Smrg printf(" /%d", i); 24545dfecf96Smrg break; 24555dfecf96Smrg case Re_Max: 24565dfecf96Smrg printf("-> Max "); 24575dfecf96Smrg i = *cod++; 24585dfecf96Smrg printf("%d", i); 24595dfecf96Smrg printf(" %d", (int)*cod++); 24605dfecf96Smrg i = cod[0] | (cod[1] << 8); 24615dfecf96Smrg cod += 2; 24625dfecf96Smrg printf(" /%d", i); 24635dfecf96Smrg break; 24645dfecf96Smrg case Re_MinMax: 24655dfecf96Smrg printf("-> Min-max "); 24665dfecf96Smrg i = *cod++; 24675dfecf96Smrg printf("%d ", i); 24685dfecf96Smrg i = *cod++; 24695dfecf96Smrg printf("%d", i); 24705dfecf96Smrg printf(" %d", (int)*cod++); 24715dfecf96Smrg i = cod[0] | (cod[1] << 8); 24725dfecf96Smrg cod += 2; 24735dfecf96Smrg printf(" /%d", i); 24745dfecf96Smrg break; 24755dfecf96Smrg case Re_RepJump: 24765dfecf96Smrg printf("<- Rep-jump %d ", (int)*cod++); 24775dfecf96Smrg i = *cod++; 24785dfecf96Smrg printf("%d", i); 24795dfecf96Smrg break; 24805dfecf96Smrg case Re_RepLongJump: 24815dfecf96Smrg printf("<- Rep-long-jump %d ", (int)*cod++); 24825dfecf96Smrg i = cod[0] | (cod[1] << 8); 24835dfecf96Smrg printf("%d", i); 24845dfecf96Smrg break; 24855dfecf96Smrg case Re_Any: 24865dfecf96Smrg printf("Any"); 24875dfecf96Smrg break; 24885dfecf96Smrg case Re_Odigit: 24895dfecf96Smrg printf("Odigit"); 24905dfecf96Smrg break; 24915dfecf96Smrg case Re_OdigitNot: 24925dfecf96Smrg printf("Odigit-not"); 24935dfecf96Smrg break; 24945dfecf96Smrg case Re_Digit: 24955dfecf96Smrg printf("Digit"); 24965dfecf96Smrg break; 24975dfecf96Smrg case Re_DigitNot: 24985dfecf96Smrg printf("Digit-not"); 24995dfecf96Smrg break; 25005dfecf96Smrg case Re_Xdigit: 25015dfecf96Smrg printf("Xdigit"); 25025dfecf96Smrg break; 25035dfecf96Smrg case Re_XdigitNot: 25045dfecf96Smrg printf("Xdigit-not"); 25055dfecf96Smrg break; 25065dfecf96Smrg case Re_Space: 25075dfecf96Smrg printf("Space"); 25085dfecf96Smrg break; 25095dfecf96Smrg case Re_SpaceNot: 25105dfecf96Smrg printf("Space-not"); 25115dfecf96Smrg break; 25125dfecf96Smrg case Re_Tab: 25135dfecf96Smrg printf("Tab"); 25145dfecf96Smrg break; 25155dfecf96Smrg case Re_Newline: 25165dfecf96Smrg printf("Newline"); 25175dfecf96Smrg break; 25185dfecf96Smrg case Re_Lower: 25195dfecf96Smrg printf("Lower"); 25205dfecf96Smrg break; 25215dfecf96Smrg case Re_Upper: 25225dfecf96Smrg printf("Upper"); 25235dfecf96Smrg break; 25245dfecf96Smrg case Re_Alnum: 25255dfecf96Smrg printf("Alnum"); 25265dfecf96Smrg break; 25275dfecf96Smrg case Re_AlnumNot: 25285dfecf96Smrg printf("Alnum-not"); 25295dfecf96Smrg break; 25305dfecf96Smrg case Re_Control: 25315dfecf96Smrg printf("Control"); 25325dfecf96Smrg break; 25335dfecf96Smrg case Re_ControlNot: 25345dfecf96Smrg printf("Control-not"); 25355dfecf96Smrg break; 25365dfecf96Smrg case Re_Bol: 25375dfecf96Smrg printf("Bol"); 25385dfecf96Smrg break; 25395dfecf96Smrg case Re_Eol: 25405dfecf96Smrg printf("Eol"); 25415dfecf96Smrg break; 25425dfecf96Smrg case Re_Bow: 25435dfecf96Smrg printf("Bow"); 25445dfecf96Smrg break; 25455dfecf96Smrg case Re_Eow: 25465dfecf96Smrg printf("Eow"); 25475dfecf96Smrg break; 25485dfecf96Smrg case Re_Range: 25495dfecf96Smrg printf("Range "); 25505dfecf96Smrg goto range; 25515dfecf96Smrg case Re_RangeNot: 25525dfecf96Smrg printf("Range-not "); 25535dfecf96Smrgrange: 25545dfecf96Smrg for (i = 0; i < 256; i += 32) { 25555dfecf96Smrg for (j = k = 0; j < 32; j++) 25565dfecf96Smrg k |= (*cod++ & 1) << (31 - j); 25575dfecf96Smrg printf("%x ", k); 25585dfecf96Smrg } 25595dfecf96Smrg break; 25605dfecf96Smrg case Re_Literal: 25615dfecf96Smrg printf("Literal %c", *cod++); 25625dfecf96Smrg break; 25635dfecf96Smrg case Re_LiteralNot: 25645dfecf96Smrg printf("Literal-not %c", *cod++); 25655dfecf96Smrg break; 25665dfecf96Smrg case Re_SearchLiteral: 25675dfecf96Smrg printf("Search-literal %c", *cod++); 25685dfecf96Smrg break; 25695dfecf96Smrg case Re_CaseLiteral: 25705dfecf96Smrg printf("Case-literal %c", *cod++); 25715dfecf96Smrg putchar(*cod++); 25725dfecf96Smrg break; 25735dfecf96Smrg case Re_CaseLiteralNot: 25745dfecf96Smrg printf("Case-literal-not %c", *cod++); 25755dfecf96Smrg putchar(*cod++); 25765dfecf96Smrg break; 25775dfecf96Smrg case Re_SearchCaseLiteral: 25785dfecf96Smrg printf("Search-case-literal %c", *cod++); 25795dfecf96Smrg putchar(*cod++); 25805dfecf96Smrg break; 25815dfecf96Smrg case Re_String: 25825dfecf96Smrg printf("String "); 25835dfecf96Smrg goto string; 25845dfecf96Smrg case Re_SearchString: 25855dfecf96Smrg printf("Search-string "); 25865dfecf96Smrg goto string; 25875dfecf96Smrg case Re_CaseString: 25885dfecf96Smrg printf("Case-string "); 25895dfecf96Smrg goto string; 25905dfecf96Smrg case Re_SearchCaseString: 25915dfecf96Smrg printf("Search-case-string "); 25925dfecf96Smrgstring: 25935dfecf96Smrg i = *cod++; 25945dfecf96Smrg if (i & 0x80) 25955dfecf96Smrg i = (i & 0x7f) | (*cod++ << 7); 25965dfecf96Smrg for (j = 0; j < i; j++) 25975dfecf96Smrg putchar(*cod++); 25985dfecf96Smrg break; 25995dfecf96Smrg case Re_StringList: 26005dfecf96Smrg printf("String-list"); 26015dfecf96Smrg goto string_list; 26025dfecf96Smrg case Re_CaseStringList: 26035dfecf96Smrg printf("Case-string-list"); 26045dfecf96Smrgstring_list: 26055dfecf96Smrg j = *cod++; 26065dfecf96Smrg cod += 2; 26075dfecf96Smrg stl = cod + j; 26085dfecf96Smrg for (i = 0; i < j; i++) { 26095dfecf96Smrg k = *cod++; 26105dfecf96Smrg putchar(i ? ',' : ' '); 26115dfecf96Smrg fwrite(stl, k, 1, stdout); 26125dfecf96Smrg stl += k; 26135dfecf96Smrg } 26145dfecf96Smrg cod = stl; 26155dfecf96Smrg break; 26165dfecf96Smrg case Re_LargeStringList: 26175dfecf96Smrg printf("Large-string-list"); 26185dfecf96Smrglarge_string_list: 26195dfecf96Smrg i = cod[0] | (cod[1] << 8); 26205dfecf96Smrg stl = cod + i - 1; 26215dfecf96Smrg for (i = 0, cod += 514; cod < stl; i++) { 26225dfecf96Smrg k = *cod++; 26235dfecf96Smrg putchar(i ? ',' : ' '); 26245dfecf96Smrg fwrite(cod, k, 1, stdout); 26255dfecf96Smrg cod += k; 26265dfecf96Smrg } 26275dfecf96Smrg cod = stl; 26285dfecf96Smrg break; 26295dfecf96Smrg case Re_LargeCaseStringList: 26305dfecf96Smrg printf("Large-case-string-list"); 26315dfecf96Smrg goto large_string_list; 26325dfecf96Smrg case Re_Backref: 26335dfecf96Smrg printf("Backref %d", (int)*cod++); 26345dfecf96Smrg break; 26355dfecf96Smrg case Re_DoneIf: 26365dfecf96Smrg printf("Done-if"); 26375dfecf96Smrg break; 26385dfecf96Smrg case Re_MaybeDone: 26395dfecf96Smrg printf("Maybe-done"); 26405dfecf96Smrg break; 26415dfecf96Smrg case Re_Done: 26425dfecf96Smrg printf("Done\n"); 26435dfecf96Smrg return; 26445dfecf96Smrg } 26455dfecf96Smrg putchar('\n'); 26465dfecf96Smrg } 26475dfecf96Smrg} 26485dfecf96Smrg#endif 2649