Home | History | Annotate | Line # | Download | only in dist
pattern.c revision 1.1.1.3
      1 /*
      2  * Copyright (C) 1984-2023  Mark Nudelman
      3  *
      4  * You may distribute under the terms of either the GNU General Public
      5  * License or the Less License, as specified in the README file.
      6  *
      7  * For more information, see the README file.
      8  */
      9 
     10 /*
     11  * Routines to do pattern matching.
     12  */
     13 
     14 #include "less.h"
     15 
     16 extern int caseless;
     17 extern int is_caseless;
     18 extern int utf_mode;
     19 
     20 /*
     21  * Compile a search pattern, for future use by match_pattern.
     22  */
     23 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
     24 {
     25 	if (search_type & SRCH_NO_REGEX)
     26 		return (0);
     27   {
     28 #if HAVE_GNU_REGEX
     29 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
     30 		ecalloc(1, sizeof(struct re_pattern_buffer));
     31 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
     32 	if (re_compile_pattern(pattern, strlen(pattern), comp))
     33 	{
     34 		free(comp);
     35 		if (show_error)
     36 			error("Invalid pattern", NULL_PARG);
     37 		return (-1);
     38 	}
     39 	if (*comp_pattern != NULL)
     40 	{
     41 		regfree(*comp_pattern);
     42 		free(*comp_pattern);
     43 	}
     44 	*comp_pattern = comp;
     45 #endif
     46 #if HAVE_POSIX_REGCOMP
     47 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
     48 	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
     49 	{
     50 		free(comp);
     51 		if (show_error)
     52 			error("Invalid pattern", NULL_PARG);
     53 		return (-1);
     54 	}
     55 	if (*comp_pattern != NULL)
     56 	{
     57 		regfree(*comp_pattern);
     58 		free(*comp_pattern);
     59 	}
     60 	*comp_pattern = comp;
     61 #endif
     62 #if HAVE_PCRE
     63 	constant char *errstring;
     64 	int erroffset;
     65 	PARG parg;
     66 	pcre *comp = pcre_compile(pattern,
     67 			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
     68 			(is_caseless ? PCRE_CASELESS : 0),
     69 			&errstring, &erroffset, NULL);
     70 	if (comp == NULL)
     71 	{
     72 		parg.p_string = (char *) errstring;
     73 		if (show_error)
     74 			error("%s", &parg);
     75 		return (-1);
     76 	}
     77 	*comp_pattern = comp;
     78 #endif
     79 #if HAVE_PCRE2
     80 	int errcode;
     81 	PCRE2_SIZE erroffset;
     82 	PARG parg;
     83 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
     84 			(is_caseless ? PCRE2_CASELESS : 0),
     85 			&errcode, &erroffset, NULL);
     86 	if (comp == NULL)
     87 	{
     88 		if (show_error)
     89 		{
     90 			char msg[160];
     91 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
     92 			parg.p_string = msg;
     93 			error("%s", &parg);
     94 		}
     95 		return (-1);
     96 	}
     97 	*comp_pattern = comp;
     98 #endif
     99 #if HAVE_RE_COMP
    100 	PARG parg;
    101 	if ((parg.p_string = re_comp(pattern)) != NULL)
    102 	{
    103 		if (show_error)
    104 			error("%s", &parg);
    105 		return (-1);
    106 	}
    107 	*comp_pattern = 1;
    108 #endif
    109 #if HAVE_REGCMP
    110 	char *comp;
    111 	if ((comp = regcmp(pattern, 0)) == NULL)
    112 	{
    113 		if (show_error)
    114 			error("Invalid pattern", NULL_PARG);
    115 		return (-1);
    116 	}
    117 	if (comp_pattern != NULL)
    118 		free(*comp_pattern);
    119 	*comp_pattern = comp;
    120 #endif
    121 #if HAVE_V8_REGCOMP
    122 	struct regexp *comp;
    123 	reg_show_error = show_error;
    124 	comp = regcomp(pattern);
    125 	reg_show_error = 1;
    126 	if (comp == NULL)
    127 	{
    128 		/*
    129 		 * regcomp has already printed an error message
    130 		 * via regerror().
    131 		 */
    132 		return (-1);
    133 	}
    134 	if (*comp_pattern != NULL)
    135 		free(*comp_pattern);
    136 	*comp_pattern = comp;
    137 #endif
    138   }
    139 	return (0);
    140 }
    141 
    142 /*
    143  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
    144  */
    145 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
    146 {
    147 	char *cvt_pattern;
    148 	int result;
    149 
    150 	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
    151 		cvt_pattern = pattern;
    152 	else
    153 	{
    154 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
    155 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
    156 	}
    157 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
    158 	if (cvt_pattern != pattern)
    159 		free(cvt_pattern);
    160 	return (result);
    161 }
    162 
    163 /*
    164  * Forget that we have a compiled pattern.
    165  */
    166 public void uncompile_pattern(PATTERN_TYPE *pattern)
    167 {
    168 #if HAVE_GNU_REGEX
    169 	if (*pattern != NULL)
    170 	{
    171 		regfree(*pattern);
    172 		free(*pattern);
    173 	}
    174 	*pattern = NULL;
    175 #endif
    176 #if HAVE_POSIX_REGCOMP
    177 	if (*pattern != NULL)
    178 	{
    179 		regfree(*pattern);
    180 		free(*pattern);
    181 	}
    182 	*pattern = NULL;
    183 #endif
    184 #if HAVE_PCRE
    185 	if (*pattern != NULL)
    186 		pcre_free(*pattern);
    187 	*pattern = NULL;
    188 #endif
    189 #if HAVE_PCRE2
    190 	if (*pattern != NULL)
    191 		pcre2_code_free(*pattern);
    192 	*pattern = NULL;
    193 #endif
    194 #if HAVE_RE_COMP
    195 	*pattern = 0;
    196 #endif
    197 #if HAVE_REGCMP
    198 	if (*pattern != NULL)
    199 		free(*pattern);
    200 	*pattern = NULL;
    201 #endif
    202 #if HAVE_V8_REGCOMP
    203 	if (*pattern != NULL)
    204 		free(*pattern);
    205 	*pattern = NULL;
    206 #endif
    207 }
    208 
    209 #if 0
    210 /*
    211  * Can a pattern be successfully compiled?
    212  */
    213 public int valid_pattern(char *pattern)
    214 {
    215 	PATTERN_TYPE comp_pattern;
    216 	int result;
    217 
    218 	SET_NULL_PATTERN(comp_pattern);
    219 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
    220 	if (result != 0)
    221 		return (0);
    222 	uncompile_pattern(&comp_pattern);
    223 	return (1);
    224 }
    225 #endif
    226 
    227 /*
    228  * Is a compiled pattern null?
    229  */
    230 public int is_null_pattern(PATTERN_TYPE pattern)
    231 {
    232 #if HAVE_GNU_REGEX
    233 	return (pattern == NULL);
    234 #endif
    235 #if HAVE_POSIX_REGCOMP
    236 	return (pattern == NULL);
    237 #endif
    238 #if HAVE_PCRE
    239 	return (pattern == NULL);
    240 #endif
    241 #if HAVE_PCRE2
    242 	return (pattern == NULL);
    243 #endif
    244 #if HAVE_RE_COMP
    245 	return (pattern == 0);
    246 #endif
    247 #if HAVE_REGCMP
    248 	return (pattern == NULL);
    249 #endif
    250 #if HAVE_V8_REGCOMP
    251 	return (pattern == NULL);
    252 #endif
    253 #if NO_REGEX
    254 	return (pattern == NULL);
    255 #endif
    256 }
    257 /*
    258  * Simple pattern matching function.
    259  * It supports no metacharacters like *, etc.
    260  */
    261 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
    262 {
    263 	char *pp, *lp;
    264 	char *pattern_end = pattern + pattern_len;
    265 	char *buf_end = buf + buf_len;
    266 
    267 	for ( ;  buf < buf_end;  buf++)
    268 	{
    269 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
    270 		{
    271 			char cp = *pp;
    272 			char cl = *lp;
    273 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
    274 				cp = ASCII_TO_LOWER(cp);
    275 			if (cp != cl)
    276 				break;
    277 			if (pp == pattern_end || lp == buf_end)
    278 				break;
    279 		}
    280 		if (pp == pattern_end)
    281 		{
    282 			*(*sp)++ = buf;
    283 			*(*ep)++ = lp;
    284 			return (1);
    285 		}
    286 	}
    287 	**sp = **ep = NULL;
    288 	return (0);
    289 }
    290 
    291 /*
    292  * Perform a pattern match with the previously compiled pattern.
    293  * Set sp[0] and ep[0] to the start and end of the matched string.
    294  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
    295  * Subpatterns are defined by parentheses in the regex language.
    296  */
    297 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
    298 {
    299 	int matched;
    300 
    301 #if NO_REGEX
    302 	search_type |= SRCH_NO_REGEX;
    303 #endif
    304 	if (search_type & SRCH_NO_REGEX)
    305 		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
    306 	else
    307 	{
    308 #if HAVE_GNU_REGEX
    309 	{
    310 		struct re_registers search_regs;
    311 		pattern->not_bol = notbol;
    312 		pattern->regs_allocated = REGS_UNALLOCATED;
    313 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
    314 		if (matched)
    315 		{
    316 			*sp++ = line + search_regs.start[0];
    317 			*ep++ = line + search_regs.end[0];
    318 		}
    319 	}
    320 #endif
    321 #if HAVE_POSIX_REGCOMP
    322 	{
    323 		#define RM_COUNT (NUM_SEARCH_COLORS+2)
    324 		regmatch_t rm[RM_COUNT];
    325 		int flags = (notbol) ? REG_NOTBOL : 0;
    326 #ifdef REG_STARTEND
    327 		flags |= REG_STARTEND;
    328 		rm[0].rm_so = 0;
    329 		rm[0].rm_eo = line_len;
    330 #endif
    331 		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
    332 		if (matched)
    333 		{
    334 			int i;
    335 			int ecount;
    336 			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
    337 				if (rm[ecount-1].rm_so >= 0)
    338 					break;
    339 			if (ecount >= nsp)
    340 				ecount = nsp-1;
    341 			for (i = 0;  i < ecount;  i++)
    342 			{
    343 				if (rm[i].rm_so < 0)
    344 				{
    345 					*sp++ = *ep++ = line;
    346 				} else
    347 				{
    348 #ifndef __WATCOMC__
    349 					*sp++ = line + rm[i].rm_so;
    350 					*ep++ = line + rm[i].rm_eo;
    351 #else
    352 					*sp++ = rm[i].rm_sp;
    353 					*ep++ = rm[i].rm_ep;
    354 #endif
    355 				}
    356 			}
    357 		}
    358 	}
    359 #endif
    360 #if HAVE_PCRE
    361 	{
    362 		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
    363 		int ovector[OVECTOR_COUNT];
    364 		int flags = (notbol) ? PCRE_NOTBOL : 0;
    365 		int i;
    366 		int ecount;
    367 		int mcount = pcre_exec(pattern, NULL, line, line_len,
    368 			0, flags, ovector, OVECTOR_COUNT);
    369 		matched = (mcount > 0);
    370 		ecount = nsp-1;
    371 		if (ecount > mcount) ecount = mcount;
    372 		for (i = 0;  i < ecount*2; )
    373 		{
    374 			if (ovector[i] < 0 || ovector[i+1] < 0)
    375 			{
    376 				*sp++ = *ep++ = line;
    377 				i += 2;
    378 			} else
    379 			{
    380 				*sp++ = line + ovector[i++];
    381 				*ep++ = line + ovector[i++];
    382 			}
    383 		}
    384 	}
    385 #endif
    386 #if HAVE_PCRE2
    387 	{
    388 		int flags = (notbol) ? PCRE2_NOTBOL : 0;
    389 		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
    390 		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
    391 			0, flags, md, NULL);
    392 		matched = (mcount > 0);
    393 		if (matched)
    394 		{
    395 			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
    396 			int i;
    397 			int ecount = nsp-1;
    398 			if (ecount > mcount) ecount = mcount;
    399 			for (i = 0;  i < ecount*2; )
    400 			{
    401 				if (ovector[i] < 0 || ovector[i+1] < 0)
    402 				{
    403 					*sp++ = *ep++ = line;
    404 					i += 2;
    405 				} else
    406 				{
    407 					*sp++ = line + ovector[i++];
    408 					*ep++ = line + ovector[i++];
    409 				}
    410 			}
    411 		}
    412 		pcre2_match_data_free(md);
    413 	}
    414 #endif
    415 #if HAVE_RE_COMP
    416 	matched = (re_exec(line) == 1);
    417 	/*
    418 	 * re_exec doesn't seem to provide a way to get the matched string.
    419 	 */
    420 #endif
    421 #if HAVE_REGCMP
    422 	matched = ((*ep++ = regex(pattern, line)) != NULL);
    423 	if (matched)
    424 		*sp++ = __loc1;
    425 #endif
    426 #if HAVE_V8_REGCOMP
    427 #if HAVE_REGEXEC2
    428 	matched = regexec2(pattern, line, notbol);
    429 #else
    430 	matched = regexec(pattern, line);
    431 #endif
    432 	if (matched)
    433 	{
    434 		*sp++ = pattern->startp[0];
    435 		*ep++ = pattern->endp[0];
    436 	}
    437 #endif
    438 	}
    439 	*sp = *ep = NULL;
    440 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
    441 			((search_type & SRCH_NO_MATCH) && !matched);
    442 	return (matched);
    443 }
    444 
    445 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
    446 {
    447 	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
    448 	int i;
    449 	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
    450 	{
    451 		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
    452 			matched = 0;
    453 	}
    454 	return matched;
    455 }
    456 
    457 /*
    458  * Return the name of the pattern matching library.
    459  */
    460 public char * pattern_lib_name(void)
    461 {
    462 #if HAVE_GNU_REGEX
    463 	return ("GNU");
    464 #else
    465 #if HAVE_POSIX_REGCOMP
    466 	return ("POSIX");
    467 #else
    468 #if HAVE_PCRE2
    469 	return ("PCRE2");
    470 #else
    471 #if HAVE_PCRE
    472 	return ("PCRE");
    473 #else
    474 #if HAVE_RE_COMP
    475 	return ("BSD");
    476 #else
    477 #if HAVE_REGCMP
    478 	return ("V8");
    479 #else
    480 #if HAVE_V8_REGCOMP
    481 	return ("Spencer V8");
    482 #else
    483 	return ("no");
    484 #endif
    485 #endif
    486 #endif
    487 #endif
    488 #endif
    489 #endif
    490 #endif
    491 }
    492