Home | History | Annotate | Line # | Download | only in dist
      1 /*	$NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1984-2023  Mark Nudelman
      5  *
      6  * You may distribute under the terms of either the GNU General Public
      7  * License or the Less License, as specified in the README file.
      8  *
      9  * For more information, see the README file.
     10  */
     11 
     12 /*
     13  * Routines to do pattern matching.
     14  */
     15 
     16 #include "less.h"
     17 
     18 extern int caseless;
     19 extern int is_caseless;
     20 extern int utf_mode;
     21 
     22 /*
     23  * Compile a search pattern, for future use by match_pattern.
     24  */
     25 static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error)
     26 {
     27 	if (search_type & SRCH_NO_REGEX)
     28 		return (0);
     29   {
     30 #if HAVE_GNU_REGEX
     31 	struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
     32 		ecalloc(1, sizeof(struct re_pattern_buffer));
     33 	re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
     34 	if (re_compile_pattern(pattern, strlen(pattern), comp))
     35 	{
     36 		free(comp);
     37 		if (show_error)
     38 			error("Invalid pattern", NULL_PARG);
     39 		return (-1);
     40 	}
     41 	if (*comp_pattern != NULL)
     42 	{
     43 		regfree(*comp_pattern);
     44 		free(*comp_pattern);
     45 	}
     46 	*comp_pattern = comp;
     47 #endif
     48 #if HAVE_POSIX_REGCOMP
     49 	regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
     50 	if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0)))
     51 	{
     52 		free(comp);
     53 		if (show_error)
     54 			error("Invalid pattern", NULL_PARG);
     55 		return (-1);
     56 	}
     57 	if (*comp_pattern != NULL)
     58 	{
     59 		regfree(*comp_pattern);
     60 		free(*comp_pattern);
     61 	}
     62 	*comp_pattern = comp;
     63 #endif
     64 #if HAVE_PCRE
     65 	constant char *errstring;
     66 	int erroffset;
     67 	PARG parg;
     68 	pcre *comp = pcre_compile(pattern,
     69 			((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) |
     70 			(is_caseless ? PCRE_CASELESS : 0),
     71 			&errstring, &erroffset, NULL);
     72 	if (comp == NULL)
     73 	{
     74 		parg.p_string = (char *) errstring;
     75 		if (show_error)
     76 			error("%s", &parg);
     77 		return (-1);
     78 	}
     79 	*comp_pattern = comp;
     80 #endif
     81 #if HAVE_PCRE2
     82 	int errcode;
     83 	PCRE2_SIZE erroffset;
     84 	PARG parg;
     85 	pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
     86 			(is_caseless ? PCRE2_CASELESS : 0),
     87 			&errcode, &erroffset, NULL);
     88 	if (comp == NULL)
     89 	{
     90 		if (show_error)
     91 		{
     92 			char msg[160];
     93 			pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
     94 			parg.p_string = msg;
     95 			error("%s", &parg);
     96 		}
     97 		return (-1);
     98 	}
     99 	*comp_pattern = comp;
    100 #endif
    101 #if HAVE_RE_COMP
    102 	PARG parg;
    103 	if ((parg.p_string = re_comp(pattern)) != NULL)
    104 	{
    105 		if (show_error)
    106 			error("%s", &parg);
    107 		return (-1);
    108 	}
    109 	*comp_pattern = 1;
    110 #endif
    111 #if HAVE_REGCMP
    112 	char *comp;
    113 	if ((comp = regcmp(pattern, 0)) == NULL)
    114 	{
    115 		if (show_error)
    116 			error("Invalid pattern", NULL_PARG);
    117 		return (-1);
    118 	}
    119 	if (comp_pattern != NULL)
    120 		free(*comp_pattern);
    121 	*comp_pattern = comp;
    122 #endif
    123 #if HAVE_V8_REGCOMP
    124 	struct regexp *comp;
    125 	reg_show_error = show_error;
    126 	comp = regcomp(pattern);
    127 	reg_show_error = 1;
    128 	if (comp == NULL)
    129 	{
    130 		/*
    131 		 * regcomp has already printed an error message
    132 		 * via regerror().
    133 		 */
    134 		return (-1);
    135 	}
    136 	if (*comp_pattern != NULL)
    137 		free(*comp_pattern);
    138 	*comp_pattern = comp;
    139 #endif
    140   }
    141 	return (0);
    142 }
    143 
    144 /*
    145  * Like compile_pattern2, but convert the pattern to lowercase if necessary.
    146  */
    147 public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern)
    148 {
    149 	char *cvt_pattern;
    150 	int result;
    151 
    152 	if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX)))
    153 		cvt_pattern = pattern;
    154 	else
    155 	{
    156 		cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
    157 		cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
    158 	}
    159 	result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
    160 	if (cvt_pattern != pattern)
    161 		free(cvt_pattern);
    162 	return (result);
    163 }
    164 
    165 /*
    166  * Forget that we have a compiled pattern.
    167  */
    168 public void uncompile_pattern(PATTERN_TYPE *pattern)
    169 {
    170 #if HAVE_GNU_REGEX
    171 	if (*pattern != NULL)
    172 	{
    173 		regfree(*pattern);
    174 		free(*pattern);
    175 	}
    176 	*pattern = NULL;
    177 #endif
    178 #if HAVE_POSIX_REGCOMP
    179 	if (*pattern != NULL)
    180 	{
    181 		regfree(*pattern);
    182 		free(*pattern);
    183 	}
    184 	*pattern = NULL;
    185 #endif
    186 #if HAVE_PCRE
    187 	if (*pattern != NULL)
    188 		pcre_free(*pattern);
    189 	*pattern = NULL;
    190 #endif
    191 #if HAVE_PCRE2
    192 	if (*pattern != NULL)
    193 		pcre2_code_free(*pattern);
    194 	*pattern = NULL;
    195 #endif
    196 #if HAVE_RE_COMP
    197 	*pattern = 0;
    198 #endif
    199 #if HAVE_REGCMP
    200 	if (*pattern != NULL)
    201 		free(*pattern);
    202 	*pattern = NULL;
    203 #endif
    204 #if HAVE_V8_REGCOMP
    205 	if (*pattern != NULL)
    206 		free(*pattern);
    207 	*pattern = NULL;
    208 #endif
    209 }
    210 
    211 #if 0
    212 /*
    213  * Can a pattern be successfully compiled?
    214  */
    215 public int valid_pattern(char *pattern)
    216 {
    217 	PATTERN_TYPE comp_pattern;
    218 	int result;
    219 
    220 	SET_NULL_PATTERN(comp_pattern);
    221 	result = compile_pattern2(pattern, 0, &comp_pattern, 0);
    222 	if (result != 0)
    223 		return (0);
    224 	uncompile_pattern(&comp_pattern);
    225 	return (1);
    226 }
    227 #endif
    228 
    229 /*
    230  * Is a compiled pattern null?
    231  */
    232 public int is_null_pattern(PATTERN_TYPE pattern)
    233 {
    234 #if HAVE_GNU_REGEX
    235 	return (pattern == NULL);
    236 #endif
    237 #if HAVE_POSIX_REGCOMP
    238 	return (pattern == NULL);
    239 #endif
    240 #if HAVE_PCRE
    241 	return (pattern == NULL);
    242 #endif
    243 #if HAVE_PCRE2
    244 	return (pattern == NULL);
    245 #endif
    246 #if HAVE_RE_COMP
    247 	return (pattern == 0);
    248 #endif
    249 #if HAVE_REGCMP
    250 	return (pattern == NULL);
    251 #endif
    252 #if HAVE_V8_REGCOMP
    253 	return (pattern == NULL);
    254 #endif
    255 #if NO_REGEX
    256 	return (pattern == NULL);
    257 #endif
    258 }
    259 /*
    260  * Simple pattern matching function.
    261  * It supports no metacharacters like *, etc.
    262  */
    263 static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs)
    264 {
    265 	char *pp, *lp;
    266 	char *pattern_end = pattern + pattern_len;
    267 	char *buf_end = buf + buf_len;
    268 
    269 	for ( ;  buf < buf_end;  buf++)
    270 	{
    271 		for (pp = pattern, lp = buf;  ;  pp++, lp++)
    272 		{
    273 			char cp = *pp;
    274 			char cl = *lp;
    275 			if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
    276 				cp = ASCII_TO_LOWER(cp);
    277 			if (cp != cl)
    278 				break;
    279 			if (pp == pattern_end || lp == buf_end)
    280 				break;
    281 		}
    282 		if (pp == pattern_end)
    283 		{
    284 			*(*sp)++ = buf;
    285 			*(*ep)++ = lp;
    286 			return (1);
    287 		}
    288 	}
    289 	**sp = **ep = NULL;
    290 	return (0);
    291 }
    292 
    293 /*
    294  * Perform a pattern match with the previously compiled pattern.
    295  * Set sp[0] and ep[0] to the start and end of the matched string.
    296  * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern.
    297  * Subpatterns are defined by parentheses in the regex language.
    298  */
    299 static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
    300 {
    301 	int matched;
    302 
    303 #if NO_REGEX
    304 	search_type |= SRCH_NO_REGEX;
    305 #endif
    306 	if (search_type & SRCH_NO_REGEX)
    307 		matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp);
    308 	else
    309 	{
    310 #if HAVE_GNU_REGEX
    311 	{
    312 		struct re_registers search_regs;
    313 		pattern->not_bol = notbol;
    314 		pattern->regs_allocated = REGS_UNALLOCATED;
    315 		matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
    316 		if (matched)
    317 		{
    318 			*sp++ = line + search_regs.start[0];
    319 			*ep++ = line + search_regs.end[0];
    320 		}
    321 	}
    322 #endif
    323 #if HAVE_POSIX_REGCOMP
    324 	{
    325 		#define RM_COUNT (NUM_SEARCH_COLORS+2)
    326 		regmatch_t rm[RM_COUNT];
    327 		int flags = (notbol) ? REG_NOTBOL : 0;
    328 #ifdef REG_STARTEND
    329 		flags |= REG_STARTEND;
    330 		rm[0].rm_so = 0;
    331 		rm[0].rm_eo = line_len;
    332 #endif
    333 		matched = !regexec(pattern, line, RM_COUNT, rm, flags);
    334 		if (matched)
    335 		{
    336 			int i;
    337 			int ecount;
    338 			for (ecount = RM_COUNT;  ecount > 0;  ecount--)
    339 				if (rm[ecount-1].rm_so >= 0)
    340 					break;
    341 			if (ecount >= nsp)
    342 				ecount = nsp-1;
    343 			for (i = 0;  i < ecount;  i++)
    344 			{
    345 				if (rm[i].rm_so < 0)
    346 				{
    347 					*sp++ = *ep++ = line;
    348 				} else
    349 				{
    350 #ifndef __WATCOMC__
    351 					*sp++ = line + rm[i].rm_so;
    352 					*ep++ = line + rm[i].rm_eo;
    353 #else
    354 					*sp++ = rm[i].rm_sp;
    355 					*ep++ = rm[i].rm_ep;
    356 #endif
    357 				}
    358 			}
    359 		}
    360 	}
    361 #endif
    362 #if HAVE_PCRE
    363 	{
    364 		#define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3)
    365 		int ovector[OVECTOR_COUNT];
    366 		int flags = (notbol) ? PCRE_NOTBOL : 0;
    367 		int i;
    368 		int ecount;
    369 		int mcount = pcre_exec(pattern, NULL, line, line_len,
    370 			0, flags, ovector, OVECTOR_COUNT);
    371 		matched = (mcount > 0);
    372 		ecount = nsp-1;
    373 		if (ecount > mcount) ecount = mcount;
    374 		for (i = 0;  i < ecount*2; )
    375 		{
    376 			if (ovector[i] < 0 || ovector[i+1] < 0)
    377 			{
    378 				*sp++ = *ep++ = line;
    379 				i += 2;
    380 			} else
    381 			{
    382 				*sp++ = line + ovector[i++];
    383 				*ep++ = line + ovector[i++];
    384 			}
    385 		}
    386 	}
    387 #endif
    388 #if HAVE_PCRE2
    389 	{
    390 		int flags = (notbol) ? PCRE2_NOTBOL : 0;
    391 		pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL);
    392 		int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
    393 			0, flags, md, NULL);
    394 		matched = (mcount > 0);
    395 		if (matched)
    396 		{
    397 			PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
    398 			int i;
    399 			int ecount = nsp-1;
    400 			if (ecount > mcount) ecount = mcount;
    401 			for (i = 0;  i < ecount*2; )
    402 			{
    403 				if (ovector[i] < 0 || ovector[i+1] < 0)
    404 				{
    405 					*sp++ = *ep++ = line;
    406 					i += 2;
    407 				} else
    408 				{
    409 					*sp++ = line + ovector[i++];
    410 					*ep++ = line + ovector[i++];
    411 				}
    412 			}
    413 		}
    414 		pcre2_match_data_free(md);
    415 	}
    416 #endif
    417 #if HAVE_RE_COMP
    418 	matched = (re_exec(line) == 1);
    419 	/*
    420 	 * re_exec doesn't seem to provide a way to get the matched string.
    421 	 */
    422 #endif
    423 #if HAVE_REGCMP
    424 	matched = ((*ep++ = regex(pattern, line)) != NULL);
    425 	if (matched)
    426 		*sp++ = __loc1;
    427 #endif
    428 #if HAVE_V8_REGCOMP
    429 #if HAVE_REGEXEC2
    430 	matched = regexec2(pattern, line, notbol);
    431 #else
    432 	matched = regexec(pattern, line);
    433 #endif
    434 	if (matched)
    435 	{
    436 		*sp++ = pattern->startp[0];
    437 		*ep++ = pattern->endp[0];
    438 	}
    439 #endif
    440 	}
    441 	*sp = *ep = NULL;
    442 	matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
    443 			((search_type & SRCH_NO_MATCH) && !matched);
    444 	return (matched);
    445 }
    446 
    447 public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type)
    448 {
    449 	int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type);
    450 	int i;
    451 	for (i = 1;  i <= NUM_SEARCH_COLORS;  i++)
    452 	{
    453 		if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i])
    454 			matched = 0;
    455 	}
    456 	return matched;
    457 }
    458 
    459 /*
    460  * Return the name of the pattern matching library.
    461  */
    462 public char * pattern_lib_name(void)
    463 {
    464 #if HAVE_GNU_REGEX
    465 	return ("GNU");
    466 #else
    467 #if HAVE_POSIX_REGCOMP
    468 	return ("POSIX");
    469 #else
    470 #if HAVE_PCRE2
    471 	return ("PCRE2");
    472 #else
    473 #if HAVE_PCRE
    474 	return ("PCRE");
    475 #else
    476 #if HAVE_RE_COMP
    477 	return ("BSD");
    478 #else
    479 #if HAVE_REGCMP
    480 	return ("V8");
    481 #else
    482 #if HAVE_V8_REGCOMP
    483 	return ("Spencer V8");
    484 #else
    485 	return ("no");
    486 #endif
    487 #endif
    488 #endif
    489 #endif
    490 #endif
    491 #endif
    492 #endif
    493 }
    494