1 1.4 simonb /* $NetBSD: pattern.c,v 1.4 2023/10/06 05:49:49 simonb Exp $ */ 2 1.1 tron 3 1.1 tron /* 4 1.4 simonb * Copyright (C) 1984-2023 Mark Nudelman 5 1.1 tron * 6 1.1 tron * You may distribute under the terms of either the GNU General Public 7 1.1 tron * License or the Less License, as specified in the README file. 8 1.1 tron * 9 1.3 tron * For more information, see the README file. 10 1.1 tron */ 11 1.1 tron 12 1.1 tron /* 13 1.1 tron * Routines to do pattern matching. 14 1.1 tron */ 15 1.1 tron 16 1.1 tron #include "less.h" 17 1.1 tron 18 1.1 tron extern int caseless; 19 1.4 simonb extern int is_caseless; 20 1.4 simonb extern int utf_mode; 21 1.1 tron 22 1.1 tron /* 23 1.1 tron * Compile a search pattern, for future use by match_pattern. 24 1.1 tron */ 25 1.4 simonb static int compile_pattern2(char *pattern, int search_type, PATTERN_TYPE *comp_pattern, int show_error) 26 1.1 tron { 27 1.3 tron if (search_type & SRCH_NO_REGEX) 28 1.3 tron return (0); 29 1.3 tron { 30 1.3 tron #if HAVE_GNU_REGEX 31 1.3 tron struct re_pattern_buffer *comp = (struct re_pattern_buffer *) 32 1.3 tron ecalloc(1, sizeof(struct re_pattern_buffer)); 33 1.3 tron re_set_syntax(RE_SYNTAX_POSIX_EXTENDED); 34 1.3 tron if (re_compile_pattern(pattern, strlen(pattern), comp)) 35 1.1 tron { 36 1.3 tron free(comp); 37 1.4 simonb if (show_error) 38 1.4 simonb error("Invalid pattern", NULL_PARG); 39 1.3 tron return (-1); 40 1.3 tron } 41 1.4 simonb if (*comp_pattern != NULL) 42 1.4 simonb { 43 1.4 simonb regfree(*comp_pattern); 44 1.4 simonb free(*comp_pattern); 45 1.4 simonb } 46 1.4 simonb *comp_pattern = comp; 47 1.3 tron #endif 48 1.1 tron #if HAVE_POSIX_REGCOMP 49 1.3 tron regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t)); 50 1.4 simonb if (regcomp(comp, pattern, REGCOMP_FLAG | (is_caseless ? REG_ICASE : 0))) 51 1.3 tron { 52 1.3 tron free(comp); 53 1.4 simonb if (show_error) 54 1.4 simonb error("Invalid pattern", NULL_PARG); 55 1.3 tron return (-1); 56 1.3 tron } 57 1.4 simonb if (*comp_pattern != NULL) 58 1.4 simonb { 59 1.4 simonb regfree(*comp_pattern); 60 1.4 simonb free(*comp_pattern); 61 1.4 simonb } 62 1.4 simonb *comp_pattern = comp; 63 1.1 tron #endif 64 1.1 tron #if HAVE_PCRE 65 1.3 tron constant char *errstring; 66 1.3 tron int erroffset; 67 1.3 tron PARG parg; 68 1.4 simonb pcre *comp = pcre_compile(pattern, 69 1.4 simonb ((utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0) | 70 1.4 simonb (is_caseless ? PCRE_CASELESS : 0), 71 1.3 tron &errstring, &erroffset, NULL); 72 1.3 tron if (comp == NULL) 73 1.3 tron { 74 1.3 tron parg.p_string = (char *) errstring; 75 1.4 simonb if (show_error) 76 1.4 simonb error("%s", &parg); 77 1.3 tron return (-1); 78 1.3 tron } 79 1.4 simonb *comp_pattern = comp; 80 1.4 simonb #endif 81 1.4 simonb #if HAVE_PCRE2 82 1.4 simonb int errcode; 83 1.4 simonb PCRE2_SIZE erroffset; 84 1.4 simonb PARG parg; 85 1.4 simonb pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern), 86 1.4 simonb (is_caseless ? PCRE2_CASELESS : 0), 87 1.4 simonb &errcode, &erroffset, NULL); 88 1.4 simonb if (comp == NULL) 89 1.4 simonb { 90 1.4 simonb if (show_error) 91 1.4 simonb { 92 1.4 simonb char msg[160]; 93 1.4 simonb pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg)); 94 1.4 simonb parg.p_string = msg; 95 1.4 simonb error("%s", &parg); 96 1.4 simonb } 97 1.4 simonb return (-1); 98 1.4 simonb } 99 1.4 simonb *comp_pattern = comp; 100 1.1 tron #endif 101 1.1 tron #if HAVE_RE_COMP 102 1.3 tron PARG parg; 103 1.3 tron if ((parg.p_string = re_comp(pattern)) != NULL) 104 1.3 tron { 105 1.4 simonb if (show_error) 106 1.4 simonb error("%s", &parg); 107 1.3 tron return (-1); 108 1.3 tron } 109 1.4 simonb *comp_pattern = 1; 110 1.1 tron #endif 111 1.1 tron #if HAVE_REGCMP 112 1.3 tron char *comp; 113 1.3 tron if ((comp = regcmp(pattern, 0)) == NULL) 114 1.3 tron { 115 1.4 simonb if (show_error) 116 1.4 simonb error("Invalid pattern", NULL_PARG); 117 1.3 tron return (-1); 118 1.3 tron } 119 1.4 simonb if (comp_pattern != NULL) 120 1.4 simonb free(*comp_pattern); 121 1.4 simonb *comp_pattern = comp; 122 1.1 tron #endif 123 1.1 tron #if HAVE_V8_REGCOMP 124 1.3 tron struct regexp *comp; 125 1.4 simonb reg_show_error = show_error; 126 1.4 simonb comp = regcomp(pattern); 127 1.4 simonb reg_show_error = 1; 128 1.4 simonb if (comp == NULL) 129 1.3 tron { 130 1.3 tron /* 131 1.3 tron * regcomp has already printed an error message 132 1.3 tron * via regerror(). 133 1.3 tron */ 134 1.3 tron return (-1); 135 1.3 tron } 136 1.4 simonb if (*comp_pattern != NULL) 137 1.4 simonb free(*comp_pattern); 138 1.4 simonb *comp_pattern = comp; 139 1.1 tron #endif 140 1.3 tron } 141 1.1 tron return (0); 142 1.1 tron } 143 1.1 tron 144 1.1 tron /* 145 1.1 tron * Like compile_pattern2, but convert the pattern to lowercase if necessary. 146 1.1 tron */ 147 1.4 simonb public int compile_pattern(char *pattern, int search_type, int show_error, PATTERN_TYPE *comp_pattern) 148 1.1 tron { 149 1.1 tron char *cvt_pattern; 150 1.1 tron int result; 151 1.1 tron 152 1.4 simonb if (caseless != OPT_ONPLUS || (re_handles_caseless && !(search_type & SRCH_NO_REGEX))) 153 1.1 tron cvt_pattern = pattern; 154 1.1 tron else 155 1.1 tron { 156 1.1 tron cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC)); 157 1.1 tron cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC); 158 1.1 tron } 159 1.4 simonb result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error); 160 1.1 tron if (cvt_pattern != pattern) 161 1.1 tron free(cvt_pattern); 162 1.1 tron return (result); 163 1.1 tron } 164 1.1 tron 165 1.1 tron /* 166 1.1 tron * Forget that we have a compiled pattern. 167 1.1 tron */ 168 1.4 simonb public void uncompile_pattern(PATTERN_TYPE *pattern) 169 1.1 tron { 170 1.3 tron #if HAVE_GNU_REGEX 171 1.4 simonb if (*pattern != NULL) 172 1.4 simonb { 173 1.4 simonb regfree(*pattern); 174 1.4 simonb free(*pattern); 175 1.4 simonb } 176 1.4 simonb *pattern = NULL; 177 1.3 tron #endif 178 1.1 tron #if HAVE_POSIX_REGCOMP 179 1.4 simonb if (*pattern != NULL) 180 1.4 simonb { 181 1.4 simonb regfree(*pattern); 182 1.4 simonb free(*pattern); 183 1.4 simonb } 184 1.4 simonb *pattern = NULL; 185 1.1 tron #endif 186 1.1 tron #if HAVE_PCRE 187 1.4 simonb if (*pattern != NULL) 188 1.4 simonb pcre_free(*pattern); 189 1.4 simonb *pattern = NULL; 190 1.4 simonb #endif 191 1.4 simonb #if HAVE_PCRE2 192 1.4 simonb if (*pattern != NULL) 193 1.4 simonb pcre2_code_free(*pattern); 194 1.4 simonb *pattern = NULL; 195 1.1 tron #endif 196 1.1 tron #if HAVE_RE_COMP 197 1.4 simonb *pattern = 0; 198 1.1 tron #endif 199 1.1 tron #if HAVE_REGCMP 200 1.4 simonb if (*pattern != NULL) 201 1.4 simonb free(*pattern); 202 1.4 simonb *pattern = NULL; 203 1.1 tron #endif 204 1.1 tron #if HAVE_V8_REGCOMP 205 1.4 simonb if (*pattern != NULL) 206 1.4 simonb free(*pattern); 207 1.4 simonb *pattern = NULL; 208 1.1 tron #endif 209 1.1 tron } 210 1.1 tron 211 1.4 simonb #if 0 212 1.4 simonb /* 213 1.4 simonb * Can a pattern be successfully compiled? 214 1.4 simonb */ 215 1.4 simonb public int valid_pattern(char *pattern) 216 1.4 simonb { 217 1.4 simonb PATTERN_TYPE comp_pattern; 218 1.4 simonb int result; 219 1.4 simonb 220 1.4 simonb SET_NULL_PATTERN(comp_pattern); 221 1.4 simonb result = compile_pattern2(pattern, 0, &comp_pattern, 0); 222 1.4 simonb if (result != 0) 223 1.4 simonb return (0); 224 1.4 simonb uncompile_pattern(&comp_pattern); 225 1.4 simonb return (1); 226 1.4 simonb } 227 1.4 simonb #endif 228 1.4 simonb 229 1.1 tron /* 230 1.1 tron * Is a compiled pattern null? 231 1.1 tron */ 232 1.4 simonb public int is_null_pattern(PATTERN_TYPE pattern) 233 1.1 tron { 234 1.3 tron #if HAVE_GNU_REGEX 235 1.3 tron return (pattern == NULL); 236 1.3 tron #endif 237 1.1 tron #if HAVE_POSIX_REGCOMP 238 1.1 tron return (pattern == NULL); 239 1.1 tron #endif 240 1.1 tron #if HAVE_PCRE 241 1.1 tron return (pattern == NULL); 242 1.1 tron #endif 243 1.4 simonb #if HAVE_PCRE2 244 1.4 simonb return (pattern == NULL); 245 1.4 simonb #endif 246 1.1 tron #if HAVE_RE_COMP 247 1.1 tron return (pattern == 0); 248 1.1 tron #endif 249 1.1 tron #if HAVE_REGCMP 250 1.1 tron return (pattern == NULL); 251 1.1 tron #endif 252 1.1 tron #if HAVE_V8_REGCOMP 253 1.1 tron return (pattern == NULL); 254 1.1 tron #endif 255 1.4 simonb #if NO_REGEX 256 1.4 simonb return (pattern == NULL); 257 1.4 simonb #endif 258 1.1 tron } 259 1.1 tron /* 260 1.1 tron * Simple pattern matching function. 261 1.1 tron * It supports no metacharacters like *, etc. 262 1.1 tron */ 263 1.4 simonb static int match(char *pattern, int pattern_len, char *buf, int buf_len, char ***sp, char ***ep, int nsubs) 264 1.1 tron { 265 1.4 simonb char *pp, *lp; 266 1.4 simonb char *pattern_end = pattern + pattern_len; 267 1.4 simonb char *buf_end = buf + buf_len; 268 1.1 tron 269 1.1 tron for ( ; buf < buf_end; buf++) 270 1.1 tron { 271 1.4 simonb for (pp = pattern, lp = buf; ; pp++, lp++) 272 1.4 simonb { 273 1.4 simonb char cp = *pp; 274 1.4 simonb char cl = *lp; 275 1.4 simonb if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp)) 276 1.4 simonb cp = ASCII_TO_LOWER(cp); 277 1.4 simonb if (cp != cl) 278 1.4 simonb break; 279 1.1 tron if (pp == pattern_end || lp == buf_end) 280 1.1 tron break; 281 1.4 simonb } 282 1.1 tron if (pp == pattern_end) 283 1.1 tron { 284 1.4 simonb *(*sp)++ = buf; 285 1.4 simonb *(*ep)++ = lp; 286 1.1 tron return (1); 287 1.1 tron } 288 1.1 tron } 289 1.4 simonb **sp = **ep = NULL; 290 1.1 tron return (0); 291 1.1 tron } 292 1.1 tron 293 1.1 tron /* 294 1.1 tron * Perform a pattern match with the previously compiled pattern. 295 1.4 simonb * Set sp[0] and ep[0] to the start and end of the matched string. 296 1.4 simonb * Set sp[i] and ep[i] to the start and end of the i-th matched subpattern. 297 1.4 simonb * Subpatterns are defined by parentheses in the regex language. 298 1.1 tron */ 299 1.4 simonb static int match_pattern1(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 300 1.1 tron { 301 1.1 tron int matched; 302 1.1 tron 303 1.3 tron #if NO_REGEX 304 1.3 tron search_type |= SRCH_NO_REGEX; 305 1.3 tron #endif 306 1.1 tron if (search_type & SRCH_NO_REGEX) 307 1.4 simonb matched = match(tpattern, strlen(tpattern), line, line_len, &sp, &ep, nsp); 308 1.1 tron else 309 1.1 tron { 310 1.3 tron #if HAVE_GNU_REGEX 311 1.3 tron { 312 1.3 tron struct re_registers search_regs; 313 1.4 simonb pattern->not_bol = notbol; 314 1.4 simonb pattern->regs_allocated = REGS_UNALLOCATED; 315 1.4 simonb matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0; 316 1.3 tron if (matched) 317 1.3 tron { 318 1.4 simonb *sp++ = line + search_regs.start[0]; 319 1.4 simonb *ep++ = line + search_regs.end[0]; 320 1.3 tron } 321 1.3 tron } 322 1.3 tron #endif 323 1.1 tron #if HAVE_POSIX_REGCOMP 324 1.1 tron { 325 1.4 simonb #define RM_COUNT (NUM_SEARCH_COLORS+2) 326 1.4 simonb regmatch_t rm[RM_COUNT]; 327 1.1 tron int flags = (notbol) ? REG_NOTBOL : 0; 328 1.4 simonb #ifdef REG_STARTEND 329 1.4 simonb flags |= REG_STARTEND; 330 1.4 simonb rm[0].rm_so = 0; 331 1.4 simonb rm[0].rm_eo = line_len; 332 1.4 simonb #endif 333 1.4 simonb matched = !regexec(pattern, line, RM_COUNT, rm, flags); 334 1.1 tron if (matched) 335 1.1 tron { 336 1.4 simonb int i; 337 1.4 simonb int ecount; 338 1.4 simonb for (ecount = RM_COUNT; ecount > 0; ecount--) 339 1.4 simonb if (rm[ecount-1].rm_so >= 0) 340 1.4 simonb break; 341 1.4 simonb if (ecount >= nsp) 342 1.4 simonb ecount = nsp-1; 343 1.4 simonb for (i = 0; i < ecount; i++) 344 1.4 simonb { 345 1.4 simonb if (rm[i].rm_so < 0) 346 1.4 simonb { 347 1.4 simonb *sp++ = *ep++ = line; 348 1.4 simonb } else 349 1.4 simonb { 350 1.1 tron #ifndef __WATCOMC__ 351 1.4 simonb *sp++ = line + rm[i].rm_so; 352 1.4 simonb *ep++ = line + rm[i].rm_eo; 353 1.1 tron #else 354 1.4 simonb *sp++ = rm[i].rm_sp; 355 1.4 simonb *ep++ = rm[i].rm_ep; 356 1.1 tron #endif 357 1.4 simonb } 358 1.4 simonb } 359 1.1 tron } 360 1.1 tron } 361 1.1 tron #endif 362 1.1 tron #if HAVE_PCRE 363 1.1 tron { 364 1.4 simonb #define OVECTOR_COUNT ((3*NUM_SEARCH_COLORS)+3) 365 1.4 simonb int ovector[OVECTOR_COUNT]; 366 1.1 tron int flags = (notbol) ? PCRE_NOTBOL : 0; 367 1.4 simonb int i; 368 1.4 simonb int ecount; 369 1.4 simonb int mcount = pcre_exec(pattern, NULL, line, line_len, 370 1.4 simonb 0, flags, ovector, OVECTOR_COUNT); 371 1.4 simonb matched = (mcount > 0); 372 1.4 simonb ecount = nsp-1; 373 1.4 simonb if (ecount > mcount) ecount = mcount; 374 1.4 simonb for (i = 0; i < ecount*2; ) 375 1.4 simonb { 376 1.4 simonb if (ovector[i] < 0 || ovector[i+1] < 0) 377 1.4 simonb { 378 1.4 simonb *sp++ = *ep++ = line; 379 1.4 simonb i += 2; 380 1.4 simonb } else 381 1.4 simonb { 382 1.4 simonb *sp++ = line + ovector[i++]; 383 1.4 simonb *ep++ = line + ovector[i++]; 384 1.4 simonb } 385 1.4 simonb } 386 1.4 simonb } 387 1.4 simonb #endif 388 1.4 simonb #if HAVE_PCRE2 389 1.4 simonb { 390 1.4 simonb int flags = (notbol) ? PCRE2_NOTBOL : 0; 391 1.4 simonb pcre2_match_data *md = pcre2_match_data_create(nsp-1, NULL); 392 1.4 simonb int mcount = pcre2_match(pattern, (PCRE2_SPTR)line, line_len, 393 1.4 simonb 0, flags, md, NULL); 394 1.4 simonb matched = (mcount > 0); 395 1.1 tron if (matched) 396 1.1 tron { 397 1.4 simonb PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); 398 1.4 simonb int i; 399 1.4 simonb int ecount = nsp-1; 400 1.4 simonb if (ecount > mcount) ecount = mcount; 401 1.4 simonb for (i = 0; i < ecount*2; ) 402 1.4 simonb { 403 1.4 simonb if (ovector[i] < 0 || ovector[i+1] < 0) 404 1.4 simonb { 405 1.4 simonb *sp++ = *ep++ = line; 406 1.4 simonb i += 2; 407 1.4 simonb } else 408 1.4 simonb { 409 1.4 simonb *sp++ = line + ovector[i++]; 410 1.4 simonb *ep++ = line + ovector[i++]; 411 1.4 simonb } 412 1.4 simonb } 413 1.1 tron } 414 1.4 simonb pcre2_match_data_free(md); 415 1.1 tron } 416 1.1 tron #endif 417 1.1 tron #if HAVE_RE_COMP 418 1.1 tron matched = (re_exec(line) == 1); 419 1.1 tron /* 420 1.1 tron * re_exec doesn't seem to provide a way to get the matched string. 421 1.1 tron */ 422 1.1 tron #endif 423 1.1 tron #if HAVE_REGCMP 424 1.4 simonb matched = ((*ep++ = regex(pattern, line)) != NULL); 425 1.1 tron if (matched) 426 1.4 simonb *sp++ = __loc1; 427 1.1 tron #endif 428 1.1 tron #if HAVE_V8_REGCOMP 429 1.1 tron #if HAVE_REGEXEC2 430 1.4 simonb matched = regexec2(pattern, line, notbol); 431 1.1 tron #else 432 1.4 simonb matched = regexec(pattern, line); 433 1.1 tron #endif 434 1.1 tron if (matched) 435 1.1 tron { 436 1.4 simonb *sp++ = pattern->startp[0]; 437 1.4 simonb *ep++ = pattern->endp[0]; 438 1.1 tron } 439 1.1 tron #endif 440 1.1 tron } 441 1.4 simonb *sp = *ep = NULL; 442 1.1 tron matched = (!(search_type & SRCH_NO_MATCH) && matched) || 443 1.1 tron ((search_type & SRCH_NO_MATCH) && !matched); 444 1.1 tron return (matched); 445 1.1 tron } 446 1.1 tron 447 1.4 simonb public int match_pattern(PATTERN_TYPE pattern, char *tpattern, char *line, int line_len, char **sp, char **ep, int nsp, int notbol, int search_type) 448 1.4 simonb { 449 1.4 simonb int matched = match_pattern1(pattern, tpattern, line, line_len, sp, ep, nsp, notbol, search_type); 450 1.4 simonb int i; 451 1.4 simonb for (i = 1; i <= NUM_SEARCH_COLORS; i++) 452 1.4 simonb { 453 1.4 simonb if ((search_type & SRCH_SUBSEARCH(i)) && ep[i] == sp[i]) 454 1.4 simonb matched = 0; 455 1.4 simonb } 456 1.4 simonb return matched; 457 1.4 simonb } 458 1.4 simonb 459 1.4 simonb /* 460 1.4 simonb * Return the name of the pattern matching library. 461 1.4 simonb */ 462 1.4 simonb public char * pattern_lib_name(void) 463 1.4 simonb { 464 1.4 simonb #if HAVE_GNU_REGEX 465 1.4 simonb return ("GNU"); 466 1.4 simonb #else 467 1.4 simonb #if HAVE_POSIX_REGCOMP 468 1.4 simonb return ("POSIX"); 469 1.4 simonb #else 470 1.4 simonb #if HAVE_PCRE2 471 1.4 simonb return ("PCRE2"); 472 1.4 simonb #else 473 1.4 simonb #if HAVE_PCRE 474 1.4 simonb return ("PCRE"); 475 1.4 simonb #else 476 1.4 simonb #if HAVE_RE_COMP 477 1.4 simonb return ("BSD"); 478 1.4 simonb #else 479 1.4 simonb #if HAVE_REGCMP 480 1.4 simonb return ("V8"); 481 1.4 simonb #else 482 1.4 simonb #if HAVE_V8_REGCOMP 483 1.4 simonb return ("Spencer V8"); 484 1.4 simonb #else 485 1.4 simonb return ("no"); 486 1.4 simonb #endif 487 1.4 simonb #endif 488 1.4 simonb #endif 489 1.4 simonb #endif 490 1.4 simonb #endif 491 1.4 simonb #endif 492 1.4 simonb #endif 493 1.4 simonb } 494