Home | History | Annotate | Line # | Download | only in util
      1 /*	$NetBSD: dict_regexp.c,v 1.6 2026/05/09 18:49:22 christos Exp $	*/
      2 
      3 /*++
      4 /* NAME
      5 /*	dict_regexp 3
      6 /* SUMMARY
      7 /*	dictionary manager interface to REGEXP regular expression library
      8 /* SYNOPSIS
      9 /*	#include <dict_regexp.h>
     10 /*
     11 /*	DICT	*dict_regexp_open(name, dummy, dict_flags)
     12 /*	const char *name;
     13 /*	int	dummy;
     14 /*	int	dict_flags;
     15 /* DESCRIPTION
     16 /*	dict_regexp_open() opens the named file and compiles the contained
     17 /*	regular expressions. The result object can be used to match strings
     18 /*	against the table.
     19 /* SEE ALSO
     20 /*	dict(3) generic dictionary manager
     21 /*	regexp_table(5) regular expression table configuration
     22 /* AUTHOR(S)
     23 /*	LaMont Jones
     24 /*	lamont (at) hp.com
     25 /*
     26 /*	Based on PCRE dictionary contributed by Andrew McNamara
     27 /*	andrewm (at) connect.com.au
     28 /*	connect.com.au Pty. Ltd.
     29 /*	Level 3, 213 Miller St
     30 /*	North Sydney, NSW, Australia
     31 /*
     32 /*	Heavily rewritten by Wietse Venema
     33 /*	IBM T.J. Watson Research
     34 /*	P.O. Box 704
     35 /*	Yorktown Heights, NY 10598, USA
     36 /*
     37 /*	Wietse Venema
     38 /*	Google, Inc.
     39 /*	111 8th Avenue
     40 /*	New York, NY 10011, USA
     41 /*--*/
     42 
     43 /* System library. */
     44 
     45 #include "sys_defs.h"
     46 
     47 #ifdef HAS_POSIX_REGEXP
     48 
     49 #include <sys/stat.h>
     50 #include <stdlib.h>
     51 #include <unistd.h>
     52 #include <string.h>
     53 #include <ctype.h>
     54 #include <regex.h>
     55 #ifdef STRCASECMP_IN_STRINGS_H
     56 #include <strings.h>
     57 #endif
     58 
     59 /* Utility library. */
     60 
     61 #include "mymalloc.h"
     62 #include "msg.h"
     63 #include "safe.h"
     64 #include "vstream.h"
     65 #include "vstring.h"
     66 #include "stringops.h"
     67 #include "readlline.h"
     68 #include "dict.h"
     69 #include "dict_regexp.h"
     70 #include "mac_parse.h"
     71 #include "warn_stat.h"
     72 #include "mvect.h"
     73 
 /*
  * Operation codes stored in the "op" member of a DICT_REGEXP_RULE. The
  * code selects which rule subclass a generic rule pointer is cast to.
  * Support for IF/ENDIF based on an idea by Bert Driehuis.
  */
#define DICT_REGEXP_OP_MATCH	1	/* Match this regexp */
#define DICT_REGEXP_OP_IF	2	/* Increase if/endif nesting on match */
#define DICT_REGEXP_OP_ENDIF	3	/* Decrease if/endif nesting on match */
     80 
 /*
  * Regular expression before compiling. Filled in by dict_regexp_get_pat()
  * and consumed by dict_regexp_compile_pat(). The regexp member points into
  * the line buffer that is being parsed; it is not a private copy.
  */
typedef struct {
    char   *regexp;			/* regular expression */
    int     options;			/* regcomp() options */
    int     match;			/* positive or negative match */
} DICT_REGEXP_PATTERN;
     89 
 /*
  * Compiled generic rule, and subclasses that derive from it. Each subclass
  * has this structure as its first member, so that a generic rule pointer
  * can be cast to the subclass that is selected by the "op" member.
  */
typedef struct DICT_REGEXP_RULE {
    int     op;				/* DICT_REGEXP_OP_MATCH/IF/ENDIF */
    int     lineno;			/* source file line number */
    struct DICT_REGEXP_RULE *next;	/* next rule in dict */
} DICT_REGEXP_RULE;
     98 
 /*
  * Rule with a primary pattern, an optional secondary pattern, and
  * replacement text. The second_exp member is null when the rule has no
  * secondary pattern. When max_sub is zero the replacement is returned
  * as-is at lookup time; otherwise it is expanded with mac_parse().
  */
typedef struct {
    DICT_REGEXP_RULE rule;		/* generic part */
    regex_t *first_exp;			/* compiled primary pattern */
    int     first_match;		/* positive or negative match */
    regex_t *second_exp;		/* compiled secondary pattern */
    int     second_match;		/* positive or negative match */
    char   *replacement;		/* replacement text */
    size_t  max_sub;			/* largest $number in replacement */
} DICT_REGEXP_MATCH_RULE;
    108 
 /*
  * IF rule. The endif_rule member is filled in by dict_regexp_open() when
  * the matching ENDIF is parsed, and remains null when the table has no
  * matching ENDIF.
  */
typedef struct {
    DICT_REGEXP_RULE rule;		/* generic members */
    regex_t *expr;			/* the condition */
    int     match;			/* positive or negative match */
    struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */
} DICT_REGEXP_IF_RULE;
    115 
 /*
  * Regexp map. The pmatch array is allocated by dict_regexp_open() with room
  * for the largest $number that any rule uses, plus one; it is null when no
  * rule uses $number. The expansion buffer is allocated on first use by
  * dict_regexp_lookup().
  */
typedef struct {
    DICT    dict;			/* generic members */
    regmatch_t *pmatch;			/* matched substring info */
    DICT_REGEXP_RULE *head;		/* first rule */
    VSTRING *expansion_buf;		/* lookup result */
} DICT_REGEXP;
    125 
 /*
  * Macros to make dense code more readable. These are passed as the "nsub"
  * and "pmatch" arguments of DICT_REGEXP_REGEXEC() when no substring
  * information is needed.
  */
#define NULL_SUBSTITUTIONS	(0)
#define NULL_MATCH_RESULT	((regmatch_t *) 0)
    131 
 /*
  * Context for $number expansion callback. Set up by dict_regexp_lookup()
  * and handed to dict_regexp_expand() through mac_parse().
  */
typedef struct {
    DICT_REGEXP *dict_regexp;		/* the dictionary handle */
    DICT_REGEXP_MATCH_RULE *match_rule;	/* the rule we matched */
    const char *lookup_string;		/* matched text */
} DICT_REGEXP_EXPAND_CONTEXT;
    140 
 /*
  * Context for $number pre-scan callback. Set up by dict_regexp_parseline()
  * and updated by dict_regexp_prescan(). The literal member is heap memory;
  * it is non-null only while the replacement text contains no $number.
  */
typedef struct {
    const char *mapname;		/* name of regexp map */
    int     lineno;			/* where in file */
    size_t  max_sub;			/* largest $number seen */
    char   *literal;			/* constant result, $$ -> $ */
} DICT_REGEXP_PRESCAN_CONTEXT;
    150 
 /*
  * Compatibility. Provide a fallback definition in case mac_parse.h does not
  * define MAC_PARSE_OK.
  */
#ifndef MAC_PARSE_OK
#define MAC_PARSE_OK 0
#endif
    157 
    158 /* dict_regexp_expand - replace $number with substring from matched text */
    159 
    160 static int dict_regexp_expand(int type, VSTRING *buf, void *ptr)
    161 {
    162     DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
    163     DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
    164     DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
    165     regmatch_t *pmatch;
    166     size_t  n;
    167 
    168     /*
    169      * Replace $number by the corresponding substring from the matched text.
    170      * We pre-scanned the replacement text at compile time, so any out of
    171      * range $number means that something impossible has happened.
    172      */
    173     if (type == MAC_PARSE_VARNAME) {
    174 	n = atoi(vstring_str(buf));
    175 	if (n < 1 || n > match_rule->max_sub)
    176 	    msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
    177 		      dict_regexp->dict.name, match_rule->rule.lineno,
    178 		      vstring_str(buf));
    179 	pmatch = dict_regexp->pmatch + n;
    180 	if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
    181 	    return (MAC_PARSE_UNDEF);		/* empty or not matched */
    182 	vstring_strncat(dict_regexp->expansion_buf,
    183 			ctxt->lookup_string + pmatch->rm_so,
    184 			pmatch->rm_eo - pmatch->rm_so);
    185 	return (MAC_PARSE_OK);
    186     }
    187 
    188     /*
    189      * Straight text - duplicate with no substitution.
    190      */
    191     else {
    192 	vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
    193 	return (MAC_PARSE_OK);
    194     }
    195 }
    196 
/* dict_regexp_regerror - log a regcomp()/regexec() error as a warning */

static void dict_regexp_regerror(const char *mapname, int lineno, int error,
				         const regex_t *expr)
{
    char    text[256];

    /*
     * Convert the error code to text; the text is truncated when it does not
     * fit the buffer.
     */
    (void) regerror(error, expr, text, sizeof(text));
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
    207 
 /*
  * Inlined to reduce function call overhead in the time-critical loop.
  * 
  * The macro stores the regexec() result in "err" and evaluates to non-zero
  * when the outcome agrees with the requested polarity "match": REG_NOMATCH
  * yields !match, success yields match. Any other regexec() result is logged
  * with dict_regexp_regerror() and the macro evaluates to zero. Note that the
  * "err" and "expr" arguments appear more than once in the expansion.
  */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \
     ((err) == REG_NOMATCH ? !(match) : \
      (err) == 0 ? (match) : \
      (dict_regexp_regerror((map), (line), (err), (expr)), 0)))
    216 
    217 /* dict_regexp_lookup - match string and perform optional substitution */
    218 
    219 static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
    220 {
    221     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
    222     DICT_REGEXP_RULE *rule;
    223     DICT_REGEXP_IF_RULE *if_rule;
    224     DICT_REGEXP_MATCH_RULE *match_rule;
    225     DICT_REGEXP_EXPAND_CONTEXT expand_context;
    226     int     error;
    227 
    228     dict->error = 0;
    229 
    230     if (msg_verbose)
    231 	msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
    232 
    233     /*
    234      * Optionally fold the key.
    235      */
    236     if (dict->flags & DICT_FLAG_FOLD_MUL) {
    237 	if (dict->fold_buf == 0)
    238 	    dict->fold_buf = vstring_alloc(10);
    239 	vstring_strcpy(dict->fold_buf, lookup_string);
    240 	lookup_string = lowercase(vstring_str(dict->fold_buf));
    241     }
    242     for (rule = dict_regexp->head; rule; rule = rule->next) {
    243 
    244 	switch (rule->op) {
    245 
    246 	    /*
    247 	     * Search for the first matching primary expression. Limit the
    248 	     * overhead for substring substitution to the bare minimum.
    249 	     */
    250 	case DICT_REGEXP_OP_MATCH:
    251 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
    252 	    if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
    253 				     match_rule->first_exp,
    254 				     match_rule->first_match,
    255 				     lookup_string,
    256 				     match_rule->max_sub > 0 ?
    257 				     match_rule->max_sub + 1 : 0,
    258 				     dict_regexp->pmatch))
    259 		continue;
    260 	    if (match_rule->second_exp
    261 		&& !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
    262 					match_rule->second_exp,
    263 					match_rule->second_match,
    264 					lookup_string,
    265 					NULL_SUBSTITUTIONS,
    266 					NULL_MATCH_RESULT))
    267 		continue;
    268 
    269 	    /*
    270 	     * Skip $number substitutions when the replacement text contains
    271 	     * no $number strings, as learned during the compile time
    272 	     * pre-scan. The pre-scan already replaced $$ by $.
    273 	     */
    274 	    if (match_rule->max_sub == 0)
    275 		return (match_rule->replacement);
    276 
    277 	    /*
    278 	     * Perform $number substitutions on the replacement text. We
    279 	     * pre-scanned the replacement text at compile time. Any macro
    280 	     * expansion errors at this point mean something impossible has
    281 	     * happened.
    282 	     */
    283 	    if (!dict_regexp->expansion_buf)
    284 		dict_regexp->expansion_buf = vstring_alloc(10);
    285 	    VSTRING_RESET(dict_regexp->expansion_buf);
    286 	    expand_context.lookup_string = lookup_string;
    287 	    expand_context.match_rule = match_rule;
    288 	    expand_context.dict_regexp = dict_regexp;
    289 
    290 	    if (mac_parse(match_rule->replacement, dict_regexp_expand,
    291 			  (void *) &expand_context) & MAC_PARSE_ERROR)
    292 		msg_panic("regexp map %s, line %d: bad replacement syntax",
    293 			  dict->name, rule->lineno);
    294 	    VSTRING_TERMINATE(dict_regexp->expansion_buf);
    295 	    return (vstring_str(dict_regexp->expansion_buf));
    296 
    297 	    /*
    298 	     * Conditional.
    299 	     */
    300 	case DICT_REGEXP_OP_IF:
    301 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
    302 	    if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
    303 			       if_rule->expr, if_rule->match, lookup_string,
    304 				    NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
    305 		continue;
    306 	    /* An IF without matching ENDIF has no "endif" rule. */
    307 	    if ((rule = if_rule->endif_rule) == 0)
    308 		return (0);
    309 	    /* FALLTHROUGH */
    310 
    311 	    /*
    312 	     * ENDIF after IF.
    313 	     */
    314 	case DICT_REGEXP_OP_ENDIF:
    315 	    continue;
    316 
    317 	default:
    318 	    msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
    319 	}
    320     }
    321     return (0);
    322 }
    323 
    324 /* dict_regexp_close - close regexp dictionary */
    325 
    326 static void dict_regexp_close(DICT *dict)
    327 {
    328     DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
    329     DICT_REGEXP_RULE *rule;
    330     DICT_REGEXP_RULE *next;
    331     DICT_REGEXP_MATCH_RULE *match_rule;
    332     DICT_REGEXP_IF_RULE *if_rule;
    333 
    334     for (rule = dict_regexp->head; rule; rule = next) {
    335 	next = rule->next;
    336 	switch (rule->op) {
    337 	case DICT_REGEXP_OP_MATCH:
    338 	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
    339 	    if (match_rule->first_exp) {
    340 		regfree(match_rule->first_exp);
    341 		myfree((void *) match_rule->first_exp);
    342 	    }
    343 	    if (match_rule->second_exp) {
    344 		regfree(match_rule->second_exp);
    345 		myfree((void *) match_rule->second_exp);
    346 	    }
    347 	    if (match_rule->replacement)
    348 		myfree((void *) match_rule->replacement);
    349 	    break;
    350 	case DICT_REGEXP_OP_IF:
    351 	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
    352 	    if (if_rule->expr) {
    353 		regfree(if_rule->expr);
    354 		myfree((void *) if_rule->expr);
    355 	    }
    356 	    break;
    357 	case DICT_REGEXP_OP_ENDIF:
    358 	    break;
    359 	default:
    360 	    msg_panic("dict_regexp_close: unknown operation %d", rule->op);
    361 	}
    362 	myfree((void *) rule);
    363     }
    364     if (dict_regexp->pmatch)
    365 	myfree((void *) dict_regexp->pmatch);
    366     if (dict_regexp->expansion_buf)
    367 	vstring_free(dict_regexp->expansion_buf);
    368     if (dict->fold_buf)
    369 	vstring_free(dict->fold_buf);
    370     dict_free(dict);
    371 }
    372 
    373 /* dict_regexp_get_pat - extract one pattern with options from rule */
    374 
    375 static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
    376 			               DICT_REGEXP_PATTERN *pat)
    377 {
    378     char   *p = *bufp;
    379     char    re_delim;
    380 
    381     /*
    382      * Process negation operators.
    383      */
    384     pat->match = 1;
    385     for (;;) {
    386 	if (*p == '!')
    387 	    pat->match = !pat->match;
    388 	else if (!ISSPACE(*p))
    389 	    break;
    390 	p++;
    391     }
    392     if (*p == 0) {
    393 	msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
    394 		 mapname, lineno);
    395 	return (0);
    396     }
    397 
    398     /*
    399      * Search for the closing delimiter, handling backslash escape.
    400      */
    401     re_delim = *p++;
    402     pat->regexp = p;
    403     while (*p) {
    404 	if (*p == '\\') {
    405 	    if (p[1])
    406 		p++;
    407 	    else
    408 		break;
    409 	} else if (*p == re_delim) {
    410 	    break;
    411 	}
    412 	++p;
    413     }
    414     if (!*p) {
    415 	msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
    416 		 "skipping this rule", mapname, lineno, re_delim);
    417 	return (0);
    418     }
    419     *p++ = 0;					/* null terminate */
    420 
    421     /*
    422      * Search for options.
    423      */
    424     pat->options = REG_EXTENDED | REG_ICASE;
    425     while (*p && !ISSPACE(*p) && *p != '!') {
    426 	switch (*p) {
    427 	case 'i':
    428 	    pat->options ^= REG_ICASE;
    429 	    break;
    430 	case 'm':
    431 	    pat->options ^= REG_NEWLINE;
    432 	    break;
    433 	case 'x':
    434 	    pat->options ^= REG_EXTENDED;
    435 	    break;
    436 	default:
    437 	    msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
    438 		     "skipping this rule", mapname, lineno, *p);
    439 	    return (0);
    440 	}
    441 	++p;
    442     }
    443     *bufp = p;
    444     return (1);
    445 }
    446 
    447 /* dict_regexp_get_pats - get the primary and second patterns and flags */
    448 
    449 static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
    450 				        DICT_REGEXP_PATTERN *first_pat,
    451 				        DICT_REGEXP_PATTERN *second_pat)
    452 {
    453 
    454     /*
    455      * Get the primary and optional secondary patterns and their flags.
    456      */
    457     if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
    458 	return (0);
    459     if (**p == '!') {
    460 #if 0
    461 	static int bitrot_warned = 0;
    462 
    463 	if (bitrot_warned == 0) {
    464 	    msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
    465 		 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
    466 		     mapname, lineno);
    467 	    bitrot_warned = 1;
    468 	}
    469 #endif
    470 	if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
    471 	    return (0);
    472     } else {
    473 	second_pat->regexp = 0;
    474     }
    475     return (1);
    476 }
    477 
    478 /* dict_regexp_prescan - find largest $number in replacement text */
    479 
    480 static int dict_regexp_prescan(int type, VSTRING *buf, void *context)
    481 {
    482     DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
    483     size_t  n;
    484 
    485     /*
    486      * Keep a copy of literal text (with $$ already replaced by $) if and
    487      * only if the replacement text contains no $number expression. This way
    488      * we can avoid having to scan the replacement text at lookup time.
    489      */
    490     if (type == MAC_PARSE_VARNAME) {
    491 	if (ctxt->literal) {
    492 	    myfree(ctxt->literal);
    493 	    ctxt->literal = 0;
    494 	}
    495 	if (!alldig(vstring_str(buf))) {
    496 	    msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
    497 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
    498 	    return (MAC_PARSE_ERROR);
    499 	}
    500 	n = atoi(vstring_str(buf));
    501 	if (n < 1) {
    502 	    msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
    503 		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
    504 	    return (MAC_PARSE_ERROR);
    505 	}
    506 	if (n > ctxt->max_sub)
    507 	    ctxt->max_sub = n;
    508     } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
    509 	if (ctxt->literal)
    510 	    msg_panic("regexp map %s, line %d: multiple literals but no $number",
    511 		      ctxt->mapname, ctxt->lineno);
    512 	ctxt->literal = mystrdup(vstring_str(buf));
    513     }
    514     return (MAC_PARSE_OK);
    515 }
    516 
    517 /* dict_regexp_compile_pat - compile one pattern */
    518 
    519 static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
    520 					        DICT_REGEXP_PATTERN *pat)
    521 {
    522     int     error;
    523     regex_t *expr;
    524 
    525     expr = (regex_t *) mymalloc(sizeof(*expr));
    526     error = regcomp(expr, pat->regexp, pat->options);
    527     if (error != 0) {
    528 	dict_regexp_regerror(mapname, lineno, error, expr);
    529 	myfree((void *) expr);
    530 	return (0);
    531     }
    532     return (expr);
    533 }
    534 
    535 /* dict_regexp_rule_alloc - fill in a generic rule structure */
    536 
    537 static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size)
    538 {
    539     DICT_REGEXP_RULE *rule;
    540 
    541     rule = (DICT_REGEXP_RULE *) mymalloc(size);
    542     rule->op = op;
    543     rule->lineno = lineno;
    544     rule->next = 0;
    545 
    546     return (rule);
    547 }
    548 
/* dict_regexp_parseline - parse one rule */

 /*
  * Parses one logical line and returns a newly-allocated MATCH, IF or ENDIF
  * rule, or null when the line is skipped (a warning is logged in that
  * case). The line buffer is modified in place while patterns are extracted.
  * The nesting argument is the number of IF rules that are still open; here
  * it is used only to reject an ENDIF without matching IF.
  */

static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname,
					             int lineno, char *line,
					               int nesting)
{
    char   *p;

    p = line;

    /*
     * An ordinary rule takes one or two patterns and replacement text.
     */
    if (!ISALNUM(*p)) {
	DICT_REGEXP_PATTERN first_pat;
	DICT_REGEXP_PATTERN second_pat;
	DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
	regex_t *first_exp = 0;
	regex_t *second_exp;
	DICT_REGEXP_MATCH_RULE *match_rule;

	/*
	 * Get the primary and the optional secondary patterns.
	 */
	if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
	    return (0);

	/*
	 * Get the replacement text.
	 */
	while (*p && ISSPACE(*p))
	    ++p;
	if (!*p) {
	    msg_warn("regexp map %s, line %d: no replacement text: "
		     "using empty string", mapname, lineno);
	}

	/*
	 * Find the highest-numbered $number in the replacement text. We can
	 * speed up pattern matching 1) by passing hints to the regexp
	 * compiler, setting the REG_NOSUB flag when the replacement text
	 * contains no $number string; 2) by passing hints to the regexp
	 * execution code, limiting the amount of text that is made available
	 * for substitution.
	 */
	prescan_context.mapname = mapname;
	prescan_context.lineno = lineno;
	prescan_context.max_sub = 0;
	prescan_context.literal = 0;

	/*
	 * The optimizer will eliminate code duplication and/or dead code.
	 * This macro releases the compiled primary pattern and the literal
	 * replacement copy, if any, before it returns from the function.
	 */
#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
	if (first_exp) { \
	    regfree(first_exp); \
	    myfree((void *) first_exp); \
	} \
	if (prescan_context.literal) \
	    myfree(prescan_context.literal); \
	return (rval); \
    } while (0)

	/*
	 * With DICT_FLAG_SRC_RHS_IS_FILE the replacement text is handed to
	 * dict_file_to_b64() and the content of the returned buffer is used
	 * instead (presumably the base64-encoded content of the named file;
	 * confirm against the dict_file module).
	 */
	if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) {
	    VSTRING *base64_buf;
	    char   *err;

	    if ((base64_buf = dict_file_to_b64(dict, p)) == 0) {
		err = dict_file_get_error(dict);
		msg_warn("regexp map %s, line %d: %s: skipping this rule",
			 mapname, lineno, err);
		myfree(err);
		CREATE_MATCHOP_ERROR_RETURN(0);
	    }
	    p = vstring_str(base64_buf);
	}
	if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context)
	    & MAC_PARSE_ERROR) {
	    msg_warn("regexp map %s, line %d: bad replacement syntax: "
		     "skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}

	/*
	 * Compile the primary and the optional secondary pattern. Speed up
	 * execution when no matched text needs to be substituted into the
	 * result string, or when the highest numbered substring is less than
	 * the total number of () subpatterns.
	 */
	if (prescan_context.max_sub == 0)
	    first_pat.options |= REG_NOSUB;
	if (prescan_context.max_sub > 0 && first_pat.match == 0) {
	    msg_warn("regexp map %s, line %d: $number found in negative match "
		   "replacement text: skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
	if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) {
	    msg_warn("regexp map %s, line %d: "
		     "regular expression substitution is not allowed: "
		     "skipping this rule", mapname, lineno);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
	if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
						 &first_pat)) == 0)
	    CREATE_MATCHOP_ERROR_RETURN(0);
	/* The replacement must not refer to more (...) groups than exist. */
	if (prescan_context.max_sub > first_exp->re_nsub) {
	    msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
		     "skipping this rule", mapname, lineno,
		     (int) prescan_context.max_sub);
	    CREATE_MATCHOP_ERROR_RETURN(0);
	}
	if (second_pat.regexp != 0) {
	    second_pat.options |= REG_NOSUB;
	    if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
						      &second_pat)) == 0)
		CREATE_MATCHOP_ERROR_RETURN(0);
	} else {
	    second_exp = 0;
	}
	match_rule = (DICT_REGEXP_MATCH_RULE *)
	    dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno,
				   sizeof(DICT_REGEXP_MATCH_RULE));
	match_rule->first_exp = first_exp;
	match_rule->first_match = first_pat.match;
	match_rule->max_sub = prescan_context.max_sub;
	match_rule->second_exp = second_exp;
	match_rule->second_match = second_pat.match;
	/* Ownership of the literal copy, if any, passes to the rule. */
	if (prescan_context.literal)
	    match_rule->replacement = prescan_context.literal;
	else
	    match_rule->replacement = mystrdup(p);
	return ((DICT_REGEXP_RULE *) match_rule);
    }

    /*
     * The IF operator takes one pattern but no replacement text.
     */
    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
	DICT_REGEXP_PATTERN pattern;
	regex_t *expr;
	DICT_REGEXP_IF_RULE *if_rule;

	p += 2;
	while (*p && ISSPACE(*p))
	    p++;
	if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
	    return (0);
	while (*p && ISSPACE(*p))
	    ++p;
	if (*p) {
	    msg_warn("regexp map %s, line %d: ignoring extra text after"
		     " IF statement: \"%s\"", mapname, lineno, p);
	    msg_warn("regexp map %s, line %d: do not prepend whitespace"
		     " to statements between IF and ENDIF", mapname, lineno);
	}
	if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
	    return (0);
	if_rule = (DICT_REGEXP_IF_RULE *)
	    dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno,
				   sizeof(DICT_REGEXP_IF_RULE));
	if_rule->expr = expr;
	if_rule->match = pattern.match;
	/* The caller fills this in when it finds the matching ENDIF. */
	if_rule->endif_rule = 0;
	return ((DICT_REGEXP_RULE *) if_rule);
    }

    /*
     * The ENDIF operator takes no patterns and no replacement text.
     */
    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
	DICT_REGEXP_RULE *rule;

	p += 5;
	if (nesting == 0) {
	    msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
		     mapname, lineno);
	    return (0);
	}
	while (*p && ISSPACE(*p))
	    ++p;
	if (*p)
	    msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
		     mapname, lineno);
	rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno,
				      sizeof(DICT_REGEXP_RULE));
	return (rule);
    }

    /*
     * Unrecognized input.
     */
    else {
	msg_warn("regexp map %s, line %d: ignoring unrecognized request",
		 mapname, lineno);
	return (0);
    }
}
    746 
/* dict_regexp_open - load and compile a file containing regular expressions */

 /*
  * Opens the named table read-only, parses it into a linked list of rules,
  * and returns the dictionary handle. When the access mode is not O_RDONLY
  * or when the table cannot be opened, the result is the dictionary that
  * dict_surrogate() produces for the error. Lines that fail to parse are
  * skipped; dict_regexp_parseline() logs the reason.
  */

DICT   *dict_regexp_open(const char *mapname, int open_flags, int dict_flags)
{
    const char myname[] = "dict_regexp_open";
    DICT_REGEXP *dict_regexp;
    VSTREAM *map_fp = 0;
    struct stat st;
    VSTRING *why = 0;
    VSTRING *line_buffer = 0;
    DICT_REGEXP_RULE *rule;
    DICT_REGEXP_RULE *last_rule = 0;
    int     lineno;
    int     last_line = 0;
    size_t  max_sub = 0;
    int     nesting = 0;		/* number of open IF rules */
    char   *p;
    DICT_REGEXP_RULE **rule_stack = 0;	/* open IF rules, innermost last */
    MVECT   mvect;

    /*
     * Let the optimizer worry about eliminating redundant code. This macro
     * evaluates its argument first, then releases the line buffer, the
     * stream and the error text (whichever exist), and returns the result.
     */
#define DICT_REGEXP_OPEN_RETURN(d) do { \
	DICT *__d = (d); \
	if (line_buffer != 0) \
	    vstring_free(line_buffer); \
	if (map_fp != 0) \
	    vstream_fclose(map_fp); \
	if (why != 0) \
	   vstring_free(why); \
	return (__d); \
    } while (0)

    /*
     * Sanity checks.
     */
    if (open_flags != O_RDONLY)
	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP,
					    mapname, open_flags, dict_flags,
				  "%s:%s map requires O_RDONLY access mode",
					       DICT_TYPE_REGEXP, mapname));

    /*
     * Open the configuration file.
     */
    if ((map_fp = dict_stream_open(DICT_TYPE_REGEXP, mapname, O_RDONLY,
				   dict_flags, &st, &why)) == 0)
	DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname,
					       open_flags, dict_flags,
					       "%s", vstring_str(why)));
    line_buffer = vstring_alloc(100);

    dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
					     sizeof(*dict_regexp));
    dict_regexp->dict.lookup = dict_regexp_lookup;
    dict_regexp->dict.close = dict_regexp_close;
    dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
    if (dict_flags & DICT_FLAG_FOLD_MUL)
	dict_regexp->dict.fold_buf = vstring_alloc(10);
    dict_regexp->head = 0;
    dict_regexp->pmatch = 0;
    dict_regexp->expansion_buf = 0;
    dict_regexp->dict.owner.uid = st.st_uid;
    dict_regexp->dict.owner.status = (st.st_uid != 0);

    /*
     * Parse the regexp table. Trailing blanks are removed, and lines that
     * are empty after that are skipped.
     */
    while (readllines(line_buffer, map_fp, &last_line, &lineno)) {
	p = vstring_str(line_buffer);
	trimblanks(p, 0)[0] = 0;
	if (*p == 0)
	    continue;
	rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno,
				     p, nesting);
	if (rule == 0)
	    continue;
	if (rule->op == DICT_REGEXP_OP_MATCH) {
	    /* Keep track of the largest $number over all rules. */
	    if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
		max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
	} else if (rule->op == DICT_REGEXP_OP_IF) {
	    /* Push the IF rule, growing the stack by one element. */
	    if (rule_stack == 0)
		rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect,
					   sizeof(*rule_stack), nesting + 1,
						(MVECT_FN) 0, (MVECT_FN) 0);
	    else
		rule_stack =
		    (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1);
	    rule_stack[nesting] = rule;
	    nesting++;
	} else if (rule->op == DICT_REGEXP_OP_ENDIF) {
	    DICT_REGEXP_IF_RULE *if_rule;

	    /* Pop the innermost IF rule and link it to this ENDIF. */
	    if (nesting-- <= 0)
		/* Already handled in dict_regexp_parseline(). */
		msg_panic("%s: ENDIF without IF", myname);
	    if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF)
		msg_panic("%s: unexpected rule stack element type %d",
			  myname, rule_stack[nesting]->op);
	    if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting];
	    if_rule->endif_rule = rule;
	}
	/* Append the rule to the list, preserving table order. */
	if (last_rule == 0)
	    dict_regexp->head = rule;
	else
	    last_rule->next = rule;
	last_rule = rule;
    }

    /*
     * Report each IF that is still open at the end of the table. Such a
     * rule keeps a null "endif" pointer.
     */
    while (nesting-- > 0)
	msg_warn("regexp map %s, line %d: IF has no matching ENDIF",
		 mapname, rule_stack[nesting]->lineno);

    if (rule_stack)
	(void) mvect_free(&mvect);

    /*
     * Allocate space for only as many matched substrings as used in the
     * replacement text.
     */
    if (max_sub > 0)
	dict_regexp->pmatch =
	    (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));

    dict_file_purge_buffers(&dict_regexp->dict);
    DICT_REGEXP_OPEN_RETURN(&dict_regexp->dict);
}
    875 
    876 #endif
    877