Home | History | Annotate | Line # | Download | only in isc
regex.c revision 1.1.1.1
      1  1.1  christos /*	$NetBSD: regex.c,v 1.1.1.1 2018/08/12 12:08:23 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      7  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
      8  1.1  christos  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
      9  1.1  christos  *
     10  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     11  1.1  christos  * information regarding copyright ownership.
     12  1.1  christos  */
     13  1.1  christos 
     14  1.1  christos #include <config.h>
     15  1.1  christos 
     16  1.1  christos #include <isc/file.h>
     17  1.1  christos #include <isc/print.h>
     18  1.1  christos #include <isc/regex.h>
     19  1.1  christos #include <isc/string.h>
     20  1.1  christos 
     21  1.1  christos #if VALREGEX_REPORT_REASON
     22  1.1  christos #define FAIL(x) do { reason = (x); goto error; } while(0)
     23  1.1  christos #else
     24  1.1  christos #define FAIL(x) goto error
     25  1.1  christos #endif
     26  1.1  christos 
     27  1.1  christos /*
     28  1.1  christos  * Validate the regular expression 'C' locale.
     29  1.1  christos  */
     30  1.1  christos int
     31  1.1  christos isc_regex_validate(const char *c) {
     32  1.1  christos 	enum {
     33  1.1  christos 		none, parse_bracket, parse_bound,
     34  1.1  christos 		parse_ce, parse_ec, parse_cc
     35  1.1  christos 	} state = none;
     36  1.1  christos 	/* Well known character classes. */
     37  1.1  christos 	const char *cc[] = {
     38  1.1  christos 		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
     39  1.1  christos 		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
     40  1.1  christos 		":print:", ":xdigit:"
     41  1.1  christos 	};
     42  1.1  christos 	isc_boolean_t seen_comma = ISC_FALSE;
     43  1.1  christos 	isc_boolean_t seen_high = ISC_FALSE;
     44  1.1  christos 	isc_boolean_t seen_char = ISC_FALSE;
     45  1.1  christos 	isc_boolean_t seen_ec = ISC_FALSE;
     46  1.1  christos 	isc_boolean_t seen_ce = ISC_FALSE;
     47  1.1  christos 	isc_boolean_t have_atom = ISC_FALSE;
     48  1.1  christos 	int group = 0;
     49  1.1  christos 	int range = 0;
     50  1.1  christos 	int sub = 0;
     51  1.1  christos 	isc_boolean_t empty_ok = ISC_FALSE;
     52  1.1  christos 	isc_boolean_t neg = ISC_FALSE;
     53  1.1  christos 	isc_boolean_t was_multiple = ISC_FALSE;
     54  1.1  christos 	unsigned int low = 0;
     55  1.1  christos 	unsigned int high = 0;
     56  1.1  christos 	const char *ccname = NULL;
     57  1.1  christos 	int range_start = 0;
     58  1.1  christos #if VALREGEX_REPORT_REASON
     59  1.1  christos 	const char *reason = "";
     60  1.1  christos #endif
     61  1.1  christos 
     62  1.1  christos 	if (c == NULL || *c == 0)
     63  1.1  christos 		FAIL("empty string");
     64  1.1  christos 
     65  1.1  christos 	while (c != NULL && *c != 0) {
     66  1.1  christos 		switch (state) {
     67  1.1  christos 		case none:
     68  1.1  christos 			switch (*c) {
     69  1.1  christos 			case '\\':	/* make literal */
     70  1.1  christos 				++c;
     71  1.1  christos 				switch (*c) {
     72  1.1  christos 				case '1': case '2': case '3':
     73  1.1  christos 				case '4': case '5': case '6':
     74  1.1  christos 				case '7': case '8': case '9':
     75  1.1  christos 					if ((*c - '0') > sub)
     76  1.1  christos 						FAIL("bad back reference");
     77  1.1  christos 					have_atom = ISC_TRUE;
     78  1.1  christos 					was_multiple = ISC_FALSE;
     79  1.1  christos 					break;
     80  1.1  christos 				case 0:
     81  1.1  christos 					FAIL("escaped end-of-string");
     82  1.1  christos 				default:
     83  1.1  christos 					goto literal;
     84  1.1  christos 				}
     85  1.1  christos 				++c;
     86  1.1  christos 				break;
     87  1.1  christos 			case '[':	/* bracket start */
     88  1.1  christos 				++c;
     89  1.1  christos 				neg = ISC_FALSE;
     90  1.1  christos 				was_multiple = ISC_FALSE;
     91  1.1  christos 				seen_char = ISC_FALSE;
     92  1.1  christos 				state = parse_bracket;
     93  1.1  christos 				break;
     94  1.1  christos 			case '{': 	/* bound start */
     95  1.1  christos 				switch (c[1]) {
     96  1.1  christos 				case '0': case '1': case '2': case '3':
     97  1.1  christos 				case '4': case '5': case '6': case '7':
     98  1.1  christos 				case '8': case '9':
     99  1.1  christos 					if (!have_atom)
    100  1.1  christos 						FAIL("no atom");
    101  1.1  christos 					if (was_multiple)
    102  1.1  christos 						FAIL("was multiple");
    103  1.1  christos 					seen_comma = ISC_FALSE;
    104  1.1  christos 					seen_high = ISC_FALSE;
    105  1.1  christos 					low = high = 0;
    106  1.1  christos 					state = parse_bound;
    107  1.1  christos 					break;
    108  1.1  christos 				default:
    109  1.1  christos 					goto literal;
    110  1.1  christos 				}
    111  1.1  christos 				++c;
    112  1.1  christos 				have_atom = ISC_TRUE;
    113  1.1  christos 				was_multiple = ISC_TRUE;
    114  1.1  christos 				break;
    115  1.1  christos 			case '}':
    116  1.1  christos 				goto literal;
    117  1.1  christos 			case '(':	/* group start */
    118  1.1  christos 				have_atom = ISC_FALSE;
    119  1.1  christos 				was_multiple = ISC_FALSE;
    120  1.1  christos 				empty_ok = ISC_TRUE;
    121  1.1  christos 				++group;
    122  1.1  christos 				++sub;
    123  1.1  christos 				++c;
    124  1.1  christos 				break;
    125  1.1  christos 			case ')':	/* group end */
    126  1.1  christos 				if (group && !have_atom && !empty_ok)
    127  1.1  christos 					FAIL("empty alternative");
    128  1.1  christos 				have_atom = ISC_TRUE;
    129  1.1  christos 				was_multiple = ISC_FALSE;
    130  1.1  christos 				if (group != 0)
    131  1.1  christos 					--group;
    132  1.1  christos 				++c;
    133  1.1  christos 				break;
    134  1.1  christos 			case '|':	/* alternative seperator */
    135  1.1  christos 				if (!have_atom)
    136  1.1  christos 					FAIL("no atom");
    137  1.1  christos 				have_atom = ISC_FALSE;
    138  1.1  christos 				empty_ok = ISC_FALSE;
    139  1.1  christos 				was_multiple = ISC_FALSE;
    140  1.1  christos 				++c;
    141  1.1  christos 				break;
    142  1.1  christos 			case '^':
    143  1.1  christos 			case '$':
    144  1.1  christos 				have_atom = ISC_TRUE;
    145  1.1  christos 				was_multiple = ISC_TRUE;
    146  1.1  christos 				++c;
    147  1.1  christos 				break;
    148  1.1  christos 			case '+':
    149  1.1  christos 			case '*':
    150  1.1  christos 			case '?':
    151  1.1  christos 				if (was_multiple)
    152  1.1  christos 					FAIL("was multiple");
    153  1.1  christos 				if (!have_atom)
    154  1.1  christos 					FAIL("no atom");
    155  1.1  christos 				have_atom = ISC_TRUE;
    156  1.1  christos 				was_multiple = ISC_TRUE;
    157  1.1  christos 				++c;
    158  1.1  christos 				break;
    159  1.1  christos 			case '.':
    160  1.1  christos 			default:
    161  1.1  christos 			literal:
    162  1.1  christos 				have_atom = ISC_TRUE;
    163  1.1  christos 				was_multiple = ISC_FALSE;
    164  1.1  christos 				++c;
    165  1.1  christos 				break;
    166  1.1  christos 			}
    167  1.1  christos 			break;
    168  1.1  christos 		case parse_bound:
    169  1.1  christos 			switch (*c) {
    170  1.1  christos 			case '0': case '1': case '2': case '3': case '4':
    171  1.1  christos 			case '5': case '6': case '7': case '8': case '9':
    172  1.1  christos 				if (!seen_comma) {
    173  1.1  christos 					low = low * 10 + *c - '0';
    174  1.1  christos 					if (low > 255)
    175  1.1  christos 						FAIL("lower bound too big");
    176  1.1  christos 				} else {
    177  1.1  christos 					seen_high = ISC_TRUE;
    178  1.1  christos 					high = high * 10 + *c - '0';
    179  1.1  christos 					if (high > 255)
    180  1.1  christos 						FAIL("upper bound too big");
    181  1.1  christos 				}
    182  1.1  christos 				++c;
    183  1.1  christos 				break;
    184  1.1  christos 			case ',':
    185  1.1  christos 				if (seen_comma)
    186  1.1  christos 					FAIL("multiple commas");
    187  1.1  christos 				seen_comma = ISC_TRUE;
    188  1.1  christos 				++c;
    189  1.1  christos 				break;
    190  1.1  christos 			default:
    191  1.1  christos 			case '{':
    192  1.1  christos 				FAIL("non digit/comma");
    193  1.1  christos 			case '}':
    194  1.1  christos 				if (seen_high && low > high)
    195  1.1  christos 					FAIL("bad parse bound");
    196  1.1  christos 				seen_comma = ISC_FALSE;
    197  1.1  christos 				state = none;
    198  1.1  christos 				++c;
    199  1.1  christos 				break;
    200  1.1  christos 			}
    201  1.1  christos 			break;
    202  1.1  christos 		case parse_bracket:
    203  1.1  christos 			switch (*c) {
    204  1.1  christos 			case '^':
    205  1.1  christos 				if (seen_char || neg) goto inside;
    206  1.1  christos 				neg = ISC_TRUE;
    207  1.1  christos 				++c;
    208  1.1  christos 				break;
    209  1.1  christos 			case '-':
    210  1.1  christos 				if (range == 2) goto inside;
    211  1.1  christos 				if (!seen_char) goto inside;
    212  1.1  christos 				if (range == 1)
    213  1.1  christos 					FAIL("bad range");
    214  1.1  christos 				range = 2;
    215  1.1  christos 				++c;
    216  1.1  christos 				break;
    217  1.1  christos 			case '[':
    218  1.1  christos 				++c;
    219  1.1  christos 				switch (*c) {
    220  1.1  christos 				case '.':	/* collating element */
    221  1.1  christos 					if (range != 0) --range;
    222  1.1  christos 					++c;
    223  1.1  christos 					state = parse_ce;
    224  1.1  christos 					seen_ce = ISC_FALSE;
    225  1.1  christos 					break;
    226  1.1  christos 				case '=':	/* equivalence class */
    227  1.1  christos 					if (range == 2)
    228  1.1  christos 					    FAIL("equivalence class in range");
    229  1.1  christos 					++c;
    230  1.1  christos 					state = parse_ec;
    231  1.1  christos 					seen_ec = ISC_FALSE;
    232  1.1  christos 					break;
    233  1.1  christos 				case ':':	/* character class */
    234  1.1  christos 					if (range == 2)
    235  1.1  christos 					      FAIL("character class in range");
    236  1.1  christos 					ccname = c;
    237  1.1  christos 					++c;
    238  1.1  christos 					state = parse_cc;
    239  1.1  christos 					break;
    240  1.1  christos 				}
    241  1.1  christos 				seen_char = ISC_TRUE;
    242  1.1  christos 				break;
    243  1.1  christos 			case ']':
    244  1.1  christos 				if (!c[1] && !seen_char)
    245  1.1  christos 					FAIL("unfinished brace");
    246  1.1  christos 				if (!seen_char)
    247  1.1  christos 					goto inside;
    248  1.1  christos 				++c;
    249  1.1  christos 				range = 0;
    250  1.1  christos 				have_atom = ISC_TRUE;
    251  1.1  christos 				state = none;
    252  1.1  christos 				break;
    253  1.1  christos 			default:
    254  1.1  christos 			inside:
    255  1.1  christos 				seen_char = ISC_TRUE;
    256  1.1  christos 				if (range == 2 && (*c & 0xff) < range_start)
    257  1.1  christos 					FAIL("out of order range");
    258  1.1  christos 				if (range != 0)
    259  1.1  christos 					--range;
    260  1.1  christos 				range_start = *c & 0xff;
    261  1.1  christos 				++c;
    262  1.1  christos 				break;
    263  1.1  christos 			};
    264  1.1  christos 			break;
    265  1.1  christos 		case parse_ce:
    266  1.1  christos 			switch (*c) {
    267  1.1  christos 			case '.':
    268  1.1  christos 				++c;
    269  1.1  christos 				switch (*c) {
    270  1.1  christos 				case ']':
    271  1.1  christos 					if (!seen_ce)
    272  1.1  christos 						 FAIL("empty ce");
    273  1.1  christos 					++c;
    274  1.1  christos 					state = parse_bracket;
    275  1.1  christos 					break;
    276  1.1  christos 				default:
    277  1.1  christos 					if (seen_ce)
    278  1.1  christos 						range_start = 256;
    279  1.1  christos 					else
    280  1.1  christos 						range_start = '.';
    281  1.1  christos 					seen_ce = ISC_TRUE;
    282  1.1  christos 					break;
    283  1.1  christos 				}
    284  1.1  christos 				break;
    285  1.1  christos 			default:
    286  1.1  christos 				if (seen_ce)
    287  1.1  christos 					range_start = 256;
    288  1.1  christos 				else
    289  1.1  christos 					range_start = *c;
    290  1.1  christos 				seen_ce = ISC_TRUE;
    291  1.1  christos 				++c;
    292  1.1  christos 				break;
    293  1.1  christos 			}
    294  1.1  christos 			break;
    295  1.1  christos 		case parse_ec:
    296  1.1  christos 			switch (*c) {
    297  1.1  christos 			case '=':
    298  1.1  christos 				++c;
    299  1.1  christos 				switch (*c) {
    300  1.1  christos 				case ']':
    301  1.1  christos 					if (!seen_ec)
    302  1.1  christos 						FAIL("no ec");
    303  1.1  christos 					++c;
    304  1.1  christos 					state = parse_bracket;
    305  1.1  christos 					break;
    306  1.1  christos 				default:
    307  1.1  christos 					seen_ec = ISC_TRUE;
    308  1.1  christos 					break;
    309  1.1  christos 				}
    310  1.1  christos 				break;
    311  1.1  christos 			default:
    312  1.1  christos 				seen_ec = ISC_TRUE;
    313  1.1  christos 				++c;
    314  1.1  christos 				break;
    315  1.1  christos 			}
    316  1.1  christos 			break;
    317  1.1  christos 		case parse_cc:
    318  1.1  christos 			switch (*c) {
    319  1.1  christos 			case ':':
    320  1.1  christos 				++c;
    321  1.1  christos 				switch (*c) {
    322  1.1  christos 				case ']': {
    323  1.1  christos 					unsigned int i;
    324  1.1  christos 					isc_boolean_t found = ISC_FALSE;
    325  1.1  christos 					for (i = 0;
    326  1.1  christos 					     i < sizeof(cc)/sizeof(*cc);
    327  1.1  christos 					     i++)
    328  1.1  christos 					{
    329  1.1  christos 						unsigned int len;
    330  1.1  christos 						len = strlen(cc[i]);
    331  1.1  christos 						if (len !=
    332  1.1  christos 						    (unsigned int)(c - ccname))
    333  1.1  christos 							continue;
    334  1.1  christos 						if (strncmp(cc[i], ccname, len))
    335  1.1  christos 							continue;
    336  1.1  christos 						found = ISC_TRUE;
    337  1.1  christos 					}
    338  1.1  christos 					if (!found)
    339  1.1  christos 						FAIL("unknown cc");
    340  1.1  christos 					++c;
    341  1.1  christos 					state = parse_bracket;
    342  1.1  christos 					break;
    343  1.1  christos 					}
    344  1.1  christos 				default:
    345  1.1  christos 					break;
    346  1.1  christos 				}
    347  1.1  christos 				break;
    348  1.1  christos 			default:
    349  1.1  christos 				++c;
    350  1.1  christos 				break;
    351  1.1  christos 			}
    352  1.1  christos 			break;
    353  1.1  christos 		}
    354  1.1  christos 	}
    355  1.1  christos 	if (group != 0)
    356  1.1  christos 		FAIL("group open");
    357  1.1  christos 	if (state != none)
    358  1.1  christos 		FAIL("incomplete");
    359  1.1  christos 	if (!have_atom)
    360  1.1  christos 		FAIL("no atom");
    361  1.1  christos 	return (sub);
    362  1.1  christos 
    363  1.1  christos  error:
    364  1.1  christos #if VALREGEX_REPORT_REASON
    365  1.1  christos 	fprintf(stderr, "%s\n", reason);
    366  1.1  christos #endif
    367  1.1  christos 	return (-1);
    368  1.1  christos }
    369