Home | History | Annotate | Line # | Download | only in vgrind
regexp.c revision 1.10
      1  1.10  christos /*	$NetBSD: regexp.c,v 1.10 2006/05/01 05:14:22 christos Exp $	*/
      2   1.3       jtc 
      3   1.1       cgd /*
      4   1.3       jtc  * Copyright (c) 1980, 1993
      5   1.3       jtc  *	The Regents of the University of California.  All rights reserved.
      6   1.3       jtc  *
      7   1.1       cgd  *
      8   1.1       cgd  * Redistribution and use in source and binary forms, with or without
      9   1.1       cgd  * modification, are permitted provided that the following conditions
     10   1.1       cgd  * are met:
     11   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     12   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     13   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     14   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     15   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     16   1.7       agc  * 3. Neither the name of the University nor the names of its contributors
     17   1.1       cgd  *    may be used to endorse or promote products derived from this software
     18   1.1       cgd  *    without specific prior written permission.
     19   1.1       cgd  *
     20   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     21   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     24   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30   1.1       cgd  * SUCH DAMAGE.
     31   1.1       cgd  */
     32   1.1       cgd 
     33   1.4     lukem #include <sys/cdefs.h>
     34   1.1       cgd #ifndef lint
     35   1.4     lukem __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
     36   1.4     lukem 	The Regents of the University of California.  All rights reserved.\n");
     37   1.3       jtc #endif /* not lint */
     38   1.3       jtc 
     39   1.3       jtc #ifndef lint
     40   1.3       jtc #if 0
     41   1.3       jtc static char sccsid[] = "@(#)regexp.c	8.1 (Berkeley) 6/6/93";
     42   1.3       jtc #endif
     43  1.10  christos __RCSID("$NetBSD: regexp.c,v 1.10 2006/05/01 05:14:22 christos Exp $");
     44   1.1       cgd #endif /* not lint */
     45   1.1       cgd 
     46  1.10  christos #include <assert.h>
     47   1.1       cgd #include <ctype.h>
     48   1.3       jtc #include <stdlib.h>
     49   1.3       jtc #include <string.h>
     50   1.3       jtc #include "extern.h"
     51   1.1       cgd 
/*
 * Truth values and the "nothing" value used throughout this file.
 * NIL is used both as a null pointer and as the zero byte that ends
 * a string or a converted expression.  TRUE is fully parenthesized so
 * that it expands safely inside any surrounding expression.
 */
#define FALSE	0
#define TRUE	(!(FALSE))
#define NIL	0
     55   1.1       cgd 
     56   1.3       jtc static void	expconv __P((void));
     57   1.3       jtc 
     58   1.5  christos boolean	 x_escaped;	/* true if we are currently x_escaped */
     59   1.5  christos char	*x_start;	/* start of string */
     60   1.3       jtc boolean	 l_onecase;	/* true if upper and lower equivalent */
     61   1.1       cgd 
     62   1.8       dsl #define makelower(c) (isupper((unsigned char)(c)) ? tolower((unsigned char)(c)) : (c))
     63   1.1       cgd 
     64   1.1       cgd /*  STRNCMP -	like strncmp except that we convert the
     65   1.1       cgd  *	 	first string to lower case before comparing
     66   1.1       cgd  *		if l_onecase is set.
     67   1.1       cgd  */
     68   1.1       cgd 
     69   1.3       jtc int
     70   1.1       cgd STRNCMP(s1, s2, len)
     71   1.4     lukem 	char *s1,*s2;
     72   1.4     lukem 	int len;
     73   1.1       cgd {
     74   1.1       cgd 	if (l_onecase) {
     75   1.1       cgd 	    do
     76   1.1       cgd 		if (*s2 - makelower(*s1))
     77   1.1       cgd 			return (*s2 - makelower(*s1));
     78   1.1       cgd 		else {
     79   1.1       cgd 			s2++;
     80   1.1       cgd 			s1++;
     81   1.1       cgd 		}
     82   1.1       cgd 	    while (--len);
     83   1.1       cgd 	} else {
     84   1.1       cgd 	    do
     85   1.1       cgd 		if (*s2 - *s1)
     86   1.1       cgd 			return (*s2 - *s1);
     87   1.1       cgd 		else {
     88   1.1       cgd 			s2++;
     89   1.1       cgd 			s1++;
     90   1.1       cgd 		}
     91   1.1       cgd 	    while (--len);
     92   1.1       cgd 	}
     93   1.1       cgd 	return(0);
     94   1.1       cgd }
     95   1.1       cgd 
/*	The conversion routines below turn an irregular expression into
 *	internal format.
 *
 *	Either meta symbols (such as \a \d or \p) or character strings or
 *	operations ( alternation or parenthesizing ) can be
 *	specified.  Each starts with a descriptor byte.  The descriptor
 *	byte has STR set for strings, META set for meta symbols
 *	and OPER set for operations.
 *	The descriptor byte can also have the OPT bit set if the object
 *	defined is optional.  Also ALT can be set to indicate an alternation.
 *
 *	For meta symbols the byte following the descriptor byte identifies
 *	the meta symbol (an ascii 'a', 'd', 'e', 'p', '^', '$' or ')').
 *	For operations it identifies the operation ('|' or '(').  For
 *	strings the byte after the descriptor is a character count for
 *	the string:
 *
 *		meta symbols :=	descriptor
 *				symbol
 *
 *		strings :=	descriptor
 *				character count
 *				the string
 *
 *		operations :=	descriptor
 *				symbol
 *				character count
 *
 *	The character count of an operation is the offset from the start
 *	of the operation block to the block that follows the construct
 *	(see OPTR).  All counts are stored in a single char.  The whole
 *	converted expression ends with a zero byte.
 */

/*
 *  handy macros for accessing parts of match blocks; A points at the
 *  descriptor byte of a block.  Arguments are parenthesized so that any
 *  pointer expression may be passed.  MSYM, OSYM, OCNT and SCNT are lvalues.
 */
#define MSYM(A) (*((A)+1))	/* symbol in a meta symbol block */
#define MNEXT(A) ((A)+2)	/* character following a metasymbol block */

#define OSYM(A) (*((A)+1))	/* symbol in an operation block */
#define OCNT(A) (*((A)+2))	/* character count */
#define ONEXT(A) ((A)+3)	/* next character after the operation */
#define OPTR(A) ((A)+*((A)+2))	/* place pointed to by the operator */

#define SCNT(A) (*((A)+1))	/* byte count of a string */
#define SSTR(A) ((A)+2)		/* address of the string */
#define SNEXT(A) ((A)+2+*((A)+1))	/* character following the string */

/*
 *  bit flags in the descriptor
 */
#define OPT 1		/* the block is optional */
#define STR 2		/* string block */
#define META 4		/* meta symbol block */
#define ALT 8		/* the block is followed by an alternative */
#define OPER 16		/* operation block */
    147   1.1       cgd 
    148   1.3       jtc static char *ccre;	/* pointer to current position in converted exp*/
    149   1.3       jtc static char *ure;	/* pointer current position in unconverted exp */
    150   1.1       cgd 
    151   1.1       cgd char *
    152   1.1       cgd convexp(re)
    153   1.1       cgd     char *re;		/* unconverted irregular expression */
    154   1.1       cgd {
    155   1.4     lukem     char *cre;		/* pointer to converted regular expression */
    156   1.1       cgd 
    157   1.1       cgd     /* allocate room for the converted expression */
    158   1.1       cgd     if (re == NIL)
    159   1.1       cgd 	return (NIL);
    160   1.1       cgd     if (*re == '\0')
    161   1.1       cgd 	return (NIL);
    162   1.6    itojun     cre = malloc(4 * strlen(re) + 3);
    163   1.1       cgd     ccre = cre;
    164   1.1       cgd     ure = re;
    165   1.1       cgd 
    166   1.1       cgd     /* start the conversion with a \a */
    167   1.1       cgd     *cre = META | OPT;
    168   1.1       cgd     MSYM(cre) = 'a';
    169   1.1       cgd     ccre = MNEXT(cre);
    170   1.1       cgd 
    171   1.1       cgd     /* start the conversion (its recursive) */
    172   1.6    itojun     expconv();
    173   1.1       cgd     *ccre = 0;
    174   1.1       cgd     return (cre);
    175   1.1       cgd }
    176   1.1       cgd 
    177   1.3       jtc static void
    178   1.1       cgd expconv()
    179   1.1       cgd {
    180   1.4     lukem     char *cs;		/* pointer to current symbol in converted exp */
    181   1.4     lukem     char c;		/* character being processed */
    182   1.4     lukem     char *acs;		/* pinter to last alternate */
    183   1.4     lukem     int temp;
    184   1.1       cgd 
    185   1.1       cgd     /* let the conversion begin */
    186   1.1       cgd     acs = NIL;
    187   1.1       cgd     cs = NIL;
    188   1.1       cgd     while (*ure != NIL) {
    189   1.1       cgd 	switch (c = *ure++) {
    190   1.1       cgd 
    191   1.1       cgd 	case '\\':
    192   1.1       cgd 	    switch (c = *ure++) {
    193   1.1       cgd 
    194   1.1       cgd 	    /* escaped characters are just characters */
    195   1.1       cgd 	    default:
    196   1.1       cgd 		if (cs == NIL || (*cs & STR) == 0) {
    197   1.1       cgd 		    cs = ccre;
    198   1.1       cgd 		    *cs = STR;
    199   1.1       cgd 		    SCNT(cs) = 1;
    200   1.1       cgd 		    ccre += 2;
    201   1.1       cgd 		} else
    202   1.1       cgd 		    SCNT(cs)++;
    203   1.1       cgd 		*ccre++ = c;
    204   1.1       cgd 		break;
    205   1.1       cgd 
    206   1.1       cgd 	    /* normal(?) metacharacters */
    207   1.1       cgd 	    case 'a':
    208   1.1       cgd 	    case 'd':
    209   1.1       cgd 	    case 'e':
    210   1.1       cgd 	    case 'p':
    211   1.1       cgd 		if (acs != NIL && acs != cs) {
    212   1.1       cgd 		    do {
    213   1.1       cgd 			temp = OCNT(acs);
    214   1.1       cgd 			OCNT(acs) = ccre - acs;
    215   1.1       cgd 			acs -= temp;
    216   1.1       cgd 		    } while (temp != 0);
    217   1.1       cgd 		    acs = NIL;
    218   1.1       cgd 		}
    219   1.1       cgd 		cs = ccre;
    220   1.1       cgd 		*cs = META;
    221   1.1       cgd 		MSYM(cs) = c;
    222   1.1       cgd 		ccre = MNEXT(cs);
    223   1.1       cgd 		break;
    224   1.1       cgd 	    }
    225   1.1       cgd 	    break;
    226   1.1       cgd 
    227   1.1       cgd 	/* just put the symbol in */
    228   1.1       cgd 	case '^':
    229   1.1       cgd 	case '$':
    230   1.1       cgd 	    if (acs != NIL && acs != cs) {
    231   1.1       cgd 		do {
    232   1.1       cgd 		    temp = OCNT(acs);
    233   1.1       cgd 		    OCNT(acs) = ccre - acs;
    234   1.1       cgd 		    acs -= temp;
    235   1.1       cgd 		} while (temp != 0);
    236   1.1       cgd 		acs = NIL;
    237   1.1       cgd 	    }
    238   1.1       cgd 	    cs = ccre;
    239   1.1       cgd 	    *cs = META;
    240   1.1       cgd 	    MSYM(cs) = c;
    241   1.1       cgd 	    ccre = MNEXT(cs);
    242   1.1       cgd 	    break;
    243   1.1       cgd 
    244   1.1       cgd 	/* mark the last match sequence as optional */
    245   1.1       cgd 	case '?':
    246   1.1       cgd 	    if (cs)
    247   1.1       cgd 	    	*cs = *cs | OPT;
    248   1.1       cgd 	    break;
    249   1.1       cgd 
    250   1.1       cgd 	/* recurse and define a subexpression */
    251   1.1       cgd 	case '(':
    252   1.1       cgd 	    if (acs != NIL && acs != cs) {
    253   1.1       cgd 		do {
    254   1.1       cgd 		    temp = OCNT(acs);
    255   1.1       cgd 		    OCNT(acs) = ccre - acs;
    256   1.1       cgd 		    acs -= temp;
    257   1.1       cgd 		} while (temp != 0);
    258   1.1       cgd 		acs = NIL;
    259   1.1       cgd 	    }
    260   1.1       cgd 	    cs = ccre;
    261   1.1       cgd 	    *cs = OPER;
    262   1.1       cgd 	    OSYM(cs) = '(';
    263   1.1       cgd 	    ccre = ONEXT(cs);
    264   1.6    itojun 	    expconv();
    265   1.1       cgd 	    OCNT(cs) = ccre - cs;		/* offset to next symbol */
    266   1.1       cgd 	    break;
    267   1.1       cgd 
    268   1.3       jtc 	/* reurn from a recursion */
    269   1.1       cgd 	case ')':
    270   1.1       cgd 	    if (acs != NIL) {
    271   1.1       cgd 		do {
    272   1.1       cgd 		    temp = OCNT(acs);
    273   1.1       cgd 		    OCNT(acs) = ccre - acs;
    274   1.1       cgd 		    acs -= temp;
    275   1.1       cgd 		} while (temp != 0);
    276   1.1       cgd 		acs = NIL;
    277   1.1       cgd 	    }
    278   1.1       cgd 	    cs = ccre;
    279   1.1       cgd 	    *cs = META;
    280   1.1       cgd 	    MSYM(cs) = c;
    281   1.1       cgd 	    ccre = MNEXT(cs);
    282   1.1       cgd 	    return;
    283   1.1       cgd 
    284   1.1       cgd 	/* mark the last match sequence as having an alternate */
    285   1.1       cgd 	/* the third byte will contain an offset to jump over the */
    286   1.1       cgd 	/* alternate match in case the first did not fail */
    287   1.1       cgd 	case '|':
    288   1.1       cgd 	    if (acs != NIL && acs != cs)
    289   1.1       cgd 		OCNT(ccre) = ccre - acs;	/* make a back pointer */
    290   1.1       cgd 	    else
    291   1.1       cgd 		OCNT(ccre) = 0;
    292   1.9  christos 	    assert(cs != NULL);
    293   1.1       cgd 	    *cs |= ALT;
    294   1.1       cgd 	    cs = ccre;
    295   1.1       cgd 	    *cs = OPER;
    296   1.1       cgd 	    OSYM(cs) = '|';
    297   1.1       cgd 	    ccre = ONEXT(cs);
    298   1.1       cgd 	    acs = cs;	/* remember that the pointer is to be filles */
    299   1.1       cgd 	    break;
    300   1.1       cgd 
    301   1.1       cgd 	/* if its not a metasymbol just build a scharacter string */
    302   1.1       cgd 	default:
    303   1.1       cgd 	    if (cs == NIL || (*cs & STR) == 0) {
    304   1.1       cgd 		cs = ccre;
    305   1.1       cgd 		*cs = STR;
    306   1.1       cgd 		SCNT(cs) = 1;
    307   1.1       cgd 		ccre = SSTR(cs);
    308   1.1       cgd 	    } else
    309   1.1       cgd 		SCNT(cs)++;
    310   1.1       cgd 	    *ccre++ = c;
    311   1.1       cgd 	    break;
    312   1.1       cgd 	}
    313   1.1       cgd     }
    314   1.1       cgd     if (acs != NIL) {
    315   1.1       cgd 	do {
    316   1.1       cgd 	    temp = OCNT(acs);
    317   1.1       cgd 	    OCNT(acs) = ccre - acs;
    318   1.1       cgd 	    acs -= temp;
    319   1.1       cgd 	} while (temp != 0);
    320   1.1       cgd 	acs = NIL;
    321   1.1       cgd     }
    322   1.1       cgd     return;
    323   1.1       cgd }
    324   1.1       cgd /* end of convertre */
    325   1.1       cgd 
    326   1.1       cgd 
    327   1.1       cgd /*
    328   1.1       cgd  *	The following routine recognises an irregular expresion
    329   1.1       cgd  *	with the following special characters:
    330   1.1       cgd  *
    331   1.1       cgd  *		\?	-	means last match was optional
    332   1.1       cgd  *		\a	-	matches any number of characters
    333   1.1       cgd  *		\d	-	matches any number of spaces and tabs
    334   1.1       cgd  *		\p	-	matches any number of alphanumeric
    335   1.1       cgd  *				characters. The
    336   1.1       cgd  *				characters matched will be copied into
    337   1.1       cgd  *				the area pointed to by 'name'.
    338   1.1       cgd  *		\|	-	alternation
    339   1.1       cgd  *		\( \)	-	grouping used mostly for alternation and
    340   1.1       cgd  *				optionality
    341   1.1       cgd  *
    342   1.1       cgd  *	The irregular expression must be translated to internal form
    343   1.1       cgd  *	prior to calling this routine
    344   1.1       cgd  *
    345   1.1       cgd  *	The value returned is the pointer to the first non \a
    346   1.1       cgd  *	character matched.
    347   1.1       cgd  */
    348   1.1       cgd 
    349   1.1       cgd char *
    350   1.6    itojun expmatch(s, re, mstring)
    351   1.4     lukem     char *s;		/* string to check for a match in */
    352   1.4     lukem     char *re;		/* a converted irregular expression */
    353   1.4     lukem     char *mstring;	/* where to put whatever matches a \p */
    354   1.1       cgd {
    355   1.4     lukem     char *cs;		/* the current symbol */
    356   1.4     lukem     char *ptr,*s1;	/* temporary pointer */
    357   1.4     lukem     boolean matched;	/* a temporary boolean */
    358   1.1       cgd 
    359   1.1       cgd     /* initial conditions */
    360   1.1       cgd     if (re == NIL)
    361   1.1       cgd 	return (NIL);
    362   1.1       cgd     cs = re;
    363   1.1       cgd     matched = FALSE;
    364   1.1       cgd 
    365   1.1       cgd     /* loop till expression string is exhausted (or at least pretty tired) */
    366   1.1       cgd     while (*cs) {
    367   1.1       cgd 	switch (*cs & (OPER | STR | META)) {
    368   1.1       cgd 
    369   1.1       cgd 	/* try to match a string */
    370   1.1       cgd 	case STR:
    371   1.1       cgd 	    matched = !STRNCMP (s, SSTR(cs), SCNT(cs));
    372   1.1       cgd 	    if (matched) {
    373   1.1       cgd 
    374   1.1       cgd 		/* hoorah it matches */
    375   1.1       cgd 		s += SCNT(cs);
    376   1.1       cgd 		cs = SNEXT(cs);
    377   1.1       cgd 	    } else if (*cs & ALT) {
    378   1.1       cgd 
    379   1.1       cgd 		/* alternation, skip to next expression */
    380   1.1       cgd 		cs = SNEXT(cs);
    381   1.1       cgd 	    } else if (*cs & OPT) {
    382   1.1       cgd 
    383   1.1       cgd 		/* the match is optional */
    384   1.1       cgd 		cs = SNEXT(cs);
    385   1.1       cgd 		matched = 1;		/* indicate a successful match */
    386   1.1       cgd 	    } else {
    387   1.1       cgd 
    388   1.1       cgd 		/* no match, error return */
    389   1.1       cgd 		return (NIL);
    390   1.1       cgd 	    }
    391   1.1       cgd 	    break;
    392   1.1       cgd 
    393   1.1       cgd 	/* an operator, do something fancy */
    394   1.1       cgd 	case OPER:
    395   1.1       cgd 	    switch (OSYM(cs)) {
    396   1.1       cgd 
    397   1.1       cgd 	    /* this is an alternation */
    398   1.1       cgd 	    case '|':
    399   1.1       cgd 		if (matched)
    400   1.1       cgd 
    401   1.1       cgd 		    /* last thing in the alternation was a match, skip ahead */
    402   1.1       cgd 		    cs = OPTR(cs);
    403   1.1       cgd 		else
    404   1.1       cgd 
    405   1.1       cgd 		    /* no match, keep trying */
    406   1.1       cgd 		    cs = ONEXT(cs);
    407   1.1       cgd 		break;
    408   1.1       cgd 
    409   1.1       cgd 	    /* this is a grouping, recurse */
    410   1.1       cgd 	    case '(':
    411   1.6    itojun 		ptr = expmatch(s, ONEXT(cs), mstring);
    412   1.1       cgd 		if (ptr != NIL) {
    413   1.1       cgd 
    414   1.1       cgd 		    /* the subexpression matched */
    415   1.1       cgd 		    matched = 1;
    416   1.1       cgd 		    s = ptr;
    417   1.1       cgd 		} else if (*cs & ALT) {
    418   1.1       cgd 
    419   1.1       cgd 		    /* alternation, skip to next expression */
    420   1.1       cgd 		    matched = 0;
    421   1.1       cgd 		} else if (*cs & OPT) {
    422   1.1       cgd 
    423   1.1       cgd 		    /* the match is optional */
    424   1.1       cgd 		    matched = 1;	/* indicate a successful match */
    425   1.1       cgd 		} else {
    426   1.1       cgd 
    427   1.1       cgd 		    /* no match, error return */
    428   1.1       cgd 		    return (NIL);
    429   1.1       cgd 		}
    430   1.1       cgd 		cs = OPTR(cs);
    431   1.1       cgd 		break;
    432   1.1       cgd 	    }
    433   1.1       cgd 	    break;
    434   1.1       cgd 
    435   1.1       cgd 	/* try to match a metasymbol */
    436   1.1       cgd 	case META:
    437   1.1       cgd 	    switch (MSYM(cs)) {
    438   1.1       cgd 
    439   1.1       cgd 	    /* try to match anything and remember what was matched */
    440   1.1       cgd 	    case 'p':
    441   1.1       cgd 		/*
    442   1.1       cgd 		 *  This is really the same as trying the match the
    443   1.1       cgd 		 *  remaining parts of the expression to any subset
    444   1.1       cgd 		 *  of the string.
    445   1.1       cgd 		 */
    446   1.1       cgd 		s1 = s;
    447   1.1       cgd 		do {
    448   1.6    itojun 		    ptr = expmatch(s1, MNEXT(cs), mstring);
    449   1.1       cgd 		    if (ptr != NIL && s1 != s) {
    450   1.1       cgd 
    451   1.1       cgd 			/* we have a match, remember the match */
    452   1.6    itojun 			strncpy(mstring, s, s1 - s);
    453   1.1       cgd 			mstring[s1 - s] = '\0';
    454   1.1       cgd 			return (ptr);
    455   1.1       cgd 		    } else if (ptr != NIL && (*cs & OPT)) {
    456   1.1       cgd 
    457   1.1       cgd 			/* it was aoptional so no match is ok */
    458   1.1       cgd 			return (ptr);
    459   1.1       cgd 		    } else if (ptr != NIL) {
    460   1.1       cgd 
    461   1.1       cgd 			/* not optional and we still matched */
    462   1.1       cgd 			return (NIL);
    463   1.1       cgd 		    }
    464   1.5  christos 		    if (!isalnum((unsigned char)*s1) && *s1 != '_')
    465   1.1       cgd 			return (NIL);
    466   1.1       cgd 		    if (*s1 == '\\')
    467   1.5  christos 			x_escaped = x_escaped ? FALSE : TRUE;
    468   1.1       cgd 		    else
    469   1.5  christos 			x_escaped = FALSE;
    470   1.1       cgd 		} while (*s1++);
    471   1.1       cgd 		return (NIL);
    472   1.1       cgd 
    473   1.1       cgd 	    /* try to match anything */
    474   1.1       cgd 	    case 'a':
    475   1.1       cgd 		/*
    476   1.1       cgd 		 *  This is really the same as trying the match the
    477   1.1       cgd 		 *  remaining parts of the expression to any subset
    478   1.1       cgd 		 *  of the string.
    479   1.1       cgd 		 */
    480   1.1       cgd 		s1 = s;
    481   1.1       cgd 		do {
    482   1.6    itojun 		    ptr = expmatch(s1, MNEXT(cs), mstring);
    483   1.1       cgd 		    if (ptr != NIL && s1 != s) {
    484   1.1       cgd 
    485   1.1       cgd 			/* we have a match */
    486   1.1       cgd 			return (ptr);
    487   1.1       cgd 		    } else if (ptr != NIL && (*cs & OPT)) {
    488   1.1       cgd 
    489   1.1       cgd 			/* it was aoptional so no match is ok */
    490   1.1       cgd 			return (ptr);
    491   1.1       cgd 		    } else if (ptr != NIL) {
    492   1.1       cgd 
    493   1.1       cgd 			/* not optional and we still matched */
    494   1.1       cgd 			return (NIL);
    495   1.1       cgd 		    }
    496   1.1       cgd 		    if (*s1 == '\\')
    497   1.5  christos 			x_escaped = x_escaped ? FALSE : TRUE;
    498   1.1       cgd 		    else
    499   1.5  christos 			x_escaped = FALSE;
    500   1.1       cgd 		} while (*s1++);
    501   1.1       cgd 		return (NIL);
    502   1.1       cgd 
    503   1.5  christos 	    /* fail if we are currently x_escaped */
    504   1.1       cgd 	    case 'e':
    505   1.5  christos 		if (x_escaped)
    506   1.1       cgd 		    return(NIL);
    507   1.1       cgd 		cs = MNEXT(cs);
    508   1.1       cgd 		break;
    509   1.1       cgd 
    510   1.1       cgd 	    /* match any number of tabs and spaces */
    511   1.1       cgd 	    case 'd':
    512   1.1       cgd 		ptr = s;
    513   1.1       cgd 		while (*s == ' ' || *s == '\t')
    514   1.1       cgd 		    s++;
    515   1.5  christos 		if (s != ptr || s == x_start) {
    516   1.1       cgd 
    517   1.1       cgd 		    /* match, be happy */
    518   1.1       cgd 		    matched = 1;
    519   1.1       cgd 		    cs = MNEXT(cs);
    520   1.1       cgd 		} else if (*s == '\n' || *s == '\0') {
    521   1.1       cgd 
    522   1.1       cgd 		    /* match, be happy */
    523   1.1       cgd 		    matched = 1;
    524   1.1       cgd 		    cs = MNEXT(cs);
    525   1.1       cgd 		} else if (*cs & ALT) {
    526   1.1       cgd 
    527   1.1       cgd 		    /* try the next part */
    528   1.1       cgd 		    matched = 0;
    529   1.1       cgd 		    cs = MNEXT(cs);
    530   1.1       cgd 		} else if (*cs & OPT) {
    531   1.1       cgd 
    532   1.1       cgd 		    /* doesn't matter */
    533   1.1       cgd 		    matched = 1;
    534   1.1       cgd 		    cs = MNEXT(cs);
    535   1.1       cgd 		} else
    536   1.1       cgd 
    537   1.1       cgd 		    /* no match, error return */
    538   1.1       cgd 		    return (NIL);
    539   1.1       cgd 		break;
    540   1.1       cgd 
    541   1.1       cgd 	    /* check for end of line */
    542   1.1       cgd 	    case '$':
    543   1.1       cgd 		if (*s == '\0' || *s == '\n') {
    544   1.1       cgd 
    545   1.1       cgd 		    /* match, be happy */
    546   1.1       cgd 		    s++;
    547   1.1       cgd 		    matched = 1;
    548   1.1       cgd 		    cs = MNEXT(cs);
    549   1.1       cgd 		} else if (*cs & ALT) {
    550   1.1       cgd 
    551   1.1       cgd 		    /* try the next part */
    552   1.1       cgd 		    matched = 0;
    553   1.1       cgd 		    cs = MNEXT(cs);
    554   1.1       cgd 		} else if (*cs & OPT) {
    555   1.1       cgd 
    556   1.1       cgd 		    /* doesn't matter */
    557   1.1       cgd 		    matched = 1;
    558   1.1       cgd 		    cs = MNEXT(cs);
    559   1.1       cgd 		} else
    560   1.1       cgd 
    561   1.1       cgd 		    /* no match, error return */
    562   1.1       cgd 		    return (NIL);
    563   1.1       cgd 		break;
    564   1.1       cgd 
    565   1.1       cgd 	    /* check for start of line */
    566   1.1       cgd 	    case '^':
    567   1.5  christos 		if (s == x_start) {
    568   1.1       cgd 
    569   1.1       cgd 		    /* match, be happy */
    570   1.1       cgd 		    matched = 1;
    571   1.1       cgd 		    cs = MNEXT(cs);
    572   1.1       cgd 		} else if (*cs & ALT) {
    573   1.1       cgd 
    574   1.1       cgd 		    /* try the next part */
    575   1.1       cgd 		    matched = 0;
    576   1.1       cgd 		    cs = MNEXT(cs);
    577   1.1       cgd 		} else if (*cs & OPT) {
    578   1.1       cgd 
    579   1.1       cgd 		    /* doesn't matter */
    580   1.1       cgd 		    matched = 1;
    581   1.1       cgd 		    cs = MNEXT(cs);
    582   1.1       cgd 		} else
    583   1.1       cgd 
    584   1.1       cgd 		    /* no match, error return */
    585   1.1       cgd 		    return (NIL);
    586   1.1       cgd 		break;
    587   1.1       cgd 
    588   1.1       cgd 	    /* end of a subexpression, return success */
    589   1.1       cgd 	    case ')':
    590   1.1       cgd 		return (s);
    591   1.1       cgd 	    }
    592   1.1       cgd 	    break;
    593   1.1       cgd 	}
    594   1.1       cgd     }
    595   1.1       cgd     return (s);
    596   1.1       cgd }
    597