Home | History | Annotate | Line # | Download | only in tr
str.c revision 1.1
      1 /*-
      2  * Copyright (c) 1991 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the University of
     16  *	California, Berkeley and its contributors.
     17  * 4. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
#ifndef lint
/* Source version identification string; compiled out when `lint' is defined. */
static char sccsid[] = "@(#)str.c	5.9 (Berkeley) 3/4/93";
#endif /* not lint */
     37 
     38 #include <sys/cdefs.h>
     39 #include <sys/types.h>
     40 
     41 #include <errno.h>
     42 #include <stddef.h>
     43 #include <stdio.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 
     47 #include "extern.h"
     48 
/*
 * Forward declarations for the file-local helpers used by next().
 * __P() wraps the parameter list so the prototypes can be dropped for
 * pre-ANSI compilers.
 */
static int	backslash __P((STR *));
static int	bracket __P((STR *));
static int	c_class __P((const void *, const void *));
static void	genclass __P((STR *));
static void	genequiv __P((STR *));
static int	genrange __P((STR *));
static void	genseq __P((STR *));
     56 
     57 int
     58 next(s)
     59 	register STR *s;
     60 {
     61 	register int ch;
     62 
     63 	switch (s->state) {
     64 	case EOS:
     65 		return (0);
     66 	case INFINITE:
     67 		return (1);
     68 	case NORMAL:
     69 		switch (ch = *s->str) {
     70 		case '\0':
     71 			s->state = EOS;
     72 			return (0);
     73 		case '\\':
     74 			s->lastch = backslash(s);
     75 			break;
     76 		case '[':
     77 			if (bracket(s))
     78 				return (next(s));
     79 			/* FALLTHROUGH */
     80 		default:
     81 			++s->str;
     82 			s->lastch = ch;
     83 			break;
     84 		}
     85 
     86 		/* We can start a range at any time. */
     87 		if (s->str[0] == '-' && genrange(s))
     88 			return (next(s));
     89 		return (1);
     90 	case RANGE:
     91 		if (s->cnt-- == 0) {
     92 			s->state = NORMAL;
     93 			return (next(s));
     94 		}
     95 		++s->lastch;
     96 		return (1);
     97 	case SEQUENCE:
     98 		if (s->cnt-- == 0) {
     99 			s->state = NORMAL;
    100 			return (next(s));
    101 		}
    102 		return (1);
    103 	case SET:
    104 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
    105 			s->state = NORMAL;
    106 			return (next(s));
    107 		}
    108 		return (1);
    109 	}
    110 	/* NOTREACHED */
    111 }
    112 
    113 static int
    114 bracket(s)
    115 	register STR *s;
    116 {
    117 	register char *p;
    118 
    119 	switch (s->str[1]) {
    120 	case ':':				/* "[:class:]" */
    121 		if ((p = strstr(s->str + 2, ":]")) == NULL)
    122 			return (0);
    123 		*p = '\0';
    124 		s->str += 2;
    125 		genclass(s);
    126 		s->str = p + 2;
    127 		return (1);
    128 	case '=':				/* "[=equiv=]" */
    129 		if ((p = strstr(s->str + 2, "=]")) == NULL)
    130 			return (0);
    131 		s->str += 2;
    132 		genequiv(s);
    133 		return (1);
    134 	default:				/* "[\###*n]" or "[#*n]" */
    135 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
    136 			return (0);
    137 		if (p[0] != '*' || index(p, ']') == NULL)
    138 			return (0);
    139 		s->str += 1;
    140 		genseq(s);
    141 		return (1);
    142 	}
    143 	/* NOTREACHED */
    144 }
    145 
/*
 * Declare the character classification routines as real functions so
 * that their addresses can be stored in the classes[] table.
 * isblank() is deliberately absent from this list: it is supplied
 * locally below with internal linkage, and declaring it here with
 * external linkage as well would be a linkage conflict.
 */
int isalnum __P((int)),
    isalpha __P((int)),
    isspace __P((int)),
    iscntrl __P((int)),
    isdigit __P((int)),
    isgraph __P((int)),
    islower __P((int)),
    isprint __P((int)),
    ispunct __P((int)),
    isupper __P((int)),
    isxdigit __P((int));

static int isblank __P((int));

/*
 * isblank --
 *	Local stand-in for the library routine (until 4.4).
 *	Returns 1 if x is a space or a horizontal tab, 0 otherwise.
 */
static int
isblank(x)
	int x;
{
	return (x == ' ' || x == '\t');
}
    166 
    167 
/*
 * Description of one character class accepted as "[:name:]".
 */
typedef struct {
	char *name;			/* class name; the lookup key */
	int (*func) __P((int));		/* membership predicate */
	int *set;			/* member list, filled in by genclass() */
} CLASS;

/*
 * Table of known classes.  genclass() searches it with bsearch(), so the
 * entries MUST remain sorted by name (strcmp() order).  The `set' member
 * is left out of every initializer and therefore starts out as NULL.
 */
static CLASS classes[] = {
	{ "alnum",  isalnum,  },
	{ "alpha",  isalpha,  },
	{ "blank",  isblank,  },
	{ "cntrl",  iscntrl,  },
	{ "digit",  isdigit,  },
	{ "graph",  isgraph,  },
	{ "lower",  islower,  },
	{ "print",  isprint,  },
	{ "punct",  ispunct,  },
	{ "space",  isspace,  },
	{ "upper",  isupper,  },
	{ "xdigit", isxdigit, },
};
    188 
    189 static void
    190 genclass(s)
    191 	STR *s;
    192 {
    193 	register int cnt, (*func) __P((int));
    194 	CLASS *cp, tmp;
    195 	int *p;
    196 
    197 	tmp.name = s->str;
    198 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
    199 	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
    200 		err("unknown class %s", s->str);
    201 
    202 	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
    203 		err("%s", strerror(errno));
    204 	bzero(p, (NCHARS + 1) * sizeof(int));
    205 	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
    206 		if ((func)(cnt))
    207 			*p++ = cnt;
    208 	*p = OOBCH;
    209 
    210 	s->cnt = 0;
    211 	s->state = SET;
    212 	s->set = cp->set;
    213 }
    214 
    215 static int
    216 c_class(a, b)
    217 	const void *a, *b;
    218 {
    219 	return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
    220 }
    221 
    222 /*
    223  * English doesn't have any equivalence classes, so for now
    224  * we just syntax check and grab the character.
    225  */
    226 static void
    227 genequiv(s)
    228 	STR *s;
    229 {
    230 	if (*s->str == '\\') {
    231 		s->equiv[0] = backslash(s);
    232 		if (*s->str != '=')
    233 			err("misplaced equivalence equals sign");
    234 	} else {
    235 		s->equiv[0] = s->str[0];
    236 		if (s->str[1] != '=')
    237 			err("misplaced equivalence equals sign");
    238 	}
    239 	s->str += 2;
    240 	s->cnt = 0;
    241 	s->state = SET;
    242 	s->set = s->equiv;
    243 }
    244 
    245 static int
    246 genrange(s)
    247 	STR *s;
    248 {
    249 	int stopval;
    250 	char *savestart;
    251 
    252 	savestart = s->str;
    253 	stopval = *++s->str == '\\' ? backslash(s) : *s->str;
    254 	if (stopval < s->lastch) {
    255 		s->str = savestart;
    256 		return (0);
    257 	}
    258 	s->cnt = stopval - s->lastch + 1;
    259 	s->state = RANGE;
    260 	--s->lastch;
    261 	return (1);
    262 }
    263 
    264 static void
    265 genseq(s)
    266 	STR *s;
    267 {
    268 	char *ep;
    269 
    270 	if (s->which == STRING1)
    271 		err("sequences only valid in string2");
    272 
    273 	if (*s->str == '\\')
    274 		s->lastch = backslash(s);
    275 	else
    276 		s->lastch = *s->str++;
    277 	if (*s->str != '*')
    278 		err("misplaced sequence asterisk");
    279 
    280 	switch (*++s->str) {
    281 	case '\\':
    282 		s->cnt = backslash(s);
    283 		break;
    284 	case ']':
    285 		s->cnt = 0;
    286 		++s->str;
    287 		break;
    288 	default:
    289 		if (isdigit(*s->str)) {
    290 			s->cnt = strtol(s->str, &ep, 0);
    291 			if (*ep == ']') {
    292 				s->str = ep + 1;
    293 				break;
    294 			}
    295 		}
    296 		err("illegal sequence count");
    297 		/* NOTREACHED */
    298 	}
    299 
    300 	s->state = s->cnt ? SEQUENCE : INFINITE;
    301 }
    302 
    303 /* Use the #defines isXXX() here, DON'T use them above. */
    304 #include <ctype.h>
    305 
    306 /*
    307  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
    308  * an escape code or a literal character.
    309  */
    310 static int
    311 backslash(s)
    312 	register STR *s;
    313 {
    314 	register int ch, cnt, val;
    315 
    316 	for (cnt = val = 0;;) {
    317 		ch = *++s->str;
    318 		if (!isascii(ch) || !isdigit(ch))
    319 			break;
    320 		val = val * 8 + ch - '0';
    321 		if (++cnt == 3) {
    322 			++s->str;
    323 			break;
    324 		}
    325 	}
    326 	if (cnt)
    327 		return (val);
    328 	if (ch != '\0')
    329 		++s->str;
    330 	switch (ch) {
    331 		case 'a':			/* escape characters */
    332 			return ('\7');
    333 		case 'b':
    334 			return ('\b');
    335 		case 'f':
    336 			return ('\f');
    337 		case 'n':
    338 			return ('\n');
    339 		case 'r':
    340 			return ('\r');
    341 		case 't':
    342 			return ('\t');
    343 		case 'v':
    344 			return ('\13');
    345 		case '\0':			/*  \" -> \ */
    346 			s->state = EOS;
    347 			return ('\\');
    348 		default:			/* \x" -> x */
    349 			return (ch);
    350 	}
    351 }
    352