Home | History | Annotate | Line # | Download | only in tr
str.c revision 1.10
      1 /*	$NetBSD: str.c,v 1.10 2003/08/07 11:16:46 agc Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 #if 0
     35 static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
     36 #endif
     37 __RCSID("$NetBSD: str.c,v 1.10 2003/08/07 11:16:46 agc Exp $");
     38 #endif /* not lint */
     39 
     40 #include <sys/cdefs.h>
     41 #include <sys/types.h>
     42 
     43 #include <err.h>
     44 #include <errno.h>
     45 #include <stddef.h>
     46 #include <stdio.h>
     47 #include <stdlib.h>
     48 #include <string.h>
     49 #include <ctype.h>
     50 
     51 #include "extern.h"
     52 
     53 static int	backslash __P((STR *));
     54 static int	bracket __P((STR *));
     55 static int	c_class __P((const void *, const void *));
     56 static void	genclass __P((STR *));
     57 static void	genequiv __P((STR *));
     58 static int	genrange __P((STR *));
     59 static void	genseq __P((STR *));
     60 
     61 int
     62 next(s)
     63 	STR *s;
     64 {
     65 	int ch;
     66 
     67 	switch (s->state) {
     68 	case EOS:
     69 		return (0);
     70 	case INFINITE:
     71 		return (1);
     72 	case NORMAL:
     73 		switch (ch = *s->str) {
     74 		case '\0':
     75 			s->state = EOS;
     76 			return (0);
     77 		case '\\':
     78 			s->lastch = backslash(s);
     79 			break;
     80 		case '[':
     81 			if (bracket(s))
     82 				return (next(s));
     83 			/* FALLTHROUGH */
     84 		default:
     85 			++s->str;
     86 			s->lastch = ch;
     87 			break;
     88 		}
     89 
     90 		/* We can start a range at any time. */
     91 		if (s->str[0] == '-' && genrange(s))
     92 			return (next(s));
     93 		return (1);
     94 	case RANGE:
     95 		if (s->cnt-- == 0) {
     96 			s->state = NORMAL;
     97 			return (next(s));
     98 		}
     99 		++s->lastch;
    100 		return (1);
    101 	case SEQUENCE:
    102 		if (s->cnt-- == 0) {
    103 			s->state = NORMAL;
    104 			return (next(s));
    105 		}
    106 		return (1);
    107 	case SET:
    108 		if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
    109 			s->state = NORMAL;
    110 			return (next(s));
    111 		}
    112 		return (1);
    113 	}
    114 	/* NOTREACHED */
    115 	return (0);
    116 }
    117 
    118 static int
    119 bracket(s)
    120 	STR *s;
    121 {
    122 	char *p;
    123 
    124 	switch (s->str[1]) {
    125 	case ':':				/* "[:class:]" */
    126 		if ((p = strstr(s->str + 2, ":]")) == NULL)
    127 			return (0);
    128 		*p = '\0';
    129 		s->str += 2;
    130 		genclass(s);
    131 		s->str = p + 2;
    132 		return (1);
    133 	case '=':				/* "[=equiv=]" */
    134 		if ((p = strstr(s->str + 2, "=]")) == NULL)
    135 			return (0);
    136 		s->str += 2;
    137 		genequiv(s);
    138 		return (1);
    139 	default:				/* "[\###*n]" or "[#*n]" */
    140 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
    141 			return (0);
    142 		if (p[0] != '*' || strchr(p, ']') == NULL)
    143 			return (0);
    144 		s->str += 1;
    145 		genseq(s);
    146 		return (1);
    147 	}
    148 	/* NOTREACHED */
    149 }
    150 
/*
 * One entry per character class name accepted in "[:class:]".
 */
typedef struct {
	const char *name;		/* class name, e.g. "alpha" */
	int (*func) __P((int));		/* <ctype.h> membership predicate */
	int *set;			/* OOBCH-terminated member list, built by genclass() */
} CLASS;
    156 
    157 static CLASS classes[] = {
    158 	{ "alnum",  isalnum,  },
    159 	{ "alpha",  isalpha,  },
    160 	{ "blank",  isblank,  },
    161 	{ "cntrl",  iscntrl,  },
    162 	{ "digit",  isdigit,  },
    163 	{ "graph",  isgraph,  },
    164 	{ "lower",  islower,  },
    165 	{ "print",  isprint,  },
    166 	{ "punct",  ispunct,  },
    167 	{ "space",  isspace,  },
    168 	{ "upper",  isupper,  },
    169 	{ "xdigit", isxdigit, },
    170 };
    171 
    172 static void
    173 genclass(s)
    174 	STR *s;
    175 {
    176 	int cnt, (*func) __P((int));
    177 	CLASS *cp, tmp;
    178 	int *p;
    179 
    180 	tmp.name = s->str;
    181 	if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
    182 	    sizeof(CLASS), sizeof(CLASS), c_class)) == NULL)
    183 		errx(1, "unknown class %s", s->str);
    184 
    185 	if ((cp->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
    186 		err(1, "malloc");
    187 	memset(p, 0, (NCHARS + 1) * sizeof(int));
    188 	for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
    189 		if ((func)(cnt))
    190 			*p++ = cnt;
    191 	*p = OOBCH;
    192 
    193 	s->cnt = 0;
    194 	s->state = SET;
    195 	s->set = cp->set;
    196 }
    197 
    198 static int
    199 c_class(a, b)
    200 	const void *a, *b;
    201 {
    202 	return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name));
    203 }
    204 
    205 /*
    206  * English doesn't have any equivalence classes, so for now
    207  * we just syntax check and grab the character.
    208  */
    209 static void
    210 genequiv(s)
    211 	STR *s;
    212 {
    213 	if (*s->str == '\\') {
    214 		s->equiv[0] = backslash(s);
    215 		if (*s->str != '=')
    216 			errx(1, "misplaced equivalence equals sign");
    217 	} else {
    218 		s->equiv[0] = s->str[0];
    219 		if (s->str[1] != '=')
    220 			errx(1, "misplaced equivalence equals sign");
    221 	}
    222 	s->str += 2;
    223 	s->cnt = 0;
    224 	s->state = SET;
    225 	s->set = s->equiv;
    226 }
    227 
    228 static int
    229 genrange(s)
    230 	STR *s;
    231 {
    232 	int stopval;
    233 	char *savestart;
    234 
    235 	savestart = s->str;
    236 	stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
    237 	if (stopval < (u_char)s->lastch) {
    238 		s->str = savestart;
    239 		return (0);
    240 	}
    241 	s->cnt = stopval - s->lastch + 1;
    242 	s->state = RANGE;
    243 	--s->lastch;
    244 	return (1);
    245 }
    246 
    247 static void
    248 genseq(s)
    249 	STR *s;
    250 {
    251 	char *ep;
    252 
    253 	if (s->which == STRING1)
    254 		errx(1, "sequences only valid in string2");
    255 
    256 	if (*s->str == '\\')
    257 		s->lastch = backslash(s);
    258 	else
    259 		s->lastch = *s->str++;
    260 	if (*s->str != '*')
    261 		errx(1, "misplaced sequence asterisk");
    262 
    263 	switch (*++s->str) {
    264 	case '\\':
    265 		s->cnt = backslash(s);
    266 		break;
    267 	case ']':
    268 		s->cnt = 0;
    269 		++s->str;
    270 		break;
    271 	default:
    272 		if (isdigit(*s->str)) {
    273 			s->cnt = strtol(s->str, &ep, 0);
    274 			if (*ep == ']') {
    275 				s->str = ep + 1;
    276 				break;
    277 			}
    278 		}
    279 		errx(1, "illegal sequence count");
    280 		/* NOTREACHED */
    281 	}
    282 
    283 	s->state = s->cnt ? SEQUENCE : INFINITE;
    284 }
    285 
    286 /*
    287  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
    288  * an escape code or a literal character.
    289  */
    290 static int
    291 backslash(s)
    292 	STR *s;
    293 {
    294 	int ch, cnt, val;
    295 
    296 	for (cnt = val = 0;;) {
    297 		ch = *++s->str;
    298 		if (!isascii(ch) || !isdigit(ch))
    299 			break;
    300 		val = val * 8 + ch - '0';
    301 		if (++cnt == 3) {
    302 			++s->str;
    303 			break;
    304 		}
    305 	}
    306 	if (cnt)
    307 		return (val);
    308 	if (ch != '\0')
    309 		++s->str;
    310 	switch (ch) {
    311 		case 'a':			/* escape characters */
    312 			return ('\7');
    313 		case 'b':
    314 			return ('\b');
    315 		case 'f':
    316 			return ('\f');
    317 		case 'n':
    318 			return ('\n');
    319 		case 'r':
    320 			return ('\r');
    321 		case 't':
    322 			return ('\t');
    323 		case 'v':
    324 			return ('\13');
    325 		case '\0':			/*  \" -> \ */
    326 			s->state = EOS;
    327 			return ('\\');
    328 		default:			/* \x" -> x */
    329 			return (ch);
    330 	}
    331 }
    332