Home | History | Annotate | Line # | Download | only in tr
str.c revision 1.19.8.1
      1  1.19.8.1       tls /*	$NetBSD: str.c,v 1.19.8.1 2014/08/20 00:05:05 tls Exp $	*/
      2       1.6       jtc 
      3       1.1     glass /*-
      4       1.6       jtc  * Copyright (c) 1991, 1993
      5       1.6       jtc  *	The Regents of the University of California.  All rights reserved.
      6       1.1     glass  *
      7       1.1     glass  * Redistribution and use in source and binary forms, with or without
      8       1.1     glass  * modification, are permitted provided that the following conditions
      9       1.1     glass  * are met:
     10       1.1     glass  * 1. Redistributions of source code must retain the above copyright
     11       1.1     glass  *    notice, this list of conditions and the following disclaimer.
     12       1.1     glass  * 2. Redistributions in binary form must reproduce the above copyright
     13       1.1     glass  *    notice, this list of conditions and the following disclaimer in the
     14       1.1     glass  *    documentation and/or other materials provided with the distribution.
     15      1.10       agc  * 3. Neither the name of the University nor the names of its contributors
     16       1.1     glass  *    may be used to endorse or promote products derived from this software
     17       1.1     glass  *    without specific prior written permission.
     18       1.1     glass  *
     19       1.1     glass  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20       1.1     glass  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21       1.1     glass  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22       1.1     glass  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23       1.1     glass  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24       1.1     glass  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25       1.1     glass  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26       1.1     glass  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27       1.1     glass  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28       1.1     glass  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29       1.1     glass  * SUCH DAMAGE.
     30       1.1     glass  */
     31       1.1     glass 
     32       1.8     lukem #include <sys/cdefs.h>
     33       1.1     glass #ifndef lint
     34       1.6       jtc #if 0
     35       1.7       jtc static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
     36       1.6       jtc #endif
     37  1.19.8.1       tls __RCSID("$NetBSD: str.c,v 1.19.8.1 2014/08/20 00:05:05 tls Exp $");
     38       1.1     glass #endif /* not lint */
     39       1.1     glass 
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
     50       1.1     glass 
     51       1.1     glass #include "extern.h"
     52       1.1     glass 
/*
 * Parsing state for one of tr's two string arguments.  The string is
 * expanded lazily, one character per call, so the structure records both
 * the position in the user's text and whatever construct is currently
 * being expanded.
 */
struct str {
	/* Which command-line argument this is. */
	enum {
		STRING1,
		STRING2
	} which;

	/* What the expander is in the middle of doing. */
	enum {
		EOS,		/* text exhausted */
		INFINITE,	/* repeating lastch without a count */
		NORMAL,		/* reading plain text */
		RANGE,		/* stepping through a range, cnt left */
		SEQUENCE,	/* repeating lastch, cnt left */
		SET		/* walking set[], cnt is the index */
	} state;

	int cnt;		/* counter; meaning depends on state */
	int lastch;		/* most recent character produced */
	int equiv[2];		/* inline storage for an equivalence set */
	int *set;		/* active set; may point at equiv */
	const char *str;	/* unread remainder of the user's string */
};
     62  1.19.8.1       tls 
     63  1.19.8.1       tls static int backslash(STR *);
     64  1.19.8.1       tls static int bracket(STR *);
     65  1.19.8.1       tls static int c_class(const void *, const void *);
     66  1.19.8.1       tls static int *genclass(const char *, size_t);
     67  1.19.8.1       tls static void genequiv(STR *);
     68  1.19.8.1       tls static int genrange(STR *);
     69  1.19.8.1       tls static void genseq(STR *);
     70  1.19.8.1       tls 
     71  1.19.8.1       tls STR *
     72  1.19.8.1       tls str_create(int whichstring, const char *txt)
     73  1.19.8.1       tls {
     74  1.19.8.1       tls 	STR *s;
     75  1.19.8.1       tls 
     76  1.19.8.1       tls 	s = malloc(sizeof(*s));
     77  1.19.8.1       tls 	if (s == NULL) {
     78  1.19.8.1       tls 		err(1, "Out of memory");
     79  1.19.8.1       tls 	}
     80  1.19.8.1       tls 
     81  1.19.8.1       tls 	s->which = whichstring == 2 ? STRING2 : STRING1;
     82  1.19.8.1       tls 	s->state = NORMAL;
     83  1.19.8.1       tls 	s->cnt = 0;
     84  1.19.8.1       tls 	s->lastch = OOBCH;
     85  1.19.8.1       tls 	s->equiv[0] = 0;
     86  1.19.8.1       tls 	s->equiv[1] = OOBCH;
     87  1.19.8.1       tls 	s->set = NULL;
     88  1.19.8.1       tls 	s->str = txt;
     89  1.19.8.1       tls 
     90  1.19.8.1       tls 	return s;
     91  1.19.8.1       tls }
     92  1.19.8.1       tls 
     93  1.19.8.1       tls void
     94  1.19.8.1       tls str_destroy(STR *s)
     95  1.19.8.1       tls {
     96  1.19.8.1       tls 	if (s->set != NULL && s->set != s->equiv) {
     97  1.19.8.1       tls 		free(s->set);
     98  1.19.8.1       tls 	}
     99  1.19.8.1       tls 	free(s);
    100  1.19.8.1       tls }
    101       1.1     glass 
    102       1.1     glass int
    103  1.19.8.1       tls next(STR *s, int *ret)
    104       1.1     glass {
    105       1.8     lukem 	int ch;
    106       1.1     glass 
    107       1.1     glass 	switch (s->state) {
    108       1.1     glass 	case EOS:
    109  1.19.8.1       tls 		*ret = s->lastch;
    110      1.16  christos 		return 0;
    111       1.1     glass 	case INFINITE:
    112  1.19.8.1       tls 		*ret = s->lastch;
    113      1.16  christos 		return 1;
    114       1.1     glass 	case NORMAL:
    115  1.19.8.1       tls 		ch = (unsigned char)s->str[0];
    116  1.19.8.1       tls 		switch (ch) {
    117       1.1     glass 		case '\0':
    118       1.1     glass 			s->state = EOS;
    119  1.19.8.1       tls 			*ret = s->lastch;
    120      1.16  christos 			return 0;
    121       1.1     glass 		case '\\':
    122       1.1     glass 			s->lastch = backslash(s);
    123       1.1     glass 			break;
    124       1.1     glass 		case '[':
    125  1.19.8.1       tls 			if (bracket(s)) {
    126  1.19.8.1       tls 				return next(s, ret);
    127  1.19.8.1       tls 			}
    128       1.1     glass 			/* FALLTHROUGH */
    129       1.1     glass 		default:
    130       1.1     glass 			++s->str;
    131       1.1     glass 			s->lastch = ch;
    132       1.1     glass 			break;
    133       1.1     glass 		}
    134       1.1     glass 
    135       1.1     glass 		/* We can start a range at any time. */
    136  1.19.8.1       tls 		if (s->str[0] == '-' && genrange(s)) {
    137  1.19.8.1       tls 			return next(s, ret);
    138  1.19.8.1       tls 		}
    139  1.19.8.1       tls 		*ret = s->lastch;
    140      1.16  christos 		return 1;
    141       1.1     glass 	case RANGE:
    142  1.19.8.1       tls 		if (s->cnt == 0) {
    143       1.1     glass 			s->state = NORMAL;
    144  1.19.8.1       tls 			return next(s, ret);
    145       1.1     glass 		}
    146  1.19.8.1       tls 		s->cnt--;
    147       1.1     glass 		++s->lastch;
    148  1.19.8.1       tls 		*ret = s->lastch;
    149      1.16  christos 		return 1;
    150       1.1     glass 	case SEQUENCE:
    151  1.19.8.1       tls 		if (s->cnt == 0) {
    152       1.1     glass 			s->state = NORMAL;
    153  1.19.8.1       tls 			return next(s, ret);
    154       1.1     glass 		}
    155  1.19.8.1       tls 		s->cnt--;
    156  1.19.8.1       tls 		*ret = s->lastch;
    157      1.16  christos 		return 1;
    158       1.1     glass 	case SET:
    159  1.19.8.1       tls 		s->lastch = s->set[s->cnt++];
    160  1.19.8.1       tls 		if (s->lastch == OOBCH) {
    161       1.1     glass 			s->state = NORMAL;
    162  1.19.8.1       tls 			if (s->set != s->equiv) {
    163  1.19.8.1       tls 				free(s->set);
    164  1.19.8.1       tls 			}
    165  1.19.8.1       tls 			s->set = NULL;
    166  1.19.8.1       tls 			return next(s, ret);
    167       1.1     glass 		}
    168  1.19.8.1       tls 		*ret = s->lastch;
    169      1.16  christos 		return 1;
    170       1.1     glass 	}
    171       1.1     glass 	/* NOTREACHED */
    172  1.19.8.1       tls 	assert(0);
    173  1.19.8.1       tls 	*ret = s->lastch;
    174      1.16  christos 	return 0;
    175       1.1     glass }
    176       1.1     glass 
    177       1.1     glass static int
    178      1.13     joerg bracket(STR *s)
    179       1.1     glass {
    180  1.19.8.1       tls 	const char *p;
    181  1.19.8.1       tls 	int *q;
    182       1.1     glass 
    183       1.1     glass 	switch (s->str[1]) {
    184       1.1     glass 	case ':':				/* "[:class:]" */
    185       1.1     glass 		if ((p = strstr(s->str + 2, ":]")) == NULL)
    186      1.16  christos 			return 0;
    187       1.1     glass 		s->str += 2;
    188  1.19.8.1       tls 		q = genclass(s->str, p - s->str);
    189  1.19.8.1       tls 		s->state = SET;
    190  1.19.8.1       tls 		s->set = q;
    191  1.19.8.1       tls 		s->cnt = 0;
    192       1.1     glass 		s->str = p + 2;
    193      1.16  christos 		return 1;
    194       1.1     glass 	case '=':				/* "[=equiv=]" */
    195       1.1     glass 		if ((p = strstr(s->str + 2, "=]")) == NULL)
    196      1.16  christos 			return 0;
    197       1.1     glass 		s->str += 2;
    198       1.1     glass 		genequiv(s);
    199  1.19.8.1       tls 		s->str = p + 2;
    200      1.16  christos 		return 1;
    201       1.1     glass 	default:				/* "[\###*n]" or "[#*n]" */
    202       1.1     glass 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
    203      1.16  christos 			return 0;
    204       1.8     lukem 		if (p[0] != '*' || strchr(p, ']') == NULL)
    205      1.16  christos 			return 0;
    206       1.1     glass 		s->str += 1;
    207       1.1     glass 		genseq(s);
    208      1.16  christos 		return 1;
    209       1.1     glass 	}
    210       1.1     glass 	/* NOTREACHED */
    211       1.1     glass }
    212       1.1     glass 
/* One named character class and the <ctype.h> predicate that defines it. */
typedef struct {
	const char *name;
	int (*func)(int);
} CLASS;

/*
 * Table of supported character classes.  It is searched with bsearch(3),
 * so the entries must stay sorted by name.
 */
static const CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum  },
	{ .name = "alpha",  .func = isalpha  },
	{ .name = "blank",  .func = isblank  },
	{ .name = "cntrl",  .func = iscntrl  },
	{ .name = "digit",  .func = isdigit  },
	{ .name = "graph",  .func = isgraph  },
	{ .name = "lower",  .func = islower  },
	{ .name = "print",  .func = isprint  },
	{ .name = "punct",  .func = ispunct  },
	{ .name = "space",  .func = isspace  },
	{ .name = "upper",  .func = isupper  },
	{ .name = "xdigit", .func = isxdigit },
};
    232       1.1     glass 
/*
 * Search key for the class table: a class name given as a pointer plus an
 * explicit length, because the name is not NUL-terminated in the user's
 * string.
 */
typedef struct {
	const char *name;	/* first character of the name */
	size_t len;		/* number of characters in the name */
} CLASSKEY;
    237  1.19.8.1       tls 
    238  1.19.8.1       tls static int *
    239  1.19.8.1       tls genclass(const char *class, size_t len)
    240       1.1     glass {
    241  1.19.8.1       tls 	int ch;
    242      1.16  christos 	const CLASS *cp;
    243  1.19.8.1       tls 	CLASSKEY key;
    244       1.1     glass 	int *p;
    245  1.19.8.1       tls 	unsigned pos, num;
    246       1.1     glass 
    247  1.19.8.1       tls 	/* Find the class */
    248  1.19.8.1       tls 	key.name = class;
    249  1.19.8.1       tls 	key.len = len;
    250  1.19.8.1       tls 	cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]),
    251  1.19.8.1       tls 		     c_class);
    252  1.19.8.1       tls 	if (cp == NULL) {
    253  1.19.8.1       tls 		errx(1, "unknown class %.*s", (int)len, class);
    254  1.19.8.1       tls 	}
    255       1.1     glass 
    256  1.19.8.1       tls 	/*
    257  1.19.8.1       tls 	 * Figure out what characters are in the class
    258  1.19.8.1       tls 	 */
    259  1.19.8.1       tls 
    260  1.19.8.1       tls 	num = NCHARS + 1;
    261  1.19.8.1       tls 	p = malloc(num * sizeof(*p));
    262  1.19.8.1       tls 	if (p == NULL) {
    263       1.8     lukem 		err(1, "malloc");
    264  1.19.8.1       tls 	}
    265  1.19.8.1       tls 
    266  1.19.8.1       tls 	pos = 0;
    267  1.19.8.1       tls 	for (ch = 0; ch < NCHARS; ch++) {
    268  1.19.8.1       tls 		if (cp->func(ch)) {
    269  1.19.8.1       tls 			p[pos++] = ch;
    270  1.19.8.1       tls 		}
    271  1.19.8.1       tls 	}
    272      1.19  christos 
    273  1.19.8.1       tls 	p[pos++] = OOBCH;
    274  1.19.8.1       tls 	for (; pos < num; pos++) {
    275  1.19.8.1       tls 		p[pos] = 0;
    276  1.19.8.1       tls 	}
    277       1.1     glass 
    278  1.19.8.1       tls 	return p;
    279       1.1     glass }
    280       1.1     glass 
    281       1.1     glass static int
    282  1.19.8.1       tls c_class(const void *av, const void *bv)
    283       1.1     glass {
    284  1.19.8.1       tls 	const CLASSKEY *a = av;
    285  1.19.8.1       tls 	const CLASS *b = bv;
    286  1.19.8.1       tls 	size_t blen;
    287  1.19.8.1       tls 	int r;
    288  1.19.8.1       tls 
    289  1.19.8.1       tls 	blen = strlen(b->name);
    290  1.19.8.1       tls 	r = strncmp(a->name, b->name, a->len);
    291  1.19.8.1       tls 	if (r != 0) {
    292  1.19.8.1       tls 		return r;
    293  1.19.8.1       tls 	}
    294  1.19.8.1       tls 	if (a->len < blen) {
    295  1.19.8.1       tls 		/* someone gave us a prefix of the right name */
    296  1.19.8.1       tls 		return -1;
    297  1.19.8.1       tls 	}
    298  1.19.8.1       tls 	assert(a-> len == blen);
    299  1.19.8.1       tls 	return 0;
    300       1.1     glass }
    301       1.1     glass 
    302       1.1     glass /*
    303       1.1     glass  * English doesn't have any equivalence classes, so for now
    304       1.1     glass  * we just syntax check and grab the character.
    305       1.1     glass  */
    306       1.1     glass static void
    307      1.13     joerg genequiv(STR *s)
    308       1.1     glass {
    309  1.19.8.1       tls 	int ch;
    310  1.19.8.1       tls 
    311  1.19.8.1       tls 	ch = (unsigned char)s->str[0];
    312  1.19.8.1       tls 	if (ch == '\\') {
    313       1.1     glass 		s->equiv[0] = backslash(s);
    314       1.1     glass 	} else {
    315  1.19.8.1       tls 		s->equiv[0] = ch;
    316  1.19.8.1       tls 		s->str++;
    317  1.19.8.1       tls 	}
    318  1.19.8.1       tls 	if (s->str[0] != '=') {
    319  1.19.8.1       tls 		errx(1, "Misplaced equivalence equals sign");
    320       1.1     glass 	}
    321  1.19.8.1       tls 	s->str++;
    322  1.19.8.1       tls 	if (s->str[0] != ']') {
    323  1.19.8.1       tls 		errx(1, "Misplaced equivalence right bracket");
    324  1.19.8.1       tls 	}
    325  1.19.8.1       tls 	s->str++;
    326  1.19.8.1       tls 
    327       1.1     glass 	s->cnt = 0;
    328       1.1     glass 	s->state = SET;
    329       1.1     glass 	s->set = s->equiv;
    330       1.1     glass }
    331       1.1     glass 
    332       1.1     glass static int
    333      1.13     joerg genrange(STR *s)
    334       1.1     glass {
    335       1.1     glass 	int stopval;
    336  1.19.8.1       tls 	const char *savestart;
    337       1.1     glass 
    338  1.19.8.1       tls 	savestart = s->str++;
    339  1.19.8.1       tls 	stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++;
    340  1.19.8.1       tls 	if (stopval < (unsigned char)s->lastch) {
    341       1.1     glass 		s->str = savestart;
    342      1.16  christos 		return 0;
    343       1.1     glass 	}
    344       1.1     glass 	s->cnt = stopval - s->lastch + 1;
    345       1.1     glass 	s->state = RANGE;
    346       1.1     glass 	--s->lastch;
    347      1.16  christos 	return 1;
    348       1.1     glass }
    349       1.1     glass 
    350       1.1     glass static void
    351      1.13     joerg genseq(STR *s)
    352       1.1     glass {
    353       1.1     glass 	char *ep;
    354       1.1     glass 
    355  1.19.8.1       tls 	if (s->which == STRING1) {
    356  1.19.8.1       tls 		errx(1, "Sequences only valid in string2");
    357  1.19.8.1       tls 	}
    358       1.1     glass 
    359  1.19.8.1       tls 	if (*s->str == '\\') {
    360       1.1     glass 		s->lastch = backslash(s);
    361  1.19.8.1       tls 	} else {
    362  1.19.8.1       tls 		s->lastch = (unsigned char)*s->str++;
    363  1.19.8.1       tls 	}
    364  1.19.8.1       tls 	if (*s->str != '*') {
    365  1.19.8.1       tls 		errx(1, "Misplaced sequence asterisk");
    366  1.19.8.1       tls 	}
    367       1.1     glass 
    368  1.19.8.1       tls 	s->str++;
    369  1.19.8.1       tls 	switch (s->str[0]) {
    370       1.1     glass 	case '\\':
    371       1.1     glass 		s->cnt = backslash(s);
    372       1.1     glass 		break;
    373       1.1     glass 	case ']':
    374       1.1     glass 		s->cnt = 0;
    375       1.1     glass 		++s->str;
    376       1.1     glass 		break;
    377       1.1     glass 	default:
    378  1.19.8.1       tls 		if (isdigit((unsigned char)s->str[0])) {
    379       1.1     glass 			s->cnt = strtol(s->str, &ep, 0);
    380       1.1     glass 			if (*ep == ']') {
    381       1.1     glass 				s->str = ep + 1;
    382       1.1     glass 				break;
    383       1.1     glass 			}
    384       1.1     glass 		}
    385       1.8     lukem 		errx(1, "illegal sequence count");
    386       1.1     glass 		/* NOTREACHED */
    387       1.1     glass 	}
    388       1.1     glass 
    389       1.1     glass 	s->state = s->cnt ? SEQUENCE : INFINITE;
    390       1.1     glass }
    391       1.1     glass 
    392       1.1     glass /*
    393       1.1     glass  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
    394       1.1     glass  * an escape code or a literal character.
    395       1.1     glass  */
    396       1.1     glass static int
    397      1.13     joerg backslash(STR *s)
    398       1.1     glass {
    399       1.8     lukem 	int ch, cnt, val;
    400       1.1     glass 
    401  1.19.8.1       tls 	cnt = val = 0;
    402  1.19.8.1       tls 	for (;;) {
    403  1.19.8.1       tls 		/* Consume the character we're already on. */
    404  1.19.8.1       tls 		s->str++;
    405  1.19.8.1       tls 
    406  1.19.8.1       tls 		/* Look at the next character. */
    407  1.19.8.1       tls 		ch = (unsigned char)s->str[0];
    408  1.19.8.1       tls 		if (!isascii(ch) || !isdigit(ch)) {
    409       1.1     glass 			break;
    410  1.19.8.1       tls 		}
    411       1.1     glass 		val = val * 8 + ch - '0';
    412       1.1     glass 		if (++cnt == 3) {
    413  1.19.8.1       tls 			/* Enough digits; consume this one and stop */
    414       1.1     glass 			++s->str;
    415       1.1     glass 			break;
    416       1.1     glass 		}
    417       1.1     glass 	}
    418  1.19.8.1       tls 	if (cnt) {
    419  1.19.8.1       tls 		/* We saw digits, so return their value */
    420      1.16  christos 		return val;
    421  1.19.8.1       tls 	}
    422  1.19.8.1       tls 	if (ch == '\0') {
    423  1.19.8.1       tls 		/* \<end> -> \ */
    424  1.19.8.1       tls 		s->state = EOS;
    425  1.19.8.1       tls 		return '\\';
    426  1.19.8.1       tls 	}
    427  1.19.8.1       tls 
    428  1.19.8.1       tls 	/* Consume the escaped character */
    429  1.19.8.1       tls 	s->str++;
    430  1.19.8.1       tls 
    431       1.1     glass 	switch (ch) {
    432      1.17  christos 	case 'a':			/* escape characters */
    433      1.17  christos 		return '\7';
    434      1.17  christos 	case 'b':
    435      1.17  christos 		return '\b';
    436      1.17  christos 	case 'e':
    437      1.17  christos 		return '\033';
    438      1.17  christos 	case 'f':
    439      1.17  christos 		return '\f';
    440      1.17  christos 	case 'n':
    441      1.17  christos 		return '\n';
    442      1.17  christos 	case 'r':
    443      1.17  christos 		return '\r';
    444      1.17  christos 	case 't':
    445      1.17  christos 		return '\t';
    446      1.17  christos 	case 'v':
    447      1.17  christos 		return '\13';
    448  1.19.8.1       tls 	default:			/* \q -> q */
    449      1.17  christos 		return ch;
    450       1.1     glass 	}
    451       1.1     glass }
    452