Home | History | Annotate | Line # | Download | only in tr
str.c revision 1.29.24.1
      1  1.29.24.1  pgoyette /*	$NetBSD: str.c,v 1.29.24.1 2018/06/25 07:26:11 pgoyette Exp $	*/
      2        1.6       jtc 
      3        1.1     glass /*-
      4        1.6       jtc  * Copyright (c) 1991, 1993
      5        1.6       jtc  *	The Regents of the University of California.  All rights reserved.
      6        1.1     glass  *
      7        1.1     glass  * Redistribution and use in source and binary forms, with or without
      8        1.1     glass  * modification, are permitted provided that the following conditions
      9        1.1     glass  * are met:
     10        1.1     glass  * 1. Redistributions of source code must retain the above copyright
     11        1.1     glass  *    notice, this list of conditions and the following disclaimer.
     12        1.1     glass  * 2. Redistributions in binary form must reproduce the above copyright
     13        1.1     glass  *    notice, this list of conditions and the following disclaimer in the
     14        1.1     glass  *    documentation and/or other materials provided with the distribution.
     15       1.10       agc  * 3. Neither the name of the University nor the names of its contributors
     16        1.1     glass  *    may be used to endorse or promote products derived from this software
     17        1.1     glass  *    without specific prior written permission.
     18        1.1     glass  *
     19        1.1     glass  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20        1.1     glass  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21        1.1     glass  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22        1.1     glass  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23        1.1     glass  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24        1.1     glass  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25        1.1     glass  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26        1.1     glass  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27        1.1     glass  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28        1.1     glass  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29        1.1     glass  * SUCH DAMAGE.
     30        1.1     glass  */
     31        1.1     glass 
     32        1.8     lukem #include <sys/cdefs.h>
     33        1.1     glass #ifndef lint
     34        1.6       jtc #if 0
     35        1.7       jtc static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
     36        1.6       jtc #endif
     37  1.29.24.1  pgoyette __RCSID("$NetBSD: str.c,v 1.29.24.1 2018/06/25 07:26:11 pgoyette Exp $");
     38        1.1     glass #endif /* not lint */
     39        1.1     glass 
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
     50        1.1     glass 
     51        1.1     glass #include "extern.h"
     52        1.1     glass 
/*
 * Parsing state for one string operand.  next() walks `str` and uses the
 * remaining fields to remember which construct it is currently expanding.
 */
struct str {
	enum {
		STRING1,
		STRING2
	} which;			/* which operand this describes */
	enum {
		EOS,			/* input exhausted; next() returns 0 */
		INFINITE,		/* next() yields lastch on every call */
		NORMAL,			/* parse the next construct from str */
		RANGE,			/* cnt more characters of a range left */
		SEQUENCE,		/* cnt more repeats of lastch left */
		SET			/* reading set[] until OOBCH */
	} state;
	int cnt;			/* character count */
	int lastch;			/* last character */
	int equiv[2];			/* equivalence set */
	int *set;			/* set of characters */
	const char *str;		/* user's string */
};
     62       1.21  dholland 
     63       1.29  dholland static int backslash(STR *);
     64       1.29  dholland static int bracket(STR *);
     65       1.29  dholland static int c_class(const void *, const void *);
     66       1.27  dholland static int *genclass(const char *, size_t);
     67       1.29  dholland static void genequiv(STR *);
     68       1.29  dholland static int genrange(STR *);
     69       1.29  dholland static void genseq(STR *);
     70        1.1     glass 
     71       1.21  dholland STR *
     72       1.23  dholland str_create(int whichstring, const char *txt)
     73       1.21  dholland {
     74       1.21  dholland 	STR *s;
     75       1.21  dholland 
     76       1.21  dholland 	s = malloc(sizeof(*s));
     77       1.21  dholland 	if (s == NULL) {
     78       1.21  dholland 		err(1, "Out of memory");
     79       1.21  dholland 	}
     80       1.21  dholland 
     81       1.21  dholland 	s->which = whichstring == 2 ? STRING2 : STRING1;
     82       1.21  dholland 	s->state = NORMAL;
     83       1.21  dholland 	s->cnt = 0;
     84       1.21  dholland 	s->lastch = OOBCH;
     85       1.21  dholland 	s->equiv[0] = 0;
     86       1.21  dholland 	s->equiv[1] = OOBCH;
     87       1.21  dholland 	s->set = NULL;
     88       1.23  dholland 	s->str = txt;
     89       1.21  dholland 
     90       1.21  dholland 	return s;
     91       1.21  dholland }
     92       1.21  dholland 
     93       1.21  dholland void
     94       1.21  dholland str_destroy(STR *s)
     95       1.21  dholland {
     96       1.21  dholland 	if (s->set != NULL && s->set != s->equiv) {
     97       1.21  dholland 		free(s->set);
     98       1.21  dholland 	}
     99       1.21  dholland 	free(s);
    100       1.21  dholland }
    101       1.21  dholland 
    102        1.1     glass int
    103       1.20  dholland next(STR *s, int *ret)
    104        1.1     glass {
    105        1.8     lukem 	int ch;
    106        1.1     glass 
    107        1.1     glass 	switch (s->state) {
    108        1.1     glass 	case EOS:
    109       1.20  dholland 		*ret = s->lastch;
    110       1.16  christos 		return 0;
    111        1.1     glass 	case INFINITE:
    112       1.20  dholland 		*ret = s->lastch;
    113       1.16  christos 		return 1;
    114        1.1     glass 	case NORMAL:
    115       1.26  dholland 		ch = (unsigned char)s->str[0];
    116       1.26  dholland 		switch (ch) {
    117        1.1     glass 		case '\0':
    118        1.1     glass 			s->state = EOS;
    119       1.20  dholland 			*ret = s->lastch;
    120       1.16  christos 			return 0;
    121        1.1     glass 		case '\\':
    122        1.1     glass 			s->lastch = backslash(s);
    123        1.1     glass 			break;
    124        1.1     glass 		case '[':
    125       1.26  dholland 			if (bracket(s)) {
    126       1.20  dholland 				return next(s, ret);
    127       1.26  dholland 			}
    128        1.1     glass 			/* FALLTHROUGH */
    129        1.1     glass 		default:
    130        1.1     glass 			++s->str;
    131        1.1     glass 			s->lastch = ch;
    132        1.1     glass 			break;
    133        1.1     glass 		}
    134        1.1     glass 
    135        1.1     glass 		/* We can start a range at any time. */
    136       1.20  dholland 		if (s->str[0] == '-' && genrange(s)) {
    137       1.20  dholland 			return next(s, ret);
    138       1.20  dholland 		}
    139       1.20  dholland 		*ret = s->lastch;
    140       1.16  christos 		return 1;
    141        1.1     glass 	case RANGE:
    142       1.26  dholland 		if (s->cnt == 0) {
    143        1.1     glass 			s->state = NORMAL;
    144       1.20  dholland 			return next(s, ret);
    145        1.1     glass 		}
    146       1.26  dholland 		s->cnt--;
    147        1.1     glass 		++s->lastch;
    148       1.20  dholland 		*ret = s->lastch;
    149       1.16  christos 		return 1;
    150        1.1     glass 	case SEQUENCE:
    151       1.26  dholland 		if (s->cnt == 0) {
    152        1.1     glass 			s->state = NORMAL;
    153       1.20  dholland 			return next(s, ret);
    154        1.1     glass 		}
    155       1.26  dholland 		s->cnt--;
    156       1.20  dholland 		*ret = s->lastch;
    157       1.16  christos 		return 1;
    158        1.1     glass 	case SET:
    159       1.26  dholland 		s->lastch = s->set[s->cnt++];
    160       1.26  dholland 		if (s->lastch == OOBCH) {
    161        1.1     glass 			s->state = NORMAL;
    162       1.26  dholland 			if (s->set != s->equiv) {
    163       1.26  dholland 				free(s->set);
    164       1.26  dholland 			}
    165       1.26  dholland 			s->set = NULL;
    166       1.20  dholland 			return next(s, ret);
    167        1.1     glass 		}
    168       1.20  dholland 		*ret = s->lastch;
    169       1.16  christos 		return 1;
    170        1.1     glass 	}
    171        1.1     glass 	/* NOTREACHED */
    172       1.20  dholland 	assert(0);
    173       1.20  dholland 	*ret = s->lastch;
    174       1.16  christos 	return 0;
    175        1.1     glass }
    176        1.1     glass 
    177        1.1     glass static int
    178       1.13     joerg bracket(STR *s)
    179        1.1     glass {
    180       1.26  dholland 	const char *p;
    181       1.27  dholland 	int *q;
    182        1.1     glass 
    183        1.1     glass 	switch (s->str[1]) {
    184        1.1     glass 	case ':':				/* "[:class:]" */
    185        1.1     glass 		if ((p = strstr(s->str + 2, ":]")) == NULL)
    186       1.16  christos 			return 0;
    187        1.1     glass 		s->str += 2;
    188       1.27  dholland 		q = genclass(s->str, p - s->str);
    189       1.27  dholland 		s->state = SET;
    190       1.27  dholland 		s->set = q;
    191       1.27  dholland 		s->cnt = 0;
    192        1.1     glass 		s->str = p + 2;
    193       1.16  christos 		return 1;
    194        1.1     glass 	case '=':				/* "[=equiv=]" */
    195        1.1     glass 		if ((p = strstr(s->str + 2, "=]")) == NULL)
    196       1.16  christos 			return 0;
    197        1.1     glass 		s->str += 2;
    198        1.1     glass 		genequiv(s);
    199       1.28  dholland 		s->str = p + 2;
    200       1.16  christos 		return 1;
    201        1.1     glass 	default:				/* "[\###*n]" or "[#*n]" */
    202        1.1     glass 		if ((p = strpbrk(s->str + 2, "*]")) == NULL)
    203       1.16  christos 			return 0;
    204        1.8     lukem 		if (p[0] != '*' || strchr(p, ']') == NULL)
    205       1.16  christos 			return 0;
    206        1.1     glass 		s->str += 1;
    207        1.1     glass 		genseq(s);
    208       1.16  christos 		return 1;
    209        1.1     glass 	}
    210        1.1     glass 	/* NOTREACHED */
    211        1.1     glass }
    212        1.1     glass 
/* One named character class and the <ctype.h> predicate that tests it. */
typedef struct {
	const char *name;
	int (*func)(int);
} CLASS;

/*
 * Known character classes.  genclass() looks names up with bsearch(),
 * so the entries must stay sorted by name.
 */
static const CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum  },
	{ .name = "alpha",  .func = isalpha  },
	{ .name = "blank",  .func = isblank  },
	{ .name = "cntrl",  .func = iscntrl  },
	{ .name = "digit",  .func = isdigit  },
	{ .name = "graph",  .func = isgraph  },
	{ .name = "lower",  .func = islower  },
	{ .name = "print",  .func = isprint  },
	{ .name = "punct",  .func = ispunct  },
	{ .name = "space",  .func = isspace  },
	{ .name = "upper",  .func = isupper  },
	{ .name = "xdigit", .func = isxdigit },
};

/* Search key for classes[]: a class name given as pointer plus length. */
typedef struct {
	const char *name;
	size_t len;
} CLASSKEY;
    237       1.26  dholland 
    238       1.27  dholland static int *
    239       1.27  dholland genclass(const char *class, size_t len)
    240        1.1     glass {
    241       1.26  dholland 	int ch;
    242       1.16  christos 	const CLASS *cp;
    243       1.26  dholland 	CLASSKEY key;
    244        1.1     glass 	int *p;
    245       1.26  dholland 	unsigned pos, num;
    246        1.1     glass 
    247       1.26  dholland 	/* Find the class */
    248       1.26  dholland 	key.name = class;
    249       1.26  dholland 	key.len = len;
    250       1.26  dholland 	cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]),
    251       1.26  dholland 		     c_class);
    252       1.26  dholland 	if (cp == NULL) {
    253       1.26  dholland 		errx(1, "unknown class %.*s", (int)len, class);
    254       1.26  dholland 	}
    255        1.1     glass 
    256       1.26  dholland 	/*
    257       1.26  dholland 	 * Figure out what characters are in the class
    258       1.26  dholland 	 */
    259       1.26  dholland 
    260       1.26  dholland 	num = NCHARS + 1;
    261       1.26  dholland 	p = malloc(num * sizeof(*p));
    262       1.26  dholland 	if (p == NULL) {
    263        1.8     lukem 		err(1, "malloc");
    264       1.26  dholland 	}
    265       1.26  dholland 
    266       1.26  dholland 	pos = 0;
    267       1.26  dholland 	for (ch = 0; ch < NCHARS; ch++) {
    268       1.26  dholland 		if (cp->func(ch)) {
    269       1.26  dholland 			p[pos++] = ch;
    270       1.26  dholland 		}
    271       1.26  dholland 	}
    272       1.26  dholland 
    273       1.26  dholland 	p[pos++] = OOBCH;
    274       1.26  dholland 	for (; pos < num; pos++) {
    275       1.26  dholland 		p[pos] = 0;
    276       1.26  dholland 	}
    277       1.19  christos 
    278       1.27  dholland 	return p;
    279        1.1     glass }
    280        1.1     glass 
    281        1.1     glass static int
    282       1.26  dholland c_class(const void *av, const void *bv)
    283        1.1     glass {
    284       1.26  dholland 	const CLASSKEY *a = av;
    285       1.26  dholland 	const CLASS *b = bv;
    286       1.26  dholland 	size_t blen;
    287       1.26  dholland 	int r;
    288       1.26  dholland 
    289       1.26  dholland 	blen = strlen(b->name);
    290       1.26  dholland 	r = strncmp(a->name, b->name, a->len);
    291       1.26  dholland 	if (r != 0) {
    292       1.26  dholland 		return r;
    293       1.26  dholland 	}
    294       1.26  dholland 	if (a->len < blen) {
    295       1.26  dholland 		/* someone gave us a prefix of the right name */
    296       1.26  dholland 		return -1;
    297       1.26  dholland 	}
    298       1.26  dholland 	assert(a-> len == blen);
    299       1.26  dholland 	return 0;
    300        1.1     glass }
    301        1.1     glass 
    302        1.1     glass /*
    303        1.1     glass  * English doesn't have any equivalence classes, so for now
    304        1.1     glass  * we just syntax check and grab the character.
    305        1.1     glass  */
    306        1.1     glass static void
    307       1.13     joerg genequiv(STR *s)
    308        1.1     glass {
    309       1.27  dholland 	int ch;
    310       1.27  dholland 
    311       1.27  dholland 	ch = (unsigned char)s->str[0];
    312       1.27  dholland 	if (ch == '\\') {
    313        1.1     glass 		s->equiv[0] = backslash(s);
    314        1.1     glass 	} else {
    315       1.27  dholland 		s->equiv[0] = ch;
    316       1.28  dholland 		s->str++;
    317       1.28  dholland 	}
    318       1.28  dholland 	if (s->str[0] != '=') {
    319       1.28  dholland 		errx(1, "Misplaced equivalence equals sign");
    320       1.28  dholland 	}
    321       1.28  dholland 	s->str++;
    322       1.28  dholland 	if (s->str[0] != ']') {
    323       1.28  dholland 		errx(1, "Misplaced equivalence right bracket");
    324        1.1     glass 	}
    325       1.28  dholland 	s->str++;
    326       1.28  dholland 
    327        1.1     glass 	s->cnt = 0;
    328        1.1     glass 	s->state = SET;
    329        1.1     glass 	s->set = s->equiv;
    330        1.1     glass }
    331        1.1     glass 
    332        1.1     glass static int
    333       1.13     joerg genrange(STR *s)
    334        1.1     glass {
    335        1.1     glass 	int stopval;
    336       1.22  dholland 	const char *savestart;
    337        1.1     glass 
    338       1.24  dholland 	savestart = s->str++;
    339       1.26  dholland 	stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++;
    340       1.26  dholland 	if (stopval < (unsigned char)s->lastch) {
    341        1.1     glass 		s->str = savestart;
    342       1.16  christos 		return 0;
    343        1.1     glass 	}
    344        1.1     glass 	s->cnt = stopval - s->lastch + 1;
    345        1.1     glass 	s->state = RANGE;
    346        1.1     glass 	--s->lastch;
    347       1.16  christos 	return 1;
    348        1.1     glass }
    349        1.1     glass 
    350        1.1     glass static void
    351       1.13     joerg genseq(STR *s)
    352        1.1     glass {
    353        1.1     glass 	char *ep;
    354        1.1     glass 
    355       1.26  dholland 	if (s->which == STRING1) {
    356       1.26  dholland 		errx(1, "Sequences only valid in string2");
    357       1.26  dholland 	}
    358        1.1     glass 
    359       1.26  dholland 	if (*s->str == '\\') {
    360        1.1     glass 		s->lastch = backslash(s);
    361       1.26  dholland 	} else {
    362       1.25  dholland 		s->lastch = (unsigned char)*s->str++;
    363       1.26  dholland 	}
    364       1.26  dholland 	if (*s->str != '*') {
    365       1.26  dholland 		errx(1, "Misplaced sequence asterisk");
    366       1.26  dholland 	}
    367        1.1     glass 
    368       1.26  dholland 	s->str++;
    369       1.26  dholland 	switch (s->str[0]) {
    370        1.1     glass 	case '\\':
    371        1.1     glass 		s->cnt = backslash(s);
    372        1.1     glass 		break;
    373        1.1     glass 	case ']':
    374        1.1     glass 		s->cnt = 0;
    375        1.1     glass 		++s->str;
    376        1.1     glass 		break;
    377        1.1     glass 	default:
    378       1.26  dholland 		if (isdigit((unsigned char)s->str[0])) {
    379        1.1     glass 			s->cnt = strtol(s->str, &ep, 0);
    380        1.1     glass 			if (*ep == ']') {
    381        1.1     glass 				s->str = ep + 1;
    382        1.1     glass 				break;
    383        1.1     glass 			}
    384        1.1     glass 		}
    385        1.8     lukem 		errx(1, "illegal sequence count");
    386        1.1     glass 		/* NOTREACHED */
    387        1.1     glass 	}
    388        1.1     glass 
    389        1.1     glass 	s->state = s->cnt ? SEQUENCE : INFINITE;
    390        1.1     glass }
    391        1.1     glass 
    392        1.1     glass /*
    393        1.1     glass  * Translate \??? into a character.  Up to 3 octal digits, if no digits either
    394        1.1     glass  * an escape code or a literal character.
    395        1.1     glass  */
    396        1.1     glass static int
    397       1.13     joerg backslash(STR *s)
    398        1.1     glass {
    399        1.8     lukem 	int ch, cnt, val;
    400        1.1     glass 
    401       1.27  dholland 	cnt = val = 0;
    402       1.27  dholland 	for (;;) {
    403       1.27  dholland 		/* Consume the character we're already on. */
    404       1.26  dholland 		s->str++;
    405       1.27  dholland 
    406       1.27  dholland 		/* Look at the next character. */
    407       1.26  dholland 		ch = (unsigned char)s->str[0];
    408       1.26  dholland 		if (!isascii(ch) || !isdigit(ch)) {
    409        1.1     glass 			break;
    410       1.26  dholland 		}
    411        1.1     glass 		val = val * 8 + ch - '0';
    412        1.1     glass 		if (++cnt == 3) {
    413       1.27  dholland 			/* Enough digits; consume this one and stop */
    414        1.1     glass 			++s->str;
    415        1.1     glass 			break;
    416        1.1     glass 		}
    417        1.1     glass 	}
    418       1.26  dholland 	if (cnt) {
    419       1.27  dholland 		/* We saw digits, so return their value */
    420  1.29.24.1  pgoyette 		if (val >= OOBCH)
    421  1.29.24.1  pgoyette 			errx(1, "Invalid octal character value");
    422       1.16  christos 		return val;
    423       1.26  dholland 	}
    424       1.27  dholland 	if (ch == '\0') {
    425       1.27  dholland 		/* \<end> -> \ */
    426       1.27  dholland 		s->state = EOS;
    427       1.27  dholland 		return '\\';
    428       1.26  dholland 	}
    429       1.27  dholland 
    430       1.27  dholland 	/* Consume the escaped character */
    431       1.27  dholland 	s->str++;
    432       1.27  dholland 
    433        1.1     glass 	switch (ch) {
    434       1.17  christos 	case 'a':			/* escape characters */
    435       1.17  christos 		return '\7';
    436       1.17  christos 	case 'b':
    437       1.17  christos 		return '\b';
    438       1.17  christos 	case 'e':
    439       1.17  christos 		return '\033';
    440       1.17  christos 	case 'f':
    441       1.17  christos 		return '\f';
    442       1.17  christos 	case 'n':
    443       1.17  christos 		return '\n';
    444       1.17  christos 	case 'r':
    445       1.17  christos 		return '\r';
    446       1.17  christos 	case 't':
    447       1.17  christos 		return '\t';
    448       1.17  christos 	case 'v':
    449       1.17  christos 		return '\13';
    450       1.27  dholland 	default:			/* \q -> q */
    451       1.17  christos 		return ch;
    452        1.1     glass 	}
    453        1.1     glass }
    454