str.c revision 1.19.8.1 1 1.19.8.1 tls /* $NetBSD: str.c,v 1.19.8.1 2014/08/20 00:05:05 tls Exp $ */
2 1.6 jtc
3 1.1 glass /*-
4 1.6 jtc * Copyright (c) 1991, 1993
5 1.6 jtc * The Regents of the University of California. All rights reserved.
6 1.1 glass *
7 1.1 glass * Redistribution and use in source and binary forms, with or without
8 1.1 glass * modification, are permitted provided that the following conditions
9 1.1 glass * are met:
10 1.1 glass * 1. Redistributions of source code must retain the above copyright
11 1.1 glass * notice, this list of conditions and the following disclaimer.
12 1.1 glass * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 glass * notice, this list of conditions and the following disclaimer in the
14 1.1 glass * documentation and/or other materials provided with the distribution.
15 1.10 agc * 3. Neither the name of the University nor the names of its contributors
16 1.1 glass * may be used to endorse or promote products derived from this software
17 1.1 glass * without specific prior written permission.
18 1.1 glass *
19 1.1 glass * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 1.1 glass * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 glass * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 glass * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 1.1 glass * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 glass * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 glass * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 glass * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 glass * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 glass * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 glass * SUCH DAMAGE.
30 1.1 glass */
31 1.1 glass
32 1.8 lukem #include <sys/cdefs.h>
33 1.1 glass #ifndef lint
34 1.6 jtc #if 0
35 1.7 jtc static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
36 1.6 jtc #endif
37 1.19.8.1 tls __RCSID("$NetBSD: str.c,v 1.19.8.1 2014/08/20 00:05:05 tls Exp $");
38 1.1 glass #endif /* not lint */
39 1.1 glass
#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "extern.h"
52 1.1 glass
/*
 * Parse state for one string operand.  next() walks `str` and hands back
 * one character per call, using `state` to remember what kind of
 * construct it is currently expanding.
 */
struct str {
	/* Which operand this is; genseq() rejects sequences in STRING1. */
	enum {
		STRING1,
		STRING2
	} which;

	/* What next() is currently doing. */
	enum {
		EOS,		/* input exhausted; next() returns 0 */
		INFINITE,	/* keep returning lastch forever */
		NORMAL,		/* read the next item from str */
		RANGE,		/* cnt characters of a range still to emit */
		SEQUENCE,	/* lastch still to be repeated cnt times */
		SET		/* walking set[] until OOBCH is found */
	} state;

	int cnt;		/* remaining count, or index into set[] */
	int lastch;		/* most recently produced character */
	int equiv[2];		/* one-element equivalence set + OOBCH */
	int *set;		/* current set: malloc'd, equiv, or NULL */
	const char *str;	/* unparsed remainder of the user's string */
};
62 1.19.8.1 tls
63 1.19.8.1 tls static int backslash(STR *);
64 1.19.8.1 tls static int bracket(STR *);
65 1.19.8.1 tls static int c_class(const void *, const void *);
66 1.19.8.1 tls static int *genclass(const char *, size_t);
67 1.19.8.1 tls static void genequiv(STR *);
68 1.19.8.1 tls static int genrange(STR *);
69 1.19.8.1 tls static void genseq(STR *);
70 1.19.8.1 tls
71 1.19.8.1 tls STR *
72 1.19.8.1 tls str_create(int whichstring, const char *txt)
73 1.19.8.1 tls {
74 1.19.8.1 tls STR *s;
75 1.19.8.1 tls
76 1.19.8.1 tls s = malloc(sizeof(*s));
77 1.19.8.1 tls if (s == NULL) {
78 1.19.8.1 tls err(1, "Out of memory");
79 1.19.8.1 tls }
80 1.19.8.1 tls
81 1.19.8.1 tls s->which = whichstring == 2 ? STRING2 : STRING1;
82 1.19.8.1 tls s->state = NORMAL;
83 1.19.8.1 tls s->cnt = 0;
84 1.19.8.1 tls s->lastch = OOBCH;
85 1.19.8.1 tls s->equiv[0] = 0;
86 1.19.8.1 tls s->equiv[1] = OOBCH;
87 1.19.8.1 tls s->set = NULL;
88 1.19.8.1 tls s->str = txt;
89 1.19.8.1 tls
90 1.19.8.1 tls return s;
91 1.19.8.1 tls }
92 1.19.8.1 tls
93 1.19.8.1 tls void
94 1.19.8.1 tls str_destroy(STR *s)
95 1.19.8.1 tls {
96 1.19.8.1 tls if (s->set != NULL && s->set != s->equiv) {
97 1.19.8.1 tls free(s->set);
98 1.19.8.1 tls }
99 1.19.8.1 tls free(s);
100 1.19.8.1 tls }
101 1.1 glass
102 1.1 glass int
103 1.19.8.1 tls next(STR *s, int *ret)
104 1.1 glass {
105 1.8 lukem int ch;
106 1.1 glass
107 1.1 glass switch (s->state) {
108 1.1 glass case EOS:
109 1.19.8.1 tls *ret = s->lastch;
110 1.16 christos return 0;
111 1.1 glass case INFINITE:
112 1.19.8.1 tls *ret = s->lastch;
113 1.16 christos return 1;
114 1.1 glass case NORMAL:
115 1.19.8.1 tls ch = (unsigned char)s->str[0];
116 1.19.8.1 tls switch (ch) {
117 1.1 glass case '\0':
118 1.1 glass s->state = EOS;
119 1.19.8.1 tls *ret = s->lastch;
120 1.16 christos return 0;
121 1.1 glass case '\\':
122 1.1 glass s->lastch = backslash(s);
123 1.1 glass break;
124 1.1 glass case '[':
125 1.19.8.1 tls if (bracket(s)) {
126 1.19.8.1 tls return next(s, ret);
127 1.19.8.1 tls }
128 1.1 glass /* FALLTHROUGH */
129 1.1 glass default:
130 1.1 glass ++s->str;
131 1.1 glass s->lastch = ch;
132 1.1 glass break;
133 1.1 glass }
134 1.1 glass
135 1.1 glass /* We can start a range at any time. */
136 1.19.8.1 tls if (s->str[0] == '-' && genrange(s)) {
137 1.19.8.1 tls return next(s, ret);
138 1.19.8.1 tls }
139 1.19.8.1 tls *ret = s->lastch;
140 1.16 christos return 1;
141 1.1 glass case RANGE:
142 1.19.8.1 tls if (s->cnt == 0) {
143 1.1 glass s->state = NORMAL;
144 1.19.8.1 tls return next(s, ret);
145 1.1 glass }
146 1.19.8.1 tls s->cnt--;
147 1.1 glass ++s->lastch;
148 1.19.8.1 tls *ret = s->lastch;
149 1.16 christos return 1;
150 1.1 glass case SEQUENCE:
151 1.19.8.1 tls if (s->cnt == 0) {
152 1.1 glass s->state = NORMAL;
153 1.19.8.1 tls return next(s, ret);
154 1.1 glass }
155 1.19.8.1 tls s->cnt--;
156 1.19.8.1 tls *ret = s->lastch;
157 1.16 christos return 1;
158 1.1 glass case SET:
159 1.19.8.1 tls s->lastch = s->set[s->cnt++];
160 1.19.8.1 tls if (s->lastch == OOBCH) {
161 1.1 glass s->state = NORMAL;
162 1.19.8.1 tls if (s->set != s->equiv) {
163 1.19.8.1 tls free(s->set);
164 1.19.8.1 tls }
165 1.19.8.1 tls s->set = NULL;
166 1.19.8.1 tls return next(s, ret);
167 1.1 glass }
168 1.19.8.1 tls *ret = s->lastch;
169 1.16 christos return 1;
170 1.1 glass }
171 1.1 glass /* NOTREACHED */
172 1.19.8.1 tls assert(0);
173 1.19.8.1 tls *ret = s->lastch;
174 1.16 christos return 0;
175 1.1 glass }
176 1.1 glass
177 1.1 glass static int
178 1.13 joerg bracket(STR *s)
179 1.1 glass {
180 1.19.8.1 tls const char *p;
181 1.19.8.1 tls int *q;
182 1.1 glass
183 1.1 glass switch (s->str[1]) {
184 1.1 glass case ':': /* "[:class:]" */
185 1.1 glass if ((p = strstr(s->str + 2, ":]")) == NULL)
186 1.16 christos return 0;
187 1.1 glass s->str += 2;
188 1.19.8.1 tls q = genclass(s->str, p - s->str);
189 1.19.8.1 tls s->state = SET;
190 1.19.8.1 tls s->set = q;
191 1.19.8.1 tls s->cnt = 0;
192 1.1 glass s->str = p + 2;
193 1.16 christos return 1;
194 1.1 glass case '=': /* "[=equiv=]" */
195 1.1 glass if ((p = strstr(s->str + 2, "=]")) == NULL)
196 1.16 christos return 0;
197 1.1 glass s->str += 2;
198 1.1 glass genequiv(s);
199 1.19.8.1 tls s->str = p + 2;
200 1.16 christos return 1;
201 1.1 glass default: /* "[\###*n]" or "[#*n]" */
202 1.1 glass if ((p = strpbrk(s->str + 2, "*]")) == NULL)
203 1.16 christos return 0;
204 1.8 lukem if (p[0] != '*' || strchr(p, ']') == NULL)
205 1.16 christos return 0;
206 1.1 glass s->str += 1;
207 1.1 glass genseq(s);
208 1.16 christos return 1;
209 1.1 glass }
210 1.1 glass /* NOTREACHED */
211 1.1 glass }
212 1.1 glass
/* A character class name paired with the <ctype.h> predicate for it. */
typedef struct {
	const char *name;
	int (*func)(int);
} CLASS;

/*
 * Known character classes.  genclass() looks names up with bsearch(3),
 * so the entries must stay sorted by name.
 */
static const CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum  },
	{ .name = "alpha",  .func = isalpha  },
	{ .name = "blank",  .func = isblank  },
	{ .name = "cntrl",  .func = iscntrl  },
	{ .name = "digit",  .func = isdigit  },
	{ .name = "graph",  .func = isgraph  },
	{ .name = "lower",  .func = islower  },
	{ .name = "print",  .func = isprint  },
	{ .name = "punct",  .func = ispunct  },
	{ .name = "space",  .func = isspace  },
	{ .name = "upper",  .func = isupper  },
	{ .name = "xdigit", .func = isxdigit },
};
232 1.1 glass
/*
 * Search key for the class table: a class name given as pointer plus
 * length, because the name inside the user's string is not
 * NUL-terminated.
 */
typedef struct {
	const char *name;	/* first character of the name */
	size_t len;		/* number of characters in the name */
} CLASSKEY;
237 1.19.8.1 tls
238 1.19.8.1 tls static int *
239 1.19.8.1 tls genclass(const char *class, size_t len)
240 1.1 glass {
241 1.19.8.1 tls int ch;
242 1.16 christos const CLASS *cp;
243 1.19.8.1 tls CLASSKEY key;
244 1.1 glass int *p;
245 1.19.8.1 tls unsigned pos, num;
246 1.1 glass
247 1.19.8.1 tls /* Find the class */
248 1.19.8.1 tls key.name = class;
249 1.19.8.1 tls key.len = len;
250 1.19.8.1 tls cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]),
251 1.19.8.1 tls c_class);
252 1.19.8.1 tls if (cp == NULL) {
253 1.19.8.1 tls errx(1, "unknown class %.*s", (int)len, class);
254 1.19.8.1 tls }
255 1.1 glass
256 1.19.8.1 tls /*
257 1.19.8.1 tls * Figure out what characters are in the class
258 1.19.8.1 tls */
259 1.19.8.1 tls
260 1.19.8.1 tls num = NCHARS + 1;
261 1.19.8.1 tls p = malloc(num * sizeof(*p));
262 1.19.8.1 tls if (p == NULL) {
263 1.8 lukem err(1, "malloc");
264 1.19.8.1 tls }
265 1.19.8.1 tls
266 1.19.8.1 tls pos = 0;
267 1.19.8.1 tls for (ch = 0; ch < NCHARS; ch++) {
268 1.19.8.1 tls if (cp->func(ch)) {
269 1.19.8.1 tls p[pos++] = ch;
270 1.19.8.1 tls }
271 1.19.8.1 tls }
272 1.19 christos
273 1.19.8.1 tls p[pos++] = OOBCH;
274 1.19.8.1 tls for (; pos < num; pos++) {
275 1.19.8.1 tls p[pos] = 0;
276 1.19.8.1 tls }
277 1.1 glass
278 1.19.8.1 tls return p;
279 1.1 glass }
280 1.1 glass
281 1.1 glass static int
282 1.19.8.1 tls c_class(const void *av, const void *bv)
283 1.1 glass {
284 1.19.8.1 tls const CLASSKEY *a = av;
285 1.19.8.1 tls const CLASS *b = bv;
286 1.19.8.1 tls size_t blen;
287 1.19.8.1 tls int r;
288 1.19.8.1 tls
289 1.19.8.1 tls blen = strlen(b->name);
290 1.19.8.1 tls r = strncmp(a->name, b->name, a->len);
291 1.19.8.1 tls if (r != 0) {
292 1.19.8.1 tls return r;
293 1.19.8.1 tls }
294 1.19.8.1 tls if (a->len < blen) {
295 1.19.8.1 tls /* someone gave us a prefix of the right name */
296 1.19.8.1 tls return -1;
297 1.19.8.1 tls }
298 1.19.8.1 tls assert(a-> len == blen);
299 1.19.8.1 tls return 0;
300 1.1 glass }
301 1.1 glass
302 1.1 glass /*
303 1.1 glass * English doesn't have any equivalence classes, so for now
304 1.1 glass * we just syntax check and grab the character.
305 1.1 glass */
306 1.1 glass static void
307 1.13 joerg genequiv(STR *s)
308 1.1 glass {
309 1.19.8.1 tls int ch;
310 1.19.8.1 tls
311 1.19.8.1 tls ch = (unsigned char)s->str[0];
312 1.19.8.1 tls if (ch == '\\') {
313 1.1 glass s->equiv[0] = backslash(s);
314 1.1 glass } else {
315 1.19.8.1 tls s->equiv[0] = ch;
316 1.19.8.1 tls s->str++;
317 1.19.8.1 tls }
318 1.19.8.1 tls if (s->str[0] != '=') {
319 1.19.8.1 tls errx(1, "Misplaced equivalence equals sign");
320 1.1 glass }
321 1.19.8.1 tls s->str++;
322 1.19.8.1 tls if (s->str[0] != ']') {
323 1.19.8.1 tls errx(1, "Misplaced equivalence right bracket");
324 1.19.8.1 tls }
325 1.19.8.1 tls s->str++;
326 1.19.8.1 tls
327 1.1 glass s->cnt = 0;
328 1.1 glass s->state = SET;
329 1.1 glass s->set = s->equiv;
330 1.1 glass }
331 1.1 glass
332 1.1 glass static int
333 1.13 joerg genrange(STR *s)
334 1.1 glass {
335 1.1 glass int stopval;
336 1.19.8.1 tls const char *savestart;
337 1.1 glass
338 1.19.8.1 tls savestart = s->str++;
339 1.19.8.1 tls stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++;
340 1.19.8.1 tls if (stopval < (unsigned char)s->lastch) {
341 1.1 glass s->str = savestart;
342 1.16 christos return 0;
343 1.1 glass }
344 1.1 glass s->cnt = stopval - s->lastch + 1;
345 1.1 glass s->state = RANGE;
346 1.1 glass --s->lastch;
347 1.16 christos return 1;
348 1.1 glass }
349 1.1 glass
350 1.1 glass static void
351 1.13 joerg genseq(STR *s)
352 1.1 glass {
353 1.1 glass char *ep;
354 1.1 glass
355 1.19.8.1 tls if (s->which == STRING1) {
356 1.19.8.1 tls errx(1, "Sequences only valid in string2");
357 1.19.8.1 tls }
358 1.1 glass
359 1.19.8.1 tls if (*s->str == '\\') {
360 1.1 glass s->lastch = backslash(s);
361 1.19.8.1 tls } else {
362 1.19.8.1 tls s->lastch = (unsigned char)*s->str++;
363 1.19.8.1 tls }
364 1.19.8.1 tls if (*s->str != '*') {
365 1.19.8.1 tls errx(1, "Misplaced sequence asterisk");
366 1.19.8.1 tls }
367 1.1 glass
368 1.19.8.1 tls s->str++;
369 1.19.8.1 tls switch (s->str[0]) {
370 1.1 glass case '\\':
371 1.1 glass s->cnt = backslash(s);
372 1.1 glass break;
373 1.1 glass case ']':
374 1.1 glass s->cnt = 0;
375 1.1 glass ++s->str;
376 1.1 glass break;
377 1.1 glass default:
378 1.19.8.1 tls if (isdigit((unsigned char)s->str[0])) {
379 1.1 glass s->cnt = strtol(s->str, &ep, 0);
380 1.1 glass if (*ep == ']') {
381 1.1 glass s->str = ep + 1;
382 1.1 glass break;
383 1.1 glass }
384 1.1 glass }
385 1.8 lukem errx(1, "illegal sequence count");
386 1.1 glass /* NOTREACHED */
387 1.1 glass }
388 1.1 glass
389 1.1 glass s->state = s->cnt ? SEQUENCE : INFINITE;
390 1.1 glass }
391 1.1 glass
392 1.1 glass /*
393 1.1 glass * Translate \??? into a character. Up to 3 octal digits, if no digits either
394 1.1 glass * an escape code or a literal character.
395 1.1 glass */
396 1.1 glass static int
397 1.13 joerg backslash(STR *s)
398 1.1 glass {
399 1.8 lukem int ch, cnt, val;
400 1.1 glass
401 1.19.8.1 tls cnt = val = 0;
402 1.19.8.1 tls for (;;) {
403 1.19.8.1 tls /* Consume the character we're already on. */
404 1.19.8.1 tls s->str++;
405 1.19.8.1 tls
406 1.19.8.1 tls /* Look at the next character. */
407 1.19.8.1 tls ch = (unsigned char)s->str[0];
408 1.19.8.1 tls if (!isascii(ch) || !isdigit(ch)) {
409 1.1 glass break;
410 1.19.8.1 tls }
411 1.1 glass val = val * 8 + ch - '0';
412 1.1 glass if (++cnt == 3) {
413 1.19.8.1 tls /* Enough digits; consume this one and stop */
414 1.1 glass ++s->str;
415 1.1 glass break;
416 1.1 glass }
417 1.1 glass }
418 1.19.8.1 tls if (cnt) {
419 1.19.8.1 tls /* We saw digits, so return their value */
420 1.16 christos return val;
421 1.19.8.1 tls }
422 1.19.8.1 tls if (ch == '\0') {
423 1.19.8.1 tls /* \<end> -> \ */
424 1.19.8.1 tls s->state = EOS;
425 1.19.8.1 tls return '\\';
426 1.19.8.1 tls }
427 1.19.8.1 tls
428 1.19.8.1 tls /* Consume the escaped character */
429 1.19.8.1 tls s->str++;
430 1.19.8.1 tls
431 1.1 glass switch (ch) {
432 1.17 christos case 'a': /* escape characters */
433 1.17 christos return '\7';
434 1.17 christos case 'b':
435 1.17 christos return '\b';
436 1.17 christos case 'e':
437 1.17 christos return '\033';
438 1.17 christos case 'f':
439 1.17 christos return '\f';
440 1.17 christos case 'n':
441 1.17 christos return '\n';
442 1.17 christos case 'r':
443 1.17 christos return '\r';
444 1.17 christos case 't':
445 1.17 christos return '\t';
446 1.17 christos case 'v':
447 1.17 christos return '\13';
448 1.19.8.1 tls default: /* \q -> q */
449 1.17 christos return ch;
450 1.1 glass }
451 1.1 glass }
452