str.c revision 1.20 1 1.20 dholland /* $NetBSD: str.c,v 1.20 2013/08/10 23:54:41 dholland Exp $ */
2 1.6 jtc
3 1.1 glass /*-
4 1.6 jtc * Copyright (c) 1991, 1993
5 1.6 jtc * The Regents of the University of California. All rights reserved.
6 1.1 glass *
7 1.1 glass * Redistribution and use in source and binary forms, with or without
8 1.1 glass * modification, are permitted provided that the following conditions
9 1.1 glass * are met:
10 1.1 glass * 1. Redistributions of source code must retain the above copyright
11 1.1 glass * notice, this list of conditions and the following disclaimer.
12 1.1 glass * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 glass * notice, this list of conditions and the following disclaimer in the
14 1.1 glass * documentation and/or other materials provided with the distribution.
15 1.10 agc * 3. Neither the name of the University nor the names of its contributors
16 1.1 glass * may be used to endorse or promote products derived from this software
17 1.1 glass * without specific prior written permission.
18 1.1 glass *
19 1.1 glass * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 1.1 glass * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 glass * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 glass * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 1.1 glass * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 glass * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 glass * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 glass * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 glass * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 glass * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 glass * SUCH DAMAGE.
30 1.1 glass */
31 1.1 glass
32 1.8 lukem #include <sys/cdefs.h>
33 1.1 glass #ifndef lint
34 1.6 jtc #if 0
35 1.7 jtc static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
36 1.6 jtc #endif
37 1.20 dholland __RCSID("$NetBSD: str.c,v 1.20 2013/08/10 23:54:41 dholland Exp $");
38 1.1 glass #endif /* not lint */
39 1.1 glass
40 1.1 glass #include <sys/types.h>
41 1.1 glass
42 1.8 lukem #include <err.h>
43 1.1 glass #include <errno.h>
44 1.1 glass #include <stddef.h>
45 1.1 glass #include <stdio.h>
46 1.1 glass #include <stdlib.h>
47 1.1 glass #include <string.h>
48 1.4 jtc #include <ctype.h>
49 1.20 dholland #include <assert.h>
50 1.1 glass
51 1.1 glass #include "extern.h"
52 1.1 glass
53 1.13 joerg static int backslash(STR *);
54 1.13 joerg static int bracket(STR *);
55 1.13 joerg static int c_class(const void *, const void *);
56 1.13 joerg static void genclass(STR *);
57 1.13 joerg static void genequiv(STR *);
58 1.13 joerg static int genrange(STR *);
59 1.13 joerg static void genseq(STR *);
60 1.1 glass
61 1.1 glass int
62 1.20 dholland next(STR *s, int *ret)
63 1.1 glass {
64 1.8 lukem int ch;
65 1.1 glass
66 1.1 glass switch (s->state) {
67 1.1 glass case EOS:
68 1.20 dholland *ret = s->lastch;
69 1.16 christos return 0;
70 1.1 glass case INFINITE:
71 1.20 dholland *ret = s->lastch;
72 1.16 christos return 1;
73 1.1 glass case NORMAL:
74 1.1 glass switch (ch = *s->str) {
75 1.1 glass case '\0':
76 1.1 glass s->state = EOS;
77 1.20 dholland *ret = s->lastch;
78 1.16 christos return 0;
79 1.1 glass case '\\':
80 1.1 glass s->lastch = backslash(s);
81 1.1 glass break;
82 1.1 glass case '[':
83 1.1 glass if (bracket(s))
84 1.20 dholland return next(s, ret);
85 1.1 glass /* FALLTHROUGH */
86 1.1 glass default:
87 1.1 glass ++s->str;
88 1.1 glass s->lastch = ch;
89 1.1 glass break;
90 1.1 glass }
91 1.1 glass
92 1.1 glass /* We can start a range at any time. */
93 1.20 dholland if (s->str[0] == '-' && genrange(s)) {
94 1.20 dholland return next(s, ret);
95 1.20 dholland }
96 1.20 dholland *ret = s->lastch;
97 1.16 christos return 1;
98 1.1 glass case RANGE:
99 1.1 glass if (s->cnt-- == 0) {
100 1.1 glass s->state = NORMAL;
101 1.20 dholland return next(s, ret);
102 1.1 glass }
103 1.1 glass ++s->lastch;
104 1.20 dholland *ret = s->lastch;
105 1.16 christos return 1;
106 1.1 glass case SEQUENCE:
107 1.1 glass if (s->cnt-- == 0) {
108 1.1 glass s->state = NORMAL;
109 1.20 dholland return next(s, ret);
110 1.1 glass }
111 1.20 dholland *ret = s->lastch;
112 1.16 christos return 1;
113 1.1 glass case SET:
114 1.1 glass if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
115 1.1 glass s->state = NORMAL;
116 1.20 dholland return next(s, ret);
117 1.1 glass }
118 1.20 dholland *ret = s->lastch;
119 1.16 christos return 1;
120 1.1 glass }
121 1.1 glass /* NOTREACHED */
122 1.20 dholland assert(0);
123 1.20 dholland *ret = s->lastch;
124 1.16 christos return 0;
125 1.1 glass }
126 1.1 glass
127 1.1 glass static int
128 1.13 joerg bracket(STR *s)
129 1.1 glass {
130 1.8 lukem char *p;
131 1.1 glass
132 1.1 glass switch (s->str[1]) {
133 1.1 glass case ':': /* "[:class:]" */
134 1.1 glass if ((p = strstr(s->str + 2, ":]")) == NULL)
135 1.16 christos return 0;
136 1.1 glass *p = '\0';
137 1.1 glass s->str += 2;
138 1.1 glass genclass(s);
139 1.1 glass s->str = p + 2;
140 1.16 christos return 1;
141 1.1 glass case '=': /* "[=equiv=]" */
142 1.1 glass if ((p = strstr(s->str + 2, "=]")) == NULL)
143 1.16 christos return 0;
144 1.1 glass s->str += 2;
145 1.1 glass genequiv(s);
146 1.16 christos return 1;
147 1.1 glass default: /* "[\###*n]" or "[#*n]" */
148 1.1 glass if ((p = strpbrk(s->str + 2, "*]")) == NULL)
149 1.16 christos return 0;
150 1.8 lukem if (p[0] != '*' || strchr(p, ']') == NULL)
151 1.16 christos return 0;
152 1.1 glass s->str += 1;
153 1.1 glass genseq(s);
154 1.16 christos return 1;
155 1.1 glass }
156 1.1 glass /* NOTREACHED */
157 1.1 glass }
158 1.1 glass
/* Pairs a character-class name with the <ctype.h> predicate that tests it. */
typedef struct {
	const char *name;	/* class name, as written inside "[:" ":]" */
	int (*func)(int);	/* nonzero if the character is in the class */
} CLASS;

/*
 * Table of supported classes.  The entries are kept in strcmp() order by
 * name because genclass() looks names up with bsearch().
 */
static const CLASS classes[] = {
	{ "alnum",	isalnum },
	{ "alpha",	isalpha },
	{ "blank",	isblank },
	{ "cntrl",	iscntrl },
	{ "digit",	isdigit },
	{ "graph",	isgraph },
	{ "lower",	islower },
	{ "print",	isprint },
	{ "punct",	ispunct },
	{ "space",	isspace },
	{ "upper",	isupper },
	{ "xdigit",	isxdigit },
};
178 1.1 glass
179 1.1 glass static void
180 1.13 joerg genclass(STR *s)
181 1.1 glass {
182 1.19 christos int cnt;
183 1.16 christos const CLASS *cp;
184 1.16 christos CLASS tmp;
185 1.1 glass int *p;
186 1.1 glass
187 1.1 glass tmp.name = s->str;
188 1.16 christos if ((cp = bsearch(&tmp, classes, sizeof(classes) /
189 1.16 christos sizeof(*cp), sizeof(*cp), c_class)) == NULL)
190 1.8 lukem errx(1, "unknown class %s", s->str);
191 1.1 glass
192 1.16 christos if ((s->set = p = malloc((NCHARS + 1) * sizeof(*p))) == NULL)
193 1.8 lukem err(1, "malloc");
194 1.19 christos
195 1.19 christos for (cnt = 0; cnt < NCHARS; ++cnt)
196 1.19 christos if ((*cp->func)(cnt))
197 1.1 glass *p++ = cnt;
198 1.19 christos *p++ = OOBCH;
199 1.19 christos memset(p, 0, NCHARS + 1 - (p - s->set));
200 1.1 glass
201 1.1 glass s->cnt = 0;
202 1.1 glass s->state = SET;
203 1.1 glass }
204 1.1 glass
205 1.1 glass static int
206 1.13 joerg c_class(const void *a, const void *b)
207 1.1 glass {
208 1.16 christos return strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name);
209 1.1 glass }
210 1.1 glass
211 1.1 glass /*
212 1.1 glass * English doesn't have any equivalence classes, so for now
213 1.1 glass * we just syntax check and grab the character.
214 1.1 glass */
215 1.1 glass static void
216 1.13 joerg genequiv(STR *s)
217 1.1 glass {
218 1.1 glass if (*s->str == '\\') {
219 1.1 glass s->equiv[0] = backslash(s);
220 1.1 glass if (*s->str != '=')
221 1.8 lukem errx(1, "misplaced equivalence equals sign");
222 1.1 glass } else {
223 1.1 glass s->equiv[0] = s->str[0];
224 1.1 glass if (s->str[1] != '=')
225 1.8 lukem errx(1, "misplaced equivalence equals sign");
226 1.1 glass }
227 1.1 glass s->str += 2;
228 1.1 glass s->cnt = 0;
229 1.1 glass s->state = SET;
230 1.1 glass s->set = s->equiv;
231 1.1 glass }
232 1.1 glass
233 1.1 glass static int
234 1.13 joerg genrange(STR *s)
235 1.1 glass {
236 1.1 glass int stopval;
237 1.1 glass char *savestart;
238 1.1 glass
239 1.1 glass savestart = s->str;
240 1.5 jtc stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
241 1.7 jtc if (stopval < (u_char)s->lastch) {
242 1.1 glass s->str = savestart;
243 1.16 christos return 0;
244 1.1 glass }
245 1.1 glass s->cnt = stopval - s->lastch + 1;
246 1.1 glass s->state = RANGE;
247 1.1 glass --s->lastch;
248 1.16 christos return 1;
249 1.1 glass }
250 1.1 glass
251 1.1 glass static void
252 1.13 joerg genseq(STR *s)
253 1.1 glass {
254 1.1 glass char *ep;
255 1.1 glass
256 1.1 glass if (s->which == STRING1)
257 1.8 lukem errx(1, "sequences only valid in string2");
258 1.1 glass
259 1.1 glass if (*s->str == '\\')
260 1.1 glass s->lastch = backslash(s);
261 1.1 glass else
262 1.1 glass s->lastch = *s->str++;
263 1.1 glass if (*s->str != '*')
264 1.8 lukem errx(1, "misplaced sequence asterisk");
265 1.1 glass
266 1.1 glass switch (*++s->str) {
267 1.1 glass case '\\':
268 1.1 glass s->cnt = backslash(s);
269 1.1 glass break;
270 1.1 glass case ']':
271 1.1 glass s->cnt = 0;
272 1.1 glass ++s->str;
273 1.1 glass break;
274 1.1 glass default:
275 1.1 glass if (isdigit(*s->str)) {
276 1.1 glass s->cnt = strtol(s->str, &ep, 0);
277 1.1 glass if (*ep == ']') {
278 1.1 glass s->str = ep + 1;
279 1.1 glass break;
280 1.1 glass }
281 1.1 glass }
282 1.8 lukem errx(1, "illegal sequence count");
283 1.1 glass /* NOTREACHED */
284 1.1 glass }
285 1.1 glass
286 1.1 glass s->state = s->cnt ? SEQUENCE : INFINITE;
287 1.1 glass }
288 1.1 glass
289 1.1 glass /*
290 1.1 glass * Translate \??? into a character. Up to 3 octal digits, if no digits either
291 1.1 glass * an escape code or a literal character.
292 1.1 glass */
293 1.1 glass static int
294 1.13 joerg backslash(STR *s)
295 1.1 glass {
296 1.8 lukem int ch, cnt, val;
297 1.1 glass
298 1.1 glass for (cnt = val = 0;;) {
299 1.1 glass ch = *++s->str;
300 1.1 glass if (!isascii(ch) || !isdigit(ch))
301 1.1 glass break;
302 1.1 glass val = val * 8 + ch - '0';
303 1.1 glass if (++cnt == 3) {
304 1.1 glass ++s->str;
305 1.1 glass break;
306 1.1 glass }
307 1.1 glass }
308 1.1 glass if (cnt)
309 1.16 christos return val;
310 1.1 glass if (ch != '\0')
311 1.1 glass ++s->str;
312 1.1 glass switch (ch) {
313 1.17 christos case 'a': /* escape characters */
314 1.17 christos return '\7';
315 1.17 christos case 'b':
316 1.17 christos return '\b';
317 1.17 christos case 'e':
318 1.17 christos return '\033';
319 1.17 christos case 'f':
320 1.17 christos return '\f';
321 1.17 christos case 'n':
322 1.17 christos return '\n';
323 1.17 christos case 'r':
324 1.17 christos return '\r';
325 1.17 christos case 't':
326 1.17 christos return '\t';
327 1.17 christos case 'v':
328 1.17 christos return '\13';
329 1.17 christos case '\0': /* \" -> \ */
330 1.17 christos s->state = EOS;
331 1.17 christos return '\\';
332 1.17 christos default: /* \x" -> x */
333 1.17 christos return ch;
334 1.1 glass }
335 1.1 glass }
336