str.c revision 1.16 1 1.16 christos /* $NetBSD: str.c,v 1.16 2011/09/08 01:18:05 christos Exp $ */
2 1.6 jtc
3 1.1 glass /*-
4 1.6 jtc * Copyright (c) 1991, 1993
5 1.6 jtc * The Regents of the University of California. All rights reserved.
6 1.1 glass *
7 1.1 glass * Redistribution and use in source and binary forms, with or without
8 1.1 glass * modification, are permitted provided that the following conditions
9 1.1 glass * are met:
10 1.1 glass * 1. Redistributions of source code must retain the above copyright
11 1.1 glass * notice, this list of conditions and the following disclaimer.
12 1.1 glass * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 glass * notice, this list of conditions and the following disclaimer in the
14 1.1 glass * documentation and/or other materials provided with the distribution.
15 1.10 agc * 3. Neither the name of the University nor the names of its contributors
16 1.1 glass * may be used to endorse or promote products derived from this software
17 1.1 glass * without specific prior written permission.
18 1.1 glass *
19 1.1 glass * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 1.1 glass * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.1 glass * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.1 glass * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 1.1 glass * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.1 glass * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.1 glass * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.1 glass * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.1 glass * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.1 glass * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.1 glass * SUCH DAMAGE.
30 1.1 glass */
31 1.1 glass
32 1.8 lukem #include <sys/cdefs.h>
33 1.1 glass #ifndef lint
34 1.6 jtc #if 0
35 1.7 jtc static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
36 1.6 jtc #endif
37 1.16 christos __RCSID("$NetBSD: str.c,v 1.16 2011/09/08 01:18:05 christos Exp $");
38 1.1 glass #endif /* not lint */
39 1.1 glass
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
49 1.1 glass
50 1.1 glass #include "extern.h"
51 1.1 glass
52 1.13 joerg static int backslash(STR *);
53 1.13 joerg static int bracket(STR *);
54 1.13 joerg static int c_class(const void *, const void *);
55 1.13 joerg static void genclass(STR *);
56 1.13 joerg static void genequiv(STR *);
57 1.13 joerg static int genrange(STR *);
58 1.13 joerg static void genseq(STR *);
59 1.1 glass
60 1.1 glass int
61 1.13 joerg next(STR *s)
62 1.1 glass {
63 1.8 lukem int ch;
64 1.1 glass
65 1.1 glass switch (s->state) {
66 1.1 glass case EOS:
67 1.16 christos return 0;
68 1.1 glass case INFINITE:
69 1.16 christos return 1;
70 1.1 glass case NORMAL:
71 1.1 glass switch (ch = *s->str) {
72 1.1 glass case '\0':
73 1.1 glass s->state = EOS;
74 1.16 christos return 0;
75 1.1 glass case '\\':
76 1.1 glass s->lastch = backslash(s);
77 1.1 glass break;
78 1.1 glass case '[':
79 1.1 glass if (bracket(s))
80 1.16 christos return next(s);
81 1.1 glass /* FALLTHROUGH */
82 1.1 glass default:
83 1.1 glass ++s->str;
84 1.1 glass s->lastch = ch;
85 1.1 glass break;
86 1.1 glass }
87 1.1 glass
88 1.1 glass /* We can start a range at any time. */
89 1.1 glass if (s->str[0] == '-' && genrange(s))
90 1.16 christos return next(s);
91 1.16 christos return 1;
92 1.1 glass case RANGE:
93 1.1 glass if (s->cnt-- == 0) {
94 1.1 glass s->state = NORMAL;
95 1.16 christos return next(s);
96 1.1 glass }
97 1.1 glass ++s->lastch;
98 1.16 christos return 1;
99 1.1 glass case SEQUENCE:
100 1.1 glass if (s->cnt-- == 0) {
101 1.1 glass s->state = NORMAL;
102 1.16 christos return next(s);
103 1.1 glass }
104 1.16 christos return 1;
105 1.1 glass case SET:
106 1.1 glass if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
107 1.1 glass s->state = NORMAL;
108 1.16 christos return next(s);
109 1.1 glass }
110 1.16 christos return 1;
111 1.1 glass }
112 1.1 glass /* NOTREACHED */
113 1.16 christos return 0;
114 1.1 glass }
115 1.1 glass
116 1.1 glass static int
117 1.13 joerg bracket(STR *s)
118 1.1 glass {
119 1.8 lukem char *p;
120 1.1 glass
121 1.1 glass switch (s->str[1]) {
122 1.1 glass case ':': /* "[:class:]" */
123 1.1 glass if ((p = strstr(s->str + 2, ":]")) == NULL)
124 1.16 christos return 0;
125 1.1 glass *p = '\0';
126 1.1 glass s->str += 2;
127 1.1 glass genclass(s);
128 1.1 glass s->str = p + 2;
129 1.16 christos return 1;
130 1.1 glass case '=': /* "[=equiv=]" */
131 1.1 glass if ((p = strstr(s->str + 2, "=]")) == NULL)
132 1.16 christos return 0;
133 1.1 glass s->str += 2;
134 1.1 glass genequiv(s);
135 1.16 christos return 1;
136 1.1 glass default: /* "[\###*n]" or "[#*n]" */
137 1.1 glass if ((p = strpbrk(s->str + 2, "*]")) == NULL)
138 1.16 christos return 0;
139 1.8 lukem if (p[0] != '*' || strchr(p, ']') == NULL)
140 1.16 christos return 0;
141 1.1 glass s->str += 1;
142 1.1 glass genseq(s);
143 1.16 christos return 1;
144 1.1 glass }
145 1.1 glass /* NOTREACHED */
146 1.1 glass }
147 1.1 glass
/* A character class name paired with the <ctype.h> test that defines it. */
typedef struct {
	const char *name;
	int (*func)(int);
} CLASS;

/*
 * Known character classes.  The entries must remain sorted by name
 * because genclass() locates them with bsearch().
 */
static const CLASS classes[] = {
	{ .name = "alnum",  .func = isalnum  },
	{ .name = "alpha",  .func = isalpha  },
	{ .name = "blank",  .func = isblank  },
	{ .name = "cntrl",  .func = iscntrl  },
	{ .name = "digit",  .func = isdigit  },
	{ .name = "graph",  .func = isgraph  },
	{ .name = "lower",  .func = islower  },
	{ .name = "print",  .func = isprint  },
	{ .name = "punct",  .func = ispunct  },
	{ .name = "space",  .func = isspace  },
	{ .name = "upper",  .func = isupper  },
	{ .name = "xdigit", .func = isxdigit },
};
167 1.1 glass
168 1.1 glass static void
169 1.13 joerg genclass(STR *s)
170 1.1 glass {
171 1.13 joerg int cnt, (*func)(int);
172 1.16 christos const CLASS *cp;
173 1.16 christos CLASS tmp;
174 1.1 glass int *p;
175 1.1 glass
176 1.1 glass tmp.name = s->str;
177 1.16 christos if ((cp = bsearch(&tmp, classes, sizeof(classes) /
178 1.16 christos sizeof(*cp), sizeof(*cp), c_class)) == NULL)
179 1.8 lukem errx(1, "unknown class %s", s->str);
180 1.1 glass
181 1.16 christos if ((s->set = p = malloc((NCHARS + 1) * sizeof(*p))) == NULL)
182 1.8 lukem err(1, "malloc");
183 1.1 glass for (cnt = 0, func = cp->func; cnt < NCHARS; ++cnt)
184 1.1 glass if ((func)(cnt))
185 1.1 glass *p++ = cnt;
186 1.1 glass *p = OOBCH;
187 1.1 glass
188 1.1 glass s->cnt = 0;
189 1.1 glass s->state = SET;
190 1.1 glass }
191 1.1 glass
192 1.1 glass static int
193 1.13 joerg c_class(const void *a, const void *b)
194 1.1 glass {
195 1.16 christos return strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name);
196 1.1 glass }
197 1.1 glass
198 1.1 glass /*
199 1.1 glass * English doesn't have any equivalence classes, so for now
200 1.1 glass * we just syntax check and grab the character.
201 1.1 glass */
202 1.1 glass static void
203 1.13 joerg genequiv(STR *s)
204 1.1 glass {
205 1.1 glass if (*s->str == '\\') {
206 1.1 glass s->equiv[0] = backslash(s);
207 1.1 glass if (*s->str != '=')
208 1.8 lukem errx(1, "misplaced equivalence equals sign");
209 1.1 glass } else {
210 1.1 glass s->equiv[0] = s->str[0];
211 1.1 glass if (s->str[1] != '=')
212 1.8 lukem errx(1, "misplaced equivalence equals sign");
213 1.1 glass }
214 1.1 glass s->str += 2;
215 1.1 glass s->cnt = 0;
216 1.1 glass s->state = SET;
217 1.1 glass s->set = s->equiv;
218 1.1 glass }
219 1.1 glass
220 1.1 glass static int
221 1.13 joerg genrange(STR *s)
222 1.1 glass {
223 1.1 glass int stopval;
224 1.1 glass char *savestart;
225 1.1 glass
226 1.1 glass savestart = s->str;
227 1.5 jtc stopval = *++s->str == '\\' ? backslash(s) : *s->str++;
228 1.7 jtc if (stopval < (u_char)s->lastch) {
229 1.1 glass s->str = savestart;
230 1.16 christos return 0;
231 1.1 glass }
232 1.1 glass s->cnt = stopval - s->lastch + 1;
233 1.1 glass s->state = RANGE;
234 1.1 glass --s->lastch;
235 1.16 christos return 1;
236 1.1 glass }
237 1.1 glass
238 1.1 glass static void
239 1.13 joerg genseq(STR *s)
240 1.1 glass {
241 1.1 glass char *ep;
242 1.1 glass
243 1.1 glass if (s->which == STRING1)
244 1.8 lukem errx(1, "sequences only valid in string2");
245 1.1 glass
246 1.1 glass if (*s->str == '\\')
247 1.1 glass s->lastch = backslash(s);
248 1.1 glass else
249 1.1 glass s->lastch = *s->str++;
250 1.1 glass if (*s->str != '*')
251 1.8 lukem errx(1, "misplaced sequence asterisk");
252 1.1 glass
253 1.1 glass switch (*++s->str) {
254 1.1 glass case '\\':
255 1.1 glass s->cnt = backslash(s);
256 1.1 glass break;
257 1.1 glass case ']':
258 1.1 glass s->cnt = 0;
259 1.1 glass ++s->str;
260 1.1 glass break;
261 1.1 glass default:
262 1.1 glass if (isdigit(*s->str)) {
263 1.1 glass s->cnt = strtol(s->str, &ep, 0);
264 1.1 glass if (*ep == ']') {
265 1.1 glass s->str = ep + 1;
266 1.1 glass break;
267 1.1 glass }
268 1.1 glass }
269 1.8 lukem errx(1, "illegal sequence count");
270 1.1 glass /* NOTREACHED */
271 1.1 glass }
272 1.1 glass
273 1.1 glass s->state = s->cnt ? SEQUENCE : INFINITE;
274 1.1 glass }
275 1.1 glass
276 1.1 glass /*
277 1.1 glass * Translate \??? into a character. Up to 3 octal digits, if no digits either
278 1.1 glass * an escape code or a literal character.
279 1.1 glass */
280 1.1 glass static int
281 1.13 joerg backslash(STR *s)
282 1.1 glass {
283 1.8 lukem int ch, cnt, val;
284 1.1 glass
285 1.1 glass for (cnt = val = 0;;) {
286 1.1 glass ch = *++s->str;
287 1.1 glass if (!isascii(ch) || !isdigit(ch))
288 1.1 glass break;
289 1.1 glass val = val * 8 + ch - '0';
290 1.1 glass if (++cnt == 3) {
291 1.1 glass ++s->str;
292 1.1 glass break;
293 1.1 glass }
294 1.1 glass }
295 1.1 glass if (cnt)
296 1.16 christos return val;
297 1.1 glass if (ch != '\0')
298 1.1 glass ++s->str;
299 1.1 glass switch (ch) {
300 1.16 christos case 'a': /* escape characters */
301 1.16 christos return '\7';
302 1.16 christos case 'b':
303 1.16 christos return '\b';
304 1.16 christos case 'e':
305 1.16 christos return '\033';
306 1.16 christos case 'f':
307 1.16 christos return '\f';
308 1.16 christos case 'n':
309 1.16 christos return '\n';
310 1.16 christos case 'r':
311 1.16 christos return '\r';
312 1.16 christos case 't':
313 1.16 christos return '\t';
314 1.16 christos case 'v':
315 1.16 christos return '\13';
316 1.16 christos case '\0': /* \" -> \ */
317 1.16 christos s->state = EOS;
318 1.16 christos return '\\';
319 1.16 christos default: /* \x" -> x */
320 1.16 christos return ch;
321 1.1 glass }
322 1.1 glass }
323