1 1.30 leot /* $NetBSD: str.c,v 1.30 2018/05/26 11:20:30 leot Exp $ */ 2 1.6 jtc 3 1.1 glass /*- 4 1.6 jtc * Copyright (c) 1991, 1993 5 1.6 jtc * The Regents of the University of California. All rights reserved. 6 1.1 glass * 7 1.1 glass * Redistribution and use in source and binary forms, with or without 8 1.1 glass * modification, are permitted provided that the following conditions 9 1.1 glass * are met: 10 1.1 glass * 1. Redistributions of source code must retain the above copyright 11 1.1 glass * notice, this list of conditions and the following disclaimer. 12 1.1 glass * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 glass * notice, this list of conditions and the following disclaimer in the 14 1.1 glass * documentation and/or other materials provided with the distribution. 15 1.10 agc * 3. Neither the name of the University nor the names of its contributors 16 1.1 glass * may be used to endorse or promote products derived from this software 17 1.1 glass * without specific prior written permission. 18 1.1 glass * 19 1.1 glass * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 1.1 glass * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.1 glass * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.1 glass * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 1.1 glass * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.1 glass * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.1 glass * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.1 glass * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.1 glass * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.1 glass * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.1 glass * SUCH DAMAGE. 30 1.1 glass */ 31 1.1 glass 32 1.8 lukem #include <sys/cdefs.h> 33 1.1 glass #ifndef lint 34 1.6 jtc #if 0 35 1.7 jtc static char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95"; 36 1.6 jtc #endif 37 1.30 leot __RCSID("$NetBSD: str.c,v 1.30 2018/05/26 11:20:30 leot Exp $"); 38 1.1 glass #endif /* not lint */ 39 1.1 glass 40 1.1 glass #include <sys/types.h> 41 1.1 glass 42 1.8 lukem #include <err.h> 43 1.1 glass #include <errno.h> 44 1.1 glass #include <stddef.h> 45 1.1 glass #include <stdio.h> 46 1.1 glass #include <stdlib.h> 47 1.1 glass #include <string.h> 48 1.4 jtc #include <ctype.h> 49 1.20 dholland #include <assert.h> 50 1.1 glass 51 1.1 glass #include "extern.h" 52 1.1 glass 53 1.21 dholland struct str { 54 1.21 dholland enum { STRING1, STRING2 } which; 55 1.21 dholland enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; 56 1.29 dholland int cnt; /* character count */ 57 1.29 dholland int lastch; /* last character */ 58 1.29 dholland int equiv[2]; /* equivalence set */ 59 1.29 dholland int *set; /* set of characters */ 60 1.25 dholland const char *str; /* user's string */ 61 1.21 dholland }; 62 1.21 dholland 63 1.29 dholland static int backslash(STR *); 64 1.29 dholland static int bracket(STR *); 65 1.29 dholland static int c_class(const void *, const void *); 66 1.27 dholland static int *genclass(const char *, size_t); 67 1.29 dholland static void genequiv(STR *); 68 1.29 dholland static int genrange(STR *); 69 1.29 dholland static void genseq(STR *); 70 1.1 glass 71 1.21 dholland STR * 72 1.23 dholland str_create(int whichstring, const char *txt) 73 1.21 dholland { 74 1.21 dholland STR *s; 75 1.21 dholland 76 1.21 dholland s = malloc(sizeof(*s)); 77 1.21 dholland if (s == NULL) { 78 1.21 dholland err(1, "Out of memory"); 79 1.21 dholland } 80 1.21 dholland 81 1.21 dholland s->which = whichstring == 2 ? STRING2 : STRING1; 82 1.21 dholland s->state = NORMAL; 83 1.21 dholland s->cnt = 0; 84 1.21 dholland s->lastch = OOBCH; 85 1.21 dholland s->equiv[0] = 0; 86 1.21 dholland s->equiv[1] = OOBCH; 87 1.21 dholland s->set = NULL; 88 1.23 dholland s->str = txt; 89 1.21 dholland 90 1.21 dholland return s; 91 1.21 dholland } 92 1.21 dholland 93 1.21 dholland void 94 1.21 dholland str_destroy(STR *s) 95 1.21 dholland { 96 1.21 dholland if (s->set != NULL && s->set != s->equiv) { 97 1.21 dholland free(s->set); 98 1.21 dholland } 99 1.21 dholland free(s); 100 1.21 dholland } 101 1.21 dholland 102 1.1 glass int 103 1.20 dholland next(STR *s, int *ret) 104 1.1 glass { 105 1.8 lukem int ch; 106 1.1 glass 107 1.1 glass switch (s->state) { 108 1.1 glass case EOS: 109 1.20 dholland *ret = s->lastch; 110 1.16 christos return 0; 111 1.1 glass case INFINITE: 112 1.20 dholland *ret = s->lastch; 113 1.16 christos return 1; 114 1.1 glass case NORMAL: 115 1.26 dholland ch = (unsigned char)s->str[0]; 116 1.26 dholland switch (ch) { 117 1.1 glass case '\0': 118 1.1 glass s->state = EOS; 119 1.20 dholland *ret = s->lastch; 120 1.16 christos return 0; 121 1.1 glass case '\\': 122 1.1 glass s->lastch = backslash(s); 123 1.1 glass break; 124 1.1 glass case '[': 125 1.26 dholland if (bracket(s)) { 126 1.20 dholland return next(s, ret); 127 1.26 dholland } 128 1.1 glass /* FALLTHROUGH */ 129 1.1 glass default: 130 1.1 glass ++s->str; 131 1.1 glass s->lastch = ch; 132 1.1 glass break; 133 1.1 glass } 134 1.1 glass 135 1.1 glass /* We can start a range at any time. */ 136 1.20 dholland if (s->str[0] == '-' && genrange(s)) { 137 1.20 dholland return next(s, ret); 138 1.20 dholland } 139 1.20 dholland *ret = s->lastch; 140 1.16 christos return 1; 141 1.1 glass case RANGE: 142 1.26 dholland if (s->cnt == 0) { 143 1.1 glass s->state = NORMAL; 144 1.20 dholland return next(s, ret); 145 1.1 glass } 146 1.26 dholland s->cnt--; 147 1.1 glass ++s->lastch; 148 1.20 dholland *ret = s->lastch; 149 1.16 christos return 1; 150 1.1 glass case SEQUENCE: 151 1.26 dholland if (s->cnt == 0) { 152 1.1 glass s->state = NORMAL; 153 1.20 dholland return next(s, ret); 154 1.1 glass } 155 1.26 dholland s->cnt--; 156 1.20 dholland *ret = s->lastch; 157 1.16 christos return 1; 158 1.1 glass case SET: 159 1.26 dholland s->lastch = s->set[s->cnt++]; 160 1.26 dholland if (s->lastch == OOBCH) { 161 1.1 glass s->state = NORMAL; 162 1.26 dholland if (s->set != s->equiv) { 163 1.26 dholland free(s->set); 164 1.26 dholland } 165 1.26 dholland s->set = NULL; 166 1.20 dholland return next(s, ret); 167 1.1 glass } 168 1.20 dholland *ret = s->lastch; 169 1.16 christos return 1; 170 1.1 glass } 171 1.1 glass /* NOTREACHED */ 172 1.20 dholland assert(0); 173 1.20 dholland *ret = s->lastch; 174 1.16 christos return 0; 175 1.1 glass } 176 1.1 glass 177 1.1 glass static int 178 1.13 joerg bracket(STR *s) 179 1.1 glass { 180 1.26 dholland const char *p; 181 1.27 dholland int *q; 182 1.1 glass 183 1.1 glass switch (s->str[1]) { 184 1.1 glass case ':': /* "[:class:]" */ 185 1.1 glass if ((p = strstr(s->str + 2, ":]")) == NULL) 186 1.16 christos return 0; 187 1.1 glass s->str += 2; 188 1.27 dholland q = genclass(s->str, p - s->str); 189 1.27 dholland s->state = SET; 190 1.27 dholland s->set = q; 191 1.27 dholland s->cnt = 0; 192 1.1 glass s->str = p + 2; 193 1.16 christos return 1; 194 1.1 glass case '=': /* "[=equiv=]" */ 195 1.1 glass if ((p = strstr(s->str + 2, "=]")) == NULL) 196 1.16 christos return 0; 197 1.1 glass s->str += 2; 198 1.1 glass genequiv(s); 199 1.28 dholland s->str = p + 2; 200 1.16 christos return 1; 201 1.1 glass default: /* "[\###*n]" or "[#*n]" */ 202 1.1 glass if ((p = strpbrk(s->str + 2, "*]")) == NULL) 203 1.16 christos return 0; 204 1.8 lukem if (p[0] != '*' || strchr(p, ']') == NULL) 205 1.16 christos return 0; 206 1.1 glass s->str += 1; 207 1.1 glass genseq(s); 208 1.16 christos return 1; 209 1.1 glass } 210 1.1 glass /* NOTREACHED */ 211 1.1 glass } 212 1.1 glass 213 1.1 glass typedef struct { 214 1.12 lukem const char *name; 215 1.13 joerg int (*func)(int); 216 1.1 glass } CLASS; 217 1.1 glass 218 1.16 christos static const CLASS classes[] = { 219 1.16 christos { "alnum", isalnum }, 220 1.16 christos { "alpha", isalpha }, 221 1.16 christos { "blank", isblank }, 222 1.16 christos { "cntrl", iscntrl }, 223 1.16 christos { "digit", isdigit }, 224 1.16 christos { "graph", isgraph }, 225 1.16 christos { "lower", islower }, 226 1.16 christos { "print", isprint }, 227 1.16 christos { "punct", ispunct }, 228 1.16 christos { "space", isspace }, 229 1.16 christos { "upper", isupper }, 230 1.16 christos { "xdigit", isxdigit }, 231 1.1 glass }; 232 1.1 glass 233 1.26 dholland typedef struct { 234 1.26 dholland const char *name; 235 1.26 dholland size_t len; 236 1.26 dholland } CLASSKEY; 237 1.26 dholland 238 1.27 dholland static int * 239 1.27 dholland genclass(const char *class, size_t len) 240 1.1 glass { 241 1.26 dholland int ch; 242 1.16 christos const CLASS *cp; 243 1.26 dholland CLASSKEY key; 244 1.1 glass int *p; 245 1.26 dholland unsigned pos, num; 246 1.1 glass 247 1.26 dholland /* Find the class */ 248 1.26 dholland key.name = class; 249 1.26 dholland key.len = len; 250 1.26 dholland cp = bsearch(&key, classes, __arraycount(classes), sizeof(classes[0]), 251 1.26 dholland c_class); 252 1.26 dholland if (cp == NULL) { 253 1.26 dholland errx(1, "unknown class %.*s", (int)len, class); 254 1.26 dholland } 255 1.1 glass 256 1.26 dholland /* 257 1.26 dholland * Figure out what characters are in the class 258 1.26 dholland */ 259 1.26 dholland 260 1.26 dholland num = NCHARS + 1; 261 1.26 dholland p = malloc(num * sizeof(*p)); 262 1.26 dholland if (p == NULL) { 263 1.8 lukem err(1, "malloc"); 264 1.26 dholland } 265 1.26 dholland 266 1.26 dholland pos = 0; 267 1.26 dholland for (ch = 0; ch < NCHARS; ch++) { 268 1.26 dholland if (cp->func(ch)) { 269 1.26 dholland p[pos++] = ch; 270 1.26 dholland } 271 1.26 dholland } 272 1.26 dholland 273 1.26 dholland p[pos++] = OOBCH; 274 1.26 dholland for (; pos < num; pos++) { 275 1.26 dholland p[pos] = 0; 276 1.26 dholland } 277 1.19 christos 278 1.27 dholland return p; 279 1.1 glass } 280 1.1 glass 281 1.1 glass static int 282 1.26 dholland c_class(const void *av, const void *bv) 283 1.1 glass { 284 1.26 dholland const CLASSKEY *a = av; 285 1.26 dholland const CLASS *b = bv; 286 1.26 dholland size_t blen; 287 1.26 dholland int r; 288 1.26 dholland 289 1.26 dholland blen = strlen(b->name); 290 1.26 dholland r = strncmp(a->name, b->name, a->len); 291 1.26 dholland if (r != 0) { 292 1.26 dholland return r; 293 1.26 dholland } 294 1.26 dholland if (a->len < blen) { 295 1.26 dholland /* someone gave us a prefix of the right name */ 296 1.26 dholland return -1; 297 1.26 dholland } 298 1.26 dholland assert(a-> len == blen); 299 1.26 dholland return 0; 300 1.1 glass } 301 1.1 glass 302 1.1 glass /* 303 1.1 glass * English doesn't have any equivalence classes, so for now 304 1.1 glass * we just syntax check and grab the character. 305 1.1 glass */ 306 1.1 glass static void 307 1.13 joerg genequiv(STR *s) 308 1.1 glass { 309 1.27 dholland int ch; 310 1.27 dholland 311 1.27 dholland ch = (unsigned char)s->str[0]; 312 1.27 dholland if (ch == '\\') { 313 1.1 glass s->equiv[0] = backslash(s); 314 1.1 glass } else { 315 1.27 dholland s->equiv[0] = ch; 316 1.28 dholland s->str++; 317 1.28 dholland } 318 1.28 dholland if (s->str[0] != '=') { 319 1.28 dholland errx(1, "Misplaced equivalence equals sign"); 320 1.28 dholland } 321 1.28 dholland s->str++; 322 1.28 dholland if (s->str[0] != ']') { 323 1.28 dholland errx(1, "Misplaced equivalence right bracket"); 324 1.1 glass } 325 1.28 dholland s->str++; 326 1.28 dholland 327 1.1 glass s->cnt = 0; 328 1.1 glass s->state = SET; 329 1.1 glass s->set = s->equiv; 330 1.1 glass } 331 1.1 glass 332 1.1 glass static int 333 1.13 joerg genrange(STR *s) 334 1.1 glass { 335 1.1 glass int stopval; 336 1.22 dholland const char *savestart; 337 1.1 glass 338 1.24 dholland savestart = s->str++; 339 1.26 dholland stopval = s->str[0] == '\\' ? backslash(s) : (unsigned char)*s->str++; 340 1.26 dholland if (stopval < (unsigned char)s->lastch) { 341 1.1 glass s->str = savestart; 342 1.16 christos return 0; 343 1.1 glass } 344 1.1 glass s->cnt = stopval - s->lastch + 1; 345 1.1 glass s->state = RANGE; 346 1.1 glass --s->lastch; 347 1.16 christos return 1; 348 1.1 glass } 349 1.1 glass 350 1.1 glass static void 351 1.13 joerg genseq(STR *s) 352 1.1 glass { 353 1.1 glass char *ep; 354 1.1 glass 355 1.26 dholland if (s->which == STRING1) { 356 1.26 dholland errx(1, "Sequences only valid in string2"); 357 1.26 dholland } 358 1.1 glass 359 1.26 dholland if (*s->str == '\\') { 360 1.1 glass s->lastch = backslash(s); 361 1.26 dholland } else { 362 1.25 dholland s->lastch = (unsigned char)*s->str++; 363 1.26 dholland } 364 1.26 dholland if (*s->str != '*') { 365 1.26 dholland errx(1, "Misplaced sequence asterisk"); 366 1.26 dholland } 367 1.1 glass 368 1.26 dholland s->str++; 369 1.26 dholland switch (s->str[0]) { 370 1.1 glass case '\\': 371 1.1 glass s->cnt = backslash(s); 372 1.1 glass break; 373 1.1 glass case ']': 374 1.1 glass s->cnt = 0; 375 1.1 glass ++s->str; 376 1.1 glass break; 377 1.1 glass default: 378 1.26 dholland if (isdigit((unsigned char)s->str[0])) { 379 1.1 glass s->cnt = strtol(s->str, &ep, 0); 380 1.1 glass if (*ep == ']') { 381 1.1 glass s->str = ep + 1; 382 1.1 glass break; 383 1.1 glass } 384 1.1 glass } 385 1.8 lukem errx(1, "illegal sequence count"); 386 1.1 glass /* NOTREACHED */ 387 1.1 glass } 388 1.1 glass 389 1.1 glass s->state = s->cnt ? SEQUENCE : INFINITE; 390 1.1 glass } 391 1.1 glass 392 1.1 glass /* 393 1.1 glass * Translate \??? into a character. Up to 3 octal digits, if no digits either 394 1.1 glass * an escape code or a literal character. 395 1.1 glass */ 396 1.1 glass static int 397 1.13 joerg backslash(STR *s) 398 1.1 glass { 399 1.8 lukem int ch, cnt, val; 400 1.1 glass 401 1.27 dholland cnt = val = 0; 402 1.27 dholland for (;;) { 403 1.27 dholland /* Consume the character we're already on. */ 404 1.26 dholland s->str++; 405 1.27 dholland 406 1.27 dholland /* Look at the next character. */ 407 1.26 dholland ch = (unsigned char)s->str[0]; 408 1.26 dholland if (!isascii(ch) || !isdigit(ch)) { 409 1.1 glass break; 410 1.26 dholland } 411 1.1 glass val = val * 8 + ch - '0'; 412 1.1 glass if (++cnt == 3) { 413 1.27 dholland /* Enough digits; consume this one and stop */ 414 1.1 glass ++s->str; 415 1.1 glass break; 416 1.1 glass } 417 1.1 glass } 418 1.26 dholland if (cnt) { 419 1.27 dholland /* We saw digits, so return their value */ 420 1.30 leot if (val >= OOBCH) 421 1.30 leot errx(1, "Invalid octal character value"); 422 1.16 christos return val; 423 1.26 dholland } 424 1.27 dholland if (ch == '\0') { 425 1.27 dholland /* \<end> -> \ */ 426 1.27 dholland s->state = EOS; 427 1.27 dholland return '\\'; 428 1.26 dholland } 429 1.27 dholland 430 1.27 dholland /* Consume the escaped character */ 431 1.27 dholland s->str++; 432 1.27 dholland 433 1.1 glass switch (ch) { 434 1.17 christos case 'a': /* escape characters */ 435 1.17 christos return '\7'; 436 1.17 christos case 'b': 437 1.17 christos return '\b'; 438 1.17 christos case 'e': 439 1.17 christos return '\033'; 440 1.17 christos case 'f': 441 1.17 christos return '\f'; 442 1.17 christos case 'n': 443 1.17 christos return '\n'; 444 1.17 christos case 'r': 445 1.17 christos return '\r'; 446 1.17 christos case 't': 447 1.17 christos return '\t'; 448 1.17 christos case 'v': 449 1.17 christos return '\13'; 450 1.27 dholland default: /* \q -> q */ 451 1.17 christos return ch; 452 1.1 glass } 453 1.1 glass } 454