regcomp.c revision 1.3 1 #include <sys/types.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <ctype.h>
5 #include <limits.h>
6 #include <stdlib.h>
7 #include <regex.h>
8
9 #include "utils.h"
10 #include "regex2.h"
11
12 #include "cclass.h"
13 #include "cname.h"
14
15 /*
16 * parse structure, passed up and down to avoid global variables and
17 * other clumsinesses
18 */
19 struct parse {
20 char *next; /* next character in RE */
21 char *end; /* end of string (-> NUL normally) */
22 int error; /* has an error been seen? */
23 sop *strip; /* malloced strip */
24 sopno ssize; /* malloced strip size (allocated) */
25 sopno slen; /* malloced strip length (used) */
26 int ncsalloc; /* number of csets allocated */
27 struct re_guts *g;
28 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
30 sopno pend[NPAREN]; /* -> ) ([0] unused) */
31 };
32
33 #include "regcomp.ih"
34
35 static char nuls[10]; /* place to point scanner in event of error */
36
37 /*
38 * macros for use with parse structure
39 * BEWARE: these know that the parse structure is named `p' !!!
40 */
41 #define PEEK() (*p->next)
42 #define PEEK2() (*(p->next+1))
43 #define MORE() (p->next < p->end)
44 #define MORE2() (p->next+1 < p->end)
45 #define SEE(c) (MORE() && PEEK() == (c))
46 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
47 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
48 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
49 #define NEXT() (p->next++)
50 #define NEXT2() (p->next += 2)
51 #define NEXTn(n) (p->next += (n))
52 #define GETNEXT() (*p->next++)
53 #define SETERROR(e) seterr(p, (e))
54 #define REQUIRE(co, e) ((co) || SETERROR(e))
55 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
56 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
57 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
58 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
59 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
60 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
61 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
62 #define HERE() (p->slen)
63 #define THERE() (p->slen - 1)
64 #define DROP(n) (p->slen -= (n))
65
66 #ifndef NDEBUG
67 static int never = 0; /* for use in asserts; shuts lint up */
68 #else
69 #define never 0 /* some <assert.h>s have bugs too */
70 #endif
71
72 /*
73 - regcomp - interface for parser and compilation
74 = extern int regcomp(regex_t *, const char *, int);
75 = #define REG_BASIC 0000
76 = #define REG_EXTENDED 0001
77 = #define REG_ICASE 0002
78 = #define REG_NOSUB 0004
79 = #define REG_NEWLINE 0010
80 = #define REG_NOSPEC 0020
81 = #define REG_PEND 0040
82 = #define REG_DUMP 0200
83 */
84 int /* 0 success, otherwise REG_something */
85 regcomp(preg, pattern, cflags)
86 regex_t *preg;
87 const char *pattern;
88 int cflags;
89 {
90 struct parse pa;
91 register struct re_guts *g;
92 register struct parse *p = &pa;
93 register int i;
94 register size_t len;
95 #ifdef REDEBUG
96 # define GOODFLAGS(f) (f)
97 #else
98 # define GOODFLAGS(f) ((f)&~REG_DUMP)
99 #endif
100
101 cflags = GOODFLAGS(cflags);
102 if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
103 return(REG_INVARG);
104
105 if (cflags®_PEND) {
106 if (preg->re_endp < pattern)
107 return(REG_INVARG);
108 len = preg->re_endp - pattern;
109 } else
110 len = strlen((char *)pattern);
111
112 /* do the mallocs early so failure handling is easy */
113 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
114 (NC-1)*sizeof(cat_t));
115 if (g == NULL)
116 return(REG_ESPACE);
117 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
118 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
119 p->slen = 0;
120 if (p->strip == NULL) {
121 free((char *)g);
122 return(REG_ESPACE);
123 }
124
125 /* set things up */
126 p->g = g;
127 p->next = (char *)pattern; /* convenience; we do not modify it */
128 p->end = p->next + len;
129 p->error = 0;
130 p->ncsalloc = 0;
131 for (i = 0; i < NPAREN; i++) {
132 p->pbegin[i] = 0;
133 p->pend[i] = 0;
134 }
135 g->csetsize = NC;
136 g->sets = NULL;
137 g->setbits = NULL;
138 g->ncsets = 0;
139 g->cflags = cflags;
140 g->iflags = 0;
141 g->nbol = 0;
142 g->neol = 0;
143 g->must = NULL;
144 g->mlen = 0;
145 g->nsub = 0;
146 g->ncategories = 1; /* category 0 is "everything else" */
147 g->categories = &g->catspace[-(CHAR_MIN)];
148 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
149 g->backrefs = 0;
150
151 /* do it */
152 EMIT(OEND, 0);
153 g->firststate = THERE();
154 if (cflags®_EXTENDED)
155 p_ere(p, OUT);
156 else if (cflags®_NOSPEC)
157 p_str(p);
158 else
159 p_bre(p, OUT, OUT);
160 EMIT(OEND, 0);
161 g->laststate = THERE();
162
163 /* tidy up loose ends and fill things in */
164 categorize(p, g);
165 stripsnug(p, g);
166 findmust(p, g);
167 g->nplus = pluscount(p, g);
168 g->magic = MAGIC2;
169 preg->re_nsub = g->nsub;
170 preg->re_g = g;
171 preg->re_magic = MAGIC1;
172 #ifndef REDEBUG
173 /* not debugging, so can't rely on the assert() in regexec() */
174 if (g->iflags&BAD)
175 SETERROR(REG_ASSERT);
176 #endif
177
178 /* win or lose, we're done */
179 if (p->error != 0) /* lose */
180 regfree(preg);
181 return(p->error);
182 }
183
184 /*
185 - p_ere - ERE parser top level, concatenation and alternation
186 == static void p_ere(register struct parse *p, int stop);
187 */
188 static void
189 p_ere(p, stop)
190 register struct parse *p;
191 int stop; /* character this ERE should end at */
192 {
193 register char c;
194 register sopno prevback;
195 register sopno prevfwd;
196 register sopno conc;
197 register int first = 1; /* is this the first alternative? */
198
199 for (;;) {
200 /* do a bunch of concatenated expressions */
201 conc = HERE();
202 while (MORE() && (c = PEEK()) != '|' && c != stop)
203 p_ere_exp(p);
204 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
205
206 if (!EAT('|'))
207 break; /* NOTE BREAK OUT */
208
209 if (first) {
210 INSERT(OCH_, conc); /* offset is wrong */
211 prevfwd = conc;
212 prevback = conc;
213 first = 0;
214 }
215 ASTERN(OOR1, prevback);
216 prevback = THERE();
217 AHEAD(prevfwd); /* fix previous offset */
218 prevfwd = HERE();
219 EMIT(OOR2, 0); /* offset is very wrong */
220 }
221
222 if (!first) { /* tail-end fixups */
223 AHEAD(prevfwd);
224 ASTERN(O_CH, prevback);
225 }
226
227 assert(!MORE() || SEE(stop));
228 }
229
230 /*
231 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
232 == static void p_ere_exp(register struct parse *p);
233 */
234 static void
235 p_ere_exp(p)
236 register struct parse *p;
237 {
238 register char c;
239 register sopno pos;
240 register int count;
241 register int count2;
242 register sopno subno;
243 int wascaret = 0;
244
245 assert(MORE()); /* caller should have ensured this */
246 c = GETNEXT();
247
248 pos = HERE();
249 switch (c) {
250 case '(':
251 REQUIRE(MORE(), REG_EPAREN);
252 p->g->nsub++;
253 subno = p->g->nsub;
254 if (subno < NPAREN)
255 p->pbegin[subno] = HERE();
256 EMIT(OLPAREN, subno);
257 if (!SEE(')'))
258 p_ere(p, ')');
259 if (subno < NPAREN) {
260 p->pend[subno] = HERE();
261 assert(p->pend[subno] != 0);
262 }
263 EMIT(ORPAREN, subno);
264 MUSTEAT(')', REG_EPAREN);
265 break;
266 #ifndef POSIX_MISTAKE
267 case ')': /* happens only if no current unmatched ( */
268 /*
269 * You may ask, why the ifndef? Because I didn't notice
270 * this until slightly too late for 1003.2, and none of the
271 * other 1003.2 regular-expression reviewers noticed it at
272 * all. So an unmatched ) is legal POSIX, at least until
273 * we can get it fixed.
274 */
275 SETERROR(REG_EPAREN);
276 break;
277 #endif
278 case '^':
279 EMIT(OBOL, 0);
280 p->g->iflags |= USEBOL;
281 p->g->nbol++;
282 wascaret = 1;
283 break;
284 case '$':
285 EMIT(OEOL, 0);
286 p->g->iflags |= USEEOL;
287 p->g->neol++;
288 break;
289 case '|':
290 SETERROR(REG_EMPTY);
291 break;
292 case '*':
293 case '+':
294 case '?':
295 SETERROR(REG_BADRPT);
296 break;
297 case '.':
298 if (p->g->cflags®_NEWLINE)
299 nonnewline(p);
300 else
301 EMIT(OANY, 0);
302 break;
303 case '[':
304 p_bracket(p);
305 break;
306 case '\\':
307 REQUIRE(MORE(), REG_EESCAPE);
308 c = GETNEXT();
309 ordinary(p, c);
310 break;
311 case '{': /* okay as ordinary except if digit follows */
312 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
313 /* FALLTHROUGH */
314 default:
315 ordinary(p, c);
316 break;
317 }
318
319 if (!MORE())
320 return;
321 c = PEEK();
322 /* we call { a repetition if followed by a digit */
323 if (!( c == '*' || c == '+' || c == '?' ||
324 (c == '{' && MORE2() && isdigit(PEEK2())) ))
325 return; /* no repetition, we're done */
326 NEXT();
327
328 REQUIRE(!wascaret, REG_BADRPT);
329 switch (c) {
330 case '*': /* implemented as +? */
331 INSERT(OPLUS_, pos);
332 ASTERN(O_PLUS, pos);
333 INSERT(OQUEST_, pos);
334 ASTERN(O_QUEST, pos);
335 break;
336 case '+':
337 INSERT(OPLUS_, pos);
338 ASTERN(O_PLUS, pos);
339 break;
340 case '?':
341 INSERT(OQUEST_, pos);
342 ASTERN(O_QUEST, pos);
343 break;
344 case '{':
345 count = p_count(p);
346 if (EAT(',')) {
347 if (isdigit(PEEK())) {
348 count2 = p_count(p);
349 REQUIRE(count <= count2, REG_BADBR);
350 } else /* single number with comma */
351 count2 = INFINITY;
352 } else /* just a single number */
353 count2 = count;
354 repeat(p, pos, count, count2);
355 if (!EAT('}')) { /* error heuristics */
356 while (MORE() && PEEK() != '}')
357 NEXT();
358 REQUIRE(MORE(), REG_EBRACE);
359 SETERROR(REG_BADBR);
360 }
361 break;
362 }
363
364 if (!MORE())
365 return;
366 c = PEEK();
367 if (!( c == '*' || c == '+' || c == '?' ||
368 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
369 return;
370 SETERROR(REG_BADRPT);
371 }
372
373 /*
374 - p_str - string (no metacharacters) "parser"
375 == static void p_str(register struct parse *p);
376 */
377 static void
378 p_str(p)
379 register struct parse *p;
380 {
381 REQUIRE(MORE(), REG_EMPTY);
382 while (MORE())
383 ordinary(p, GETNEXT());
384 }
385
386 /*
387 - p_bre - BRE parser top level, anchoring and concatenation
388 == static void p_bre(register struct parse *p, register int end1, \
389 == register int end2);
390 * Giving end1 as OUT essentially eliminates the end1/end2 check.
391 *
392 * This implementation is a bit of a kludge, in that a trailing $ is first
393 * taken as an ordinary character and then revised to be an anchor. The
394 * only undesirable side effect is that '$' gets included as a character
395 * category in such cases. This is fairly harmless; not worth fixing.
396 * The amount of lookahead needed to avoid this kludge is excessive.
397 */
398 static void
399 p_bre(p, end1, end2)
400 register struct parse *p;
401 register int end1; /* first terminating character */
402 register int end2; /* second terminating character */
403 {
404 register sopno start = HERE();
405 register int first = 1; /* first subexpression? */
406 register int wasdollar = 0;
407
408 if (EAT('^')) {
409 EMIT(OBOL, 0);
410 p->g->iflags |= USEBOL;
411 p->g->nbol++;
412 }
413 while (MORE() && !SEETWO(end1, end2)) {
414 wasdollar = p_simp_re(p, first);
415 first = 0;
416 }
417 if (wasdollar) { /* oops, that was a trailing anchor */
418 DROP(1);
419 EMIT(OEOL, 0);
420 p->g->iflags |= USEEOL;
421 p->g->neol++;
422 }
423
424 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
425 }
426
427 /*
428 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
429 == static int p_simp_re(register struct parse *p, int starordinary);
430 */
431 static int /* was the simple RE an unbackslashed $? */
432 p_simp_re(p, starordinary)
433 register struct parse *p;
434 int starordinary; /* is a leading * an ordinary character? */
435 {
436 register int c;
437 register int count;
438 register int count2;
439 register sopno pos;
440 register int i;
441 register sopno subno;
442 # define BACKSL (1<<CHAR_BIT)
443
444 pos = HERE(); /* repetion op, if any, covers from here */
445
446 assert(MORE()); /* caller should have ensured this */
447 c = GETNEXT();
448 if (c == '\\') {
449 REQUIRE(MORE(), REG_EESCAPE);
450 c = BACKSL | (unsigned char)GETNEXT();
451 }
452 switch (c) {
453 case '.':
454 if (p->g->cflags®_NEWLINE)
455 nonnewline(p);
456 else
457 EMIT(OANY, 0);
458 break;
459 case '[':
460 p_bracket(p);
461 break;
462 case BACKSL|'{':
463 SETERROR(REG_BADRPT);
464 break;
465 case BACKSL|'(':
466 p->g->nsub++;
467 subno = p->g->nsub;
468 if (subno < NPAREN)
469 p->pbegin[subno] = HERE();
470 EMIT(OLPAREN, subno);
471 /* the MORE here is an error heuristic */
472 if (MORE() && !SEETWO('\\', ')'))
473 p_bre(p, '\\', ')');
474 if (subno < NPAREN) {
475 p->pend[subno] = HERE();
476 assert(p->pend[subno] != 0);
477 }
478 EMIT(ORPAREN, subno);
479 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
480 break;
481 case BACKSL|')': /* should not get here -- must be user */
482 case BACKSL|'}':
483 SETERROR(REG_EPAREN);
484 break;
485 case BACKSL|'1':
486 case BACKSL|'2':
487 case BACKSL|'3':
488 case BACKSL|'4':
489 case BACKSL|'5':
490 case BACKSL|'6':
491 case BACKSL|'7':
492 case BACKSL|'8':
493 case BACKSL|'9':
494 i = (c&~BACKSL) - '0';
495 assert(i < NPAREN);
496 if (p->pend[i] != 0) {
497 assert(i <= p->g->nsub);
498 EMIT(OBACK_, i);
499 assert(p->pbegin[i] != 0);
500 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
501 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
502 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
503 EMIT(O_BACK, i);
504 } else
505 SETERROR(REG_ESUBREG);
506 p->g->backrefs = 1;
507 break;
508 case '*':
509 REQUIRE(starordinary, REG_BADRPT);
510 /* FALLTHROUGH */
511 default:
512 ordinary(p, c &~ BACKSL);
513 break;
514 }
515
516 if (EAT('*')) { /* implemented as +? */
517 INSERT(OPLUS_, pos);
518 ASTERN(O_PLUS, pos);
519 INSERT(OQUEST_, pos);
520 ASTERN(O_QUEST, pos);
521 } else if (EATTWO('\\', '{')) {
522 count = p_count(p);
523 if (EAT(',')) {
524 if (MORE() && isdigit(PEEK())) {
525 count2 = p_count(p);
526 REQUIRE(count <= count2, REG_BADBR);
527 } else /* single number with comma */
528 count2 = INFINITY;
529 } else /* just a single number */
530 count2 = count;
531 repeat(p, pos, count, count2);
532 if (!EATTWO('\\', '}')) { /* error heuristics */
533 while (MORE() && !SEETWO('\\', '}'))
534 NEXT();
535 REQUIRE(MORE(), REG_EBRACE);
536 SETERROR(REG_BADBR);
537 }
538 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
539 return(1);
540
541 return(0);
542 }
543
544 /*
545 - p_count - parse a repetition count
546 == static int p_count(register struct parse *p);
547 */
548 static int /* the value */
549 p_count(p)
550 register struct parse *p;
551 {
552 register int count = 0;
553 register int ndigits = 0;
554
555 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
556 count = count*10 + (GETNEXT() - '0');
557 ndigits++;
558 }
559
560 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
561 return(count);
562 }
563
564 /*
565 - p_bracket - parse a bracketed character list
566 == static void p_bracket(register struct parse *p);
567 *
568 * Note a significant property of this code: if the allocset() did SETERROR,
569 * no set operations are done.
570 */
571 static void
572 p_bracket(p)
573 register struct parse *p;
574 {
575 register char c;
576 register cset *cs = allocset(p);
577 register int invert = 0;
578
579 /* Dept of Truly Sickening Special-Case Kludges */
580 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
581 EMIT(OBOW, 0);
582 NEXTn(6);
583 return;
584 }
585 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
586 EMIT(OEOW, 0);
587 NEXTn(6);
588 return;
589 }
590
591 if (EAT('^'))
592 invert++; /* make note to invert set at end */
593 if (EAT(']'))
594 CHadd(cs, ']');
595 else if (EAT('-'))
596 CHadd(cs, '-');
597 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
598 p_b_term(p, cs);
599 if (EAT('-'))
600 CHadd(cs, '-');
601 MUSTEAT(']', REG_EBRACK);
602
603 if (p->error != 0) /* don't mess things up further */
604 return;
605
606 if (p->g->cflags®_ICASE) {
607 register int i;
608 register int ci;
609
610 for (i = p->g->csetsize - 1; i >= 0; i--)
611 if (CHIN(cs, i) && isalpha(i)) {
612 ci = othercase(i);
613 if (ci != i)
614 CHadd(cs, ci);
615 }
616 if (cs->multis != NULL)
617 mccase(p, cs);
618 }
619 if (invert) {
620 register int i;
621
622 for (i = p->g->csetsize - 1; i >= 0; i--)
623 if (CHIN(cs, i))
624 CHsub(cs, i);
625 else
626 CHadd(cs, i);
627 if (p->g->cflags®_NEWLINE)
628 CHsub(cs, '\n');
629 if (cs->multis != NULL)
630 mcinvert(p, cs);
631 }
632
633 assert(cs->multis == NULL); /* xxx */
634
635 if (nch(p, cs) == 1) { /* optimize singleton sets */
636 ordinary(p, firstch(p, cs));
637 freeset(p, cs);
638 } else
639 EMIT(OANYOF, freezeset(p, cs));
640 }
641
642 /*
643 - p_b_term - parse one term of a bracketed character list
644 == static void p_b_term(register struct parse *p, register cset *cs);
645 */
646 static void
647 p_b_term(p, cs)
648 register struct parse *p;
649 register cset *cs;
650 {
651 register char c;
652 register char start, finish;
653 register int i;
654
655 /* classify what we've got */
656 switch ((MORE()) ? PEEK() : '\0') {
657 case '[':
658 c = (MORE2()) ? PEEK2() : '\0';
659 break;
660 case '-':
661 SETERROR(REG_ERANGE);
662 return; /* NOTE RETURN */
663 break;
664 default:
665 c = '\0';
666 break;
667 }
668
669 switch (c) {
670 case ':': /* character class */
671 NEXT2();
672 REQUIRE(MORE(), REG_EBRACK);
673 c = PEEK();
674 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
675 p_b_cclass(p, cs);
676 REQUIRE(MORE(), REG_EBRACK);
677 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
678 break;
679 case '=': /* equivalence class */
680 NEXT2();
681 REQUIRE(MORE(), REG_EBRACK);
682 c = PEEK();
683 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
684 p_b_eclass(p, cs);
685 REQUIRE(MORE(), REG_EBRACK);
686 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
687 break;
688 default: /* symbol, ordinary character, or range */
689 /* xxx revision needed for multichar stuff */
690 start = p_b_symbol(p);
691 if (SEE('-') && MORE2() && PEEK2() != ']') {
692 /* range */
693 NEXT();
694 if (EAT('-'))
695 finish = '-';
696 else
697 finish = p_b_symbol(p);
698 } else
699 finish = start;
700 /* xxx what about signed chars here... */
701 REQUIRE(start <= finish, REG_ERANGE);
702 for (i = start; i <= finish; i++)
703 CHadd(cs, i);
704 break;
705 }
706 }
707
708 /*
709 - p_b_cclass - parse a character-class name and deal with it
710 == static void p_b_cclass(register struct parse *p, register cset *cs);
711 */
712 static void
713 p_b_cclass(p, cs)
714 register struct parse *p;
715 register cset *cs;
716 {
717 register char *sp = p->next;
718 register struct cclass *cp;
719 register size_t len;
720 register char *u;
721 register char c;
722
723 while (MORE() && isalpha(PEEK()))
724 NEXT();
725 len = p->next - sp;
726 for (cp = cclasses; cp->name != NULL; cp++)
727 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
728 break;
729 if (cp->name == NULL) {
730 /* oops, didn't find it */
731 SETERROR(REG_ECTYPE);
732 return;
733 }
734
735 u = cp->chars;
736 while ((c = *u++) != '\0')
737 CHadd(cs, c);
738 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
739 MCadd(p, cs, u);
740 }
741
742 /*
743 - p_b_eclass - parse an equivalence-class name and deal with it
744 == static void p_b_eclass(register struct parse *p, register cset *cs);
745 *
746 * This implementation is incomplete. xxx
747 */
748 static void
749 p_b_eclass(p, cs)
750 register struct parse *p;
751 register cset *cs;
752 {
753 register char c;
754
755 c = p_b_coll_elem(p, '=');
756 CHadd(cs, c);
757 }
758
759 /*
760 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
761 == static char p_b_symbol(register struct parse *p);
762 */
763 static char /* value of symbol */
764 p_b_symbol(p)
765 register struct parse *p;
766 {
767 register char value;
768
769 REQUIRE(MORE(), REG_EBRACK);
770 if (!EATTWO('[', '.'))
771 return(GETNEXT());
772
773 /* collating symbol */
774 value = p_b_coll_elem(p, '.');
775 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
776 return(value);
777 }
778
779 /*
780 - p_b_coll_elem - parse a collating-element name and look it up
781 == static char p_b_coll_elem(register struct parse *p, int endc);
782 */
783 static char /* value of collating element */
784 p_b_coll_elem(p, endc)
785 register struct parse *p;
786 int endc; /* name ended by endc,']' */
787 {
788 register char *sp = p->next;
789 register struct cname *cp;
790 register int len;
791 register char c;
792
793 while (MORE() && !SEETWO(endc, ']'))
794 NEXT();
795 if (!MORE()) {
796 SETERROR(REG_EBRACK);
797 return(0);
798 }
799 len = p->next - sp;
800 for (cp = cnames; cp->name != NULL; cp++)
801 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
802 return(cp->code); /* known name */
803 if (len == 1)
804 return(*sp); /* single character */
805 SETERROR(REG_ECOLLATE); /* neither */
806 return(0);
807 }
808
809 /*
810 - othercase - return the case counterpart of an alphabetic
811 == static char othercase(int ch);
812 */
813 static char /* if no counterpart, return ch */
814 othercase(ch)
815 int ch;
816 {
817 assert(isalpha(ch));
818 if (isupper(ch))
819 return(tolower(ch));
820 else if (islower(ch))
821 return(toupper(ch));
822 else /* peculiar, but could happen */
823 return(ch);
824 }
825
826 /*
827 - bothcases - emit a dualcase version of a two-case character
828 == static void bothcases(register struct parse *p, int ch);
829 *
830 * Boy, is this implementation ever a kludge...
831 */
832 static void
833 bothcases(p, ch)
834 register struct parse *p;
835 int ch;
836 {
837 register char *oldnext = p->next;
838 register char *oldend = p->end;
839 char bracket[3];
840
841 assert(othercase(ch) != ch); /* p_bracket() would recurse */
842 p->next = bracket;
843 p->end = bracket+2;
844 bracket[0] = ch;
845 bracket[1] = ']';
846 bracket[2] = '\0';
847 p_bracket(p);
848 assert(p->next == bracket+2);
849 p->next = oldnext;
850 p->end = oldend;
851 }
852
853 /*
854 - ordinary - emit an ordinary character
855 == static void ordinary(register struct parse *p, register int ch);
856 */
857 static void
858 ordinary(p, ch)
859 register struct parse *p;
860 register int ch;
861 {
862 register cat_t *cap = p->g->categories;
863
864 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
865 bothcases(p, ch);
866 else {
867 EMIT(OCHAR, (unsigned char)ch);
868 if (cap[ch] == 0)
869 cap[ch] = p->g->ncategories++;
870 }
871 }
872
873 /*
874 - nonnewline - emit REG_NEWLINE version of OANY
875 == static void nonnewline(register struct parse *p);
876 *
877 * Boy, is this implementation ever a kludge...
878 */
879 static void
880 nonnewline(p)
881 register struct parse *p;
882 {
883 register char *oldnext = p->next;
884 register char *oldend = p->end;
885 char bracket[4];
886
887 p->next = bracket;
888 p->end = bracket+3;
889 bracket[0] = '^';
890 bracket[1] = '\n';
891 bracket[2] = ']';
892 bracket[3] = '\0';
893 p_bracket(p);
894 assert(p->next == bracket+3);
895 p->next = oldnext;
896 p->end = oldend;
897 }
898
899 /*
900 - repeat - generate code for a bounded repetition, recursively if needed
901 == static void repeat(register struct parse *p, sopno start, int from, int to);
902 */
903 static void
904 repeat(p, start, from, to)
905 register struct parse *p;
906 sopno start; /* operand from here to end of strip */
907 int from; /* repeated from this number */
908 int to; /* to this number of times (maybe INFINITY) */
909 {
910 register sopno finish = HERE();
911 # define N 2
912 # define INF 3
913 # define REP(f, t) ((f)*8 + (t))
914 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
915 register sopno copy;
916
917 if (p->error != 0) /* head off possible runaway recursion */
918 return;
919
920 assert(from <= to);
921
922 switch (REP(MAP(from), MAP(to))) {
923 case REP(0, 0): /* must be user doing this */
924 DROP(finish-start); /* drop the operand */
925 break;
926 case REP(0, 1): /* as x{1,1}? */
927 case REP(0, N): /* as x{1,n}? */
928 case REP(0, INF): /* as x{1,}? */
929 INSERT(OQUEST_, start); /* offset is wrong... */
930 repeat(p, start+1, 1, to);
931 AHEAD(start); /* ... fix it */
932 ASTERN(O_QUEST, start);
933 break;
934 case REP(1, 1): /* trivial case */
935 /* done */
936 break;
937 case REP(1, N): /* as x?x{1,n-1} */
938 INSERT(OQUEST_, start);
939 ASTERN(O_QUEST, start);
940 copy = dupl(p, start+1, finish+1);
941 assert(copy == finish+2);
942 repeat(p, copy, 1, to-1);
943 break;
944 case REP(1, INF): /* as x+ */
945 INSERT(OPLUS_, start);
946 ASTERN(O_PLUS, start);
947 break;
948 case REP(N, N): /* as xx{m-1,n-1} */
949 copy = dupl(p, start, finish);
950 repeat(p, copy, from-1, to-1);
951 break;
952 case REP(N, INF): /* as xx{n-1,INF} */
953 copy = dupl(p, start, finish);
954 repeat(p, copy, from-1, to);
955 break;
956 default: /* "can't happen" */
957 SETERROR(REG_ASSERT); /* just in case */
958 break;
959 }
960 }
961
962 /*
963 - seterr - set an error condition
964 == static int seterr(register struct parse *p, int e);
965 */
966 static int /* useless but makes type checking happy */
967 seterr(p, e)
968 register struct parse *p;
969 int e;
970 {
971 if (p->error == 0) /* keep earliest error condition */
972 p->error = e;
973 p->next = nuls; /* try to bring things to a halt */
974 p->end = nuls;
975 return(0); /* make the return value well-defined */
976 }
977
978 /*
979 - allocset - allocate a set of characters for []
980 == static cset *allocset(register struct parse *p);
981 */
982 static cset *
983 allocset(p)
984 register struct parse *p;
985 {
986 register int no = p->g->ncsets++;
987 register size_t nc;
988 register size_t nbytes;
989 register cset *cs;
990 register size_t css = (size_t)p->g->csetsize;
991 register int i;
992
993 if (no >= p->ncsalloc) { /* need another column of space */
994 p->ncsalloc += CHAR_BIT;
995 nc = p->ncsalloc;
996 assert(nc % CHAR_BIT == 0);
997 nbytes = nc / CHAR_BIT * css;
998 if (p->g->sets == NULL)
999 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1000 else
1001 p->g->sets = (cset *)realloc((char *)p->g->sets,
1002 nc * sizeof(cset));
1003 if (p->g->setbits == NULL)
1004 p->g->setbits = (uch *)malloc(nbytes);
1005 else {
1006 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1007 nbytes);
1008 /* xxx this isn't right if setbits is now NULL */
1009 for (i = 0; i < no; i++)
1010 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1011 }
1012 if (p->g->sets != NULL && p->g->setbits != NULL)
1013 (void) memset((char *)p->g->setbits + (nbytes - css),
1014 0, css);
1015 else {
1016 no = 0;
1017 SETERROR(REG_ESPACE);
1018 /* caller's responsibility not to do set ops */
1019 }
1020 }
1021
1022 assert(p->g->sets != NULL); /* xxx */
1023 cs = &p->g->sets[no];
1024 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1025 cs->mask = 1 << ((no) % CHAR_BIT);
1026 cs->hash = 0;
1027 cs->smultis = 0;
1028 cs->multis = NULL;
1029
1030 return(cs);
1031 }
1032
1033 /*
1034 - freeset - free a now-unused set
1035 == static void freeset(register struct parse *p, register cset *cs);
1036 */
1037 static void
1038 freeset(p, cs)
1039 register struct parse *p;
1040 register cset *cs;
1041 {
1042 register int i;
1043 register cset *top = &p->g->sets[p->g->ncsets];
1044 register size_t css = (size_t)p->g->csetsize;
1045
1046 for (i = 0; i < css; i++)
1047 CHsub(cs, i);
1048 if (cs == top-1) /* recover only the easy case */
1049 p->g->ncsets--;
1050 }
1051
1052 /*
1053 - freezeset - final processing on a set of characters
1054 == static int freezeset(register struct parse *p, register cset *cs);
1055 *
1056 * The main task here is merging identical sets. This is usually a waste
1057 * of time (although the hash code minimizes the overhead), but can win
1058 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1059 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1060 * the same value!
1061 */
1062 static int /* set number */
1063 freezeset(p, cs)
1064 register struct parse *p;
1065 register cset *cs;
1066 {
1067 register uch h = cs->hash;
1068 register int i;
1069 register cset *top = &p->g->sets[p->g->ncsets];
1070 register cset *cs2;
1071 register size_t css = (size_t)p->g->csetsize;
1072
1073 /* look for an earlier one which is the same */
1074 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1075 if (cs2->hash == h && cs2 != cs) {
1076 /* maybe */
1077 for (i = 0; i < css; i++)
1078 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1079 break; /* no */
1080 if (i == css)
1081 break; /* yes */
1082 }
1083
1084 if (cs2 < top) { /* found one */
1085 freeset(p, cs);
1086 cs = cs2;
1087 }
1088
1089 return((int)(cs - p->g->sets));
1090 }
1091
1092 /*
1093 - firstch - return first character in a set (which must have at least one)
1094 == static int firstch(register struct parse *p, register cset *cs);
1095 */
1096 static int /* character; there is no "none" value */
1097 firstch(p, cs)
1098 register struct parse *p;
1099 register cset *cs;
1100 {
1101 register int i;
1102 register size_t css = (size_t)p->g->csetsize;
1103
1104 for (i = 0; i < css; i++)
1105 if (CHIN(cs, i))
1106 return((char)i);
1107 assert(never);
1108 return(0); /* arbitrary */
1109 }
1110
1111 /*
1112 - nch - number of characters in a set
1113 == static int nch(register struct parse *p, register cset *cs);
1114 */
1115 static int
1116 nch(p, cs)
1117 register struct parse *p;
1118 register cset *cs;
1119 {
1120 register int i;
1121 register size_t css = (size_t)p->g->csetsize;
1122 register int n = 0;
1123
1124 for (i = 0; i < css; i++)
1125 if (CHIN(cs, i))
1126 n++;
1127 return(n);
1128 }
1129
1130 /*
1131 - mcadd - add a collating element to a cset
1132 == static void mcadd(register struct parse *p, register cset *cs, \
1133 == register char *cp);
1134 */
1135 static void
1136 mcadd(p, cs, cp)
1137 register struct parse *p;
1138 register cset *cs;
1139 register char *cp;
1140 {
1141 register size_t oldend = cs->smultis;
1142
1143 cs->smultis += strlen(cp) + 1;
1144 if (cs->multis == NULL)
1145 cs->multis = malloc(cs->smultis);
1146 else
1147 cs->multis = realloc(cs->multis, cs->smultis);
1148 if (cs->multis == NULL) {
1149 SETERROR(REG_ESPACE);
1150 return;
1151 }
1152
1153 (void) strcpy(cs->multis + oldend - 1, cp);
1154 cs->multis[cs->smultis - 1] = '\0';
1155 }
1156
1157 /*
1158 - mcsub - subtract a collating element from a cset
1159 == static void mcsub(register cset *cs, register char *cp);
1160 */
1161 static void
1162 mcsub(cs, cp)
1163 register cset *cs;
1164 register char *cp;
1165 {
1166 register char *fp = mcfind(cs, cp);
1167 register size_t len = strlen(fp);
1168
1169 assert(fp != NULL);
1170 (void) memmove(fp, fp + len + 1,
1171 cs->smultis - (fp + len + 1 - cs->multis));
1172 cs->smultis -= len;
1173
1174 if (cs->smultis == 0) {
1175 free(cs->multis);
1176 cs->multis = NULL;
1177 return;
1178 }
1179
1180 cs->multis = realloc(cs->multis, cs->smultis);
1181 assert(cs->multis != NULL);
1182 }
1183
1184 /*
1185 - mcin - is a collating element in a cset?
1186 == static int mcin(register cset *cs, register char *cp);
1187 */
1188 static int
1189 mcin(cs, cp)
1190 register cset *cs;
1191 register char *cp;
1192 {
1193 return(mcfind(cs, cp) != NULL);
1194 }
1195
1196 /*
1197 - mcfind - find a collating element in a cset
1198 == static char *mcfind(register cset *cs, register char *cp);
1199 */
1200 static char *
1201 mcfind(cs, cp)
1202 register cset *cs;
1203 register char *cp;
1204 {
1205 register char *p;
1206
1207 if (cs->multis == NULL)
1208 return(NULL);
1209 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1210 if (strcmp(cp, p) == 0)
1211 return(p);
1212 return(NULL);
1213 }
1214
1215 /*
1216 - mcinvert - invert the list of collating elements in a cset
1217 == static void mcinvert(register struct parse *p, register cset *cs);
1218 *
1219 * This would have to know the set of possibilities. Implementation
1220 * is deferred.
1221 */
1222 static void
1223 mcinvert(p, cs)
1224 register struct parse *p;
1225 register cset *cs;
1226 {
1227 assert(cs->multis == NULL); /* xxx */
1228 }
1229
1230 /*
1231 - mccase - add case counterparts of the list of collating elements in a cset
1232 == static void mccase(register struct parse *p, register cset *cs);
1233 *
1234 * This would have to know the set of possibilities. Implementation
1235 * is deferred.
1236 */
1237 static void
1238 mccase(p, cs)
1239 register struct parse *p;
1240 register cset *cs;
1241 {
1242 assert(cs->multis == NULL); /* xxx */
1243 }
1244
1245 /*
1246 - isinsets - is this character in any sets?
1247 == static int isinsets(register struct re_guts *g, int c);
1248 */
1249 static int /* predicate */
1250 isinsets(g, c)
1251 register struct re_guts *g;
1252 int c;
1253 {
1254 register uch *col;
1255 register int i;
1256 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1257 register unsigned uc = (unsigned char)c;
1258
1259 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1260 if (col[uc] != 0)
1261 return(1);
1262 return(0);
1263 }
1264
1265 /*
1266 - samesets - are these two characters in exactly the same sets?
1267 == static int samesets(register struct re_guts *g, int c1, int c2);
1268 */
1269 static int /* predicate */
1270 samesets(g, c1, c2)
1271 register struct re_guts *g;
1272 int c1;
1273 int c2;
1274 {
1275 register uch *col;
1276 register int i;
1277 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1278 register unsigned uc1 = (unsigned char)c1;
1279 register unsigned uc2 = (unsigned char)c2;
1280
1281 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1282 if (col[uc1] != col[uc2])
1283 return(0);
1284 return(1);
1285 }
1286
1287 /*
1288 - categorize - sort out character categories
1289 == static void categorize(struct parse *p, register struct re_guts *g);
1290 */
1291 static void
1292 categorize(p, g)
1293 struct parse *p;
1294 register struct re_guts *g;
1295 {
1296 register cat_t *cats = g->categories;
1297 register int c;
1298 register int c2;
1299 register cat_t cat;
1300
1301 /* avoid making error situations worse */
1302 if (p->error != 0)
1303 return;
1304
1305 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1306 if (cats[c] == 0 && isinsets(g, c)) {
1307 cat = g->ncategories++;
1308 cats[c] = cat;
1309 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1310 if (cats[c2] == 0 && samesets(g, c, c2))
1311 cats[c2] = cat;
1312 }
1313 }
1314
1315 /*
1316 - dupl - emit a duplicate of a bunch of sops
1317 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1318 */
1319 static sopno /* start of duplicate */
1320 dupl(p, start, finish)
1321 register struct parse *p;
1322 sopno start; /* from here */
1323 sopno finish; /* to this less one */
1324 {
1325 register sopno ret = HERE();
1326 register sopno len = finish - start;
1327
1328 assert(finish >= start);
1329 if (len == 0)
1330 return(ret);
1331 enlarge(p, p->ssize + len); /* this many unexpected additions */
1332 assert(p->ssize >= p->slen + len);
1333 (void) memcpy((char *)(p->strip + p->slen),
1334 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1335 p->slen += len;
1336 return(ret);
1337 }
1338
1339 /*
1340 - doemit - emit a strip operator
1341 == static void doemit(register struct parse *p, sop op, size_t opnd);
1342 *
1343 * It might seem better to implement this as a macro with a function as
1344 * hard-case backup, but it's just too big and messy unless there are
1345 * some changes to the data structures. Maybe later.
1346 */
1347 static void
1348 doemit(p, op, opnd)
1349 register struct parse *p;
1350 sop op;
1351 size_t opnd;
1352 {
1353 /* avoid making error situations worse */
1354 if (p->error != 0)
1355 return;
1356
1357 /* deal with oversize operands ("can't happen", more or less) */
1358 assert(opnd < 1<<OPSHIFT);
1359
1360 /* deal with undersized strip */
1361 if (p->slen >= p->ssize)
1362 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1363 assert(p->slen < p->ssize);
1364
1365 /* finally, it's all reduced to the easy case */
1366 p->strip[p->slen++] = SOP(op, opnd);
1367 }
1368
1369 /*
1370 - doinsert - insert a sop into the strip
1371 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1372 */
1373 static void
1374 doinsert(p, op, opnd, pos)
1375 register struct parse *p;
1376 sop op;
1377 size_t opnd;
1378 sopno pos;
1379 {
1380 register sopno sn;
1381 register sop s;
1382 register int i;
1383
1384 /* avoid making error situations worse */
1385 if (p->error != 0)
1386 return;
1387
1388 sn = HERE();
1389 EMIT(op, opnd); /* do checks, ensure space */
1390 assert(HERE() == sn+1);
1391 s = p->strip[sn];
1392
1393 /* adjust paren pointers */
1394 assert(pos > 0);
1395 for (i = 1; i < NPAREN; i++) {
1396 if (p->pbegin[i] >= pos) {
1397 p->pbegin[i]++;
1398 }
1399 if (p->pend[i] >= pos) {
1400 p->pend[i]++;
1401 }
1402 }
1403
1404 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1405 (HERE()-pos-1)*sizeof(sop));
1406 p->strip[pos] = s;
1407 }
1408
1409 /*
1410 - dofwd - complete a forward reference
1411 == static void dofwd(register struct parse *p, sopno pos, sop value);
1412 */
1413 static void
1414 dofwd(p, pos, value)
1415 register struct parse *p;
1416 register sopno pos;
1417 sop value;
1418 {
1419 /* avoid making error situations worse */
1420 if (p->error != 0)
1421 return;
1422
1423 assert(value < 1<<OPSHIFT);
1424 p->strip[pos] = OP(p->strip[pos]) | value;
1425 }
1426
1427 /*
1428 - enlarge - enlarge the strip
1429 == static void enlarge(register struct parse *p, sopno size);
1430 */
1431 static void
1432 enlarge(p, size)
1433 register struct parse *p;
1434 register sopno size;
1435 {
1436 register sop *sp;
1437
1438 if (p->ssize >= size)
1439 return;
1440
1441 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1442 if (sp == NULL) {
1443 SETERROR(REG_ESPACE);
1444 return;
1445 }
1446 p->strip = sp;
1447 p->ssize = size;
1448 }
1449
1450 /*
1451 - stripsnug - compact the strip
1452 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1453 */
1454 static void
1455 stripsnug(p, g)
1456 register struct parse *p;
1457 register struct re_guts *g;
1458 {
1459 g->nstates = p->slen;
1460 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1461 if (g->strip == NULL) {
1462 SETERROR(REG_ESPACE);
1463 g->strip = p->strip;
1464 }
1465 }
1466
1467 /*
1468 - findmust - fill in must and mlen with longest mandatory literal string
1469 == static void findmust(register struct parse *p, register struct re_guts *g);
1470 *
1471 * This algorithm could do fancy things like analyzing the operands of |
1472 * for common subsequences. Someday. This code is simple and finds most
1473 * of the interesting cases.
1474 *
1475 * Note that must and mlen got initialized during setup.
1476 */
1477 static void
1478 findmust(p, g)
1479 struct parse *p;
1480 register struct re_guts *g;
1481 {
1482 register sop *scan;
1483 sop *start;
1484 register sop *newstart;
1485 register sopno newlen;
1486 register sop s;
1487 register char *cp;
1488 register sopno i;
1489
1490 /* avoid making error situations worse */
1491 if (p->error != 0)
1492 return;
1493
1494 /* find the longest OCHAR sequence in strip */
1495 newlen = 0;
1496 scan = g->strip + 1;
1497 do {
1498 s = *scan++;
1499 switch (OP(s)) {
1500 case OCHAR: /* sequence member */
1501 if (newlen == 0) /* new sequence */
1502 newstart = scan - 1;
1503 newlen++;
1504 break;
1505 case OPLUS_: /* things that don't break one */
1506 case OLPAREN:
1507 case ORPAREN:
1508 break;
1509 case OQUEST_: /* things that must be skipped */
1510 case OCH_:
1511 scan--;
1512 do {
1513 scan += OPND(s);
1514 s = *scan;
1515 /* assert() interferes w debug printouts */
1516 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1517 OP(s) != OOR2) {
1518 g->iflags |= BAD;
1519 return;
1520 }
1521 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1522 /* fallthrough */
1523 default: /* things that break a sequence */
1524 if (newlen > g->mlen) { /* ends one */
1525 start = newstart;
1526 g->mlen = newlen;
1527 }
1528 newlen = 0;
1529 break;
1530 }
1531 } while (OP(s) != OEND);
1532
1533 if (g->mlen == 0) /* there isn't one */
1534 return;
1535
1536 /* turn it into a character string */
1537 g->must = malloc((size_t)g->mlen + 1);
1538 if (g->must == NULL) { /* argh; just forget it */
1539 g->mlen = 0;
1540 return;
1541 }
1542 cp = g->must;
1543 scan = start;
1544 for (i = g->mlen; i > 0; i--) {
1545 while (OP(s = *scan++) != OCHAR)
1546 continue;
1547 assert(cp < g->must + g->mlen);
1548 *cp++ = (char)OPND(s);
1549 }
1550 assert(cp == g->must + g->mlen);
1551 *cp++ = '\0'; /* just on general principles */
1552 }
1553
1554 /*
1555 - pluscount - count + nesting
1556 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1557 */
1558 static sopno /* nesting depth */
1559 pluscount(p, g)
1560 struct parse *p;
1561 register struct re_guts *g;
1562 {
1563 register sop *scan;
1564 register sop s;
1565 register sopno plusnest = 0;
1566 register sopno maxnest = 0;
1567
1568 if (p->error != 0)
1569 return(0); /* there may not be an OEND */
1570
1571 scan = g->strip + 1;
1572 do {
1573 s = *scan++;
1574 switch (OP(s)) {
1575 case OPLUS_:
1576 plusnest++;
1577 break;
1578 case O_PLUS:
1579 if (plusnest > maxnest)
1580 maxnest = plusnest;
1581 plusnest--;
1582 break;
1583 }
1584 } while (OP(s) != OEND);
1585 if (plusnest != 0)
1586 g->iflags |= BAD;
1587 return(maxnest);
1588 }
1589