compile.c revision 1.25 1 /* $NetBSD: compile.c,v 1.25 2003/08/07 11:15:49 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis of Imperial College, University of London.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1992 Diomidis Spinellis.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Diomidis Spinellis of Imperial College, University of London.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 */
69
70 #include <sys/cdefs.h>
71 #ifndef lint
72 #if 0
73 static char sccsid[] = "@(#)compile.c 8.2 (Berkeley) 4/28/95";
74 #else
75 __RCSID("$NetBSD: compile.c,v 1.25 2003/08/07 11:15:49 agc Exp $");
76 #endif
77 #endif /* not lint */
78
79 #include <sys/types.h>
80 #include <sys/stat.h>
81
82 #include <ctype.h>
83 #include <errno.h>
84 #include <fcntl.h>
85 #include <limits.h>
86 #include <regex.h>
87 #include <stdio.h>
88 #include <stdlib.h>
89 #include <string.h>
90
91 #include "defs.h"
92 #include "extern.h"
93
94 #define LHSZ 128
95 #define LHMASK (LHSZ - 1)
96 static struct labhash {
97 struct labhash *lh_next;
98 u_int lh_hash;
99 struct s_command *lh_cmd;
100 int lh_ref;
101 } *labels[LHSZ];
102
103 static char *compile_addr(char *, struct s_addr *);
104 static char *compile_ccl(char **, char *);
105 static char *compile_delimited(char *, char *);
106 static char *compile_flags(char *, struct s_subst *);
107 static char *compile_re(char *, regex_t **);
108 static char *compile_subst(char *, struct s_subst *);
109 static char *compile_text(void);
110 static char *compile_tr(char *, char **);
111 static struct s_command
112 **compile_stream(struct s_command **);
113 static char *duptoeol(char *, char *);
114 static void enterlabel(struct s_command *);
115 static struct s_command
116 *findlabel(char *);
117 static void fixuplabel(struct s_command *, struct s_command *);
118 static void uselabel(void);
119
120 /*
121 * Command specification. This is used to drive the command parser.
122 */
123 struct s_format {
124 char code; /* Command code */
125 int naddr; /* Number of address args */
126 enum e_args args; /* Argument type */
127 };
128
129 static struct s_format cmd_fmts[] = {
130 {'{', 2, GROUP},
131 {'}', 0, ENDGROUP},
132 {'a', 1, TEXT},
133 {'b', 2, BRANCH},
134 {'c', 2, TEXT},
135 {'d', 2, EMPTY},
136 {'D', 2, EMPTY},
137 {'g', 2, EMPTY},
138 {'G', 2, EMPTY},
139 {'h', 2, EMPTY},
140 {'H', 2, EMPTY},
141 {'i', 1, TEXT},
142 {'l', 2, EMPTY},
143 {'n', 2, EMPTY},
144 {'N', 2, EMPTY},
145 {'p', 2, EMPTY},
146 {'P', 2, EMPTY},
147 {'q', 1, EMPTY},
148 {'r', 1, RFILE},
149 {'s', 2, SUBST},
150 {'t', 2, BRANCH},
151 {'w', 2, WFILE},
152 {'x', 2, EMPTY},
153 {'y', 2, TR},
154 {'!', 2, NONSEL},
155 {':', 0, LABEL},
156 {'#', 0, COMMENT},
157 {'=', 1, EMPTY},
158 {'\0', 0, COMMENT},
159 };
160
161 /* The compiled program. */
162 struct s_command *prog;
163
164 /*
165 * Compile the program into prog.
166 * Initialise appends.
167 */
168 void
169 compile(void)
170 {
171 *compile_stream(&prog) = NULL;
172 fixuplabel(prog, NULL);
173 uselabel();
174 if (appendnum > 0)
175 appends = xmalloc(sizeof(struct s_appends) * appendnum);
176 match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
177 }
178
/*
 * Advance the local pointer `p' past any leading ASCII whitespace.
 * Does nothing when p is NULL.
 */
#define	EATSPACE() do {							\
	if (p != NULL) {						\
		while (*p != '\0' && isascii((unsigned char)*p) &&	\
		    isspace((unsigned char)*p))				\
			p++;						\
	}								\
} while (0)
185
186 static struct s_command **
187 compile_stream(struct s_command **link)
188 {
189 char *p;
190 static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
191 struct s_command *cmd, *cmd2, *stack;
192 struct s_format *fp;
193 int naddr; /* Number of addresses */
194
195 stack = 0;
196 for (;;) {
197 if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
198 if (stack != 0)
199 err(COMPILE, "unexpected EOF (pending }'s)");
200 return (link);
201 }
202
203 semicolon: EATSPACE();
204 if (p) {
205 if (*p == '#' || *p == '\0')
206 continue;
207 else if (*p == ';') {
208 p++;
209 goto semicolon;
210 }
211 }
212 *link = cmd = xmalloc(sizeof(struct s_command));
213 link = &cmd->next;
214 cmd->nonsel = cmd->inrange = 0;
215 /* First parse the addresses */
216 naddr = 0;
217
218 /* Valid characters to start an address */
219 #define addrchar(c) (strchr("0123456789/\\$", (c)))
220 if (addrchar(*p)) {
221 naddr++;
222 cmd->a1 = xmalloc(sizeof(struct s_addr));
223 p = compile_addr(p, cmd->a1);
224 EATSPACE(); /* EXTENSION */
225 if (*p == ',') {
226 p++;
227 EATSPACE(); /* EXTENSION */
228 naddr++;
229 cmd->a2 = xmalloc(sizeof(struct s_addr));
230 p = compile_addr(p, cmd->a2);
231 EATSPACE();
232 } else
233 cmd->a2 = 0;
234 } else
235 cmd->a1 = cmd->a2 = 0;
236
237 nonsel: /* Now parse the command */
238 if (!*p)
239 err(COMPILE, "command expected");
240 cmd->code = *p;
241 for (fp = cmd_fmts; fp->code; fp++)
242 if (fp->code == *p)
243 break;
244 if (!fp->code)
245 err(COMPILE, "invalid command code %c", *p);
246 if (naddr > fp->naddr)
247 err(COMPILE,
248 "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
249 switch (fp->args) {
250 case NONSEL: /* ! */
251 p++;
252 EATSPACE();
253 cmd->nonsel = ! cmd->nonsel;
254 goto nonsel;
255 case GROUP: /* { */
256 p++;
257 EATSPACE();
258 cmd->next = stack;
259 stack = cmd;
260 link = &cmd->u.c;
261 if (*p)
262 goto semicolon;
263 break;
264 case ENDGROUP:
265 /*
266 * Short-circuit command processing, since end of
267 * group is really just a noop.
268 */
269 cmd->nonsel = 1;
270 if (stack == 0)
271 err(COMPILE, "unexpected }");
272 cmd2 = stack;
273 stack = cmd2->next;
274 cmd2->next = cmd;
275 /*FALLTHROUGH*/
276 case EMPTY: /* d D g G h H l n N p P q x = \0 */
277 p++;
278 EATSPACE();
279 if (*p == ';') {
280 p++;
281 link = &cmd->next;
282 goto semicolon;
283 }
284 if (*p)
285 err(COMPILE,
286 "extra characters at the end of %c command", cmd->code);
287 break;
288 case TEXT: /* a c i */
289 p++;
290 EATSPACE();
291 if (*p != '\\')
292 err(COMPILE,
293 "command %c expects \\ followed by text", cmd->code);
294 p++;
295 EATSPACE();
296 if (*p)
297 err(COMPILE,
298 "extra characters after \\ at the end of %c command", cmd->code);
299 cmd->t = compile_text();
300 break;
301 case COMMENT: /* \0 # */
302 break;
303 case WFILE: /* w */
304 p++;
305 EATSPACE();
306 if (*p == '\0')
307 err(COMPILE, "filename expected");
308 cmd->t = duptoeol(p, "w command");
309 if (aflag)
310 cmd->u.fd = -1;
311 else if ((cmd->u.fd = open(p,
312 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
313 DEFFILEMODE)) == -1)
314 err(FATAL, "%s: %s", p, strerror(errno));
315 break;
316 case RFILE: /* r */
317 p++;
318 EATSPACE();
319 if (*p == '\0')
320 err(COMPILE, "filename expected");
321 else
322 cmd->t = duptoeol(p, "read command");
323 break;
324 case BRANCH: /* b t */
325 p++;
326 EATSPACE();
327 if (*p == '\0')
328 cmd->t = NULL;
329 else
330 cmd->t = duptoeol(p, "branch");
331 break;
332 case LABEL: /* : */
333 p++;
334 EATSPACE();
335 cmd->t = duptoeol(p, "label");
336 if (strlen(p) == 0)
337 err(COMPILE, "empty label");
338 enterlabel(cmd);
339 break;
340 case SUBST: /* s */
341 p++;
342 if (*p == '\0' || *p == '\\')
343 err(COMPILE,
344 "substitute pattern can not be delimited by newline or backslash");
345 cmd->u.s = xmalloc(sizeof(struct s_subst));
346 p = compile_re(p, &cmd->u.s->re);
347 if (p == NULL)
348 err(COMPILE, "unterminated substitute pattern");
349 --p;
350 p = compile_subst(p, cmd->u.s);
351 p = compile_flags(p, cmd->u.s);
352 EATSPACE();
353 if (*p == ';') {
354 p++;
355 link = &cmd->next;
356 goto semicolon;
357 }
358 break;
359 case TR: /* y */
360 p++;
361 p = compile_tr(p, (char **)&cmd->u.y);
362 EATSPACE();
363 if (*p == ';') {
364 p++;
365 link = &cmd->next;
366 goto semicolon;
367 }
368 if (*p)
369 err(COMPILE,
370 "extra text at the end of a transform command");
371 break;
372 }
373 }
374 }
375
376 /*
377 * Get a delimited string. P points to the delimiter of the string; d points
378 * to a buffer area. Newline and delimiter escapes are processed; other
379 * escapes are ignored.
380 *
381 * Returns a pointer to the first character after the final delimiter or NULL
382 * in the case of a non-terminated string. The character array d is filled
383 * with the processed string.
384 */
385 static char *
386 compile_delimited(char *p, char *d)
387 {
388 char c;
389
390 c = *p++;
391 if (c == '\0')
392 return (NULL);
393 else if (c == '\\')
394 err(COMPILE, "\\ can not be used as a string delimiter");
395 else if (c == '\n')
396 err(COMPILE, "newline can not be used as a string delimiter");
397 while (*p) {
398 if (*p == '[') {
399 if ((d = compile_ccl(&p, d)) == NULL)
400 err(COMPILE, "unbalanced brackets ([])");
401 continue;
402 } else if (*p == '\\' && p[1] == '[') {
403 *d++ = *p++;
404 } else if (*p == '\\' && p[1] == c)
405 p++;
406 else if (*p == '\\' && p[1] == 'n') {
407 *d++ = '\n';
408 p += 2;
409 continue;
410 } else if (*p == '\\' && p[1] == '\\')
411 *d++ = *p++;
412 else if (*p == c) {
413 *d = '\0';
414 return (p + 1);
415 }
416 *d++ = *p++;
417 }
418 return (NULL);
419 }
420
421
/*
 * compile_ccl --
 *	Copy a bracket expression from *sp to t.  *sp must point at the
 *	opening '['.  A leading '^' and a ']' directly after it are copied
 *	literally; "[.", "[:" and "[=" elements are copied up to their
 *	matching ".]", ":]" or "=]"; "\n" is replaced by a newline.
 *
 *	On success *sp is advanced past the closing ']' and the position
 *	just past the copied ']' in t is returned.  Returns NULL, leaving
 *	*sp alone, when the expression is not closed.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	char *dst = t;
	int kind, prev;

	*dst++ = *src++;			/* the opening '[' */
	if (*src == '^')
		*dst++ = *src++;
	if (*src == ']')
		*dst++ = *src++;

	while (*src != '\0') {
		*dst = *src;
		if (*src == ']')
			break;
		if (*src == '[' &&
		    (src[1] == '.' || src[1] == ':' || src[1] == '=')) {
			kind = src[1];
			dst[1] = src[1];
			src += 2;
			dst += 2;
			/* Copy until a ']' directly preceded by `kind'. */
			prev = *src;
			for (;;) {
				*dst = *src;
				if (*src == ']' && prev == kind)
					break;
				prev = *src;
				if (prev == '\0')
					return NULL;
				src++;
				dst++;
			}
		} else if (*src == '\\' && src[1] == 'n') {
			*dst = '\n';
			src++;
		}
		src++;
		dst++;
	}

	if (*src != ']')
		return NULL;
	*sp = src + 1;
	return dst + 1;
}
444
445 /*
446 * Get a regular expression. P points to the delimiter of the regular
447 * expression; repp points to the address of a regexp pointer. Newline
448 * and delimiter escapes are processed; other escapes are ignored.
449 * Returns a pointer to the first character after the final delimiter
450 * or NULL in the case of a non terminated regular expression. The regexp
451 * pointer is set to the compiled regular expression.
452 * Cflags are passed to regcomp.
453 */
454 static char *
455 compile_re(char *p, regex_t **repp)
456 {
457 int eval;
458 char re[_POSIX2_LINE_MAX + 1];
459
460 p = compile_delimited(p, re);
461 if (p && strlen(re) == 0) {
462 *repp = NULL;
463 return (p);
464 }
465 *repp = xmalloc(sizeof(regex_t));
466 if (p && (eval = regcomp(*repp, re, ere)) != 0)
467 err(COMPILE, "RE error: %s", strregerror(eval, *repp));
468 if (maxnsub < (*repp)->re_nsub)
469 maxnsub = (*repp)->re_nsub;
470 return (p);
471 }
472
473 /*
474 * Compile the substitution string of a regular expression and set res to
475 * point to a saved copy of it. Nsub is the number of parenthesized regular
476 * expressions.
477 */
478 static char *
479 compile_subst(char *p, struct s_subst *s)
480 {
481 static char lbuf[_POSIX2_LINE_MAX + 1];
482 int asize, ref, size;
483 char c, *text, *op, *sp;
484
485 c = *p++; /* Terminator character */
486 if (c == '\0')
487 return (NULL);
488
489 s->maxbref = 0;
490 s->linenum = linenum;
491 asize = 2 * _POSIX2_LINE_MAX + 1;
492 text = xmalloc(asize);
493 size = 0;
494 do {
495 op = sp = text + size;
496 for (; *p; p++) {
497 if (*p == '\\') {
498 p++;
499 if (strchr("123456789", *p) != NULL) {
500 *sp++ = '\\';
501 ref = *p - '0';
502 if (s->re != NULL &&
503 ref > s->re->re_nsub)
504 err(COMPILE,
505 "\\%c not defined in the RE", *p);
506 if (s->maxbref < ref)
507 s->maxbref = ref;
508 } else if (*p == '&' || *p == '\\')
509 *sp++ = '\\';
510 } else if (*p == c) {
511 p++;
512 *sp++ = '\0';
513 size += sp - op;
514 s->new = xrealloc(text, size);
515 return (p);
516 } else if (*p == '\n') {
517 err(COMPILE,
518 "unescaped newline inside substitute pattern");
519 /* NOTREACHED */
520 }
521 *sp++ = *p;
522 }
523 size += sp - op;
524 if (asize - size < _POSIX2_LINE_MAX + 1) {
525 asize *= 2;
526 text = xmalloc(asize);
527 }
528 } while (cu_fgets(p = lbuf, sizeof(lbuf)));
529 err(COMPILE, "unterminated substitute in regular expression");
530 /* NOTREACHED */
531 return (NULL);
532 }
533
534 /*
535 * Compile the flags of the s command
536 */
537 static char *
538 compile_flags(char *p, struct s_subst *s)
539 {
540 int gn; /* True if we have seen g or n */
541 char wfile[_POSIX2_LINE_MAX + 1], *q;
542
543 s->n = 1; /* Default */
544 s->p = 0;
545 s->wfile = NULL;
546 s->wfd = -1;
547 for (gn = 0;;) {
548 EATSPACE(); /* EXTENSION */
549 switch (*p) {
550 case 'g':
551 if (gn)
552 err(COMPILE,
553 "more than one number or 'g' in substitute flags");
554 gn = 1;
555 s->n = 0;
556 break;
557 case '\0':
558 case '\n':
559 case ';':
560 return (p);
561 case 'p':
562 s->p = 1;
563 break;
564 case '1': case '2': case '3':
565 case '4': case '5': case '6':
566 case '7': case '8': case '9':
567 if (gn)
568 err(COMPILE,
569 "more than one number or 'g' in substitute flags");
570 gn = 1;
571 /* XXX Check for overflow */
572 s->n = (int)strtol(p, &p, 10);
573 break;
574 case 'w':
575 p++;
576 #ifdef HISTORIC_PRACTICE
577 if (*p != ' ') {
578 err(WARNING, "space missing before w wfile");
579 return (p);
580 }
581 #endif
582 EATSPACE();
583 q = wfile;
584 while (*p) {
585 if (*p == '\n')
586 break;
587 *q++ = *p++;
588 }
589 *q = '\0';
590 if (q == wfile)
591 err(COMPILE, "no wfile specified");
592 s->wfile = strdup(wfile);
593 if (!aflag && (s->wfd = open(wfile,
594 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
595 DEFFILEMODE)) == -1)
596 err(FATAL, "%s: %s", wfile, strerror(errno));
597 return (p);
598 default:
599 err(COMPILE,
600 "bad flag in substitute command: '%c'", *p);
601 break;
602 }
603 p++;
604 }
605 }
606
607 /*
608 * Compile a translation set of strings into a lookup table.
609 */
610 static char *
611 compile_tr(char *p, char **transtab)
612 {
613 int i;
614 char *lt, *op, *np;
615 char old[_POSIX2_LINE_MAX + 1];
616 char new[_POSIX2_LINE_MAX + 1];
617
618 if (*p == '\0' || *p == '\\')
619 err(COMPILE,
620 "transform pattern can not be delimited by newline or backslash");
621 p = compile_delimited(p, old);
622 if (p == NULL) {
623 err(COMPILE, "unterminated transform source string");
624 return (NULL);
625 }
626 p = compile_delimited(--p, new);
627 if (p == NULL) {
628 err(COMPILE, "unterminated transform target string");
629 return (NULL);
630 }
631 EATSPACE();
632 if (strlen(new) != strlen(old)) {
633 err(COMPILE, "transform strings are not the same length");
634 return (NULL);
635 }
636 /* We assume characters are 8 bits */
637 lt = xmalloc(UCHAR_MAX);
638 for (i = 0; i <= UCHAR_MAX; i++)
639 lt[i] = (char)i;
640 for (op = old, np = new; *op; op++, np++)
641 lt[(u_char)*op] = *np;
642 *transtab = lt;
643 return (p);
644 }
645
646 /*
647 * Compile the text following an a or i command.
648 */
649 static char *
650 compile_text(void)
651 {
652 int asize, size;
653 char *text, *p, *op, *s;
654 char lbuf[_POSIX2_LINE_MAX + 1];
655
656 asize = 2 * _POSIX2_LINE_MAX + 1;
657 text = xmalloc(asize);
658 size = 0;
659 while (cu_fgets(lbuf, sizeof(lbuf))) {
660 op = s = text + size;
661 p = lbuf;
662 EATSPACE();
663 for (; *p; p++) {
664 if (*p == '\\')
665 p++;
666 *s++ = *p;
667 }
668 size += s - op;
669 if (p[-2] != '\\') {
670 *s = '\0';
671 break;
672 }
673 if (asize - size < _POSIX2_LINE_MAX + 1) {
674 asize *= 2;
675 text = xmalloc(asize);
676 }
677 }
678 return (xrealloc(text, size + 1));
679 }
680
681 /*
682 * Get an address and return a pointer to the first character after
683 * it. Fill the structure pointed to according to the address.
684 */
685 static char *
686 compile_addr(char *p, struct s_addr *a)
687 {
688 char *end;
689
690 switch (*p) {
691 case '\\': /* Context address */
692 ++p;
693 /* FALLTHROUGH */
694 case '/': /* Context address */
695 p = compile_re(p, &a->u.r);
696 if (p == NULL)
697 err(COMPILE, "unterminated regular expression");
698 a->type = AT_RE;
699 return (p);
700
701 case '$': /* Last line */
702 a->type = AT_LAST;
703 return (p + 1);
704 /* Line number */
705 case '0': case '1': case '2': case '3': case '4':
706 case '5': case '6': case '7': case '8': case '9':
707 a->type = AT_LINE;
708 a->u.l = strtol(p, &end, 10);
709 return (end);
710 default:
711 err(COMPILE, "expected context address");
712 return (NULL);
713 }
714 }
715
716 /*
717 * duptoeol --
718 * Return a copy of all the characters up to \n or \0.
719 */
720 static char *
721 duptoeol(char *s, char *ctype)
722 {
723 size_t len;
724 int ws;
725 char *start;
726
727 ws = 0;
728 for (start = s; *s != '\0' && *s != '\n'; ++s)
729 ws = isspace((unsigned char)*s);
730 *s = '\0';
731 if (ws)
732 err(WARNING, "whitespace after %s", ctype);
733 len = s - start + 1;
734 return (memmove(xmalloc(len), start, len));
735 }
736
737 /*
738 * Convert goto label names to addresses, and count a and r commands, in
739 * the given subset of the script. Free the memory used by labels in b
740 * and t commands (but not by :).
741 *
742 * TODO: Remove } nodes
743 */
744 static void
745 fixuplabel(struct s_command *cp, struct s_command *end)
746 {
747
748 for (; cp != end; cp = cp->next)
749 switch (cp->code) {
750 case 'a':
751 case 'r':
752 appendnum++;
753 break;
754 case 'b':
755 case 't':
756 /* Resolve branch target. */
757 if (cp->t == NULL) {
758 cp->u.c = NULL;
759 break;
760 }
761 if ((cp->u.c = findlabel(cp->t)) == NULL)
762 err(COMPILE2, "undefined label '%s'", cp->t);
763 free(cp->t);
764 break;
765 case '{':
766 /* Do interior commands. */
767 fixuplabel(cp->u.c, cp->next);
768 break;
769 }
770 }
771
772 /*
773 * Associate the given command label for later lookup.
774 */
775 static void
776 enterlabel(struct s_command *cp)
777 {
778 struct labhash **lhp, *lh;
779 u_char *p;
780 u_int h, c;
781
782 for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
783 h = (h << 5) + h + c;
784 lhp = &labels[h & LHMASK];
785 for (lh = *lhp; lh != NULL; lh = lh->lh_next)
786 if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
787 err(COMPILE2, "duplicate label '%s'", cp->t);
788 lh = xmalloc(sizeof *lh);
789 lh->lh_next = *lhp;
790 lh->lh_hash = h;
791 lh->lh_cmd = cp;
792 lh->lh_ref = 0;
793 *lhp = lh;
794 }
795
796 /*
797 * Find the label contained in the command l in the command linked
798 * list cp. L is excluded from the search. Return NULL if not found.
799 */
800 static struct s_command *
801 findlabel(char *name)
802 {
803 struct labhash *lh;
804 u_char *p;
805 u_int h, c;
806
807 for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
808 h = (h << 5) + h + c;
809 for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
810 if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
811 lh->lh_ref = 1;
812 return (lh->lh_cmd);
813 }
814 }
815 return (NULL);
816 }
817
818 /*
819 * Warn about any unused labels. As a side effect, release the label hash
820 * table space.
821 */
822 static void
823 uselabel(void)
824 {
825 struct labhash *lh, *next;
826 int i;
827
828 for (i = 0; i < LHSZ; i++) {
829 for (lh = labels[i]; lh != NULL; lh = next) {
830 next = lh->lh_next;
831 if (!lh->lh_ref)
832 err(WARNING, "unused label '%s'",
833 lh->lh_cmd->t);
834 free(lh);
835 }
836 }
837 }
838