compile.c revision 1.44 1 /* $NetBSD: compile.c,v 1.44 2015/02/28 21:56:53 asau Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: compile.c,v 1.44 2015/02/28 21:56:53 asau Exp $");
42 #ifdef __FBSDID
43 __FBSDID("$FreeBSD: head/usr.bin/sed/compile.c 259132 2013-12-09 18:57:20Z eadler $");
44 #endif
45
46 #if 0
47 static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93";
48 #endif
49
50 #include <sys/types.h>
51 #include <sys/stat.h>
52
53 #include <ctype.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <limits.h>
58 #include <regex.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <wchar.h>
63
64 #include "defs.h"
65 #include "extern.h"
66
/*
 * Label hash table.  LHSZ is the number of buckets; LHMASK reduces a hash
 * value to a bucket index (this only works because LHSZ is a power of two).
 */
#define LHSZ 128
#define LHMASK (LHSZ - 1)
static struct labhash {
	struct labhash *lh_next;	/* Next entry in the same bucket */
	u_int lh_hash;			/* Full, unmasked hash of the label name */
	struct s_command *lh_cmd;	/* The ':' command that defines the label */
	int lh_ref;			/* Set by findlabel() once the label is looked up */
} *labels[LHSZ];
75
/* Script reader: hands out the script one line at a time. */
static char *cu_fgets(char *, int, int *);

/* Parsers for the individual pieces of a command, and label bookkeeping. */
static char *compile_addr(char *, struct s_addr *);
static char *compile_ccl(char **, char *);
static char *compile_delimited(char *, char *, int);
static char *compile_flags(char *, struct s_subst *);
static regex_t *compile_re(char *, int);
static char *compile_subst(char *, struct s_subst *);
static char *compile_text(void);
static char *compile_tr(char *, struct s_tr **);
static struct s_command
**compile_stream(struct s_command **);
static char *duptoeol(char *, const char *);
static void enterlabel(struct s_command *);
static struct s_command
*findlabel(char *);
static void fixuplabel(struct s_command *, struct s_command *);
static void uselabel(void);
94
/*
 * Current file and line number; line numbers restart across compilation
 * units, but span across input files. The latter is optional if editing
 * in place.
 *
 * Both are maintained by cu_fgets() and used in diagnostics throughout
 * this file.
 */
static const char *fname; /* File name. */
static u_long linenum; /* Line number; reset to 0 when cu_fgets() starts a new unit. */
102
/*
 * Command specification. This is used to drive the command parser:
 * compile_stream() finds the entry whose code matches the command
 * character, rejects the command if more than naddr addresses were
 * given, and then dispatches on args.
 */
struct s_format {
	char code; /* Command code */
	int naddr; /* Maximum number of address args accepted */
	enum e_args args; /* Argument type */
};
111
/*
 * Table of known commands.  The final entry, whose code is '\0', is the
 * end-of-table sentinel that stops the lookup loop in compile_stream().
 */
static struct s_format cmd_fmts[] = {
	{'{', 2, GROUP},
	{'}', 0, ENDGROUP},
	{'a', 1, TEXT},
	{'b', 2, BRANCH},
	{'c', 2, TEXT},
	{'d', 2, EMPTY},
	{'D', 2, EMPTY},
	{'g', 2, EMPTY},
	{'G', 2, EMPTY},
	{'h', 2, EMPTY},
	{'H', 2, EMPTY},
	{'i', 1, TEXT},
	{'l', 2, EMPTY},
	{'n', 2, EMPTY},
	{'N', 2, EMPTY},
	{'p', 2, EMPTY},
	{'P', 2, EMPTY},
	{'q', 1, EMPTY},
	{'r', 1, RFILE},
	{'s', 2, SUBST},
	{'t', 2, BRANCH},
	{'w', 2, WFILE},
	{'x', 2, EMPTY},
	{'y', 2, TR},
	{'!', 2, NONSEL},
	{':', 0, LABEL},
	{'#', 0, COMMENT},
	{'=', 1, EMPTY},
	{'\0', 0, COMMENT},
};
143
/* The compiled program: head of the command list that compile() builds. */
struct s_command *prog;
146
147 /*
148 * Compile the program into prog.
149 * Initialise appends.
150 */
151 void
152 compile(void)
153 {
154 *compile_stream(&prog) = NULL;
155 fixuplabel(prog, NULL);
156 uselabel();
157 if (appendnum > 0)
158 appends = xmalloc(sizeof(struct s_appends) * appendnum);
159 match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
160 }
161
/*
 * Advance the variable `p' past any characters that isspace() accepts.
 * Does nothing when p is NULL.  The macro names `p' directly, so a
 * `char *p' must be in scope wherever it is used.
 */
#define EATSPACE() do { \
	if (p) \
		while (*p && isspace((unsigned char)*p)) \
			p++; \
	} while (0)
167
168 static struct s_command **
169 compile_stream(struct s_command **link)
170 {
171 char *p;
172 static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */
173 struct s_command *cmd, *cmd2, *stack;
174 struct s_format *fp;
175 char re[_POSIX2_LINE_MAX + 1];
176 int naddr; /* Number of addresses */
177
178 stack = 0;
179 for (;;) {
180 if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) {
181 if (stack != 0)
182 errx(1, "%lu: %s: unexpected EOF (pending }'s)",
183 linenum, fname);
184 return (link);
185 }
186
187 semicolon: EATSPACE();
188 if (p) {
189 if (*p == '#' || *p == '\0')
190 continue;
191 else if (*p == ';') {
192 p++;
193 goto semicolon;
194 }
195 }
196 *link = cmd = xmalloc(sizeof(struct s_command));
197 link = &cmd->next;
198 cmd->startline = cmd->nonsel = 0;
199 /* First parse the addresses */
200 naddr = 0;
201
202 /* Valid characters to start an address */
203 #define addrchar(c) (strchr("0123456789/\\$", (c)))
204 if (addrchar(*p)) {
205 naddr++;
206 cmd->a1 = xmalloc(sizeof(struct s_addr));
207 p = compile_addr(p, cmd->a1);
208 EATSPACE(); /* EXTENSION */
209 if (*p == ',') {
210 p++;
211 EATSPACE(); /* EXTENSION */
212 naddr++;
213 cmd->a2 = xmalloc(sizeof(struct s_addr));
214 p = compile_addr(p, cmd->a2);
215 EATSPACE();
216 } else
217 cmd->a2 = 0;
218 } else
219 cmd->a1 = cmd->a2 = 0;
220
221 nonsel: /* Now parse the command */
222 if (!*p)
223 errx(1, "%lu: %s: command expected", linenum, fname);
224 cmd->code = *p;
225 for (fp = cmd_fmts; fp->code; fp++)
226 if (fp->code == *p)
227 break;
228 if (!fp->code)
229 errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p);
230 if (naddr > fp->naddr)
231 errx(1,
232 "%lu: %s: command %c expects up to %d address(es), found %d",
233 linenum, fname, *p, fp->naddr, naddr);
234 switch (fp->args) {
235 case NONSEL: /* ! */
236 p++;
237 EATSPACE();
238 cmd->nonsel = ! cmd->nonsel;
239 goto nonsel;
240 case GROUP: /* { */
241 p++;
242 EATSPACE();
243 cmd->next = stack;
244 stack = cmd;
245 link = &cmd->u.c;
246 if (*p)
247 goto semicolon;
248 break;
249 case ENDGROUP:
250 /*
251 * Short-circuit command processing, since end of
252 * group is really just a noop.
253 */
254 cmd->nonsel = 1;
255 if (stack == 0)
256 errx(1, "%lu: %s: unexpected }", linenum, fname);
257 cmd2 = stack;
258 stack = cmd2->next;
259 cmd2->next = cmd;
260 /*FALLTHROUGH*/
261 case EMPTY: /* d D g G h H l n N p P q x = \0 */
262 p++;
263 EATSPACE();
264 switch (*p) {
265 case ';':
266 p++;
267 link = &cmd->next;
268 goto semicolon;
269 case '}':
270 goto semicolon;
271 case '\0':
272 break;
273 default:
274 errx(1, "%lu: %s: extra characters at the end of %c command",
275 linenum, fname, cmd->code);
276 }
277 break;
278 case TEXT: /* a c i */
279 p++;
280 EATSPACE();
281 if (*p != '\\')
282 errx(1,
283 "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code);
284 p++;
285 EATSPACE();
286 if (*p)
287 errx(1,
288 "%lu: %s: extra characters after \\ at the end of %c command",
289 linenum, fname, cmd->code);
290 cmd->t = compile_text();
291 break;
292 case COMMENT: /* \0 # */
293 break;
294 case WFILE: /* w */
295 p++;
296 EATSPACE();
297 if (*p == '\0')
298 errx(1, "%lu: %s: filename expected", linenum, fname);
299 cmd->t = duptoeol(p, "w command");
300 if (aflag)
301 cmd->u.fd = -1;
302 else if ((cmd->u.fd = open(p,
303 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
304 DEFFILEMODE)) == -1)
305 err(1, "%s", p);
306 break;
307 case RFILE: /* r */
308 p++;
309 EATSPACE();
310 if (*p == '\0')
311 errx(1, "%lu: %s: filename expected", linenum, fname);
312 else
313 cmd->t = duptoeol(p, "read command");
314 break;
315 case BRANCH: /* b t */
316 p++;
317 EATSPACE();
318 if (*p == '\0')
319 cmd->t = NULL;
320 else
321 cmd->t = duptoeol(p, "branch");
322 break;
323 case LABEL: /* : */
324 p++;
325 EATSPACE();
326 cmd->t = duptoeol(p, "label");
327 if (strlen(p) == 0)
328 errx(1, "%lu: %s: empty label", linenum, fname);
329 enterlabel(cmd);
330 break;
331 case SUBST: /* s */
332 p++;
333 if (*p == '\0' || *p == '\\')
334 errx(1,
335 "%lu: %s: substitute pattern can not be delimited by newline or backslash",
336 linenum, fname);
337 cmd->u.s = xcalloc(1, sizeof(struct s_subst));
338 p = compile_delimited(p, re, 0);
339 if (p == NULL)
340 errx(1,
341 "%lu: %s: unterminated substitute pattern", linenum, fname);
342
343 /* Compile RE with no case sensitivity temporarily */
344 if (*re == '\0')
345 cmd->u.s->re = NULL;
346 else
347 cmd->u.s->re = compile_re(re, 0);
348 --p;
349 p = compile_subst(p, cmd->u.s);
350 p = compile_flags(p, cmd->u.s);
351
352 /* Recompile RE with case sensitivity from "I" flag if any */
353 if (*re == '\0')
354 cmd->u.s->re = NULL;
355 else
356 cmd->u.s->re = compile_re(re, cmd->u.s->icase);
357 EATSPACE();
358 if (*p == ';') {
359 p++;
360 link = &cmd->next;
361 goto semicolon;
362 }
363 break;
364 case TR: /* y */
365 p++;
366 p = compile_tr(p, &cmd->u.y);
367 EATSPACE();
368 switch (*p) {
369 case ';':
370 p++;
371 link = &cmd->next;
372 goto semicolon;
373 case '}':
374 goto semicolon;
375 case '\0':
376 break;
377 default:
378 errx(1,
379 "%lu: %s: extra text at the end of a transform command", linenum, fname);
380 }
381 if (*p)
382 break;
383 }
384 }
385 }
386
387 /*
388 * Get a delimited string. P points to the delimeter of the string; d points
389 * to a buffer area. Newline and delimiter escapes are processed; other
390 * escapes are ignored.
391 *
392 * Returns a pointer to the first character after the final delimiter or NULL
393 * in the case of a non-terminated string. The character array d is filled
394 * with the processed string.
395 */
396 static char *
397 compile_delimited(char *p, char *d, int is_tr)
398 {
399 char c;
400
401 c = *p++;
402 if (c == '\0')
403 return (NULL);
404 else if (c == '\\')
405 errx(1, "%lu: %s: \\ can not be used as a string delimiter",
406 linenum, fname);
407 else if (c == '\n')
408 errx(1, "%lu: %s: newline can not be used as a string delimiter",
409 linenum, fname);
410 while (*p) {
411 if (*p == '[' && *p != c) {
412 if ((d = compile_ccl(&p, d)) == NULL)
413 errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname);
414 continue;
415 } else if (*p == '\\' && p[1] == '[') {
416 *d++ = *p++;
417 } else if (*p == '\\' && p[1] == c)
418 p++;
419 else if (*p == '\\' && p[1] == 'n') {
420 *d++ = '\n';
421 p += 2;
422 continue;
423 } else if (*p == '\\' && p[1] == '\\') {
424 if (is_tr)
425 p++;
426 else
427 *d++ = *p++;
428 } else if (*p == c) {
429 *d = '\0';
430 return (p + 1);
431 }
432 *d++ = *p++;
433 }
434 return (NULL);
435 }
436
437
/*
 * compile_ccl: copy one bracket expression verbatim.
 *
 * *sp points at the opening '['; t is the output position.  A leading
 * '^' and a ']' placed first in the set are taken literally.  Nested
 * "[.", "[:" and "[=" elements are copied up to their matching ".]",
 * ":]" or "=]" so that a ']' inside them does not end the expression.
 *
 * On success *sp is advanced past the closing ']' and the new output
 * position (just after the copied ']') is returned.  If the expression is
 * not terminated, NULL is returned and *sp is left unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	int prev, kind;

	*t++ = *src++;			/* The opening '[' */
	if (*src == '^')
		*t++ = *src++;
	if (*src == ']')
		*t++ = *src++;

	while (*src != '\0') {
		*t = *src;
		if (*src == ']')
			break;
		if (*src == '[') {
			kind = src[1];
			if (kind == '.' || kind == ':' || kind == '=') {
				/* Copy the kind character, then step past it. */
				t[1] = src[1];
				t += 2;
				src += 2;
				prev = *src;
				for (;;) {
					*t = *src;
					if (*src == ']' && prev == kind)
						break;
					prev = *src;
					if (prev == '\0')
						return (NULL);
					src++;
					t++;
				}
			}
		}
		src++;
		t++;
	}

	if (*src != ']')
		return (NULL);
	*sp = src + 1;
	return (t + 1);
}
459
460 /*
461 * Compiles the regular expression in RE and returns a pointer to the compiled
462 * regular expression.
463 * Cflags are passed to regcomp.
464 */
465 static regex_t *
466 compile_re(char *re, int case_insensitive)
467 {
468 regex_t *rep;
469 int eval, flags;
470
471
472 flags = rflags;
473 if (case_insensitive)
474 flags |= REG_ICASE;
475 rep = xmalloc(sizeof(regex_t));
476 if ((eval = regcomp(rep, re, flags)) != 0)
477 errx(1, "%lu: %s: RE error: %s",
478 linenum, fname, strregerror(eval, rep));
479 if (maxnsub < rep->re_nsub)
480 maxnsub = rep->re_nsub;
481 return (rep);
482 }
483
/*
 * Compile the replacement part of an s command.
 *
 * p points at the delimiter that precedes the replacement text.  The
 * processed replacement is stored in s->new, the highest back-reference
 * used in s->maxbref, and the current line number in s->linenum.
 *
 * In the stored text a backslash is kept in front of back-references
 * (\1..\9) and in front of escaped '&' and '\'; for every other escaped
 * character the backslash is dropped.  If s->re is set, a back-reference
 * above its re_nsub is fatal.  A replacement may continue over several
 * cu_fgets() buffers; an unescaped newline inside it, or running out of
 * input before the closing delimiter, is fatal.
 *
 * Returns a pointer to the text following the closing delimiter, or NULL
 * if p points at an empty string.
 */
static char *
compile_subst(char *p, struct s_subst *s)
{
	static char lbuf[_POSIX2_LINE_MAX + 1];
	size_t asize, size;
	u_char ref;
	char c, *text, *op, *sp;
	int more = 1, sawesc = 0;

	c = *p++; /* Terminator character */
	if (c == '\0')
		return (NULL);

	s->maxbref = 0;
	s->linenum = linenum;
	asize = 2 * _POSIX2_LINE_MAX + 1;
	text = xmalloc(asize);
	size = 0;
	do {
		/* op marks where this buffer's output starts; sp is the cursor. */
		op = sp = text + size;
		for (; *p; p++) {
			if (*p == '\\' || sawesc) {
				/*
				 * If this is a continuation from the last
				 * buffer, we won't have a character to
				 * skip over.
				 */
				if (sawesc)
					sawesc = 0;
				else
					p++;

				if (*p == '\0') {
					/*
					 * This escaped character is continued
					 * in the next part of the line. Note
					 * this fact, then cause the loop to
					 * exit w/ normal EOL case and reenter
					 * above with the new buffer.
					 */
					sawesc = 1;
					p--;
					continue;
				} else if (strchr("123456789", *p) != NULL) {
					/* Back-reference: keep the backslash. */
					*sp++ = '\\';
					ref = (u_char)(*p - '0');
					if (s->re != NULL &&
					    ref > s->re->re_nsub)
						errx(1, "%lu: %s: \\%c not defined in the RE",
						    linenum, fname, *p);
					if (s->maxbref < ref)
						s->maxbref = ref;
				} else if (*p == '&' || *p == '\\')
					*sp++ = '\\';
			} else if (*p == c) {
				/*
				 * Closing delimiter.  If it is the last
				 * character of the buffer and `more' is still
				 * set, fetch the next buffer so the caller
				 * can go on parsing from there.
				 */
				if (*++p == '\0' && more) {
					if (cu_fgets(lbuf, sizeof(lbuf), &more))
						p = lbuf;
				}
				*sp++ = '\0';
				size += (size_t)(sp - op);
				/* Trim the allocation to the size actually used. */
				s->new = xrealloc(text, size);
				return (p);
			} else if (*p == '\n') {
				errx(1,
"%lu: %s: unescaped newline inside substitute pattern", linenum, fname);
				/* NOTREACHED */
			}
			*sp++ = *p;
		}
		size += (size_t)(sp - op);
		/* Keep room for one more full input buffer. */
		if (asize - size < _POSIX2_LINE_MAX + 1) {
			asize *= 2;
			text = xrealloc(text, asize);
		}
	} while (cu_fgets(p = lbuf, sizeof(lbuf), &more));
	errx(1, "%lu: %s: unterminated substitute in regular expression",
	    linenum, fname);
	/* NOTREACHED */
}
569
570 /*
571 * Compile the flags of the s command
572 */
573 static char *
574 compile_flags(char *p, struct s_subst *s)
575 {
576 int gn; /* True if we have seen g or n */
577 unsigned long nval;
578 char wfile[_POSIX2_LINE_MAX + 1], *q;
579
580 s->n = 1; /* Default */
581 s->p = 0;
582 s->wfile = NULL;
583 s->wfd = -1;
584 s->icase = 0;
585 for (gn = 0;;) {
586 EATSPACE(); /* EXTENSION */
587 switch (*p) {
588 case 'g':
589 if (gn)
590 errx(1,
591 "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
592 gn = 1;
593 s->n = 0;
594 break;
595 case '\0':
596 case '\n':
597 case ';':
598 return (p);
599 case 'p':
600 s->p = 1;
601 break;
602 case 'i':
603 case 'I':
604 s->icase = 1;
605 break;
606 case '1': case '2': case '3':
607 case '4': case '5': case '6':
608 case '7': case '8': case '9':
609 if (gn)
610 errx(1,
611 "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname);
612 gn = 1;
613 errno = 0;
614 nval = strtoul(p, &p, 10);
615 if (errno == ERANGE || nval > INT_MAX)
616 errx(1,
617 "%lu: %s: overflow in the 'N' substitute flag", linenum, fname);
618 s->n = (int)nval;
619 p--;
620 break;
621 case 'w':
622 p++;
623 #ifdef HISTORIC_PRACTICE
624 if (*p != ' ') {
625 warnx("%lu: %s: space missing before w wfile", linenum, fname);
626 return (p);
627 }
628 #endif
629 EATSPACE();
630 q = wfile;
631 while (*p) {
632 if (*p == '\n')
633 break;
634 *q++ = *p++;
635 }
636 *q = '\0';
637 if (q == wfile)
638 errx(1, "%lu: %s: no wfile specified", linenum, fname);
639 s->wfile = strdup(wfile);
640 if (!aflag && (s->wfd = open(wfile,
641 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
642 DEFFILEMODE)) == -1)
643 err(1, "%s", wfile);
644 return (p);
645 default:
646 errx(1, "%lu: %s: bad flag in substitute command: '%c'",
647 linenum, fname, *p);
648 break;
649 }
650 p++;
651 }
652 }
653
654 /*
655 * Compile a translation set of strings into a lookup table.
656 */
657 static char *
658 compile_tr(char *p, struct s_tr **py)
659 {
660 struct s_tr *y;
661 size_t i;
662 const char *op, *np;
663 char old[_POSIX2_LINE_MAX + 1];
664 char new[_POSIX2_LINE_MAX + 1];
665 size_t oclen, oldlen, nclen, newlen;
666 mbstate_t mbs1, mbs2;
667
668 *py = y = xmalloc(sizeof(*y));
669 y->multis = NULL;
670 y->nmultis = 0;
671
672 if (*p == '\0' || *p == '\\')
673 errx(1,
674 "%lu: %s: transform pattern can not be delimited by newline or backslash",
675 linenum, fname);
676 p = compile_delimited(p, old, 1);
677 if (p == NULL)
678 errx(1, "%lu: %s: unterminated transform source string",
679 linenum, fname);
680 p = compile_delimited(p - 1, new, 1);
681 if (p == NULL)
682 errx(1, "%lu: %s: unterminated transform target string",
683 linenum, fname);
684 EATSPACE();
685 op = old;
686 oldlen = mbsrtowcs(NULL, &op, 0, NULL);
687 if (oldlen == (size_t)-1)
688 err(1, NULL);
689 np = new;
690 newlen = mbsrtowcs(NULL, &np, 0, NULL);
691 if (newlen == (size_t)-1)
692 err(1, NULL);
693 if (newlen != oldlen)
694 errx(1, "%lu: %s: transform strings are not the same length",
695 linenum, fname);
696 if (MB_CUR_MAX == 1) {
697 /*
698 * The single-byte encoding case is easy: generate a
699 * lookup table.
700 */
701 for (i = 0; i <= UCHAR_MAX; i++)
702 y->bytetab[i] = (u_char)i;
703 for (; *op; op++, np++)
704 y->bytetab[(u_char)*op] = (u_char)*np;
705 } else {
706 /*
707 * Multi-byte encoding case: generate a lookup table as
708 * above, but only for single-byte characters. The first
709 * bytes of multi-byte characters have their lookup table
710 * entries set to 0, which causes do_tr() to search through
711 * an auxiliary vector of multi-byte mappings.
712 */
713 memset(&mbs1, 0, sizeof(mbs1));
714 memset(&mbs2, 0, sizeof(mbs2));
715 for (i = 0; i <= UCHAR_MAX; i++)
716 y->bytetab[i] = (u_char)((btowc((int)i) != WEOF) ? i : 0);
717 while (*op != '\0') {
718 oclen = mbrlen(op, MB_LEN_MAX, &mbs1);
719 if (oclen == (size_t)-1 || oclen == (size_t)-2)
720 errc(1, EILSEQ, NULL);
721 nclen = mbrlen(np, MB_LEN_MAX, &mbs2);
722 if (nclen == (size_t)-1 || nclen == (size_t)-2)
723 errc(1, EILSEQ, NULL);
724 if (oclen == 1 && nclen == 1)
725 y->bytetab[(u_char)*op] = (u_char)*np;
726 else {
727 y->bytetab[(u_char)*op] = 0;
728 y->multis = xrealloc(y->multis,
729 (y->nmultis + 1) * sizeof(*y->multis));
730 i = y->nmultis++;
731 y->multis[i].fromlen = oclen;
732 memcpy(y->multis[i].from, op, oclen);
733 y->multis[i].tolen = nclen;
734 memcpy(y->multis[i].to, np, nclen);
735 }
736 op += oclen;
737 np += nclen;
738 }
739 }
740 return (p);
741 }
742
743 /*
744 * Compile the text following an a or i command.
745 */
746 static char *
747 compile_text(void)
748 {
749 size_t asize, size;
750 int esc_nl;
751 char *text, *p, *op, *s;
752 char lbuf[_POSIX2_LINE_MAX + 1];
753
754 asize = 2 * _POSIX2_LINE_MAX + 1;
755 text = xmalloc(asize);
756 size = 0;
757 while (cu_fgets(lbuf, sizeof(lbuf), NULL)) {
758 op = s = text + size;
759 p = lbuf;
760 EATSPACE();
761 for (esc_nl = 0; *p != '\0'; p++) {
762 if (*p == '\\' && p[1] != '\0' && *++p == '\n')
763 esc_nl = 1;
764 *s++ = *p;
765 }
766 size += (size_t)(s - op);
767 if (!esc_nl) {
768 *s = '\0';
769 break;
770 }
771 if (asize - size < _POSIX2_LINE_MAX + 1) {
772 asize *= 2;
773 text = xrealloc(text, asize);
774 }
775 }
776 text[size] = '\0';
777 p = xrealloc(text, size + 1);
778 return (p);
779 }
780
781 /*
782 * Get an address and return a pointer to the first character after
783 * it. Fill the structure pointed to according to the address.
784 */
785 static char *
786 compile_addr(char *p, struct s_addr *a)
787 {
788 char *end, re[_POSIX2_LINE_MAX + 1];
789 int icase;
790
791 icase = 0;
792
793 a->type = 0;
794 switch (*p) {
795 case '\\': /* Context address */
796 ++p;
797 /* FALLTHROUGH */
798 case '/': /* Context address */
799 p = compile_delimited(p, re, 0);
800 if (p == NULL)
801 errx(1, "%lu: %s: unterminated regular expression", linenum, fname);
802 /* Check for case insensitive regexp flag */
803 if (*p == 'I') {
804 icase = 1;
805 p++;
806 }
807 if (*re == '\0')
808 a->u.r = NULL;
809 else
810 a->u.r = compile_re(re, icase);
811 a->type = AT_RE;
812 return (p);
813
814 case '$': /* Last line */
815 a->type = AT_LAST;
816 return (p + 1);
817
818 case '+': /* Relative line number */
819 a->type = AT_RELLINE;
820 p++;
821 /* FALLTHROUGH */
822 /* Line number */
823 case '0': case '1': case '2': case '3': case '4':
824 case '5': case '6': case '7': case '8': case '9':
825 if (a->type == 0)
826 a->type = AT_LINE;
827 a->u.l = strtoul(p, &end, 10);
828 return (end);
829 default:
830 errx(1, "%lu: %s: expected context address", linenum, fname);
831 return (NULL);
832 }
833 }
834
835 /*
836 * duptoeol --
837 * Return a copy of all the characters up to \n or \0.
838 */
839 static char *
840 duptoeol(char *s, const char *ctype)
841 {
842 size_t len;
843 int ws;
844 char *p, *start;
845
846 ws = 0;
847 for (start = s; *s != '\0' && *s != '\n'; ++s)
848 ws = isspace((unsigned char)*s);
849 *s = '\0';
850 if (ws)
851 warnx("%lu: %s: whitespace after %s", linenum, fname, ctype);
852 len = (size_t)(s - start + 1);
853 p = xmalloc(len);
854 return (memmove(p, start, len));
855 }
856
857 /*
858 * Convert goto label names to addresses, and count a and r commands, in
859 * the given subset of the script. Free the memory used by labels in b
860 * and t commands (but not by :).
861 *
862 * TODO: Remove } nodes
863 */
864 static void
865 fixuplabel(struct s_command *cp, struct s_command *end)
866 {
867
868 for (; cp != end; cp = cp->next)
869 switch (cp->code) {
870 case 'a':
871 case 'r':
872 appendnum++;
873 break;
874 case 'b':
875 case 't':
876 /* Resolve branch target. */
877 if (cp->t == NULL) {
878 cp->u.c = NULL;
879 break;
880 }
881 if ((cp->u.c = findlabel(cp->t)) == NULL)
882 errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t);
883 free(cp->t);
884 break;
885 case '{':
886 /* Do interior commands. */
887 fixuplabel(cp->u.c, cp->next);
888 break;
889 }
890 }
891
892 /*
893 * Associate the given command label for later lookup.
894 */
895 static void
896 enterlabel(struct s_command *cp)
897 {
898 struct labhash **lhp, *lh;
899 u_char *p;
900 u_int h, c;
901
902 for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
903 h = (h << 5) + h + c;
904 lhp = &labels[h & LHMASK];
905 for (lh = *lhp; lh != NULL; lh = lh->lh_next)
906 if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
907 errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t);
908 lh = xmalloc(sizeof *lh);
909 lh->lh_next = *lhp;
910 lh->lh_hash = h;
911 lh->lh_cmd = cp;
912 lh->lh_ref = 0;
913 *lhp = lh;
914 }
915
916 /*
917 * Find the label contained in the command l in the command linked
918 * list cp. L is excluded from the search. Return NULL if not found.
919 */
920 static struct s_command *
921 findlabel(char *name)
922 {
923 struct labhash *lh;
924 u_char *p;
925 u_int h, c;
926
927 for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
928 h = (h << 5) + h + c;
929 for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
930 if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
931 lh->lh_ref = 1;
932 return (lh->lh_cmd);
933 }
934 }
935 return (NULL);
936 }
937
938 /*
939 * Warn about any unused labels. As a side effect, release the label hash
940 * table space.
941 */
942 static void
943 uselabel(void)
944 {
945 struct labhash *lh, *next;
946 int i;
947
948 for (i = 0; i < LHSZ; i++) {
949 for (lh = labels[i]; lh != NULL; lh = next) {
950 next = lh->lh_next;
951 if (!lh->lh_ref)
952 warnx("%lu: %s: unused label '%s'",
953 linenum, fname, lh->lh_cmd->t);
954 free(lh);
955 }
956 }
957 }
958
/*
 * Like fgets, but go through the chain of compilation units chaining them
 * together. Empty strings and files are ignored.
 *
 * At most n - 1 characters are stored in buf, followed by a NUL.  Returns
 * buf, or NULL once the script list is exhausted.  When `more' is not
 * NULL it is set as follows: for a file unit, non-zero unless end of file
 * has been reached; for a string unit, 1 when the line was cut short
 * because buf is full and 0 otherwise; 0 when NULL is returned.
 *
 * Side effects: maintains fname and linenum, advances the global script
 * pointer, and sets nflag when the first line of a unit starts with "#n".
 */
char *
cu_fgets(char *buf, int n, int *more)
{
	static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF;
	static FILE *f; /* Current open file */
	static char *s; /* Current pointer inside string */
	static char string_ident[30];
	char *p;

again:
	switch (state) {
	case ST_EOF:
		/* No unit is open: start the next one, if there is one. */
		if (script == NULL) {
			if (more != NULL)
				*more = 0;
			return (NULL);
		}
		linenum = 0;
		switch (script->type) {
		case CU_FILE:
			if ((f = fopen(script->s, "r")) == NULL)
				err(1, "%s", script->s);
			fname = script->s;
			state = ST_FILE;
			goto again;
		case CU_STRING:
			/*
			 * Diagnostics name a string unit by its quoted text;
			 * if that does not fit, the end is replaced by ` ..."'.
			 */
			if (((size_t)snprintf(string_ident,
			    sizeof(string_ident), "\"%s\"", script->s)) >=
			    sizeof(string_ident) - 1)
				(void)strcpy(string_ident +
				    sizeof(string_ident) - 6, " ...\"");
			fname = string_ident;
			s = script->s;
			state = ST_STRING;
			goto again;
		}
	case ST_FILE:
		if ((p = fgets(buf, n, f)) != NULL) {
			linenum++;
			if (linenum == 1 && buf[0] == '#' && buf[1] == 'n')
				nflag = 1;
			if (more != NULL)
				*more = !feof(f);
			return (p);
		}
		/* This file is exhausted: move on to the next unit. */
		script = script->next;
		(void)fclose(f);
		state = ST_EOF;
		goto again;
	case ST_STRING:
		if (linenum == 0 && s[0] == '#' && s[1] == 'n')
			nflag = 1;
		p = buf;
		for (;;) {
			/* Out of room: hand back a partial line. */
			if (n-- <= 1) {
				*p = '\0';
				linenum++;
				if (more != NULL)
					*more = 1;
				return (buf);
			}
			switch (*s) {
			case '\0':
				state = ST_EOF;
				if (s == script->s) {
					/* Empty string unit: skip it. */
					script = script->next;
					goto again;
				} else {
					/* Last line, without a trailing newline. */
					script = script->next;
					*p = '\0';
					linenum++;
					if (more != NULL)
						*more = 0;
					return (buf);
				}
			case '\n':
				*p++ = '\n';
				*p = '\0';
				s++;
				linenum++;
				if (more != NULL)
					*more = 0;
				return (buf);
			default:
				*p++ = *s++;
			}
		}
	}
	/* NOTREACHED */
	return (NULL);
}
1054