/*	$NetBSD: process.c,v 1.19 1997/10/19 05:23:50 mrg Exp $	*/

/*-
 * Copyright (c) 1992 Diomidis Spinellis.
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Diomidis Spinellis of Imperial College, University of London.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
39
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
43 #else
44 static char *rcsid = "$NetBSD: process.c,v 1.19 1997/10/19 05:23:50 mrg Exp $";
45 #endif
46 #endif /* not lint */
47
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <sys/ioctl.h>
51 #include <sys/uio.h>
52
53 #include <ctype.h>
54 #include <errno.h>
55 #include <fcntl.h>
56 #include <limits.h>
57 #include <regex.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62
63 #include "defs.h"
64 #include "extern.h"
65
66 static SPACE HS, PS, SS;
67 #define pd PS.deleted
68 #define ps PS.space
69 #define psl PS.len
70 #define hs HS.space
71 #define hsl HS.len
72
73 static inline int applies __P((struct s_command *));
74 static void flush_appends __P((void));
75 static void lputs __P((char *));
76 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
77 static void regsub __P((SPACE *, char *, char *));
78 static int substitute __P((struct s_command *));
79
80 struct s_appends *appends; /* Array of pointers to strings to append. */
81 static int appendx; /* Index into appends array. */
82 int appendnum; /* Size of appends array. */
83
84 static int lastaddr; /* Set by applies if last address of a range. */
85 static int sdone; /* If any substitutes since last line input. */
86 /* Iov structure for 'w' commands. */
87 static regex_t *defpreg;
88 size_t maxnsub;
89 regmatch_t *match;
90
91 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
92
93 void
94 process()
95 {
96 struct s_command *cp;
97 SPACE tspace;
98 size_t len, oldpsl;
99 char *p;
100
101 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
102 pd = 0;
103 top:
104 cp = prog;
105 redirect:
106 while (cp != NULL) {
107 if (!applies(cp)) {
108 cp = cp->next;
109 continue;
110 }
111 switch (cp->code) {
112 case '{':
113 cp = cp->u.c;
114 goto redirect;
115 case 'a':
116 if (appendx >= appendnum)
117 appends = xrealloc(appends,
118 sizeof(struct s_appends) *
119 (appendnum *= 2));
120 appends[appendx].type = AP_STRING;
121 appends[appendx].s = cp->t;
122 appends[appendx].len = strlen(cp->t);
123 appendx++;
124 break;
125 case 'b':
126 cp = cp->u.c;
127 goto redirect;
128 case 'c':
129 pd = 1;
130 psl = 0;
131 if (cp->a2 == NULL || lastaddr)
132 (void)printf("%s", cp->t);
133 break;
134 case 'd':
135 pd = 1;
136 goto new;
137 case 'D':
138 if (pd)
139 goto new;
140 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
141 pd = 1;
142 goto new;
143 } else {
144 psl -= (p + 1) - ps;
145 memmove(ps, p + 1, psl);
146 goto top;
147 }
148 case 'g':
149 cspace(&PS, hs, hsl, REPLACE);
150 break;
151 case 'G':
152 cspace(&PS, hs, hsl, 0);
153 break;
154 case 'h':
155 cspace(&HS, ps, psl, REPLACE);
156 break;
157 case 'H':
158 cspace(&HS, ps, psl, 0);
159 break;
160 case 'i':
161 (void)printf("%s", cp->t);
162 break;
163 case 'l':
164 lputs(ps);
165 break;
166 case 'n':
167 if (!nflag && !pd)
168 OUT(ps)
169 flush_appends();
170 if (!mf_fgets(&PS, REPLACE))
171 exit(0);
172 pd = 0;
173 break;
174 case 'N':
175 flush_appends();
176 if (!mf_fgets(&PS, 0)) {
177 if (!nflag && !pd)
178 OUT(ps)
179 exit(0);
180 }
181 break;
182 case 'p':
183 if (pd)
184 break;
185 OUT(ps)
186 break;
187 case 'P':
188 if (pd)
189 break;
190 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
191 oldpsl = psl;
192 psl = (p + 1) - ps;
193 }
194 OUT(ps)
195 if (p != NULL)
196 psl = oldpsl;
197 break;
198 case 'q':
199 if (!nflag && !pd)
200 OUT(ps)
201 flush_appends();
202 exit(0);
203 case 'r':
204 if (appendx >= appendnum)
205 appends = xrealloc(appends,
206 sizeof(struct s_appends) *
207 (appendnum *= 2));
208 appends[appendx].type = AP_FILE;
209 appends[appendx].s = cp->t;
210 appends[appendx].len = strlen(cp->t);
211 appendx++;
212 break;
213 case 's':
214 sdone |= substitute(cp);
215 break;
216 case 't':
217 if (sdone) {
218 sdone = 0;
219 cp = cp->u.c;
220 goto redirect;
221 }
222 break;
223 case 'w':
224 if (pd)
225 break;
226 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
227 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
228 DEFFILEMODE)) == -1)
229 err(FATAL, "%s: %s\n",
230 cp->t, strerror(errno));
231 if (write(cp->u.fd, ps, psl) != psl)
232 err(FATAL, "%s: %s\n",
233 cp->t, strerror(errno));
234 break;
235 case 'x':
236 if (hs == NULL)
237 cspace(&HS, "\n", 1, REPLACE);
238 tspace = PS;
239 PS = HS;
240 HS = tspace;
241 break;
242 case 'y':
243 if (pd)
244 break;
245 for (p = ps, len = psl; --len; ++p)
246 *p = cp->u.y[*p];
247 break;
248 case ':':
249 case '}':
250 break;
251 case '=':
252 (void)printf("%lu\n", linenum);
253 }
254 cp = cp->next;
255 } /* for all cp */
256
257 new: if (!nflag && !pd)
258 OUT(ps)
259 flush_appends();
260 } /* for all lines */
261 }
262
263 /*
264 * TRUE if the address passed matches the current program state
265 * (lastline, linenumber, ps).
266 */
267 #define MATCH(a) \
268 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
269 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
270
271 /*
272 * Return TRUE if the command applies to the current line. Sets the inrange
273 * flag to process ranges. Interprets the non-select (``!'') flag.
274 */
275 static inline int
276 applies(cp)
277 struct s_command *cp;
278 {
279 int r;
280
281 lastaddr = 0;
282 if (cp->a1 == NULL && cp->a2 == NULL)
283 r = 1;
284 else if (cp->a2)
285 if (cp->inrange) {
286 if (MATCH(cp->a2)) {
287 cp->inrange = 0;
288 lastaddr = 1;
289 }
290 r = 1;
291 } else if (MATCH(cp->a1)) {
292 /*
293 * If the second address is a number less than or
294 * equal to the line number first selected, only
295 * one line shall be selected.
296 * -- POSIX 1003.2
297 */
298 if (cp->a2->type == AT_LINE &&
299 linenum >= cp->a2->u.l)
300 lastaddr = 1;
301 else
302 cp->inrange = 1;
303 r = 1;
304 } else
305 r = 0;
306 else
307 r = MATCH(cp->a1);
308 return (cp->nonsel ? ! r : r);
309 }
310
311 /*
312 * substitute --
313 * Do substitutions in the pattern space. Currently, we build a
314 * copy of the new pattern space in the substitute space structure
315 * and then swap them.
316 */
317 static int
318 substitute(cp)
319 struct s_command *cp;
320 {
321 SPACE tspace;
322 regex_t *re;
323 size_t re_off, slen;
324 int lastempty, n;
325 char *s;
326
327 s = ps;
328 re = cp->u.s->re;
329 if (re == NULL) {
330 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
331 linenum = cp->u.s->linenum;
332 err(COMPILE, "\\%d not defined in the RE",
333 cp->u.s->maxbref);
334 }
335 }
336 if (!regexec_e(re, s, 0, 0, psl))
337 return (0);
338
339 SS.len = 0; /* Clean substitute space. */
340 slen = psl;
341 n = cp->u.s->n;
342 lastempty = 1;
343
344 switch (n) {
345 case 0: /* Global */
346 do {
347 if (lastempty || match[0].rm_so != match[0].rm_eo) {
348 /* Locate start of replaced string. */
349 re_off = match[0].rm_so;
350 /* Copy leading retained string. */
351 cspace(&SS, s, re_off, APPEND);
352 /* Add in regular expression. */
353 regsub(&SS, s, cp->u.s->new);
354 }
355
356 /* Move past this match. */
357 if (match[0].rm_so != match[0].rm_eo) {
358 s += match[0].rm_eo;
359 slen -= match[0].rm_eo;
360 lastempty = 0;
361 } else {
362 if (match[0].rm_so == 0)
363 cspace(&SS,
364 s, match[0].rm_so + 1, APPEND);
365 else
366 cspace(&SS,
367 s + match[0].rm_so, 1, APPEND);
368 s += match[0].rm_so + 1;
369 slen -= match[0].rm_so + 1;
370 lastempty = 1;
371 }
372 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
373 /* Copy trailing retained string. */
374 if (slen > 0)
375 cspace(&SS, s, slen, APPEND);
376 break;
377 default: /* Nth occurrence */
378 while (--n) {
379 s += match[0].rm_eo;
380 slen -= match[0].rm_eo;
381 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
382 return (0);
383 }
384 /* FALLTHROUGH */
385 case 1: /* 1st occurrence */
386 /* Locate start of replaced string. */
387 re_off = match[0].rm_so + (s - ps);
388 /* Copy leading retained string. */
389 cspace(&SS, ps, re_off, APPEND);
390 /* Add in regular expression. */
391 regsub(&SS, s, cp->u.s->new);
392 /* Copy trailing retained string. */
393 s += match[0].rm_eo;
394 slen -= match[0].rm_eo;
395 cspace(&SS, s, slen, APPEND);
396 break;
397 }
398
399 /*
400 * Swap the substitute space and the pattern space, and make sure
401 * that any leftover pointers into stdio memory get lost.
402 */
403 tspace = PS;
404 PS = SS;
405 SS = tspace;
406 SS.space = SS.back;
407
408 /* Handle the 'p' flag. */
409 if (cp->u.s->p)
410 OUT(ps)
411
412 /* Handle the 'w' flag. */
413 if (cp->u.s->wfile && !pd) {
414 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
415 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
416 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
417 if (write(cp->u.s->wfd, ps, psl) != psl)
418 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
419 }
420 return (1);
421 }
422
423 /*
424 * Flush append requests. Always called before reading a line,
425 * therefore it also resets the substitution done (sdone) flag.
426 */
427 static void
428 flush_appends()
429 {
430 FILE *f;
431 int count, i;
432 char buf[8 * 1024];
433
434 for (i = 0; i < appendx; i++)
435 switch (appends[i].type) {
436 case AP_STRING:
437 fwrite(appends[i].s, sizeof(char), appends[i].len,
438 stdout);
439 break;
440 case AP_FILE:
441 /*
442 * Read files probably shouldn't be cached. Since
443 * it's not an error to read a non-existent file,
444 * it's possible that another program is interacting
445 * with the sed script through the file system. It
446 * would be truly bizarre, but possible. It's probably
447 * not that big a performance win, anyhow.
448 */
449 if ((f = fopen(appends[i].s, "r")) == NULL)
450 break;
451 while (count = fread(buf, sizeof(char), sizeof(buf), f))
452 (void)fwrite(buf, sizeof(char), count, stdout);
453 (void)fclose(f);
454 break;
455 }
456 if (ferror(stdout))
457 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
458 appendx = sdone = 0;
459 }
460
461 static void
462 lputs(s)
463 register char *s;
464 {
465 register int count;
466 register char *escapes, *p;
467 struct winsize win;
468 static int termwidth = -1;
469
470 if (termwidth == -1)
471 if (p = getenv("COLUMNS"))
472 termwidth = atoi(p);
473 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
474 win.ws_col > 0)
475 termwidth = win.ws_col;
476 else
477 termwidth = 60;
478
479 for (count = 0; *s; ++s) {
480 if (count >= termwidth) {
481 (void)printf("\\\n");
482 count = 0;
483 }
484 if (isascii(*s) && isprint(*s) && *s != '\\') {
485 (void)putchar(*s);
486 count++;
487 } else {
488 escapes = "\\\a\b\f\n\r\t\v";
489 (void)putchar('\\');
490 if (p = strchr(escapes, *s)) {
491 (void)putchar("\\abfnrtv"[p - escapes]);
492 count += 2;
493 } else {
494 (void)printf("%03o", *(u_char *)s);
495 count += 4;
496 }
497 }
498 }
499 (void)putchar('$');
500 (void)putchar('\n');
501 if (ferror(stdout))
502 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
503 }
504
505 static inline int
506 regexec_e(preg, string, eflags, nomatch, slen)
507 regex_t *preg;
508 const char *string;
509 int eflags, nomatch;
510 size_t slen;
511 {
512 int eval;
513
514 if (preg == NULL) {
515 if (defpreg == NULL)
516 err(FATAL, "first RE may not be empty");
517 } else
518 defpreg = preg;
519
520 /* Set anchors, discounting trailing newline (if any). */
521 if (slen > 0 && string[slen - 1] == '\n')
522 slen--;
523 match[0].rm_so = 0;
524 match[0].rm_eo = slen;
525
526 eval = regexec(defpreg, string,
527 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
528 switch(eval) {
529 case 0:
530 return (1);
531 case REG_NOMATCH:
532 return (0);
533 }
534 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
535 /* NOTREACHED */
536 }
537
538 /*
539 * regsub - perform substitutions after a regexp match
540 * Based on a routine by Henry Spencer
541 */
542 static void
543 regsub(sp, string, src)
544 SPACE *sp;
545 char *string, *src;
546 {
547 register int len, no;
548 register char c, *dst;
549
550 #define NEEDSP(reqlen) \
551 if (sp->len >= sp->blen - (reqlen) - 1) { \
552 sp->blen += (reqlen) + 1024; \
553 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
554 dst = sp->space + sp->len; \
555 }
556
557 dst = sp->space + sp->len;
558 while ((c = *src++) != '\0') {
559 if (c == '&')
560 no = 0;
561 else if (c == '\\' && isdigit(*src))
562 no = *src++ - '0';
563 else
564 no = -1;
565 if (no < 0) { /* Ordinary character. */
566 if (c == '\\' && (*src == '\\' || *src == '&'))
567 c = *src++;
568 NEEDSP(1);
569 *dst++ = c;
570 ++sp->len;
571 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
572 len = match[no].rm_eo - match[no].rm_so;
573 NEEDSP(len);
574 memmove(dst, string + match[no].rm_so, len);
575 dst += len;
576 sp->len += len;
577 }
578 }
579 NEEDSP(1);
580 *dst = '\0';
581 }
582
583 /*
584 * aspace --
585 * Append the source space to the destination space, allocating new
586 * space as necessary.
587 */
588 void
589 cspace(sp, p, len, spflag)
590 SPACE *sp;
591 char *p;
592 size_t len;
593 enum e_spflag spflag;
594 {
595 size_t tlen;
596
597 /* Make sure SPACE has enough memory and ramp up quickly. */
598 tlen = sp->len + len + 1;
599 if (tlen > sp->blen) {
600 sp->blen = tlen + 1024;
601 sp->space = sp->back = xrealloc(sp->back, sp->blen);
602 }
603
604 if (spflag == REPLACE)
605 sp->len = 0;
606
607 memmove(sp->space + sp->len, p, len);
608
609 sp->space[sp->len += len] = '\0';
610 }
611
612 /*
613 * Close all cached opened files and report any errors
614 */
615 void
616 cfclose(cp, end)
617 register struct s_command *cp, *end;
618 {
619
620 for (; cp != end; cp = cp->next)
621 switch(cp->code) {
622 case 's':
623 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
624 err(FATAL,
625 "%s: %s", cp->u.s->wfile, strerror(errno));
626 cp->u.s->wfd = -1;
627 break;
628 case 'w':
629 if (cp->u.fd != -1 && close(cp->u.fd))
630 err(FATAL, "%s: %s", cp->t, strerror(errno));
631 cp->u.fd = -1;
632 break;
633 case '{':
634 cfclose(cp->u.c, cp->next);
635 break;
636 }
637 }
638