process.c revision 1.34 1 /* $NetBSD: process.c,v 1.34 2003/11/07 03:58:06 itojun Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis of Imperial College, University of London.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1992 Diomidis Spinellis.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Diomidis Spinellis of Imperial College, University of London.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 */
69
70 #include <sys/cdefs.h>
71 #ifndef lint
72 #if 0
73 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
74 #else
75 __RCSID("$NetBSD: process.c,v 1.34 2003/11/07 03:58:06 itojun Exp $");
76 #endif
77 #endif /* not lint */
78
79 #include <sys/types.h>
80 #include <sys/stat.h>
81 #include <sys/ioctl.h>
82 #include <sys/uio.h>
83
84 #include <ctype.h>
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <limits.h>
88 #include <regex.h>
89 #include <stdio.h>
90 #include <stdlib.h>
91 #include <string.h>
92 #include <unistd.h>
93
94 #include "defs.h"
95 #include "extern.h"
96
/*
 * The three working buffers of this file.  In process() the g/G/h/H/x
 * commands copy or swap data between PS and HS; substitute() assembles its
 * result in SS and then swaps SS with PS.
 */
97 static SPACE HS, PS, SS;
/* Shorthands for the PS and HS fields used throughout this file. */
98 #define pd PS.deleted /* Nonzero: the pattern space is not printed. */
99 #define ps PS.space /* Start of the PS data. */
100 #define psl PS.len /* Number of bytes in ps. */
101 #define hs HS.space /* Start of the HS data; tested against NULL before G and x. */
102 #define hsl HS.len /* Number of bytes in hs. */
103
/* Forward declarations of the helpers that are private to this file. */
104 static inline int applies(struct s_command *); /* Does the command select the current line? */
105 static void flush_appends(void); /* Emit queued 'a'/'r' output, reset sdone. */
106 static void lputs(char *); /* Output routine of the 'l' command. */
107 static inline int regexec_e(regex_t *, const char *, int, int, size_t); /* regexec() wrapper; RE errors are fatal. */
108 static void regsub(SPACE *, char *, char *); /* Expand a replacement string into a SPACE. */
109 static int substitute(struct s_command *); /* The 's' command. */
110
111 struct s_appends *appends; /* Array of queued append requests ('a' text, 'r' file names). */
112 static int appendx; /* Index of the next free slot in appends. */
113 int appendnum; /* Size of appends array. */
114
115 static int lastaddr; /* Set by applies if last address of a range. */
116 static int sdone; /* If any substitutes since last line input. */
117 /* RE most recently handed to regexec_e(); reused when it is called with a NULL RE. */
118 static regex_t *defpreg;
119 size_t maxnsub; /* regexec_e() requests maxnsub + 1 match slots from regexec(). */
120 regmatch_t *match; /* Match offsets filled in through regexec_e(). */
121
/*
 * Write psl bytes starting at s to stdout.  The expansion is a braced block,
 * so call sites use it without a trailing semicolon.
 */
122 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
123
124 void
125 process(void)
126 {
127 struct s_command *cp;
128 SPACE tspace;
129 size_t len, oldpsl;
130 char *p;
131
132 oldpsl = 0;
133 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
134 pd = 0;
135 top:
136 cp = prog;
137 redirect:
138 while (cp != NULL) {
139 if (!applies(cp)) {
140 cp = cp->next;
141 continue;
142 }
143 switch (cp->code) {
144 case '{':
145 cp = cp->u.c;
146 goto redirect;
147 case 'a':
148 if (appendx >= appendnum)
149 appends = xrealloc(appends,
150 sizeof(struct s_appends) *
151 (appendnum *= 2));
152 appends[appendx].type = AP_STRING;
153 appends[appendx].s = cp->t;
154 appends[appendx].len = strlen(cp->t);
155 appendx++;
156 break;
157 case 'b':
158 cp = cp->u.c;
159 goto redirect;
160 case 'c':
161 pd = 1;
162 psl = 0;
163 if (cp->a2 == NULL || lastaddr)
164 (void)printf("%s", cp->t);
165 break;
166 case 'd':
167 pd = 1;
168 goto new;
169 case 'D':
170 if (psl == 0)
171 pd = 1;
172 if (pd)
173 goto new;
174 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
175 pd = 1;
176 goto new;
177 } else {
178 psl -= (p + 1) - ps;
179 memmove(ps, p + 1, psl);
180 goto top;
181 }
182 case 'g':
183 cspace(&PS, hs, hsl, REPLACE);
184 break;
185 case 'G':
186 if (hs == NULL)
187 cspace(&HS, "\n", 1, REPLACE);
188 cspace(&PS, hs, hsl, 0);
189 break;
190 case 'h':
191 cspace(&HS, ps, psl, REPLACE);
192 break;
193 case 'H':
194 cspace(&HS, ps, psl, 0);
195 break;
196 case 'i':
197 (void)printf("%s", cp->t);
198 break;
199 case 'l':
200 lputs(ps);
201 break;
202 case 'n':
203 if (!nflag && !pd)
204 OUT(ps)
205 flush_appends();
206 if (!mf_fgets(&PS, REPLACE))
207 exit(0);
208 pd = 0;
209 break;
210 case 'N':
211 flush_appends();
212 if (!mf_fgets(&PS, 0)) {
213 if (!nflag && !pd)
214 OUT(ps)
215 exit(0);
216 }
217 break;
218 case 'p':
219 if (pd)
220 break;
221 OUT(ps)
222 break;
223 case 'P':
224 if (pd)
225 break;
226 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
227 oldpsl = psl;
228 psl = (p + 1) - ps;
229 }
230 OUT(ps)
231 if (p != NULL)
232 psl = oldpsl;
233 break;
234 case 'q':
235 if (!nflag && !pd)
236 OUT(ps)
237 flush_appends();
238 exit(0);
239 case 'r':
240 if (appendx >= appendnum)
241 appends = xrealloc(appends,
242 sizeof(struct s_appends) *
243 (appendnum *= 2));
244 appends[appendx].type = AP_FILE;
245 appends[appendx].s = cp->t;
246 appends[appendx].len = strlen(cp->t);
247 appendx++;
248 break;
249 case 's':
250 sdone |= substitute(cp);
251 break;
252 case 't':
253 if (sdone) {
254 sdone = 0;
255 cp = cp->u.c;
256 goto redirect;
257 }
258 break;
259 case 'w':
260 if (pd)
261 break;
262 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
263 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
264 DEFFILEMODE)) == -1)
265 err(FATAL, "%s: %s",
266 cp->t, strerror(errno));
267 if (write(cp->u.fd, ps, psl) != psl)
268 err(FATAL, "%s: %s",
269 cp->t, strerror(errno));
270 break;
271 case 'x':
272 if (hs == NULL)
273 cspace(&HS, "\n", 1, REPLACE);
274 tspace = PS;
275 PS = HS;
276 HS = tspace;
277 break;
278 case 'y':
279 if (pd)
280 break;
281 for (p = ps, len = psl; --len; ++p)
282 *p = cp->u.y[(int)*p];
283 break;
284 case ':':
285 case '}':
286 break;
287 case '=':
288 (void)printf("%lu\n", linenum);
289 }
290 cp = cp->next;
291 } /* for all cp */
292
293 new: if (!nflag && !pd)
294 OUT(ps)
295 flush_appends();
296 } /* for all lines */
297 }
298
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenum, ps):
 *	AT_RE	  the RE matches the pattern space (via regexec_e()),
 *	AT_LINE	  the current line number equals the address,
 *	otherwise the value of lastline is used.
 * The expansion is wrapped in parentheses so that it remains a single
 * operand when combined with other operators.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
306
307 /*
308 * Return TRUE if the command applies to the current line. Sets the inrange
309 * flag to process ranges. Interprets the non-select (``!'') flag.
310 */
311 static inline int
312 applies(struct s_command *cp)
313 {
314 int r;
315
316 lastaddr = 0;
317 if (cp->a1 == NULL && cp->a2 == NULL)
318 r = 1;
319 else if (cp->a2) {
320 if (cp->inrange) {
321 if (MATCH(cp->a2)) {
322 cp->inrange = 0;
323 lastaddr = 1;
324 }
325 r = 1;
326 } else if (MATCH(cp->a1)) {
327 /*
328 * If the second address is a number less than or
329 * equal to the line number first selected, only
330 * one line shall be selected.
331 * -- POSIX 1003.2
332 */
333 if (cp->a2->type == AT_LINE &&
334 linenum >= cp->a2->u.l)
335 lastaddr = 1;
336 else
337 cp->inrange = 1;
338 r = 1;
339 } else
340 r = 0;
341 } else
342 r = MATCH(cp->a1);
343 return (cp->nonsel ? ! r : r);
344 }
345
346 /*
347 * substitute --
348 * Do substitutions in the pattern space. Currently, we build a
349 * copy of the new pattern space in the substitute space structure
350 * and then swap them.
351 */
352 static int
353 substitute(struct s_command *cp)
354 {
355 SPACE tspace;
356 regex_t *re;
357 size_t re_off, slen;
358 int lastempty, n;
359 char *s;
360
361 s = ps;
362 re = cp->u.s->re;
363 if (re == NULL) {
364 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
365 linenum = cp->u.s->linenum;
366 err(COMPILE, "\\%d not defined in the RE",
367 cp->u.s->maxbref);
368 }
369 }
370 if (!regexec_e(re, s, 0, 0, psl))
371 return (0);
372
373 SS.len = 0; /* Clean substitute space. */
374 slen = psl;
375 n = cp->u.s->n;
376 lastempty = 1;
377
378 switch (n) {
379 case 0: /* Global */
380 do {
381 if (lastempty || match[0].rm_so != match[0].rm_eo) {
382 /* Locate start of replaced string. */
383 re_off = match[0].rm_so;
384 /* Copy leading retained string. */
385 cspace(&SS, s, re_off, APPEND);
386 /* Add in regular expression. */
387 regsub(&SS, s, cp->u.s->new);
388 }
389
390 /* Move past this match. */
391 if (match[0].rm_so != match[0].rm_eo) {
392 s += match[0].rm_eo;
393 slen -= match[0].rm_eo;
394 lastempty = 0;
395 } else {
396 if (match[0].rm_so == 0)
397 cspace(&SS,
398 s, match[0].rm_so + 1, APPEND);
399 else
400 cspace(&SS,
401 s + match[0].rm_so, 1, APPEND);
402 s += match[0].rm_so + 1;
403 slen -= match[0].rm_so + 1;
404 lastempty = 1;
405 }
406 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
407 /* Copy trailing retained string. */
408 if (slen > 0)
409 cspace(&SS, s, slen, APPEND);
410 break;
411 default: /* Nth occurrence */
412 while (--n) {
413 s += match[0].rm_eo;
414 slen -= match[0].rm_eo;
415 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
416 return (0);
417 }
418 /* FALLTHROUGH */
419 case 1: /* 1st occurrence */
420 /* Locate start of replaced string. */
421 re_off = match[0].rm_so + (s - ps);
422 /* Copy leading retained string. */
423 cspace(&SS, ps, re_off, APPEND);
424 /* Add in regular expression. */
425 regsub(&SS, s, cp->u.s->new);
426 /* Copy trailing retained string. */
427 s += match[0].rm_eo;
428 slen -= match[0].rm_eo;
429 cspace(&SS, s, slen, APPEND);
430 break;
431 }
432
433 /*
434 * Swap the substitute space and the pattern space, and make sure
435 * that any leftover pointers into stdio memory get lost.
436 */
437 tspace = PS;
438 PS = SS;
439 SS = tspace;
440 SS.space = SS.back;
441
442 /* Handle the 'p' flag. */
443 if (cp->u.s->p)
444 OUT(ps)
445
446 /* Handle the 'w' flag. */
447 if (cp->u.s->wfile && !pd) {
448 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
449 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
450 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
451 if (write(cp->u.s->wfd, ps, psl) != psl)
452 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
453 }
454 return (1);
455 }
456
457 /*
458 * Flush append requests. Always called before reading a line,
459 * therefore it also resets the substitution done (sdone) flag.
460 */
461 static void
462 flush_appends(void)
463 {
464 FILE *f;
465 int count, i;
466 char buf[8 * 1024];
467
468 for (i = 0; i < appendx; i++)
469 switch (appends[i].type) {
470 case AP_STRING:
471 fwrite(appends[i].s, sizeof(char), appends[i].len,
472 stdout);
473 break;
474 case AP_FILE:
475 /*
476 * Read files probably shouldn't be cached. Since
477 * it's not an error to read a non-existent file,
478 * it's possible that another program is interacting
479 * with the sed script through the file system. It
480 * would be truly bizarre, but possible. It's probably
481 * not that big a performance win, anyhow.
482 */
483 if ((f = fopen(appends[i].s, "r")) == NULL)
484 break;
485 while ((count =
486 fread(buf, sizeof(char), sizeof(buf), f)) > 0)
487 (void)fwrite(buf, sizeof(char), count, stdout);
488 (void)fclose(f);
489 break;
490 }
491 if (ferror(stdout))
492 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
493 appendx = sdone = 0;
494 }
495
496 static void
497 lputs(char *s)
498 {
499 int count;
500 char *escapes, *p;
501 struct winsize win;
502 static int termwidth = -1;
503
504 if (termwidth == -1) {
505 if ((p = getenv("COLUMNS")) != NULL)
506 termwidth = atoi(p);
507 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
508 win.ws_col > 0)
509 termwidth = win.ws_col;
510 else
511 termwidth = 60;
512 }
513 for (count = 0; *s; ++s) {
514 if (count >= termwidth) {
515 (void)printf("\\\n");
516 count = 0;
517 }
518 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
519 *s != '\\') {
520 (void)putchar(*s);
521 count++;
522 } else {
523 escapes = "\\\a\b\f\n\r\t\v";
524 (void)putchar('\\');
525 if ((p = strchr(escapes, *s)) != NULL) {
526 (void)putchar("\\abfnrtv"[p - escapes]);
527 count += 2;
528 } else {
529 (void)printf("%03o", *(u_char *)s);
530 count += 4;
531 }
532 }
533 }
534 (void)putchar('$');
535 (void)putchar('\n');
536 if (ferror(stdout))
537 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
538 }
539
540 static inline int
541 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
542 {
543 int eval;
544 #ifndef REG_STARTEND
545 char *buf;
546 #endif
547
548 if (preg == NULL) {
549 if (defpreg == NULL)
550 err(FATAL, "first RE may not be empty");
551 } else
552 defpreg = preg;
553
554 /* Set anchors, discounting trailing newline (if any). */
555 if (slen > 0 && string[slen - 1] == '\n')
556 slen--;
557
558 #ifndef REG_STARTEND
559 if ((buf = malloc(slen + 1)) == NULL)
560 err(1, NULL);
561 (void)memcpy(buf, string, slen);
562 buf[slen] = '\0';
563 eval = regexec(defpreg, buf,
564 nomatch ? 0 : maxnsub + 1, match, eflags);
565 free(buf);
566 #else
567 match[0].rm_so = 0;
568 match[0].rm_eo = slen;
569 eval = regexec(defpreg, string,
570 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
571 #endif
572 switch(eval) {
573 case 0:
574 return (1);
575 case REG_NOMATCH:
576 return (0);
577 }
578 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
579 /* NOTREACHED */
580 return (0);
581 }
582
583 /*
584 * regsub - perform substitutions after a regexp match
585 * Based on a routine by Henry Spencer
586 */
587 static void
588 regsub(SPACE *sp, char *string, char *src)
589 {
590 int len, no;
591 char c, *dst;
592
593 #define NEEDSP(reqlen) \
594 if (sp->len + (reqlen) + 1 >= sp->blen) { \
595 sp->blen += (reqlen) + 1024; \
596 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
597 dst = sp->space + sp->len; \
598 }
599
600 dst = sp->space + sp->len;
601 while ((c = *src++) != '\0') {
602 if (c == '&')
603 no = 0;
604 else if (c == '\\' && isdigit((unsigned char)*src))
605 no = *src++ - '0';
606 else
607 no = -1;
608 if (no < 0) { /* Ordinary character. */
609 if (c == '\\' && (*src == '\\' || *src == '&'))
610 c = *src++;
611 NEEDSP(1);
612 *dst++ = c;
613 ++sp->len;
614 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
615 len = match[no].rm_eo - match[no].rm_so;
616 NEEDSP(len);
617 memmove(dst, string + match[no].rm_so, len);
618 dst += len;
619 sp->len += len;
620 }
621 }
622 NEEDSP(1);
623 *dst = '\0';
624 }
625
626 /*
627 * aspace --
628 * Append the source space to the destination space, allocating new
629 * space as necessary.
630 */
631 void
632 cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag)
633 {
634 size_t tlen;
635
636 /* Make sure SPACE has enough memory and ramp up quickly. */
637 tlen = sp->len + len + 1;
638 if (tlen > sp->blen) {
639 sp->blen = tlen + 1024;
640 sp->space = sp->back = xrealloc(sp->back, sp->blen);
641 }
642
643 if (spflag == REPLACE)
644 sp->len = 0;
645
646 memmove(sp->space + sp->len, p, len);
647
648 sp->space[sp->len += len] = '\0';
649 }
650
651 /*
652 * Close all cached opened files and report any errors
653 */
654 void
655 cfclose(struct s_command *cp, struct s_command *end)
656 {
657
658 for (; cp != end; cp = cp->next)
659 switch(cp->code) {
660 case 's':
661 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
662 err(FATAL,
663 "%s: %s", cp->u.s->wfile, strerror(errno));
664 cp->u.s->wfd = -1;
665 break;
666 case 'w':
667 if (cp->u.fd != -1 && close(cp->u.fd))
668 err(FATAL, "%s: %s", cp->t, strerror(errno));
669 cp->u.fd = -1;
670 break;
671 case '{':
672 cfclose(cp->u.c, cp->next);
673 break;
674 }
675 }
676