process.c revision 1.31 1 /* $NetBSD: process.c,v 1.31 2002/06/11 06:06:20 itojun Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
44 #else
45 __RCSID("$NetBSD: process.c,v 1.31 2002/06/11 06:06:20 itojun Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <sys/ioctl.h>
52 #include <sys/uio.h>
53
54 #include <ctype.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <limits.h>
58 #include <regex.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63
64 #include "defs.h"
65 #include "extern.h"
66
67 static SPACE HS, PS, SS;
68 #define pd PS.deleted
69 #define ps PS.space
70 #define psl PS.len
71 #define hs HS.space
72 #define hsl HS.len
73
74 static inline int applies __P((struct s_command *));
75 static void flush_appends __P((void));
76 static void lputs __P((char *));
77 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
78 static void regsub __P((SPACE *, char *, char *));
79 static int substitute __P((struct s_command *));
80
81 struct s_appends *appends; /* Array of pointers to strings to append. */
82 static int appendx; /* Index into appends array. */
83 int appendnum; /* Size of appends array. */
84
85 static int lastaddr; /* Set by applies if last address of a range. */
86 static int sdone; /* If any substitutes since last line input. */
87 /* Iov structure for 'w' commands. */
88 static regex_t *defpreg;
89 size_t maxnsub;
90 regmatch_t *match;
91
92 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
93
94 void
95 process()
96 {
97 struct s_command *cp;
98 SPACE tspace;
99 size_t len, oldpsl;
100 char *p;
101
102 oldpsl = 0;
103 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
104 pd = 0;
105 top:
106 cp = prog;
107 redirect:
108 while (cp != NULL) {
109 if (!applies(cp)) {
110 cp = cp->next;
111 continue;
112 }
113 switch (cp->code) {
114 case '{':
115 cp = cp->u.c;
116 goto redirect;
117 case 'a':
118 if (appendx >= appendnum)
119 appends = xrealloc(appends,
120 sizeof(struct s_appends) *
121 (appendnum *= 2));
122 appends[appendx].type = AP_STRING;
123 appends[appendx].s = cp->t;
124 appends[appendx].len = strlen(cp->t);
125 appendx++;
126 break;
127 case 'b':
128 cp = cp->u.c;
129 goto redirect;
130 case 'c':
131 pd = 1;
132 psl = 0;
133 if (cp->a2 == NULL || lastaddr)
134 (void)printf("%s", cp->t);
135 break;
136 case 'd':
137 pd = 1;
138 goto new;
139 case 'D':
140 if (psl == 0)
141 pd = 1;
142 if (pd)
143 goto new;
144 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
145 pd = 1;
146 goto new;
147 } else {
148 psl -= (p + 1) - ps;
149 memmove(ps, p + 1, psl);
150 goto top;
151 }
152 case 'g':
153 cspace(&PS, hs, hsl, REPLACE);
154 break;
155 case 'G':
156 if (hs == NULL)
157 cspace(&HS, "\n", 1, REPLACE);
158 cspace(&PS, hs, hsl, 0);
159 break;
160 case 'h':
161 cspace(&HS, ps, psl, REPLACE);
162 break;
163 case 'H':
164 cspace(&HS, ps, psl, 0);
165 break;
166 case 'i':
167 (void)printf("%s", cp->t);
168 break;
169 case 'l':
170 lputs(ps);
171 break;
172 case 'n':
173 if (!nflag && !pd)
174 OUT(ps)
175 flush_appends();
176 if (!mf_fgets(&PS, REPLACE))
177 exit(0);
178 pd = 0;
179 break;
180 case 'N':
181 flush_appends();
182 if (!mf_fgets(&PS, 0)) {
183 if (!nflag && !pd)
184 OUT(ps)
185 exit(0);
186 }
187 break;
188 case 'p':
189 if (pd)
190 break;
191 OUT(ps)
192 break;
193 case 'P':
194 if (pd)
195 break;
196 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
197 oldpsl = psl;
198 psl = (p + 1) - ps;
199 }
200 OUT(ps)
201 if (p != NULL)
202 psl = oldpsl;
203 break;
204 case 'q':
205 if (!nflag && !pd)
206 OUT(ps)
207 flush_appends();
208 exit(0);
209 case 'r':
210 if (appendx >= appendnum)
211 appends = xrealloc(appends,
212 sizeof(struct s_appends) *
213 (appendnum *= 2));
214 appends[appendx].type = AP_FILE;
215 appends[appendx].s = cp->t;
216 appends[appendx].len = strlen(cp->t);
217 appendx++;
218 break;
219 case 's':
220 sdone |= substitute(cp);
221 break;
222 case 't':
223 if (sdone) {
224 sdone = 0;
225 cp = cp->u.c;
226 goto redirect;
227 }
228 break;
229 case 'w':
230 if (pd)
231 break;
232 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
233 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
234 DEFFILEMODE)) == -1)
235 err(FATAL, "%s: %s",
236 cp->t, strerror(errno));
237 if (write(cp->u.fd, ps, psl) != psl)
238 err(FATAL, "%s: %s",
239 cp->t, strerror(errno));
240 break;
241 case 'x':
242 if (hs == NULL)
243 cspace(&HS, "\n", 1, REPLACE);
244 tspace = PS;
245 PS = HS;
246 HS = tspace;
247 break;
248 case 'y':
249 if (pd)
250 break;
251 for (p = ps, len = psl; --len; ++p)
252 *p = cp->u.y[(int)*p];
253 break;
254 case ':':
255 case '}':
256 break;
257 case '=':
258 (void)printf("%lu\n", linenum);
259 }
260 cp = cp->next;
261 } /* for all cp */
262
263 new: if (!nflag && !pd)
264 OUT(ps)
265 flush_appends();
266 } /* for all lines */
267 }
268
269 /*
270 * TRUE if the address passed matches the current program state
271 * (lastline, linenumber, ps).
272 */
273 #define MATCH(a) \
274 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
275 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
276
277 /*
278 * Return TRUE if the command applies to the current line. Sets the inrange
279 * flag to process ranges. Interprets the non-select (``!'') flag.
280 */
281 static inline int
282 applies(cp)
283 struct s_command *cp;
284 {
285 int r;
286
287 lastaddr = 0;
288 if (cp->a1 == NULL && cp->a2 == NULL)
289 r = 1;
290 else if (cp->a2) {
291 if (cp->inrange) {
292 if (MATCH(cp->a2)) {
293 cp->inrange = 0;
294 lastaddr = 1;
295 }
296 r = 1;
297 } else if (MATCH(cp->a1)) {
298 /*
299 * If the second address is a number less than or
300 * equal to the line number first selected, only
301 * one line shall be selected.
302 * -- POSIX 1003.2
303 */
304 if (cp->a2->type == AT_LINE &&
305 linenum >= cp->a2->u.l)
306 lastaddr = 1;
307 else
308 cp->inrange = 1;
309 r = 1;
310 } else
311 r = 0;
312 } else
313 r = MATCH(cp->a1);
314 return (cp->nonsel ? ! r : r);
315 }
316
317 /*
318 * substitute --
319 * Do substitutions in the pattern space. Currently, we build a
320 * copy of the new pattern space in the substitute space structure
321 * and then swap them.
322 */
323 static int
324 substitute(cp)
325 struct s_command *cp;
326 {
327 SPACE tspace;
328 regex_t *re;
329 size_t re_off, slen;
330 int lastempty, n;
331 char *s;
332
333 s = ps;
334 re = cp->u.s->re;
335 if (re == NULL) {
336 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
337 linenum = cp->u.s->linenum;
338 err(COMPILE, "\\%d not defined in the RE",
339 cp->u.s->maxbref);
340 }
341 }
342 if (!regexec_e(re, s, 0, 0, psl))
343 return (0);
344
345 SS.len = 0; /* Clean substitute space. */
346 slen = psl;
347 n = cp->u.s->n;
348 lastempty = 1;
349
350 switch (n) {
351 case 0: /* Global */
352 do {
353 if (lastempty || match[0].rm_so != match[0].rm_eo) {
354 /* Locate start of replaced string. */
355 re_off = match[0].rm_so;
356 /* Copy leading retained string. */
357 cspace(&SS, s, re_off, APPEND);
358 /* Add in regular expression. */
359 regsub(&SS, s, cp->u.s->new);
360 }
361
362 /* Move past this match. */
363 if (match[0].rm_so != match[0].rm_eo) {
364 s += match[0].rm_eo;
365 slen -= match[0].rm_eo;
366 lastempty = 0;
367 } else {
368 if (match[0].rm_so == 0)
369 cspace(&SS,
370 s, match[0].rm_so + 1, APPEND);
371 else
372 cspace(&SS,
373 s + match[0].rm_so, 1, APPEND);
374 s += match[0].rm_so + 1;
375 slen -= match[0].rm_so + 1;
376 lastempty = 1;
377 }
378 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
379 /* Copy trailing retained string. */
380 if (slen > 0)
381 cspace(&SS, s, slen, APPEND);
382 break;
383 default: /* Nth occurrence */
384 while (--n) {
385 s += match[0].rm_eo;
386 slen -= match[0].rm_eo;
387 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
388 return (0);
389 }
390 /* FALLTHROUGH */
391 case 1: /* 1st occurrence */
392 /* Locate start of replaced string. */
393 re_off = match[0].rm_so + (s - ps);
394 /* Copy leading retained string. */
395 cspace(&SS, ps, re_off, APPEND);
396 /* Add in regular expression. */
397 regsub(&SS, s, cp->u.s->new);
398 /* Copy trailing retained string. */
399 s += match[0].rm_eo;
400 slen -= match[0].rm_eo;
401 cspace(&SS, s, slen, APPEND);
402 break;
403 }
404
405 /*
406 * Swap the substitute space and the pattern space, and make sure
407 * that any leftover pointers into stdio memory get lost.
408 */
409 tspace = PS;
410 PS = SS;
411 SS = tspace;
412 SS.space = SS.back;
413
414 /* Handle the 'p' flag. */
415 if (cp->u.s->p)
416 OUT(ps)
417
418 /* Handle the 'w' flag. */
419 if (cp->u.s->wfile && !pd) {
420 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
421 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
422 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
423 if (write(cp->u.s->wfd, ps, psl) != psl)
424 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
425 }
426 return (1);
427 }
428
429 /*
430 * Flush append requests. Always called before reading a line,
431 * therefore it also resets the substitution done (sdone) flag.
432 */
433 static void
434 flush_appends()
435 {
436 FILE *f;
437 int count, i;
438 char buf[8 * 1024];
439
440 for (i = 0; i < appendx; i++)
441 switch (appends[i].type) {
442 case AP_STRING:
443 fwrite(appends[i].s, sizeof(char), appends[i].len,
444 stdout);
445 break;
446 case AP_FILE:
447 /*
448 * Read files probably shouldn't be cached. Since
449 * it's not an error to read a non-existent file,
450 * it's possible that another program is interacting
451 * with the sed script through the file system. It
452 * would be truly bizarre, but possible. It's probably
453 * not that big a performance win, anyhow.
454 */
455 if ((f = fopen(appends[i].s, "r")) == NULL)
456 break;
457 while ((count =
458 fread(buf, sizeof(char), sizeof(buf), f)) > 0)
459 (void)fwrite(buf, sizeof(char), count, stdout);
460 (void)fclose(f);
461 break;
462 }
463 if (ferror(stdout))
464 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
465 appendx = sdone = 0;
466 }
467
468 static void
469 lputs(s)
470 char *s;
471 {
472 int count;
473 char *escapes, *p;
474 struct winsize win;
475 static int termwidth = -1;
476
477 if (termwidth == -1) {
478 if ((p = getenv("COLUMNS")) != NULL)
479 termwidth = atoi(p);
480 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
481 win.ws_col > 0)
482 termwidth = win.ws_col;
483 else
484 termwidth = 60;
485 }
486 for (count = 0; *s; ++s) {
487 if (count >= termwidth) {
488 (void)printf("\\\n");
489 count = 0;
490 }
491 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
492 *s != '\\') {
493 (void)putchar(*s);
494 count++;
495 } else {
496 escapes = "\\\a\b\f\n\r\t\v";
497 (void)putchar('\\');
498 if ((p = strchr(escapes, *s)) != NULL) {
499 (void)putchar("\\abfnrtv"[p - escapes]);
500 count += 2;
501 } else {
502 (void)printf("%03o", *(u_char *)s);
503 count += 4;
504 }
505 }
506 }
507 (void)putchar('$');
508 (void)putchar('\n');
509 if (ferror(stdout))
510 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
511 }
512
513 static inline int
514 regexec_e(preg, string, eflags, nomatch, slen)
515 regex_t *preg;
516 const char *string;
517 int eflags, nomatch;
518 size_t slen;
519 {
520 int eval;
521 #ifndef REG_STARTEND
522 char *buf;
523 #endif
524
525 if (preg == NULL) {
526 if (defpreg == NULL)
527 err(FATAL, "first RE may not be empty");
528 } else
529 defpreg = preg;
530
531 /* Set anchors, discounting trailing newline (if any). */
532 if (slen > 0 && string[slen - 1] == '\n')
533 slen--;
534
535 #ifndef REG_STARTEND
536 if ((buf = malloc(slen + 1)) == NULL)
537 err(1, NULL);
538 (void)memcpy(buf, string, slen);
539 buf[slen] = '\0';
540 eval = regexec(defpreg, buf,
541 nomatch ? 0 : maxnsub + 1, match, eflags);
542 free(buf);
543 #else
544 match[0].rm_so = 0;
545 match[0].rm_eo = slen;
546 eval = regexec(defpreg, string,
547 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
548 #endif
549 switch(eval) {
550 case 0:
551 return (1);
552 case REG_NOMATCH:
553 return (0);
554 }
555 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
556 /* NOTREACHED */
557 return (0);
558 }
559
560 /*
561 * regsub - perform substitutions after a regexp match
562 * Based on a routine by Henry Spencer
563 */
564 static void
565 regsub(sp, string, src)
566 SPACE *sp;
567 char *string, *src;
568 {
569 int len, no;
570 char c, *dst;
571
572 #define NEEDSP(reqlen) \
573 if (sp->len >= sp->blen - (reqlen) - 1) { \
574 sp->blen += (reqlen) + 1024; \
575 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
576 dst = sp->space + sp->len; \
577 }
578
579 dst = sp->space + sp->len;
580 while ((c = *src++) != '\0') {
581 if (c == '&')
582 no = 0;
583 else if (c == '\\' && isdigit((unsigned char)*src))
584 no = *src++ - '0';
585 else
586 no = -1;
587 if (no < 0) { /* Ordinary character. */
588 if (c == '\\' && (*src == '\\' || *src == '&'))
589 c = *src++;
590 NEEDSP(1);
591 *dst++ = c;
592 ++sp->len;
593 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
594 len = match[no].rm_eo - match[no].rm_so;
595 NEEDSP(len);
596 memmove(dst, string + match[no].rm_so, len);
597 dst += len;
598 sp->len += len;
599 }
600 }
601 NEEDSP(1);
602 *dst = '\0';
603 }
604
605 /*
606 * aspace --
607 * Append the source space to the destination space, allocating new
608 * space as necessary.
609 */
610 void
611 cspace(sp, p, len, spflag)
612 SPACE *sp;
613 char *p;
614 size_t len;
615 enum e_spflag spflag;
616 {
617 size_t tlen;
618
619 /* Make sure SPACE has enough memory and ramp up quickly. */
620 tlen = sp->len + len + 1;
621 if (tlen > sp->blen) {
622 sp->blen = tlen + 1024;
623 sp->space = sp->back = xrealloc(sp->back, sp->blen);
624 }
625
626 if (spflag == REPLACE)
627 sp->len = 0;
628
629 memmove(sp->space + sp->len, p, len);
630
631 sp->space[sp->len += len] = '\0';
632 }
633
634 /*
635 * Close all cached opened files and report any errors
636 */
637 void
638 cfclose(cp, end)
639 struct s_command *cp, *end;
640 {
641
642 for (; cp != end; cp = cp->next)
643 switch(cp->code) {
644 case 's':
645 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
646 err(FATAL,
647 "%s: %s", cp->u.s->wfile, strerror(errno));
648 cp->u.s->wfd = -1;
649 break;
650 case 'w':
651 if (cp->u.fd != -1 && close(cp->u.fd))
652 err(FATAL, "%s: %s", cp->t, strerror(errno));
653 cp->u.fd = -1;
654 break;
655 case '{':
656 cfclose(cp->u.c, cp->next);
657 break;
658 }
659 }
660