process.c revision 1.28 1 /* $NetBSD: process.c,v 1.28 2001/09/28 21:10:34 atatat Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
44 #else
45 __RCSID("$NetBSD: process.c,v 1.28 2001/09/28 21:10:34 atatat Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <sys/ioctl.h>
52 #include <sys/uio.h>
53
54 #include <ctype.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <limits.h>
58 #include <regex.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63
64 #include "defs.h"
65 #include "extern.h"
66
67 static SPACE HS, PS, SS;
68 #define pd PS.deleted
69 #define ps PS.space
70 #define psl PS.len
71 #define hs HS.space
72 #define hsl HS.len
73
74 static inline int applies __P((struct s_command *));
75 static void flush_appends __P((void));
76 static void lputs __P((char *));
77 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t));
78 static void regsub __P((SPACE *, char *, char *));
79 static int substitute __P((struct s_command *));
80
81 struct s_appends *appends; /* Array of pointers to strings to append. */
82 static int appendx; /* Index into appends array. */
83 int appendnum; /* Size of appends array. */
84
85 static int lastaddr; /* Set by applies if last address of a range. */
86 static int sdone; /* If any substitutes since last line input. */
87 /* Iov structure for 'w' commands. */
88 static regex_t *defpreg;
89 size_t maxnsub;
90 regmatch_t *match;
91
92 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
93
94 void
95 process()
96 {
97 struct s_command *cp;
98 SPACE tspace;
99 size_t len, oldpsl;
100 char *p;
101
102 oldpsl = 0;
103 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
104 pd = 0;
105 top:
106 cp = prog;
107 redirect:
108 while (cp != NULL) {
109 if (!applies(cp)) {
110 cp = cp->next;
111 continue;
112 }
113 switch (cp->code) {
114 case '{':
115 cp = cp->u.c;
116 goto redirect;
117 case 'a':
118 if (appendx >= appendnum)
119 appends = xrealloc(appends,
120 sizeof(struct s_appends) *
121 (appendnum *= 2));
122 appends[appendx].type = AP_STRING;
123 appends[appendx].s = cp->t;
124 appends[appendx].len = strlen(cp->t);
125 appendx++;
126 break;
127 case 'b':
128 cp = cp->u.c;
129 goto redirect;
130 case 'c':
131 pd = 1;
132 psl = 0;
133 if (cp->a2 == NULL || lastaddr)
134 (void)printf("%s", cp->t);
135 break;
136 case 'd':
137 pd = 1;
138 goto new;
139 case 'D':
140 if (pd)
141 goto new;
142 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
143 pd = 1;
144 goto new;
145 } else {
146 psl -= (p + 1) - ps;
147 memmove(ps, p + 1, psl);
148 goto top;
149 }
150 case 'g':
151 cspace(&PS, hs, hsl, REPLACE);
152 break;
153 case 'G':
154 if (hs == NULL)
155 cspace(&HS, "\n", 1, REPLACE);
156 cspace(&PS, hs, hsl, 0);
157 break;
158 case 'h':
159 cspace(&HS, ps, psl, REPLACE);
160 break;
161 case 'H':
162 cspace(&HS, ps, psl, 0);
163 break;
164 case 'i':
165 (void)printf("%s", cp->t);
166 break;
167 case 'l':
168 lputs(ps);
169 break;
170 case 'n':
171 if (!nflag && !pd)
172 OUT(ps)
173 flush_appends();
174 if (!mf_fgets(&PS, REPLACE))
175 exit(0);
176 pd = 0;
177 break;
178 case 'N':
179 flush_appends();
180 if (!mf_fgets(&PS, 0)) {
181 if (!nflag && !pd)
182 OUT(ps)
183 exit(0);
184 }
185 break;
186 case 'p':
187 if (pd)
188 break;
189 OUT(ps)
190 break;
191 case 'P':
192 if (pd)
193 break;
194 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
195 oldpsl = psl;
196 psl = (p + 1) - ps;
197 }
198 OUT(ps)
199 if (p != NULL)
200 psl = oldpsl;
201 break;
202 case 'q':
203 if (!nflag && !pd)
204 OUT(ps)
205 flush_appends();
206 exit(0);
207 case 'r':
208 if (appendx >= appendnum)
209 appends = xrealloc(appends,
210 sizeof(struct s_appends) *
211 (appendnum *= 2));
212 appends[appendx].type = AP_FILE;
213 appends[appendx].s = cp->t;
214 appends[appendx].len = strlen(cp->t);
215 appendx++;
216 break;
217 case 's':
218 sdone |= substitute(cp);
219 break;
220 case 't':
221 if (sdone) {
222 sdone = 0;
223 cp = cp->u.c;
224 goto redirect;
225 }
226 break;
227 case 'w':
228 if (pd)
229 break;
230 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
231 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
232 DEFFILEMODE)) == -1)
233 err(FATAL, "%s: %s\n",
234 cp->t, strerror(errno));
235 if (write(cp->u.fd, ps, psl) != psl)
236 err(FATAL, "%s: %s\n",
237 cp->t, strerror(errno));
238 break;
239 case 'x':
240 if (hs == NULL)
241 cspace(&HS, "\n", 1, REPLACE);
242 tspace = PS;
243 PS = HS;
244 HS = tspace;
245 break;
246 case 'y':
247 if (pd)
248 break;
249 for (p = ps, len = psl; --len; ++p)
250 *p = cp->u.y[(int)*p];
251 break;
252 case ':':
253 case '}':
254 break;
255 case '=':
256 (void)printf("%lu\n", linenum);
257 }
258 cp = cp->next;
259 } /* for all cp */
260
261 new: if (!nflag && !pd)
262 OUT(ps)
263 flush_appends();
264 } /* for all lines */
265 }
266
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * An AT_RE address is tested against the pattern space with regexec_e(),
 * an AT_LINE address is compared with linenum, and any other address type
 * yields lastline.  The whole expansion is parenthesized so the macro can
 * be combined safely with other operators.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline)
274
275 /*
276 * Return TRUE if the command applies to the current line. Sets the inrange
277 * flag to process ranges. Interprets the non-select (``!'') flag.
278 */
279 static inline int
280 applies(cp)
281 struct s_command *cp;
282 {
283 int r;
284
285 lastaddr = 0;
286 if (cp->a1 == NULL && cp->a2 == NULL)
287 r = 1;
288 else if (cp->a2) {
289 if (cp->inrange) {
290 if (MATCH(cp->a2)) {
291 cp->inrange = 0;
292 lastaddr = 1;
293 }
294 r = 1;
295 } else if (MATCH(cp->a1)) {
296 /*
297 * If the second address is a number less than or
298 * equal to the line number first selected, only
299 * one line shall be selected.
300 * -- POSIX 1003.2
301 */
302 if (cp->a2->type == AT_LINE &&
303 linenum >= cp->a2->u.l)
304 lastaddr = 1;
305 else
306 cp->inrange = 1;
307 r = 1;
308 } else
309 r = 0;
310 } else
311 r = MATCH(cp->a1);
312 return (cp->nonsel ? ! r : r);
313 }
314
315 /*
316 * substitute --
317 * Do substitutions in the pattern space. Currently, we build a
318 * copy of the new pattern space in the substitute space structure
319 * and then swap them.
320 */
321 static int
322 substitute(cp)
323 struct s_command *cp;
324 {
325 SPACE tspace;
326 regex_t *re;
327 size_t re_off, slen;
328 int lastempty, n;
329 char *s;
330
331 s = ps;
332 re = cp->u.s->re;
333 if (re == NULL) {
334 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
335 linenum = cp->u.s->linenum;
336 err(COMPILE, "\\%d not defined in the RE",
337 cp->u.s->maxbref);
338 }
339 }
340 if (!regexec_e(re, s, 0, 0, psl))
341 return (0);
342
343 SS.len = 0; /* Clean substitute space. */
344 slen = psl;
345 n = cp->u.s->n;
346 lastempty = 1;
347
348 switch (n) {
349 case 0: /* Global */
350 do {
351 if (lastempty || match[0].rm_so != match[0].rm_eo) {
352 /* Locate start of replaced string. */
353 re_off = match[0].rm_so;
354 /* Copy leading retained string. */
355 cspace(&SS, s, re_off, APPEND);
356 /* Add in regular expression. */
357 regsub(&SS, s, cp->u.s->new);
358 }
359
360 /* Move past this match. */
361 if (match[0].rm_so != match[0].rm_eo) {
362 s += match[0].rm_eo;
363 slen -= match[0].rm_eo;
364 lastempty = 0;
365 } else {
366 if (match[0].rm_so == 0)
367 cspace(&SS,
368 s, match[0].rm_so + 1, APPEND);
369 else
370 cspace(&SS,
371 s + match[0].rm_so, 1, APPEND);
372 s += match[0].rm_so + 1;
373 slen -= match[0].rm_so + 1;
374 lastempty = 1;
375 }
376 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
377 /* Copy trailing retained string. */
378 if (slen > 0)
379 cspace(&SS, s, slen, APPEND);
380 break;
381 default: /* Nth occurrence */
382 while (--n) {
383 s += match[0].rm_eo;
384 slen -= match[0].rm_eo;
385 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
386 return (0);
387 }
388 /* FALLTHROUGH */
389 case 1: /* 1st occurrence */
390 /* Locate start of replaced string. */
391 re_off = match[0].rm_so + (s - ps);
392 /* Copy leading retained string. */
393 cspace(&SS, ps, re_off, APPEND);
394 /* Add in regular expression. */
395 regsub(&SS, s, cp->u.s->new);
396 /* Copy trailing retained string. */
397 s += match[0].rm_eo;
398 slen -= match[0].rm_eo;
399 cspace(&SS, s, slen, APPEND);
400 break;
401 }
402
403 /*
404 * Swap the substitute space and the pattern space, and make sure
405 * that any leftover pointers into stdio memory get lost.
406 */
407 tspace = PS;
408 PS = SS;
409 SS = tspace;
410 SS.space = SS.back;
411
412 /* Handle the 'p' flag. */
413 if (cp->u.s->p)
414 OUT(ps)
415
416 /* Handle the 'w' flag. */
417 if (cp->u.s->wfile && !pd) {
418 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
419 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
420 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
421 if (write(cp->u.s->wfd, ps, psl) != psl)
422 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno));
423 }
424 return (1);
425 }
426
427 /*
428 * Flush append requests. Always called before reading a line,
429 * therefore it also resets the substitution done (sdone) flag.
430 */
431 static void
432 flush_appends()
433 {
434 FILE *f;
435 int count, i;
436 char buf[8 * 1024];
437
438 for (i = 0; i < appendx; i++)
439 switch (appends[i].type) {
440 case AP_STRING:
441 fwrite(appends[i].s, sizeof(char), appends[i].len,
442 stdout);
443 break;
444 case AP_FILE:
445 /*
446 * Read files probably shouldn't be cached. Since
447 * it's not an error to read a non-existent file,
448 * it's possible that another program is interacting
449 * with the sed script through the file system. It
450 * would be truly bizarre, but possible. It's probably
451 * not that big a performance win, anyhow.
452 */
453 if ((f = fopen(appends[i].s, "r")) == NULL)
454 break;
455 while ((count =
456 fread(buf, sizeof(char), sizeof(buf), f)) > 0)
457 (void)fwrite(buf, sizeof(char), count, stdout);
458 (void)fclose(f);
459 break;
460 }
461 if (ferror(stdout))
462 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
463 appendx = sdone = 0;
464 }
465
466 static void
467 lputs(s)
468 char *s;
469 {
470 int count;
471 char *escapes, *p;
472 struct winsize win;
473 static int termwidth = -1;
474
475 if (termwidth == -1) {
476 if ((p = getenv("COLUMNS")) != NULL)
477 termwidth = atoi(p);
478 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
479 win.ws_col > 0)
480 termwidth = win.ws_col;
481 else
482 termwidth = 60;
483 }
484 for (count = 0; *s; ++s) {
485 if (count >= termwidth) {
486 (void)printf("\\\n");
487 count = 0;
488 }
489 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
490 *s != '\\') {
491 (void)putchar(*s);
492 count++;
493 } else {
494 escapes = "\\\a\b\f\n\r\t\v";
495 (void)putchar('\\');
496 if ((p = strchr(escapes, *s)) != NULL) {
497 (void)putchar("\\abfnrtv"[p - escapes]);
498 count += 2;
499 } else {
500 (void)printf("%03o", *(u_char *)s);
501 count += 4;
502 }
503 }
504 }
505 (void)putchar('$');
506 (void)putchar('\n');
507 if (ferror(stdout))
508 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
509 }
510
511 static inline int
512 regexec_e(preg, string, eflags, nomatch, slen)
513 regex_t *preg;
514 const char *string;
515 int eflags, nomatch;
516 size_t slen;
517 {
518 int eval;
519 #ifndef REG_STARTEND
520 char *buf;
521 #endif
522
523 if (preg == NULL) {
524 if (defpreg == NULL)
525 err(FATAL, "first RE may not be empty");
526 } else
527 defpreg = preg;
528
529 /* Set anchors, discounting trailing newline (if any). */
530 if (slen > 0 && string[slen - 1] == '\n')
531 slen--;
532
533 #ifndef REG_STARTEND
534 if ((buf = malloc(slen + 1)) == NULL)
535 err(1, NULL);
536 (void)memcpy(buf, string, slen);
537 buf[slen] = '\0';
538 eval = regexec(defpreg, buf,
539 nomatch ? 0 : maxnsub + 1, match, eflags);
540 free(buf);
541 #else
542 match[0].rm_so = 0;
543 match[0].rm_eo = slen;
544 eval = regexec(defpreg, string,
545 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
546 #endif
547 switch(eval) {
548 case 0:
549 return (1);
550 case REG_NOMATCH:
551 return (0);
552 }
553 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
554 /* NOTREACHED */
555 return (0);
556 }
557
558 /*
559 * regsub - perform substitutions after a regexp match
560 * Based on a routine by Henry Spencer
561 */
562 static void
563 regsub(sp, string, src)
564 SPACE *sp;
565 char *string, *src;
566 {
567 int len, no;
568 char c, *dst;
569
570 #define NEEDSP(reqlen) \
571 if (sp->len >= sp->blen - (reqlen) - 1) { \
572 sp->blen += (reqlen) + 1024; \
573 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
574 dst = sp->space + sp->len; \
575 }
576
577 dst = sp->space + sp->len;
578 while ((c = *src++) != '\0') {
579 if (c == '&')
580 no = 0;
581 else if (c == '\\' && isdigit((unsigned char)*src))
582 no = *src++ - '0';
583 else
584 no = -1;
585 if (no < 0) { /* Ordinary character. */
586 if (c == '\\' && (*src == '\\' || *src == '&'))
587 c = *src++;
588 NEEDSP(1);
589 *dst++ = c;
590 ++sp->len;
591 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
592 len = match[no].rm_eo - match[no].rm_so;
593 NEEDSP(len);
594 memmove(dst, string + match[no].rm_so, len);
595 dst += len;
596 sp->len += len;
597 }
598 }
599 NEEDSP(1);
600 *dst = '\0';
601 }
602
603 /*
604 * aspace --
605 * Append the source space to the destination space, allocating new
606 * space as necessary.
607 */
608 void
609 cspace(sp, p, len, spflag)
610 SPACE *sp;
611 char *p;
612 size_t len;
613 enum e_spflag spflag;
614 {
615 size_t tlen;
616
617 /* Make sure SPACE has enough memory and ramp up quickly. */
618 tlen = sp->len + len + 1;
619 if (tlen > sp->blen) {
620 sp->blen = tlen + 1024;
621 sp->space = sp->back = xrealloc(sp->back, sp->blen);
622 }
623
624 if (spflag == REPLACE)
625 sp->len = 0;
626
627 memmove(sp->space + sp->len, p, len);
628
629 sp->space[sp->len += len] = '\0';
630 }
631
632 /*
633 * Close all cached opened files and report any errors
634 */
635 void
636 cfclose(cp, end)
637 struct s_command *cp, *end;
638 {
639
640 for (; cp != end; cp = cp->next)
641 switch(cp->code) {
642 case 's':
643 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
644 err(FATAL,
645 "%s: %s", cp->u.s->wfile, strerror(errno));
646 cp->u.s->wfd = -1;
647 break;
648 case 'w':
649 if (cp->u.fd != -1 && close(cp->u.fd))
650 err(FATAL, "%s: %s", cp->t, strerror(errno));
651 cp->u.fd = -1;
652 break;
653 case '{':
654 cfclose(cp->u.c, cp->next);
655 break;
656 }
657 }
658