process.c revision 1.32 1 /* $NetBSD: process.c,v 1.32 2002/06/14 00:41:42 wiz Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
44 #else
45 __RCSID("$NetBSD: process.c,v 1.32 2002/06/14 00:41:42 wiz Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <sys/ioctl.h>
52 #include <sys/uio.h>
53
54 #include <ctype.h>
55 #include <errno.h>
56 #include <fcntl.h>
57 #include <limits.h>
58 #include <regex.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <unistd.h>
63
64 #include "defs.h"
65 #include "extern.h"
66
67 static SPACE HS, PS, SS;
68 #define pd PS.deleted
69 #define ps PS.space
70 #define psl PS.len
71 #define hs HS.space
72 #define hsl HS.len
73
74 static inline int applies(struct s_command *);
75 static void flush_appends(void);
76 static void lputs(char *);
77 static inline int regexec_e(regex_t *, const char *, int, int, size_t);
78 static void regsub(SPACE *, char *, char *);
79 static int substitute(struct s_command *);
80
81 struct s_appends *appends; /* Array of pointers to strings to append. */
82 static int appendx; /* Index into appends array. */
83 int appendnum; /* Size of appends array. */
84
85 static int lastaddr; /* Set by applies if last address of a range. */
86 static int sdone; /* If any substitutes since last line input. */
87 /* Iov structure for 'w' commands. */
88 static regex_t *defpreg;
89 size_t maxnsub;
90 regmatch_t *match;
91
92 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
93
94 void
95 process(void)
96 {
97 struct s_command *cp;
98 SPACE tspace;
99 size_t len, oldpsl;
100 char *p;
101
102 oldpsl = 0;
103 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
104 pd = 0;
105 top:
106 cp = prog;
107 redirect:
108 while (cp != NULL) {
109 if (!applies(cp)) {
110 cp = cp->next;
111 continue;
112 }
113 switch (cp->code) {
114 case '{':
115 cp = cp->u.c;
116 goto redirect;
117 case 'a':
118 if (appendx >= appendnum)
119 appends = xrealloc(appends,
120 sizeof(struct s_appends) *
121 (appendnum *= 2));
122 appends[appendx].type = AP_STRING;
123 appends[appendx].s = cp->t;
124 appends[appendx].len = strlen(cp->t);
125 appendx++;
126 break;
127 case 'b':
128 cp = cp->u.c;
129 goto redirect;
130 case 'c':
131 pd = 1;
132 psl = 0;
133 if (cp->a2 == NULL || lastaddr)
134 (void)printf("%s", cp->t);
135 break;
136 case 'd':
137 pd = 1;
138 goto new;
139 case 'D':
140 if (psl == 0)
141 pd = 1;
142 if (pd)
143 goto new;
144 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
145 pd = 1;
146 goto new;
147 } else {
148 psl -= (p + 1) - ps;
149 memmove(ps, p + 1, psl);
150 goto top;
151 }
152 case 'g':
153 cspace(&PS, hs, hsl, REPLACE);
154 break;
155 case 'G':
156 if (hs == NULL)
157 cspace(&HS, "\n", 1, REPLACE);
158 cspace(&PS, hs, hsl, 0);
159 break;
160 case 'h':
161 cspace(&HS, ps, psl, REPLACE);
162 break;
163 case 'H':
164 cspace(&HS, ps, psl, 0);
165 break;
166 case 'i':
167 (void)printf("%s", cp->t);
168 break;
169 case 'l':
170 lputs(ps);
171 break;
172 case 'n':
173 if (!nflag && !pd)
174 OUT(ps)
175 flush_appends();
176 if (!mf_fgets(&PS, REPLACE))
177 exit(0);
178 pd = 0;
179 break;
180 case 'N':
181 flush_appends();
182 if (!mf_fgets(&PS, 0)) {
183 if (!nflag && !pd)
184 OUT(ps)
185 exit(0);
186 }
187 break;
188 case 'p':
189 if (pd)
190 break;
191 OUT(ps)
192 break;
193 case 'P':
194 if (pd)
195 break;
196 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
197 oldpsl = psl;
198 psl = (p + 1) - ps;
199 }
200 OUT(ps)
201 if (p != NULL)
202 psl = oldpsl;
203 break;
204 case 'q':
205 if (!nflag && !pd)
206 OUT(ps)
207 flush_appends();
208 exit(0);
209 case 'r':
210 if (appendx >= appendnum)
211 appends = xrealloc(appends,
212 sizeof(struct s_appends) *
213 (appendnum *= 2));
214 appends[appendx].type = AP_FILE;
215 appends[appendx].s = cp->t;
216 appends[appendx].len = strlen(cp->t);
217 appendx++;
218 break;
219 case 's':
220 sdone |= substitute(cp);
221 break;
222 case 't':
223 if (sdone) {
224 sdone = 0;
225 cp = cp->u.c;
226 goto redirect;
227 }
228 break;
229 case 'w':
230 if (pd)
231 break;
232 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
233 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
234 DEFFILEMODE)) == -1)
235 err(FATAL, "%s: %s",
236 cp->t, strerror(errno));
237 if (write(cp->u.fd, ps, psl) != psl)
238 err(FATAL, "%s: %s",
239 cp->t, strerror(errno));
240 break;
241 case 'x':
242 if (hs == NULL)
243 cspace(&HS, "\n", 1, REPLACE);
244 tspace = PS;
245 PS = HS;
246 HS = tspace;
247 break;
248 case 'y':
249 if (pd)
250 break;
251 for (p = ps, len = psl; --len; ++p)
252 *p = cp->u.y[(int)*p];
253 break;
254 case ':':
255 case '}':
256 break;
257 case '=':
258 (void)printf("%lu\n", linenum);
259 }
260 cp = cp->next;
261 } /* for all cp */
262
263 new: if (!nflag && !pd)
264 OUT(ps)
265 flush_appends();
266 } /* for all lines */
267 }
268
269 /*
270 * TRUE if the address passed matches the current program state
271 * (lastline, linenumber, ps).
272 */
273 #define MATCH(a) \
274 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
275 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
276
277 /*
278 * Return TRUE if the command applies to the current line. Sets the inrange
279 * flag to process ranges. Interprets the non-select (``!'') flag.
280 */
281 static inline int
282 applies(struct s_command *cp)
283 {
284 int r;
285
286 lastaddr = 0;
287 if (cp->a1 == NULL && cp->a2 == NULL)
288 r = 1;
289 else if (cp->a2) {
290 if (cp->inrange) {
291 if (MATCH(cp->a2)) {
292 cp->inrange = 0;
293 lastaddr = 1;
294 }
295 r = 1;
296 } else if (MATCH(cp->a1)) {
297 /*
298 * If the second address is a number less than or
299 * equal to the line number first selected, only
300 * one line shall be selected.
301 * -- POSIX 1003.2
302 */
303 if (cp->a2->type == AT_LINE &&
304 linenum >= cp->a2->u.l)
305 lastaddr = 1;
306 else
307 cp->inrange = 1;
308 r = 1;
309 } else
310 r = 0;
311 } else
312 r = MATCH(cp->a1);
313 return (cp->nonsel ? ! r : r);
314 }
315
316 /*
317 * substitute --
318 * Do substitutions in the pattern space. Currently, we build a
319 * copy of the new pattern space in the substitute space structure
320 * and then swap them.
321 */
322 static int
323 substitute(struct s_command *cp)
324 {
325 SPACE tspace;
326 regex_t *re;
327 size_t re_off, slen;
328 int lastempty, n;
329 char *s;
330
331 s = ps;
332 re = cp->u.s->re;
333 if (re == NULL) {
334 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
335 linenum = cp->u.s->linenum;
336 err(COMPILE, "\\%d not defined in the RE",
337 cp->u.s->maxbref);
338 }
339 }
340 if (!regexec_e(re, s, 0, 0, psl))
341 return (0);
342
343 SS.len = 0; /* Clean substitute space. */
344 slen = psl;
345 n = cp->u.s->n;
346 lastempty = 1;
347
348 switch (n) {
349 case 0: /* Global */
350 do {
351 if (lastempty || match[0].rm_so != match[0].rm_eo) {
352 /* Locate start of replaced string. */
353 re_off = match[0].rm_so;
354 /* Copy leading retained string. */
355 cspace(&SS, s, re_off, APPEND);
356 /* Add in regular expression. */
357 regsub(&SS, s, cp->u.s->new);
358 }
359
360 /* Move past this match. */
361 if (match[0].rm_so != match[0].rm_eo) {
362 s += match[0].rm_eo;
363 slen -= match[0].rm_eo;
364 lastempty = 0;
365 } else {
366 if (match[0].rm_so == 0)
367 cspace(&SS,
368 s, match[0].rm_so + 1, APPEND);
369 else
370 cspace(&SS,
371 s + match[0].rm_so, 1, APPEND);
372 s += match[0].rm_so + 1;
373 slen -= match[0].rm_so + 1;
374 lastempty = 1;
375 }
376 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
377 /* Copy trailing retained string. */
378 if (slen > 0)
379 cspace(&SS, s, slen, APPEND);
380 break;
381 default: /* Nth occurrence */
382 while (--n) {
383 s += match[0].rm_eo;
384 slen -= match[0].rm_eo;
385 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
386 return (0);
387 }
388 /* FALLTHROUGH */
389 case 1: /* 1st occurrence */
390 /* Locate start of replaced string. */
391 re_off = match[0].rm_so + (s - ps);
392 /* Copy leading retained string. */
393 cspace(&SS, ps, re_off, APPEND);
394 /* Add in regular expression. */
395 regsub(&SS, s, cp->u.s->new);
396 /* Copy trailing retained string. */
397 s += match[0].rm_eo;
398 slen -= match[0].rm_eo;
399 cspace(&SS, s, slen, APPEND);
400 break;
401 }
402
403 /*
404 * Swap the substitute space and the pattern space, and make sure
405 * that any leftover pointers into stdio memory get lost.
406 */
407 tspace = PS;
408 PS = SS;
409 SS = tspace;
410 SS.space = SS.back;
411
412 /* Handle the 'p' flag. */
413 if (cp->u.s->p)
414 OUT(ps)
415
416 /* Handle the 'w' flag. */
417 if (cp->u.s->wfile && !pd) {
418 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
419 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
420 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
421 if (write(cp->u.s->wfd, ps, psl) != psl)
422 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
423 }
424 return (1);
425 }
426
427 /*
428 * Flush append requests. Always called before reading a line,
429 * therefore it also resets the substitution done (sdone) flag.
430 */
431 static void
432 flush_appends(void)
433 {
434 FILE *f;
435 int count, i;
436 char buf[8 * 1024];
437
438 for (i = 0; i < appendx; i++)
439 switch (appends[i].type) {
440 case AP_STRING:
441 fwrite(appends[i].s, sizeof(char), appends[i].len,
442 stdout);
443 break;
444 case AP_FILE:
445 /*
446 * Read files probably shouldn't be cached. Since
447 * it's not an error to read a non-existent file,
448 * it's possible that another program is interacting
449 * with the sed script through the file system. It
450 * would be truly bizarre, but possible. It's probably
451 * not that big a performance win, anyhow.
452 */
453 if ((f = fopen(appends[i].s, "r")) == NULL)
454 break;
455 while ((count =
456 fread(buf, sizeof(char), sizeof(buf), f)) > 0)
457 (void)fwrite(buf, sizeof(char), count, stdout);
458 (void)fclose(f);
459 break;
460 }
461 if (ferror(stdout))
462 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
463 appendx = sdone = 0;
464 }
465
466 static void
467 lputs(char *s)
468 {
469 int count;
470 char *escapes, *p;
471 struct winsize win;
472 static int termwidth = -1;
473
474 if (termwidth == -1) {
475 if ((p = getenv("COLUMNS")) != NULL)
476 termwidth = atoi(p);
477 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
478 win.ws_col > 0)
479 termwidth = win.ws_col;
480 else
481 termwidth = 60;
482 }
483 for (count = 0; *s; ++s) {
484 if (count >= termwidth) {
485 (void)printf("\\\n");
486 count = 0;
487 }
488 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
489 *s != '\\') {
490 (void)putchar(*s);
491 count++;
492 } else {
493 escapes = "\\\a\b\f\n\r\t\v";
494 (void)putchar('\\');
495 if ((p = strchr(escapes, *s)) != NULL) {
496 (void)putchar("\\abfnrtv"[p - escapes]);
497 count += 2;
498 } else {
499 (void)printf("%03o", *(u_char *)s);
500 count += 4;
501 }
502 }
503 }
504 (void)putchar('$');
505 (void)putchar('\n');
506 if (ferror(stdout))
507 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
508 }
509
510 static inline int
511 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
512 {
513 int eval;
514 #ifndef REG_STARTEND
515 char *buf;
516 #endif
517
518 if (preg == NULL) {
519 if (defpreg == NULL)
520 err(FATAL, "first RE may not be empty");
521 } else
522 defpreg = preg;
523
524 /* Set anchors, discounting trailing newline (if any). */
525 if (slen > 0 && string[slen - 1] == '\n')
526 slen--;
527
528 #ifndef REG_STARTEND
529 if ((buf = malloc(slen + 1)) == NULL)
530 err(1, NULL);
531 (void)memcpy(buf, string, slen);
532 buf[slen] = '\0';
533 eval = regexec(defpreg, buf,
534 nomatch ? 0 : maxnsub + 1, match, eflags);
535 free(buf);
536 #else
537 match[0].rm_so = 0;
538 match[0].rm_eo = slen;
539 eval = regexec(defpreg, string,
540 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
541 #endif
542 switch(eval) {
543 case 0:
544 return (1);
545 case REG_NOMATCH:
546 return (0);
547 }
548 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
549 /* NOTREACHED */
550 return (0);
551 }
552
553 /*
554 * regsub - perform substitutions after a regexp match
555 * Based on a routine by Henry Spencer
556 */
557 static void
558 regsub(SPACE *sp, char *string, char *src)
559 {
560 int len, no;
561 char c, *dst;
562
563 #define NEEDSP(reqlen) \
564 if (sp->len >= sp->blen - (reqlen) - 1) { \
565 sp->blen += (reqlen) + 1024; \
566 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
567 dst = sp->space + sp->len; \
568 }
569
570 dst = sp->space + sp->len;
571 while ((c = *src++) != '\0') {
572 if (c == '&')
573 no = 0;
574 else if (c == '\\' && isdigit((unsigned char)*src))
575 no = *src++ - '0';
576 else
577 no = -1;
578 if (no < 0) { /* Ordinary character. */
579 if (c == '\\' && (*src == '\\' || *src == '&'))
580 c = *src++;
581 NEEDSP(1);
582 *dst++ = c;
583 ++sp->len;
584 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
585 len = match[no].rm_eo - match[no].rm_so;
586 NEEDSP(len);
587 memmove(dst, string + match[no].rm_so, len);
588 dst += len;
589 sp->len += len;
590 }
591 }
592 NEEDSP(1);
593 *dst = '\0';
594 }
595
596 /*
597 * aspace --
598 * Append the source space to the destination space, allocating new
599 * space as necessary.
600 */
601 void
602 cspace(SPACE *sp, char *p, size_t len, enum e_spflag spflag)
603 {
604 size_t tlen;
605
606 /* Make sure SPACE has enough memory and ramp up quickly. */
607 tlen = sp->len + len + 1;
608 if (tlen > sp->blen) {
609 sp->blen = tlen + 1024;
610 sp->space = sp->back = xrealloc(sp->back, sp->blen);
611 }
612
613 if (spflag == REPLACE)
614 sp->len = 0;
615
616 memmove(sp->space + sp->len, p, len);
617
618 sp->space[sp->len += len] = '\0';
619 }
620
621 /*
622 * Close all cached opened files and report any errors
623 */
624 void
625 cfclose(struct s_command *cp, struct s_command *end)
626 {
627
628 for (; cp != end; cp = cp->next)
629 switch(cp->code) {
630 case 's':
631 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
632 err(FATAL,
633 "%s: %s", cp->u.s->wfile, strerror(errno));
634 cp->u.s->wfd = -1;
635 break;
636 case 'w':
637 if (cp->u.fd != -1 && close(cp->u.fd))
638 err(FATAL, "%s: %s", cp->t, strerror(errno));
639 cp->u.fd = -1;
640 break;
641 case '{':
642 cfclose(cp->u.c, cp->next);
643 break;
644 }
645 }
646