process.c revision 1.1.1.2 1 1.1 alm /*-
2 1.1 alm * Copyright (c) 1992 Diomidis Spinellis.
3 1.1.1.1 mrg * Copyright (c) 1992, 1993, 1994
4 1.1.1.1 mrg * The Regents of the University of California. All rights reserved.
5 1.1 alm *
6 1.1 alm * This code is derived from software contributed to Berkeley by
7 1.1 alm * Diomidis Spinellis of Imperial College, University of London.
8 1.1 alm *
9 1.1 alm * Redistribution and use in source and binary forms, with or without
10 1.1 alm * modification, are permitted provided that the following conditions
11 1.1 alm * are met:
12 1.1 alm * 1. Redistributions of source code must retain the above copyright
13 1.1 alm * notice, this list of conditions and the following disclaimer.
14 1.1 alm * 2. Redistributions in binary form must reproduce the above copyright
15 1.1 alm * notice, this list of conditions and the following disclaimer in the
16 1.1 alm * documentation and/or other materials provided with the distribution.
17 1.1 alm * 4. Neither the name of the University nor the names of its contributors
18 1.1 alm * may be used to endorse or promote products derived from this software
19 1.1 alm * without specific prior written permission.
20 1.1 alm *
21 1.1 alm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 1.1 alm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 1.1 alm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 1.1 alm * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 1.1 alm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 1.1 alm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 1.1 alm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 1.1 alm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 1.1 alm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 1.1 alm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 1.1 alm * SUCH DAMAGE.
32 1.1 alm */
33 1.1 alm
34 1.1.1.2 christos #include <sys/cdefs.h>
35 1.1.1.2 christos __FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
36 1.1.1.2 christos
37 1.1 alm #ifndef lint
38 1.1.1.2 christos static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
39 1.1.1.2 christos #endif
40 1.1 alm
41 1.1 alm #include <sys/types.h>
42 1.1 alm #include <sys/stat.h>
43 1.1 alm #include <sys/ioctl.h>
44 1.1 alm #include <sys/uio.h>
45 1.1 alm
46 1.1 alm #include <ctype.h>
47 1.1.1.2 christos #include <err.h>
48 1.1 alm #include <errno.h>
49 1.1 alm #include <fcntl.h>
50 1.1 alm #include <limits.h>
51 1.1 alm #include <regex.h>
52 1.1 alm #include <stdio.h>
53 1.1 alm #include <stdlib.h>
54 1.1 alm #include <string.h>
55 1.1 alm #include <unistd.h>
56 1.1.1.2 christos #include <wchar.h>
57 1.1.1.2 christos #include <wctype.h>
58 1.1 alm
59 1.1 alm #include "defs.h"
60 1.1 alm #include "extern.h"
61 1.1 alm
62 1.1.1.2 christos static SPACE HS, PS, SS, YS;
63 1.1 alm #define pd PS.deleted
64 1.1 alm #define ps PS.space
65 1.1 alm #define psl PS.len
66 1.1 alm #define hs HS.space
67 1.1 alm #define hsl HS.len
68 1.1 alm
69 1.1.1.2 christos static __inline int applies(struct s_command *);
70 1.1.1.2 christos static void do_tr(struct s_tr *);
71 1.1.1.2 christos static void flush_appends(void);
72 1.1.1.2 christos static void lputs(char *, size_t);
73 1.1.1.2 christos static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
74 1.1.1.2 christos static void regsub(SPACE *, char *, char *);
75 1.1.1.2 christos static int substitute(struct s_command *);
76 1.1 alm
77 1.1 alm struct s_appends *appends; /* Array of pointers to strings to append. */
78 1.1 alm static int appendx; /* Index into appends array. */
79 1.1 alm int appendnum; /* Size of appends array. */
80 1.1 alm
81 1.1 alm static int lastaddr; /* Set by applies if last address of a range. */
82 1.1 alm static int sdone; /* If any substitutes since last line input. */
83 1.1 alm /* Iov structure for 'w' commands. */
84 1.1 alm static regex_t *defpreg;
85 1.1 alm size_t maxnsub;
86 1.1 alm regmatch_t *match;
87 1.1 alm
88 1.1.1.2 christos #define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
89 1.1.1.1 mrg
90 1.1 alm void
91 1.1.1.2 christos process(void)
92 1.1 alm {
93 1.1 alm struct s_command *cp;
94 1.1 alm SPACE tspace;
95 1.1.1.2 christos size_t oldpsl = 0;
96 1.1.1.2 christos char *p;
97 1.1.1.2 christos
98 1.1.1.2 christos p = NULL;
99 1.1 alm
100 1.1 alm for (linenum = 0; mf_fgets(&PS, REPLACE);) {
101 1.1 alm pd = 0;
102 1.1.1.2 christos top:
103 1.1 alm cp = prog;
104 1.1 alm redirect:
105 1.1 alm while (cp != NULL) {
106 1.1 alm if (!applies(cp)) {
107 1.1 alm cp = cp->next;
108 1.1 alm continue;
109 1.1 alm }
110 1.1 alm switch (cp->code) {
111 1.1 alm case '{':
112 1.1 alm cp = cp->u.c;
113 1.1 alm goto redirect;
114 1.1 alm case 'a':
115 1.1 alm if (appendx >= appendnum)
116 1.1.1.2 christos if ((appends = realloc(appends,
117 1.1 alm sizeof(struct s_appends) *
118 1.1.1.2 christos (appendnum *= 2))) == NULL)
119 1.1.1.2 christos err(1, "realloc");
120 1.1 alm appends[appendx].type = AP_STRING;
121 1.1 alm appends[appendx].s = cp->t;
122 1.1.1.1 mrg appends[appendx].len = strlen(cp->t);
123 1.1 alm appendx++;
124 1.1 alm break;
125 1.1 alm case 'b':
126 1.1 alm cp = cp->u.c;
127 1.1 alm goto redirect;
128 1.1 alm case 'c':
129 1.1 alm pd = 1;
130 1.1 alm psl = 0;
131 1.1.1.2 christos if (cp->a2 == NULL || lastaddr || lastline())
132 1.1.1.2 christos (void)fprintf(outfile, "%s", cp->t);
133 1.1 alm break;
134 1.1 alm case 'd':
135 1.1 alm pd = 1;
136 1.1 alm goto new;
137 1.1 alm case 'D':
138 1.1 alm if (pd)
139 1.1 alm goto new;
140 1.1.1.2 christos if (psl == 0 ||
141 1.1.1.2 christos (p = memchr(ps, '\n', psl)) == NULL) {
142 1.1 alm pd = 1;
143 1.1.1.2 christos goto new;
144 1.1.1.2 christos } else {
145 1.1.1.2 christos psl -= (p + 1) - ps;
146 1.1 alm memmove(ps, p + 1, psl);
147 1.1.1.2 christos goto top;
148 1.1 alm }
149 1.1 alm case 'g':
150 1.1 alm cspace(&PS, hs, hsl, REPLACE);
151 1.1 alm break;
152 1.1 alm case 'G':
153 1.1.1.2 christos cspace(&PS, "\n", 1, APPEND);
154 1.1.1.2 christos cspace(&PS, hs, hsl, APPEND);
155 1.1 alm break;
156 1.1 alm case 'h':
157 1.1 alm cspace(&HS, ps, psl, REPLACE);
158 1.1 alm break;
159 1.1 alm case 'H':
160 1.1.1.2 christos cspace(&HS, "\n", 1, APPEND);
161 1.1.1.2 christos cspace(&HS, ps, psl, APPEND);
162 1.1 alm break;
163 1.1 alm case 'i':
164 1.1.1.2 christos (void)fprintf(outfile, "%s", cp->t);
165 1.1 alm break;
166 1.1 alm case 'l':
167 1.1.1.2 christos lputs(ps, psl);
168 1.1 alm break;
169 1.1 alm case 'n':
170 1.1 alm if (!nflag && !pd)
171 1.1.1.2 christos OUT();
172 1.1 alm flush_appends();
173 1.1.1.1 mrg if (!mf_fgets(&PS, REPLACE))
174 1.1 alm exit(0);
175 1.1 alm pd = 0;
176 1.1 alm break;
177 1.1 alm case 'N':
178 1.1 alm flush_appends();
179 1.1.1.2 christos cspace(&PS, "\n", 1, APPEND);
180 1.1.1.2 christos if (!mf_fgets(&PS, APPEND))
181 1.1 alm exit(0);
182 1.1 alm break;
183 1.1 alm case 'p':
184 1.1 alm if (pd)
185 1.1 alm break;
186 1.1.1.2 christos OUT();
187 1.1 alm break;
188 1.1 alm case 'P':
189 1.1 alm if (pd)
190 1.1 alm break;
191 1.1.1.1 mrg if ((p = memchr(ps, '\n', psl)) != NULL) {
192 1.1.1.2 christos oldpsl = psl;
193 1.1.1.2 christos psl = p - ps;
194 1.1 alm }
195 1.1.1.2 christos OUT();
196 1.1 alm if (p != NULL)
197 1.1.1.2 christos psl = oldpsl;
198 1.1 alm break;
199 1.1 alm case 'q':
200 1.1 alm if (!nflag && !pd)
201 1.1.1.2 christos OUT();
202 1.1 alm flush_appends();
203 1.1 alm exit(0);
204 1.1 alm case 'r':
205 1.1 alm if (appendx >= appendnum)
206 1.1.1.2 christos if ((appends = realloc(appends,
207 1.1 alm sizeof(struct s_appends) *
208 1.1.1.2 christos (appendnum *= 2))) == NULL)
209 1.1.1.2 christos err(1, "realloc");
210 1.1 alm appends[appendx].type = AP_FILE;
211 1.1 alm appends[appendx].s = cp->t;
212 1.1.1.1 mrg appends[appendx].len = strlen(cp->t);
213 1.1 alm appendx++;
214 1.1 alm break;
215 1.1 alm case 's':
216 1.1 alm sdone |= substitute(cp);
217 1.1 alm break;
218 1.1 alm case 't':
219 1.1 alm if (sdone) {
220 1.1 alm sdone = 0;
221 1.1 alm cp = cp->u.c;
222 1.1 alm goto redirect;
223 1.1 alm }
224 1.1 alm break;
225 1.1 alm case 'w':
226 1.1 alm if (pd)
227 1.1 alm break;
228 1.1 alm if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
229 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
230 1.1 alm DEFFILEMODE)) == -1)
231 1.1.1.2 christos err(1, "%s", cp->t);
232 1.1.1.2 christos if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
233 1.1.1.2 christos write(cp->u.fd, "\n", 1) != 1)
234 1.1.1.2 christos err(1, "%s", cp->t);
235 1.1 alm break;
236 1.1 alm case 'x':
237 1.1.1.2 christos /*
238 1.1.1.2 christos * If the hold space is null, make it empty
239 1.1.1.2 christos * but not null. Otherwise the pattern space
240 1.1.1.2 christos * will become null after the swap, which is
241 1.1.1.2 christos * an abnormal condition.
242 1.1.1.2 christos */
243 1.1.1.1 mrg if (hs == NULL)
244 1.1.1.1 mrg cspace(&HS, "", 0, REPLACE);
245 1.1 alm tspace = PS;
246 1.1 alm PS = HS;
247 1.1 alm HS = tspace;
248 1.1 alm break;
249 1.1 alm case 'y':
250 1.1.1.2 christos if (pd || psl == 0)
251 1.1 alm break;
252 1.1.1.2 christos do_tr(cp->u.y);
253 1.1 alm break;
254 1.1 alm case ':':
255 1.1 alm case '}':
256 1.1 alm break;
257 1.1 alm case '=':
258 1.1.1.2 christos (void)fprintf(outfile, "%lu\n", linenum);
259 1.1 alm }
260 1.1 alm cp = cp->next;
261 1.1 alm } /* for all cp */
262 1.1 alm
263 1.1 alm new: if (!nflag && !pd)
264 1.1.1.2 christos OUT();
265 1.1 alm flush_appends();
266 1.1 alm } /* for all lines */
267 1.1 alm }
268 1.1 alm
269 1.1 alm /*
270 1.1 alm * TRUE if the address passed matches the current program state
271 1.1 alm * (lastline, linenumber, ps).
272 1.1 alm */
273 1.1.1.2 christos #define MATCH(a) \
274 1.1.1.2 christos ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
275 1.1.1.2 christos (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
276 1.1 alm
277 1.1 alm /*
278 1.1.1.2 christos * Return TRUE if the command applies to the current line. Sets the start
279 1.1.1.2 christos * line for process ranges. Interprets the non-select (``!'') flag.
280 1.1 alm */
281 1.1.1.2 christos static __inline int
282 1.1.1.2 christos applies(struct s_command *cp)
283 1.1 alm {
284 1.1 alm int r;
285 1.1 alm
286 1.1 alm lastaddr = 0;
287 1.1 alm if (cp->a1 == NULL && cp->a2 == NULL)
288 1.1 alm r = 1;
289 1.1 alm else if (cp->a2)
290 1.1.1.2 christos if (cp->startline > 0) {
291 1.1 alm if (MATCH(cp->a2)) {
292 1.1.1.2 christos cp->startline = 0;
293 1.1 alm lastaddr = 1;
294 1.1.1.2 christos r = 1;
295 1.1.1.2 christos } else if (linenum - cp->startline <= cp->a2->u.l)
296 1.1.1.2 christos r = 1;
297 1.1.1.2 christos else if ((cp->a2->type == AT_LINE &&
298 1.1.1.2 christos linenum > cp->a2->u.l) ||
299 1.1.1.2 christos (cp->a2->type == AT_RELLINE &&
300 1.1.1.2 christos linenum - cp->startline > cp->a2->u.l)) {
301 1.1.1.2 christos /*
302 1.1.1.2 christos * We missed the 2nd address due to a branch,
303 1.1.1.2 christos * so just close the range and return false.
304 1.1.1.2 christos */
305 1.1.1.2 christos cp->startline = 0;
306 1.1.1.2 christos r = 0;
307 1.1.1.2 christos } else
308 1.1.1.2 christos r = 1;
309 1.1 alm } else if (MATCH(cp->a1)) {
310 1.1 alm /*
311 1.1 alm * If the second address is a number less than or
312 1.1 alm * equal to the line number first selected, only
313 1.1 alm * one line shall be selected.
314 1.1 alm * -- POSIX 1003.2
315 1.1.1.2 christos * Likewise if the relative second line address is zero.
316 1.1 alm */
317 1.1.1.2 christos if ((cp->a2->type == AT_LINE &&
318 1.1.1.2 christos linenum >= cp->a2->u.l) ||
319 1.1.1.2 christos (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
320 1.1 alm lastaddr = 1;
321 1.1.1.2 christos else {
322 1.1.1.2 christos cp->startline = linenum;
323 1.1.1.2 christos }
324 1.1 alm r = 1;
325 1.1 alm } else
326 1.1 alm r = 0;
327 1.1 alm else
328 1.1 alm r = MATCH(cp->a1);
329 1.1 alm return (cp->nonsel ? ! r : r);
330 1.1 alm }
331 1.1 alm
332 1.1 alm /*
333 1.1.1.2 christos * Reset the sed processor to its initial state.
334 1.1.1.2 christos */
335 1.1.1.2 christos void
336 1.1.1.2 christos resetstate(void)
337 1.1.1.2 christos {
338 1.1.1.2 christos struct s_command *cp;
339 1.1.1.2 christos
340 1.1.1.2 christos /*
341 1.1.1.2 christos * Reset all in-range markers.
342 1.1.1.2 christos */
343 1.1.1.2 christos for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
344 1.1.1.2 christos if (cp->a2)
345 1.1.1.2 christos cp->startline = 0;
346 1.1.1.2 christos
347 1.1.1.2 christos /*
348 1.1.1.2 christos * Clear out the hold space.
349 1.1.1.2 christos */
350 1.1.1.2 christos cspace(&HS, "", 0, REPLACE);
351 1.1.1.2 christos }
352 1.1.1.2 christos
353 1.1.1.2 christos /*
354 1.1 alm * substitute --
355 1.1 alm * Do substitutions in the pattern space. Currently, we build a
356 1.1 alm * copy of the new pattern space in the substitute space structure
357 1.1 alm * and then swap them.
358 1.1 alm */
359 1.1 alm static int
360 1.1.1.2 christos substitute(struct s_command *cp)
361 1.1 alm {
362 1.1 alm SPACE tspace;
363 1.1 alm regex_t *re;
364 1.1.1.2 christos regoff_t re_off, slen;
365 1.1.1.1 mrg int lastempty, n;
366 1.1 alm char *s;
367 1.1 alm
368 1.1 alm s = ps;
369 1.1 alm re = cp->u.s->re;
370 1.1 alm if (re == NULL) {
371 1.1 alm if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
372 1.1 alm linenum = cp->u.s->linenum;
373 1.1.1.2 christos errx(1, "%lu: %s: \\%u not defined in the RE",
374 1.1.1.2 christos linenum, fname, cp->u.s->maxbref);
375 1.1 alm }
376 1.1 alm }
377 1.1.1.1 mrg if (!regexec_e(re, s, 0, 0, psl))
378 1.1 alm return (0);
379 1.1 alm
380 1.1.1.2 christos SS.len = 0; /* Clean substitute space. */
381 1.1.1.2 christos slen = psl;
382 1.1.1.2 christos n = cp->u.s->n;
383 1.1.1.1 mrg lastempty = 1;
384 1.1.1.1 mrg
385 1.1.1.2 christos switch (n) {
386 1.1.1.2 christos case 0: /* Global */
387 1.1.1.2 christos do {
388 1.1.1.1 mrg if (lastempty || match[0].rm_so != match[0].rm_eo) {
389 1.1.1.1 mrg /* Locate start of replaced string. */
390 1.1.1.1 mrg re_off = match[0].rm_so;
391 1.1.1.1 mrg /* Copy leading retained string. */
392 1.1.1.1 mrg cspace(&SS, s, re_off, APPEND);
393 1.1.1.1 mrg /* Add in regular expression. */
394 1.1.1.1 mrg regsub(&SS, s, cp->u.s->new);
395 1.1.1.1 mrg }
396 1.1.1.1 mrg
397 1.1.1.2 christos /* Move past this match. */
398 1.1.1.1 mrg if (match[0].rm_so != match[0].rm_eo) {
399 1.1.1.1 mrg s += match[0].rm_eo;
400 1.1.1.1 mrg slen -= match[0].rm_eo;
401 1.1.1.1 mrg lastempty = 0;
402 1.1.1.1 mrg } else {
403 1.1.1.2 christos if (match[0].rm_so < slen)
404 1.1.1.2 christos cspace(&SS, s + match[0].rm_so, 1,
405 1.1.1.2 christos APPEND);
406 1.1.1.1 mrg s += match[0].rm_so + 1;
407 1.1.1.1 mrg slen -= match[0].rm_so + 1;
408 1.1.1.1 mrg lastempty = 1;
409 1.1.1.1 mrg }
410 1.1.1.2 christos } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
411 1.1 alm /* Copy trailing retained string. */
412 1.1.1.1 mrg if (slen > 0)
413 1.1.1.1 mrg cspace(&SS, s, slen, APPEND);
414 1.1.1.2 christos break;
415 1.1 alm default: /* Nth occurrence */
416 1.1 alm while (--n) {
417 1.1.1.2 christos if (match[0].rm_eo == match[0].rm_so)
418 1.1.1.2 christos match[0].rm_eo = match[0].rm_so + 1;
419 1.1 alm s += match[0].rm_eo;
420 1.1.1.1 mrg slen -= match[0].rm_eo;
421 1.1.1.2 christos if (slen < 0)
422 1.1.1.2 christos return (0);
423 1.1.1.1 mrg if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
424 1.1 alm return (0);
425 1.1 alm }
426 1.1 alm /* FALLTHROUGH */
427 1.1 alm case 1: /* 1st occurrence */
428 1.1 alm /* Locate start of replaced string. */
429 1.1 alm re_off = match[0].rm_so + (s - ps);
430 1.1 alm /* Copy leading retained string. */
431 1.1 alm cspace(&SS, ps, re_off, APPEND);
432 1.1 alm /* Add in regular expression. */
433 1.1 alm regsub(&SS, s, cp->u.s->new);
434 1.1 alm /* Copy trailing retained string. */
435 1.1 alm s += match[0].rm_eo;
436 1.1.1.1 mrg slen -= match[0].rm_eo;
437 1.1.1.1 mrg cspace(&SS, s, slen, APPEND);
438 1.1 alm break;
439 1.1 alm }
440 1.1 alm
441 1.1 alm /*
442 1.1 alm * Swap the substitute space and the pattern space, and make sure
443 1.1 alm * that any leftover pointers into stdio memory get lost.
444 1.1 alm */
445 1.1 alm tspace = PS;
446 1.1 alm PS = SS;
447 1.1 alm SS = tspace;
448 1.1 alm SS.space = SS.back;
449 1.1 alm
450 1.1 alm /* Handle the 'p' flag. */
451 1.1 alm if (cp->u.s->p)
452 1.1.1.2 christos OUT();
453 1.1 alm
454 1.1 alm /* Handle the 'w' flag. */
455 1.1 alm if (cp->u.s->wfile && !pd) {
456 1.1 alm if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
457 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
458 1.1.1.2 christos err(1, "%s", cp->u.s->wfile);
459 1.1.1.2 christos if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
460 1.1.1.2 christos write(cp->u.s->wfd, "\n", 1) != 1)
461 1.1.1.2 christos err(1, "%s", cp->u.s->wfile);
462 1.1 alm }
463 1.1 alm return (1);
464 1.1 alm }
465 1.1 alm
466 1.1 alm /*
467 1.1.1.2 christos * do_tr --
468 1.1.1.2 christos * Perform translation ('y' command) in the pattern space.
469 1.1.1.2 christos */
470 1.1.1.2 christos static void
471 1.1.1.2 christos do_tr(struct s_tr *y)
472 1.1.1.2 christos {
473 1.1.1.2 christos SPACE tmp;
474 1.1.1.2 christos char c, *p;
475 1.1.1.2 christos size_t clen, left;
476 1.1.1.2 christos int i;
477 1.1.1.2 christos
478 1.1.1.2 christos if (MB_CUR_MAX == 1) {
479 1.1.1.2 christos /*
480 1.1.1.2 christos * Single-byte encoding: perform in-place translation
481 1.1.1.2 christos * of the pattern space.
482 1.1.1.2 christos */
483 1.1.1.2 christos for (p = ps; p < &ps[psl]; p++)
484 1.1.1.2 christos *p = y->bytetab[(u_char)*p];
485 1.1.1.2 christos } else {
486 1.1.1.2 christos /*
487 1.1.1.2 christos * Multi-byte encoding: perform translation into the
488 1.1.1.2 christos * translation space, then swap the translation and
489 1.1.1.2 christos * pattern spaces.
490 1.1.1.2 christos */
491 1.1.1.2 christos /* Clean translation space. */
492 1.1.1.2 christos YS.len = 0;
493 1.1.1.2 christos for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
494 1.1.1.2 christos if ((c = y->bytetab[(u_char)*p]) != '\0') {
495 1.1.1.2 christos cspace(&YS, &c, 1, APPEND);
496 1.1.1.2 christos clen = 1;
497 1.1.1.2 christos continue;
498 1.1.1.2 christos }
499 1.1.1.2 christos for (i = 0; i < y->nmultis; i++)
500 1.1.1.2 christos if (left >= y->multis[i].fromlen &&
501 1.1.1.2 christos memcmp(p, y->multis[i].from,
502 1.1.1.2 christos y->multis[i].fromlen) == 0)
503 1.1.1.2 christos break;
504 1.1.1.2 christos if (i < y->nmultis) {
505 1.1.1.2 christos cspace(&YS, y->multis[i].to,
506 1.1.1.2 christos y->multis[i].tolen, APPEND);
507 1.1.1.2 christos clen = y->multis[i].fromlen;
508 1.1.1.2 christos } else {
509 1.1.1.2 christos cspace(&YS, p, 1, APPEND);
510 1.1.1.2 christos clen = 1;
511 1.1.1.2 christos }
512 1.1.1.2 christos }
513 1.1.1.2 christos /* Swap the translation space and the pattern space. */
514 1.1.1.2 christos tmp = PS;
515 1.1.1.2 christos PS = YS;
516 1.1.1.2 christos YS = tmp;
517 1.1.1.2 christos YS.space = YS.back;
518 1.1.1.2 christos }
519 1.1.1.2 christos }
520 1.1.1.2 christos
521 1.1.1.2 christos /*
522 1.1 alm * Flush append requests. Always called before reading a line,
523 1.1 alm * therefore it also resets the substitution done (sdone) flag.
524 1.1 alm */
525 1.1 alm static void
526 1.1.1.2 christos flush_appends(void)
527 1.1 alm {
528 1.1 alm FILE *f;
529 1.1 alm int count, i;
530 1.1 alm char buf[8 * 1024];
531 1.1 alm
532 1.1.1.2 christos for (i = 0; i < appendx; i++)
533 1.1 alm switch (appends[i].type) {
534 1.1 alm case AP_STRING:
535 1.1.1.2 christos fwrite(appends[i].s, sizeof(char), appends[i].len,
536 1.1.1.2 christos outfile);
537 1.1 alm break;
538 1.1 alm case AP_FILE:
539 1.1 alm /*
540 1.1 alm * Read files probably shouldn't be cached. Since
541 1.1 alm * it's not an error to read a non-existent file,
542 1.1 alm * it's possible that another program is interacting
543 1.1.1.2 christos * with the sed script through the filesystem. It
544 1.1 alm * would be truly bizarre, but possible. It's probably
545 1.1 alm * not that big a performance win, anyhow.
546 1.1 alm */
547 1.1 alm if ((f = fopen(appends[i].s, "r")) == NULL)
548 1.1 alm break;
549 1.1.1.2 christos while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
550 1.1.1.2 christos (void)fwrite(buf, sizeof(char), count, outfile);
551 1.1 alm (void)fclose(f);
552 1.1 alm break;
553 1.1 alm }
554 1.1.1.2 christos if (ferror(outfile))
555 1.1.1.2 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
556 1.1 alm appendx = sdone = 0;
557 1.1 alm }
558 1.1 alm
559 1.1 alm static void
560 1.1.1.2 christos lputs(char *s, size_t len)
561 1.1 alm {
562 1.1.1.2 christos static const char escapes[] = "\\\a\b\f\r\t\v";
563 1.1.1.2 christos int c, col, width;
564 1.1.1.2 christos const char *p;
565 1.1 alm struct winsize win;
566 1.1 alm static int termwidth = -1;
567 1.1.1.2 christos size_t clen, i;
568 1.1.1.2 christos wchar_t wc;
569 1.1.1.2 christos mbstate_t mbs;
570 1.1.1.2 christos
571 1.1.1.2 christos if (outfile != stdout)
572 1.1.1.2 christos termwidth = 60;
573 1.1.1.2 christos if (termwidth == -1) {
574 1.1.1.2 christos if ((p = getenv("COLUMNS")) && *p != '\0')
575 1.1 alm termwidth = atoi(p);
576 1.1 alm else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
577 1.1 alm win.ws_col > 0)
578 1.1 alm termwidth = win.ws_col;
579 1.1 alm else
580 1.1 alm termwidth = 60;
581 1.1.1.2 christos }
582 1.1.1.2 christos if (termwidth <= 0)
583 1.1.1.2 christos termwidth = 1;
584 1.1 alm
585 1.1.1.2 christos memset(&mbs, 0, sizeof(mbs));
586 1.1.1.2 christos col = 0;
587 1.1.1.2 christos while (len != 0) {
588 1.1.1.2 christos clen = mbrtowc(&wc, s, len, &mbs);
589 1.1.1.2 christos if (clen == 0)
590 1.1.1.2 christos clen = 1;
591 1.1.1.2 christos if (clen == (size_t)-1 || clen == (size_t)-2) {
592 1.1.1.2 christos wc = (unsigned char)*s;
593 1.1.1.2 christos clen = 1;
594 1.1.1.2 christos memset(&mbs, 0, sizeof(mbs));
595 1.1 alm }
596 1.1.1.2 christos if (wc == '\n') {
597 1.1.1.2 christos if (col + 1 >= termwidth)
598 1.1.1.2 christos fprintf(outfile, "\\\n");
599 1.1.1.2 christos fputc('$', outfile);
600 1.1.1.2 christos fputc('\n', outfile);
601 1.1.1.2 christos col = 0;
602 1.1.1.2 christos } else if (iswprint(wc)) {
603 1.1.1.2 christos width = wcwidth(wc);
604 1.1.1.2 christos if (col + width >= termwidth) {
605 1.1.1.2 christos fprintf(outfile, "\\\n");
606 1.1.1.2 christos col = 0;
607 1.1.1.2 christos }
608 1.1.1.2 christos fwrite(s, 1, clen, outfile);
609 1.1.1.2 christos col += width;
610 1.1.1.2 christos } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
611 1.1.1.2 christos (p = strchr(escapes, c)) != NULL) {
612 1.1.1.2 christos if (col + 2 >= termwidth) {
613 1.1.1.2 christos fprintf(outfile, "\\\n");
614 1.1.1.2 christos col = 0;
615 1.1.1.2 christos }
616 1.1.1.2 christos fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
617 1.1.1.2 christos col += 2;
618 1.1 alm } else {
619 1.1.1.2 christos if (col + 4 * clen >= (unsigned)termwidth) {
620 1.1.1.2 christos fprintf(outfile, "\\\n");
621 1.1.1.2 christos col = 0;
622 1.1 alm }
623 1.1.1.2 christos for (i = 0; i < clen; i++)
624 1.1.1.2 christos fprintf(outfile, "\\%03o",
625 1.1.1.2 christos (int)(unsigned char)s[i]);
626 1.1.1.2 christos col += 4 * clen;
627 1.1 alm }
628 1.1.1.2 christos s += clen;
629 1.1.1.2 christos len -= clen;
630 1.1 alm }
631 1.1.1.2 christos if (col + 1 >= termwidth)
632 1.1.1.2 christos fprintf(outfile, "\\\n");
633 1.1.1.2 christos (void)fputc('$', outfile);
634 1.1.1.2 christos (void)fputc('\n', outfile);
635 1.1.1.2 christos if (ferror(outfile))
636 1.1.1.2 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
637 1.1 alm }
638 1.1 alm
639 1.1.1.2 christos static __inline int
640 1.1.1.2 christos regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
641 1.1.1.2 christos size_t slen)
642 1.1 alm {
643 1.1 alm int eval;
644 1.1.1.2 christos
645 1.1 alm if (preg == NULL) {
646 1.1 alm if (defpreg == NULL)
647 1.1.1.2 christos errx(1, "first RE may not be empty");
648 1.1 alm } else
649 1.1 alm defpreg = preg;
650 1.1 alm
651 1.1.1.2 christos /* Set anchors */
652 1.1.1.1 mrg match[0].rm_so = 0;
653 1.1.1.1 mrg match[0].rm_eo = slen;
654 1.1.1.2 christos
655 1.1 alm eval = regexec(defpreg, string,
656 1.1.1.1 mrg nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
657 1.1 alm switch(eval) {
658 1.1 alm case 0:
659 1.1 alm return (1);
660 1.1 alm case REG_NOMATCH:
661 1.1 alm return (0);
662 1.1 alm }
663 1.1.1.2 christos errx(1, "RE error: %s", strregerror(eval, defpreg));
664 1.1 alm /* NOTREACHED */
665 1.1 alm }
666 1.1 alm
667 1.1 alm /*
668 1.1 alm * regsub - perform substitutions after a regexp match
669 1.1 alm * Based on a routine by Henry Spencer
670 1.1 alm */
671 1.1 alm static void
672 1.1.1.2 christos regsub(SPACE *sp, char *string, char *src)
673 1.1 alm {
674 1.1.1.2 christos int len, no;
675 1.1.1.2 christos char c, *dst;
676 1.1 alm
677 1.1 alm #define NEEDSP(reqlen) \
678 1.1.1.2 christos /* XXX What is the +1 for? */ \
679 1.1.1.2 christos if (sp->len + (reqlen) + 1 >= sp->blen) { \
680 1.1 alm sp->blen += (reqlen) + 1024; \
681 1.1.1.2 christos if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
682 1.1.1.2 christos == NULL) \
683 1.1.1.2 christos err(1, "realloc"); \
684 1.1 alm dst = sp->space + sp->len; \
685 1.1 alm }
686 1.1 alm
687 1.1 alm dst = sp->space + sp->len;
688 1.1 alm while ((c = *src++) != '\0') {
689 1.1 alm if (c == '&')
690 1.1 alm no = 0;
691 1.1.1.2 christos else if (c == '\\' && isdigit((unsigned char)*src))
692 1.1 alm no = *src++ - '0';
693 1.1 alm else
694 1.1 alm no = -1;
695 1.1 alm if (no < 0) { /* Ordinary character. */
696 1.1.1.2 christos if (c == '\\' && (*src == '\\' || *src == '&'))
697 1.1.1.2 christos c = *src++;
698 1.1 alm NEEDSP(1);
699 1.1.1.2 christos *dst++ = c;
700 1.1 alm ++sp->len;
701 1.1.1.2 christos } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
702 1.1 alm len = match[no].rm_eo - match[no].rm_so;
703 1.1 alm NEEDSP(len);
704 1.1 alm memmove(dst, string + match[no].rm_so, len);
705 1.1 alm dst += len;
706 1.1 alm sp->len += len;
707 1.1 alm }
708 1.1 alm }
709 1.1 alm NEEDSP(1);
710 1.1 alm *dst = '\0';
711 1.1 alm }
712 1.1 alm
713 1.1 alm /*
714 1.1.1.2 christos * cspace --
715 1.1.1.2 christos * Concatenate space: append the source space to the destination space,
716 1.1.1.2 christos * allocating new space as necessary.
717 1.1 alm */
718 1.1 alm void
719 1.1.1.2 christos cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
720 1.1 alm {
721 1.1 alm size_t tlen;
722 1.1 alm
723 1.1.1.1 mrg /* Make sure SPACE has enough memory and ramp up quickly. */
724 1.1.1.1 mrg tlen = sp->len + len + 1;
725 1.1 alm if (tlen > sp->blen) {
726 1.1 alm sp->blen = tlen + 1024;
727 1.1.1.2 christos if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
728 1.1.1.2 christos NULL)
729 1.1.1.2 christos err(1, "realloc");
730 1.1 alm }
731 1.1 alm
732 1.1.1.1 mrg if (spflag == REPLACE)
733 1.1 alm sp->len = 0;
734 1.1 alm
735 1.1 alm memmove(sp->space + sp->len, p, len);
736 1.1.1.1 mrg
737 1.1 alm sp->space[sp->len += len] = '\0';
738 1.1 alm }
739 1.1 alm
740 1.1 alm /*
741 1.1 alm * Close all cached opened files and report any errors
742 1.1 alm */
743 1.1 alm void
744 1.1.1.2 christos cfclose(struct s_command *cp, struct s_command *end)
745 1.1 alm {
746 1.1 alm
747 1.1 alm for (; cp != end; cp = cp->next)
748 1.1 alm switch(cp->code) {
749 1.1 alm case 's':
750 1.1 alm if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
751 1.1.1.2 christos err(1, "%s", cp->u.s->wfile);
752 1.1 alm cp->u.s->wfd = -1;
753 1.1 alm break;
754 1.1 alm case 'w':
755 1.1 alm if (cp->u.fd != -1 && close(cp->u.fd))
756 1.1.1.2 christos err(1, "%s", cp->t);
757 1.1 alm cp->u.fd = -1;
758 1.1 alm break;
759 1.1 alm case '{':
760 1.1 alm cfclose(cp->u.c, cp->next);
761 1.1 alm break;
762 1.1 alm }
763 1.1 alm }
764