process.c revision 1.44 1 1.44 christos /* $NetBSD: process.c,v 1.44 2014/06/09 12:48:58 christos Exp $ */
2 1.18 tls
3 1.1 alm /*-
4 1.40 christos * Copyright (c) 1992 Diomidis Spinellis.
5 1.19 mrg * Copyright (c) 1992, 1993, 1994
6 1.8 cgd * The Regents of the University of California. All rights reserved.
7 1.1 alm *
8 1.1 alm * This code is derived from software contributed to Berkeley by
9 1.1 alm * Diomidis Spinellis of Imperial College, University of London.
10 1.1 alm *
11 1.1 alm * Redistribution and use in source and binary forms, with or without
12 1.1 alm * modification, are permitted provided that the following conditions
13 1.1 alm * are met:
14 1.1 alm * 1. Redistributions of source code must retain the above copyright
15 1.1 alm * notice, this list of conditions and the following disclaimer.
16 1.1 alm * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 alm * notice, this list of conditions and the following disclaimer in the
18 1.1 alm * documentation and/or other materials provided with the distribution.
19 1.33 agc * 3. Neither the name of the University nor the names of its contributors
20 1.33 agc * may be used to endorse or promote products derived from this software
21 1.33 agc * without specific prior written permission.
22 1.33 agc *
23 1.33 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.33 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.33 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.33 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.33 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.33 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.33 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.33 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.33 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.33 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.33 agc * SUCH DAMAGE.
34 1.33 agc */
35 1.33 agc
36 1.37 gdamore #if HAVE_NBTOOL_CONFIG_H
37 1.37 gdamore #include "nbtool_config.h"
38 1.37 gdamore #endif
39 1.37 gdamore
40 1.20 lukem #include <sys/cdefs.h>
41 1.44 christos __RCSID("$NetBSD: process.c,v 1.44 2014/06/09 12:48:58 christos Exp $");
42 1.40 christos #ifdef __FBSDID
43 1.40 christos __FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
44 1.40 christos #endif
45 1.40 christos
46 1.1 alm #include <sys/types.h>
47 1.1 alm #include <sys/stat.h>
48 1.1 alm #include <sys/ioctl.h>
49 1.1 alm #include <sys/uio.h>
50 1.1 alm
51 1.1 alm #include <ctype.h>
52 1.40 christos #include <err.h>
53 1.1 alm #include <errno.h>
54 1.1 alm #include <fcntl.h>
55 1.1 alm #include <limits.h>
56 1.1 alm #include <regex.h>
57 1.1 alm #include <stdio.h>
58 1.1 alm #include <stdlib.h>
59 1.1 alm #include <string.h>
60 1.1 alm #include <unistd.h>
61 1.40 christos #include <wchar.h>
62 1.40 christos #include <wctype.h>
63 1.1 alm
64 1.1 alm #include "defs.h"
65 1.1 alm #include "extern.h"
66 1.1 alm
67 1.40 christos static SPACE HS, PS, SS, YS;
68 1.1 alm #define pd PS.deleted
69 1.1 alm #define ps PS.space
70 1.1 alm #define psl PS.len
71 1.1 alm #define hs HS.space
72 1.1 alm #define hsl HS.len
73 1.1 alm
74 1.40 christos static __inline int applies(struct s_command *);
75 1.40 christos static void do_tr(struct s_tr *);
76 1.32 wiz static void flush_appends(void);
77 1.40 christos static void lputs(char *, size_t);
78 1.40 christos static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
79 1.32 wiz static void regsub(SPACE *, char *, char *);
80 1.32 wiz static int substitute(struct s_command *);
81 1.1 alm
82 1.1 alm struct s_appends *appends; /* Array of pointers to strings to append. */
83 1.40 christos static size_t appendx; /* Index into appends array. */
84 1.40 christos size_t appendnum; /* Size of appends array. */
85 1.1 alm
86 1.1 alm static int lastaddr; /* Set by applies if last address of a range. */
87 1.1 alm static int sdone; /* If any substitutes since last line input. */
88 1.1 alm /* Iov structure for 'w' commands. */
89 1.1 alm static regex_t *defpreg;
90 1.1 alm size_t maxnsub;
91 1.1 alm regmatch_t *match;
92 1.1 alm
93 1.40 christos #define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
94 1.8 cgd
95 1.1 alm void
96 1.32 wiz process(void)
97 1.1 alm {
98 1.1 alm struct s_command *cp;
99 1.1 alm SPACE tspace;
100 1.40 christos size_t oldpsl = 0;
101 1.15 mycroft char *p;
102 1.1 alm
103 1.40 christos p = NULL;
104 1.40 christos
105 1.1 alm for (linenum = 0; mf_fgets(&PS, REPLACE);) {
106 1.1 alm pd = 0;
107 1.16 mycroft top:
108 1.1 alm cp = prog;
109 1.1 alm redirect:
110 1.1 alm while (cp != NULL) {
111 1.1 alm if (!applies(cp)) {
112 1.1 alm cp = cp->next;
113 1.1 alm continue;
114 1.1 alm }
115 1.1 alm switch (cp->code) {
116 1.1 alm case '{':
117 1.1 alm cp = cp->u.c;
118 1.1 alm goto redirect;
119 1.1 alm case 'a':
120 1.40 christos if (appendx >= appendnum)
121 1.1 alm appends = xrealloc(appends,
122 1.1 alm sizeof(struct s_appends) *
123 1.40 christos (appendnum *= 2));
124 1.1 alm appends[appendx].type = AP_STRING;
125 1.1 alm appends[appendx].s = cp->t;
126 1.8 cgd appends[appendx].len = strlen(cp->t);
127 1.1 alm appendx++;
128 1.1 alm break;
129 1.1 alm case 'b':
130 1.1 alm cp = cp->u.c;
131 1.1 alm goto redirect;
132 1.1 alm case 'c':
133 1.1 alm pd = 1;
134 1.1 alm psl = 0;
135 1.40 christos if (cp->a2 == NULL || lastaddr || lastline())
136 1.40 christos (void)fprintf(outfile, "%s", cp->t);
137 1.39 uwe goto new;
138 1.1 alm case 'd':
139 1.1 alm pd = 1;
140 1.1 alm goto new;
141 1.1 alm case 'D':
142 1.1 alm if (pd)
143 1.1 alm goto new;
144 1.40 christos if (psl == 0 ||
145 1.40 christos (p = memchr(ps, '\n', psl - 1)) == NULL) {
146 1.1 alm pd = 1;
147 1.16 mycroft goto new;
148 1.28 atatat } else {
149 1.40 christos psl -= (size_t)((p + 1) - ps);
150 1.1 alm memmove(ps, p + 1, psl);
151 1.28 atatat goto top;
152 1.1 alm }
153 1.1 alm case 'g':
154 1.1 alm cspace(&PS, hs, hsl, REPLACE);
155 1.1 alm break;
156 1.1 alm case 'G':
157 1.43 christos cspace(&PS, "\n", 1, APPEND);
158 1.40 christos cspace(&PS, hs, hsl, APPEND);
159 1.1 alm break;
160 1.1 alm case 'h':
161 1.1 alm cspace(&HS, ps, psl, REPLACE);
162 1.1 alm break;
163 1.1 alm case 'H':
164 1.40 christos cspace(&HS, "\n", 1, APPEND);
165 1.40 christos cspace(&HS, ps, psl, APPEND);
166 1.1 alm break;
167 1.1 alm case 'i':
168 1.40 christos (void)fprintf(outfile, "%s", cp->t);
169 1.1 alm break;
170 1.1 alm case 'l':
171 1.40 christos lputs(ps, psl);
172 1.1 alm break;
173 1.1 alm case 'n':
174 1.1 alm if (!nflag && !pd)
175 1.40 christos OUT();
176 1.1 alm flush_appends();
177 1.14 mycroft if (!mf_fgets(&PS, REPLACE))
178 1.1 alm exit(0);
179 1.1 alm pd = 0;
180 1.1 alm break;
181 1.1 alm case 'N':
182 1.1 alm flush_appends();
183 1.40 christos cspace(&PS, "\n", 1, APPEND);
184 1.40 christos if (!mf_fgets(&PS, APPEND))
185 1.1 alm exit(0);
186 1.1 alm break;
187 1.1 alm case 'p':
188 1.1 alm if (pd)
189 1.1 alm break;
190 1.40 christos OUT();
191 1.1 alm break;
192 1.1 alm case 'P':
193 1.1 alm if (pd)
194 1.1 alm break;
195 1.15 mycroft if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
196 1.15 mycroft oldpsl = psl;
197 1.40 christos psl = (size_t)(p - ps);
198 1.1 alm }
199 1.40 christos OUT();
200 1.1 alm if (p != NULL)
201 1.15 mycroft psl = oldpsl;
202 1.1 alm break;
203 1.1 alm case 'q':
204 1.1 alm if (!nflag && !pd)
205 1.40 christos OUT();
206 1.1 alm flush_appends();
207 1.1 alm exit(0);
208 1.1 alm case 'r':
209 1.40 christos if (appendx >= appendnum)
210 1.1 alm appends = xrealloc(appends,
211 1.1 alm sizeof(struct s_appends) *
212 1.40 christos (appendnum *= 2));
213 1.1 alm appends[appendx].type = AP_FILE;
214 1.1 alm appends[appendx].s = cp->t;
215 1.8 cgd appends[appendx].len = strlen(cp->t);
216 1.1 alm appendx++;
217 1.1 alm break;
218 1.1 alm case 's':
219 1.1 alm sdone |= substitute(cp);
220 1.1 alm break;
221 1.1 alm case 't':
222 1.1 alm if (sdone) {
223 1.1 alm sdone = 0;
224 1.1 alm cp = cp->u.c;
225 1.1 alm goto redirect;
226 1.1 alm }
227 1.1 alm break;
228 1.1 alm case 'w':
229 1.1 alm if (pd)
230 1.1 alm break;
231 1.1 alm if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
232 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
233 1.1 alm DEFFILEMODE)) == -1)
234 1.40 christos err(1, "%s", cp->t);
235 1.40 christos if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
236 1.40 christos write(cp->u.fd, "\n", 1) != 1)
237 1.40 christos err(1, "%s", cp->t);
238 1.1 alm break;
239 1.1 alm case 'x':
240 1.40 christos /*
241 1.40 christos * If the hold space is null, make it empty
242 1.40 christos * but not null. Otherwise the pattern space
243 1.40 christos * will become null after the swap, which is
244 1.40 christos * an abnormal condition.
245 1.40 christos */
246 1.8 cgd if (hs == NULL)
247 1.40 christos cspace(&HS, "", 0, REPLACE);
248 1.1 alm tspace = PS;
249 1.1 alm PS = HS;
250 1.1 alm HS = tspace;
251 1.1 alm break;
252 1.1 alm case 'y':
253 1.40 christos if (pd || psl == 0)
254 1.1 alm break;
255 1.40 christos do_tr(cp->u.y);
256 1.1 alm break;
257 1.1 alm case ':':
258 1.1 alm case '}':
259 1.1 alm break;
260 1.1 alm case '=':
261 1.40 christos (void)fprintf(outfile, "%lu\n", linenum);
262 1.1 alm }
263 1.1 alm cp = cp->next;
264 1.1 alm } /* for all cp */
265 1.1 alm
266 1.1 alm new: if (!nflag && !pd)
267 1.40 christos OUT();
268 1.1 alm flush_appends();
269 1.1 alm } /* for all lines */
270 1.1 alm }
271 1.1 alm
272 1.1 alm /*
273 1.1 alm * TRUE if the address passed matches the current program state
274 1.1 alm * (lastline, linenumber, ps).
275 1.1 alm */
276 1.40 christos #define MATCH(a) \
277 1.40 christos ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
278 1.40 christos (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
279 1.1 alm
280 1.1 alm /*
281 1.40 christos * Return TRUE if the command applies to the current line. Sets the start
282 1.40 christos * line for process ranges. Interprets the non-select (``!'') flag.
283 1.1 alm */
284 1.40 christos static __inline int
285 1.32 wiz applies(struct s_command *cp)
286 1.1 alm {
287 1.1 alm int r;
288 1.1 alm
289 1.1 alm lastaddr = 0;
290 1.1 alm if (cp->a1 == NULL && cp->a2 == NULL)
291 1.1 alm r = 1;
292 1.40 christos else if (cp->a2)
293 1.40 christos if (cp->startline > 0) {
294 1.1 alm if (MATCH(cp->a2)) {
295 1.40 christos cp->startline = 0;
296 1.1 alm lastaddr = 1;
297 1.40 christos r = 1;
298 1.40 christos } else if (linenum - cp->startline <= cp->a2->u.l)
299 1.40 christos r = 1;
300 1.40 christos else if ((cp->a2->type == AT_LINE &&
301 1.40 christos linenum > cp->a2->u.l) ||
302 1.40 christos (cp->a2->type == AT_RELLINE &&
303 1.40 christos linenum - cp->startline > cp->a2->u.l)) {
304 1.40 christos /*
305 1.40 christos * We missed the 2nd address due to a branch,
306 1.40 christos * so just close the range and return false.
307 1.40 christos */
308 1.40 christos cp->startline = 0;
309 1.40 christos r = 0;
310 1.40 christos } else
311 1.40 christos r = 1;
312 1.36 christos } else if (cp->a1 && MATCH(cp->a1)) {
313 1.1 alm /*
314 1.1 alm * If the second address is a number less than or
315 1.1 alm * equal to the line number first selected, only
316 1.1 alm * one line shall be selected.
317 1.1 alm * -- POSIX 1003.2
318 1.40 christos * Likewise if the relative second line address is zero.
319 1.1 alm */
320 1.40 christos if ((cp->a2->type == AT_LINE &&
321 1.40 christos linenum >= cp->a2->u.l) ||
322 1.40 christos (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
323 1.1 alm lastaddr = 1;
324 1.40 christos else {
325 1.40 christos cp->startline = linenum;
326 1.40 christos }
327 1.1 alm r = 1;
328 1.1 alm } else
329 1.1 alm r = 0;
330 1.40 christos else
331 1.1 alm r = MATCH(cp->a1);
332 1.1 alm return (cp->nonsel ? ! r : r);
333 1.1 alm }
334 1.1 alm
335 1.1 alm /*
336 1.40 christos * Reset the sed processor to its initial state.
337 1.40 christos */
338 1.40 christos void
339 1.40 christos resetstate(void)
340 1.40 christos {
341 1.40 christos struct s_command *cp;
342 1.40 christos
343 1.40 christos /*
344 1.40 christos * Reset all in-range markers.
345 1.40 christos */
346 1.40 christos for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
347 1.40 christos if (cp->a2)
348 1.40 christos cp->startline = 0;
349 1.40 christos
350 1.40 christos /*
351 1.40 christos * Clear out the hold space.
352 1.40 christos */
353 1.40 christos cspace(&HS, "", 0, REPLACE);
354 1.40 christos }
355 1.40 christos
356 1.40 christos /*
357 1.1 alm * substitute --
358 1.1 alm * Do substitutions in the pattern space. Currently, we build a
359 1.1 alm * copy of the new pattern space in the substitute space structure
360 1.1 alm * and then swap them.
361 1.1 alm */
362 1.1 alm static int
363 1.32 wiz substitute(struct s_command *cp)
364 1.1 alm {
365 1.1 alm SPACE tspace;
366 1.1 alm regex_t *re;
367 1.40 christos regoff_t re_off, slen;
368 1.19 mrg int lastempty, n;
369 1.1 alm char *s;
370 1.1 alm
371 1.1 alm s = ps;
372 1.1 alm re = cp->u.s->re;
373 1.1 alm if (re == NULL) {
374 1.40 christos if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
375 1.1 alm linenum = cp->u.s->linenum;
376 1.40 christos errx(1, "%lu: %s: \\%u not defined in the RE",
377 1.40 christos linenum, fname, cp->u.s->maxbref);
378 1.1 alm }
379 1.1 alm }
380 1.8 cgd if (!regexec_e(re, s, 0, 0, psl))
381 1.1 alm return (0);
382 1.1 alm
383 1.1 alm SS.len = 0; /* Clean substitute space. */
384 1.40 christos slen = (regoff_t)psl;
385 1.1 alm n = cp->u.s->n;
386 1.12 cgd lastempty = 1;
387 1.12 cgd
388 1.1 alm switch (n) {
389 1.1 alm case 0: /* Global */
390 1.1 alm do {
391 1.12 cgd if (lastempty || match[0].rm_so != match[0].rm_eo) {
392 1.12 cgd /* Locate start of replaced string. */
393 1.12 cgd re_off = match[0].rm_so;
394 1.12 cgd /* Copy leading retained string. */
395 1.40 christos cspace(&SS, s, (size_t)re_off, APPEND);
396 1.12 cgd /* Add in regular expression. */
397 1.12 cgd regsub(&SS, s, cp->u.s->new);
398 1.12 cgd }
399 1.12 cgd
400 1.1 alm /* Move past this match. */
401 1.12 cgd if (match[0].rm_so != match[0].rm_eo) {
402 1.12 cgd s += match[0].rm_eo;
403 1.12 cgd slen -= match[0].rm_eo;
404 1.12 cgd lastempty = 0;
405 1.12 cgd } else {
406 1.40 christos if (match[0].rm_so < slen)
407 1.40 christos cspace(&SS, s + match[0].rm_so, 1,
408 1.40 christos APPEND);
409 1.12 cgd s += match[0].rm_so + 1;
410 1.12 cgd slen -= match[0].rm_so + 1;
411 1.12 cgd lastempty = 1;
412 1.12 cgd }
413 1.40 christos } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen));
414 1.1 alm /* Copy trailing retained string. */
415 1.12 cgd if (slen > 0)
416 1.40 christos cspace(&SS, s, (size_t)slen, APPEND);
417 1.1 alm break;
418 1.1 alm default: /* Nth occurrence */
419 1.1 alm while (--n) {
420 1.40 christos if (match[0].rm_eo == match[0].rm_so)
421 1.40 christos match[0].rm_eo = match[0].rm_so + 1;
422 1.1 alm s += match[0].rm_eo;
423 1.8 cgd slen -= match[0].rm_eo;
424 1.40 christos if (slen < 0)
425 1.40 christos return (0);
426 1.40 christos if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen))
427 1.1 alm return (0);
428 1.1 alm }
429 1.1 alm /* FALLTHROUGH */
430 1.1 alm case 1: /* 1st occurrence */
431 1.1 alm /* Locate start of replaced string. */
432 1.1 alm re_off = match[0].rm_so + (s - ps);
433 1.1 alm /* Copy leading retained string. */
434 1.40 christos cspace(&SS, ps, (size_t)re_off, APPEND);
435 1.1 alm /* Add in regular expression. */
436 1.1 alm regsub(&SS, s, cp->u.s->new);
437 1.1 alm /* Copy trailing retained string. */
438 1.1 alm s += match[0].rm_eo;
439 1.8 cgd slen -= match[0].rm_eo;
440 1.40 christos cspace(&SS, s, (size_t)slen, APPEND);
441 1.1 alm break;
442 1.1 alm }
443 1.1 alm
444 1.1 alm /*
445 1.1 alm * Swap the substitute space and the pattern space, and make sure
446 1.1 alm * that any leftover pointers into stdio memory get lost.
447 1.1 alm */
448 1.1 alm tspace = PS;
449 1.1 alm PS = SS;
450 1.1 alm SS = tspace;
451 1.1 alm SS.space = SS.back;
452 1.1 alm
453 1.1 alm /* Handle the 'p' flag. */
454 1.1 alm if (cp->u.s->p)
455 1.40 christos OUT();
456 1.1 alm
457 1.1 alm /* Handle the 'w' flag. */
458 1.1 alm if (cp->u.s->wfile && !pd) {
459 1.1 alm if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
460 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
461 1.40 christos err(1, "%s", cp->u.s->wfile);
462 1.40 christos if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
463 1.40 christos write(cp->u.s->wfd, "\n", 1) != 1)
464 1.40 christos err(1, "%s", cp->u.s->wfile);
465 1.1 alm }
466 1.1 alm return (1);
467 1.1 alm }
468 1.1 alm
469 1.1 alm /*
470 1.40 christos * do_tr --
471 1.40 christos * Perform translation ('y' command) in the pattern space.
472 1.40 christos */
473 1.40 christos static void
474 1.40 christos do_tr(struct s_tr *y)
475 1.40 christos {
476 1.40 christos SPACE tmp;
477 1.40 christos char c, *p;
478 1.40 christos size_t clen, left;
479 1.40 christos size_t i;
480 1.40 christos
481 1.40 christos if (MB_CUR_MAX == 1) {
482 1.40 christos /*
483 1.40 christos * Single-byte encoding: perform in-place translation
484 1.40 christos * of the pattern space.
485 1.40 christos */
486 1.40 christos for (p = ps; p < &ps[psl]; p++)
487 1.40 christos *p = (char)y->bytetab[(u_char)*p];
488 1.40 christos } else {
489 1.40 christos /*
490 1.40 christos * Multi-byte encoding: perform translation into the
491 1.40 christos * translation space, then swap the translation and
492 1.40 christos * pattern spaces.
493 1.40 christos */
494 1.40 christos /* Clean translation space. */
495 1.40 christos YS.len = 0;
496 1.40 christos for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
497 1.40 christos if ((c = (char)y->bytetab[(u_char)*p]) != '\0') {
498 1.40 christos cspace(&YS, &c, 1, APPEND);
499 1.40 christos clen = 1;
500 1.40 christos continue;
501 1.40 christos }
502 1.40 christos for (i = 0; i < y->nmultis; i++)
503 1.40 christos if (left >= y->multis[i].fromlen &&
504 1.40 christos memcmp(p, y->multis[i].from,
505 1.40 christos y->multis[i].fromlen) == 0)
506 1.40 christos break;
507 1.40 christos if (i < y->nmultis) {
508 1.40 christos cspace(&YS, y->multis[i].to,
509 1.40 christos y->multis[i].tolen, APPEND);
510 1.40 christos clen = y->multis[i].fromlen;
511 1.40 christos } else {
512 1.40 christos cspace(&YS, p, 1, APPEND);
513 1.40 christos clen = 1;
514 1.40 christos }
515 1.40 christos }
516 1.40 christos /* Swap the translation space and the pattern space. */
517 1.40 christos tmp = PS;
518 1.40 christos PS = YS;
519 1.40 christos YS = tmp;
520 1.40 christos YS.space = YS.back;
521 1.40 christos }
522 1.40 christos }
523 1.40 christos
524 1.40 christos /*
525 1.1 alm * Flush append requests. Always called before reading a line,
526 1.1 alm * therefore it also resets the substitution done (sdone) flag.
527 1.1 alm */
528 1.1 alm static void
529 1.32 wiz flush_appends(void)
530 1.1 alm {
531 1.1 alm FILE *f;
532 1.40 christos size_t count, i;
533 1.1 alm char buf[8 * 1024];
534 1.1 alm
535 1.40 christos for (i = 0; i < appendx; i++)
536 1.1 alm switch (appends[i].type) {
537 1.1 alm case AP_STRING:
538 1.40 christos fwrite(appends[i].s, sizeof(char), appends[i].len,
539 1.40 christos outfile);
540 1.1 alm break;
541 1.1 alm case AP_FILE:
542 1.1 alm /*
543 1.1 alm * Read files probably shouldn't be cached. Since
544 1.1 alm * it's not an error to read a non-existent file,
545 1.1 alm * it's possible that another program is interacting
546 1.40 christos * with the sed script through the filesystem. It
547 1.1 alm * would be truly bizarre, but possible. It's probably
548 1.1 alm * not that big a performance win, anyhow.
549 1.1 alm */
550 1.1 alm if ((f = fopen(appends[i].s, "r")) == NULL)
551 1.1 alm break;
552 1.40 christos while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
553 1.40 christos (void)fwrite(buf, sizeof(char), count, outfile);
554 1.1 alm (void)fclose(f);
555 1.1 alm break;
556 1.1 alm }
557 1.40 christos if (ferror(outfile))
558 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
559 1.40 christos appendx = 0;
560 1.40 christos sdone = 0;
561 1.1 alm }
562 1.1 alm
563 1.1 alm static void
564 1.40 christos lputs(char *s, size_t len)
565 1.1 alm {
566 1.40 christos static const char escapes[] = "\\\a\b\f\r\t\v";
567 1.40 christos int c;
568 1.40 christos size_t col, width;
569 1.40 christos const char *p;
570 1.40 christos #ifdef TIOCGWINSZ
571 1.1 alm struct winsize win;
572 1.37 gdamore #endif
573 1.40 christos static size_t termwidth = (size_t)-1;
574 1.40 christos size_t clen, i;
575 1.40 christos wchar_t wc;
576 1.40 christos mbstate_t mbs;
577 1.40 christos
578 1.40 christos if (outfile != stdout)
579 1.40 christos termwidth = 60;
580 1.40 christos if (termwidth == (size_t)-1) {
581 1.40 christos if ((p = getenv("COLUMNS")) && *p != '\0')
582 1.40 christos termwidth = (size_t)atoi(p);
583 1.40 christos #ifdef TIOCGWINSZ
584 1.1 alm else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
585 1.1 alm win.ws_col > 0)
586 1.1 alm termwidth = win.ws_col;
587 1.37 gdamore #endif
588 1.1 alm else
589 1.1 alm termwidth = 60;
590 1.23 ross }
591 1.40 christos if (termwidth == 0)
592 1.40 christos termwidth = 1;
593 1.40 christos
594 1.40 christos memset(&mbs, 0, sizeof(mbs));
595 1.40 christos col = 0;
596 1.40 christos while (len != 0) {
597 1.40 christos clen = mbrtowc(&wc, s, len, &mbs);
598 1.40 christos if (clen == 0)
599 1.40 christos clen = 1;
600 1.40 christos if (clen == (size_t)-1 || clen == (size_t)-2) {
601 1.40 christos wc = (unsigned char)*s;
602 1.40 christos clen = 1;
603 1.40 christos memset(&mbs, 0, sizeof(mbs));
604 1.1 alm }
605 1.40 christos if (wc == '\n') {
606 1.40 christos if (col + 1 >= termwidth)
607 1.40 christos fprintf(outfile, "\\\n");
608 1.40 christos fputc('$', outfile);
609 1.40 christos fputc('\n', outfile);
610 1.40 christos col = 0;
611 1.40 christos } else if (iswprint(wc)) {
612 1.40 christos width = (size_t)wcwidth(wc);
613 1.40 christos if (col + width >= termwidth) {
614 1.40 christos fprintf(outfile, "\\\n");
615 1.40 christos col = 0;
616 1.40 christos }
617 1.40 christos fwrite(s, 1, clen, outfile);
618 1.40 christos col += width;
619 1.40 christos } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
620 1.40 christos (p = strchr(escapes, c)) != NULL) {
621 1.40 christos if (col + 2 >= termwidth) {
622 1.40 christos fprintf(outfile, "\\\n");
623 1.40 christos col = 0;
624 1.40 christos }
625 1.40 christos fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
626 1.40 christos col += 2;
627 1.1 alm } else {
628 1.40 christos if (col + 4 * clen >= termwidth) {
629 1.40 christos fprintf(outfile, "\\\n");
630 1.40 christos col = 0;
631 1.1 alm }
632 1.40 christos for (i = 0; i < clen; i++)
633 1.40 christos fprintf(outfile, "\\%03o",
634 1.40 christos (int)(unsigned char)s[i]);
635 1.40 christos col += 4 * clen;
636 1.1 alm }
637 1.40 christos s += clen;
638 1.40 christos len -= clen;
639 1.1 alm }
640 1.40 christos if (col + 1 >= termwidth)
641 1.40 christos fprintf(outfile, "\\\n");
642 1.40 christos (void)fputc('$', outfile);
643 1.40 christos (void)fputc('\n', outfile);
644 1.40 christos if (ferror(outfile))
645 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
646 1.1 alm }
647 1.1 alm
648 1.40 christos static __inline int
649 1.40 christos regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
650 1.40 christos size_t slen)
651 1.1 alm {
652 1.1 alm int eval;
653 1.44 christos #ifndef REG_STARTEND
654 1.44 christos char *buf;
655 1.44 christos #endif
656 1.40 christos
657 1.1 alm if (preg == NULL) {
658 1.1 alm if (defpreg == NULL)
659 1.40 christos errx(1, "first RE may not be empty");
660 1.1 alm } else
661 1.1 alm defpreg = preg;
662 1.1 alm
663 1.40 christos /* Set anchors */
664 1.44 christos #ifndef REG_STARTEND
665 1.44 christos buf = xmalloc(slen + 1);
666 1.44 christos (void)memcpy(buf, string, slen);
667 1.44 christos buf[slen] = '\0';
668 1.44 christos eval = regexec(defpreg, buf,
669 1.44 christos nomatch ? 0 : maxnsub + 1, match, eflags);
670 1.44 christos free(buf);
671 1.44 christos #else
672 1.8 cgd match[0].rm_so = 0;
673 1.40 christos match[0].rm_eo = (regoff_t)slen;
674 1.1 alm eval = regexec(defpreg, string,
675 1.8 cgd nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
676 1.44 christos #endif
677 1.1 alm switch(eval) {
678 1.1 alm case 0:
679 1.1 alm return (1);
680 1.1 alm case REG_NOMATCH:
681 1.1 alm return (0);
682 1.1 alm }
683 1.40 christos errx(1, "RE error: %s", strregerror(eval, defpreg));
684 1.1 alm /* NOTREACHED */
685 1.1 alm }
686 1.1 alm
687 1.1 alm /*
688 1.1 alm * regsub - perform substitutions after a regexp match
689 1.1 alm * Based on a routine by Henry Spencer
690 1.1 alm */
691 1.1 alm static void
692 1.32 wiz regsub(SPACE *sp, char *string, char *src)
693 1.1 alm {
694 1.40 christos size_t len;
695 1.40 christos int no;
696 1.20 lukem char c, *dst;
697 1.1 alm
698 1.1 alm #define NEEDSP(reqlen) \
699 1.40 christos /* XXX What is the +1 for? */ \
700 1.34 itojun if (sp->len + (reqlen) + 1 >= sp->blen) { \
701 1.40 christos sp->blen += (reqlen) + 1024; \
702 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen); \
703 1.1 alm dst = sp->space + sp->len; \
704 1.1 alm }
705 1.1 alm
706 1.1 alm dst = sp->space + sp->len;
707 1.1 alm while ((c = *src++) != '\0') {
708 1.1 alm if (c == '&')
709 1.1 alm no = 0;
710 1.24 christos else if (c == '\\' && isdigit((unsigned char)*src))
711 1.1 alm no = *src++ - '0';
712 1.1 alm else
713 1.1 alm no = -1;
714 1.1 alm if (no < 0) { /* Ordinary character. */
715 1.40 christos if (c == '\\' && (*src == '\\' || *src == '&'))
716 1.40 christos c = *src++;
717 1.1 alm NEEDSP(1);
718 1.40 christos *dst++ = c;
719 1.1 alm ++sp->len;
720 1.40 christos } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
721 1.40 christos len = (size_t)(match[no].rm_eo - match[no].rm_so);
722 1.1 alm NEEDSP(len);
723 1.1 alm memmove(dst, string + match[no].rm_so, len);
724 1.1 alm dst += len;
725 1.1 alm sp->len += len;
726 1.1 alm }
727 1.1 alm }
728 1.1 alm NEEDSP(1);
729 1.1 alm *dst = '\0';
730 1.1 alm }
731 1.1 alm
732 1.1 alm /*
733 1.40 christos * cspace --
734 1.40 christos * Concatenate space: append the source space to the destination space,
735 1.40 christos * allocating new space as necessary.
736 1.1 alm */
737 1.1 alm void
738 1.38 lukem cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
739 1.1 alm {
740 1.1 alm size_t tlen;
741 1.1 alm
742 1.8 cgd /* Make sure SPACE has enough memory and ramp up quickly. */
743 1.8 cgd tlen = sp->len + len + 1;
744 1.1 alm if (tlen > sp->blen) {
745 1.40 christos sp->blen = tlen + 1024;
746 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen);
747 1.1 alm }
748 1.1 alm
749 1.8 cgd if (spflag == REPLACE)
750 1.1 alm sp->len = 0;
751 1.1 alm
752 1.1 alm memmove(sp->space + sp->len, p, len);
753 1.8 cgd
754 1.1 alm sp->space[sp->len += len] = '\0';
755 1.1 alm }
756 1.1 alm
757 1.1 alm /*
758 1.1 alm * Close all cached opened files and report any errors
759 1.1 alm */
760 1.1 alm void
761 1.32 wiz cfclose(struct s_command *cp, struct s_command *end)
762 1.1 alm {
763 1.1 alm
764 1.1 alm for (; cp != end; cp = cp->next)
765 1.1 alm switch(cp->code) {
766 1.1 alm case 's':
767 1.1 alm if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
768 1.40 christos err(1, "%s", cp->u.s->wfile);
769 1.1 alm cp->u.s->wfd = -1;
770 1.1 alm break;
771 1.1 alm case 'w':
772 1.1 alm if (cp->u.fd != -1 && close(cp->u.fd))
773 1.40 christos err(1, "%s", cp->t);
774 1.1 alm cp->u.fd = -1;
775 1.1 alm break;
776 1.1 alm case '{':
777 1.1 alm cfclose(cp->u.c, cp->next);
778 1.1 alm break;
779 1.1 alm }
780 1.1 alm }
781