process.c revision 1.47 1 1.47 asau /* $NetBSD: process.c,v 1.47 2015/02/28 21:56:53 asau Exp $ */
2 1.18 tls
3 1.1 alm /*-
4 1.40 christos * Copyright (c) 1992 Diomidis Spinellis.
5 1.19 mrg * Copyright (c) 1992, 1993, 1994
6 1.8 cgd * The Regents of the University of California. All rights reserved.
7 1.1 alm *
8 1.1 alm * This code is derived from software contributed to Berkeley by
9 1.1 alm * Diomidis Spinellis of Imperial College, University of London.
10 1.1 alm *
11 1.1 alm * Redistribution and use in source and binary forms, with or without
12 1.1 alm * modification, are permitted provided that the following conditions
13 1.1 alm * are met:
14 1.1 alm * 1. Redistributions of source code must retain the above copyright
15 1.1 alm * notice, this list of conditions and the following disclaimer.
16 1.1 alm * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 alm * notice, this list of conditions and the following disclaimer in the
18 1.1 alm * documentation and/or other materials provided with the distribution.
19 1.33 agc * 3. Neither the name of the University nor the names of its contributors
20 1.33 agc * may be used to endorse or promote products derived from this software
21 1.33 agc * without specific prior written permission.
22 1.33 agc *
23 1.33 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 1.33 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 1.33 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 1.33 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 1.33 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 1.33 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 1.33 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 1.33 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 1.33 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 1.33 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 1.33 agc * SUCH DAMAGE.
34 1.33 agc */
35 1.33 agc
36 1.37 gdamore #if HAVE_NBTOOL_CONFIG_H
37 1.37 gdamore #include "nbtool_config.h"
38 1.37 gdamore #endif
39 1.37 gdamore
40 1.20 lukem #include <sys/cdefs.h>
41 1.47 asau __RCSID("$NetBSD: process.c,v 1.47 2015/02/28 21:56:53 asau Exp $");
42 1.40 christos #ifdef __FBSDID
43 1.40 christos __FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
44 1.40 christos #endif
45 1.40 christos
46 1.45 christos #if 0
47 1.45 christos static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
48 1.45 christos #endif
49 1.45 christos
50 1.1 alm #include <sys/types.h>
51 1.1 alm #include <sys/stat.h>
52 1.1 alm #include <sys/ioctl.h>
53 1.1 alm #include <sys/uio.h>
54 1.1 alm
55 1.1 alm #include <ctype.h>
56 1.40 christos #include <err.h>
57 1.1 alm #include <errno.h>
58 1.1 alm #include <fcntl.h>
59 1.47 asau #include <libgen.h>
60 1.1 alm #include <limits.h>
61 1.1 alm #include <regex.h>
62 1.1 alm #include <stdio.h>
63 1.1 alm #include <stdlib.h>
64 1.1 alm #include <string.h>
65 1.1 alm #include <unistd.h>
66 1.40 christos #include <wchar.h>
67 1.40 christos #include <wctype.h>
68 1.1 alm
69 1.1 alm #include "defs.h"
70 1.1 alm #include "extern.h"
71 1.1 alm
72 1.40 christos static SPACE HS, PS, SS, YS;
73 1.1 alm #define pd PS.deleted
74 1.1 alm #define ps PS.space
75 1.1 alm #define psl PS.len
76 1.1 alm #define hs HS.space
77 1.1 alm #define hsl HS.len
78 1.1 alm
79 1.47 asau static int mf_fgets(SPACE *, enum e_spflag);
80 1.47 asau static int lastline(void);
81 1.47 asau
82 1.40 christos static __inline int applies(struct s_command *);
83 1.40 christos static void do_tr(struct s_tr *);
84 1.32 wiz static void flush_appends(void);
85 1.40 christos static void lputs(char *, size_t);
86 1.40 christos static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
87 1.32 wiz static void regsub(SPACE *, char *, char *);
88 1.32 wiz static int substitute(struct s_command *);
89 1.1 alm
90 1.47 asau /*
91 1.47 asau * Current file and line number; line numbers restart across compilation
92 1.47 asau * units, but span across input files. The latter is optional if editing
93 1.47 asau * in place.
94 1.47 asau */
95 1.47 asau static const char *fname; /* File name. */
96 1.47 asau static const char *outfname; /* Output file name */
97 1.47 asau static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */
98 1.47 asau static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
99 1.47 asau static const char *inplace; /* Inplace edit file extension. */
100 1.47 asau static u_long linenum;
101 1.47 asau
102 1.47 asau static int rval; /* Exit status */
103 1.47 asau
104 1.1 alm struct s_appends *appends; /* Array of pointers to strings to append. */
105 1.40 christos static size_t appendx; /* Index into appends array. */
106 1.40 christos size_t appendnum; /* Size of appends array. */
107 1.1 alm
108 1.1 alm static int lastaddr; /* Set by applies if last address of a range. */
109 1.1 alm static int sdone; /* If any substitutes since last line input. */
110 1.1 alm /* Iov structure for 'w' commands. */
111 1.1 alm static regex_t *defpreg;
112 1.1 alm size_t maxnsub;
113 1.1 alm regmatch_t *match;
114 1.1 alm
115 1.40 christos #define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
116 1.8 cgd
117 1.47 asau int
118 1.32 wiz process(void)
119 1.1 alm {
120 1.1 alm struct s_command *cp;
121 1.1 alm SPACE tspace;
122 1.40 christos size_t oldpsl = 0;
123 1.15 mycroft char *p;
124 1.1 alm
125 1.40 christos p = NULL;
126 1.40 christos
127 1.1 alm for (linenum = 0; mf_fgets(&PS, REPLACE);) {
128 1.1 alm pd = 0;
129 1.16 mycroft top:
130 1.1 alm cp = prog;
131 1.1 alm redirect:
132 1.1 alm while (cp != NULL) {
133 1.1 alm if (!applies(cp)) {
134 1.1 alm cp = cp->next;
135 1.1 alm continue;
136 1.1 alm }
137 1.1 alm switch (cp->code) {
138 1.1 alm case '{':
139 1.1 alm cp = cp->u.c;
140 1.1 alm goto redirect;
141 1.1 alm case 'a':
142 1.40 christos if (appendx >= appendnum)
143 1.1 alm appends = xrealloc(appends,
144 1.1 alm sizeof(struct s_appends) *
145 1.40 christos (appendnum *= 2));
146 1.1 alm appends[appendx].type = AP_STRING;
147 1.1 alm appends[appendx].s = cp->t;
148 1.8 cgd appends[appendx].len = strlen(cp->t);
149 1.1 alm appendx++;
150 1.1 alm break;
151 1.1 alm case 'b':
152 1.1 alm cp = cp->u.c;
153 1.1 alm goto redirect;
154 1.1 alm case 'c':
155 1.1 alm pd = 1;
156 1.1 alm psl = 0;
157 1.40 christos if (cp->a2 == NULL || lastaddr || lastline())
158 1.40 christos (void)fprintf(outfile, "%s", cp->t);
159 1.39 uwe goto new;
160 1.1 alm case 'd':
161 1.1 alm pd = 1;
162 1.1 alm goto new;
163 1.1 alm case 'D':
164 1.1 alm if (pd)
165 1.1 alm goto new;
166 1.40 christos if (psl == 0 ||
167 1.40 christos (p = memchr(ps, '\n', psl - 1)) == NULL) {
168 1.1 alm pd = 1;
169 1.16 mycroft goto new;
170 1.28 atatat } else {
171 1.40 christos psl -= (size_t)((p + 1) - ps);
172 1.1 alm memmove(ps, p + 1, psl);
173 1.28 atatat goto top;
174 1.1 alm }
175 1.1 alm case 'g':
176 1.1 alm cspace(&PS, hs, hsl, REPLACE);
177 1.1 alm break;
178 1.1 alm case 'G':
179 1.43 christos cspace(&PS, "\n", 1, APPEND);
180 1.40 christos cspace(&PS, hs, hsl, APPEND);
181 1.1 alm break;
182 1.1 alm case 'h':
183 1.1 alm cspace(&HS, ps, psl, REPLACE);
184 1.1 alm break;
185 1.1 alm case 'H':
186 1.40 christos cspace(&HS, "\n", 1, APPEND);
187 1.40 christos cspace(&HS, ps, psl, APPEND);
188 1.1 alm break;
189 1.1 alm case 'i':
190 1.40 christos (void)fprintf(outfile, "%s", cp->t);
191 1.1 alm break;
192 1.1 alm case 'l':
193 1.40 christos lputs(ps, psl);
194 1.1 alm break;
195 1.1 alm case 'n':
196 1.1 alm if (!nflag && !pd)
197 1.40 christos OUT();
198 1.1 alm flush_appends();
199 1.14 mycroft if (!mf_fgets(&PS, REPLACE))
200 1.1 alm exit(0);
201 1.1 alm pd = 0;
202 1.1 alm break;
203 1.1 alm case 'N':
204 1.1 alm flush_appends();
205 1.40 christos cspace(&PS, "\n", 1, APPEND);
206 1.40 christos if (!mf_fgets(&PS, APPEND))
207 1.1 alm exit(0);
208 1.1 alm break;
209 1.1 alm case 'p':
210 1.1 alm if (pd)
211 1.1 alm break;
212 1.40 christos OUT();
213 1.1 alm break;
214 1.1 alm case 'P':
215 1.1 alm if (pd)
216 1.1 alm break;
217 1.15 mycroft if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
218 1.15 mycroft oldpsl = psl;
219 1.40 christos psl = (size_t)(p - ps);
220 1.1 alm }
221 1.40 christos OUT();
222 1.1 alm if (p != NULL)
223 1.15 mycroft psl = oldpsl;
224 1.1 alm break;
225 1.1 alm case 'q':
226 1.1 alm if (!nflag && !pd)
227 1.40 christos OUT();
228 1.1 alm flush_appends();
229 1.1 alm exit(0);
230 1.1 alm case 'r':
231 1.40 christos if (appendx >= appendnum)
232 1.1 alm appends = xrealloc(appends,
233 1.1 alm sizeof(struct s_appends) *
234 1.40 christos (appendnum *= 2));
235 1.1 alm appends[appendx].type = AP_FILE;
236 1.1 alm appends[appendx].s = cp->t;
237 1.8 cgd appends[appendx].len = strlen(cp->t);
238 1.1 alm appendx++;
239 1.1 alm break;
240 1.1 alm case 's':
241 1.1 alm sdone |= substitute(cp);
242 1.1 alm break;
243 1.1 alm case 't':
244 1.1 alm if (sdone) {
245 1.1 alm sdone = 0;
246 1.1 alm cp = cp->u.c;
247 1.1 alm goto redirect;
248 1.1 alm }
249 1.1 alm break;
250 1.1 alm case 'w':
251 1.1 alm if (pd)
252 1.1 alm break;
253 1.1 alm if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
254 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
255 1.1 alm DEFFILEMODE)) == -1)
256 1.40 christos err(1, "%s", cp->t);
257 1.40 christos if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
258 1.40 christos write(cp->u.fd, "\n", 1) != 1)
259 1.40 christos err(1, "%s", cp->t);
260 1.1 alm break;
261 1.1 alm case 'x':
262 1.40 christos /*
263 1.40 christos * If the hold space is null, make it empty
264 1.40 christos * but not null. Otherwise the pattern space
265 1.40 christos * will become null after the swap, which is
266 1.40 christos * an abnormal condition.
267 1.40 christos */
268 1.8 cgd if (hs == NULL)
269 1.40 christos cspace(&HS, "", 0, REPLACE);
270 1.1 alm tspace = PS;
271 1.1 alm PS = HS;
272 1.1 alm HS = tspace;
273 1.1 alm break;
274 1.1 alm case 'y':
275 1.40 christos if (pd || psl == 0)
276 1.1 alm break;
277 1.40 christos do_tr(cp->u.y);
278 1.1 alm break;
279 1.1 alm case ':':
280 1.1 alm case '}':
281 1.1 alm break;
282 1.1 alm case '=':
283 1.40 christos (void)fprintf(outfile, "%lu\n", linenum);
284 1.1 alm }
285 1.1 alm cp = cp->next;
286 1.1 alm } /* for all cp */
287 1.1 alm
288 1.1 alm new: if (!nflag && !pd)
289 1.40 christos OUT();
290 1.1 alm flush_appends();
291 1.1 alm } /* for all lines */
292 1.47 asau return rval;
293 1.1 alm }
294 1.1 alm
295 1.1 alm /*
296 1.1 alm * TRUE if the address passed matches the current program state
297 1.1 alm * (lastline, linenumber, ps).
298 1.1 alm */
299 1.40 christos #define MATCH(a) \
300 1.40 christos ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
301 1.40 christos (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
302 1.1 alm
303 1.1 alm /*
304 1.40 christos * Return TRUE if the command applies to the current line. Sets the start
305 1.40 christos * line for process ranges. Interprets the non-select (``!'') flag.
306 1.1 alm */
307 1.40 christos static __inline int
308 1.32 wiz applies(struct s_command *cp)
309 1.1 alm {
310 1.1 alm int r;
311 1.1 alm
312 1.1 alm lastaddr = 0;
313 1.1 alm if (cp->a1 == NULL && cp->a2 == NULL)
314 1.1 alm r = 1;
315 1.40 christos else if (cp->a2)
316 1.40 christos if (cp->startline > 0) {
317 1.46 christos switch (cp->a2->type) {
318 1.46 christos case AT_RELLINE:
319 1.46 christos if (linenum - cp->startline <= cp->a2->u.l)
320 1.46 christos r = 1;
321 1.46 christos else {
322 1.46 christos cp->startline = 0;
323 1.46 christos r = 0;
324 1.46 christos }
325 1.46 christos break;
326 1.46 christos default:
327 1.46 christos if (MATCH(cp->a2)) {
328 1.46 christos cp->startline = 0;
329 1.46 christos lastaddr = 1;
330 1.46 christos r = 1;
331 1.46 christos } else if (cp->a2->type == AT_LINE &&
332 1.46 christos linenum > cp->a2->u.l) {
333 1.46 christos /*
334 1.46 christos * We missed the 2nd address due to a
335 1.46 christos * branch, so just close the range and
336 1.46 christos * return false.
337 1.46 christos */
338 1.46 christos cp->startline = 0;
339 1.46 christos r = 0;
340 1.46 christos } else
341 1.46 christos r = 1;
342 1.46 christos }
343 1.36 christos } else if (cp->a1 && MATCH(cp->a1)) {
344 1.1 alm /*
345 1.1 alm * If the second address is a number less than or
346 1.1 alm * equal to the line number first selected, only
347 1.1 alm * one line shall be selected.
348 1.1 alm * -- POSIX 1003.2
349 1.40 christos * Likewise if the relative second line address is zero.
350 1.1 alm */
351 1.40 christos if ((cp->a2->type == AT_LINE &&
352 1.40 christos linenum >= cp->a2->u.l) ||
353 1.40 christos (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
354 1.1 alm lastaddr = 1;
355 1.40 christos else {
356 1.40 christos cp->startline = linenum;
357 1.40 christos }
358 1.1 alm r = 1;
359 1.1 alm } else
360 1.1 alm r = 0;
361 1.40 christos else
362 1.1 alm r = MATCH(cp->a1);
363 1.1 alm return (cp->nonsel ? ! r : r);
364 1.1 alm }
365 1.1 alm
366 1.1 alm /*
367 1.40 christos * Reset the sed processor to its initial state.
368 1.40 christos */
369 1.40 christos void
370 1.40 christos resetstate(void)
371 1.40 christos {
372 1.40 christos struct s_command *cp;
373 1.40 christos
374 1.40 christos /*
375 1.40 christos * Reset all in-range markers.
376 1.40 christos */
377 1.40 christos for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
378 1.40 christos if (cp->a2)
379 1.40 christos cp->startline = 0;
380 1.40 christos
381 1.40 christos /*
382 1.40 christos * Clear out the hold space.
383 1.40 christos */
384 1.40 christos cspace(&HS, "", 0, REPLACE);
385 1.40 christos }
386 1.40 christos
387 1.40 christos /*
388 1.1 alm * substitute --
389 1.1 alm * Do substitutions in the pattern space. Currently, we build a
390 1.1 alm * copy of the new pattern space in the substitute space structure
391 1.1 alm * and then swap them.
392 1.1 alm */
393 1.1 alm static int
394 1.32 wiz substitute(struct s_command *cp)
395 1.1 alm {
396 1.1 alm SPACE tspace;
397 1.1 alm regex_t *re;
398 1.40 christos regoff_t re_off, slen;
399 1.19 mrg int lastempty, n;
400 1.1 alm char *s;
401 1.1 alm
402 1.1 alm s = ps;
403 1.1 alm re = cp->u.s->re;
404 1.1 alm if (re == NULL) {
405 1.40 christos if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
406 1.1 alm linenum = cp->u.s->linenum;
407 1.40 christos errx(1, "%lu: %s: \\%u not defined in the RE",
408 1.40 christos linenum, fname, cp->u.s->maxbref);
409 1.1 alm }
410 1.1 alm }
411 1.8 cgd if (!regexec_e(re, s, 0, 0, psl))
412 1.1 alm return (0);
413 1.1 alm
414 1.1 alm SS.len = 0; /* Clean substitute space. */
415 1.40 christos slen = (regoff_t)psl;
416 1.1 alm n = cp->u.s->n;
417 1.12 cgd lastempty = 1;
418 1.12 cgd
419 1.1 alm switch (n) {
420 1.1 alm case 0: /* Global */
421 1.1 alm do {
422 1.12 cgd if (lastempty || match[0].rm_so != match[0].rm_eo) {
423 1.12 cgd /* Locate start of replaced string. */
424 1.12 cgd re_off = match[0].rm_so;
425 1.12 cgd /* Copy leading retained string. */
426 1.40 christos cspace(&SS, s, (size_t)re_off, APPEND);
427 1.12 cgd /* Add in regular expression. */
428 1.12 cgd regsub(&SS, s, cp->u.s->new);
429 1.12 cgd }
430 1.12 cgd
431 1.1 alm /* Move past this match. */
432 1.12 cgd if (match[0].rm_so != match[0].rm_eo) {
433 1.12 cgd s += match[0].rm_eo;
434 1.12 cgd slen -= match[0].rm_eo;
435 1.12 cgd lastempty = 0;
436 1.12 cgd } else {
437 1.40 christos if (match[0].rm_so < slen)
438 1.40 christos cspace(&SS, s + match[0].rm_so, 1,
439 1.40 christos APPEND);
440 1.12 cgd s += match[0].rm_so + 1;
441 1.12 cgd slen -= match[0].rm_so + 1;
442 1.12 cgd lastempty = 1;
443 1.12 cgd }
444 1.40 christos } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen));
445 1.1 alm /* Copy trailing retained string. */
446 1.12 cgd if (slen > 0)
447 1.40 christos cspace(&SS, s, (size_t)slen, APPEND);
448 1.1 alm break;
449 1.1 alm default: /* Nth occurrence */
450 1.1 alm while (--n) {
451 1.40 christos if (match[0].rm_eo == match[0].rm_so)
452 1.40 christos match[0].rm_eo = match[0].rm_so + 1;
453 1.1 alm s += match[0].rm_eo;
454 1.8 cgd slen -= match[0].rm_eo;
455 1.40 christos if (slen < 0)
456 1.40 christos return (0);
457 1.40 christos if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen))
458 1.1 alm return (0);
459 1.1 alm }
460 1.1 alm /* FALLTHROUGH */
461 1.1 alm case 1: /* 1st occurrence */
462 1.1 alm /* Locate start of replaced string. */
463 1.1 alm re_off = match[0].rm_so + (s - ps);
464 1.1 alm /* Copy leading retained string. */
465 1.40 christos cspace(&SS, ps, (size_t)re_off, APPEND);
466 1.1 alm /* Add in regular expression. */
467 1.1 alm regsub(&SS, s, cp->u.s->new);
468 1.1 alm /* Copy trailing retained string. */
469 1.1 alm s += match[0].rm_eo;
470 1.8 cgd slen -= match[0].rm_eo;
471 1.40 christos cspace(&SS, s, (size_t)slen, APPEND);
472 1.1 alm break;
473 1.1 alm }
474 1.1 alm
475 1.1 alm /*
476 1.1 alm * Swap the substitute space and the pattern space, and make sure
477 1.1 alm * that any leftover pointers into stdio memory get lost.
478 1.1 alm */
479 1.1 alm tspace = PS;
480 1.1 alm PS = SS;
481 1.1 alm SS = tspace;
482 1.1 alm SS.space = SS.back;
483 1.1 alm
484 1.1 alm /* Handle the 'p' flag. */
485 1.1 alm if (cp->u.s->p)
486 1.40 christos OUT();
487 1.1 alm
488 1.1 alm /* Handle the 'w' flag. */
489 1.1 alm if (cp->u.s->wfile && !pd) {
490 1.1 alm if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
491 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
492 1.40 christos err(1, "%s", cp->u.s->wfile);
493 1.40 christos if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
494 1.40 christos write(cp->u.s->wfd, "\n", 1) != 1)
495 1.40 christos err(1, "%s", cp->u.s->wfile);
496 1.1 alm }
497 1.1 alm return (1);
498 1.1 alm }
499 1.1 alm
500 1.1 alm /*
501 1.40 christos * do_tr --
502 1.40 christos * Perform translation ('y' command) in the pattern space.
503 1.40 christos */
504 1.40 christos static void
505 1.40 christos do_tr(struct s_tr *y)
506 1.40 christos {
507 1.40 christos SPACE tmp;
508 1.40 christos char c, *p;
509 1.40 christos size_t clen, left;
510 1.40 christos size_t i;
511 1.40 christos
512 1.40 christos if (MB_CUR_MAX == 1) {
513 1.40 christos /*
514 1.40 christos * Single-byte encoding: perform in-place translation
515 1.40 christos * of the pattern space.
516 1.40 christos */
517 1.40 christos for (p = ps; p < &ps[psl]; p++)
518 1.40 christos *p = (char)y->bytetab[(u_char)*p];
519 1.40 christos } else {
520 1.40 christos /*
521 1.40 christos * Multi-byte encoding: perform translation into the
522 1.40 christos * translation space, then swap the translation and
523 1.40 christos * pattern spaces.
524 1.40 christos */
525 1.40 christos /* Clean translation space. */
526 1.40 christos YS.len = 0;
527 1.40 christos for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
528 1.40 christos if ((c = (char)y->bytetab[(u_char)*p]) != '\0') {
529 1.40 christos cspace(&YS, &c, 1, APPEND);
530 1.40 christos clen = 1;
531 1.40 christos continue;
532 1.40 christos }
533 1.40 christos for (i = 0; i < y->nmultis; i++)
534 1.40 christos if (left >= y->multis[i].fromlen &&
535 1.40 christos memcmp(p, y->multis[i].from,
536 1.40 christos y->multis[i].fromlen) == 0)
537 1.40 christos break;
538 1.40 christos if (i < y->nmultis) {
539 1.40 christos cspace(&YS, y->multis[i].to,
540 1.40 christos y->multis[i].tolen, APPEND);
541 1.40 christos clen = y->multis[i].fromlen;
542 1.40 christos } else {
543 1.40 christos cspace(&YS, p, 1, APPEND);
544 1.40 christos clen = 1;
545 1.40 christos }
546 1.40 christos }
547 1.40 christos /* Swap the translation space and the pattern space. */
548 1.40 christos tmp = PS;
549 1.40 christos PS = YS;
550 1.40 christos YS = tmp;
551 1.40 christos YS.space = YS.back;
552 1.40 christos }
553 1.40 christos }
554 1.40 christos
555 1.40 christos /*
556 1.1 alm * Flush append requests. Always called before reading a line,
557 1.1 alm * therefore it also resets the substitution done (sdone) flag.
558 1.1 alm */
559 1.1 alm static void
560 1.32 wiz flush_appends(void)
561 1.1 alm {
562 1.1 alm FILE *f;
563 1.40 christos size_t count, i;
564 1.1 alm char buf[8 * 1024];
565 1.1 alm
566 1.40 christos for (i = 0; i < appendx; i++)
567 1.1 alm switch (appends[i].type) {
568 1.1 alm case AP_STRING:
569 1.40 christos fwrite(appends[i].s, sizeof(char), appends[i].len,
570 1.40 christos outfile);
571 1.1 alm break;
572 1.1 alm case AP_FILE:
573 1.1 alm /*
574 1.1 alm * Read files probably shouldn't be cached. Since
575 1.1 alm * it's not an error to read a non-existent file,
576 1.1 alm * it's possible that another program is interacting
577 1.40 christos * with the sed script through the filesystem. It
578 1.1 alm * would be truly bizarre, but possible. It's probably
579 1.1 alm * not that big a performance win, anyhow.
580 1.1 alm */
581 1.1 alm if ((f = fopen(appends[i].s, "r")) == NULL)
582 1.1 alm break;
583 1.40 christos while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
584 1.40 christos (void)fwrite(buf, sizeof(char), count, outfile);
585 1.1 alm (void)fclose(f);
586 1.1 alm break;
587 1.1 alm }
588 1.40 christos if (ferror(outfile))
589 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
590 1.40 christos appendx = 0;
591 1.40 christos sdone = 0;
592 1.1 alm }
593 1.1 alm
594 1.1 alm static void
595 1.40 christos lputs(char *s, size_t len)
596 1.1 alm {
597 1.40 christos static const char escapes[] = "\\\a\b\f\r\t\v";
598 1.40 christos int c;
599 1.40 christos size_t col, width;
600 1.40 christos const char *p;
601 1.40 christos #ifdef TIOCGWINSZ
602 1.1 alm struct winsize win;
603 1.37 gdamore #endif
604 1.40 christos static size_t termwidth = (size_t)-1;
605 1.40 christos size_t clen, i;
606 1.40 christos wchar_t wc;
607 1.40 christos mbstate_t mbs;
608 1.40 christos
609 1.40 christos if (outfile != stdout)
610 1.40 christos termwidth = 60;
611 1.40 christos if (termwidth == (size_t)-1) {
612 1.40 christos if ((p = getenv("COLUMNS")) && *p != '\0')
613 1.40 christos termwidth = (size_t)atoi(p);
614 1.40 christos #ifdef TIOCGWINSZ
615 1.1 alm else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
616 1.1 alm win.ws_col > 0)
617 1.1 alm termwidth = win.ws_col;
618 1.37 gdamore #endif
619 1.1 alm else
620 1.1 alm termwidth = 60;
621 1.23 ross }
622 1.40 christos if (termwidth == 0)
623 1.40 christos termwidth = 1;
624 1.40 christos
625 1.40 christos memset(&mbs, 0, sizeof(mbs));
626 1.40 christos col = 0;
627 1.40 christos while (len != 0) {
628 1.40 christos clen = mbrtowc(&wc, s, len, &mbs);
629 1.40 christos if (clen == 0)
630 1.40 christos clen = 1;
631 1.40 christos if (clen == (size_t)-1 || clen == (size_t)-2) {
632 1.40 christos wc = (unsigned char)*s;
633 1.40 christos clen = 1;
634 1.40 christos memset(&mbs, 0, sizeof(mbs));
635 1.1 alm }
636 1.40 christos if (wc == '\n') {
637 1.40 christos if (col + 1 >= termwidth)
638 1.40 christos fprintf(outfile, "\\\n");
639 1.40 christos fputc('$', outfile);
640 1.40 christos fputc('\n', outfile);
641 1.40 christos col = 0;
642 1.40 christos } else if (iswprint(wc)) {
643 1.40 christos width = (size_t)wcwidth(wc);
644 1.40 christos if (col + width >= termwidth) {
645 1.40 christos fprintf(outfile, "\\\n");
646 1.40 christos col = 0;
647 1.40 christos }
648 1.40 christos fwrite(s, 1, clen, outfile);
649 1.40 christos col += width;
650 1.40 christos } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
651 1.40 christos (p = strchr(escapes, c)) != NULL) {
652 1.40 christos if (col + 2 >= termwidth) {
653 1.40 christos fprintf(outfile, "\\\n");
654 1.40 christos col = 0;
655 1.40 christos }
656 1.40 christos fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
657 1.40 christos col += 2;
658 1.1 alm } else {
659 1.40 christos if (col + 4 * clen >= termwidth) {
660 1.40 christos fprintf(outfile, "\\\n");
661 1.40 christos col = 0;
662 1.1 alm }
663 1.40 christos for (i = 0; i < clen; i++)
664 1.40 christos fprintf(outfile, "\\%03o",
665 1.40 christos (int)(unsigned char)s[i]);
666 1.40 christos col += 4 * clen;
667 1.1 alm }
668 1.40 christos s += clen;
669 1.40 christos len -= clen;
670 1.1 alm }
671 1.40 christos if (col + 1 >= termwidth)
672 1.40 christos fprintf(outfile, "\\\n");
673 1.40 christos (void)fputc('$', outfile);
674 1.40 christos (void)fputc('\n', outfile);
675 1.40 christos if (ferror(outfile))
676 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
677 1.1 alm }
678 1.1 alm
679 1.40 christos static __inline int
680 1.40 christos regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
681 1.40 christos size_t slen)
682 1.1 alm {
683 1.1 alm int eval;
684 1.44 christos #ifndef REG_STARTEND
685 1.44 christos char *buf;
686 1.44 christos #endif
687 1.40 christos
688 1.1 alm if (preg == NULL) {
689 1.1 alm if (defpreg == NULL)
690 1.40 christos errx(1, "first RE may not be empty");
691 1.1 alm } else
692 1.1 alm defpreg = preg;
693 1.1 alm
694 1.40 christos /* Set anchors */
695 1.44 christos #ifndef REG_STARTEND
696 1.44 christos buf = xmalloc(slen + 1);
697 1.44 christos (void)memcpy(buf, string, slen);
698 1.44 christos buf[slen] = '\0';
699 1.44 christos eval = regexec(defpreg, buf,
700 1.44 christos nomatch ? 0 : maxnsub + 1, match, eflags);
701 1.44 christos free(buf);
702 1.44 christos #else
703 1.8 cgd match[0].rm_so = 0;
704 1.40 christos match[0].rm_eo = (regoff_t)slen;
705 1.1 alm eval = regexec(defpreg, string,
706 1.8 cgd nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
707 1.44 christos #endif
708 1.1 alm switch(eval) {
709 1.1 alm case 0:
710 1.1 alm return (1);
711 1.1 alm case REG_NOMATCH:
712 1.1 alm return (0);
713 1.1 alm }
714 1.40 christos errx(1, "RE error: %s", strregerror(eval, defpreg));
715 1.1 alm /* NOTREACHED */
716 1.1 alm }
717 1.1 alm
718 1.1 alm /*
719 1.1 alm * regsub - perform substitutions after a regexp match
720 1.1 alm * Based on a routine by Henry Spencer
721 1.1 alm */
722 1.1 alm static void
723 1.32 wiz regsub(SPACE *sp, char *string, char *src)
724 1.1 alm {
725 1.40 christos size_t len;
726 1.40 christos int no;
727 1.20 lukem char c, *dst;
728 1.1 alm
729 1.1 alm #define NEEDSP(reqlen) \
730 1.40 christos /* XXX What is the +1 for? */ \
731 1.34 itojun if (sp->len + (reqlen) + 1 >= sp->blen) { \
732 1.40 christos sp->blen += (reqlen) + 1024; \
733 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen); \
734 1.1 alm dst = sp->space + sp->len; \
735 1.1 alm }
736 1.1 alm
737 1.1 alm dst = sp->space + sp->len;
738 1.1 alm while ((c = *src++) != '\0') {
739 1.1 alm if (c == '&')
740 1.1 alm no = 0;
741 1.24 christos else if (c == '\\' && isdigit((unsigned char)*src))
742 1.1 alm no = *src++ - '0';
743 1.1 alm else
744 1.1 alm no = -1;
745 1.1 alm if (no < 0) { /* Ordinary character. */
746 1.40 christos if (c == '\\' && (*src == '\\' || *src == '&'))
747 1.40 christos c = *src++;
748 1.1 alm NEEDSP(1);
749 1.40 christos *dst++ = c;
750 1.1 alm ++sp->len;
751 1.40 christos } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
752 1.40 christos len = (size_t)(match[no].rm_eo - match[no].rm_so);
753 1.1 alm NEEDSP(len);
754 1.1 alm memmove(dst, string + match[no].rm_so, len);
755 1.1 alm dst += len;
756 1.1 alm sp->len += len;
757 1.1 alm }
758 1.1 alm }
759 1.1 alm NEEDSP(1);
760 1.1 alm *dst = '\0';
761 1.1 alm }
762 1.1 alm
763 1.1 alm /*
764 1.40 christos * cspace --
765 1.40 christos * Concatenate space: append the source space to the destination space,
766 1.40 christos * allocating new space as necessary.
767 1.1 alm */
768 1.1 alm void
769 1.38 lukem cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
770 1.1 alm {
771 1.1 alm size_t tlen;
772 1.1 alm
773 1.8 cgd /* Make sure SPACE has enough memory and ramp up quickly. */
774 1.8 cgd tlen = sp->len + len + 1;
775 1.1 alm if (tlen > sp->blen) {
776 1.40 christos sp->blen = tlen + 1024;
777 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen);
778 1.1 alm }
779 1.1 alm
780 1.8 cgd if (spflag == REPLACE)
781 1.1 alm sp->len = 0;
782 1.1 alm
783 1.1 alm memmove(sp->space + sp->len, p, len);
784 1.8 cgd
785 1.1 alm sp->space[sp->len += len] = '\0';
786 1.1 alm }
787 1.1 alm
788 1.1 alm /*
789 1.1 alm * Close all cached opened files and report any errors
790 1.1 alm */
791 1.1 alm void
792 1.32 wiz cfclose(struct s_command *cp, struct s_command *end)
793 1.1 alm {
794 1.1 alm
795 1.1 alm for (; cp != end; cp = cp->next)
796 1.1 alm switch(cp->code) {
797 1.1 alm case 's':
798 1.1 alm if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
799 1.40 christos err(1, "%s", cp->u.s->wfile);
800 1.1 alm cp->u.s->wfd = -1;
801 1.1 alm break;
802 1.1 alm case 'w':
803 1.1 alm if (cp->u.fd != -1 && close(cp->u.fd))
804 1.40 christos err(1, "%s", cp->t);
805 1.1 alm cp->u.fd = -1;
806 1.1 alm break;
807 1.1 alm case '{':
808 1.1 alm cfclose(cp->u.c, cp->next);
809 1.1 alm break;
810 1.1 alm }
811 1.1 alm }
812 1.47 asau
813 1.47 asau /*
814 1.47 asau * Like fgets, but go through the list of files chaining them together.
815 1.47 asau * Set len to the length of the line.
816 1.47 asau */
817 1.47 asau int
818 1.47 asau mf_fgets(SPACE *sp, enum e_spflag spflag)
819 1.47 asau {
820 1.47 asau struct stat sb;
821 1.47 asau size_t len;
822 1.47 asau static char *p = NULL;
823 1.47 asau static size_t plen = 0;
824 1.47 asau int c;
825 1.47 asau static int firstfile;
826 1.47 asau
827 1.47 asau if (infile == NULL) {
828 1.47 asau /* stdin? */
829 1.47 asau if (files->fname == NULL) {
830 1.47 asau if (inplace != NULL)
831 1.47 asau errx(1, "-I or -i may not be used with stdin");
832 1.47 asau infile = stdin;
833 1.47 asau fname = "stdin";
834 1.47 asau outfile = stdout;
835 1.47 asau outfname = "stdout";
836 1.47 asau }
837 1.47 asau firstfile = 1;
838 1.47 asau }
839 1.47 asau
840 1.47 asau for (;;) {
841 1.47 asau if (infile != NULL && (c = getc(infile)) != EOF) {
842 1.47 asau (void)ungetc(c, infile);
843 1.47 asau break;
844 1.47 asau }
845 1.47 asau /* If we are here then either eof or no files are open yet */
846 1.47 asau if (infile == stdin) {
847 1.47 asau sp->len = 0;
848 1.47 asau return (0);
849 1.47 asau }
850 1.47 asau if (infile != NULL) {
851 1.47 asau fclose(infile);
852 1.47 asau if (*oldfname != '\0') {
853 1.47 asau /* if there was a backup file, remove it */
854 1.47 asau unlink(oldfname);
855 1.47 asau /*
856 1.47 asau * Backup the original. Note that hard links
857 1.47 asau * are not supported on all filesystems.
858 1.47 asau */
859 1.47 asau if ((link(fname, oldfname) != 0) &&
860 1.47 asau (rename(fname, oldfname) != 0)) {
861 1.47 asau warn("rename()");
862 1.47 asau if (*tmpfname)
863 1.47 asau unlink(tmpfname);
864 1.47 asau exit(1);
865 1.47 asau }
866 1.47 asau *oldfname = '\0';
867 1.47 asau }
868 1.47 asau if (*tmpfname != '\0') {
869 1.47 asau if (outfile != NULL && outfile != stdout)
870 1.47 asau if (fclose(outfile) != 0) {
871 1.47 asau warn("fclose()");
872 1.47 asau unlink(tmpfname);
873 1.47 asau exit(1);
874 1.47 asau }
875 1.47 asau outfile = NULL;
876 1.47 asau if (rename(tmpfname, fname) != 0) {
877 1.47 asau /* this should not happen really! */
878 1.47 asau warn("rename()");
879 1.47 asau unlink(tmpfname);
880 1.47 asau exit(1);
881 1.47 asau }
882 1.47 asau *tmpfname = '\0';
883 1.47 asau }
884 1.47 asau outfname = NULL;
885 1.47 asau }
886 1.47 asau if (firstfile == 0)
887 1.47 asau files = files->next;
888 1.47 asau else
889 1.47 asau firstfile = 0;
890 1.47 asau if (files == NULL) {
891 1.47 asau sp->len = 0;
892 1.47 asau return (0);
893 1.47 asau }
894 1.47 asau fname = files->fname;
895 1.47 asau if (inplace != NULL) {
896 1.47 asau if (lstat(fname, &sb) != 0)
897 1.47 asau err(1, "%s", fname);
898 1.47 asau if (!(sb.st_mode & S_IFREG))
899 1.47 asau errx(1, "%s: %s %s", fname,
900 1.47 asau "in-place editing only",
901 1.47 asau "works for regular files");
902 1.47 asau if (*inplace != '\0') {
903 1.47 asau strlcpy(oldfname, fname,
904 1.47 asau sizeof(oldfname));
905 1.47 asau len = strlcat(oldfname, inplace,
906 1.47 asau sizeof(oldfname));
907 1.47 asau if (len > sizeof(oldfname))
908 1.47 asau errx(1, "%s: name too long", fname);
909 1.47 asau }
910 1.47 asau char d_name[PATH_MAX], f_name[PATH_MAX];
911 1.47 asau (void)strlcpy(d_name, fname, sizeof(d_name));
912 1.47 asau (void)strlcpy(f_name, fname, sizeof(f_name));
913 1.47 asau len = (size_t)snprintf(tmpfname, sizeof(tmpfname),
914 1.47 asau "%s/.!%ld!%s", dirname(d_name), (long)getpid(),
915 1.47 asau basename(f_name));
916 1.47 asau if (len >= sizeof(tmpfname))
917 1.47 asau errx(1, "%s: name too long", fname);
918 1.47 asau unlink(tmpfname);
919 1.47 asau if (outfile != NULL && outfile != stdout)
920 1.47 asau fclose(outfile);
921 1.47 asau if ((outfile = fopen(tmpfname, "w")) == NULL)
922 1.47 asau err(1, "%s", fname);
923 1.47 asau fchown(fileno(outfile), sb.st_uid, sb.st_gid);
924 1.47 asau fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
925 1.47 asau outfname = tmpfname;
926 1.47 asau if (!ispan) {
927 1.47 asau linenum = 0;
928 1.47 asau resetstate();
929 1.47 asau }
930 1.47 asau } else {
931 1.47 asau outfile = stdout;
932 1.47 asau outfname = "stdout";
933 1.47 asau }
934 1.47 asau if ((infile = fopen(fname, "r")) == NULL) {
935 1.47 asau warn("%s", fname);
936 1.47 asau rval = 1;
937 1.47 asau continue;
938 1.47 asau }
939 1.47 asau }
940 1.47 asau /*
941 1.47 asau * We are here only when infile is open and we still have something
942 1.47 asau * to read from it.
943 1.47 asau *
944 1.47 asau * Use getline() so that we can handle essentially infinite input
945 1.47 asau * data. The p and plen are static so each invocation gives
946 1.47 asau * getline() the same buffer which is expanded as needed.
947 1.47 asau */
948 1.47 asau ssize_t slen = getline(&p, &plen, infile);
949 1.47 asau if (slen == -1)
950 1.47 asau err(1, "%s", fname);
951 1.47 asau if (slen != 0 && p[slen - 1] == '\n')
952 1.47 asau slen--;
953 1.47 asau cspace(sp, p, (size_t)slen, spflag);
954 1.47 asau
955 1.47 asau linenum++;
956 1.47 asau
957 1.47 asau return (1);
958 1.47 asau }
959 1.47 asau
960 1.47 asau static int
961 1.47 asau lastline(void)
962 1.47 asau {
963 1.47 asau int ch;
964 1.47 asau
965 1.47 asau if (files->next != NULL && (inplace == NULL || ispan))
966 1.47 asau return (0);
967 1.47 asau if ((ch = getc(infile)) == EOF)
968 1.47 asau return (1);
969 1.47 asau ungetc(ch, infile);
970 1.47 asau return (0);
971 1.47 asau }
972