1 1.54 kre /* $NetBSD: process.c,v 1.54 2024/09/17 13:34:08 kre Exp $ */ 2 1.18 tls 3 1.1 alm /*- 4 1.40 christos * Copyright (c) 1992 Diomidis Spinellis. 5 1.19 mrg * Copyright (c) 1992, 1993, 1994 6 1.8 cgd * The Regents of the University of California. All rights reserved. 7 1.1 alm * 8 1.1 alm * This code is derived from software contributed to Berkeley by 9 1.1 alm * Diomidis Spinellis of Imperial College, University of London. 10 1.1 alm * 11 1.1 alm * Redistribution and use in source and binary forms, with or without 12 1.1 alm * modification, are permitted provided that the following conditions 13 1.1 alm * are met: 14 1.1 alm * 1. Redistributions of source code must retain the above copyright 15 1.1 alm * notice, this list of conditions and the following disclaimer. 16 1.1 alm * 2. Redistributions in binary form must reproduce the above copyright 17 1.1 alm * notice, this list of conditions and the following disclaimer in the 18 1.1 alm * documentation and/or other materials provided with the distribution. 19 1.33 agc * 3. Neither the name of the University nor the names of its contributors 20 1.33 agc * may be used to endorse or promote products derived from this software 21 1.33 agc * without specific prior written permission. 22 1.33 agc * 23 1.33 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 1.33 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 1.33 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 1.33 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 1.33 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 1.33 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 1.33 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 1.33 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 1.33 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 1.33 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 1.33 agc * SUCH DAMAGE. 34 1.33 agc */ 35 1.33 agc 36 1.37 gdamore #if HAVE_NBTOOL_CONFIG_H 37 1.37 gdamore #include "nbtool_config.h" 38 1.37 gdamore #endif 39 1.37 gdamore 40 1.20 lukem #include <sys/cdefs.h> 41 1.54 kre __RCSID("$NetBSD: process.c,v 1.54 2024/09/17 13:34:08 kre Exp $"); 42 1.40 christos #ifdef __FBSDID 43 1.40 christos __FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $"); 44 1.40 christos #endif 45 1.40 christos 46 1.45 christos #if 0 47 1.45 christos static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; 48 1.45 christos #endif 49 1.45 christos 50 1.1 alm #include <sys/types.h> 51 1.1 alm #include <sys/stat.h> 52 1.1 alm #include <sys/ioctl.h> 53 1.1 alm #include <sys/uio.h> 54 1.1 alm 55 1.1 alm #include <ctype.h> 56 1.40 christos #include <err.h> 57 1.1 alm #include <errno.h> 58 1.1 alm #include <fcntl.h> 59 1.1 alm #include <limits.h> 60 1.1 alm #include <regex.h> 61 1.1 alm #include <stdio.h> 62 1.1 alm #include <stdlib.h> 63 1.1 alm #include <string.h> 64 1.1 alm #include <unistd.h> 65 1.40 christos #include <wchar.h> 66 1.40 christos #include <wctype.h> 67 1.1 alm 68 1.1 alm #include "defs.h" 69 1.1 alm #include "extern.h" 70 1.1 alm 71 1.40 christos static SPACE HS, PS, SS, YS; 72 1.1 alm #define pd PS.deleted 73 1.1 alm #define ps PS.space 74 1.1 alm #define psl PS.len 75 1.53 christos #define psanl PS.append_newline 76 1.1 alm #define hs HS.space 77 1.1 alm #define hsl HS.len 78 1.1 alm 79 1.40 christos static __inline int applies(struct s_command *); 80 1.40 christos static void do_tr(struct s_tr *); 81 1.32 wiz static void flush_appends(void); 82 1.40 christos static void lputs(char *, size_t); 83 1.40 christos static __inline int regexec_e(regex_t *, const char *, int, int, size_t); 84 1.32 wiz static void regsub(SPACE *, char *, char *); 85 1.32 wiz static int substitute(struct s_command *); 86 1.1 alm 87 1.52 christos struct s_appends *appends; /* Array of pointers to strings to append. */ 88 1.40 christos static size_t appendx; /* Index into appends array. */ 89 1.40 christos size_t appendnum; /* Size of appends array. */ 90 1.1 alm 91 1.1 alm static int lastaddr; /* Set by applies if last address of a range. */ 92 1.1 alm static int sdone; /* If any substitutes since last line input. */ 93 1.1 alm /* Iov structure for 'w' commands. */ 94 1.1 alm static regex_t *defpreg; 95 1.1 alm size_t maxnsub; 96 1.52 christos regmatch_t *match; 97 1.1 alm 98 1.53 christos #define OUT() do { \ 99 1.53 christos fwrite(ps, 1, psl, outfile); \ 100 1.53 christos if (psanl) fputc('\n', outfile); \ 101 1.53 christos } while (0) 102 1.8 cgd 103 1.52 christos void 104 1.32 wiz process(void) 105 1.1 alm { 106 1.1 alm struct s_command *cp; 107 1.1 alm SPACE tspace; 108 1.40 christos size_t oldpsl = 0; 109 1.15 mycroft char *p; 110 1.53 christos int oldpsanl; 111 1.1 alm 112 1.40 christos p = NULL; 113 1.40 christos 114 1.1 alm for (linenum = 0; mf_fgets(&PS, REPLACE);) { 115 1.1 alm pd = 0; 116 1.16 mycroft top: 117 1.1 alm cp = prog; 118 1.1 alm redirect: 119 1.1 alm while (cp != NULL) { 120 1.1 alm if (!applies(cp)) { 121 1.1 alm cp = cp->next; 122 1.1 alm continue; 123 1.1 alm } 124 1.1 alm switch (cp->code) { 125 1.1 alm case '{': 126 1.1 alm cp = cp->u.c; 127 1.1 alm goto redirect; 128 1.1 alm case 'a': 129 1.40 christos if (appendx >= appendnum) 130 1.1 alm appends = xrealloc(appends, 131 1.1 alm sizeof(struct s_appends) * 132 1.40 christos (appendnum *= 2)); 133 1.1 alm appends[appendx].type = AP_STRING; 134 1.1 alm appends[appendx].s = cp->t; 135 1.8 cgd appends[appendx].len = strlen(cp->t); 136 1.1 alm appendx++; 137 1.1 alm break; 138 1.1 alm case 'b': 139 1.1 alm cp = cp->u.c; 140 1.1 alm goto redirect; 141 1.1 alm case 'c': 142 1.1 alm pd = 1; 143 1.1 alm psl = 0; 144 1.40 christos if (cp->a2 == NULL || lastaddr || lastline()) 145 1.40 christos (void)fprintf(outfile, "%s", cp->t); 146 1.39 uwe goto new; 147 1.1 alm case 'd': 148 1.1 alm pd = 1; 149 1.1 alm goto new; 150 1.1 alm case 'D': 151 1.1 alm if (pd) 152 1.1 alm goto new; 153 1.40 christos if (psl == 0 || 154 1.40 christos (p = memchr(ps, '\n', psl - 1)) == NULL) { 155 1.1 alm pd = 1; 156 1.16 mycroft goto new; 157 1.28 atatat } else { 158 1.40 christos psl -= (size_t)((p + 1) - ps); 159 1.1 alm memmove(ps, p + 1, psl); 160 1.28 atatat goto top; 161 1.1 alm } 162 1.1 alm case 'g': 163 1.1 alm cspace(&PS, hs, hsl, REPLACE); 164 1.1 alm break; 165 1.1 alm case 'G': 166 1.43 christos cspace(&PS, "\n", 1, APPEND); 167 1.40 christos cspace(&PS, hs, hsl, APPEND); 168 1.1 alm break; 169 1.1 alm case 'h': 170 1.1 alm cspace(&HS, ps, psl, REPLACE); 171 1.1 alm break; 172 1.1 alm case 'H': 173 1.40 christos cspace(&HS, "\n", 1, APPEND); 174 1.40 christos cspace(&HS, ps, psl, APPEND); 175 1.1 alm break; 176 1.1 alm case 'i': 177 1.40 christos (void)fprintf(outfile, "%s", cp->t); 178 1.1 alm break; 179 1.1 alm case 'l': 180 1.40 christos lputs(ps, psl); 181 1.1 alm break; 182 1.1 alm case 'n': 183 1.1 alm if (!nflag && !pd) 184 1.40 christos OUT(); 185 1.1 alm flush_appends(); 186 1.14 mycroft if (!mf_fgets(&PS, REPLACE)) 187 1.1 alm exit(0); 188 1.1 alm pd = 0; 189 1.1 alm break; 190 1.1 alm case 'N': 191 1.1 alm flush_appends(); 192 1.40 christos cspace(&PS, "\n", 1, APPEND); 193 1.40 christos if (!mf_fgets(&PS, APPEND)) 194 1.1 alm exit(0); 195 1.1 alm break; 196 1.1 alm case 'p': 197 1.1 alm if (pd) 198 1.1 alm break; 199 1.40 christos OUT(); 200 1.1 alm break; 201 1.1 alm case 'P': 202 1.1 alm if (pd) 203 1.1 alm break; 204 1.15 mycroft if ((p = memchr(ps, '\n', psl - 1)) != NULL) { 205 1.15 mycroft oldpsl = psl; 206 1.53 christos oldpsanl = psanl; 207 1.40 christos psl = (size_t)(p - ps); 208 1.53 christos psanl = 1; 209 1.1 alm } 210 1.40 christos OUT(); 211 1.53 christos if (p != NULL) { 212 1.15 mycroft psl = oldpsl; 213 1.53 christos psanl = oldpsanl; 214 1.53 christos } 215 1.1 alm break; 216 1.1 alm case 'q': 217 1.1 alm if (!nflag && !pd) 218 1.40 christos OUT(); 219 1.1 alm flush_appends(); 220 1.1 alm exit(0); 221 1.1 alm case 'r': 222 1.40 christos if (appendx >= appendnum) 223 1.1 alm appends = xrealloc(appends, 224 1.1 alm sizeof(struct s_appends) * 225 1.40 christos (appendnum *= 2)); 226 1.1 alm appends[appendx].type = AP_FILE; 227 1.1 alm appends[appendx].s = cp->t; 228 1.8 cgd appends[appendx].len = strlen(cp->t); 229 1.1 alm appendx++; 230 1.1 alm break; 231 1.1 alm case 's': 232 1.1 alm sdone |= substitute(cp); 233 1.1 alm break; 234 1.1 alm case 't': 235 1.1 alm if (sdone) { 236 1.1 alm sdone = 0; 237 1.1 alm cp = cp->u.c; 238 1.1 alm goto redirect; 239 1.1 alm } 240 1.1 alm break; 241 1.1 alm case 'w': 242 1.1 alm if (pd) 243 1.1 alm break; 244 1.1 alm if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 245 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 246 1.1 alm DEFFILEMODE)) == -1) 247 1.40 christos err(1, "%s", cp->t); 248 1.40 christos if (write(cp->u.fd, ps, psl) != (ssize_t)psl || 249 1.40 christos write(cp->u.fd, "\n", 1) != 1) 250 1.40 christos err(1, "%s", cp->t); 251 1.1 alm break; 252 1.1 alm case 'x': 253 1.40 christos /* 254 1.40 christos * If the hold space is null, make it empty 255 1.40 christos * but not null. Otherwise the pattern space 256 1.40 christos * will become null after the swap, which is 257 1.40 christos * an abnormal condition. 258 1.40 christos */ 259 1.8 cgd if (hs == NULL) 260 1.40 christos cspace(&HS, "", 0, REPLACE); 261 1.1 alm tspace = PS; 262 1.1 alm PS = HS; 263 1.53 christos psanl = tspace.append_newline; 264 1.1 alm HS = tspace; 265 1.1 alm break; 266 1.1 alm case 'y': 267 1.40 christos if (pd || psl == 0) 268 1.1 alm break; 269 1.40 christos do_tr(cp->u.y); 270 1.1 alm break; 271 1.1 alm case ':': 272 1.1 alm case '}': 273 1.1 alm break; 274 1.1 alm case '=': 275 1.40 christos (void)fprintf(outfile, "%lu\n", linenum); 276 1.1 alm } 277 1.1 alm cp = cp->next; 278 1.1 alm } /* for all cp */ 279 1.1 alm 280 1.1 alm new: if (!nflag && !pd) 281 1.40 christos OUT(); 282 1.1 alm flush_appends(); 283 1.1 alm } /* for all lines */ 284 1.1 alm } 285 1.1 alm 286 1.1 alm /* 287 1.1 alm * TRUE if the address passed matches the current program state 288 1.1 alm * (lastline, linenumber, ps). 289 1.1 alm */ 290 1.40 christos #define MATCH(a) \ 291 1.40 christos ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 292 1.40 christos (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) 293 1.1 alm 294 1.1 alm /* 295 1.40 christos * Return TRUE if the command applies to the current line. Sets the start 296 1.40 christos * line for process ranges. Interprets the non-select (``!'') flag. 297 1.1 alm */ 298 1.40 christos static __inline int 299 1.32 wiz applies(struct s_command *cp) 300 1.1 alm { 301 1.1 alm int r; 302 1.1 alm 303 1.1 alm lastaddr = 0; 304 1.1 alm if (cp->a1 == NULL && cp->a2 == NULL) 305 1.1 alm r = 1; 306 1.40 christos else if (cp->a2) 307 1.40 christos if (cp->startline > 0) { 308 1.46 christos switch (cp->a2->type) { 309 1.46 christos case AT_RELLINE: 310 1.46 christos if (linenum - cp->startline <= cp->a2->u.l) 311 1.46 christos r = 1; 312 1.46 christos else { 313 1.46 christos cp->startline = 0; 314 1.46 christos r = 0; 315 1.46 christos } 316 1.46 christos break; 317 1.46 christos default: 318 1.46 christos if (MATCH(cp->a2)) { 319 1.46 christos cp->startline = 0; 320 1.46 christos lastaddr = 1; 321 1.46 christos r = 1; 322 1.46 christos } else if (cp->a2->type == AT_LINE && 323 1.46 christos linenum > cp->a2->u.l) { 324 1.46 christos /* 325 1.46 christos * We missed the 2nd address due to a 326 1.46 christos * branch, so just close the range and 327 1.46 christos * return false. 328 1.46 christos */ 329 1.46 christos cp->startline = 0; 330 1.46 christos r = 0; 331 1.46 christos } else 332 1.46 christos r = 1; 333 1.46 christos } 334 1.36 christos } else if (cp->a1 && MATCH(cp->a1)) { 335 1.1 alm /* 336 1.1 alm * If the second address is a number less than or 337 1.1 alm * equal to the line number first selected, only 338 1.1 alm * one line shall be selected. 339 1.1 alm * -- POSIX 1003.2 340 1.40 christos * Likewise if the relative second line address is zero. 341 1.1 alm */ 342 1.40 christos if ((cp->a2->type == AT_LINE && 343 1.40 christos linenum >= cp->a2->u.l) || 344 1.40 christos (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0)) 345 1.1 alm lastaddr = 1; 346 1.40 christos else { 347 1.40 christos cp->startline = linenum; 348 1.40 christos } 349 1.1 alm r = 1; 350 1.1 alm } else 351 1.1 alm r = 0; 352 1.40 christos else 353 1.1 alm r = MATCH(cp->a1); 354 1.1 alm return (cp->nonsel ? ! r : r); 355 1.1 alm } 356 1.1 alm 357 1.1 alm /* 358 1.40 christos * Reset the sed processor to its initial state. 359 1.40 christos */ 360 1.52 christos void 361 1.40 christos resetstate(void) 362 1.40 christos { 363 1.40 christos struct s_command *cp; 364 1.40 christos 365 1.40 christos /* 366 1.40 christos * Reset all in-range markers. 367 1.40 christos */ 368 1.40 christos for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) 369 1.40 christos if (cp->a2) 370 1.40 christos cp->startline = 0; 371 1.40 christos 372 1.40 christos /* 373 1.40 christos * Clear out the hold space. 374 1.40 christos */ 375 1.40 christos cspace(&HS, "", 0, REPLACE); 376 1.40 christos } 377 1.40 christos 378 1.40 christos /* 379 1.1 alm * substitute -- 380 1.1 alm * Do substitutions in the pattern space. Currently, we build a 381 1.1 alm * copy of the new pattern space in the substitute space structure 382 1.1 alm * and then swap them. 383 1.1 alm */ 384 1.1 alm static int 385 1.32 wiz substitute(struct s_command *cp) 386 1.1 alm { 387 1.1 alm SPACE tspace; 388 1.1 alm regex_t *re; 389 1.40 christos regoff_t re_off, slen; 390 1.19 mrg int lastempty, n; 391 1.1 alm char *s; 392 1.1 alm 393 1.1 alm s = ps; 394 1.1 alm re = cp->u.s->re; 395 1.1 alm if (re == NULL) { 396 1.40 christos if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 397 1.1 alm linenum = cp->u.s->linenum; 398 1.40 christos errx(1, "%lu: %s: \\%u not defined in the RE", 399 1.40 christos linenum, fname, cp->u.s->maxbref); 400 1.1 alm } 401 1.1 alm } 402 1.8 cgd if (!regexec_e(re, s, 0, 0, psl)) 403 1.1 alm return (0); 404 1.1 alm 405 1.1 alm SS.len = 0; /* Clean substitute space. */ 406 1.40 christos slen = (regoff_t)psl; 407 1.1 alm n = cp->u.s->n; 408 1.12 cgd lastempty = 1; 409 1.12 cgd 410 1.1 alm switch (n) { 411 1.1 alm case 0: /* Global */ 412 1.1 alm do { 413 1.12 cgd if (lastempty || match[0].rm_so != match[0].rm_eo) { 414 1.12 cgd /* Locate start of replaced string. */ 415 1.12 cgd re_off = match[0].rm_so; 416 1.12 cgd /* Copy leading retained string. */ 417 1.40 christos cspace(&SS, s, (size_t)re_off, APPEND); 418 1.12 cgd /* Add in regular expression. */ 419 1.12 cgd regsub(&SS, s, cp->u.s->new); 420 1.12 cgd } 421 1.12 cgd 422 1.1 alm /* Move past this match. */ 423 1.12 cgd if (match[0].rm_so != match[0].rm_eo) { 424 1.12 cgd s += match[0].rm_eo; 425 1.12 cgd slen -= match[0].rm_eo; 426 1.12 cgd lastempty = 0; 427 1.12 cgd } else { 428 1.40 christos if (match[0].rm_so < slen) 429 1.40 christos cspace(&SS, s + match[0].rm_so, 1, 430 1.40 christos APPEND); 431 1.12 cgd s += match[0].rm_so + 1; 432 1.12 cgd slen -= match[0].rm_so + 1; 433 1.12 cgd lastempty = 1; 434 1.12 cgd } 435 1.40 christos } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen)); 436 1.1 alm /* Copy trailing retained string. */ 437 1.12 cgd if (slen > 0) 438 1.40 christos cspace(&SS, s, (size_t)slen, APPEND); 439 1.1 alm break; 440 1.1 alm default: /* Nth occurrence */ 441 1.1 alm while (--n) { 442 1.40 christos if (match[0].rm_eo == match[0].rm_so) 443 1.40 christos match[0].rm_eo = match[0].rm_so + 1; 444 1.1 alm s += match[0].rm_eo; 445 1.8 cgd slen -= match[0].rm_eo; 446 1.40 christos if (slen < 0) 447 1.40 christos return (0); 448 1.40 christos if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen)) 449 1.1 alm return (0); 450 1.1 alm } 451 1.1 alm /* FALLTHROUGH */ 452 1.1 alm case 1: /* 1st occurrence */ 453 1.1 alm /* Locate start of replaced string. */ 454 1.1 alm re_off = match[0].rm_so + (s - ps); 455 1.1 alm /* Copy leading retained string. */ 456 1.40 christos cspace(&SS, ps, (size_t)re_off, APPEND); 457 1.1 alm /* Add in regular expression. */ 458 1.1 alm regsub(&SS, s, cp->u.s->new); 459 1.1 alm /* Copy trailing retained string. */ 460 1.1 alm s += match[0].rm_eo; 461 1.8 cgd slen -= match[0].rm_eo; 462 1.40 christos cspace(&SS, s, (size_t)slen, APPEND); 463 1.1 alm break; 464 1.1 alm } 465 1.1 alm 466 1.1 alm /* 467 1.1 alm * Swap the substitute space and the pattern space, and make sure 468 1.1 alm * that any leftover pointers into stdio memory get lost. 469 1.1 alm */ 470 1.1 alm tspace = PS; 471 1.1 alm PS = SS; 472 1.53 christos psanl = tspace.append_newline; 473 1.1 alm SS = tspace; 474 1.1 alm SS.space = SS.back; 475 1.1 alm 476 1.1 alm /* Handle the 'p' flag. */ 477 1.1 alm if (cp->u.s->p) 478 1.40 christos OUT(); 479 1.1 alm 480 1.1 alm /* Handle the 'w' flag. */ 481 1.1 alm if (cp->u.s->wfile && !pd) { 482 1.1 alm if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 483 1.1 alm O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 484 1.40 christos err(1, "%s", cp->u.s->wfile); 485 1.40 christos if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl || 486 1.40 christos write(cp->u.s->wfd, "\n", 1) != 1) 487 1.40 christos err(1, "%s", cp->u.s->wfile); 488 1.1 alm } 489 1.1 alm return (1); 490 1.1 alm } 491 1.1 alm 492 1.1 alm /* 493 1.40 christos * do_tr -- 494 1.40 christos * Perform translation ('y' command) in the pattern space. 495 1.40 christos */ 496 1.40 christos static void 497 1.40 christos do_tr(struct s_tr *y) 498 1.40 christos { 499 1.40 christos SPACE tmp; 500 1.40 christos char c, *p; 501 1.40 christos size_t clen, left; 502 1.40 christos size_t i; 503 1.40 christos 504 1.40 christos if (MB_CUR_MAX == 1) { 505 1.40 christos /* 506 1.40 christos * Single-byte encoding: perform in-place translation 507 1.40 christos * of the pattern space. 508 1.40 christos */ 509 1.40 christos for (p = ps; p < &ps[psl]; p++) 510 1.40 christos *p = (char)y->bytetab[(u_char)*p]; 511 1.40 christos } else { 512 1.40 christos /* 513 1.40 christos * Multi-byte encoding: perform translation into the 514 1.40 christos * translation space, then swap the translation and 515 1.40 christos * pattern spaces. 516 1.40 christos */ 517 1.40 christos /* Clean translation space. */ 518 1.40 christos YS.len = 0; 519 1.40 christos for (p = ps, left = psl; left > 0; p += clen, left -= clen) { 520 1.40 christos if ((c = (char)y->bytetab[(u_char)*p]) != '\0') { 521 1.40 christos cspace(&YS, &c, 1, APPEND); 522 1.40 christos clen = 1; 523 1.40 christos continue; 524 1.40 christos } 525 1.40 christos for (i = 0; i < y->nmultis; i++) 526 1.40 christos if (left >= y->multis[i].fromlen && 527 1.40 christos memcmp(p, y->multis[i].from, 528 1.40 christos y->multis[i].fromlen) == 0) 529 1.40 christos break; 530 1.40 christos if (i < y->nmultis) { 531 1.40 christos cspace(&YS, y->multis[i].to, 532 1.40 christos y->multis[i].tolen, APPEND); 533 1.40 christos clen = y->multis[i].fromlen; 534 1.40 christos } else { 535 1.40 christos cspace(&YS, p, 1, APPEND); 536 1.40 christos clen = 1; 537 1.40 christos } 538 1.40 christos } 539 1.40 christos /* Swap the translation space and the pattern space. */ 540 1.40 christos tmp = PS; 541 1.40 christos PS = YS; 542 1.53 christos psanl = tmp.append_newline; 543 1.40 christos YS = tmp; 544 1.40 christos YS.space = YS.back; 545 1.40 christos } 546 1.40 christos } 547 1.40 christos 548 1.40 christos /* 549 1.1 alm * Flush append requests. Always called before reading a line, 550 1.1 alm * therefore it also resets the substitution done (sdone) flag. 551 1.1 alm */ 552 1.1 alm static void 553 1.32 wiz flush_appends(void) 554 1.1 alm { 555 1.1 alm FILE *f; 556 1.40 christos size_t count, i; 557 1.1 alm char buf[8 * 1024]; 558 1.1 alm 559 1.40 christos for (i = 0; i < appendx; i++) 560 1.1 alm switch (appends[i].type) { 561 1.1 alm case AP_STRING: 562 1.40 christos fwrite(appends[i].s, sizeof(char), appends[i].len, 563 1.40 christos outfile); 564 1.1 alm break; 565 1.1 alm case AP_FILE: 566 1.1 alm /* 567 1.1 alm * Read files probably shouldn't be cached. Since 568 1.1 alm * it's not an error to read a non-existent file, 569 1.1 alm * it's possible that another program is interacting 570 1.40 christos * with the sed script through the filesystem. It 571 1.1 alm * would be truly bizarre, but possible. It's probably 572 1.1 alm * not that big a performance win, anyhow. 573 1.1 alm */ 574 1.1 alm if ((f = fopen(appends[i].s, "r")) == NULL) 575 1.1 alm break; 576 1.40 christos while ((count = fread(buf, sizeof(char), sizeof(buf), f))) 577 1.40 christos (void)fwrite(buf, sizeof(char), count, outfile); 578 1.1 alm (void)fclose(f); 579 1.1 alm break; 580 1.1 alm } 581 1.40 christos if (ferror(outfile)) 582 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); 583 1.40 christos appendx = 0; 584 1.40 christos sdone = 0; 585 1.1 alm } 586 1.1 alm 587 1.1 alm static void 588 1.40 christos lputs(char *s, size_t len) 589 1.1 alm { 590 1.40 christos static const char escapes[] = "\\\a\b\f\r\t\v"; 591 1.40 christos int c; 592 1.40 christos size_t col, width; 593 1.40 christos const char *p; 594 1.40 christos #ifdef TIOCGWINSZ 595 1.1 alm struct winsize win; 596 1.37 gdamore #endif 597 1.40 christos static size_t termwidth = (size_t)-1; 598 1.40 christos size_t clen, i; 599 1.40 christos wchar_t wc; 600 1.40 christos mbstate_t mbs; 601 1.40 christos 602 1.40 christos if (outfile != stdout) 603 1.40 christos termwidth = 60; 604 1.40 christos if (termwidth == (size_t)-1) { 605 1.40 christos if ((p = getenv("COLUMNS")) && *p != '\0') 606 1.40 christos termwidth = (size_t)atoi(p); 607 1.40 christos #ifdef TIOCGWINSZ 608 1.1 alm else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 609 1.1 alm win.ws_col > 0) 610 1.1 alm termwidth = win.ws_col; 611 1.37 gdamore #endif 612 1.1 alm else 613 1.1 alm termwidth = 60; 614 1.23 ross } 615 1.40 christos if (termwidth == 0) 616 1.40 christos termwidth = 1; 617 1.40 christos 618 1.40 christos memset(&mbs, 0, sizeof(mbs)); 619 1.40 christos col = 0; 620 1.40 christos while (len != 0) { 621 1.40 christos clen = mbrtowc(&wc, s, len, &mbs); 622 1.40 christos if (clen == 0) 623 1.40 christos clen = 1; 624 1.40 christos if (clen == (size_t)-1 || clen == (size_t)-2) { 625 1.40 christos wc = (unsigned char)*s; 626 1.40 christos clen = 1; 627 1.40 christos memset(&mbs, 0, sizeof(mbs)); 628 1.1 alm } 629 1.40 christos if (wc == '\n') { 630 1.40 christos if (col + 1 >= termwidth) 631 1.40 christos fprintf(outfile, "\\\n"); 632 1.40 christos fputc('$', outfile); 633 1.40 christos fputc('\n', outfile); 634 1.40 christos col = 0; 635 1.40 christos } else if (iswprint(wc)) { 636 1.54 kre #ifdef HAVE_NBTOOL_CONFIG_H 637 1.54 kre width = 1; /* wcwidth is an XSI function */ 638 1.54 kre #else 639 1.40 christos width = (size_t)wcwidth(wc); 640 1.54 kre #endif 641 1.40 christos if (col + width >= termwidth) { 642 1.40 christos fprintf(outfile, "\\\n"); 643 1.40 christos col = 0; 644 1.40 christos } 645 1.40 christos fwrite(s, 1, clen, outfile); 646 1.40 christos col += width; 647 1.40 christos } else if (wc != L'\0' && (c = wctob(wc)) != EOF && 648 1.40 christos (p = strchr(escapes, c)) != NULL) { 649 1.40 christos if (col + 2 >= termwidth) { 650 1.40 christos fprintf(outfile, "\\\n"); 651 1.40 christos col = 0; 652 1.40 christos } 653 1.40 christos fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]); 654 1.40 christos col += 2; 655 1.1 alm } else { 656 1.40 christos if (col + 4 * clen >= termwidth) { 657 1.40 christos fprintf(outfile, "\\\n"); 658 1.40 christos col = 0; 659 1.1 alm } 660 1.40 christos for (i = 0; i < clen; i++) 661 1.40 christos fprintf(outfile, "\\%03o", 662 1.40 christos (int)(unsigned char)s[i]); 663 1.40 christos col += 4 * clen; 664 1.1 alm } 665 1.40 christos s += clen; 666 1.40 christos len -= clen; 667 1.1 alm } 668 1.40 christos if (col + 1 >= termwidth) 669 1.40 christos fprintf(outfile, "\\\n"); 670 1.40 christos (void)fputc('$', outfile); 671 1.40 christos (void)fputc('\n', outfile); 672 1.40 christos if (ferror(outfile)) 673 1.40 christos errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); 674 1.1 alm } 675 1.1 alm 676 1.40 christos static __inline int 677 1.40 christos regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, 678 1.40 christos size_t slen) 679 1.1 alm { 680 1.1 alm int eval; 681 1.44 christos #ifndef REG_STARTEND 682 1.44 christos char *buf; 683 1.44 christos #endif 684 1.40 christos 685 1.1 alm if (preg == NULL) { 686 1.1 alm if (defpreg == NULL) 687 1.40 christos errx(1, "first RE may not be empty"); 688 1.1 alm } else 689 1.1 alm defpreg = preg; 690 1.1 alm 691 1.40 christos /* Set anchors */ 692 1.44 christos #ifndef REG_STARTEND 693 1.44 christos buf = xmalloc(slen + 1); 694 1.44 christos (void)memcpy(buf, string, slen); 695 1.44 christos buf[slen] = '\0'; 696 1.44 christos eval = regexec(defpreg, buf, 697 1.44 christos nomatch ? 0 : maxnsub + 1, match, eflags); 698 1.44 christos free(buf); 699 1.44 christos #else 700 1.8 cgd match[0].rm_so = 0; 701 1.40 christos match[0].rm_eo = (regoff_t)slen; 702 1.1 alm eval = regexec(defpreg, string, 703 1.8 cgd nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 704 1.44 christos #endif 705 1.1 alm switch(eval) { 706 1.1 alm case 0: 707 1.1 alm return (1); 708 1.1 alm case REG_NOMATCH: 709 1.1 alm return (0); 710 1.1 alm } 711 1.40 christos errx(1, "RE error: %s", strregerror(eval, defpreg)); 712 1.1 alm /* NOTREACHED */ 713 1.1 alm } 714 1.1 alm 715 1.1 alm /* 716 1.1 alm * regsub - perform substitutions after a regexp match 717 1.1 alm * Based on a routine by Henry Spencer 718 1.1 alm */ 719 1.1 alm static void 720 1.32 wiz regsub(SPACE *sp, char *string, char *src) 721 1.1 alm { 722 1.40 christos size_t len; 723 1.40 christos int no; 724 1.20 lukem char c, *dst; 725 1.1 alm 726 1.1 alm #define NEEDSP(reqlen) \ 727 1.40 christos /* XXX What is the +1 for? */ \ 728 1.34 itojun if (sp->len + (reqlen) + 1 >= sp->blen) { \ 729 1.40 christos sp->blen += (reqlen) + 1024; \ 730 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen); \ 731 1.1 alm dst = sp->space + sp->len; \ 732 1.1 alm } 733 1.1 alm 734 1.1 alm dst = sp->space + sp->len; 735 1.1 alm while ((c = *src++) != '\0') { 736 1.1 alm if (c == '&') 737 1.1 alm no = 0; 738 1.24 christos else if (c == '\\' && isdigit((unsigned char)*src)) 739 1.1 alm no = *src++ - '0'; 740 1.1 alm else 741 1.1 alm no = -1; 742 1.1 alm if (no < 0) { /* Ordinary character. */ 743 1.40 christos if (c == '\\' && (*src == '\\' || *src == '&')) 744 1.40 christos c = *src++; 745 1.1 alm NEEDSP(1); 746 1.40 christos *dst++ = c; 747 1.1 alm ++sp->len; 748 1.40 christos } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 749 1.40 christos len = (size_t)(match[no].rm_eo - match[no].rm_so); 750 1.1 alm NEEDSP(len); 751 1.1 alm memmove(dst, string + match[no].rm_so, len); 752 1.1 alm dst += len; 753 1.1 alm sp->len += len; 754 1.1 alm } 755 1.1 alm } 756 1.1 alm NEEDSP(1); 757 1.1 alm *dst = '\0'; 758 1.1 alm } 759 1.1 alm 760 1.1 alm /* 761 1.40 christos * cspace -- 762 1.40 christos * Concatenate space: append the source space to the destination space, 763 1.40 christos * allocating new space as necessary. 764 1.1 alm */ 765 1.52 christos void 766 1.38 lukem cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) 767 1.1 alm { 768 1.1 alm size_t tlen; 769 1.1 alm 770 1.8 cgd /* Make sure SPACE has enough memory and ramp up quickly. */ 771 1.8 cgd tlen = sp->len + len + 1; 772 1.1 alm if (tlen > sp->blen) { 773 1.40 christos sp->blen = tlen + 1024; 774 1.40 christos sp->space = sp->back = xrealloc(sp->back, sp->blen); 775 1.1 alm } 776 1.1 alm 777 1.8 cgd if (spflag == REPLACE) 778 1.1 alm sp->len = 0; 779 1.1 alm 780 1.1 alm memmove(sp->space + sp->len, p, len); 781 1.8 cgd 782 1.1 alm sp->space[sp->len += len] = '\0'; 783 1.1 alm } 784 1.1 alm 785 1.1 alm /* 786 1.1 alm * Close all cached opened files and report any errors 787 1.1 alm */ 788 1.52 christos void 789 1.32 wiz cfclose(struct s_command *cp, struct s_command *end) 790 1.1 alm { 791 1.1 alm 792 1.1 alm for (; cp != end; cp = cp->next) 793 1.1 alm switch(cp->code) { 794 1.1 alm case 's': 795 1.1 alm if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 796 1.40 christos err(1, "%s", cp->u.s->wfile); 797 1.1 alm cp->u.s->wfd = -1; 798 1.1 alm break; 799 1.1 alm case 'w': 800 1.1 alm if (cp->u.fd != -1 && close(cp->u.fd)) 801 1.40 christos err(1, "%s", cp->t); 802 1.1 alm cp->u.fd = -1; 803 1.1 alm break; 804 1.1 alm case '{': 805 1.1 alm cfclose(cp->u.c, cp->next); 806 1.1 alm break; 807 1.1 alm } 808 1.1 alm } 809