1 1.7 cheusov /* $NetBSD: csplit.c,v 1.7 2017/07/30 23:02:53 cheusov Exp $ */ 2 1.1 christos /* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp$ */ 3 1.1 christos 4 1.1 christos /*- 5 1.1 christos * Copyright (c) 2002 Tim J. Robbins. 6 1.1 christos * All rights reserved. 7 1.1 christos * 8 1.1 christos * Redistribution and use in source and binary forms, with or without 9 1.1 christos * modification, are permitted provided that the following conditions 10 1.1 christos * are met: 11 1.1 christos * 1. Redistributions of source code must retain the above copyright 12 1.1 christos * notice, this list of conditions and the following disclaimer. 13 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright 14 1.1 christos * notice, this list of conditions and the following disclaimer in the 15 1.1 christos * documentation and/or other materials provided with the distribution. 16 1.1 christos * 17 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 1.1 christos * SUCH DAMAGE. 28 1.1 christos */ 29 1.1 christos 30 1.1 christos /* 31 1.1 christos * csplit -- split files based on context 32 1.1 christos * 33 1.1 christos * This utility splits its input into numbered output files by line number 34 1.1 christos * or by a regular expression. Regular expression matches have an optional 35 1.1 christos * offset with them, allowing the split to occur a specified number of 36 1.1 christos * lines before or after the match. 37 1.1 christos * 38 1.1 christos * To handle negative offsets, we stop reading when the match occurs and 39 1.1 christos * store the offset that the file should have been split at, then use 40 1.1 christos * this output file as input until all the "overflowed" lines have been read. 41 1.1 christos * The file is then closed and truncated to the correct length. 42 1.1 christos * 43 1.1 christos * We assume that the output files can be seeked upon (ie. they cannot be 44 1.1 christos * symlinks to named pipes or character devices), but make no such 45 1.1 christos * assumption about the input. 46 1.1 christos */ 47 1.1 christos 48 1.1 christos #include <sys/cdefs.h> 49 1.1 christos #ifndef lint 50 1.7 cheusov __RCSID("$NetBSD: csplit.c,v 1.7 2017/07/30 23:02:53 cheusov Exp $"); 51 1.1 christos #endif 52 1.1 christos 53 1.1 christos #include <sys/types.h> 54 1.1 christos 55 1.1 christos #include <ctype.h> 56 1.1 christos #include <err.h> 57 1.1 christos #include <errno.h> 58 1.1 christos #include <limits.h> 59 1.1 christos #include <locale.h> 60 1.1 christos #include <regex.h> 61 1.1 christos #include <signal.h> 62 1.1 christos #include <stdint.h> 63 1.1 christos #include <stdio.h> 64 1.1 christos #include <stdlib.h> 65 1.1 christos #include <string.h> 66 1.1 christos #include <unistd.h> 67 1.3 lukem #include <util.h> 68 1.1 christos 69 1.1 christos static void cleanup(void); 70 1.1 christos static void do_lineno(const char *); 71 1.1 christos static void do_rexp(const char *); 72 1.5 roy static char *get_line(void); 73 1.1 christos static void handlesig(int); 74 1.1 christos static FILE *newfile(void); 75 1.1 christos static void toomuch(FILE *, long); 76 1.4 perry static void usage(void) __dead; 77 1.1 christos 78 1.1 christos /* 79 1.1 christos * Command line options 80 1.1 christos */ 81 1.6 joerg static const char *prefix; /* File name prefix */ 82 1.6 joerg static long sufflen; /* Number of decimal digits for suffix */ 83 1.6 joerg static int sflag; /* Suppress output of file names */ 84 1.6 joerg static int kflag; /* Keep output if error occurs */ 85 1.1 christos 86 1.1 christos /* 87 1.1 christos * Other miscellaneous globals (XXX too many) 88 1.1 christos */ 89 1.6 joerg static long lineno; /* Current line number in input file */ 90 1.6 joerg static long reps; /* Number of repetitions for this pattern */ 91 1.6 joerg static long nfiles; /* Number of files output so far */ 92 1.6 joerg static long maxfiles; /* Maximum number of files we can create */ 93 1.6 joerg static char currfile[PATH_MAX]; /* Current output file */ 94 1.6 joerg static const char *infn; /* Name of the input file */ 95 1.6 joerg static FILE *infile; /* Input file handle */ 96 1.6 joerg static FILE *overfile; /* Overflow file for toomuch() */ 97 1.6 joerg static off_t truncofs; /* Offset this file should be truncated at */ 98 1.6 joerg static int doclean; /* Should cleanup() remove output? */ 99 1.1 christos 100 1.1 christos int 101 1.1 christos main(int argc, char *argv[]) 102 1.1 christos { 103 1.1 christos struct sigaction sa; 104 1.1 christos long i; 105 1.1 christos int ch; 106 1.1 christos const char *expr; 107 1.1 christos char *ep, *p; 108 1.1 christos FILE *ofp; 109 1.1 christos 110 1.1 christos (void)setlocale(LC_ALL, ""); 111 1.1 christos 112 1.1 christos kflag = sflag = 0; 113 1.1 christos prefix = "xx"; 114 1.1 christos sufflen = 2; 115 1.1 christos while ((ch = getopt(argc, argv, "ksf:n:")) > 0) { 116 1.1 christos switch (ch) { 117 1.1 christos case 'f': 118 1.1 christos prefix = optarg; 119 1.1 christos break; 120 1.1 christos case 'k': 121 1.1 christos kflag = 1; 122 1.1 christos break; 123 1.1 christos case 'n': 124 1.1 christos errno = 0; 125 1.1 christos sufflen = strtol(optarg, &ep, 10); 126 1.1 christos if (sufflen <= 0 || *ep != '\0' || errno != 0) 127 1.1 christos errx(1, "%s: bad suffix length", optarg); 128 1.1 christos break; 129 1.1 christos case 's': 130 1.1 christos sflag = 1; 131 1.1 christos break; 132 1.1 christos default: 133 1.1 christos usage(); 134 1.1 christos /*NOTREACHED*/ 135 1.1 christos } 136 1.1 christos } 137 1.1 christos 138 1.1 christos if (sufflen + strlen(prefix) >= PATH_MAX) 139 1.1 christos errx(1, "name too long"); 140 1.1 christos 141 1.1 christos argc -= optind; 142 1.1 christos argv += optind; 143 1.1 christos 144 1.1 christos if ((infn = *argv++) == NULL) 145 1.1 christos usage(); 146 1.1 christos if (strcmp(infn, "-") == 0) { 147 1.1 christos infile = stdin; 148 1.1 christos infn = "stdin"; 149 1.1 christos } else if ((infile = fopen(infn, "r")) == NULL) 150 1.1 christos err(1, "%s", infn); 151 1.1 christos 152 1.1 christos if (!kflag) { 153 1.1 christos doclean = 1; 154 1.1 christos (void)atexit(cleanup); 155 1.1 christos sa.sa_flags = 0; 156 1.1 christos sa.sa_handler = handlesig; 157 1.1 christos (void)sigemptyset(&sa.sa_mask); 158 1.1 christos (void)sigaddset(&sa.sa_mask, SIGHUP); 159 1.1 christos (void)sigaddset(&sa.sa_mask, SIGINT); 160 1.1 christos (void)sigaddset(&sa.sa_mask, SIGTERM); 161 1.1 christos (void)sigaction(SIGHUP, &sa, NULL); 162 1.1 christos (void)sigaction(SIGINT, &sa, NULL); 163 1.1 christos (void)sigaction(SIGTERM, &sa, NULL); 164 1.1 christos } 165 1.1 christos 166 1.1 christos lineno = 0; 167 1.1 christos nfiles = 0; 168 1.1 christos truncofs = 0; 169 1.1 christos overfile = NULL; 170 1.1 christos 171 1.1 christos /* Ensure 10^sufflen < LONG_MAX. */ 172 1.1 christos for (maxfiles = 1, i = 0; i < sufflen; i++) { 173 1.1 christos if (maxfiles > LONG_MAX / 10) 174 1.1 christos errx(1, "%ld: suffix too long (limit %ld)", 175 1.1 christos sufflen, i); 176 1.1 christos maxfiles *= 10; 177 1.1 christos } 178 1.1 christos 179 1.1 christos /* Create files based on supplied patterns. */ 180 1.1 christos while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) { 181 1.1 christos /* Look ahead & see if this pattern has any repetitions. */ 182 1.1 christos if (*argv != NULL && **argv == '{') { 183 1.1 christos errno = 0; 184 1.1 christos reps = strtol(*argv + 1, &ep, 10); 185 1.1 christos if (reps < 0 || *ep != '}' || errno != 0) 186 1.1 christos errx(1, "%s: bad repetition count", *argv + 1); 187 1.1 christos argv++; 188 1.1 christos } else 189 1.1 christos reps = 0; 190 1.1 christos 191 1.1 christos if (*expr == '/' || *expr == '%') { 192 1.1 christos do 193 1.1 christos do_rexp(expr); 194 1.1 christos while (reps-- != 0 && nfiles < maxfiles - 1); 195 1.1 christos } else if (isdigit((unsigned char)*expr)) 196 1.1 christos do_lineno(expr); 197 1.1 christos else 198 1.1 christos errx(1, "%s: unrecognised pattern", expr); 199 1.1 christos } 200 1.1 christos 201 1.1 christos /* Copy the rest into a new file. */ 202 1.1 christos if (!feof(infile)) { 203 1.1 christos ofp = newfile(); 204 1.7 cheusov while ((p = get_line()) != NULL && fputs(p, ofp) != EOF) 205 1.1 christos ; 206 1.1 christos if (!sflag) 207 1.1 christos (void)printf("%jd\n", (intmax_t)ftello(ofp)); 208 1.1 christos if (fclose(ofp) != 0) 209 1.1 christos err(1, "%s", currfile); 210 1.1 christos } 211 1.1 christos 212 1.1 christos toomuch(NULL, 0L); 213 1.1 christos doclean = 0; 214 1.1 christos 215 1.1 christos return (0); 216 1.1 christos } 217 1.1 christos 218 1.1 christos static void 219 1.1 christos usage(void) 220 1.1 christos { 221 1.1 christos 222 1.1 christos (void)fprintf(stderr, 223 1.1 christos "Usage: %s [-ks] [-f prefix] [-n number] file args ...\n", getprogname()); 224 1.1 christos exit(1); 225 1.1 christos } 226 1.1 christos 227 1.6 joerg __dead static void 228 1.1 christos handlesig(int sig) 229 1.1 christos { 230 1.1 christos char msg[BUFSIZ]; 231 1.1 christos size_t len; 232 1.1 christos 233 1.1 christos len = snprintf(msg, sizeof(msg), "%s: Caught %s, cleaning up\n", 234 1.1 christos getprogname(), strsignal(sig)); 235 1.1 christos if (len < sizeof(msg)) 236 1.1 christos (void)write(STDERR_FILENO, msg, len); 237 1.1 christos cleanup(); 238 1.3 lukem (void)raise_default_signal(sig); 239 1.1 christos _exit(2); 240 1.1 christos } 241 1.1 christos 242 1.1 christos /* Create a new output file. */ 243 1.1 christos static FILE * 244 1.1 christos newfile(void) 245 1.1 christos { 246 1.1 christos FILE *fp; 247 1.1 christos 248 1.1 christos if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix, 249 1.1 christos (int)sufflen, nfiles) >= sizeof(currfile)) 250 1.1 christos errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG)); 251 1.1 christos if ((fp = fopen(currfile, "w+")) == NULL) 252 1.1 christos err(1, "%s", currfile); 253 1.1 christos nfiles++; 254 1.1 christos 255 1.1 christos return (fp); 256 1.1 christos } 257 1.1 christos 258 1.1 christos /* Remove partial output, called before exiting. */ 259 1.1 christos static void 260 1.1 christos cleanup(void) 261 1.1 christos { 262 1.1 christos char fnbuf[PATH_MAX]; 263 1.1 christos long i; 264 1.1 christos 265 1.1 christos if (!doclean) 266 1.1 christos return; 267 1.1 christos 268 1.1 christos /* 269 1.1 christos * NOTE: One cannot portably assume to be able to call snprintf() 270 1.1 christos * from inside a signal handler. It does, however, appear to be safe 271 1.1 christos * to do on FreeBSD and NetBSD. The solution to this problem is worse 272 1.1 christos * than the problem itself. 273 1.1 christos */ 274 1.1 christos 275 1.1 christos for (i = 0; i < nfiles; i++) { 276 1.1 christos (void)snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix, 277 1.1 christos (int)sufflen, i); 278 1.1 christos (void)unlink(fnbuf); 279 1.1 christos } 280 1.1 christos } 281 1.1 christos 282 1.1 christos /* Read a line from the input into a static buffer. */ 283 1.1 christos static char * 284 1.5 roy get_line(void) 285 1.1 christos { 286 1.1 christos static char lbuf[LINE_MAX]; 287 1.1 christos FILE *src; 288 1.1 christos 289 1.1 christos src = overfile != NULL ? overfile : infile; 290 1.1 christos 291 1.1 christos again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) { 292 1.1 christos if (src == overfile) { 293 1.1 christos src = infile; 294 1.1 christos goto again; 295 1.1 christos } 296 1.1 christos return (NULL); 297 1.1 christos } 298 1.1 christos if (ferror(src)) 299 1.1 christos err(1, "%s", infn); 300 1.1 christos lineno++; 301 1.1 christos 302 1.1 christos return (lbuf); 303 1.1 christos } 304 1.1 christos 305 1.5 roy /* Conceptually rewind the input (as obtained by get_line()) back `n' lines. */ 306 1.1 christos static void 307 1.1 christos toomuch(FILE *ofp, long n) 308 1.1 christos { 309 1.1 christos char buf[BUFSIZ]; 310 1.1 christos size_t i, nread; 311 1.1 christos 312 1.1 christos if (overfile != NULL) { 313 1.1 christos /* 314 1.1 christos * Truncate the previous file we overflowed into back to 315 1.1 christos * the correct length, close it. 316 1.1 christos */ 317 1.1 christos if (fflush(overfile) != 0) 318 1.1 christos err(1, "overflow"); 319 1.1 christos if (ftruncate(fileno(overfile), truncofs) != 0) 320 1.1 christos err(1, "overflow"); 321 1.1 christos if (fclose(overfile) != 0) 322 1.1 christos err(1, "overflow"); 323 1.1 christos overfile = NULL; 324 1.1 christos } 325 1.1 christos 326 1.1 christos if (n == 0) 327 1.1 christos /* Just tidying up */ 328 1.1 christos return; 329 1.1 christos 330 1.1 christos lineno -= n; 331 1.1 christos 332 1.1 christos /* 333 1.1 christos * Wind the overflow file backwards to `n' lines before the 334 1.1 christos * current one. 335 1.1 christos */ 336 1.1 christos do { 337 1.1 christos if (ftello(ofp) < (off_t)sizeof(buf)) 338 1.1 christos rewind(ofp); 339 1.1 christos else 340 1.1 christos (void)fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR); 341 1.1 christos if (ferror(ofp)) 342 1.1 christos errx(1, "%s: can't seek", currfile); 343 1.1 christos if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0) 344 1.1 christos errx(1, "can't read overflowed output"); 345 1.1 christos if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0) 346 1.1 christos err(1, "%s", currfile); 347 1.1 christos for (i = 1; i <= nread; i++) 348 1.1 christos if (buf[nread - i] == '\n' && n-- == 0) 349 1.1 christos break; 350 1.1 christos if (ftello(ofp) == 0) 351 1.1 christos break; 352 1.1 christos } while (n > 0); 353 1.1 christos if (fseeko(ofp, (off_t)nread - i + 1, SEEK_CUR) != 0) 354 1.1 christos err(1, "%s", currfile); 355 1.1 christos 356 1.1 christos /* 357 1.5 roy * get_line() will read from here. Next call will truncate to 358 1.1 christos * truncofs in this file. 359 1.1 christos */ 360 1.1 christos overfile = ofp; 361 1.1 christos truncofs = ftello(overfile); 362 1.1 christos } 363 1.1 christos 364 1.1 christos /* Handle splits for /regexp/ and %regexp% patterns. */ 365 1.1 christos static void 366 1.1 christos do_rexp(const char *expr) 367 1.1 christos { 368 1.1 christos regex_t cre; 369 1.1 christos intmax_t nwritten; 370 1.1 christos long ofs; 371 1.1 christos int first; 372 1.1 christos char *ecopy, *ep, *p, *pofs, *re; 373 1.1 christos FILE *ofp; 374 1.1 christos 375 1.1 christos if ((ecopy = strdup(expr)) == NULL) 376 1.1 christos err(1, "strdup"); 377 1.1 christos 378 1.1 christos re = ecopy + 1; 379 1.1 christos if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\') 380 1.1 christos errx(1, "%s: missing trailing %c", expr, *expr); 381 1.1 christos *pofs++ = '\0'; 382 1.1 christos 383 1.1 christos if (*pofs != '\0') { 384 1.1 christos errno = 0; 385 1.1 christos ofs = strtol(pofs, &ep, 10); 386 1.1 christos if (*ep != '\0' || errno != 0) 387 1.1 christos errx(1, "%s: bad offset", pofs); 388 1.1 christos } else 389 1.1 christos ofs = 0; 390 1.1 christos 391 1.1 christos if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0) 392 1.1 christos errx(1, "%s: bad regular expression", re); 393 1.1 christos 394 1.1 christos if (*expr == '/') 395 1.1 christos /* /regexp/: Save results to a file. */ 396 1.1 christos ofp = newfile(); 397 1.1 christos else { 398 1.1 christos /* %regexp%: Make a temporary file for overflow. */ 399 1.1 christos if ((ofp = tmpfile()) == NULL) 400 1.1 christos err(1, "tmpfile"); 401 1.1 christos } 402 1.1 christos 403 1.1 christos /* Read and output lines until we get a match. */ 404 1.1 christos first = 1; 405 1.5 roy while ((p = get_line()) != NULL) { 406 1.7 cheusov if (fputs(p, ofp) == EOF) 407 1.1 christos break; 408 1.1 christos if (!first && regexec(&cre, p, 0, NULL, 0) == 0) 409 1.1 christos break; 410 1.1 christos first = 0; 411 1.1 christos } 412 1.1 christos 413 1.1 christos if (p == NULL) 414 1.1 christos errx(1, "%s: no match", re); 415 1.1 christos 416 1.1 christos if (ofs <= 0) { 417 1.1 christos /* 418 1.1 christos * Negative (or zero) offset: throw back any lines we should 419 1.1 christos * not have read yet. 420 1.1 christos */ 421 1.1 christos if (p != NULL) { 422 1.1 christos toomuch(ofp, -ofs + 1); 423 1.1 christos nwritten = (intmax_t)truncofs; 424 1.1 christos } else 425 1.1 christos nwritten = (intmax_t)ftello(ofp); 426 1.1 christos } else { 427 1.1 christos /* 428 1.1 christos * Positive offset: copy the requested number of lines 429 1.1 christos * after the match. 430 1.1 christos */ 431 1.5 roy while (--ofs > 0 && (p = get_line()) != NULL) 432 1.7 cheusov if (fputs(p, ofp) == EOF) 433 1.1 christos break; 434 1.1 christos toomuch(NULL, 0L); 435 1.1 christos nwritten = (intmax_t)ftello(ofp); 436 1.1 christos if (fclose(ofp) != 0) 437 1.1 christos err(1, "%s", currfile); 438 1.1 christos } 439 1.1 christos 440 1.1 christos if (!sflag && *expr == '/') 441 1.1 christos (void)printf("%jd\n", nwritten); 442 1.1 christos 443 1.1 christos regfree(&cre); 444 1.1 christos free(ecopy); 445 1.1 christos } 446 1.1 christos 447 1.1 christos /* Handle splits based on line number. */ 448 1.1 christos static void 449 1.1 christos do_lineno(const char *expr) 450 1.1 christos { 451 1.1 christos long lastline, tgtline; 452 1.1 christos char *ep, *p; 453 1.1 christos FILE *ofp; 454 1.1 christos 455 1.1 christos errno = 0; 456 1.1 christos tgtline = strtol(expr, &ep, 10); 457 1.1 christos if (tgtline <= 0 || errno != 0 || *ep != '\0') 458 1.1 christos errx(1, "%s: bad line number", expr); 459 1.1 christos lastline = tgtline; 460 1.1 christos if (lastline <= lineno) 461 1.1 christos errx(1, "%s: can't go backwards", expr); 462 1.1 christos 463 1.1 christos while (nfiles < maxfiles - 1) { 464 1.1 christos ofp = newfile(); 465 1.1 christos while (lineno + 1 != lastline) { 466 1.5 roy if ((p = get_line()) == NULL) 467 1.1 christos errx(1, "%ld: out of range", lastline); 468 1.7 cheusov if (fputs(p, ofp) == EOF) 469 1.1 christos break; 470 1.1 christos } 471 1.1 christos if (!sflag) 472 1.1 christos (void)printf("%jd\n", (intmax_t)ftello(ofp)); 473 1.1 christos if (fclose(ofp) != 0) 474 1.1 christos err(1, "%s", currfile); 475 1.1 christos if (reps-- == 0) 476 1.1 christos break; 477 1.1 christos lastline += tgtline; 478 1.1 christos } 479 1.1 christos } 480