csplit.c revision 1.1 1 1.1 christos /* $NetBSD: csplit.c,v 1.1 2006/09/25 19:21:42 christos Exp $ */
2 1.1 christos /* $FreeBSD: src/usr.bin/csplit/csplit.c,v 1.9 2004/03/22 11:15:03 tjr Exp$ */
3 1.1 christos
4 1.1 christos /*-
5 1.1 christos * Copyright (c) 2002 Tim J. Robbins.
6 1.1 christos * All rights reserved.
7 1.1 christos *
8 1.1 christos * Redistribution and use in source and binary forms, with or without
9 1.1 christos * modification, are permitted provided that the following conditions
10 1.1 christos * are met:
11 1.1 christos * 1. Redistributions of source code must retain the above copyright
12 1.1 christos * notice, this list of conditions and the following disclaimer.
13 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer in the
15 1.1 christos * documentation and/or other materials provided with the distribution.
16 1.1 christos *
17 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 1.1 christos * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 1.1 christos * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 1.1 christos * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 1.1 christos * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 1.1 christos * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 1.1 christos * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 1.1 christos * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 1.1 christos * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 1.1 christos * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 1.1 christos * SUCH DAMAGE.
28 1.1 christos */
29 1.1 christos
30 1.1 christos /*
31 1.1 christos * csplit -- split files based on context
32 1.1 christos *
33 1.1 christos * This utility splits its input into numbered output files by line number
34 1.1 christos * or by a regular expression. Regular expression matches have an optional
35 1.1 christos * offset with them, allowing the split to occur a specified number of
36 1.1 christos * lines before or after the match.
37 1.1 christos *
38 1.1 christos * To handle negative offsets, we stop reading when the match occurs and
39 1.1 christos * store the offset that the file should have been split at, then use
40 1.1 christos * this output file as input until all the "overflowed" lines have been read.
41 1.1 christos * The file is then closed and truncated to the correct length.
42 1.1 christos *
43 1.1 christos * We assume that the output files are seekable (i.e. they cannot be
44 1.1 christos * symlinks to named pipes or character devices), but make no such
45 1.1 christos * assumption about the input.
46 1.1 christos */
47 1.1 christos
48 1.1 christos #include <sys/cdefs.h>
49 1.1 christos #ifndef lint
50 1.1 christos __RCSID("$NetBSD: csplit.c,v 1.1 2006/09/25 19:21:42 christos Exp $");
51 1.1 christos #endif
52 1.1 christos
53 1.1 christos #include <sys/types.h>
54 1.1 christos
55 1.1 christos #include <ctype.h>
56 1.1 christos #include <err.h>
57 1.1 christos #include <errno.h>
58 1.1 christos #include <limits.h>
59 1.1 christos #include <locale.h>
60 1.1 christos #include <regex.h>
61 1.1 christos #include <signal.h>
62 1.1 christos #include <stdint.h>
63 1.1 christos #include <stdio.h>
64 1.1 christos #include <stdlib.h>
65 1.1 christos #include <string.h>
66 1.1 christos #include <unistd.h>
67 1.1 christos
68 1.1 christos static void cleanup(void);
69 1.1 christos static void do_lineno(const char *);
70 1.1 christos static void do_rexp(const char *);
71 1.1 christos static char *getline(void); /* NOTE(review): name clashes with POSIX getline(3) from <stdio.h> on newer systems */
72 1.1 christos static void handlesig(int);
73 1.1 christos static FILE *newfile(void);
74 1.1 christos static void toomuch(FILE *, long);
75 1.1 christos static void usage(void) __attribute__((__noreturn__));
76 1.1 christos
77 1.1 christos /*
78 1.1 christos * Command line options (set once by main() while parsing argv)
79 1.1 christos */
80 1.1 christos const char *prefix; /* Output file name prefix (-f, default "xx") */
81 1.1 christos long sufflen; /* Number of decimal digits for suffix (-n, default 2) */
82 1.1 christos int sflag; /* -s: suppress printing of output file sizes */
83 1.1 christos int kflag; /* -k: keep output files if an error occurs */
84 1.1 christos
85 1.1 christos /*
86 1.1 christos * Other miscellaneous globals (XXX too many)
87 1.1 christos */
88 1.1 christos long lineno; /* Number of input lines consumed via getline() so far */
89 1.1 christos long reps; /* Number of repetitions ({n}) for the current pattern */
90 1.1 christos long nfiles; /* Number of output files created so far */
91 1.1 christos long maxfiles; /* Maximum number of files we can create (10^sufflen) */
92 1.1 christos char currfile[PATH_MAX]; /* Name of the most recently created output file */
93 1.1 christos const char *infn; /* Name of the input file ("stdin" for "-") */
94 1.1 christos FILE *infile; /* Input file handle */
95 1.1 christos FILE *overfile; /* Overflow file set up by toomuch(), or NULL */
96 1.1 christos off_t truncofs; /* Offset overfile should be truncated at */
97 1.1 christos int doclean; /* Should cleanup() remove output? */
98 1.1 christos
99 1.1 christos int
100 1.1 christos main(int argc, char *argv[])
101 1.1 christos {
102 1.1 christos struct sigaction sa;
103 1.1 christos long i;
104 1.1 christos int ch;
105 1.1 christos const char *expr;
106 1.1 christos char *ep, *p;
107 1.1 christos FILE *ofp;
108 1.1 christos
109 1.1 christos (void)setlocale(LC_ALL, "");
110 1.1 christos
111 1.1 christos kflag = sflag = 0;
112 1.1 christos prefix = "xx";
113 1.1 christos sufflen = 2;
114 1.1 christos while ((ch = getopt(argc, argv, "ksf:n:")) > 0) {
115 1.1 christos switch (ch) {
116 1.1 christos case 'f':
117 1.1 christos prefix = optarg;
118 1.1 christos break;
119 1.1 christos case 'k':
120 1.1 christos kflag = 1;
121 1.1 christos break;
122 1.1 christos case 'n':
123 1.1 christos errno = 0;
124 1.1 christos sufflen = strtol(optarg, &ep, 10);
125 1.1 christos if (sufflen <= 0 || *ep != '\0' || errno != 0)
126 1.1 christos errx(1, "%s: bad suffix length", optarg);
127 1.1 christos break;
128 1.1 christos case 's':
129 1.1 christos sflag = 1;
130 1.1 christos break;
131 1.1 christos default:
132 1.1 christos usage();
133 1.1 christos /*NOTREACHED*/
134 1.1 christos }
135 1.1 christos }
136 1.1 christos
137 1.1 christos if (sufflen + strlen(prefix) >= PATH_MAX)
138 1.1 christos errx(1, "name too long");
139 1.1 christos
140 1.1 christos argc -= optind;
141 1.1 christos argv += optind;
142 1.1 christos
143 1.1 christos if ((infn = *argv++) == NULL)
144 1.1 christos usage();
145 1.1 christos if (strcmp(infn, "-") == 0) {
146 1.1 christos infile = stdin;
147 1.1 christos infn = "stdin";
148 1.1 christos } else if ((infile = fopen(infn, "r")) == NULL)
149 1.1 christos err(1, "%s", infn);
150 1.1 christos
151 1.1 christos if (!kflag) {
152 1.1 christos doclean = 1;
153 1.1 christos (void)atexit(cleanup);
154 1.1 christos sa.sa_flags = 0;
155 1.1 christos sa.sa_handler = handlesig;
156 1.1 christos (void)sigemptyset(&sa.sa_mask);
157 1.1 christos (void)sigaddset(&sa.sa_mask, SIGHUP);
158 1.1 christos (void)sigaddset(&sa.sa_mask, SIGINT);
159 1.1 christos (void)sigaddset(&sa.sa_mask, SIGTERM);
160 1.1 christos (void)sigaction(SIGHUP, &sa, NULL);
161 1.1 christos (void)sigaction(SIGINT, &sa, NULL);
162 1.1 christos (void)sigaction(SIGTERM, &sa, NULL);
163 1.1 christos }
164 1.1 christos
165 1.1 christos lineno = 0;
166 1.1 christos nfiles = 0;
167 1.1 christos truncofs = 0;
168 1.1 christos overfile = NULL;
169 1.1 christos
170 1.1 christos /* Ensure 10^sufflen < LONG_MAX. */
171 1.1 christos for (maxfiles = 1, i = 0; i < sufflen; i++) {
172 1.1 christos if (maxfiles > LONG_MAX / 10)
173 1.1 christos errx(1, "%ld: suffix too long (limit %ld)",
174 1.1 christos sufflen, i);
175 1.1 christos maxfiles *= 10;
176 1.1 christos }
177 1.1 christos
178 1.1 christos /* Create files based on supplied patterns. */
179 1.1 christos while (nfiles < maxfiles - 1 && (expr = *argv++) != NULL) {
180 1.1 christos /* Look ahead & see if this pattern has any repetitions. */
181 1.1 christos if (*argv != NULL && **argv == '{') {
182 1.1 christos errno = 0;
183 1.1 christos reps = strtol(*argv + 1, &ep, 10);
184 1.1 christos if (reps < 0 || *ep != '}' || errno != 0)
185 1.1 christos errx(1, "%s: bad repetition count", *argv + 1);
186 1.1 christos argv++;
187 1.1 christos } else
188 1.1 christos reps = 0;
189 1.1 christos
190 1.1 christos if (*expr == '/' || *expr == '%') {
191 1.1 christos do
192 1.1 christos do_rexp(expr);
193 1.1 christos while (reps-- != 0 && nfiles < maxfiles - 1);
194 1.1 christos } else if (isdigit((unsigned char)*expr))
195 1.1 christos do_lineno(expr);
196 1.1 christos else
197 1.1 christos errx(1, "%s: unrecognised pattern", expr);
198 1.1 christos }
199 1.1 christos
200 1.1 christos /* Copy the rest into a new file. */
201 1.1 christos if (!feof(infile)) {
202 1.1 christos ofp = newfile();
203 1.1 christos while ((p = getline()) != NULL && fputs(p, ofp) == 0)
204 1.1 christos ;
205 1.1 christos if (!sflag)
206 1.1 christos (void)printf("%jd\n", (intmax_t)ftello(ofp));
207 1.1 christos if (fclose(ofp) != 0)
208 1.1 christos err(1, "%s", currfile);
209 1.1 christos }
210 1.1 christos
211 1.1 christos toomuch(NULL, 0L);
212 1.1 christos doclean = 0;
213 1.1 christos
214 1.1 christos return (0);
215 1.1 christos }
216 1.1 christos
/* Write the command synopsis to standard error and exit with status 1. */
static void
usage(void)
{
	const char *progname;

	progname = getprogname();
	(void)fprintf(stderr,
	    "Usage: %s [-ks] [-f prefix] [-n number] file args ...\n",
	    progname);
	exit(1);
}
225 1.1 christos
/*
 * Signal handler: announce the caught signal on standard error, remove
 * the output via cleanup() and terminate immediately with status 2.
 *
 * sig is the signal number being delivered.
 */
static void
handlesig(int sig)
{
	char text[BUFSIZ];
	const char *signame;
	size_t n;

	signame = strsignal(sig);
	n = snprintf(text, sizeof(text), "%s: Caught %s, cleaning up\n",
	    getprogname(), signame);
	/* Skip the notice altogether if it did not fit in the buffer. */
	if (n < sizeof(text))
		(void)write(STDERR_FILENO, text, n);
	cleanup();
	_exit(2);
}
239 1.1 christos
240 1.1 christos /* Create a new output file. */
241 1.1 christos static FILE *
242 1.1 christos newfile(void)
243 1.1 christos {
244 1.1 christos FILE *fp;
245 1.1 christos
246 1.1 christos if ((size_t)snprintf(currfile, sizeof(currfile), "%s%0*ld", prefix,
247 1.1 christos (int)sufflen, nfiles) >= sizeof(currfile))
248 1.1 christos errx(1, "%s: %s", currfile, strerror(ENAMETOOLONG));
249 1.1 christos if ((fp = fopen(currfile, "w+")) == NULL)
250 1.1 christos err(1, "%s", currfile);
251 1.1 christos nfiles++;
252 1.1 christos
253 1.1 christos return (fp);
254 1.1 christos }
255 1.1 christos
256 1.1 christos /* Remove partial output, called before exiting. */
257 1.1 christos static void
258 1.1 christos cleanup(void)
259 1.1 christos {
260 1.1 christos char fnbuf[PATH_MAX];
261 1.1 christos long i;
262 1.1 christos
263 1.1 christos if (!doclean)
264 1.1 christos return;
265 1.1 christos
266 1.1 christos /*
267 1.1 christos * NOTE: One cannot portably assume to be able to call snprintf()
268 1.1 christos * from inside a signal handler. It does, however, appear to be safe
269 1.1 christos * to do on FreeBSD and NetBSD. The solution to this problem is worse
270 1.1 christos * than the problem itself.
271 1.1 christos */
272 1.1 christos
273 1.1 christos for (i = 0; i < nfiles; i++) {
274 1.1 christos (void)snprintf(fnbuf, sizeof(fnbuf), "%s%0*ld", prefix,
275 1.1 christos (int)sufflen, i);
276 1.1 christos (void)unlink(fnbuf);
277 1.1 christos }
278 1.1 christos }
279 1.1 christos
280 1.1 christos /* Read a line from the input into a static buffer. */
281 1.1 christos static char *
282 1.1 christos getline(void)
283 1.1 christos {
284 1.1 christos static char lbuf[LINE_MAX];
285 1.1 christos FILE *src;
286 1.1 christos
287 1.1 christos src = overfile != NULL ? overfile : infile;
288 1.1 christos
289 1.1 christos again: if (fgets(lbuf, sizeof(lbuf), src) == NULL) {
290 1.1 christos if (src == overfile) {
291 1.1 christos src = infile;
292 1.1 christos goto again;
293 1.1 christos }
294 1.1 christos return (NULL);
295 1.1 christos }
296 1.1 christos if (ferror(src))
297 1.1 christos err(1, "%s", infn);
298 1.1 christos lineno++;
299 1.1 christos
300 1.1 christos return (lbuf);
301 1.1 christos }
302 1.1 christos
303 1.1 christos /* Conceptually rewind the input (as obtained by getline()) back `n' lines. */
304 1.1 christos static void
305 1.1 christos toomuch(FILE *ofp, long n)
306 1.1 christos {
307 1.1 christos char buf[BUFSIZ];
308 1.1 christos size_t i, nread;
309 1.1 christos
310 1.1 christos if (overfile != NULL) {
311 1.1 christos /*
312 1.1 christos * Truncate the previous file we overflowed into back to
313 1.1 christos * the correct length, close it.
314 1.1 christos */
315 1.1 christos if (fflush(overfile) != 0)
316 1.1 christos err(1, "overflow");
317 1.1 christos if (ftruncate(fileno(overfile), truncofs) != 0)
318 1.1 christos err(1, "overflow");
319 1.1 christos if (fclose(overfile) != 0)
320 1.1 christos err(1, "overflow");
321 1.1 christos overfile = NULL;
322 1.1 christos }
323 1.1 christos
324 1.1 christos if (n == 0)
325 1.1 christos /* Just tidying up */
326 1.1 christos return;
327 1.1 christos
328 1.1 christos lineno -= n;
329 1.1 christos
330 1.1 christos /*
331 1.1 christos * Wind the overflow file backwards to `n' lines before the
332 1.1 christos * current one.
333 1.1 christos */
334 1.1 christos do {
335 1.1 christos if (ftello(ofp) < (off_t)sizeof(buf))
336 1.1 christos rewind(ofp);
337 1.1 christos else
338 1.1 christos (void)fseeko(ofp, -(off_t)sizeof(buf), SEEK_CUR);
339 1.1 christos if (ferror(ofp))
340 1.1 christos errx(1, "%s: can't seek", currfile);
341 1.1 christos if ((nread = fread(buf, 1, sizeof(buf), ofp)) == 0)
342 1.1 christos errx(1, "can't read overflowed output");
343 1.1 christos if (fseeko(ofp, -(off_t)nread, SEEK_CUR) != 0)
344 1.1 christos err(1, "%s", currfile);
345 1.1 christos for (i = 1; i <= nread; i++)
346 1.1 christos if (buf[nread - i] == '\n' && n-- == 0)
347 1.1 christos break;
348 1.1 christos if (ftello(ofp) == 0)
349 1.1 christos break;
350 1.1 christos } while (n > 0);
351 1.1 christos if (fseeko(ofp, (off_t)nread - i + 1, SEEK_CUR) != 0)
352 1.1 christos err(1, "%s", currfile);
353 1.1 christos
354 1.1 christos /*
355 1.1 christos * getline() will read from here. Next call will truncate to
356 1.1 christos * truncofs in this file.
357 1.1 christos */
358 1.1 christos overfile = ofp;
359 1.1 christos truncofs = ftello(overfile);
360 1.1 christos }
361 1.1 christos
362 1.1 christos /* Handle splits for /regexp/ and %regexp% patterns. */
363 1.1 christos static void
364 1.1 christos do_rexp(const char *expr)
365 1.1 christos {
366 1.1 christos regex_t cre;
367 1.1 christos intmax_t nwritten;
368 1.1 christos long ofs;
369 1.1 christos int first;
370 1.1 christos char *ecopy, *ep, *p, *pofs, *re;
371 1.1 christos FILE *ofp;
372 1.1 christos
373 1.1 christos if ((ecopy = strdup(expr)) == NULL)
374 1.1 christos err(1, "strdup");
375 1.1 christos
376 1.1 christos re = ecopy + 1;
377 1.1 christos if ((pofs = strrchr(ecopy, *expr)) == NULL || pofs[-1] == '\\')
378 1.1 christos errx(1, "%s: missing trailing %c", expr, *expr);
379 1.1 christos *pofs++ = '\0';
380 1.1 christos
381 1.1 christos if (*pofs != '\0') {
382 1.1 christos errno = 0;
383 1.1 christos ofs = strtol(pofs, &ep, 10);
384 1.1 christos if (*ep != '\0' || errno != 0)
385 1.1 christos errx(1, "%s: bad offset", pofs);
386 1.1 christos } else
387 1.1 christos ofs = 0;
388 1.1 christos
389 1.1 christos if (regcomp(&cre, re, REG_BASIC|REG_NOSUB) != 0)
390 1.1 christos errx(1, "%s: bad regular expression", re);
391 1.1 christos
392 1.1 christos if (*expr == '/')
393 1.1 christos /* /regexp/: Save results to a file. */
394 1.1 christos ofp = newfile();
395 1.1 christos else {
396 1.1 christos /* %regexp%: Make a temporary file for overflow. */
397 1.1 christos if ((ofp = tmpfile()) == NULL)
398 1.1 christos err(1, "tmpfile");
399 1.1 christos }
400 1.1 christos
401 1.1 christos /* Read and output lines until we get a match. */
402 1.1 christos first = 1;
403 1.1 christos while ((p = getline()) != NULL) {
404 1.1 christos if (fputs(p, ofp) != 0)
405 1.1 christos break;
406 1.1 christos if (!first && regexec(&cre, p, 0, NULL, 0) == 0)
407 1.1 christos break;
408 1.1 christos first = 0;
409 1.1 christos }
410 1.1 christos
411 1.1 christos if (p == NULL)
412 1.1 christos errx(1, "%s: no match", re);
413 1.1 christos
414 1.1 christos if (ofs <= 0) {
415 1.1 christos /*
416 1.1 christos * Negative (or zero) offset: throw back any lines we should
417 1.1 christos * not have read yet.
418 1.1 christos */
419 1.1 christos if (p != NULL) {
420 1.1 christos toomuch(ofp, -ofs + 1);
421 1.1 christos nwritten = (intmax_t)truncofs;
422 1.1 christos } else
423 1.1 christos nwritten = (intmax_t)ftello(ofp);
424 1.1 christos } else {
425 1.1 christos /*
426 1.1 christos * Positive offset: copy the requested number of lines
427 1.1 christos * after the match.
428 1.1 christos */
429 1.1 christos while (--ofs > 0 && (p = getline()) != NULL)
430 1.1 christos if (fputs(p, ofp) != 0)
431 1.1 christos break;
432 1.1 christos toomuch(NULL, 0L);
433 1.1 christos nwritten = (intmax_t)ftello(ofp);
434 1.1 christos if (fclose(ofp) != 0)
435 1.1 christos err(1, "%s", currfile);
436 1.1 christos }
437 1.1 christos
438 1.1 christos if (!sflag && *expr == '/')
439 1.1 christos (void)printf("%jd\n", nwritten);
440 1.1 christos
441 1.1 christos regfree(&cre);
442 1.1 christos free(ecopy);
443 1.1 christos }
444 1.1 christos
445 1.1 christos /* Handle splits based on line number. */
446 1.1 christos static void
447 1.1 christos do_lineno(const char *expr)
448 1.1 christos {
449 1.1 christos long lastline, tgtline;
450 1.1 christos char *ep, *p;
451 1.1 christos FILE *ofp;
452 1.1 christos
453 1.1 christos errno = 0;
454 1.1 christos tgtline = strtol(expr, &ep, 10);
455 1.1 christos if (tgtline <= 0 || errno != 0 || *ep != '\0')
456 1.1 christos errx(1, "%s: bad line number", expr);
457 1.1 christos lastline = tgtline;
458 1.1 christos if (lastline <= lineno)
459 1.1 christos errx(1, "%s: can't go backwards", expr);
460 1.1 christos
461 1.1 christos while (nfiles < maxfiles - 1) {
462 1.1 christos ofp = newfile();
463 1.1 christos while (lineno + 1 != lastline) {
464 1.1 christos if ((p = getline()) == NULL)
465 1.1 christos errx(1, "%ld: out of range", lastline);
466 1.1 christos if (fputs(p, ofp) != 0)
467 1.1 christos break;
468 1.1 christos }
469 1.1 christos if (!sflag)
470 1.1 christos (void)printf("%jd\n", (intmax_t)ftello(ofp));
471 1.1 christos if (fclose(ofp) != 0)
472 1.1 christos err(1, "%s", currfile);
473 1.1 christos if (reps-- == 0)
474 1.1 christos break;
475 1.1 christos lastline += tgtline;
476 1.1 christos }
477 1.1 christos }
478