1 1.33 andvar /* $NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $ */ 2 1.4 jtc 3 1.1 cgd /* 4 1.4 jtc * Copyright (c) 1987, 1993, 1994 5 1.4 jtc * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * Redistribution and use in source and binary forms, with or without 8 1.1 cgd * modification, are permitted provided that the following conditions 9 1.1 cgd * are met: 10 1.1 cgd * 1. Redistributions of source code must retain the above copyright 11 1.1 cgd * notice, this list of conditions and the following disclaimer. 12 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 cgd * notice, this list of conditions and the following disclaimer in the 14 1.1 cgd * documentation and/or other materials provided with the distribution. 15 1.20 agc * 3. Neither the name of the University nor the names of its contributors 16 1.1 cgd * may be used to endorse or promote products derived from this software 17 1.1 cgd * without specific prior written permission. 18 1.1 cgd * 19 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.1 cgd * SUCH DAMAGE. 30 1.1 cgd */ 31 1.1 cgd 32 1.6 lukem #include <sys/cdefs.h> 33 1.1 cgd #ifndef lint 34 1.24 lukem __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\ 35 1.24 lukem The Regents of the University of California. All rights reserved."); 36 1.1 cgd #endif /* not lint */ 37 1.1 cgd 38 1.1 cgd #ifndef lint 39 1.4 jtc #if 0 40 1.5 jtc static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94"; 41 1.4 jtc #endif 42 1.33 andvar __RCSID("$NetBSD: split.c,v 1.33 2024/02/09 22:08:38 andvar Exp $"); 43 1.1 cgd #endif /* not lint */ 44 1.1 cgd 45 1.1 cgd #include <sys/param.h> 46 1.27 christos #include <sys/stat.h> 47 1.4 jtc 48 1.4 jtc #include <ctype.h> 49 1.4 jtc #include <err.h> 50 1.12 bjh21 #include <errno.h> 51 1.4 jtc #include <fcntl.h> 52 1.1 cgd #include <stdio.h> 53 1.4 jtc #include <stdlib.h> 54 1.4 jtc #include <string.h> 55 1.4 jtc #include <unistd.h> 56 1.4 jtc 57 1.9 christos #define DEFLINE 1000 /* Default num lines per file. */ 58 1.4 jtc 59 1.28 jschauma static int file_open; /* If a file is open. */ 60 1.16 bjh21 static int ifd = STDIN_FILENO, ofd = -1; /* Input/output file descriptors. */ 61 1.12 bjh21 static char *fname; /* File name prefix. */ 62 1.28 jschauma static size_t sfxlen = 2; /* Suffix length. */ 63 1.29 jschauma static int autosfx = 1; /* Whether to auto-extend the suffix length. */ 64 1.9 christos 65 1.9 christos static void newfile(void); 66 1.26 joerg static void split1(off_t, int) __dead; 67 1.26 joerg static void split2(off_t) __dead; 68 1.26 joerg static void split3(off_t) __dead; 69 1.23 perry static void usage(void) __dead; 70 1.14 bjh21 static size_t bigwrite(int, void const *, size_t); 71 1.1 cgd 72 1.4 jtc int 73 1.9 christos main(int argc, char *argv[]) 74 1.1 cgd { 75 1.4 jtc int ch; 76 1.4 jtc char *ep, *p; 77 1.18 bjh21 char const *base; 78 1.14 bjh21 off_t bytecnt = 0; /* Byte count to split on. */ 79 1.14 bjh21 off_t numlines = 0; /* Line count to split on. */ 80 1.22 jschauma off_t chunks = 0; /* Number of chunks to split into. */ 81 1.4 jtc 82 1.31 jschauma while ((ch = getopt(argc, argv, "0123456789a:b:l:n:")) != -1) 83 1.4 jtc switch (ch) { 84 1.4 jtc case '0': case '1': case '2': case '3': case '4': 85 1.4 jtc case '5': case '6': case '7': case '8': case '9': 86 1.4 jtc /* 87 1.4 jtc * Undocumented kludge: split was originally designed 88 1.4 jtc * to take a number after a dash. 89 1.4 jtc */ 90 1.4 jtc if (numlines == 0) { 91 1.4 jtc p = argv[optind - 1]; 92 1.4 jtc if (p[0] == '-' && p[1] == ch && !p[2]) 93 1.14 bjh21 p++; 94 1.4 jtc else 95 1.14 bjh21 p = argv[optind] + 1; 96 1.14 bjh21 numlines = strtoull(p, &ep, 10); 97 1.14 bjh21 if (numlines == 0 || *ep != '\0') 98 1.32 jschauma errx(EXIT_FAILURE, "%s: illegal line count.", p); 99 1.1 cgd } 100 1.4 jtc break; 101 1.30 jschauma case 'a': /* Suffix length. */ 102 1.30 jschauma if (!isdigit((unsigned char)optarg[0]) || 103 1.30 jschauma (sfxlen = (size_t)strtoul(optarg, &ep, 10)) == 0 || 104 1.30 jschauma *ep != '\0') 105 1.32 jschauma errx(EXIT_FAILURE, "%s: illegal suffix length.", optarg); 106 1.30 jschauma autosfx = 0; 107 1.30 jschauma break; 108 1.4 jtc case 'b': /* Byte count. */ 109 1.14 bjh21 if (!isdigit((unsigned char)optarg[0]) || 110 1.14 bjh21 (bytecnt = strtoull(optarg, &ep, 10)) == 0 || 111 1.6 lukem (*ep != '\0' && *ep != 'k' && *ep != 'm')) 112 1.32 jschauma errx(EXIT_FAILURE, "%s: illegal byte count.", optarg); 113 1.4 jtc if (*ep == 'k') 114 1.4 jtc bytecnt *= 1024; 115 1.4 jtc else if (*ep == 'm') 116 1.9 christos bytecnt *= 1024 * 1024; 117 1.4 jtc break; 118 1.4 jtc case 'l': /* Line count. */ 119 1.4 jtc if (numlines != 0) 120 1.4 jtc usage(); 121 1.14 bjh21 if (!isdigit((unsigned char)optarg[0]) || 122 1.14 bjh21 (numlines = strtoull(optarg, &ep, 10)) == 0 || 123 1.14 bjh21 *ep != '\0') 124 1.32 jschauma errx(EXIT_FAILURE, "%s: illegal line count.", optarg); 125 1.4 jtc break; 126 1.22 jschauma case 'n': /* Chunks. */ 127 1.22 jschauma if (!isdigit((unsigned char)optarg[0]) || 128 1.22 jschauma (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 129 1.22 jschauma *ep != '\0') 130 1.32 jschauma errx(EXIT_FAILURE, "%s: illegal number of chunks.", optarg); 131 1.22 jschauma break; 132 1.4 jtc default: 133 1.4 jtc usage(); 134 1.4 jtc } 135 1.4 jtc argv += optind; 136 1.4 jtc argc -= optind; 137 1.4 jtc 138 1.15 bjh21 if (*argv != NULL) { 139 1.16 bjh21 if (strcmp(*argv, "-") != 0 && 140 1.16 bjh21 (ifd = open(*argv, O_RDONLY, 0)) < 0) 141 1.32 jschauma err(EXIT_FAILURE, "%s", *argv); 142 1.15 bjh21 ++argv; 143 1.15 bjh21 } 144 1.10 bjh21 145 1.18 bjh21 146 1.18 bjh21 base = (*argv != NULL) ? *argv++ : "x"; 147 1.19 bjh21 if ((fname = malloc(strlen(base) + sfxlen + 1)) == NULL) 148 1.18 bjh21 err(EXIT_FAILURE, NULL); 149 1.18 bjh21 (void)strcpy(fname, base); /* File name prefix. */ 150 1.10 bjh21 151 1.4 jtc if (*argv != NULL) 152 1.4 jtc usage(); 153 1.4 jtc 154 1.4 jtc if (numlines == 0) 155 1.4 jtc numlines = DEFLINE; 156 1.22 jschauma else if (bytecnt || chunks) 157 1.22 jschauma usage(); 158 1.22 jschauma 159 1.22 jschauma if (bytecnt && chunks) 160 1.4 jtc usage(); 161 1.4 jtc 162 1.13 bjh21 if (bytecnt) 163 1.22 jschauma split1(bytecnt, 0); 164 1.22 jschauma else if (chunks) 165 1.22 jschauma split3(chunks); 166 1.22 jschauma else 167 1.9 christos split2(numlines); 168 1.13 bjh21 169 1.9 christos return 0; 170 1.1 cgd } 171 1.1 cgd 172 1.1 cgd /* 173 1.1 cgd * split1 -- 174 1.4 jtc * Split the input by bytes. 175 1.1 cgd */ 176 1.9 christos static void 177 1.22 jschauma split1(off_t bytecnt, int maxcnt) 178 1.1 cgd { 179 1.14 bjh21 off_t bcnt; 180 1.14 bjh21 ssize_t dist, len; 181 1.4 jtc char *C; 182 1.9 christos char bfr[MAXBSIZE]; 183 1.22 jschauma int nfiles; 184 1.22 jschauma 185 1.22 jschauma nfiles = 0; 186 1.1 cgd 187 1.1 cgd for (bcnt = 0;;) 188 1.4 jtc switch (len = read(ifd, bfr, MAXBSIZE)) { 189 1.1 cgd case 0: 190 1.32 jschauma exit(EXIT_SUCCESS); 191 1.13 bjh21 /* NOTREACHED */ 192 1.4 jtc case -1: 193 1.32 jschauma err(EXIT_FAILURE, "read"); 194 1.4 jtc /* NOTREACHED */ 195 1.1 cgd default: 196 1.1 cgd if (!file_open) { 197 1.22 jschauma if (!maxcnt || (nfiles < maxcnt)) { 198 1.22 jschauma newfile(); 199 1.22 jschauma nfiles++; 200 1.22 jschauma file_open = 1; 201 1.22 jschauma } 202 1.1 cgd } 203 1.1 cgd if (bcnt + len >= bytecnt) { 204 1.14 bjh21 /* LINTED: bytecnt - bcnt <= len */ 205 1.1 cgd dist = bytecnt - bcnt; 206 1.25 lukem if (bigwrite(ofd, bfr, dist) != (size_t)dist) 207 1.32 jschauma err(EXIT_FAILURE, "write"); 208 1.1 cgd len -= dist; 209 1.4 jtc for (C = bfr + dist; len >= bytecnt; 210 1.14 bjh21 /* LINTED: bytecnt <= len */ 211 1.4 jtc len -= bytecnt, C += bytecnt) { 212 1.22 jschauma if (!maxcnt || (nfiles < maxcnt)) { 213 1.22 jschauma newfile(); 214 1.22 jschauma nfiles++; 215 1.22 jschauma } 216 1.14 bjh21 /* LINTED: as above */ 217 1.9 christos if (bigwrite(ofd, 218 1.25 lukem C, bytecnt) != (size_t)bytecnt) 219 1.32 jschauma err(EXIT_FAILURE, "write"); 220 1.1 cgd } 221 1.1 cgd if (len) { 222 1.22 jschauma if (!maxcnt || (nfiles < maxcnt)) { 223 1.22 jschauma newfile(); 224 1.22 jschauma nfiles++; 225 1.22 jschauma } 226 1.14 bjh21 /* LINTED: len >= 0 */ 227 1.25 lukem if (bigwrite(ofd, C, len) != (size_t)len) 228 1.32 jschauma err(EXIT_FAILURE, "write"); 229 1.4 jtc } else 230 1.4 jtc file_open = 0; 231 1.1 cgd bcnt = len; 232 1.4 jtc } else { 233 1.1 cgd bcnt += len; 234 1.14 bjh21 /* LINTED: len >= 0 */ 235 1.25 lukem if (bigwrite(ofd, bfr, len) != (size_t)len) 236 1.32 jschauma err(EXIT_FAILURE, "write"); 237 1.1 cgd } 238 1.1 cgd } 239 1.1 cgd } 240 1.1 cgd 241 1.1 cgd /* 242 1.1 cgd * split2 -- 243 1.4 jtc * Split the input by lines. 244 1.1 cgd */ 245 1.9 christos static void 246 1.14 bjh21 split2(off_t numlines) 247 1.1 cgd { 248 1.14 bjh21 off_t lcnt; 249 1.14 bjh21 size_t bcnt; 250 1.9 christos ssize_t len; 251 1.4 jtc char *Ce, *Cs; 252 1.9 christos char bfr[MAXBSIZE]; 253 1.1 cgd 254 1.1 cgd for (lcnt = 0;;) 255 1.4 jtc switch (len = read(ifd, bfr, MAXBSIZE)) { 256 1.1 cgd case 0: 257 1.32 jschauma exit(EXIT_SUCCESS); 258 1.13 bjh21 /* NOTREACHED */ 259 1.4 jtc case -1: 260 1.32 jschauma err(EXIT_FAILURE, "read"); 261 1.4 jtc /* NOTREACHED */ 262 1.1 cgd default: 263 1.1 cgd if (!file_open) { 264 1.1 cgd newfile(); 265 1.4 jtc file_open = 1; 266 1.1 cgd } 267 1.1 cgd for (Cs = Ce = bfr; len--; Ce++) 268 1.1 cgd if (*Ce == '\n' && ++lcnt == numlines) { 269 1.1 cgd bcnt = Ce - Cs + 1; 270 1.25 lukem if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) 271 1.32 jschauma err(EXIT_FAILURE, "write"); 272 1.1 cgd lcnt = 0; 273 1.1 cgd Cs = Ce + 1; 274 1.1 cgd if (len) 275 1.1 cgd newfile(); 276 1.1 cgd else 277 1.4 jtc file_open = 0; 278 1.1 cgd } 279 1.1 cgd if (Cs < Ce) { 280 1.1 cgd bcnt = Ce - Cs; 281 1.25 lukem if (bigwrite(ofd, Cs, bcnt) != (size_t)bcnt) 282 1.32 jschauma err(EXIT_FAILURE, "write"); 283 1.1 cgd } 284 1.1 cgd } 285 1.1 cgd } 286 1.1 cgd 287 1.1 cgd /* 288 1.22 jschauma * split3 -- 289 1.22 jschauma * Split the input into specified number of chunks 290 1.22 jschauma */ 291 1.22 jschauma static void 292 1.22 jschauma split3(off_t chunks) 293 1.22 jschauma { 294 1.22 jschauma struct stat sb; 295 1.22 jschauma 296 1.22 jschauma if (fstat(ifd, &sb) == -1) { 297 1.32 jschauma err(EXIT_FAILURE, "stat"); 298 1.22 jschauma /* NOTREACHED */ 299 1.22 jschauma } 300 1.22 jschauma 301 1.22 jschauma if (chunks > sb.st_size) { 302 1.32 jschauma errx(EXIT_FAILURE, "can't split into more than %d files", 303 1.22 jschauma (int)sb.st_size); 304 1.22 jschauma /* NOTREACHED */ 305 1.22 jschauma } 306 1.22 jschauma 307 1.22 jschauma split1(sb.st_size/chunks, chunks); 308 1.22 jschauma } 309 1.22 jschauma 310 1.22 jschauma /* 311 1.1 cgd * newfile -- 312 1.4 jtc * Open a new output file. 313 1.1 cgd */ 314 1.9 christos static void 315 1.9 christos newfile(void) 316 1.1 cgd { 317 1.9 christos static int fnum; 318 1.1 cgd static char *fpnt; 319 1.10 bjh21 int quot, i; 320 1.1 cgd 321 1.4 jtc if (ofd == -1) { 322 1.18 bjh21 fpnt = fname + strlen(fname); 323 1.18 bjh21 fpnt[sfxlen] = '\0'; 324 1.18 bjh21 } else if (close(ofd) != 0) 325 1.32 jschauma err(EXIT_FAILURE, "%s", fname); 326 1.18 bjh21 327 1.10 bjh21 quot = fnum; 328 1.29 jschauma 329 1.29 jschauma /* If '-a' is not specified, then we automatically expand the 330 1.33 andvar * suffix length to accommodate splitting all input. We do this 331 1.29 jschauma * by moving the suffix pointer (fpnt) forward and incrementing 332 1.29 jschauma * sfxlen by one, thereby yielding an additional two characters 333 1.29 jschauma * and allowing all output files to sort such that 'cat *' yields 334 1.29 jschauma * the input in order. I.e., the order is '... xyy xyz xzaaa 335 1.29 jschauma * xzaab ... xzyzy, xzyzz, xzzaaaa, xzzaaab' and so on. */ 336 1.29 jschauma if (autosfx && (fpnt[0] == 'y') && (strspn(fpnt+1, "z") == strlen(fpnt+1))) { 337 1.29 jschauma if ((fname = realloc(fname, strlen(fname) + sfxlen + 2 + 1)) == NULL) 338 1.29 jschauma err(EXIT_FAILURE, NULL); 339 1.29 jschauma /* NOTREACHED */ 340 1.29 jschauma 341 1.29 jschauma fpnt = fname + strlen(fname) - sfxlen; 342 1.29 jschauma fpnt[sfxlen + 2] = '\0'; 343 1.29 jschauma 344 1.29 jschauma fpnt[0] = 'z'; 345 1.29 jschauma fpnt[1] = 'a'; 346 1.29 jschauma 347 1.29 jschauma /* Basename | Suffix 348 1.29 jschauma * before: 349 1.29 jschauma * x | yz 350 1.29 jschauma * after: 351 1.29 jschauma * xz | a.. */ 352 1.29 jschauma fpnt++; 353 1.29 jschauma sfxlen++; 354 1.29 jschauma 355 1.29 jschauma /* Reset so we start back at all 'a's in our extended suffix. */ 356 1.29 jschauma quot = 0; 357 1.29 jschauma fnum = 0; 358 1.29 jschauma } 359 1.29 jschauma 360 1.10 bjh21 for (i = sfxlen - 1; i >= 0; i--) { 361 1.10 bjh21 fpnt[i] = quot % 26 + 'a'; 362 1.10 bjh21 quot = quot / 26; 363 1.10 bjh21 } 364 1.18 bjh21 if (quot > 0) 365 1.32 jschauma errx(EXIT_FAILURE, "too many files."); 366 1.1 cgd ++fnum; 367 1.17 bjh21 if ((ofd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE)) < 0) 368 1.32 jschauma err(EXIT_FAILURE, "%s", fname); 369 1.1 cgd } 370 1.1 cgd 371 1.14 bjh21 static size_t 372 1.14 bjh21 bigwrite(int fd, const void *buf, size_t len) 373 1.9 christos { 374 1.9 christos const char *ptr = buf; 375 1.14 bjh21 size_t sofar = 0; 376 1.14 bjh21 ssize_t w; 377 1.9 christos 378 1.9 christos while (len != 0) { 379 1.14 bjh21 if ((w = write(fd, ptr, len)) == -1) 380 1.9 christos return sofar; 381 1.9 christos len -= w; 382 1.9 christos ptr += w; 383 1.9 christos sofar += w; 384 1.9 christos } 385 1.9 christos return sofar; 386 1.9 christos } 387 1.9 christos 388 1.9 christos 389 1.9 christos static void 390 1.9 christos usage(void) 391 1.1 cgd { 392 1.4 jtc (void)fprintf(stderr, 393 1.22 jschauma "usage: %s [-b byte_count] [-l line_count] [-n chunk_count] [-a suffix_length] " 394 1.10 bjh21 "[file [prefix]]\n", getprogname()); 395 1.32 jschauma exit(EXIT_FAILURE); 396 1.1 cgd } 397