1 /* $NetBSD: cut.c,v 1.32 2025/03/09 05:04:54 gutteridge Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.32 2025/03/09 05:04:54 gutteridge Exp $"); 46 #endif /* not lint */ 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 #include <util.h> 58 #include <wchar.h> 59 #include <sys/param.h> 60 61 static int bflag; 62 static int cflag; 63 static char dchar; 64 static int dflag; 65 static int fflag; 66 static int nflag; 67 static int sflag; 68 69 static void b_cut(FILE *, const char *); 70 static void b_n_cut(FILE *, const char *); 71 static void c_cut(FILE *, const char *); 72 static void f_cut(FILE *, const char *); 73 static void get_list(char *); 74 static void usage(void) __dead; 75 76 int 77 main(int argc, char *argv[]) 78 { 79 FILE *fp; 80 void (*fcn)(FILE *, const char *); 81 int ch, rval; 82 83 fcn = NULL; 84 (void)setlocale(LC_ALL, ""); 85 86 dchar = '\t'; /* default delimiter is \t */ 87 88 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 89 switch(ch) { 90 case 'b': 91 fcn = b_cut; 92 get_list(optarg); 93 bflag = 1; 94 break; 95 case 'c': 96 fcn = c_cut; 97 get_list(optarg); 98 cflag = 1; 99 break; 100 case 'd': 101 dchar = *optarg; 102 dflag = 1; 103 break; 104 case 'f': 105 get_list(optarg); 106 fcn = f_cut; 107 fflag = 1; 108 break; 109 case 's': 110 sflag = 1; 111 break; 112 case 'n': 113 nflag = 1; 114 break; 115 case '?': 116 default: 117 usage(); 118 } 119 argc -= optind; 120 argv += optind; 121 122 if (bflag + cflag + fflag != 1 || 123 (nflag && !bflag) || 124 ((dflag || sflag) && !fflag)) 125 usage(); 126 127 if (nflag) 128 fcn = b_n_cut; 129 130 rval = 0; 131 if (*argv) 132 for (; *argv; ++argv) { 133 if (strcmp(*argv, "-") == 0) 134 fcn(stdin, "stdin"); 135 else { 136 if ((fp = fopen(*argv, "r"))) { 137 fcn(fp, *argv); 138 (void)fclose(fp); 139 } else { 140 rval = 1; 141 warn("%s", *argv); 142 } 143 } 144 } 145 else 146 fcn(stdin, "stdin"); 147 return(rval); 148 } 149 150 static size_t autostart, autostop, maxval; 151 152 static char *positions = NULL; 153 static size_t numpositions = 0; 154 #define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ 155 156 static void 157 get_list(char *list) 158 { 159 size_t setautostart, start, stop; 160 char *pos; 161 char *p; 162 163 if (positions == NULL) { 164 numpositions = ALLOC_CHUNK; 165 positions = ecalloc(numpositions, sizeof(*positions)); 166 } 167 168 /* 169 * Set a byte in the positions array to indicate if a field or 170 * column is to be selected; use +1, it's 1-based, not 0-based. 171 * Numbers and number ranges may be overlapping, repeated, and in 172 * any order. We handle "-3-5" although there's no real reason to. 173 */ 174 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 175 setautostart = start = stop = 0; 176 if (*p == '-') { 177 ++p; 178 setautostart = 1; 179 } 180 if (isdigit((unsigned char)*p)) { 181 start = stop = strtol(p, &p, 10); 182 if (setautostart && start > autostart) 183 autostart = start; 184 } 185 if (*p == '-') { 186 if (isdigit((unsigned char)p[1])) 187 stop = strtol(p + 1, &p, 10); 188 if (*p == '-') { 189 ++p; 190 if (!autostop || autostop > stop) 191 autostop = stop; 192 } 193 } 194 if (*p) 195 errx(1, "[-bcf] list: illegal list value"); 196 if (!stop || !start) 197 errx(1, "[-bcf] list: values may not include zero"); 198 if (stop + 1 > numpositions) { 199 size_t newsize; 200 newsize = roundup(stop + 1, ALLOC_CHUNK); 201 positions = erealloc(positions, newsize); 202 (void)memset(positions + numpositions, 0, 203 newsize - numpositions); 204 numpositions = newsize; 205 } 206 if (maxval < stop) 207 maxval = stop; 208 for (pos = positions + start; start++ <= stop; pos++) 209 *pos = 1; 210 } 211 212 /* overlapping ranges */ 213 if (autostop && maxval > autostop) 214 maxval = autostop; 215 216 /* set autostart */ 217 if (autostart) 218 (void)memset(positions + 1, '1', autostart); 219 } 220 221 /* 222 * Cut based on byte positions, taking care not to split multibyte characters. 223 * Although this function also handles the case where -n is not specified, 224 * b_cut() ought to be much faster. 225 */ 226 static void 227 b_n_cut(FILE *fp, const char *fname) 228 { 229 size_t col, i, lbuflen; 230 char *lbuf; 231 int canwrite, clen, warned; 232 mbstate_t mbs; 233 234 memset(&mbs, 0, sizeof(mbs)); 235 warned = 0; 236 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 237 for (col = 0; lbuflen > 0; col += clen) { 238 if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 239 if (!warned) { 240 warn("%s", fname); 241 warned = 1; 242 } 243 memset(&mbs, 0, sizeof(mbs)); 244 clen = 1; 245 } 246 if (clen == 0 || *lbuf == '\n') 247 break; 248 if (col < maxval && !positions[1 + col]) { 249 /* 250 * Print the character if (1) after an initial 251 * segment of un-selected bytes, the rest of 252 * it is selected, and (2) the last byte is 253 * selected. 254 */ 255 i = col; 256 while (i < col + clen && i < maxval && 257 !positions[1 + i]) 258 i++; 259 canwrite = i < col + clen; 260 for (; i < col + clen && i < maxval; i++) 261 canwrite &= positions[1 + i]; 262 if (canwrite) 263 fwrite(lbuf, 1, clen, stdout); 264 } else { 265 /* 266 * Print the character if all of it has 267 * been selected. 268 */ 269 canwrite = 1; 270 for (i = col; i < col + clen; i++) 271 if ((i >= maxval && !autostop) || 272 (i < maxval && !positions[1 + i])) { 273 canwrite = 0; 274 break; 275 } 276 if (canwrite) 277 fwrite(lbuf, 1, clen, stdout); 278 } 279 lbuf += clen; 280 lbuflen -= clen; 281 } 282 if (lbuflen > 0) 283 putchar('\n'); 284 } 285 } 286 287 static void 288 /*ARGSUSED*/ 289 f_cut(FILE *fp, const char *fname __unused) 290 { 291 int ch, field, isdelim; 292 char *pos, *p, sep; 293 int output; 294 size_t len; 295 char *lbuf, *tbuf; 296 297 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) { 298 output = 0; 299 if (lbuf[len - 1] != '\n') { 300 /* no newline at the end of the last line so add one */ 301 if ((tbuf = (char *)malloc(len + 1)) == NULL) 302 err(1, NULL); 303 (void)memcpy(tbuf, lbuf, len); 304 tbuf[len++] = '\n'; 305 lbuf = tbuf; 306 } 307 for (isdelim = 0, p = lbuf;; ++p) { 308 ch = *p; 309 /* this should work if newline is delimiter */ 310 if (ch == sep) 311 isdelim = 1; 312 if (ch == '\n') { 313 if (!isdelim && !sflag) 314 (void)fwrite(lbuf, len, 1, stdout); 315 break; 316 } 317 } 318 if (!isdelim) 319 continue; 320 321 pos = positions + 1; 322 for (field = maxval, p = lbuf; field; --field, ++pos) { 323 if (*pos) { 324 if (output++) 325 (void)putchar(sep); 326 while ((ch = *p++) != '\n' && ch != sep) 327 (void)putchar(ch); 328 } else { 329 while ((ch = *p++) != '\n' && ch != sep) 330 continue; 331 } 332 if (ch == '\n') 333 break; 334 } 335 if (ch != '\n') { 336 if (autostop) { 337 if (output) 338 (void)putchar(sep); 339 for (; (ch = *p) != '\n'; ++p) 340 (void)putchar(ch); 341 } else 342 for (; (ch = *p) != '\n'; ++p); 343 } 344 (void)putchar('\n'); 345 if (tbuf) { 346 free(tbuf); 347 tbuf = NULL; 348 } 349 } 350 if (tbuf) 351 free(tbuf); 352 } 353 354 static void 355 usage(void) 356 { 357 (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 358 "\tcut -c list [file ...]\n" 359 "\tcut -f list [-d string] [-s] [file ...]\n"); 360 exit(1); 361 } 362 363 /* make b_cut(): */ 364 #define CUT_BYTE 1 365 #include "x_cut.c" 366 #undef CUT_BYTE 367 368 /* make c_cut(): */ 369 #define CUT_BYTE 0 370 #include "x_cut.c" 371 #undef CUT_BYTE 372