1 /* $NetBSD: cut.c,v 1.33 2026/04/05 03:03:56 gutteridge Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.33 2026/04/05 03:03:56 gutteridge Exp $"); 46 #endif /* not lint */ 47 48 #include <sys/param.h> 49 50 #include <ctype.h> 51 #include <err.h> 52 #include <errno.h> 53 #include <limits.h> 54 #include <locale.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <unistd.h> 59 #include <util.h> 60 #include <wchar.h> 61 62 static int bflag; 63 static int cflag; 64 static char dchar; 65 static int dflag; 66 static int fflag; 67 static int nflag; 68 static int sflag; 69 70 static void b_cut(FILE *, const char *); 71 static void b_n_cut(FILE *, const char *); 72 static void c_cut(FILE *, const char *); 73 static void f_cut(FILE *, const char *); 74 static void get_list(char *); 75 static void usage(void) __dead; 76 77 int 78 main(int argc, char *argv[]) 79 { 80 FILE *fp; 81 void (*fcn)(FILE *, const char *); 82 int ch, rval; 83 84 fcn = NULL; 85 (void)setlocale(LC_ALL, ""); 86 87 dchar = '\t'; /* default delimiter is \t */ 88 89 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 90 switch(ch) { 91 case 'b': 92 fcn = b_cut; 93 get_list(optarg); 94 bflag = 1; 95 break; 96 case 'c': 97 fcn = c_cut; 98 get_list(optarg); 99 cflag = 1; 100 break; 101 case 'd': 102 dchar = *optarg; 103 dflag = 1; 104 break; 105 case 'f': 106 get_list(optarg); 107 fcn = f_cut; 108 fflag = 1; 109 break; 110 case 's': 111 sflag = 1; 112 break; 113 case 'n': 114 nflag = 1; 115 break; 116 case '?': 117 default: 118 usage(); 119 } 120 argc -= optind; 121 argv += optind; 122 123 if (bflag + cflag + fflag != 1 || 124 (nflag && !bflag) || 125 ((dflag || sflag) && !fflag)) 126 usage(); 127 128 if (nflag) 129 fcn = b_n_cut; 130 131 rval = 0; 132 if (*argv) 133 for (; *argv; ++argv) { 134 if (strcmp(*argv, "-") == 0) 135 fcn(stdin, "stdin"); 136 else { 137 if ((fp = fopen(*argv, "r"))) { 138 fcn(fp, *argv); 139 (void)fclose(fp); 140 } else { 141 rval = 1; 142 warn("%s", *argv); 143 } 144 } 145 } 146 else 147 fcn(stdin, "stdin"); 148 return(rval); 149 } 150 151 static size_t autostart, autostop, maxval; 152 153 static char *positions = NULL; 154 static size_t numpositions = 0; 155 #define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ 156 157 static void 158 get_list(char *list) 159 { 160 size_t setautostart, start, stop; 161 char *p, *pos; 162 163 if (positions == NULL) { 164 numpositions = ALLOC_CHUNK; 165 positions = ecalloc(numpositions, sizeof(*positions)); 166 } 167 168 /* 169 * Set a byte in the positions array to indicate if a field or 170 * column is to be selected; use +1, it's 1-based, not 0-based. 171 * Numbers and number ranges may be overlapping, repeated, and in 172 * any order. We handle "-3-5" although there's no real reason to. 173 */ 174 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 175 setautostart = start = stop = 0; 176 if (*p == '-') { 177 ++p; 178 setautostart = 1; 179 } 180 if (isdigit((unsigned char)*p)) { 181 start = stop = strtol(p, &p, 10); 182 if (setautostart && start > autostart) 183 autostart = start; 184 } 185 if (*p == '-') { 186 if (isdigit((unsigned char)p[1])) 187 stop = strtol(p + 1, &p, 10); 188 if (*p == '-') { 189 ++p; 190 if (!autostop || autostop > stop) 191 autostop = stop; 192 } 193 } 194 if (*p) 195 errx(1, "[-bcf] list: illegal list value"); 196 if (!stop || !start) 197 errx(1, "[-bcf] list: values may not include zero"); 198 if (stop + 1 > numpositions) { 199 size_t newsize; 200 newsize = roundup(stop + 1, ALLOC_CHUNK); 201 positions = erealloc(positions, newsize); 202 (void)memset(positions + numpositions, 0, 203 newsize - numpositions); 204 numpositions = newsize; 205 } 206 if (maxval < stop) 207 maxval = stop; 208 for (pos = positions + start; start++ <= stop; pos++) 209 *pos = 1; 210 } 211 212 /* overlapping ranges */ 213 if (autostop && maxval > autostop) 214 maxval = autostop; 215 216 /* set autostart */ 217 if (autostart) 218 (void)memset(positions + 1, '1', autostart); 219 } 220 221 /* 222 * Cut based on byte positions, taking care not to split multibyte characters. 223 * Although this function also handles the case where -n is not specified, 224 * b_cut() ought to be much faster. 225 */ 226 static void 227 b_n_cut(FILE *fp, const char *fname) 228 { 229 size_t col, i, lbuflen; 230 char *lbuf; 231 int canwrite, clen, warned; 232 mbstate_t mbs; 233 234 memset(&mbs, 0, sizeof(mbs)); 235 warned = 0; 236 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 237 for (col = 0; lbuflen > 0; col += clen) { 238 if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 239 if (!warned) { 240 warn("%s", fname); 241 warned = 1; 242 } 243 memset(&mbs, 0, sizeof(mbs)); 244 clen = 1; 245 } 246 if (clen == 0 || *lbuf == '\n') 247 break; 248 if (col < maxval && !positions[1 + col]) { 249 /* 250 * Print the character if (1) after an initial 251 * segment of un-selected bytes, the rest of 252 * it is selected, and (2) the last byte is 253 * selected. 254 */ 255 i = col; 256 while (i < col + clen && i < maxval && 257 !positions[1 + i]) 258 i++; 259 canwrite = i < col + clen; 260 for (; i < col + clen && i < maxval; i++) 261 canwrite &= positions[1 + i]; 262 if (canwrite) 263 fwrite(lbuf, 1, clen, stdout); 264 } else { 265 /* 266 * Print the character if all of it has 267 * been selected. 268 */ 269 canwrite = 1; 270 for (i = col; i < col + clen; i++) 271 if ((i >= maxval && !autostop) || 272 (i < maxval && !positions[1 + i])) { 273 canwrite = 0; 274 break; 275 } 276 if (canwrite) 277 fwrite(lbuf, 1, clen, stdout); 278 } 279 lbuf += clen; 280 lbuflen -= clen; 281 } 282 if (lbuflen > 0) 283 putchar('\n'); 284 } 285 } 286 287 static void 288 /*ARGSUSED*/ 289 f_cut(FILE *fp, const char *fname __unused) 290 { 291 size_t len; 292 int ch, field, isdelim, output; 293 char *lbuf, *pos, *p, sep, *tbuf; 294 295 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) { 296 output = 0; 297 if (lbuf[len - 1] != '\n') { 298 /* no newline at the end of the last line so add one */ 299 if ((tbuf = (char *)malloc(len + 1)) == NULL) 300 err(1, NULL); 301 (void)memcpy(tbuf, lbuf, len); 302 tbuf[len++] = '\n'; 303 lbuf = tbuf; 304 } 305 for (isdelim = 0, p = lbuf;; ++p) { 306 ch = *p; 307 /* this should work if newline is delimiter */ 308 if (ch == sep) 309 isdelim = 1; 310 if (ch == '\n') { 311 if (!isdelim && !sflag) 312 (void)fwrite(lbuf, len, 1, stdout); 313 break; 314 } 315 } 316 if (!isdelim) 317 continue; 318 319 pos = positions + 1; 320 for (field = maxval, p = lbuf; field; --field, ++pos) { 321 if (*pos) { 322 if (output++) 323 (void)putchar(sep); 324 while ((ch = *p++) != '\n' && ch != sep) 325 (void)putchar(ch); 326 } else { 327 while ((ch = *p++) != '\n' && ch != sep) 328 continue; 329 } 330 if (ch == '\n') 331 break; 332 } 333 if (ch != '\n') { 334 if (autostop) { 335 if (output) 336 (void)putchar(sep); 337 for (; (ch = *p) != '\n'; ++p) 338 (void)putchar(ch); 339 } else 340 for (; (ch = *p) != '\n'; ++p); 341 } 342 (void)putchar('\n'); 343 if (tbuf) { 344 free(tbuf); 345 tbuf = NULL; 346 } 347 } 348 if (tbuf) 349 free(tbuf); 350 } 351 352 static void 353 usage(void) 354 { 355 (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 356 "\tcut -c list [file ...]\n" 357 "\tcut -f list [-d string] [-s] [file ...]\n"); 358 exit(1); 359 } 360 361 /* make b_cut(): */ 362 #define CUT_BYTE 1 363 #include "x_cut.c" 364 #undef CUT_BYTE 365 366 /* make c_cut(): */ 367 #define CUT_BYTE 0 368 #include "x_cut.c" 369 #undef CUT_BYTE 370