1 1.32 gutterid /* $NetBSD: cut.c,v 1.32 2025/03/09 05:04:54 gutteridge Exp $ */ 2 1.8 glass 3 1.1 cgd /* 4 1.8 glass * Copyright (c) 1989, 1993 5 1.8 glass * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * This code is derived from software contributed to Berkeley by 8 1.1 cgd * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 1.1 cgd * 10 1.1 cgd * Redistribution and use in source and binary forms, with or without 11 1.1 cgd * modification, are permitted provided that the following conditions 12 1.1 cgd * are met: 13 1.1 cgd * 1. Redistributions of source code must retain the above copyright 14 1.1 cgd * notice, this list of conditions and the following disclaimer. 15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 cgd * notice, this list of conditions and the following disclaimer in the 17 1.1 cgd * documentation and/or other materials provided with the distribution. 18 1.16 agc * 3. Neither the name of the University nor the names of its contributors 19 1.1 cgd * may be used to endorse or promote products derived from this software 20 1.1 cgd * without specific prior written permission. 21 1.1 cgd * 22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 cgd * SUCH DAMAGE. 33 1.1 cgd */ 34 1.1 cgd 35 1.11 lukem #include <sys/cdefs.h> 36 1.1 cgd #ifndef lint 37 1.25 lukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 1.25 lukem The Regents of the University of California. All rights reserved."); 39 1.1 cgd #endif /* not lint */ 40 1.1 cgd 41 1.1 cgd #ifndef lint 42 1.8 glass #if 0 43 1.9 jtc static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 1.8 glass #endif 45 1.32 gutterid __RCSID("$NetBSD: cut.c,v 1.32 2025/03/09 05:04:54 gutteridge Exp $"); 46 1.1 cgd #endif /* not lint */ 47 1.1 cgd 48 1.8 glass #include <ctype.h> 49 1.8 glass #include <err.h> 50 1.8 glass #include <errno.h> 51 1.8 glass #include <limits.h> 52 1.8 glass #include <locale.h> 53 1.5 jtc #include <stdio.h> 54 1.5 jtc #include <stdlib.h> 55 1.5 jtc #include <string.h> 56 1.9 jtc #include <unistd.h> 57 1.23 christos #include <util.h> 58 1.22 hubertf #include <wchar.h> 59 1.23 christos #include <sys/param.h> 60 1.1 cgd 61 1.23 christos static int bflag; 62 1.23 christos static int cflag; 63 1.23 christos static char dchar; 64 1.23 christos static int dflag; 65 1.23 christos static int fflag; 66 1.31 gutterid static int nflag; 67 1.23 christos static int sflag; 68 1.23 christos 69 1.23 christos static void b_cut(FILE *, const char *); 70 1.31 gutterid static void b_n_cut(FILE *, const char *); 71 1.23 christos static void c_cut(FILE *, const char *); 72 1.23 christos static void f_cut(FILE *, const char *); 73 1.23 christos static void get_list(char *); 74 1.24 perry static void usage(void) __dead; 75 1.8 glass 76 1.5 jtc int 77 1.17 xtraeme main(int argc, char *argv[]) 78 1.1 cgd { 79 1.1 cgd FILE *fp; 80 1.17 xtraeme void (*fcn)(FILE *, const char *); 81 1.29 wiz int ch, rval; 82 1.6 jtc 83 1.11 lukem fcn = NULL; 84 1.23 christos (void)setlocale(LC_ALL, ""); 85 1.1 cgd 86 1.1 cgd dchar = '\t'; /* default delimiter is \t */ 87 1.1 cgd 88 1.10 mrg while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 89 1.1 cgd switch(ch) { 90 1.4 jtc case 'b': 91 1.22 hubertf fcn = b_cut; 92 1.22 hubertf get_list(optarg); 93 1.22 hubertf bflag = 1; 94 1.22 hubertf break; 95 1.1 cgd case 'c': 96 1.1 cgd fcn = c_cut; 97 1.1 cgd get_list(optarg); 98 1.1 cgd cflag = 1; 99 1.1 cgd break; 100 1.1 cgd case 'd': 101 1.1 cgd dchar = *optarg; 102 1.1 cgd dflag = 1; 103 1.1 cgd break; 104 1.1 cgd case 'f': 105 1.1 cgd get_list(optarg); 106 1.1 cgd fcn = f_cut; 107 1.1 cgd fflag = 1; 108 1.1 cgd break; 109 1.1 cgd case 's': 110 1.1 cgd sflag = 1; 111 1.4 jtc break; 112 1.4 jtc case 'n': 113 1.31 gutterid nflag = 1; 114 1.1 cgd break; 115 1.1 cgd case '?': 116 1.1 cgd default: 117 1.1 cgd usage(); 118 1.1 cgd } 119 1.1 cgd argc -= optind; 120 1.1 cgd argv += optind; 121 1.1 cgd 122 1.31 gutterid if (bflag + cflag + fflag != 1 || 123 1.31 gutterid (nflag && !bflag) || 124 1.31 gutterid ((dflag || sflag) && !fflag)) 125 1.1 cgd usage(); 126 1.31 gutterid 127 1.31 gutterid if (nflag) 128 1.31 gutterid fcn = b_n_cut; 129 1.1 cgd 130 1.29 wiz rval = 0; 131 1.1 cgd if (*argv) 132 1.1 cgd for (; *argv; ++argv) { 133 1.21 jnemeth if (strcmp(*argv, "-") == 0) 134 1.21 jnemeth fcn(stdin, "stdin"); 135 1.21 jnemeth else { 136 1.29 wiz if ((fp = fopen(*argv, "r"))) { 137 1.29 wiz fcn(fp, *argv); 138 1.29 wiz (void)fclose(fp); 139 1.29 wiz } else { 140 1.29 wiz rval = 1; 141 1.29 wiz warn("%s", *argv); 142 1.29 wiz } 143 1.21 jnemeth } 144 1.1 cgd } 145 1.1 cgd else 146 1.1 cgd fcn(stdin, "stdin"); 147 1.29 wiz return(rval); 148 1.1 cgd } 149 1.1 cgd 150 1.23 christos static size_t autostart, autostop, maxval; 151 1.1 cgd 152 1.23 christos static char *positions = NULL; 153 1.23 christos static size_t numpositions = 0; 154 1.23 christos #define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ 155 1.1 cgd 156 1.23 christos static void 157 1.17 xtraeme get_list(char *list) 158 1.1 cgd { 159 1.23 christos size_t setautostart, start, stop; 160 1.11 lukem char *pos; 161 1.6 jtc char *p; 162 1.1 cgd 163 1.23 christos if (positions == NULL) { 164 1.23 christos numpositions = ALLOC_CHUNK; 165 1.23 christos positions = ecalloc(numpositions, sizeof(*positions)); 166 1.23 christos } 167 1.23 christos 168 1.1 cgd /* 169 1.32 gutterid * Set a byte in the positions array to indicate if a field or 170 1.1 cgd * column is to be selected; use +1, it's 1-based, not 0-based. 171 1.32 gutterid * Numbers and number ranges may be overlapping, repeated, and in 172 1.32 gutterid * any order. We handle "-3-5" although there's no real reason to. 173 1.1 cgd */ 174 1.11 lukem for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 175 1.1 cgd setautostart = start = stop = 0; 176 1.1 cgd if (*p == '-') { 177 1.1 cgd ++p; 178 1.1 cgd setautostart = 1; 179 1.1 cgd } 180 1.13 christos if (isdigit((unsigned char)*p)) { 181 1.1 cgd start = stop = strtol(p, &p, 10); 182 1.1 cgd if (setautostart && start > autostart) 183 1.1 cgd autostart = start; 184 1.1 cgd } 185 1.1 cgd if (*p == '-') { 186 1.13 christos if (isdigit((unsigned char)p[1])) 187 1.1 cgd stop = strtol(p + 1, &p, 10); 188 1.1 cgd if (*p == '-') { 189 1.1 cgd ++p; 190 1.1 cgd if (!autostop || autostop > stop) 191 1.1 cgd autostop = stop; 192 1.1 cgd } 193 1.1 cgd } 194 1.1 cgd if (*p) 195 1.27 wiz errx(1, "[-bcf] list: illegal list value"); 196 1.1 cgd if (!stop || !start) 197 1.27 wiz errx(1, "[-bcf] list: values may not include zero"); 198 1.23 christos if (stop + 1 > numpositions) { 199 1.23 christos size_t newsize; 200 1.23 christos newsize = roundup(stop + 1, ALLOC_CHUNK); 201 1.23 christos positions = erealloc(positions, newsize); 202 1.23 christos (void)memset(positions + numpositions, 0, 203 1.23 christos newsize - numpositions); 204 1.23 christos numpositions = newsize; 205 1.23 christos } 206 1.1 cgd if (maxval < stop) 207 1.1 cgd maxval = stop; 208 1.23 christos for (pos = positions + start; start++ <= stop; pos++) 209 1.23 christos *pos = 1; 210 1.1 cgd } 211 1.1 cgd 212 1.1 cgd /* overlapping ranges */ 213 1.1 cgd if (autostop && maxval > autostop) 214 1.1 cgd maxval = autostop; 215 1.1 cgd 216 1.1 cgd /* set autostart */ 217 1.1 cgd if (autostart) 218 1.23 christos (void)memset(positions + 1, '1', autostart); 219 1.1 cgd } 220 1.1 cgd 221 1.31 gutterid /* 222 1.31 gutterid * Cut based on byte positions, taking care not to split multibyte characters. 223 1.31 gutterid * Although this function also handles the case where -n is not specified, 224 1.31 gutterid * b_cut() ought to be much faster. 225 1.31 gutterid */ 226 1.31 gutterid static void 227 1.31 gutterid b_n_cut(FILE *fp, const char *fname) 228 1.31 gutterid { 229 1.31 gutterid size_t col, i, lbuflen; 230 1.31 gutterid char *lbuf; 231 1.31 gutterid int canwrite, clen, warned; 232 1.31 gutterid mbstate_t mbs; 233 1.31 gutterid 234 1.31 gutterid memset(&mbs, 0, sizeof(mbs)); 235 1.31 gutterid warned = 0; 236 1.31 gutterid while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 237 1.31 gutterid for (col = 0; lbuflen > 0; col += clen) { 238 1.31 gutterid if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 239 1.31 gutterid if (!warned) { 240 1.31 gutterid warn("%s", fname); 241 1.31 gutterid warned = 1; 242 1.31 gutterid } 243 1.31 gutterid memset(&mbs, 0, sizeof(mbs)); 244 1.31 gutterid clen = 1; 245 1.31 gutterid } 246 1.31 gutterid if (clen == 0 || *lbuf == '\n') 247 1.31 gutterid break; 248 1.31 gutterid if (col < maxval && !positions[1 + col]) { 249 1.31 gutterid /* 250 1.31 gutterid * Print the character if (1) after an initial 251 1.31 gutterid * segment of un-selected bytes, the rest of 252 1.31 gutterid * it is selected, and (2) the last byte is 253 1.31 gutterid * selected. 254 1.31 gutterid */ 255 1.31 gutterid i = col; 256 1.31 gutterid while (i < col + clen && i < maxval && 257 1.31 gutterid !positions[1 + i]) 258 1.31 gutterid i++; 259 1.31 gutterid canwrite = i < col + clen; 260 1.31 gutterid for (; i < col + clen && i < maxval; i++) 261 1.31 gutterid canwrite &= positions[1 + i]; 262 1.31 gutterid if (canwrite) 263 1.31 gutterid fwrite(lbuf, 1, clen, stdout); 264 1.31 gutterid } else { 265 1.31 gutterid /* 266 1.31 gutterid * Print the character if all of it has 267 1.31 gutterid * been selected. 268 1.31 gutterid */ 269 1.31 gutterid canwrite = 1; 270 1.31 gutterid for (i = col; i < col + clen; i++) 271 1.31 gutterid if ((i >= maxval && !autostop) || 272 1.31 gutterid (i < maxval && !positions[1 + i])) { 273 1.31 gutterid canwrite = 0; 274 1.31 gutterid break; 275 1.31 gutterid } 276 1.31 gutterid if (canwrite) 277 1.31 gutterid fwrite(lbuf, 1, clen, stdout); 278 1.31 gutterid } 279 1.31 gutterid lbuf += clen; 280 1.31 gutterid lbuflen -= clen; 281 1.31 gutterid } 282 1.31 gutterid if (lbuflen > 0) 283 1.31 gutterid putchar('\n'); 284 1.31 gutterid } 285 1.31 gutterid } 286 1.31 gutterid 287 1.23 christos static void 288 1.23 christos /*ARGSUSED*/ 289 1.23 christos f_cut(FILE *fp, const char *fname __unused) 290 1.1 cgd { 291 1.11 lukem int ch, field, isdelim; 292 1.11 lukem char *pos, *p, sep; 293 1.1 cgd int output; 294 1.18 yamt size_t len; 295 1.18 yamt char *lbuf, *tbuf; 296 1.1 cgd 297 1.23 christos for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) { 298 1.7 mycroft output = 0; 299 1.18 yamt if (lbuf[len - 1] != '\n') { 300 1.18 yamt /* no newline at the end of the last line so add one */ 301 1.18 yamt if ((tbuf = (char *)malloc(len + 1)) == NULL) 302 1.18 yamt err(1, NULL); 303 1.23 christos (void)memcpy(tbuf, lbuf, len); 304 1.19 dsl tbuf[len++] = '\n'; 305 1.18 yamt lbuf = tbuf; 306 1.18 yamt } 307 1.1 cgd for (isdelim = 0, p = lbuf;; ++p) { 308 1.18 yamt ch = *p; 309 1.1 cgd /* this should work if newline is delimiter */ 310 1.1 cgd if (ch == sep) 311 1.1 cgd isdelim = 1; 312 1.1 cgd if (ch == '\n') { 313 1.1 cgd if (!isdelim && !sflag) 314 1.18 yamt (void)fwrite(lbuf, len, 1, stdout); 315 1.1 cgd break; 316 1.1 cgd } 317 1.1 cgd } 318 1.1 cgd if (!isdelim) 319 1.1 cgd continue; 320 1.1 cgd 321 1.1 cgd pos = positions + 1; 322 1.1 cgd for (field = maxval, p = lbuf; field; --field, ++pos) { 323 1.1 cgd if (*pos) { 324 1.1 cgd if (output++) 325 1.8 glass (void)putchar(sep); 326 1.1 cgd while ((ch = *p++) != '\n' && ch != sep) 327 1.8 glass (void)putchar(ch); 328 1.12 ross } else { 329 1.12 ross while ((ch = *p++) != '\n' && ch != sep) 330 1.12 ross continue; 331 1.12 ross } 332 1.1 cgd if (ch == '\n') 333 1.1 cgd break; 334 1.1 cgd } 335 1.12 ross if (ch != '\n') { 336 1.1 cgd if (autostop) { 337 1.1 cgd if (output) 338 1.8 glass (void)putchar(sep); 339 1.1 cgd for (; (ch = *p) != '\n'; ++p) 340 1.8 glass (void)putchar(ch); 341 1.1 cgd } else 342 1.1 cgd for (; (ch = *p) != '\n'; ++p); 343 1.12 ross } 344 1.8 glass (void)putchar('\n'); 345 1.19 dsl if (tbuf) { 346 1.19 dsl free(tbuf); 347 1.19 dsl tbuf = NULL; 348 1.19 dsl } 349 1.1 cgd } 350 1.20 christos if (tbuf) 351 1.20 christos free(tbuf); 352 1.1 cgd } 353 1.1 cgd 354 1.23 christos static void 355 1.17 xtraeme usage(void) 356 1.1 cgd { 357 1.26 wiz (void)fprintf(stderr, "usage:\tcut -b list [-n] [file ...]\n" 358 1.26 wiz "\tcut -c list [file ...]\n" 359 1.28 wiz "\tcut -f list [-d string] [-s] [file ...]\n"); 360 1.1 cgd exit(1); 361 1.1 cgd } 362 1.22 hubertf 363 1.30 gutterid /* make b_cut(): */ 364 1.28 wiz #define CUT_BYTE 1 365 1.22 hubertf #include "x_cut.c" 366 1.22 hubertf #undef CUT_BYTE 367 1.22 hubertf 368 1.30 gutterid /* make c_cut(): */ 369 1.22 hubertf #define CUT_BYTE 0 370 1.22 hubertf #include "x_cut.c" 371 1.22 hubertf #undef CUT_BYTE 372