cut.c revision 1.12 1 /* $NetBSD: cut.c,v 1.12 1998/08/25 20:59:36 ross Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
42 The Regents of the University of California. All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
48 #endif
49 __RCSID("$NetBSD: cut.c,v 1.12 1998/08/25 20:59:36 ross Exp $");
50 #endif /* not lint */
51
52 #include <ctype.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <limits.h>
56 #include <locale.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
62 int cflag;	/* set to 1 by main() when -b or -c is given */
63 char dchar;	/* field delimiter used by f_cut(); main() defaults it to '\t', -d overrides it */
64 int dflag;	/* set to 1 by main() when -d is given */
65 int fflag;	/* set to 1 by main() when -f is given */
66 int sflag;	/* set to 1 by main() when -s is given; f_cut() then drops lines containing no delimiter */
67
68 void c_cut __P((FILE *, char *));	/* column (byte/character) selection over one stream */
69 void f_cut __P((FILE *, char *));	/* delimiter-separated field selection over one stream */
70 void get_list __P((char *));	/* parse a -b/-c/-f list argument into the positions table */
71 int main __P((int, char **));
72 void usage __P((void));	/* print the usage message and exit(1) */
73
74 int
75 main(argc, argv)
76 int argc;
77 char *argv[];
78 {
79 FILE *fp;
80 void (*fcn) __P((FILE *, char *));
81 int ch;
82
83 fcn = NULL;
84 setlocale (LC_ALL, "");
85
86 dchar = '\t'; /* default delimiter is \t */
87
88 /* Since we don't support multi-byte characters, the -c and -b
89 options are equivalent, and the -n option is meaningless. */
90 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
91 switch(ch) {
92 case 'b':
93 case 'c':
94 fcn = c_cut;
95 get_list(optarg);
96 cflag = 1;
97 break;
98 case 'd':
99 dchar = *optarg;
100 dflag = 1;
101 break;
102 case 'f':
103 get_list(optarg);
104 fcn = f_cut;
105 fflag = 1;
106 break;
107 case 's':
108 sflag = 1;
109 break;
110 case 'n':
111 break;
112 case '?':
113 default:
114 usage();
115 }
116 argc -= optind;
117 argv += optind;
118
119 if (fflag) {
120 if (cflag)
121 usage();
122 } else if (!cflag || dflag || sflag)
123 usage();
124
125 if (*argv)
126 for (; *argv; ++argv) {
127 if (!(fp = fopen(*argv, "r")))
128 err(1, "%s", *argv);
129 fcn(fp, *argv);
130 (void)fclose(fp);
131 }
132 else
133 fcn(stdin, "stdin");
134 exit(0);
135 }
136
/*
 * Selection state filled in by get_list() and read by c_cut()/f_cut():
 *   autostart	largest N seen in a "-N" item (0 if none); positions
 *		1..N are all selected
 *   autostop	smallest N seen in an open-ended "N-" item (0 if none);
 *		everything after the examined positions is selected
 *   maxval	highest position that has to be examined individually
 */
int autostart, autostop, maxval;

/* positions[i] is non-zero when 1-based column/field i is selected. */
char positions[_POSIX2_LINE_MAX + 1];

/*
 * Parse a -b/-c/-f list argument and record the selection in the
 * globals above.
 *
 * list is a NUL-terminated string of items separated by commas, blanks
 * or tabs; it is modified in place by strtok().  Each item is "N",
 * "N-M", "N-" or "-M".  May be called more than once; selections
 * accumulate.
 *
 * Any malformed item, a zero value, or a value above _POSIX2_LINE_MAX
 * terminates the program through errx(1, ...).
 */
void
get_list(char *list)
{
	long start, stop, i;
	int setautostart, setautostop;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	for (; (p = strtok(list, ", \t")) != NULL; list = NULL) {
		setautostart = setautostop = 0;
		start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		/* <ctype.h> functions need an unsigned char value. */
		if (isdigit((unsigned char)*p))
			start = stop = strtol(p, &p, 10);
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				++p;
				setautostop = 1;
			}
		}
		if (*p)
			errx(1, "[-cf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-cf] list: values may not include zero");
		/*
		 * Validate both ends while they are still long: an
		 * overflowed or oversized start must never reach the
		 * positions[] stores or the memset() below.
		 */
		if (start > _POSIX2_LINE_MAX || stop > _POSIX2_LINE_MAX)
			errx(1, "[-cf] list: %ld too large (max %d)",
			    start > stop ? start : stop, _POSIX2_LINE_MAX);

		/* Only commit to the globals once the item is known good. */
		if (setautostart && start > autostart)
			autostart = (int)start;
		if (setautostop && (!autostop || autostop > stop))
			autostop = (int)stop;
		if (maxval < stop)
			maxval = (int)stop;
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop)
		maxval = autostop;

	/* set autostart */
	if (autostart)
		memset(positions + 1, 1, (size_t)autostart);
}
197
198 /* ARGSUSED */
199 void
200 c_cut(fp, fname)
201 FILE *fp;
202 char *fname;
203 {
204 int ch, col;
205 char *pos;
206
207 ch = 0;
208 for (;;) {
209 pos = positions + 1;
210 for (col = maxval; col; --col) {
211 if ((ch = getc(fp)) == EOF)
212 return;
213 if (ch == '\n')
214 break;
215 if (*pos++)
216 (void)putchar(ch);
217 }
218 if (ch != '\n') {
219 if (autostop)
220 while ((ch = getc(fp)) != EOF && ch != '\n')
221 (void)putchar(ch);
222 else
223 while ((ch = getc(fp)) != EOF && ch != '\n');
224 }
225 (void)putchar('\n');
226 }
227 }
228
229 void
230 f_cut(fp, fname)
231 FILE *fp;
232 char *fname;
233 {
234 int ch, field, isdelim;
235 char *pos, *p, sep;
236 int output;
237 char lbuf[_POSIX2_LINE_MAX + 1];
238
239 for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) {
240 output = 0;
241 for (isdelim = 0, p = lbuf;; ++p) {
242 if (!(ch = *p))
243 errx(1, "%s: line too long.\n", fname);
244 /* this should work if newline is delimiter */
245 if (ch == sep)
246 isdelim = 1;
247 if (ch == '\n') {
248 if (!isdelim && !sflag)
249 (void)printf("%s", lbuf);
250 break;
251 }
252 }
253 if (!isdelim)
254 continue;
255
256 pos = positions + 1;
257 for (field = maxval, p = lbuf; field; --field, ++pos) {
258 if (*pos) {
259 if (output++)
260 (void)putchar(sep);
261 while ((ch = *p++) != '\n' && ch != sep)
262 (void)putchar(ch);
263 } else {
264 while ((ch = *p++) != '\n' && ch != sep)
265 continue;
266 }
267 if (ch == '\n')
268 break;
269 }
270 if (ch != '\n') {
271 if (autostop) {
272 if (output)
273 (void)putchar(sep);
274 for (; (ch = *p) != '\n'; ++p)
275 (void)putchar(ch);
276 } else
277 for (; (ch = *p) != '\n'; ++p);
278 }
279 (void)putchar('\n');
280 }
281 }
282
/*
 * Print the command synopsis to standard error and exit with status 1.
 * The synopsis lists every option main() accepts, including -b and -n.
 */
void
usage(void)
{
	(void)fprintf(stderr,
	    "usage:\tcut -b list [-n] [file ...]\n"
	    "\tcut -c list [file ...]\n"
	    "\tcut -f list [-s] [-d delim] [file ...]\n");
	exit(1);
}
290