cut.c revision 1.4 1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
#ifndef lint
/*
 * Identification strings compiled into the object file.  Both are left
 * out when the "lint" macro is defined.
 */
char copyright[] =
"@(#) Copyright (c) 1989 The Regents of the University of California.\n\
All rights reserved.\n";

/*static char sccsid[] = "from: @(#)cut.c 5.4 (Berkeley) 10/30/90";*/
static char rcsid[] = "$Id: cut.c,v 1.4 1993/08/16 22:44:51 jtc Exp $";
#endif /* not lint */
47
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
51
/*
 * Command-line option state.  main() sets these while parsing options;
 * f_cut() reads dchar and sflag.
 */
int cflag;	/* nonzero once -b or -c has been seen */
int dflag;	/* nonzero once -d has been seen */
int fflag;	/* nonzero once -f has been seen */
int sflag;	/* nonzero once -s has been seen */
char dchar;	/* field delimiter; main() defaults it to '\t', -d overrides it */
57
58 main(argc, argv)
59 int argc;
60 char **argv;
61 {
62 extern char *optarg;
63 extern int errno, optind;
64 FILE *fp;
65 int ch, (*fcn)(), c_cut(), f_cut();
66 char *strerror();
67
68 dchar = '\t'; /* default delimiter is \t */
69
70 /* Since we don't support multi-byte characters, the -c and -b
71 options are equivalent, and the -n option is meaningless. */
72 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != EOF)
73 switch(ch) {
74 case 'b':
75 case 'c':
76 fcn = c_cut;
77 get_list(optarg);
78 cflag = 1;
79 break;
80 case 'd':
81 dchar = *optarg;
82 dflag = 1;
83 break;
84 case 'f':
85 get_list(optarg);
86 fcn = f_cut;
87 fflag = 1;
88 break;
89 case 's':
90 sflag = 1;
91 break;
92 case 'n':
93 break;
94 case '?':
95 default:
96 usage();
97 }
98 argc -= optind;
99 argv += optind;
100
101 if (fflag) {
102 if (cflag)
103 usage();
104 } else if (!cflag || dflag || sflag)
105 usage();
106
107 if (*argv)
108 for (; *argv; ++argv) {
109 if (!(fp = fopen(*argv, "r"))) {
110 (void)fprintf(stderr,
111 "cut: %s: %s\n", *argv, strerror(errno));
112 exit(1);
113 }
114 fcn(fp, *argv);
115 }
116 else
117 fcn(stdin, "stdin");
118 exit(0);
119 }
120
/*
 * Selection state: filled in by get_list(), read by c_cut() and f_cut().
 */
char positions[_POSIX2_LINE_MAX + 1];	/* positions[n] nonzero: column/field n (1-based) is selected */
int autostart;	/* largest N given by a leading "-N" list element; 0 if none */
int autostop;	/* smallest end value of the elements with a trailing "-"; 0 if none */
int maxval;	/* highest position the cut loops have to examine */
124
125 get_list(list)
126 char *list;
127 {
128 register char *pos;
129 register int setautostart, start, stop;
130 char *p, *strtok();
131
132 /*
133 * set a byte in the positions array to indicate if a field or
134 * column is to be selected; use +1, it's 1-based, not 0-based.
135 * This parser is less restrictive than the Draft 9 POSIX spec.
136 * POSIX doesn't allow lists that aren't in increasing order or
137 * overlapping lists. We also handle "-3-5" although there's no
138 * real reason too.
139 */
140 for (; p = strtok(list, ", \t"); list = NULL) {
141 setautostart = start = stop = 0;
142 if (*p == '-') {
143 ++p;
144 setautostart = 1;
145 }
146 if (isdigit(*p)) {
147 start = stop = strtol(p, &p, 10);
148 if (setautostart && start > autostart)
149 autostart = start;
150 }
151 if (*p == '-') {
152 if (isdigit(p[1]))
153 stop = strtol(p + 1, &p, 10);
154 if (*p == '-') {
155 ++p;
156 if (!autostop || autostop > stop)
157 autostop = stop;
158 }
159 }
160 if (*p)
161 badlist("illegal list value");
162 if (!stop || !start)
163 badlist("values may not include zero");
164 if (stop > _POSIX2_LINE_MAX) {
165 /* positions used rather than allocate a new buffer */
166 (void)sprintf(positions, "%d too large (max %d)",
167 stop, _POSIX2_LINE_MAX);
168 badlist(positions);
169 }
170 if (maxval < stop)
171 maxval = stop;
172 for (pos = positions + start; start++ <= stop; *pos++ = 1);
173 }
174
175 /* overlapping ranges */
176 if (autostop && maxval > autostop)
177 maxval = autostop;
178
179 /* set autostart */
180 if (autostart)
181 memset(positions + 1, '1', autostart);
182 }
183
184 /* ARGSUSED */
185 c_cut(fp, fname)
186 FILE *fp;
187 char *fname;
188 {
189 register int ch, col;
190 register char *pos;
191
192 for (;;) {
193 pos = positions + 1;
194 for (col = maxval; col; --col) {
195 if ((ch = getc(fp)) == EOF)
196 return;
197 if (ch == '\n')
198 break;
199 if (*pos++)
200 putchar(ch);
201 }
202 if (ch != '\n')
203 if (autostop)
204 while ((ch = getc(fp)) != EOF && ch != '\n')
205 putchar(ch);
206 else
207 while ((ch = getc(fp)) != EOF && ch != '\n');
208 putchar('\n');
209 }
210 }
211
212 f_cut(fp, fname)
213 FILE *fp;
214 char *fname;
215 {
216 register int ch, field, isdelim;
217 register char *pos, *p, sep;
218 int output;
219 char lbuf[_POSIX2_LINE_MAX + 1];
220
221 for (sep = dchar, output = 0; fgets(lbuf, sizeof(lbuf), fp); output = 0) {
222 for (isdelim = 0, p = lbuf;; ++p) {
223 if (!(ch = *p)) {
224 (void)fprintf(stderr,
225 "cut: %s: line too long.\n", fname);
226 exit(1);
227 }
228 /* this should work if newline is delimiter */
229 if (ch == sep)
230 isdelim = 1;
231 if (ch == '\n') {
232 if (!isdelim && !sflag)
233 (void)printf("%s", lbuf);
234 break;
235 }
236 }
237 if (!isdelim)
238 continue;
239
240 pos = positions + 1;
241 for (field = maxval, p = lbuf; field; --field, ++pos) {
242 if (*pos) {
243 if (output++)
244 putchar(sep);
245 while ((ch = *p++) != '\n' && ch != sep)
246 putchar(ch);
247 } else
248 while ((ch = *p++) != '\n' && ch != sep);
249 if (ch == '\n')
250 break;
251 }
252 if (ch != '\n')
253 if (autostop) {
254 if (output)
255 putchar(sep);
256 for (; (ch = *p) != '\n'; ++p)
257 putchar(ch);
258 } else
259 for (; (ch = *p) != '\n'; ++p);
260 putchar('\n');
261 }
262 }
263
/*
 * badlist --
 *	Report a problem with a -b/-c/-f list on the standard error
 *	output and terminate the program with exit status 1.
 *	msg is the description placed after the "cut: [-cf] list: " prefix.
 */
badlist(msg)
	char *msg;
{
	(void)fprintf(stderr,
	    "cut: [-cf] list: %s.\n",
	    msg);
	exit(1);
	/* NOTREACHED */
}
270
/*
 * usage --
 *	Write the two-line synopsis to the standard error output and
 *	terminate the program with exit status 1.
 */
usage()
{
	/* The text holds no conversion specifications, so fputs() emits
	   exactly what fprintf() would. */
	(void)fputs(
	    "usage:\tcut -c list [file1 ...]\n"
	    "\tcut -f list [-s] [-d delim] [file ...]\n",
	    stderr);
	exit(1);
	/* NOTREACHED */
}
277