split.c revision 1.6 1 /* $NetBSD: split.c,v 1.6 1997/10/19 23:26:58 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1987, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1987, 1993, 1994\n\
39 The Regents of the University of California. All rights reserved.\n");
40 #endif /* not lint */
41
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)split.c 8.3 (Berkeley) 4/25/94";
45 #endif
46 __RCSID("$NetBSD: split.c,v 1.6 1997/10/19 23:26:58 lukem Exp $");
47 #endif /* not lint */
48
#include <sys/param.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
58
59 #define DEFLINE 1000 /* Lines per output file when no count is given. */
60
61 long bytecnt; /* Bytes per output file; nonzero selects byte mode (-b). */
62 long numlines; /* Lines per output file; 0 until set, then DEFLINE. */
63 int file_open; /* Nonzero while the current output file can take more data. */
64 int ifd = -1, ofd = -1; /* Input/output file descriptors; -1 until assigned. */
65 char bfr[MAXBSIZE]; /* I/O buffer filled by each read(2). */
66 char fname[MAXPATHLEN]; /* Output name: prefix plus suffix set by newfile(). */
67
68 int main __P((int, char **));
69 void newfile __P((void)); /* Open the next output file. */
70 void split1 __P((void)); /* Split the input by bytes. */
71 void split2 __P((void)); /* Split the input by lines. */
72 void usage __P((void)); /* Print the usage message and exit. */
73
74 int
75 main(argc, argv)
76 int argc;
77 char *argv[];
78 {
79 int ch;
80 char *ep, *p;
81
82 while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1)
83 switch (ch) {
84 case '0': case '1': case '2': case '3': case '4':
85 case '5': case '6': case '7': case '8': case '9':
86 /*
87 * Undocumented kludge: split was originally designed
88 * to take a number after a dash.
89 */
90 if (numlines == 0) {
91 p = argv[optind - 1];
92 if (p[0] == '-' && p[1] == ch && !p[2])
93 numlines = strtol(++p, &ep, 10);
94 else
95 numlines =
96 strtol(argv[optind] + 1, &ep, 10);
97 if (numlines <= 0 || *ep)
98 errx(1,
99 "%s: illegal line count.", optarg);
100 }
101 break;
102 case '-': /* Undocumented: historic stdin flag. */
103 if (ifd != -1)
104 usage();
105 ifd = 0;
106 break;
107 case 'b': /* Byte count. */
108 if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
109 (*ep != '\0' && *ep != 'k' && *ep != 'm'))
110 errx(1, "%s: illegal byte count.", optarg);
111 if (*ep == 'k')
112 bytecnt *= 1024;
113 else if (*ep == 'm')
114 bytecnt *= 1048576;
115 break;
116 case 'l': /* Line count. */
117 if (numlines != 0)
118 usage();
119 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
120 errx(1, "%s: illegal line count.", optarg);
121 break;
122 default:
123 usage();
124 }
125 argv += optind;
126 argc -= optind;
127
128 if (*argv != NULL)
129 if (ifd == -1) { /* Input file. */
130 if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
131 err(1, "%s", *argv);
132 ++argv;
133 }
134 if (*argv != NULL) /* File name prefix. */
135 (void)strcpy(fname, *argv++);
136 if (*argv != NULL)
137 usage();
138
139 if (numlines == 0)
140 numlines = DEFLINE;
141 else if (bytecnt)
142 usage();
143
144 if (ifd == -1) /* Stdin by default. */
145 ifd = 0;
146
147 if (bytecnt) {
148 split1();
149 exit (0);
150 }
151 split2();
152 exit(0);
153 }
154
155 /*
156 * split1 --
157 * Split the input by bytes.
158 */
159 void
160 split1()
161 {
162 long bcnt;
163 int dist, len;
164 char *C;
165
166 for (bcnt = 0;;)
167 switch (len = read(ifd, bfr, MAXBSIZE)) {
168 case 0:
169 exit(0);
170 case -1:
171 err(1, "read");
172 /* NOTREACHED */
173 default:
174 if (!file_open) {
175 newfile();
176 file_open = 1;
177 }
178 if (bcnt + len >= bytecnt) {
179 dist = bytecnt - bcnt;
180 if (write(ofd, bfr, dist) != dist)
181 err(1, "write");
182 len -= dist;
183 for (C = bfr + dist; len >= bytecnt;
184 len -= bytecnt, C += bytecnt) {
185 newfile();
186 if (write(ofd,
187 C, (int)bytecnt) != bytecnt)
188 err(1, "write");
189 }
190 if (len) {
191 newfile();
192 if (write(ofd, C, len) != len)
193 err(1, "write");
194 } else
195 file_open = 0;
196 bcnt = len;
197 } else {
198 bcnt += len;
199 if (write(ofd, bfr, len) != len)
200 err(1, "write");
201 }
202 }
203 }
204
205 /*
206 * split2 --
207 * Split the input by lines.
208 */
209 void
210 split2()
211 {
212 long lcnt;
213 int len, bcnt;
214 char *Ce, *Cs;
215
216 for (lcnt = 0;;)
217 switch (len = read(ifd, bfr, MAXBSIZE)) {
218 case 0:
219 exit(0);
220 case -1:
221 err(1, "read");
222 /* NOTREACHED */
223 default:
224 if (!file_open) {
225 newfile();
226 file_open = 1;
227 }
228 for (Cs = Ce = bfr; len--; Ce++)
229 if (*Ce == '\n' && ++lcnt == numlines) {
230 bcnt = Ce - Cs + 1;
231 if (write(ofd, Cs, bcnt) != bcnt)
232 err(1, "write");
233 lcnt = 0;
234 Cs = Ce + 1;
235 if (len)
236 newfile();
237 else
238 file_open = 0;
239 }
240 if (Cs < Ce) {
241 bcnt = Ce - Cs;
242 if (write(ofd, Cs, bcnt) != bcnt)
243 err(1, "write");
244 }
245 }
246 }
247
248 /*
249 * newfile --
250 * Open a new output file.
251 */
252 void
253 newfile()
254 {
255 static long fnum;
256 static int defname;
257 static char *fpnt;
258
259 if (ofd == -1) {
260 if (fname[0] == '\0') {
261 fname[0] = 'x';
262 fpnt = fname + 1;
263 defname = 1;
264 } else {
265 fpnt = fname + strlen(fname);
266 defname = 0;
267 }
268 ofd = fileno(stdout);
269 }
270 /*
271 * Hack to increase max files; original code wandered through
272 * magic characters. Maximum files is 3 * 26 * 26 == 2028
273 */
274 #define MAXFILES 676
275 if (fnum == MAXFILES) {
276 if (!defname || fname[0] == 'z')
277 errx(1, "too many files.");
278 ++fname[0];
279 fnum = 0;
280 }
281 fpnt[0] = fnum / 26 + 'a';
282 fpnt[1] = fnum % 26 + 'a';
283 ++fnum;
284 if (!freopen(fname, "w", stdout))
285 err(1, "%s", fname);
286 }
287
/*
 * usage --
 *	Write the command synopsis to standard error and exit with
 *	status 1.
 */
void
usage()
{
	static const char synopsis[] =
	    "usage: split [-b byte_count] [-l line_count] [file [prefix]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
295