wc.c revision 1.34 1 /* $NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1987, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95";
41 #else
42 __RCSID("$NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $");
43 #endif
44 #endif /* not lint */
45
46 /* wc line, word, char count and optionally longest line. */
47
48 #include <sys/param.h>
49 #include <sys/file.h>
50 #include <sys/stat.h>
51
52 #include <ctype.h>
53 #include <fcntl.h>
54 #include <err.h>
55 #include <errno.h>
56 #include <locale.h>
57 #include <stdbool.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62 #include <wchar.h>
63 #include <wctype.h>
64
65 #ifdef NO_QUAD
66 typedef u_long wc_count_t;
67 # define WCFMT " %7lu"
68 # define WCCAST unsigned long
69 #else
70 typedef u_quad_t wc_count_t;
71 # define WCFMT " %7llu"
72 # define WCCAST unsigned long long
73 #endif
74
75 static wc_count_t tlinect, twordct, tcharct, tlongest;
76 static bool doline, doword, dobyte, dochar, dolongest;
77 static int rval = 0;
78
79 static void cnt(const char *);
80 static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
81 const char *);
82 static void usage(void);
83 static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *,
84 size_t *, const char *);
85 int main(int, char *[]);
86
87 int
88 main(int argc, char *argv[])
89 {
90 int ch;
91
92 setlocale(LC_ALL, "");
93
94 while ((ch = getopt(argc, argv, "lwcmL")) != -1)
95 switch (ch) {
96 case 'l':
97 doline = true;
98 break;
99 case 'w':
100 doword = true;
101 break;
102 case 'm':
103 dochar = true;
104 dobyte = 0;
105 break;
106 case 'c':
107 dochar = 0;
108 dobyte = true;
109 break;
110 case 'L':
111 dolongest = true;
112 break;
113 case '?':
114 default:
115 usage();
116 }
117 argv += optind;
118 argc -= optind;
119
120 /* Wc's flags are on by default. */
121 if (!(doline || doword || dobyte || dochar || dolongest))
122 doline = doword = dobyte = true;
123
124 if (*argv == NULL) {
125 cnt(NULL);
126 } else {
127 bool dototal = (argc > 1);
128
129 do {
130 cnt(*argv);
131 } while(*++argv);
132
133 if (dototal) {
134 print_counts(tlinect, twordct, tcharct, tlongest,
135 "total");
136 }
137 }
138
139 exit(rval);
140 }
141
142 static size_t
143 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
144 size_t *retcnt, const char *file)
145 {
146 size_t r;
147 size_t c = 0;
148
149 do {
150 r = mbrtowc(wc, p, len, st);
151 if (r == (size_t)-1) {
152 warnx("%s: invalid byte sequence", file);
153 rval = 1;
154
155 /* XXX skip 1 byte */
156 len--;
157 p++;
158 memset(st, 0, sizeof(*st));
159 continue;
160 } else if (r == (size_t)-2)
161 break;
162 else if (r == 0)
163 r = 1;
164 c++;
165 if (wc)
166 wc++;
167 len -= r;
168 p += r;
169 } while (len > 0);
170
171 *retcnt = c;
172
173 return (r);
174 }
175
176 static void
177 cnt(const char *file)
178 {
179 u_char buf[MAXBSIZE];
180 wchar_t wbuf[MAXBSIZE];
181 struct stat sb;
182 wc_count_t charct, linect, wordct, longest;
183 mbstate_t st;
184 u_char *C;
185 wchar_t *WC;
186 const char *name; /* filename or <stdin> */
187 size_t r = 0;
188 int fd, len = 0;
189
190 linect = wordct = charct = longest = 0;
191 if (file != NULL) {
192 if ((fd = open(file, O_RDONLY, 0)) < 0) {
193 warn("%s", file);
194 rval = 1;
195 return;
196 }
197 name = file;
198 } else {
199 fd = STDIN_FILENO;
200 name = "<stdin>";
201 }
202
203 if (dochar || doword || dolongest)
204 (void)memset(&st, 0, sizeof(st));
205
206 if (!(doword || dolongest)) {
207 /*
208 * line counting is split out because it's a lot
209 * faster to get lines than to get words, since
210 * the word count requires some logic.
211 */
212 if (doline || dochar) {
213 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
214 if (dochar) {
215 size_t wlen;
216
217 r = do_mb(0, (char *)buf, (size_t)len,
218 &st, &wlen, name);
219 charct += wlen;
220 } else if (dobyte)
221 charct += len;
222 if (doline) {
223 for (C = buf; len--; ++C) {
224 if (*C == '\n')
225 ++linect;
226 }
227 }
228 }
229 }
230
231 /*
232 * if all we need is the number of characters and
233 * it's a directory or a regular or linked file, just
234 * stat the puppy. We avoid testing for it not being
235 * a special device in case someone adds a new type
236 * of inode.
237 */
238 else if (dobyte) {
239 if (fstat(fd, &sb)) {
240 warn("%s", name);
241 rval = 1;
242 } else {
243 if (S_ISREG(sb.st_mode) ||
244 S_ISLNK(sb.st_mode) ||
245 S_ISDIR(sb.st_mode)) {
246 charct = sb.st_size;
247 } else {
248 while ((len =
249 read(fd, buf, MAXBSIZE)) > 0)
250 charct += len;
251 }
252 }
253 }
254 } else {
255 /* do it the hard way... */
256 wc_count_t linelen;
257 bool gotsp;
258
259 linelen = 0;
260 gotsp = true;
261 while ((len = read(fd, buf, MAXBSIZE)) > 0) {
262 size_t wlen;
263
264 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
265 name);
266 if (dochar) {
267 charct += wlen;
268 } else if (dobyte) {
269 charct += len;
270 }
271 for (WC = wbuf; wlen--; ++WC) {
272 if (iswspace(*WC)) {
273 gotsp = true;
274 if (*WC == L'\n') {
275 ++linect;
276 if (linelen > longest)
277 longest = linelen;
278 linelen = 0;
279 } else {
280 linelen++;
281 }
282 } else {
283 /*
284 * This line implements the POSIX
285 * spec, i.e. a word is a "maximal
286 * string of characters delimited by
287 * whitespace." Notice nothing was
288 * said about a character being
289 * printing or non-printing.
290 */
291 if (gotsp) {
292 gotsp = false;
293 ++wordct;
294 }
295
296 linelen++;
297 }
298 }
299 }
300 }
301
302 if (len == -1) {
303 warn("%s", name);
304 rval = 1;
305 }
306 if (dochar && r == (size_t)-2) {
307 warnx("%s: incomplete multibyte character", name);
308 rval = 1;
309 }
310
311 print_counts(linect, wordct, charct, longest, file);
312
313 /*
314 * don't bother checkint doline, doword, or dobyte --- speeds
315 * up the common case
316 */
317 tlinect += linect;
318 twordct += wordct;
319 tcharct += charct;
320 if (dolongest && longest > tlongest)
321 tlongest = longest;
322
323 if (close(fd)) {
324 warn("%s", name);
325 rval = 1;
326 }
327 }
328
329 static void
330 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
331 wc_count_t longest, const char *name)
332 {
333
334 if (doline)
335 (void)printf(WCFMT, (WCCAST)lines);
336 if (doword)
337 (void)printf(WCFMT, (WCCAST)words);
338 if (dobyte || dochar)
339 (void)printf(WCFMT, (WCCAST)chars);
340 if (dolongest)
341 (void)printf(WCFMT, (WCCAST)longest);
342
343 if (name != NULL)
344 (void)printf(" %s\n", name);
345 else
346 (void)putchar('\n');
347 }
348
/*
 * usage --
 *	Write the synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: wc [-c | -m] [-Llw] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
356