Home | History | Annotate | Line # | Download | only in wc
wc.c revision 1.34
      1 /*	$NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1987, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
     35  The Regents of the University of California.  All rights reserved.");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
     41 #else
     42 __RCSID("$NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $");
     43 #endif
     44 #endif /* not lint */
     45 
     46 /* wc line, word, char count and optionally longest line. */
     47 
     48 #include <sys/param.h>
     49 #include <sys/file.h>
     50 #include <sys/stat.h>
     51 
     52 #include <ctype.h>
     53 #include <fcntl.h>
     54 #include <err.h>
     55 #include <errno.h>
     56 #include <locale.h>
     57 #include <stdbool.h>
     58 #include <stdio.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <unistd.h>
     62 #include <wchar.h>
     63 #include <wctype.h>
     64 
     65 #ifdef NO_QUAD
     66 typedef u_long wc_count_t;
     67 # define WCFMT	" %7lu"
     68 # define WCCAST unsigned long
     69 #else
     70 typedef u_quad_t wc_count_t;
     71 # define WCFMT	" %7llu"
     72 # define WCCAST	unsigned long long
     73 #endif
     74 
     75 static wc_count_t	tlinect, twordct, tcharct, tlongest;
     76 static bool		doline, doword, dobyte, dochar, dolongest;
     77 static int 		rval = 0;
     78 
     79 static void	cnt(const char *);
     80 static void	print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
     81 		    const char *);
     82 static void	usage(void);
     83 static size_t	do_mb(wchar_t *, const char *, size_t, mbstate_t *,
     84 		    size_t *, const char *);
     85 int	main(int, char *[]);
     86 
     87 int
     88 main(int argc, char *argv[])
     89 {
     90 	int ch;
     91 
     92 	setlocale(LC_ALL, "");
     93 
     94 	while ((ch = getopt(argc, argv, "lwcmL")) != -1)
     95 		switch (ch) {
     96 		case 'l':
     97 			doline = true;
     98 			break;
     99 		case 'w':
    100 			doword = true;
    101 			break;
    102 		case 'm':
    103 			dochar = true;
    104 			dobyte = 0;
    105 			break;
    106 		case 'c':
    107 			dochar = 0;
    108 			dobyte = true;
    109 			break;
    110 		case 'L':
    111 			dolongest = true;
    112 			break;
    113 		case '?':
    114 		default:
    115 			usage();
    116 		}
    117 	argv += optind;
    118 	argc -= optind;
    119 
    120 	/* Wc's flags are on by default. */
    121 	if (!(doline || doword || dobyte || dochar || dolongest))
    122 		doline = doword = dobyte = true;
    123 
    124 	if (*argv == NULL) {
    125 		cnt(NULL);
    126 	} else {
    127 		bool dototal = (argc > 1);
    128 
    129 		do {
    130 			cnt(*argv);
    131 		} while(*++argv);
    132 
    133 		if (dototal) {
    134 			print_counts(tlinect, twordct, tcharct, tlongest,
    135 			    "total");
    136 		}
    137 	}
    138 
    139 	exit(rval);
    140 }
    141 
    142 static size_t
    143 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
    144     size_t *retcnt, const char *file)
    145 {
    146 	size_t r;
    147 	size_t c = 0;
    148 
    149 	do {
    150 		r = mbrtowc(wc, p, len, st);
    151 		if (r == (size_t)-1) {
    152 			warnx("%s: invalid byte sequence", file);
    153 			rval = 1;
    154 
    155 			/* XXX skip 1 byte */
    156 			len--;
    157 			p++;
    158 			memset(st, 0, sizeof(*st));
    159 			continue;
    160 		} else if (r == (size_t)-2)
    161 			break;
    162 		else if (r == 0)
    163 			r = 1;
    164 		c++;
    165 		if (wc)
    166 			wc++;
    167 		len -= r;
    168 		p += r;
    169 	} while (len > 0);
    170 
    171 	*retcnt = c;
    172 
    173 	return (r);
    174 }
    175 
    176 static void
    177 cnt(const char *file)
    178 {
    179 	u_char buf[MAXBSIZE];
    180 	wchar_t wbuf[MAXBSIZE];
    181 	struct stat sb;
    182 	wc_count_t charct, linect, wordct, longest;
    183 	mbstate_t st;
    184 	u_char *C;
    185 	wchar_t *WC;
    186 	const char *name;			/* filename or <stdin> */
    187 	size_t r = 0;
    188 	int fd, len = 0;
    189 
    190 	linect = wordct = charct = longest = 0;
    191 	if (file != NULL) {
    192 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
    193 			warn("%s", file);
    194 			rval = 1;
    195 			return;
    196 		}
    197 		name = file;
    198 	} else {
    199 		fd = STDIN_FILENO;
    200 		name = "<stdin>";
    201 	}
    202 
    203 	if (dochar || doword || dolongest)
    204 		(void)memset(&st, 0, sizeof(st));
    205 
    206 	if (!(doword || dolongest)) {
    207 		/*
    208 		 * line counting is split out because it's a lot
    209 		 * faster to get lines than to get words, since
    210 		 * the word count requires some logic.
    211 		 */
    212 		if (doline || dochar) {
    213 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    214 				if (dochar) {
    215 					size_t wlen;
    216 
    217 					r = do_mb(0, (char *)buf, (size_t)len,
    218 					    &st, &wlen, name);
    219 					charct += wlen;
    220 				} else if (dobyte)
    221 					charct += len;
    222 				if (doline) {
    223 					for (C = buf; len--; ++C) {
    224 						if (*C == '\n')
    225 							++linect;
    226 					}
    227 				}
    228 			}
    229 		}
    230 
    231 		/*
    232 		 * if all we need is the number of characters and
    233 		 * it's a directory or a regular or linked file, just
    234 		 * stat the puppy.  We avoid testing for it not being
    235 		 * a special device in case someone adds a new type
    236 		 * of inode.
    237 		 */
    238 		else if (dobyte) {
    239 			if (fstat(fd, &sb)) {
    240 				warn("%s", name);
    241 				rval = 1;
    242 			} else {
    243 				if (S_ISREG(sb.st_mode) ||
    244 				    S_ISLNK(sb.st_mode) ||
    245 				    S_ISDIR(sb.st_mode)) {
    246 					charct = sb.st_size;
    247 				} else {
    248 					while ((len =
    249 					    read(fd, buf, MAXBSIZE)) > 0)
    250 						charct += len;
    251 				}
    252 			}
    253 		}
    254 	} else {
    255 		/* do it the hard way... */
    256 		wc_count_t linelen;
    257                 bool       gotsp;
    258 
    259 		linelen = 0;
    260 		gotsp = true;
    261 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    262 			size_t wlen;
    263 
    264 			r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
    265 			    name);
    266 			if (dochar) {
    267 				charct += wlen;
    268 			} else if (dobyte) {
    269 				charct += len;
    270 			}
    271 			for (WC = wbuf; wlen--; ++WC) {
    272 				if (iswspace(*WC)) {
    273 					gotsp = true;
    274 					if (*WC == L'\n') {
    275 						++linect;
    276 						if (linelen > longest)
    277 							longest = linelen;
    278 						linelen = 0;
    279 					} else {
    280 						linelen++;
    281 					}
    282 				} else {
    283 					/*
    284 					 * This line implements the POSIX
    285 					 * spec, i.e. a word is a "maximal
    286 					 * string of characters delimited by
    287 					 * whitespace."  Notice nothing was
    288 					 * said about a character being
    289 					 * printing or non-printing.
    290 					 */
    291 					if (gotsp) {
    292 						gotsp = false;
    293 						++wordct;
    294 					}
    295 
    296 					linelen++;
    297 				}
    298 			}
    299 		}
    300 	}
    301 
    302 	if (len == -1) {
    303 		warn("%s", name);
    304 		rval = 1;
    305 	}
    306 	if (dochar && r == (size_t)-2) {
    307 		warnx("%s: incomplete multibyte character", name);
    308 		rval = 1;
    309 	}
    310 
    311 	print_counts(linect, wordct, charct, longest, file);
    312 
    313 	/*
    314 	 * don't bother checkint doline, doword, or dobyte --- speeds
    315 	 * up the common case
    316 	 */
    317 	tlinect += linect;
    318 	twordct += wordct;
    319 	tcharct += charct;
    320 	if (dolongest && longest > tlongest)
    321 		tlongest = longest;
    322 
    323 	if (close(fd)) {
    324 		warn("%s", name);
    325 		rval = 1;
    326 	}
    327 }
    328 
    329 static void
    330 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
    331     wc_count_t longest, const char *name)
    332 {
    333 
    334 	if (doline)
    335 		(void)printf(WCFMT, (WCCAST)lines);
    336 	if (doword)
    337 		(void)printf(WCFMT, (WCCAST)words);
    338 	if (dobyte || dochar)
    339 		(void)printf(WCFMT, (WCCAST)chars);
    340 	if (dolongest)
    341 		(void)printf(WCFMT, (WCCAST)longest);
    342 
    343 	if (name != NULL)
    344 		(void)printf(" %s\n", name);
    345 	else
    346 		(void)putchar('\n');
    347 }
    348 
/*
 * Write the usage synopsis to standard error and terminate the program
 * with exit status 1.  Does not return.
 */
static void
usage(void)
{

	(void)fputs("usage: wc [-c | -m] [-Llw] [file ...]\n", stderr);
	exit(1);
}
    356