Home | History | Annotate | Line # | Download | only in wc
wc.c revision 1.25
      1 /*	$NetBSD: wc.c,v 1.25 2002/03/23 21:29:38 enami Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1987, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the University of
     18  *	California, Berkeley and its contributors.
     19  * 4. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 #ifndef lint
     38 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
     39 	The Regents of the University of California.  All rights reserved.\n");
     40 #endif /* not lint */
     41 
     42 #ifndef lint
     43 #if 0
     44 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
     45 #else
     46 __RCSID("$NetBSD: wc.c,v 1.25 2002/03/23 21:29:38 enami Exp $");
     47 #endif
     48 #endif /* not lint */
     49 
     50 /* wc line, word and char count */
     51 
     52 #include <sys/param.h>
     53 #include <sys/file.h>
     54 #include <sys/stat.h>
     55 
     56 #include <ctype.h>
     57 #include <fcntl.h>
     58 #include <err.h>
     59 #include <errno.h>
     60 #include <locale.h>
     61 #include <stdio.h>
     62 #include <stdlib.h>
     63 #include <string.h>
     64 #include <unistd.h>
     65 #include <wchar.h>
     66 #include <wctype.h>
     67 
/*
 * Counter type used for every line/word/character tally, together with
 * the printf format (WCFMT) and the cast (WCCAST) that print_counts()
 * uses to print one such counter.  Builds that define NO_QUAD use
 * u_long; all others use u_quad_t.
 */
#ifdef NO_QUAD
typedef u_long wc_count_t;
# define WCFMT	" %7lu"
# define WCCAST unsigned long
#else
typedef u_quad_t wc_count_t;
# define WCFMT	" %7llu"
# define WCCAST	unsigned long long
#endif

/* Running totals over all operands; cnt() adds to them, main() prints them. */
static wc_count_t	tlinect, twordct, tcharct;
/* Output selection, set from -l, -w, -c and -m respectively in main(). */
static int		doline, doword, dobyte, dochar;
/* Exit status handed to exit() by main(); set to 1 when an error is reported. */
static int 		rval = 0;

/* Forward declarations; __P() wraps the prototype argument lists. */
static void	cnt __P((char *));
static void	print_counts __P((wc_count_t, wc_count_t, wc_count_t, char *));
static void	usage __P((void));
static size_t	do_mb __P((wchar_t *, const char *, size_t, mbstate_t *,
		    size_t *, const char *));
int	main __P((int, char *[]));
     88 
     89 int
     90 main(argc, argv)
     91 	int argc;
     92 	char *argv[];
     93 {
     94 	int ch;
     95 
     96 	setlocale(LC_ALL, "");
     97 
     98 	while ((ch = getopt(argc, argv, "lwcm")) != -1)
     99 		switch (ch) {
    100 		case 'l':
    101 			doline = 1;
    102 			break;
    103 		case 'w':
    104 			doword = 1;
    105 			break;
    106 		case 'm':
    107 			dochar = 1;
    108 			dobyte = 0;
    109 			break;
    110 		case 'c':
    111 			dochar = 0;
    112 			dobyte = 1;
    113 			break;
    114 		case '?':
    115 		default:
    116 			usage();
    117 		}
    118 	argv += optind;
    119 	argc -= optind;
    120 
    121 	/* Wc's flags are on by default. */
    122 	if (doline + doword + dobyte + dochar == 0)
    123 		doline = doword = dobyte = 1;
    124 
    125 	if (!*argv) {
    126 		cnt(NULL);
    127 	} else {
    128 		int dototal = (argc > 1);
    129 
    130 		do {
    131 			cnt(*argv);
    132 		} while(*++argv);
    133 
    134 		if (dototal)
    135 			print_counts(tlinect, twordct, tcharct, "total");
    136 	}
    137 
    138 	exit(rval);
    139 }
    140 
    141 static size_t
    142 do_mb(wc, p, mblen, st, cnt, file)
    143 	wchar_t *wc;
    144 	const char *p;
    145 	size_t mblen;
    146 	mbstate_t *st;
    147 	size_t *cnt;
    148 	const char *file;
    149 {
    150 	size_t r;
    151 	size_t c = 0;
    152 
    153 	do {
    154 		r = mbrtowc(wc, p, mblen, st);
    155 		if (r == (size_t)-1) {
    156 			warnx("%s: invalid byte sequence", file);
    157 			rval = 1;
    158 
    159 			/* XXX skip 1 byte */
    160 			mblen--;
    161 			p++;
    162 			memset(st, 0, sizeof(*st));
    163 		} else if (r == (size_t)-2)
    164 			break;
    165 		else if (r == 0)
    166 			r = 1;
    167 		c++;
    168 		if (wc)
    169 			wc++;
    170 		mblen -= r;
    171 		p += r;
    172 	} while (mblen > 0);
    173 
    174 	*cnt = c;
    175 
    176 	return (r);
    177 }
    178 
    179 static void
    180 cnt(file)
    181 	char *file;
    182 {
    183 	u_char buf[MAXBSIZE];
    184 	wchar_t wbuf[MAXBSIZE];
    185 	struct stat sb;
    186 	wc_count_t charct, linect, wordct;
    187 	mbstate_t st;
    188 	u_char *C;
    189 	wchar_t *WC;
    190 	size_t r = 0;
    191 	int fd, gotsp, len = 0;
    192 
    193 	linect = wordct = charct = 0;
    194 	if (file) {
    195 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
    196 			warn("%s", file);
    197 			rval = 1;
    198 			return;
    199 		}
    200 	} else {
    201 		fd = STDIN_FILENO;
    202 	}
    203 
    204 	if (dochar || doword)
    205 		memset(&st, 0, sizeof(st));
    206 
    207 	if (!doword) {
    208 		/*
    209 		 * line counting is split out because it's a lot
    210 		 * faster to get lines than to get words, since
    211 		 * the word count requires some logic.
    212 		 */
    213 		if (doline || dochar) {
    214 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    215 				if (dochar) {
    216 					size_t wlen;
    217 
    218 					r = do_mb(0, (char *)buf, (size_t)len,
    219 					    &st, &wlen, file);
    220 					charct += wlen;
    221 				} else if (dobyte)
    222 					charct += len;
    223 				if (doline)
    224 					for (C = buf; len--; ++C)
    225 						if (*C == '\n')
    226 							++linect;
    227 			}
    228 		}
    229 
    230 		/*
    231 		 * if all we need is the number of characters and
    232 		 * it's a directory or a regular or linked file, just
    233 		 * stat the puppy.  We avoid testing for it not being
    234 		 * a special device in case someone adds a new type
    235 		 * of inode.
    236 		 */
    237 		else if (dobyte) {
    238 			if (fstat(fd, &sb)) {
    239 				warn("%s", file);
    240 				rval = 1;
    241 			} else {
    242 				if (S_ISREG(sb.st_mode) ||
    243 				    S_ISLNK(sb.st_mode) ||
    244 				    S_ISDIR(sb.st_mode)) {
    245 					charct = sb.st_size;
    246 				} else {
    247 					while ((len =
    248 					    read(fd, buf, MAXBSIZE)) > 0)
    249 						charct += len;
    250 				}
    251 			}
    252 		}
    253 	} else {
    254 		/* do it the hard way... */
    255 		gotsp = 1;
    256 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    257 			size_t wlen;
    258 
    259 			r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
    260 			    file);
    261 			if (dochar) {
    262 				charct += wlen;
    263 			} else if (dobyte)
    264 				charct += len;
    265 			for (WC = wbuf; wlen--; ++WC) {
    266 				if (iswspace(*WC)) {
    267 					gotsp = 1;
    268 					if (*WC == L'\n') {
    269 						++linect;
    270 					}
    271 				} else {
    272 					/*
    273 					 * This line implements the POSIX
    274 					 * spec, i.e. a word is a "maximal
    275 					 * string of characters delimited by
    276 					 * whitespace."  Notice nothing was
    277 					 * said about a character being
    278 					 * printing or non-printing.
    279 					 */
    280 					if (gotsp) {
    281 						gotsp = 0;
    282 						++wordct;
    283 					}
    284 				}
    285 			}
    286 		}
    287 	}
    288 
    289 	if (len == -1) {
    290 		warn("%s", file);
    291 		rval = 1;
    292 	}
    293 	if (dochar && r == (size_t)-2) {
    294 		warnx("%s: incomplete multibyte character", file);
    295 		rval = 1;
    296 	}
    297 
    298 	print_counts(linect, wordct, charct, file ? file : 0);
    299 
    300 	/*
    301 	 * don't bother checkint doline, doword, or dobyte --- speeds
    302 	 * up the common case
    303 	 */
    304 	tlinect += linect;
    305 	twordct += wordct;
    306 	tcharct += charct;
    307 
    308 	if (close(fd)) {
    309 		warn("%s", file);
    310 		rval = 1;
    311 	}
    312 }
    313 
    314 static void
    315 print_counts(lines, words, chars, name)
    316 	wc_count_t lines;
    317 	wc_count_t words;
    318 	wc_count_t chars;
    319 	char *name;
    320 {
    321 
    322 	if (doline)
    323 		printf(WCFMT, (WCCAST)lines);
    324 	if (doword)
    325 		printf(WCFMT, (WCCAST)words);
    326 	if (dobyte || dochar)
    327 		printf(WCFMT, (WCCAST)chars);
    328 
    329 	if (name)
    330 		printf(" %s\n", name);
    331 	else
    332 		printf("\n");
    333 }
    334 
/*
 * usage --
 *	Print the command synopsis on stderr and exit with status 1.
 *	The synopsis lists every option main() accepts, including -m,
 *	which is mutually exclusive with -c.
 */
static void
usage()
{

	(void)fprintf(stderr, "usage: wc [-c | -m] [-lw] [file ...]\n");
	exit(1);
}
    342