Home | History | Annotate | Line # | Download | only in wc
wc.c revision 1.31.4.1
      1  1.31.4.1  sborrill /*	$NetBSD: wc.c,v 1.31.4.1 2010/03/06 21:19:11 sborrill Exp $	*/
      2      1.10       tls 
      3       1.1       cgd /*
      4      1.11       mrg  * Copyright (c) 1980, 1987, 1991, 1993
      5      1.11       mrg  *	The Regents of the University of California.  All rights reserved.
      6       1.1       cgd  *
      7       1.1       cgd  * Redistribution and use in source and binary forms, with or without
      8       1.1       cgd  * modification, are permitted provided that the following conditions
      9       1.1       cgd  * are met:
     10       1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     11       1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     12       1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     13       1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     14       1.1       cgd  *    documentation and/or other materials provided with the distribution.
     15      1.29       agc  * 3. Neither the name of the University nor the names of its contributors
     16       1.1       cgd  *    may be used to endorse or promote products derived from this software
     17       1.1       cgd  *    without specific prior written permission.
     18       1.1       cgd  *
     19       1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20       1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21       1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22       1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23       1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24       1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25       1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26       1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27       1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28       1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29       1.1       cgd  * SUCH DAMAGE.
     30       1.1       cgd  */
     31       1.1       cgd 
     32      1.13       mrg #include <sys/cdefs.h>
     33       1.1       cgd #ifndef lint
     34      1.31     lukem __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
     35      1.31     lukem  The Regents of the University of California.  All rights reserved.");
     36       1.1       cgd #endif /* not lint */
     37       1.1       cgd 
     38       1.1       cgd #ifndef lint
     39      1.11       mrg #if 0
     40      1.11       mrg static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
     41      1.11       mrg #else
     42  1.31.4.1  sborrill __RCSID("$NetBSD: wc.c,v 1.31.4.1 2010/03/06 21:19:11 sborrill Exp $");
     43      1.11       mrg #endif
     44       1.1       cgd #endif /* not lint */
     45       1.1       cgd 
     46  1.31.4.1  sborrill /* wc: count lines, words, and bytes or characters; optionally report the longest line. */
     47       1.1       cgd 
     48      1.11       mrg #include <sys/param.h>
     49      1.22     enami #include <sys/file.h>
     50      1.11       mrg #include <sys/stat.h>
     51      1.11       mrg 
     52      1.22     enami #include <ctype.h>
     53      1.11       mrg #include <fcntl.h>
     54      1.22     enami #include <err.h>
     55      1.11       mrg #include <errno.h>
     56      1.22     enami #include <locale.h>
     57  1.31.4.1  sborrill #include <stdbool.h>
     58       1.1       cgd #include <stdio.h>
     59       1.2       jtc #include <stdlib.h>
     60       1.2       jtc #include <string.h>
     61       1.5       jtc #include <unistd.h>
     62      1.21      yamt #include <wchar.h>
     63      1.21      yamt #include <wctype.h>
     64       1.1       cgd 
     65      1.17  christos #ifdef NO_QUAD
     66      1.17  christos typedef u_long wc_count_t;
     67      1.18   mycroft # define WCFMT	" %7lu"
     68      1.17  christos # define WCCAST unsigned long
     69      1.17  christos #else
     70      1.17  christos typedef u_quad_t wc_count_t;
     71      1.18   mycroft # define WCFMT	" %7llu"
     72      1.17  christos # define WCCAST	unsigned long long
     73      1.17  christos #endif
     74      1.17  christos 
     75  1.31.4.1  sborrill static wc_count_t	tlinect, twordct, tcharct, tlongest;
     76  1.31.4.1  sborrill static bool		doline, doword, dobyte, dochar, dolongest;
     77      1.14  wsanchez static int 		rval = 0;
     78       1.1       cgd 
     79  1.31.4.1  sborrill static void	cnt(const char *);
     80  1.31.4.1  sborrill static void	print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
     81  1.31.4.1  sborrill 		    const char *);
     82      1.30     perry static void	usage(void);
     83      1.30     perry static size_t	do_mb(wchar_t *, const char *, size_t, mbstate_t *,
     84      1.30     perry 		    size_t *, const char *);
     85      1.30     perry int	main(int, char *[]);
     86      1.11       mrg 
     87       1.5       jtc int
     88      1.30     perry main(int argc, char *argv[])
     89       1.1       cgd {
     90      1.13       mrg 	int ch;
     91       1.1       cgd 
     92       1.7       jtc 	setlocale(LC_ALL, "");
     93       1.7       jtc 
     94  1.31.4.1  sborrill 	while ((ch = getopt(argc, argv, "lwcmL")) != -1)
     95      1.24     enami 		switch (ch) {
     96       1.2       jtc 		case 'l':
     97  1.31.4.1  sborrill 			doline = true;
     98       1.2       jtc 			break;
     99       1.2       jtc 		case 'w':
    100  1.31.4.1  sborrill 			doword = true;
    101       1.2       jtc 			break;
    102       1.4       jtc 		case 'm':
    103  1.31.4.1  sborrill 			dochar = true;
    104      1.21      yamt 			dobyte = 0;
    105      1.21      yamt 			break;
    106      1.21      yamt 		case 'c':
    107      1.21      yamt 			dochar = 0;
    108  1.31.4.1  sborrill 			dobyte = true;
    109  1.31.4.1  sborrill 			break;
    110  1.31.4.1  sborrill 		case 'L':
    111  1.31.4.1  sborrill 			dolongest = true;
    112       1.2       jtc 			break;
    113       1.2       jtc 		case '?':
    114       1.2       jtc 		default:
    115      1.11       mrg 			usage();
    116       1.2       jtc 		}
    117       1.2       jtc 	argv += optind;
    118       1.2       jtc 	argc -= optind;
    119       1.2       jtc 
    120      1.11       mrg 	/* Wc's flags are on by default. */
    121  1.31.4.1  sborrill 	if (!(doline || doword || dobyte || dochar || dolongest))
    122  1.31.4.1  sborrill 		doline = doword = dobyte = true;
    123       1.1       cgd 
    124  1.31.4.1  sborrill 	if (*argv == NULL) {
    125      1.11       mrg 		cnt(NULL);
    126       1.2       jtc 	} else {
    127  1.31.4.1  sborrill 		bool dototal = (argc > 1);
    128       1.2       jtc 
    129       1.2       jtc 		do {
    130       1.2       jtc 			cnt(*argv);
    131       1.2       jtc 		} while(*++argv);
    132       1.2       jtc 
    133  1.31.4.1  sborrill 		if (dototal) {
    134  1.31.4.1  sborrill 			print_counts(tlinect, twordct, tcharct, tlongest,
    135  1.31.4.1  sborrill 			    "total");
    136  1.31.4.1  sborrill 		}
    137       1.1       cgd 	}
    138       1.2       jtc 
    139       1.6       jtc 	exit(rval);
    140       1.1       cgd }
    141       1.1       cgd 
    142      1.21      yamt static size_t
    143  1.31.4.1  sborrill do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
    144  1.31.4.1  sborrill     size_t *retcnt, const char *file)
    145      1.21      yamt {
    146      1.21      yamt 	size_t r;
    147      1.21      yamt 	size_t c = 0;
    148      1.21      yamt 
    149      1.21      yamt 	do {
    150  1.31.4.1  sborrill 		r = mbrtowc(wc, p, len, st);
    151      1.21      yamt 		if (r == (size_t)-1) {
    152      1.21      yamt 			warnx("%s: invalid byte sequence", file);
    153      1.21      yamt 			rval = 1;
    154      1.21      yamt 
    155      1.21      yamt 			/* XXX skip 1 byte */
    156  1.31.4.1  sborrill 			len--;
    157      1.23     enami 			p++;
    158      1.21      yamt 			memset(st, 0, sizeof(*st));
    159      1.26     enami 			continue;
    160      1.23     enami 		} else if (r == (size_t)-2)
    161      1.21      yamt 			break;
    162      1.21      yamt 		else if (r == 0)
    163      1.21      yamt 			r = 1;
    164      1.23     enami 		c++;
    165      1.21      yamt 		if (wc)
    166      1.23     enami 			wc++;
    167  1.31.4.1  sborrill 		len -= r;
    168      1.21      yamt 		p += r;
    169  1.31.4.1  sborrill 	} while (len > 0);
    170      1.21      yamt 
    171  1.31.4.1  sborrill 	*retcnt = c;
    172      1.21      yamt 
    173      1.24     enami 	return (r);
    174      1.21      yamt }
    175      1.23     enami 
    176       1.5       jtc static void
    177  1.31.4.1  sborrill cnt(const char *file)
    178       1.1       cgd {
    179      1.24     enami 	u_char buf[MAXBSIZE];
    180      1.24     enami 	wchar_t wbuf[MAXBSIZE];
    181      1.24     enami 	struct stat sb;
    182  1.31.4.1  sborrill 	wc_count_t charct, linect, wordct, longest;
    183      1.24     enami 	mbstate_t st;
    184      1.13       mrg 	u_char *C;
    185      1.21      yamt 	wchar_t *WC;
    186  1.31.4.1  sborrill 	const char *name;			/* filename or <stdin> */
    187      1.21      yamt 	size_t r = 0;
    188  1.31.4.1  sborrill 	int fd, len = 0;
    189       1.1       cgd 
    190  1.31.4.1  sborrill 	linect = wordct = charct = longest = 0;
    191  1.31.4.1  sborrill 	if (file != NULL) {
    192       1.1       cgd 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
    193      1.11       mrg 			warn("%s", file);
    194       1.6       jtc 			rval = 1;
    195       1.6       jtc 			return;
    196       1.1       cgd 		}
    197      1.27     enami 		name = file;
    198      1.23     enami 	} else {
    199       1.7       jtc 		fd = STDIN_FILENO;
    200      1.27     enami 		name = "<stdin>";
    201       1.7       jtc 	}
    202      1.21      yamt 
    203  1.31.4.1  sborrill 	if (dochar || doword || dolongest)
    204  1.31.4.1  sborrill 		(void)memset(&st, 0, sizeof(st));
    205      1.23     enami 
    206  1.31.4.1  sborrill 	if (!(doword || dolongest)) {
    207       1.7       jtc 		/*
    208       1.7       jtc 		 * line counting is split out because it's a lot
    209       1.7       jtc 		 * faster to get lines than to get words, since
    210       1.7       jtc 		 * the word count requires some logic.
    211       1.7       jtc 		 */
    212      1.21      yamt 		if (doline || dochar) {
    213      1.11       mrg 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    214      1.21      yamt 				if (dochar) {
    215      1.21      yamt 					size_t wlen;
    216      1.21      yamt 
    217      1.23     enami 					r = do_mb(0, (char *)buf, (size_t)len,
    218      1.27     enami 					    &st, &wlen, name);
    219      1.21      yamt 					charct += wlen;
    220      1.23     enami 				} else if (dobyte)
    221      1.21      yamt 					charct += len;
    222  1.31.4.1  sborrill 				if (doline) {
    223  1.31.4.1  sborrill 					for (C = buf; len--; ++C) {
    224      1.21      yamt 						if (*C == '\n')
    225      1.21      yamt 							++linect;
    226  1.31.4.1  sborrill 					}
    227  1.31.4.1  sborrill 				}
    228       1.1       cgd 			}
    229       1.7       jtc 		}
    230       1.1       cgd 
    231       1.7       jtc 		/*
    232       1.7       jtc 		 * if all we need is the number of characters and
    233       1.7       jtc 		 * it's a directory or a regular or linked file, just
    234       1.7       jtc 		 * stat the puppy.  We avoid testing for it not being
    235       1.7       jtc 		 * a special device in case someone adds a new type
    236       1.7       jtc 		 * of inode.
    237       1.7       jtc 		 */
    238      1.21      yamt 		else if (dobyte) {
    239      1.11       mrg 			if (fstat(fd, &sb)) {
    240      1.27     enami 				warn("%s", name);
    241       1.7       jtc 				rval = 1;
    242       1.7       jtc 			} else {
    243      1.12   mycroft 				if (S_ISREG(sb.st_mode) ||
    244      1.12   mycroft 				    S_ISLNK(sb.st_mode) ||
    245      1.12   mycroft 				    S_ISDIR(sb.st_mode)) {
    246      1.11       mrg 					charct = sb.st_size;
    247       1.9    andrew 				} else {
    248      1.23     enami 					while ((len =
    249      1.23     enami 					    read(fd, buf, MAXBSIZE)) > 0)
    250       1.9    andrew 						charct += len;
    251       1.1       cgd 				}
    252       1.1       cgd 			}
    253       1.1       cgd 		}
    254      1.23     enami 	} else {
    255       1.7       jtc 		/* do it the hard way... */
    256  1.31.4.1  sborrill 		wc_count_t linelen;
    257  1.31.4.1  sborrill                 bool       gotsp;
    258  1.31.4.1  sborrill 
    259  1.31.4.1  sborrill 		linelen = 0;
    260  1.31.4.1  sborrill 		gotsp = true;
    261       1.8       jtc 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    262      1.21      yamt 			size_t wlen;
    263      1.21      yamt 
    264      1.23     enami 			r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
    265      1.27     enami 			    name);
    266      1.21      yamt 			if (dochar) {
    267      1.21      yamt 				charct += wlen;
    268  1.31.4.1  sborrill 			} else if (dobyte) {
    269      1.21      yamt 				charct += len;
    270  1.31.4.1  sborrill 			}
    271      1.21      yamt 			for (WC = wbuf; wlen--; ++WC) {
    272      1.21      yamt 				if (iswspace(*WC)) {
    273  1.31.4.1  sborrill 					gotsp = true;
    274      1.21      yamt 					if (*WC == L'\n') {
    275       1.7       jtc 						++linect;
    276  1.31.4.1  sborrill 						if (linelen > longest)
    277  1.31.4.1  sborrill 							longest = linelen;
    278  1.31.4.1  sborrill 						linelen = 0;
    279  1.31.4.1  sborrill 					} else {
    280  1.31.4.1  sborrill 						linelen++;
    281       1.7       jtc 					}
    282       1.7       jtc 				} else {
    283       1.7       jtc 					/*
    284       1.7       jtc 					 * This line implements the POSIX
    285       1.7       jtc 					 * spec, i.e. a word is a "maximal
    286       1.7       jtc 					 * string of characters delimited by
    287       1.7       jtc 					 * whitespace."  Notice nothing was
    288       1.7       jtc 					 * said about a character being
    289       1.7       jtc 					 * printing or non-printing.
    290       1.7       jtc 					 */
    291       1.7       jtc 					if (gotsp) {
    292  1.31.4.1  sborrill 						gotsp = false;
    293       1.7       jtc 						++wordct;
    294       1.7       jtc 					}
    295  1.31.4.1  sborrill 
    296  1.31.4.1  sborrill 					linelen++;
    297       1.2       jtc 				}
    298       1.1       cgd 			}
    299       1.2       jtc 		}
    300      1.21      yamt 	}
    301      1.21      yamt 
    302      1.21      yamt 	if (len == -1) {
    303      1.27     enami 		warn("%s", name);
    304      1.21      yamt 		rval = 1;
    305      1.21      yamt 	}
    306      1.21      yamt 	if (dochar && r == (size_t)-2) {
    307      1.27     enami 		warnx("%s: incomplete multibyte character", name);
    308      1.21      yamt 		rval = 1;
    309       1.1       cgd 	}
    310       1.7       jtc 
    311  1.31.4.1  sborrill 	print_counts(linect, wordct, charct, longest, file);
    312       1.8       jtc 
    313      1.23     enami 	/*
    314      1.23     enami 	 * don't bother checkint doline, doword, or dobyte --- speeds
    315      1.23     enami 	 * up the common case
    316      1.23     enami 	 */
    317       1.8       jtc 	tlinect += linect;
    318       1.8       jtc 	twordct += wordct;
    319       1.8       jtc 	tcharct += charct;
    320  1.31.4.1  sborrill 	if (dolongest && longest > tlongest)
    321  1.31.4.1  sborrill 		tlongest = longest;
    322       1.8       jtc 
    323       1.8       jtc 	if (close(fd)) {
    324      1.27     enami 		warn("%s", name);
    325       1.8       jtc 		rval = 1;
    326       1.1       cgd 	}
    327       1.8       jtc }
    328       1.8       jtc 
    329      1.11       mrg static void
    330  1.31.4.1  sborrill print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
    331  1.31.4.1  sborrill     wc_count_t longest, const char *name)
    332       1.8       jtc {
    333       1.8       jtc 
    334       1.8       jtc 	if (doline)
    335  1.31.4.1  sborrill 		(void)printf(WCFMT, (WCCAST)lines);
    336       1.8       jtc 	if (doword)
    337  1.31.4.1  sborrill 		(void)printf(WCFMT, (WCCAST)words);
    338      1.21      yamt 	if (dobyte || dochar)
    339  1.31.4.1  sborrill 		(void)printf(WCFMT, (WCCAST)chars);
    340  1.31.4.1  sborrill 	if (dolongest)
    341  1.31.4.1  sborrill 		(void)printf(WCFMT, (WCCAST)longest);
    342       1.7       jtc 
    343  1.31.4.1  sborrill 	if (name != NULL)
    344  1.31.4.1  sborrill 		(void)printf(" %s\n", name);
    345      1.19   mycroft 	else
    346  1.31.4.1  sborrill 		(void)putchar('\n');
    347      1.11       mrg }
    348      1.11       mrg 
    349      1.11       mrg static void
    350      1.30     perry usage(void)
    351      1.11       mrg {
    352      1.23     enami 
    353  1.31.4.1  sborrill 	(void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n");
    354      1.11       mrg 	exit(1);
    355       1.1       cgd }
    356