Home | History | Annotate | Line # | Download | only in wc
      1 /*	$NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1980, 1987, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #ifndef lint
     34 __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\
     35  The Regents of the University of California.  All rights reserved.");
     36 #endif /* not lint */
     37 
     38 #ifndef lint
     39 #if 0
     40 static char sccsid[] = "@(#)wc.c	8.2 (Berkeley) 5/2/95";
     41 #else
     42 __RCSID("$NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $");
     43 #endif
     44 #endif /* not lint */
     45 
     46 /* wc line, word, char count and optionally longest line. */
     47 
     48 #include <sys/param.h>
     49 #include <sys/file.h>
     50 #include <sys/stat.h>
     51 
     52 #include <ctype.h>
     53 #include <fcntl.h>
     54 #include <err.h>
     55 #include <errno.h>
     56 #include <locale.h>
     57 #include <stdbool.h>
     58 #include <stdio.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <unistd.h>
     62 #include <wchar.h>
     63 #include <wctype.h>
     64 
     65 #ifdef NO_QUAD
     66 typedef u_long wc_count_t;
     67 # define WCFMT	" %7lu"
     68 # define WCCAST unsigned long
     69 #else
     70 typedef u_quad_t wc_count_t;
     71 # define WCFMT	" %7llu"
     72 # define WCCAST	unsigned long long
     73 #endif
     74 
     75 static wc_count_t	tlinect, twordct, tcharct, tlongest;
     76 static bool		doline, doword, dobyte, dochar, dolongest;
     77 static int 		rval = 0;
     78 
     79 static void	cnt(const char *);
     80 static void	print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t,
     81 		    const char *);
     82 __dead static void	usage(void);
     83 static size_t	do_mb(wchar_t *, const char *, size_t, mbstate_t *,
     84 		    size_t *, const char *);
     85 
     86 int
     87 main(int argc, char *argv[])
     88 {
     89 	int ch;
     90 
     91 	setlocale(LC_ALL, "");
     92 
     93 	while ((ch = getopt(argc, argv, "lwcmL")) != -1)
     94 		switch (ch) {
     95 		case 'l':
     96 			doline = true;
     97 			break;
     98 		case 'w':
     99 			doword = true;
    100 			break;
    101 		case 'm':
    102 			dochar = true;
    103 			dobyte = 0;
    104 			break;
    105 		case 'c':
    106 			dochar = 0;
    107 			dobyte = true;
    108 			break;
    109 		case 'L':
    110 			dolongest = true;
    111 			break;
    112 		case '?':
    113 		default:
    114 			usage();
    115 		}
    116 	argv += optind;
    117 	argc -= optind;
    118 
    119 	/* Wc's flags are on by default. */
    120 	if (!(doline || doword || dobyte || dochar || dolongest))
    121 		doline = doword = dobyte = true;
    122 
    123 	if (*argv == NULL) {
    124 		cnt(NULL);
    125 	} else {
    126 		bool dototal = (argc > 1);
    127 
    128 		do {
    129 			cnt(*argv);
    130 		} while(*++argv);
    131 
    132 		if (dototal) {
    133 			print_counts(tlinect, twordct, tcharct, tlongest,
    134 			    "total");
    135 		}
    136 	}
    137 
    138 	exit(rval);
    139 }
    140 
    141 static size_t
    142 do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st,
    143     size_t *retcnt, const char *file)
    144 {
    145 	size_t r;
    146 	size_t c = 0;
    147 
    148 	do {
    149 		r = mbrtowc(wc, p, len, st);
    150 		if (r == (size_t)-1) {
    151 			warnx("%s: invalid byte sequence", file);
    152 			rval = 1;
    153 
    154 			/* XXX skip 1 byte */
    155 			len--;
    156 			p++;
    157 			memset(st, 0, sizeof(*st));
    158 			continue;
    159 		} else if (r == (size_t)-2)
    160 			break;
    161 		else if (r == 0)
    162 			r = 1;
    163 		c++;
    164 		if (wc)
    165 			wc++;
    166 		len -= r;
    167 		p += r;
    168 	} while (len > 0);
    169 
    170 	*retcnt = c;
    171 
    172 	return (r);
    173 }
    174 
    175 static void
    176 cnt(const char *file)
    177 {
    178 	u_char buf[MAXBSIZE];
    179 	wchar_t wbuf[MAXBSIZE];
    180 	struct stat sb;
    181 	wc_count_t charct, linect, wordct, longest;
    182 	mbstate_t st;
    183 	u_char *C;
    184 	wchar_t *WC;
    185 	const char *name;			/* filename or <stdin> */
    186 	size_t r = 0;
    187 	int fd, len = 0;
    188 
    189 	linect = wordct = charct = longest = 0;
    190 	if (file != NULL) {
    191 		if ((fd = open(file, O_RDONLY, 0)) < 0) {
    192 			warn("%s", file);
    193 			rval = 1;
    194 			return;
    195 		}
    196 		name = file;
    197 	} else {
    198 		fd = STDIN_FILENO;
    199 		name = "<stdin>";
    200 	}
    201 
    202 	if (dochar || doword || dolongest)
    203 		(void)memset(&st, 0, sizeof(st));
    204 
    205 	if (!(doword || dolongest)) {
    206 		/*
    207 		 * line counting is split out because it's a lot
    208 		 * faster to get lines than to get words, since
    209 		 * the word count requires some logic.
    210 		 */
    211 		if (doline || dochar) {
    212 			while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    213 				if (dochar) {
    214 					size_t wlen;
    215 
    216 					r = do_mb(0, (char *)buf, (size_t)len,
    217 					    &st, &wlen, name);
    218 					charct += wlen;
    219 				} else if (dobyte)
    220 					charct += len;
    221 				if (doline) {
    222 					for (C = buf; len--; ++C) {
    223 						if (*C == '\n')
    224 							++linect;
    225 					}
    226 				}
    227 			}
    228 		}
    229 
    230 		/*
    231 		 * if all we need is the number of characters and
    232 		 * it's a directory or a regular or linked file, just
    233 		 * stat the puppy.  We avoid testing for it not being
    234 		 * a special device in case someone adds a new type
    235 		 * of inode.
    236 		 */
    237 		else if (dobyte) {
    238 			if (fstat(fd, &sb)) {
    239 				warn("%s", name);
    240 				rval = 1;
    241 			} else {
    242 				if (sb.st_size != 0 &&
    243 				    (S_ISREG(sb.st_mode) ||
    244 				    S_ISLNK(sb.st_mode) ||
    245 				    S_ISDIR(sb.st_mode))) {
    246 					charct = sb.st_size;
    247 				} else {
    248 					while ((len =
    249 					    read(fd, buf, MAXBSIZE)) > 0)
    250 						charct += len;
    251 				}
    252 			}
    253 		}
    254 	} else {
    255 		/* do it the hard way... */
    256 		wc_count_t linelen;
    257                 bool       gotsp;
    258 
    259 		linelen = 0;
    260 		gotsp = true;
    261 		while ((len = read(fd, buf, MAXBSIZE)) > 0) {
    262 			size_t wlen;
    263 
    264 			r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen,
    265 			    name);
    266 			if (dochar) {
    267 				charct += wlen;
    268 			} else if (dobyte) {
    269 				charct += len;
    270 			}
    271 			for (WC = wbuf; wlen--; ++WC) {
    272 				if (iswspace(*WC)) {
    273 					gotsp = true;
    274 					if (*WC == L'\n') {
    275 						++linect;
    276 						if (linelen > longest)
    277 							longest = linelen;
    278 						linelen = 0;
    279 					} else {
    280 						linelen++;
    281 					}
    282 				} else {
    283 					/*
    284 					 * This line implements the POSIX
    285 					 * spec, i.e. a word is a "maximal
    286 					 * string of characters delimited by
    287 					 * whitespace."  Notice nothing was
    288 					 * said about a character being
    289 					 * printing or non-printing.
    290 					 */
    291 					if (gotsp) {
    292 						gotsp = false;
    293 						++wordct;
    294 					}
    295 
    296 					linelen++;
    297 				}
    298 			}
    299 		}
    300 	}
    301 
    302 	if (len == -1) {
    303 		warn("%s", name);
    304 		rval = 1;
    305 	}
    306 	if (dochar && r == (size_t)-2) {
    307 		warnx("%s: incomplete multibyte character", name);
    308 		rval = 1;
    309 	}
    310 
    311 	print_counts(linect, wordct, charct, longest, file);
    312 
    313 	/*
    314 	 * don't bother checkint doline, doword, or dobyte --- speeds
    315 	 * up the common case
    316 	 */
    317 	tlinect += linect;
    318 	twordct += wordct;
    319 	tcharct += charct;
    320 	if (dolongest && longest > tlongest)
    321 		tlongest = longest;
    322 
    323 	if (close(fd)) {
    324 		warn("%s", name);
    325 		rval = 1;
    326 	}
    327 }
    328 
    329 static void
    330 print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars,
    331     wc_count_t longest, const char *name)
    332 {
    333 
    334 	if (doline)
    335 		(void)printf(WCFMT, (WCCAST)lines);
    336 	if (doword)
    337 		(void)printf(WCFMT, (WCCAST)words);
    338 	if (dobyte || dochar)
    339 		(void)printf(WCFMT, (WCCAST)chars);
    340 	if (dolongest)
    341 		(void)printf(WCFMT, (WCCAST)longest);
    342 
    343 	if (name != NULL)
    344 		(void)printf(" %s\n", name);
    345 	else
    346 		(void)putchar('\n');
    347 }
    348 
/*
 * usage --
 *	Write the synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{

	(void)fputs("usage: wc [-c | -m] [-Llw] [file ...]\n", stderr);
	exit(1);
}
    356