1 1.37 christos /* $NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $ */ 2 1.10 tls 3 1.1 cgd /* 4 1.11 mrg * Copyright (c) 1980, 1987, 1991, 1993 5 1.11 mrg * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * Redistribution and use in source and binary forms, with or without 8 1.1 cgd * modification, are permitted provided that the following conditions 9 1.1 cgd * are met: 10 1.1 cgd * 1. Redistributions of source code must retain the above copyright 11 1.1 cgd * notice, this list of conditions and the following disclaimer. 12 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 cgd * notice, this list of conditions and the following disclaimer in the 14 1.1 cgd * documentation and/or other materials provided with the distribution. 15 1.29 agc * 3. Neither the name of the University nor the names of its contributors 16 1.1 cgd * may be used to endorse or promote products derived from this software 17 1.1 cgd * without specific prior written permission. 18 1.1 cgd * 19 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 1.1 cgd * SUCH DAMAGE. 30 1.1 cgd */ 31 1.1 cgd 32 1.13 mrg #include <sys/cdefs.h> 33 1.1 cgd #ifndef lint 34 1.31 lukem __COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ 35 1.31 lukem The Regents of the University of California. All rights reserved."); 36 1.1 cgd #endif /* not lint */ 37 1.1 cgd 38 1.1 cgd #ifndef lint 39 1.11 mrg #if 0 40 1.11 mrg static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; 41 1.11 mrg #else 42 1.37 christos __RCSID("$NetBSD: wc.c,v 1.37 2024/01/14 17:39:19 christos Exp $"); 43 1.11 mrg #endif 44 1.1 cgd #endif /* not lint */ 45 1.1 cgd 46 1.33 tron /* wc line, word, char count and optionally longest line. */ 47 1.1 cgd 48 1.11 mrg #include <sys/param.h> 49 1.22 enami #include <sys/file.h> 50 1.11 mrg #include <sys/stat.h> 51 1.11 mrg 52 1.22 enami #include <ctype.h> 53 1.11 mrg #include <fcntl.h> 54 1.22 enami #include <err.h> 55 1.11 mrg #include <errno.h> 56 1.22 enami #include <locale.h> 57 1.33 tron #include <stdbool.h> 58 1.1 cgd #include <stdio.h> 59 1.2 jtc #include <stdlib.h> 60 1.2 jtc #include <string.h> 61 1.5 jtc #include <unistd.h> 62 1.21 yamt #include <wchar.h> 63 1.21 yamt #include <wctype.h> 64 1.1 cgd 65 1.17 christos #ifdef NO_QUAD 66 1.17 christos typedef u_long wc_count_t; 67 1.18 mycroft # define WCFMT " %7lu" 68 1.17 christos # define WCCAST unsigned long 69 1.17 christos #else 70 1.17 christos typedef u_quad_t wc_count_t; 71 1.18 mycroft # define WCFMT " %7llu" 72 1.17 christos # define WCCAST unsigned long long 73 1.17 christos #endif 74 1.17 christos 75 1.33 tron static wc_count_t tlinect, twordct, tcharct, tlongest; 76 1.33 tron static bool doline, doword, dobyte, dochar, dolongest; 77 1.14 wsanchez static int rval = 0; 78 1.1 cgd 79 1.32 lukem static void cnt(const char *); 80 1.33 tron static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t, 81 1.33 tron const char *); 82 1.35 joerg __dead static void usage(void); 83 1.30 perry static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, 84 1.30 perry size_t *, const char *); 85 1.11 mrg 86 1.5 jtc int 87 1.30 perry main(int argc, char *argv[]) 88 1.1 cgd { 89 1.13 mrg int ch; 90 1.1 cgd 91 1.7 jtc setlocale(LC_ALL, ""); 92 1.7 jtc 93 1.33 tron while ((ch = getopt(argc, argv, "lwcmL")) != -1) 94 1.24 enami switch (ch) { 95 1.2 jtc case 'l': 96 1.33 tron doline = true; 97 1.2 jtc break; 98 1.2 jtc case 'w': 99 1.33 tron doword = true; 100 1.2 jtc break; 101 1.4 jtc case 'm': 102 1.33 tron dochar = true; 103 1.21 yamt dobyte = 0; 104 1.21 yamt break; 105 1.21 yamt case 'c': 106 1.21 yamt dochar = 0; 107 1.33 tron dobyte = true; 108 1.33 tron break; 109 1.33 tron case 'L': 110 1.33 tron dolongest = true; 111 1.2 jtc break; 112 1.2 jtc case '?': 113 1.2 jtc default: 114 1.11 mrg usage(); 115 1.2 jtc } 116 1.2 jtc argv += optind; 117 1.2 jtc argc -= optind; 118 1.2 jtc 119 1.11 mrg /* Wc's flags are on by default. */ 120 1.33 tron if (!(doline || doword || dobyte || dochar || dolongest)) 121 1.33 tron doline = doword = dobyte = true; 122 1.1 cgd 123 1.33 tron if (*argv == NULL) { 124 1.11 mrg cnt(NULL); 125 1.2 jtc } else { 126 1.33 tron bool dototal = (argc > 1); 127 1.2 jtc 128 1.2 jtc do { 129 1.2 jtc cnt(*argv); 130 1.2 jtc } while(*++argv); 131 1.2 jtc 132 1.33 tron if (dototal) { 133 1.33 tron print_counts(tlinect, twordct, tcharct, tlongest, 134 1.33 tron "total"); 135 1.33 tron } 136 1.1 cgd } 137 1.2 jtc 138 1.6 jtc exit(rval); 139 1.1 cgd } 140 1.1 cgd 141 1.21 yamt static size_t 142 1.32 lukem do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st, 143 1.32 lukem size_t *retcnt, const char *file) 144 1.21 yamt { 145 1.21 yamt size_t r; 146 1.21 yamt size_t c = 0; 147 1.21 yamt 148 1.21 yamt do { 149 1.32 lukem r = mbrtowc(wc, p, len, st); 150 1.21 yamt if (r == (size_t)-1) { 151 1.21 yamt warnx("%s: invalid byte sequence", file); 152 1.21 yamt rval = 1; 153 1.21 yamt 154 1.21 yamt /* XXX skip 1 byte */ 155 1.32 lukem len--; 156 1.23 enami p++; 157 1.21 yamt memset(st, 0, sizeof(*st)); 158 1.26 enami continue; 159 1.23 enami } else if (r == (size_t)-2) 160 1.21 yamt break; 161 1.21 yamt else if (r == 0) 162 1.21 yamt r = 1; 163 1.23 enami c++; 164 1.21 yamt if (wc) 165 1.23 enami wc++; 166 1.32 lukem len -= r; 167 1.21 yamt p += r; 168 1.32 lukem } while (len > 0); 169 1.21 yamt 170 1.32 lukem *retcnt = c; 171 1.21 yamt 172 1.24 enami return (r); 173 1.21 yamt } 174 1.23 enami 175 1.5 jtc static void 176 1.32 lukem cnt(const char *file) 177 1.1 cgd { 178 1.24 enami u_char buf[MAXBSIZE]; 179 1.24 enami wchar_t wbuf[MAXBSIZE]; 180 1.24 enami struct stat sb; 181 1.33 tron wc_count_t charct, linect, wordct, longest; 182 1.24 enami mbstate_t st; 183 1.13 mrg u_char *C; 184 1.21 yamt wchar_t *WC; 185 1.32 lukem const char *name; /* filename or <stdin> */ 186 1.21 yamt size_t r = 0; 187 1.34 tron int fd, len = 0; 188 1.1 cgd 189 1.33 tron linect = wordct = charct = longest = 0; 190 1.33 tron if (file != NULL) { 191 1.1 cgd if ((fd = open(file, O_RDONLY, 0)) < 0) { 192 1.11 mrg warn("%s", file); 193 1.6 jtc rval = 1; 194 1.6 jtc return; 195 1.1 cgd } 196 1.27 enami name = file; 197 1.23 enami } else { 198 1.7 jtc fd = STDIN_FILENO; 199 1.27 enami name = "<stdin>"; 200 1.7 jtc } 201 1.21 yamt 202 1.34 tron if (dochar || doword || dolongest) 203 1.34 tron (void)memset(&st, 0, sizeof(st)); 204 1.23 enami 205 1.34 tron if (!(doword || dolongest)) { 206 1.7 jtc /* 207 1.7 jtc * line counting is split out because it's a lot 208 1.7 jtc * faster to get lines than to get words, since 209 1.7 jtc * the word count requires some logic. 210 1.7 jtc */ 211 1.34 tron if (doline || dochar) { 212 1.11 mrg while ((len = read(fd, buf, MAXBSIZE)) > 0) { 213 1.21 yamt if (dochar) { 214 1.21 yamt size_t wlen; 215 1.21 yamt 216 1.23 enami r = do_mb(0, (char *)buf, (size_t)len, 217 1.27 enami &st, &wlen, name); 218 1.21 yamt charct += wlen; 219 1.23 enami } else if (dobyte) 220 1.21 yamt charct += len; 221 1.34 tron if (doline) { 222 1.33 tron for (C = buf; len--; ++C) { 223 1.34 tron if (*C == '\n') 224 1.21 yamt ++linect; 225 1.33 tron } 226 1.33 tron } 227 1.1 cgd } 228 1.7 jtc } 229 1.1 cgd 230 1.7 jtc /* 231 1.7 jtc * if all we need is the number of characters and 232 1.7 jtc * it's a directory or a regular or linked file, just 233 1.7 jtc * stat the puppy. We avoid testing for it not being 234 1.7 jtc * a special device in case someone adds a new type 235 1.7 jtc * of inode. 236 1.7 jtc */ 237 1.21 yamt else if (dobyte) { 238 1.11 mrg if (fstat(fd, &sb)) { 239 1.27 enami warn("%s", name); 240 1.7 jtc rval = 1; 241 1.7 jtc } else { 242 1.37 christos if (sb.st_size != 0 && 243 1.36 christos (S_ISREG(sb.st_mode) || 244 1.12 mycroft S_ISLNK(sb.st_mode) || 245 1.36 christos S_ISDIR(sb.st_mode))) { 246 1.11 mrg charct = sb.st_size; 247 1.9 andrew } else { 248 1.23 enami while ((len = 249 1.23 enami read(fd, buf, MAXBSIZE)) > 0) 250 1.9 andrew charct += len; 251 1.1 cgd } 252 1.1 cgd } 253 1.1 cgd } 254 1.23 enami } else { 255 1.7 jtc /* do it the hard way... */ 256 1.34 tron wc_count_t linelen; 257 1.34 tron bool gotsp; 258 1.33 tron 259 1.34 tron linelen = 0; 260 1.34 tron gotsp = true; 261 1.8 jtc while ((len = read(fd, buf, MAXBSIZE)) > 0) { 262 1.21 yamt size_t wlen; 263 1.21 yamt 264 1.23 enami r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, 265 1.27 enami name); 266 1.21 yamt if (dochar) { 267 1.21 yamt charct += wlen; 268 1.33 tron } else if (dobyte) { 269 1.21 yamt charct += len; 270 1.33 tron } 271 1.21 yamt for (WC = wbuf; wlen--; ++WC) { 272 1.21 yamt if (iswspace(*WC)) { 273 1.34 tron gotsp = true; 274 1.21 yamt if (*WC == L'\n') { 275 1.7 jtc ++linect; 276 1.34 tron if (linelen > longest) 277 1.34 tron longest = linelen; 278 1.34 tron linelen = 0; 279 1.33 tron } else { 280 1.34 tron linelen++; 281 1.7 jtc } 282 1.7 jtc } else { 283 1.7 jtc /* 284 1.7 jtc * This line implements the POSIX 285 1.7 jtc * spec, i.e. a word is a "maximal 286 1.7 jtc * string of characters delimited by 287 1.7 jtc * whitespace." Notice nothing was 288 1.7 jtc * said about a character being 289 1.7 jtc * printing or non-printing. 290 1.7 jtc */ 291 1.7 jtc if (gotsp) { 292 1.34 tron gotsp = false; 293 1.7 jtc ++wordct; 294 1.7 jtc } 295 1.33 tron 296 1.34 tron linelen++; 297 1.2 jtc } 298 1.1 cgd } 299 1.2 jtc } 300 1.21 yamt } 301 1.21 yamt 302 1.21 yamt if (len == -1) { 303 1.27 enami warn("%s", name); 304 1.21 yamt rval = 1; 305 1.21 yamt } 306 1.21 yamt if (dochar && r == (size_t)-2) { 307 1.27 enami warnx("%s: incomplete multibyte character", name); 308 1.21 yamt rval = 1; 309 1.1 cgd } 310 1.7 jtc 311 1.33 tron print_counts(linect, wordct, charct, longest, file); 312 1.8 jtc 313 1.23 enami /* 314 1.23 enami * don't bother checkint doline, doword, or dobyte --- speeds 315 1.23 enami * up the common case 316 1.23 enami */ 317 1.8 jtc tlinect += linect; 318 1.8 jtc twordct += wordct; 319 1.8 jtc tcharct += charct; 320 1.33 tron if (dolongest && longest > tlongest) 321 1.33 tron tlongest = longest; 322 1.8 jtc 323 1.8 jtc if (close(fd)) { 324 1.27 enami warn("%s", name); 325 1.8 jtc rval = 1; 326 1.1 cgd } 327 1.8 jtc } 328 1.8 jtc 329 1.11 mrg static void 330 1.32 lukem print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, 331 1.33 tron wc_count_t longest, const char *name) 332 1.8 jtc { 333 1.8 jtc 334 1.8 jtc if (doline) 335 1.33 tron (void)printf(WCFMT, (WCCAST)lines); 336 1.8 jtc if (doword) 337 1.33 tron (void)printf(WCFMT, (WCCAST)words); 338 1.21 yamt if (dobyte || dochar) 339 1.33 tron (void)printf(WCFMT, (WCCAST)chars); 340 1.33 tron if (dolongest) 341 1.33 tron (void)printf(WCFMT, (WCCAST)longest); 342 1.7 jtc 343 1.33 tron if (name != NULL) 344 1.33 tron (void)printf(" %s\n", name); 345 1.19 mycroft else 346 1.33 tron (void)putchar('\n'); 347 1.11 mrg } 348 1.11 mrg 349 1.11 mrg static void 350 1.30 perry usage(void) 351 1.11 mrg { 352 1.23 enami 353 1.33 tron (void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n"); 354 1.11 mrg exit(1); 355 1.1 cgd } 356