grep.c revision 1.7 1 /* $NetBSD: grep.c,v 1.7 2011/04/18 17:18:03 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */
3 /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Codan Smrgrav
7 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: grep.c,v 1.7 2011/04/18 17:18:03 joerg Exp $");
38
39 #include <sys/stat.h>
40 #include <sys/types.h>
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <getopt.h>
46 #include <limits.h>
47 #include <libgen.h>
48 #include <locale.h>
49 #include <stdbool.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54
55 #include "grep.h"
56
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61
/*
 * Default messages to use when NLS is disabled or no catalogue
 * is found.  Entries are looked up by number (see the getstr() calls
 * in this file); slot 0 is an empty placeholder so that the real
 * messages start at 1.
 */
const char *errstr[] = {
	[0] = "",
	[1] = "(standard input)",
	[2] = "cannot read bzip2 compressed file",
	[3] = "unknown %s option",
	[4] = "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n",
	[5] = "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
	[6] = "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
	[7] = "\t[--null] [pattern] [file ...]\n",
	[8] = "Binary file %s matches\n",
	[9] = "%s (BSD grep) %s\n",
};
78
79 /* Flags passed to regcomp() and regexec() */
80 int cflags = 0;
81 int eflags = REG_STARTEND;
82
83 /* Shortcut for matching all cases like empty regex */
84 bool matchall;
85
86 /* Searching patterns */
87 unsigned int patterns, pattern_sz;
88 char **pattern;
89 regex_t *r_pattern;
90 fastgrep_t *fg_pattern;
91
92 /* Filename exclusion/inclusion patterns */
93 unsigned int fpatterns, fpattern_sz;
94 unsigned int dpatterns, dpattern_sz;
95 struct epat *dpattern, *fpattern;
96
97 /* For regex errors */
98 char re_error[RE_ERROR_BUF + 1];
99
100 /* Command-line flags */
101 unsigned long long Aflag; /* -A x: print x lines trailing each match */
102 unsigned long long Bflag; /* -B x: print x lines leading each match */
103 bool Hflag; /* -H: always print file name */
104 bool Lflag; /* -L: only show names of files with no matches */
105 bool bflag; /* -b: show block numbers for each match */
106 bool cflag; /* -c: only show a count of matching lines */
107 bool hflag; /* -h: don't print filename headers */
108 bool iflag; /* -i: ignore case */
109 bool lflag; /* -l: only show names of files with matches */
110 bool mflag; /* -m x: stop reading the files after x matches */
111 unsigned long long mcount; /* count for -m */
112 bool nflag; /* -n: show line numbers in front of matching lines */
113 bool oflag; /* -o: print only matching part */
114 bool qflag; /* -q: quiet mode (don't output anything) */
115 bool sflag; /* -s: silent mode (ignore errors) */
116 bool vflag; /* -v: only show non-matching lines */
117 bool wflag; /* -w: pattern must start and end on word boundaries */
118 bool xflag; /* -x: pattern must match entire line */
119 bool lbflag; /* --line-buffered */
120 bool nullflag; /* --null */
121 char *label; /* --label */
122 const char *color; /* --color */
123 int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
124 int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
125 int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
126 int devbehave = DEV_READ; /* -D: handling of devices */
127 int dirbehave = DIR_READ; /* -dRr: handling of directories */
128 int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
129
130 bool dexclude, dinclude; /* --exclude-dir and --include-dir */
131 bool fexclude, finclude; /* --exclude and --include */
132
/*
 * Values getopt_long() returns for the long options that have no
 * single-character equivalent.  Numbering starts just above CHAR_MAX
 * so none of them can coincide with an option letter.
 */
enum {
	BIN_OPT = CHAR_MAX + 1,		/* --binary-files */
	COLOR_OPT,			/* --color, --colour */
	HELP_OPT,			/* --help */
	MMAP_OPT,			/* --mmap */
	LINEBUF_OPT,			/* --line-buffered */
	LABEL_OPT,			/* --label */
	NULL_OPT,			/* --null */
	R_EXCLUDE_OPT,			/* --exclude */
	R_INCLUDE_OPT,			/* --include */
	R_DEXCLUDE_OPT,			/* --exclude-dir */
	R_DINCLUDE_OPT			/* --include-dir */
};

static inline const char	*init_color(const char *);

/* Housekeeping */
int	 tail;		/* lines left to print */
bool	 notfound;	/* file not found */

extern char	*__progname;
154
155 /*
156 * Prints usage information and returns 2.
157 */
158 static void
159 usage(void)
160 {
161 fprintf(stderr, getstr(4), __progname);
162 fprintf(stderr, "%s", getstr(5));
163 fprintf(stderr, "%s", getstr(5));
164 fprintf(stderr, "%s", getstr(6));
165 fprintf(stderr, "%s", getstr(7));
166 exit(2);
167 }
168
169 static const char *optstr = "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxy";
170
171 struct option long_options[] =
172 {
173 {"binary-files", required_argument, NULL, BIN_OPT},
174 {"help", no_argument, NULL, HELP_OPT},
175 {"mmap", no_argument, NULL, MMAP_OPT},
176 {"line-buffered", no_argument, NULL, LINEBUF_OPT},
177 {"label", required_argument, NULL, LABEL_OPT},
178 {"null", no_argument, NULL, NULL_OPT},
179 {"color", optional_argument, NULL, COLOR_OPT},
180 {"colour", optional_argument, NULL, COLOR_OPT},
181 {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
182 {"include", required_argument, NULL, R_INCLUDE_OPT},
183 {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
184 {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
185 {"after-context", required_argument, NULL, 'A'},
186 {"text", no_argument, NULL, 'a'},
187 {"before-context", required_argument, NULL, 'B'},
188 {"byte-offset", no_argument, NULL, 'b'},
189 {"context", optional_argument, NULL, 'C'},
190 {"count", no_argument, NULL, 'c'},
191 {"devices", required_argument, NULL, 'D'},
192 {"directories", required_argument, NULL, 'd'},
193 {"extended-regexp", no_argument, NULL, 'E'},
194 {"regexp", required_argument, NULL, 'e'},
195 {"fixed-strings", no_argument, NULL, 'F'},
196 {"file", required_argument, NULL, 'f'},
197 {"basic-regexp", no_argument, NULL, 'G'},
198 {"no-filename", no_argument, NULL, 'h'},
199 {"with-filename", no_argument, NULL, 'H'},
200 {"ignore-case", no_argument, NULL, 'i'},
201 {"bz2decompress", no_argument, NULL, 'J'},
202 {"files-with-matches", no_argument, NULL, 'l'},
203 {"files-without-match", no_argument, NULL, 'L'},
204 {"max-count", required_argument, NULL, 'm'},
205 {"line-number", no_argument, NULL, 'n'},
206 {"only-matching", no_argument, NULL, 'o'},
207 {"quiet", no_argument, NULL, 'q'},
208 {"silent", no_argument, NULL, 'q'},
209 {"recursive", no_argument, NULL, 'r'},
210 {"no-messages", no_argument, NULL, 's'},
211 {"binary", no_argument, NULL, 'U'},
212 {"unix-byte-offsets", no_argument, NULL, 'u'},
213 {"invert-match", no_argument, NULL, 'v'},
214 {"version", no_argument, NULL, 'V'},
215 {"word-regexp", no_argument, NULL, 'w'},
216 {"line-regexp", no_argument, NULL, 'x'},
217 {"decompress", no_argument, NULL, 'Z'},
218 {NULL, no_argument, NULL, 0}
219 };
220
221 /*
222 * Adds a searching pattern to the internal array.
223 */
224 static void
225 add_pattern(char *pat, size_t len)
226 {
227
228 /* Check if we can do a shortcut */
229 if (len == 0 || matchall) {
230 matchall = true;
231 return;
232 }
233 /* Increase size if necessary */
234 if (patterns == pattern_sz) {
235 pattern_sz *= 2;
236 pattern = grep_realloc(pattern, ++pattern_sz *
237 sizeof(*pattern));
238 }
239 if (len > 0 && pat[len - 1] == '\n')
240 --len;
241 /* pat may not be NUL-terminated */
242 pattern[patterns] = grep_malloc(len + 1);
243 memcpy(pattern[patterns], pat, len);
244 pattern[patterns][len] = '\0';
245 ++patterns;
246 }
247
248 /*
249 * Adds a file include/exclude pattern to the internal array.
250 */
251 static void
252 add_fpattern(const char *pat, int mode)
253 {
254
255 /* Increase size if necessary */
256 if (fpatterns == fpattern_sz) {
257 fpattern_sz *= 2;
258 fpattern = grep_realloc(fpattern, ++fpattern_sz *
259 sizeof(struct epat));
260 }
261 fpattern[fpatterns].pat = grep_strdup(pat);
262 fpattern[fpatterns].mode = mode;
263 ++fpatterns;
264 }
265
266 /*
267 * Adds a directory include/exclude pattern to the internal array.
268 */
269 static void
270 add_dpattern(const char *pat, int mode)
271 {
272
273 /* Increase size if necessary */
274 if (dpatterns == dpattern_sz) {
275 dpattern_sz *= 2;
276 dpattern = grep_realloc(dpattern, ++dpattern_sz *
277 sizeof(struct epat));
278 }
279 dpattern[dpatterns].pat = grep_strdup(pat);
280 dpattern[dpatterns].mode = mode;
281 ++dpatterns;
282 }
283
/*
 * Reads search patterns, one per line, from the file named fn and
 * hands each of them to add_pattern().  A line that consists of a
 * newline only is passed on as an empty pattern.  Terminates the
 * program with status 2 if the file cannot be opened or read.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *buf = NULL;
	size_t cap = 0;
	ssize_t nread;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		nread = getline(&buf, &cap, fp);
		if (nread == -1)
			break;
		if (buf[0] == '\n')
			add_pattern(buf, 0);
		else
			add_pattern(buf, (size_t)nread);
	}
	free(buf);

	/* getline() returns -1 for both EOF and error; tell them apart. */
	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
306
/*
 * Returns the colour specification to use: the value of the
 * GREP_COLOR environment variable when it is set, otherwise the
 * caller-supplied default d.
 */
static inline const char *
init_color(const char *d)
{
	const char *env = getenv("GREP_COLOR");

	if (env == NULL)
		return (d);
	return (env);
}
315
316 int
317 main(int argc, char *argv[])
318 {
319 char **aargv, **eargv, *eopts;
320 char *ep;
321 unsigned long long l;
322 unsigned int aargc, eargc, i;
323 int c, lastc, needpattern, newarg, prevoptind;
324
325 setlocale(LC_ALL, "");
326
327 #ifndef WITHOUT_NLS
328 catalog = catopen("grep", NL_CAT_LOCALE);
329 #endif
330
331 /* Check what is the program name of the binary. In this
332 way we can have all the funcionalities in one binary
333 without the need of scripting and using ugly hacks. */
334 switch (__progname[0]) {
335 case 'e':
336 grepbehave = GREP_EXTENDED;
337 break;
338 case 'f':
339 grepbehave = GREP_FIXED;
340 break;
341 case 'g':
342 grepbehave = GREP_BASIC;
343 break;
344 case 'z':
345 filebehave = FILE_GZIP;
346 switch(__progname[1]) {
347 case 'e':
348 grepbehave = GREP_EXTENDED;
349 break;
350 case 'f':
351 grepbehave = GREP_FIXED;
352 break;
353 case 'g':
354 grepbehave = GREP_BASIC;
355 break;
356 }
357 break;
358 }
359
360 lastc = '\0';
361 newarg = 1;
362 prevoptind = 1;
363 needpattern = 1;
364
365 eopts = getenv("GREP_OPTIONS");
366
367 /* support for extra arguments in GREP_OPTIONS */
368 eargc = 0;
369 if (eopts != NULL) {
370 char *str;
371
372 /* make an estimation of how many extra arguments we have */
373 for (unsigned int j = 0; j < strlen(eopts); j++)
374 if (eopts[j] == ' ')
375 eargc++;
376
377 eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
378
379 eargc = 0;
380 /* parse extra arguments */
381 while ((str = strsep(&eopts, " ")) != NULL)
382 eargv[eargc++] = grep_strdup(str);
383
384 aargv = (char **)grep_calloc(eargc + argc + 1,
385 sizeof(char *));
386
387 aargv[0] = argv[0];
388 for (i = 0; i < eargc; i++)
389 aargv[i + 1] = eargv[i];
390 for (int j = 1; j < argc; j++, i++)
391 aargv[i + 1] = argv[j];
392
393 aargc = eargc + argc;
394 } else {
395 aargv = argv;
396 aargc = argc;
397 }
398
399 while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
400 -1)) {
401 switch (c) {
402 case '0': case '1': case '2': case '3': case '4':
403 case '5': case '6': case '7': case '8': case '9':
404 if (newarg || !isdigit(lastc))
405 Aflag = 0;
406 else if (Aflag > LLONG_MAX / 10) {
407 errno = ERANGE;
408 err(2, NULL);
409 }
410 Aflag = Bflag = (Aflag * 10) + (c - '0');
411 break;
412 case 'C':
413 if (optarg == NULL) {
414 Aflag = Bflag = 2;
415 break;
416 }
417 /* FALLTHROUGH */
418 case 'A':
419 /* FALLTHROUGH */
420 case 'B':
421 errno = 0;
422 l = strtoull(optarg, &ep, 10);
423 if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
424 ((errno == EINVAL) && (l == 0)))
425 err(2, NULL);
426 else if (ep[0] != '\0') {
427 errno = EINVAL;
428 err(2, NULL);
429 }
430 if (c == 'A')
431 Aflag = l;
432 else if (c == 'B')
433 Bflag = l;
434 else
435 Aflag = Bflag = l;
436 break;
437 case 'a':
438 binbehave = BINFILE_TEXT;
439 break;
440 case 'b':
441 bflag = true;
442 break;
443 case 'c':
444 cflag = true;
445 break;
446 case 'D':
447 if (strcasecmp(optarg, "skip") == 0)
448 devbehave = DEV_SKIP;
449 else if (strcasecmp(optarg, "read") == 0)
450 devbehave = DEV_READ;
451 else
452 errx(2, getstr(3), "--devices");
453 break;
454 case 'd':
455 if (strcasecmp("recurse", optarg) == 0) {
456 Hflag = true;
457 dirbehave = DIR_RECURSE;
458 } else if (strcasecmp("skip", optarg) == 0)
459 dirbehave = DIR_SKIP;
460 else if (strcasecmp("read", optarg) == 0)
461 dirbehave = DIR_READ;
462 else
463 errx(2, getstr(3), "--directories");
464 break;
465 case 'E':
466 grepbehave = GREP_EXTENDED;
467 break;
468 case 'e':
469 add_pattern(optarg, strlen(optarg));
470 needpattern = 0;
471 break;
472 case 'F':
473 grepbehave = GREP_FIXED;
474 break;
475 case 'f':
476 read_patterns(optarg);
477 needpattern = 0;
478 break;
479 case 'G':
480 grepbehave = GREP_BASIC;
481 break;
482 case 'H':
483 Hflag = true;
484 break;
485 case 'h':
486 Hflag = false;
487 hflag = true;
488 break;
489 case 'I':
490 binbehave = BINFILE_SKIP;
491 break;
492 case 'i':
493 case 'y':
494 iflag = true;
495 cflags |= REG_ICASE;
496 break;
497 case 'J':
498 filebehave = FILE_BZIP;
499 break;
500 case 'L':
501 lflag = false;
502 Lflag = true;
503 break;
504 case 'l':
505 Lflag = false;
506 lflag = true;
507 break;
508 case 'm':
509 mflag = true;
510 errno = 0;
511 mcount = strtoull(optarg, &ep, 10);
512 if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
513 ((errno == EINVAL) && (mcount == 0)))
514 err(2, NULL);
515 else if (ep[0] != '\0') {
516 errno = EINVAL;
517 err(2, NULL);
518 }
519 break;
520 case 'n':
521 nflag = true;
522 break;
523 case 'O':
524 linkbehave = LINK_EXPLICIT;
525 break;
526 case 'o':
527 oflag = true;
528 break;
529 case 'p':
530 linkbehave = LINK_SKIP;
531 break;
532 case 'q':
533 qflag = true;
534 break;
535 case 'S':
536 linkbehave = LINK_READ;
537 break;
538 case 'R':
539 case 'r':
540 dirbehave = DIR_RECURSE;
541 Hflag = true;
542 break;
543 case 's':
544 sflag = true;
545 break;
546 case 'U':
547 binbehave = BINFILE_BIN;
548 break;
549 case 'u':
550 case MMAP_OPT:
551 /* noop, compatibility */
552 break;
553 case 'V':
554 printf(getstr(9), __progname, VERSION);
555 exit(0);
556 case 'v':
557 vflag = true;
558 break;
559 case 'w':
560 wflag = true;
561 break;
562 case 'x':
563 xflag = true;
564 break;
565 case 'Z':
566 filebehave = FILE_GZIP;
567 break;
568 case BIN_OPT:
569 if (strcasecmp("binary", optarg) == 0)
570 binbehave = BINFILE_BIN;
571 else if (strcasecmp("without-match", optarg) == 0)
572 binbehave = BINFILE_SKIP;
573 else if (strcasecmp("text", optarg) == 0)
574 binbehave = BINFILE_TEXT;
575 else
576 errx(2, getstr(3), "--binary-files");
577 break;
578 case COLOR_OPT:
579 color = NULL;
580 if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
581 strcasecmp("tty", optarg) == 0 ||
582 strcasecmp("if-tty", optarg) == 0) {
583 char *term;
584
585 term = getenv("TERM");
586 if (isatty(STDOUT_FILENO) && term != NULL &&
587 strcasecmp(term, "dumb") != 0)
588 color = init_color("01;31");
589 } else if (strcasecmp("always", optarg) == 0 ||
590 strcasecmp("yes", optarg) == 0 ||
591 strcasecmp("force", optarg) == 0) {
592 color = init_color("01;31");
593 } else if (strcasecmp("never", optarg) != 0 &&
594 strcasecmp("none", optarg) != 0 &&
595 strcasecmp("no", optarg) != 0)
596 errx(2, getstr(3), "--color");
597 break;
598 case LABEL_OPT:
599 label = optarg;
600 break;
601 case LINEBUF_OPT:
602 lbflag = true;
603 break;
604 case NULL_OPT:
605 nullflag = true;
606 break;
607 case R_INCLUDE_OPT:
608 finclude = true;
609 add_fpattern(optarg, INCL_PAT);
610 break;
611 case R_EXCLUDE_OPT:
612 fexclude = true;
613 add_fpattern(optarg, EXCL_PAT);
614 break;
615 case R_DINCLUDE_OPT:
616 dinclude = true;
617 add_dpattern(optarg, INCL_PAT);
618 break;
619 case R_DEXCLUDE_OPT:
620 dexclude = true;
621 add_dpattern(optarg, EXCL_PAT);
622 break;
623 case HELP_OPT:
624 default:
625 usage();
626 }
627 lastc = c;
628 newarg = optind != prevoptind;
629 prevoptind = optind;
630 }
631 aargc -= optind;
632 aargv += optind;
633
634 /* Fail if we don't have any pattern */
635 if (aargc == 0 && needpattern)
636 usage();
637
638 /* Process patterns from command line */
639 if (aargc != 0 && needpattern) {
640 add_pattern(*aargv, strlen(*aargv));
641 --aargc;
642 ++aargv;
643 }
644
645 switch (grepbehave) {
646 case GREP_FIXED:
647 case GREP_BASIC:
648 break;
649 case GREP_EXTENDED:
650 cflags |= REG_EXTENDED;
651 break;
652 default:
653 /* NOTREACHED */
654 usage();
655 }
656
657 fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
658 r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
659 /*
660 * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
661 * Optimizations should be done there.
662 */
663 /* Check if cheating is allowed (always is for fgrep). */
664 if (grepbehave == GREP_FIXED) {
665 for (i = 0; i < patterns; ++i)
666 fgrepcomp(&fg_pattern[i], pattern[i]);
667 } else {
668 for (i = 0; i < patterns; ++i) {
669 if (fastcomp(&fg_pattern[i], pattern[i])) {
670 /* Fall back to full regex library */
671 c = regcomp(&r_pattern[i], pattern[i], cflags);
672 if (c != 0) {
673 regerror(c, &r_pattern[i], re_error,
674 RE_ERROR_BUF);
675 errx(2, "%s", re_error);
676 }
677 }
678 }
679 }
680
681 if (lbflag)
682 setlinebuf(stdout);
683
684 if ((aargc == 0 || aargc == 1) && !Hflag)
685 hflag = true;
686
687 if (aargc == 0)
688 exit(!procfile("-"));
689
690 if (dirbehave == DIR_RECURSE)
691 c = grep_tree(aargv);
692 else
693 for (c = 0; aargc--; ++aargv) {
694 if ((finclude || fexclude) && !file_matching(*aargv))
695 continue;
696 c+= procfile(*aargv);
697 }
698
699 #ifndef WITHOUT_NLS
700 catclose(catalog);
701 #endif
702
703 /* Find out the correct return value according to the
704 results and the command line option. */
705 exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
706 }
707