1 /* $NetBSD: util.c,v 1.12 2011/04/18 22:46:48 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: util.c,v 1.12 2011/04/18 22:46:48 joerg Exp $");
38
39 #include <sys/stat.h>
40 #include <sys/types.h>
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fnmatch.h>
46 #include <fts.h>
47 #include <libgen.h>
48 #include <stdbool.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <wchar.h>
54 #include <wctype.h>
55
56 #include "grep.h"
57
/*
 * Output-separator bookkeeping shared by procfile() and procline().
 *
 * first         - set to true by procfile() when it starts a file and
 *                 cleared by procline() once a line has been printed.
 * first_global  - starts out true and is cleared by procline() the first
 *                 time any line is printed.
 * since_printed - number of lines procline() has seen since it last
 *                 printed one; reset to zero on every printed line.
 */
static bool first = false;
static bool first_global = true;
static unsigned long long since_printed = 0;

/* Defined further down; procfile() needs it before the definition. */
static int procline(struct str *l, int nottext);
62
63 bool
64 file_matching(const char *fname)
65 {
66 char *fname_base, *fname_copy;
67 bool ret;
68
69 ret = finclude ? false : true;
70 fname_copy = grep_strdup(fname);
71 fname_base = basename(fname_copy);
72
73 for (unsigned int i = 0; i < fpatterns; ++i) {
74 if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
75 fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
76 if (fpattern[i].mode == EXCL_PAT)
77 return (false);
78 else
79 ret = true;
80 }
81 }
82 free(fname_copy);
83 return (ret);
84 }
85
86 static inline bool
87 dir_matching(const char *dname)
88 {
89 bool ret;
90
91 ret = dinclude ? false : true;
92
93 for (unsigned int i = 0; i < dpatterns; ++i) {
94 if (dname != NULL &&
95 fnmatch(dname, dpattern[i].pat, 0) == 0) {
96 if (dpattern[i].mode == EXCL_PAT)
97 return (false);
98 else
99 ret = true;
100 }
101 }
102 return (ret);
103 }
104
105 /*
106 * Processes a directory when a recursive search is performed with
107 * the -R option. Each appropriate file is passed to procfile().
108 */
109 int
110 grep_tree(char **argv)
111 {
112 FTS *fts;
113 FTSENT *p;
114 char *d, *dir = NULL;
115 int c, fts_flags;
116 bool ok;
117
118 c = fts_flags = 0;
119
120 switch(linkbehave) {
121 case LINK_EXPLICIT:
122 fts_flags = FTS_COMFOLLOW;
123 break;
124 case LINK_SKIP:
125 fts_flags = FTS_PHYSICAL;
126 break;
127 default:
128 fts_flags = FTS_LOGICAL;
129
130 }
131
132 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
133
134 if (!(fts = fts_open(argv, fts_flags, NULL)))
135 err(2, "fts_open");
136 while ((p = fts_read(fts)) != NULL) {
137 switch (p->fts_info) {
138 case FTS_DNR:
139 /* FALLTHROUGH */
140 case FTS_ERR:
141 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
142 break;
143 case FTS_D:
144 /* FALLTHROUGH */
145 case FTS_DP:
146 break;
147 case FTS_DC:
148 /* Print a warning for recursive directory loop */
149 warnx("warning: %s: recursive directory loop",
150 p->fts_path);
151 break;
152 default:
153 /* Check for file exclusion/inclusion */
154 ok = true;
155 if (dexclude || dinclude) {
156 if ((d = strrchr(p->fts_path, '/')) != NULL) {
157 dir = grep_malloc(sizeof(char) *
158 (d - p->fts_path + 1));
159 memcpy(dir, p->fts_path,
160 d - p->fts_path);
161 dir[d - p->fts_path] = '\0';
162 }
163 ok = dir_matching(dir);
164 free(dir);
165 dir = NULL;
166 }
167 if (fexclude || finclude)
168 ok &= file_matching(p->fts_path);
169
170 if (ok)
171 c += procfile(p->fts_path);
172 break;
173 }
174 }
175
176 fts_close(fts);
177 return (c);
178 }
179
180 /*
181 * Opens a file and processes it. Each file is processed line-by-line
182 * passing the lines to procline().
183 */
184 int
185 procfile(const char *fn)
186 {
187 struct file *f;
188 struct stat sb;
189 struct str ln;
190 mode_t s;
191 int c, t;
192
193 if (mflag && (mcount <= 0))
194 return (0);
195
196 if (strcmp(fn, "-") == 0) {
197 fn = label != NULL ? label : getstr(1);
198 f = grep_open(NULL);
199 } else {
200 if (!stat(fn, &sb)) {
201 /* Check if we need to process the file */
202 s = sb.st_mode & S_IFMT;
203 if (s == S_IFDIR && dirbehave == DIR_SKIP)
204 return (0);
205 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
206 || s == S_IFSOCK) && devbehave == DEV_SKIP)
207 return (0);
208 }
209 f = grep_open(fn);
210 }
211 if (f == NULL) {
212 if (!sflag)
213 warn("%s", fn);
214 if (errno == ENOENT)
215 notfound = true;
216 return (0);
217 }
218
219 ln.file = grep_malloc(strlen(fn) + 1);
220 strcpy(ln.file, fn);
221 ln.line_no = 0;
222 ln.len = 0;
223 tail = 0;
224 ln.off = -1;
225
226 for (first = true, c = 0; c == 0 || !(lflag || qflag); ) {
227 ln.off += ln.len + 1;
228 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
229 if (ln.line_no == 0 && matchall)
230 exit(0);
231 else
232 break;
233 }
234 if (ln.len > 0 && ln.dat[ln.len - 1] == line_sep)
235 --ln.len;
236 ln.line_no++;
237
238 /* Return if we need to skip a binary file */
239 if (f->binary && binbehave == BINFILE_SKIP) {
240 grep_close(f);
241 free(ln.file);
242 free(f);
243 return (0);
244 }
245 /* Process the file line-by-line */
246 t = procline(&ln, f->binary);
247 c += t;
248
249 /* Count the matches if we have a match limit */
250 if (mflag) {
251 mcount -= t;
252 if (mcount <= 0)
253 break;
254 }
255 }
256 if (Bflag > 0)
257 clearqueue();
258 grep_close(f);
259
260 if (cflag) {
261 if (!hflag)
262 printf("%s:", ln.file);
263 printf("%u%c", c, line_sep);
264 }
265 if (lflag && !qflag && c != 0)
266 printf("%s%c", fn, line_sep);
267 if (Lflag && !qflag && c == 0)
268 printf("%s%c", fn, line_sep);
269 if (c && !cflag && !lflag && !Lflag &&
270 binbehave == BINFILE_BIN && f->binary && !qflag)
271 printf(getstr(8), fn);
272
273 free(ln.file);
274 free(f);
275 return (c);
276 }
277
/*
 * Returns true if the wide character x counts as part of a word, i.e. it
 * is alphanumeric according to iswalnum() or it is an underscore.
 *
 * Written as a function rather than a macro so that the argument is
 * evaluated exactly once and is type-checked.
 */
static inline bool
iswword(wint_t x)
{

	return (iswalnum(x) || x == (wint_t)L'_');
}
279
280 /*
281 * Processes a line comparing it with the specified patterns. Each pattern
282 * is looped to be compared along with the full string, saving each and every
283 * match, which is necessary to colorize the output and to count the
284 * matches. The matching lines are passed to printline() to display the
285 * appropriate output.
286 */
287 static int
288 procline(struct str *l, int nottext)
289 {
290 regmatch_t matches[MAX_LINE_MATCHES];
291 regmatch_t pmatch;
292 size_t st = 0;
293 unsigned int i;
294 int c = 0, m = 0, r = 0;
295
296 if (!matchall) {
297 /* Loop to process the whole line */
298 while (st <= l->len) {
299 pmatch.rm_so = st;
300 pmatch.rm_eo = l->len;
301
302 /* Loop to compare with all the patterns */
303 for (i = 0; i < patterns; i++) {
304 /*
305 * XXX: grep_search() is a workaround for speed up and should be
306 * removed in the future. See fastgrep.c.
307 */
308 if (fg_pattern[i].pattern) {
309 r = grep_search(&fg_pattern[i],
310 (unsigned char *)l->dat,
311 l->len, &pmatch);
312 r = (r == 0) ? 0 : REG_NOMATCH;
313 st = pmatch.rm_eo;
314 } else {
315 r = regexec(&r_pattern[i], l->dat, 1,
316 &pmatch, eflags);
317 r = (r == 0) ? 0 : REG_NOMATCH;
318 st = pmatch.rm_eo;
319 }
320 if (r == REG_NOMATCH)
321 continue;
322 /* Check for full match */
323 if (r == 0 && xflag)
324 if (pmatch.rm_so != 0 ||
325 (size_t)pmatch.rm_eo != l->len)
326 r = REG_NOMATCH;
327 /* Check for whole word match */
328 if (r == 0 && fg_pattern[i].word &&
329 pmatch.rm_so != 0) {
330 wint_t wbegin, wend;
331
332 wbegin = wend = L' ';
333 if (pmatch.rm_so != 0 &&
334 sscanf(&l->dat[pmatch.rm_so - 1],
335 "%lc", &wbegin) != 1)
336 r = REG_NOMATCH;
337 else if ((size_t)pmatch.rm_eo != l->len &&
338 sscanf(&l->dat[pmatch.rm_eo],
339 "%lc", &wend) != 1)
340 r = REG_NOMATCH;
341 else if (iswword(wbegin) || iswword(wend))
342 r = REG_NOMATCH;
343 }
344 if (r == 0) {
345 if (m == 0)
346 c++;
347 if (m < MAX_LINE_MATCHES)
348 matches[m++] = pmatch;
349 /* matches - skip further patterns */
350 if ((color != NULL && !oflag) || qflag || lflag)
351 break;
352 }
353 }
354
355 if (vflag) {
356 c = !c;
357 break;
358 }
359 /* One pass if we are not recording matches */
360 if ((color != NULL && !oflag) || qflag || lflag)
361 break;
362
363 if (st == (size_t)pmatch.rm_so)
364 break; /* No matches */
365 }
366 } else
367 c = !vflag;
368
369 if (c && binbehave == BINFILE_BIN && nottext)
370 return (c); /* Binary file */
371
372 /* Dealing with the context */
373 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
374 if (c) {
375 if ((Aflag || Bflag) && !first_global &&
376 (first || since_printed > Bflag))
377 printf("--\n");
378 tail = Aflag;
379 if (Bflag > 0)
380 printqueue();
381 printline(l, ':', matches, m);
382 } else {
383 printline(l, '-', matches, m);
384 tail--;
385 }
386 first = false;
387 first_global = false;
388 since_printed = 0;
389 } else {
390 if (Bflag)
391 enqueue(l);
392 since_printed++;
393 }
394 return (c);
395 }
396
/*
 * malloc() wrapper for internal use: never returns NULL.  If malloc()
 * fails the program is terminated through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
409
/*
 * calloc() wrapper for internal use: never returns NULL.  If calloc()
 * fails the program is terminated through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
422
/*
 * realloc() wrapper for internal use: never returns NULL.  If realloc()
 * fails the program is terminated through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
434
/*
 * strdup() wrapper for internal use: never returns NULL.  If strdup()
 * fails the program is terminated through err() with exit status 2.
 * The caller owns the returned copy and releases it with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
447
448 /*
449 * Prints a matching line according to the command line options.
450 */
451 void
452 printline(struct str *line, int sep, regmatch_t *matches, int m)
453 {
454 size_t a = 0;
455 int i, n = 0;
456
457 if (!hflag) {
458 if (nullflag == 0)
459 fputs(line->file, stdout);
460 else {
461 printf("%s", line->file);
462 putchar(0);
463 }
464 ++n;
465 }
466 if (nflag) {
467 if (n > 0)
468 putchar(sep);
469 printf("%d", line->line_no);
470 ++n;
471 }
472 if (bflag) {
473 if (n > 0)
474 putchar(sep);
475 printf("%lld", (long long)line->off);
476 ++n;
477 }
478 if (n)
479 putchar(sep);
480 /* --color and -o */
481 if ((oflag || color) && m > 0) {
482 for (i = 0; i < m; i++) {
483 if (!oflag)
484 fwrite(line->dat + a, matches[i].rm_so - a, 1,
485 stdout);
486 if (color)
487 fprintf(stdout, "\33[%sm\33[K", color);
488
489 fwrite(line->dat + matches[i].rm_so,
490 matches[i].rm_eo - matches[i].rm_so, 1,
491 stdout);
492 if (color)
493 fprintf(stdout, "\33[m\33[K");
494 a = matches[i].rm_eo;
495 if (oflag)
496 putchar('\n');
497 }
498 if (!oflag) {
499 if (line->len - a > 0)
500 fwrite(line->dat + a, line->len - a, 1, stdout);
501 putchar(line_sep);
502 }
503 } else {
504 fwrite(line->dat, line->len, 1, stdout);
505 putchar(line_sep);
506 }
507 }
508