util.c revision 1.7 1 /* $NetBSD: util.c,v 1.7 2011/02/16 01:31:33 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __RCSID("$NetBSD: util.c,v 1.7 2011/02/16 01:31:33 joerg Exp $");
34
35 #include <sys/stat.h>
36 #include <sys/types.h>
37
38 #include <ctype.h>
39 #include <err.h>
40 #include <errno.h>
41 #include <fnmatch.h>
42 #include <fts.h>
43 #include <libgen.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <wchar.h>
50 #include <wctype.h>
51
52 #include "grep.h"
53
/*
 * Count of lines handed to enqueue() since the counter was last reset;
 * procfile() zeroes it per file and procline() zeroes it on each match.
 */
54 static int linesqueued;
/* Forward declaration: procline() is defined after its caller procfile(). */
55 static int procline(struct str *l, int);
56
57 bool
58 file_matching(const char *fname)
59 {
60 bool ret;
61
62 ret = finclude ? false : true;
63
64 for (unsigned int i = 0; i < fpatterns; ++i) {
65 if (fnmatch(fpattern[i].pat,
66 fname, 0) == 0 || fnmatch(fpattern[i].pat,
67 basename(fname), 0) == 0) {
68 if (fpattern[i].mode == EXCL_PAT)
69 return (false);
70 else
71 ret = true;
72 }
73 }
74 return (ret);
75 }
76
77 static inline bool
78 dir_matching(const char *dname)
79 {
80 bool ret;
81
82 ret = dinclude ? false : true;
83
84 for (unsigned int i = 0; i < dpatterns; ++i) {
85 if (dname != NULL &&
86 fnmatch(dname, dpattern[i].pat, 0) == 0) {
87 if (dpattern[i].mode == EXCL_PAT)
88 return (false);
89 else
90 ret = true;
91 }
92 }
93 return (ret);
94 }
95
96 /*
97 * Processes a directory when a recursive search is performed with
98 * the -R option. Each appropriate file is passed to procfile().
99 */
100 int
101 grep_tree(char **argv)
102 {
103 FTS *fts;
104 FTSENT *p;
105 char *d, *dir = NULL;
106 int c, fts_flags;
107 bool ok;
108
109 c = fts_flags = 0;
110
111 switch(linkbehave) {
112 case LINK_EXPLICIT:
113 fts_flags = FTS_COMFOLLOW;
114 break;
115 case LINK_SKIP:
116 fts_flags = FTS_PHYSICAL;
117 break;
118 default:
119 fts_flags = FTS_LOGICAL;
120
121 }
122
123 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
124
125 if (!(fts = fts_open(argv, fts_flags, NULL)))
126 err(2, "fts_open");
127 while ((p = fts_read(fts)) != NULL) {
128 switch (p->fts_info) {
129 case FTS_DNR:
130 /* FALLTHROUGH */
131 case FTS_ERR:
132 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
133 break;
134 case FTS_D:
135 /* FALLTHROUGH */
136 case FTS_DP:
137 break;
138 case FTS_DC:
139 /* Print a warning for recursive directory loop */
140 warnx("warning: %s: recursive directory loop",
141 p->fts_path);
142 break;
143 default:
144 /* Check for file exclusion/inclusion */
145 ok = true;
146 if (dexclude || dinclude) {
147 if ((d = strrchr(p->fts_path, '/')) != NULL) {
148 dir = grep_malloc(sizeof(char) *
149 (d - p->fts_path + 1));
150 memcpy(dir, p->fts_path,
151 d - p->fts_path);
152 dir[d - p->fts_path] = '\0';
153 }
154 ok = dir_matching(dir);
155 free(dir);
156 dir = NULL;
157 }
158 if (fexclude || finclude)
159 ok &= file_matching(p->fts_path);
160
161 if (ok)
162 c += procfile(p->fts_path);
163 break;
164 }
165 }
166
167 fts_close(fts);
168 return (c);
169 }
170
171 /*
172 * Opens a file and processes it. Each file is processed line-by-line
173 * passing the lines to procline().
174 */
175 int
176 procfile(const char *fn)
177 {
178 struct file *f;
179 struct stat sb;
180 struct str ln;
181 mode_t s;
182 int c, t;
183
184 if (mflag && (mcount <= 0))
185 return (0);
186
187 if (strcmp(fn, "-") == 0) {
188 fn = label != NULL ? label : getstr(1);
189 f = grep_open(NULL);
190 } else {
191 if (!stat(fn, &sb)) {
192 /* Check if we need to process the file */
193 s = sb.st_mode & S_IFMT;
194 if (s == S_IFDIR && dirbehave == DIR_SKIP)
195 return (0);
196 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
197 || s == S_IFSOCK) && devbehave == DEV_SKIP)
198 return (0);
199 }
200 f = grep_open(fn);
201 }
202 if (f == NULL) {
203 if (!sflag)
204 warn("%s", fn);
205 if (errno == ENOENT)
206 notfound = true;
207 return (0);
208 }
209
210 ln.file = grep_malloc(strlen(fn) + 1);
211 strcpy(ln.file, fn);
212 ln.line_no = 0;
213 ln.len = 0;
214 linesqueued = 0;
215 tail = 0;
216 ln.off = -1;
217
218 for (c = 0; c == 0 || !(lflag || qflag); ) {
219 ln.off += ln.len + 1;
220 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
221 if (ln.line_no == 0 && matchall)
222 exit(0);
223 else
224 break;
225 }
226 if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
227 --ln.len;
228 ln.line_no++;
229
230 /* Return if we need to skip a binary file */
231 if (f->binary && binbehave == BINFILE_SKIP) {
232 grep_close(f);
233 free(ln.file);
234 free(f);
235 return (0);
236 }
237 /* Process the file line-by-line */
238 if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
239 enqueue(&ln);
240 linesqueued++;
241 }
242 c += t;
243
244 /* Count the matches if we have a match limit */
245 if (mflag) {
246 mcount -= t;
247 if (mcount <= 0)
248 break;
249 }
250 }
251 if (Bflag > 0)
252 clearqueue();
253 grep_close(f);
254
255 if (cflag) {
256 if (!hflag)
257 printf("%s:", ln.file);
258 printf("%u\n", c);
259 }
260 if (lflag && !qflag && c != 0)
261 printf("%s\n", fn);
262 if (Lflag && !qflag && c == 0)
263 printf("%s\n", fn);
264 if (c && !cflag && !lflag && !Lflag &&
265 binbehave == BINFILE_BIN && f->binary && !qflag)
266 printf(getstr(8), fn);
267
268 free(ln.file);
269 free(f);
270 return (c);
271 }
272
/*
 * Returns non-zero when wc may be part of a word: an alphanumeric wide
 * character according to iswalnum(), or an underscore.
 *
 * Implemented as a function instead of a macro so that the argument is
 * evaluated exactly once.
 */
static inline int
iswword(wint_t wc)
{
	return (iswalnum(wc) || wc == (wint_t)L'_');
}
274
275 /*
276 * Processes a line comparing it with the specified patterns. Each pattern
277 * is looped to be compared along with the full string, saving each and every
278 * match, which is necessary to colorize the output and to count the
279 * matches. The matching lines are passed to printline() to display the
280 * appropriate output.
281 */
282 static inline int
283 procline(struct str *l, int nottext)
284 {
285 regmatch_t matches[MAX_LINE_MATCHES];
286 regmatch_t pmatch;
287 size_t st = 0;
288 unsigned int i;
289 int c = 0, m = 0, r = 0;
290
291 if (!matchall) {
292 /* Loop to process the whole line */
293 while (st <= l->len) {
294 pmatch.rm_so = st;
295 pmatch.rm_eo = l->len;
296
297 /* Loop to compare with all the patterns */
298 for (i = 0; i < patterns; i++) {
299 /*
300 * XXX: grep_search() is a workaround for speed up and should be
301 * removed in the future. See fastgrep.c.
302 */
303 if (fg_pattern[i].pattern) {
304 r = grep_search(&fg_pattern[i],
305 (unsigned char *)l->dat,
306 l->len, &pmatch);
307 r = (r == 0) ? 0 : REG_NOMATCH;
308 st = pmatch.rm_eo;
309 } else {
310 r = regexec(&r_pattern[i], l->dat, 1,
311 &pmatch, eflags);
312 r = (r == 0) ? 0 : REG_NOMATCH;
313 st = pmatch.rm_eo;
314 }
315 if (r == REG_NOMATCH)
316 continue;
317 /* Check for full match */
318 if (r == 0 && xflag)
319 if (pmatch.rm_so != 0 ||
320 (size_t)pmatch.rm_eo != l->len)
321 r = REG_NOMATCH;
322 /* Check for whole word match */
323 if (r == 0 && wflag && pmatch.rm_so != 0) {
324 wint_t wbegin, wend;
325
326 wbegin = wend = L' ';
327 if (pmatch.rm_so != 0 &&
328 sscanf(&l->dat[pmatch.rm_so - 1],
329 "%lc", &wbegin) != 1)
330 r = REG_NOMATCH;
331 else if ((size_t)pmatch.rm_eo != l->len &&
332 sscanf(&l->dat[pmatch.rm_eo],
333 "%lc", &wend) != 1)
334 r = REG_NOMATCH;
335 else if (iswword(wbegin) || iswword(wend))
336 r = REG_NOMATCH;
337 }
338 if (r == 0) {
339 if (m == 0)
340 c++;
341 if (m < MAX_LINE_MATCHES)
342 matches[m++] = pmatch;
343 /* matches - skip further patterns */
344 if ((color != NULL && !oflag) || qflag || lflag)
345 break;
346 }
347 }
348
349 if (vflag) {
350 c = !c;
351 break;
352 }
353 /* One pass if we are not recording matches */
354 if ((color != NULL && !oflag) || qflag || lflag)
355 break;
356
357 if (st == (size_t)pmatch.rm_so)
358 break; /* No matches */
359 }
360 } else
361 c = !vflag;
362
363 if (c && binbehave == BINFILE_BIN && nottext)
364 return (c); /* Binary file */
365
366 /* Dealing with the context */
367 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
368 if (c) {
369 if (!first && !prev && !tail && Aflag)
370 printf("--\n");
371 tail = Aflag;
372 if (Bflag > 0) {
373 if (!first && !prev)
374 printf("--\n");
375 printqueue();
376 }
377 linesqueued = 0;
378 printline(l, ':', matches, m);
379 } else {
380 printline(l, '-', matches, m);
381 tail--;
382 }
383 }
384
385 if (c) {
386 prev = true;
387 first = false;
388 } else
389 prev = false;
390
391 return (c);
392 }
393
/*
 * malloc() wrapper that never returns NULL: when the allocation fails the
 * program is terminated through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
406
/*
 * calloc() wrapper that never returns NULL: when the allocation fails the
 * program is terminated through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
419
/*
 * realloc() wrapper that never returns NULL: when the reallocation fails
 * the program is terminated through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
431
/*
 * strdup() wrapper that never returns NULL: when the copy cannot be
 * allocated the program is terminated through err() with exit status 2.
 * The caller owns the returned string and releases it with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
444
445 /*
446 * Prints a matching line according to the command line options.
447 */
448 void
449 printline(struct str *line, int sep, regmatch_t *matches, int m)
450 {
451 size_t a = 0;
452 int i, n = 0;
453
454 if (!hflag) {
455 if (nullflag == 0)
456 fputs(line->file, stdout);
457 else {
458 printf("%s", line->file);
459 putchar(0);
460 }
461 ++n;
462 }
463 if (nflag) {
464 if (n > 0)
465 putchar(sep);
466 printf("%d", line->line_no);
467 ++n;
468 }
469 if (bflag) {
470 if (n > 0)
471 putchar(sep);
472 printf("%lld", (long long)line->off);
473 ++n;
474 }
475 if (n)
476 putchar(sep);
477 /* --color and -o */
478 if ((oflag || color) && m > 0) {
479 for (i = 0; i < m; i++) {
480 if (!oflag)
481 fwrite(line->dat + a, matches[i].rm_so - a, 1,
482 stdout);
483 if (color)
484 fprintf(stdout, "\33[%sm\33[K", color);
485
486 fwrite(line->dat + matches[i].rm_so,
487 matches[i].rm_eo - matches[i].rm_so, 1,
488 stdout);
489 if (color)
490 fprintf(stdout, "\33[m\33[K");
491 a = matches[i].rm_eo;
492 if (oflag)
493 putchar('\n');
494 }
495 if (!oflag) {
496 if (line->len - a > 0)
497 fwrite(line->dat + a, line->len - a, 1, stdout);
498 putchar('\n');
499 }
500 } else {
501 fwrite(line->dat, line->len, 1, stdout);
502 putchar('\n');
503 }
504 }
505