util.c revision 1.10 1 /* $NetBSD: util.c,v 1.10 2011/04/18 03:27:40 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/util.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: util.c,v 1.10 2011/04/18 03:27:40 joerg Exp $");
38
39 #include <sys/stat.h>
40 #include <sys/types.h>
41
42 #include <ctype.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <fnmatch.h>
46 #include <fts.h>
47 #include <libgen.h>
48 #include <stdbool.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <wchar.h>
54 #include <wctype.h>
55
56 #include "grep.h"
57
/*
 * Number of lines handed to enqueue() since the last matching line (or
 * since the start of the current file).
 */
static int linesqueued = 0;

/* Matches a single line against the patterns; defined further below. */
static int procline(struct str *line, int nottext);
60
61 bool
62 file_matching(const char *fname)
63 {
64 char *fname_base, *fname_copy;
65 bool ret;
66
67 ret = finclude ? false : true;
68 fname_copy = grep_strdup(fname);
69 fname_base = basename(fname_copy);
70
71 for (unsigned int i = 0; i < fpatterns; ++i) {
72 if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
73 fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
74 if (fpattern[i].mode == EXCL_PAT)
75 return (false);
76 else
77 ret = true;
78 }
79 }
80 free(fname_copy);
81 return (ret);
82 }
83
84 static inline bool
85 dir_matching(const char *dname)
86 {
87 bool ret;
88
89 ret = dinclude ? false : true;
90
91 for (unsigned int i = 0; i < dpatterns; ++i) {
92 if (dname != NULL &&
93 fnmatch(dname, dpattern[i].pat, 0) == 0) {
94 if (dpattern[i].mode == EXCL_PAT)
95 return (false);
96 else
97 ret = true;
98 }
99 }
100 return (ret);
101 }
102
103 /*
104 * Processes a directory when a recursive search is performed with
105 * the -R option. Each appropriate file is passed to procfile().
106 */
107 int
108 grep_tree(char **argv)
109 {
110 FTS *fts;
111 FTSENT *p;
112 char *d, *dir = NULL;
113 int c, fts_flags;
114 bool ok;
115
116 c = fts_flags = 0;
117
118 switch(linkbehave) {
119 case LINK_EXPLICIT:
120 fts_flags = FTS_COMFOLLOW;
121 break;
122 case LINK_SKIP:
123 fts_flags = FTS_PHYSICAL;
124 break;
125 default:
126 fts_flags = FTS_LOGICAL;
127
128 }
129
130 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
131
132 if (!(fts = fts_open(argv, fts_flags, NULL)))
133 err(2, "fts_open");
134 while ((p = fts_read(fts)) != NULL) {
135 switch (p->fts_info) {
136 case FTS_DNR:
137 /* FALLTHROUGH */
138 case FTS_ERR:
139 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
140 break;
141 case FTS_D:
142 /* FALLTHROUGH */
143 case FTS_DP:
144 break;
145 case FTS_DC:
146 /* Print a warning for recursive directory loop */
147 warnx("warning: %s: recursive directory loop",
148 p->fts_path);
149 break;
150 default:
151 /* Check for file exclusion/inclusion */
152 ok = true;
153 if (dexclude || dinclude) {
154 if ((d = strrchr(p->fts_path, '/')) != NULL) {
155 dir = grep_malloc(sizeof(char) *
156 (d - p->fts_path + 1));
157 memcpy(dir, p->fts_path,
158 d - p->fts_path);
159 dir[d - p->fts_path] = '\0';
160 }
161 ok = dir_matching(dir);
162 free(dir);
163 dir = NULL;
164 }
165 if (fexclude || finclude)
166 ok &= file_matching(p->fts_path);
167
168 if (ok)
169 c += procfile(p->fts_path);
170 break;
171 }
172 }
173
174 fts_close(fts);
175 return (c);
176 }
177
178 /*
179 * Opens a file and processes it. Each file is processed line-by-line
180 * passing the lines to procline().
181 */
182 int
183 procfile(const char *fn)
184 {
185 struct file *f;
186 struct stat sb;
187 struct str ln;
188 mode_t s;
189 int c, t;
190
191 if (mflag && (mcount <= 0))
192 return (0);
193
194 if (strcmp(fn, "-") == 0) {
195 fn = label != NULL ? label : getstr(1);
196 f = grep_open(NULL);
197 } else {
198 if (!stat(fn, &sb)) {
199 /* Check if we need to process the file */
200 s = sb.st_mode & S_IFMT;
201 if (s == S_IFDIR && dirbehave == DIR_SKIP)
202 return (0);
203 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
204 || s == S_IFSOCK) && devbehave == DEV_SKIP)
205 return (0);
206 }
207 f = grep_open(fn);
208 }
209 if (f == NULL) {
210 if (!sflag)
211 warn("%s", fn);
212 if (errno == ENOENT)
213 notfound = true;
214 return (0);
215 }
216
217 ln.file = grep_malloc(strlen(fn) + 1);
218 strcpy(ln.file, fn);
219 ln.line_no = 0;
220 ln.len = 0;
221 linesqueued = 0;
222 tail = 0;
223 ln.off = -1;
224
225 for (c = 0; c == 0 || !(lflag || qflag); ) {
226 ln.off += ln.len + 1;
227 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
228 if (ln.line_no == 0 && matchall)
229 exit(0);
230 else
231 break;
232 }
233 if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
234 --ln.len;
235 ln.line_no++;
236
237 /* Return if we need to skip a binary file */
238 if (f->binary && binbehave == BINFILE_SKIP) {
239 grep_close(f);
240 free(ln.file);
241 free(f);
242 return (0);
243 }
244 /* Process the file line-by-line */
245 if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
246 enqueue(&ln);
247 linesqueued++;
248 }
249 c += t;
250
251 /* Count the matches if we have a match limit */
252 if (mflag) {
253 mcount -= t;
254 if (mcount <= 0)
255 break;
256 }
257 }
258 if (Bflag > 0)
259 clearqueue();
260 grep_close(f);
261
262 if (cflag) {
263 if (!hflag)
264 printf("%s:", ln.file);
265 printf("%u\n", c);
266 }
267 if (lflag && !qflag && c != 0)
268 printf("%s\n", fn);
269 if (Lflag && !qflag && c == 0)
270 printf("%s\n", fn);
271 if (c && !cflag && !lflag && !Lflag &&
272 binbehave == BINFILE_BIN && f->binary && !qflag)
273 printf(getstr(8), fn);
274
275 free(ln.file);
276 free(f);
277 return (c);
278 }
279
280 #define iswword(x) (iswalnum((x)) || (x) == L'_')
281
282 /*
283 * Processes a line comparing it with the specified patterns. Each pattern
284 * is looped to be compared along with the full string, saving each and every
285 * match, which is necessary to colorize the output and to count the
286 * matches. The matching lines are passed to printline() to display the
287 * appropriate output.
288 */
289 static int
290 procline(struct str *l, int nottext)
291 {
292 regmatch_t matches[MAX_LINE_MATCHES];
293 regmatch_t pmatch;
294 size_t st = 0;
295 unsigned int i;
296 int c = 0, m = 0, r = 0;
297
298 if (!matchall) {
299 /* Loop to process the whole line */
300 while (st <= l->len) {
301 pmatch.rm_so = st;
302 pmatch.rm_eo = l->len;
303
304 /* Loop to compare with all the patterns */
305 for (i = 0; i < patterns; i++) {
306 /*
307 * XXX: grep_search() is a workaround for speed up and should be
308 * removed in the future. See fastgrep.c.
309 */
310 if (fg_pattern[i].pattern) {
311 r = grep_search(&fg_pattern[i],
312 (unsigned char *)l->dat,
313 l->len, &pmatch);
314 r = (r == 0) ? 0 : REG_NOMATCH;
315 st = pmatch.rm_eo;
316 } else {
317 r = regexec(&r_pattern[i], l->dat, 1,
318 &pmatch, eflags);
319 r = (r == 0) ? 0 : REG_NOMATCH;
320 st = pmatch.rm_eo;
321 }
322 if (r == REG_NOMATCH)
323 continue;
324 /* Check for full match */
325 if (r == 0 && xflag)
326 if (pmatch.rm_so != 0 ||
327 (size_t)pmatch.rm_eo != l->len)
328 r = REG_NOMATCH;
329 /* Check for whole word match */
330 if (r == 0 && fg_pattern[i].word &&
331 pmatch.rm_so != 0) {
332 wint_t wbegin, wend;
333
334 wbegin = wend = L' ';
335 if (pmatch.rm_so != 0 &&
336 sscanf(&l->dat[pmatch.rm_so - 1],
337 "%lc", &wbegin) != 1)
338 r = REG_NOMATCH;
339 else if ((size_t)pmatch.rm_eo != l->len &&
340 sscanf(&l->dat[pmatch.rm_eo],
341 "%lc", &wend) != 1)
342 r = REG_NOMATCH;
343 else if (iswword(wbegin) || iswword(wend))
344 r = REG_NOMATCH;
345 }
346 if (r == 0) {
347 if (m == 0)
348 c++;
349 if (m < MAX_LINE_MATCHES)
350 matches[m++] = pmatch;
351 /* matches - skip further patterns */
352 if ((color != NULL && !oflag) || qflag || lflag)
353 break;
354 }
355 }
356
357 if (vflag) {
358 c = !c;
359 break;
360 }
361 /* One pass if we are not recording matches */
362 if ((color != NULL && !oflag) || qflag || lflag)
363 break;
364
365 if (st == (size_t)pmatch.rm_so)
366 break; /* No matches */
367 }
368 } else
369 c = !vflag;
370
371 if (c && binbehave == BINFILE_BIN && nottext)
372 return (c); /* Binary file */
373
374 /* Dealing with the context */
375 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
376 if (c) {
377 if (!first && !prev && !tail && Aflag)
378 printf("--\n");
379 tail = Aflag;
380 if (Bflag > 0) {
381 if (!first && !prev)
382 printf("--\n");
383 printqueue();
384 }
385 linesqueued = 0;
386 printline(l, ':', matches, m);
387 } else {
388 printline(l, '-', matches, m);
389 tail--;
390 }
391 }
392
393 if (c) {
394 prev = true;
395 first = false;
396 } else
397 prev = false;
398
399 return (c);
400 }
401
/*
 * malloc() wrapper for internal use: terminates the program with exit
 * status 2 when the allocation fails, so the result is never NULL.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
414
/*
 * calloc() wrapper for internal use: terminates the program with exit
 * status 2 when the allocation fails, so the result is never NULL.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
427
/*
 * realloc() wrapper for internal use: terminates the program with exit
 * status 2 when the reallocation fails, so the result is never NULL.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
439
/*
 * strdup() wrapper for internal use: terminates the program with exit
 * status 2 when the copy cannot be allocated, so the result is never
 * NULL.  The caller owns the returned string and releases it with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
452
453 /*
454 * Prints a matching line according to the command line options.
455 */
456 void
457 printline(struct str *line, int sep, regmatch_t *matches, int m)
458 {
459 size_t a = 0;
460 int i, n = 0;
461
462 if (!hflag) {
463 if (nullflag == 0)
464 fputs(line->file, stdout);
465 else {
466 printf("%s", line->file);
467 putchar(0);
468 }
469 ++n;
470 }
471 if (nflag) {
472 if (n > 0)
473 putchar(sep);
474 printf("%d", line->line_no);
475 ++n;
476 }
477 if (bflag) {
478 if (n > 0)
479 putchar(sep);
480 printf("%lld", (long long)line->off);
481 ++n;
482 }
483 if (n)
484 putchar(sep);
485 /* --color and -o */
486 if ((oflag || color) && m > 0) {
487 for (i = 0; i < m; i++) {
488 if (!oflag)
489 fwrite(line->dat + a, matches[i].rm_so - a, 1,
490 stdout);
491 if (color)
492 fprintf(stdout, "\33[%sm\33[K", color);
493
494 fwrite(line->dat + matches[i].rm_so,
495 matches[i].rm_eo - matches[i].rm_so, 1,
496 stdout);
497 if (color)
498 fprintf(stdout, "\33[m\33[K");
499 a = matches[i].rm_eo;
500 if (oflag)
501 putchar('\n');
502 }
503 if (!oflag) {
504 if (line->len - a > 0)
505 fwrite(line->dat + a, line->len - a, 1, stdout);
506 putchar('\n');
507 }
508 } else {
509 fwrite(line->dat, line->len, 1, stdout);
510 putchar('\n');
511 }
512 }
513