Home | History | Annotate | Line # | Download | only in grep
file.c revision 1.4
      1  1.4   joerg /*	$NetBSD: file.c,v 1.4 2011/02/16 01:31:33 joerg Exp $	*/
      2  1.4   joerg /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
      3  1.4   joerg /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
      4  1.2  rillig 
      5  1.1    cjep /*-
      6  1.4   joerg  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
      7  1.4   joerg  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  1.4   joerg  * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
      9  1.1    cjep  * All rights reserved.
     10  1.1    cjep  *
     11  1.1    cjep  * Redistribution and use in source and binary forms, with or without
     12  1.1    cjep  * modification, are permitted provided that the following conditions
     13  1.1    cjep  * are met:
     14  1.1    cjep  * 1. Redistributions of source code must retain the above copyright
     15  1.1    cjep  *    notice, this list of conditions and the following disclaimer.
     16  1.1    cjep  * 2. Redistributions in binary form must reproduce the above copyright
     17  1.1    cjep  *    notice, this list of conditions and the following disclaimer in the
     18  1.1    cjep  *    documentation and/or other materials provided with the distribution.
     19  1.1    cjep  *
     20  1.1    cjep  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     21  1.1    cjep  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  1.1    cjep  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  1.1    cjep  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     24  1.1    cjep  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  1.1    cjep  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  1.1    cjep  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  1.1    cjep  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  1.1    cjep  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  1.1    cjep  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  1.1    cjep  * SUCH DAMAGE.
     31  1.1    cjep  */
     32  1.1    cjep 
     33  1.2  rillig #include <sys/cdefs.h>
     34  1.4   joerg __RCSID("$NetBSD: file.c,v 1.4 2011/02/16 01:31:33 joerg Exp $");
     35  1.2  rillig 
     36  1.1    cjep #include <sys/param.h>
     37  1.4   joerg #include <sys/types.h>
     38  1.4   joerg #include <sys/stat.h>
     39  1.1    cjep 
     40  1.4   joerg #include <bzlib.h>
     41  1.1    cjep #include <err.h>
     42  1.4   joerg #include <errno.h>
     43  1.4   joerg #include <fcntl.h>
     44  1.4   joerg #include <stddef.h>
     45  1.1    cjep #include <stdlib.h>
     46  1.4   joerg #include <string.h>
     47  1.4   joerg #include <unistd.h>
     48  1.4   joerg #include <wchar.h>
     49  1.4   joerg #include <wctype.h>
     50  1.4   joerg #include <zlib.h>
     51  1.1    cjep 
     52  1.1    cjep #include "grep.h"
     53  1.1    cjep 
     54  1.4   joerg #define	MAXBUFSIZ	(32 * 1024)
     55  1.4   joerg #define	LNBUFBUMP	80
     56  1.4   joerg 
     57  1.4   joerg static gzFile gzbufdesc;
     58  1.4   joerg static BZFILE* bzbufdesc;
     59  1.4   joerg 
     60  1.4   joerg static unsigned char buffer[MAXBUFSIZ];
     61  1.4   joerg static unsigned char *bufpos;
     62  1.4   joerg static size_t bufrem;
     63  1.4   joerg 
     64  1.4   joerg static unsigned char *lnbuf;
     65  1.2  rillig static size_t lnbuflen;
     66  1.1    cjep 
     67  1.4   joerg static inline int
     68  1.4   joerg grep_refill(struct file *f)
     69  1.4   joerg {
     70  1.4   joerg 	ssize_t nr;
     71  1.4   joerg 	int bzerr;
     72  1.4   joerg 
     73  1.4   joerg 	bufpos = buffer;
     74  1.4   joerg 	bufrem = 0;
     75  1.4   joerg 
     76  1.4   joerg 	if (filebehave == FILE_GZIP)
     77  1.4   joerg 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
     78  1.4   joerg 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
     79  1.4   joerg 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
     80  1.4   joerg 		switch (bzerr) {
     81  1.4   joerg 		case BZ_OK:
     82  1.4   joerg 		case BZ_STREAM_END:
     83  1.4   joerg 			/* No problem, nr will be okay */
     84  1.2  rillig 			break;
     85  1.4   joerg 		case BZ_DATA_ERROR_MAGIC:
     86  1.4   joerg 			/*
     87  1.4   joerg 			 * As opposed to gzread(), which simply returns the
     88  1.4   joerg 			 * plain file data, if it is not in the correct
     89  1.4   joerg 			 * compressed format, BZ2_bzRead() instead aborts.
     90  1.4   joerg 			 *
     91  1.4   joerg 			 * So, just restart at the beginning of the file again,
     92  1.4   joerg 			 * and use plain reads from now on.
     93  1.4   joerg 			 */
     94  1.4   joerg 			BZ2_bzReadClose(&bzerr, bzbufdesc);
     95  1.4   joerg 			bzbufdesc = NULL;
     96  1.4   joerg 			if (lseek(f->fd, 0, SEEK_SET) == -1)
     97  1.4   joerg 				return (-1);
     98  1.4   joerg 			nr = read(f->fd, buffer, MAXBUFSIZ);
     99  1.1    cjep 			break;
    100  1.4   joerg 		default:
    101  1.4   joerg 			/* Make sure we exit with an error */
    102  1.4   joerg 			nr = -1;
    103  1.1    cjep 		}
    104  1.4   joerg 	} else
    105  1.4   joerg 		nr = read(f->fd, buffer, MAXBUFSIZ);
    106  1.4   joerg 
    107  1.4   joerg 	if (nr < 0)
    108  1.4   joerg 		return (-1);
    109  1.1    cjep 
    110  1.4   joerg 	bufrem = nr;
    111  1.4   joerg 	return (0);
    112  1.1    cjep }
    113  1.1    cjep 
    114  1.4   joerg static inline int
    115  1.4   joerg grep_lnbufgrow(size_t newlen)
    116  1.1    cjep {
    117  1.1    cjep 
    118  1.4   joerg 	if (lnbuflen < newlen) {
    119  1.4   joerg 		lnbuf = grep_realloc(lnbuf, newlen);
    120  1.4   joerg 		lnbuflen = newlen;
    121  1.4   joerg 	}
    122  1.2  rillig 
    123  1.4   joerg 	return (0);
    124  1.4   joerg }
    125  1.2  rillig 
    126  1.4   joerg char *
    127  1.4   joerg grep_fgetln(struct file *f, size_t *lenp)
    128  1.4   joerg {
    129  1.4   joerg 	unsigned char *p;
    130  1.4   joerg 	char *ret;
    131  1.4   joerg 	size_t len;
    132  1.4   joerg 	size_t off;
    133  1.4   joerg 	ptrdiff_t diff;
    134  1.4   joerg 
    135  1.4   joerg 	/* Fill the buffer, if necessary */
    136  1.4   joerg 	if (bufrem == 0 && grep_refill(f) != 0)
    137  1.4   joerg 		goto error;
    138  1.4   joerg 
    139  1.4   joerg 	if (bufrem == 0) {
    140  1.4   joerg 		/* Return zero length to indicate EOF */
    141  1.4   joerg 		*lenp = 0;
    142  1.4   joerg 		return (bufpos);
    143  1.4   joerg 	}
    144  1.4   joerg 
    145  1.4   joerg 	/* Look for a newline in the remaining part of the buffer */
    146  1.4   joerg 	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
    147  1.4   joerg 		++p; /* advance over newline */
    148  1.4   joerg 		ret = bufpos;
    149  1.4   joerg 		len = p - bufpos;
    150  1.4   joerg 		bufrem -= len;
    151  1.4   joerg 		bufpos = p;
    152  1.4   joerg 		*lenp = len;
    153  1.4   joerg 		return (ret);
    154  1.4   joerg 	}
    155  1.4   joerg 
    156  1.4   joerg 	/* We have to copy the current buffered data to the line buffer */
    157  1.4   joerg 	for (len = bufrem, off = 0; ; len += bufrem) {
    158  1.4   joerg 		/* Make sure there is room for more data */
    159  1.4   joerg 		if (grep_lnbufgrow(len + LNBUFBUMP))
    160  1.4   joerg 			goto error;
    161  1.4   joerg 		memcpy(lnbuf + off, bufpos, len - off);
    162  1.4   joerg 		off = len;
    163  1.4   joerg 		if (grep_refill(f) != 0)
    164  1.4   joerg 			goto error;
    165  1.4   joerg 		if (bufrem == 0)
    166  1.4   joerg 			/* EOF: return partial line */
    167  1.4   joerg 			break;
    168  1.4   joerg 		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
    169  1.4   joerg 			continue;
    170  1.4   joerg 		/* got it: finish up the line (like code above) */
    171  1.4   joerg 		++p;
    172  1.4   joerg 		diff = p - bufpos;
    173  1.4   joerg 		len += diff;
    174  1.4   joerg 		if (grep_lnbufgrow(len))
    175  1.4   joerg 		    goto error;
    176  1.4   joerg 		memcpy(lnbuf + off, bufpos, diff);
    177  1.4   joerg 		bufrem -= diff;
    178  1.4   joerg 		bufpos = p;
    179  1.4   joerg 		break;
    180  1.1    cjep 	}
    181  1.4   joerg 	*lenp = len;
    182  1.4   joerg 	return (lnbuf);
    183  1.2  rillig 
    184  1.4   joerg error:
    185  1.4   joerg 	*lenp = 0;
    186  1.4   joerg 	return (NULL);
    187  1.1    cjep }
    188  1.1    cjep 
    189  1.4   joerg static inline struct file *
    190  1.4   joerg grep_file_init(struct file *f)
    191  1.1    cjep {
    192  1.1    cjep 
    193  1.4   joerg 	if (filebehave == FILE_GZIP &&
    194  1.4   joerg 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
    195  1.4   joerg 		goto error;
    196  1.4   joerg 
    197  1.4   joerg 	if (filebehave == FILE_BZIP &&
    198  1.4   joerg 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
    199  1.4   joerg 		goto error;
    200  1.2  rillig 
    201  1.4   joerg 	/* Fill read buffer, also catches errors early */
    202  1.4   joerg 	if (grep_refill(f) != 0)
    203  1.4   joerg 		goto error;
    204  1.2  rillig 
    205  1.4   joerg 	/* Check for binary stuff, if necessary */
    206  1.4   joerg 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
    207  1.4   joerg 		f->binary = true;
    208  1.2  rillig 
    209  1.4   joerg 	return (f);
    210  1.4   joerg error:
    211  1.4   joerg 	close(f->fd);
    212  1.1    cjep 	free(f);
    213  1.4   joerg 	return (NULL);
    214  1.1    cjep }
    215  1.1    cjep 
    216  1.4   joerg /*
    217  1.4   joerg  * Opens a file for processing.
    218  1.4   joerg  */
    219  1.4   joerg struct file *
    220  1.4   joerg grep_open(const char *path)
    221  1.1    cjep {
    222  1.4   joerg 	struct file *f;
    223  1.4   joerg 
    224  1.4   joerg 	f = grep_malloc(sizeof *f);
    225  1.4   joerg 	memset(f, 0, sizeof *f);
    226  1.4   joerg 	if (path == NULL) {
    227  1.4   joerg 		/* Processing stdin implies --line-buffered. */
    228  1.4   joerg 		lbflag = true;
    229  1.4   joerg 		f->fd = STDIN_FILENO;
    230  1.4   joerg 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
    231  1.4   joerg 		free(f);
    232  1.4   joerg 		return (NULL);
    233  1.1    cjep 	}
    234  1.1    cjep 
    235  1.4   joerg 	return (grep_file_init(f));
    236  1.1    cjep }
    237  1.1    cjep 
    238  1.4   joerg /*
    239  1.4   joerg  * Closes a file.
    240  1.4   joerg  */
    241  1.1    cjep void
    242  1.4   joerg grep_close(struct file *f)
    243  1.1    cjep {
    244  1.4   joerg 
    245  1.4   joerg 	close(f->fd);
    246  1.4   joerg 
    247  1.4   joerg 	/* Reset read buffer and line buffer */
    248  1.4   joerg 	bufpos = buffer;
    249  1.4   joerg 	bufrem = 0;
    250  1.4   joerg 
    251  1.4   joerg 	free(lnbuf);
    252  1.4   joerg 	lnbuf = NULL;
    253  1.4   joerg 	lnbuflen = 0;
    254  1.1    cjep }
    255