Home | History | Annotate | Line # | Download | only in grep
file.c revision 1.3.24.1
      1  1.3.24.1  bouyer /*	$NetBSD: file.c,v 1.3.24.1 2011/02/17 12:00:55 bouyer Exp $	*/
      2  1.3.24.1  bouyer /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
      3  1.3.24.1  bouyer /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
      4       1.2  rillig 
      5       1.1    cjep /*-
      6  1.3.24.1  bouyer  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
      7  1.3.24.1  bouyer  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  1.3.24.1  bouyer  * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
      9       1.1    cjep  * All rights reserved.
     10       1.1    cjep  *
     11       1.1    cjep  * Redistribution and use in source and binary forms, with or without
     12       1.1    cjep  * modification, are permitted provided that the following conditions
     13       1.1    cjep  * are met:
     14       1.1    cjep  * 1. Redistributions of source code must retain the above copyright
     15       1.1    cjep  *    notice, this list of conditions and the following disclaimer.
     16       1.1    cjep  * 2. Redistributions in binary form must reproduce the above copyright
     17       1.1    cjep  *    notice, this list of conditions and the following disclaimer in the
     18       1.1    cjep  *    documentation and/or other materials provided with the distribution.
     19       1.1    cjep  *
     20       1.1    cjep  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     21       1.1    cjep  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22       1.1    cjep  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23       1.1    cjep  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     24       1.1    cjep  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25       1.1    cjep  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26       1.1    cjep  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27       1.1    cjep  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28       1.1    cjep  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29       1.1    cjep  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30       1.1    cjep  * SUCH DAMAGE.
     31       1.1    cjep  */
     32       1.1    cjep 
     33       1.2  rillig #include <sys/cdefs.h>
     34  1.3.24.1  bouyer __RCSID("$NetBSD: file.c,v 1.3.24.1 2011/02/17 12:00:55 bouyer Exp $");
     35       1.2  rillig 
     36       1.1    cjep #include <sys/param.h>
     37  1.3.24.1  bouyer #include <sys/types.h>
     38  1.3.24.1  bouyer #include <sys/stat.h>
     39       1.1    cjep 
     40  1.3.24.1  bouyer #include <bzlib.h>
     41       1.1    cjep #include <err.h>
     42  1.3.24.1  bouyer #include <errno.h>
     43  1.3.24.1  bouyer #include <fcntl.h>
     44  1.3.24.1  bouyer #include <stddef.h>
     45       1.1    cjep #include <stdlib.h>
     46  1.3.24.1  bouyer #include <string.h>
     47  1.3.24.1  bouyer #include <unistd.h>
     48  1.3.24.1  bouyer #include <wchar.h>
     49  1.3.24.1  bouyer #include <wctype.h>
     50  1.3.24.1  bouyer #include <zlib.h>
     51       1.1    cjep 
     52       1.1    cjep #include "grep.h"
     53       1.1    cjep 
     54  1.3.24.1  bouyer #define	MAXBUFSIZ	(32 * 1024)	/* size in bytes of the static read buffer */
     55  1.3.24.1  bouyer #define	LNBUFBUMP	80	/* extra bytes requested when sizing the line buffer */
     56  1.3.24.1  bouyer 
     57  1.3.24.1  bouyer static gzFile gzbufdesc;	/* zlib handle, set by gzdopen() in FILE_GZIP mode */
     58  1.3.24.1  bouyer static BZFILE* bzbufdesc;	/* bzip2 handle; NULL once reading fell back to plain read() */
     59  1.3.24.1  bouyer 
     60  1.3.24.1  bouyer static unsigned char buffer[MAXBUFSIZ];	/* read buffer shared by every file processed */
     61  1.3.24.1  bouyer static unsigned char *bufpos;	/* next unconsumed byte inside buffer */
     62  1.3.24.1  bouyer static size_t bufrem;	/* number of unconsumed bytes starting at bufpos */
     63  1.3.24.1  bouyer 
     64  1.3.24.1  bouyer static unsigned char *lnbuf;	/* heap buffer for lines that span more than one refill */
     65       1.2  rillig static size_t lnbuflen;	/* current allocated size of lnbuf in bytes */
     66       1.1    cjep 
     67  1.3.24.1  bouyer static inline int
     68  1.3.24.1  bouyer grep_refill(struct file *f)
     69  1.3.24.1  bouyer {
     70  1.3.24.1  bouyer 	ssize_t nr;
     71  1.3.24.1  bouyer 	int bzerr;
     72  1.3.24.1  bouyer 
     73  1.3.24.1  bouyer 	bufpos = buffer;
     74  1.3.24.1  bouyer 	bufrem = 0;
     75  1.3.24.1  bouyer 
     76  1.3.24.1  bouyer 	if (filebehave == FILE_GZIP)
     77  1.3.24.1  bouyer 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
     78  1.3.24.1  bouyer 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
     79  1.3.24.1  bouyer 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
     80  1.3.24.1  bouyer 		switch (bzerr) {
     81  1.3.24.1  bouyer 		case BZ_OK:
     82  1.3.24.1  bouyer 		case BZ_STREAM_END:
     83  1.3.24.1  bouyer 			/* No problem, nr will be okay */
     84       1.2  rillig 			break;
     85  1.3.24.1  bouyer 		case BZ_DATA_ERROR_MAGIC:
     86  1.3.24.1  bouyer 			/*
     87  1.3.24.1  bouyer 			 * As opposed to gzread(), which simply returns the
     88  1.3.24.1  bouyer 			 * plain file data, if it is not in the correct
     89  1.3.24.1  bouyer 			 * compressed format, BZ2_bzRead() instead aborts.
     90  1.3.24.1  bouyer 			 *
     91  1.3.24.1  bouyer 			 * So, just restart at the beginning of the file again,
     92  1.3.24.1  bouyer 			 * and use plain reads from now on.
     93  1.3.24.1  bouyer 			 */
     94  1.3.24.1  bouyer 			BZ2_bzReadClose(&bzerr, bzbufdesc);
     95  1.3.24.1  bouyer 			bzbufdesc = NULL;
     96  1.3.24.1  bouyer 			if (lseek(f->fd, 0, SEEK_SET) == -1)
     97  1.3.24.1  bouyer 				return (-1);
     98  1.3.24.1  bouyer 			nr = read(f->fd, buffer, MAXBUFSIZ);
     99       1.1    cjep 			break;
    100  1.3.24.1  bouyer 		default:
    101  1.3.24.1  bouyer 			/* Make sure we exit with an error */
    102  1.3.24.1  bouyer 			nr = -1;
    103       1.1    cjep 		}
    104  1.3.24.1  bouyer 	} else
    105  1.3.24.1  bouyer 		nr = read(f->fd, buffer, MAXBUFSIZ);
    106       1.1    cjep 
    107  1.3.24.1  bouyer 	if (nr < 0)
    108  1.3.24.1  bouyer 		return (-1);
    109  1.3.24.1  bouyer 
    110  1.3.24.1  bouyer 	bufrem = nr;
    111  1.3.24.1  bouyer 	return (0);
    112       1.1    cjep }
    113       1.1    cjep 
    114  1.3.24.1  bouyer static inline int
    115  1.3.24.1  bouyer grep_lnbufgrow(size_t newlen)
    116       1.1    cjep {
    117       1.1    cjep 
    118  1.3.24.1  bouyer 	if (lnbuflen < newlen) {
    119  1.3.24.1  bouyer 		lnbuf = grep_realloc(lnbuf, newlen);
    120  1.3.24.1  bouyer 		lnbuflen = newlen;
    121  1.3.24.1  bouyer 	}
    122       1.2  rillig 
    123  1.3.24.1  bouyer 	return (0);
    124  1.3.24.1  bouyer }
    125       1.2  rillig 
    126  1.3.24.1  bouyer char *
    127  1.3.24.1  bouyer grep_fgetln(struct file *f, size_t *lenp)
    128  1.3.24.1  bouyer {
    129  1.3.24.1  bouyer 	unsigned char *p;
    130  1.3.24.1  bouyer 	char *ret;
    131  1.3.24.1  bouyer 	size_t len;
    132  1.3.24.1  bouyer 	size_t off;
    133  1.3.24.1  bouyer 	ptrdiff_t diff;
    134  1.3.24.1  bouyer 
    135  1.3.24.1  bouyer 	/* Fill the buffer, if necessary */
    136  1.3.24.1  bouyer 	if (bufrem == 0 && grep_refill(f) != 0)
    137  1.3.24.1  bouyer 		goto error;
    138  1.3.24.1  bouyer 
    139  1.3.24.1  bouyer 	if (bufrem == 0) {
    140  1.3.24.1  bouyer 		/* Return zero length to indicate EOF */
    141  1.3.24.1  bouyer 		*lenp = 0;
    142  1.3.24.1  bouyer 		return ((char *)bufpos);
    143  1.3.24.1  bouyer 	}
    144  1.3.24.1  bouyer 
    145  1.3.24.1  bouyer 	/* Look for a newline in the remaining part of the buffer */
    146  1.3.24.1  bouyer 	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
    147  1.3.24.1  bouyer 		++p; /* advance over newline */
    148  1.3.24.1  bouyer 		ret = (char *)bufpos;
    149  1.3.24.1  bouyer 		len = p - bufpos;
    150  1.3.24.1  bouyer 		bufrem -= len;
    151  1.3.24.1  bouyer 		bufpos = p;
    152  1.3.24.1  bouyer 		*lenp = len;
    153  1.3.24.1  bouyer 		return (ret);
    154  1.3.24.1  bouyer 	}
    155  1.3.24.1  bouyer 
    156  1.3.24.1  bouyer 	/* We have to copy the current buffered data to the line buffer */
    157  1.3.24.1  bouyer 	for (len = bufrem, off = 0; ; len += bufrem) {
    158  1.3.24.1  bouyer 		/* Make sure there is room for more data */
    159  1.3.24.1  bouyer 		if (grep_lnbufgrow(len + LNBUFBUMP))
    160  1.3.24.1  bouyer 			goto error;
    161  1.3.24.1  bouyer 		memcpy(lnbuf + off, bufpos, len - off);
    162  1.3.24.1  bouyer 		off = len;
    163  1.3.24.1  bouyer 		if (grep_refill(f) != 0)
    164  1.3.24.1  bouyer 			goto error;
    165  1.3.24.1  bouyer 		if (bufrem == 0)
    166  1.3.24.1  bouyer 			/* EOF: return partial line */
    167  1.3.24.1  bouyer 			break;
    168  1.3.24.1  bouyer 		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
    169  1.3.24.1  bouyer 			continue;
    170  1.3.24.1  bouyer 		/* got it: finish up the line (like code above) */
    171  1.3.24.1  bouyer 		++p;
    172  1.3.24.1  bouyer 		diff = p - bufpos;
    173  1.3.24.1  bouyer 		len += diff;
    174  1.3.24.1  bouyer 		if (grep_lnbufgrow(len))
    175  1.3.24.1  bouyer 		    goto error;
    176  1.3.24.1  bouyer 		memcpy(lnbuf + off, bufpos, diff);
    177  1.3.24.1  bouyer 		bufrem -= diff;
    178  1.3.24.1  bouyer 		bufpos = p;
    179  1.3.24.1  bouyer 		break;
    180       1.1    cjep 	}
    181  1.3.24.1  bouyer 	*lenp = len;
    182  1.3.24.1  bouyer 	return ((char *)lnbuf);
    183       1.2  rillig 
    184  1.3.24.1  bouyer error:
    185  1.3.24.1  bouyer 	*lenp = 0;
    186  1.3.24.1  bouyer 	return (NULL);
    187       1.1    cjep }
    188       1.1    cjep 
    189  1.3.24.1  bouyer static inline struct file *
    190  1.3.24.1  bouyer grep_file_init(struct file *f)
    191       1.1    cjep {
    192       1.1    cjep 
    193  1.3.24.1  bouyer 	if (filebehave == FILE_GZIP &&
    194  1.3.24.1  bouyer 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
    195  1.3.24.1  bouyer 		goto error;
    196       1.2  rillig 
    197  1.3.24.1  bouyer 	if (filebehave == FILE_BZIP &&
    198  1.3.24.1  bouyer 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
    199  1.3.24.1  bouyer 		goto error;
    200       1.2  rillig 
    201  1.3.24.1  bouyer 	/* Fill read buffer, also catches errors early */
    202  1.3.24.1  bouyer 	if (grep_refill(f) != 0)
    203  1.3.24.1  bouyer 		goto error;
    204  1.3.24.1  bouyer 
    205  1.3.24.1  bouyer 	/* Check for binary stuff, if necessary */
    206  1.3.24.1  bouyer 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
    207  1.3.24.1  bouyer 		f->binary = true;
    208       1.2  rillig 
    209  1.3.24.1  bouyer 	return (f);
    210  1.3.24.1  bouyer error:
    211  1.3.24.1  bouyer 	close(f->fd);
    212       1.1    cjep 	free(f);
    213  1.3.24.1  bouyer 	return (NULL);
    214       1.1    cjep }
    215       1.1    cjep 
    216  1.3.24.1  bouyer /*
    217  1.3.24.1  bouyer  * Opens a file for processing.
    218  1.3.24.1  bouyer  */
    219  1.3.24.1  bouyer struct file *
    220  1.3.24.1  bouyer grep_open(const char *path)
    221       1.1    cjep {
    222  1.3.24.1  bouyer 	struct file *f;
    223       1.1    cjep 
    224  1.3.24.1  bouyer 	f = grep_malloc(sizeof *f);
    225  1.3.24.1  bouyer 	memset(f, 0, sizeof *f);
    226  1.3.24.1  bouyer 	if (path == NULL) {
    227  1.3.24.1  bouyer 		/* Processing stdin implies --line-buffered. */
    228  1.3.24.1  bouyer 		lbflag = true;
    229  1.3.24.1  bouyer 		f->fd = STDIN_FILENO;
    230  1.3.24.1  bouyer 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
    231  1.3.24.1  bouyer 		free(f);
    232  1.3.24.1  bouyer 		return (NULL);
    233       1.1    cjep 	}
    234  1.3.24.1  bouyer 
    235  1.3.24.1  bouyer 	return (grep_file_init(f));
    236       1.1    cjep }
    237       1.1    cjep 
    238  1.3.24.1  bouyer /*
    239  1.3.24.1  bouyer  * Closes a file.
    240  1.3.24.1  bouyer  */
    241       1.1    cjep void
    242  1.3.24.1  bouyer grep_close(struct file *f)
    243       1.1    cjep {
    244  1.3.24.1  bouyer 
    245  1.3.24.1  bouyer 	close(f->fd);
    246  1.3.24.1  bouyer 
    247  1.3.24.1  bouyer 	/* Reset read buffer and line buffer */
    248  1.3.24.1  bouyer 	bufpos = buffer;
    249  1.3.24.1  bouyer 	bufrem = 0;
    250  1.3.24.1  bouyer 
    251  1.3.24.1  bouyer 	free(lnbuf);
    252  1.3.24.1  bouyer 	lnbuf = NULL;
    253  1.3.24.1  bouyer 	lnbuflen = 0;
    254       1.1    cjep }
    255