Home | History | Annotate | Line # | Download | only in grep
file.c revision 1.9
      1  1.9  christos /*	$NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $	*/
      2  1.4     joerg /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
      3  1.4     joerg /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
      4  1.2    rillig 
      5  1.1      cjep /*-
      6  1.4     joerg  * Copyright (c) 1999 James Howard and Dag-Erling Codan Smrgrav
      7  1.4     joerg  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  1.4     joerg  * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
      9  1.1      cjep  * All rights reserved.
     10  1.1      cjep  *
     11  1.1      cjep  * Redistribution and use in source and binary forms, with or without
     12  1.1      cjep  * modification, are permitted provided that the following conditions
     13  1.1      cjep  * are met:
     14  1.1      cjep  * 1. Redistributions of source code must retain the above copyright
     15  1.1      cjep  *    notice, this list of conditions and the following disclaimer.
     16  1.1      cjep  * 2. Redistributions in binary form must reproduce the above copyright
     17  1.1      cjep  *    notice, this list of conditions and the following disclaimer in the
     18  1.1      cjep  *    documentation and/or other materials provided with the distribution.
     19  1.1      cjep  *
     20  1.1      cjep  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     21  1.1      cjep  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  1.1      cjep  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  1.1      cjep  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     24  1.1      cjep  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  1.1      cjep  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  1.1      cjep  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  1.1      cjep  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  1.1      cjep  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  1.1      cjep  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  1.1      cjep  * SUCH DAMAGE.
     31  1.1      cjep  */
     32  1.1      cjep 
     33  1.6     joerg #if HAVE_NBTOOL_CONFIG_H
     34  1.6     joerg #include "nbtool_config.h"
     35  1.6     joerg #endif
     36  1.6     joerg 
     37  1.2    rillig #include <sys/cdefs.h>
     38  1.9  christos __RCSID("$NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $");
     39  1.2    rillig 
     40  1.1      cjep #include <sys/param.h>
     41  1.4     joerg #include <sys/types.h>
     42  1.4     joerg #include <sys/stat.h>
     43  1.1      cjep 
     44  1.1      cjep #include <err.h>
     45  1.4     joerg #include <errno.h>
     46  1.4     joerg #include <fcntl.h>
     47  1.4     joerg #include <stddef.h>
     48  1.1      cjep #include <stdlib.h>
     49  1.4     joerg #include <string.h>
     50  1.4     joerg #include <unistd.h>
     51  1.4     joerg #include <wchar.h>
     52  1.4     joerg #include <wctype.h>
     53  1.1      cjep 
     54  1.1      cjep #include "grep.h"
     55  1.1      cjep 
     56  1.4     joerg #define	MAXBUFSIZ	(32 * 1024)
     57  1.4     joerg #define	LNBUFBUMP	80
     58  1.4     joerg 
     59  1.4     joerg static gzFile gzbufdesc;
     60  1.9  christos #ifndef WITHOUT_BZ2
     61  1.4     joerg static BZFILE* bzbufdesc;
     62  1.9  christos #endif
     63  1.4     joerg 
     64  1.4     joerg static unsigned char buffer[MAXBUFSIZ];
     65  1.4     joerg static unsigned char *bufpos;
     66  1.4     joerg static size_t bufrem;
     67  1.4     joerg 
     68  1.4     joerg static unsigned char *lnbuf;
     69  1.2    rillig static size_t lnbuflen;
     70  1.1      cjep 
     71  1.4     joerg static inline int
     72  1.4     joerg grep_refill(struct file *f)
     73  1.4     joerg {
     74  1.4     joerg 	ssize_t nr;
     75  1.4     joerg 	int bzerr;
     76  1.4     joerg 
     77  1.4     joerg 	bufpos = buffer;
     78  1.4     joerg 	bufrem = 0;
     79  1.4     joerg 
     80  1.9  christos 	if (filebehave == FILE_GZIP) {
     81  1.4     joerg 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
     82  1.9  christos #ifndef WITHOUT_BZ2
     83  1.9  christos 	} else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
     84  1.4     joerg 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
     85  1.4     joerg 		switch (bzerr) {
     86  1.4     joerg 		case BZ_OK:
     87  1.4     joerg 		case BZ_STREAM_END:
     88  1.4     joerg 			/* No problem, nr will be okay */
     89  1.2    rillig 			break;
     90  1.4     joerg 		case BZ_DATA_ERROR_MAGIC:
     91  1.4     joerg 			/*
     92  1.4     joerg 			 * As opposed to gzread(), which simply returns the
     93  1.4     joerg 			 * plain file data, if it is not in the correct
     94  1.4     joerg 			 * compressed format, BZ2_bzRead() instead aborts.
     95  1.4     joerg 			 *
     96  1.4     joerg 			 * So, just restart at the beginning of the file again,
     97  1.4     joerg 			 * and use plain reads from now on.
     98  1.4     joerg 			 */
     99  1.4     joerg 			BZ2_bzReadClose(&bzerr, bzbufdesc);
    100  1.4     joerg 			bzbufdesc = NULL;
    101  1.4     joerg 			if (lseek(f->fd, 0, SEEK_SET) == -1)
    102  1.4     joerg 				return (-1);
    103  1.4     joerg 			nr = read(f->fd, buffer, MAXBUFSIZ);
    104  1.1      cjep 			break;
    105  1.4     joerg 		default:
    106  1.4     joerg 			/* Make sure we exit with an error */
    107  1.4     joerg 			nr = -1;
    108  1.1      cjep 		}
    109  1.9  christos #endif
    110  1.4     joerg 	} else
    111  1.4     joerg 		nr = read(f->fd, buffer, MAXBUFSIZ);
    112  1.4     joerg 
    113  1.4     joerg 	if (nr < 0)
    114  1.4     joerg 		return (-1);
    115  1.1      cjep 
    116  1.4     joerg 	bufrem = nr;
    117  1.4     joerg 	return (0);
    118  1.1      cjep }
    119  1.1      cjep 
    120  1.4     joerg static inline int
    121  1.4     joerg grep_lnbufgrow(size_t newlen)
    122  1.1      cjep {
    123  1.1      cjep 
    124  1.4     joerg 	if (lnbuflen < newlen) {
    125  1.4     joerg 		lnbuf = grep_realloc(lnbuf, newlen);
    126  1.4     joerg 		lnbuflen = newlen;
    127  1.4     joerg 	}
    128  1.2    rillig 
    129  1.4     joerg 	return (0);
    130  1.4     joerg }
    131  1.2    rillig 
    132  1.4     joerg char *
    133  1.4     joerg grep_fgetln(struct file *f, size_t *lenp)
    134  1.4     joerg {
    135  1.4     joerg 	unsigned char *p;
    136  1.4     joerg 	char *ret;
    137  1.4     joerg 	size_t len;
    138  1.4     joerg 	size_t off;
    139  1.4     joerg 	ptrdiff_t diff;
    140  1.4     joerg 
    141  1.4     joerg 	/* Fill the buffer, if necessary */
    142  1.4     joerg 	if (bufrem == 0 && grep_refill(f) != 0)
    143  1.4     joerg 		goto error;
    144  1.4     joerg 
    145  1.4     joerg 	if (bufrem == 0) {
    146  1.4     joerg 		/* Return zero length to indicate EOF */
    147  1.4     joerg 		*lenp = 0;
    148  1.5     joerg 		return ((char *)bufpos);
    149  1.4     joerg 	}
    150  1.4     joerg 
    151  1.4     joerg 	/* Look for a newline in the remaining part of the buffer */
    152  1.7     joerg 	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
    153  1.4     joerg 		++p; /* advance over newline */
    154  1.5     joerg 		ret = (char *)bufpos;
    155  1.4     joerg 		len = p - bufpos;
    156  1.4     joerg 		bufrem -= len;
    157  1.4     joerg 		bufpos = p;
    158  1.4     joerg 		*lenp = len;
    159  1.4     joerg 		return (ret);
    160  1.4     joerg 	}
    161  1.4     joerg 
    162  1.4     joerg 	/* We have to copy the current buffered data to the line buffer */
    163  1.4     joerg 	for (len = bufrem, off = 0; ; len += bufrem) {
    164  1.4     joerg 		/* Make sure there is room for more data */
    165  1.4     joerg 		if (grep_lnbufgrow(len + LNBUFBUMP))
    166  1.4     joerg 			goto error;
    167  1.4     joerg 		memcpy(lnbuf + off, bufpos, len - off);
    168  1.4     joerg 		off = len;
    169  1.4     joerg 		if (grep_refill(f) != 0)
    170  1.4     joerg 			goto error;
    171  1.4     joerg 		if (bufrem == 0)
    172  1.4     joerg 			/* EOF: return partial line */
    173  1.4     joerg 			break;
    174  1.7     joerg 		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
    175  1.4     joerg 			continue;
    176  1.4     joerg 		/* got it: finish up the line (like code above) */
    177  1.4     joerg 		++p;
    178  1.4     joerg 		diff = p - bufpos;
    179  1.4     joerg 		len += diff;
    180  1.4     joerg 		if (grep_lnbufgrow(len))
    181  1.4     joerg 		    goto error;
    182  1.4     joerg 		memcpy(lnbuf + off, bufpos, diff);
    183  1.4     joerg 		bufrem -= diff;
    184  1.4     joerg 		bufpos = p;
    185  1.4     joerg 		break;
    186  1.1      cjep 	}
    187  1.4     joerg 	*lenp = len;
    188  1.5     joerg 	return ((char *)lnbuf);
    189  1.2    rillig 
    190  1.4     joerg error:
    191  1.4     joerg 	*lenp = 0;
    192  1.4     joerg 	return (NULL);
    193  1.1      cjep }
    194  1.1      cjep 
    195  1.4     joerg static inline struct file *
    196  1.4     joerg grep_file_init(struct file *f)
    197  1.1      cjep {
    198  1.1      cjep 
    199  1.4     joerg 	if (filebehave == FILE_GZIP &&
    200  1.4     joerg 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
    201  1.4     joerg 		goto error;
    202  1.4     joerg 
    203  1.9  christos #ifndef WITHOUT_BZ2
    204  1.4     joerg 	if (filebehave == FILE_BZIP &&
    205  1.4     joerg 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
    206  1.4     joerg 		goto error;
    207  1.9  christos #endif
    208  1.2    rillig 
    209  1.4     joerg 	/* Fill read buffer, also catches errors early */
    210  1.4     joerg 	if (grep_refill(f) != 0)
    211  1.4     joerg 		goto error;
    212  1.2    rillig 
    213  1.4     joerg 	/* Check for binary stuff, if necessary */
    214  1.7     joerg 	if (!nulldataflag && binbehave != BINFILE_TEXT &&
    215  1.7     joerg 	    memchr(bufpos, '\0', bufrem) != NULL)
    216  1.4     joerg 		f->binary = true;
    217  1.2    rillig 
    218  1.4     joerg 	return (f);
    219  1.4     joerg error:
    220  1.4     joerg 	close(f->fd);
    221  1.1      cjep 	free(f);
    222  1.4     joerg 	return (NULL);
    223  1.1      cjep }
    224  1.1      cjep 
    225  1.4     joerg /*
    226  1.4     joerg  * Opens a file for processing.
    227  1.4     joerg  */
    228  1.4     joerg struct file *
    229  1.4     joerg grep_open(const char *path)
    230  1.1      cjep {
    231  1.4     joerg 	struct file *f;
    232  1.4     joerg 
    233  1.4     joerg 	f = grep_malloc(sizeof *f);
    234  1.4     joerg 	memset(f, 0, sizeof *f);
    235  1.4     joerg 	if (path == NULL) {
    236  1.4     joerg 		/* Processing stdin implies --line-buffered. */
    237  1.4     joerg 		lbflag = true;
    238  1.4     joerg 		f->fd = STDIN_FILENO;
    239  1.4     joerg 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
    240  1.4     joerg 		free(f);
    241  1.4     joerg 		return (NULL);
    242  1.1      cjep 	}
    243  1.1      cjep 
    244  1.4     joerg 	return (grep_file_init(f));
    245  1.1      cjep }
    246  1.1      cjep 
    247  1.4     joerg /*
    248  1.4     joerg  * Closes a file.
    249  1.4     joerg  */
    250  1.1      cjep void
    251  1.4     joerg grep_close(struct file *f)
    252  1.1      cjep {
    253  1.4     joerg 
    254  1.4     joerg 	close(f->fd);
    255  1.4     joerg 
    256  1.4     joerg 	/* Reset read buffer and line buffer */
    257  1.4     joerg 	bufpos = buffer;
    258  1.4     joerg 	bufrem = 0;
    259  1.4     joerg 
    260  1.4     joerg 	free(lnbuf);
    261  1.4     joerg 	lnbuf = NULL;
    262  1.4     joerg 	lnbuflen = 0;
    263  1.1      cjep }
    264