Home | History | Annotate | Line # | Download | only in grep
file.c revision 1.5
      1 /*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
      2 /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
      3 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
      4 
      5 /*-
      6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
      7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __RCSID("$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $");
     35 
     36 #include <sys/param.h>
     37 #include <sys/types.h>
     38 #include <sys/stat.h>
     39 
     40 #include <bzlib.h>
     41 #include <err.h>
     42 #include <errno.h>
     43 #include <fcntl.h>
     44 #include <stddef.h>
     45 #include <stdlib.h>
     46 #include <string.h>
     47 #include <unistd.h>
     48 #include <wchar.h>
     49 #include <wctype.h>
     50 #include <zlib.h>
     51 
     52 #include "grep.h"
     53 
     54 #define	MAXBUFSIZ	(32 * 1024)
     55 #define	LNBUFBUMP	80
     56 
     57 static gzFile gzbufdesc;
     58 static BZFILE* bzbufdesc;
     59 
     60 static unsigned char buffer[MAXBUFSIZ];
     61 static unsigned char *bufpos;
     62 static size_t bufrem;
     63 
     64 static unsigned char *lnbuf;
     65 static size_t lnbuflen;
     66 
     67 static inline int
     68 grep_refill(struct file *f)
     69 {
     70 	ssize_t nr;
     71 	int bzerr;
     72 
     73 	bufpos = buffer;
     74 	bufrem = 0;
     75 
     76 	if (filebehave == FILE_GZIP)
     77 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
     78 	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
     79 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
     80 		switch (bzerr) {
     81 		case BZ_OK:
     82 		case BZ_STREAM_END:
     83 			/* No problem, nr will be okay */
     84 			break;
     85 		case BZ_DATA_ERROR_MAGIC:
     86 			/*
     87 			 * As opposed to gzread(), which simply returns the
     88 			 * plain file data, if it is not in the correct
     89 			 * compressed format, BZ2_bzRead() instead aborts.
     90 			 *
     91 			 * So, just restart at the beginning of the file again,
     92 			 * and use plain reads from now on.
     93 			 */
     94 			BZ2_bzReadClose(&bzerr, bzbufdesc);
     95 			bzbufdesc = NULL;
     96 			if (lseek(f->fd, 0, SEEK_SET) == -1)
     97 				return (-1);
     98 			nr = read(f->fd, buffer, MAXBUFSIZ);
     99 			break;
    100 		default:
    101 			/* Make sure we exit with an error */
    102 			nr = -1;
    103 		}
    104 	} else
    105 		nr = read(f->fd, buffer, MAXBUFSIZ);
    106 
    107 	if (nr < 0)
    108 		return (-1);
    109 
    110 	bufrem = nr;
    111 	return (0);
    112 }
    113 
    114 static inline int
    115 grep_lnbufgrow(size_t newlen)
    116 {
    117 
    118 	if (lnbuflen < newlen) {
    119 		lnbuf = grep_realloc(lnbuf, newlen);
    120 		lnbuflen = newlen;
    121 	}
    122 
    123 	return (0);
    124 }
    125 
    126 char *
    127 grep_fgetln(struct file *f, size_t *lenp)
    128 {
    129 	unsigned char *p;
    130 	char *ret;
    131 	size_t len;
    132 	size_t off;
    133 	ptrdiff_t diff;
    134 
    135 	/* Fill the buffer, if necessary */
    136 	if (bufrem == 0 && grep_refill(f) != 0)
    137 		goto error;
    138 
    139 	if (bufrem == 0) {
    140 		/* Return zero length to indicate EOF */
    141 		*lenp = 0;
    142 		return ((char *)bufpos);
    143 	}
    144 
    145 	/* Look for a newline in the remaining part of the buffer */
    146 	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
    147 		++p; /* advance over newline */
    148 		ret = (char *)bufpos;
    149 		len = p - bufpos;
    150 		bufrem -= len;
    151 		bufpos = p;
    152 		*lenp = len;
    153 		return (ret);
    154 	}
    155 
    156 	/* We have to copy the current buffered data to the line buffer */
    157 	for (len = bufrem, off = 0; ; len += bufrem) {
    158 		/* Make sure there is room for more data */
    159 		if (grep_lnbufgrow(len + LNBUFBUMP))
    160 			goto error;
    161 		memcpy(lnbuf + off, bufpos, len - off);
    162 		off = len;
    163 		if (grep_refill(f) != 0)
    164 			goto error;
    165 		if (bufrem == 0)
    166 			/* EOF: return partial line */
    167 			break;
    168 		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
    169 			continue;
    170 		/* got it: finish up the line (like code above) */
    171 		++p;
    172 		diff = p - bufpos;
    173 		len += diff;
    174 		if (grep_lnbufgrow(len))
    175 		    goto error;
    176 		memcpy(lnbuf + off, bufpos, diff);
    177 		bufrem -= diff;
    178 		bufpos = p;
    179 		break;
    180 	}
    181 	*lenp = len;
    182 	return ((char *)lnbuf);
    183 
    184 error:
    185 	*lenp = 0;
    186 	return (NULL);
    187 }
    188 
    189 static inline struct file *
    190 grep_file_init(struct file *f)
    191 {
    192 
    193 	if (filebehave == FILE_GZIP &&
    194 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
    195 		goto error;
    196 
    197 	if (filebehave == FILE_BZIP &&
    198 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
    199 		goto error;
    200 
    201 	/* Fill read buffer, also catches errors early */
    202 	if (grep_refill(f) != 0)
    203 		goto error;
    204 
    205 	/* Check for binary stuff, if necessary */
    206 	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
    207 		f->binary = true;
    208 
    209 	return (f);
    210 error:
    211 	close(f->fd);
    212 	free(f);
    213 	return (NULL);
    214 }
    215 
    216 /*
    217  * Opens a file for processing.
    218  */
    219 struct file *
    220 grep_open(const char *path)
    221 {
    222 	struct file *f;
    223 
    224 	f = grep_malloc(sizeof *f);
    225 	memset(f, 0, sizeof *f);
    226 	if (path == NULL) {
    227 		/* Processing stdin implies --line-buffered. */
    228 		lbflag = true;
    229 		f->fd = STDIN_FILENO;
    230 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
    231 		free(f);
    232 		return (NULL);
    233 	}
    234 
    235 	return (grep_file_init(f));
    236 }
    237 
    238 /*
    239  * Closes a file.
    240  */
    241 void
    242 grep_close(struct file *f)
    243 {
    244 
    245 	close(f->fd);
    246 
    247 	/* Reset read buffer and line buffer */
    248 	bufpos = buffer;
    249 	bufrem = 0;
    250 
    251 	free(lnbuf);
    252 	lnbuf = NULL;
    253 	lnbuflen = 0;
    254 }
    255