Home | History | Annotate | Line # | Download | only in grep
      1 /*	$NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $	*/
      2 /*	$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $	*/
      3 /*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
      4 
      5 /*-
      6  * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
      7  * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
      8  * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 #if HAVE_NBTOOL_CONFIG_H
     34 #include "nbtool_config.h"
     35 #endif
     36 
     37 #include <sys/cdefs.h>
     38 __RCSID("$NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $");
     39 
     40 #include <sys/param.h>
     41 #include <sys/types.h>
     42 #include <sys/stat.h>
     43 
     44 #include <err.h>
     45 #include <errno.h>
     46 #include <fcntl.h>
     47 #include <stddef.h>
     48 #include <stdlib.h>
     49 #include <string.h>
     50 #include <unistd.h>
     51 #include <wchar.h>
     52 #include <wctype.h>
     53 
     54 #include "grep.h"
     55 
/* Capacity of the read buffer and the most bytes requested per refill. */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra room requested when the line buffer grows for an unfinished line. */
#define	LNBUFBUMP	80

#ifndef WITHOUT_GZIP
/* zlib handle; assigned from gzdopen() in grep_file_init(). */
static gzFile gzbufdesc;
#endif
#ifndef WITHOUT_BZ2
/*
 * bzip2 handle; assigned from BZ2_bzdopen() in grep_file_init() and set to
 * NULL by grep_refill() once it falls back to plain reads.
 */
static BZFILE* bzbufdesc;
#endif

/* Read buffer shared by every file; refilled by grep_refill(). */
static unsigned char buffer[MAXBUFSIZ + 1];
/* Next unconsumed byte inside buffer. */
static unsigned char *bufpos;
/* Number of unconsumed bytes starting at bufpos. */
static size_t bufrem;

/* Heap line buffer handed out by grep_fgetln(); freed in grep_close(). */
static unsigned char *lnbuf;
/* Current allocation size of lnbuf in bytes. */
static size_t lnbuflen;
     72 
     73 static inline int
     74 grep_refill(struct file *f)
     75 {
     76 	ssize_t nr = -1;
     77 #ifndef WITHOUT_BZ2
     78 	int bzerr;
     79 #endif
     80 
     81 	bufpos = buffer;
     82 	bufrem = 0;
     83 
     84 #ifndef WITHOUT_GZIP
     85 	if (filebehave == FILE_GZIP) {
     86 		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
     87 		if (nr == -1)
     88 			return -1;
     89 	}
     90 #endif
     91 #ifndef WITHOUT_BZ2
     92 	if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
     93 		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
     94 		switch (bzerr) {
     95 		case BZ_OK:
     96 		case BZ_STREAM_END:
     97 			/* No problem, nr will be okay */
     98 			break;
     99 		case BZ_DATA_ERROR_MAGIC:
    100 			/*
    101 			 * As opposed to gzread(), which simply returns the
    102 			 * plain file data, if it is not in the correct
    103 			 * compressed format, BZ2_bzRead() instead aborts.
    104 			 *
    105 			 * So, just restart at the beginning of the file again,
    106 			 * and use plain reads from now on.
    107 			 */
    108 			BZ2_bzReadClose(&bzerr, bzbufdesc);
    109 			bzbufdesc = NULL;
    110 			if (lseek(f->fd, 0, SEEK_SET) == -1)
    111 				return (-1);
    112 			nr = read(f->fd, buffer, MAXBUFSIZ);
    113 			break;
    114 		default:
    115 			/* Make sure we exit with an error */
    116 			nr = -1;
    117 		}
    118 		if (nr == -1)
    119 			return -1;
    120 	}
    121 #endif
    122 	if (nr == -1) {
    123 		nr = read(f->fd, buffer, MAXBUFSIZ);
    124 	}
    125 
    126 	if (nr < 0)
    127 		return (-1);
    128 
    129 	bufrem = nr;
    130 	return (0);
    131 }
    132 
    133 static inline void
    134 grep_lnbufgrow(size_t newlen)
    135 {
    136 
    137 	if (lnbuflen < newlen) {
    138 		lnbuf = grep_realloc(lnbuf, newlen);
    139 		lnbuflen = newlen;
    140 	}
    141 }
    142 
    143 static void
    144 grep_copyline(size_t off, size_t len)
    145 {
    146 	memcpy(lnbuf + off, bufpos, len);
    147 	lnbuf[off + len] = '\0';
    148 }
    149 
    150 char *
    151 grep_fgetln(struct file *f, size_t *lenp)
    152 {
    153 	unsigned char *p;
    154 	size_t len;
    155 	size_t off;
    156 	ptrdiff_t diff;
    157 
    158 	/* Fill the buffer, if necessary */
    159 	if (bufrem == 0 && grep_refill(f) != 0)
    160 		goto error;
    161 
    162 	if (bufrem == 0) {
    163 		/* Return zero length to indicate EOF */
    164 		*lenp = 0;
    165 		return ((char *)bufpos);
    166 	}
    167 
    168 	/* Look for a newline in the remaining part of the buffer */
    169 	if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
    170 		++p; /* advance over newline */
    171 		len = p - bufpos;
    172 		grep_lnbufgrow(len + 1);
    173 		grep_copyline(0, len);
    174 		*lenp = len;
    175 		bufrem -= len;
    176 		bufpos = p;
    177 		return (char *)lnbuf;
    178 	}
    179 
    180 	/* We have to copy the current buffered data to the line buffer */
    181 	for (len = bufrem, off = 0; ; len += bufrem) {
    182 		/* Make sure there is room for more data */
    183 		grep_lnbufgrow(len + LNBUFBUMP);
    184 		grep_copyline(off, len - off);
    185 		off = len;
    186 		if (grep_refill(f) != 0)
    187 			goto error;
    188 		if (bufrem == 0)
    189 			/* EOF: return partial line */
    190 			break;
    191 		if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
    192 			continue;
    193 		/* got it: finish up the line (like code above) */
    194 		++p;
    195 		diff = p - bufpos;
    196 		len += diff;
    197 		grep_lnbufgrow(len + 1);
    198 		grep_copyline(off, diff);
    199 		bufrem -= diff;
    200 		bufpos = p;
    201 		break;
    202 	}
    203 	*lenp = len;
    204 	return ((char *)lnbuf);
    205 
    206 error:
    207 	*lenp = 0;
    208 	return (NULL);
    209 }
    210 
    211 static inline struct file *
    212 grep_file_init(struct file *f)
    213 {
    214 
    215 #ifndef WITHOUT_GZIP
    216 	if (filebehave == FILE_GZIP &&
    217 	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
    218 		goto error;
    219 #endif
    220 
    221 #ifndef WITHOUT_BZ2
    222 	if (filebehave == FILE_BZIP &&
    223 	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
    224 		goto error;
    225 #endif
    226 
    227 	/* Fill read buffer, also catches errors early */
    228 	if (grep_refill(f) != 0)
    229 		goto error;
    230 
    231 	/* Check for binary stuff, if necessary */
    232 	if (!nulldataflag && binbehave != BINFILE_TEXT &&
    233 	    memchr(bufpos, '\0', bufrem) != NULL)
    234 		f->binary = true;
    235 
    236 	return (f);
    237 error:
    238 	close(f->fd);
    239 	free(f);
    240 	return (NULL);
    241 }
    242 
    243 /*
    244  * Opens a file for processing.
    245  */
    246 struct file *
    247 grep_open(const char *path)
    248 {
    249 	struct file *f;
    250 
    251 	f = grep_malloc(sizeof *f);
    252 	memset(f, 0, sizeof *f);
    253 	if (path == NULL) {
    254 		/* Processing stdin implies --line-buffered. */
    255 		lbflag = true;
    256 		f->fd = STDIN_FILENO;
    257 	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
    258 		free(f);
    259 		return (NULL);
    260 	}
    261 
    262 	return (grep_file_init(f));
    263 }
    264 
    265 /*
    266  * Closes a file.
    267  */
    268 void
    269 grep_close(struct file *f)
    270 {
    271 
    272 	close(f->fd);
    273 
    274 	/* Reset read buffer and line buffer */
    275 	bufpos = buffer;
    276 	bufrem = 0;
    277 
    278 	free(lnbuf);
    279 	lnbuf = NULL;
    280 	lnbuflen = 0;
    281 }
    282