file.c revision 1.7.44.1 1 /* $NetBSD: file.c,v 1.7.44.1 2019/06/10 22:10:20 christos Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.7.44.1 2019/06/10 22:10:20 christos Exp $");
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43
44 #include <err.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "grep.h"
55
56 #define MAXBUFSIZ (32 * 1024)
57 #define LNBUFBUMP 80
58
59 #ifndef WITHOUT_GZIP
60 static gzFile gzbufdesc;
61 #endif
62 #ifndef WITHOUT_BZ2
63 static BZFILE* bzbufdesc;
64 #endif
65
66 static unsigned char buffer[MAXBUFSIZ + 1];
67 static unsigned char *bufpos;
68 static size_t bufrem;
69
70 static unsigned char *lnbuf;
71 static size_t lnbuflen;
72
73 static inline int
74 grep_refill(struct file *f)
75 {
76 ssize_t nr = -1;
77 int bzerr;
78
79 bufpos = buffer;
80 bufrem = 0;
81
82 #ifndef WITHOUT_GZIP
83 if (filebehave == FILE_GZIP) {
84 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
85 if (nr == -1)
86 return -1;
87 }
88 #endif
89 #ifndef WITHOUT_BZ2
90 if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
91 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
92 switch (bzerr) {
93 case BZ_OK:
94 case BZ_STREAM_END:
95 /* No problem, nr will be okay */
96 break;
97 case BZ_DATA_ERROR_MAGIC:
98 /*
99 * As opposed to gzread(), which simply returns the
100 * plain file data, if it is not in the correct
101 * compressed format, BZ2_bzRead() instead aborts.
102 *
103 * So, just restart at the beginning of the file again,
104 * and use plain reads from now on.
105 */
106 BZ2_bzReadClose(&bzerr, bzbufdesc);
107 bzbufdesc = NULL;
108 if (lseek(f->fd, 0, SEEK_SET) == -1)
109 return (-1);
110 nr = read(f->fd, buffer, MAXBUFSIZ);
111 break;
112 default:
113 /* Make sure we exit with an error */
114 nr = -1;
115 }
116 if (nr == -1)
117 return -1;
118 }
119 #endif
120 if (nr == -1) {
121 nr = read(f->fd, buffer, MAXBUFSIZ);
122 }
123
124 if (nr < 0)
125 return (-1);
126
127 bufrem = nr;
128 return (0);
129 }
130
131 static inline void
132 grep_lnbufgrow(size_t newlen)
133 {
134
135 if (lnbuflen < newlen) {
136 lnbuf = grep_realloc(lnbuf, newlen);
137 lnbuflen = newlen;
138 }
139 }
140
141 static void
142 grep_copyline(size_t off, size_t len)
143 {
144 memcpy(lnbuf + off, bufpos, len);
145 lnbuf[off + len] = '\0';
146 }
147
148 char *
149 grep_fgetln(struct file *f, size_t *lenp)
150 {
151 unsigned char *p;
152 size_t len;
153 size_t off;
154 ptrdiff_t diff;
155
156 /* Fill the buffer, if necessary */
157 if (bufrem == 0 && grep_refill(f) != 0)
158 goto error;
159
160 if (bufrem == 0) {
161 /* Return zero length to indicate EOF */
162 *lenp = 0;
163 return ((char *)bufpos);
164 }
165
166 /* Look for a newline in the remaining part of the buffer */
167 if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
168 ++p; /* advance over newline */
169 len = p - bufpos;
170 grep_lnbufgrow(len + 1);
171 grep_copyline(0, len);
172 *lenp = len;
173 bufrem -= len;
174 bufpos = p;
175 return (char *)lnbuf;
176 }
177
178 /* We have to copy the current buffered data to the line buffer */
179 for (len = bufrem, off = 0; ; len += bufrem) {
180 /* Make sure there is room for more data */
181 grep_lnbufgrow(len + LNBUFBUMP);
182 grep_copyline(off, len - off);
183 off = len;
184 if (grep_refill(f) != 0)
185 goto error;
186 if (bufrem == 0)
187 /* EOF: return partial line */
188 break;
189 if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
190 continue;
191 /* got it: finish up the line (like code above) */
192 ++p;
193 diff = p - bufpos;
194 len += diff;
195 grep_lnbufgrow(len + 1);
196 grep_copyline(off, diff);
197 bufrem -= diff;
198 bufpos = p;
199 break;
200 }
201 *lenp = len;
202 return ((char *)lnbuf);
203
204 error:
205 *lenp = 0;
206 return (NULL);
207 }
208
209 static inline struct file *
210 grep_file_init(struct file *f)
211 {
212
213 #ifndef WITHOUT_GZIP
214 if (filebehave == FILE_GZIP &&
215 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
216 goto error;
217 #endif
218
219 #ifndef WITHOUT_BZ2
220 if (filebehave == FILE_BZIP &&
221 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
222 goto error;
223 #endif
224
225 /* Fill read buffer, also catches errors early */
226 if (grep_refill(f) != 0)
227 goto error;
228
229 /* Check for binary stuff, if necessary */
230 if (!nulldataflag && binbehave != BINFILE_TEXT &&
231 memchr(bufpos, '\0', bufrem) != NULL)
232 f->binary = true;
233
234 return (f);
235 error:
236 close(f->fd);
237 free(f);
238 return (NULL);
239 }
240
241 /*
242 * Opens a file for processing.
243 */
244 struct file *
245 grep_open(const char *path)
246 {
247 struct file *f;
248
249 f = grep_malloc(sizeof *f);
250 memset(f, 0, sizeof *f);
251 if (path == NULL) {
252 /* Processing stdin implies --line-buffered. */
253 lbflag = true;
254 f->fd = STDIN_FILENO;
255 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
256 free(f);
257 return (NULL);
258 }
259
260 return (grep_file_init(f));
261 }
262
263 /*
264 * Closes a file.
265 */
266 void
267 grep_close(struct file *f)
268 {
269
270 close(f->fd);
271
272 /* Reset read buffer and line buffer */
273 bufpos = buffer;
274 bufrem = 0;
275
276 free(lnbuf);
277 lnbuf = NULL;
278 lnbuflen = 0;
279 }
280