file.c revision 1.9 1 /* $NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Codan Smrgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $");
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43
44 #include <err.h>
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stddef.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "grep.h"
55
/* Size in bytes of the shared read buffer, and the amount requested per refill. */
56 #define MAXBUFSIZ (32 * 1024)
/* Slack added to the requested size when the line buffer is grown in grep_fgetln(). */
57 #define LNBUFBUMP 80
58
/* zlib handle for the current file; assigned by gzdopen() in grep_file_init(). */
59 static gzFile gzbufdesc;
60 #ifndef WITHOUT_BZ2
/*
 * bzip2 handle for the current file; assigned by BZ2_bzdopen() in
 * grep_file_init() and reset to NULL by grep_refill() when it falls back
 * to plain reads.
 */
61 static BZFILE* bzbufdesc;
62 #endif
63
/* Shared read buffer filled by grep_refill(). */
64 static unsigned char buffer[MAXBUFSIZ];
/* Next unconsumed byte inside buffer. */
65 static unsigned char *bufpos;
/* Number of unconsumed bytes starting at bufpos. */
66 static size_t bufrem;
67
/* Heap buffer used to assemble a line that spans more than one refill. */
68 static unsigned char *lnbuf;
/* Currently allocated size of lnbuf in bytes. */
69 static size_t lnbuflen;
70
71 static inline int
72 grep_refill(struct file *f)
73 {
74 ssize_t nr;
75 int bzerr;
76
77 bufpos = buffer;
78 bufrem = 0;
79
80 if (filebehave == FILE_GZIP) {
81 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
82 #ifndef WITHOUT_BZ2
83 } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
84 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
85 switch (bzerr) {
86 case BZ_OK:
87 case BZ_STREAM_END:
88 /* No problem, nr will be okay */
89 break;
90 case BZ_DATA_ERROR_MAGIC:
91 /*
92 * As opposed to gzread(), which simply returns the
93 * plain file data, if it is not in the correct
94 * compressed format, BZ2_bzRead() instead aborts.
95 *
96 * So, just restart at the beginning of the file again,
97 * and use plain reads from now on.
98 */
99 BZ2_bzReadClose(&bzerr, bzbufdesc);
100 bzbufdesc = NULL;
101 if (lseek(f->fd, 0, SEEK_SET) == -1)
102 return (-1);
103 nr = read(f->fd, buffer, MAXBUFSIZ);
104 break;
105 default:
106 /* Make sure we exit with an error */
107 nr = -1;
108 }
109 #endif
110 } else
111 nr = read(f->fd, buffer, MAXBUFSIZ);
112
113 if (nr < 0)
114 return (-1);
115
116 bufrem = nr;
117 return (0);
118 }
119
120 static inline int
121 grep_lnbufgrow(size_t newlen)
122 {
123
124 if (lnbuflen < newlen) {
125 lnbuf = grep_realloc(lnbuf, newlen);
126 lnbuflen = newlen;
127 }
128
129 return (0);
130 }
131
132 char *
133 grep_fgetln(struct file *f, size_t *lenp)
134 {
135 unsigned char *p;
136 char *ret;
137 size_t len;
138 size_t off;
139 ptrdiff_t diff;
140
141 /* Fill the buffer, if necessary */
142 if (bufrem == 0 && grep_refill(f) != 0)
143 goto error;
144
145 if (bufrem == 0) {
146 /* Return zero length to indicate EOF */
147 *lenp = 0;
148 return ((char *)bufpos);
149 }
150
151 /* Look for a newline in the remaining part of the buffer */
152 if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
153 ++p; /* advance over newline */
154 ret = (char *)bufpos;
155 len = p - bufpos;
156 bufrem -= len;
157 bufpos = p;
158 *lenp = len;
159 return (ret);
160 }
161
162 /* We have to copy the current buffered data to the line buffer */
163 for (len = bufrem, off = 0; ; len += bufrem) {
164 /* Make sure there is room for more data */
165 if (grep_lnbufgrow(len + LNBUFBUMP))
166 goto error;
167 memcpy(lnbuf + off, bufpos, len - off);
168 off = len;
169 if (grep_refill(f) != 0)
170 goto error;
171 if (bufrem == 0)
172 /* EOF: return partial line */
173 break;
174 if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
175 continue;
176 /* got it: finish up the line (like code above) */
177 ++p;
178 diff = p - bufpos;
179 len += diff;
180 if (grep_lnbufgrow(len))
181 goto error;
182 memcpy(lnbuf + off, bufpos, diff);
183 bufrem -= diff;
184 bufpos = p;
185 break;
186 }
187 *lenp = len;
188 return ((char *)lnbuf);
189
190 error:
191 *lenp = 0;
192 return (NULL);
193 }
194
195 static inline struct file *
196 grep_file_init(struct file *f)
197 {
198
199 if (filebehave == FILE_GZIP &&
200 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
201 goto error;
202
203 #ifndef WITHOUT_BZ2
204 if (filebehave == FILE_BZIP &&
205 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
206 goto error;
207 #endif
208
209 /* Fill read buffer, also catches errors early */
210 if (grep_refill(f) != 0)
211 goto error;
212
213 /* Check for binary stuff, if necessary */
214 if (!nulldataflag && binbehave != BINFILE_TEXT &&
215 memchr(bufpos, '\0', bufrem) != NULL)
216 f->binary = true;
217
218 return (f);
219 error:
220 close(f->fd);
221 free(f);
222 return (NULL);
223 }
224
225 /*
226 * Opens a file for processing.
227 */
228 struct file *
229 grep_open(const char *path)
230 {
231 struct file *f;
232
233 f = grep_malloc(sizeof *f);
234 memset(f, 0, sizeof *f);
235 if (path == NULL) {
236 /* Processing stdin implies --line-buffered. */
237 lbflag = true;
238 f->fd = STDIN_FILENO;
239 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
240 free(f);
241 return (NULL);
242 }
243
244 return (grep_file_init(f));
245 }
246
247 /*
248 * Closes a file.
249 */
250 void
251 grep_close(struct file *f)
252 {
253
254 close(f->fd);
255
256 /* Reset read buffer and line buffer */
257 bufpos = buffer;
258 bufrem = 0;
259
260 free(lnbuf);
261 lnbuf = NULL;
262 lnbuflen = 0;
263 }
264