file.c revision 1.10 1 1.10 christos /* $NetBSD: file.c,v 1.10 2018/08/12 09:03:21 christos Exp $ */
2 1.4 joerg /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 1.4 joerg /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4 1.2 rillig
5 1.1 cjep /*-
6 1.4 joerg * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 1.4 joerg * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 1.4 joerg * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 1.1 cjep * All rights reserved.
10 1.1 cjep *
11 1.1 cjep * Redistribution and use in source and binary forms, with or without
12 1.1 cjep * modification, are permitted provided that the following conditions
13 1.1 cjep * are met:
14 1.1 cjep * 1. Redistributions of source code must retain the above copyright
15 1.1 cjep * notice, this list of conditions and the following disclaimer.
16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 cjep * notice, this list of conditions and the following disclaimer in the
18 1.1 cjep * documentation and/or other materials provided with the distribution.
19 1.1 cjep *
20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 cjep * SUCH DAMAGE.
31 1.1 cjep */
32 1.1 cjep
33 1.6 joerg #if HAVE_NBTOOL_CONFIG_H
34 1.6 joerg #include "nbtool_config.h"
35 1.6 joerg #endif
36 1.6 joerg
37 1.2 rillig #include <sys/cdefs.h>
38 1.10 christos __RCSID("$NetBSD: file.c,v 1.10 2018/08/12 09:03:21 christos Exp $");
39 1.2 rillig
40 1.1 cjep #include <sys/param.h>
41 1.4 joerg #include <sys/types.h>
42 1.4 joerg #include <sys/stat.h>
43 1.1 cjep
44 1.1 cjep #include <err.h>
45 1.4 joerg #include <errno.h>
46 1.4 joerg #include <fcntl.h>
47 1.4 joerg #include <stddef.h>
48 1.1 cjep #include <stdlib.h>
49 1.4 joerg #include <string.h>
50 1.4 joerg #include <unistd.h>
51 1.4 joerg #include <wchar.h>
52 1.4 joerg #include <wctype.h>
53 1.1 cjep
54 1.1 cjep #include "grep.h"
55 1.1 cjep
56 1.4 joerg #define MAXBUFSIZ (32 * 1024)
57 1.4 joerg #define LNBUFBUMP 80
58 1.4 joerg
59 1.10 christos #ifndef WITHOUT_GZIP
60 1.4 joerg static gzFile gzbufdesc;
61 1.10 christos #endif
62 1.9 christos #ifndef WITHOUT_BZ2
63 1.4 joerg static BZFILE* bzbufdesc;
64 1.9 christos #endif
65 1.4 joerg
66 1.4 joerg static unsigned char buffer[MAXBUFSIZ];
67 1.4 joerg static unsigned char *bufpos;
68 1.4 joerg static size_t bufrem;
69 1.4 joerg
70 1.4 joerg static unsigned char *lnbuf;
71 1.2 rillig static size_t lnbuflen;
72 1.1 cjep
73 1.4 joerg static inline int
74 1.4 joerg grep_refill(struct file *f)
75 1.4 joerg {
76 1.10 christos ssize_t nr = -1;
77 1.4 joerg int bzerr;
78 1.4 joerg
79 1.4 joerg bufpos = buffer;
80 1.4 joerg bufrem = 0;
81 1.4 joerg
82 1.10 christos #ifndef WITHOUT_GZIP
83 1.9 christos if (filebehave == FILE_GZIP) {
84 1.4 joerg nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
85 1.10 christos if (nr == -1)
86 1.10 christos return -1;
87 1.10 christos }
88 1.10 christos #endif
89 1.9 christos #ifndef WITHOUT_BZ2
90 1.10 christos if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
91 1.4 joerg nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
92 1.4 joerg switch (bzerr) {
93 1.4 joerg case BZ_OK:
94 1.4 joerg case BZ_STREAM_END:
95 1.4 joerg /* No problem, nr will be okay */
96 1.2 rillig break;
97 1.4 joerg case BZ_DATA_ERROR_MAGIC:
98 1.4 joerg /*
99 1.4 joerg * As opposed to gzread(), which simply returns the
100 1.4 joerg * plain file data, if it is not in the correct
101 1.4 joerg * compressed format, BZ2_bzRead() instead aborts.
102 1.4 joerg *
103 1.4 joerg * So, just restart at the beginning of the file again,
104 1.4 joerg * and use plain reads from now on.
105 1.4 joerg */
106 1.4 joerg BZ2_bzReadClose(&bzerr, bzbufdesc);
107 1.4 joerg bzbufdesc = NULL;
108 1.4 joerg if (lseek(f->fd, 0, SEEK_SET) == -1)
109 1.4 joerg return (-1);
110 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
111 1.1 cjep break;
112 1.4 joerg default:
113 1.4 joerg /* Make sure we exit with an error */
114 1.4 joerg nr = -1;
115 1.1 cjep }
116 1.10 christos if (nr == -1)
117 1.10 christos return -1;
118 1.10 christos }
119 1.9 christos #endif
120 1.10 christos if (nr == -1) {
121 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
122 1.10 christos }
123 1.4 joerg
124 1.4 joerg if (nr < 0)
125 1.4 joerg return (-1);
126 1.1 cjep
127 1.4 joerg bufrem = nr;
128 1.4 joerg return (0);
129 1.1 cjep }
130 1.1 cjep
131 1.4 joerg static inline int
132 1.4 joerg grep_lnbufgrow(size_t newlen)
133 1.1 cjep {
134 1.1 cjep
135 1.4 joerg if (lnbuflen < newlen) {
136 1.4 joerg lnbuf = grep_realloc(lnbuf, newlen);
137 1.4 joerg lnbuflen = newlen;
138 1.4 joerg }
139 1.2 rillig
140 1.4 joerg return (0);
141 1.4 joerg }
142 1.2 rillig
143 1.4 joerg char *
144 1.4 joerg grep_fgetln(struct file *f, size_t *lenp)
145 1.4 joerg {
146 1.4 joerg unsigned char *p;
147 1.4 joerg char *ret;
148 1.4 joerg size_t len;
149 1.4 joerg size_t off;
150 1.4 joerg ptrdiff_t diff;
151 1.4 joerg
152 1.4 joerg /* Fill the buffer, if necessary */
153 1.4 joerg if (bufrem == 0 && grep_refill(f) != 0)
154 1.4 joerg goto error;
155 1.4 joerg
156 1.4 joerg if (bufrem == 0) {
157 1.4 joerg /* Return zero length to indicate EOF */
158 1.4 joerg *lenp = 0;
159 1.5 joerg return ((char *)bufpos);
160 1.4 joerg }
161 1.4 joerg
162 1.4 joerg /* Look for a newline in the remaining part of the buffer */
163 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
164 1.4 joerg ++p; /* advance over newline */
165 1.5 joerg ret = (char *)bufpos;
166 1.4 joerg len = p - bufpos;
167 1.4 joerg bufrem -= len;
168 1.4 joerg bufpos = p;
169 1.4 joerg *lenp = len;
170 1.4 joerg return (ret);
171 1.4 joerg }
172 1.4 joerg
173 1.4 joerg /* We have to copy the current buffered data to the line buffer */
174 1.4 joerg for (len = bufrem, off = 0; ; len += bufrem) {
175 1.4 joerg /* Make sure there is room for more data */
176 1.4 joerg if (grep_lnbufgrow(len + LNBUFBUMP))
177 1.4 joerg goto error;
178 1.4 joerg memcpy(lnbuf + off, bufpos, len - off);
179 1.4 joerg off = len;
180 1.4 joerg if (grep_refill(f) != 0)
181 1.4 joerg goto error;
182 1.4 joerg if (bufrem == 0)
183 1.4 joerg /* EOF: return partial line */
184 1.4 joerg break;
185 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
186 1.4 joerg continue;
187 1.4 joerg /* got it: finish up the line (like code above) */
188 1.4 joerg ++p;
189 1.4 joerg diff = p - bufpos;
190 1.4 joerg len += diff;
191 1.4 joerg if (grep_lnbufgrow(len))
192 1.4 joerg goto error;
193 1.4 joerg memcpy(lnbuf + off, bufpos, diff);
194 1.4 joerg bufrem -= diff;
195 1.4 joerg bufpos = p;
196 1.4 joerg break;
197 1.1 cjep }
198 1.4 joerg *lenp = len;
199 1.5 joerg return ((char *)lnbuf);
200 1.2 rillig
201 1.4 joerg error:
202 1.4 joerg *lenp = 0;
203 1.4 joerg return (NULL);
204 1.1 cjep }
205 1.1 cjep
206 1.4 joerg static inline struct file *
207 1.4 joerg grep_file_init(struct file *f)
208 1.1 cjep {
209 1.1 cjep
210 1.10 christos #ifndef WITHOUT_GZIP
211 1.4 joerg if (filebehave == FILE_GZIP &&
212 1.4 joerg (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
213 1.4 joerg goto error;
214 1.10 christos #endif
215 1.4 joerg
216 1.9 christos #ifndef WITHOUT_BZ2
217 1.4 joerg if (filebehave == FILE_BZIP &&
218 1.4 joerg (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
219 1.4 joerg goto error;
220 1.9 christos #endif
221 1.2 rillig
222 1.4 joerg /* Fill read buffer, also catches errors early */
223 1.4 joerg if (grep_refill(f) != 0)
224 1.4 joerg goto error;
225 1.2 rillig
226 1.4 joerg /* Check for binary stuff, if necessary */
227 1.7 joerg if (!nulldataflag && binbehave != BINFILE_TEXT &&
228 1.7 joerg memchr(bufpos, '\0', bufrem) != NULL)
229 1.4 joerg f->binary = true;
230 1.2 rillig
231 1.4 joerg return (f);
232 1.4 joerg error:
233 1.4 joerg close(f->fd);
234 1.1 cjep free(f);
235 1.4 joerg return (NULL);
236 1.1 cjep }
237 1.1 cjep
238 1.4 joerg /*
239 1.4 joerg * Opens a file for processing.
240 1.4 joerg */
241 1.4 joerg struct file *
242 1.4 joerg grep_open(const char *path)
243 1.1 cjep {
244 1.4 joerg struct file *f;
245 1.4 joerg
246 1.4 joerg f = grep_malloc(sizeof *f);
247 1.4 joerg memset(f, 0, sizeof *f);
248 1.4 joerg if (path == NULL) {
249 1.4 joerg /* Processing stdin implies --line-buffered. */
250 1.4 joerg lbflag = true;
251 1.4 joerg f->fd = STDIN_FILENO;
252 1.4 joerg } else if ((f->fd = open(path, O_RDONLY)) == -1) {
253 1.4 joerg free(f);
254 1.4 joerg return (NULL);
255 1.1 cjep }
256 1.1 cjep
257 1.4 joerg return (grep_file_init(f));
258 1.1 cjep }
259 1.1 cjep
260 1.4 joerg /*
261 1.4 joerg * Closes a file.
262 1.4 joerg */
263 1.1 cjep void
264 1.4 joerg grep_close(struct file *f)
265 1.1 cjep {
266 1.4 joerg
267 1.4 joerg close(f->fd);
268 1.4 joerg
269 1.4 joerg /* Reset read buffer and line buffer */
270 1.4 joerg bufpos = buffer;
271 1.4 joerg bufrem = 0;
272 1.4 joerg
273 1.4 joerg free(lnbuf);
274 1.4 joerg lnbuf = NULL;
275 1.4 joerg lnbuflen = 0;
276 1.1 cjep }
277