file.c revision 1.9 1 1.9 christos /* $NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $ */
2 1.4 joerg /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 1.4 joerg /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4 1.2 rillig
5 1.1 cjep /*-
6 1.4 joerg * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 1.4 joerg * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 1.4 joerg * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 1.1 cjep * All rights reserved.
10 1.1 cjep *
11 1.1 cjep * Redistribution and use in source and binary forms, with or without
12 1.1 cjep * modification, are permitted provided that the following conditions
13 1.1 cjep * are met:
14 1.1 cjep * 1. Redistributions of source code must retain the above copyright
15 1.1 cjep * notice, this list of conditions and the following disclaimer.
16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 cjep * notice, this list of conditions and the following disclaimer in the
18 1.1 cjep * documentation and/or other materials provided with the distribution.
19 1.1 cjep *
20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 cjep * SUCH DAMAGE.
31 1.1 cjep */
32 1.1 cjep
33 1.6 joerg #if HAVE_NBTOOL_CONFIG_H
34 1.6 joerg #include "nbtool_config.h"
35 1.6 joerg #endif
36 1.6 joerg
37 1.2 rillig #include <sys/cdefs.h>
38 1.9 christos __RCSID("$NetBSD: file.c,v 1.9 2018/08/12 07:53:19 christos Exp $");
39 1.2 rillig
40 1.1 cjep #include <sys/param.h>
41 1.4 joerg #include <sys/types.h>
42 1.4 joerg #include <sys/stat.h>
43 1.1 cjep
44 1.1 cjep #include <err.h>
45 1.4 joerg #include <errno.h>
46 1.4 joerg #include <fcntl.h>
47 1.4 joerg #include <stddef.h>
48 1.1 cjep #include <stdlib.h>
49 1.4 joerg #include <string.h>
50 1.4 joerg #include <unistd.h>
51 1.4 joerg #include <wchar.h>
52 1.4 joerg #include <wctype.h>
53 1.1 cjep
54 1.1 cjep #include "grep.h"
55 1.1 cjep
56 1.4 joerg #define MAXBUFSIZ (32 * 1024)
57 1.4 joerg #define LNBUFBUMP 80
58 1.4 joerg
59 1.4 joerg static gzFile gzbufdesc;
60 1.9 christos #ifndef WITHOUT_BZ2
61 1.4 joerg static BZFILE* bzbufdesc;
62 1.9 christos #endif
63 1.4 joerg
64 1.4 joerg static unsigned char buffer[MAXBUFSIZ];
65 1.4 joerg static unsigned char *bufpos;
66 1.4 joerg static size_t bufrem;
67 1.4 joerg
68 1.4 joerg static unsigned char *lnbuf;
69 1.2 rillig static size_t lnbuflen;
70 1.1 cjep
71 1.4 joerg static inline int
72 1.4 joerg grep_refill(struct file *f)
73 1.4 joerg {
74 1.4 joerg ssize_t nr;
75 1.4 joerg int bzerr;
76 1.4 joerg
77 1.4 joerg bufpos = buffer;
78 1.4 joerg bufrem = 0;
79 1.4 joerg
80 1.9 christos if (filebehave == FILE_GZIP) {
81 1.4 joerg nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
82 1.9 christos #ifndef WITHOUT_BZ2
83 1.9 christos } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
84 1.4 joerg nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
85 1.4 joerg switch (bzerr) {
86 1.4 joerg case BZ_OK:
87 1.4 joerg case BZ_STREAM_END:
88 1.4 joerg /* No problem, nr will be okay */
89 1.2 rillig break;
90 1.4 joerg case BZ_DATA_ERROR_MAGIC:
91 1.4 joerg /*
92 1.4 joerg * As opposed to gzread(), which simply returns the
93 1.4 joerg * plain file data, if it is not in the correct
94 1.4 joerg * compressed format, BZ2_bzRead() instead aborts.
95 1.4 joerg *
96 1.4 joerg * So, just restart at the beginning of the file again,
97 1.4 joerg * and use plain reads from now on.
98 1.4 joerg */
99 1.4 joerg BZ2_bzReadClose(&bzerr, bzbufdesc);
100 1.4 joerg bzbufdesc = NULL;
101 1.4 joerg if (lseek(f->fd, 0, SEEK_SET) == -1)
102 1.4 joerg return (-1);
103 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
104 1.1 cjep break;
105 1.4 joerg default:
106 1.4 joerg /* Make sure we exit with an error */
107 1.4 joerg nr = -1;
108 1.1 cjep }
109 1.9 christos #endif
110 1.4 joerg } else
111 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
112 1.4 joerg
113 1.4 joerg if (nr < 0)
114 1.4 joerg return (-1);
115 1.1 cjep
116 1.4 joerg bufrem = nr;
117 1.4 joerg return (0);
118 1.1 cjep }
119 1.1 cjep
120 1.4 joerg static inline int
121 1.4 joerg grep_lnbufgrow(size_t newlen)
122 1.1 cjep {
123 1.1 cjep
124 1.4 joerg if (lnbuflen < newlen) {
125 1.4 joerg lnbuf = grep_realloc(lnbuf, newlen);
126 1.4 joerg lnbuflen = newlen;
127 1.4 joerg }
128 1.2 rillig
129 1.4 joerg return (0);
130 1.4 joerg }
131 1.2 rillig
132 1.4 joerg char *
133 1.4 joerg grep_fgetln(struct file *f, size_t *lenp)
134 1.4 joerg {
135 1.4 joerg unsigned char *p;
136 1.4 joerg char *ret;
137 1.4 joerg size_t len;
138 1.4 joerg size_t off;
139 1.4 joerg ptrdiff_t diff;
140 1.4 joerg
141 1.4 joerg /* Fill the buffer, if necessary */
142 1.4 joerg if (bufrem == 0 && grep_refill(f) != 0)
143 1.4 joerg goto error;
144 1.4 joerg
145 1.4 joerg if (bufrem == 0) {
146 1.4 joerg /* Return zero length to indicate EOF */
147 1.4 joerg *lenp = 0;
148 1.5 joerg return ((char *)bufpos);
149 1.4 joerg }
150 1.4 joerg
151 1.4 joerg /* Look for a newline in the remaining part of the buffer */
152 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
153 1.4 joerg ++p; /* advance over newline */
154 1.5 joerg ret = (char *)bufpos;
155 1.4 joerg len = p - bufpos;
156 1.4 joerg bufrem -= len;
157 1.4 joerg bufpos = p;
158 1.4 joerg *lenp = len;
159 1.4 joerg return (ret);
160 1.4 joerg }
161 1.4 joerg
162 1.4 joerg /* We have to copy the current buffered data to the line buffer */
163 1.4 joerg for (len = bufrem, off = 0; ; len += bufrem) {
164 1.4 joerg /* Make sure there is room for more data */
165 1.4 joerg if (grep_lnbufgrow(len + LNBUFBUMP))
166 1.4 joerg goto error;
167 1.4 joerg memcpy(lnbuf + off, bufpos, len - off);
168 1.4 joerg off = len;
169 1.4 joerg if (grep_refill(f) != 0)
170 1.4 joerg goto error;
171 1.4 joerg if (bufrem == 0)
172 1.4 joerg /* EOF: return partial line */
173 1.4 joerg break;
174 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
175 1.4 joerg continue;
176 1.4 joerg /* got it: finish up the line (like code above) */
177 1.4 joerg ++p;
178 1.4 joerg diff = p - bufpos;
179 1.4 joerg len += diff;
180 1.4 joerg if (grep_lnbufgrow(len))
181 1.4 joerg goto error;
182 1.4 joerg memcpy(lnbuf + off, bufpos, diff);
183 1.4 joerg bufrem -= diff;
184 1.4 joerg bufpos = p;
185 1.4 joerg break;
186 1.1 cjep }
187 1.4 joerg *lenp = len;
188 1.5 joerg return ((char *)lnbuf);
189 1.2 rillig
190 1.4 joerg error:
191 1.4 joerg *lenp = 0;
192 1.4 joerg return (NULL);
193 1.1 cjep }
194 1.1 cjep
195 1.4 joerg static inline struct file *
196 1.4 joerg grep_file_init(struct file *f)
197 1.1 cjep {
198 1.1 cjep
199 1.4 joerg if (filebehave == FILE_GZIP &&
200 1.4 joerg (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
201 1.4 joerg goto error;
202 1.4 joerg
203 1.9 christos #ifndef WITHOUT_BZ2
204 1.4 joerg if (filebehave == FILE_BZIP &&
205 1.4 joerg (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
206 1.4 joerg goto error;
207 1.9 christos #endif
208 1.2 rillig
209 1.4 joerg /* Fill read buffer, also catches errors early */
210 1.4 joerg if (grep_refill(f) != 0)
211 1.4 joerg goto error;
212 1.2 rillig
213 1.4 joerg /* Check for binary stuff, if necessary */
214 1.7 joerg if (!nulldataflag && binbehave != BINFILE_TEXT &&
215 1.7 joerg memchr(bufpos, '\0', bufrem) != NULL)
216 1.4 joerg f->binary = true;
217 1.2 rillig
218 1.4 joerg return (f);
219 1.4 joerg error:
220 1.4 joerg close(f->fd);
221 1.1 cjep free(f);
222 1.4 joerg return (NULL);
223 1.1 cjep }
224 1.1 cjep
225 1.4 joerg /*
226 1.4 joerg * Opens a file for processing.
227 1.4 joerg */
228 1.4 joerg struct file *
229 1.4 joerg grep_open(const char *path)
230 1.1 cjep {
231 1.4 joerg struct file *f;
232 1.4 joerg
233 1.4 joerg f = grep_malloc(sizeof *f);
234 1.4 joerg memset(f, 0, sizeof *f);
235 1.4 joerg if (path == NULL) {
236 1.4 joerg /* Processing stdin implies --line-buffered. */
237 1.4 joerg lbflag = true;
238 1.4 joerg f->fd = STDIN_FILENO;
239 1.4 joerg } else if ((f->fd = open(path, O_RDONLY)) == -1) {
240 1.4 joerg free(f);
241 1.4 joerg return (NULL);
242 1.1 cjep }
243 1.1 cjep
244 1.4 joerg return (grep_file_init(f));
245 1.1 cjep }
246 1.1 cjep
247 1.4 joerg /*
248 1.4 joerg * Closes a file.
249 1.4 joerg */
250 1.1 cjep void
251 1.4 joerg grep_close(struct file *f)
252 1.1 cjep {
253 1.4 joerg
254 1.4 joerg close(f->fd);
255 1.4 joerg
256 1.4 joerg /* Reset read buffer and line buffer */
257 1.4 joerg bufpos = buffer;
258 1.4 joerg bufrem = 0;
259 1.4 joerg
260 1.4 joerg free(lnbuf);
261 1.4 joerg lnbuf = NULL;
262 1.4 joerg lnbuflen = 0;
263 1.1 cjep }
264