file.c revision 1.12 1 1.12 rin /* $NetBSD: file.c,v 1.12 2024/08/14 04:59:51 rin Exp $ */
2 1.4 joerg /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 1.4 joerg /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4 1.2 rillig
5 1.1 cjep /*-
6 1.4 joerg * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 1.4 joerg * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 1.4 joerg * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 1.1 cjep * All rights reserved.
10 1.1 cjep *
11 1.1 cjep * Redistribution and use in source and binary forms, with or without
12 1.1 cjep * modification, are permitted provided that the following conditions
13 1.1 cjep * are met:
14 1.1 cjep * 1. Redistributions of source code must retain the above copyright
15 1.1 cjep * notice, this list of conditions and the following disclaimer.
16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 cjep * notice, this list of conditions and the following disclaimer in the
18 1.1 cjep * documentation and/or other materials provided with the distribution.
19 1.1 cjep *
20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 cjep * SUCH DAMAGE.
31 1.1 cjep */
32 1.1 cjep
33 1.6 joerg #if HAVE_NBTOOL_CONFIG_H
34 1.6 joerg #include "nbtool_config.h"
35 1.6 joerg #endif
36 1.6 joerg
37 1.2 rillig #include <sys/cdefs.h>
38 1.12 rin __RCSID("$NetBSD: file.c,v 1.12 2024/08/14 04:59:51 rin Exp $");
39 1.2 rillig
40 1.1 cjep #include <sys/param.h>
41 1.4 joerg #include <sys/types.h>
42 1.4 joerg #include <sys/stat.h>
43 1.1 cjep
44 1.1 cjep #include <err.h>
45 1.4 joerg #include <errno.h>
46 1.4 joerg #include <fcntl.h>
47 1.4 joerg #include <stddef.h>
48 1.1 cjep #include <stdlib.h>
49 1.4 joerg #include <string.h>
50 1.4 joerg #include <unistd.h>
51 1.4 joerg #include <wchar.h>
52 1.4 joerg #include <wctype.h>
53 1.1 cjep
54 1.1 cjep #include "grep.h"
55 1.1 cjep
56 1.4 joerg #define MAXBUFSIZ (32 * 1024)
57 1.4 joerg #define LNBUFBUMP 80
58 1.4 joerg
59 1.10 christos #ifndef WITHOUT_GZIP
60 1.4 joerg static gzFile gzbufdesc;
61 1.10 christos #endif
62 1.9 christos #ifndef WITHOUT_BZ2
63 1.4 joerg static BZFILE* bzbufdesc;
64 1.9 christos #endif
65 1.4 joerg
66 1.11 christos static unsigned char buffer[MAXBUFSIZ + 1];
67 1.4 joerg static unsigned char *bufpos;
68 1.4 joerg static size_t bufrem;
69 1.4 joerg
70 1.4 joerg static unsigned char *lnbuf;
71 1.2 rillig static size_t lnbuflen;
72 1.1 cjep
73 1.4 joerg static inline int
74 1.4 joerg grep_refill(struct file *f)
75 1.4 joerg {
76 1.10 christos ssize_t nr = -1;
77 1.12 rin #ifndef WITHOUT_GZIP
78 1.4 joerg int bzerr;
79 1.12 rin #endif
80 1.4 joerg
81 1.4 joerg bufpos = buffer;
82 1.4 joerg bufrem = 0;
83 1.4 joerg
84 1.10 christos #ifndef WITHOUT_GZIP
85 1.9 christos if (filebehave == FILE_GZIP) {
86 1.4 joerg nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
87 1.10 christos if (nr == -1)
88 1.10 christos return -1;
89 1.10 christos }
90 1.10 christos #endif
91 1.9 christos #ifndef WITHOUT_BZ2
92 1.10 christos if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
93 1.4 joerg nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
94 1.4 joerg switch (bzerr) {
95 1.4 joerg case BZ_OK:
96 1.4 joerg case BZ_STREAM_END:
97 1.4 joerg /* No problem, nr will be okay */
98 1.2 rillig break;
99 1.4 joerg case BZ_DATA_ERROR_MAGIC:
100 1.4 joerg /*
101 1.4 joerg * As opposed to gzread(), which simply returns the
102 1.4 joerg * plain file data, if it is not in the correct
103 1.4 joerg * compressed format, BZ2_bzRead() instead aborts.
104 1.4 joerg *
105 1.4 joerg * So, just restart at the beginning of the file again,
106 1.4 joerg * and use plain reads from now on.
107 1.4 joerg */
108 1.4 joerg BZ2_bzReadClose(&bzerr, bzbufdesc);
109 1.4 joerg bzbufdesc = NULL;
110 1.4 joerg if (lseek(f->fd, 0, SEEK_SET) == -1)
111 1.4 joerg return (-1);
112 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
113 1.1 cjep break;
114 1.4 joerg default:
115 1.4 joerg /* Make sure we exit with an error */
116 1.4 joerg nr = -1;
117 1.1 cjep }
118 1.10 christos if (nr == -1)
119 1.10 christos return -1;
120 1.10 christos }
121 1.9 christos #endif
122 1.10 christos if (nr == -1) {
123 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
124 1.10 christos }
125 1.4 joerg
126 1.4 joerg if (nr < 0)
127 1.4 joerg return (-1);
128 1.1 cjep
129 1.4 joerg bufrem = nr;
130 1.4 joerg return (0);
131 1.1 cjep }
132 1.1 cjep
133 1.11 christos static inline void
134 1.4 joerg grep_lnbufgrow(size_t newlen)
135 1.1 cjep {
136 1.1 cjep
137 1.4 joerg if (lnbuflen < newlen) {
138 1.4 joerg lnbuf = grep_realloc(lnbuf, newlen);
139 1.4 joerg lnbuflen = newlen;
140 1.4 joerg }
141 1.11 christos }
142 1.2 rillig
143 1.11 christos static void
144 1.11 christos grep_copyline(size_t off, size_t len)
145 1.11 christos {
146 1.11 christos memcpy(lnbuf + off, bufpos, len);
147 1.11 christos lnbuf[off + len] = '\0';
148 1.4 joerg }
149 1.2 rillig
150 1.4 joerg char *
151 1.4 joerg grep_fgetln(struct file *f, size_t *lenp)
152 1.4 joerg {
153 1.4 joerg unsigned char *p;
154 1.4 joerg size_t len;
155 1.4 joerg size_t off;
156 1.4 joerg ptrdiff_t diff;
157 1.4 joerg
158 1.4 joerg /* Fill the buffer, if necessary */
159 1.4 joerg if (bufrem == 0 && grep_refill(f) != 0)
160 1.4 joerg goto error;
161 1.4 joerg
162 1.4 joerg if (bufrem == 0) {
163 1.4 joerg /* Return zero length to indicate EOF */
164 1.4 joerg *lenp = 0;
165 1.5 joerg return ((char *)bufpos);
166 1.4 joerg }
167 1.4 joerg
168 1.4 joerg /* Look for a newline in the remaining part of the buffer */
169 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
170 1.4 joerg ++p; /* advance over newline */
171 1.4 joerg len = p - bufpos;
172 1.11 christos grep_lnbufgrow(len + 1);
173 1.11 christos grep_copyline(0, len);
174 1.11 christos *lenp = len;
175 1.4 joerg bufrem -= len;
176 1.4 joerg bufpos = p;
177 1.11 christos return (char *)lnbuf;
178 1.4 joerg }
179 1.4 joerg
180 1.4 joerg /* We have to copy the current buffered data to the line buffer */
181 1.4 joerg for (len = bufrem, off = 0; ; len += bufrem) {
182 1.4 joerg /* Make sure there is room for more data */
183 1.11 christos grep_lnbufgrow(len + LNBUFBUMP);
184 1.11 christos grep_copyline(off, len - off);
185 1.4 joerg off = len;
186 1.4 joerg if (grep_refill(f) != 0)
187 1.4 joerg goto error;
188 1.4 joerg if (bufrem == 0)
189 1.4 joerg /* EOF: return partial line */
190 1.4 joerg break;
191 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
192 1.4 joerg continue;
193 1.4 joerg /* got it: finish up the line (like code above) */
194 1.4 joerg ++p;
195 1.4 joerg diff = p - bufpos;
196 1.4 joerg len += diff;
197 1.11 christos grep_lnbufgrow(len + 1);
198 1.11 christos grep_copyline(off, diff);
199 1.4 joerg bufrem -= diff;
200 1.4 joerg bufpos = p;
201 1.4 joerg break;
202 1.1 cjep }
203 1.4 joerg *lenp = len;
204 1.5 joerg return ((char *)lnbuf);
205 1.2 rillig
206 1.4 joerg error:
207 1.4 joerg *lenp = 0;
208 1.4 joerg return (NULL);
209 1.1 cjep }
210 1.1 cjep
211 1.4 joerg static inline struct file *
212 1.4 joerg grep_file_init(struct file *f)
213 1.1 cjep {
214 1.1 cjep
215 1.10 christos #ifndef WITHOUT_GZIP
216 1.4 joerg if (filebehave == FILE_GZIP &&
217 1.4 joerg (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
218 1.4 joerg goto error;
219 1.10 christos #endif
220 1.4 joerg
221 1.9 christos #ifndef WITHOUT_BZ2
222 1.4 joerg if (filebehave == FILE_BZIP &&
223 1.4 joerg (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
224 1.4 joerg goto error;
225 1.9 christos #endif
226 1.2 rillig
227 1.4 joerg /* Fill read buffer, also catches errors early */
228 1.4 joerg if (grep_refill(f) != 0)
229 1.4 joerg goto error;
230 1.2 rillig
231 1.4 joerg /* Check for binary stuff, if necessary */
232 1.7 joerg if (!nulldataflag && binbehave != BINFILE_TEXT &&
233 1.7 joerg memchr(bufpos, '\0', bufrem) != NULL)
234 1.4 joerg f->binary = true;
235 1.2 rillig
236 1.4 joerg return (f);
237 1.4 joerg error:
238 1.4 joerg close(f->fd);
239 1.1 cjep free(f);
240 1.4 joerg return (NULL);
241 1.1 cjep }
242 1.1 cjep
243 1.4 joerg /*
244 1.4 joerg * Opens a file for processing.
245 1.4 joerg */
246 1.4 joerg struct file *
247 1.4 joerg grep_open(const char *path)
248 1.1 cjep {
249 1.4 joerg struct file *f;
250 1.4 joerg
251 1.4 joerg f = grep_malloc(sizeof *f);
252 1.4 joerg memset(f, 0, sizeof *f);
253 1.4 joerg if (path == NULL) {
254 1.4 joerg /* Processing stdin implies --line-buffered. */
255 1.4 joerg lbflag = true;
256 1.4 joerg f->fd = STDIN_FILENO;
257 1.4 joerg } else if ((f->fd = open(path, O_RDONLY)) == -1) {
258 1.4 joerg free(f);
259 1.4 joerg return (NULL);
260 1.1 cjep }
261 1.1 cjep
262 1.4 joerg return (grep_file_init(f));
263 1.1 cjep }
264 1.1 cjep
265 1.4 joerg /*
266 1.4 joerg * Closes a file.
267 1.4 joerg */
268 1.1 cjep void
269 1.4 joerg grep_close(struct file *f)
270 1.1 cjep {
271 1.4 joerg
272 1.4 joerg close(f->fd);
273 1.4 joerg
274 1.4 joerg /* Reset read buffer and line buffer */
275 1.4 joerg bufpos = buffer;
276 1.4 joerg bufrem = 0;
277 1.4 joerg
278 1.4 joerg free(lnbuf);
279 1.4 joerg lnbuf = NULL;
280 1.4 joerg lnbuflen = 0;
281 1.1 cjep }
282