file.c revision 1.3.24.1 1 1.3.24.1 bouyer /* $NetBSD: file.c,v 1.3.24.1 2011/02/17 12:00:55 bouyer Exp $ */
2 1.3.24.1 bouyer /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 1.3.24.1 bouyer /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4 1.2 rillig
5 1.1 cjep /*-
6 1.3.24.1 bouyer * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 1.3.24.1 bouyer * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 1.3.24.1 bouyer * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 1.1 cjep * All rights reserved.
10 1.1 cjep *
11 1.1 cjep * Redistribution and use in source and binary forms, with or without
12 1.1 cjep * modification, are permitted provided that the following conditions
13 1.1 cjep * are met:
14 1.1 cjep * 1. Redistributions of source code must retain the above copyright
15 1.1 cjep * notice, this list of conditions and the following disclaimer.
16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 cjep * notice, this list of conditions and the following disclaimer in the
18 1.1 cjep * documentation and/or other materials provided with the distribution.
19 1.1 cjep *
20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 cjep * SUCH DAMAGE.
31 1.1 cjep */
32 1.1 cjep
33 1.2 rillig #include <sys/cdefs.h>
34 1.3.24.1 bouyer __RCSID("$NetBSD: file.c,v 1.3.24.1 2011/02/17 12:00:55 bouyer Exp $");
35 1.2 rillig
36 1.1 cjep #include <sys/param.h>
37 1.3.24.1 bouyer #include <sys/types.h>
38 1.3.24.1 bouyer #include <sys/stat.h>
39 1.1 cjep
40 1.3.24.1 bouyer #include <bzlib.h>
41 1.1 cjep #include <err.h>
42 1.3.24.1 bouyer #include <errno.h>
43 1.3.24.1 bouyer #include <fcntl.h>
44 1.3.24.1 bouyer #include <stddef.h>
45 1.1 cjep #include <stdlib.h>
46 1.3.24.1 bouyer #include <string.h>
47 1.3.24.1 bouyer #include <unistd.h>
48 1.3.24.1 bouyer #include <wchar.h>
49 1.3.24.1 bouyer #include <wctype.h>
50 1.3.24.1 bouyer #include <zlib.h>
51 1.1 cjep
52 1.1 cjep #include "grep.h"
53 1.1 cjep
/* Size of the static read buffer, and the amount requested per refill. */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra slack requested whenever the line buffer is grown in grep_fgetln(). */
#define	LNBUFBUMP	80

/*
 * Decompression handles for the file currently being read.  They are
 * assigned in grep_file_init() and only consulted when filebehave is
 * FILE_GZIP or FILE_BZIP respectively.
 */
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;

/*
 * Shared read buffer: bufpos is the next unconsumed byte inside buffer and
 * bufrem is the number of unconsumed bytes that follow it.
 */
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Line buffer used by grep_fgetln() for lines that span more than one
 * refill of the read buffer; lnbuflen is its current allocated size.
 * Released again in grep_close().
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
66 1.1 cjep
67 1.3.24.1 bouyer static inline int
68 1.3.24.1 bouyer grep_refill(struct file *f)
69 1.3.24.1 bouyer {
70 1.3.24.1 bouyer ssize_t nr;
71 1.3.24.1 bouyer int bzerr;
72 1.3.24.1 bouyer
73 1.3.24.1 bouyer bufpos = buffer;
74 1.3.24.1 bouyer bufrem = 0;
75 1.3.24.1 bouyer
76 1.3.24.1 bouyer if (filebehave == FILE_GZIP)
77 1.3.24.1 bouyer nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
78 1.3.24.1 bouyer else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
79 1.3.24.1 bouyer nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
80 1.3.24.1 bouyer switch (bzerr) {
81 1.3.24.1 bouyer case BZ_OK:
82 1.3.24.1 bouyer case BZ_STREAM_END:
83 1.3.24.1 bouyer /* No problem, nr will be okay */
84 1.2 rillig break;
85 1.3.24.1 bouyer case BZ_DATA_ERROR_MAGIC:
86 1.3.24.1 bouyer /*
87 1.3.24.1 bouyer * As opposed to gzread(), which simply returns the
88 1.3.24.1 bouyer * plain file data, if it is not in the correct
89 1.3.24.1 bouyer * compressed format, BZ2_bzRead() instead aborts.
90 1.3.24.1 bouyer *
91 1.3.24.1 bouyer * So, just restart at the beginning of the file again,
92 1.3.24.1 bouyer * and use plain reads from now on.
93 1.3.24.1 bouyer */
94 1.3.24.1 bouyer BZ2_bzReadClose(&bzerr, bzbufdesc);
95 1.3.24.1 bouyer bzbufdesc = NULL;
96 1.3.24.1 bouyer if (lseek(f->fd, 0, SEEK_SET) == -1)
97 1.3.24.1 bouyer return (-1);
98 1.3.24.1 bouyer nr = read(f->fd, buffer, MAXBUFSIZ);
99 1.1 cjep break;
100 1.3.24.1 bouyer default:
101 1.3.24.1 bouyer /* Make sure we exit with an error */
102 1.3.24.1 bouyer nr = -1;
103 1.1 cjep }
104 1.3.24.1 bouyer } else
105 1.3.24.1 bouyer nr = read(f->fd, buffer, MAXBUFSIZ);
106 1.1 cjep
107 1.3.24.1 bouyer if (nr < 0)
108 1.3.24.1 bouyer return (-1);
109 1.3.24.1 bouyer
110 1.3.24.1 bouyer bufrem = nr;
111 1.3.24.1 bouyer return (0);
112 1.1 cjep }
113 1.1 cjep
114 1.3.24.1 bouyer static inline int
115 1.3.24.1 bouyer grep_lnbufgrow(size_t newlen)
116 1.1 cjep {
117 1.1 cjep
118 1.3.24.1 bouyer if (lnbuflen < newlen) {
119 1.3.24.1 bouyer lnbuf = grep_realloc(lnbuf, newlen);
120 1.3.24.1 bouyer lnbuflen = newlen;
121 1.3.24.1 bouyer }
122 1.2 rillig
123 1.3.24.1 bouyer return (0);
124 1.3.24.1 bouyer }
125 1.2 rillig
126 1.3.24.1 bouyer char *
127 1.3.24.1 bouyer grep_fgetln(struct file *f, size_t *lenp)
128 1.3.24.1 bouyer {
129 1.3.24.1 bouyer unsigned char *p;
130 1.3.24.1 bouyer char *ret;
131 1.3.24.1 bouyer size_t len;
132 1.3.24.1 bouyer size_t off;
133 1.3.24.1 bouyer ptrdiff_t diff;
134 1.3.24.1 bouyer
135 1.3.24.1 bouyer /* Fill the buffer, if necessary */
136 1.3.24.1 bouyer if (bufrem == 0 && grep_refill(f) != 0)
137 1.3.24.1 bouyer goto error;
138 1.3.24.1 bouyer
139 1.3.24.1 bouyer if (bufrem == 0) {
140 1.3.24.1 bouyer /* Return zero length to indicate EOF */
141 1.3.24.1 bouyer *lenp = 0;
142 1.3.24.1 bouyer return ((char *)bufpos);
143 1.3.24.1 bouyer }
144 1.3.24.1 bouyer
145 1.3.24.1 bouyer /* Look for a newline in the remaining part of the buffer */
146 1.3.24.1 bouyer if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
147 1.3.24.1 bouyer ++p; /* advance over newline */
148 1.3.24.1 bouyer ret = (char *)bufpos;
149 1.3.24.1 bouyer len = p - bufpos;
150 1.3.24.1 bouyer bufrem -= len;
151 1.3.24.1 bouyer bufpos = p;
152 1.3.24.1 bouyer *lenp = len;
153 1.3.24.1 bouyer return (ret);
154 1.3.24.1 bouyer }
155 1.3.24.1 bouyer
156 1.3.24.1 bouyer /* We have to copy the current buffered data to the line buffer */
157 1.3.24.1 bouyer for (len = bufrem, off = 0; ; len += bufrem) {
158 1.3.24.1 bouyer /* Make sure there is room for more data */
159 1.3.24.1 bouyer if (grep_lnbufgrow(len + LNBUFBUMP))
160 1.3.24.1 bouyer goto error;
161 1.3.24.1 bouyer memcpy(lnbuf + off, bufpos, len - off);
162 1.3.24.1 bouyer off = len;
163 1.3.24.1 bouyer if (grep_refill(f) != 0)
164 1.3.24.1 bouyer goto error;
165 1.3.24.1 bouyer if (bufrem == 0)
166 1.3.24.1 bouyer /* EOF: return partial line */
167 1.3.24.1 bouyer break;
168 1.3.24.1 bouyer if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
169 1.3.24.1 bouyer continue;
170 1.3.24.1 bouyer /* got it: finish up the line (like code above) */
171 1.3.24.1 bouyer ++p;
172 1.3.24.1 bouyer diff = p - bufpos;
173 1.3.24.1 bouyer len += diff;
174 1.3.24.1 bouyer if (grep_lnbufgrow(len))
175 1.3.24.1 bouyer goto error;
176 1.3.24.1 bouyer memcpy(lnbuf + off, bufpos, diff);
177 1.3.24.1 bouyer bufrem -= diff;
178 1.3.24.1 bouyer bufpos = p;
179 1.3.24.1 bouyer break;
180 1.1 cjep }
181 1.3.24.1 bouyer *lenp = len;
182 1.3.24.1 bouyer return ((char *)lnbuf);
183 1.2 rillig
184 1.3.24.1 bouyer error:
185 1.3.24.1 bouyer *lenp = 0;
186 1.3.24.1 bouyer return (NULL);
187 1.1 cjep }
188 1.1 cjep
189 1.3.24.1 bouyer static inline struct file *
190 1.3.24.1 bouyer grep_file_init(struct file *f)
191 1.1 cjep {
192 1.1 cjep
193 1.3.24.1 bouyer if (filebehave == FILE_GZIP &&
194 1.3.24.1 bouyer (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
195 1.3.24.1 bouyer goto error;
196 1.2 rillig
197 1.3.24.1 bouyer if (filebehave == FILE_BZIP &&
198 1.3.24.1 bouyer (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
199 1.3.24.1 bouyer goto error;
200 1.2 rillig
201 1.3.24.1 bouyer /* Fill read buffer, also catches errors early */
202 1.3.24.1 bouyer if (grep_refill(f) != 0)
203 1.3.24.1 bouyer goto error;
204 1.3.24.1 bouyer
205 1.3.24.1 bouyer /* Check for binary stuff, if necessary */
206 1.3.24.1 bouyer if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
207 1.3.24.1 bouyer f->binary = true;
208 1.2 rillig
209 1.3.24.1 bouyer return (f);
210 1.3.24.1 bouyer error:
211 1.3.24.1 bouyer close(f->fd);
212 1.1 cjep free(f);
213 1.3.24.1 bouyer return (NULL);
214 1.1 cjep }
215 1.1 cjep
216 1.3.24.1 bouyer /*
217 1.3.24.1 bouyer * Opens a file for processing.
218 1.3.24.1 bouyer */
219 1.3.24.1 bouyer struct file *
220 1.3.24.1 bouyer grep_open(const char *path)
221 1.1 cjep {
222 1.3.24.1 bouyer struct file *f;
223 1.1 cjep
224 1.3.24.1 bouyer f = grep_malloc(sizeof *f);
225 1.3.24.1 bouyer memset(f, 0, sizeof *f);
226 1.3.24.1 bouyer if (path == NULL) {
227 1.3.24.1 bouyer /* Processing stdin implies --line-buffered. */
228 1.3.24.1 bouyer lbflag = true;
229 1.3.24.1 bouyer f->fd = STDIN_FILENO;
230 1.3.24.1 bouyer } else if ((f->fd = open(path, O_RDONLY)) == -1) {
231 1.3.24.1 bouyer free(f);
232 1.3.24.1 bouyer return (NULL);
233 1.1 cjep }
234 1.3.24.1 bouyer
235 1.3.24.1 bouyer return (grep_file_init(f));
236 1.1 cjep }
237 1.1 cjep
238 1.3.24.1 bouyer /*
239 1.3.24.1 bouyer * Closes a file.
240 1.3.24.1 bouyer */
241 1.1 cjep void
242 1.3.24.1 bouyer grep_close(struct file *f)
243 1.1 cjep {
244 1.3.24.1 bouyer
245 1.3.24.1 bouyer close(f->fd);
246 1.3.24.1 bouyer
247 1.3.24.1 bouyer /* Reset read buffer and line buffer */
248 1.3.24.1 bouyer bufpos = buffer;
249 1.3.24.1 bouyer bufrem = 0;
250 1.3.24.1 bouyer
251 1.3.24.1 bouyer free(lnbuf);
252 1.3.24.1 bouyer lnbuf = NULL;
253 1.3.24.1 bouyer lnbuflen = 0;
254 1.1 cjep }
255