file.c revision 1.4 1 1.4 joerg /* $NetBSD: file.c,v 1.4 2011/02/16 01:31:33 joerg Exp $ */
2 1.4 joerg /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 1.4 joerg /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4 1.2 rillig
5 1.1 cjep /*-
6 1.4 joerg * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 1.4 joerg * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org>
8 1.4 joerg * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com>
9 1.1 cjep * All rights reserved.
10 1.1 cjep *
11 1.1 cjep * Redistribution and use in source and binary forms, with or without
12 1.1 cjep * modification, are permitted provided that the following conditions
13 1.1 cjep * are met:
14 1.1 cjep * 1. Redistributions of source code must retain the above copyright
15 1.1 cjep * notice, this list of conditions and the following disclaimer.
16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright
17 1.1 cjep * notice, this list of conditions and the following disclaimer in the
18 1.1 cjep * documentation and/or other materials provided with the distribution.
19 1.1 cjep *
20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 1.1 cjep * SUCH DAMAGE.
31 1.1 cjep */
32 1.1 cjep
33 1.2 rillig #include <sys/cdefs.h>
34 1.4 joerg __RCSID("$NetBSD: file.c,v 1.4 2011/02/16 01:31:33 joerg Exp $");
35 1.2 rillig
36 1.1 cjep #include <sys/param.h>
37 1.4 joerg #include <sys/types.h>
38 1.4 joerg #include <sys/stat.h>
39 1.1 cjep
40 1.4 joerg #include <bzlib.h>
41 1.1 cjep #include <err.h>
42 1.4 joerg #include <errno.h>
43 1.4 joerg #include <fcntl.h>
44 1.4 joerg #include <stddef.h>
45 1.1 cjep #include <stdlib.h>
46 1.4 joerg #include <string.h>
47 1.4 joerg #include <unistd.h>
48 1.4 joerg #include <wchar.h>
49 1.4 joerg #include <wctype.h>
50 1.4 joerg #include <zlib.h>
51 1.1 cjep
52 1.1 cjep #include "grep.h"
53 1.1 cjep
54 1.4 joerg #define MAXBUFSIZ (32 * 1024)
55 1.4 joerg #define LNBUFBUMP 80
56 1.4 joerg
57 1.4 joerg static gzFile gzbufdesc;
58 1.4 joerg static BZFILE* bzbufdesc;
59 1.4 joerg
60 1.4 joerg static unsigned char buffer[MAXBUFSIZ];
61 1.4 joerg static unsigned char *bufpos;
62 1.4 joerg static size_t bufrem;
63 1.4 joerg
64 1.4 joerg static unsigned char *lnbuf;
65 1.2 rillig static size_t lnbuflen;
66 1.1 cjep
67 1.4 joerg static inline int
68 1.4 joerg grep_refill(struct file *f)
69 1.4 joerg {
70 1.4 joerg ssize_t nr;
71 1.4 joerg int bzerr;
72 1.4 joerg
73 1.4 joerg bufpos = buffer;
74 1.4 joerg bufrem = 0;
75 1.4 joerg
76 1.4 joerg if (filebehave == FILE_GZIP)
77 1.4 joerg nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
78 1.4 joerg else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
79 1.4 joerg nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
80 1.4 joerg switch (bzerr) {
81 1.4 joerg case BZ_OK:
82 1.4 joerg case BZ_STREAM_END:
83 1.4 joerg /* No problem, nr will be okay */
84 1.2 rillig break;
85 1.4 joerg case BZ_DATA_ERROR_MAGIC:
86 1.4 joerg /*
87 1.4 joerg * As opposed to gzread(), which simply returns the
88 1.4 joerg * plain file data, if it is not in the correct
89 1.4 joerg * compressed format, BZ2_bzRead() instead aborts.
90 1.4 joerg *
91 1.4 joerg * So, just restart at the beginning of the file again,
92 1.4 joerg * and use plain reads from now on.
93 1.4 joerg */
94 1.4 joerg BZ2_bzReadClose(&bzerr, bzbufdesc);
95 1.4 joerg bzbufdesc = NULL;
96 1.4 joerg if (lseek(f->fd, 0, SEEK_SET) == -1)
97 1.4 joerg return (-1);
98 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
99 1.1 cjep break;
100 1.4 joerg default:
101 1.4 joerg /* Make sure we exit with an error */
102 1.4 joerg nr = -1;
103 1.1 cjep }
104 1.4 joerg } else
105 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ);
106 1.4 joerg
107 1.4 joerg if (nr < 0)
108 1.4 joerg return (-1);
109 1.1 cjep
110 1.4 joerg bufrem = nr;
111 1.4 joerg return (0);
112 1.1 cjep }
113 1.1 cjep
114 1.4 joerg static inline int
115 1.4 joerg grep_lnbufgrow(size_t newlen)
116 1.1 cjep {
117 1.1 cjep
118 1.4 joerg if (lnbuflen < newlen) {
119 1.4 joerg lnbuf = grep_realloc(lnbuf, newlen);
120 1.4 joerg lnbuflen = newlen;
121 1.4 joerg }
122 1.2 rillig
123 1.4 joerg return (0);
124 1.4 joerg }
125 1.2 rillig
126 1.4 joerg char *
127 1.4 joerg grep_fgetln(struct file *f, size_t *lenp)
128 1.4 joerg {
129 1.4 joerg unsigned char *p;
130 1.4 joerg char *ret;
131 1.4 joerg size_t len;
132 1.4 joerg size_t off;
133 1.4 joerg ptrdiff_t diff;
134 1.4 joerg
135 1.4 joerg /* Fill the buffer, if necessary */
136 1.4 joerg if (bufrem == 0 && grep_refill(f) != 0)
137 1.4 joerg goto error;
138 1.4 joerg
139 1.4 joerg if (bufrem == 0) {
140 1.4 joerg /* Return zero length to indicate EOF */
141 1.4 joerg *lenp = 0;
142 1.4 joerg return (bufpos);
143 1.4 joerg }
144 1.4 joerg
145 1.4 joerg /* Look for a newline in the remaining part of the buffer */
146 1.4 joerg if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
147 1.4 joerg ++p; /* advance over newline */
148 1.4 joerg ret = bufpos;
149 1.4 joerg len = p - bufpos;
150 1.4 joerg bufrem -= len;
151 1.4 joerg bufpos = p;
152 1.4 joerg *lenp = len;
153 1.4 joerg return (ret);
154 1.4 joerg }
155 1.4 joerg
156 1.4 joerg /* We have to copy the current buffered data to the line buffer */
157 1.4 joerg for (len = bufrem, off = 0; ; len += bufrem) {
158 1.4 joerg /* Make sure there is room for more data */
159 1.4 joerg if (grep_lnbufgrow(len + LNBUFBUMP))
160 1.4 joerg goto error;
161 1.4 joerg memcpy(lnbuf + off, bufpos, len - off);
162 1.4 joerg off = len;
163 1.4 joerg if (grep_refill(f) != 0)
164 1.4 joerg goto error;
165 1.4 joerg if (bufrem == 0)
166 1.4 joerg /* EOF: return partial line */
167 1.4 joerg break;
168 1.4 joerg if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
169 1.4 joerg continue;
170 1.4 joerg /* got it: finish up the line (like code above) */
171 1.4 joerg ++p;
172 1.4 joerg diff = p - bufpos;
173 1.4 joerg len += diff;
174 1.4 joerg if (grep_lnbufgrow(len))
175 1.4 joerg goto error;
176 1.4 joerg memcpy(lnbuf + off, bufpos, diff);
177 1.4 joerg bufrem -= diff;
178 1.4 joerg bufpos = p;
179 1.4 joerg break;
180 1.1 cjep }
181 1.4 joerg *lenp = len;
182 1.4 joerg return (lnbuf);
183 1.2 rillig
184 1.4 joerg error:
185 1.4 joerg *lenp = 0;
186 1.4 joerg return (NULL);
187 1.1 cjep }
188 1.1 cjep
189 1.4 joerg static inline struct file *
190 1.4 joerg grep_file_init(struct file *f)
191 1.1 cjep {
192 1.1 cjep
193 1.4 joerg if (filebehave == FILE_GZIP &&
194 1.4 joerg (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
195 1.4 joerg goto error;
196 1.4 joerg
197 1.4 joerg if (filebehave == FILE_BZIP &&
198 1.4 joerg (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
199 1.4 joerg goto error;
200 1.2 rillig
201 1.4 joerg /* Fill read buffer, also catches errors early */
202 1.4 joerg if (grep_refill(f) != 0)
203 1.4 joerg goto error;
204 1.2 rillig
205 1.4 joerg /* Check for binary stuff, if necessary */
206 1.4 joerg if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
207 1.4 joerg f->binary = true;
208 1.2 rillig
209 1.4 joerg return (f);
210 1.4 joerg error:
211 1.4 joerg close(f->fd);
212 1.1 cjep free(f);
213 1.4 joerg return (NULL);
214 1.1 cjep }
215 1.1 cjep
216 1.4 joerg /*
217 1.4 joerg * Opens a file for processing.
218 1.4 joerg */
219 1.4 joerg struct file *
220 1.4 joerg grep_open(const char *path)
221 1.1 cjep {
222 1.4 joerg struct file *f;
223 1.4 joerg
224 1.4 joerg f = grep_malloc(sizeof *f);
225 1.4 joerg memset(f, 0, sizeof *f);
226 1.4 joerg if (path == NULL) {
227 1.4 joerg /* Processing stdin implies --line-buffered. */
228 1.4 joerg lbflag = true;
229 1.4 joerg f->fd = STDIN_FILENO;
230 1.4 joerg } else if ((f->fd = open(path, O_RDONLY)) == -1) {
231 1.4 joerg free(f);
232 1.4 joerg return (NULL);
233 1.1 cjep }
234 1.1 cjep
235 1.4 joerg return (grep_file_init(f));
236 1.1 cjep }
237 1.1 cjep
238 1.4 joerg /*
239 1.4 joerg * Closes a file.
240 1.4 joerg */
241 1.1 cjep void
242 1.4 joerg grep_close(struct file *f)
243 1.1 cjep {
244 1.4 joerg
245 1.4 joerg close(f->fd);
246 1.4 joerg
247 1.4 joerg /* Reset read buffer and line buffer */
248 1.4 joerg bufpos = buffer;
249 1.4 joerg bufrem = 0;
250 1.4 joerg
251 1.4 joerg free(lnbuf);
252 1.4 joerg lnbuf = NULL;
253 1.4 joerg lnbuflen = 0;
254 1.1 cjep }
255