1 1.13 rin /* $NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $ */ 2 1.4 joerg /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */ 3 1.4 joerg /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 4 1.2 rillig 5 1.1 cjep /*- 6 1.4 joerg * Copyright (c) 1999 James Howard and Dag-Erling Codan Smrgrav 7 1.4 joerg * Copyright (C) 2008-2010 Gabor Kovesdan <gabor (at) FreeBSD.org> 8 1.4 joerg * Copyright (C) 2010 Dimitry Andric <dimitry (at) andric.com> 9 1.1 cjep * All rights reserved. 10 1.1 cjep * 11 1.1 cjep * Redistribution and use in source and binary forms, with or without 12 1.1 cjep * modification, are permitted provided that the following conditions 13 1.1 cjep * are met: 14 1.1 cjep * 1. Redistributions of source code must retain the above copyright 15 1.1 cjep * notice, this list of conditions and the following disclaimer. 16 1.1 cjep * 2. Redistributions in binary form must reproduce the above copyright 17 1.1 cjep * notice, this list of conditions and the following disclaimer in the 18 1.1 cjep * documentation and/or other materials provided with the distribution. 19 1.1 cjep * 20 1.1 cjep * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 1.1 cjep * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 1.1 cjep * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 1.1 cjep * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 1.1 cjep * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 1.1 cjep * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 1.1 cjep * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 1.1 cjep * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 1.1 cjep * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 1.1 cjep * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 1.1 cjep * SUCH DAMAGE. 31 1.1 cjep */ 32 1.1 cjep 33 1.6 joerg #if HAVE_NBTOOL_CONFIG_H 34 1.6 joerg #include "nbtool_config.h" 35 1.6 joerg #endif 36 1.6 joerg 37 1.2 rillig #include <sys/cdefs.h> 38 1.13 rin __RCSID("$NetBSD: file.c,v 1.13 2024/08/14 05:02:19 rin Exp $"); 39 1.2 rillig 40 1.1 cjep #include <sys/param.h> 41 1.4 joerg #include <sys/types.h> 42 1.4 joerg #include <sys/stat.h> 43 1.1 cjep 44 1.1 cjep #include <err.h> 45 1.4 joerg #include <errno.h> 46 1.4 joerg #include <fcntl.h> 47 1.4 joerg #include <stddef.h> 48 1.1 cjep #include <stdlib.h> 49 1.4 joerg #include <string.h> 50 1.4 joerg #include <unistd.h> 51 1.4 joerg #include <wchar.h> 52 1.4 joerg #include <wctype.h> 53 1.1 cjep 54 1.1 cjep #include "grep.h" 55 1.1 cjep 56 1.4 joerg #define MAXBUFSIZ (32 * 1024) 57 1.4 joerg #define LNBUFBUMP 80 58 1.4 joerg 59 1.10 christos #ifndef WITHOUT_GZIP 60 1.4 joerg static gzFile gzbufdesc; 61 1.10 christos #endif 62 1.9 christos #ifndef WITHOUT_BZ2 63 1.4 joerg static BZFILE* bzbufdesc; 64 1.9 christos #endif 65 1.4 joerg 66 1.11 christos static unsigned char buffer[MAXBUFSIZ + 1]; 67 1.4 joerg static unsigned char *bufpos; 68 1.4 joerg static size_t bufrem; 69 1.4 joerg 70 1.4 joerg static unsigned char *lnbuf; 71 1.2 rillig static size_t lnbuflen; 72 1.1 cjep 73 1.4 joerg static inline int 74 1.4 joerg grep_refill(struct file *f) 75 1.4 joerg { 76 1.10 christos ssize_t nr = -1; 77 1.13 rin #ifndef WITHOUT_BZ2 78 1.4 joerg int bzerr; 79 1.12 rin #endif 80 1.4 joerg 81 1.4 joerg bufpos = buffer; 82 1.4 joerg bufrem = 0; 83 1.4 joerg 84 1.10 christos #ifndef WITHOUT_GZIP 85 1.9 christos if (filebehave == FILE_GZIP) { 86 1.4 joerg nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 87 1.10 christos if (nr == -1) 88 1.10 christos return -1; 89 1.10 christos } 90 1.10 christos #endif 91 1.9 christos #ifndef WITHOUT_BZ2 92 1.10 christos if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 93 1.4 joerg nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 94 1.4 joerg switch (bzerr) { 95 1.4 joerg case BZ_OK: 96 1.4 joerg case BZ_STREAM_END: 97 1.4 joerg /* No problem, nr will be okay */ 98 1.2 rillig break; 99 1.4 joerg case BZ_DATA_ERROR_MAGIC: 100 1.4 joerg /* 101 1.4 joerg * As opposed to gzread(), which simply returns the 102 1.4 joerg * plain file data, if it is not in the correct 103 1.4 joerg * compressed format, BZ2_bzRead() instead aborts. 104 1.4 joerg * 105 1.4 joerg * So, just restart at the beginning of the file again, 106 1.4 joerg * and use plain reads from now on. 107 1.4 joerg */ 108 1.4 joerg BZ2_bzReadClose(&bzerr, bzbufdesc); 109 1.4 joerg bzbufdesc = NULL; 110 1.4 joerg if (lseek(f->fd, 0, SEEK_SET) == -1) 111 1.4 joerg return (-1); 112 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ); 113 1.1 cjep break; 114 1.4 joerg default: 115 1.4 joerg /* Make sure we exit with an error */ 116 1.4 joerg nr = -1; 117 1.1 cjep } 118 1.10 christos if (nr == -1) 119 1.10 christos return -1; 120 1.10 christos } 121 1.9 christos #endif 122 1.10 christos if (nr == -1) { 123 1.4 joerg nr = read(f->fd, buffer, MAXBUFSIZ); 124 1.10 christos } 125 1.4 joerg 126 1.4 joerg if (nr < 0) 127 1.4 joerg return (-1); 128 1.1 cjep 129 1.4 joerg bufrem = nr; 130 1.4 joerg return (0); 131 1.1 cjep } 132 1.1 cjep 133 1.11 christos static inline void 134 1.4 joerg grep_lnbufgrow(size_t newlen) 135 1.1 cjep { 136 1.1 cjep 137 1.4 joerg if (lnbuflen < newlen) { 138 1.4 joerg lnbuf = grep_realloc(lnbuf, newlen); 139 1.4 joerg lnbuflen = newlen; 140 1.4 joerg } 141 1.11 christos } 142 1.2 rillig 143 1.11 christos static void 144 1.11 christos grep_copyline(size_t off, size_t len) 145 1.11 christos { 146 1.11 christos memcpy(lnbuf + off, bufpos, len); 147 1.11 christos lnbuf[off + len] = '\0'; 148 1.4 joerg } 149 1.2 rillig 150 1.4 joerg char * 151 1.4 joerg grep_fgetln(struct file *f, size_t *lenp) 152 1.4 joerg { 153 1.4 joerg unsigned char *p; 154 1.4 joerg size_t len; 155 1.4 joerg size_t off; 156 1.4 joerg ptrdiff_t diff; 157 1.4 joerg 158 1.4 joerg /* Fill the buffer, if necessary */ 159 1.4 joerg if (bufrem == 0 && grep_refill(f) != 0) 160 1.4 joerg goto error; 161 1.4 joerg 162 1.4 joerg if (bufrem == 0) { 163 1.4 joerg /* Return zero length to indicate EOF */ 164 1.4 joerg *lenp = 0; 165 1.5 joerg return ((char *)bufpos); 166 1.4 joerg } 167 1.4 joerg 168 1.4 joerg /* Look for a newline in the remaining part of the buffer */ 169 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) { 170 1.4 joerg ++p; /* advance over newline */ 171 1.4 joerg len = p - bufpos; 172 1.11 christos grep_lnbufgrow(len + 1); 173 1.11 christos grep_copyline(0, len); 174 1.11 christos *lenp = len; 175 1.4 joerg bufrem -= len; 176 1.4 joerg bufpos = p; 177 1.11 christos return (char *)lnbuf; 178 1.4 joerg } 179 1.4 joerg 180 1.4 joerg /* We have to copy the current buffered data to the line buffer */ 181 1.4 joerg for (len = bufrem, off = 0; ; len += bufrem) { 182 1.4 joerg /* Make sure there is room for more data */ 183 1.11 christos grep_lnbufgrow(len + LNBUFBUMP); 184 1.11 christos grep_copyline(off, len - off); 185 1.4 joerg off = len; 186 1.4 joerg if (grep_refill(f) != 0) 187 1.4 joerg goto error; 188 1.4 joerg if (bufrem == 0) 189 1.4 joerg /* EOF: return partial line */ 190 1.4 joerg break; 191 1.7 joerg if ((p = memchr(bufpos, line_sep, bufrem)) == NULL) 192 1.4 joerg continue; 193 1.4 joerg /* got it: finish up the line (like code above) */ 194 1.4 joerg ++p; 195 1.4 joerg diff = p - bufpos; 196 1.4 joerg len += diff; 197 1.11 christos grep_lnbufgrow(len + 1); 198 1.11 christos grep_copyline(off, diff); 199 1.4 joerg bufrem -= diff; 200 1.4 joerg bufpos = p; 201 1.4 joerg break; 202 1.1 cjep } 203 1.4 joerg *lenp = len; 204 1.5 joerg return ((char *)lnbuf); 205 1.2 rillig 206 1.4 joerg error: 207 1.4 joerg *lenp = 0; 208 1.4 joerg return (NULL); 209 1.1 cjep } 210 1.1 cjep 211 1.4 joerg static inline struct file * 212 1.4 joerg grep_file_init(struct file *f) 213 1.1 cjep { 214 1.1 cjep 215 1.10 christos #ifndef WITHOUT_GZIP 216 1.4 joerg if (filebehave == FILE_GZIP && 217 1.4 joerg (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 218 1.4 joerg goto error; 219 1.10 christos #endif 220 1.4 joerg 221 1.9 christos #ifndef WITHOUT_BZ2 222 1.4 joerg if (filebehave == FILE_BZIP && 223 1.4 joerg (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 224 1.4 joerg goto error; 225 1.9 christos #endif 226 1.2 rillig 227 1.4 joerg /* Fill read buffer, also catches errors early */ 228 1.4 joerg if (grep_refill(f) != 0) 229 1.4 joerg goto error; 230 1.2 rillig 231 1.4 joerg /* Check for binary stuff, if necessary */ 232 1.7 joerg if (!nulldataflag && binbehave != BINFILE_TEXT && 233 1.7 joerg memchr(bufpos, '\0', bufrem) != NULL) 234 1.4 joerg f->binary = true; 235 1.2 rillig 236 1.4 joerg return (f); 237 1.4 joerg error: 238 1.4 joerg close(f->fd); 239 1.1 cjep free(f); 240 1.4 joerg return (NULL); 241 1.1 cjep } 242 1.1 cjep 243 1.4 joerg /* 244 1.4 joerg * Opens a file for processing. 245 1.4 joerg */ 246 1.4 joerg struct file * 247 1.4 joerg grep_open(const char *path) 248 1.1 cjep { 249 1.4 joerg struct file *f; 250 1.4 joerg 251 1.4 joerg f = grep_malloc(sizeof *f); 252 1.4 joerg memset(f, 0, sizeof *f); 253 1.4 joerg if (path == NULL) { 254 1.4 joerg /* Processing stdin implies --line-buffered. */ 255 1.4 joerg lbflag = true; 256 1.4 joerg f->fd = STDIN_FILENO; 257 1.4 joerg } else if ((f->fd = open(path, O_RDONLY)) == -1) { 258 1.4 joerg free(f); 259 1.4 joerg return (NULL); 260 1.1 cjep } 261 1.1 cjep 262 1.4 joerg return (grep_file_init(f)); 263 1.1 cjep } 264 1.1 cjep 265 1.4 joerg /* 266 1.4 joerg * Closes a file. 267 1.4 joerg */ 268 1.1 cjep void 269 1.4 joerg grep_close(struct file *f) 270 1.1 cjep { 271 1.4 joerg 272 1.4 joerg close(f->fd); 273 1.4 joerg 274 1.4 joerg /* Reset read buffer and line buffer */ 275 1.4 joerg bufpos = buffer; 276 1.4 joerg bufrem = 0; 277 1.4 joerg 278 1.4 joerg free(lnbuf); 279 1.4 joerg lnbuf = NULL; 280 1.4 joerg lnbuflen = 0; 281 1.1 cjep } 282