1 1.17 leot /* $NetBSD: look.c,v 1.17 2017/02/21 09:23:31 leot Exp $ */ 2 1.6 jtc 3 1.1 cgd /*- 4 1.6 jtc * Copyright (c) 1991, 1993 5 1.6 jtc * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * This code is derived from software contributed to Berkeley by 8 1.1 cgd * David Hitz of Auspex Systems, Inc. 9 1.1 cgd * 10 1.1 cgd * Redistribution and use in source and binary forms, with or without 11 1.1 cgd * modification, are permitted provided that the following conditions 12 1.1 cgd * are met: 13 1.1 cgd * 1. Redistributions of source code must retain the above copyright 14 1.1 cgd * notice, this list of conditions and the following disclaimer. 15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 cgd * notice, this list of conditions and the following disclaimer in the 17 1.1 cgd * documentation and/or other materials provided with the distribution. 18 1.11 agc * 3. Neither the name of the University nor the names of its contributors 19 1.1 cgd * may be used to endorse or promote products derived from this software 20 1.1 cgd * without specific prior written permission. 21 1.1 cgd * 22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 cgd * SUCH DAMAGE. 33 1.1 cgd */ 34 1.1 cgd 35 1.8 lukem #include <sys/cdefs.h> 36 1.1 cgd #ifndef lint 37 1.12 lukem __COPYRIGHT("@(#) Copyright (c) 1991, 1993\ 38 1.12 lukem The Regents of the University of California. All rights reserved."); 39 1.1 cgd #endif /* not lint */ 40 1.1 cgd 41 1.1 cgd #ifndef lint 42 1.6 jtc #if 0 43 1.7 jtc static char sccsid[] = "@(#)look.c 8.2 (Berkeley) 5/4/95"; 44 1.6 jtc #endif 45 1.17 leot __RCSID("$NetBSD: look.c,v 1.17 2017/02/21 09:23:31 leot Exp $"); 46 1.1 cgd #endif /* not lint */ 47 1.1 cgd 48 1.1 cgd /* 49 1.1 cgd * look -- find lines in a sorted list. 50 1.1 cgd * 51 1.1 cgd * The man page said that TABs and SPACEs participate in -d comparisons. 52 1.1 cgd * In fact, they were ignored. This implements historic practice, not 53 1.1 cgd * the manual page. 54 1.1 cgd */ 55 1.1 cgd 56 1.1 cgd #include <sys/types.h> 57 1.1 cgd #include <sys/mman.h> 58 1.1 cgd #include <sys/stat.h> 59 1.6 jtc 60 1.7 jtc #include <ctype.h> 61 1.1 cgd #include <errno.h> 62 1.1 cgd #include <fcntl.h> 63 1.7 jtc #include <limits.h> 64 1.1 cgd #include <stdio.h> 65 1.1 cgd #include <stdlib.h> 66 1.1 cgd #include <string.h> 67 1.4 jtc #include <unistd.h> 68 1.6 jtc #include <err.h> 69 1.7 jtc 70 1.1 cgd #include "pathnames.h" 71 1.1 cgd 72 1.1 cgd /* 73 1.1 cgd * FOLD and DICT convert characters to a normal form for comparison, 74 1.1 cgd * according to the user specified flags. 75 1.1 cgd * 76 1.1 cgd * DICT expects integers because it uses a non-character value to 77 1.1 cgd * indicate a character which should not participate in comparisons. 78 1.1 cgd */ 79 1.1 cgd #define EQUAL 0 80 1.1 cgd #define GREATER 1 81 1.1 cgd #define LESS (-1) 82 1.1 cgd #define NO_COMPARE (-2) 83 1.1 cgd 84 1.1 cgd #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c)) 85 1.1 cgd #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE) 86 1.1 cgd 87 1.15 joerg static int dflag, fflag; 88 1.1 cgd 89 1.15 joerg static char *binary_search(char *, char *, char *); 90 1.15 joerg static int compare(char *, char *, char *); 91 1.15 joerg static char *linear_search(char *, char *, char *); 92 1.15 joerg static int look(char *, char *, char *); 93 1.15 joerg static void print_from(char *, char *, char *); 94 1.15 joerg __dead static void usage(void); 95 1.1 cgd 96 1.6 jtc int 97 1.15 joerg main(int argc, char *argv[]) 98 1.1 cgd { 99 1.1 cgd struct stat sb; 100 1.6 jtc int ch, fd, termchar; 101 1.13 lukem char *back, *front, *string, *p; 102 1.13 lukem const char *file; 103 1.14 christos size_t len; 104 1.1 cgd 105 1.8 lukem string = NULL; 106 1.1 cgd file = _PATH_WORDS; 107 1.6 jtc termchar = '\0'; 108 1.8 lukem while ((ch = getopt(argc, argv, "dft:")) != -1) 109 1.1 cgd switch(ch) { 110 1.1 cgd case 'd': 111 1.1 cgd dflag = 1; 112 1.1 cgd break; 113 1.1 cgd case 'f': 114 1.1 cgd fflag = 1; 115 1.1 cgd break; 116 1.6 jtc case 't': 117 1.6 jtc termchar = *optarg; 118 1.6 jtc break; 119 1.1 cgd case '?': 120 1.1 cgd default: 121 1.1 cgd usage(); 122 1.1 cgd } 123 1.1 cgd argc -= optind; 124 1.1 cgd argv += optind; 125 1.1 cgd 126 1.1 cgd switch (argc) { 127 1.1 cgd case 2: /* Don't set -df for user. */ 128 1.1 cgd string = *argv++; 129 1.1 cgd file = *argv; 130 1.1 cgd break; 131 1.1 cgd case 1: /* But set -df by default. */ 132 1.1 cgd dflag = fflag = 1; 133 1.1 cgd string = *argv; 134 1.1 cgd break; 135 1.1 cgd default: 136 1.1 cgd usage(); 137 1.1 cgd } 138 1.1 cgd 139 1.6 jtc if (termchar != '\0' && (p = strchr(string, termchar)) != NULL) 140 1.6 jtc *++p = '\0'; 141 1.6 jtc 142 1.6 jtc if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb)) 143 1.6 jtc err(2, "%s", file); 144 1.14 christos len = (size_t)sb.st_size; 145 1.14 christos if ((off_t)len != sb.st_size) { 146 1.14 christos errno = EFBIG; 147 1.14 christos err(2, "%s", file); 148 1.14 christos } 149 1.14 christos if ((front = mmap(NULL, len, 150 1.17 leot PROT_READ, MAP_FILE|MAP_SHARED, fd, (off_t)0)) == MAP_FAILED) 151 1.6 jtc err(2, "%s", file); 152 1.14 christos back = front + len; 153 1.1 cgd exit(look(string, front, back)); 154 1.1 cgd } 155 1.1 cgd 156 1.15 joerg static int 157 1.15 joerg look(char *string, char *front, char *back) 158 1.1 cgd { 159 1.8 lukem int ch; 160 1.8 lukem char *readp, *writep; 161 1.1 cgd 162 1.1 cgd /* Reformat string string to avoid doing it multiple times later. */ 163 1.8 lukem for (readp = writep = string; (ch = *readp++) != 0; ) { 164 1.1 cgd if (fflag) 165 1.1 cgd ch = FOLD(ch); 166 1.1 cgd if (dflag) 167 1.1 cgd ch = DICT(ch); 168 1.1 cgd if (ch != NO_COMPARE) 169 1.1 cgd *(writep++) = ch; 170 1.1 cgd } 171 1.1 cgd *writep = '\0'; 172 1.1 cgd 173 1.1 cgd front = binary_search(string, front, back); 174 1.1 cgd front = linear_search(string, front, back); 175 1.1 cgd 176 1.1 cgd if (front) 177 1.1 cgd print_from(string, front, back); 178 1.1 cgd return (front ? 0 : 1); 179 1.1 cgd } 180 1.1 cgd 181 1.1 cgd 182 1.1 cgd /* 183 1.1 cgd * Binary search for "string" in memory between "front" and "back". 184 1.1 cgd * 185 1.1 cgd * This routine is expected to return a pointer to the start of a line at 186 1.1 cgd * *or before* the first word matching "string". Relaxing the constraint 187 1.1 cgd * this way simplifies the algorithm. 188 1.1 cgd * 189 1.1 cgd * Invariants: 190 1.1 cgd * front points to the beginning of a line at or before the first 191 1.1 cgd * matching string. 192 1.1 cgd * 193 1.1 cgd * back points to the beginning of a line at or after the first 194 1.1 cgd * matching line. 195 1.1 cgd * 196 1.1 cgd * Base of the Invariants. 197 1.1 cgd * front = NULL; 198 1.1 cgd * back = EOF; 199 1.1 cgd * 200 1.1 cgd * Advancing the Invariants: 201 1.1 cgd * 202 1.1 cgd * p = first newline after halfway point from front to back. 203 1.1 cgd * 204 1.1 cgd * If the string at "p" is not greater than the string to match, 205 1.1 cgd * p is the new front. Otherwise it is the new back. 206 1.1 cgd * 207 1.1 cgd * Termination: 208 1.1 cgd * 209 1.1 cgd * The definition of the routine allows it return at any point, 210 1.1 cgd * since front is always at or before the line to print. 211 1.1 cgd * 212 1.1 cgd * In fact, it returns when the chosen "p" equals "back". This 213 1.1 cgd * implies that there exists a string is least half as long as 214 1.1 cgd * (back - front), which in turn implies that a linear search will 215 1.1 cgd * be no more expensive than the cost of simply printing a string or two. 216 1.1 cgd * 217 1.1 cgd * Trying to continue with binary search at this point would be 218 1.1 cgd * more trouble than it's worth. 219 1.1 cgd */ 220 1.1 cgd #define SKIP_PAST_NEWLINE(p, back) \ 221 1.16 joerg while (p < back && *p++ != '\n') continue; 222 1.1 cgd 223 1.15 joerg static char * 224 1.15 joerg binary_search(char *string, char *front, char *back) 225 1.1 cgd { 226 1.8 lukem char *p; 227 1.1 cgd 228 1.1 cgd p = front + (back - front) / 2; 229 1.1 cgd SKIP_PAST_NEWLINE(p, back); 230 1.1 cgd 231 1.6 jtc /* 232 1.6 jtc * If the file changes underneath us, make sure we don't 233 1.6 jtc * infinitely loop. 234 1.6 jtc */ 235 1.6 jtc while (p < back && back > front) { 236 1.1 cgd if (compare(string, p, back) == GREATER) 237 1.1 cgd front = p; 238 1.1 cgd else 239 1.1 cgd back = p; 240 1.1 cgd p = front + (back - front) / 2; 241 1.1 cgd SKIP_PAST_NEWLINE(p, back); 242 1.1 cgd } 243 1.1 cgd return (front); 244 1.1 cgd } 245 1.1 cgd 246 1.1 cgd /* 247 1.1 cgd * Find the first line that starts with string, linearly searching from front 248 1.1 cgd * to back. 249 1.1 cgd * 250 1.1 cgd * Return NULL for no such line. 251 1.1 cgd * 252 1.1 cgd * This routine assumes: 253 1.1 cgd * 254 1.1 cgd * o front points at the first character in a line. 255 1.1 cgd * o front is before or at the first line to be printed. 256 1.1 cgd */ 257 1.15 joerg static char * 258 1.15 joerg linear_search(char *string, char *front, char *back) 259 1.1 cgd { 260 1.1 cgd while (front < back) { 261 1.1 cgd switch (compare(string, front, back)) { 262 1.1 cgd case EQUAL: /* Found it. */ 263 1.1 cgd return (front); 264 1.1 cgd break; 265 1.1 cgd case LESS: /* No such string. */ 266 1.1 cgd return (NULL); 267 1.1 cgd break; 268 1.1 cgd case GREATER: /* Keep going. */ 269 1.1 cgd break; 270 1.1 cgd } 271 1.1 cgd SKIP_PAST_NEWLINE(front, back); 272 1.1 cgd } 273 1.1 cgd return (NULL); 274 1.1 cgd } 275 1.1 cgd 276 1.1 cgd /* 277 1.1 cgd * Print as many lines as match string, starting at front. 278 1.1 cgd */ 279 1.15 joerg static void 280 1.15 joerg print_from(char *string, char *front, char *back) 281 1.1 cgd { 282 1.1 cgd for (; front < back && compare(string, front, back) == EQUAL; ++front) { 283 1.1 cgd for (; front < back && *front != '\n'; ++front) 284 1.1 cgd if (putchar(*front) == EOF) 285 1.6 jtc err(2, "stdout"); 286 1.1 cgd if (putchar('\n') == EOF) 287 1.6 jtc err(2, "stdout"); 288 1.1 cgd } 289 1.1 cgd } 290 1.1 cgd 291 1.1 cgd /* 292 1.1 cgd * Return LESS, GREATER, or EQUAL depending on how the string1 compares with 293 1.1 cgd * string2 (s1 ??? s2). 294 1.1 cgd * 295 1.1 cgd * o Matches up to len(s1) are EQUAL. 296 1.1 cgd * o Matches up to len(s2) are GREATER. 297 1.1 cgd * 298 1.1 cgd * Compare understands about the -f and -d flags, and treats comparisons 299 1.1 cgd * appropriately. 300 1.1 cgd * 301 1.1 cgd * The string "s1" is null terminated. The string s2 is '\n' terminated (or 302 1.1 cgd * "back" terminated). 303 1.1 cgd */ 304 1.15 joerg static int 305 1.15 joerg compare(char *s1, char *s2, char *back) 306 1.1 cgd { 307 1.8 lukem int ch; 308 1.1 cgd 309 1.1 cgd for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) { 310 1.1 cgd ch = *s2; 311 1.1 cgd if (fflag) 312 1.1 cgd ch = FOLD(ch); 313 1.1 cgd if (dflag) 314 1.1 cgd ch = DICT(ch); 315 1.1 cgd 316 1.1 cgd if (ch == NO_COMPARE) { 317 1.1 cgd ++s2; /* Ignore character in comparison. */ 318 1.1 cgd continue; 319 1.1 cgd } 320 1.1 cgd if (*s1 != ch) 321 1.1 cgd return (*s1 < ch ? LESS : GREATER); 322 1.1 cgd } 323 1.1 cgd return (*s1 ? GREATER : EQUAL); 324 1.1 cgd } 325 1.1 cgd 326 1.15 joerg static void 327 1.15 joerg usage(void) 328 1.1 cgd { 329 1.6 jtc (void)fprintf(stderr, "usage: look [-df] [-t char] string [file]\n"); 330 1.1 cgd exit(2); 331 1.1 cgd } 332