1 /* $NetBSD: locate.c,v 1.19 2018/05/14 05:17:10 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * James A. Woods. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; 44 #endif 45 __RCSID("$NetBSD: locate.c,v 1.19 2018/05/14 05:17:10 lukem Exp $"); 46 #endif /* not lint */ 47 48 /* 49 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. 50 * 51 * Locate scans a file list for the full pathname of a file given only part 52 * of the name. The list has been processed with "front-compression" 53 * and bigram coding. Front compression reduces space by a factor of 4-5, 54 * bigram coding by a further 20-25%. 55 * 56 * The codes are: 57 * 58 * 0-28 likeliest differential counts + offset to make nonnegative 59 * 30 switch code for out-of-range count to follow in next word 60 * 128-255 bigram codes (128 most common, as determined by 'updatedb') 61 * 32-127 single character (printable) ascii residue (ie, literal) 62 * 63 * A novel two-tiered string search technique is employed: 64 * 65 * First, a metacharacter-free subpattern and partial pathname is matched 66 * BACKWARDS to avoid full expansion of the pathname list. The time savings 67 * is 40-50% over forward matching, which cannot efficiently handle 68 * overlapped search patterns and compressed path residue. 69 * 70 * Then, the actual shell glob-style regular expression (if in this form) is 71 * matched against the candidate pathnames using the slower routines provided 72 * in the standard 'find'. 73 */ 74 75 #include <sys/param.h> 76 77 #include <fnmatch.h> 78 #include <unistd.h> 79 #include <stdio.h> 80 #include <string.h> 81 #include <stdlib.h> 82 #include <err.h> 83 #include <errno.h> 84 #include <sys/queue.h> 85 #include <sys/stat.h> 86 87 #include "locate.h" 88 #include "pathnames.h" 89 90 91 struct locate_db { 92 LIST_ENTRY(locate_db) db_link; 93 FILE *db_fp; 94 const char *db_path; 95 }; 96 LIST_HEAD(db_list, locate_db) db_list; 97 98 #ifndef NEW 99 # define NEW(type) (type *) malloc(sizeof (type)) 100 #endif 101 102 static void add_db(const char *); 103 static int fastfind(FILE *, char *); 104 static char *patprep(const char *); 105 106 static void 107 add_db(const char *path) 108 { 109 FILE *fp; 110 struct locate_db *dbp; 111 struct stat st; 112 113 if (path == NULL || *path == '\0') 114 return; 115 116 if ((fp = fopen(path, "r")) == NULL) 117 err(1, "Can't open database `%s'", path); 118 if (fstat(fileno(fp), &st) == -1) 119 err(1, "Can't stat database `%s'", path); 120 if (S_ISDIR(st.st_mode)) { 121 errno = EISDIR; 122 err(1, "Can't use database `%s'", path); 123 } 124 dbp = NEW(struct locate_db); 125 dbp->db_fp = fp; 126 dbp->db_path = path; 127 LIST_INSERT_HEAD(&db_list, dbp, db_link); 128 } 129 130 int 131 main(int argc, char *argv[]) 132 { 133 struct locate_db *dbp; 134 const char *locate_path = getenv("LOCATE_PATH"); 135 char *cp; 136 int c; 137 int rc; 138 int found = 0; 139 140 LIST_INIT(&db_list); 141 142 while ((c = getopt(argc, argv, "d:")) != EOF) { 143 switch (c) { 144 case 'd': 145 locate_path = optarg; 146 break; 147 } 148 } 149 if (argc <= optind) { 150 (void)fprintf(stderr, "Usage: %s [-d dbpath] pattern ...\n", 151 getprogname()); 152 exit(1); 153 } 154 if (!locate_path) 155 locate_path = _PATH_FCODES; 156 157 char *lp = strdup(locate_path); 158 while ((cp = strrchr(lp, ':'))) { 159 *cp++ = '\0'; 160 add_db(cp); 161 } 162 add_db(lp); 163 free(lp); 164 165 if (LIST_EMPTY(&db_list)) 166 exit(1); 167 for (; optind < argc; ++optind) { 168 LIST_FOREACH(dbp, &db_list, db_link) { 169 rc = fastfind(dbp->db_fp, argv[optind]); 170 if (rc > 0) { 171 /* some results found */ 172 found = 1; 173 } else if (rc < 0) { 174 /* error */ 175 errx(2, "Invalid data in database file `%s'", 176 dbp->db_path); 177 } 178 } 179 } 180 exit(found == 0); 181 } 182 183 static int 184 fastfind(FILE *fp, char *pathpart) 185 { 186 char *p, *s; 187 int c; 188 int count, found, globflag, printed; 189 char *cutoff, *patend, *q; 190 char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; 191 192 rewind(fp); 193 194 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) 195 p[c] = getc(fp), s[c] = getc(fp); 196 197 p = pathpart; 198 globflag = strchr(p, '*') || strchr(p, '?') || strchr(p, '['); 199 patend = patprep(p); 200 201 found = printed = 0; 202 for (c = getc(fp), count = 0; c != EOF;) { 203 count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; 204 /* overlay old path */ 205 for (p = path + count; (c = getc(fp)) > SWITCH;) { 206 /* sanity check */ 207 if (p < path || p >= path + sizeof(path) - 1) 208 return -1; /* invalid database file */ 209 if (c < PARITY) 210 *p++ = c; 211 else { /* bigrams are parity-marked */ 212 c &= PARITY - 1; 213 /* sanity check */ 214 if (c < 0 || c >= (int)sizeof(bigram1)) 215 return -1; /* invalid database file */ 216 *p++ = bigram1[c], *p++ = bigram2[c]; 217 } 218 } 219 *p-- = '\0'; 220 if (p < path || count < 0) 221 return -1; 222 cutoff = (found ? path : path + count); 223 for (found = 0, s = p; s >= cutoff; s--) 224 if (*s == *patend) { /* fast first char check */ 225 for (p = patend - 1, q = s - 1; *p != '\0'; 226 p--, q--) 227 if (*q != *p) 228 break; 229 if (*p == '\0') { /* fast match success */ 230 found = 1; 231 if (!globflag || 232 !fnmatch(pathpart, path, 0)) { 233 (void)printf("%s\n", path); 234 ++printed; 235 } 236 break; 237 } 238 } 239 } 240 return printed; 241 } 242 243 /* 244 * extract last glob-free subpattern in name for fast pre-match; prepend 245 * '\0' for backwards match; return end of new pattern 246 */ 247 static char globfree[100]; 248 249 static char * 250 patprep(const char *name) 251 { 252 const char *endmark, *p; 253 char *subp = globfree; 254 255 *subp++ = '\0'; 256 257 p = name + strlen(name) - 1; 258 /* skip trailing metacharacters (and [] ranges) */ 259 for (; p >= name; p--) 260 if (strchr("*?", *p) == 0) 261 break; 262 if (p < name) 263 p = name; 264 if (*p == ']') 265 for (p--; p >= name; p--) 266 if (*p == '[') { 267 p--; 268 break; 269 } 270 if (p < name) 271 p = name; 272 /* 273 * if pattern has only metacharacters, check every path (force '/' 274 * search) 275 */ 276 if ((p == name) && strchr("?*[]", *p) != 0) 277 *subp++ = '/'; 278 else { 279 for (endmark = p; p >= name; p--) 280 if (strchr("]*?", *p) != 0) 281 break; 282 for (++p; 283 (p <= endmark) && subp < (globfree + sizeof(globfree));) 284 *subp++ = *p++; 285 } 286 *subp = '\0'; 287 return --subp; 288 } 289