Home | History | Annotate | Line # | Download | only in locate
      1 /*	$NetBSD: locate.c,v 1.19 2018/05/14 05:17:10 lukem Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * James A. Woods.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 #ifndef lint
     37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
     38  The Regents of the University of California.  All rights reserved.");
     39 #endif /* not lint */
     40 
     41 #ifndef lint
     42 #if 0
     43 static char sccsid[] = "@(#)locate.c	8.1 (Berkeley) 6/6/93";
     44 #endif
     45 __RCSID("$NetBSD: locate.c,v 1.19 2018/05/14 05:17:10 lukem Exp $");
     46 #endif /* not lint */
     47 
     48 /*
     49  * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
     50  *
     51  * Locate scans a file list for the full pathname of a file given only part
     52  * of the name.  The list has been processed with "front-compression"
     53  * and bigram coding.  Front compression reduces space by a factor of 4-5,
     54  * bigram coding by a further 20-25%.
     55  *
     56  * The codes are:
     57  *
     58  * 	0-28	likeliest differential counts + offset to make nonnegative
     59  *	30	switch code for out-of-range count to follow in next word
     60  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
     61  *	32-127  single character (printable) ascii residue (ie, literal)
     62  *
     63  * A novel two-tiered string search technique is employed:
     64  *
     65  * First, a metacharacter-free subpattern and partial pathname is matched
     66  * BACKWARDS to avoid full expansion of the pathname list.  The time savings
     67  * is 40-50% over forward matching, which cannot efficiently handle
     68  * overlapped search patterns and compressed path residue.
     69  *
     70  * Then, the actual shell glob-style regular expression (if in this form) is
     71  * matched against the candidate pathnames using the slower routines provided
     72  * in the standard 'find'.
     73  */
     74 
     75 #include <sys/param.h>
     76 
     77 #include <fnmatch.h>
     78 #include <unistd.h>
     79 #include <stdio.h>
     80 #include <string.h>
     81 #include <stdlib.h>
     82 #include <err.h>
     83 #include <errno.h>
     84 #include <sys/queue.h>
     85 #include <sys/stat.h>
     86 
     87 #include "locate.h"
     88 #include "pathnames.h"
     89 
     90 
     91 struct locate_db {
     92 	LIST_ENTRY(locate_db) db_link;
     93 	FILE *db_fp;
     94 	const char *db_path;
     95 };
     96 LIST_HEAD(db_list, locate_db) db_list;
     97 
     98 #ifndef NEW
     99 # define NEW(type)      (type *) malloc(sizeof (type))
    100 #endif
    101 
    102 static void add_db(const char *);
    103 static int fastfind(FILE *, char *);
    104 static char *patprep(const char *);
    105 
    106 static void
    107 add_db(const char *path)
    108 {
    109 	FILE *fp;
    110 	struct locate_db *dbp;
    111 	struct stat st;
    112 
    113 	if (path == NULL || *path == '\0')
    114 		return;
    115 
    116 	if ((fp = fopen(path, "r")) == NULL)
    117 		err(1, "Can't open database `%s'", path);
    118 	if (fstat(fileno(fp), &st) == -1)
    119 		err(1, "Can't stat database `%s'", path);
    120 	if (S_ISDIR(st.st_mode)) {
    121 		errno = EISDIR;
    122 		err(1, "Can't use database `%s'", path);
    123 	}
    124 	dbp = NEW(struct locate_db);
    125 	dbp->db_fp = fp;
    126 	dbp->db_path = path;
    127 	LIST_INSERT_HEAD(&db_list, dbp, db_link);
    128 }
    129 
    130 int
    131 main(int argc, char *argv[])
    132 {
    133 	struct locate_db *dbp;
    134 	const char *locate_path = getenv("LOCATE_PATH");
    135 	char *cp;
    136 	int c;
    137 	int rc;
    138 	int found = 0;
    139 
    140 	LIST_INIT(&db_list);
    141 
    142 	while ((c = getopt(argc, argv, "d:")) != EOF) {
    143 		switch (c) {
    144 		case 'd':
    145 			locate_path = optarg;
    146 			break;
    147 		}
    148 	}
    149 	if (argc <= optind) {
    150 		(void)fprintf(stderr, "Usage: %s [-d dbpath] pattern ...\n",
    151 		    getprogname());
    152 		exit(1);
    153 	}
    154 	if (!locate_path)
    155 		locate_path = _PATH_FCODES;
    156 
    157 	char *lp = strdup(locate_path);
    158 	while ((cp = strrchr(lp, ':'))) {
    159 		*cp++ = '\0';
    160 		add_db(cp);
    161 	}
    162 	add_db(lp);
    163 	free(lp);
    164 
    165 	if (LIST_EMPTY(&db_list))
    166 		exit(1);
    167 	for (; optind < argc; ++optind) {
    168 		LIST_FOREACH(dbp, &db_list, db_link) {
    169 			rc = fastfind(dbp->db_fp, argv[optind]);
    170 			if (rc > 0) {
    171 				/* some results found */
    172 				found = 1;
    173 			} else if (rc < 0) {
    174 				/* error */
    175 				errx(2, "Invalid data in database file `%s'",
    176 				    dbp->db_path);
    177 			}
    178 		}
    179 	}
    180 	exit(found == 0);
    181 }
    182 
    183 static int
    184 fastfind(FILE *fp, char *pathpart)
    185 {
    186 	char *p, *s;
    187 	int c;
    188 	int count, found, globflag, printed;
    189 	char *cutoff, *patend, *q;
    190 	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
    191 
    192 	rewind(fp);
    193 
    194 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
    195 		p[c] = getc(fp), s[c] = getc(fp);
    196 
    197 	p = pathpart;
    198 	globflag = strchr(p, '*') || strchr(p, '?') || strchr(p, '[');
    199 	patend = patprep(p);
    200 
    201 	found = printed = 0;
    202 	for (c = getc(fp), count = 0; c != EOF;) {
    203 		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
    204 		/* overlay old path */
    205 		for (p = path + count; (c = getc(fp)) > SWITCH;) {
    206 			/* sanity check */
    207 			if (p < path || p >= path + sizeof(path) - 1)
    208 				return -1;	/* invalid database file */
    209 			if (c < PARITY)
    210 				*p++ = c;
    211 			else {		/* bigrams are parity-marked */
    212 				c &= PARITY - 1;
    213 				/* sanity check */
    214 				if (c < 0 || c >= (int)sizeof(bigram1))
    215 					return -1;	/* invalid database file */
    216 				*p++ = bigram1[c], *p++ = bigram2[c];
    217 			}
    218 		}
    219 		*p-- = '\0';
    220 		if (p < path || count < 0)
    221 			return -1;
    222 		cutoff = (found ? path : path + count);
    223 		for (found = 0, s = p; s >= cutoff; s--)
    224 			if (*s == *patend) {	/* fast first char check */
    225 				for (p = patend - 1, q = s - 1; *p != '\0';
    226 				    p--, q--)
    227 					if (*q != *p)
    228 						break;
    229 				if (*p == '\0') {	/* fast match success */
    230 					found = 1;
    231 					if (!globflag ||
    232 					    !fnmatch(pathpart, path, 0)) {
    233 						(void)printf("%s\n", path);
    234 						++printed;
    235 					}
    236 					break;
    237 				}
    238 			}
    239 	}
    240 	return printed;
    241 }
    242 
    243 /*
    244  * extract last glob-free subpattern in name for fast pre-match; prepend
    245  * '\0' for backwards match; return end of new pattern
    246  */
    247 static char globfree[100];
    248 
    249 static char *
    250 patprep(const char *name)
    251 {
    252 	const char *endmark, *p;
    253 	char *subp = globfree;
    254 
    255 	*subp++ = '\0';
    256 
    257 	p = name + strlen(name) - 1;
    258 	/* skip trailing metacharacters (and [] ranges) */
    259 	for (; p >= name; p--)
    260 		if (strchr("*?", *p) == 0)
    261 			break;
    262 	if (p < name)
    263 		p = name;
    264 	if (*p == ']')
    265 		for (p--; p >= name; p--)
    266 			if (*p == '[') {
    267 				p--;
    268 				break;
    269 			}
    270 	if (p < name)
    271 		p = name;
    272 	/*
    273 	 * if pattern has only metacharacters, check every path (force '/'
    274 	 * search)
    275 	 */
    276 	if ((p == name) && strchr("?*[]", *p) != 0)
    277 		*subp++ = '/';
    278 	else {
    279 		for (endmark = p; p >= name; p--)
    280 			if (strchr("]*?", *p) != 0)
    281 				break;
    282 		for (++p;
    283 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
    284 			*subp++ = *p++;
    285 	}
    286 	*subp = '\0';
    287 	return --subp;
    288 }
    289