Home | History | Annotate | Line # | Download | only in gen
glob.c revision 1.3
      1 /*
      2  * Copyright (c) 1989 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Guido van Rossum.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  */
     36 
     37 #if defined(LIBC_SCCS) && !defined(lint)
     38 /*static char *sccsid = "from: @(#)glob.c	5.18 (Berkeley) 12/4/92";*/
     39 static char *rcsid = "$Id: glob.c,v 1.3 1993/08/26 00:44:46 jtc Exp $";
     40 #endif /* LIBC_SCCS and not lint */
     41 
     42 /*
     43  * glob(3) -- a superset of the one defined in POSIX 1003.2.
     44  *
     45  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
     46  *
     47  * Optional extra services, controlled by flags not defined by POSIX:
     48  *
     49  * GLOB_QUOTE:
     50  *	Escaping convention: \ inhibits any special meaning the following
     51  *	character might have (except \ at end of string is retained).
     52  * GLOB_MAGCHAR:
     53  *	Set in gl_flags if pattern contained a globbing character.
     54  * GLOB_NOMAGIC:
     55  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
     56  *	not contain any magic characters.  [Used in csh style globbing]
     57  * GLOB_ALTDIRFUNC:
     58  *	Use alternately specified directory access functions.
     59  * gl_matchc:
     60  *	Number of matches in the current invocation of glob.
     61  */
     62 
     63 #include <sys/param.h>
     64 #include <sys/stat.h>
     65 #include <dirent.h>
     66 #include <glob.h>
     67 #include <ctype.h>
     68 #include <errno.h>
     69 #include <string.h>
     70 #include <stdio.h>
     71 #include <stdlib.h>
     72 
     73 #define	DOLLAR		'$'
     74 #define	DOT		'.'
     75 #define	EOS		'\0'
     76 #define	LBRACKET	'['
     77 #define	NOT		'!'
     78 #define	QUESTION	'?'
     79 #define	QUOTE		'\\'
     80 #define	RANGE		'-'
     81 #define	RBRACKET	']'
     82 #define	SEP		'/'
     83 #define	STAR		'*'
     84 #define	TILDE		'~'
     85 #define	UNDERSCORE	'_'
     86 
     87 #define	M_QUOTE		0x8000
     88 #define	M_PROTECT	0x4000
     89 #define	M_MASK		0xffff
     90 #define	M_ASCII		0x00ff
     91 
     92 #define	CHAR(c)		((c)&M_ASCII)
     93 #define	META(c)		((c)|M_QUOTE)
     94 #define	M_ALL		META('*')
     95 #define	M_END		META(']')
     96 #define	M_NOT		META('!')
     97 #define	M_ONE		META('?')
     98 #define	M_RNG		META('-')
     99 #define	M_SET		META('[')
    100 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
    101 
    102 typedef u_short Char;
    103 
    104 static int	 compare __P((const void *, const void *));
    105 static void	 g_Ctoc __P((Char *, char *));
    106 static int	 g_lstat __P((Char *, struct stat *, glob_t *));
    107 static DIR	*g_opendir __P((Char *, glob_t *));
    108 static Char	*g_strchr __P((Char *, int));
    109 static int	 g_stat __P((Char *, struct stat *, glob_t *));
    110 static int	 glob1 __P((Char *, glob_t *));
    111 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
    112 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
    113 static int	 globextend __P((Char *, glob_t *));
    114 static int	 match __P((Char *, Char *, Char *));
    115 #ifdef DEBUG
    116 static void	 qprintf __P((Char *));
    117 #endif
    118 
    119 /*
    120  * The main glob() routine: compiles the pattern (optionally processing
    121  * quotes), calls glob1() to do the real pattern matching, and finally
    122  * sorts the list (unless unsorted operation is requested).  Returns 0
    123  * if things went well, nonzero if errors occurred.  It is not an error
    124  * to find no matches.
    125  */
    126 glob(pattern, flags, errfunc, pglob)
    127 	const char *pattern;
    128 	int flags, (*errfunc) __P((char *, int));
    129 	glob_t *pglob;
    130 {
    131 	const u_char *compilepat, *patnext;
    132 	int c, err, oldpathc;
    133 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
    134 
    135 	patnext = (u_char *) pattern;
    136 	if (!(flags & GLOB_APPEND)) {
    137 		pglob->gl_pathc = 0;
    138 		pglob->gl_pathv = NULL;
    139 		if (!(flags & GLOB_DOOFFS))
    140 			pglob->gl_offs = 0;
    141 	}
    142 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
    143 	pglob->gl_errfunc = errfunc;
    144 	oldpathc = pglob->gl_pathc;
    145 	pglob->gl_matchc = 0;
    146 
    147 	bufnext = patbuf;
    148 	bufend = bufnext + MAXPATHLEN;
    149 	compilebuf = bufnext;
    150 	compilepat = patnext;
    151 	if (flags & GLOB_QUOTE) {
    152 		/* Protect the quoted characters. */
    153 		while (bufnext < bufend && (c = *patnext++) != EOS)
    154 			if (c == QUOTE) {
    155 				if ((c = *patnext++) == EOS) {
    156 					c = QUOTE;
    157 					--patnext;
    158 				}
    159 				*bufnext++ = c | M_PROTECT;
    160 			}
    161 			else
    162 				*bufnext++ = c;
    163 	}
    164 	else
    165 	    while (bufnext < bufend && (c = *patnext++) != EOS)
    166 		    *bufnext++ = c;
    167 	*bufnext = EOS;
    168 
    169 	bufnext = patbuf;
    170 	qpatnext = patbuf;
    171 	/* We don't need to check for buffer overflow any more. */
    172 	while ((c = *qpatnext++) != EOS) {
    173 		switch (c) {
    174 		case LBRACKET:
    175 			c = *qpatnext;
    176 			if (c == NOT)
    177 				++qpatnext;
    178 			if (*qpatnext == EOS ||
    179 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
    180 				*bufnext++ = LBRACKET;
    181 				if (c == NOT)
    182 					--qpatnext;
    183 				break;
    184 			}
    185 			*bufnext++ = M_SET;
    186 			if (c == NOT)
    187 				*bufnext++ = M_NOT;
    188 			c = *qpatnext++;
    189 			do {
    190 				*bufnext++ = CHAR(c);
    191 				if (*qpatnext == RANGE &&
    192 				    (c = qpatnext[1]) != RBRACKET) {
    193 					*bufnext++ = M_RNG;
    194 					*bufnext++ = CHAR(c);
    195 					qpatnext += 2;
    196 				}
    197 			} while ((c = *qpatnext++) != RBRACKET);
    198 			pglob->gl_flags |= GLOB_MAGCHAR;
    199 			*bufnext++ = M_END;
    200 			break;
    201 		case QUESTION:
    202 			pglob->gl_flags |= GLOB_MAGCHAR;
    203 			*bufnext++ = M_ONE;
    204 			break;
    205 		case STAR:
    206 			pglob->gl_flags |= GLOB_MAGCHAR;
    207 			/* collapse adjacent stars to one,
    208 			 * to avoid exponential behavior
    209 			 */
    210 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
    211 			    *bufnext++ = M_ALL;
    212 			break;
    213 		default:
    214 			*bufnext++ = CHAR(c);
    215 			break;
    216 		}
    217 	}
    218 	*bufnext = EOS;
    219 #ifdef DEBUG
    220 	qprintf(patbuf);
    221 #endif
    222 
    223 	if ((err = glob1(patbuf, pglob)) != 0)
    224 		return(err);
    225 
    226 	/*
    227 	 * If there was no match we are going to append the pattern
    228 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
    229 	 * and the pattern did not contain any magic characters
    230 	 * GLOB_NOMAGIC is there just for compatibility with csh.
    231 	 */
    232 	if (pglob->gl_pathc == oldpathc &&
    233 	    ((flags & GLOB_NOCHECK) ||
    234 	     ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
    235 		if (!(flags & GLOB_QUOTE)) {
    236 			Char *dp = compilebuf;
    237 			const u_char *sp = compilepat;
    238 			while (*dp++ = *sp++);
    239 		}
    240 		else {
    241 			/*
    242 			 * Copy pattern, interpreting quotes; this is slightly
    243 			 * different than the interpretation of quotes above
    244 			 * -- which should prevail?
    245 			 */
    246 			while (*compilepat != EOS) {
    247 				if (*compilepat == QUOTE) {
    248 					if (*++compilepat == EOS)
    249 						--compilepat;
    250 				}
    251 				*compilebuf++ = (u_char)*compilepat++;
    252 			}
    253 			*compilebuf = EOS;
    254 		}
    255 		return(globextend(patbuf, pglob));
    256 	} else if (!(flags & GLOB_NOSORT))
    257 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
    258 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
    259 	return(0);
    260 }
    261 
    262 static int
    263 compare(p, q)
    264 	const void *p, *q;
    265 {
    266 	return(strcmp(*(char **)p, *(char **)q));
    267 }
    268 
    269 static
    270 glob1(pattern, pglob)
    271 	Char *pattern;
    272 	glob_t *pglob;
    273 {
    274 	Char pathbuf[MAXPATHLEN+1];
    275 
    276 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
    277 	if (*pattern == EOS)
    278 		return(0);
    279 	return(glob2(pathbuf, pathbuf, pattern, pglob));
    280 }
    281 
    282 /*
    283  * The functions glob2 and glob3 are mutually recursive; there is one level
    284  * of recursion for each segment in the pattern that contains one or more
    285  * meta characters.
    286  */
    287 static
    288 glob2(pathbuf, pathend, pattern, pglob)
    289 	Char *pathbuf, *pathend, *pattern;
    290 	glob_t *pglob;
    291 {
    292 	struct stat sb;
    293 	Char *p, *q;
    294 	int anymeta;
    295 
    296 	/*
    297 	 * Loop over pattern segments until end of pattern or until
    298 	 * segment with meta character found.
    299 	 */
    300 	for (anymeta = 0;;) {
    301 		if (*pattern == EOS) {		/* End of pattern? */
    302 			*pathend = EOS;
    303 			if (g_lstat(pathbuf, &sb, pglob))
    304 				return(0);
    305 
    306 			if (((pglob->gl_flags & GLOB_MARK) &&
    307 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
    308 			    || (S_ISLNK(sb.st_mode) &&
    309 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
    310 			    S_ISDIR(sb.st_mode)))) {
    311 				*pathend++ = SEP;
    312 				*pathend = EOS;
    313 			}
    314 			++pglob->gl_matchc;
    315 			return(globextend(pathbuf, pglob));
    316 		}
    317 
    318 		/* Find end of next segment, copy tentatively to pathend. */
    319 		q = pathend;
    320 		p = pattern;
    321 		while (*p != EOS && *p != SEP) {
    322 			if (ismeta(*p))
    323 				anymeta = 1;
    324 			*q++ = *p++;
    325 		}
    326 
    327 		if (!anymeta) {		/* No expansion, do next segment. */
    328 			pathend = q;
    329 			pattern = p;
    330 			while (*pattern == SEP)
    331 				*pathend++ = *pattern++;
    332 		} else			/* Need expansion, recurse. */
    333 			return(glob3(pathbuf, pathend, pattern, p, pglob));
    334 	}
    335 	/* NOTREACHED */
    336 }
    337 
    338 static
    339 glob3(pathbuf, pathend, pattern, restpattern, pglob)
    340 	Char *pathbuf, *pathend, *pattern, *restpattern;
    341 	glob_t *pglob;
    342 {
    343 	register struct dirent *dp;
    344 	struct dirent *(*readdirfunc)();
    345 	DIR *dirp;
    346 	int len, err;
    347 	char buf[MAXPATHLEN];
    348 
    349 	*pathend = EOS;
    350 	errno = 0;
    351 
    352 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
    353 		/* TODO: don't call for ENOENT or ENOTDIR? */
    354 		if (pglob->gl_errfunc) {
    355 			g_Ctoc(pathbuf, buf);
    356 			if (pglob->gl_errfunc(buf, errno) ||
    357 			    pglob->gl_flags & GLOB_ERR)
    358 				return (GLOB_ABEND);
    359 		}
    360 		return(0);
    361 	}
    362 
    363 	err = 0;
    364 
    365 	/* Search directory for matching names. */
    366 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    367 		readdirfunc = pglob->gl_readdir;
    368 	else
    369 		readdirfunc = readdir;
    370 	while ((dp = (*readdirfunc)(dirp))) {
    371 		register u_char *sc;
    372 		register Char *dc;
    373 
    374 		/* Initial DOT must be matched literally. */
    375 		if (dp->d_name[0] == DOT && *pattern != DOT)
    376 			continue;
    377 		for (sc = (u_char *) dp->d_name, dc = pathend;
    378 		     *dc++ = *sc++;);
    379 		if (!match(pathend, pattern, restpattern)) {
    380 			*pathend = EOS;
    381 			continue;
    382 		}
    383 		err = glob2(pathbuf, --dc, restpattern, pglob);
    384 		if (err)
    385 			break;
    386 	}
    387 
    388 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    389 		(*pglob->gl_closedir)(dirp);
    390 	else
    391 		closedir(dirp);
    392 	return(err);
    393 }
    394 
    395 
    396 /*
    397  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
    398  * add the new item, and update gl_pathc.
    399  *
    400  * This assumes the BSD realloc, which only copies the block when its size
    401  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
    402  * behavior.
    403  *
    404  * Return 0 if new item added, error code if memory couldn't be allocated.
    405  *
    406  * Invariant of the glob_t structure:
    407  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
    408  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
    409  */
    410 static int
    411 globextend(path, pglob)
    412 	Char *path;
    413 	glob_t *pglob;
    414 {
    415 	register char **pathv;
    416 	register int i;
    417 	u_int newsize;
    418 	char *copy;
    419 	Char *p;
    420 
    421 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
    422 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
    423 	if (pathv == NULL)
    424 		return(GLOB_NOSPACE);
    425 
    426 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
    427 		/* first time around -- clear initial gl_offs items */
    428 		pathv += pglob->gl_offs;
    429 		for (i = pglob->gl_offs; --i >= 0; )
    430 			*--pathv = NULL;
    431 	}
    432 	pglob->gl_pathv = pathv;
    433 
    434 	for (p = path; *p++;);
    435 	if ((copy = malloc(p - path)) != NULL) {
    436 		g_Ctoc(path, copy);
    437 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
    438 	}
    439 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
    440 	return(copy == NULL ? GLOB_NOSPACE : 0);
    441 }
    442 
    443 
    444 /*
    445  * pattern matching function for filenames.  Each occurrence of the *
    446  * pattern causes a recursion level.
    447  */
    448 static
    449 match(name, pat, patend)
    450 	register Char *name, *pat, *patend;
    451 {
    452 	int ok, negate_range;
    453 	Char c, k;
    454 
    455 	while (pat < patend) {
    456 		c = *pat++;
    457 		switch (c & M_MASK) {
    458 		case M_ALL:
    459 			if (pat == patend)
    460 				return(1);
    461 			do
    462 			    if (match(name, pat, patend))
    463 				    return(1);
    464 			while (*name++ != EOS);
    465 			return(0);
    466 		case M_ONE:
    467 			if (*name++ == EOS)
    468 				return(0);
    469 			break;
    470 		case M_SET:
    471 			ok = 0;
    472 			if ((k = *name++) == EOS)
    473 				return(0);
    474 			if (negate_range = ((*pat & M_MASK) == M_NOT))
    475 				++pat;
    476 			while (((c = *pat++) & M_MASK) != M_END)
    477 				if ((*pat & M_MASK) == M_RNG) {
    478 					if (c <= k && k <= pat[1])
    479 						ok = 1;
    480 					pat += 2;
    481 				} else if (c == k)
    482 					ok = 1;
    483 			if (ok == negate_range)
    484 				return(0);
    485 			break;
    486 		default:
    487 			if (*name++ != c)
    488 				return(0);
    489 			break;
    490 		}
    491 	}
    492 	return(*name == EOS);
    493 }
    494 
    495 /* Free allocated data belonging to a glob_t structure. */
    496 void
    497 globfree(pglob)
    498 	glob_t *pglob;
    499 {
    500 	register int i;
    501 	register char **pp;
    502 
    503 	if (pglob->gl_pathv != NULL) {
    504 		pp = pglob->gl_pathv + pglob->gl_offs;
    505 		for (i = pglob->gl_pathc; i--; ++pp)
    506 			if (*pp)
    507 				free(*pp);
    508 		free(pglob->gl_pathv);
    509 	}
    510 }
    511 
    512 static DIR *
    513 g_opendir(str, pglob)
    514 	register Char *str;
    515 	glob_t *pglob;
    516 {
    517 	char buf[MAXPATHLEN];
    518 	char *dirname;
    519 
    520 	if (!*str)
    521 		strcpy(buf, ".");
    522 	else
    523 		g_Ctoc(str, buf);
    524 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    525 		return((*pglob->gl_opendir)(buf));
    526 	return(opendir(buf));
    527 }
    528 
    529 static int
    530 g_lstat(fn, sb, pglob)
    531 	register Char *fn;
    532 	struct stat *sb;
    533 	glob_t *pglob;
    534 {
    535 	char buf[MAXPATHLEN];
    536 
    537 	g_Ctoc(fn, buf);
    538 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    539 		return((*pglob->gl_lstat)(buf, sb));
    540 	return(lstat(buf, sb));
    541 }
    542 
    543 static int
    544 g_stat(fn, sb, pglob)
    545 	register Char *fn;
    546 	struct stat *sb;
    547 	glob_t *pglob;
    548 {
    549 	char buf[MAXPATHLEN];
    550 
    551 	g_Ctoc(fn, buf);
    552 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
    553 		return((*pglob->gl_stat)(buf, sb));
    554 	return(stat(buf, sb));
    555 }
    556 
    557 static Char *
    558 g_strchr(str, ch)
    559 	Char *str;
    560 	int ch;
    561 {
    562 	do {
    563 		if (*str == ch)
    564 			return (str);
    565 	} while (*str++);
    566 	return (NULL);
    567 }
    568 
    569 static void
    570 g_Ctoc(str, buf)
    571 	register Char *str;
    572 	char *buf;
    573 {
    574 	register char *dc;
    575 
    576 	for (dc = buf; *dc++ = *str++;);
    577 }
    578 
    579 #ifdef DEBUG
    580 static void
    581 qprintf(s)
    582 	register Char *s;
    583 {
    584 	register Char *p;
    585 
    586 	for (p = s; *p; p++)
    587 		(void)printf("%c", CHAR(*p));
    588 	(void)printf("\n");
    589 	for (p = s; *p; p++)
    590 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
    591 	(void)printf("\n");
    592 	for (p = s; *p; p++)
    593 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
    594 	(void)printf("\n");
    595 }
    596 #endif
    597