Home | History | Annotate | Line # | Download | only in gen
glob.c revision 1.1.1.1
      1 /*
      2  * Copyright (c) 1989 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Guido van Rossum.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  */
     36 
     37 #if defined(LIBC_SCCS) && !defined(lint)
     38 static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 6/24/91";
     39 #endif /* LIBC_SCCS and not lint */
     40 
     41 /*
     42  * glob(3) -- a superset of the one defined in POSIX 1003.2.
     43  *
     44  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
     45  *
     46  * Optional extra services, controlled by flags not defined by POSIX:
     47  *
     48  * GLOB_QUOTE:
     49  *	Escaping convention: \ inhibits any special meaning the following
     50  *	character might have (except \ at end of string is retained).
     51  * GLOB_MAGCHAR:
     52  *	Set in gl_flags if pattern contained a globbing character.
     53  * gl_matchc:
     54  *	Number of matches in the current invocation of glob.
     55  */
     56 
     57 #include <sys/cdefs.h>
     58 #include <sys/param.h>
     59 #include <sys/stat.h>
     60 #include <dirent.h>
     61 #include <glob.h>
     62 #include <ctype.h>
     63 #include <errno.h>
     64 #include <string.h>
     65 #include <stdio.h>
     66 #include <stdlib.h>
     67 
     68 #define	DOLLAR		'$'
     69 #define	DOT		'.'
     70 #define	EOS		'\0'
     71 #define	LBRACKET	'['
     72 #define	NOT		'!'
     73 #define	QUESTION	'?'
     74 #define	QUOTE		'\\'
     75 #define	RANGE		'-'
     76 #define	RBRACKET	']'
     77 #define	SEP		'/'
     78 #define	STAR		'*'
     79 #define	TILDE		'~'
     80 #define	UNDERSCORE	'_'
     81 
     82 #define	M_QUOTE		0x8000
     83 #define	M_PROTECT	0x4000
     84 #define	M_MASK		0xffff
     85 #define	M_ASCII		0x00ff
     86 
     87 #define	CHAR(c)		((c)&M_ASCII)
     88 #define	META(c)		((c)|M_QUOTE)
     89 #define	M_ALL		META('*')
     90 #define	M_END		META(']')
     91 #define	M_NOT		META('!')
     92 #define	M_ONE		META('?')
     93 #define	M_RNG		META('-')
     94 #define	M_SET		META('[')
     95 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
     96 
     97 typedef u_short Char;
     98 
     99 static int	 compare __P((const void *, const void *));
    100 static void	 g_Ctoc __P((Char *, char *));
    101 static int	 g_lstat __P((Char *, struct stat *));
    102 static DIR	*g_opendir __P((Char *));
    103 static Char	*g_strchr __P((Char *, int));
    104 static int	 g_stat __P((Char *, struct stat *));
    105 static int	 glob1 __P((Char *, glob_t *));
    106 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
    107 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
    108 static int	 globextend __P((Char *, glob_t *));
    109 static int	 match __P((Char *, Char *, Char *));
    110 #ifdef DEBUG
    111 static void	 qprintf __P((Char *));
    112 #endif
    113 
    114 /*
    115  * The main glob() routine: compiles the pattern (optionally processing
    116  * quotes), calls glob1() to do the real pattern matching, and finally
    117  * sorts the list (unless unsorted operation is requested).  Returns 0
    118  * if things went well, nonzero if errors occurred.  It is not an error
    119  * to find no matches.
    120  */
    121 glob(pattern, flags, errfunc, pglob)
    122 	const char *pattern;
    123 	int flags, (*errfunc) __P((char *, int));
    124 	glob_t *pglob;
    125 {
    126 	const u_char *compilepat, *patnext;
    127 	int c, err, oldpathc;
    128 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
    129 
    130 	patnext = (u_char *) pattern;
    131 	if (!(flags & GLOB_APPEND)) {
    132 		pglob->gl_pathc = 0;
    133 		pglob->gl_pathv = NULL;
    134 		if (!(flags & GLOB_DOOFFS))
    135 			pglob->gl_offs = 0;
    136 	}
    137 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
    138 	pglob->gl_errfunc = errfunc;
    139 	oldpathc = pglob->gl_pathc;
    140 	pglob->gl_matchc = 0;
    141 
    142 	bufnext = patbuf;
    143 	bufend = bufnext + MAXPATHLEN;
    144 	compilebuf = bufnext;
    145 	compilepat = patnext;
    146 	if (flags & GLOB_QUOTE) {
    147 		/* Protect the quoted characters. */
    148 		while (bufnext < bufend && (c = *patnext++) != EOS)
    149 			if (c == QUOTE) {
    150 				if ((c = *patnext++) == EOS) {
    151 					c = QUOTE;
    152 					--patnext;
    153 				}
    154 				*bufnext++ = c | M_PROTECT;
    155 			}
    156 			else
    157 				*bufnext++ = c;
    158 	}
    159 	else
    160 	    while (bufnext < bufend && (c = *patnext++) != EOS)
    161 		    *bufnext++ = c;
    162 	*bufnext = EOS;
    163 
    164 	bufnext = patbuf;
    165 	qpatnext = patbuf;
    166 	/* We don't need to check for buffer overflow any more. */
    167 	while ((c = *qpatnext++) != EOS) {
    168 		switch (c) {
    169 		case LBRACKET:
    170 			pglob->gl_flags |= GLOB_MAGCHAR;
    171 			c = *qpatnext;
    172 			if (c == NOT)
    173 				++qpatnext;
    174 			if (*qpatnext == EOS ||
    175 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
    176 				*bufnext++ = LBRACKET;
    177 				if (c == NOT)
    178 					--qpatnext;
    179 				break;
    180 			}
    181 			*bufnext++ = M_SET;
    182 			if (c == NOT)
    183 				*bufnext++ = M_NOT;
    184 			c = *qpatnext++;
    185 			do {
    186 				*bufnext++ = CHAR(c);
    187 				if (*qpatnext == RANGE &&
    188 				    (c = qpatnext[1]) != RBRACKET) {
    189 					*bufnext++ = M_RNG;
    190 					*bufnext++ = CHAR(c);
    191 					qpatnext += 2;
    192 				}
    193 			} while ((c = *qpatnext++) != RBRACKET);
    194 			*bufnext++ = M_END;
    195 			break;
    196 		case QUESTION:
    197 			pglob->gl_flags |= GLOB_MAGCHAR;
    198 			*bufnext++ = M_ONE;
    199 			break;
    200 		case STAR:
    201 			pglob->gl_flags |= GLOB_MAGCHAR;
    202 			*bufnext++ = M_ALL;
    203 			break;
    204 		default:
    205 			*bufnext++ = CHAR(c);
    206 			break;
    207 		}
    208 	}
    209 	*bufnext = EOS;
    210 #ifdef DEBUG
    211 	qprintf(patbuf);
    212 #endif
    213 
    214 	if ((err = glob1(patbuf, pglob)) != 0)
    215 		return(err);
    216 
    217 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
    218 		if (!(flags & GLOB_QUOTE)) {
    219 			Char *dp = compilebuf;
    220 			const u_char *sp = compilepat;
    221 			while (*dp++ = *sp++);
    222 		}
    223 		else {
    224 			/*
    225 			 * Copy pattern, interpreting quotes; this is slightly
    226 			 * different than the interpretation of quotes above
    227 			 * -- which should prevail?
    228 			 */
    229 			while (*compilepat != EOS) {
    230 				if (*compilepat == QUOTE) {
    231 					if (*++compilepat == EOS)
    232 						--compilepat;
    233 				}
    234 				*compilebuf++ = (u_char)*compilepat++;
    235 			}
    236 			*compilebuf = EOS;
    237 		}
    238 		return(globextend(patbuf, pglob));
    239 	} else if (!(flags & GLOB_NOSORT))
    240 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
    241 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
    242 	return(0);
    243 }
    244 
    245 static int
    246 compare(p, q)
    247 	const void *p, *q;
    248 {
    249 	return(strcmp(*(char **)p, *(char **)q));
    250 }
    251 
    252 static
    253 glob1(pattern, pglob)
    254 	Char *pattern;
    255 	glob_t *pglob;
    256 {
    257 	Char pathbuf[MAXPATHLEN+1];
    258 
    259 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
    260 	if (*pattern == EOS)
    261 		return(0);
    262 	return(glob2(pathbuf, pathbuf, pattern, pglob));
    263 }
    264 
    265 /*
    266  * The functions glob2 and glob3 are mutually recursive; there is one level
    267  * of recursion for each segment in the pattern that contains one or more
    268  * meta characters.
    269  */
    270 static
    271 glob2(pathbuf, pathend, pattern, pglob)
    272 	Char *pathbuf, *pathend, *pattern;
    273 	glob_t *pglob;
    274 {
    275 	struct stat sb;
    276 	Char *p, *q;
    277 	int anymeta;
    278 
    279 	/*
    280 	 * Loop over pattern segments until end of pattern or until
    281 	 * segment with meta character found.
    282 	 */
    283 	for (anymeta = 0;;) {
    284 		if (*pattern == EOS) {		/* End of pattern? */
    285 			*pathend = EOS;
    286 			if (g_stat(pathbuf, &sb))
    287 				return(0);
    288 
    289 			if (((pglob->gl_flags & GLOB_MARK) &&
    290 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
    291 			    || (S_ISLNK(sb.st_mode) &&
    292 			    (g_stat(pathbuf, &sb) == 0) &&
    293 			    S_ISDIR(sb.st_mode)))) {
    294 				*pathend++ = SEP;
    295 				*pathend = EOS;
    296 			}
    297 			++pglob->gl_matchc;
    298 			return(globextend(pathbuf, pglob));
    299 		}
    300 
    301 		/* Find end of next segment, copy tentatively to pathend. */
    302 		q = pathend;
    303 		p = pattern;
    304 		while (*p != EOS && *p != SEP) {
    305 			if (ismeta(*p))
    306 				anymeta = 1;
    307 			*q++ = *p++;
    308 		}
    309 
    310 		if (!anymeta) {		/* No expansion, do next segment. */
    311 			pathend = q;
    312 			pattern = p;
    313 			while (*pattern == SEP)
    314 				*pathend++ = *pattern++;
    315 		} else			/* Need expansion, recurse. */
    316 			return(glob3(pathbuf, pathend, pattern, p, pglob));
    317 	}
    318 	/* NOTREACHED */
    319 }
    320 
    321 static
    322 glob3(pathbuf, pathend, pattern, restpattern, pglob)
    323 	Char *pathbuf, *pathend, *pattern, *restpattern;
    324 	glob_t *pglob;
    325 {
    326 	register struct dirent *dp;
    327 	DIR *dirp;
    328 	int len, err;
    329 
    330 	*pathend = EOS;
    331 	errno = 0;
    332 
    333 	if (!(dirp = g_opendir(pathbuf)))
    334 		/* TODO: don't call for ENOENT or ENOTDIR? */
    335 		if (pglob->gl_errfunc &&
    336 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
    337 		    (pglob->gl_flags & GLOB_ERR))
    338 			return(GLOB_ABEND);
    339 		else
    340 			return(0);
    341 
    342 	err = 0;
    343 
    344 	/* Search directory for matching names. */
    345 	while ((dp = readdir(dirp))) {
    346 		register u_char *sc;
    347 		register Char *dc;
    348 
    349 		/* Initial DOT must be matched literally. */
    350 		if (dp->d_name[0] == DOT && *pattern != DOT)
    351 			continue;
    352 		for (sc = (u_char *) dp->d_name, dc = pathend;
    353 		     *dc++ = *sc++;);
    354 		if (!match(pathend, pattern, restpattern)) {
    355 			*pathend = EOS;
    356 			continue;
    357 		}
    358 		err = glob2(pathbuf, --dc, restpattern, pglob);
    359 		if (err)
    360 			break;
    361 	}
    362 
    363 	/* TODO: check error from readdir? */
    364 	(void)closedir(dirp);
    365 	return(err);
    366 }
    367 
    368 
    369 /*
    370  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
    371  * add the new item, and update gl_pathc.
    372  *
    373  * This assumes the BSD realloc, which only copies the block when its size
    374  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
    375  * behavior.
    376  *
    377  * Return 0 if new item added, error code if memory couldn't be allocated.
    378  *
    379  * Invariant of the glob_t structure:
    380  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
    381  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
    382  */
    383 static int
    384 globextend(path, pglob)
    385 	Char *path;
    386 	glob_t *pglob;
    387 {
    388 	register char **pathv;
    389 	register int i;
    390 	u_int newsize;
    391 	char *copy;
    392 	Char *p;
    393 
    394 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
    395 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
    396 	if (pathv == NULL)
    397 		return(GLOB_NOSPACE);
    398 
    399 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
    400 		/* first time around -- clear initial gl_offs items */
    401 		pathv += pglob->gl_offs;
    402 		for (i = pglob->gl_offs; --i >= 0; )
    403 			*--pathv = NULL;
    404 	}
    405 	pglob->gl_pathv = pathv;
    406 
    407 	for (p = path; *p++;);
    408 	if ((copy = malloc(p - path)) != NULL) {
    409 		g_Ctoc(path, copy);
    410 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
    411 	}
    412 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
    413 	return(copy == NULL ? GLOB_NOSPACE : 0);
    414 }
    415 
    416 
    417 /*
    418  * pattern matching function for filenames.  Each occurrence of the *
    419  * pattern causes a recursion level.
    420  */
    421 static
    422 match(name, pat, patend)
    423 	register Char *name, *pat, *patend;
    424 {
    425 	int ok, negate_range;
    426 	Char c, k;
    427 
    428 	while (pat < patend) {
    429 		c = *pat++;
    430 		switch (c & M_MASK) {
    431 		case M_ALL:
    432 			if (pat == patend)
    433 				return(1);
    434 			for (; *name != EOS; ++name)
    435 				if (match(name, pat, patend))
    436 					return(1);
    437 			return(0);
    438 		case M_ONE:
    439 			if (*name++ == EOS)
    440 				return(0);
    441 			break;
    442 		case M_SET:
    443 			ok = 0;
    444 			k = *name++;
    445 			if (negate_range = ((*pat & M_MASK) == M_NOT))
    446 				++pat;
    447 			while (((c = *pat++) & M_MASK) != M_END)
    448 				if ((*pat & M_MASK) == M_RNG) {
    449 					if (c <= k && k <= pat[1])
    450 						ok = 1;
    451 					pat += 2;
    452 				} else if (c == k)
    453 					ok = 1;
    454 			if (ok == negate_range)
    455 				return(0);
    456 			break;
    457 		default:
    458 			if (*name++ != c)
    459 				return(0);
    460 			break;
    461 		}
    462 	}
    463 	return(*name == EOS);
    464 }
    465 
    466 /* Free allocated data belonging to a glob_t structure. */
    467 void
    468 globfree(pglob)
    469 	glob_t *pglob;
    470 {
    471 	register int i;
    472 	register char **pp;
    473 
    474 	if (pglob->gl_pathv != NULL) {
    475 		pp = pglob->gl_pathv + pglob->gl_offs;
    476 		for (i = pglob->gl_pathc; i--; ++pp)
    477 			if (*pp)
    478 				free(*pp);
    479 		free(pglob->gl_pathv);
    480 	}
    481 }
    482 
    483 static DIR *
    484 g_opendir(str)
    485 	register Char *str;
    486 {
    487 	char buf[MAXPATHLEN];
    488 
    489 	if (!*str)
    490 		return(opendir("."));
    491 	g_Ctoc(str, buf);
    492 	return(opendir(buf));
    493 }
    494 
    495 static int
    496 g_lstat(fn, sb)
    497 	register Char *fn;
    498 	struct stat *sb;
    499 {
    500 	char buf[MAXPATHLEN];
    501 
    502 	g_Ctoc(fn, buf);
    503 	return(lstat(buf, sb));
    504 }
    505 
    506 static int
    507 g_stat(fn, sb)
    508 	register Char *fn;
    509 	struct stat *sb;
    510 {
    511 	char buf[MAXPATHLEN];
    512 
    513 	g_Ctoc(fn, buf);
    514 	return(stat(buf, sb));
    515 }
    516 
    517 static Char *
    518 g_strchr(str, ch)
    519 	Char *str;
    520 	int ch;
    521 {
    522 	do {
    523 		if (*str == ch)
    524 			return (str);
    525 	} while (*str++);
    526 	return (NULL);
    527 }
    528 
    529 static void
    530 g_Ctoc(str, buf)
    531 	register Char *str;
    532 	char *buf;
    533 {
    534 	register char *dc;
    535 
    536 	for (dc = buf; *dc++ = *str++;);
    537 }
    538 
    539 #ifdef DEBUG
    540 static void
    541 qprintf(s)
    542 	register Char *s;
    543 {
    544 	register Char *p;
    545 
    546 	for (p = s; *p; p++)
    547 		(void)printf("%c", *p & 0xff);
    548 	(void)printf("\n");
    549 	for (p = s; *p; p++)
    550 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
    551 	(void)printf("\n");
    552 	for (p = s; *p; p++)
    553 		(void)printf("%c", *p & M_META ? '_' : ' ');
    554 	(void)printf("\n");
    555 }
    556 #endif
    557