glob.c revision 1.2 1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 /*static char sccsid[] = "from: @(#)glob.c 5.18 (Berkeley) 12/4/92";*/
39 static char rcsid[] = "$Id: glob.c,v 1.2 1993/07/30 07:57:52 mycroft Exp $";
40 #endif /* LIBC_SCCS and not lint */
41
42 /*
43 * glob(3) -- a superset of the one defined in POSIX 1003.2.
44 *
45 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
46 *
47 * Optional extra services, controlled by flags not defined by POSIX:
48 *
49 * GLOB_QUOTE:
50 * Escaping convention: \ inhibits any special meaning the following
51 * character might have (except \ at end of string is retained).
52 * GLOB_MAGCHAR:
53 * Set in gl_flags if pattern contained a globbing character.
54 * GLOB_NOMAGIC:
55 * Same as GLOB_NOCHECK, but it will only append pattern if it did
56 * not contain any magic characters. [Used in csh style globbing]
57 * GLOB_ALTDIRFUNC:
58 * Use alternately specified directory access functions.
59 * gl_matchc:
60 * Number of matches in the current invocation of glob.
61 */
62
63 #include <sys/param.h>
64 #include <sys/stat.h>
65 #include <dirent.h>
66 #include <glob.h>
67 #include <ctype.h>
68 #include <errno.h>
69 #include <string.h>
70 #include <stdio.h>
71 #include <stdlib.h>
72
73 #define DOLLAR '$'
74 #define DOT '.'
75 #define EOS '\0'
76 #define LBRACKET '['
77 #define NOT '!'
78 #define QUESTION '?'
79 #define QUOTE '\\'
80 #define RANGE '-'
81 #define RBRACKET ']'
82 #define SEP '/'
83 #define STAR '*'
84 #define TILDE '~'
85 #define UNDERSCORE '_'
86
87 #define M_QUOTE 0x8000
88 #define M_PROTECT 0x4000
89 #define M_MASK 0xffff
90 #define M_ASCII 0x00ff
91
92 #define CHAR(c) ((c)&M_ASCII)
93 #define META(c) ((c)|M_QUOTE)
94 #define M_ALL META('*')
95 #define M_END META(']')
96 #define M_NOT META('!')
97 #define M_ONE META('?')
98 #define M_RNG META('-')
99 #define M_SET META('[')
100 #define ismeta(c) (((c)&M_QUOTE) != 0)
101
102 typedef u_short Char;
103
104 static int compare __P((const void *, const void *));
105 static void g_Ctoc __P((Char *, char *));
106 static int g_lstat __P((Char *, struct stat *, glob_t *));
107 static DIR *g_opendir __P((Char *, glob_t *));
108 static Char *g_strchr __P((Char *, int));
109 static int g_stat __P((Char *, struct stat *, glob_t *));
110 static int glob1 __P((Char *, glob_t *));
111 static int glob2 __P((Char *, Char *, Char *, glob_t *));
112 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
113 static int globextend __P((Char *, glob_t *));
114 static int match __P((Char *, Char *, Char *));
115 #ifdef DEBUG
116 static void qprintf __P((Char *));
117 #endif
118
119 /*
120 * The main glob() routine: compiles the pattern (optionally processing
121 * quotes), calls glob1() to do the real pattern matching, and finally
122 * sorts the list (unless unsorted operation is requested). Returns 0
123 * if things went well, nonzero if errors occurred. It is not an error
124 * to find no matches.
125 */
126 glob(pattern, flags, errfunc, pglob)
127 const char *pattern;
128 int flags, (*errfunc) __P((char *, int));
129 glob_t *pglob;
130 {
131 const u_char *compilepat, *patnext;
132 int c, err, oldpathc;
133 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
134
135 patnext = (u_char *) pattern;
136 if (!(flags & GLOB_APPEND)) {
137 pglob->gl_pathc = 0;
138 pglob->gl_pathv = NULL;
139 if (!(flags & GLOB_DOOFFS))
140 pglob->gl_offs = 0;
141 }
142 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
143 pglob->gl_errfunc = errfunc;
144 oldpathc = pglob->gl_pathc;
145 pglob->gl_matchc = 0;
146
147 bufnext = patbuf;
148 bufend = bufnext + MAXPATHLEN;
149 compilebuf = bufnext;
150 compilepat = patnext;
151 if (flags & GLOB_QUOTE) {
152 /* Protect the quoted characters. */
153 while (bufnext < bufend && (c = *patnext++) != EOS)
154 if (c == QUOTE) {
155 if ((c = *patnext++) == EOS) {
156 c = QUOTE;
157 --patnext;
158 }
159 *bufnext++ = c | M_PROTECT;
160 }
161 else
162 *bufnext++ = c;
163 }
164 else
165 while (bufnext < bufend && (c = *patnext++) != EOS)
166 *bufnext++ = c;
167 *bufnext = EOS;
168
169 bufnext = patbuf;
170 qpatnext = patbuf;
171 /* We don't need to check for buffer overflow any more. */
172 while ((c = *qpatnext++) != EOS) {
173 switch (c) {
174 case LBRACKET:
175 c = *qpatnext;
176 if (c == NOT)
177 ++qpatnext;
178 if (*qpatnext == EOS ||
179 g_strchr(qpatnext+1, RBRACKET) == NULL) {
180 *bufnext++ = LBRACKET;
181 if (c == NOT)
182 --qpatnext;
183 break;
184 }
185 *bufnext++ = M_SET;
186 if (c == NOT)
187 *bufnext++ = M_NOT;
188 c = *qpatnext++;
189 do {
190 *bufnext++ = CHAR(c);
191 if (*qpatnext == RANGE &&
192 (c = qpatnext[1]) != RBRACKET) {
193 *bufnext++ = M_RNG;
194 *bufnext++ = CHAR(c);
195 qpatnext += 2;
196 }
197 } while ((c = *qpatnext++) != RBRACKET);
198 pglob->gl_flags |= GLOB_MAGCHAR;
199 *bufnext++ = M_END;
200 break;
201 case QUESTION:
202 pglob->gl_flags |= GLOB_MAGCHAR;
203 *bufnext++ = M_ONE;
204 break;
205 case STAR:
206 pglob->gl_flags |= GLOB_MAGCHAR;
207 /* collapse adjacent stars to one,
208 * to avoid exponential behavior
209 */
210 if (bufnext == patbuf || bufnext[-1] != M_ALL)
211 *bufnext++ = M_ALL;
212 break;
213 default:
214 *bufnext++ = CHAR(c);
215 break;
216 }
217 }
218 *bufnext = EOS;
219 #ifdef DEBUG
220 qprintf(patbuf);
221 #endif
222
223 if ((err = glob1(patbuf, pglob)) != 0)
224 return(err);
225
226 /*
227 * If there was no match we are going to append the pattern
228 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
229 * and the pattern did not contain any magic characters
230 * GLOB_NOMAGIC is there just for compatibility with csh.
231 */
232 if (pglob->gl_pathc == oldpathc &&
233 ((flags & GLOB_NOCHECK) ||
234 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
235 if (!(flags & GLOB_QUOTE)) {
236 Char *dp = compilebuf;
237 const u_char *sp = compilepat;
238 while (*dp++ = *sp++);
239 }
240 else {
241 /*
242 * Copy pattern, interpreting quotes; this is slightly
243 * different than the interpretation of quotes above
244 * -- which should prevail?
245 */
246 while (*compilepat != EOS) {
247 if (*compilepat == QUOTE) {
248 if (*++compilepat == EOS)
249 --compilepat;
250 }
251 *compilebuf++ = (u_char)*compilepat++;
252 }
253 *compilebuf = EOS;
254 }
255 return(globextend(patbuf, pglob));
256 } else if (!(flags & GLOB_NOSORT))
257 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
258 pglob->gl_pathc - oldpathc, sizeof(char *), compare);
259 return(0);
260 }
261
262 static int
263 compare(p, q)
264 const void *p, *q;
265 {
266 return(strcmp(*(char **)p, *(char **)q));
267 }
268
269 static
270 glob1(pattern, pglob)
271 Char *pattern;
272 glob_t *pglob;
273 {
274 Char pathbuf[MAXPATHLEN+1];
275
276 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
277 if (*pattern == EOS)
278 return(0);
279 return(glob2(pathbuf, pathbuf, pattern, pglob));
280 }
281
282 /*
283 * The functions glob2 and glob3 are mutually recursive; there is one level
284 * of recursion for each segment in the pattern that contains one or more
285 * meta characters.
286 */
287 static
288 glob2(pathbuf, pathend, pattern, pglob)
289 Char *pathbuf, *pathend, *pattern;
290 glob_t *pglob;
291 {
292 struct stat sb;
293 Char *p, *q;
294 int anymeta;
295
296 /*
297 * Loop over pattern segments until end of pattern or until
298 * segment with meta character found.
299 */
300 for (anymeta = 0;;) {
301 if (*pattern == EOS) { /* End of pattern? */
302 *pathend = EOS;
303 if (g_lstat(pathbuf, &sb, pglob))
304 return(0);
305
306 if (((pglob->gl_flags & GLOB_MARK) &&
307 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
308 || (S_ISLNK(sb.st_mode) &&
309 (g_stat(pathbuf, &sb, pglob) == 0) &&
310 S_ISDIR(sb.st_mode)))) {
311 *pathend++ = SEP;
312 *pathend = EOS;
313 }
314 ++pglob->gl_matchc;
315 return(globextend(pathbuf, pglob));
316 }
317
318 /* Find end of next segment, copy tentatively to pathend. */
319 q = pathend;
320 p = pattern;
321 while (*p != EOS && *p != SEP) {
322 if (ismeta(*p))
323 anymeta = 1;
324 *q++ = *p++;
325 }
326
327 if (!anymeta) { /* No expansion, do next segment. */
328 pathend = q;
329 pattern = p;
330 while (*pattern == SEP)
331 *pathend++ = *pattern++;
332 } else /* Need expansion, recurse. */
333 return(glob3(pathbuf, pathend, pattern, p, pglob));
334 }
335 /* NOTREACHED */
336 }
337
338 static
339 glob3(pathbuf, pathend, pattern, restpattern, pglob)
340 Char *pathbuf, *pathend, *pattern, *restpattern;
341 glob_t *pglob;
342 {
343 register struct dirent *dp;
344 struct dirent *(*readdirfunc)();
345 DIR *dirp;
346 int len, err;
347 char buf[MAXPATHLEN];
348
349 *pathend = EOS;
350 errno = 0;
351
352 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
353 /* TODO: don't call for ENOENT or ENOTDIR? */
354 if (pglob->gl_errfunc) {
355 g_Ctoc(pathbuf, buf);
356 if (pglob->gl_errfunc(buf, errno) ||
357 pglob->gl_flags & GLOB_ERR)
358 return (GLOB_ABEND);
359 }
360 return(0);
361 }
362
363 err = 0;
364
365 /* Search directory for matching names. */
366 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
367 readdirfunc = pglob->gl_readdir;
368 else
369 readdirfunc = readdir;
370 while ((dp = (*readdirfunc)(dirp))) {
371 register u_char *sc;
372 register Char *dc;
373
374 /* Initial DOT must be matched literally. */
375 if (dp->d_name[0] == DOT && *pattern != DOT)
376 continue;
377 for (sc = (u_char *) dp->d_name, dc = pathend;
378 *dc++ = *sc++;);
379 if (!match(pathend, pattern, restpattern)) {
380 *pathend = EOS;
381 continue;
382 }
383 err = glob2(pathbuf, --dc, restpattern, pglob);
384 if (err)
385 break;
386 }
387
388 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
389 (*pglob->gl_closedir)(dirp);
390 else
391 closedir(dirp);
392 return(err);
393 }
394
395
396 /*
397 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
398 * add the new item, and update gl_pathc.
399 *
400 * This assumes the BSD realloc, which only copies the block when its size
401 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
402 * behavior.
403 *
404 * Return 0 if new item added, error code if memory couldn't be allocated.
405 *
406 * Invariant of the glob_t structure:
407 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
408 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
409 */
410 static int
411 globextend(path, pglob)
412 Char *path;
413 glob_t *pglob;
414 {
415 register char **pathv;
416 register int i;
417 u_int newsize;
418 char *copy;
419 Char *p;
420
421 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
422 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
423 if (pathv == NULL)
424 return(GLOB_NOSPACE);
425
426 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
427 /* first time around -- clear initial gl_offs items */
428 pathv += pglob->gl_offs;
429 for (i = pglob->gl_offs; --i >= 0; )
430 *--pathv = NULL;
431 }
432 pglob->gl_pathv = pathv;
433
434 for (p = path; *p++;);
435 if ((copy = malloc(p - path)) != NULL) {
436 g_Ctoc(path, copy);
437 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
438 }
439 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
440 return(copy == NULL ? GLOB_NOSPACE : 0);
441 }
442
443
444 /*
445 * pattern matching function for filenames. Each occurrence of the *
446 * pattern causes a recursion level.
447 */
448 static
449 match(name, pat, patend)
450 register Char *name, *pat, *patend;
451 {
452 int ok, negate_range;
453 Char c, k;
454
455 while (pat < patend) {
456 c = *pat++;
457 switch (c & M_MASK) {
458 case M_ALL:
459 if (pat == patend)
460 return(1);
461 do
462 if (match(name, pat, patend))
463 return(1);
464 while (*name++ != EOS);
465 return(0);
466 case M_ONE:
467 if (*name++ == EOS)
468 return(0);
469 break;
470 case M_SET:
471 ok = 0;
472 if ((k = *name++) == EOS)
473 return(0);
474 if (negate_range = ((*pat & M_MASK) == M_NOT))
475 ++pat;
476 while (((c = *pat++) & M_MASK) != M_END)
477 if ((*pat & M_MASK) == M_RNG) {
478 if (c <= k && k <= pat[1])
479 ok = 1;
480 pat += 2;
481 } else if (c == k)
482 ok = 1;
483 if (ok == negate_range)
484 return(0);
485 break;
486 default:
487 if (*name++ != c)
488 return(0);
489 break;
490 }
491 }
492 return(*name == EOS);
493 }
494
495 /* Free allocated data belonging to a glob_t structure. */
496 void
497 globfree(pglob)
498 glob_t *pglob;
499 {
500 register int i;
501 register char **pp;
502
503 if (pglob->gl_pathv != NULL) {
504 pp = pglob->gl_pathv + pglob->gl_offs;
505 for (i = pglob->gl_pathc; i--; ++pp)
506 if (*pp)
507 free(*pp);
508 free(pglob->gl_pathv);
509 }
510 }
511
512 static DIR *
513 g_opendir(str, pglob)
514 register Char *str;
515 glob_t *pglob;
516 {
517 char buf[MAXPATHLEN];
518 char *dirname;
519
520 if (!*str)
521 strcpy(buf, ".");
522 else
523 g_Ctoc(str, buf);
524 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
525 return((*pglob->gl_opendir)(buf));
526 return(opendir(buf));
527 }
528
529 static int
530 g_lstat(fn, sb, pglob)
531 register Char *fn;
532 struct stat *sb;
533 glob_t *pglob;
534 {
535 char buf[MAXPATHLEN];
536
537 g_Ctoc(fn, buf);
538 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
539 return((*pglob->gl_lstat)(buf, sb));
540 return(lstat(buf, sb));
541 }
542
543 static int
544 g_stat(fn, sb, pglob)
545 register Char *fn;
546 struct stat *sb;
547 glob_t *pglob;
548 {
549 char buf[MAXPATHLEN];
550
551 g_Ctoc(fn, buf);
552 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
553 return((*pglob->gl_stat)(buf, sb));
554 return(stat(buf, sb));
555 }
556
557 static Char *
558 g_strchr(str, ch)
559 Char *str;
560 int ch;
561 {
562 do {
563 if (*str == ch)
564 return (str);
565 } while (*str++);
566 return (NULL);
567 }
568
569 static void
570 g_Ctoc(str, buf)
571 register Char *str;
572 char *buf;
573 {
574 register char *dc;
575
576 for (dc = buf; *dc++ = *str++;);
577 }
578
579 #ifdef DEBUG
580 static void
581 qprintf(s)
582 register Char *s;
583 {
584 register Char *p;
585
586 for (p = s; *p; p++)
587 (void)printf("%c", CHAR(*p));
588 (void)printf("\n");
589 for (p = s; *p; p++)
590 (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
591 (void)printf("\n");
592 for (p = s; *p; p++)
593 (void)printf("%c", ismeta(*p) ? '_' : ' ');
594 (void)printf("\n");
595 }
596 #endif
597