glob.c revision 1.1 1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91";
39 #endif /* LIBC_SCCS and not lint */
40
41 /*
42 * glob(3) -- a superset of the one defined in POSIX 1003.2.
43 *
44 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
45 *
46 * Optional extra services, controlled by flags not defined by POSIX:
47 *
48 * GLOB_QUOTE:
49 * Escaping convention: \ inhibits any special meaning the following
50 * character might have (except \ at end of string is retained).
51 * GLOB_MAGCHAR:
52 * Set in gl_flags if pattern contained a globbing character.
53 * gl_matchc:
54 * Number of matches in the current invocation of glob.
55 */
56
57 #include <sys/cdefs.h>
58 #include <sys/param.h>
59 #include <sys/stat.h>
60 #include <dirent.h>
61 #include <glob.h>
62 #include <ctype.h>
63 #include <errno.h>
64 #include <string.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67
68 #define DOLLAR '$'
69 #define DOT '.'
70 #define EOS '\0'
71 #define LBRACKET '['
72 #define NOT '!'
73 #define QUESTION '?'
74 #define QUOTE '\\'
75 #define RANGE '-'
76 #define RBRACKET ']'
77 #define SEP '/'
78 #define STAR '*'
79 #define TILDE '~'
80 #define UNDERSCORE '_'
81
82 #define M_QUOTE 0x8000
83 #define M_PROTECT 0x4000
84 #define M_MASK 0xffff
85 #define M_ASCII 0x00ff
86
87 #define CHAR(c) ((c)&M_ASCII)
88 #define META(c) ((c)|M_QUOTE)
89 #define M_ALL META('*')
90 #define M_END META(']')
91 #define M_NOT META('!')
92 #define M_ONE META('?')
93 #define M_RNG META('-')
94 #define M_SET META('[')
95 #define ismeta(c) (((c)&M_QUOTE) != 0)
96
97 typedef u_short Char;
98
99 static int compare __P((const void *, const void *));
100 static void g_Ctoc __P((Char *, char *));
101 static int g_lstat __P((Char *, struct stat *));
102 static DIR *g_opendir __P((Char *));
103 static Char *g_strchr __P((Char *, int));
104 static int g_stat __P((Char *, struct stat *));
105 static int glob1 __P((Char *, glob_t *));
106 static int glob2 __P((Char *, Char *, Char *, glob_t *));
107 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
108 static int globextend __P((Char *, glob_t *));
109 static int match __P((Char *, Char *, Char *));
110 #ifdef DEBUG
111 static void qprintf __P((Char *));
112 #endif
113
114 /*
115 * The main glob() routine: compiles the pattern (optionally processing
116 * quotes), calls glob1() to do the real pattern matching, and finally
117 * sorts the list (unless unsorted operation is requested). Returns 0
118 * if things went well, nonzero if errors occurred. It is not an error
119 * to find no matches.
120 */
121 glob(pattern, flags, errfunc, pglob)
122 const char *pattern;
123 int flags, (*errfunc) __P((char *, int));
124 glob_t *pglob;
125 {
126 const u_char *compilepat, *patnext;
127 int c, err, oldpathc;
128 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
129
130 patnext = (u_char *) pattern;
131 if (!(flags & GLOB_APPEND)) {
132 pglob->gl_pathc = 0;
133 pglob->gl_pathv = NULL;
134 if (!(flags & GLOB_DOOFFS))
135 pglob->gl_offs = 0;
136 }
137 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
138 pglob->gl_errfunc = errfunc;
139 oldpathc = pglob->gl_pathc;
140 pglob->gl_matchc = 0;
141
142 bufnext = patbuf;
143 bufend = bufnext + MAXPATHLEN;
144 compilebuf = bufnext;
145 compilepat = patnext;
146 if (flags & GLOB_QUOTE) {
147 /* Protect the quoted characters. */
148 while (bufnext < bufend && (c = *patnext++) != EOS)
149 if (c == QUOTE) {
150 if ((c = *patnext++) == EOS) {
151 c = QUOTE;
152 --patnext;
153 }
154 *bufnext++ = c | M_PROTECT;
155 }
156 else
157 *bufnext++ = c;
158 }
159 else
160 while (bufnext < bufend && (c = *patnext++) != EOS)
161 *bufnext++ = c;
162 *bufnext = EOS;
163
164 bufnext = patbuf;
165 qpatnext = patbuf;
166 /* We don't need to check for buffer overflow any more. */
167 while ((c = *qpatnext++) != EOS) {
168 switch (c) {
169 case LBRACKET:
170 pglob->gl_flags |= GLOB_MAGCHAR;
171 c = *qpatnext;
172 if (c == NOT)
173 ++qpatnext;
174 if (*qpatnext == EOS ||
175 g_strchr(qpatnext+1, RBRACKET) == NULL) {
176 *bufnext++ = LBRACKET;
177 if (c == NOT)
178 --qpatnext;
179 break;
180 }
181 *bufnext++ = M_SET;
182 if (c == NOT)
183 *bufnext++ = M_NOT;
184 c = *qpatnext++;
185 do {
186 *bufnext++ = CHAR(c);
187 if (*qpatnext == RANGE &&
188 (c = qpatnext[1]) != RBRACKET) {
189 *bufnext++ = M_RNG;
190 *bufnext++ = CHAR(c);
191 qpatnext += 2;
192 }
193 } while ((c = *qpatnext++) != RBRACKET);
194 *bufnext++ = M_END;
195 break;
196 case QUESTION:
197 pglob->gl_flags |= GLOB_MAGCHAR;
198 *bufnext++ = M_ONE;
199 break;
200 case STAR:
201 pglob->gl_flags |= GLOB_MAGCHAR;
202 *bufnext++ = M_ALL;
203 break;
204 default:
205 *bufnext++ = CHAR(c);
206 break;
207 }
208 }
209 *bufnext = EOS;
210 #ifdef DEBUG
211 qprintf(patbuf);
212 #endif
213
214 if ((err = glob1(patbuf, pglob)) != 0)
215 return(err);
216
217 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
218 if (!(flags & GLOB_QUOTE)) {
219 Char *dp = compilebuf;
220 const u_char *sp = compilepat;
221 while (*dp++ = *sp++);
222 }
223 else {
224 /*
225 * Copy pattern, interpreting quotes; this is slightly
226 * different than the interpretation of quotes above
227 * -- which should prevail?
228 */
229 while (*compilepat != EOS) {
230 if (*compilepat == QUOTE) {
231 if (*++compilepat == EOS)
232 --compilepat;
233 }
234 *compilebuf++ = (u_char)*compilepat++;
235 }
236 *compilebuf = EOS;
237 }
238 return(globextend(patbuf, pglob));
239 } else if (!(flags & GLOB_NOSORT))
240 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
241 pglob->gl_pathc - oldpathc, sizeof(char *), compare);
242 return(0);
243 }
244
245 static int
246 compare(p, q)
247 const void *p, *q;
248 {
249 return(strcmp(*(char **)p, *(char **)q));
250 }
251
252 static
253 glob1(pattern, pglob)
254 Char *pattern;
255 glob_t *pglob;
256 {
257 Char pathbuf[MAXPATHLEN+1];
258
259 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
260 if (*pattern == EOS)
261 return(0);
262 return(glob2(pathbuf, pathbuf, pattern, pglob));
263 }
264
265 /*
266 * The functions glob2 and glob3 are mutually recursive; there is one level
267 * of recursion for each segment in the pattern that contains one or more
268 * meta characters.
269 */
270 static
271 glob2(pathbuf, pathend, pattern, pglob)
272 Char *pathbuf, *pathend, *pattern;
273 glob_t *pglob;
274 {
275 struct stat sb;
276 Char *p, *q;
277 int anymeta;
278
279 /*
280 * Loop over pattern segments until end of pattern or until
281 * segment with meta character found.
282 */
283 for (anymeta = 0;;) {
284 if (*pattern == EOS) { /* End of pattern? */
285 *pathend = EOS;
286 if (g_stat(pathbuf, &sb))
287 return(0);
288
289 if (((pglob->gl_flags & GLOB_MARK) &&
290 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
291 || (S_ISLNK(sb.st_mode) &&
292 (g_stat(pathbuf, &sb) == 0) &&
293 S_ISDIR(sb.st_mode)))) {
294 *pathend++ = SEP;
295 *pathend = EOS;
296 }
297 ++pglob->gl_matchc;
298 return(globextend(pathbuf, pglob));
299 }
300
301 /* Find end of next segment, copy tentatively to pathend. */
302 q = pathend;
303 p = pattern;
304 while (*p != EOS && *p != SEP) {
305 if (ismeta(*p))
306 anymeta = 1;
307 *q++ = *p++;
308 }
309
310 if (!anymeta) { /* No expansion, do next segment. */
311 pathend = q;
312 pattern = p;
313 while (*pattern == SEP)
314 *pathend++ = *pattern++;
315 } else /* Need expansion, recurse. */
316 return(glob3(pathbuf, pathend, pattern, p, pglob));
317 }
318 /* NOTREACHED */
319 }
320
321 static
322 glob3(pathbuf, pathend, pattern, restpattern, pglob)
323 Char *pathbuf, *pathend, *pattern, *restpattern;
324 glob_t *pglob;
325 {
326 register struct dirent *dp;
327 DIR *dirp;
328 int len, err;
329
330 *pathend = EOS;
331 errno = 0;
332
333 if (!(dirp = g_opendir(pathbuf)))
334 /* TODO: don't call for ENOENT or ENOTDIR? */
335 if (pglob->gl_errfunc &&
336 (*pglob->gl_errfunc)(pathbuf, errno) ||
337 (pglob->gl_flags & GLOB_ERR))
338 return(GLOB_ABEND);
339 else
340 return(0);
341
342 err = 0;
343
344 /* Search directory for matching names. */
345 while ((dp = readdir(dirp))) {
346 register u_char *sc;
347 register Char *dc;
348
349 /* Initial DOT must be matched literally. */
350 if (dp->d_name[0] == DOT && *pattern != DOT)
351 continue;
352 for (sc = (u_char *) dp->d_name, dc = pathend;
353 *dc++ = *sc++;);
354 if (!match(pathend, pattern, restpattern)) {
355 *pathend = EOS;
356 continue;
357 }
358 err = glob2(pathbuf, --dc, restpattern, pglob);
359 if (err)
360 break;
361 }
362
363 /* TODO: check error from readdir? */
364 (void)closedir(dirp);
365 return(err);
366 }
367
368
369 /*
370 * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
371 * add the new item, and update gl_pathc.
372 *
373 * This assumes the BSD realloc, which only copies the block when its size
374 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
375 * behavior.
376 *
377 * Return 0 if new item added, error code if memory couldn't be allocated.
378 *
379 * Invariant of the glob_t structure:
380 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
381 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
382 */
383 static int
384 globextend(path, pglob)
385 Char *path;
386 glob_t *pglob;
387 {
388 register char **pathv;
389 register int i;
390 u_int newsize;
391 char *copy;
392 Char *p;
393
394 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
395 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
396 if (pathv == NULL)
397 return(GLOB_NOSPACE);
398
399 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
400 /* first time around -- clear initial gl_offs items */
401 pathv += pglob->gl_offs;
402 for (i = pglob->gl_offs; --i >= 0; )
403 *--pathv = NULL;
404 }
405 pglob->gl_pathv = pathv;
406
407 for (p = path; *p++;);
408 if ((copy = malloc(p - path)) != NULL) {
409 g_Ctoc(path, copy);
410 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
411 }
412 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
413 return(copy == NULL ? GLOB_NOSPACE : 0);
414 }
415
416
417 /*
418 * pattern matching function for filenames. Each occurrence of the *
419 * pattern causes a recursion level.
420 */
421 static
422 match(name, pat, patend)
423 register Char *name, *pat, *patend;
424 {
425 int ok, negate_range;
426 Char c, k;
427
428 while (pat < patend) {
429 c = *pat++;
430 switch (c & M_MASK) {
431 case M_ALL:
432 if (pat == patend)
433 return(1);
434 for (; *name != EOS; ++name)
435 if (match(name, pat, patend))
436 return(1);
437 return(0);
438 case M_ONE:
439 if (*name++ == EOS)
440 return(0);
441 break;
442 case M_SET:
443 ok = 0;
444 k = *name++;
445 if (negate_range = ((*pat & M_MASK) == M_NOT))
446 ++pat;
447 while (((c = *pat++) & M_MASK) != M_END)
448 if ((*pat & M_MASK) == M_RNG) {
449 if (c <= k && k <= pat[1])
450 ok = 1;
451 pat += 2;
452 } else if (c == k)
453 ok = 1;
454 if (ok == negate_range)
455 return(0);
456 break;
457 default:
458 if (*name++ != c)
459 return(0);
460 break;
461 }
462 }
463 return(*name == EOS);
464 }
465
466 /* Free allocated data belonging to a glob_t structure. */
467 void
468 globfree(pglob)
469 glob_t *pglob;
470 {
471 register int i;
472 register char **pp;
473
474 if (pglob->gl_pathv != NULL) {
475 pp = pglob->gl_pathv + pglob->gl_offs;
476 for (i = pglob->gl_pathc; i--; ++pp)
477 if (*pp)
478 free(*pp);
479 free(pglob->gl_pathv);
480 }
481 }
482
483 static DIR *
484 g_opendir(str)
485 register Char *str;
486 {
487 char buf[MAXPATHLEN];
488
489 if (!*str)
490 return(opendir("."));
491 g_Ctoc(str, buf);
492 return(opendir(buf));
493 }
494
495 static int
496 g_lstat(fn, sb)
497 register Char *fn;
498 struct stat *sb;
499 {
500 char buf[MAXPATHLEN];
501
502 g_Ctoc(fn, buf);
503 return(lstat(buf, sb));
504 }
505
506 static int
507 g_stat(fn, sb)
508 register Char *fn;
509 struct stat *sb;
510 {
511 char buf[MAXPATHLEN];
512
513 g_Ctoc(fn, buf);
514 return(stat(buf, sb));
515 }
516
517 static Char *
518 g_strchr(str, ch)
519 Char *str;
520 int ch;
521 {
522 do {
523 if (*str == ch)
524 return (str);
525 } while (*str++);
526 return (NULL);
527 }
528
529 static void
530 g_Ctoc(str, buf)
531 register Char *str;
532 char *buf;
533 {
534 register char *dc;
535
536 for (dc = buf; *dc++ = *str++;);
537 }
538
539 #ifdef DEBUG
540 static void
541 qprintf(s)
542 register Char *s;
543 {
544 register Char *p;
545
546 for (p = s; *p; p++)
547 (void)printf("%c", *p & 0xff);
548 (void)printf("\n");
549 for (p = s; *p; p++)
550 (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
551 (void)printf("\n");
552 for (p = s; *p; p++)
553 (void)printf("%c", *p & M_META ? '_' : ' ');
554 (void)printf("\n");
555 }
556 #endif
557