pat_rep.c revision 1.20 1 /* $NetBSD: pat_rep.c,v 1.20 2003/10/13 07:41:22 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #if defined(__RCSID) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
40 #else
41 __RCSID("$NetBSD: pat_rep.c,v 1.20 2003/10/13 07:41:22 agc Exp $");
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <stdio.h>
50 #include <ctype.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <stdlib.h>
54 #ifdef NET2_REGEX
55 #include <regexp.h>
56 #else
57 #include <regex.h>
58 #endif
59 #include "pax.h"
60 #include "pat_rep.h"
61 #include "extern.h"
62
63 /*
64 * routines to handle pattern matching, name modification (regular expression
65 * substitution and interactive renames), and destination name modification for
66 * copy (-rw). Both file name and link names are adjusted as required in these
67 * routines.
68 */
69
70 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
71 static PATTERN *pathead = NULL; /* file pattern match list head */
72 static PATTERN *pattail = NULL; /* file pattern match list tail */
73 static REPLACE *rephead = NULL; /* replacement string list head */
74 static REPLACE *reptail = NULL; /* replacement string list tail */
75
76 static int rep_name(char *, size_t, int *, int);
77 static int tty_rename(ARCHD *);
78 static int fix_path(char *, int *, char *, int);
79 static int fn_match(char *, char *, char **);
80 static char * range_match(char *, int);
81 static int checkdotdot(const char *);
82 #ifdef NET2_REGEX
83 static int resub(regexp *, char *, char *, char *);
84 #else
85 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
86 #endif
87
88 /*
89 * rep_add()
90 * parses the -s replacement string; compiles the regular expression
91 * and stores the compiled value and it's replacement string together in
92 * replacement string list. Input to this function is of the form:
93 * /old/new/pg
94 * The first char in the string specifies the delimiter used by this
95 * replacement string. "Old" is a regular expression in "ed" format which
96 * is compiled by regcomp() and is applied to filenames. "new" is the
97 * substitution string; p and g are options flags for printing and global
98 * replacement (over the single filename)
99 * Return:
100 * 0 if a proper replacement string and regular expression was added to
101 * the list of replacement patterns; -1 otherwise.
102 */
103
104 int
105 rep_add(char *str)
106 {
107 char *pt1;
108 char *pt2;
109 REPLACE *rep;
110 #ifndef NET2_REGEX
111 int res;
112 char rebuf[BUFSIZ];
113 #endif
114
115 /*
116 * throw out the bad parameters
117 */
118 if ((str == NULL) || (*str == '\0')) {
119 tty_warn(1, "Empty replacement string");
120 return(-1);
121 }
122
123 /*
124 * first character in the string specifies what the delimiter is for
125 * this expression.
126 */
127 for (pt1 = str+1; *pt1; pt1++) {
128 if (*pt1 == '\\') {
129 pt1++;
130 continue;
131 }
132 if (*pt1 == *str)
133 break;
134 }
135 if (pt1 == NULL) {
136 tty_warn(1, "Invalid replacement string %s", str);
137 return(-1);
138 }
139
140 /*
141 * allocate space for the node that handles this replacement pattern
142 * and split out the regular expression and try to compile it
143 */
144 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
145 tty_warn(1, "Unable to allocate memory for replacement string");
146 return(-1);
147 }
148
149 *pt1 = '\0';
150 #ifdef NET2_REGEX
151 if ((rep->rcmp = regcomp(str+1)) == NULL) {
152 #else
153 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
154 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
155 tty_warn(1, "%s while compiling regular expression %s", rebuf,
156 str);
157 #endif
158 (void)free((char *)rep);
159 return(-1);
160 }
161
162 /*
163 * put the delimiter back in case we need an error message and
164 * locate the delimiter at the end of the replacement string
165 * we then point the node at the new substitution string
166 */
167 *pt1++ = *str;
168 for (pt2 = pt1; *pt2; pt2++) {
169 if (*pt2 == '\\') {
170 pt2++;
171 continue;
172 }
173 if (*pt2 == *str)
174 break;
175 }
176 if (pt2 == NULL) {
177 #ifdef NET2_REGEX
178 (void)free((char *)rep->rcmp);
179 #else
180 regfree(&(rep->rcmp));
181 #endif
182 (void)free((char *)rep);
183 tty_warn(1, "Invalid replacement string %s", str);
184 return(-1);
185 }
186
187 *pt2 = '\0';
188
189 /* Make sure to dup replacement, who knows where it came from! */
190 if ((rep->nstr = strdup(pt1)) == NULL) {
191 #ifdef NET2_REGEX
192 (void)free((char *)rep->rcmp);
193 #else
194 regfree(&(rep->rcmp));
195 #endif
196 (void)free((char *)rep);
197 tty_warn(1, "Unable to allocate memory for replacement string");
198 return(-1);
199 }
200
201 pt1 = pt2++;
202 rep->flgs = 0;
203
204 /*
205 * set the options if any
206 */
207 while (*pt2 != '\0') {
208 switch(*pt2) {
209 case 'g':
210 case 'G':
211 rep->flgs |= GLOB;
212 break;
213 case 'p':
214 case 'P':
215 rep->flgs |= PRNT;
216 break;
217 default:
218 #ifdef NET2_REGEX
219 (void)free((char *)rep->rcmp);
220 #else
221 regfree(&(rep->rcmp));
222 #endif
223 (void)free((char *)rep);
224 *pt1 = *str;
225 tty_warn(1, "Invalid replacement string option %s",
226 str);
227 return(-1);
228 }
229 ++pt2;
230 }
231
232 /*
233 * all done, link it in at the end
234 */
235 rep->fow = NULL;
236 if (rephead == NULL) {
237 reptail = rephead = rep;
238 return(0);
239 }
240 reptail->fow = rep;
241 reptail = rep;
242 return(0);
243 }
244
245 /*
246 * pat_add()
247 * add a pattern match to the pattern match list. Pattern matches are used
248 * to select which archive members are extracted. (They appear as
249 * arguments to pax in the list and read modes). If no patterns are
250 * supplied to pax, all members in the archive will be selected (and the
251 * pattern match list is empty).
252 *
253 * Return:
254 * 0 if the pattern was added to the list, -1 otherwise
255 */
256
257 int
258 pat_add(char *str, char *chdn)
259 {
260 PATTERN *pt;
261
262 /*
263 * throw out the junk
264 */
265 if ((str == NULL) || (*str == '\0')) {
266 tty_warn(1, "Empty pattern string");
267 return(-1);
268 }
269
270 /*
271 * allocate space for the pattern and store the pattern. the pattern is
272 * part of argv so do not bother to copy it, just point at it. Add the
273 * node to the end of the pattern list
274 */
275 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
276 tty_warn(1, "Unable to allocate memory for pattern string");
277 return(-1);
278 }
279
280 pt->pstr = str;
281 pt->pend = NULL;
282 pt->plen = strlen(str);
283 pt->fow = NULL;
284 pt->flgs = 0;
285 pt->chdname = chdn;
286 if (pathead == NULL) {
287 pattail = pathead = pt;
288 return(0);
289 }
290 pattail->fow = pt;
291 pattail = pt;
292 return(0);
293 }
294
295 /*
296 * pat_chk()
297 * complain if any the user supplied pattern did not result in a match to
298 * a selected archive member.
299 */
300
301 void
302 pat_chk(void)
303 {
304 PATTERN *pt;
305 int wban = 0;
306
307 /*
308 * walk down the list checking the flags to make sure MTCH was set,
309 * if not complain
310 */
311 for (pt = pathead; pt != NULL; pt = pt->fow) {
312 if (pt->flgs & MTCH)
313 continue;
314 if (!wban) {
315 tty_warn(1, "WARNING! These patterns were not matched:");
316 ++wban;
317 }
318 (void)fprintf(stderr, "%s\n", pt->pstr);
319 }
320 }
321
322 /*
323 * pat_sel()
324 * the archive member which matches a pattern was selected. Mark the
325 * pattern as having selected an archive member. arcn->pat points at the
326 * pattern that was matched. arcn->pat is set in pat_match()
327 *
328 * NOTE: When the -c option is used, we are called when there was no match
329 * by pat_match() (that means we did match before the inverted sense of
330 * the logic). Now this seems really strange at first, but with -c we
331 * need to keep track of those patterns that cause a archive member to NOT
332 * be selected (it found an archive member with a specified pattern)
333 * Return:
334 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
335 * match, -1 otherwise.
336 */
337
338 int
339 pat_sel(ARCHD *arcn)
340 {
341 PATTERN *pt;
342 PATTERN **ppt;
343 int len;
344
345 /*
346 * if no patterns just return
347 */
348 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
349 return(0);
350
351 /*
352 * when we are NOT limited to a single match per pattern mark the
353 * pattern and return
354 */
355 if (!nflag) {
356 pt->flgs |= MTCH;
357 return(0);
358 }
359
360 /*
361 * we reach this point only when we allow a single selected match per
362 * pattern, if the pattern matches a directory and we do not have -d
363 * (dflag) we are done with this pattern. We may also be handed a file
364 * in the subtree of a directory. in that case when we are operating
365 * with -d, this pattern was already selected and we are done
366 */
367 if (pt->flgs & DIR_MTCH)
368 return(0);
369
370 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
371 /*
372 * ok we matched a directory and we are allowing
373 * subtree matches but because of the -n only its children will
374 * match. This is tagged as a DIR_MTCH type.
375 * WATCH IT, the code assumes that pt->pend points
376 * into arcn->name and arcn->name has not been modified.
377 * If not we will have a big mess. Yup this is another kludge
378 */
379
380 /*
381 * if this was a prefix match, remove trailing part of path
382 * so we can copy it. Future matches will be exact prefix match
383 */
384 if (pt->pend != NULL)
385 *pt->pend = '\0';
386
387 if ((pt->pstr = strdup(arcn->name)) == NULL) {
388 tty_warn(1, "Pattern select out of memory");
389 if (pt->pend != NULL)
390 *pt->pend = '/';
391 pt->pend = NULL;
392 return(-1);
393 }
394
395 /*
396 * put the trailing / back in the source string
397 */
398 if (pt->pend != NULL) {
399 *pt->pend = '/';
400 pt->pend = NULL;
401 }
402 pt->plen = strlen(pt->pstr);
403
404 /*
405 * strip off any trailing /, this should really never happen
406 */
407 len = pt->plen - 1;
408 if (*(pt->pstr + len) == '/') {
409 *(pt->pstr + len) = '\0';
410 pt->plen = len;
411 }
412 pt->flgs = DIR_MTCH | MTCH;
413 arcn->pat = pt;
414 return(0);
415 }
416
417 /*
418 * we are then done with this pattern, so we delete it from the list
419 * because it can never be used for another match.
420 * Seems kind of strange to do for a -c, but the pax spec is really
421 * vague on the interaction of -c, -n, and -d. We assume that when -c
422 * and the pattern rejects a member (i.e. it matched it) it is done.
423 * In effect we place the order of the flags as having -c last.
424 */
425 pt = pathead;
426 ppt = &pathead;
427 while ((pt != NULL) && (pt != arcn->pat)) {
428 ppt = &(pt->fow);
429 pt = pt->fow;
430 }
431
432 if (pt == NULL) {
433 /*
434 * should never happen....
435 */
436 tty_warn(1, "Pattern list inconsistant");
437 return(-1);
438 }
439 *ppt = pt->fow;
440 (void)free((char *)pt);
441 arcn->pat = NULL;
442 return(0);
443 }
444
445 /*
446 * pat_match()
447 * see if this archive member matches any supplied pattern, if a match
448 * is found, arcn->pat is set to point at the potential pattern. Later if
449 * this archive member is "selected" we process and mark the pattern as
450 * one which matched a selected archive member (see pat_sel())
451 * Return:
452 * 0 if this archive member should be processed, 1 if it should be
453 * skipped and -1 if we are done with all patterns (and pax should quit
454 * looking for more members)
455 */
456
457 int
458 pat_match(ARCHD *arcn)
459 {
460 PATTERN *pt;
461
462 arcn->pat = NULL;
463
464 /*
465 * if there are no more patterns and we have -n (and not -c) we are
466 * done. otherwise with no patterns to match, matches all
467 */
468 if (pathead == NULL) {
469 if (nflag && !cflag)
470 return(-1);
471 return(0);
472 }
473
474 /*
475 * have to search down the list one at a time looking for a match.
476 */
477 pt = pathead;
478 while (pt != NULL) {
479 /*
480 * check for a file name match unless we have DIR_MTCH set in
481 * this pattern then we want a prefix match
482 */
483 if (pt->flgs & DIR_MTCH) {
484 /*
485 * this pattern was matched before to a directory
486 * as we must have -n set for this (but not -d). We can
487 * only match CHILDREN of that directory so we must use
488 * an exact prefix match (no wildcards).
489 */
490 if ((arcn->name[pt->plen] == '/') &&
491 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
492 break;
493 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
494 break;
495 pt = pt->fow;
496 }
497
498 /*
499 * return the result, remember that cflag (-c) inverts the sense of a
500 * match
501 */
502 if (pt == NULL)
503 return(cflag ? 0 : 1);
504
505 /*
506 * we had a match, now when we invert the sense (-c) we reject this
507 * member. However we have to tag the pattern a being successful, (in a
508 * match, not in selecting a archive member) so we call pat_sel() here.
509 */
510 arcn->pat = pt;
511 if (!cflag)
512 return(0);
513
514 if (pat_sel(arcn) < 0)
515 return(-1);
516 arcn->pat = NULL;
517 return(1);
518 }
519
520 /*
521 * fn_match()
522 * Return:
523 * 0 if this archive member should be processed, 1 if it should be
524 * skipped and -1 if we are done with all patterns (and pax should quit
525 * looking for more members)
526 * Note: *pend may be changed to show where the prefix ends.
527 */
528
529 static int
530 fn_match(char *pattern, char *string, char **pend)
531 {
532 char c;
533 char test;
534
535 *pend = NULL;
536 for (;;) {
537 switch (c = *pattern++) {
538 case '\0':
539 /*
540 * Ok we found an exact match
541 */
542 if (*string == '\0')
543 return(0);
544
545 /*
546 * Check if it is a prefix match
547 */
548 if ((dflag == 1) || (*string != '/'))
549 return(-1);
550
551 /*
552 * It is a prefix match, remember where the trailing
553 * / is located
554 */
555 *pend = string;
556 return(0);
557 case '?':
558 if ((test = *string++) == '\0')
559 return (-1);
560 break;
561 case '*':
562 c = *pattern;
563 /*
564 * Collapse multiple *'s.
565 */
566 while (c == '*')
567 c = *++pattern;
568
569 /*
570 * Optimized hack for pattern with a * at the end
571 */
572 if (c == '\0')
573 return (0);
574
575 /*
576 * General case, use recursion.
577 */
578 while ((test = *string) != '\0') {
579 if (!fn_match(pattern, string, pend))
580 return (0);
581 ++string;
582 }
583 return (-1);
584 case '[':
585 /*
586 * range match
587 */
588 if (((test = *string++) == '\0') ||
589 ((pattern = range_match(pattern, test)) == NULL))
590 return (-1);
591 break;
592 case '\\':
593 default:
594 if (c != *string++)
595 return (-1);
596 break;
597 }
598 }
599 /* NOTREACHED */
600 }
601
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['. A leading '!' negates the class. "a-z"
 *	denotes an inclusive range unless the '-' is directly followed by ']'
 *	or the end of the pattern, in which case it is an ordinary member.
 * Return:
 *	pointer just past the closing ']' if test is accepted, NULL if it is
 *	rejected or the class is not terminated by ']'.
 */

static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without a closing ], illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}
		if (lo == test)
			hit = 1;
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
630
631 /*
632 * mod_name()
633 * modify a selected file name. first attempt to apply replacement string
634 * expressions, then apply interactive file rename. We apply replacement
635 * string expressions to both filenames and file links (if we didn't the
636 * links would point to the wrong place, and we could never be able to
637 * move an archive that has a file link in it). When we rename files
638 * interactively, we store that mapping (old name to user input name) so
639 * if we spot any file links to the old file name in the future, we will
640 * know exactly how to fix the file link.
641 * Return:
642 * 0 continue to process file, 1 skip this file, -1 pax is finished
643 */
644
645 int
646 mod_name(ARCHD *arcn)
647 {
648 int res = 0;
649
650 /*
651 * Strip off leading '/' if appropriate.
652 * Currently, this option is only set for the tar format.
653 */
654 if (rmleadslash && arcn->name[0] == '/') {
655 if (arcn->name[1] == '\0') {
656 arcn->name[0] = '.';
657 } else {
658 (void)memmove(arcn->name, &arcn->name[1],
659 strlen(arcn->name));
660 arcn->nlen--;
661 }
662 if (rmleadslash < 2) {
663 rmleadslash = 2;
664 tty_warn(0, "Removing leading / from absolute path names in the archive");
665 }
666 }
667 if (rmleadslash && arcn->ln_name[0] == '/' &&
668 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
669 if (arcn->ln_name[1] == '\0') {
670 arcn->ln_name[0] = '.';
671 } else {
672 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
673 strlen(arcn->ln_name));
674 arcn->ln_nlen--;
675 }
676 if (rmleadslash < 2) {
677 rmleadslash = 2;
678 tty_warn(0, "Removing leading / from absolute path names in the archive");
679 }
680 }
681
682 if (secure) {
683 if (checkdotdot(arcn->name)) {
684 tty_warn(0, "Ignoring file containing `..' (%s)",
685 arcn->name);
686 return 1;
687 }
688 #ifdef notdef
689 if (checkdotdot(arcn->ln_name)) {
690 tty_warn(0, "Ignoring link containing `..' (%s)",
691 arcn->ln_name);
692 return 1;
693 }
694 #endif
695 }
696
697 /*
698 * IMPORTANT: We have a problem. what do we do with symlinks?
699 * Modifying a hard link name makes sense, as we know the file it
700 * points at should have been seen already in the archive (and if it
701 * wasn't seen because of a read error or a bad archive, we lose
702 * anyway). But there are no such requirements for symlinks. On one
703 * hand the symlink that refers to a file in the archive will have to
704 * be modified to so it will still work at its new location in the
705 * file system. On the other hand a symlink that points elsewhere (and
706 * should continue to do so) should not be modified. There is clearly
707 * no perfect solution here. So we handle them like hardlinks. Clearly
708 * a replacement made by the interactive rename mapping is very likely
709 * to be correct since it applies to a single file and is an exact
710 * match. The regular expression replacements are a little harder to
711 * justify though. We claim that the symlink name is only likely
712 * to be replaced when it points within the file tree being moved and
713 * in that case it should be modified. what we really need to do is to
714 * call an oracle here. :)
715 */
716 if (rephead != NULL) {
717 /*
718 * we have replacement strings, modify the name and the link
719 * name if any.
720 */
721 if ((res = rep_name(arcn->name, sizeof(arcn->name),
722 &(arcn->nlen), 1)) != 0)
723 return(res);
724
725 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
726 (arcn->type == PAX_HRG)) &&
727 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name),
728 &(arcn->ln_nlen), 0)) != 0))
729 return(res);
730 }
731
732 if (iflag) {
733 /*
734 * perform interactive file rename, then map the link if any
735 */
736 if ((res = tty_rename(arcn)) != 0)
737 return(res);
738 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
739 (arcn->type == PAX_HRG))
740 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
741 }
742 return(res);
743 }
744
745 /*
746 * tty_rename()
747 * Prompt the user for a replacement file name. A "." keeps the old name,
748 * a empty line skips the file, and an EOF on reading the tty, will cause
749 * pax to stop processing and exit. Otherwise the file name input, replaces
750 * the old one.
751 * Return:
752 * 0 process this file, 1 skip this file, -1 we need to exit pax
753 */
754
755 static int
756 tty_rename(ARCHD *arcn)
757 {
758 char tmpname[PAXPATHLEN+2];
759 int res;
760
761 /*
762 * prompt user for the replacement name for a file, keep trying until
763 * we get some reasonable input. Archives may have more than one file
764 * on them with the same name (from updates etc). We print verbose info
765 * on the file so the user knows what is up.
766 */
767 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
768
769 for (;;) {
770 ls_tty(arcn);
771 tty_prnt("Input new name, or a \".\" to keep the old name, ");
772 tty_prnt("or a \"return\" to skip this file.\n");
773 tty_prnt("Input > ");
774 if (tty_read(tmpname, sizeof(tmpname)) < 0)
775 return(-1);
776 if (strcmp(tmpname, "..") == 0) {
777 tty_prnt("Try again, illegal file name: ..\n");
778 continue;
779 }
780 if (strlen(tmpname) > PAXPATHLEN) {
781 tty_prnt("Try again, file name too long\n");
782 continue;
783 }
784 break;
785 }
786
787 /*
788 * empty file name, skips this file. a "." leaves it alone
789 */
790 if (tmpname[0] == '\0') {
791 tty_prnt("Skipping file.\n");
792 return(1);
793 }
794 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
795 tty_prnt("Processing continues, name unchanged.\n");
796 return(0);
797 }
798
799 /*
800 * ok the name changed. We may run into links that point at this
801 * file later. we have to remember where the user sent the file
802 * in order to repair any links.
803 */
804 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
805 res = add_name(arcn->name, arcn->nlen, tmpname);
806 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
807 if (res < 0)
808 return(-1);
809 return(0);
810 }
811
812 /*
813 * set_dest()
814 * fix up the file name and the link name (if any) so this file will land
815 * in the destination directory (used during copy() -rw).
816 * Return:
817 * 0 if ok, -1 if failure (name too long)
818 */
819
820 int
821 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
822 {
823 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
824 return(-1);
825
826 /*
827 * It is really hard to deal with symlinks here, we cannot be sure
828 * if the name they point was moved (or will be moved). It is best to
829 * leave them alone.
830 */
831 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
832 return(0);
833
834 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
835 return(-1);
836 return(0);
837 }
838
839 /*
840 * fix_path
841 * concatenate dir_name and or_name and store the result in or_name (if
842 * it fits). This is one ugly function.
843 * Return:
844 * 0 if ok, -1 if the final name is too long
845 */
846
847 static int
848 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
849 {
850 char *src;
851 char *dest;
852 char *start;
853 int len;
854
855 /*
856 * we shift the or_name to the right enough to tack in the dir_name
857 * at the front. We make sure we have enough space for it all before
858 * we start. since dest always ends in a slash, we skip of or_name
859 * if it also starts with one.
860 */
861 start = or_name;
862 src = start + *or_len;
863 dest = src + dir_len;
864 if (*start == '/') {
865 ++start;
866 --dest;
867 }
868 if ((len = dest - or_name) > PAXPATHLEN) {
869 tty_warn(1, "File name %s/%s, too long", dir_name, start);
870 return(-1);
871 }
872 *or_len = len;
873
874 /*
875 * enough space, shift
876 */
877 while (src >= start)
878 *dest-- = *src--;
879 src = dir_name + dir_len - 1;
880
881 /*
882 * splice in the destination directory name
883 */
884 while (src >= dir_name)
885 *dest-- = *src--;
886
887 *(or_name + len) = '\0';
888 return(0);
889 }
890
891 /*
892 * rep_name()
893 * walk down the list of replacement strings applying each one in order.
894 * when we find one with a successful substitution, we modify the name
895 * as specified. if required, we print the results. if the resulting name
896 * is empty, we will skip this archive member. We use the regexp(3)
897 * routines (regexp() ought to win a prize as having the most cryptic
898 * library function manual page).
899 * --Parameters--
900 * name is the file name we are going to apply the regular expressions to
901 * (and may be modified)
902 * namelen the size of the name buffer.
903 * nlen is the length of this name (and is modified to hold the length of
904 * the final string).
905 * prnt is a flag that says whether to print the final result.
906 * Return:
907 * 0 if substitution was successful, 1 if we are to skip the file (the name
908 * ended up empty)
909 */
910
911 static int
912 rep_name(char *name, size_t namelen, int *nlen, int prnt)
913 {
914 REPLACE *pt;
915 char *inpt;
916 char *outpt;
917 char *endpt;
918 char *rpt;
919 int found = 0;
920 int res;
921 #ifndef NET2_REGEX
922 regmatch_t pm[MAXSUBEXP];
923 #endif
924 char nname[PAXPATHLEN+1]; /* final result of all replacements */
925 char buf1[PAXPATHLEN+1]; /* where we work on the name */
926
927 /*
928 * copy the name into buf1, where we will work on it. We need to keep
929 * the orig string around so we can print out the result of the final
930 * replacement. We build up the final result in nname. inpt points at
931 * the string we apply the regular expression to. prnt is used to
932 * suppress printing when we handle replacements on the link field
933 * (the user already saw that substitution go by)
934 */
935 pt = rephead;
936 (void)strcpy(buf1, name);
937 inpt = buf1;
938 outpt = nname;
939 endpt = outpt + PAXPATHLEN;
940
941 /*
942 * try each replacement string in order
943 */
944 while (pt != NULL) {
945 do {
946 /*
947 * check for a successful substitution, if not go to
948 * the next pattern, or cleanup if we were global
949 */
950 #ifdef NET2_REGEX
951 if (regexec(pt->rcmp, inpt) == 0)
952 #else
953 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
954 #endif
955 break;
956
957 /*
958 * ok we found one. We have three parts, the prefix
959 * which did not match, the section that did and the
960 * tail (that also did not match). Copy the prefix to
961 * the final output buffer (watching to make sure we
962 * do not create a string too long).
963 */
964 found = 1;
965 #ifdef NET2_REGEX
966 rpt = pt->rcmp->startp[0];
967 #else
968 rpt = inpt + pm[0].rm_so;
969 #endif
970
971 while ((inpt < rpt) && (outpt < endpt))
972 *outpt++ = *inpt++;
973 if (outpt == endpt)
974 break;
975
976 /*
977 * for the second part (which matched the regular
978 * expression) apply the substitution using the
979 * replacement string and place it the prefix in the
980 * final output. If we have problems, skip it.
981 */
982 if ((res =
983 #ifdef NET2_REGEX
984 resub(pt->rcmp,pt->nstr,outpt,endpt)
985 #else
986 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
987 #endif
988 ) < 0) {
989 if (prnt)
990 tty_warn(1, "Replacement name error %s",
991 name);
992 return(1);
993 }
994 outpt += res;
995
996 /*
997 * we set up to look again starting at the first
998 * character in the tail (of the input string right
999 * after the last character matched by the regular
1000 * expression (inpt always points at the first char in
1001 * the string to process). If we are not doing a global
1002 * substitution, we will use inpt to copy the tail to
1003 * the final result. Make sure we do not overrun the
1004 * output buffer
1005 */
1006 #ifdef NET2_REGEX
1007 inpt = pt->rcmp->endp[0];
1008 #else
1009 inpt += pm[0].rm_eo - pm[0].rm_so;
1010 #endif
1011
1012 if ((outpt == endpt) || (*inpt == '\0'))
1013 break;
1014
1015 /*
1016 * if the user wants global we keep trying to
1017 * substitute until it fails, then we are done.
1018 */
1019 } while (pt->flgs & GLOB);
1020
1021 if (found)
1022 break;
1023
1024 /*
1025 * a successful substitution did NOT occur, try the next one
1026 */
1027 pt = pt->fow;
1028 }
1029
1030 if (found) {
1031 /*
1032 * we had a substitution, copy the last tail piece (if there is
1033 * room) to the final result
1034 */
1035 while ((outpt < endpt) && (*inpt != '\0'))
1036 *outpt++ = *inpt++;
1037
1038 *outpt = '\0';
1039 if ((outpt == endpt) && (*inpt != '\0')) {
1040 if (prnt)
1041 tty_warn(1,"Replacement name too long %s >> %s",
1042 name, nname);
1043 return(1);
1044 }
1045
1046 /*
1047 * inform the user of the result if wanted
1048 */
1049 if (prnt && (pt->flgs & PRNT)) {
1050 if (*nname == '\0')
1051 (void)fprintf(stderr,"%s >> <empty string>\n",
1052 name);
1053 else
1054 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1055 }
1056
1057 /*
1058 * if empty inform the caller this file is to be skipped
1059 * otherwise copy the new name over the orig name and return
1060 */
1061 if (*nname == '\0')
1062 return(1);
1063 *nlen = strlcpy(name, nname, namelen);
1064 }
1065 return(0);
1066 }
1067
1068
/*
 * checkdotdot()
 *	Return true (1) if some path component of name is exactly "..",
 *	false (0) otherwise. Names that merely contain two dots inside a
 *	longer component ("a..b", "..a") are not flagged.
 */
static int
checkdotdot(const char *name)
{
	size_t len;

	/* ".." alone or as the leading component */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '/' || name[2] == '\0'))
		return 1;

	/* ".." as a component in the middle */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* ".." as the last component */
	len = strlen(name);
	if (len >= 3 && strcmp(name + len - 3, "/..") == 0)
		return 1;

	return 0;
}
1096
1097 #ifdef NET2_REGEX
1098 /*
1099 * resub()
1100 * apply the replacement to the matched expression. expand out the old
1101 * style ed(1) subexpression expansion.
1102 * Return:
1103 * -1 if error, or the number of characters added to the destination.
1104 */
1105
1106 static int
1107 resub(regexp *prog, char *src, char *dest, char *destend)
1108 {
1109 char *spt;
1110 char *dpt;
1111 char c;
1112 int no;
1113 int len;
1114
1115 spt = src;
1116 dpt = dest;
1117 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1118 if (c == '&')
1119 no = 0;
1120 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1121 no = *spt++ - '0';
1122 else {
1123 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1124 c = *spt++;
1125 *dpt++ = c;
1126 continue;
1127 }
1128 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1129 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1130 continue;
1131
1132 /*
1133 * copy the subexpression to the destination.
1134 * fail if we run out of space or the match string is damaged
1135 */
1136 if (len > (destend - dpt))
1137 return (-1);
1138 strncpy(dpt, prog->startp[no], len);
1139 dpt += len;
1140 }
1141 return(dpt - dest);
1142 }
1143
1144 #else
1145
/*
 * resub()
 *	expand the replacement string src into dest using the match offsets
 *	in pm. '&' stands for the whole match (pm[0]) and "\1" .. "\9" for
 *	the numbered subexpressions (ed(1) style); "\\" and "\&" give a
 *	literal backslash or ampersand. The offsets in pm are applied to
 *	txt. A subexpression that did not take part in the match expands to
 *	nothing. Output stops quietly once dest reaches destend; dest is not
 *	NUL terminated.
 * Return:
 *	-1 if the replacement names a subexpression the regular expression
 *	does not have or a subexpression does not fit in the space left,
 *	otherwise the number of characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	regmatch_t *grp;
	int nsub = rp->re_nsub;
	int idx;
	int sublen;
	char ch;

	while (out < destend) {
		if ((ch = *in++) == '\0')
			break;

		/*
		 * decide whether this is a reference to (part of) the match
		 * or just an ordinary replacement character
		 */
		if (ch == '&') {
			grp = pm;
		} else if ((ch == '\\') && (*in >= '1') && (*in <= '9')) {
			/*
			 * the expression must really have that many
			 * subexpressions
			 */
			idx = *in++ - '0';
			if (idx > nsub)
				return(-1);
			grp = pm + idx;
		} else {
			/*
			 * ordinary character; a backslash only quotes
			 * another backslash or an ampersand
			 */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to copy for an unset or empty subexpression
		 */
		if ((grp->rm_so < 0) || (grp->rm_eo < 0))
			continue;
		sublen = grp->rm_eo - grp->rm_so;
		if (sublen <= 0)
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (sublen > (destend - out))
			return -1;
		strncpy(out, txt + grp->rm_so, sublen);
		out += sublen;
	}
	return(out - dest);
}
1210 #endif
1211