pat_rep.c revision 1.19 1 /* $NetBSD: pat_rep.c,v 1.19 2003/08/07 09:05:21 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Keith Muller of the University of California, San Diego.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1992 Keith Muller.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Keith Muller of the University of California, San Diego.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 */
69
70 #include <sys/cdefs.h>
71 #if defined(__RCSID) && !defined(lint)
72 #if 0
73 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
74 #else
75 __RCSID("$NetBSD: pat_rep.c,v 1.19 2003/08/07 09:05:21 agc Exp $");
76 #endif
77 #endif /* not lint */
78
79 #include <sys/types.h>
80 #include <sys/time.h>
81 #include <sys/stat.h>
82 #include <sys/param.h>
83 #include <stdio.h>
84 #include <ctype.h>
85 #include <string.h>
86 #include <unistd.h>
87 #include <stdlib.h>
88 #ifdef NET2_REGEX
89 #include <regexp.h>
90 #else
91 #include <regex.h>
92 #endif
93 #include "pax.h"
94 #include "pat_rep.h"
95 #include "extern.h"
96
97 /*
98 * routines to handle pattern matching, name modification (regular expression
99 * substitution and interactive renames), and destination name modification for
100 * copy (-rw). Both file name and link names are adjusted as required in these
101 * routines.
102 */
103
104 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
105 static PATTERN *pathead = NULL; /* file pattern match list head */
106 static PATTERN *pattail = NULL; /* file pattern match list tail */
107 static REPLACE *rephead = NULL; /* replacement string list head */
108 static REPLACE *reptail = NULL; /* replacement string list tail */
109
110 static int rep_name(char *, size_t, int *, int);
111 static int tty_rename(ARCHD *);
112 static int fix_path(char *, int *, char *, int);
113 static int fn_match(char *, char *, char **);
114 static char * range_match(char *, int);
115 static int checkdotdot(const char *);
116 #ifdef NET2_REGEX
117 static int resub(regexp *, char *, char *, char *);
118 #else
119 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
120 #endif
121
122 /*
123 * rep_add()
124 * parses the -s replacement string; compiles the regular expression
125 * and stores the compiled value and it's replacement string together in
126 * replacement string list. Input to this function is of the form:
127 * /old/new/pg
128 * The first char in the string specifies the delimiter used by this
129 * replacement string. "Old" is a regular expression in "ed" format which
130 * is compiled by regcomp() and is applied to filenames. "new" is the
131 * substitution string; p and g are options flags for printing and global
132 * replacement (over the single filename)
133 * Return:
134 * 0 if a proper replacement string and regular expression was added to
135 * the list of replacement patterns; -1 otherwise.
136 */
137
138 int
139 rep_add(char *str)
140 {
141 char *pt1;
142 char *pt2;
143 REPLACE *rep;
144 #ifndef NET2_REGEX
145 int res;
146 char rebuf[BUFSIZ];
147 #endif
148
149 /*
150 * throw out the bad parameters
151 */
152 if ((str == NULL) || (*str == '\0')) {
153 tty_warn(1, "Empty replacement string");
154 return(-1);
155 }
156
157 /*
158 * first character in the string specifies what the delimiter is for
159 * this expression.
160 */
161 for (pt1 = str+1; *pt1; pt1++) {
162 if (*pt1 == '\\') {
163 pt1++;
164 continue;
165 }
166 if (*pt1 == *str)
167 break;
168 }
169 if (pt1 == NULL) {
170 tty_warn(1, "Invalid replacement string %s", str);
171 return(-1);
172 }
173
174 /*
175 * allocate space for the node that handles this replacement pattern
176 * and split out the regular expression and try to compile it
177 */
178 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
179 tty_warn(1, "Unable to allocate memory for replacement string");
180 return(-1);
181 }
182
183 *pt1 = '\0';
184 #ifdef NET2_REGEX
185 if ((rep->rcmp = regcomp(str+1)) == NULL) {
186 #else
187 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
188 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
189 tty_warn(1, "%s while compiling regular expression %s", rebuf,
190 str);
191 #endif
192 (void)free((char *)rep);
193 return(-1);
194 }
195
196 /*
197 * put the delimiter back in case we need an error message and
198 * locate the delimiter at the end of the replacement string
199 * we then point the node at the new substitution string
200 */
201 *pt1++ = *str;
202 for (pt2 = pt1; *pt2; pt2++) {
203 if (*pt2 == '\\') {
204 pt2++;
205 continue;
206 }
207 if (*pt2 == *str)
208 break;
209 }
210 if (pt2 == NULL) {
211 #ifdef NET2_REGEX
212 (void)free((char *)rep->rcmp);
213 #else
214 regfree(&(rep->rcmp));
215 #endif
216 (void)free((char *)rep);
217 tty_warn(1, "Invalid replacement string %s", str);
218 return(-1);
219 }
220
221 *pt2 = '\0';
222
223 /* Make sure to dup replacement, who knows where it came from! */
224 if ((rep->nstr = strdup(pt1)) == NULL) {
225 #ifdef NET2_REGEX
226 (void)free((char *)rep->rcmp);
227 #else
228 regfree(&(rep->rcmp));
229 #endif
230 (void)free((char *)rep);
231 tty_warn(1, "Unable to allocate memory for replacement string");
232 return(-1);
233 }
234
235 pt1 = pt2++;
236 rep->flgs = 0;
237
238 /*
239 * set the options if any
240 */
241 while (*pt2 != '\0') {
242 switch(*pt2) {
243 case 'g':
244 case 'G':
245 rep->flgs |= GLOB;
246 break;
247 case 'p':
248 case 'P':
249 rep->flgs |= PRNT;
250 break;
251 default:
252 #ifdef NET2_REGEX
253 (void)free((char *)rep->rcmp);
254 #else
255 regfree(&(rep->rcmp));
256 #endif
257 (void)free((char *)rep);
258 *pt1 = *str;
259 tty_warn(1, "Invalid replacement string option %s",
260 str);
261 return(-1);
262 }
263 ++pt2;
264 }
265
266 /*
267 * all done, link it in at the end
268 */
269 rep->fow = NULL;
270 if (rephead == NULL) {
271 reptail = rephead = rep;
272 return(0);
273 }
274 reptail->fow = rep;
275 reptail = rep;
276 return(0);
277 }
278
279 /*
280 * pat_add()
281 * add a pattern match to the pattern match list. Pattern matches are used
282 * to select which archive members are extracted. (They appear as
283 * arguments to pax in the list and read modes). If no patterns are
284 * supplied to pax, all members in the archive will be selected (and the
285 * pattern match list is empty).
286 *
287 * Return:
288 * 0 if the pattern was added to the list, -1 otherwise
289 */
290
291 int
292 pat_add(char *str, char *chdn)
293 {
294 PATTERN *pt;
295
296 /*
297 * throw out the junk
298 */
299 if ((str == NULL) || (*str == '\0')) {
300 tty_warn(1, "Empty pattern string");
301 return(-1);
302 }
303
304 /*
305 * allocate space for the pattern and store the pattern. the pattern is
306 * part of argv so do not bother to copy it, just point at it. Add the
307 * node to the end of the pattern list
308 */
309 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
310 tty_warn(1, "Unable to allocate memory for pattern string");
311 return(-1);
312 }
313
314 pt->pstr = str;
315 pt->pend = NULL;
316 pt->plen = strlen(str);
317 pt->fow = NULL;
318 pt->flgs = 0;
319 pt->chdname = chdn;
320 if (pathead == NULL) {
321 pattail = pathead = pt;
322 return(0);
323 }
324 pattail->fow = pt;
325 pattail = pt;
326 return(0);
327 }
328
329 /*
330 * pat_chk()
331 * complain if any the user supplied pattern did not result in a match to
332 * a selected archive member.
333 */
334
335 void
336 pat_chk(void)
337 {
338 PATTERN *pt;
339 int wban = 0;
340
341 /*
342 * walk down the list checking the flags to make sure MTCH was set,
343 * if not complain
344 */
345 for (pt = pathead; pt != NULL; pt = pt->fow) {
346 if (pt->flgs & MTCH)
347 continue;
348 if (!wban) {
349 tty_warn(1, "WARNING! These patterns were not matched:");
350 ++wban;
351 }
352 (void)fprintf(stderr, "%s\n", pt->pstr);
353 }
354 }
355
356 /*
357 * pat_sel()
358 * the archive member which matches a pattern was selected. Mark the
359 * pattern as having selected an archive member. arcn->pat points at the
360 * pattern that was matched. arcn->pat is set in pat_match()
361 *
362 * NOTE: When the -c option is used, we are called when there was no match
363 * by pat_match() (that means we did match before the inverted sense of
364 * the logic). Now this seems really strange at first, but with -c we
365 * need to keep track of those patterns that cause a archive member to NOT
366 * be selected (it found an archive member with a specified pattern)
367 * Return:
368 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
369 * match, -1 otherwise.
370 */
371
372 int
373 pat_sel(ARCHD *arcn)
374 {
375 PATTERN *pt;
376 PATTERN **ppt;
377 int len;
378
379 /*
380 * if no patterns just return
381 */
382 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
383 return(0);
384
385 /*
386 * when we are NOT limited to a single match per pattern mark the
387 * pattern and return
388 */
389 if (!nflag) {
390 pt->flgs |= MTCH;
391 return(0);
392 }
393
394 /*
395 * we reach this point only when we allow a single selected match per
396 * pattern, if the pattern matches a directory and we do not have -d
397 * (dflag) we are done with this pattern. We may also be handed a file
398 * in the subtree of a directory. in that case when we are operating
399 * with -d, this pattern was already selected and we are done
400 */
401 if (pt->flgs & DIR_MTCH)
402 return(0);
403
404 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
405 /*
406 * ok we matched a directory and we are allowing
407 * subtree matches but because of the -n only its children will
408 * match. This is tagged as a DIR_MTCH type.
409 * WATCH IT, the code assumes that pt->pend points
410 * into arcn->name and arcn->name has not been modified.
411 * If not we will have a big mess. Yup this is another kludge
412 */
413
414 /*
415 * if this was a prefix match, remove trailing part of path
416 * so we can copy it. Future matches will be exact prefix match
417 */
418 if (pt->pend != NULL)
419 *pt->pend = '\0';
420
421 if ((pt->pstr = strdup(arcn->name)) == NULL) {
422 tty_warn(1, "Pattern select out of memory");
423 if (pt->pend != NULL)
424 *pt->pend = '/';
425 pt->pend = NULL;
426 return(-1);
427 }
428
429 /*
430 * put the trailing / back in the source string
431 */
432 if (pt->pend != NULL) {
433 *pt->pend = '/';
434 pt->pend = NULL;
435 }
436 pt->plen = strlen(pt->pstr);
437
438 /*
439 * strip off any trailing /, this should really never happen
440 */
441 len = pt->plen - 1;
442 if (*(pt->pstr + len) == '/') {
443 *(pt->pstr + len) = '\0';
444 pt->plen = len;
445 }
446 pt->flgs = DIR_MTCH | MTCH;
447 arcn->pat = pt;
448 return(0);
449 }
450
451 /*
452 * we are then done with this pattern, so we delete it from the list
453 * because it can never be used for another match.
454 * Seems kind of strange to do for a -c, but the pax spec is really
455 * vague on the interaction of -c, -n, and -d. We assume that when -c
456 * and the pattern rejects a member (i.e. it matched it) it is done.
457 * In effect we place the order of the flags as having -c last.
458 */
459 pt = pathead;
460 ppt = &pathead;
461 while ((pt != NULL) && (pt != arcn->pat)) {
462 ppt = &(pt->fow);
463 pt = pt->fow;
464 }
465
466 if (pt == NULL) {
467 /*
468 * should never happen....
469 */
470 tty_warn(1, "Pattern list inconsistant");
471 return(-1);
472 }
473 *ppt = pt->fow;
474 (void)free((char *)pt);
475 arcn->pat = NULL;
476 return(0);
477 }
478
479 /*
480 * pat_match()
481 * see if this archive member matches any supplied pattern, if a match
482 * is found, arcn->pat is set to point at the potential pattern. Later if
483 * this archive member is "selected" we process and mark the pattern as
484 * one which matched a selected archive member (see pat_sel())
485 * Return:
486 * 0 if this archive member should be processed, 1 if it should be
487 * skipped and -1 if we are done with all patterns (and pax should quit
488 * looking for more members)
489 */
490
491 int
492 pat_match(ARCHD *arcn)
493 {
494 PATTERN *pt;
495
496 arcn->pat = NULL;
497
498 /*
499 * if there are no more patterns and we have -n (and not -c) we are
500 * done. otherwise with no patterns to match, matches all
501 */
502 if (pathead == NULL) {
503 if (nflag && !cflag)
504 return(-1);
505 return(0);
506 }
507
508 /*
509 * have to search down the list one at a time looking for a match.
510 */
511 pt = pathead;
512 while (pt != NULL) {
513 /*
514 * check for a file name match unless we have DIR_MTCH set in
515 * this pattern then we want a prefix match
516 */
517 if (pt->flgs & DIR_MTCH) {
518 /*
519 * this pattern was matched before to a directory
520 * as we must have -n set for this (but not -d). We can
521 * only match CHILDREN of that directory so we must use
522 * an exact prefix match (no wildcards).
523 */
524 if ((arcn->name[pt->plen] == '/') &&
525 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
526 break;
527 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
528 break;
529 pt = pt->fow;
530 }
531
532 /*
533 * return the result, remember that cflag (-c) inverts the sense of a
534 * match
535 */
536 if (pt == NULL)
537 return(cflag ? 0 : 1);
538
539 /*
540 * we had a match, now when we invert the sense (-c) we reject this
541 * member. However we have to tag the pattern a being successful, (in a
542 * match, not in selecting a archive member) so we call pat_sel() here.
543 */
544 arcn->pat = pt;
545 if (!cflag)
546 return(0);
547
548 if (pat_sel(arcn) < 0)
549 return(-1);
550 arcn->pat = NULL;
551 return(1);
552 }
553
554 /*
555 * fn_match()
556 * Return:
557 * 0 if this archive member should be processed, 1 if it should be
558 * skipped and -1 if we are done with all patterns (and pax should quit
559 * looking for more members)
560 * Note: *pend may be changed to show where the prefix ends.
561 */
562
563 static int
564 fn_match(char *pattern, char *string, char **pend)
565 {
566 char c;
567 char test;
568
569 *pend = NULL;
570 for (;;) {
571 switch (c = *pattern++) {
572 case '\0':
573 /*
574 * Ok we found an exact match
575 */
576 if (*string == '\0')
577 return(0);
578
579 /*
580 * Check if it is a prefix match
581 */
582 if ((dflag == 1) || (*string != '/'))
583 return(-1);
584
585 /*
586 * It is a prefix match, remember where the trailing
587 * / is located
588 */
589 *pend = string;
590 return(0);
591 case '?':
592 if ((test = *string++) == '\0')
593 return (-1);
594 break;
595 case '*':
596 c = *pattern;
597 /*
598 * Collapse multiple *'s.
599 */
600 while (c == '*')
601 c = *++pattern;
602
603 /*
604 * Optimized hack for pattern with a * at the end
605 */
606 if (c == '\0')
607 return (0);
608
609 /*
610 * General case, use recursion.
611 */
612 while ((test = *string) != '\0') {
613 if (!fn_match(pattern, string, pend))
614 return (0);
615 ++string;
616 }
617 return (-1);
618 case '[':
619 /*
620 * range match
621 */
622 if (((test = *string++) == '\0') ||
623 ((pattern = range_match(pattern, test)) == NULL))
624 return (-1);
625 break;
626 case '\\':
627 default:
628 if (c != *string++)
629 return (-1);
630 break;
631 }
632 }
633 /* NOTREACHED */
634 }
635
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['. A leading '!' inverts the sense of the
 *	match; "x-y" denotes an inclusive range as long as y is neither ']'
 *	nor the end of the pattern.
 * Return:
 *	pointer to the pattern character following the closing ']' if test is
 *	accepted, NULL if it is rejected or the closing ']' is missing.
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/* ran out of pattern before the closing bracket */
		if (lo == '\0')
			return NULL;

		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			/* lo-hi range */
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
			continue;
		}

		/* single character */
		if (lo == test)
			hit = 1;
	}

	/* accepted when exactly one of hit and invert is set */
	return (hit == invert) ? NULL : pattern;
}
664
665 /*
666 * mod_name()
667 * modify a selected file name. first attempt to apply replacement string
668 * expressions, then apply interactive file rename. We apply replacement
669 * string expressions to both filenames and file links (if we didn't the
670 * links would point to the wrong place, and we could never be able to
671 * move an archive that has a file link in it). When we rename files
672 * interactively, we store that mapping (old name to user input name) so
673 * if we spot any file links to the old file name in the future, we will
674 * know exactly how to fix the file link.
675 * Return:
676 * 0 continue to process file, 1 skip this file, -1 pax is finished
677 */
678
679 int
680 mod_name(ARCHD *arcn)
681 {
682 int res = 0;
683
684 /*
685 * Strip off leading '/' if appropriate.
686 * Currently, this option is only set for the tar format.
687 */
688 if (rmleadslash && arcn->name[0] == '/') {
689 if (arcn->name[1] == '\0') {
690 arcn->name[0] = '.';
691 } else {
692 (void)memmove(arcn->name, &arcn->name[1],
693 strlen(arcn->name));
694 arcn->nlen--;
695 }
696 if (rmleadslash < 2) {
697 rmleadslash = 2;
698 tty_warn(0, "Removing leading / from absolute path names in the archive");
699 }
700 }
701 if (rmleadslash && arcn->ln_name[0] == '/' &&
702 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
703 if (arcn->ln_name[1] == '\0') {
704 arcn->ln_name[0] = '.';
705 } else {
706 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
707 strlen(arcn->ln_name));
708 arcn->ln_nlen--;
709 }
710 if (rmleadslash < 2) {
711 rmleadslash = 2;
712 tty_warn(0, "Removing leading / from absolute path names in the archive");
713 }
714 }
715
716 if (secure) {
717 if (checkdotdot(arcn->name)) {
718 tty_warn(0, "Ignoring file containing `..' (%s)",
719 arcn->name);
720 return 1;
721 }
722 #ifdef notdef
723 if (checkdotdot(arcn->ln_name)) {
724 tty_warn(0, "Ignoring link containing `..' (%s)",
725 arcn->ln_name);
726 return 1;
727 }
728 #endif
729 }
730
731 /*
732 * IMPORTANT: We have a problem. what do we do with symlinks?
733 * Modifying a hard link name makes sense, as we know the file it
734 * points at should have been seen already in the archive (and if it
735 * wasn't seen because of a read error or a bad archive, we lose
736 * anyway). But there are no such requirements for symlinks. On one
737 * hand the symlink that refers to a file in the archive will have to
738 * be modified to so it will still work at its new location in the
739 * file system. On the other hand a symlink that points elsewhere (and
740 * should continue to do so) should not be modified. There is clearly
741 * no perfect solution here. So we handle them like hardlinks. Clearly
742 * a replacement made by the interactive rename mapping is very likely
743 * to be correct since it applies to a single file and is an exact
744 * match. The regular expression replacements are a little harder to
745 * justify though. We claim that the symlink name is only likely
746 * to be replaced when it points within the file tree being moved and
747 * in that case it should be modified. what we really need to do is to
748 * call an oracle here. :)
749 */
750 if (rephead != NULL) {
751 /*
752 * we have replacement strings, modify the name and the link
753 * name if any.
754 */
755 if ((res = rep_name(arcn->name, sizeof(arcn->name),
756 &(arcn->nlen), 1)) != 0)
757 return(res);
758
759 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
760 (arcn->type == PAX_HRG)) &&
761 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name),
762 &(arcn->ln_nlen), 0)) != 0))
763 return(res);
764 }
765
766 if (iflag) {
767 /*
768 * perform interactive file rename, then map the link if any
769 */
770 if ((res = tty_rename(arcn)) != 0)
771 return(res);
772 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
773 (arcn->type == PAX_HRG))
774 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
775 }
776 return(res);
777 }
778
779 /*
780 * tty_rename()
781 * Prompt the user for a replacement file name. A "." keeps the old name,
782 * a empty line skips the file, and an EOF on reading the tty, will cause
783 * pax to stop processing and exit. Otherwise the file name input, replaces
784 * the old one.
785 * Return:
786 * 0 process this file, 1 skip this file, -1 we need to exit pax
787 */
788
789 static int
790 tty_rename(ARCHD *arcn)
791 {
792 char tmpname[PAXPATHLEN+2];
793 int res;
794
795 /*
796 * prompt user for the replacement name for a file, keep trying until
797 * we get some reasonable input. Archives may have more than one file
798 * on them with the same name (from updates etc). We print verbose info
799 * on the file so the user knows what is up.
800 */
801 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
802
803 for (;;) {
804 ls_tty(arcn);
805 tty_prnt("Input new name, or a \".\" to keep the old name, ");
806 tty_prnt("or a \"return\" to skip this file.\n");
807 tty_prnt("Input > ");
808 if (tty_read(tmpname, sizeof(tmpname)) < 0)
809 return(-1);
810 if (strcmp(tmpname, "..") == 0) {
811 tty_prnt("Try again, illegal file name: ..\n");
812 continue;
813 }
814 if (strlen(tmpname) > PAXPATHLEN) {
815 tty_prnt("Try again, file name too long\n");
816 continue;
817 }
818 break;
819 }
820
821 /*
822 * empty file name, skips this file. a "." leaves it alone
823 */
824 if (tmpname[0] == '\0') {
825 tty_prnt("Skipping file.\n");
826 return(1);
827 }
828 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
829 tty_prnt("Processing continues, name unchanged.\n");
830 return(0);
831 }
832
833 /*
834 * ok the name changed. We may run into links that point at this
835 * file later. we have to remember where the user sent the file
836 * in order to repair any links.
837 */
838 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
839 res = add_name(arcn->name, arcn->nlen, tmpname);
840 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
841 if (res < 0)
842 return(-1);
843 return(0);
844 }
845
846 /*
847 * set_dest()
848 * fix up the file name and the link name (if any) so this file will land
849 * in the destination directory (used during copy() -rw).
850 * Return:
851 * 0 if ok, -1 if failure (name too long)
852 */
853
854 int
855 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
856 {
857 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
858 return(-1);
859
860 /*
861 * It is really hard to deal with symlinks here, we cannot be sure
862 * if the name they point was moved (or will be moved). It is best to
863 * leave them alone.
864 */
865 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
866 return(0);
867
868 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
869 return(-1);
870 return(0);
871 }
872
873 /*
874 * fix_path
875 * concatenate dir_name and or_name and store the result in or_name (if
876 * it fits). This is one ugly function.
877 * Return:
878 * 0 if ok, -1 if the final name is too long
879 */
880
881 static int
882 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
883 {
884 char *src;
885 char *dest;
886 char *start;
887 int len;
888
889 /*
890 * we shift the or_name to the right enough to tack in the dir_name
891 * at the front. We make sure we have enough space for it all before
892 * we start. since dest always ends in a slash, we skip of or_name
893 * if it also starts with one.
894 */
895 start = or_name;
896 src = start + *or_len;
897 dest = src + dir_len;
898 if (*start == '/') {
899 ++start;
900 --dest;
901 }
902 if ((len = dest - or_name) > PAXPATHLEN) {
903 tty_warn(1, "File name %s/%s, too long", dir_name, start);
904 return(-1);
905 }
906 *or_len = len;
907
908 /*
909 * enough space, shift
910 */
911 while (src >= start)
912 *dest-- = *src--;
913 src = dir_name + dir_len - 1;
914
915 /*
916 * splice in the destination directory name
917 */
918 while (src >= dir_name)
919 *dest-- = *src--;
920
921 *(or_name + len) = '\0';
922 return(0);
923 }
924
925 /*
926 * rep_name()
927 * walk down the list of replacement strings applying each one in order.
928 * when we find one with a successful substitution, we modify the name
929 * as specified. if required, we print the results. if the resulting name
930 * is empty, we will skip this archive member. We use the regexp(3)
931 * routines (regexp() ought to win a prize as having the most cryptic
932 * library function manual page).
933 * --Parameters--
934 * name is the file name we are going to apply the regular expressions to
935 * (and may be modified)
936 * namelen the size of the name buffer.
937 * nlen is the length of this name (and is modified to hold the length of
938 * the final string).
939 * prnt is a flag that says whether to print the final result.
940 * Return:
941 * 0 if substitution was successful, 1 if we are to skip the file (the name
942 * ended up empty)
943 */
944
945 static int
946 rep_name(char *name, size_t namelen, int *nlen, int prnt)
947 {
948 REPLACE *pt;
949 char *inpt;
950 char *outpt;
951 char *endpt;
952 char *rpt;
953 int found = 0;
954 int res;
955 #ifndef NET2_REGEX
956 regmatch_t pm[MAXSUBEXP];
957 #endif
958 char nname[PAXPATHLEN+1]; /* final result of all replacements */
959 char buf1[PAXPATHLEN+1]; /* where we work on the name */
960
961 /*
962 * copy the name into buf1, where we will work on it. We need to keep
963 * the orig string around so we can print out the result of the final
964 * replacement. We build up the final result in nname. inpt points at
965 * the string we apply the regular expression to. prnt is used to
966 * suppress printing when we handle replacements on the link field
967 * (the user already saw that substitution go by)
968 */
969 pt = rephead;
970 (void)strcpy(buf1, name);
971 inpt = buf1;
972 outpt = nname;
973 endpt = outpt + PAXPATHLEN;
974
975 /*
976 * try each replacement string in order
977 */
978 while (pt != NULL) {
979 do {
980 /*
981 * check for a successful substitution, if not go to
982 * the next pattern, or cleanup if we were global
983 */
984 #ifdef NET2_REGEX
985 if (regexec(pt->rcmp, inpt) == 0)
986 #else
987 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
988 #endif
989 break;
990
991 /*
992 * ok we found one. We have three parts, the prefix
993 * which did not match, the section that did and the
994 * tail (that also did not match). Copy the prefix to
995 * the final output buffer (watching to make sure we
996 * do not create a string too long).
997 */
998 found = 1;
999 #ifdef NET2_REGEX
1000 rpt = pt->rcmp->startp[0];
1001 #else
1002 rpt = inpt + pm[0].rm_so;
1003 #endif
1004
1005 while ((inpt < rpt) && (outpt < endpt))
1006 *outpt++ = *inpt++;
1007 if (outpt == endpt)
1008 break;
1009
1010 /*
1011 * for the second part (which matched the regular
1012 * expression) apply the substitution using the
1013 * replacement string and place it the prefix in the
1014 * final output. If we have problems, skip it.
1015 */
1016 if ((res =
1017 #ifdef NET2_REGEX
1018 resub(pt->rcmp,pt->nstr,outpt,endpt)
1019 #else
1020 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
1021 #endif
1022 ) < 0) {
1023 if (prnt)
1024 tty_warn(1, "Replacement name error %s",
1025 name);
1026 return(1);
1027 }
1028 outpt += res;
1029
1030 /*
1031 * we set up to look again starting at the first
1032 * character in the tail (of the input string right
1033 * after the last character matched by the regular
1034 * expression (inpt always points at the first char in
1035 * the string to process). If we are not doing a global
1036 * substitution, we will use inpt to copy the tail to
1037 * the final result. Make sure we do not overrun the
1038 * output buffer
1039 */
1040 #ifdef NET2_REGEX
1041 inpt = pt->rcmp->endp[0];
1042 #else
1043 inpt += pm[0].rm_eo - pm[0].rm_so;
1044 #endif
1045
1046 if ((outpt == endpt) || (*inpt == '\0'))
1047 break;
1048
1049 /*
1050 * if the user wants global we keep trying to
1051 * substitute until it fails, then we are done.
1052 */
1053 } while (pt->flgs & GLOB);
1054
1055 if (found)
1056 break;
1057
1058 /*
1059 * a successful substitution did NOT occur, try the next one
1060 */
1061 pt = pt->fow;
1062 }
1063
1064 if (found) {
1065 /*
1066 * we had a substitution, copy the last tail piece (if there is
1067 * room) to the final result
1068 */
1069 while ((outpt < endpt) && (*inpt != '\0'))
1070 *outpt++ = *inpt++;
1071
1072 *outpt = '\0';
1073 if ((outpt == endpt) && (*inpt != '\0')) {
1074 if (prnt)
1075 tty_warn(1,"Replacement name too long %s >> %s",
1076 name, nname);
1077 return(1);
1078 }
1079
1080 /*
1081 * inform the user of the result if wanted
1082 */
1083 if (prnt && (pt->flgs & PRNT)) {
1084 if (*nname == '\0')
1085 (void)fprintf(stderr,"%s >> <empty string>\n",
1086 name);
1087 else
1088 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1089 }
1090
1091 /*
1092 * if empty inform the caller this file is to be skipped
1093 * otherwise copy the new name over the orig name and return
1094 */
1095 if (*nname == '\0')
1096 return(1);
1097 *nlen = strlcpy(name, nname, namelen);
1098 }
1099 return(0);
1100 }
1101
1102
/*
 * checkdotdot()
 *	Return 1 if a component of the name is "..", 0 otherwise. A component
 *	is ".." when the name is "..", starts with "../", contains "/../" or
 *	ends in "/..".
 */
static int
checkdotdot(const char *name)
{
	size_t n;

	/* 1. ".." on its own, or as the first component */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '/' || name[2] == '\0'))
		return 1;

	/* 2. ".." as a component in the middle */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* 3. ".." as the last component */
	n = strlen(name);
	return (n >= 3 && strcmp(name + n - 3, "/..") == 0);
}
1130
#ifdef NET2_REGEX
/*
 * resub()
 *	expand the replacement string src into dest for the match recorded in
 *	prog. '&' stands for the whole match, a backslash followed by a digit
 *	0-9 for that subexpression; "\\" and "\&" give a literal backslash and
 *	ampersand. A subexpression that took no part in the match, or that is
 *	empty, expands to nothing. Nothing is stored at or beyond destend and
 *	the output is not NUL terminated.
 * Return:
 *	-1 if an expansion does not fit in front of destend, otherwise the
 *	number of characters added to the destination.
 */

static int
resub(regexp *prog, char *src, char *dest, char *destend)
{
	char *in = src;
	char *out = dest;
	char ch;
	int idx;
	int span;

	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		if (ch == '&') {
			idx = 0;
		} else if ((ch == '\\') && (*in >= '0') && (*in <= '9')) {
			idx = *in++ - '0';
		} else {
			/* ordinary character, possibly an escaped \ or & */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/* nothing to insert for an unset or empty subexpression */
		if ((prog->startp[idx] == NULL) || (prog->endp[idx] == NULL))
			continue;
		span = prog->endp[idx] - prog->startp[idx];
		if (span <= 0)
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (span > (destend - out))
			return -1;
		strncpy(out, prog->startp[idx], span);
		out += span;
	}
	return out - dest;
}

#else

/*
 * resub()
 *	expand the replacement string src into dest for the match described
 *	by pm. '&' stands for the whole match (pm[0]), a backslash followed
 *	by a digit 1-9 for that subexpression; "\\" and "\&" give a literal
 *	backslash and ampersand. The offsets in pm are applied to txt. A
 *	subexpression that took no part in the match, or that is empty,
 *	expands to nothing. Nothing is stored at or beyond destend and the
 *	output is not NUL terminated.
 * Return:
 *	-1 if the replacement names a subexpression the expression does not
 *	have (more than rp->re_nsub) or an expansion does not fit in front of
 *	destend, otherwise the number of characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	char ch;
	regmatch_t *m;
	int idx;
	int span;
	int nsub = rp->re_nsub;

	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (ch == '&') {
			m = pm;
		} else if ((ch == '\\') && (*in >= '1') && (*in <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			idx = *in++ - '0';
			if (idx > nsub)
				return -1;
			m = pm + idx;
		} else {
			/* ordinary character, possibly an escaped \ or & */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to insert if the subexpression is unset or empty
		 */
		if ((m->rm_so < 0) || (m->rm_eo < 0))
			continue;
		span = m->rm_eo - m->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (span > (destend - out))
			return -1;
		strncpy(out, txt + m->rm_so, span);
		out += span;
	}
	return out - dest;
}
#endif
1245