Home | History | Annotate | Line # | Download | only in pax
pat_rep.c revision 1.29
      1 /*	$NetBSD: pat_rep.c,v 1.29 2009/04/07 19:52:35 perry Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1992 Keith Muller.
      5  * Copyright (c) 1992, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Keith Muller of the University of California, San Diego.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #if HAVE_NBTOOL_CONFIG_H
     37 #include "nbtool_config.h"
     38 #endif
     39 
     40 #include <sys/cdefs.h>
     41 #if !defined(lint)
     42 #if 0
     43 static char sccsid[] = "@(#)pat_rep.c	8.2 (Berkeley) 4/18/94";
     44 #else
     45 __RCSID("$NetBSD: pat_rep.c,v 1.29 2009/04/07 19:52:35 perry Exp $");
     46 #endif
     47 #endif /* not lint */
     48 
     49 #include <sys/types.h>
     50 #include <sys/time.h>
     51 #include <sys/stat.h>
     52 #include <sys/param.h>
     53 #include <stdio.h>
     54 #include <ctype.h>
     55 #include <string.h>
     56 #include <unistd.h>
     57 #include <stdlib.h>
     58 #include "pax.h"
     59 #include "pat_rep.h"
     60 #include "extern.h"
     61 
     62 /*
     63  * routines to handle pattern matching, name modification (regular expression
     64  * substitution and interactive renames), and destination name modification for
     65  * copy (-rw). Both file name and link names are adjusted as required in these
     66  * routines.
     67  */
     68 
     69 #define MAXSUBEXP	10		/* max subexpressions, DO NOT CHANGE */
     70 static PATTERN *pathead = NULL;		/* file pattern match list head */
     71 static PATTERN *pattail = NULL;		/* file pattern match list tail */
     72 static REPLACE *rephead = NULL;		/* replacement string list head */
     73 static REPLACE *reptail = NULL;		/* replacement string list tail */
     74 
     75 static int rep_name(char *, size_t, int *, int);
     76 static int tty_rename(ARCHD *);
     77 static int fix_path(char *, int *, char *, int);
     78 static int fn_match(char *, char *, char **, int);
     79 static char * range_match(char *, int);
     80 static int checkdotdot(const char *);
     81 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
     82 
     83 /*
     84  * rep_add()
     85  *	parses the -s replacement string; compiles the regular expression
     86  *	and stores the compiled value and its replacement string together in
     87  *	replacement string list. Input to this function is of the form:
     88  *		/old/new/pg
     89  *	The first char in the string specifies the delimiter used by this
     90  *	replacement string. "Old" is a regular expression in "ed" format which
     91  *	is compiled by regcomp() and is applied to filenames. "new" is the
     92  *	substitution string; p and g are options flags for printing and global
     93  *	replacement (over the single filename)
     94  * Return:
     95  *	0 if a proper replacement string and regular expression was added to
     96  *	the list of replacement patterns; -1 otherwise.
     97  */
     98 
     99 int
    100 rep_add(char *str)
    101 {
    102 	char *pt1;
    103 	char *pt2;
    104 	REPLACE *rep;
    105 	int res;
    106 	char rebuf[BUFSIZ];
    107 
    108 	/*
    109 	 * throw out the bad parameters
    110 	 */
    111 	if ((str == NULL) || (*str == '\0')) {
    112 		tty_warn(1, "Empty replacement string");
    113 		return -1;
    114 	}
    115 
    116 	/*
    117 	 * first character in the string specifies what the delimiter is for
    118 	 * this expression.
    119 	 */
    120 	for (pt1 = str+1; *pt1; pt1++) {
    121 		if (*pt1 == '\\') {
    122 			pt1++;
    123 			continue;
    124 		}
    125 		if (*pt1 == *str)
    126 			break;
    127 	}
    128 	if (*pt1 == 0) {
    129 		tty_warn(1, "Invalid replacement string %s", str);
    130 		return -1;
    131 	}
    132 
    133 	/*
    134 	 * allocate space for the node that handles this replacement pattern
    135 	 * and split out the regular expression and try to compile it
    136 	 */
    137 	if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
    138 		tty_warn(1, "Unable to allocate memory for replacement string");
    139 		return -1;
    140 	}
    141 
    142 	*pt1 = '\0';
    143 	if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
    144 		regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
    145 		tty_warn(1, "%s while compiling regular expression %s", rebuf,
    146 		    str);
    147 		(void)free((char *)rep);
    148 		return -1;
    149 	}
    150 
    151 	/*
    152 	 * put the delimiter back in case we need an error message and
    153 	 * locate the delimiter at the end of the replacement string
    154 	 * we then point the node at the new substitution string
    155 	 */
    156 	*pt1++ = *str;
    157 	for (pt2 = pt1; *pt2; pt2++) {
    158 		if (*pt2 == '\\') {
    159 			pt2++;
    160 			continue;
    161 		}
    162 		if (*pt2 == *str)
    163 			break;
    164 	}
    165 	if (*pt2 == 0) {
    166 		regfree(&(rep->rcmp));
    167 		(void)free((char *)rep);
    168 		tty_warn(1, "Invalid replacement string %s", str);
    169 		return -1;
    170 	}
    171 
    172 	*pt2 = '\0';
    173 
    174 	/* Make sure to dup replacement, who knows where it came from! */
    175 	if ((rep->nstr = strdup(pt1)) == NULL) {
    176 		regfree(&(rep->rcmp));
    177 		(void)free((char *)rep);
    178 		tty_warn(1, "Unable to allocate memory for replacement string");
    179 		return -1;
    180 	}
    181 
    182 	pt1 = pt2++;
    183 	rep->flgs = 0;
    184 
    185 	/*
    186 	 * set the options if any
    187 	 */
    188 	while (*pt2 != '\0') {
    189 		switch(*pt2) {
    190 		case 'g':
    191 		case 'G':
    192 			rep->flgs  |= GLOB;
    193 			break;
    194 		case 'p':
    195 		case 'P':
    196 			rep->flgs  |= PRNT;
    197 			break;
    198 		case 's':
    199 		case 'S':
    200 			rep->flgs  |= SYML;
    201 			break;
    202 		default:
    203 			regfree(&(rep->rcmp));
    204 			(void)free((char *)rep);
    205 			*pt1 = *str;
    206 			tty_warn(1, "Invalid replacement string option %s",
    207 			    str);
    208 			return -1;
    209 		}
    210 		++pt2;
    211 	}
    212 
    213 	/*
    214 	 * all done, link it in at the end
    215 	 */
    216 	rep->fow = NULL;
    217 	if (rephead == NULL) {
    218 		reptail = rephead = rep;
    219 		return 0;
    220 	}
    221 	reptail->fow = rep;
    222 	reptail = rep;
    223 	return 0;
    224 }
    225 
    226 /*
    227  * pat_add()
    228  *	add a pattern match to the pattern match list. Pattern matches are used
    229  *	to select which archive members are extracted. (They appear as
    230  *	arguments to pax in the list and read modes). If no patterns are
    231  *	supplied to pax, all members in the archive will be selected (and the
    232  *	pattern match list is empty).
    233  *
    234  * Return:
    235  *	0 if the pattern was added to the list, -1 otherwise
    236  */
    237 
    238 int
    239 pat_add(char *str, char *chdn, int flags)
    240 {
    241 	PATTERN *pt;
    242 
    243 	/*
    244 	 * throw out the junk
    245 	 */
    246 	if ((str == NULL) || (*str == '\0')) {
    247 		tty_warn(1, "Empty pattern string");
    248 		return -1;
    249 	}
    250 
    251 	/*
    252 	 * allocate space for the pattern and store the pattern. the pattern is
    253 	 * part of argv so do not bother to copy it, just point at it. Add the
    254 	 * node to the end of the pattern list
    255 	 */
    256 	if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
    257 		tty_warn(1, "Unable to allocate memory for pattern string");
    258 		return -1;
    259 	}
    260 
    261 	pt->pstr = str;
    262 	pt->pend = NULL;
    263 	pt->plen = strlen(str);
    264 	pt->fow = NULL;
    265 	pt->flgs = flags;
    266 	pt->chdname = chdn;
    267 	if (pathead == NULL) {
    268 		pattail = pathead = pt;
    269 		return 0;
    270 	}
    271 	pattail->fow = pt;
    272 	pattail = pt;
    273 	return 0;
    274 }
    275 
    276 /*
    277  * pat_chk()
    278  *	complain if any the user supplied pattern did not result in a match to
    279  *	a selected archive member.
    280  */
    281 
    282 void
    283 pat_chk(void)
    284 {
    285 	PATTERN *pt;
    286 	int wban = 0;
    287 
    288 	/*
    289 	 * walk down the list checking the flags to make sure MTCH was set,
    290 	 * if not complain
    291 	 */
    292 	for (pt = pathead; pt != NULL; pt = pt->fow) {
    293 		if (pt->flgs & MTCH)
    294 			continue;
    295 		if (!wban) {
    296 			tty_warn(1, "WARNING! These patterns were not matched:");
    297 			++wban;
    298 		}
    299 		(void)fprintf(stderr, "%s\n", pt->pstr);
    300 	}
    301 }
    302 
    303 /*
    304  * pat_sel()
    305  *	the archive member which matches a pattern was selected. Mark the
    306  *	pattern as having selected an archive member. arcn->pat points at the
    307  *	pattern that was matched. arcn->pat is set in pat_match()
    308  *
    309  *	NOTE: When the -c option is used, we are called when there was no match
    310  *	by pat_match() (that means we did match before the inverted sense of
    311  *	the logic). Now this seems really strange at first, but with -c we
    312  *	need to keep track of those patterns that cause an archive member to
    313  *	NOT be selected (it found an archive member with a specified pattern)
    314  * Return:
    315  *	0 if the pattern pointed at by arcn->pat was tagged as creating a
    316  *	match, -1 otherwise.
    317  */
    318 
    319 int
    320 pat_sel(ARCHD *arcn)
    321 {
    322 	PATTERN *pt;
    323 	PATTERN **ppt;
    324 	int len;
    325 
    326 	/*
    327 	 * if no patterns just return
    328 	 */
    329 	if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
    330 		return 0;
    331 
    332 	/*
    333 	 * when we are NOT limited to a single match per pattern mark the
    334 	 * pattern and return
    335 	 */
    336 	if (!nflag) {
    337 		pt->flgs |= MTCH;
    338 		return 0;
    339 	}
    340 
    341 	/*
    342 	 * we reach this point only when we allow a single selected match per
    343 	 * pattern, if the pattern matches a directory and we do not have -d
    344 	 * (dflag) we are done with this pattern. We may also be handed a file
    345 	 * in the subtree of a directory. in that case when we are operating
    346 	 * with -d, this pattern was already selected and we are done
    347 	 */
    348 	if (pt->flgs & DIR_MTCH)
    349 		return 0;
    350 
    351 	if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
    352 		/*
    353 		 * ok we matched a directory and we are allowing
    354 		 * subtree matches but because of the -n only its children will
    355 		 * match. This is tagged as a DIR_MTCH type.
    356 		 * WATCH IT, the code assumes that pt->pend points
    357 		 * into arcn->name and arcn->name has not been modified.
    358 		 * If not we will have a big mess. Yup this is another kludge
    359 		 */
    360 
    361 		/*
    362 		 * if this was a prefix match, remove trailing part of path
    363 		 * so we can copy it. Future matches will be exact prefix match
    364 		 */
    365 		if (pt->pend != NULL)
    366 			*pt->pend = '\0';
    367 
    368 		if ((pt->pstr = strdup(arcn->name)) == NULL) {
    369 			tty_warn(1, "Pattern select out of memory");
    370 			if (pt->pend != NULL)
    371 				*pt->pend = '/';
    372 			pt->pend = NULL;
    373 			return -1;
    374 		}
    375 
    376 		/*
    377 		 * put the trailing / back in the source string
    378 		 */
    379 		if (pt->pend != NULL) {
    380 			*pt->pend = '/';
    381 			pt->pend = NULL;
    382 		}
    383 		pt->plen = strlen(pt->pstr);
    384 
    385 		/*
    386 		 * strip off any trailing /, this should really never happen
    387 		 */
    388 		len = pt->plen - 1;
    389 		if (*(pt->pstr + len) == '/') {
    390 			*(pt->pstr + len) = '\0';
    391 			pt->plen = len;
    392 		}
    393 		pt->flgs = DIR_MTCH | MTCH;
    394 		arcn->pat = pt;
    395 		return 0;
    396 	}
    397 
    398 	/*
    399 	 * we are then done with this pattern, so we delete it from the list
    400 	 * because it can never be used for another match.
    401 	 * Seems kind of strange to do for a -c, but the pax spec is really
    402 	 * vague on the interaction of -c, -n, and -d. We assume that when -c
    403 	 * and the pattern rejects a member (i.e. it matched it) it is done.
    404 	 * In effect we place the order of the flags as having -c last.
    405 	 */
    406 	pt = pathead;
    407 	ppt = &pathead;
    408 	while ((pt != NULL) && (pt != arcn->pat)) {
    409 		ppt = &(pt->fow);
    410 		pt = pt->fow;
    411 	}
    412 
    413 	if (pt == NULL) {
    414 		/*
    415 		 * should never happen....
    416 		 */
    417 		tty_warn(1, "Pattern list inconsistent");
    418 		return -1;
    419 	}
    420 	*ppt = pt->fow;
    421 	(void)free((char *)pt);
    422 	arcn->pat = NULL;
    423 	return 0;
    424 }
    425 
    426 /*
    427  * pat_match()
    428  *	see if this archive member matches any supplied pattern, if a match
    429  *	is found, arcn->pat is set to point at the potential pattern. Later if
    430  *	this archive member is "selected" we process and mark the pattern as
    431  *	one which matched a selected archive member (see pat_sel())
    432  * Return:
    433  *	0 if this archive member should be processed, 1 if it should be
    434  *	skipped and -1 if we are done with all patterns (and pax should quit
    435  *	looking for more members)
    436  */
    437 
    438 int
    439 pat_match(ARCHD *arcn)
    440 {
    441 	PATTERN *pt;
    442 
    443 	arcn->pat = NULL;
    444 
    445 	/*
    446 	 * if there are no more patterns and we have -n (and not -c) we are
    447 	 * done. otherwise with no patterns to match, matches all
    448 	 */
    449 	if (pathead == NULL) {
    450 		if (nflag && !cflag)
    451 			return -1;
    452 		return 0;
    453 	}
    454 
    455 	/*
    456 	 * have to search down the list one at a time looking for a match.
    457 	 */
    458 	pt = pathead;
    459 	while (pt != NULL) {
    460 		/*
    461 		 * check for a file name match unless we have DIR_MTCH set in
    462 		 * this pattern then we want a prefix match
    463 		 */
    464 		if (pt->flgs & DIR_MTCH) {
    465 			/*
    466 			 * this pattern was matched before to a directory
    467 			 * as we must have -n set for this (but not -d). We can
    468 			 * only match CHILDREN of that directory so we must use
    469 			 * an exact prefix match (no wildcards).
    470 			 */
    471 			if ((arcn->name[pt->plen] == '/') &&
    472 			    (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
    473 				break;
    474 		} else if (fn_match(pt->pstr, arcn->name, &pt->pend,
    475 		    pt->flgs & NOGLOB_MTCH) == 0)
    476 			break;
    477 		pt = pt->fow;
    478 	}
    479 
    480 	/*
    481 	 * return the result, remember that cflag (-c) inverts the sense of a
    482 	 * match
    483 	 */
    484 	if (pt == NULL)
    485 		return cflag ? 0 : 1;
    486 
    487 	/*
    488 	 * we had a match, now when we invert the sense (-c) we reject this
    489 	 * member. However we have to tag the pattern a being successful, (in a
    490 	 * match, not in selecting an archive member) so we call pat_sel()
    491 	 * here.
    492 	 */
    493 	arcn->pat = pt;
    494 	if (!cflag)
    495 		return 0;
    496 
    497 	if (pat_sel(arcn) < 0)
    498 		return -1;
    499 	arcn->pat = NULL;
    500 	return 1;
    501 }
    502 
    503 /*
    504  * fn_match()
    505  * Return:
    506  *	0 if this archive member should be processed, 1 if it should be
    507  *	skipped and -1 if we are done with all patterns (and pax should quit
    508  *	looking for more members)
    509  *	Note: *pend may be changed to show where the prefix ends.
    510  */
    511 
    512 static int
    513 fn_match(char *pattern, char *string, char **pend, int noglob)
    514 {
    515 	char c;
    516 	char test;
    517 
    518 	*pend = NULL;
    519 	for (;;) {
    520 		switch (c = *pattern++) {
    521 		case '\0':
    522 			/*
    523 			 * Ok we found an exact match
    524 			 */
    525 			if (*string == '\0')
    526 				return 0;
    527 
    528 			/*
    529 			 * Check if it is a prefix match
    530 			 */
    531 			if ((dflag == 1) || (*string != '/'))
    532 				return -1;
    533 
    534 			/*
    535 			 * It is a prefix match, remember where the trailing
    536 			 * / is located
    537 			 */
    538 			*pend = string;
    539 			return 0;
    540 		case '?':
    541 			if (noglob)
    542 				goto regular;
    543 			if ((test = *string++) == '\0')
    544 				return (-1);
    545 			break;
    546 		case '*':
    547 			if (noglob)
    548 				goto regular;
    549 			c = *pattern;
    550 			/*
    551 			 * Collapse multiple *'s.
    552 			 */
    553 			while (c == '*')
    554 				c = *++pattern;
    555 
    556 			/*
    557 			 * Optimized hack for pattern with a * at the end
    558 			 */
    559 			if (c == '\0')
    560 				return (0);
    561 
    562 			/*
    563 			 * General case, use recursion.
    564 			 */
    565 			while ((test = *string) != '\0') {
    566 				if (!fn_match(pattern, string, pend, noglob))
    567 					return (0);
    568 				++string;
    569 			}
    570 			return (-1);
    571 		case '[':
    572 			if (noglob)
    573 				goto regular;
    574 			/*
    575 			 * range match
    576 			 */
    577 			if (((test = *string++) == '\0') ||
    578 			    ((pattern = range_match(pattern, test)) == NULL))
    579 				return (-1);
    580 			break;
    581 		case '\\':
    582 		default:
    583 		regular:
    584 			if (c != *string++)
    585 				return (-1);
    586 			break;
    587 		}
    588 	}
    589 	/* NOTREACHED */
    590 }
    591 
/*
 * range_match()
 *	match one character against a character class. pattern points just
 *	past the opening `['. A leading `!' inverts the class. "a-z" is a
 *	range unless the `-' is the last character before the closing `]'.
 * Return:
 *	pointer to the pattern character after the closing `]' when test is
 *	accepted by the class; NULL when it is rejected or when the class is
 *	not closed by a `]'.
 */

static char *
range_match(char *pattern, int test)
{
	int inverted = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		inverted = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end of the pattern, illegal
		 */
		if (lo == '\0')
			return NULL;

		if ((*pattern == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
		} else if (lo == test)
			hit = 1;
	}

	/*
	 * the class accepts test when exactly one of hit/inverted is set
	 */
	if (hit == inverted)
		return NULL;
	return pattern;
}
    620 
    621 /*
    622  * mod_name()
    623  *	modify a selected file name. first attempt to apply replacement string
    624  *	expressions, then apply interactive file rename. We apply replacement
    625  *	string expressions to both filenames and file links (if we didn't the
    626  *	links would point to the wrong place, and we could never be able to
    627  *	move an archive that has a file link in it). When we rename files
    628  *	interactively, we store that mapping (old name to user input name) so
    629  *	if we spot any file links to the old file name in the future, we will
    630  *	know exactly how to fix the file link.
    631  * Return:
    632  *	0 continue to  process file, 1 skip this file, -1 pax is finished
    633  */
    634 
    635 int
    636 mod_name(ARCHD *arcn, int flags)
    637 {
    638 	int res = 0;
    639 
    640 	if (secure) {
    641 		if (checkdotdot(arcn->name)) {
    642 			tty_warn(0, "Ignoring file containing `..' (%s)",
    643 				arcn->name);
    644 			return 1;
    645 		}
    646 #ifdef notdef
    647 		if (checkdotdot(arcn->ln_name)) {
    648 			tty_warn(0, "Ignoring link containing `..' (%s)",
    649 				arcn->ln_name);
    650 			return 1;
    651 		}
    652 #endif
    653 	}
    654 
    655 	/*
    656 	 * IMPORTANT: We have a problem. what do we do with symlinks?
    657 	 * Modifying a hard link name makes sense, as we know the file it
    658 	 * points at should have been seen already in the archive (and if it
    659 	 * wasn't seen because of a read error or a bad archive, we lose
    660 	 * anyway). But there are no such requirements for symlinks. On one
    661 	 * hand the symlink that refers to a file in the archive will have to
    662 	 * be modified to so it will still work at its new location in the
    663 	 * file system. On the other hand a symlink that points elsewhere (and
    664 	 * should continue to do so) should not be modified. There is clearly
    665 	 * no perfect solution here. So we handle them like hardlinks. Clearly
    666 	 * a replacement made by the interactive rename mapping is very likely
    667 	 * to be correct since it applies to a single file and is an exact
    668 	 * match. The regular expression replacements are a little harder to
    669 	 * justify though. We claim that the symlink name is only likely
    670 	 * to be replaced when it points within the file tree being moved and
    671 	 * in that case it should be modified. what we really need to do is to
    672 	 * call an oracle here. :)
    673 	 */
    674 	if (rephead != NULL) {
    675 		flags |= (flags & RENM) ? PRNT : 0;
    676 		/*
    677 		 * we have replacement strings, modify the name and the link
    678 		 * name if any.
    679 		 */
    680 		if ((res = rep_name(arcn->name, sizeof(arcn->name),
    681 			&(arcn->nlen), flags)) != 0)
    682 			return res;
    683 
    684 		if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
    685 		    (arcn->type == PAX_HRG)) &&
    686 		    ((res = rep_name(arcn->ln_name,
    687 		    sizeof(arcn->ln_name), &(arcn->ln_nlen),
    688 		    flags | (arcn->type == PAX_SLK ? SYML : 0))) != 0))
    689 			return res;
    690 	}
    691 
    692 	if (iflag) {
    693 		/*
    694 		 * perform interactive file rename, then map the link if any
    695 		 */
    696 		if ((res = tty_rename(arcn)) != 0)
    697 			return res;
    698 		if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
    699 		    (arcn->type == PAX_HRG))
    700 			sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
    701 	}
    702 
    703 	/*
    704 	 * Strip off leading '/' if appropriate.
    705 	 * Currently, this option is only set for the tar format.
    706 	 */
    707 	if (rmleadslash && arcn->name[0] == '/') {
    708 		if (arcn->name[1] == '\0') {
    709 			arcn->name[0] = '.';
    710 		} else {
    711 			(void)memmove(arcn->name, &arcn->name[1],
    712 			    strlen(arcn->name));
    713 			arcn->nlen--;
    714 		}
    715 		if (rmleadslash < 2) {
    716 			rmleadslash = 2;
    717 			tty_warn(0, "Removing leading / from absolute path names in the archive");
    718 		}
    719 	}
    720 	if (rmleadslash && arcn->ln_name[0] == '/' &&
    721 	    (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
    722 		if (arcn->ln_name[1] == '\0') {
    723 			arcn->ln_name[0] = '.';
    724 		} else {
    725 			(void)memmove(arcn->ln_name, &arcn->ln_name[1],
    726 			    strlen(arcn->ln_name));
    727 			arcn->ln_nlen--;
    728 		}
    729 		if (rmleadslash < 2) {
    730 			rmleadslash = 2;
    731 			tty_warn(0, "Removing leading / from absolute path names in the archive");
    732 		}
    733 	}
    734 
    735 	return res;
    736 }
    737 
    738 /*
    739  * tty_rename()
    740  *	Prompt the user for a replacement file name. A "." keeps the old name,
    741  *	a empty line skips the file, and an EOF on reading the tty, will cause
    742  *	pax to stop processing and exit. Otherwise the file name input, replaces
    743  *	the old one.
    744  * Return:
    745  *	0 process this file, 1 skip this file, -1 we need to exit pax
    746  */
    747 
    748 static int
    749 tty_rename(ARCHD *arcn)
    750 {
    751 	char tmpname[PAXPATHLEN+2];
    752 	int res;
    753 
    754 	/*
    755 	 * prompt user for the replacement name for a file, keep trying until
    756 	 * we get some reasonable input. Archives may have more than one file
    757 	 * on them with the same name (from updates etc). We print verbose info
    758 	 * on the file so the user knows what is up.
    759 	 */
    760 	tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
    761 
    762 	for (;;) {
    763 		ls_tty(arcn);
    764 		tty_prnt("Input new name, or a \".\" to keep the old name, ");
    765 		tty_prnt("or a \"return\" to skip this file.\n");
    766 		tty_prnt("Input > ");
    767 		if (tty_read(tmpname, sizeof(tmpname)) < 0)
    768 			return -1;
    769 		if (strcmp(tmpname, "..") == 0) {
    770 			tty_prnt("Try again, illegal file name: ..\n");
    771 			continue;
    772 		}
    773 		if (strlen(tmpname) > PAXPATHLEN) {
    774 			tty_prnt("Try again, file name too long\n");
    775 			continue;
    776 		}
    777 		break;
    778 	}
    779 
    780 	/*
    781 	 * empty file name, skips this file. a "." leaves it alone
    782 	 */
    783 	if (tmpname[0] == '\0') {
    784 		tty_prnt("Skipping file.\n");
    785 		return 1;
    786 	}
    787 	if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
    788 		tty_prnt("Processing continues, name unchanged.\n");
    789 		return 0;
    790 	}
    791 
    792 	/*
    793 	 * ok the name changed. We may run into links that point at this
    794 	 * file later. we have to remember where the user sent the file
    795 	 * in order to repair any links.
    796 	 */
    797 	tty_prnt("Processing continues, name changed to: %s\n", tmpname);
    798 	res = add_name(arcn->name, arcn->nlen, tmpname);
    799 	arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
    800 	if (res < 0)
    801 		return -1;
    802 	return 0;
    803 }
    804 
    805 /*
    806  * set_dest()
    807  *	fix up the file name and the link name (if any) so this file will land
    808  *	in the destination directory (used during copy() -rw).
    809  * Return:
    810  *	0 if ok, -1 if failure (name too long)
    811  */
    812 
    813 int
    814 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
    815 {
    816 	if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
    817 		return -1;
    818 
    819 	/*
    820 	 * It is really hard to deal with symlinks here, we cannot be sure
    821 	 * if the name they point was moved (or will be moved). It is best to
    822 	 * leave them alone.
    823 	 */
    824 	if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
    825 		return 0;
    826 
    827 	if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
    828 		return -1;
    829 	return 0;
    830 }
    831 
    832 /*
    833  * fix_path
    834  *	concatenate dir_name and or_name and store the result in or_name (if
    835  *	it fits). This is one ugly function.
    836  * Return:
    837  *	0 if ok, -1 if the final name is too long
    838  */
    839 
    840 static int
    841 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
    842 {
    843 	char *src;
    844 	char *dest;
    845 	char *start;
    846 	int len;
    847 
    848 	/*
    849 	 * we shift the or_name to the right enough to tack in the dir_name
    850 	 * at the front. We make sure we have enough space for it all before
    851 	 * we start. since dest always ends in a slash, we skip of or_name
    852 	 * if it also starts with one.
    853 	 */
    854 	start = or_name;
    855 	src = start + *or_len;
    856 	dest = src + dir_len;
    857 	if (*start == '/') {
    858 		++start;
    859 		--dest;
    860 	}
    861 	if ((len = dest - or_name) > PAXPATHLEN) {
    862 		tty_warn(1, "File name %s/%s, too long", dir_name, start);
    863 		return -1;
    864 	}
    865 	*or_len = len;
    866 
    867 	/*
    868 	 * enough space, shift
    869 	 */
    870 	while (src >= start)
    871 		*dest-- = *src--;
    872 	src = dir_name + dir_len - 1;
    873 
    874 	/*
    875 	 * splice in the destination directory name
    876 	 */
    877 	while (src >= dir_name)
    878 		*dest-- = *src--;
    879 
    880 	*(or_name + len) = '\0';
    881 	return 0;
    882 }
    883 
    884 /*
    885  * rep_name()
    886  *	walk down the list of replacement strings applying each one in order.
    887  *	when we find one with a successful substitution, we modify the name
    888  *	as specified. if required, we print the results. if the resulting name
    889  *	is empty, we will skip this archive member. We use the regexp(3)
    890  *	routines (regexp() ought to win a prize as having the most cryptic
    891  *	library function manual page).
    892  *	--Parameters--
    893  *	name is the file name we are going to apply the regular expressions to
    894  *	(and may be modified)
    895  *	namelen the size of the name buffer.
    896  *	nlen is the length of this name (and is modified to hold the length of
    897  *	the final string).
    898  *	prnt is a flag that says whether to print the final result.
    899  * Return:
    900  *	0 if substitution was successful, 1 if we are to skip the file (the name
    901  *	ended up empty)
    902  */
    903 
    904 static int
    905 rep_name(char *name, size_t namelen, int *nlen, int flags)
    906 {
    907 	REPLACE *pt;
    908 	char *inpt;
    909 	char *outpt;
    910 	char *endpt;
    911 	char *rpt;
    912 	int found = 0;
    913 	int res;
    914 	regmatch_t pm[MAXSUBEXP];
    915 	char nname[PAXPATHLEN+1];	/* final result of all replacements */
    916 	char buf1[PAXPATHLEN+1];	/* where we work on the name */
    917 
    918 	/*
    919 	 * copy the name into buf1, where we will work on it. We need to keep
    920 	 * the orig string around so we can print out the result of the final
    921 	 * replacement. We build up the final result in nname. inpt points at
    922 	 * the string we apply the regular expression to. prnt is used to
    923 	 * suppress printing when we handle replacements on the link field
    924 	 * (the user already saw that substitution go by)
    925 	 */
    926 	pt = rephead;
    927 	(void)strcpy(buf1, name);
    928 	inpt = buf1;
    929 	outpt = nname;
    930 	endpt = outpt + PAXPATHLEN;
    931 
    932 	/*
    933 	 * try each replacement string in order
    934 	 */
    935 	while (pt != NULL) {
    936 		do {
    937 			if ((flags & SYML) && (pt->flgs & SYML))
    938 				continue;
    939 			/*
    940 			 * check for a successful substitution, if not go to
    941 			 * the next pattern, or cleanup if we were global
    942 			 */
    943 			if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
    944 				break;
    945 
    946 			/*
    947 			 * ok we found one. We have three parts, the prefix
    948 			 * which did not match, the section that did and the
    949 			 * tail (that also did not match). Copy the prefix to
    950 			 * the final output buffer (watching to make sure we
    951 			 * do not create a string too long).
    952 			 */
    953 			found = 1;
    954 			rpt = inpt + pm[0].rm_so;
    955 
    956 			while ((inpt < rpt) && (outpt < endpt))
    957 				*outpt++ = *inpt++;
    958 			if (outpt == endpt)
    959 				break;
    960 
    961 			/*
    962 			 * for the second part (which matched the regular
    963 			 * expression) apply the substitution using the
    964 			 * replacement string and place it the prefix in the
    965 			 * final output. If we have problems, skip it.
    966 			 */
    967 			if ((res =
    968 			    resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
    969 			    ) < 0) {
    970 				if (flags & PRNT)
    971 					tty_warn(1, "Replacement name error %s",
    972 					    name);
    973 				return 1;
    974 			}
    975 			outpt += res;
    976 
    977 			/*
    978 			 * we set up to look again starting at the first
    979 			 * character in the tail (of the input string right
    980 			 * after the last character matched by the regular
    981 			 * expression (inpt always points at the first char in
    982 			 * the string to process). If we are not doing a global
    983 			 * substitution, we will use inpt to copy the tail to
    984 			 * the final result. Make sure we do not overrun the
    985 			 * output buffer
    986 			 */
    987 			inpt += pm[0].rm_eo - pm[0].rm_so;
    988 
    989 			if ((outpt == endpt) || (*inpt == '\0'))
    990 				break;
    991 
    992 			/*
    993 			 * if the user wants global we keep trying to
    994 			 * substitute until it fails, then we are done.
    995 			 */
    996 		} while (pt->flgs & GLOB);
    997 
    998 		if (found)
    999 			break;
   1000 
   1001 		/*
   1002 		 * a successful substitution did NOT occur, try the next one
   1003 		 */
   1004 		pt = pt->fow;
   1005 	}
   1006 
   1007 	if (found) {
   1008 		/*
   1009 		 * we had a substitution, copy the last tail piece (if there is
   1010 		 * room) to the final result
   1011 		 */
   1012 		while ((outpt < endpt) && (*inpt != '\0'))
   1013 			*outpt++ = *inpt++;
   1014 
   1015 		*outpt = '\0';
   1016 		if ((outpt == endpt) && (*inpt != '\0')) {
   1017 			if (flags & PRNT)
   1018 				tty_warn(1,"Replacement name too long %s >> %s",
   1019 				    name, nname);
   1020 			return 1;
   1021 		}
   1022 
   1023 		/*
   1024 		 * inform the user of the result if wanted
   1025 		 */
   1026 		if ((flags & PRNT) && (pt->flgs & PRNT)) {
   1027 			if (*nname == '\0')
   1028 				(void)fprintf(stderr,"%s >> <empty string>\n",
   1029 				    name);
   1030 			else
   1031 				(void)fprintf(stderr,"%s >> %s\n", name, nname);
   1032 		}
   1033 
   1034 		/*
   1035 		 * if empty inform the caller this file is to be skipped
   1036 		 * otherwise copy the new name over the orig name and return
   1037 		 */
   1038 		if (*nname == '\0')
   1039 			return 1;
   1040 		if (flags & RENM)
   1041 			*nlen = strlcpy(name, nname, namelen);
   1042 	}
   1043 	return 0;
   1044 }
   1045 
   1046 
/*
 * checkdotdot()
 *	Report whether any path component of name is exactly "..".
 * Return:
 *	1 if name starts with a ".." component, contains "/../", or ends in
 *	"/..", otherwise 0.
 */
static int
checkdotdot(const char *name)
{
	size_t len = strlen(name);

	/* leading component: ".." alone or "../something" */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '\0' || name[2] == '/'))
		return 1;

	/* interior component: "something/../something" */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* trailing component: "something/.." */
	return len >= 3 && strcmp(name + len - 3, "/..") == 0;
}
   1074 
   1075 
/*
 * resub()
 *	expand a replacement string into the destination buffer using the old
 *	style ed(1) conventions: '&' stands for the whole match, "\1".."\9"
 *	for a parenthesised subexpression, and "\\" or "\&" for a literal
 *	backslash or ampersand.
 *	--Parameters--
 *	rp is the compiled expression (only its subexpression count is used)
 *	pm holds the match offsets
 *	src is the replacement string
 *	txt is the string the offsets in pm are applied to
 *	dest is where the expansion is written (it is NOT null terminated)
 *	destend is one past the last byte that may be written
 * Return:
 *	-1 if error (reference to a subexpression that does not exist, or a
 *	matched piece that does not fit), or the number of characters added
 *	to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	const int nsub = rp->re_nsub;
	char *in = src;
	char *out = dest;

	while (out < destend) {
		regmatch_t *m;
		int mlen;
		char ch = *in++;

		if (ch == '\0')
			break;

		if (ch == '&') {
			/* whole match */
			m = pm;
		} else if (ch == '\\' && *in >= '1' && *in <= '9') {
			/* numbered subexpression; it has to exist */
			int idx = *in++ - '0';

			if (idx > nsub)
				return -1;
			m = pm + idx;
		} else {
			/* plain character, possibly an escaped '\' or '&' */
			if (ch == '\\' && (*in == '\\' || *in == '&'))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * a subexpression that did not take part in the match, or
		 * matched nothing, contributes no text
		 */
		if (m->rm_so < 0 || m->rm_eo < 0)
			continue;
		mlen = m->rm_eo - m->rm_so;
		if (mlen <= 0)
			continue;

		/*
		 * append the matched text, refusing if it will not fit
		 */
		if (mlen > (destend - out))
			return -1;
		strncpy(out, txt + m->rm_so, mlen);
		out += mlen;
	}
	return out - dest;
}
   1140