pat_rep.c revision 1.14 1 /* $NetBSD: pat_rep.c,v 1.14 2002/10/12 15:39:30 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #if defined(__RCSID) && !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: pat_rep.c,v 1.14 2002/10/12 15:39:30 christos Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <string.h>
56 #include <unistd.h>
57 #include <stdlib.h>
58 #ifdef NET2_REGEX
59 #include <regexp.h>
60 #else
61 #include <regex.h>
62 #endif
63 #include "pax.h"
64 #include "pat_rep.h"
65 #include "extern.h"
66
67 /*
68 * routines to handle pattern matching, name modification (regular expression
69 * substitution and interactive renames), and destination name modification for
70 * copy (-rw). Both file name and link names are adjusted as required in these
71 * routines.
72 */
73
74 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
75 static PATTERN *pathead = NULL; /* file pattern match list head */
76 static PATTERN *pattail = NULL; /* file pattern match list tail */
77 static REPLACE *rephead = NULL; /* replacement string list head */
78 static REPLACE *reptail = NULL; /* replacement string list tail */
79
80 static int rep_name(char *, size_t, int *, int);
81 static int tty_rename(ARCHD *);
82 static int fix_path(char *, int *, char *, int);
83 static int fn_match(char *, char *, char **);
84 static char * range_match(char *, int);
85 #ifdef NET2_REGEX
86 static int resub(regexp *, char *, char *, char *);
87 #else
88 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
89 #endif
90
91 /*
92 * rep_add()
93 * parses the -s replacement string; compiles the regular expression
94 * and stores the compiled value and it's replacement string together in
95 * replacement string list. Input to this function is of the form:
96 * /old/new/pg
97 * The first char in the string specifies the delimiter used by this
98 * replacement string. "Old" is a regular expression in "ed" format which
99 * is compiled by regcomp() and is applied to filenames. "new" is the
100 * substitution string; p and g are options flags for printing and global
101 * replacement (over the single filename)
102 * Return:
103 * 0 if a proper replacement string and regular expression was added to
104 * the list of replacement patterns; -1 otherwise.
105 */
106
107 int
108 rep_add(char *str)
109 {
110 char *pt1;
111 char *pt2;
112 REPLACE *rep;
113 #ifndef NET2_REGEX
114 int res;
115 char rebuf[BUFSIZ];
116 #endif
117
118 /*
119 * throw out the bad parameters
120 */
121 if ((str == NULL) || (*str == '\0')) {
122 tty_warn(1, "Empty replacement string");
123 return(-1);
124 }
125
126 /*
127 * first character in the string specifies what the delimiter is for
128 * this expression.
129 */
130 for (pt1 = str+1; *pt1; pt1++) {
131 if (*pt1 == '\\') {
132 pt1++;
133 continue;
134 }
135 if (*pt1 == *str)
136 break;
137 }
138 if (pt1 == NULL) {
139 tty_warn(1, "Invalid replacement string %s", str);
140 return(-1);
141 }
142
143 /*
144 * allocate space for the node that handles this replacement pattern
145 * and split out the regular expression and try to compile it
146 */
147 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
148 tty_warn(1, "Unable to allocate memory for replacement string");
149 return(-1);
150 }
151
152 *pt1 = '\0';
153 #ifdef NET2_REGEX
154 if ((rep->rcmp = regcomp(str+1)) == NULL) {
155 #else
156 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
157 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
158 tty_warn(1, "%s while compiling regular expression %s", rebuf,
159 str);
160 #endif
161 (void)free((char *)rep);
162 return(-1);
163 }
164
165 /*
166 * put the delimiter back in case we need an error message and
167 * locate the delimiter at the end of the replacement string
168 * we then point the node at the new substitution string
169 */
170 *pt1++ = *str;
171 for (pt2 = pt1; *pt2; pt2++) {
172 if (*pt2 == '\\') {
173 pt2++;
174 continue;
175 }
176 if (*pt2 == *str)
177 break;
178 }
179 if (pt2 == NULL) {
180 #ifdef NET2_REGEX
181 (void)free((char *)rep->rcmp);
182 #else
183 regfree(&(rep->rcmp));
184 #endif
185 (void)free((char *)rep);
186 tty_warn(1, "Invalid replacement string %s", str);
187 return(-1);
188 }
189
190 *pt2 = '\0';
191 rep->nstr = pt1;
192 pt1 = pt2++;
193 rep->flgs = 0;
194
195 /*
196 * set the options if any
197 */
198 while (*pt2 != '\0') {
199 switch(*pt2) {
200 case 'g':
201 case 'G':
202 rep->flgs |= GLOB;
203 break;
204 case 'p':
205 case 'P':
206 rep->flgs |= PRNT;
207 break;
208 default:
209 #ifdef NET2_REGEX
210 (void)free((char *)rep->rcmp);
211 #else
212 regfree(&(rep->rcmp));
213 #endif
214 (void)free((char *)rep);
215 *pt1 = *str;
216 tty_warn(1, "Invalid replacement string option %s",
217 str);
218 return(-1);
219 }
220 ++pt2;
221 }
222
223 /*
224 * all done, link it in at the end
225 */
226 rep->fow = NULL;
227 if (rephead == NULL) {
228 reptail = rephead = rep;
229 return(0);
230 }
231 reptail->fow = rep;
232 reptail = rep;
233 return(0);
234 }
235
236 /*
237 * pat_add()
238 * add a pattern match to the pattern match list. Pattern matches are used
239 * to select which archive members are extracted. (They appear as
240 * arguments to pax in the list and read modes). If no patterns are
241 * supplied to pax, all members in the archive will be selected (and the
242 * pattern match list is empty).
243 *
244 * Return:
245 * 0 if the pattern was added to the list, -1 otherwise
246 */
247
248 int
249 pat_add(char *str, char *chdn)
250 {
251 PATTERN *pt;
252
253 /*
254 * throw out the junk
255 */
256 if ((str == NULL) || (*str == '\0')) {
257 tty_warn(1, "Empty pattern string");
258 return(-1);
259 }
260
261 /*
262 * allocate space for the pattern and store the pattern. the pattern is
263 * part of argv so do not bother to copy it, just point at it. Add the
264 * node to the end of the pattern list
265 */
266 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
267 tty_warn(1, "Unable to allocate memory for pattern string");
268 return(-1);
269 }
270
271 pt->pstr = str;
272 pt->pend = NULL;
273 pt->plen = strlen(str);
274 pt->fow = NULL;
275 pt->flgs = 0;
276 pt->chdname = chdn;
277 if (pathead == NULL) {
278 pattail = pathead = pt;
279 return(0);
280 }
281 pattail->fow = pt;
282 pattail = pt;
283 return(0);
284 }
285
286 /*
287 * pat_chk()
288 * complain if any the user supplied pattern did not result in a match to
289 * a selected archive member.
290 */
291
292 void
293 pat_chk(void)
294 {
295 PATTERN *pt;
296 int wban = 0;
297
298 /*
299 * walk down the list checking the flags to make sure MTCH was set,
300 * if not complain
301 */
302 for (pt = pathead; pt != NULL; pt = pt->fow) {
303 if (pt->flgs & MTCH)
304 continue;
305 if (!wban) {
306 tty_warn(1, "WARNING! These patterns were not matched:");
307 ++wban;
308 }
309 (void)fprintf(stderr, "%s\n", pt->pstr);
310 }
311 }
312
313 /*
314 * pat_sel()
315 * the archive member which matches a pattern was selected. Mark the
316 * pattern as having selected an archive member. arcn->pat points at the
317 * pattern that was matched. arcn->pat is set in pat_match()
318 *
319 * NOTE: When the -c option is used, we are called when there was no match
320 * by pat_match() (that means we did match before the inverted sense of
321 * the logic). Now this seems really strange at first, but with -c we
322 * need to keep track of those patterns that cause a archive member to NOT
323 * be selected (it found an archive member with a specified pattern)
324 * Return:
325 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
326 * match, -1 otherwise.
327 */
328
329 int
330 pat_sel(ARCHD *arcn)
331 {
332 PATTERN *pt;
333 PATTERN **ppt;
334 int len;
335
336 /*
337 * if no patterns just return
338 */
339 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
340 return(0);
341
342 /*
343 * when we are NOT limited to a single match per pattern mark the
344 * pattern and return
345 */
346 if (!nflag) {
347 pt->flgs |= MTCH;
348 return(0);
349 }
350
351 /*
352 * we reach this point only when we allow a single selected match per
353 * pattern, if the pattern matches a directory and we do not have -d
354 * (dflag) we are done with this pattern. We may also be handed a file
355 * in the subtree of a directory. in that case when we are operating
356 * with -d, this pattern was already selected and we are done
357 */
358 if (pt->flgs & DIR_MTCH)
359 return(0);
360
361 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
362 /*
363 * ok we matched a directory and we are allowing
364 * subtree matches but because of the -n only its children will
365 * match. This is tagged as a DIR_MTCH type.
366 * WATCH IT, the code assumes that pt->pend points
367 * into arcn->name and arcn->name has not been modified.
368 * If not we will have a big mess. Yup this is another kludge
369 */
370
371 /*
372 * if this was a prefix match, remove trailing part of path
373 * so we can copy it. Future matches will be exact prefix match
374 */
375 if (pt->pend != NULL)
376 *pt->pend = '\0';
377
378 if ((pt->pstr = strdup(arcn->name)) == NULL) {
379 tty_warn(1, "Pattern select out of memory");
380 if (pt->pend != NULL)
381 *pt->pend = '/';
382 pt->pend = NULL;
383 return(-1);
384 }
385
386 /*
387 * put the trailing / back in the source string
388 */
389 if (pt->pend != NULL) {
390 *pt->pend = '/';
391 pt->pend = NULL;
392 }
393 pt->plen = strlen(pt->pstr);
394
395 /*
396 * strip off any trailing /, this should really never happen
397 */
398 len = pt->plen - 1;
399 if (*(pt->pstr + len) == '/') {
400 *(pt->pstr + len) = '\0';
401 pt->plen = len;
402 }
403 pt->flgs = DIR_MTCH | MTCH;
404 arcn->pat = pt;
405 return(0);
406 }
407
408 /*
409 * we are then done with this pattern, so we delete it from the list
410 * because it can never be used for another match.
411 * Seems kind of strange to do for a -c, but the pax spec is really
412 * vague on the interaction of -c -n and -d. We assume that when -c
413 * and the pattern rejects a member (i.e. it matched it) it is done.
414 * In effect we place the order of the flags as having -c last.
415 */
416 pt = pathead;
417 ppt = &pathead;
418 while ((pt != NULL) && (pt != arcn->pat)) {
419 ppt = &(pt->fow);
420 pt = pt->fow;
421 }
422
423 if (pt == NULL) {
424 /*
425 * should never happen....
426 */
427 tty_warn(1, "Pattern list inconsistant");
428 return(-1);
429 }
430 *ppt = pt->fow;
431 (void)free((char *)pt);
432 arcn->pat = NULL;
433 return(0);
434 }
435
436 /*
437 * pat_match()
438 * see if this archive member matches any supplied pattern, if a match
439 * is found, arcn->pat is set to point at the potential pattern. Later if
440 * this archive member is "selected" we process and mark the pattern as
441 * one which matched a selected archive member (see pat_sel())
442 * Return:
443 * 0 if this archive member should be processed, 1 if it should be
444 * skipped and -1 if we are done with all patterns (and pax should quit
445 * looking for more members)
446 */
447
448 int
449 pat_match(ARCHD *arcn)
450 {
451 PATTERN *pt;
452
453 arcn->pat = NULL;
454
455 /*
456 * if there are no more patterns and we have -n (and not -c) we are
457 * done. otherwise with no patterns to match, matches all
458 */
459 if (pathead == NULL) {
460 if (nflag && !cflag)
461 return(-1);
462 return(0);
463 }
464
465 /*
466 * have to search down the list one at a time looking for a match.
467 */
468 pt = pathead;
469 while (pt != NULL) {
470 /*
471 * check for a file name match unless we have DIR_MTCH set in
472 * this pattern then we want a prefix match
473 */
474 if (pt->flgs & DIR_MTCH) {
475 /*
476 * this pattern was matched before to a directory
477 * as we must have -n set for this (but not -d). We can
478 * only match CHILDREN of that directory so we must use
479 * an exact prefix match (no wildcards).
480 */
481 if ((arcn->name[pt->plen] == '/') &&
482 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
483 break;
484 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
485 break;
486 pt = pt->fow;
487 }
488
489 /*
490 * return the result, remember that cflag (-c) inverts the sense of a
491 * match
492 */
493 if (pt == NULL)
494 return(cflag ? 0 : 1);
495
496 /*
497 * we had a match, now when we invert the sense (-c) we reject this
498 * member. However we have to tag the pattern a being successful, (in a
499 * match, not in selecting a archive member) so we call pat_sel() here.
500 */
501 arcn->pat = pt;
502 if (!cflag)
503 return(0);
504
505 if (pat_sel(arcn) < 0)
506 return(-1);
507 arcn->pat = NULL;
508 return(1);
509 }
510
511 /*
512 * fn_match()
513 * Return:
514 * 0 if this archive member should be processed, 1 if it should be
515 * skipped and -1 if we are done with all patterns (and pax should quit
516 * looking for more members)
517 * Note: *pend may be changed to show where the prefix ends.
518 */
519
520 static int
521 fn_match(char *pattern, char *string, char **pend)
522 {
523 char c;
524 char test;
525
526 *pend = NULL;
527 for (;;) {
528 switch (c = *pattern++) {
529 case '\0':
530 /*
531 * Ok we found an exact match
532 */
533 if (*string == '\0')
534 return(0);
535
536 /*
537 * Check if it is a prefix match
538 */
539 if ((dflag == 1) || (*string != '/'))
540 return(-1);
541
542 /*
543 * It is a prefix match, remember where the trailing
544 * / is located
545 */
546 *pend = string;
547 return(0);
548 case '?':
549 if ((test = *string++) == '\0')
550 return (-1);
551 break;
552 case '*':
553 c = *pattern;
554 /*
555 * Collapse multiple *'s.
556 */
557 while (c == '*')
558 c = *++pattern;
559
560 /*
561 * Optimized hack for pattern with a * at the end
562 */
563 if (c == '\0')
564 return (0);
565
566 /*
567 * General case, use recursion.
568 */
569 while ((test = *string) != '\0') {
570 if (!fn_match(pattern, string, pend))
571 return (0);
572 ++string;
573 }
574 return (-1);
575 case '[':
576 /*
577 * range match
578 */
579 if (((test = *string++) == '\0') ||
580 ((pattern = range_match(pattern, test)) == NULL))
581 return (-1);
582 break;
583 case '\\':
584 default:
585 if (c != *string++)
586 return (-1);
587 break;
588 }
589 }
590 /* NOTREACHED */
591 }
592
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['. A leading '!' negates the expression.
 *	Members are single characters or ranges written as lo-hi; a '-' that
 *	is directly followed by the closing ']' (or by the end of the pattern)
 *	is an ordinary character.
 * Return:
 *	a pointer to the pattern character following the closing ']' when test
 *	is accepted, NULL when it is rejected or when the closing ']' is
 *	missing.
 */

static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int negate = 0;
	int hit = 0;

	if (*pattern == '!') {
		negate = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern, no closing bracket
		 */
		if (lo == '\0')
			return (NULL);

		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				/*
				 * a real lo-hi range, step over the "-hi"
				 */
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}

		if (lo == test)
			hit = 1;
	}

	/*
	 * accepted when exactly one of hit and negate is set
	 */
	if (hit == negate)
		return (NULL);
	return (pattern);
}
621
622 /*
623 * mod_name()
624 * modify a selected file name. first attempt to apply replacement string
625 * expressions, then apply interactive file rename. We apply replacement
626 * string expressions to both filenames and file links (if we didn't the
627 * links would point to the wrong place, and we could never be able to
628 * move an archive that has a file link in it). When we rename files
629 * interactively, we store that mapping (old name to user input name) so
630 * if we spot any file links to the old file name in the future, we will
631 * know exactly how to fix the file link.
632 * Return:
633 * 0 continue to process file, 1 skip this file, -1 pax is finished
634 */
635
636 int
637 mod_name(ARCHD *arcn)
638 {
639 int res = 0;
640
641 /*
642 * Strip off leading '/' if appropriate.
643 * Currently, this option is only set for the tar format.
644 */
645 if (rmleadslash && arcn->name[0] == '/') {
646 if (arcn->name[1] == '\0') {
647 arcn->name[0] = '.';
648 } else {
649 (void)memmove(arcn->name, &arcn->name[1],
650 strlen(arcn->name));
651 arcn->nlen--;
652 }
653 if (rmleadslash < 2) {
654 rmleadslash = 2;
655 tty_warn(0, "Removing leading / from absolute path names in the archive");
656 }
657 }
658 if (rmleadslash && arcn->ln_name[0] == '/' &&
659 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
660 if (arcn->ln_name[1] == '\0') {
661 arcn->ln_name[0] = '.';
662 } else {
663 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
664 strlen(arcn->ln_name));
665 arcn->ln_nlen--;
666 }
667 if (rmleadslash < 2) {
668 rmleadslash = 2;
669 tty_warn(0, "Removing leading / from absolute path names in the archive");
670 }
671 }
672
673 /*
674 * IMPORTANT: We have a problem. what do we do with symlinks?
675 * Modifying a hard link name makes sense, as we know the file it
676 * points at should have been seen already in the archive (and if it
677 * wasn't seen because of a read error or a bad archive, we lose
678 * anyway). But there are no such requirements for symlinks. On one
679 * hand the symlink that refers to a file in the archive will have to
680 * be modified to so it will still work at its new location in the
681 * file system. On the other hand a symlink that points elsewhere (and
682 * should continue to do so) should not be modified. There is clearly
683 * no perfect solution here. So we handle them like hardlinks. Clearly
684 * a replacement made by the interactive rename mapping is very likely
685 * to be correct since it applies to a single file and is an exact
686 * match. The regular expression replacements are a little harder to
687 * justify though. We claim that the symlink name is only likely
688 * to be replaced when it points within the file tree being moved and
689 * in that case it should be modified. what we really need to do is to
690 * call an oracle here. :)
691 */
692 if (rephead != NULL) {
693 /*
694 * we have replacement strings, modify the name and the link
695 * name if any.
696 */
697 if ((res = rep_name(arcn->name, sizeof(arcn->name),
698 &(arcn->nlen), 1)) != 0)
699 return(res);
700
701 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
702 (arcn->type == PAX_HRG)) &&
703 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name),
704 &(arcn->ln_nlen), 0)) != 0))
705 return(res);
706 }
707
708 if (iflag) {
709 /*
710 * perform interactive file rename, then map the link if any
711 */
712 if ((res = tty_rename(arcn)) != 0)
713 return(res);
714 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
715 (arcn->type == PAX_HRG))
716 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
717 }
718 return(res);
719 }
720
721 /*
722 * tty_rename()
723 * Prompt the user for a replacement file name. A "." keeps the old name,
724 * a empty line skips the file, and an EOF on reading the tty, will cause
725 * pax to stop processing and exit. Otherwise the file name input, replaces
726 * the old one.
727 * Return:
728 * 0 process this file, 1 skip this file, -1 we need to exit pax
729 */
730
731 static int
732 tty_rename(ARCHD *arcn)
733 {
734 char tmpname[PAXPATHLEN+2];
735 int res;
736
737 /*
738 * prompt user for the replacement name for a file, keep trying until
739 * we get some reasonable input. Archives may have more than one file
740 * on them with the same name (from updates etc). We print verbose info
741 * on the file so the user knows what is up.
742 */
743 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
744
745 for (;;) {
746 ls_tty(arcn);
747 tty_prnt("Input new name, or a \".\" to keep the old name, ");
748 tty_prnt("or a \"return\" to skip this file.\n");
749 tty_prnt("Input > ");
750 if (tty_read(tmpname, sizeof(tmpname)) < 0)
751 return(-1);
752 if (strcmp(tmpname, "..") == 0) {
753 tty_prnt("Try again, illegal file name: ..\n");
754 continue;
755 }
756 if (strlen(tmpname) > PAXPATHLEN) {
757 tty_prnt("Try again, file name too long\n");
758 continue;
759 }
760 break;
761 }
762
763 /*
764 * empty file name, skips this file. a "." leaves it alone
765 */
766 if (tmpname[0] == '\0') {
767 tty_prnt("Skipping file.\n");
768 return(1);
769 }
770 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
771 tty_prnt("Processing continues, name unchanged.\n");
772 return(0);
773 }
774
775 /*
776 * ok the name changed. We may run into links that point at this
777 * file later. we have to remember where the user sent the file
778 * in order to repair any links.
779 */
780 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
781 res = add_name(arcn->name, arcn->nlen, tmpname);
782 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
783 if (res < 0)
784 return(-1);
785 return(0);
786 }
787
788 /*
789 * set_dest()
790 * fix up the file name and the link name (if any) so this file will land
791 * in the destination directory (used during copy() -rw).
792 * Return:
793 * 0 if ok, -1 if failure (name too long)
794 */
795
796 int
797 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
798 {
799 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
800 return(-1);
801
802 /*
803 * It is really hard to deal with symlinks here, we cannot be sure
804 * if the name they point was moved (or will be moved). It is best to
805 * leave them alone.
806 */
807 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
808 return(0);
809
810 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
811 return(-1);
812 return(0);
813 }
814
815 /*
816 * fix_path
817 * concatenate dir_name and or_name and store the result in or_name (if
818 * it fits). This is one ugly function.
819 * Return:
820 * 0 if ok, -1 if the final name is too long
821 */
822
823 static int
824 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
825 {
826 char *src;
827 char *dest;
828 char *start;
829 int len;
830
831 /*
832 * we shift the or_name to the right enough to tack in the dir_name
833 * at the front. We make sure we have enough space for it all before
834 * we start. since dest always ends in a slash, we skip of or_name
835 * if it also starts with one.
836 */
837 start = or_name;
838 src = start + *or_len;
839 dest = src + dir_len;
840 if (*start == '/') {
841 ++start;
842 --dest;
843 }
844 if ((len = dest - or_name) > PAXPATHLEN) {
845 tty_warn(1, "File name %s/%s, too long", dir_name, start);
846 return(-1);
847 }
848 *or_len = len;
849
850 /*
851 * enough space, shift
852 */
853 while (src >= start)
854 *dest-- = *src--;
855 src = dir_name + dir_len - 1;
856
857 /*
858 * splice in the destination directory name
859 */
860 while (src >= dir_name)
861 *dest-- = *src--;
862
863 *(or_name + len) = '\0';
864 return(0);
865 }
866
867 /*
868 * rep_name()
869 * walk down the list of replacement strings applying each one in order.
870 * when we find one with a successful substitution, we modify the name
871 * as specified. if required, we print the results. if the resulting name
872 * is empty, we will skip this archive member. We use the regexp(3)
873 * routines (regexp() ought to win a prize as having the most cryptic
874 * library function manual page).
875 * --Parameters--
876 * name is the file name we are going to apply the regular expressions to
877 * (and may be modified)
878 * namelen the size of the name buffer.
879 * nlen is the length of this name (and is modified to hold the length of
880 * the final string).
881 * prnt is a flag that says whether to print the final result.
882 * Return:
883 * 0 if substitution was successful, 1 if we are to skip the file (the name
884 * ended up empty)
885 */
886
887 static int
888 rep_name(char *name, size_t namelen, int *nlen, int prnt)
889 {
890 REPLACE *pt;
891 char *inpt;
892 char *outpt;
893 char *endpt;
894 char *rpt;
895 int found = 0;
896 int res;
897 #ifndef NET2_REGEX
898 regmatch_t pm[MAXSUBEXP];
899 #endif
900 char nname[PAXPATHLEN+1]; /* final result of all replacements */
901 char buf1[PAXPATHLEN+1]; /* where we work on the name */
902
903 /*
904 * copy the name into buf1, where we will work on it. We need to keep
905 * the orig string around so we can print out the result of the final
906 * replacement. We build up the final result in nname. inpt points at
907 * the string we apply the regular expression to. prnt is used to
908 * suppress printing when we handle replacements on the link field
909 * (the user already saw that substitution go by)
910 */
911 pt = rephead;
912 (void)strcpy(buf1, name);
913 inpt = buf1;
914 outpt = nname;
915 endpt = outpt + PAXPATHLEN;
916
917 /*
918 * try each replacement string in order
919 */
920 while (pt != NULL) {
921 do {
922 /*
923 * check for a successful substitution, if not go to
924 * the next pattern, or cleanup if we were global
925 */
926 #ifdef NET2_REGEX
927 if (regexec(pt->rcmp, inpt) == 0)
928 #else
929 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
930 #endif
931 break;
932
933 /*
934 * ok we found one. We have three parts, the prefix
935 * which did not match, the section that did and the
936 * tail (that also did not match). Copy the prefix to
937 * the final output buffer (watching to make sure we
938 * do not create a string too long).
939 */
940 found = 1;
941 #ifdef NET2_REGEX
942 rpt = pt->rcmp->startp[0];
943 #else
944 rpt = inpt + pm[0].rm_so;
945 #endif
946
947 while ((inpt < rpt) && (outpt < endpt))
948 *outpt++ = *inpt++;
949 if (outpt == endpt)
950 break;
951
952 /*
953 * for the second part (which matched the regular
954 * expression) apply the substitution using the
955 * replacement string and place it the prefix in the
956 * final output. If we have problems, skip it.
957 */
958 if ((res =
959 #ifdef NET2_REGEX
960 resub(pt->rcmp,pt->nstr,outpt,endpt)
961 #else
962 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
963 #endif
964 ) < 0) {
965 if (prnt)
966 tty_warn(1, "Replacement name error %s",
967 name);
968 return(1);
969 }
970 outpt += res;
971
972 /*
973 * we set up to look again starting at the first
974 * character in the tail (of the input string right
975 * after the last character matched by the regular
976 * expression (inpt always points at the first char in
977 * the string to process). If we are not doing a global
978 * substitution, we will use inpt to copy the tail to
979 * the final result. Make sure we do not overrun the
980 * output buffer
981 */
982 #ifdef NET2_REGEX
983 inpt = pt->rcmp->endp[0];
984 #else
985 inpt += pm[0].rm_eo - pm[0].rm_so;
986 #endif
987
988 if ((outpt == endpt) || (*inpt == '\0'))
989 break;
990
991 /*
992 * if the user wants global we keep trying to
993 * substitute until it fails, then we are done.
994 */
995 } while (pt->flgs & GLOB);
996
997 if (found)
998 break;
999
1000 /*
1001 * a successful substitution did NOT occur, try the next one
1002 */
1003 pt = pt->fow;
1004 }
1005
1006 if (found) {
1007 /*
1008 * we had a substitution, copy the last tail piece (if there is
1009 * room) to the final result
1010 */
1011 while ((outpt < endpt) && (*inpt != '\0'))
1012 *outpt++ = *inpt++;
1013
1014 *outpt = '\0';
1015 if ((outpt == endpt) && (*inpt != '\0')) {
1016 if (prnt)
1017 tty_warn(1,"Replacement name too long %s >> %s",
1018 name, nname);
1019 return(1);
1020 }
1021
1022 /*
1023 * inform the user of the result if wanted
1024 */
1025 if (prnt && (pt->flgs & PRNT)) {
1026 if (*nname == '\0')
1027 (void)fprintf(stderr,"%s >> <empty string>\n",
1028 name);
1029 else
1030 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1031 }
1032
1033 /*
1034 * if empty inform the caller this file is to be skipped
1035 * otherwise copy the new name over the orig name and return
1036 */
1037 if (*nname == '\0')
1038 return(1);
1039 *nlen = strlcpy(name, nname, namelen);
1040 }
1041 return(0);
1042 }
1043
1044 #ifdef NET2_REGEX
1045 /*
1046 * resub()
1047 * apply the replacement to the matched expression. expand out the old
1048 * style ed(1) subexpression expansion.
1049 * Return:
1050 * -1 if error, or the number of characters added to the destination.
1051 */
1052
1053 static int
1054 resub(regexp *prog, char *src, char *dest, char *destend)
1055 {
1056 char *spt;
1057 char *dpt;
1058 char c;
1059 int no;
1060 int len;
1061
1062 spt = src;
1063 dpt = dest;
1064 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1065 if (c == '&')
1066 no = 0;
1067 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1068 no = *spt++ - '0';
1069 else {
1070 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1071 c = *spt++;
1072 *dpt++ = c;
1073 continue;
1074 }
1075 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1076 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1077 continue;
1078
1079 /*
1080 * copy the subexpression to the destination.
1081 * fail if we run out of space or the match string is damaged
1082 */
1083 if (len > (destend - dpt))
1084 return (-1);
1085 strncpy(dpt, prog->startp[no], len);
1086 dpt += len;
1087 }
1088 return(dpt - dest);
1089 }
1090
1091 #else
1092
/*
 * resub()
 *	expand the replacement string src into dest for a match found by
 *	regexec(), using the old style ed(1) notation: '&' stands for the
 *	whole match, \1 through \9 for the corresponding subexpression, and
 *	\\ or \& for a literal backslash or ampersand.
 *	rp is the compiled expression (only its subexpression count is used),
 *	pm holds the match offsets and txt is the string those offsets refer
 *	to. A subexpression that did not take part in the match expands to
 *	nothing. Copying stops silently when dest is full while copying
 *	ordinary characters. dest is NOT NUL terminated.
 * Return:
 *	-1 if error (a reference to a subexpression the expression does not
 *	have, or a subexpression that does not fit in the space that is
 *	left), or the number of characters added to the destination.
 */

static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	char *in = src;
	char *out = dest;
	char ch;
	regmatch_t *ref;
	int sublen;
	int nsub;

	nsub = rp->re_nsub;
	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (ch == '&') {
			ref = pm;
		} else if ((ch == '\\') && (*in >= '1') && (*in <= '9')) {
			/*
			 * make sure there is a subexpression as specified
			 */
			sublen = *in++ - '0';
			if (sublen > nsub)
				return(-1);
			ref = pm + sublen;
		} else {
			/*
			 * ordinary character; \\ and \& lose the backslash
			 */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * nothing to copy for an unset or empty subexpression
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		sublen = ref->rm_eo - ref->rm_so;
		if (sublen <= 0)
			continue;

		/*
		 * copy the subexpression to the destination.
		 * fail if we run out of space
		 */
		if (sublen > (destend - out))
			return -1;
		strncpy(out, txt + ref->rm_so, sublen);
		out += sublen;
	}
	return(out - dest);
}
1157 #endif
1158