/*	$NetBSD: pat_rep.c,v 1.32 2024/08/05 13:37:27 riastradh Exp $	*/
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
39
40 #include <sys/cdefs.h>
41 #if !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: pat_rep.c,v 1.32 2024/08/05 13:37:27 riastradh Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <string.h>
56 #include <unistd.h>
57 #include <stdlib.h>
58 #include "pax.h"
59 #include "pat_rep.h"
60 #include "extern.h"
61
62 /*
63 * routines to handle pattern matching, name modification (regular expression
64 * substitution and interactive renames), and destination name modification for
65 * copy (-rw). Both file name and link names are adjusted as required in these
66 * routines.
67 */
68
69 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
70 static PATTERN *pathead = NULL; /* file pattern match list head */
71 static PATTERN *pattail = NULL; /* file pattern match list tail */
72 static REPLACE *rephead = NULL; /* replacement string list head */
73 static REPLACE *reptail = NULL; /* replacement string list tail */
74
75 static int rep_name(char *, size_t, int *, int);
76 static int tty_rename(ARCHD *);
77 static int fix_path(char *, int *, char *, int);
78 static int fn_match(char *, char *, char **, int);
79 static char * range_match(char *, int);
80 static int checkdotdot(const char *);
81 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
82
83 /*
84 * rep_add()
85 * parses the -s replacement string; compiles the regular expression
86 * and stores the compiled value and its replacement string together in
87 * replacement string list. Input to this function is of the form:
88 * /old/new/gps
89 * The first char in the string specifies the delimiter used by this
90 * replacement string. "Old" is a regular expression in "ed" format which
91 * is compiled by regcomp() and is applied to filenames. "new" is the
92 * substitution string; g, p, and s are options flags for global
93 * replacement (over the single filename), printing, and preventing
94 * substitutions on symbolic link destinations.
95 * Return:
96 * 0 if a proper replacement string and regular expression was added to
97 * the list of replacement patterns; -1 otherwise.
98 */
99
100 int
101 rep_add(char *str)
102 {
103 char *pt1;
104 char *pt2;
105 REPLACE *rep;
106 int res;
107 char rebuf[BUFSIZ];
108
109 /*
110 * throw out the bad parameters
111 */
112 if ((str == NULL) || (*str == '\0')) {
113 tty_warn(1, "Empty replacement string");
114 return -1;
115 }
116
117 /*
118 * first character in the string specifies what the delimiter is for
119 * this expression.
120 */
121 for (pt1 = str+1; *pt1; pt1++) {
122 if (*pt1 == '\\') {
123 pt1++;
124 continue;
125 }
126 if (*pt1 == *str)
127 break;
128 }
129 if (*pt1 == 0) {
130 tty_warn(1, "Invalid replacement string %s", str);
131 return -1;
132 }
133
134 /*
135 * allocate space for the node that handles this replacement pattern
136 * and split out the regular expression and try to compile it
137 */
138 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
139 tty_warn(1, "Unable to allocate memory for replacement string");
140 return -1;
141 }
142
143 *pt1 = '\0';
144 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
145 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
146 tty_warn(1, "%s while compiling regular expression %s", rebuf,
147 str);
148 (void)free((char *)rep);
149 return -1;
150 }
151
152 /*
153 * put the delimiter back in case we need an error message and
154 * locate the delimiter at the end of the replacement string
155 * we then point the node at the new substitution string
156 */
157 *pt1++ = *str;
158 for (pt2 = pt1; *pt2; pt2++) {
159 if (*pt2 == '\\') {
160 pt2++;
161 continue;
162 }
163 if (*pt2 == *str)
164 break;
165 }
166 if (*pt2 == 0) {
167 regfree(&(rep->rcmp));
168 (void)free((char *)rep);
169 tty_warn(1, "Invalid replacement string %s", str);
170 return -1;
171 }
172
173 *pt2 = '\0';
174
175 /* Make sure to dup replacement, who knows where it came from! */
176 if ((rep->nstr = strdup(pt1)) == NULL) {
177 regfree(&(rep->rcmp));
178 (void)free((char *)rep);
179 tty_warn(1, "Unable to allocate memory for replacement string");
180 return -1;
181 }
182
183 pt1 = pt2++;
184 rep->flgs = 0;
185
186 /*
187 * set the options if any
188 */
189 while (*pt2 != '\0') {
190 switch(*pt2) {
191 case 'g':
192 case 'G':
193 rep->flgs |= GLOB;
194 break;
195 case 'p':
196 case 'P':
197 rep->flgs |= PRNT;
198 break;
199 case 's':
200 case 'S':
201 rep->flgs |= SYML;
202 break;
203 default:
204 regfree(&(rep->rcmp));
205 (void)free((char *)rep);
206 *pt1 = *str;
207 tty_warn(1, "Invalid replacement string option %s",
208 str);
209 return -1;
210 }
211 ++pt2;
212 }
213
214 /*
215 * all done, link it in at the end
216 */
217 rep->fow = NULL;
218 if (rephead == NULL) {
219 reptail = rephead = rep;
220 return 0;
221 }
222 reptail->fow = rep;
223 reptail = rep;
224 return 0;
225 }
226
227 /*
228 * pat_add()
229 * add a pattern match to the pattern match list. Pattern matches are used
230 * to select which archive members are extracted. (They appear as
231 * arguments to pax in the list and read modes). If no patterns are
232 * supplied to pax, all members in the archive will be selected (and the
233 * pattern match list is empty).
234 *
235 * Return:
236 * 0 if the pattern was added to the list, -1 otherwise
237 */
238
239 int
240 pat_add(char *str, char *chdn, int flags)
241 {
242 PATTERN *pt;
243
244 /*
245 * throw out the junk
246 */
247 if ((str == NULL) || (*str == '\0')) {
248 tty_warn(1, "Empty pattern string");
249 return -1;
250 }
251
252 /*
253 * allocate space for the pattern and store the pattern. the pattern is
254 * part of argv so do not bother to copy it, just point at it. Add the
255 * node to the end of the pattern list
256 */
257 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
258 tty_warn(1, "Unable to allocate memory for pattern string");
259 return -1;
260 }
261
262 pt->pstr = str;
263 pt->pend = NULL;
264 pt->plen = strlen(str);
265 pt->fow = NULL;
266 pt->flgs = flags;
267 pt->chdname = chdn;
268 if (pathead == NULL) {
269 pattail = pathead = pt;
270 return 0;
271 }
272 pattail->fow = pt;
273 pattail = pt;
274 return 0;
275 }
276
/*
 * pat_chk()
 *	complain if any of the user supplied patterns did not result in a
 *	match to a selected archive member.
 */
282
283 void
284 pat_chk(void)
285 {
286 PATTERN *pt;
287 int wban = 0;
288
289 /*
290 * walk down the list checking the flags to make sure MTCH was set,
291 * if not complain
292 */
293 for (pt = pathead; pt != NULL; pt = pt->fow) {
294 if (pt->flgs & MTCH)
295 continue;
296 if (!wban) {
297 tty_warn(1, "WARNING! These patterns were not matched:");
298 ++wban;
299 }
300 (void)fprintf(stderr, "%s\n", pt->pstr);
301 }
302 }
303
304 /*
305 * pat_sel()
306 * the archive member which matches a pattern was selected. Mark the
307 * pattern as having selected an archive member. arcn->pat points at the
308 * pattern that was matched. arcn->pat is set in pat_match()
309 *
310 * NOTE: When the -c option is used, we are called when there was no match
311 * by pat_match() (that means we did match before the inverted sense of
312 * the logic). Now this seems really strange at first, but with -c we
313 * need to keep track of those patterns that cause an archive member to
314 * NOT be selected (it found an archive member with a specified pattern)
315 * Return:
316 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
317 * match, -1 otherwise.
318 */
319
320 int
321 pat_sel(ARCHD *arcn)
322 {
323 PATTERN *pt;
324 PATTERN **ppt;
325 int len;
326
327 /*
328 * if no patterns just return
329 */
330 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
331 return 0;
332
333 /*
334 * when we are NOT limited to a single match per pattern mark the
335 * pattern and return
336 */
337 if (!nflag) {
338 pt->flgs |= MTCH;
339 return 0;
340 }
341
342 /*
343 * we reach this point only when we allow a single selected match per
344 * pattern, if the pattern matches a directory and we do not have -d
345 * (dflag) we are done with this pattern. We may also be handed a file
346 * in the subtree of a directory. in that case when we are operating
347 * with -d, this pattern was already selected and we are done
348 */
349 if (pt->flgs & DIR_MTCH)
350 return 0;
351
352 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
353 /*
354 * ok we matched a directory and we are allowing
355 * subtree matches but because of the -n only its children will
356 * match. This is tagged as a DIR_MTCH type.
357 * WATCH IT, the code assumes that pt->pend points
358 * into arcn->name and arcn->name has not been modified.
359 * If not we will have a big mess. Yup this is another kludge
360 */
361
362 /*
363 * if this was a prefix match, remove trailing part of path
364 * so we can copy it. Future matches will be exact prefix match
365 */
366 if (pt->pend != NULL)
367 *pt->pend = '\0';
368
369 if ((pt->pstr = strdup(arcn->name)) == NULL) {
370 tty_warn(1, "Pattern select out of memory");
371 if (pt->pend != NULL)
372 *pt->pend = '/';
373 pt->pend = NULL;
374 return -1;
375 }
376
377 /*
378 * put the trailing / back in the source string
379 */
380 if (pt->pend != NULL) {
381 *pt->pend = '/';
382 pt->pend = NULL;
383 }
384 pt->plen = strlen(pt->pstr);
385
386 /*
387 * strip off any trailing /, this should really never happen
388 */
389 len = pt->plen - 1;
390 if (*(pt->pstr + len) == '/') {
391 *(pt->pstr + len) = '\0';
392 pt->plen = len;
393 }
394 pt->flgs = DIR_MTCH | MTCH;
395 arcn->pat = pt;
396 return 0;
397 }
398
399 /*
400 * we are then done with this pattern, so we delete it from the list
401 * because it can never be used for another match.
402 * Seems kind of strange to do for a -c, but the pax spec is really
403 * vague on the interaction of -c, -n, and -d. We assume that when -c
404 * and the pattern rejects a member (i.e. it matched it) it is done.
405 * In effect we place the order of the flags as having -c last.
406 */
407 pt = pathead;
408 ppt = &pathead;
409 while ((pt != NULL) && (pt != arcn->pat)) {
410 ppt = &(pt->fow);
411 pt = pt->fow;
412 }
413
414 if (pt == NULL) {
415 /*
416 * should never happen....
417 */
418 tty_warn(1, "Pattern list inconsistent");
419 return -1;
420 }
421 *ppt = pt->fow;
422 (void)free((char *)pt);
423 arcn->pat = NULL;
424 return 0;
425 }
426
427 /*
428 * pat_match()
429 * see if this archive member matches any supplied pattern, if a match
430 * is found, arcn->pat is set to point at the potential pattern. Later if
431 * this archive member is "selected" we process and mark the pattern as
432 * one which matched a selected archive member (see pat_sel())
433 * Return:
434 * 0 if this archive member should be processed, 1 if it should be
435 * skipped and -1 if we are done with all patterns (and pax should quit
436 * looking for more members)
437 */
438
439 int
440 pat_match(ARCHD *arcn)
441 {
442 PATTERN *pt;
443
444 arcn->pat = NULL;
445
446 /*
447 * if there are no more patterns and we have -n (and not -c) we are
448 * done. otherwise with no patterns to match, matches all
449 */
450 if (pathead == NULL) {
451 if (nflag && !cflag)
452 return -1;
453 return 0;
454 }
455
456 /*
457 * have to search down the list one at a time looking for a match.
458 */
459 pt = pathead;
460 while (pt != NULL) {
461 /*
462 * check for a file name match unless we have DIR_MTCH set in
463 * this pattern then we want a prefix match
464 */
465 if (pt->flgs & DIR_MTCH) {
466 /*
467 * this pattern was matched before to a directory
468 * as we must have -n set for this (but not -d). We can
469 * only match CHILDREN of that directory so we must use
470 * an exact prefix match (no wildcards).
471 */
472 if ((arcn->name[pt->plen] == '/') &&
473 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
474 break;
475 } else if (fn_match(pt->pstr, arcn->name, &pt->pend,
476 pt->flgs & NOGLOB_MTCH) == 0)
477 break;
478 pt = pt->fow;
479 }
480
481 /*
482 * return the result, remember that cflag (-c) inverts the sense of a
483 * match
484 */
485 if (pt == NULL)
486 return cflag ? 0 : 1;
487
488 /*
489 * we had a match, now when we invert the sense (-c) we reject this
490 * member. However we have to tag the pattern a being successful, (in a
491 * match, not in selecting an archive member) so we call pat_sel()
492 * here.
493 */
494 arcn->pat = pt;
495 if (!cflag)
496 return 0;
497
498 if (pat_sel(arcn) < 0)
499 return -1;
500 arcn->pat = NULL;
501 return 1;
502 }
503
/*
 * fn_match()
 *	match string against the shell style pattern ('?', '*' and '[...]'
 *	are honored unless noglob is set).
 * Return:
 *	0 if the string matches the pattern, either exactly or (when dflag
 *	is not 1) as a leading directory prefix of the string; -1 if it
 *	does not match.
 * Note: *pend may be changed to show where the prefix ends.
 */
512
513 static int
514 fn_match(char *pattern, char *string, char **pend, int noglob)
515 {
516 char c;
517 char test;
518
519 *pend = NULL;
520 for (;;) {
521 switch (c = *pattern++) {
522 case '\0':
523 /*
524 * Ok we found an exact match
525 */
526 if (*string == '\0')
527 return 0;
528
529 /*
530 * Check if it is a prefix match
531 */
532 if ((dflag == 1) || (*string != '/'))
533 return -1;
534
535 /*
536 * It is a prefix match, remember where the trailing
537 * / is located
538 */
539 *pend = string;
540 return 0;
541 case '?':
542 if (noglob)
543 goto regular;
544 if ((test = *string++) == '\0')
545 return (-1);
546 break;
547 case '*':
548 if (noglob)
549 goto regular;
550 c = *pattern;
551 /*
552 * Collapse multiple *'s.
553 */
554 while (c == '*')
555 c = *++pattern;
556
557 /*
558 * Optimized hack for pattern with a * at the end
559 */
560 if (c == '\0')
561 return (0);
562
563 /*
564 * General case, use recursion.
565 */
566 while ((test = *string) != '\0') {
567 if (!fn_match(pattern, string, pend, noglob))
568 return (0);
569 ++string;
570 }
571 return (-1);
572 case '[':
573 if (noglob)
574 goto regular;
575 /*
576 * range match
577 */
578 if (((test = *string++) == '\0') ||
579 ((pattern = range_match(pattern, test)) == NULL))
580 return (-1);
581 break;
582 case '\\':
583 default:
584 regular:
585 if (c != *string++)
586 return (-1);
587 break;
588 }
589 }
590 /* NOTREACHED */
591 }
592
/*
 * range_match()
 *	match one character against a bracket expression. pattern points
 *	just past the opening '['; a leading '!' negates the class and
 *	"a-z" style ranges are understood.
 * Return:
 *	pointer to the pattern character following the closing ']' when
 *	test is accepted by the class, NULL when it is rejected or when the
 *	class is not terminated by ']'.
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/* ran off the end of the pattern: illegal class */
		if (lo == '\0')
			return (NULL);

		/*
		 * "lo-hi" is a range only when something other than the
		 * end of the pattern or the closing ']' follows the '-'
		 */
		if ((pattern[0] == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
			continue;
		}

		if (lo == test)
			hit = 1;
	}

	/* a plain class needs a hit, a negated one needs a miss */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
621
622 /*
623 * mod_name()
624 * modify a selected file name. first attempt to apply replacement string
625 * expressions, then apply interactive file rename. We apply replacement
626 * string expressions to both filenames and file links (if we didn't the
627 * links would point to the wrong place, and we could never be able to
628 * move an archive that has a file link in it). When we rename files
629 * interactively, we store that mapping (old name to user input name) so
630 * if we spot any file links to the old file name in the future, we will
631 * know exactly how to fix the file link.
632 * Return:
633 * 0 continue to process file, 1 skip this file, -1 pax is finished
634 */
635
636 int
637 mod_name(ARCHD *arcn, int flags)
638 {
639 int res = 0;
640
641 if (secure) {
642 if (checkdotdot(arcn->name)) {
643 tty_warn(0, "Ignoring file containing `..' (%s)",
644 arcn->name);
645 return 1;
646 }
647 #ifdef notdef
648 if (checkdotdot(arcn->ln_name)) {
649 tty_warn(0, "Ignoring link containing `..' (%s)",
650 arcn->ln_name);
651 return 1;
652 }
653 #endif
654 }
655
656 /*
657 * IMPORTANT: We have a problem. what do we do with symlinks?
658 * Modifying a hard link name makes sense, as we know the file it
659 * points at should have been seen already in the archive (and if it
660 * wasn't seen because of a read error or a bad archive, we lose
661 * anyway). But there are no such requirements for symlinks. On one
662 * hand the symlink that refers to a file in the archive will have to
663 * be modified to so it will still work at its new location in the
664 * file system. On the other hand a symlink that points elsewhere (and
665 * should continue to do so) should not be modified. There is clearly
666 * no perfect solution here. So we handle them like hardlinks. Clearly
667 * a replacement made by the interactive rename mapping is very likely
668 * to be correct since it applies to a single file and is an exact
669 * match. The regular expression replacements are a little harder to
670 * justify though. We claim that the symlink name is only likely
671 * to be replaced when it points within the file tree being moved and
672 * in that case it should be modified. what we really need to do is to
673 * call an oracle here. :)
674 */
675 if (rephead != NULL) {
676 flags |= (flags & RENM) ? PRNT : 0;
677 /*
678 * we have replacement strings, modify the name and the link
679 * name if any.
680 */
681 if ((res = rep_name(arcn->name, sizeof(arcn->name),
682 &(arcn->nlen), flags)) != 0)
683 return res;
684
685 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
686 (arcn->type == PAX_HRG)) &&
687 ((res = rep_name(arcn->ln_name,
688 sizeof(arcn->ln_name), &(arcn->ln_nlen),
689 flags | (arcn->type == PAX_SLK ? SYML : 0))) != 0))
690 return res;
691 }
692
693 if (iflag) {
694 /*
695 * perform interactive file rename, then map the link if any
696 */
697 if ((res = tty_rename(arcn)) != 0)
698 return res;
699 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
700 (arcn->type == PAX_HRG))
701 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
702 }
703
704 /*
705 * Strip off leading '/' if appropriate.
706 * Currently, this option is only set for the tar format.
707 */
708 if (rmleadslash && arcn->name[0] == '/') {
709 if (arcn->name[1] == '\0') {
710 arcn->name[0] = '.';
711 } else {
712 (void)memmove(arcn->name, &arcn->name[1],
713 strlen(arcn->name));
714 arcn->nlen--;
715 }
716 if (rmleadslash < 2) {
717 rmleadslash = 2;
718 tty_warn(0, "Removing leading / from absolute path names in the archive");
719 }
720 }
721 if (rmleadslash && arcn->ln_name[0] == '/' &&
722 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
723 if (arcn->ln_name[1] == '\0') {
724 arcn->ln_name[0] = '.';
725 } else {
726 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
727 strlen(arcn->ln_name));
728 arcn->ln_nlen--;
729 }
730 if (rmleadslash < 2) {
731 rmleadslash = 2;
732 tty_warn(0, "Removing leading / from absolute path names in the archive");
733 }
734 }
735
736 return res;
737 }
738
/*
 * tty_rename()
 *	Prompt the user for a replacement file name. A "." keeps the old
 *	name, an empty line skips the file, and an EOF on reading the tty
 *	will cause pax to stop processing and exit. Otherwise the file name
 *	that was input replaces the old one.
 * Return:
 *	0 process this file, 1 skip this file, -1 we need to exit pax
 */
748
749 static int
750 tty_rename(ARCHD *arcn)
751 {
752 char tmpname[PAXPATHLEN+2];
753 int res;
754
755 /*
756 * prompt user for the replacement name for a file, keep trying until
757 * we get some reasonable input. Archives may have more than one file
758 * on them with the same name (from updates etc). We print verbose info
759 * on the file so the user knows what is up.
760 */
761 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
762
763 for (;;) {
764 ls_tty(arcn);
765 tty_prnt("Input new name, or a \".\" to keep the old name, ");
766 tty_prnt("or a \"return\" to skip this file.\n");
767 tty_prnt("Input > ");
768 if (tty_read(tmpname, sizeof(tmpname)) < 0)
769 return -1;
770 if (strcmp(tmpname, "..") == 0) {
771 tty_prnt("Try again, illegal file name: ..\n");
772 continue;
773 }
774 if (strlen(tmpname) > PAXPATHLEN) {
775 tty_prnt("Try again, file name too long\n");
776 continue;
777 }
778 break;
779 }
780
781 /*
782 * empty file name, skips this file. a "." leaves it alone
783 */
784 if (tmpname[0] == '\0') {
785 tty_prnt("Skipping file.\n");
786 return 1;
787 }
788 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
789 tty_prnt("Processing continues, name unchanged.\n");
790 return 0;
791 }
792
793 /*
794 * ok the name changed. We may run into links that point at this
795 * file later. we have to remember where the user sent the file
796 * in order to repair any links.
797 */
798 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
799 res = add_name(arcn->name, arcn->nlen, tmpname);
800 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
801 if (res < 0)
802 return -1;
803 return 0;
804 }
805
806 /*
807 * set_dest()
808 * fix up the file name and the link name (if any) so this file will land
809 * in the destination directory (used during copy() -rw).
810 * Return:
811 * 0 if ok, -1 if failure (name too long)
812 */
813
814 int
815 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
816 {
817 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
818 return -1;
819
820 /*
821 * It is really hard to deal with symlinks here, we cannot be sure
822 * if the name they point was moved (or will be moved). It is best to
823 * leave them alone.
824 */
825 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
826 return 0;
827
828 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
829 return -1;
830 return 0;
831 }
832
833 /*
834 * fix_path
835 * concatenate dir_name and or_name and store the result in or_name (if
836 * it fits). This is one ugly function.
837 * Return:
838 * 0 if ok, -1 if the final name is too long
839 */
840
841 static int
842 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
843 {
844 char *src;
845 char *dest;
846 char *start;
847 int len;
848
849 /*
850 * we shift the or_name to the right enough to tack in the dir_name
851 * at the front. We make sure we have enough space for it all before
852 * we start. since dest always ends in a slash, we skip of or_name
853 * if it also starts with one.
854 */
855 start = or_name;
856 src = start + *or_len;
857 dest = src + dir_len;
858 if (*start == '/') {
859 ++start;
860 --dest;
861 }
862 if ((len = dest - or_name) > PAXPATHLEN) {
863 tty_warn(1, "File name %s/%s, too long", dir_name, start);
864 return -1;
865 }
866 *or_len = len;
867
868 /*
869 * enough space, shift
870 */
871 while (src >= start)
872 *dest-- = *src--;
873 src = dir_name + dir_len - 1;
874
875 /*
876 * splice in the destination directory name
877 */
878 while (src >= dir_name)
879 *dest-- = *src--;
880
881 *(or_name + len) = '\0';
882 return 0;
883 }
884
885 /*
886 * rep_name()
887 * walk down the list of replacement strings applying each one in order.
888 * when we find one with a successful substitution, we modify the name
889 * as specified. if required, we print the results. if the resulting name
890 * is empty, we will skip this archive member. We use the regexp(3)
891 * routines (regexp() ought to win a prize as having the most cryptic
892 * library function manual page).
893 * --Parameters--
894 * name is the file name we are going to apply the regular expressions to
895 * (and may be modified)
896 * namelen the size of the name buffer.
897 * nlen is the length of this name (and is modified to hold the length of
898 * the final string).
899 * flags contains various options to control behavior.
900 * Return:
901 * 0 if substitution was successful, 1 if we are to skip the file (the name
902 * ended up empty)
903 */
904
905 static int
906 rep_name(char *name, size_t namelen, int *nlen, int flags)
907 {
908 REPLACE *pt;
909 char *inpt;
910 char *outpt;
911 char *endpt;
912 char *rpt;
913 int found = 0;
914 int res;
915 regmatch_t pm[MAXSUBEXP];
916 char nname[PAXPATHLEN+1]; /* final result of all replacements */
917 char buf1[PAXPATHLEN+1]; /* where we work on the name */
918
919 /*
920 * copy the name into buf1, where we will work on it. We need to keep
921 * the orig string around so we can print out the result of the final
922 * replacement. We build up the final result in nname. inpt points at
923 * the string we apply the regular expression to. prnt is used to
924 * suppress printing when we handle replacements on the link field
925 * (the user already saw that substitution go by)
926 */
927 pt = rephead;
928 (void)strlcpy(buf1, name, sizeof(buf1));
929 inpt = buf1;
930 outpt = nname;
931 endpt = outpt + PAXPATHLEN;
932
933 /*
934 * try each replacement string in order
935 */
936 while (pt != NULL) {
937 do {
938 if ((flags & SYML) && (pt->flgs & SYML))
939 continue;
940 /*
941 * check for a successful substitution, if not go to
942 * the next pattern, or cleanup if we were global
943 */
944 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
945 break;
946
947 /*
948 * ok we found one. We have three parts, the prefix
949 * which did not match, the section that did and the
950 * tail (that also did not match). Copy the prefix to
951 * the final output buffer (watching to make sure we
952 * do not create a string too long).
953 */
954 found = 1;
955 rpt = inpt + pm[0].rm_so;
956
957 while ((inpt < rpt) && (outpt < endpt))
958 *outpt++ = *inpt++;
959 if (outpt == endpt)
960 break;
961
962 /*
963 * for the second part (which matched the regular
964 * expression) apply the substitution using the
965 * replacement string and place it the prefix in the
966 * final output. If we have problems, skip it.
967 */
968 if ((res =
969 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
970 ) < 0) {
971 if (flags & PRNT)
972 tty_warn(1, "Replacement name error %s",
973 name);
974 return 1;
975 }
976 outpt += res;
977
978 /*
979 * we set up to look again starting at the first
980 * character in the tail (of the input string right
981 * after the last character matched by the regular
982 * expression (inpt always points at the first char in
983 * the string to process). If we are not doing a global
984 * substitution, we will use inpt to copy the tail to
985 * the final result. Make sure we do not overrun the
986 * output buffer
987 */
988 inpt += pm[0].rm_eo - pm[0].rm_so;
989
990 if ((outpt == endpt) || (*inpt == '\0'))
991 break;
992
993 /*
994 * if the user wants global we keep trying to
995 * substitute until it fails, then we are done.
996 */
997 } while (pt->flgs & GLOB);
998
999 if (found)
1000 break;
1001
1002 /*
1003 * a successful substitution did NOT occur, try the next one
1004 */
1005 pt = pt->fow;
1006 }
1007
1008 if (found) {
1009 /*
1010 * we had a substitution, copy the last tail piece (if there is
1011 * room) to the final result
1012 */
1013 while ((outpt < endpt) && (*inpt != '\0'))
1014 *outpt++ = *inpt++;
1015
1016 *outpt = '\0';
1017 if ((outpt == endpt) && (*inpt != '\0')) {
1018 if (flags & PRNT)
1019 tty_warn(1,"Replacement name too long %s >> %s",
1020 name, nname);
1021 return 1;
1022 }
1023
1024 /*
1025 * inform the user of the result if wanted
1026 */
1027 if ((flags & PRNT) && (pt->flgs & PRNT)) {
1028 if (*nname == '\0')
1029 (void)fprintf(stderr,"%s >> <empty string>\n",
1030 name);
1031 else
1032 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1033 }
1034
1035 /*
1036 * if empty inform the caller this file is to be skipped
1037 * otherwise copy the new name over the orig name and return
1038 */
1039 if (*nname == '\0')
1040 return 1;
1041 if (flags & RENM)
1042 *nlen = strlcpy(name, nname, namelen);
1043 }
1044 return 0;
1045 }
1046
1047
1048 /*
1049 * checkdotdot()
1050 * Return true if a component of the name contains a reference to ".."
1051 */
static int
checkdotdot(const char *name)
{
	size_t len;

	/* ".." on its own, or as the first component: "..", "../x" */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '\0' || name[2] == '/'))
		return 1;

	/* ".." as a middle component: "x/../y" */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* ".." as the last component: "x/.." */
	len = strlen(name);
	if (len < 3)
		return 0;
	if (strcmp(name + len - 3, "/..") == 0)
		return 1;

	return 0;
}
1075
1076
1077 /*
1078 * resub()
1079 * apply the replacement to the matched expression. expand out the old
1080 * style ed(1) subexpression expansion.
1081 * Return:
1082 * -1 if error, or the number of characters added to the destination.
1083 */
1084
static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	char *in = src;			/* walks the replacement string */
	char *out = dest;		/* next free byte of the output */
	int ngroups = rp->re_nsub;	/* subexpressions in the regex */
	regmatch_t *m;
	int idx;
	int span;
	char ch;

	/*
	 * expand the replacement string until it is used up or the output
	 * is full (in which case the rest is silently dropped)
	 */
	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		if (ch == '&') {
			/* the whole matched text */
			m = pm;
		} else if ((ch == '\\') && (*in >= '1') && (*in <= '9')) {
			/*
			 * \1 .. \9: the regex must really have that many
			 * subexpressions
			 */
			idx = *in++ - '0';
			if (idx > ngroups)
				return -1;
			m = &pm[idx];
		} else {
			/*
			 * ordinary character; "\\" and "\&" stand for a
			 * literal backslash and ampersand
			 */
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * a subexpression that did not take part in the match, or
		 * matched nothing, contributes nothing
		 */
		if ((m->rm_so < 0) || (m->rm_eo < 0))
			continue;
		span = m->rm_eo - m->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the matched text (offsets are relative to txt);
		 * fail if it does not fit in what is left of the output
		 */
		if (span > (destend - out))
			return -1;
		strncpy(out, txt + m->rm_so, span);
		out += span;
	}
	return out - dest;
}
1141