pat_rep.c revision 1.26 1 /* $NetBSD: pat_rep.c,v 1.26 2007/04/29 20:23:34 msaitoh Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
39
40 #include <sys/cdefs.h>
41 #if !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: pat_rep.c,v 1.26 2007/04/29 20:23:34 msaitoh Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <string.h>
56 #include <unistd.h>
57 #include <stdlib.h>
58 #ifdef NET2_REGEX
59 #include <regexp.h>
60 #else
61 #include <regex.h>
62 #endif
63 #include "pax.h"
64 #include "pat_rep.h"
65 #include "extern.h"
66
67 /*
68 * routines to handle pattern matching, name modification (regular expression
69 * substitution and interactive renames), and destination name modification for
70 * copy (-rw). Both file name and link names are adjusted as required in these
71 * routines.
72 */
73
74 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
75 static PATTERN *pathead = NULL; /* file pattern match list head */
76 static PATTERN *pattail = NULL; /* file pattern match list tail */
77 static REPLACE *rephead = NULL; /* replacement string list head */
78 static REPLACE *reptail = NULL; /* replacement string list tail */
79
80 static int rep_name(char *, size_t, int *, int);
81 static int tty_rename(ARCHD *);
82 static int fix_path(char *, int *, char *, int);
83 static int fn_match(char *, char *, char **);
84 static char * range_match(char *, int);
85 static int checkdotdot(const char *);
86 #ifdef NET2_REGEX
87 static int resub(regexp *, char *, char *, char *);
88 #else
89 static int resub(regex_t *, regmatch_t *, char *, char *, char *, char *);
90 #endif
91
92 /*
93 * rep_add()
94 * parses the -s replacement string; compiles the regular expression
95 * and stores the compiled value and its replacement string together in
96 * replacement string list. Input to this function is of the form:
97 * /old/new/pg
98 * The first char in the string specifies the delimiter used by this
99 * replacement string. "Old" is a regular expression in "ed" format which
100 * is compiled by regcomp() and is applied to filenames. "new" is the
101 * substitution string; p and g are options flags for printing and global
102 * replacement (over the single filename)
103 * Return:
104 * 0 if a proper replacement string and regular expression was added to
105 * the list of replacement patterns; -1 otherwise.
106 */
107
108 int
109 rep_add(char *str)
110 {
111 char *pt1;
112 char *pt2;
113 REPLACE *rep;
114 #ifdef NET2_REGEX
115 static const char rebuf[] = "Error";
116 #else
117 int res;
118 char rebuf[BUFSIZ];
119 #endif
120
121 /*
122 * throw out the bad parameters
123 */
124 if ((str == NULL) || (*str == '\0')) {
125 tty_warn(1, "Empty replacement string");
126 return -1;
127 }
128
129 /*
130 * first character in the string specifies what the delimiter is for
131 * this expression.
132 */
133 for (pt1 = str+1; *pt1; pt1++) {
134 if (*pt1 == '\\') {
135 pt1++;
136 continue;
137 }
138 if (*pt1 == *str)
139 break;
140 }
141 if (*pt1 == 0) {
142 tty_warn(1, "Invalid replacement string %s", str);
143 return -1;
144 }
145
146 /*
147 * allocate space for the node that handles this replacement pattern
148 * and split out the regular expression and try to compile it
149 */
150 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
151 tty_warn(1, "Unable to allocate memory for replacement string");
152 return -1;
153 }
154
155 *pt1 = '\0';
156 #ifdef NET2_REGEX
157 if ((rep->rcmp = regcomp(str+1)) == NULL) {
158 #else
159 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
160 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
161 #endif
162 tty_warn(1, "%s while compiling regular expression %s", rebuf,
163 str);
164 (void)free((char *)rep);
165 return -1;
166 }
167
168 /*
169 * put the delimiter back in case we need an error message and
170 * locate the delimiter at the end of the replacement string
171 * we then point the node at the new substitution string
172 */
173 *pt1++ = *str;
174 for (pt2 = pt1; *pt2; pt2++) {
175 if (*pt2 == '\\') {
176 pt2++;
177 continue;
178 }
179 if (*pt2 == *str)
180 break;
181 }
182 if (*pt2 == 0) {
183 #ifdef NET2_REGEX
184 (void)free((char *)rep->rcmp);
185 #else
186 regfree(&(rep->rcmp));
187 #endif
188 (void)free((char *)rep);
189 tty_warn(1, "Invalid replacement string %s", str);
190 return -1;
191 }
192
193 *pt2 = '\0';
194
195 /* Make sure to dup replacement, who knows where it came from! */
196 if ((rep->nstr = strdup(pt1)) == NULL) {
197 #ifdef NET2_REGEX
198 (void)free((char *)rep->rcmp);
199 #else
200 regfree(&(rep->rcmp));
201 #endif
202 (void)free((char *)rep);
203 tty_warn(1, "Unable to allocate memory for replacement string");
204 return -1;
205 }
206
207 pt1 = pt2++;
208 rep->flgs = 0;
209
210 /*
211 * set the options if any
212 */
213 while (*pt2 != '\0') {
214 switch(*pt2) {
215 case 'g':
216 case 'G':
217 rep->flgs |= GLOB;
218 break;
219 case 'p':
220 case 'P':
221 rep->flgs |= PRNT;
222 break;
223 default:
224 #ifdef NET2_REGEX
225 (void)free((char *)rep->rcmp);
226 #else
227 regfree(&(rep->rcmp));
228 #endif
229 (void)free((char *)rep);
230 *pt1 = *str;
231 tty_warn(1, "Invalid replacement string option %s",
232 str);
233 return -1;
234 }
235 ++pt2;
236 }
237
238 /*
239 * all done, link it in at the end
240 */
241 rep->fow = NULL;
242 if (rephead == NULL) {
243 reptail = rephead = rep;
244 return 0;
245 }
246 reptail->fow = rep;
247 reptail = rep;
248 return 0;
249 }
250
251 /*
252 * pat_add()
253 * add a pattern match to the pattern match list. Pattern matches are used
254 * to select which archive members are extracted. (They appear as
255 * arguments to pax in the list and read modes). If no patterns are
256 * supplied to pax, all members in the archive will be selected (and the
257 * pattern match list is empty).
258 *
259 * Return:
260 * 0 if the pattern was added to the list, -1 otherwise
261 */
262
263 int
264 pat_add(char *str, char *chdn)
265 {
266 PATTERN *pt;
267
268 /*
269 * throw out the junk
270 */
271 if ((str == NULL) || (*str == '\0')) {
272 tty_warn(1, "Empty pattern string");
273 return -1;
274 }
275
276 /*
277 * allocate space for the pattern and store the pattern. the pattern is
278 * part of argv so do not bother to copy it, just point at it. Add the
279 * node to the end of the pattern list
280 */
281 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
282 tty_warn(1, "Unable to allocate memory for pattern string");
283 return -1;
284 }
285
286 pt->pstr = str;
287 pt->pend = NULL;
288 pt->plen = strlen(str);
289 pt->fow = NULL;
290 pt->flgs = 0;
291 pt->chdname = chdn;
292 if (pathead == NULL) {
293 pattail = pathead = pt;
294 return 0;
295 }
296 pattail->fow = pt;
297 pattail = pt;
298 return 0;
299 }
300
301 /*
302 * pat_chk()
 *	complain if any user supplied pattern did not result in a match to
 *	a selected archive member.
305 */
306
307 void
308 pat_chk(void)
309 {
310 PATTERN *pt;
311 int wban = 0;
312
313 /*
314 * walk down the list checking the flags to make sure MTCH was set,
315 * if not complain
316 */
317 for (pt = pathead; pt != NULL; pt = pt->fow) {
318 if (pt->flgs & MTCH)
319 continue;
320 if (!wban) {
321 tty_warn(1, "WARNING! These patterns were not matched:");
322 ++wban;
323 }
324 (void)fprintf(stderr, "%s\n", pt->pstr);
325 }
326 }
327
328 /*
329 * pat_sel()
330 * the archive member which matches a pattern was selected. Mark the
331 * pattern as having selected an archive member. arcn->pat points at the
332 * pattern that was matched. arcn->pat is set in pat_match()
333 *
334 * NOTE: When the -c option is used, we are called when there was no match
335 * by pat_match() (that means we did match before the inverted sense of
336 * the logic). Now this seems really strange at first, but with -c we
337 * need to keep track of those patterns that cause an archive member to
338 * NOT be selected (it found an archive member with a specified pattern)
339 * Return:
340 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
341 * match, -1 otherwise.
342 */
343
344 int
345 pat_sel(ARCHD *arcn)
346 {
347 PATTERN *pt;
348 PATTERN **ppt;
349 int len;
350
351 /*
352 * if no patterns just return
353 */
354 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
355 return 0;
356
357 /*
358 * when we are NOT limited to a single match per pattern mark the
359 * pattern and return
360 */
361 if (!nflag) {
362 pt->flgs |= MTCH;
363 return 0;
364 }
365
366 /*
367 * we reach this point only when we allow a single selected match per
368 * pattern, if the pattern matches a directory and we do not have -d
369 * (dflag) we are done with this pattern. We may also be handed a file
370 * in the subtree of a directory. in that case when we are operating
371 * with -d, this pattern was already selected and we are done
372 */
373 if (pt->flgs & DIR_MTCH)
374 return 0;
375
376 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
377 /*
378 * ok we matched a directory and we are allowing
379 * subtree matches but because of the -n only its children will
380 * match. This is tagged as a DIR_MTCH type.
381 * WATCH IT, the code assumes that pt->pend points
382 * into arcn->name and arcn->name has not been modified.
383 * If not we will have a big mess. Yup this is another kludge
384 */
385
386 /*
387 * if this was a prefix match, remove trailing part of path
388 * so we can copy it. Future matches will be exact prefix match
389 */
390 if (pt->pend != NULL)
391 *pt->pend = '\0';
392
393 if ((pt->pstr = strdup(arcn->name)) == NULL) {
394 tty_warn(1, "Pattern select out of memory");
395 if (pt->pend != NULL)
396 *pt->pend = '/';
397 pt->pend = NULL;
398 return -1;
399 }
400
401 /*
402 * put the trailing / back in the source string
403 */
404 if (pt->pend != NULL) {
405 *pt->pend = '/';
406 pt->pend = NULL;
407 }
408 pt->plen = strlen(pt->pstr);
409
410 /*
411 * strip off any trailing /, this should really never happen
412 */
413 len = pt->plen - 1;
414 if (*(pt->pstr + len) == '/') {
415 *(pt->pstr + len) = '\0';
416 pt->plen = len;
417 }
418 pt->flgs = DIR_MTCH | MTCH;
419 arcn->pat = pt;
420 return 0;
421 }
422
423 /*
424 * we are then done with this pattern, so we delete it from the list
425 * because it can never be used for another match.
426 * Seems kind of strange to do for a -c, but the pax spec is really
427 * vague on the interaction of -c, -n, and -d. We assume that when -c
428 * and the pattern rejects a member (i.e. it matched it) it is done.
429 * In effect we place the order of the flags as having -c last.
430 */
431 pt = pathead;
432 ppt = &pathead;
433 while ((pt != NULL) && (pt != arcn->pat)) {
434 ppt = &(pt->fow);
435 pt = pt->fow;
436 }
437
438 if (pt == NULL) {
439 /*
440 * should never happen....
441 */
442 tty_warn(1, "Pattern list inconsistent");
443 return -1;
444 }
445 *ppt = pt->fow;
446 (void)free((char *)pt);
447 arcn->pat = NULL;
448 return 0;
449 }
450
451 /*
452 * pat_match()
453 * see if this archive member matches any supplied pattern, if a match
454 * is found, arcn->pat is set to point at the potential pattern. Later if
455 * this archive member is "selected" we process and mark the pattern as
456 * one which matched a selected archive member (see pat_sel())
457 * Return:
458 * 0 if this archive member should be processed, 1 if it should be
459 * skipped and -1 if we are done with all patterns (and pax should quit
460 * looking for more members)
461 */
462
463 int
464 pat_match(ARCHD *arcn)
465 {
466 PATTERN *pt;
467
468 arcn->pat = NULL;
469
470 /*
471 * if there are no more patterns and we have -n (and not -c) we are
472 * done. otherwise with no patterns to match, matches all
473 */
474 if (pathead == NULL) {
475 if (nflag && !cflag)
476 return -1;
477 return 0;
478 }
479
480 /*
481 * have to search down the list one at a time looking for a match.
482 */
483 pt = pathead;
484 while (pt != NULL) {
485 /*
486 * check for a file name match unless we have DIR_MTCH set in
487 * this pattern then we want a prefix match
488 */
489 if (pt->flgs & DIR_MTCH) {
490 /*
491 * this pattern was matched before to a directory
492 * as we must have -n set for this (but not -d). We can
493 * only match CHILDREN of that directory so we must use
494 * an exact prefix match (no wildcards).
495 */
496 if ((arcn->name[pt->plen] == '/') &&
497 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
498 break;
499 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
500 break;
501 pt = pt->fow;
502 }
503
504 /*
505 * return the result, remember that cflag (-c) inverts the sense of a
506 * match
507 */
508 if (pt == NULL)
509 return cflag ? 0 : 1;
510
511 /*
512 * we had a match, now when we invert the sense (-c) we reject this
513 * member. However we have to tag the pattern a being successful, (in a
514 * match, not in selecting an archive member) so we call pat_sel()
515 * here.
516 */
517 arcn->pat = pt;
518 if (!cflag)
519 return 0;
520
521 if (pat_sel(arcn) < 0)
522 return -1;
523 arcn->pat = NULL;
524 return 1;
525 }
526
527 /*
528 * fn_match()
 * Return:
 *	0 if the string matches the pattern, either exactly or (when -d is
 *	not in effect) as a leading directory prefix of the string; -1 if it
 *	does not match.
 * Note: *pend may be changed to show where the prefix ends.
534 */
535
536 static int
537 fn_match(char *pattern, char *string, char **pend)
538 {
539 char c;
540 char test;
541
542 *pend = NULL;
543 for (;;) {
544 switch (c = *pattern++) {
545 case '\0':
546 /*
547 * Ok we found an exact match
548 */
549 if (*string == '\0')
550 return 0;
551
552 /*
553 * Check if it is a prefix match
554 */
555 if ((dflag == 1) || (*string != '/'))
556 return -1;
557
558 /*
559 * It is a prefix match, remember where the trailing
560 * / is located
561 */
562 *pend = string;
563 return 0;
564 case '?':
565 if ((test = *string++) == '\0')
566 return (-1);
567 break;
568 case '*':
569 c = *pattern;
570 /*
571 * Collapse multiple *'s.
572 */
573 while (c == '*')
574 c = *++pattern;
575
576 /*
577 * Optimized hack for pattern with a * at the end
578 */
579 if (c == '\0')
580 return (0);
581
582 /*
583 * General case, use recursion.
584 */
585 while ((test = *string) != '\0') {
586 if (!fn_match(pattern, string, pend))
587 return (0);
588 ++string;
589 }
590 return (-1);
591 case '[':
592 /*
593 * range match
594 */
595 if (((test = *string++) == '\0') ||
596 ((pattern = range_match(pattern, test)) == NULL))
597 return (-1);
598 break;
599 case '\\':
600 default:
601 if (c != *string++)
602 return (-1);
603 break;
604 }
605 }
606 /* NOTREACHED */
607 }
608
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['; a leading '!' negates the set and "a-z"
 *	style ranges are understood.
 * Return:
 *	pointer to the character after the closing ']' if test is accepted,
 *	NULL if it is rejected or the expression has no closing ']'.
 */
static char *
range_match(char *pattern, int test)
{
	int invert = 0;
	int hit = 0;
	char lo;
	char hi;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * ran off the end without seeing ']': illegal pattern
		 */
		if (lo == '\0')
			return (NULL);

		/*
		 * "lo-hi" is a range unless the '-' is the last character
		 * before the ']' (or before the end of the pattern)
		 */
		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}

		if (lo == test)
			hit = 1;
	}

	if (hit != invert)
		return (pattern);
	return (NULL);
}
637
638 /*
639 * mod_name()
640 * modify a selected file name. first attempt to apply replacement string
641 * expressions, then apply interactive file rename. We apply replacement
642 * string expressions to both filenames and file links (if we didn't the
643 * links would point to the wrong place, and we could never be able to
644 * move an archive that has a file link in it). When we rename files
645 * interactively, we store that mapping (old name to user input name) so
646 * if we spot any file links to the old file name in the future, we will
647 * know exactly how to fix the file link.
648 * Return:
649 * 0 continue to process file, 1 skip this file, -1 pax is finished
650 */
651
652 int
653 mod_name(ARCHD *arcn)
654 {
655 int res = 0;
656
657 if (secure) {
658 if (checkdotdot(arcn->name)) {
659 tty_warn(0, "Ignoring file containing `..' (%s)",
660 arcn->name);
661 return 1;
662 }
663 #ifdef notdef
664 if (checkdotdot(arcn->ln_name)) {
665 tty_warn(0, "Ignoring link containing `..' (%s)",
666 arcn->ln_name);
667 return 1;
668 }
669 #endif
670 }
671
672 /*
673 * IMPORTANT: We have a problem. what do we do with symlinks?
674 * Modifying a hard link name makes sense, as we know the file it
675 * points at should have been seen already in the archive (and if it
676 * wasn't seen because of a read error or a bad archive, we lose
677 * anyway). But there are no such requirements for symlinks. On one
678 * hand the symlink that refers to a file in the archive will have to
679 * be modified to so it will still work at its new location in the
680 * file system. On the other hand a symlink that points elsewhere (and
681 * should continue to do so) should not be modified. There is clearly
682 * no perfect solution here. So we handle them like hardlinks. Clearly
683 * a replacement made by the interactive rename mapping is very likely
684 * to be correct since it applies to a single file and is an exact
685 * match. The regular expression replacements are a little harder to
686 * justify though. We claim that the symlink name is only likely
687 * to be replaced when it points within the file tree being moved and
688 * in that case it should be modified. what we really need to do is to
689 * call an oracle here. :)
690 */
691 if (rephead != NULL) {
692 /*
693 * we have replacement strings, modify the name and the link
694 * name if any.
695 */
696 if ((res = rep_name(arcn->name, sizeof(arcn->name),
697 &(arcn->nlen), 1)) != 0)
698 return res;
699
700 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
701 (arcn->type == PAX_HRG)) &&
702 ((res = rep_name(arcn->ln_name, sizeof(arcn->ln_name),
703 &(arcn->ln_nlen), 0)) != 0))
704 return res;
705 }
706
707 if (iflag) {
708 /*
709 * perform interactive file rename, then map the link if any
710 */
711 if ((res = tty_rename(arcn)) != 0)
712 return res;
713 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
714 (arcn->type == PAX_HRG))
715 sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
716 }
717
718 /*
719 * Strip off leading '/' if appropriate.
720 * Currently, this option is only set for the tar format.
721 */
722 if (rmleadslash && arcn->name[0] == '/') {
723 if (arcn->name[1] == '\0') {
724 arcn->name[0] = '.';
725 } else {
726 (void)memmove(arcn->name, &arcn->name[1],
727 strlen(arcn->name));
728 arcn->nlen--;
729 }
730 if (rmleadslash < 2) {
731 rmleadslash = 2;
732 tty_warn(0, "Removing leading / from absolute path names in the archive");
733 }
734 }
735 if (rmleadslash && arcn->ln_name[0] == '/' &&
736 (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
737 if (arcn->ln_name[1] == '\0') {
738 arcn->ln_name[0] = '.';
739 } else {
740 (void)memmove(arcn->ln_name, &arcn->ln_name[1],
741 strlen(arcn->ln_name));
742 arcn->ln_nlen--;
743 }
744 if (rmleadslash < 2) {
745 rmleadslash = 2;
746 tty_warn(0, "Removing leading / from absolute path names in the archive");
747 }
748 }
749
750 return res;
751 }
752
753 /*
754 * tty_rename()
 *	Prompt the user for a replacement file name. A "." keeps the old name,
 *	an empty line skips the file, and an EOF on reading the tty will cause
 *	pax to stop processing and exit. Otherwise the file name that was
 *	input replaces the old one.
759 * Return:
760 * 0 process this file, 1 skip this file, -1 we need to exit pax
761 */
762
763 static int
764 tty_rename(ARCHD *arcn)
765 {
766 char tmpname[PAXPATHLEN+2];
767 int res;
768
769 /*
770 * prompt user for the replacement name for a file, keep trying until
771 * we get some reasonable input. Archives may have more than one file
772 * on them with the same name (from updates etc). We print verbose info
773 * on the file so the user knows what is up.
774 */
775 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
776
777 for (;;) {
778 ls_tty(arcn);
779 tty_prnt("Input new name, or a \".\" to keep the old name, ");
780 tty_prnt("or a \"return\" to skip this file.\n");
781 tty_prnt("Input > ");
782 if (tty_read(tmpname, sizeof(tmpname)) < 0)
783 return -1;
784 if (strcmp(tmpname, "..") == 0) {
785 tty_prnt("Try again, illegal file name: ..\n");
786 continue;
787 }
788 if (strlen(tmpname) > PAXPATHLEN) {
789 tty_prnt("Try again, file name too long\n");
790 continue;
791 }
792 break;
793 }
794
795 /*
796 * empty file name, skips this file. a "." leaves it alone
797 */
798 if (tmpname[0] == '\0') {
799 tty_prnt("Skipping file.\n");
800 return 1;
801 }
802 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
803 tty_prnt("Processing continues, name unchanged.\n");
804 return 0;
805 }
806
807 /*
808 * ok the name changed. We may run into links that point at this
809 * file later. we have to remember where the user sent the file
810 * in order to repair any links.
811 */
812 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
813 res = add_name(arcn->name, arcn->nlen, tmpname);
814 arcn->nlen = strlcpy(arcn->name, tmpname, sizeof(arcn->name));
815 if (res < 0)
816 return -1;
817 return 0;
818 }
819
820 /*
821 * set_dest()
822 * fix up the file name and the link name (if any) so this file will land
823 * in the destination directory (used during copy() -rw).
824 * Return:
825 * 0 if ok, -1 if failure (name too long)
826 */
827
828 int
829 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
830 {
831 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
832 return -1;
833
834 /*
835 * It is really hard to deal with symlinks here, we cannot be sure
836 * if the name they point was moved (or will be moved). It is best to
837 * leave them alone.
838 */
839 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
840 return 0;
841
842 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
843 return -1;
844 return 0;
845 }
846
847 /*
848 * fix_path
849 * concatenate dir_name and or_name and store the result in or_name (if
850 * it fits). This is one ugly function.
851 * Return:
852 * 0 if ok, -1 if the final name is too long
853 */
854
855 static int
856 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
857 {
858 char *src;
859 char *dest;
860 char *start;
861 int len;
862
863 /*
864 * we shift the or_name to the right enough to tack in the dir_name
865 * at the front. We make sure we have enough space for it all before
866 * we start. since dest always ends in a slash, we skip of or_name
867 * if it also starts with one.
868 */
869 start = or_name;
870 src = start + *or_len;
871 dest = src + dir_len;
872 if (*start == '/') {
873 ++start;
874 --dest;
875 }
876 if ((len = dest - or_name) > PAXPATHLEN) {
877 tty_warn(1, "File name %s/%s, too long", dir_name, start);
878 return -1;
879 }
880 *or_len = len;
881
882 /*
883 * enough space, shift
884 */
885 while (src >= start)
886 *dest-- = *src--;
887 src = dir_name + dir_len - 1;
888
889 /*
890 * splice in the destination directory name
891 */
892 while (src >= dir_name)
893 *dest-- = *src--;
894
895 *(or_name + len) = '\0';
896 return 0;
897 }
898
899 /*
900 * rep_name()
901 * walk down the list of replacement strings applying each one in order.
902 * when we find one with a successful substitution, we modify the name
903 * as specified. if required, we print the results. if the resulting name
904 * is empty, we will skip this archive member. We use the regexp(3)
905 * routines (regexp() ought to win a prize as having the most cryptic
906 * library function manual page).
907 * --Parameters--
908 * name is the file name we are going to apply the regular expressions to
909 * (and may be modified)
910 * namelen the size of the name buffer.
911 * nlen is the length of this name (and is modified to hold the length of
912 * the final string).
913 * prnt is a flag that says whether to print the final result.
914 * Return:
915 * 0 if substitution was successful, 1 if we are to skip the file (the name
916 * ended up empty)
917 */
918
919 static int
920 rep_name(char *name, size_t namelen, int *nlen, int prnt)
921 {
922 REPLACE *pt;
923 char *inpt;
924 char *outpt;
925 char *endpt;
926 char *rpt;
927 int found = 0;
928 int res;
929 #ifndef NET2_REGEX
930 regmatch_t pm[MAXSUBEXP];
931 #endif
932 char nname[PAXPATHLEN+1]; /* final result of all replacements */
933 char buf1[PAXPATHLEN+1]; /* where we work on the name */
934
935 /*
936 * copy the name into buf1, where we will work on it. We need to keep
937 * the orig string around so we can print out the result of the final
938 * replacement. We build up the final result in nname. inpt points at
939 * the string we apply the regular expression to. prnt is used to
940 * suppress printing when we handle replacements on the link field
941 * (the user already saw that substitution go by)
942 */
943 pt = rephead;
944 (void)strcpy(buf1, name);
945 inpt = buf1;
946 outpt = nname;
947 endpt = outpt + PAXPATHLEN;
948
949 /*
950 * try each replacement string in order
951 */
952 while (pt != NULL) {
953 do {
954 /*
955 * check for a successful substitution, if not go to
956 * the next pattern, or cleanup if we were global
957 */
958 #ifdef NET2_REGEX
959 if (regexec(pt->rcmp, inpt) == 0)
960 #else
961 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
962 #endif
963 break;
964
965 /*
966 * ok we found one. We have three parts, the prefix
967 * which did not match, the section that did and the
968 * tail (that also did not match). Copy the prefix to
969 * the final output buffer (watching to make sure we
970 * do not create a string too long).
971 */
972 found = 1;
973 #ifdef NET2_REGEX
974 rpt = pt->rcmp->startp[0];
975 #else
976 rpt = inpt + pm[0].rm_so;
977 #endif
978
979 while ((inpt < rpt) && (outpt < endpt))
980 *outpt++ = *inpt++;
981 if (outpt == endpt)
982 break;
983
984 /*
985 * for the second part (which matched the regular
986 * expression) apply the substitution using the
987 * replacement string and place it the prefix in the
988 * final output. If we have problems, skip it.
989 */
990 if ((res =
991 #ifdef NET2_REGEX
992 resub(pt->rcmp,pt->nstr,outpt,endpt)
993 #else
994 resub(&(pt->rcmp),pm,pt->nstr,inpt, outpt,endpt)
995 #endif
996 ) < 0) {
997 if (prnt)
998 tty_warn(1, "Replacement name error %s",
999 name);
1000 return 1;
1001 }
1002 outpt += res;
1003
1004 /*
1005 * we set up to look again starting at the first
1006 * character in the tail (of the input string right
1007 * after the last character matched by the regular
1008 * expression (inpt always points at the first char in
1009 * the string to process). If we are not doing a global
1010 * substitution, we will use inpt to copy the tail to
1011 * the final result. Make sure we do not overrun the
1012 * output buffer
1013 */
1014 #ifdef NET2_REGEX
1015 inpt = pt->rcmp->endp[0];
1016 #else
1017 inpt += pm[0].rm_eo - pm[0].rm_so;
1018 #endif
1019
1020 if ((outpt == endpt) || (*inpt == '\0'))
1021 break;
1022
1023 /*
1024 * if the user wants global we keep trying to
1025 * substitute until it fails, then we are done.
1026 */
1027 } while (pt->flgs & GLOB);
1028
1029 if (found)
1030 break;
1031
1032 /*
1033 * a successful substitution did NOT occur, try the next one
1034 */
1035 pt = pt->fow;
1036 }
1037
1038 if (found) {
1039 /*
1040 * we had a substitution, copy the last tail piece (if there is
1041 * room) to the final result
1042 */
1043 while ((outpt < endpt) && (*inpt != '\0'))
1044 *outpt++ = *inpt++;
1045
1046 *outpt = '\0';
1047 if ((outpt == endpt) && (*inpt != '\0')) {
1048 if (prnt)
1049 tty_warn(1,"Replacement name too long %s >> %s",
1050 name, nname);
1051 return 1;
1052 }
1053
1054 /*
1055 * inform the user of the result if wanted
1056 */
1057 if (prnt && (pt->flgs & PRNT)) {
1058 if (*nname == '\0')
1059 (void)fprintf(stderr,"%s >> <empty string>\n",
1060 name);
1061 else
1062 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1063 }
1064
1065 /*
1066 * if empty inform the caller this file is to be skipped
1067 * otherwise copy the new name over the orig name and return
1068 */
1069 if (*nname == '\0')
1070 return 1;
1071 *nlen = strlcpy(name, nname, namelen);
1072 }
1073 return 0;
1074 }
1075
1076
1077 /*
1078 * checkdotdot()
1079 * Return true if a component of the name contains a reference to ".."
1080 */
static int
checkdotdot(const char *name)
{
	size_t n;

	/* 1. the name is ".." or starts with "../" */
	if (strncmp(name, "..", 2) == 0 &&
	    (name[2] == '\0' || name[2] == '/'))
		return 1;

	/* 2. a ".." component somewhere in the middle */
	if (strstr(name, "/../") != NULL)
		return 1;

	/* 3. the name ends in "/.." */
	n = strlen(name);
	if (n >= 3 && strcmp(name + n - 3, "/..") == 0)
		return 1;

	return 0;
}
1104
1105 #ifdef NET2_REGEX
1106 /*
1107 * resub()
1108 * apply the replacement to the matched expression. expand out the old
1109 * style ed(1) subexpression expansion.
1110 * Return:
1111 * -1 if error, or the number of characters added to the destination.
1112 */
1113
1114 static int
1115 resub(regexp *prog, char *src, char *dest, char *destend)
1116 {
1117 char *spt;
1118 char *dpt;
1119 char c;
1120 int no;
1121 int len;
1122
1123 spt = src;
1124 dpt = dest;
1125 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1126 if (c == '&')
1127 no = 0;
1128 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1129 no = *spt++ - '0';
1130 else {
1131 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1132 c = *spt++;
1133 *dpt++ = c;
1134 continue;
1135 }
1136 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1137 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1138 continue;
1139
1140 /*
1141 * copy the subexpression to the destination.
1142 * fail if we run out of space or the match string is damaged
1143 */
1144 if (len > (destend - dpt))
1145 return (-1);
1146 strncpy(dpt, prog->startp[no], len);
1147 dpt += len;
1148 }
1149 return dpt - dest;
1150 }
1151
1152 #else
1153
1154 /*
1155 * resub()
1156 * apply the replacement to the matched expression. expand out the old
1157 * style ed(1) subexpression expansion.
1158 * Return:
1159 * -1 if error, or the number of characters added to the destination.
1160 */
1161
static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
{
	const int nsub = rp->re_nsub;
	char *in = src;
	char *out = dest;
	regmatch_t *m;
	char ch;
	int idx;
	int span;

	while (out < destend) {
		ch = *in++;
		if (ch == '\0')
			break;

		/*
		 * '&' stands for the whole match, a backslash followed by
		 * 1-9 for that subexpression; anything else is copied as is
		 * ("\\" and "\&" give a literal '\' and '&')
		 */
		if (ch == '&') {
			m = &pm[0];
		} else if ((ch == '\\') && (*in >= '1') && (*in <= '9')) {
			/*
			 * the expression must really have that many
			 * subexpressions
			 */
			idx = *in++ - '0';
			if (idx > nsub)
				return -1;
			m = &pm[idx];
		} else {
			if ((ch == '\\') && ((*in == '\\') || (*in == '&')))
				ch = *in++;
			*out++ = ch;
			continue;
		}

		/*
		 * a subexpression that did not take part in the match (or is
		 * empty) contributes nothing
		 */
		if ((m->rm_so < 0) || (m->rm_eo < 0))
			continue;
		span = m->rm_eo - m->rm_so;
		if (span <= 0)
			continue;

		/*
		 * copy the matched text, which lives at txt plus the match
		 * offset, to the destination. fail if there is not enough
		 * room left for it.
		 */
		if (span > (destend - out))
			return -1;
		memcpy(out, txt + m->rm_so, span);
		out += span;
	}
	return out - dest;
}
1218 #endif
1219