/*	$NetBSD: pat_rep.c,v 1.9 1999/03/24 17:00:23 pk Exp $	*/
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #if 0
43 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: pat_rep.c,v 1.9 1999/03/24 17:00:23 pk Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53 #include <stdio.h>
54 #include <ctype.h>
55 #include <string.h>
56 #include <unistd.h>
57 #include <stdlib.h>
58 #ifdef NET2_REGEX
59 #include <regexp.h>
60 #else
61 #include <regex.h>
62 #endif
63 #include "pax.h"
64 #include "pat_rep.h"
65 #include "extern.h"
66
67 /*
68 * routines to handle pattern matching, name modification (regular expression
69 * substitution and interactive renames), and destination name modification for
70 * copy (-rw). Both file name and link names are adjusted as required in these
71 * routines.
72 */
73
74 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
75 static PATTERN *pathead = NULL; /* file pattern match list head */
76 static PATTERN *pattail = NULL; /* file pattern match list tail */
77 static REPLACE *rephead = NULL; /* replacement string list head */
78 static REPLACE *reptail = NULL; /* replacement string list tail */
79
80 static int rep_name __P((char *, int *, int));
81 static int tty_rename __P((ARCHD *));
82 static int fix_path __P((char *, int *, char *, int));
83 static int fn_match __P((char *, char *, char **));
84 static char * range_match __P((char *, int));
85 #ifdef NET2_REGEX
86 static int resub __P((regexp *, char *, char *, char *));
87 #else
88 static int resub __P((regex_t *, regmatch_t *, char *, char *, char *, char *));
89 #endif
90
91 /*
92 * rep_add()
93 * parses the -s replacement string; compiles the regular expression
94 * and stores the compiled value and it's replacement string together in
95 * replacement string list. Input to this function is of the form:
96 * /old/new/pg
97 * The first char in the string specifies the delimiter used by this
98 * replacement string. "Old" is a regular expression in "ed" format which
99 * is compiled by regcomp() and is applied to filenames. "new" is the
100 * substitution string; p and g are options flags for printing and global
101 * replacement (over the single filename)
102 * Return:
103 * 0 if a proper replacement string and regular expression was added to
104 * the list of replacement patterns; -1 otherwise.
105 */
106
107 #if __STDC__
108 int
109 rep_add(char *str)
110 #else
111 int
112 rep_add(str)
113 char *str;
114 #endif
115 {
116 char *pt1;
117 char *pt2;
118 REPLACE *rep;
119 # ifndef NET2_REGEX
120 int res;
121 char rebuf[BUFSIZ];
122 # endif
123
124 /*
125 * throw out the bad parameters
126 */
127 if ((str == NULL) || (*str == '\0')) {
128 tty_warn(1, "Empty replacement string");
129 return(-1);
130 }
131
132 /*
133 * first character in the string specifies what the delimiter is for
134 * this expression. find the end and middle, from the end. this
135 * allows the string to be something like /foo\/bar//, but will still
136 * fail on /foo\/bar/foo\/baz/. XXX need to parse the RE to properly
137 * do this!
138 */
139 if ((pt2 = strrchr(str+1, *str)) == NULL || pt2 == str+1 ||
140 (*pt2 = '\0') || (pt1 = strrchr(str+1, *str)) == NULL) {
141 tty_warn(1, "Invalid replacement string %s", str);
142 return(-1);
143 }
144
145 /*
146 * allocate space for the node that handles this replacement pattern
147 * and split out the regular expression and try to compile it
148 */
149 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
150 tty_warn(1, "Unable to allocate memory for replacement string");
151 return(-1);
152 }
153
154 *pt1 = '\0';
155 # ifdef NET2_REGEX
156 if ((rep->rcmp = regcomp(str+1)) == NULL) {
157 # else
158 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
159 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
160 tty_warn(1, "%s while compiling regular expression %s", rebuf,
161 str);
162 # endif
163 (void)free((char *)rep);
164 return(-1);
165 }
166
167 /*
168 * put the delimiter back in case we need an error message and
169 * locate the delimiter at the end of the replacement string
170 * we then point the node at the new substitution string
171 */
172 *pt1++ = *str;
173 rep->nstr = pt1;
174 pt1 = pt2++;
175 rep->flgs = 0;
176
177 /*
178 * set the options if any
179 */
180 while (*pt2 != '\0') {
181 switch(*pt2) {
182 case 'g':
183 case 'G':
184 rep->flgs |= GLOB;
185 break;
186 case 'p':
187 case 'P':
188 rep->flgs |= PRNT;
189 break;
190 default:
191 # ifdef NET2_REGEX
192 (void)free((char *)rep->rcmp);
193 # else
194 regfree(&(rep->rcmp));
195 # endif
196 (void)free((char *)rep);
197 *pt1 = *str;
198 tty_warn(1, "Invalid replacement string option %s",
199 str);
200 return(-1);
201 }
202 ++pt2;
203 }
204
205 /*
206 * all done, link it in at the end
207 */
208 rep->fow = NULL;
209 if (rephead == NULL) {
210 reptail = rephead = rep;
211 return(0);
212 }
213 reptail->fow = rep;
214 reptail = rep;
215 return(0);
216 }
217
218 /*
219 * pat_add()
220 * add a pattern match to the pattern match list. Pattern matches are used
221 * to select which archive members are extracted. (They appear as
222 * arguments to pax in the list and read modes). If no patterns are
223 * supplied to pax, all members in the archive will be selected (and the
224 * pattern match list is empty).
225 * Return:
226 * 0 if the pattern was added to the list, -1 otherwise
227 */
228
229 #if __STDC__
230 int
231 pat_add(char *str)
232 #else
233 int
234 pat_add(str)
235 char *str;
236 #endif
237 {
238 PATTERN *pt;
239
240 /*
241 * throw out the junk
242 */
243 if ((str == NULL) || (*str == '\0')) {
244 tty_warn(1, "Empty pattern string");
245 return(-1);
246 }
247
248 /*
249 * allocate space for the pattern and store the pattern. the pattern is
250 * part of argv so do not bother to copy it, just point at it. Add the
251 * node to the end of the pattern list
252 */
253 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
254 tty_warn(1, "Unable to allocate memory for pattern string");
255 return(-1);
256 }
257
258 pt->pstr = str;
259 pt->pend = NULL;
260 pt->plen = strlen(str);
261 pt->fow = NULL;
262 pt->flgs = 0;
263 if (pathead == NULL) {
264 pattail = pathead = pt;
265 return(0);
266 }
267 pattail->fow = pt;
268 pattail = pt;
269 return(0);
270 }
271
272 /*
273 * pat_chk()
274 * complain if any the user supplied pattern did not result in a match to
275 * a selected archive member.
276 */
277
278 #if __STDC__
279 void
280 pat_chk(void)
281 #else
282 void
283 pat_chk()
284 #endif
285 {
286 PATTERN *pt;
287 int wban = 0;
288
289 /*
290 * walk down the list checking the flags to make sure MTCH was set,
291 * if not complain
292 */
293 for (pt = pathead; pt != NULL; pt = pt->fow) {
294 if (pt->flgs & MTCH)
295 continue;
296 if (!wban) {
297 tty_warn(1, "WARNING! These patterns were not matched:");
298 ++wban;
299 }
300 (void)fprintf(stderr, "%s\n", pt->pstr);
301 }
302 }
303
304 /*
305 * pat_sel()
306 * the archive member which matches a pattern was selected. Mark the
307 * pattern as having selected an archive member. arcn->pat points at the
308 * pattern that was matched. arcn->pat is set in pat_match()
309 *
310 * NOTE: When the -c option is used, we are called when there was no match
311 * by pat_match() (that means we did match before the inverted sense of
312 * the logic). Now this seems really strange at first, but with -c we
313 * need to keep track of those patterns that cause a archive member to NOT
314 * be selected (it found an archive member with a specified pattern)
315 * Return:
316 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
317 * match, -1 otherwise.
318 */
319
320 #if __STDC__
321 int
322 pat_sel(ARCHD *arcn)
323 #else
324 int
325 pat_sel(arcn)
326 ARCHD *arcn;
327 #endif
328 {
329 PATTERN *pt;
330 PATTERN **ppt;
331 int len;
332
333 /*
334 * if no patterns just return
335 */
336 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
337 return(0);
338
339 /*
340 * when we are NOT limited to a single match per pattern mark the
341 * pattern and return
342 */
343 if (!nflag) {
344 pt->flgs |= MTCH;
345 return(0);
346 }
347
348 /*
349 * we reach this point only when we allow a single selected match per
350 * pattern, if the pattern matches a directory and we do not have -d
351 * (dflag) we are done with this pattern. We may also be handed a file
352 * in the subtree of a directory. in that case when we are operating
353 * with -d, this pattern was already selected and we are done
354 */
355 if (pt->flgs & DIR_MTCH)
356 return(0);
357
358 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
359 /*
360 * ok we matched a directory and we are allowing
361 * subtree matches but because of the -n only its children will
362 * match. This is tagged as a DIR_MTCH type.
363 * WATCH IT, the code assumes that pt->pend points
364 * into arcn->name and arcn->name has not been modified.
365 * If not we will have a big mess. Yup this is another kludge
366 */
367
368 /*
369 * if this was a prefix match, remove trailing part of path
370 * so we can copy it. Future matches will be exact prefix match
371 */
372 if (pt->pend != NULL)
373 *pt->pend = '\0';
374
375 if ((pt->pstr = strdup(arcn->name)) == NULL) {
376 tty_warn(1, "Pattern select out of memory");
377 if (pt->pend != NULL)
378 *pt->pend = '/';
379 pt->pend = NULL;
380 return(-1);
381 }
382
383 /*
384 * put the trailing / back in the source string
385 */
386 if (pt->pend != NULL) {
387 *pt->pend = '/';
388 pt->pend = NULL;
389 }
390 pt->plen = strlen(pt->pstr);
391
392 /*
393 * strip off any trailing /, this should really never happen
394 */
395 len = pt->plen - 1;
396 if (*(pt->pstr + len) == '/') {
397 *(pt->pstr + len) = '\0';
398 pt->plen = len;
399 }
400 pt->flgs = DIR_MTCH | MTCH;
401 arcn->pat = pt;
402 return(0);
403 }
404
405 /*
406 * we are then done with this pattern, so we delete it from the list
407 * because it can never be used for another match.
408 * Seems kind of strange to do for a -c, but the pax spec is really
409 * vague on the interaction of -c -n and -d. We assume that when -c
410 * and the pattern rejects a member (i.e. it matched it) it is done.
411 * In effect we place the order of the flags as having -c last.
412 */
413 pt = pathead;
414 ppt = &pathead;
415 while ((pt != NULL) && (pt != arcn->pat)) {
416 ppt = &(pt->fow);
417 pt = pt->fow;
418 }
419
420 if (pt == NULL) {
421 /*
422 * should never happen....
423 */
424 tty_warn(1, "Pattern list inconsistant");
425 return(-1);
426 }
427 *ppt = pt->fow;
428 (void)free((char *)pt);
429 arcn->pat = NULL;
430 return(0);
431 }
432
433 /*
434 * pat_match()
435 * see if this archive member matches any supplied pattern, if a match
436 * is found, arcn->pat is set to point at the potential pattern. Later if
437 * this archive member is "selected" we process and mark the pattern as
438 * one which matched a selected archive member (see pat_sel())
439 * Return:
440 * 0 if this archive member should be processed, 1 if it should be
441 * skipped and -1 if we are done with all patterns (and pax should quit
442 * looking for more members)
443 */
444
445 #if __STDC__
446 int
447 pat_match(ARCHD *arcn)
448 #else
449 int
450 pat_match(arcn)
451 ARCHD *arcn;
452 #endif
453 {
454 PATTERN *pt;
455
456 arcn->pat = NULL;
457
458 /*
459 * if there are no more patterns and we have -n (and not -c) we are
460 * done. otherwise with no patterns to match, matches all
461 */
462 if (pathead == NULL) {
463 if (nflag && !cflag)
464 return(-1);
465 return(0);
466 }
467
468 /*
469 * have to search down the list one at a time looking for a match.
470 */
471 pt = pathead;
472 while (pt != NULL) {
473 /*
474 * check for a file name match unless we have DIR_MTCH set in
475 * this pattern then we want a prefix match
476 */
477 if (pt->flgs & DIR_MTCH) {
478 /*
479 * this pattern was matched before to a directory
480 * as we must have -n set for this (but not -d). We can
481 * only match CHILDREN of that directory so we must use
482 * an exact prefix match (no wildcards).
483 */
484 if ((arcn->name[pt->plen] == '/') &&
485 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
486 break;
487 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
488 break;
489 pt = pt->fow;
490 }
491
492 /*
493 * return the result, remember that cflag (-c) inverts the sense of a
494 * match
495 */
496 if (pt == NULL)
497 return(cflag ? 0 : 1);
498
499 /*
500 * we had a match, now when we invert the sense (-c) we reject this
501 * member. However we have to tag the pattern a being successful, (in a
502 * match, not in selecting a archive member) so we call pat_sel() here.
503 */
504 arcn->pat = pt;
505 if (!cflag)
506 return(0);
507
508 if (pat_sel(arcn) < 0)
509 return(-1);
510 arcn->pat = NULL;
511 return(1);
512 }
513
514 /*
515 * fn_match()
516 * Return:
517 * 0 if this archive member should be processed, 1 if it should be
518 * skipped and -1 if we are done with all patterns (and pax should quit
519 * looking for more members)
520 * Note: *pend may be changed to show where the prefix ends.
521 */
522
523 #if __STDC__
524 static int
525 fn_match(char *pattern, char *string, char **pend)
526 #else
527 static int
528 fn_match(pattern, string, pend)
529 char *pattern;
530 char *string;
531 char **pend;
532 #endif
533 {
534 char c;
535 char test;
536
537 *pend = NULL;
538 for (;;) {
539 switch (c = *pattern++) {
540 case '\0':
541 /*
542 * Ok we found an exact match
543 */
544 if (*string == '\0')
545 return(0);
546
547 /*
548 * Check if it is a prefix match
549 */
550 if ((dflag == 1) || (*string != '/'))
551 return(-1);
552
553 /*
554 * It is a prefix match, remember where the trailing
555 * / is located
556 */
557 *pend = string;
558 return(0);
559 case '?':
560 if ((test = *string++) == '\0')
561 return (-1);
562 break;
563 case '*':
564 c = *pattern;
565 /*
566 * Collapse multiple *'s.
567 */
568 while (c == '*')
569 c = *++pattern;
570
571 /*
572 * Optimized hack for pattern with a * at the end
573 */
574 if (c == '\0')
575 return (0);
576
577 /*
578 * General case, use recursion.
579 */
580 while ((test = *string) != '\0') {
581 if (!fn_match(pattern, string, pend))
582 return (0);
583 ++string;
584 }
585 return (-1);
586 case '[':
587 /*
588 * range match
589 */
590 if (((test = *string++) == '\0') ||
591 ((pattern = range_match(pattern, test)) == NULL))
592 return (-1);
593 break;
594 case '\\':
595 default:
596 if (c != *string++)
597 return (-1);
598 break;
599 }
600 }
601 /* NOTREACHED */
602 }
603
/*
 * range_match()
 *	match the character test against a bracket expression. pattern points
 *	just past the opening '['. A leading '!' inverts the sense of the
 *	class; "a-z" denotes an inclusive range unless the '-' is the last
 *	character in front of the closing ']'.
 * Return:
 *	pointer to the pattern character following the closing ']' if test
 *	is accepted by the class; NULL if it is rejected or the class is not
 *	terminated by a ']'.
 */

#ifdef __STDC__
static char *
range_match(char *pattern, int test)
#else
static char *
range_match(pattern, test)
	char *pattern;
	int test;
#endif
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern, ran off the end without seeing a ]
		 */
		if (lo == '\0')
			return (NULL);

		/*
		 * "lo-hi" range, provided something other than the end of
		 * the class follows the -
		 */
		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']')) {
				if ((lo <= test) && (test <= hi))
					hit = 1;
				pattern += 2;
				continue;
			}
		}

		/*
		 * single character
		 */
		if (lo == test)
			hit = 1;
	}

	/*
	 * accepted when exactly one of hit and invert is set
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
639
640 /*
641 * mod_name()
642 * modify a selected file name. first attempt to apply replacement string
643 * expressions, then apply interactive file rename. We apply replacement
644 * string expressions to both filenames and file links (if we didn't the
645 * links would point to the wrong place, and we could never be able to
646 * move an archive that has a file link in it). When we rename files
647 * interactively, we store that mapping (old name to user input name) so
648 * if we spot any file links to the old file name in the future, we will
649 * know exactly how to fix the file link.
650 * Return:
651 * 0 continue to process file, 1 skip this file, -1 pax is finished
652 */
653
654 #if __STDC__
655 int
656 mod_name(ARCHD *arcn)
657 #else
658 int
659 mod_name(arcn)
660 ARCHD *arcn;
661 #endif
662 {
663 int res = 0;
664
665 /*
666 * IMPORTANT: We have a problem. what do we do with symlinks?
667 * Modifying a hard link name makes sense, as we know the file it
668 * points at should have been seen already in the archive (and if it
669 * wasn't seen because of a read error or a bad archive, we lose
670 * anyway). But there are no such requirements for symlinks. On one
671 * hand the symlink that refers to a file in the archive will have to
672 * be modified to so it will still work at its new location in the
673 * file system. On the other hand a symlink that points elsewhere (and
674 * should continue to do so) should not be modified. There is clearly
675 * no perfect solution here. So we handle them like hardlinks. Clearly
676 * a replacement made by the interactive rename mapping is very likely
677 * to be correct since it applies to a single file and is an exact
678 * match. The regular expression replacements are a little harder to
679 * justify though. We claim that the symlink name is only likely
680 * to be replaced when it points within the file tree being moved and
681 * in that case it should be modified. what we really need to do is to
682 * call an oracle here. :)
683 */
684 if (rephead != NULL) {
685 /*
686 * we have replacement strings, modify the name and the link
687 * name if any.
688 */
689 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
690 return(res);
691
692 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
693 (arcn->type == PAX_HRG)) &&
694 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
695 return(res);
696 }
697
698 if (iflag) {
699 /*
700 * perform interactive file rename, then map the link if any
701 */
702 if ((res = tty_rename(arcn)) != 0)
703 return(res);
704 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
705 (arcn->type == PAX_HRG))
706 sub_name(arcn->ln_name, &(arcn->ln_nlen));
707 }
708 return(res);
709 }
710
711 /*
712 * tty_rename()
713 * Prompt the user for a replacement file name. A "." keeps the old name,
714 * a empty line skips the file, and an EOF on reading the tty, will cause
715 * pax to stop processing and exit. Otherwise the file name input, replaces
716 * the old one.
717 * Return:
718 * 0 process this file, 1 skip this file, -1 we need to exit pax
719 */
720
721 #if __STDC__
722 static int
723 tty_rename(ARCHD *arcn)
724 #else
725 static int
726 tty_rename(arcn)
727 ARCHD *arcn;
728 #endif
729 {
730 char tmpname[PAXPATHLEN+2];
731 int res;
732
733 /*
734 * prompt user for the replacement name for a file, keep trying until
735 * we get some reasonable input. Archives may have more than one file
736 * on them with the same name (from updates etc). We print verbose info
737 * on the file so the user knows what is up.
738 */
739 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
740
741 for (;;) {
742 ls_tty(arcn);
743 tty_prnt("Input new name, or a \".\" to keep the old name, ");
744 tty_prnt("or a \"return\" to skip this file.\n");
745 tty_prnt("Input > ");
746 if (tty_read(tmpname, sizeof(tmpname)) < 0)
747 return(-1);
748 if (strcmp(tmpname, "..") == 0) {
749 tty_prnt("Try again, illegal file name: ..\n");
750 continue;
751 }
752 if (strlen(tmpname) > PAXPATHLEN) {
753 tty_prnt("Try again, file name too long\n");
754 continue;
755 }
756 break;
757 }
758
759 /*
760 * empty file name, skips this file. a "." leaves it alone
761 */
762 if (tmpname[0] == '\0') {
763 tty_prnt("Skipping file.\n");
764 return(1);
765 }
766 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
767 tty_prnt("Processing continues, name unchanged.\n");
768 return(0);
769 }
770
771 /*
772 * ok the name changed. We may run into links that point at this
773 * file later. we have to remember where the user sent the file
774 * in order to repair any links.
775 */
776 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
777 res = add_name(arcn->name, arcn->nlen, tmpname);
778 arcn->nlen = l_strncpy(arcn->name, tmpname, PAXPATHLEN+1);
779 if (res < 0)
780 return(-1);
781 return(0);
782 }
783
784 /*
785 * set_dest()
786 * fix up the file name and the link name (if any) so this file will land
787 * in the destination directory (used during copy() -rw).
788 * Return:
789 * 0 if ok, -1 if failure (name too long)
790 */
791
792 #if __STDC__
793 int
794 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
795 #else
796 int
797 set_dest(arcn, dest_dir, dir_len)
798 ARCHD *arcn;
799 char *dest_dir;
800 int dir_len;
801 #endif
802 {
803 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
804 return(-1);
805
806 /*
807 * It is really hard to deal with symlinks here, we cannot be sure
808 * if the name they point was moved (or will be moved). It is best to
809 * leave them alone.
810 */
811 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
812 return(0);
813
814 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
815 return(-1);
816 return(0);
817 }
818
819 /*
820 * fix_path
821 * concatenate dir_name and or_name and store the result in or_name (if
822 * it fits). This is one ugly function.
823 * Return:
824 * 0 if ok, -1 if the final name is too long
825 */
826
827 #if __STDC__
828 static int
829 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
830 #else
831 static int
832 fix_path(or_name, or_len, dir_name, dir_len)
833 char *or_name;
834 int *or_len;
835 char *dir_name;
836 int dir_len;
837 #endif
838 {
839 char *src;
840 char *dest;
841 char *start;
842 int len;
843
844 /*
845 * we shift the or_name to the right enough to tack in the dir_name
846 * at the front. We make sure we have enough space for it all before
847 * we start. since dest always ends in a slash, we skip of or_name
848 * if it also starts with one.
849 */
850 start = or_name;
851 src = start + *or_len;
852 dest = src + dir_len;
853 if (*start == '/') {
854 ++start;
855 --dest;
856 }
857 if ((len = dest - or_name) > PAXPATHLEN) {
858 tty_warn(1, "File name %s/%s, too long", dir_name, start);
859 return(-1);
860 }
861 *or_len = len;
862
863 /*
864 * enough space, shift
865 */
866 while (src >= start)
867 *dest-- = *src--;
868 src = dir_name + dir_len - 1;
869
870 /*
871 * splice in the destination directory name
872 */
873 while (src >= dir_name)
874 *dest-- = *src--;
875
876 *(or_name + len) = '\0';
877 return(0);
878 }
879
880 /*
881 * rep_name()
882 * walk down the list of replacement strings applying each one in order.
883 * when we find one with a successful substitution, we modify the name
884 * as specified. if required, we print the results. if the resulting name
885 * is empty, we will skip this archive member. We use the regexp(3)
886 * routines (regexp() ought to win a prize as having the most cryptic
887 * library function manual page).
888 * --Parameters--
889 * name is the file name we are going to apply the regular expressions to
890 * (and may be modified)
891 * nlen is the length of this name (and is modified to hold the length of
892 * the final string).
893 * prnt is a flag that says whether to print the final result.
894 * Return:
895 * 0 if substitution was successful, 1 if we are to skip the file (the name
896 * ended up empty)
897 */
898
899 #if __STDC__
900 static int
901 rep_name(char *name, int *nlen, int prnt)
902 #else
903 static int
904 rep_name(name, nlen, prnt)
905 char *name;
906 int *nlen;
907 int prnt;
908 #endif
909 {
910 REPLACE *pt;
911 char *inpt;
912 char *outpt;
913 char *endpt;
914 char *rpt;
915 int found = 0;
916 int res;
917 # ifndef NET2_REGEX
918 regmatch_t pm[MAXSUBEXP];
919 # endif
920 char nname[PAXPATHLEN+1]; /* final result of all replacements */
921 char buf1[PAXPATHLEN+1]; /* where we work on the name */
922
923 /*
924 * copy the name into buf1, where we will work on it. We need to keep
925 * the orig string around so we can print out the result of the final
926 * replacement. We build up the final result in nname. inpt points at
927 * the string we apply the regular expression to. prnt is used to
928 * suppress printing when we handle replacements on the link field
929 * (the user already saw that substitution go by)
930 */
931 pt = rephead;
932 (void)strcpy(buf1, name);
933 inpt = buf1;
934 outpt = nname;
935 endpt = outpt + PAXPATHLEN;
936
937 /*
938 * try each replacement string in order
939 */
940 while (pt != NULL) {
941 do {
942 /*
943 * check for a successful substitution, if not go to
944 * the next pattern, or cleanup if we were global
945 */
946 # ifdef NET2_REGEX
947 if (regexec(pt->rcmp, inpt) == 0)
948 # else
949 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
950 # endif
951 break;
952
953 /*
954 * ok we found one. We have three parts, the prefix
955 * which did not match, the section that did and the
956 * tail (that also did not match). Copy the prefix to
957 * the final output buffer (watching to make sure we
958 * do not create a string too long).
959 */
960 found = 1;
961 # ifdef NET2_REGEX
962 rpt = pt->rcmp->startp[0];
963 # else
964 rpt = inpt + pm[0].rm_so;
965 # endif
966
967 while ((inpt < rpt) && (outpt < endpt))
968 *outpt++ = *inpt++;
969 if (outpt == endpt)
970 break;
971
972 /*
973 * for the second part (which matched the regular
974 * expression) apply the substitution using the
975 * replacement string and place it the prefix in the
976 * final output. If we have problems, skip it.
977 */
978 # ifdef NET2_REGEX
979 if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
980 # else
981 if ((res = resub(&(pt->rcmp),pm,pt->nstr,inpt,
982 outpt,endpt)) < 0) {
983 # endif
984 if (prnt)
985 tty_warn(1, "Replacement name error %s",
986 name);
987 return(1);
988 }
989 outpt += res;
990
991 /*
992 * we set up to look again starting at the first
993 * character in the tail (of the input string right
994 * after the last character matched by the regular
995 * expression (inpt always points at the first char in
996 * the string to process). If we are not doing a global
997 * substitution, we will use inpt to copy the tail to
998 * the final result. Make sure we do not overrun the
999 * output buffer
1000 */
1001 # ifdef NET2_REGEX
1002 inpt = pt->rcmp->endp[0];
1003 # else
1004 inpt += pm[0].rm_eo - pm[0].rm_so;
1005 # endif
1006
1007 if ((outpt == endpt) || (*inpt == '\0'))
1008 break;
1009
1010 /*
1011 * if the user wants global we keep trying to
1012 * substitute until it fails, then we are done.
1013 */
1014 } while (pt->flgs & GLOB);
1015
1016 if (found)
1017 break;
1018
1019 /*
1020 * a successful substitution did NOT occur, try the next one
1021 */
1022 pt = pt->fow;
1023 }
1024
1025 if (found) {
1026 /*
1027 * we had a substitution, copy the last tail piece (if there is
1028 * room) to the final result
1029 */
1030 while ((outpt < endpt) && (*inpt != '\0'))
1031 *outpt++ = *inpt++;
1032
1033 *outpt = '\0';
1034 if ((outpt == endpt) && (*inpt != '\0')) {
1035 if (prnt)
1036 tty_warn(1,"Replacement name too long %s >> %s",
1037 name, nname);
1038 return(1);
1039 }
1040
1041 /*
1042 * inform the user of the result if wanted
1043 */
1044 if (prnt && (pt->flgs & PRNT)) {
1045 if (*nname == '\0')
1046 (void)fprintf(stderr,"%s >> <empty string>\n",
1047 name);
1048 else
1049 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1050 }
1051
1052 /*
1053 * if empty inform the caller this file is to be skipped
1054 * otherwise copy the new name over the orig name and return
1055 */
1056 if (*nname == '\0')
1057 return(1);
1058 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1059 }
1060 return(0);
1061 }
1062
1063 #ifdef NET2_REGEX
1064 /*
1065 * resub()
1066 * apply the replacement to the matched expression. expand out the old
1067 * style ed(1) subexpression expansion.
1068 * Return:
1069 * -1 if error, or the number of characters added to the destination.
1070 */
1071
1072 #if __STDC__
1073 static int
1074 resub(regexp *prog, char *src, char *dest, char *destend)
1075 #else
1076 static int
1077 resub(prog, src, dest, destend)
1078 regexp *prog;
1079 char *src;
1080 char *dest;
1081 char *destend;
1082 #endif
1083 {
1084 char *spt;
1085 char *dpt;
1086 char c;
1087 int no;
1088 int len;
1089
1090 spt = src;
1091 dpt = dest;
1092 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1093 if (c == '&')
1094 no = 0;
1095 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1096 no = *spt++ - '0';
1097 else {
1098 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1099 c = *spt++;
1100 *dpt++ = c;
1101 continue;
1102 }
1103 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1104 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1105 continue;
1106
1107 /*
1108 * copy the subexpression to the destination.
1109 * fail if we run out of space or the match string is damaged
1110 */
1111 if (len > (destend - dpt))
1112 len = destend - dpt;
1113 if (l_strncpy(dpt, prog->startp[no], len) != len)
1114 return(-1);
1115 dpt += len;
1116 }
1117 return(dpt - dest);
1118 }
1119
1120 #else
1121
/*
 * resub()
 *	build the replacement text for a matched expression at dest,
 *	expanding the old style ed(1) references: & is the whole match and
 *	\1 .. \9 select the subexpression of that number. \\ and \& give a
 *	literal \ and &. Output stops once destend is reached.
 * --Parameters--
 *	rp is the compiled expression (consulted for its subexpression count)
 *	pm holds the match offsets, which are applied to txt
 *	src is the replacement string
 *	txt is the text the offsets in pm refer to
 *	dest and destend bound the output area
 * Return:
 *	-1 if error, or the number of characters added to the destination.
 */

#if __STDC__
static int
resub(regex_t *rp, regmatch_t *pm, char *src, char *txt, char *dest,
	char *destend)
#else
static int
resub(rp, pm, src, txt, dest, destend)
	regex_t *rp;
	regmatch_t *pm;
	char *src;
	char *txt;
	char *dest;
	char *destend;
#endif
{
	char *in = src;
	char *out = dest;
	char c;
	regmatch_t *ref;
	int len;
	int nsub;

	nsub = rp->re_nsub;
	while (out < destend) {
		c = *in++;
		if (c == '\0')
			break;

		/*
		 * see if we just have an ordinary replacement character
		 * or we refer to a subexpression.
		 */
		if (c == '\\') {
			if ((*in >= '1') && (*in <= '9')) {
				/*
				 * make sure there is a subexpression as
				 * specified
				 */
				len = *in++ - '0';
				if (len > nsub)
					return(-1);
				ref = pm + len;
			} else {
				/*
				 * \\ and \& are the escaped characters, any
				 * other \ is just copied
				 */
				if ((*in == '\\') || (*in == '&'))
					c = *in++;
				*out++ = c;
				continue;
			}
		} else if (c == '&') {
			ref = pm;
		} else {
			/*
			 * Ordinary character, just copy it
			 */
			*out++ = c;
			continue;
		}

		/*
		 * continue if the subexpression is bogus (or empty)
		 */
		if ((ref->rm_so < 0) || (ref->rm_eo < 0))
			continue;
		len = ref->rm_eo - ref->rm_so;
		if (len <= 0)
			continue;

		/*
		 * copy the subexpression to the destination, limited to the
		 * space left. fail if the match string is damaged
		 */
		if (len > (destend - out))
			len = destend - out;
		if (l_strncpy(out, txt + ref->rm_so, len) != len)
			return(-1);
		out += len;
	}
	return(out - dest);
}
1198 #endif
1199