pat_rep.c revision 1.4 1 /* $NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)pat_rep.c 8.2 (Berkeley) 4/18/94";
43 #else
44 static char rcsid[] = "$NetBSD: pat_rep.c,v 1.4 1995/03/21 09:07:33 cgd Exp $";
45 #endif
46 #endif /* not lint */
47
48 #include <sys/types.h>
49 #include <sys/time.h>
50 #include <sys/stat.h>
51 #include <sys/param.h>
52 #include <stdio.h>
53 #include <ctype.h>
54 #include <string.h>
55 #include <unistd.h>
56 #include <stdlib.h>
57 #ifdef NET2_REGEX
58 #include <regexp.h>
59 #else
60 #include <regex.h>
61 #endif
62 #include "pax.h"
63 #include "pat_rep.h"
64 #include "extern.h"
65
66 /*
67 * routines to handle pattern matching, name modification (regular expression
68 * substitution and interactive renames), and destination name modification for
69 * copy (-rw). Both file name and link names are adjusted as required in these
70 * routines.
71 */
72
73 #define MAXSUBEXP 10 /* max subexpressions, DO NOT CHANGE */
74 static PATTERN *pathead = NULL; /* file pattern match list head */
75 static PATTERN *pattail = NULL; /* file pattern match list tail */
76 static REPLACE *rephead = NULL; /* replacement string list head */
77 static REPLACE *reptail = NULL; /* replacement string list tail */
78
79 static int rep_name __P((char *, int *, int));
80 static int tty_rename __P((register ARCHD *));
81 static int fix_path __P((char *, int *, char *, int));
82 static int fn_match __P((register char *, register char *, char **));
83 static char * range_match __P((register char *, register int));
84 #ifdef NET2_REGEX
85 static int resub __P((regexp *, char *, char *, register char *));
86 #else
87 static int resub __P((regex_t *, regmatch_t *, char *, char *, char *));
88 #endif
89
90 /*
91 * rep_add()
92 * parses the -s replacement string; compiles the regular expression
93 * and stores the compiled value and it's replacement string together in
94 * replacement string list. Input to this function is of the form:
95 * /old/new/pg
96 * The first char in the string specifies the delimiter used by this
97 * replacement string. "Old" is a regular expression in "ed" format which
98 * is compiled by regcomp() and is applied to filenames. "new" is the
99 * substitution string; p and g are options flags for printing and global
100 * replacement (over the single filename)
101 * Return:
102 * 0 if a proper replacement string and regular expression was added to
103 * the list of replacement patterns; -1 otherwise.
104 */
105
106 #if __STDC__
107 int
108 rep_add(register char *str)
109 #else
110 int
111 rep_add(str)
112 register char *str;
113 #endif
114 {
115 register char *pt1;
116 register char *pt2;
117 register REPLACE *rep;
118 # ifndef NET2_REGEX
119 register int res;
120 char rebuf[BUFSIZ];
121 # endif
122
123 /*
124 * throw out the bad parameters
125 */
126 if ((str == NULL) || (*str == '\0')) {
127 warn(1, "Empty replacement string");
128 return(-1);
129 }
130
131 /*
132 * first character in the string specifies what the delimiter is for
133 * this expression
134 */
135 if ((pt1 = strchr(str+1, *str)) == NULL) {
136 warn(1, "Invalid replacement string %s", str);
137 return(-1);
138 }
139
140 /*
141 * allocate space for the node that handles this replacement pattern
142 * and split out the regular expression and try to compile it
143 */
144 if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
145 warn(1, "Unable to allocate memory for replacement string");
146 return(-1);
147 }
148
149 *pt1 = '\0';
150 # ifdef NET2_REGEX
151 if ((rep->rcmp = regcomp(str+1)) == NULL) {
152 # else
153 if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
154 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
155 warn(1, "%s while compiling regular expression %s", rebuf, str);
156 # endif
157 (void)free((char *)rep);
158 return(-1);
159 }
160
161 /*
162 * put the delimiter back in case we need an error message and
163 * locate the delimiter at the end of the replacement string
164 * we then point the node at the new substitution string
165 */
166 *pt1++ = *str;
167 if ((pt2 = strchr(pt1, *str)) == NULL) {
168 # ifdef NET2_REGEX
169 (void)free((char *)rep->rcmp);
170 # else
171 regfree(&(rep->rcmp));
172 # endif
173 (void)free((char *)rep);
174 warn(1, "Invalid replacement string %s", str);
175 return(-1);
176 }
177
178 *pt2 = '\0';
179 rep->nstr = pt1;
180 pt1 = pt2++;
181 rep->flgs = 0;
182
183 /*
184 * set the options if any
185 */
186 while (*pt2 != '\0') {
187 switch(*pt2) {
188 case 'g':
189 case 'G':
190 rep->flgs |= GLOB;
191 break;
192 case 'p':
193 case 'P':
194 rep->flgs |= PRNT;
195 break;
196 default:
197 # ifdef NET2_REGEX
198 (void)free((char *)rep->rcmp);
199 # else
200 regfree(&(rep->rcmp));
201 # endif
202 (void)free((char *)rep);
203 *pt1 = *str;
204 warn(1, "Invalid replacement string option %s", str);
205 return(-1);
206 }
207 ++pt2;
208 }
209
210 /*
211 * all done, link it in at the end
212 */
213 rep->fow = NULL;
214 if (rephead == NULL) {
215 reptail = rephead = rep;
216 return(0);
217 }
218 reptail->fow = rep;
219 reptail = rep;
220 return(0);
221 }
222
223 /*
224 * pat_add()
225 * add a pattern match to the pattern match list. Pattern matches are used
226 * to select which archive members are extracted. (They appear as
227 * arguments to pax in the list and read modes). If no patterns are
228 * supplied to pax, all members in the archive will be selected (and the
229 * pattern match list is empty).
230 * Return:
231 * 0 if the pattern was added to the list, -1 otherwise
232 */
233
234 #if __STDC__
235 int
236 pat_add(char *str)
237 #else
238 int
239 pat_add(str)
240 char *str;
241 #endif
242 {
243 register PATTERN *pt;
244
245 /*
246 * throw out the junk
247 */
248 if ((str == NULL) || (*str == '\0')) {
249 warn(1, "Empty pattern string");
250 return(-1);
251 }
252
253 /*
254 * allocate space for the pattern and store the pattern. the pattern is
255 * part of argv so do not bother to copy it, just point at it. Add the
256 * node to the end of the pattern list
257 */
258 if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
259 warn(1, "Unable to allocate memory for pattern string");
260 return(-1);
261 }
262
263 pt->pstr = str;
264 pt->pend = NULL;
265 pt->plen = strlen(str);
266 pt->fow = NULL;
267 pt->flgs = 0;
268 if (pathead == NULL) {
269 pattail = pathead = pt;
270 return(0);
271 }
272 pattail->fow = pt;
273 pattail = pt;
274 return(0);
275 }
276
277 /*
278 * pat_chk()
279 * complain if any the user supplied pattern did not result in a match to
280 * a selected archive member.
281 */
282
283 #if __STDC__
284 void
285 pat_chk(void)
286 #else
287 void
288 pat_chk()
289 #endif
290 {
291 register PATTERN *pt;
292 register int wban = 0;
293
294 /*
295 * walk down the list checking the flags to make sure MTCH was set,
296 * if not complain
297 */
298 for (pt = pathead; pt != NULL; pt = pt->fow) {
299 if (pt->flgs & MTCH)
300 continue;
301 if (!wban) {
302 warn(1, "WARNING! These patterns were not matched:");
303 ++wban;
304 }
305 (void)fprintf(stderr, "%s\n", pt->pstr);
306 }
307 }
308
309 /*
310 * pat_sel()
311 * the archive member which matches a pattern was selected. Mark the
312 * pattern as having selected an archive member. arcn->pat points at the
313 * pattern that was matched. arcn->pat is set in pat_match()
314 *
315 * NOTE: When the -c option is used, we are called when there was no match
316 * by pat_match() (that means we did match before the inverted sense of
317 * the logic). Now this seems really strange at first, but with -c we
318 * need to keep track of those patterns that cause a archive member to NOT
319 * be selected (it found an archive member with a specified pattern)
320 * Return:
321 * 0 if the pattern pointed at by arcn->pat was tagged as creating a
322 * match, -1 otherwise.
323 */
324
325 #if __STDC__
326 int
327 pat_sel(register ARCHD *arcn)
328 #else
329 int
330 pat_sel(arcn)
331 register ARCHD *arcn;
332 #endif
333 {
334 register PATTERN *pt;
335 register PATTERN **ppt;
336 register int len;
337
338 /*
339 * if no patterns just return
340 */
341 if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
342 return(0);
343
344 /*
345 * when we are NOT limited to a single match per pattern mark the
346 * pattern and return
347 */
348 if (!nflag) {
349 pt->flgs |= MTCH;
350 return(0);
351 }
352
353 /*
354 * we reach this point only when we allow a single selected match per
355 * pattern, if the pattern matches a directory and we do not have -d
356 * (dflag) we are done with this pattern. We may also be handed a file
357 * in the subtree of a directory. in that case when we are operating
358 * with -d, this pattern was already selected and we are done
359 */
360 if (pt->flgs & DIR_MTCH)
361 return(0);
362
363 if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
364 /*
365 * ok we matched a directory and we are allowing
366 * subtree matches but because of the -n only its children will
367 * match. This is tagged as a DIR_MTCH type.
368 * WATCH IT, the code assumes that pt->pend points
369 * into arcn->name and arcn->name has not been modified.
370 * If not we will have a big mess. Yup this is another kludge
371 */
372
373 /*
374 * if this was a prefix match, remove trailing part of path
375 * so we can copy it. Future matches will be exact prefix match
376 */
377 if (pt->pend != NULL)
378 *pt->pend = '\0';
379
380 if ((pt->pstr = strdup(arcn->name)) == NULL) {
381 warn(1, "Pattern select out of memory");
382 if (pt->pend != NULL)
383 *pt->pend = '/';
384 pt->pend = NULL;
385 return(-1);
386 }
387
388 /*
389 * put the trailing / back in the source string
390 */
391 if (pt->pend != NULL) {
392 *pt->pend = '/';
393 pt->pend = NULL;
394 }
395 pt->plen = strlen(pt->pstr);
396
397 /*
398 * strip off any trailing /, this should really never happen
399 */
400 len = pt->plen - 1;
401 if (*(pt->pstr + len) == '/') {
402 *(pt->pstr + len) = '\0';
403 pt->plen = len;
404 }
405 pt->flgs = DIR_MTCH | MTCH;
406 arcn->pat = pt;
407 return(0);
408 }
409
410 /*
411 * we are then done with this pattern, so we delete it from the list
412 * because it can never be used for another match.
413 * Seems kind of strange to do for a -c, but the pax spec is really
414 * vague on the interaction of -c -n and -d. We assume that when -c
415 * and the pattern rejects a member (i.e. it matched it) it is done.
416 * In effect we place the order of the flags as having -c last.
417 */
418 pt = pathead;
419 ppt = &pathead;
420 while ((pt != NULL) && (pt != arcn->pat)) {
421 ppt = &(pt->fow);
422 pt = pt->fow;
423 }
424
425 if (pt == NULL) {
426 /*
427 * should never happen....
428 */
429 warn(1, "Pattern list inconsistant");
430 return(-1);
431 }
432 *ppt = pt->fow;
433 (void)free((char *)pt);
434 arcn->pat = NULL;
435 return(0);
436 }
437
438 /*
439 * pat_match()
440 * see if this archive member matches any supplied pattern, if a match
441 * is found, arcn->pat is set to point at the potential pattern. Later if
442 * this archive member is "selected" we process and mark the pattern as
443 * one which matched a selected archive member (see pat_sel())
444 * Return:
445 * 0 if this archive member should be processed, 1 if it should be
446 * skipped and -1 if we are done with all patterns (and pax should quit
447 * looking for more members)
448 */
449
450 #if __STDC__
451 int
452 pat_match(register ARCHD *arcn)
453 #else
454 int
455 pat_match(arcn)
456 register ARCHD *arcn;
457 #endif
458 {
459 register PATTERN *pt;
460
461 arcn->pat = NULL;
462
463 /*
464 * if there are no more patterns and we have -n (and not -c) we are
465 * done. otherwise with no patterns to match, matches all
466 */
467 if (pathead == NULL) {
468 if (nflag && !cflag)
469 return(-1);
470 return(0);
471 }
472
473 /*
474 * have to search down the list one at a time looking for a match.
475 */
476 pt = pathead;
477 while (pt != NULL) {
478 /*
479 * check for a file name match unless we have DIR_MTCH set in
480 * this pattern then we want a prefix match
481 */
482 if (pt->flgs & DIR_MTCH) {
483 /*
484 * this pattern was matched before to a directory
485 * as we must have -n set for this (but not -d). We can
486 * only match CHILDREN of that directory so we must use
487 * an exact prefix match (no wildcards).
488 */
489 if ((arcn->name[pt->plen] == '/') &&
490 (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
491 break;
492 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
493 break;
494 pt = pt->fow;
495 }
496
497 /*
498 * return the result, remember that cflag (-c) inverts the sense of a
499 * match
500 */
501 if (pt == NULL)
502 return(cflag ? 0 : 1);
503
504 /*
505 * we had a match, now when we invert the sense (-c) we reject this
506 * member. However we have to tag the pattern a being successful, (in a
507 * match, not in selecting a archive member) so we call pat_sel() here.
508 */
509 arcn->pat = pt;
510 if (!cflag)
511 return(0);
512
513 if (pat_sel(arcn) < 0)
514 return(-1);
515 arcn->pat = NULL;
516 return(1);
517 }
518
519 /*
520 * fn_match()
521 * Return:
522 * 0 if this archive member should be processed, 1 if it should be
523 * skipped and -1 if we are done with all patterns (and pax should quit
524 * looking for more members)
525 * Note: *pend may be changed to show where the prefix ends.
526 */
527
528 #if __STDC__
529 static int
530 fn_match(register char *pattern, register char *string, char **pend)
531 #else
532 static int
533 fn_match(pattern, string, pend)
534 register char *pattern;
535 register char *string;
536 char **pend;
537 #endif
538 {
539 register char c;
540 char test;
541
542 *pend = NULL;
543 for (;;) {
544 switch (c = *pattern++) {
545 case '\0':
546 /*
547 * Ok we found an exact match
548 */
549 if (*string == '\0')
550 return(0);
551
552 /*
553 * Check if it is a prefix match
554 */
555 if ((dflag == 1) || (*string != '/'))
556 return(-1);
557
558 /*
559 * It is a prefix match, remember where the trailing
560 * / is located
561 */
562 *pend = string;
563 return(0);
564 case '?':
565 if ((test = *string++) == '\0')
566 return (-1);
567 break;
568 case '*':
569 c = *pattern;
570 /*
571 * Collapse multiple *'s.
572 */
573 while (c == '*')
574 c = *++pattern;
575
576 /*
577 * Optimized hack for pattern with a * at the end
578 */
579 if (c == '\0')
580 return (0);
581
582 /*
583 * General case, use recursion.
584 */
585 while ((test = *string) != '\0') {
586 if (!fn_match(pattern, string, pend))
587 return (0);
588 ++string;
589 }
590 return (-1);
591 case '[':
592 /*
593 * range match
594 */
595 if (((test = *string++) == '\0') ||
596 ((pattern = range_match(pattern, test)) == NULL))
597 return (-1);
598 break;
599 case '\\':
600 default:
601 if (c != *string++)
602 return (-1);
603 break;
604 }
605 }
606 /* NOTREACHED */
607 }
608
/*
 * range_match()
 *	Match one character against a bracket list. pattern points just past
 *	the opening '['. A leading '!' negates the list. "a-z" is a range
 *	unless the '-' is directly followed by ']' or the end of the pattern,
 *	in which case the characters are taken literally.
 * Return:
 *	pointer to the character after the closing ']' if test is accepted,
 *	NULL if it is rejected or the list has no closing ']'.
 */

#ifdef __STDC__
static char *
range_match(register char *pattern, register int test)
#else
static char *
range_match(pattern, test)
	register char *pattern;
	register int test;
#endif
{
	register char lo;
	register char hi;
	int invert = 0;
	int hit = 0;
	int is_range;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern, ran off the end without a ']'
		 */
		if (lo == '\0')
			return (NULL);

		is_range = 0;
		if (*pattern == '-') {
			hi = pattern[1];
			if ((hi != '\0') && (hi != ']'))
				is_range = 1;
		}

		if (is_range) {
			if ((lo <= test) && (test <= hi))
				hit = 1;
			/* step over the '-' and the upper bound */
			pattern += 2;
		} else if (lo == test)
			hit = 1;
	}

	/*
	 * accepted when found in a plain list or missing from a negated one
	 */
	if (hit == invert)
		return (NULL);
	return (pattern);
}
644
645 /*
646 * mod_name()
647 * modify a selected file name. first attempt to apply replacement string
648 * expressions, then apply interactive file rename. We apply replacement
649 * string expressions to both filenames and file links (if we didn't the
650 * links would point to the wrong place, and we could never be able to
651 * move an archive that has a file link in it). When we rename files
652 * interactively, we store that mapping (old name to user input name) so
653 * if we spot any file links to the old file name in the future, we will
654 * know exactly how to fix the file link.
655 * Return:
656 * 0 continue to process file, 1 skip this file, -1 pax is finished
657 */
658
659 #if __STDC__
660 int
661 mod_name(register ARCHD *arcn)
662 #else
663 int
664 mod_name(arcn)
665 register ARCHD *arcn;
666 #endif
667 {
668 register int res = 0;
669
670 /*
671 * IMPORTANT: We have a problem. what do we do with symlinks?
672 * Modifying a hard link name makes sense, as we know the file it
673 * points at should have been seen already in the archive (and if it
674 * wasn't seen because of a read error or a bad archive, we lose
675 * anyway). But there are no such requirements for symlinks. On one
676 * hand the symlink that refers to a file in the archive will have to
677 * be modified to so it will still work at its new location in the
678 * file system. On the other hand a symlink that points elsewhere (and
679 * should continue to do so) should not be modified. There is clearly
680 * no perfect solution here. So we handle them like hardlinks. Clearly
681 * a replacement made by the interactive rename mapping is very likely
682 * to be correct since it applies to a single file and is an exact
683 * match. The regular expression replacements are a little harder to
684 * justify though. We claim that the symlink name is only likely
685 * to be replaced when it points within the file tree being moved and
686 * in that case it should be modified. what we really need to do is to
687 * call an oracle here. :)
688 */
689 if (rephead != NULL) {
690 /*
691 * we have replacement strings, modify the name and the link
692 * name if any.
693 */
694 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
695 return(res);
696
697 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
698 (arcn->type == PAX_HRG)) &&
699 ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
700 return(res);
701 }
702
703 if (iflag) {
704 /*
705 * perform interactive file rename, then map the link if any
706 */
707 if ((res = tty_rename(arcn)) != 0)
708 return(res);
709 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
710 (arcn->type == PAX_HRG))
711 sub_name(arcn->ln_name, &(arcn->ln_nlen));
712 }
713 return(res);
714 }
715
716 /*
717 * tty_rename()
718 * Prompt the user for a replacement file name. A "." keeps the old name,
719 * a empty line skips the file, and an EOF on reading the tty, will cause
720 * pax to stop processing and exit. Otherwise the file name input, replaces
721 * the old one.
722 * Return:
723 * 0 process this file, 1 skip this file, -1 we need to exit pax
724 */
725
726 #if __STDC__
727 static int
728 tty_rename(register ARCHD *arcn)
729 #else
730 static int
731 tty_rename(arcn)
732 register ARCHD *arcn;
733 #endif
734 {
735 char tmpname[PAXPATHLEN+2];
736 int res;
737
738 /*
739 * prompt user for the replacement name for a file, keep trying until
740 * we get some reasonable input. Archives may have more than one file
741 * on them with the same name (from updates etc). We print verbose info
742 * on the file so the user knows what is up.
743 */
744 tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
745
746 for (;;) {
747 ls_tty(arcn);
748 tty_prnt("Input new name, or a \".\" to keep the old name, ");
749 tty_prnt("or a \"return\" to skip this file.\n");
750 tty_prnt("Input > ");
751 if (tty_read(tmpname, sizeof(tmpname)) < 0)
752 return(-1);
753 if (strcmp(tmpname, "..") == 0) {
754 tty_prnt("Try again, illegal file name: ..\n");
755 continue;
756 }
757 if (strlen(tmpname) > PAXPATHLEN) {
758 tty_prnt("Try again, file name too long\n");
759 continue;
760 }
761 break;
762 }
763
764 /*
765 * empty file name, skips this file. a "." leaves it alone
766 */
767 if (tmpname[0] == '\0') {
768 tty_prnt("Skipping file.\n");
769 return(1);
770 }
771 if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
772 tty_prnt("Processing continues, name unchanged.\n");
773 return(0);
774 }
775
776 /*
777 * ok the name changed. We may run into links that point at this
778 * file later. we have to remember where the user sent the file
779 * in order to repair any links.
780 */
781 tty_prnt("Processing continues, name changed to: %s\n", tmpname);
782 res = add_name(arcn->name, arcn->nlen, tmpname);
783 arcn->nlen = l_strncpy(arcn->name, tmpname, PAXPATHLEN+1);
784 if (res < 0)
785 return(-1);
786 return(0);
787 }
788
789 /*
790 * set_dest()
791 * fix up the file name and the link name (if any) so this file will land
792 * in the destination directory (used during copy() -rw).
793 * Return:
794 * 0 if ok, -1 if failure (name too long)
795 */
796
797 #if __STDC__
798 int
799 set_dest(register ARCHD *arcn, char *dest_dir, int dir_len)
800 #else
801 int
802 set_dest(arcn, dest_dir, dir_len)
803 register ARCHD *arcn;
804 char *dest_dir;
805 int dir_len;
806 #endif
807 {
808 if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
809 return(-1);
810
811 /*
812 * It is really hard to deal with symlinks here, we cannot be sure
813 * if the name they point was moved (or will be moved). It is best to
814 * leave them alone.
815 */
816 if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
817 return(0);
818
819 if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
820 return(-1);
821 return(0);
822 }
823
824 /*
825 * fix_path
826 * concatenate dir_name and or_name and store the result in or_name (if
827 * it fits). This is one ugly function.
828 * Return:
829 * 0 if ok, -1 if the final name is too long
830 */
831
832 #if __STDC__
833 static int
834 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
835 #else
836 static int
837 fix_path(or_name, or_len, dir_name, dir_len)
838 char *or_name;
839 int *or_len;
840 char *dir_name;
841 int dir_len;
842 #endif
843 {
844 register char *src;
845 register char *dest;
846 register char *start;
847 int len;
848
849 /*
850 * we shift the or_name to the right enough to tack in the dir_name
851 * at the front. We make sure we have enough space for it all before
852 * we start. since dest always ends in a slash, we skip of or_name
853 * if it also starts with one.
854 */
855 start = or_name;
856 src = start + *or_len;
857 dest = src + dir_len;
858 if (*start == '/') {
859 ++start;
860 --dest;
861 }
862 if ((len = dest - or_name) > PAXPATHLEN) {
863 warn(1, "File name %s/%s, too long", dir_name, start);
864 return(-1);
865 }
866 *or_len = len;
867
868 /*
869 * enough space, shift
870 */
871 while (src >= start)
872 *dest-- = *src--;
873 src = dir_name + dir_len - 1;
874
875 /*
876 * splice in the destination directory name
877 */
878 while (src >= dir_name)
879 *dest-- = *src--;
880
881 *(or_name + len) = '\0';
882 return(0);
883 }
884
885 /*
886 * rep_name()
887 * walk down the list of replacement strings applying each one in order.
888 * when we find one with a successful substitution, we modify the name
889 * as specified. if required, we print the results. if the resulting name
890 * is empty, we will skip this archive member. We use the regexp(3)
891 * routines (regexp() ought to win a prize as having the most cryptic
892 * library function manual page).
893 * --Parameters--
894 * name is the file name we are going to apply the regular expressions to
895 * (and may be modified)
896 * nlen is the length of this name (and is modified to hold the length of
897 * the final string).
898 * prnt is a flag that says whether to print the final result.
899 * Return:
900 * 0 if substitution was successful, 1 if we are to skip the file (the name
901 * ended up empty)
902 */
903
904 #if __STDC__
905 static int
906 rep_name(char *name, int *nlen, int prnt)
907 #else
908 static int
909 rep_name(name, nlen, prnt)
910 char *name;
911 int *nlen;
912 int prnt;
913 #endif
914 {
915 register REPLACE *pt;
916 register char *inpt;
917 register char *outpt;
918 register char *endpt;
919 register char *rpt;
920 register int found = 0;
921 register int res;
922 # ifndef NET2_REGEX
923 regmatch_t pm[MAXSUBEXP];
924 # endif
925 char nname[PAXPATHLEN+1]; /* final result of all replacements */
926 char buf1[PAXPATHLEN+1]; /* where we work on the name */
927
928 /*
929 * copy the name into buf1, where we will work on it. We need to keep
930 * the orig string around so we can print out the result of the final
931 * replacement. We build up the final result in nname. inpt points at
932 * the string we apply the regular expression to. prnt is used to
933 * suppress printing when we handle replacements on the link field
934 * (the user already saw that substitution go by)
935 */
936 pt = rephead;
937 (void)strcpy(buf1, name);
938 inpt = buf1;
939 outpt = nname;
940 endpt = outpt + PAXPATHLEN;
941
942 /*
943 * try each replacement string in order
944 */
945 while (pt != NULL) {
946 do {
947 /*
948 * check for a successful substitution, if not go to
949 * the next pattern, or cleanup if we were global
950 */
951 # ifdef NET2_REGEX
952 if (regexec(pt->rcmp, inpt) == 0)
953 # else
954 if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
955 # endif
956 break;
957
958 /*
959 * ok we found one. We have three parts, the prefix
960 * which did not match, the section that did and the
961 * tail (that also did not match). Copy the prefix to
962 * the final output buffer (watching to make sure we
963 * do not create a string too long).
964 */
965 found = 1;
966 # ifdef NET2_REGEX
967 rpt = pt->rcmp->startp[0];
968 # else
969 rpt = inpt + pm[0].rm_so;
970 # endif
971
972 while ((inpt < rpt) && (outpt < endpt))
973 *outpt++ = *inpt++;
974 if (outpt == endpt)
975 break;
976
977 /*
978 * for the second part (which matched the regular
979 * expression) apply the substitution using the
980 * replacement string and place it the prefix in the
981 * final output. If we have problems, skip it.
982 */
983 # ifdef NET2_REGEX
984 if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
985 # else
986 if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt))
987 < 0) {
988 # endif
989 if (prnt)
990 warn(1, "Replacement name error %s",
991 name);
992 return(1);
993 }
994 outpt += res;
995
996 /*
997 * we set up to look again starting at the first
998 * character in the tail (of the input string right
999 * after the last character matched by the regular
1000 * expression (inpt always points at the first char in
1001 * the string to process). If we are not doing a global
1002 * substitution, we will use inpt to copy the tail to
1003 * the final result. Make sure we do not overrun the
1004 * output buffer
1005 */
1006 # ifdef NET2_REGEX
1007 inpt = pt->rcmp->endp[0];
1008 # else
1009 inpt += pm[0].rm_eo;
1010 # endif
1011
1012 if ((outpt == endpt) || (*inpt == '\0'))
1013 break;
1014
1015 /*
1016 * if the user wants global we keep trying to
1017 * substitute until it fails, then we are done.
1018 */
1019 } while (pt->flgs & GLOB);
1020
1021 if (found)
1022 break;
1023
1024 /*
1025 * a successful substitution did NOT occur, try the next one
1026 */
1027 pt = pt->fow;
1028 }
1029
1030 if (found) {
1031 /*
1032 * we had a substitution, copy the last tail piece (if there is
1033 * room) to the final result
1034 */
1035 while ((outpt < endpt) && (*inpt != '\0'))
1036 *outpt++ = *inpt++;
1037
1038 *outpt = '\0';
1039 if ((outpt == endpt) && (*inpt != '\0')) {
1040 if (prnt)
1041 warn(1,"Replacement name too long %s >> %s",
1042 name, nname);
1043 return(1);
1044 }
1045
1046 /*
1047 * inform the user of the result if wanted
1048 */
1049 if (prnt && (pt->flgs & PRNT)) {
1050 if (*nname == '\0')
1051 (void)fprintf(stderr,"%s >> <empty string>\n",
1052 name);
1053 else
1054 (void)fprintf(stderr,"%s >> %s\n", name, nname);
1055 }
1056
1057 /*
1058 * if empty inform the caller this file is to be skipped
1059 * otherwise copy the new name over the orig name and return
1060 */
1061 if (*nname == '\0')
1062 return(1);
1063 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
1064 }
1065 return(0);
1066 }
1067
1068 #ifdef NET2_REGEX
1069 /*
1070 * resub()
1071 * apply the replacement to the matched expression. expand out the old
1072 * style ed(1) subexpression expansion.
1073 * Return:
1074 * -1 if error, or the number of characters added to the destination.
1075 */
1076
1077 #if __STDC__
1078 static int
1079 resub(regexp *prog, char *src, char *dest, register char *destend)
1080 #else
1081 static int
1082 resub(prog, src, dest, destend)
1083 regexp *prog;
1084 char *src;
1085 char *dest;
1086 register char *destend;
1087 #endif
1088 {
1089 register char *spt;
1090 register char *dpt;
1091 register char c;
1092 register int no;
1093 register int len;
1094
1095 spt = src;
1096 dpt = dest;
1097 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1098 if (c == '&')
1099 no = 0;
1100 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
1101 no = *spt++ - '0';
1102 else {
1103 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1104 c = *spt++;
1105 *dpt++ = c;
1106 continue;
1107 }
1108 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
1109 ((len = prog->endp[no] - prog->startp[no]) <= 0))
1110 continue;
1111
1112 /*
1113 * copy the subexpression to the destination.
1114 * fail if we run out of space or the match string is damaged
1115 */
1116 if (len > (destend - dpt))
1117 len = destend - dpt;
1118 if (l_strncpy(dpt, prog->startp[no], len) != len)
1119 return(-1);
1120 dpt += len;
1121 }
1122 return(dpt - dest);
1123 }
1124
1125 #else
1126
1127 /*
1128 * resub()
1129 * apply the replacement to the matched expression. expand out the old
1130 * style ed(1) subexpression expansion.
1131 * Return:
1132 * -1 if error, or the number of characters added to the destination.
1133 */
1134
1135 #if __STDC__
1136 static int
1137 resub(regex_t *rp, register regmatch_t *pm, char *src, char *dest,
1138 register char *destend)
1139 #else
1140 static int
1141 resub(rp, pm, src, dest, destend)
1142 regex_t *rp;
1143 register regmatch_t *pm;
1144 char *src;
1145 char *dest;
1146 register char *destend;
1147 #endif
1148 {
1149 register char *spt;
1150 register char *dpt;
1151 register char c;
1152 register regmatch_t *pmpt;
1153 register int len;
1154 int subexcnt;
1155
1156 spt = src;
1157 dpt = dest;
1158 subexcnt = rp->re_nsub;
1159 while ((dpt < destend) && ((c = *spt++) != '\0')) {
1160 /*
1161 * see if we just have an ordinary replacement character
1162 * or we refer to a subexpression.
1163 */
1164 if (c == '&') {
1165 pmpt = pm;
1166 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
1167 /*
1168 * make sure there is a subexpression as specified
1169 */
1170 if ((len = *spt++ - '0') > subexcnt)
1171 return(-1);
1172 pmpt = pm + len;
1173 } else {
1174 /*
1175 * Ordinary character, just copy it
1176 */
1177 if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
1178 c = *spt++;
1179 *dpt++ = c;
1180 continue;
1181 }
1182
1183 /*
1184 * continue if the subexpression is bogus
1185 */
1186 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
1187 ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
1188 continue;
1189
1190 /*
1191 * copy the subexpression to the destination.
1192 * fail if we run out of space or the match string is damaged
1193 */
1194 if (len > (destend - dpt))
1195 len = destend - dpt;
1196 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len)
1197 return(-1);
1198 dpt += len;
1199 }
1200 return(dpt - dest);
1201 }
1202 #endif
1203