1 /* $NetBSD: ar_subs.c,v 1.59 2024/08/05 13:37:26 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #if HAVE_NBTOOL_CONFIG_H 37 #include "nbtool_config.h" 38 #endif 39 40 #include <sys/cdefs.h> 41 #if !defined(lint) 42 #if 0 43 static char sccsid[] = "@(#)ar_subs.c 8.2 (Berkeley) 4/18/94"; 44 #else 45 __RCSID("$NetBSD: ar_subs.c,v 1.59 2024/08/05 13:37:26 riastradh Exp $"); 46 #endif 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 #include <sys/time.h> 51 #include <sys/stat.h> 52 #include <sys/param.h> 53 #include <signal.h> 54 #include <string.h> 55 #include <stdio.h> 56 #include <ctype.h> 57 #include <fcntl.h> 58 #include <errno.h> 59 #include <time.h> 60 #include <unistd.h> 61 #include <stdlib.h> 62 #include "pax.h" 63 #include "pat_rep.h" 64 #include "extern.h" 65 66 static int path_check(ARCHD *, int); 67 static int wr_archive(ARCHD *, int is_app); 68 static int get_arc(void); 69 static int next_head(ARCHD *); 70 #if !HAVE_NBTOOL_CONFIG_H 71 static int fdochroot(int); 72 #endif 73 extern sigset_t s_mask; 74 75 /* 76 * Routines which control the overall operation modes of pax as specified by 77 * the user: list, append, read ... 78 */ 79 80 static char hdbuf[BLKMULT]; /* space for archive header on read */ 81 u_long flcnt; /* number of files processed */ 82 ARCHD archd; 83 84 static char cwdpath[MAXPATHLEN]; /* current working directory path */ 85 static size_t cwdpathlen; /* current working directory path len */ 86 87 int 88 updatepath(void) 89 { 90 if (getcwd(cwdpath, sizeof(cwdpath)) == NULL) { 91 syswarn(1, errno, "Cannot get working directory"); 92 return -1; 93 } 94 cwdpathlen = strlen(cwdpath); 95 return 0; 96 } 97 98 int 99 fdochdir(int fcwd) 100 { 101 if (fchdir(fcwd) == -1) { 102 syswarn(1, errno, "Cannot chdir to `.'"); 103 return -1; 104 } 105 return updatepath(); 106 } 107 108 int 109 dochdir(const char *name) 110 { 111 if (chdir(name) == -1) 112 syswarn(1, errno, "Cannot chdir to `%s'", name); 113 return updatepath(); 114 } 115 116 #if !HAVE_NBTOOL_CONFIG_H 117 static int 118 fdochroot(int fcwd) 119 { 120 if (fchroot(fcwd) != 0) { 121 syswarn(1, errno, "Can't fchroot to \".\""); 122 return -1; 123 } 124 return updatepath(); 125 } 126 #endif 127 128 /* 129 * mkdir(), but if we failed, check if someone else made it for us 130 * already and don't error out. 131 */ 132 int 133 domkdir(const char *fname, mode_t mode) 134 { 135 int error; 136 struct stat sb; 137 138 if ((error = mkdir(fname, mode)) != -1) 139 return error; 140 141 switch (errno) { 142 case EISDIR: 143 return 0; 144 case EEXIST: 145 case EACCES: 146 case ENOSYS: /* Grr Solaris */ 147 case EROFS: 148 error = errno; 149 if (stat(fname, &sb) != -1 && S_ISDIR(sb.st_mode)) 150 return 0; 151 errno = error; 152 /*FALLTHROUGH*/ 153 default: 154 return -1; 155 } 156 } 157 158 static int 159 path_check(ARCHD *arcn, int level) 160 { 161 char buf[MAXPATHLEN]; 162 char *p; 163 164 if ((p = strrchr(arcn->name, '/')) == NULL) 165 return 0; 166 *p = '\0'; 167 168 if (realpath(arcn->name, buf) == NULL) { 169 int error; 170 error = path_check(arcn, level + 1); 171 *p = '/'; 172 if (error == 0) 173 return 0; 174 if (level == 0) 175 syswarn(1, 0, "Cannot resolve `%s'", arcn->name); 176 return -1; 177 } 178 if (strncmp(buf, cwdpath, cwdpathlen) != 0) { 179 *p = '/'; 180 syswarn(1, 0, "Attempt to write file `%s' that resolves into " 181 "`%s/%s' outside current working directory `%s' ignored", 182 arcn->name, buf, p + 1, cwdpath); 183 return -1; 184 } 185 *p = '/'; 186 return 0; 187 } 188 189 /* 190 * list() 191 * list the contents of an archive which match user supplied pattern(s) 192 * (if no pattern is supplied, list entire contents). 193 */ 194 195 int 196 list(void) 197 { 198 ARCHD *arcn; 199 int res; 200 time_t now; 201 202 arcn = &archd; 203 /* 204 * figure out archive type; pass any format specific options to the 205 * archive option processing routine; call the format init routine. We 206 * also save current time for ls_list() so we do not make a system 207 * call for each file we need to print. If verbose (vflag) start up 208 * the name and group caches. 209 */ 210 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 211 ((*frmt->st_rd)() < 0)) 212 return 1; 213 214 now = time(NULL); 215 216 /* 217 * step through the archive until the format says it is done 218 */ 219 while (next_head(arcn) == 0) { 220 if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { 221 /* 222 * we need to read, to get the real filename 223 */ 224 off_t cnt; 225 if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt)) 226 (void)rd_skip(cnt + arcn->pad); 227 continue; 228 } 229 230 /* 231 * check for pattern, and user specified options match. 232 * When all patterns are matched we are done. 233 */ 234 if ((res = pat_match(arcn)) < 0) 235 break; 236 237 if ((res == 0) && (sel_chk(arcn) == 0)) { 238 /* 239 * pattern resulted in a selected file 240 */ 241 if (pat_sel(arcn) < 0) 242 break; 243 244 /* 245 * modify the name as requested by the user if name 246 * survives modification, do a listing of the file 247 */ 248 if ((res = mod_name(arcn, RENM)) < 0) 249 break; 250 if (res == 0) { 251 if (arcn->name[0] == '/' && !check_Aflag()) { 252 memmove(arcn->name, arcn->name + 1, 253 strlen(arcn->name)); 254 } 255 ls_list(arcn, now, stdout); 256 } 257 /* 258 * if there's an error writing to stdout then we must 259 * stop now -- we're probably writing to a pipe that 260 * has been closed by the reader. 261 */ 262 if (ferror(stdout)) { 263 syswarn(1, errno, "Listing incomplete."); 264 break; 265 } 266 } 267 /* 268 * skip to next archive format header using values calculated 269 * by the format header read routine 270 */ 271 if (rd_skip(arcn->skip + arcn->pad) == 1) 272 break; 273 } 274 275 /* 276 * all done, let format have a chance to cleanup, and make sure that 277 * the patterns supplied by the user were all matched 278 */ 279 (void)(*frmt->end_rd)(); 280 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 281 ar_close(); 282 pat_chk(); 283 284 return 0; 285 } 286 287 /* 288 * extract() 289 * extract the member(s) of an archive as specified by user supplied 290 * pattern(s) (no patterns extracts all members) 291 */ 292 293 int 294 extract(void) 295 { 296 ARCHD *arcn; 297 int res; 298 off_t cnt; 299 struct stat sb; 300 int fd; 301 time_t now; 302 303 arcn = &archd; 304 /* 305 * figure out archive type; pass any format specific options to the 306 * archive option processing routine; call the format init routine; 307 * start up the directory modification time and access mode database 308 */ 309 if ((get_arc() < 0) || ((*frmt->options)() < 0) || 310 ((*frmt->st_rd)() < 0) || (dir_start() < 0)) 311 return 1; 312 313 now = time(NULL); 314 #if !HAVE_NBTOOL_CONFIG_H 315 if (do_chroot) 316 (void)fdochroot(cwdfd); 317 #endif 318 319 /* 320 * When we are doing interactive rename, we store the mapping of names 321 * so we can fix up hard links files later in the archive. 322 */ 323 if (iflag && (name_start() < 0)) 324 return 1; 325 326 /* 327 * step through each entry on the archive until the format read routine 328 * says it is done 329 */ 330 while (next_head(arcn) == 0) { 331 int write_to_hard_link = 0; 332 333 if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) { 334 /* 335 * we need to read, to get the real filename 336 */ 337 if (!(*frmt->rd_data)(arcn, -arcn->type, &cnt)) 338 (void)rd_skip(cnt + arcn->pad); 339 continue; 340 } 341 342 /* 343 * check for pattern, and user specified options match. When 344 * all the patterns are matched we are done 345 */ 346 if ((res = pat_match(arcn)) < 0) 347 break; 348 349 if ((res > 0) || (sel_chk(arcn) != 0)) { 350 /* 351 * file is not selected. skip past any file 352 * data and padding and go back for the next 353 * archive member 354 */ 355 (void)rd_skip(arcn->skip + arcn->pad); 356 continue; 357 } 358 359 if (kflag && (lstat(arcn->name, &sb) == 0)) { 360 (void)rd_skip(arcn->skip + arcn->pad); 361 continue; 362 } 363 364 /* 365 * with -u or -D only extract when the archive member is newer 366 * than the file with the same name in the file system (no 367 * test of being the same type is required). 368 * NOTE: this test is done BEFORE name modifications as 369 * specified by pax. this operation can be confusing to the 370 * user who might expect the test to be done on an existing 371 * file AFTER the name mod. In honesty the pax spec is probably 372 * flawed in this respect. ignore this for GNU long links. 373 */ 374 if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) { 375 if (uflag && Dflag) { 376 if ((arcn->sb.st_mtime <= sb.st_mtime) && 377 (arcn->sb.st_ctime <= sb.st_ctime)) { 378 (void)rd_skip(arcn->skip + arcn->pad); 379 continue; 380 } 381 } else if (Dflag) { 382 if (arcn->sb.st_ctime <= sb.st_ctime) { 383 (void)rd_skip(arcn->skip + arcn->pad); 384 continue; 385 } 386 } else if (arcn->sb.st_mtime <= sb.st_mtime) { 387 (void)rd_skip(arcn->skip + arcn->pad); 388 continue; 389 } 390 } 391 392 /* 393 * this archive member is now been selected. modify the name. 394 */ 395 if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0)) 396 break; 397 if (res > 0) { 398 /* 399 * a bad name mod, skip and purge name from link table 400 */ 401 purg_lnk(arcn); 402 (void)rd_skip(arcn->skip + arcn->pad); 403 continue; 404 } 405 406 if (arcn->name[0] == '/' && !check_Aflag()) { 407 memmove(arcn->name, arcn->name + 1, strlen(arcn->name)); 408 } 409 /* 410 * Non standard -Y and -Z flag. When the existing file is 411 * same age or newer skip; ignore this for GNU long links. 412 */ 413 if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { 414 if (Yflag && Zflag) { 415 if ((arcn->sb.st_mtime <= sb.st_mtime) && 416 (arcn->sb.st_ctime <= sb.st_ctime)) { 417 (void)rd_skip(arcn->skip + arcn->pad); 418 continue; 419 } 420 } else if (Yflag) { 421 if (arcn->sb.st_ctime <= sb.st_ctime) { 422 (void)rd_skip(arcn->skip + arcn->pad); 423 continue; 424 } 425 } else if (arcn->sb.st_mtime <= sb.st_mtime) { 426 (void)rd_skip(arcn->skip + arcn->pad); 427 continue; 428 } 429 } 430 431 if (vflag) { 432 if (vflag > 1) 433 ls_list(arcn, now, listf); 434 else { 435 (void)safe_print(arcn->name, listf); 436 vfpart = 1; 437 } 438 } 439 440 /* 441 * if required, chdir around. 442 */ 443 if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL) && 444 !to_stdout) 445 dochdir(arcn->pat->chdname); 446 447 if (secure && path_check(arcn, 0) != 0) { 448 (void)rd_skip(arcn->skip + arcn->pad); 449 continue; 450 } 451 452 /* 453 * all ok, extract this member based on type 454 */ 455 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 456 /* 457 * process archive members that are not regular files. 458 * throw out padding and any data that might follow the 459 * header (as determined by the format). 460 */ 461 if ((arcn->type == PAX_HLK) || 462 (arcn->type == PAX_HRG)) 463 res = lnk_creat(arcn, &write_to_hard_link); 464 else 465 res = node_creat(arcn); 466 467 if (!write_to_hard_link) { 468 (void)rd_skip(arcn->skip + arcn->pad); 469 if (res < 0) 470 purg_lnk(arcn); 471 472 if (vflag && vfpart) { 473 (void)putc('\n', listf); 474 vfpart = 0; 475 } 476 continue; 477 } 478 } 479 if (to_stdout) 480 fd = STDOUT_FILENO; 481 else { 482 /* 483 * We have a file with data here. If we cannot create 484 * it, skip over the data and purge the name from hard 485 * link table. 486 */ 487 if ((fd = file_creat(arcn, write_to_hard_link)) < 0) { 488 (void)fflush(listf); 489 (void)rd_skip(arcn->skip + arcn->pad); 490 purg_lnk(arcn); 491 continue; 492 } 493 } 494 /* 495 * extract the file from the archive and skip over padding and 496 * any unprocessed data 497 */ 498 res = (*frmt->rd_data)(arcn, fd, &cnt); 499 if (!to_stdout) 500 file_close(arcn, fd); 501 if (vflag && vfpart) { 502 (void)putc('\n', listf); 503 vfpart = 0; 504 } 505 if (!res) 506 (void)rd_skip(cnt + arcn->pad); 507 508 /* 509 * if required, chdir around. 510 */ 511 if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL)) 512 fdochdir(cwdfd); 513 } 514 515 /* 516 * all done, restore directory modes and times as required; make sure 517 * all patterns supplied by the user were matched; block off signals 518 * to avoid chance for multiple entry into the cleanup code. 519 */ 520 (void)(*frmt->end_rd)(); 521 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 522 ar_close(); 523 proc_dir(); 524 pat_chk(); 525 526 return 0; 527 } 528 529 /* 530 * wr_archive() 531 * Write an archive. used in both creating a new archive and appends on 532 * previously written archive. 533 */ 534 535 static int 536 wr_archive(ARCHD *arcn, int is_app) 537 { 538 int res; 539 int hlk; 540 int wr_one; 541 off_t cnt; 542 int (*wrf)(ARCHD *); 543 int fd = -1; 544 time_t now; 545 546 /* 547 * if this format supports hard link storage, start up the database 548 * that detects them. 549 */ 550 if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0)) 551 return 1; 552 553 /* 554 * start up the file traversal code and format specific write 555 */ 556 if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0)) 557 return 1; 558 wrf = frmt->wr; 559 560 now = time(NULL); 561 562 /* 563 * When we are doing interactive rename, we store the mapping of names 564 * so we can fix up hard links files later in the archive. 565 */ 566 if (iflag && (name_start() < 0)) 567 return 1; 568 569 /* 570 * if this is not append, and there are no files, we do no write a trailer 571 */ 572 wr_one = is_app; 573 574 /* 575 * while there are files to archive, process them one at at time 576 */ 577 while (next_file(arcn) == 0) { 578 /* 579 * check if this file meets user specified options match. 580 */ 581 if (sel_chk(arcn) != 0) 582 continue; 583 /* 584 * Here we handle the exclusion -X gnu style patterns which 585 * are implemented like a pattern list. We don't modify the 586 * name as this will be done below again, and we don't want 587 * to double modify it. 588 */ 589 if ((res = mod_name(arcn, 0)) < 0) 590 break; 591 if (res == 1) 592 continue; 593 fd = -1; 594 if (uflag) { 595 /* 596 * only archive if this file is newer than a file with 597 * the same name that is already stored on the archive 598 */ 599 if ((res = chk_ftime(arcn)) < 0) 600 break; 601 if (res > 0) 602 continue; 603 } 604 605 /* 606 * this file is considered selected now. see if this is a hard 607 * link to a file already stored 608 */ 609 ftree_sel(arcn); 610 if (hlk && (chk_lnk(arcn) < 0)) 611 break; 612 613 if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) || 614 (arcn->type == PAX_CTG)) { 615 /* 616 * we will have to read this file. by opening it now we 617 * can avoid writing a header to the archive for a file 618 * we were later unable to read (we also purge it from 619 * the link table). 620 */ 621 if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) { 622 syswarn(1, errno, "Unable to open %s to read", 623 arcn->org_name); 624 purg_lnk(arcn); 625 continue; 626 } 627 } 628 629 /* 630 * Now modify the name as requested by the user 631 */ 632 if ((res = mod_name(arcn, RENM)) < 0) { 633 /* 634 * name modification says to skip this file, close the 635 * file and purge link table entry 636 */ 637 rdfile_close(arcn, &fd); 638 purg_lnk(arcn); 639 break; 640 } 641 642 if (arcn->name[0] == '/' && !check_Aflag()) { 643 memmove(arcn->name, arcn->name + 1, strlen(arcn->name)); 644 } 645 646 if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) { 647 /* 648 * unable to obtain the crc we need, close the file, 649 * purge link table entry 650 */ 651 rdfile_close(arcn, &fd); 652 purg_lnk(arcn); 653 continue; 654 } 655 656 if (vflag) { 657 if (vflag > 1) 658 ls_list(arcn, now, listf); 659 else { 660 (void)safe_print(arcn->name, listf); 661 vfpart = 1; 662 } 663 } 664 ++flcnt; 665 666 /* 667 * looks safe to store the file, have the format specific 668 * routine write routine store the file header on the archive 669 */ 670 if ((res = (*wrf)(arcn)) < 0) { 671 rdfile_close(arcn, &fd); 672 break; 673 } 674 wr_one = 1; 675 if (res > 0) { 676 /* 677 * format write says no file data needs to be stored 678 * so we are done messing with this file 679 */ 680 if (vflag && vfpart) { 681 (void)putc('\n', listf); 682 vfpart = 0; 683 } 684 rdfile_close(arcn, &fd); 685 continue; 686 } 687 688 /* 689 * Add file data to the archive, quit on write error. if we 690 * cannot write the entire file contents to the archive we 691 * must pad the archive to replace the missing file data 692 * (otherwise during an extract the file header for the file 693 * which FOLLOWS this one will not be where we expect it to 694 * be). 695 */ 696 res = (*frmt->wr_data)(arcn, fd, &cnt); 697 rdfile_close(arcn, &fd); 698 if (vflag && vfpart) { 699 (void)putc('\n', listf); 700 vfpart = 0; 701 } 702 if (res < 0) 703 break; 704 705 /* 706 * pad as required, cnt is number of bytes not written 707 */ 708 if (((cnt > 0) && (wr_skip(cnt) < 0)) || 709 ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0))) 710 break; 711 } 712 713 /* 714 * tell format to write trailer; pad to block boundary; reset directory 715 * mode/access times, and check if all patterns supplied by the user 716 * were matched. block off signals to avoid chance for multiple entry 717 * into the cleanup code 718 */ 719 if (wr_one) { 720 (*frmt->end_wr)(); 721 wr_fin(); 722 } 723 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 724 ar_close(); 725 if (tflag) 726 proc_dir(); 727 ftree_chk(); 728 729 return 0; 730 } 731 732 /* 733 * append() 734 * Add file to previously written archive. Archive format specified by the 735 * user must agree with archive. The archive is read first to collect 736 * modification times (if -u) and locate the archive trailer. The archive 737 * is positioned in front of the record with the trailer and wr_archive() 738 * is called to add the new members. 739 * PAX IMPLEMENTATION DETAIL NOTE: 740 * -u is implemented by adding the new members to the end of the archive. 741 * Care is taken so that these do not end up as links to the older 742 * version of the same file already stored in the archive. It is expected 743 * when extraction occurs these newer versions will over-write the older 744 * ones stored "earlier" in the archive (this may be a bad assumption as 745 * it depends on the implementation of the program doing the extraction). 746 * It is really difficult to splice in members without either re-writing 747 * the entire archive (from the point were the old version was), or having 748 * assistance of the format specification in terms of a special update 749 * header that invalidates a previous archive record. The posix spec left 750 * the method used to implement -u unspecified. This pax is able to 751 * over write existing files that it creates. 752 */ 753 754 int 755 append(void) 756 { 757 ARCHD *arcn; 758 int res; 759 FSUB *orgfrmt; 760 int udev; 761 off_t tlen; 762 763 arcn = &archd; 764 orgfrmt = frmt; 765 766 /* 767 * Do not allow an append operation if the actual archive is of a 768 * different format than the user specified format. 769 */ 770 if (get_arc() < 0) 771 return 1; 772 if ((orgfrmt != NULL) && (orgfrmt != frmt)) { 773 tty_warn(1, "Cannot mix current archive format %s with %s", 774 frmt->name, orgfrmt->name); 775 return 1; 776 } 777 778 /* 779 * pass the format any options and start up format 780 */ 781 if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0)) 782 return 1; 783 784 /* 785 * if we only are adding members that are newer, we need to save the 786 * mod times for all files we see. 787 */ 788 if (uflag && (ftime_start() < 0)) 789 return 1; 790 791 /* 792 * some archive formats encode hard links by recording the device and 793 * file serial number (inode) but copy the file anyway (multiple times) 794 * to the archive. When we append, we run the risk that newly added 795 * files may have the same device and inode numbers as those recorded 796 * on the archive but during a previous run. If this happens, when the 797 * archive is extracted we get INCORRECT hard links. We avoid this by 798 * remapping the device numbers so that newly added files will never 799 * use the same device number as one found on the archive. remapping 800 * allows new members to safely have links among themselves. remapping 801 * also avoids problems with file inode (serial number) truncations 802 * when the inode number is larger than storage space in the archive 803 * header. See the remap routines for more details. 804 */ 805 if ((udev = frmt->udev) && (dev_start() < 0)) 806 return 1; 807 808 /* 809 * reading the archive may take a long time. If verbose tell the user 810 */ 811 if (vflag || Vflag) { 812 (void)fprintf(listf, 813 "%s: Reading archive to position at the end...", argv0); 814 vfpart = 1; 815 } 816 817 /* 818 * step through the archive until the format says it is done 819 */ 820 while (next_head(arcn) == 0) { 821 /* 822 * check if this file meets user specified options. 823 */ 824 if (sel_chk(arcn) != 0) { 825 if (rd_skip(arcn->skip + arcn->pad) == 1) 826 break; 827 continue; 828 } 829 830 if (uflag) { 831 /* 832 * see if this is the newest version of this file has 833 * already been seen, if so skip. 834 */ 835 if ((res = chk_ftime(arcn)) < 0) 836 break; 837 if (res > 0) { 838 if (rd_skip(arcn->skip + arcn->pad) == 1) 839 break; 840 continue; 841 } 842 } 843 844 /* 845 * Store this device number. Device numbers seen during the 846 * read phase of append will cause newly appended files with a 847 * device number seen in the old part of the archive to be 848 * remapped to an unused device number. 849 */ 850 if ((udev && (add_dev(arcn) < 0)) || 851 (rd_skip(arcn->skip + arcn->pad) == 1)) 852 break; 853 } 854 855 /* 856 * done, finish up read and get the number of bytes to back up so we 857 * can add new members. The format might have used the hard link table, 858 * purge it. 859 */ 860 tlen = (*frmt->end_rd)(); 861 lnk_end(); 862 863 /* 864 * try to position for write, if this fails quit. if any error occurs, 865 * we will refuse to write 866 */ 867 if (appnd_start(tlen) < 0) 868 return 1; 869 870 /* 871 * tell the user we are done reading. 872 */ 873 if ((vflag || Vflag) && vfpart) { 874 (void)safe_print("done.\n", listf); 875 vfpart = 0; 876 } 877 878 /* 879 * go to the writing phase to add the new members 880 */ 881 res = wr_archive(arcn, 1); 882 if (res == 1) { 883 /* 884 * wr_archive failed in some way, but before any files were 885 * added. These are the only steps needed to cleanup (and 886 * not truncate the archive). 887 */ 888 wr_fin(); 889 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 890 ar_close(); 891 } 892 return res; 893 } 894 895 /* 896 * archive() 897 * write a new archive 898 */ 899 900 int 901 archive(void) 902 { 903 904 /* 905 * if we only are adding members that are newer, we need to save the 906 * mod times for all files; set up for writing; pass the format any 907 * options write the archive 908 */ 909 if ((uflag && (ftime_start() < 0)) || (wr_start() < 0)) 910 return 1; 911 if ((*frmt->options)() < 0) 912 return 1; 913 914 return wr_archive(&archd, 0); 915 } 916 917 /* 918 * copy() 919 * copy files from one part of the file system to another. this does not 920 * use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an 921 * archive was written and then extracted in the destination directory 922 * (except the files are forced to be under the destination directory). 923 */ 924 925 int 926 copy(void) 927 { 928 ARCHD *arcn; 929 int res; 930 int fddest; 931 char *dest_pt; 932 size_t dlen; 933 size_t drem; 934 int fdsrc = -1; 935 struct stat sb; 936 char dirbuf[PAXPATHLEN+1]; 937 938 arcn = &archd; 939 /* 940 * set up the destination dir path and make sure it is a directory. We 941 * make sure we have a trailing / on the destination 942 */ 943 dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf)); 944 if (dlen >= sizeof(dirbuf) || 945 (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) { 946 tty_warn(1, "directory name is too long %s", dirptr); 947 return 1; 948 } 949 dest_pt = dirbuf + dlen; 950 if (*(dest_pt-1) != '/') { 951 *dest_pt++ = '/'; 952 ++dlen; 953 } 954 *dest_pt = '\0'; 955 drem = PAXPATHLEN - dlen; 956 957 if (stat(dirptr, &sb) < 0) { 958 syswarn(1, errno, "Cannot access destination directory %s", 959 dirptr); 960 return 1; 961 } 962 if (!S_ISDIR(sb.st_mode)) { 963 tty_warn(1, "Destination is not a directory %s", dirptr); 964 return 1; 965 } 966 967 /* 968 * start up the hard link table; file traversal routines and the 969 * modification time and access mode database 970 */ 971 if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0)) 972 return 1; 973 974 /* 975 * When we are doing interactive rename, we store the mapping of names 976 * so we can fix up hard links files later in the archive. 977 */ 978 if (iflag && (name_start() < 0)) 979 return 1; 980 981 /* 982 * set up to cp file trees 983 */ 984 cp_start(); 985 986 /* 987 * while there are files to archive, process them 988 */ 989 while (next_file(arcn) == 0) { 990 fdsrc = -1; 991 992 /* 993 * check if this file meets user specified options 994 */ 995 if (sel_chk(arcn) != 0) 996 continue; 997 998 /* 999 * if there is already a file in the destination directory with 1000 * the same name and it is newer, skip the one stored on the 1001 * archive. 1002 * NOTE: this test is done BEFORE name modifications as 1003 * specified by pax. this can be confusing to the user who 1004 * might expect the test to be done on an existing file AFTER 1005 * the name mod. In honesty the pax spec is probably flawed in 1006 * this respect 1007 */ 1008 if (uflag || Dflag) { 1009 /* 1010 * create the destination name 1011 */ 1012 if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'), 1013 drem + 1) > drem) { 1014 tty_warn(1, "Destination pathname too long %s", 1015 arcn->name); 1016 continue; 1017 } 1018 1019 /* 1020 * if existing file is same age or newer skip 1021 */ 1022 res = lstat(dirbuf, &sb); 1023 *dest_pt = '\0'; 1024 1025 if (res == 0) { 1026 if (uflag && Dflag) { 1027 if ((arcn->sb.st_mtime<=sb.st_mtime) && 1028 (arcn->sb.st_ctime<=sb.st_ctime)) 1029 continue; 1030 } else if (Dflag) { 1031 if (arcn->sb.st_ctime <= sb.st_ctime) 1032 continue; 1033 } else if (arcn->sb.st_mtime <= sb.st_mtime) 1034 continue; 1035 } 1036 } 1037 1038 /* 1039 * this file is considered selected. See if this is a hard link 1040 * to a previous file; modify the name as requested by the 1041 * user; set the final destination. 1042 */ 1043 ftree_sel(arcn); 1044 if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn, RENM)) < 0)) 1045 break; 1046 if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) { 1047 /* 1048 * skip file, purge from link table 1049 */ 1050 purg_lnk(arcn); 1051 continue; 1052 } 1053 1054 /* 1055 * Non standard -Y and -Z flag. When the existing file is 1056 * same age or newer skip 1057 */ 1058 if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) { 1059 if (Yflag && Zflag) { 1060 if ((arcn->sb.st_mtime <= sb.st_mtime) && 1061 (arcn->sb.st_ctime <= sb.st_ctime)) 1062 continue; 1063 } else if (Yflag) { 1064 if (arcn->sb.st_ctime <= sb.st_ctime) 1065 continue; 1066 } else if (arcn->sb.st_mtime <= sb.st_mtime) 1067 continue; 1068 } 1069 1070 if (vflag) { 1071 (void)safe_print(arcn->name, listf); 1072 vfpart = 1; 1073 } 1074 ++flcnt; 1075 1076 /* 1077 * try to create a hard link to the src file if requested 1078 * but make sure we are not trying to overwrite ourselves. 1079 */ 1080 if (lflag) 1081 res = cross_lnk(arcn); 1082 else 1083 res = chk_same(arcn); 1084 if (res <= 0) { 1085 if (vflag && vfpart) { 1086 (void)putc('\n', listf); 1087 vfpart = 0; 1088 } 1089 continue; 1090 } 1091 1092 /* 1093 * have to create a new file 1094 */ 1095 if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) { 1096 /* 1097 * create a link or special file 1098 */ 1099 if ((arcn->type == PAX_HLK) || 1100 (arcn->type == PAX_HRG)) { 1101 int payload; 1102 1103 res = lnk_creat(arcn, &payload); 1104 } else { 1105 res = node_creat(arcn); 1106 } 1107 if (res < 0) 1108 purg_lnk(arcn); 1109 if (vflag && vfpart) { 1110 (void)putc('\n', listf); 1111 vfpart = 0; 1112 } 1113 continue; 1114 } 1115 1116 /* 1117 * have to copy a regular file to the destination directory. 1118 * first open source file and then create the destination file 1119 */ 1120 if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) { 1121 syswarn(1, errno, "Unable to open %s to read", 1122 arcn->org_name); 1123 purg_lnk(arcn); 1124 continue; 1125 } 1126 if ((fddest = file_creat(arcn, 0)) < 0) { 1127 rdfile_close(arcn, &fdsrc); 1128 purg_lnk(arcn); 1129 continue; 1130 } 1131 1132 /* 1133 * copy source file data to the destination file. 1134 * if there was a failure, remove the temporary file 1135 * and leave any existing destination file unmodified. 1136 */ 1137 if (cp_file(arcn, fdsrc, fddest) < 0) 1138 file_cleanup(arcn, fddest); 1139 else 1140 file_close(arcn, fddest); 1141 rdfile_close(arcn, &fdsrc); 1142 1143 if (vflag && vfpart) { 1144 (void)putc('\n', listf); 1145 vfpart = 0; 1146 } 1147 } 1148 1149 /* 1150 * restore directory modes and times as required; make sure all 1151 * patterns were selected block off signals to avoid chance for 1152 * multiple entry into the cleanup code. 1153 */ 1154 (void)sigprocmask(SIG_BLOCK, &s_mask, NULL); 1155 ar_close(); 1156 proc_dir(); 1157 ftree_chk(); 1158 1159 return 0; 1160 } 1161 1162 /* 1163 * next_head() 1164 * try to find a valid header in the archive. Uses format specific 1165 * routines to extract the header and id the trailer. Trailers may be 1166 * located within a valid header or in an invalid header (the location 1167 * is format specific. The inhead field from the option table tells us 1168 * where to look for the trailer). 1169 * We keep reading (and resyncing) until we get enough contiguous data 1170 * to check for a header. If we cannot find one, we shift by a byte 1171 * add a new byte from the archive to the end of the buffer and try again. 1172 * If we get a read error, we throw out what we have (as we must have 1173 * contiguous data) and start over again. 1174 * ASSUMED: headers fit within a BLKMULT header. 1175 * Return: 1176 * 0 if we got a header, -1 if we are unable to ever find another one 1177 * (we reached the end of input, or we reached the limit on retries. see 1178 * the specs for rd_wrbuf() for more details) 1179 */ 1180 1181 static int 1182 next_head(ARCHD *arcn) 1183 { 1184 int ret; 1185 char *hdend; 1186 int res; 1187 int shftsz; 1188 int hsz; 1189 int in_resync = 0; /* set when we are in resync mode */ 1190 int cnt = 0; /* counter for trailer function */ 1191 int first = 1; /* on 1st read, EOF isn't premature. */ 1192 1193 /* 1194 * set up initial conditions, we want a whole frmt->hsz block as we 1195 * have no data yet. 1196 */ 1197 res = hsz = frmt->hsz; 1198 hdend = hdbuf; 1199 shftsz = hsz - 1; 1200 for(;;) { 1201 /* 1202 * keep looping until we get a contiguous FULL buffer 1203 * (frmt->hsz is the proper size) 1204 */ 1205 for (;;) { 1206 if ((ret = rd_wrbuf(hdend, res)) == res) 1207 break; 1208 1209 /* 1210 * If we read 0 bytes (EOF) from an archive when we 1211 * expect to find a header, we have stepped upon 1212 * an archive without the customary block of zeroes 1213 * end marker. It's just stupid to error out on 1214 * them, so exit gracefully. 1215 */ 1216 if (first && ret == 0) 1217 return -1; 1218 first = 0; 1219 1220 /* 1221 * some kind of archive read problem, try to resync the 1222 * storage device, better give the user the bad news. 1223 */ 1224 if ((ret == 0) || (rd_sync() < 0)) { 1225 tty_warn(1, 1226 "Premature end of file on archive read"); 1227 return -1; 1228 } 1229 if (!in_resync) { 1230 if (act == APPND) { 1231 tty_warn(1, 1232 "Archive I/O error, cannot continue"); 1233 return -1; 1234 } 1235 tty_warn(1, 1236 "Archive I/O error. Trying to recover."); 1237 ++in_resync; 1238 } 1239 1240 /* 1241 * oh well, throw it all out and start over 1242 */ 1243 res = hsz; 1244 hdend = hdbuf; 1245 } 1246 1247 /* 1248 * ok we have a contiguous buffer of the right size. Call the 1249 * format read routine. If this was not a valid header and this 1250 * format stores trailers outside of the header, call the 1251 * format specific trailer routine to check for a trailer. We 1252 * have to watch out that we do not mis-identify file data or 1253 * block padding as a header or trailer. Format specific 1254 * trailer functions must NOT check for the trailer while we 1255 * are running in resync mode. Some trailer functions may tell 1256 * us that this block cannot contain a valid header either, so 1257 * we then throw out the entire block and start over. 1258 */ 1259 if ((*frmt->rd)(arcn, hdbuf) == 0) 1260 break; 1261 1262 if (!frmt->inhead) { 1263 /* 1264 * this format has trailers outside of valid headers 1265 */ 1266 if ((ret = (*frmt->trail)(hdbuf,in_resync,&cnt)) == 0){ 1267 /* 1268 * valid trailer found, drain input as required 1269 */ 1270 ar_drain(); 1271 return -1; 1272 } 1273 1274 if (ret == 1) { 1275 /* 1276 * we are in resync and we were told to throw 1277 * the whole block out because none of the 1278 * bytes in this block can be used to form a 1279 * valid header 1280 */ 1281 res = hsz; 1282 hdend = hdbuf; 1283 continue; 1284 } 1285 } 1286 1287 /* 1288 * Brute force section. 1289 * not a valid header. We may be able to find a header yet. So 1290 * we shift over by one byte, and set up to read one byte at a 1291 * time from the archive and place it at the end of the buffer. 1292 * We will keep moving byte at a time until we find a header or 1293 * get a read error and have to start over. 1294 */ 1295 if (!in_resync) { 1296 if (act == APPND) { 1297 tty_warn(1, 1298 "Unable to append, archive header flaw"); 1299 return -1; 1300 } 1301 tty_warn(1, 1302 "Invalid header, starting valid header search."); 1303 ++in_resync; 1304 } 1305 memmove(hdbuf, hdbuf+1, shftsz); 1306 res = 1; 1307 hdend = hdbuf + shftsz; 1308 } 1309 1310 /* 1311 * ok got a valid header, check for trailer if format encodes it in the 1312 * the header. NOTE: the parameters are different than trailer routines 1313 * which encode trailers outside of the header! 1314 */ 1315 if (frmt->inhead && ((*frmt->subtrail)(arcn) == 0)) { 1316 /* 1317 * valid trailer found, drain input as required 1318 */ 1319 ar_drain(); 1320 return -1; 1321 } 1322 1323 ++flcnt; 1324 return 0; 1325 } 1326 1327 /* 1328 * get_arc() 1329 * Figure out what format an archive is. Handles archive with flaws by 1330 * brute force searches for a legal header in any supported format. The 1331 * format id routines have to be careful to NOT mis-identify a format. 1332 * ASSUMED: headers fit within a BLKMULT header. 1333 * Return: 1334 * 0 if archive found -1 otherwise 1335 */ 1336 1337 static int 1338 get_arc(void) 1339 { 1340 int i; 1341 int hdsz = 0; 1342 int res; 1343 int minhd = BLKMULT; 1344 char *hdend; 1345 int notice = 0; 1346 1347 /* 1348 * find the smallest header size in all archive formats and then set up 1349 * to read the archive. 1350 */ 1351 for (i = 0; ford[i] >= 0; ++i) { 1352 if (fsub[ford[i]].hsz < minhd) 1353 minhd = fsub[ford[i]].hsz; 1354 } 1355 if (rd_start() < 0) 1356 return -1; 1357 res = BLKMULT; 1358 hdsz = 0; 1359 hdend = hdbuf; 1360 for(;;) { 1361 for (;;) { 1362 /* 1363 * fill the buffer with at least the smallest header 1364 */ 1365 i = rd_wrbuf(hdend, res); 1366 if (i > 0) 1367 hdsz += i; 1368 if (hdsz >= minhd) 1369 break; 1370 1371 /* 1372 * if we cannot recover from a read error quit 1373 */ 1374 if ((i == 0) || (rd_sync() < 0)) 1375 goto out; 1376 1377 /* 1378 * when we get an error none of the data we already 1379 * have can be used to create a legal header (we just 1380 * got an error in the middle), so we throw it all out 1381 * and refill the buffer with fresh data. 1382 */ 1383 res = BLKMULT; 1384 hdsz = 0; 1385 hdend = hdbuf; 1386 if (!notice) { 1387 if (act == APPND) 1388 return -1; 1389 tty_warn(1, 1390 "Cannot identify format. Searching..."); 1391 ++notice; 1392 } 1393 } 1394 1395 /* 1396 * we have at least the size of the smallest header in any 1397 * archive format. Look to see if we have a match. The array 1398 * ford[] is used to specify the header id order to reduce the 1399 * chance of incorrectly id'ing a valid header (some formats 1400 * may be subsets of each other and the order would then be 1401 * important). 1402 */ 1403 for (i = 0; ford[i] >= 0; ++i) { 1404 if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0) 1405 continue; 1406 frmt = &(fsub[ford[i]]); 1407 /* 1408 * yuck, to avoid slow special case code in the extract 1409 * routines, just push this header back as if it was 1410 * not seen. We have left extra space at start of the 1411 * buffer for this purpose. This is a bit ugly, but 1412 * adding all the special case code is far worse. 1413 */ 1414 pback(hdbuf, hdsz); 1415 return 0; 1416 } 1417 1418 /* 1419 * We have a flawed archive, no match. we start searching, but 1420 * we never allow additions to flawed archives 1421 */ 1422 if (!notice) { 1423 if (act == APPND) 1424 return -1; 1425 tty_warn(1, "Cannot identify format. Searching..."); 1426 ++notice; 1427 } 1428 1429 /* 1430 * brute force search for a header that we can id. 1431 * we shift through byte at a time. this is slow, but we cannot 1432 * determine the nature of the flaw in the archive in a 1433 * portable manner 1434 */ 1435 if (--hdsz > 0) { 1436 memmove(hdbuf, hdbuf+1, hdsz); 1437 res = BLKMULT - hdsz; 1438 hdend = hdbuf + hdsz; 1439 } else { 1440 res = BLKMULT; 1441 hdend = hdbuf; 1442 hdsz = 0; 1443 } 1444 } 1445 1446 out: 1447 /* 1448 * we cannot find a header, bow, apologize and quit 1449 */ 1450 tty_warn(1, "Sorry, unable to determine archive format."); 1451 return -1; 1452 } 1453