tar.c revision 1.44 1 /* $NetBSD: tar.c,v 1.44 2003/10/13 07:41:22 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <sys/cdefs.h>
37 #if defined(__RCSID) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
40 #else
41 __RCSID("$NetBSD: tar.c,v 1.44 2003/10/13 07:41:22 agc Exp $");
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49
50 #include <ctype.h>
51 #include <errno.h>
52 #include <grp.h>
53 #include <pwd.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <unistd.h>
58
59 #include "pax.h"
60 #include "extern.h"
61 #include "tar.h"
62
63 /*
64 * Routines for reading, writing and identifying the headers of various versions of tar
65 */
66
67 static int expandname(char *, size_t, char **, const char *, size_t);
68 static void longlink(ARCHD *);
69 static u_long tar_chksm(char *, int);
70 static char *name_split(char *, int);
71 static int ul_oct(u_long, char *, int, int);
72 #if !defined(NET2_STAT) && !defined(_LP64)
73 static int ull_oct(unsigned long long, char *, int, int);
74 #endif
75 static int tar_gnutar_exclude_one(const char *, size_t);
76 static int check_sum(char *, size_t, char *, size_t);
77
78 /*
79 * Routines and state common to all versions of tar
80 */
81
82 static int tar_nodir; /* do not write dirs under old tar; set by tar_opt() */
83 int is_gnutar; /* behave like gnu tar; enable gnu
84 * extensions and skip end-of-volume
85 * checks
86 */
87 static int seen_gnu_warning; /* set once ustar_id() has warned about a GNU archive */
88 static char *gnu_hack_string; /* ././@LongLink body queued by longlink() for ustar_wr() */
89 static int gnu_hack_len; /* len of gnu_hack_string, counting its terminating NUL */
90 char *gnu_name_string; /* pending ././@LongLink file name; consumed and freed by expandname() */
91 char *gnu_link_string; /* pending ././@LongLink link name; consumed and freed by expandname() */
92
93 static int
94 check_sum(char *hd, size_t hdlen, char *bl, size_t bllen)
95 {
96 u_long hdck, blck;
97
98 hdck = asc_ul(hd, hdlen, OCT);
99 blck = tar_chksm(bl, bllen);
100
101 if (hdck != blck) {
102 tty_warn(0, "Header checksum %lo does not match %lo",
103 hdck, blck);
104 return(-1);
105 }
106 return(0);
107 }
108
109
110 /*
111 * tar_endwr()
112 * add the tar trailer of two null blocks
113 * Return:
114 * 0 if ok, -1 otherwise (what wr_skip returns)
115 */
116
117 int
118 tar_endwr(void)
119 {
120 return(wr_skip((off_t)(NULLCNT*BLKMULT)));
121 }
122
123 /*
124 * tar_endrd()
125 * no cleanup needed here, just return size of trailer (for append)
126 * Return:
127 * size of trailer (2 * BLKMULT)
128 */
129
130 off_t
131 tar_endrd(void)
132 {
133 return((off_t)(NULLCNT*BLKMULT));
134 }
135
136 /*
137 * tar_trail()
138 * Called to determine if a header block is a valid trailer. We are passed
139 * the block, the in_sync flag (which tells us we are in resync mode;
140 * looking for a valid header), and cnt (which starts at zero) which is
141 * used to count the number of empty blocks we have seen so far.
142 * Return:
143 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
144 * could never contain a header.
145 */
146
147 int
148 tar_trail(char *buf, int in_resync, int *cnt)
149 {
150 int i;
151
152 /*
153 * look for all zero, trailer is two consecutive blocks of zero
154 */
155 for (i = 0; i < BLKMULT; ++i) {
156 if (buf[i] != '\0')
157 break;
158 }
159
160 /*
161 * if not all zero it is not a trailer, but MIGHT be a header.
162 */
163 if (i != BLKMULT)
164 return(-1);
165
166 /*
167 * When given a zero block, we must be careful!
168 * If we are not in resync mode, check for the trailer. Have to watch
169 * out that we do not mis-identify file data as the trailer, so we do
170 * NOT try to id a trailer during resync mode. During resync mode we
171 * might as well throw this block out since a valid header can NEVER be
172 * a block of all 0 (we must have a valid file name).
173 */
174 if (!in_resync) {
175 ++*cnt;
176 /*
177 * old GNU tar (up through 1.13) only writes one block of
178 * trailers, so we pretend we got another
179 */
180 if (is_gnutar)
181 ++*cnt;
182 if (*cnt >= NULLCNT)
183 return(0);
184 }
185 return(1);
186 }
187
/*
 * ul_oct()
 *	format an unsigned long as octal ascii into the fixed width field
 *	str[0..len-1]. The field is right aligned, '0' filled on the left and
 *	closed by the terminator selected with term, because the different
 *	tar fields (and cranky old readers) expect different endings:
 *	    3: "\0"    2: "\0" then ' '    1: ' '    0 (or other): ' ' then "\0"
 *	(given in the order the bytes appear in the field).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * place the terminator(s) at the tail of the field, last byte first
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * emit the digits from least to most significant, moving left
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (int)(val & 0x7));
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * whatever room is left in front of the number is filled with '0'
	 */
	while (pos >= 0)
		str[pos--] = '0';

	return ((val == (u_long)0) ? 0 : -1);
}
241
242 #if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	format an unsigned long long as octal ascii into the fixed width
 *	field str[0..len-1]. The field is right aligned, '0' filled on the
 *	left and closed by the terminator selected with term, because the
 *	different tar fields (and cranky old readers) expect different endings:
 *	    3: "\0"    2: "\0" then ' '    1: ' '    0 (or other): ' ' then "\0"
 *	(given in the order the bytes appear in the field).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * place the terminator(s) at the tail of the field, last byte first
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * emit the digits from least to most significant, moving left
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (int)(val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * whatever room is left in front of the number is filled with '0'
	 */
	while (pos >= 0)
		str[pos--] = '0';

	return ((val == (unsigned long long)0) ? 0 : -1);
}
296 #endif
297
298 /*
299 * tar_chksm()
300 * calculate the checksum for a tar block counting the checksum field as
301 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
302 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
303 * pad headers with 0.
304 * Return:
305 * unsigned long checksum
306 */
307
308 static u_long
309 tar_chksm(char *blk, int len)
310 {
311 char *stop;
312 char *pt;
313 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
314
315 /*
316 * add the part of the block before the checksum field
317 */
318 pt = blk;
319 stop = blk + CHK_OFFSET;
320 while (pt < stop)
321 chksm += (u_long)(*pt++ & 0xff);
322 /*
323 * move past the checksum field and keep going, spec counts the
324 * checksum field as the sum of 8 blanks (which is pre-computed as
325 * BLNKSUM).
326 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
327 * starts, no point in summing zero's)
328 */
329 pt += CHK_LEN;
330 stop = blk + len;
331 while (pt < stop)
332 chksm += (u_long)(*pt++ & 0xff);
333 return(chksm);
334 }
335
336 /*
337 * Routines for old BSD style tar (also made portable to sysV tar)
338 */
339
340 /*
341 * tar_id()
342 * determine if a block given to us is a valid tar header (and not a USTAR
343 * header). We have to be on the lookout for those pesky blocks of all
344 * zero's.
345 * Return:
346 * 0 if a tar header, -1 otherwise
347 */
348
349 int
350 tar_id(char *blk, int size)
351 {
352 HD_TAR *hd;
353 HD_USTAR *uhd;
354
355 if (size < BLKMULT)
356 return(-1);
357 hd = (HD_TAR *)blk;
358 uhd = (HD_USTAR *)blk;
359
360 /*
361 * check for block of zero's first, a simple and fast test, then make
362 * sure this is not a ustar header by looking for the ustar magic
363 * cookie. We should use TMAGLEN, but some USTAR archive programs are
364 * wrong and create archives missing the \0. Last we check the
365 * checksum. If this is ok we have to assume it is a valid header.
366 */
367 if (hd->name[0] == '\0')
368 return(-1);
369 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
370 return(-1);
371 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
372 }
373
374 /*
375 * tar_opt()
376 * handle tar format specific -o options
377 * Return:
378 * 0 if ok -1 otherwise
379 */
380
381 int
382 tar_opt(void)
383 {
384 OPLIST *opt;
385
386 while ((opt = opt_next()) != NULL) {
387 if (strcmp(opt->name, TAR_OPTION) ||
388 strcmp(opt->value, TAR_NODIR)) {
389 tty_warn(1,
390 "Unknown tar format -o option/value pair %s=%s",
391 opt->name, opt->value);
392 tty_warn(1,
393 "%s=%s is the only supported tar format option",
394 TAR_OPTION, TAR_NODIR);
395 return(-1);
396 }
397
398 /*
399 * we only support one option, and only when writing
400 */
401 if ((act != APPND) && (act != ARCHIVE)) {
402 tty_warn(1, "%s=%s is only supported when writing.",
403 opt->name, opt->value);
404 return(-1);
405 }
406 tar_nodir = 1;
407 }
408 return(0);
409 }
410
411
412 /*
413 * tar_rd()
414 * extract the values out of block already determined to be a tar header.
415 * store the values in the ARCHD parameter.
416 * Return:
417 * 0
418 */
419
420 int
421 tar_rd(ARCHD *arcn, char *buf)
422 {
423 HD_TAR *hd;
424 char *pt;
425
426 /*
427 * we only get proper sized buffers passed to us
428 */
429 if (tar_id(buf, BLKMULT) < 0)
430 return(-1);
431 memset(arcn, 0, sizeof(*arcn));
432 arcn->org_name = arcn->name;
433 arcn->pat = NULL;
434 arcn->sb.st_nlink = 1;
435
436 /*
437 * copy out the name and values in the stat buffer
438 */
439 hd = (HD_TAR *)buf;
440 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
441 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
442 &gnu_name_string, hd->name, sizeof(hd->name));
443 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
444 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
445 }
446 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
447 0xfff);
448 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
449 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
450 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
451 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
452 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
453
454 /*
455 * have to look at the last character, it may be a '/' and that is used
456 * to encode this as a directory
457 */
458 pt = &(arcn->name[arcn->nlen - 1]);
459 arcn->pad = 0;
460 arcn->skip = 0;
461 switch(hd->linkflag) {
462 case SYMTYPE:
463 /*
464 * symbolic link, need to get the link name and set the type in
465 * the st_mode so -v printing will look correct.
466 */
467 arcn->type = PAX_SLK;
468 arcn->sb.st_mode |= S_IFLNK;
469 break;
470 case LNKTYPE:
471 /*
472 * hard link, need to get the link name, set the type in the
473 * st_mode and st_nlink so -v printing will look better.
474 */
475 arcn->type = PAX_HLK;
476 arcn->sb.st_nlink = 2;
477
478 /*
479 * no idea of what type this thing really points at, but
480 * we set something for printing only.
481 */
482 arcn->sb.st_mode |= S_IFREG;
483 break;
484 case LONGLINKTYPE:
485 arcn->type = PAX_GLL;
486 /* FALLTHROUGH */
487 case LONGNAMETYPE:
488 /*
489 * GNU long link/file; we tag these here and let the
490 * pax internals deal with it -- too ugly otherwise.
491 */
492 if (hd->linkflag != LONGLINKTYPE)
493 arcn->type = PAX_GLF;
494 arcn->pad = TAR_PAD(arcn->sb.st_size);
495 arcn->skip = arcn->sb.st_size;
496 break;
497 case AREGTYPE:
498 case REGTYPE:
499 case DIRTYPE: /* see below */
500 default:
501 /*
502 * If we have a trailing / this is a directory and NOT a file.
503 * Note: V7 tar doesn't actually have DIRTYPE, but it was
504 * reported that V7 archives using USTAR directories do exist.
505 */
506 if (*pt == '/' || hd->linkflag == DIRTYPE) {
507 /*
508 * it is a directory, set the mode for -v printing
509 */
510 arcn->type = PAX_DIR;
511 arcn->sb.st_mode |= S_IFDIR;
512 arcn->sb.st_nlink = 2;
513 } else {
514 /*
515 * have a file that will be followed by data. Set the
516 * skip value to the size field and calculate the size
517 * of the padding.
518 */
519 arcn->type = PAX_REG;
520 arcn->sb.st_mode |= S_IFREG;
521 arcn->pad = TAR_PAD(arcn->sb.st_size);
522 arcn->skip = arcn->sb.st_size;
523 }
524 break;
525 }
526
527 /*
528 * strip off any trailing slash.
529 */
530 if (*pt == '/') {
531 *pt = '\0';
532 --arcn->nlen;
533 }
534 return(0);
535 }
536
537 /*
538 * tar_wr()
539 * write a tar header for the file specified in the ARCHD to the archive.
540 * Have to check for file types that cannot be stored and file names that
541 * are too long. Be careful of the term (last arg) to ul_oct, each field
542 * of tar has it own spec for the termination character(s).
543 * ASSUMED: space after header in header block is zero filled
544 * Return:
545 * 0 if file has data to be written after the header, 1 if file has NO
546 * data to write after the header, -1 if archive write failed
547 */
548
549 int
550 tar_wr(ARCHD *arcn)
551 {
552 HD_TAR *hd;
553 int len;
554 char hdblk[sizeof(HD_TAR)];
555
556 /*
557 * check for those file system types which tar cannot store
558 */
559 switch(arcn->type) {
560 case PAX_DIR:
561 /*
562 * user asked that dirs not be written to the archive
563 */
564 if (tar_nodir)
565 return(1);
566 break;
567 case PAX_CHR:
568 tty_warn(1, "Tar cannot archive a character device %s",
569 arcn->org_name);
570 return(1);
571 case PAX_BLK:
572 tty_warn(1,
573 "Tar cannot archive a block device %s", arcn->org_name);
574 return(1);
575 case PAX_SCK:
576 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
577 return(1);
578 case PAX_FIF:
579 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
580 return(1);
581 case PAX_SLK:
582 case PAX_HLK:
583 case PAX_HRG:
584 if (arcn->ln_nlen > sizeof(hd->linkname)) {
585 tty_warn(1,"Link name too long for tar %s",
586 arcn->ln_name);
587 return(1);
588 }
589 break;
590 case PAX_REG:
591 case PAX_CTG:
592 default:
593 break;
594 }
595
596 /*
597 * check file name len, remember extra char for dirs (the / at the end)
598 */
599 len = arcn->nlen;
600 if (arcn->type == PAX_DIR)
601 ++len;
602 if (len >= sizeof(hd->name)) {
603 tty_warn(1, "File name too long for tar %s", arcn->name);
604 return(1);
605 }
606
607 /*
608 * copy the data out of the ARCHD into the tar header based on the type
609 * of the file. Remember many tar readers want the unused fields to be
610 * padded with zero. We set the linkflag field (type), the linkname
611 * (or zero if not used),the size, and set the padding (if any) to be
612 * added after the file data (0 for all other types, as they only have
613 * a header)
614 */
615 memset(hdblk, 0, sizeof(hdblk));
616 hd = (HD_TAR *)hdblk;
617 strlcpy(hd->name, arcn->name, sizeof(hd->name));
618 arcn->pad = 0;
619
620 if (arcn->type == PAX_DIR) {
621 /*
622 * directories are the same as files, except have a filename
623 * that ends with a /, we add the slash here. No data follows,
624 * dirs, so no pad.
625 */
626 hd->linkflag = AREGTYPE;
627 hd->name[len-1] = '/';
628 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
629 goto out;
630 } else if (arcn->type == PAX_SLK) {
631 /*
632 * no data follows this file, so no pad
633 */
634 hd->linkflag = SYMTYPE;
635 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
636 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
637 goto out;
638 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
639 /*
640 * no data follows this file, so no pad
641 */
642 hd->linkflag = LNKTYPE;
643 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
644 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
645 goto out;
646 } else {
647 /*
648 * data follows this file, so set the pad
649 */
650 hd->linkflag = AREGTYPE;
651 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
652 tty_warn(1,"File is too large for tar %s",
653 arcn->org_name);
654 return(1);
655 }
656 arcn->pad = TAR_PAD(arcn->sb.st_size);
657 }
658
659 /*
660 * copy those fields that are independent of the type
661 */
662 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
663 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
664 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
665 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
666 goto out;
667
668 /*
669 * calculate and add the checksum, then write the header. A return of
670 * 0 tells the caller to now write the file data, 1 says no data needs
671 * to be written
672 */
673 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
674 sizeof(hd->chksum), 3))
675 goto out; /* XXX Something's wrong here
676 * because a zero-byte file can
677 * cause this to be done and
678 * yet the resulting warning
679 * seems incorrect */
680
681 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
682 return(-1);
683 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
684 return(-1);
685 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
686 return(0);
687 return(1);
688
689 out:
690 /*
691 * header field is out of range
692 */
693 tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
694 return(1);
695 }
696
697 /*
698 * Routines for POSIX ustar
699 */
700
/*
 * ustar_strd()
 *	set up for reading a ustar archive. Nothing needs to be prepared, so
 *	this always succeeds.
 * Return:
 *	0 (always)
 */

int
ustar_strd(void)
{
	return 0;
}
713
/*
 * ustar_stwr()
 *	set up for writing a ustar archive. Nothing needs to be prepared, so
 *	this always succeeds.
 * Return:
 *	0 (always)
 */

int
ustar_stwr(void)
{
	return 0;
}
726
727 /*
728 * ustar_id()
729 * determine if a block given to us is a valid ustar header. We have to
730 * be on the lookout for those pesky blocks of all zero's
731 * Return:
732 * 0 if a ustar header, -1 otherwise
733 */
734
735 int
736 ustar_id(char *blk, int size)
737 {
738 HD_USTAR *hd;
739
740 if (size < BLKMULT)
741 return(-1);
742 hd = (HD_USTAR *)blk;
743
744 /*
745 * check for block of zero's first, a simple and fast test then check
746 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
747 * programs are fouled up and create archives missing the \0. Last we
748 * check the checksum. If ok we have to assume it is a valid header.
749 */
750 if (hd->name[0] == '\0')
751 return(-1);
752 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
753 return(-1);
754 /* This is GNU tar */
755 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar &&
756 !seen_gnu_warning) {
757 seen_gnu_warning = 1;
758 tty_warn(0,
759 "Trying to read GNU tar archive with extensions off");
760 }
761 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
762 }
763
764 /*
765 * ustar_rd()
766 * extract the values out of block already determined to be a ustar header.
767 * store the values in the ARCHD parameter.
768 * Return:
769 * 0
770 */
771
772 int
773 ustar_rd(ARCHD *arcn, char *buf)
774 {
775 HD_USTAR *hd;
776 char *dest;
777 int cnt;
778 dev_t devmajor;
779 dev_t devminor;
780
781 /*
782 * we only get proper sized buffers
783 */
784 if (ustar_id(buf, BLKMULT) < 0)
785 return(-1);
786
787 memset(arcn, 0, sizeof(*arcn));
788 arcn->org_name = arcn->name;
789 arcn->pat = NULL;
790 arcn->sb.st_nlink = 1;
791 hd = (HD_USTAR *)buf;
792
793 /*
794 * see if the filename is split into two parts. if, so joint the parts.
795 * we copy the prefix first and add a / between the prefix and name.
796 */
797 dest = arcn->name;
798 if (*(hd->prefix) != '\0') {
799 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
800 dest += cnt;
801 *dest++ = '/';
802 cnt++;
803 } else {
804 cnt = 0;
805 }
806
807 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
808 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
809 &gnu_name_string, hd->name, sizeof(hd->name));
810 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
811 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
812 }
813
814 /*
815 * follow the spec to the letter. we should only have mode bits, strip
816 * off all other crud we may be passed.
817 */
818 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
819 0xfff);
820 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
821 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
822 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
823
824 /*
825 * If we can find the ascii names for gname and uname in the password
826 * and group files we will use the uid's and gid they bind. Otherwise
827 * we use the uid and gid values stored in the header. (This is what
828 * the posix spec wants).
829 */
830 hd->gname[sizeof(hd->gname) - 1] = '\0';
831 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
832 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
833 hd->uname[sizeof(hd->uname) - 1] = '\0';
834 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
835 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
836
837 /*
838 * set the defaults, these may be changed depending on the file type
839 */
840 arcn->pad = 0;
841 arcn->skip = 0;
842 arcn->sb.st_rdev = (dev_t)0;
843
844 /*
845 * set the mode and PAX type according to the typeflag in the header
846 */
847 switch(hd->typeflag) {
848 case FIFOTYPE:
849 arcn->type = PAX_FIF;
850 arcn->sb.st_mode |= S_IFIFO;
851 break;
852 case DIRTYPE:
853 arcn->type = PAX_DIR;
854 arcn->sb.st_mode |= S_IFDIR;
855 arcn->sb.st_nlink = 2;
856
857 /*
858 * Some programs that create ustar archives append a '/'
859 * to the pathname for directories. This clearly violates
860 * ustar specs, but we will silently strip it off anyway.
861 */
862 if (arcn->name[arcn->nlen - 1] == '/')
863 arcn->name[--arcn->nlen] = '\0';
864 break;
865 case BLKTYPE:
866 case CHRTYPE:
867 /*
868 * this type requires the rdev field to be set.
869 */
870 if (hd->typeflag == BLKTYPE) {
871 arcn->type = PAX_BLK;
872 arcn->sb.st_mode |= S_IFBLK;
873 } else {
874 arcn->type = PAX_CHR;
875 arcn->sb.st_mode |= S_IFCHR;
876 }
877 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
878 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
879 arcn->sb.st_rdev = TODEV(devmajor, devminor);
880 break;
881 case SYMTYPE:
882 case LNKTYPE:
883 if (hd->typeflag == SYMTYPE) {
884 arcn->type = PAX_SLK;
885 arcn->sb.st_mode |= S_IFLNK;
886 } else {
887 arcn->type = PAX_HLK;
888 /*
889 * so printing looks better
890 */
891 arcn->sb.st_mode |= S_IFREG;
892 arcn->sb.st_nlink = 2;
893 }
894 break;
895 case LONGLINKTYPE:
896 if (is_gnutar)
897 arcn->type = PAX_GLL;
898 /* FALLTHROUGH */
899 case LONGNAMETYPE:
900 if (is_gnutar) {
901 /*
902 * GNU long link/file; we tag these here and let the
903 * pax internals deal with it -- too ugly otherwise.
904 */
905 if (hd->typeflag != LONGLINKTYPE)
906 arcn->type = PAX_GLF;
907 arcn->pad = TAR_PAD(arcn->sb.st_size);
908 arcn->skip = arcn->sb.st_size;
909 } else {
910 tty_warn(1, "GNU Long %s found in posix ustar archive.",
911 hd->typeflag == LONGLINKTYPE ? "Link" : "File");
912 }
913 break;
914 case CONTTYPE:
915 case AREGTYPE:
916 case REGTYPE:
917 default:
918 /*
919 * these types have file data that follows. Set the skip and
920 * pad fields.
921 */
922 arcn->type = PAX_REG;
923 arcn->pad = TAR_PAD(arcn->sb.st_size);
924 arcn->skip = arcn->sb.st_size;
925 arcn->sb.st_mode |= S_IFREG;
926 break;
927 }
928 return(0);
929 }
930
/*
 * expandname()
 *	store a name read from a header into buf (capacity len, including
 *	the terminating NUL). If *gnu_name holds a pending GNU long name it
 *	is used in place of the header field, then freed and reset to NULL.
 *	Otherwise the header field name is used; it is nlen bytes wide and is
 *	NOT required to be NUL terminated, so it is never read past nlen.
 *	The result is truncated to fit and always NUL terminated when len
 *	is non-zero.
 * Return:
 *	the number of characters actually stored in buf (without the NUL),
 *	so callers can safely index buf with it
 */

static int
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t nlen)
{
	const char *src;
	const char *nul;
	size_t srclen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		srclen = strlen(src);
	} else {
		/*
		 * a completely filled header field has no terminator; stop
		 * at the first NUL or at the end of the field
		 */
		src = name;
		nul = memchr(name, '\0', nlen);
		srclen = (nul != NULL) ? (size_t)(nul - name) : nlen;
	}

	if (len == 0) {
		/* no room at all, not even for the terminator */
		srclen = 0;
	} else {
		if (srclen >= len)
			srclen = len - 1;
		memcpy(buf, src, srclen);
		buf[srclen] = '\0';
	}

	/*
	 * a long name applies to one header only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return (int)srclen;
}
946
947 static void
948 longlink(ARCHD *arcn)
949 {
950 ARCHD larc;
951
952 memset(&larc, 0, sizeof(larc));
953
954 switch (arcn->type) {
955 case PAX_SLK:
956 case PAX_HRG:
957 case PAX_HLK:
958 larc.type = PAX_GLL;
959 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
960 sizeof(larc.ln_name));
961 gnu_hack_string = arcn->ln_name;
962 gnu_hack_len = arcn->ln_nlen + 1;
963 break;
964 default:
965 larc.nlen = strlcpy(larc.name, "././@LongLink",
966 sizeof(larc.name));
967 gnu_hack_string = arcn->name;
968 gnu_hack_len = arcn->nlen + 1;
969 larc.type = PAX_GLF;
970 }
971 /*
972 * We need a longlink now.
973 */
974 ustar_wr(&larc);
975 }
976
977 /*
978 * ustar_wr()
979 * write a ustar header for the file specified in the ARCHD to the archive
980 * Have to check for file types that cannot be stored and file names that
981 * are too long. Be careful of the term (last arg) to ul_oct, we only use
982 * '\0' for the termination character (this is different than picky tar)
983 * ASSUMED: space after header in header block is zero filled
984 * Return:
985 * 0 if file has data to be written after the header, 1 if file has NO
986 * data to write after the header, -1 if archive write failed
987 */
988
989 int
990 ustar_wr(ARCHD *arcn)
991 {
992 HD_USTAR *hd;
993 char *pt;
994 char hdblk[sizeof(HD_USTAR)];
995 const char *user, *group;
996
997 /*
998 * check for those file system types ustar cannot store
999 */
1000 if (arcn->type == PAX_SCK) {
1001 if (!is_gnutar)
1002 tty_warn(1, "Ustar cannot archive a socket %s",
1003 arcn->org_name);
1004 return(1);
1005 }
1006
1007 /*
1008 * check the length of the linkname
1009 */
1010 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
1011 (arcn->type == PAX_HRG)) &&
1012 (arcn->ln_nlen >= sizeof(hd->linkname))){
1013 if (is_gnutar) {
1014 longlink(arcn);
1015 } else {
1016 tty_warn(1, "Link name too long for ustar %s",
1017 arcn->ln_name);
1018 return(1);
1019 }
1020 }
1021
1022 /*
1023 * split the path name into prefix and name fields (if needed). if
1024 * pt != arcn->name, the name has to be split
1025 */
1026 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1027 if (is_gnutar) {
1028 longlink(arcn);
1029 pt = arcn->name;
1030 } else {
1031 tty_warn(1, "File name too long for ustar %s",
1032 arcn->name);
1033 return(1);
1034 }
1035 }
1036
1037 /*
1038 * zero out the header so we don't have to worry about zero fill below
1039 */
1040 memset(hdblk, 0, sizeof(hdblk));
1041 hd = (HD_USTAR *)hdblk;
1042 arcn->pad = 0L;
1043
1044 /*
1045 * split the name, or zero out the prefix
1046 */
1047 if (pt != arcn->name) {
1048 /*
1049 * name was split, pt points at the / where the split is to
1050 * occur, we remove the / and copy the first part to the prefix
1051 */
1052 *pt = '\0';
1053 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1054 *pt++ = '/';
1055 }
1056
1057 /*
1058 * copy the name part. this may be the whole path or the part after
1059 * the prefix
1060 */
1061 strlcpy(hd->name, pt, sizeof(hd->name));
1062
1063 /*
1064 * set the fields in the header that are type dependent
1065 */
1066 switch(arcn->type) {
1067 case PAX_DIR:
1068 hd->typeflag = DIRTYPE;
1069 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1070 goto out;
1071 break;
1072 case PAX_CHR:
1073 case PAX_BLK:
1074 if (arcn->type == PAX_CHR)
1075 hd->typeflag = CHRTYPE;
1076 else
1077 hd->typeflag = BLKTYPE;
1078 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1079 sizeof(hd->devmajor), 3) ||
1080 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1081 sizeof(hd->devminor), 3) ||
1082 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1083 goto out;
1084 break;
1085 case PAX_FIF:
1086 hd->typeflag = FIFOTYPE;
1087 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1088 goto out;
1089 break;
1090 case PAX_GLL:
1091 case PAX_SLK:
1092 case PAX_HLK:
1093 case PAX_HRG:
1094 if (arcn->type == PAX_SLK)
1095 hd->typeflag = SYMTYPE;
1096 else if (arcn->type == PAX_GLL)
1097 hd->typeflag = LONGLINKTYPE;
1098 else
1099 hd->typeflag = LNKTYPE;
1100 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1101 if (ul_oct((u_long)gnu_hack_len, hd->size,
1102 sizeof(hd->size), 3))
1103 goto out;
1104 break;
1105 case PAX_GLF:
1106 case PAX_REG:
1107 case PAX_CTG:
1108 default:
1109 /*
1110 * file data with this type, set the padding
1111 */
1112 if (arcn->type == PAX_GLF) {
1113 hd->typeflag = LONGNAMETYPE;
1114 arcn->pad = TAR_PAD(gnu_hack_len);
1115 if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1116 sizeof(hd->size), 3)) {
1117 tty_warn(1,"File is too long for ustar %s",
1118 arcn->org_name);
1119 return(1);
1120 }
1121 } else {
1122 if (arcn->type == PAX_CTG)
1123 hd->typeflag = CONTTYPE;
1124 else
1125 hd->typeflag = REGTYPE;
1126 arcn->pad = TAR_PAD(arcn->sb.st_size);
1127 if (OFFT_OCT(arcn->sb.st_size, hd->size,
1128 sizeof(hd->size), 3)) {
1129 tty_warn(1,"File is too long for ustar %s",
1130 arcn->org_name);
1131 return(1);
1132 }
1133 }
1134 break;
1135 }
1136
1137 strncpy(hd->magic, TMAGIC, TMAGLEN);
1138 if (is_gnutar)
1139 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1140 else
1141 strncpy(hd->version, TVERSION, TVERSLEN);
1142
1143 /*
1144 * set the remaining fields. Some versions want all 16 bits of mode
1145 * we better humor them (they really do not meet spec though)....
1146 */
1147 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1148 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) ||
1149 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1150 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1151 goto out;
1152 user = user_from_uid(arcn->sb.st_uid, 1);
1153 group = group_from_gid(arcn->sb.st_gid, 1);
1154 strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1155 strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1156
1157 /*
1158 * calculate and store the checksum write the header to the archive
1159 * return 0 tells the caller to now write the file data, 1 says no data
1160 * needs to be written
1161 */
1162 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1163 sizeof(hd->chksum), 3))
1164 goto out;
1165 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1166 return(-1);
1167 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1168 return(-1);
1169 if (gnu_hack_string) {
1170 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1171 int pad = gnu_hack_len;
1172 gnu_hack_string = NULL;
1173 gnu_hack_len = 0;
1174 if (res < 0)
1175 return(-1);
1176 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1177 return(-1);
1178 }
1179 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1180 return(0);
1181 return(1);
1182
1183 out:
1184 /*
1185 * header field is out of range
1186 */
1187 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1188 return(1);
1189 }
1190
1191 /*
1192 * name_split()
1193 * see if the name has to be split for storage in a ustar header. We try
1194 * to fit the entire name in the name field without splitting if we can.
1195 * The split point is always at a /
1196 * Return
1197 * character pointer to split point (always the / that is to be removed
1198 * if the split is not needed, the points is set to the start of the file
1199 * name (it would violate the spec to split there). A NULL is returned if
1200 * the file name is too long
1201 */
1202
1203 static char *
1204 name_split(char *name, int len)
1205 {
1206 char *start;
1207
1208 /*
1209 * check to see if the file name is small enough to fit in the name
1210 * field. if so just return a pointer to the name.
1211 */
1212 if (len < TNMSZ)
1213 return(name);
1214 if (len > (TPFSZ + TNMSZ))
1215 return(NULL);
1216
1217 /*
1218 * we start looking at the biggest sized piece that fits in the name
1219 * field. We walk forward looking for a slash to split at. The idea is
1220 * to find the biggest piece to fit in the name field (or the smallest
1221 * prefix we can find) (the -1 is correct the biggest piece would
1222 * include the slash between the two parts that gets thrown away)
1223 */
1224 start = name + len - TNMSZ;
1225 while ((*start != '\0') && (*start != '/'))
1226 ++start;
1227
1228 /*
1229 * if we hit the end of the string, this name cannot be split, so we
1230 * cannot store this file.
1231 */
1232 if (*start == '\0')
1233 return(NULL);
1234 len = start - name;
1235
1236 /*
1237 * NOTE: /str where the length of str == TNMSZ cannot be stored under
1238 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1239 * the file would then expand on extract to //str. The len == 0 below
1240 * makes this special case follow the spec to the letter.
1241 */
1242 if ((len >= TPFSZ) || (len == 0))
1243 return(NULL);
1244
1245 /*
1246 * ok have a split point, return it to the caller
1247 */
1248 return(start);
1249 }
1250
/*
 * tar_gnutar_exclude_one()
 *	convert a glob into a RE of the form /^RE$//, and add it to the
 *	replacement list with rep_add(). Only the first len bytes of line
 *	are examined; one trailing newline, if present, is not part of the
 *	pattern. In the pattern `*' becomes `.*', `?' becomes `.', and any
 *	other character that is neither alphanumeric nor blank is escaped
 *	with a backslash.
 * Return
 *	0 if the expression was handed to rep_add() successfully, -1 if the
 *	pattern is too long for the conversion buffer or rep_add() failed.
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/* "/^" + at most two bytes per pattern byte + "$//" + NUL */
	char sbuf[MAXPATHLEN * 2 + 1 + 5];
	size_t i, j;
	unsigned char c;

	/*
	 * len can be 0 (empty pattern); never look in front of the buffer
	 */
	if ((len > 0) && (line[len - 1] == '\n'))
		len--;

	/*
	 * in the worst case every byte of the pattern grows to two bytes.
	 * Refuse anything that could run off the end of sbuf; the callers
	 * only warn about over-long patterns, they do not stop them.
	 */
	if (len > (sizeof(sbuf) - 6) / 2)
		return (-1);

	for (i = 0, j = 2; i < len; i++) {
		/*
		 * convert glob to regexp, escaping everything. The ctype
		 * functions need an unsigned char value, a plain char may
		 * be negative.
		 */
		c = (unsigned char)line[i];
		if (c == '*')
			sbuf[j++] = '.';
		else if (c == '?') {
			sbuf[j++] = '.';
			continue;
		} else if (!isalnum(c) && !isblank(c))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}

	/*
	 * wrap the converted pattern: /^ in front, $// (an empty
	 * replacement) behind it
	 */
	sbuf[0] = sbuf[j + 1] = sbuf[j + 2] = '/';
	sbuf[1] = '^';
	sbuf[j] = '$';
	sbuf[j + 3] = '\0';
	if (rep_add(sbuf) < 0)
		return (-1);

	return (0);
}
1282
/*
 * deal with GNU tar -X/--exclude-from & --exclude switches. basically,
 * we take each pattern (the argument itself, or each line of the file),
 * building a string from the "glob" into an RE line, of the form
 * `/^RE$//', which we pass to rep_add(), which will add an empty
 * replacement (exclusion), for the named files.
 */

/*
 * tar_gnutar_minus_minus_exclude()
 *	add the single glob in path as an exclusion.
 * Return
 *	0 if the exclusion was added, -1 if path is longer than MAXPATHLEN
 *	(after a warning) or the exclusion could not be added.
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	/*
	 * the glob to RE conversion works in a fixed size buffer, so an
	 * over-long pattern must not be passed on after the warning
	 */
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1301
/*
 * tar_gnutar_X_compat()
 *	read the file named by path and add each of its lines as an
 *	exclusion glob.
 * Return
 *	0 if every line was added. -1 if the file cannot be opened, a line
 *	holds a pattern longer than MAXPATHLEN, or a line could not be
 *	added; processing stops at the first line that fails. The file is
 *	closed again in every case.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len, patlen;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;

		/*
		 * a trailing newline is dropped before the line is
		 * converted, so it does not count against the limit
		 */
		patlen = len;
		if ((patlen > 0) && (line[patlen - 1] == '\n'))
			patlen--;

		/*
		 * the conversion works in a fixed size buffer; an over-long
		 * line must not be passed on after the warning
		 */
		if (patlen > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rval = -1;
			break;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/*
	 * line is not used past this point, so it is safe to give the
	 * stream back now
	 */
	(void)fclose(fp);
	return (rval);
}
1329