tar.c revision 1.55 1 /* $NetBSD: tar.c,v 1.55 2004/06/15 21:52:00 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
39
40 #include <sys/cdefs.h>
41 #if !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
44 #else
45 __RCSID("$NetBSD: tar.c,v 1.55 2004/06/15 21:52:00 christos Exp $");
46 #endif
47 #endif /* not lint */
48
49 #include <sys/types.h>
50 #include <sys/time.h>
51 #include <sys/stat.h>
52 #include <sys/param.h>
53
54 #include <ctype.h>
55 #include <errno.h>
56 #include <grp.h>
57 #include <pwd.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <unistd.h>
62
63 #include "pax.h"
64 #include "extern.h"
65 #include "tar.h"
66
/*
 * Routines for reading, writing and identifying the headers of various
 * versions of tar
 */
70
/*
 * Forward declarations for the file-local (static) helpers used by the
 * tar and ustar read/write routines in this file.
 */
static int expandname(char *, size_t, char **, const char *, size_t);
static void longlink(ARCHD *, int);
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
/* ull_oct() is declared under the same condition as its definition */
#if !defined(NET2_STAT) && !defined(_LP64)
static int ull_oct(unsigned long long, char *, int, int);
#endif
static int tar_gnutar_exclude_one(const char *, size_t);
static int check_sum(char *, size_t, char *, size_t, int);
81
82 /*
83 * Routines common to all versions of tar
84 */
85
/*
 * File-scope state shared by the tar/ustar routines.
 *
 * tar_nodir is set by tar_opt() and consulted by tar_wr().
 * gnu_short_trailer is (re)computed by tar_trail() and read by tar_endrd().
 * gnu_hack_string/gnu_hack_len are set by longlink() and consumed (then
 * reset) by ustar_wr().
 */
static int tar_nodir;			/* do not write dirs under old tar */
int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
					 * checks
					 */
static int seen_gnu_warning;		/* Have we warned yet? */
static char *gnu_hack_string;		/* ././@LongLink hackery */
static int gnu_hack_len;		/* len of gnu_hack_string */
char *gnu_name_string;			/* ././@LongLink hackery name */
char *gnu_link_string;			/* ././@LongLink hackery link */
static int gnu_short_trailer;		/* gnu short trailer */

/* member name written in the header that precedes a GNU long name/link */
static const char LONG_LINK[] = "././@LongLink";
99
/*
 * Device path strings, only compiled when _PAX_ is defined.
 * NOTE(review): presumably the default tape devices selected by the tar
 * front end's numeric device options -- not referenced in this part of the
 * file, confirm against the option parser.
 */
#ifdef _PAX_
char DEV_0[] = "/dev/rst0";
char DEV_1[] = "/dev/rst1";
char DEV_4[] = "/dev/rst4";
char DEV_5[] = "/dev/rst5";
char DEV_7[] = "/dev/rst7";
char DEV_8[] = "/dev/rst8";
#endif
108
109 static int
110 check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet)
111 {
112 u_long hdck, blck;
113
114 hdck = asc_ul(hd, hdlen, OCT);
115 blck = tar_chksm(bl, bllen);
116
117 if (hdck != blck) {
118 if (!quiet)
119 tty_warn(0, "Header checksum %lo does not match %lo",
120 hdck, blck);
121 return(-1);
122 }
123 return(0);
124 }
125
126
127 /*
128 * tar_endwr()
129 * add the tar trailer of two null blocks
130 * Return:
131 * 0 if ok, -1 otherwise (what wr_skip returns)
132 */
133
134 int
135 tar_endwr(void)
136 {
137 return(wr_skip((off_t)(NULLCNT * BLKMULT)));
138 }
139
140 /*
141 * tar_endrd()
142 * no cleanup needed here, just return size of trailer (for append)
143 * Return:
144 * size of trailer BLKMULT
145 */
146
147 off_t
148 tar_endrd(void)
149 {
150 return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT));
151 }
152
153 /*
154 * tar_trail()
155 * Called to determine if a header block is a valid trailer. We are passed
156 * the block, the in_sync flag (which tells us we are in resync mode;
157 * looking for a valid header), and cnt (which starts at zero) which is
158 * used to count the number of empty blocks we have seen so far.
159 * Return:
160 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
161 * could never contain a header.
162 */
163
164 int
165 tar_trail(char *buf, int in_resync, int *cnt)
166 {
167 int i;
168
169 gnu_short_trailer = 0;
170 /*
171 * look for all zero, trailer is two consecutive blocks of zero
172 */
173 for (i = 0; i < BLKMULT; ++i) {
174 if (buf[i] != '\0')
175 break;
176 }
177
178 /*
179 * if not all zero it is not a trailer, but MIGHT be a header.
180 */
181 if (i != BLKMULT)
182 return(-1);
183
184 /*
185 * When given a zero block, we must be careful!
186 * If we are not in resync mode, check for the trailer. Have to watch
187 * out that we do not mis-identify file data as the trailer, so we do
188 * NOT try to id a trailer during resync mode. During resync mode we
189 * might as well throw this block out since a valid header can NEVER be
190 * a block of all 0 (we must have a valid file name).
191 */
192 if (!in_resync) {
193 ++*cnt;
194 /*
195 * old GNU tar (up through 1.13) only writes one block of
196 * trailers, so we pretend we got another
197 */
198 if (is_gnutar) {
199 gnu_short_trailer = 1;
200 ++*cnt;
201 }
202 if (*cnt >= NULLCNT)
203 return(0);
204 }
205 return(1);
206 }
207
/*
 * ul_oct()
 *	format an unsigned long as octal digits, right-justified in the len
 *	byte field str and padded on the left with '0'. The various tar
 *	fields end in different terminator bytes; term picks the flavour:
 *	    3: "\0"    2: "\0 "    1: " "    0 (or anything else): " \0"
 *	(shown in field order, i.e. the last byte of the field is rightmost).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	char *cur = str + len - 1;

	/*
	 * lay down the terminator byte(s), working backwards from the end
	 */
	if (term == 3) {
		*cur-- = '\0';
	} else if (term == 2) {
		*cur-- = ' ';
		*cur-- = '\0';
	} else if (term == 1) {
		*cur-- = ' ';
	} else {
		*cur-- = '\0';
		*cur-- = ' ';
	}

	/*
	 * emit the octal digits, least significant first; at least one digit
	 * is produced when there is room for it
	 */
	while (cur >= str) {
		*cur-- = '0' + (char)(val & 0x7);
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * zero fill whatever is left at the front of the field
	 */
	for (; cur >= str; --cur)
		*cur = '0';

	return (val != (u_long)0) ? -1 : 0;
}
261
262 #if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	unsigned long long flavour of ul_oct(): format val as octal digits,
 *	right-justified in the len byte field str and padded on the left
 *	with '0'. term picks the field terminator:
 *	    3: "\0"    2: "\0 "    1: " "    0 (or anything else): " \0"
 *	(shown in field order, i.e. the last byte of the field is rightmost).
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	char *cur = str + len - 1;

	/*
	 * lay down the terminator byte(s), working backwards from the end
	 */
	if (term == 3) {
		*cur-- = '\0';
	} else if (term == 2) {
		*cur-- = ' ';
		*cur-- = '\0';
	} else if (term == 1) {
		*cur-- = ' ';
	} else {
		*cur-- = '\0';
		*cur-- = ' ';
	}

	/*
	 * emit the octal digits, least significant first; at least one digit
	 * is produced when there is room for it
	 */
	while (cur >= str) {
		*cur-- = '0' + (char)(val & 0x7);
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero fill whatever is left at the front of the field
	 */
	for (; cur >= str; --cur)
		*cur = '0';

	return (val != (unsigned long long)0) ? -1 : 0;
}
316 #endif
317
318 /*
319 * tar_chksm()
320 * calculate the checksum for a tar block counting the checksum field as
321 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
322 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
323 * pad headers with 0.
324 * Return:
325 * unsigned long checksum
326 */
327
328 static u_long
329 tar_chksm(char *blk, int len)
330 {
331 char *stop;
332 char *pt;
333 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
334
335 /*
336 * add the part of the block before the checksum field
337 */
338 pt = blk;
339 stop = blk + CHK_OFFSET;
340 while (pt < stop)
341 chksm += (u_long)(*pt++ & 0xff);
342 /*
343 * move past the checksum field and keep going, spec counts the
344 * checksum field as the sum of 8 blanks (which is pre-computed as
345 * BLNKSUM).
346 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
347 * starts, no point in summing zero's)
348 */
349 pt += CHK_LEN;
350 stop = blk + len;
351 while (pt < stop)
352 chksm += (u_long)(*pt++ & 0xff);
353 return(chksm);
354 }
355
356 /*
357 * Routines for old BSD style tar (also made portable to sysV tar)
358 */
359
360 /*
361 * tar_id()
362 * determine if a block given to us is a valid tar header (and not a USTAR
363 * header). We have to be on the lookout for those pesky blocks of all
364 * zero's.
365 * Return:
366 * 0 if a tar header, -1 otherwise
367 */
368
369 int
370 tar_id(char *blk, int size)
371 {
372 HD_TAR *hd;
373 HD_USTAR *uhd;
374
375 if (size < BLKMULT)
376 return(-1);
377 hd = (HD_TAR *)blk;
378 uhd = (HD_USTAR *)blk;
379
380 /*
381 * check for block of zero's first, a simple and fast test, then make
382 * sure this is not a ustar header by looking for the ustar magic
383 * cookie. We should use TMAGLEN, but some USTAR archive programs are
384 * wrong and create archives missing the \0. Last we check the
385 * checksum. If this is ok we have to assume it is a valid header.
386 */
387 if (hd->name[0] == '\0')
388 return(-1);
389 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
390 return(-1);
391 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1);
392 }
393
394 /*
395 * tar_opt()
396 * handle tar format specific -o options
397 * Return:
398 * 0 if ok -1 otherwise
399 */
400
401 int
402 tar_opt(void)
403 {
404 OPLIST *opt;
405
406 while ((opt = opt_next()) != NULL) {
407 if (strcmp(opt->name, TAR_OPTION) ||
408 strcmp(opt->value, TAR_NODIR)) {
409 tty_warn(1,
410 "Unknown tar format -o option/value pair %s=%s",
411 opt->name, opt->value);
412 tty_warn(1,
413 "%s=%s is the only supported tar format option",
414 TAR_OPTION, TAR_NODIR);
415 return(-1);
416 }
417
418 /*
419 * we only support one option, and only when writing
420 */
421 if ((act != APPND) && (act != ARCHIVE)) {
422 tty_warn(1, "%s=%s is only supported when writing.",
423 opt->name, opt->value);
424 return(-1);
425 }
426 tar_nodir = 1;
427 }
428 return(0);
429 }
430
431
432 /*
433 * tar_rd()
434 * extract the values out of block already determined to be a tar header.
435 * store the values in the ARCHD parameter.
436 * Return:
437 * 0
438 */
439
440 int
441 tar_rd(ARCHD *arcn, char *buf)
442 {
443 HD_TAR *hd;
444 char *pt;
445
446 /*
447 * we only get proper sized buffers passed to us
448 */
449 if (tar_id(buf, BLKMULT) < 0)
450 return(-1);
451 memset(arcn, 0, sizeof(*arcn));
452 arcn->org_name = arcn->name;
453 arcn->pat = NULL;
454 arcn->sb.st_nlink = 1;
455
456 /*
457 * copy out the name and values in the stat buffer
458 */
459 hd = (HD_TAR *)buf;
460 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
461 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
462 &gnu_name_string, hd->name, sizeof(hd->name));
463 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
464 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
465 }
466 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
467 0xfff);
468 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
469 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
470 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
471 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
472 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
473
474 /*
475 * have to look at the last character, it may be a '/' and that is used
476 * to encode this as a directory
477 */
478 pt = &(arcn->name[arcn->nlen - 1]);
479 arcn->pad = 0;
480 arcn->skip = 0;
481 switch(hd->linkflag) {
482 case SYMTYPE:
483 /*
484 * symbolic link, need to get the link name and set the type in
485 * the st_mode so -v printing will look correct.
486 */
487 arcn->type = PAX_SLK;
488 arcn->sb.st_mode |= S_IFLNK;
489 break;
490 case LNKTYPE:
491 /*
492 * hard link, need to get the link name, set the type in the
493 * st_mode and st_nlink so -v printing will look better.
494 */
495 arcn->type = PAX_HLK;
496 arcn->sb.st_nlink = 2;
497
498 /*
499 * no idea of what type this thing really points at, but
500 * we set something for printing only.
501 */
502 arcn->sb.st_mode |= S_IFREG;
503 break;
504 case LONGLINKTYPE:
505 case LONGNAMETYPE:
506 /*
507 * GNU long link/file; we tag these here and let the
508 * pax internals deal with it -- too ugly otherwise.
509 */
510 if (hd->linkflag != LONGLINKTYPE)
511 arcn->type = PAX_GLF;
512 else
513 arcn->type = PAX_GLL;
514 arcn->pad = TAR_PAD(arcn->sb.st_size);
515 arcn->skip = arcn->sb.st_size;
516 break;
517 case AREGTYPE:
518 case REGTYPE:
519 case DIRTYPE: /* see below */
520 default:
521 /*
522 * If we have a trailing / this is a directory and NOT a file.
523 * Note: V7 tar doesn't actually have DIRTYPE, but it was
524 * reported that V7 archives using USTAR directories do exist.
525 */
526 if (*pt == '/' || hd->linkflag == DIRTYPE) {
527 /*
528 * it is a directory, set the mode for -v printing
529 */
530 arcn->type = PAX_DIR;
531 arcn->sb.st_mode |= S_IFDIR;
532 arcn->sb.st_nlink = 2;
533 } else {
534 /*
535 * have a file that will be followed by data. Set the
536 * skip value to the size field and calculate the size
537 * of the padding.
538 */
539 arcn->type = PAX_REG;
540 arcn->sb.st_mode |= S_IFREG;
541 arcn->pad = TAR_PAD(arcn->sb.st_size);
542 arcn->skip = arcn->sb.st_size;
543 }
544 break;
545 }
546
547 /*
548 * strip off any trailing slash.
549 */
550 if (*pt == '/') {
551 *pt = '\0';
552 --arcn->nlen;
553 }
554 return(0);
555 }
556
557 /*
558 * tar_wr()
559 * write a tar header for the file specified in the ARCHD to the archive.
560 * Have to check for file types that cannot be stored and file names that
561 * are too long. Be careful of the term (last arg) to ul_oct, each field
562 * of tar has it own spec for the termination character(s).
563 * ASSUMED: space after header in header block is zero filled
564 * Return:
565 * 0 if file has data to be written after the header, 1 if file has NO
566 * data to write after the header, -1 if archive write failed
567 */
568
569 int
570 tar_wr(ARCHD *arcn)
571 {
572 HD_TAR *hd;
573 int len;
574 char hdblk[sizeof(HD_TAR)];
575
576 /*
577 * check for those file system types which tar cannot store
578 */
579 switch(arcn->type) {
580 case PAX_DIR:
581 /*
582 * user asked that dirs not be written to the archive
583 */
584 if (tar_nodir)
585 return(1);
586 break;
587 case PAX_CHR:
588 tty_warn(1, "Tar cannot archive a character device %s",
589 arcn->org_name);
590 return(1);
591 case PAX_BLK:
592 tty_warn(1,
593 "Tar cannot archive a block device %s", arcn->org_name);
594 return(1);
595 case PAX_SCK:
596 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
597 return(1);
598 case PAX_FIF:
599 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
600 return(1);
601 case PAX_SLK:
602 case PAX_HLK:
603 case PAX_HRG:
604 if (arcn->ln_nlen > sizeof(hd->linkname)) {
605 tty_warn(1,"Link name too long for tar %s",
606 arcn->ln_name);
607 return(1);
608 }
609 break;
610 case PAX_REG:
611 case PAX_CTG:
612 default:
613 break;
614 }
615
616 /*
617 * check file name len, remember extra char for dirs (the / at the end)
618 */
619 len = arcn->nlen;
620 if (arcn->type == PAX_DIR)
621 ++len;
622 if (len >= sizeof(hd->name)) {
623 tty_warn(1, "File name too long for tar %s", arcn->name);
624 return(1);
625 }
626
627 /*
628 * copy the data out of the ARCHD into the tar header based on the type
629 * of the file. Remember many tar readers want the unused fields to be
630 * padded with zero. We set the linkflag field (type), the linkname
631 * (or zero if not used),the size, and set the padding (if any) to be
632 * added after the file data (0 for all other types, as they only have
633 * a header)
634 */
635 memset(hdblk, 0, sizeof(hdblk));
636 hd = (HD_TAR *)hdblk;
637 strlcpy(hd->name, arcn->name, sizeof(hd->name));
638 arcn->pad = 0;
639
640 if (arcn->type == PAX_DIR) {
641 /*
642 * directories are the same as files, except have a filename
643 * that ends with a /, we add the slash here. No data follows,
644 * dirs, so no pad.
645 */
646 hd->linkflag = AREGTYPE;
647 hd->name[len-1] = '/';
648 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
649 goto out;
650 } else if (arcn->type == PAX_SLK) {
651 /*
652 * no data follows this file, so no pad
653 */
654 hd->linkflag = SYMTYPE;
655 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
656 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
657 goto out;
658 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
659 /*
660 * no data follows this file, so no pad
661 */
662 hd->linkflag = LNKTYPE;
663 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
664 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
665 goto out;
666 } else {
667 /*
668 * data follows this file, so set the pad
669 */
670 hd->linkflag = AREGTYPE;
671 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
672 tty_warn(1,"File is too large for tar %s",
673 arcn->org_name);
674 return(1);
675 }
676 arcn->pad = TAR_PAD(arcn->sb.st_size);
677 }
678
679 /*
680 * copy those fields that are independent of the type
681 */
682 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
683 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
684 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
685 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
686 goto out;
687
688 /*
689 * calculate and add the checksum, then write the header. A return of
690 * 0 tells the caller to now write the file data, 1 says no data needs
691 * to be written
692 */
693 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
694 sizeof(hd->chksum), 3))
695 goto out; /* XXX Something's wrong here
696 * because a zero-byte file can
697 * cause this to be done and
698 * yet the resulting warning
699 * seems incorrect */
700
701 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
702 return(-1);
703 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
704 return(-1);
705 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
706 return(0);
707 return(1);
708
709 out:
710 /*
711 * header field is out of range
712 */
713 tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
714 return(1);
715 }
716
717 /*
718 * Routines for POSIX ustar
719 */
720
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare.
 * Return:
 *	always 0 (ok)
 */

int
ustar_strd(void)
{
	return 0;
}
733
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare.
 * Return:
 *	always 0 (ok)
 */

int
ustar_stwr(void)
{
	return 0;
}
746
747 /*
748 * ustar_id()
749 * determine if a block given to us is a valid ustar header. We have to
750 * be on the lookout for those pesky blocks of all zero's
751 * Return:
752 * 0 if a ustar header, -1 otherwise
753 */
754
755 int
756 ustar_id(char *blk, int size)
757 {
758 HD_USTAR *hd;
759
760 if (size < BLKMULT)
761 return(-1);
762 hd = (HD_USTAR *)blk;
763
764 /*
765 * check for block of zero's first, a simple and fast test then check
766 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
767 * programs are fouled up and create archives missing the \0. Last we
768 * check the checksum. If ok we have to assume it is a valid header.
769 */
770 if (hd->name[0] == '\0')
771 return(-1);
772 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
773 return(-1);
774 /* This is GNU tar */
775 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar &&
776 !seen_gnu_warning) {
777 seen_gnu_warning = 1;
778 tty_warn(0,
779 "Trying to read GNU tar archive with extensions off");
780 }
781 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0);
782 }
783
784 /*
785 * ustar_rd()
786 * extract the values out of block already determined to be a ustar header.
787 * store the values in the ARCHD parameter.
788 * Return:
789 * 0
790 */
791
792 int
793 ustar_rd(ARCHD *arcn, char *buf)
794 {
795 HD_USTAR *hd;
796 char *dest;
797 int cnt;
798 dev_t devmajor;
799 dev_t devminor;
800
801 /*
802 * we only get proper sized buffers
803 */
804 if (ustar_id(buf, BLKMULT) < 0)
805 return(-1);
806
807 memset(arcn, 0, sizeof(*arcn));
808 arcn->org_name = arcn->name;
809 arcn->pat = NULL;
810 arcn->sb.st_nlink = 1;
811 hd = (HD_USTAR *)buf;
812
813 /*
814 * see if the filename is split into two parts. if, so joint the parts.
815 * we copy the prefix first and add a / between the prefix and name.
816 */
817 dest = arcn->name;
818 if (*(hd->prefix) != '\0') {
819 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
820 dest += cnt;
821 *dest++ = '/';
822 cnt++;
823 } else {
824 cnt = 0;
825 }
826
827 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
828 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
829 &gnu_name_string, hd->name, sizeof(hd->name)) + cnt;
830 arcn->ln_nlen = expandname(arcn->ln_name,
831 sizeof(arcn->ln_name), &gnu_link_string, hd->linkname,
832 sizeof(hd->linkname));
833 }
834
835 /*
836 * follow the spec to the letter. we should only have mode bits, strip
837 * off all other crud we may be passed.
838 */
839 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
840 0xfff);
841 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
842 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
843 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
844
845 /*
846 * If we can find the ascii names for gname and uname in the password
847 * and group files we will use the uid's and gid they bind. Otherwise
848 * we use the uid and gid values stored in the header. (This is what
849 * the posix spec wants).
850 */
851 hd->gname[sizeof(hd->gname) - 1] = '\0';
852 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
853 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
854 hd->uname[sizeof(hd->uname) - 1] = '\0';
855 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
856 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
857
858 /*
859 * set the defaults, these may be changed depending on the file type
860 */
861 arcn->pad = 0;
862 arcn->skip = 0;
863 arcn->sb.st_rdev = (dev_t)0;
864
865 /*
866 * set the mode and PAX type according to the typeflag in the header
867 */
868 switch(hd->typeflag) {
869 case FIFOTYPE:
870 arcn->type = PAX_FIF;
871 arcn->sb.st_mode |= S_IFIFO;
872 break;
873 case DIRTYPE:
874 arcn->type = PAX_DIR;
875 arcn->sb.st_mode |= S_IFDIR;
876 arcn->sb.st_nlink = 2;
877
878 /*
879 * Some programs that create ustar archives append a '/'
880 * to the pathname for directories. This clearly violates
881 * ustar specs, but we will silently strip it off anyway.
882 */
883 if (arcn->name[arcn->nlen - 1] == '/')
884 arcn->name[--arcn->nlen] = '\0';
885 break;
886 case BLKTYPE:
887 case CHRTYPE:
888 /*
889 * this type requires the rdev field to be set.
890 */
891 if (hd->typeflag == BLKTYPE) {
892 arcn->type = PAX_BLK;
893 arcn->sb.st_mode |= S_IFBLK;
894 } else {
895 arcn->type = PAX_CHR;
896 arcn->sb.st_mode |= S_IFCHR;
897 }
898 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
899 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
900 arcn->sb.st_rdev = TODEV(devmajor, devminor);
901 break;
902 case SYMTYPE:
903 case LNKTYPE:
904 if (hd->typeflag == SYMTYPE) {
905 arcn->type = PAX_SLK;
906 arcn->sb.st_mode |= S_IFLNK;
907 } else {
908 arcn->type = PAX_HLK;
909 /*
910 * so printing looks better
911 */
912 arcn->sb.st_mode |= S_IFREG;
913 arcn->sb.st_nlink = 2;
914 }
915 break;
916 case LONGLINKTYPE:
917 case LONGNAMETYPE:
918 if (is_gnutar) {
919 /*
920 * GNU long link/file; we tag these here and let the
921 * pax internals deal with it -- too ugly otherwise.
922 */
923 if (hd->typeflag != LONGLINKTYPE)
924 arcn->type = PAX_GLF;
925 else
926 arcn->type = PAX_GLL;
927 arcn->pad = TAR_PAD(arcn->sb.st_size);
928 arcn->skip = arcn->sb.st_size;
929 } else {
930 tty_warn(1, "GNU Long %s found in posix ustar archive.",
931 hd->typeflag == LONGLINKTYPE ? "Link" : "File");
932 }
933 break;
934 case CONTTYPE:
935 case AREGTYPE:
936 case REGTYPE:
937 default:
938 /*
939 * these types have file data that follows. Set the skip and
940 * pad fields.
941 */
942 arcn->type = PAX_REG;
943 arcn->pad = TAR_PAD(arcn->sb.st_size);
944 arcn->skip = arcn->sb.st_size;
945 arcn->sb.st_mode |= S_IFREG;
946 break;
947 }
948 return(0);
949 }
950
/*
 * expandname()
 *	store a path name in buf (len bytes). If a GNU long name is pending
 *	in *gnu_name it is used, then freed and *gnu_name is reset to NULL;
 *	otherwise the name comes from the header field name, which is nlen
 *	bytes long and need NOT be NUL terminated when it is completely full.
 *	The result is truncated to fit and is always NUL terminated when
 *	len > 0.
 * Return:
 *	the length of the string actually stored in buf (without the NUL)
 */

static int
expandname(char *buf, size_t len, char **gnu_name, const char *name,
	size_t nlen)
{
	const char *src;
	const char *nul;
	size_t srclen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		srclen = strlen(src);
	} else {
		/*
		 * never look past the end of the header field
		 */
		src = name;
		nul = memchr(name, '\0', nlen);
		srclen = (nul != NULL) ? (size_t)(nul - name) : nlen;
	}

	/*
	 * report what was stored, not the length of the source: callers use
	 * the result to index into buf.
	 */
	if (len == 0) {
		srclen = 0;
	} else {
		if (srclen >= len)
			srclen = len - 1;
		memcpy(buf, src, srclen);
		buf[srclen] = '\0';
	}

	free(*gnu_name);
	*gnu_name = NULL;
	return (int)srclen;
}
966
967 static void
968 longlink(ARCHD *arcn, int type)
969 {
970 ARCHD larc;
971
972 memset(&larc, 0, sizeof(larc));
973
974 switch (arcn->type) {
975 case PAX_SLK:
976 case PAX_HRG:
977 case PAX_HLK:
978 larc.type = type;
979 larc.nlen = strlcpy(larc.name, LONG_LINK,
980 sizeof(larc.ln_name));
981 gnu_hack_string = arcn->ln_name;
982 gnu_hack_len = arcn->ln_nlen + 1;
983 break;
984 default:
985 larc.type = type;
986 larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name));
987 gnu_hack_string = arcn->name;
988 gnu_hack_len = arcn->nlen + 1;
989 }
990 /*
991 * We need a longlink now.
992 */
993 ustar_wr(&larc);
994 }
995
996 /*
997 * ustar_wr()
998 * write a ustar header for the file specified in the ARCHD to the archive
999 * Have to check for file types that cannot be stored and file names that
1000 * are too long. Be careful of the term (last arg) to ul_oct, we only use
1001 * '\0' for the termination character (this is different than picky tar)
1002 * ASSUMED: space after header in header block is zero filled
1003 * Return:
1004 * 0 if file has data to be written after the header, 1 if file has NO
1005 * data to write after the header, -1 if archive write failed
1006 */
1007
1008 int
1009 ustar_wr(ARCHD *arcn)
1010 {
1011 HD_USTAR *hd;
1012 char *pt;
1013 char hdblk[sizeof(HD_USTAR)];
1014 const char *user, *group;
1015
1016 switch (arcn->type) {
1017 case PAX_SCK:
1018 /*
1019 * check for those file system types ustar cannot store
1020 */
1021 if (!is_gnutar)
1022 tty_warn(1, "Ustar cannot archive a socket %s",
1023 arcn->org_name);
1024 return(1);
1025
1026 case PAX_SLK:
1027 case PAX_HLK:
1028 case PAX_HRG:
1029 /*
1030 * check the length of the linkname
1031 */
1032 if (arcn->ln_nlen >= sizeof(hd->linkname)) {
1033 if (is_gnutar) {
1034 longlink(arcn, PAX_GLL);
1035 } else {
1036 tty_warn(1, "Link name too long for ustar %s",
1037 arcn->ln_name);
1038 return(1);
1039 }
1040 }
1041 break;
1042 default:
1043 break;
1044 }
1045
1046 /*
1047 * split the path name into prefix and name fields (if needed). if
1048 * pt != arcn->name, the name has to be split
1049 */
1050 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1051 if (is_gnutar) {
1052 longlink(arcn, PAX_GLF);
1053 pt = arcn->name;
1054 } else {
1055 tty_warn(1, "File name too long for ustar %s",
1056 arcn->name);
1057 return(1);
1058 }
1059 }
1060
1061 /*
1062 * zero out the header so we don't have to worry about zero fill below
1063 */
1064 memset(hdblk, 0, sizeof(hdblk));
1065 hd = (HD_USTAR *)hdblk;
1066 arcn->pad = 0L;
1067
1068 /*
1069 * split the name, or zero out the prefix
1070 */
1071 if (pt != arcn->name) {
1072 /*
1073 * name was split, pt points at the / where the split is to
1074 * occur, we remove the / and copy the first part to the prefix
1075 */
1076 *pt = '\0';
1077 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1078 *pt++ = '/';
1079 }
1080
1081 /*
1082 * copy the name part. this may be the whole path or the part after
1083 * the prefix
1084 */
1085 strlcpy(hd->name, pt, sizeof(hd->name));
1086
1087 /*
1088 * set the fields in the header that are type dependent
1089 */
1090 switch(arcn->type) {
1091 case PAX_DIR:
1092 hd->typeflag = DIRTYPE;
1093 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1094 goto out;
1095 break;
1096 case PAX_CHR:
1097 case PAX_BLK:
1098 if (arcn->type == PAX_CHR)
1099 hd->typeflag = CHRTYPE;
1100 else
1101 hd->typeflag = BLKTYPE;
1102 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1103 sizeof(hd->devmajor), 3) ||
1104 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1105 sizeof(hd->devminor), 3) ||
1106 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1107 goto out;
1108 break;
1109 case PAX_FIF:
1110 hd->typeflag = FIFOTYPE;
1111 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1112 goto out;
1113 break;
1114 case PAX_GLL:
1115 case PAX_SLK:
1116 case PAX_HLK:
1117 case PAX_HRG:
1118 if (arcn->type == PAX_SLK)
1119 hd->typeflag = SYMTYPE;
1120 else if (arcn->type == PAX_GLL)
1121 hd->typeflag = LONGLINKTYPE;
1122 else
1123 hd->typeflag = LNKTYPE;
1124 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1125 if (ul_oct((u_long)gnu_hack_len, hd->size,
1126 sizeof(hd->size), 3))
1127 goto out;
1128 break;
1129 case PAX_GLF:
1130 case PAX_REG:
1131 case PAX_CTG:
1132 default:
1133 /*
1134 * file data with this type, set the padding
1135 */
1136 if (arcn->type == PAX_GLF) {
1137 hd->typeflag = LONGNAMETYPE;
1138 arcn->pad = TAR_PAD(gnu_hack_len);
1139 if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1140 sizeof(hd->size), 3)) {
1141 tty_warn(1,"File is too long for ustar %s",
1142 arcn->org_name);
1143 return(1);
1144 }
1145 } else {
1146 if (arcn->type == PAX_CTG)
1147 hd->typeflag = CONTTYPE;
1148 else
1149 hd->typeflag = REGTYPE;
1150 arcn->pad = TAR_PAD(arcn->sb.st_size);
1151 if (OFFT_OCT(arcn->sb.st_size, hd->size,
1152 sizeof(hd->size), 3)) {
1153 tty_warn(1,"File is too long for ustar %s",
1154 arcn->org_name);
1155 return(1);
1156 }
1157 }
1158 break;
1159 }
1160
1161 strncpy(hd->magic, TMAGIC, TMAGLEN);
1162 if (is_gnutar)
1163 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1164 else
1165 strncpy(hd->version, TVERSION, TVERSLEN);
1166
1167 /*
1168 * set the remaining fields. Some versions want all 16 bits of mode
1169 * we better humor them (they really do not meet spec though)....
1170 */
1171 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1172 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) ||
1173 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1174 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1175 goto out;
1176 user = user_from_uid(arcn->sb.st_uid, 1);
1177 group = group_from_gid(arcn->sb.st_gid, 1);
1178 strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1179 strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1180
1181 /*
1182 * calculate and store the checksum write the header to the archive
1183 * return 0 tells the caller to now write the file data, 1 says no data
1184 * needs to be written
1185 */
1186 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1187 sizeof(hd->chksum), 3))
1188 goto out;
1189 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1190 return(-1);
1191 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1192 return(-1);
1193 if (gnu_hack_string) {
1194 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1195 int pad = gnu_hack_len;
1196 gnu_hack_string = NULL;
1197 gnu_hack_len = 0;
1198 if (res < 0)
1199 return(-1);
1200 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1201 return(-1);
1202 }
1203 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1204 return(0);
1205 return(1);
1206
1207 out:
1208 /*
1209 * header field is out of range
1210 */
1211 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1212 return(1);
1213 }
1214
1215 /*
1216 * name_split()
1217 * see if the name has to be split for storage in a ustar header. We try
1218 * to fit the entire name in the name field without splitting if we can.
1219 * The split point is always at a /
1220 * Return
1221 * character pointer to split point (always the / that is to be removed
1222 * if the split is not needed, the points is set to the start of the file
1223 * name (it would violate the spec to split there). A NULL is returned if
1224 * the file name is too long
1225 */
1226
1227 static char *
1228 name_split(char *name, int len)
1229 {
1230 char *start;
1231
1232 /*
1233 * check to see if the file name is small enough to fit in the name
1234 * field. if so just return a pointer to the name.
1235 */
1236 if (len < TNMSZ)
1237 return(name);
1238 if (len > (TPFSZ + TNMSZ))
1239 return(NULL);
1240
1241 /*
1242 * we start looking at the biggest sized piece that fits in the name
1243 * field. We walk forward looking for a slash to split at. The idea is
1244 * to find the biggest piece to fit in the name field (or the smallest
1245 * prefix we can find) (the -1 is correct the biggest piece would
1246 * include the slash between the two parts that gets thrown away)
1247 */
1248 start = name + len - TNMSZ;
1249 while ((*start != '\0') && (*start != '/'))
1250 ++start;
1251
1252 /*
1253 * if we hit the end of the string, this name cannot be split, so we
1254 * cannot store this file.
1255 */
1256 if (*start == '\0')
1257 return(NULL);
1258 len = start - name;
1259
1260 /*
1261 * NOTE: /str where the length of str == TNMSZ cannot be stored under
1262 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1263 * the file would then expand on extract to //str. The len == 0 below
1264 * makes this special case follow the spec to the letter.
1265 */
1266 if ((len >= TPFSZ) || (len == 0))
1267 return(NULL);
1268
1269 /*
1270 * ok have a split point, return it to the caller
1271 */
1272 return(start);
1273 }
1274
/*
 * tar_gnutar_exclude_one()
 *	convert one GNU tar exclude glob into regular expressions and pass
 *	them to rep_add() as replacements with an empty right hand side.
 *	Because we are using BRE's and cannot use | alternation, the
 *	converted glob is added up to four times, with this padding:
 *		.*\/ and $
 *		.*\/ and \/.*
 *		^    and $
 *		^    and \/.*
 *	The first two are left out when the glob starts with a '/'.
 *	Only the first len bytes of line are read (it does not have to be
 *	NUL terminated) and one trailing newline is ignored.
 * Return:
 *	0 if the glob was added or was empty, -1 if it is longer than
 *	MAXPATHLEN or rep_add() failed.
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/* every input byte can turn into two output bytes, + nul */
	char sbuf[MAXPATHLEN * 2 + 1];
	/* sbuf + leading '/' + 4 byte prefix + 4 byte suffix + "//" + nul */
	char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4];
	size_t i, j;
	unsigned char ch;

	/*
	 * len may be 0, so look at the last byte only when there is one
	 */
	if (len > 0 && line[len - 1] == '\n')
		len--;

	/*
	 * an empty glob would turn into expressions such as ^\/.* that
	 * match every absolute path; there is nothing to exclude, so
	 * ignore it
	 */
	if (len == 0)
		return (0);

	/*
	 * sbuf is sized for at most MAXPATHLEN input bytes. The callers
	 * report over-long patterns themselves, so just refuse here.
	 */
	if (len > MAXPATHLEN)
		return (-1);

	j = 0;
	for (i = 0; i < len; i++) {
		/*
		 * convert glob to regexp, escaping everything. The ctype
		 * tests need the byte as an unsigned char value.
		 */
		ch = (unsigned char)line[i];
		if (ch == '*')
			sbuf[j++] = '.';
		else if (ch == '?') {
			sbuf[j++] = '.';
			continue;
		} else if (!isalnum(ch) && !isblank(ch))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}
	/* sbuf is formatted with %s below, so it has to be terminated */
	sbuf[j] = '\0';

	/* don't need the .*\/ ones if we start with /, i guess */
	if (line[0] != '/') {
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
	}

	(void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);
	(void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);

	return (0);
}
1328
/*
 * deal with GNU tar -X/--exclude-from & --exclude switches. basically,
 * we go through each line of the file, building a string from the "glob"
 * lines in the file into RE lines, of the form `/^RE$//', which we pass
 * to rep_add(), which will add an empty replacement (exclusion), for the
 * named files.
 */

/*
 * tar_gnutar_minus_minus_exclude()
 *	add the single exclude glob given in path (a NUL terminated string).
 * Return:
 *	the result of tar_gnutar_exclude_one(), or -1 without calling it
 *	when the glob is longer than MAXPATHLEN.
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	/*
	 * the converter works in buffers sized from MAXPATHLEN, so a longer
	 * pattern must not be handed on after the warning
	 */
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1347
/*
 * tar_gnutar_X_compat()
 *	read the file named by path and add every line in it as an exclude
 *	glob through tar_gnutar_exclude_one().
 * Return:
 *	0 if every line was added, -1 if the file cannot be opened, a line
 *	is longer than MAXPATHLEN, or a glob cannot be added. Reading stops
 *	at the first failure. The file is closed before returning.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len, plen;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;

		/*
		 * len counts the newline when the line has one; leave it
		 * out of the length check, tar_gnutar_exclude_one() drops
		 * it as well
		 */
		plen = len;
		if (plen > 0 && line[plen - 1] == '\n')
			plen--;

		/*
		 * the converter works in buffers sized from MAXPATHLEN, so
		 * a longer line must not be handed on after the warning
		 */
		if (plen > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rval = -1;
			break;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/* opened read-only, so nothing is lost if the close fails */
	(void)fclose(fp);
	return (rval);
}
1375