tar.c revision 1.43 1 /* $NetBSD: tar.c,v 1.43 2003/08/07 09:05:22 agc Exp $ */
2
3 /*-
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Keith Muller of the University of California, San Diego.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1992 Keith Muller.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * Keith Muller of the University of California, San Diego.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 */
69
70 #include <sys/cdefs.h>
71 #if defined(__RCSID) && !defined(lint)
72 #if 0
73 static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
74 #else
75 __RCSID("$NetBSD: tar.c,v 1.43 2003/08/07 09:05:22 agc Exp $");
76 #endif
77 #endif /* not lint */
78
79 #include <sys/types.h>
80 #include <sys/time.h>
81 #include <sys/stat.h>
82 #include <sys/param.h>
83
84 #include <ctype.h>
85 #include <errno.h>
86 #include <grp.h>
87 #include <pwd.h>
88 #include <stdio.h>
89 #include <stdlib.h>
90 #include <string.h>
91 #include <unistd.h>
92
93 #include "pax.h"
94 #include "extern.h"
95 #include "tar.h"
96
/*
 * Routines for reading, writing and identifying the headers of the various
 * versions of tar
 */
100
/*
 * Prototypes for the file-local helpers.  ull_oct() is only declared when
 * neither NET2_STAT nor _LP64 is defined, mirroring the conditional that
 * surrounds its definition.
 */
static int expandname(char *, size_t, char **, const char *, size_t);
static void longlink(ARCHD *);
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
#if !defined(NET2_STAT) && !defined(_LP64)
static int ull_oct(unsigned long long, char *, int, int);
#endif
static int tar_gnutar_exclude_one(const char *, size_t);
static int check_sum(char *, size_t, char *, size_t);
111
112 /*
113 * Routines common to all versions of tar
114 */
115
static int tar_nodir;		/* do not write dirs under old tar; set by
				 * tar_opt(), tested by tar_wr()
				 */
int is_gnutar;			/* behave like gnu tar; enable gnu
				 * extensions and skip end-of-volume
				 * checks
				 */
static int seen_gnu_warning;	/* Have we warned yet? (ustar_id() warns
				 * about GNU archives only once)
				 */
static char *gnu_hack_string;	/* ././@LongLink hackery; the over-long name
				 * or link name selected by longlink()
				 */
static int gnu_hack_len;	/* len of gnu_hack_string; longlink() stores
				 * the string length plus one
				 */
char *gnu_name_string;		/* ././@LongLink hackery name; used and then
				 * freed by expandname()
				 */
char *gnu_link_string;		/* ././@LongLink hackery link; used and then
				 * freed by expandname()
				 */
126
127 static int
128 check_sum(char *hd, size_t hdlen, char *bl, size_t bllen)
129 {
130 u_long hdck, blck;
131
132 hdck = asc_ul(hd, hdlen, OCT);
133 blck = tar_chksm(bl, bllen);
134
135 if (hdck != blck) {
136 tty_warn(0, "Header checksum %lo does not match %lo",
137 hdck, blck);
138 return(-1);
139 }
140 return(0);
141 }
142
143
144 /*
145 * tar_endwr()
146 * add the tar trailer of two null blocks
147 * Return:
148 * 0 if ok, -1 otherwise (what wr_skip returns)
149 */
150
151 int
152 tar_endwr(void)
153 {
154 return(wr_skip((off_t)(NULLCNT*BLKMULT)));
155 }
156
157 /*
158 * tar_endrd()
159 * no cleanup needed here, just return size of trailer (for append)
160 * Return:
161 * size of trailer (2 * BLKMULT)
162 */
163
164 off_t
165 tar_endrd(void)
166 {
167 return((off_t)(NULLCNT*BLKMULT));
168 }
169
170 /*
171 * tar_trail()
172 * Called to determine if a header block is a valid trailer. We are passed
173 * the block, the in_sync flag (which tells us we are in resync mode;
174 * looking for a valid header), and cnt (which starts at zero) which is
175 * used to count the number of empty blocks we have seen so far.
176 * Return:
177 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
178 * could never contain a header.
179 */
180
181 int
182 tar_trail(char *buf, int in_resync, int *cnt)
183 {
184 int i;
185
186 /*
187 * look for all zero, trailer is two consecutive blocks of zero
188 */
189 for (i = 0; i < BLKMULT; ++i) {
190 if (buf[i] != '\0')
191 break;
192 }
193
194 /*
195 * if not all zero it is not a trailer, but MIGHT be a header.
196 */
197 if (i != BLKMULT)
198 return(-1);
199
200 /*
201 * When given a zero block, we must be careful!
202 * If we are not in resync mode, check for the trailer. Have to watch
203 * out that we do not mis-identify file data as the trailer, so we do
204 * NOT try to id a trailer during resync mode. During resync mode we
205 * might as well throw this block out since a valid header can NEVER be
206 * a block of all 0 (we must have a valid file name).
207 */
208 if (!in_resync) {
209 ++*cnt;
210 /*
211 * old GNU tar (up through 1.13) only writes one block of
212 * trailers, so we pretend we got another
213 */
214 if (is_gnutar)
215 ++*cnt;
216 if (*cnt >= NULLCNT)
217 return(0);
218 }
219 return(1);
220 }
221
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		3	digits followed by '\0'
 *		2	digits followed by '\0' and ' '
 *		1	digits followed by ' '
 *		0	(and anything else) digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	A field that is too short to hold even the terminator is rejected
 *	without writing anything, so nothing is ever stored in front of str.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos;
	int ntrail;

	/*
	 * number of terminating characters the selected format needs
	 */
	ntrail = (term == 1 || term == 3) ? 1 : 2;
	if (len < ntrail)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert, least significant digit first, working towards the front
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (val & 0x7));
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * zero pad whatever space is left in front of the digits
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
275
276 #if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	convert an unsigned long long to an octal string. one of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields. term selects which kind to use:
 *		3	digits followed by '\0'
 *		2	digits followed by '\0' and ' '
 *		1	digits followed by ' '
 *		0	(and anything else) digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	A field that is too short to hold even the terminator is rejected
 *	without writing anything, so nothing is ever stored in front of str.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos;
	int ntrail;

	/*
	 * number of terminating characters the selected format needs
	 */
	ntrail = (term == 1 || term == 3) ? 1 : 2;
	if (len < ntrail)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert, least significant digit first, working towards the front
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (val & 0x7));
		if ((val = val >> 3) == 0)
			break;
	}

	/*
	 * zero pad whatever space is left in front of the digits
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (unsigned long long)0)
		return(-1);
	return(0);
}
330 #endif
331
332 /*
333 * tar_chksm()
334 * calculate the checksum for a tar block counting the checksum field as
335 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
336 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
337 * pad headers with 0.
338 * Return:
339 * unsigned long checksum
340 */
341
342 static u_long
343 tar_chksm(char *blk, int len)
344 {
345 char *stop;
346 char *pt;
347 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
348
349 /*
350 * add the part of the block before the checksum field
351 */
352 pt = blk;
353 stop = blk + CHK_OFFSET;
354 while (pt < stop)
355 chksm += (u_long)(*pt++ & 0xff);
356 /*
357 * move past the checksum field and keep going, spec counts the
358 * checksum field as the sum of 8 blanks (which is pre-computed as
359 * BLNKSUM).
360 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
361 * starts, no point in summing zero's)
362 */
363 pt += CHK_LEN;
364 stop = blk + len;
365 while (pt < stop)
366 chksm += (u_long)(*pt++ & 0xff);
367 return(chksm);
368 }
369
370 /*
371 * Routines for old BSD style tar (also made portable to sysV tar)
372 */
373
374 /*
375 * tar_id()
376 * determine if a block given to us is a valid tar header (and not a USTAR
377 * header). We have to be on the lookout for those pesky blocks of all
378 * zero's.
379 * Return:
380 * 0 if a tar header, -1 otherwise
381 */
382
383 int
384 tar_id(char *blk, int size)
385 {
386 HD_TAR *hd;
387 HD_USTAR *uhd;
388
389 if (size < BLKMULT)
390 return(-1);
391 hd = (HD_TAR *)blk;
392 uhd = (HD_USTAR *)blk;
393
394 /*
395 * check for block of zero's first, a simple and fast test, then make
396 * sure this is not a ustar header by looking for the ustar magic
397 * cookie. We should use TMAGLEN, but some USTAR archive programs are
398 * wrong and create archives missing the \0. Last we check the
399 * checksum. If this is ok we have to assume it is a valid header.
400 */
401 if (hd->name[0] == '\0')
402 return(-1);
403 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
404 return(-1);
405 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
406 }
407
408 /*
409 * tar_opt()
410 * handle tar format specific -o options
411 * Return:
412 * 0 if ok -1 otherwise
413 */
414
415 int
416 tar_opt(void)
417 {
418 OPLIST *opt;
419
420 while ((opt = opt_next()) != NULL) {
421 if (strcmp(opt->name, TAR_OPTION) ||
422 strcmp(opt->value, TAR_NODIR)) {
423 tty_warn(1,
424 "Unknown tar format -o option/value pair %s=%s",
425 opt->name, opt->value);
426 tty_warn(1,
427 "%s=%s is the only supported tar format option",
428 TAR_OPTION, TAR_NODIR);
429 return(-1);
430 }
431
432 /*
433 * we only support one option, and only when writing
434 */
435 if ((act != APPND) && (act != ARCHIVE)) {
436 tty_warn(1, "%s=%s is only supported when writing.",
437 opt->name, opt->value);
438 return(-1);
439 }
440 tar_nodir = 1;
441 }
442 return(0);
443 }
444
445
446 /*
447 * tar_rd()
448 * extract the values out of block already determined to be a tar header.
449 * store the values in the ARCHD parameter.
450 * Return:
451 * 0
452 */
453
454 int
455 tar_rd(ARCHD *arcn, char *buf)
456 {
457 HD_TAR *hd;
458 char *pt;
459
460 /*
461 * we only get proper sized buffers passed to us
462 */
463 if (tar_id(buf, BLKMULT) < 0)
464 return(-1);
465 memset(arcn, 0, sizeof(*arcn));
466 arcn->org_name = arcn->name;
467 arcn->pat = NULL;
468 arcn->sb.st_nlink = 1;
469
470 /*
471 * copy out the name and values in the stat buffer
472 */
473 hd = (HD_TAR *)buf;
474 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
475 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
476 &gnu_name_string, hd->name, sizeof(hd->name));
477 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
478 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
479 }
480 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
481 0xfff);
482 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
483 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
484 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
485 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
486 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
487
488 /*
489 * have to look at the last character, it may be a '/' and that is used
490 * to encode this as a directory
491 */
492 pt = &(arcn->name[arcn->nlen - 1]);
493 arcn->pad = 0;
494 arcn->skip = 0;
495 switch(hd->linkflag) {
496 case SYMTYPE:
497 /*
498 * symbolic link, need to get the link name and set the type in
499 * the st_mode so -v printing will look correct.
500 */
501 arcn->type = PAX_SLK;
502 arcn->sb.st_mode |= S_IFLNK;
503 break;
504 case LNKTYPE:
505 /*
506 * hard link, need to get the link name, set the type in the
507 * st_mode and st_nlink so -v printing will look better.
508 */
509 arcn->type = PAX_HLK;
510 arcn->sb.st_nlink = 2;
511
512 /*
513 * no idea of what type this thing really points at, but
514 * we set something for printing only.
515 */
516 arcn->sb.st_mode |= S_IFREG;
517 break;
518 case LONGLINKTYPE:
519 arcn->type = PAX_GLL;
520 /* FALLTHROUGH */
521 case LONGNAMETYPE:
522 /*
523 * GNU long link/file; we tag these here and let the
524 * pax internals deal with it -- too ugly otherwise.
525 */
526 if (hd->linkflag != LONGLINKTYPE)
527 arcn->type = PAX_GLF;
528 arcn->pad = TAR_PAD(arcn->sb.st_size);
529 arcn->skip = arcn->sb.st_size;
530 break;
531 case AREGTYPE:
532 case REGTYPE:
533 case DIRTYPE: /* see below */
534 default:
535 /*
536 * If we have a trailing / this is a directory and NOT a file.
537 * Note: V7 tar doesn't actually have DIRTYPE, but it was
538 * reported that V7 archives using USTAR directories do exist.
539 */
540 if (*pt == '/' || hd->linkflag == DIRTYPE) {
541 /*
542 * it is a directory, set the mode for -v printing
543 */
544 arcn->type = PAX_DIR;
545 arcn->sb.st_mode |= S_IFDIR;
546 arcn->sb.st_nlink = 2;
547 } else {
548 /*
549 * have a file that will be followed by data. Set the
550 * skip value to the size field and calculate the size
551 * of the padding.
552 */
553 arcn->type = PAX_REG;
554 arcn->sb.st_mode |= S_IFREG;
555 arcn->pad = TAR_PAD(arcn->sb.st_size);
556 arcn->skip = arcn->sb.st_size;
557 }
558 break;
559 }
560
561 /*
562 * strip off any trailing slash.
563 */
564 if (*pt == '/') {
565 *pt = '\0';
566 --arcn->nlen;
567 }
568 return(0);
569 }
570
571 /*
572 * tar_wr()
573 * write a tar header for the file specified in the ARCHD to the archive.
574 * Have to check for file types that cannot be stored and file names that
575 * are too long. Be careful of the term (last arg) to ul_oct, each field
576 * of tar has it own spec for the termination character(s).
577 * ASSUMED: space after header in header block is zero filled
578 * Return:
579 * 0 if file has data to be written after the header, 1 if file has NO
580 * data to write after the header, -1 if archive write failed
581 */
582
583 int
584 tar_wr(ARCHD *arcn)
585 {
586 HD_TAR *hd;
587 int len;
588 char hdblk[sizeof(HD_TAR)];
589
590 /*
591 * check for those file system types which tar cannot store
592 */
593 switch(arcn->type) {
594 case PAX_DIR:
595 /*
596 * user asked that dirs not be written to the archive
597 */
598 if (tar_nodir)
599 return(1);
600 break;
601 case PAX_CHR:
602 tty_warn(1, "Tar cannot archive a character device %s",
603 arcn->org_name);
604 return(1);
605 case PAX_BLK:
606 tty_warn(1,
607 "Tar cannot archive a block device %s", arcn->org_name);
608 return(1);
609 case PAX_SCK:
610 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
611 return(1);
612 case PAX_FIF:
613 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
614 return(1);
615 case PAX_SLK:
616 case PAX_HLK:
617 case PAX_HRG:
618 if (arcn->ln_nlen > sizeof(hd->linkname)) {
619 tty_warn(1,"Link name too long for tar %s",
620 arcn->ln_name);
621 return(1);
622 }
623 break;
624 case PAX_REG:
625 case PAX_CTG:
626 default:
627 break;
628 }
629
630 /*
631 * check file name len, remember extra char for dirs (the / at the end)
632 */
633 len = arcn->nlen;
634 if (arcn->type == PAX_DIR)
635 ++len;
636 if (len >= sizeof(hd->name)) {
637 tty_warn(1, "File name too long for tar %s", arcn->name);
638 return(1);
639 }
640
641 /*
642 * copy the data out of the ARCHD into the tar header based on the type
643 * of the file. Remember many tar readers want the unused fields to be
644 * padded with zero. We set the linkflag field (type), the linkname
645 * (or zero if not used),the size, and set the padding (if any) to be
646 * added after the file data (0 for all other types, as they only have
647 * a header)
648 */
649 memset(hdblk, 0, sizeof(hdblk));
650 hd = (HD_TAR *)hdblk;
651 strlcpy(hd->name, arcn->name, sizeof(hd->name));
652 arcn->pad = 0;
653
654 if (arcn->type == PAX_DIR) {
655 /*
656 * directories are the same as files, except have a filename
657 * that ends with a /, we add the slash here. No data follows,
658 * dirs, so no pad.
659 */
660 hd->linkflag = AREGTYPE;
661 hd->name[len-1] = '/';
662 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
663 goto out;
664 } else if (arcn->type == PAX_SLK) {
665 /*
666 * no data follows this file, so no pad
667 */
668 hd->linkflag = SYMTYPE;
669 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
670 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
671 goto out;
672 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
673 /*
674 * no data follows this file, so no pad
675 */
676 hd->linkflag = LNKTYPE;
677 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
678 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
679 goto out;
680 } else {
681 /*
682 * data follows this file, so set the pad
683 */
684 hd->linkflag = AREGTYPE;
685 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
686 tty_warn(1,"File is too large for tar %s",
687 arcn->org_name);
688 return(1);
689 }
690 arcn->pad = TAR_PAD(arcn->sb.st_size);
691 }
692
693 /*
694 * copy those fields that are independent of the type
695 */
696 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
697 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
698 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
699 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
700 goto out;
701
702 /*
703 * calculate and add the checksum, then write the header. A return of
704 * 0 tells the caller to now write the file data, 1 says no data needs
705 * to be written
706 */
707 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
708 sizeof(hd->chksum), 3))
709 goto out; /* XXX Something's wrong here
710 * because a zero-byte file can
711 * cause this to be done and
712 * yet the resulting warning
713 * seems incorrect */
714
715 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
716 return(-1);
717 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
718 return(-1);
719 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
720 return(0);
721 return(1);
722
723 out:
724 /*
725 * header field is out of range
726 */
727 tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
728 return(1);
729 }
730
731 /*
732 * Routines for POSIX ustar
733 */
734
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare.
 * Return:
 *	0 (always ok)
 */

int
ustar_strd(void)
{
	/* no per-archive read state to initialize */
	return 0;
}
747
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare.
 * Return:
 *	0 (always ok)
 */

int
ustar_stwr(void)
{
	/* no per-archive write state to initialize */
	return 0;
}
760
761 /*
762 * ustar_id()
763 * determine if a block given to us is a valid ustar header. We have to
764 * be on the lookout for those pesky blocks of all zero's
765 * Return:
766 * 0 if a ustar header, -1 otherwise
767 */
768
769 int
770 ustar_id(char *blk, int size)
771 {
772 HD_USTAR *hd;
773
774 if (size < BLKMULT)
775 return(-1);
776 hd = (HD_USTAR *)blk;
777
778 /*
779 * check for block of zero's first, a simple and fast test then check
780 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
781 * programs are fouled up and create archives missing the \0. Last we
782 * check the checksum. If ok we have to assume it is a valid header.
783 */
784 if (hd->name[0] == '\0')
785 return(-1);
786 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
787 return(-1);
788 /* This is GNU tar */
789 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar &&
790 !seen_gnu_warning) {
791 seen_gnu_warning = 1;
792 tty_warn(0,
793 "Trying to read GNU tar archive with extensions off");
794 }
795 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
796 }
797
798 /*
799 * ustar_rd()
800 * extract the values out of block already determined to be a ustar header.
801 * store the values in the ARCHD parameter.
802 * Return:
803 * 0
804 */
805
806 int
807 ustar_rd(ARCHD *arcn, char *buf)
808 {
809 HD_USTAR *hd;
810 char *dest;
811 int cnt;
812 dev_t devmajor;
813 dev_t devminor;
814
815 /*
816 * we only get proper sized buffers
817 */
818 if (ustar_id(buf, BLKMULT) < 0)
819 return(-1);
820
821 memset(arcn, 0, sizeof(*arcn));
822 arcn->org_name = arcn->name;
823 arcn->pat = NULL;
824 arcn->sb.st_nlink = 1;
825 hd = (HD_USTAR *)buf;
826
827 /*
828 * see if the filename is split into two parts. if, so joint the parts.
829 * we copy the prefix first and add a / between the prefix and name.
830 */
831 dest = arcn->name;
832 if (*(hd->prefix) != '\0') {
833 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
834 dest += cnt;
835 *dest++ = '/';
836 cnt++;
837 } else {
838 cnt = 0;
839 }
840
841 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
842 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
843 &gnu_name_string, hd->name, sizeof(hd->name));
844 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
845 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
846 }
847
848 /*
849 * follow the spec to the letter. we should only have mode bits, strip
850 * off all other crud we may be passed.
851 */
852 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
853 0xfff);
854 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
855 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
856 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
857
858 /*
859 * If we can find the ascii names for gname and uname in the password
860 * and group files we will use the uid's and gid they bind. Otherwise
861 * we use the uid and gid values stored in the header. (This is what
862 * the posix spec wants).
863 */
864 hd->gname[sizeof(hd->gname) - 1] = '\0';
865 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
866 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
867 hd->uname[sizeof(hd->uname) - 1] = '\0';
868 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
869 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
870
871 /*
872 * set the defaults, these may be changed depending on the file type
873 */
874 arcn->pad = 0;
875 arcn->skip = 0;
876 arcn->sb.st_rdev = (dev_t)0;
877
878 /*
879 * set the mode and PAX type according to the typeflag in the header
880 */
881 switch(hd->typeflag) {
882 case FIFOTYPE:
883 arcn->type = PAX_FIF;
884 arcn->sb.st_mode |= S_IFIFO;
885 break;
886 case DIRTYPE:
887 arcn->type = PAX_DIR;
888 arcn->sb.st_mode |= S_IFDIR;
889 arcn->sb.st_nlink = 2;
890
891 /*
892 * Some programs that create ustar archives append a '/'
893 * to the pathname for directories. This clearly violates
894 * ustar specs, but we will silently strip it off anyway.
895 */
896 if (arcn->name[arcn->nlen - 1] == '/')
897 arcn->name[--arcn->nlen] = '\0';
898 break;
899 case BLKTYPE:
900 case CHRTYPE:
901 /*
902 * this type requires the rdev field to be set.
903 */
904 if (hd->typeflag == BLKTYPE) {
905 arcn->type = PAX_BLK;
906 arcn->sb.st_mode |= S_IFBLK;
907 } else {
908 arcn->type = PAX_CHR;
909 arcn->sb.st_mode |= S_IFCHR;
910 }
911 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
912 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
913 arcn->sb.st_rdev = TODEV(devmajor, devminor);
914 break;
915 case SYMTYPE:
916 case LNKTYPE:
917 if (hd->typeflag == SYMTYPE) {
918 arcn->type = PAX_SLK;
919 arcn->sb.st_mode |= S_IFLNK;
920 } else {
921 arcn->type = PAX_HLK;
922 /*
923 * so printing looks better
924 */
925 arcn->sb.st_mode |= S_IFREG;
926 arcn->sb.st_nlink = 2;
927 }
928 break;
929 case LONGLINKTYPE:
930 if (is_gnutar)
931 arcn->type = PAX_GLL;
932 /* FALLTHROUGH */
933 case LONGNAMETYPE:
934 if (is_gnutar) {
935 /*
936 * GNU long link/file; we tag these here and let the
937 * pax internals deal with it -- too ugly otherwise.
938 */
939 if (hd->typeflag != LONGLINKTYPE)
940 arcn->type = PAX_GLF;
941 arcn->pad = TAR_PAD(arcn->sb.st_size);
942 arcn->skip = arcn->sb.st_size;
943 } else {
944 tty_warn(1, "GNU Long %s found in posix ustar archive.",
945 hd->typeflag == LONGLINKTYPE ? "Link" : "File");
946 }
947 break;
948 case CONTTYPE:
949 case AREGTYPE:
950 case REGTYPE:
951 default:
952 /*
953 * these types have file data that follows. Set the skip and
954 * pad fields.
955 */
956 arcn->type = PAX_REG;
957 arcn->pad = TAR_PAD(arcn->sb.st_size);
958 arcn->skip = arcn->sb.st_size;
959 arcn->sb.st_mode |= S_IFREG;
960 break;
961 }
962 return(0);
963 }
964
/*
 * expandname()
 *	store a path name in buf (len bytes, always NUL terminated when
 *	len > 0). If a GNU long name is pending in *gnu_name it is used, then
 *	freed and *gnu_name reset to NULL. Otherwise the name comes from the
 *	header field name, which is nlen bytes wide and need NOT be NUL
 *	terminated when completely full; no byte beyond the field is read.
 *	A name that does not fit in buf is truncated.
 * Return:
 *	the number of characters stored in buf, not counting the NUL
 */

static int
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t nlen)
{
	const char *src;
	const char *nul;
	size_t srclen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		srclen = strlen(src);
	} else {
		/*
		 * the field ends at the first NUL or at its last byte,
		 * whichever comes first
		 */
		src = name;
		nul = memchr(name, '\0', nlen);
		srclen = (nul != NULL) ? (size_t)(nul - name) : nlen;
	}

	if (len == 0) {
		/* no room at all, nothing is stored */
		srclen = 0;
	} else {
		if (srclen >= len)
			srclen = len - 1;
		memcpy(buf, src, srclen);
		buf[srclen] = '\0';
	}

	/*
	 * a pending long name is good for one use only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return (int)srclen;
}
980
981 static void
982 longlink(ARCHD *arcn)
983 {
984 ARCHD larc;
985
986 memset(&larc, 0, sizeof(larc));
987
988 switch (arcn->type) {
989 case PAX_SLK:
990 case PAX_HRG:
991 case PAX_HLK:
992 larc.type = PAX_GLL;
993 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
994 sizeof(larc.ln_name));
995 gnu_hack_string = arcn->ln_name;
996 gnu_hack_len = arcn->ln_nlen + 1;
997 break;
998 default:
999 larc.nlen = strlcpy(larc.name, "././@LongLink",
1000 sizeof(larc.name));
1001 gnu_hack_string = arcn->name;
1002 gnu_hack_len = arcn->nlen + 1;
1003 larc.type = PAX_GLF;
1004 }
1005 /*
1006 * We need a longlink now.
1007 */
1008 ustar_wr(&larc);
1009 }
1010
1011 /*
1012 * ustar_wr()
1013 * write a ustar header for the file specified in the ARCHD to the archive
1014 * Have to check for file types that cannot be stored and file names that
1015 * are too long. Be careful of the term (last arg) to ul_oct, we only use
1016 * '\0' for the termination character (this is different than picky tar)
1017 * ASSUMED: space after header in header block is zero filled
1018 * Return:
1019 * 0 if file has data to be written after the header, 1 if file has NO
1020 * data to write after the header, -1 if archive write failed
1021 */
1022
1023 int
1024 ustar_wr(ARCHD *arcn)
1025 {
1026 HD_USTAR *hd;
1027 char *pt;
1028 char hdblk[sizeof(HD_USTAR)];
1029 const char *user, *group;
1030
1031 /*
1032 * check for those file system types ustar cannot store
1033 */
1034 if (arcn->type == PAX_SCK) {
1035 if (!is_gnutar)
1036 tty_warn(1, "Ustar cannot archive a socket %s",
1037 arcn->org_name);
1038 return(1);
1039 }
1040
1041 /*
1042 * check the length of the linkname
1043 */
1044 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
1045 (arcn->type == PAX_HRG)) &&
1046 (arcn->ln_nlen >= sizeof(hd->linkname))){
1047 if (is_gnutar) {
1048 longlink(arcn);
1049 } else {
1050 tty_warn(1, "Link name too long for ustar %s",
1051 arcn->ln_name);
1052 return(1);
1053 }
1054 }
1055
1056 /*
1057 * split the path name into prefix and name fields (if needed). if
1058 * pt != arcn->name, the name has to be split
1059 */
1060 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1061 if (is_gnutar) {
1062 longlink(arcn);
1063 pt = arcn->name;
1064 } else {
1065 tty_warn(1, "File name too long for ustar %s",
1066 arcn->name);
1067 return(1);
1068 }
1069 }
1070
1071 /*
1072 * zero out the header so we don't have to worry about zero fill below
1073 */
1074 memset(hdblk, 0, sizeof(hdblk));
1075 hd = (HD_USTAR *)hdblk;
1076 arcn->pad = 0L;
1077
1078 /*
1079 * split the name, or zero out the prefix
1080 */
1081 if (pt != arcn->name) {
1082 /*
1083 * name was split, pt points at the / where the split is to
1084 * occur, we remove the / and copy the first part to the prefix
1085 */
1086 *pt = '\0';
1087 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1088 *pt++ = '/';
1089 }
1090
1091 /*
1092 * copy the name part. this may be the whole path or the part after
1093 * the prefix
1094 */
1095 strlcpy(hd->name, pt, sizeof(hd->name));
1096
1097 /*
1098 * set the fields in the header that are type dependent
1099 */
1100 switch(arcn->type) {
1101 case PAX_DIR:
1102 hd->typeflag = DIRTYPE;
1103 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1104 goto out;
1105 break;
1106 case PAX_CHR:
1107 case PAX_BLK:
1108 if (arcn->type == PAX_CHR)
1109 hd->typeflag = CHRTYPE;
1110 else
1111 hd->typeflag = BLKTYPE;
1112 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1113 sizeof(hd->devmajor), 3) ||
1114 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1115 sizeof(hd->devminor), 3) ||
1116 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1117 goto out;
1118 break;
1119 case PAX_FIF:
1120 hd->typeflag = FIFOTYPE;
1121 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1122 goto out;
1123 break;
1124 case PAX_GLL:
1125 case PAX_SLK:
1126 case PAX_HLK:
1127 case PAX_HRG:
1128 if (arcn->type == PAX_SLK)
1129 hd->typeflag = SYMTYPE;
1130 else if (arcn->type == PAX_GLL)
1131 hd->typeflag = LONGLINKTYPE;
1132 else
1133 hd->typeflag = LNKTYPE;
1134 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1135 if (ul_oct((u_long)gnu_hack_len, hd->size,
1136 sizeof(hd->size), 3))
1137 goto out;
1138 break;
1139 case PAX_GLF:
1140 case PAX_REG:
1141 case PAX_CTG:
1142 default:
1143 /*
1144 * file data with this type, set the padding
1145 */
1146 if (arcn->type == PAX_GLF) {
1147 hd->typeflag = LONGNAMETYPE;
1148 arcn->pad = TAR_PAD(gnu_hack_len);
1149 if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1150 sizeof(hd->size), 3)) {
1151 tty_warn(1,"File is too long for ustar %s",
1152 arcn->org_name);
1153 return(1);
1154 }
1155 } else {
1156 if (arcn->type == PAX_CTG)
1157 hd->typeflag = CONTTYPE;
1158 else
1159 hd->typeflag = REGTYPE;
1160 arcn->pad = TAR_PAD(arcn->sb.st_size);
1161 if (OFFT_OCT(arcn->sb.st_size, hd->size,
1162 sizeof(hd->size), 3)) {
1163 tty_warn(1,"File is too long for ustar %s",
1164 arcn->org_name);
1165 return(1);
1166 }
1167 }
1168 break;
1169 }
1170
1171 strncpy(hd->magic, TMAGIC, TMAGLEN);
1172 if (is_gnutar)
1173 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1174 else
1175 strncpy(hd->version, TVERSION, TVERSLEN);
1176
1177 /*
1178 * set the remaining fields. Some versions want all 16 bits of mode
1179 * we better humor them (they really do not meet spec though)....
1180 */
1181 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1182 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) ||
1183 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1184 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1185 goto out;
1186 user = user_from_uid(arcn->sb.st_uid, 1);
1187 group = group_from_gid(arcn->sb.st_gid, 1);
1188 strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1189 strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1190
1191 /*
1192 * calculate and store the checksum write the header to the archive
1193 * return 0 tells the caller to now write the file data, 1 says no data
1194 * needs to be written
1195 */
1196 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1197 sizeof(hd->chksum), 3))
1198 goto out;
1199 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1200 return(-1);
1201 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1202 return(-1);
1203 if (gnu_hack_string) {
1204 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1205 int pad = gnu_hack_len;
1206 gnu_hack_string = NULL;
1207 gnu_hack_len = 0;
1208 if (res < 0)
1209 return(-1);
1210 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1211 return(-1);
1212 }
1213 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1214 return(0);
1215 return(1);
1216
1217 out:
1218 /*
1219 * header field is out of range
1220 */
1221 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1222 return(1);
1223 }
1224
1225 /*
1226 * name_split()
1227 * see if the name has to be split for storage in a ustar header. We try
1228 * to fit the entire name in the name field without splitting if we can.
1229 * The split point is always at a /
1230 * Return
1231 * character pointer to split point (always the / that is to be removed
1232 * if the split is not needed, the points is set to the start of the file
1233 * name (it would violate the spec to split there). A NULL is returned if
1234 * the file name is too long
1235 */
1236
1237 static char *
1238 name_split(char *name, int len)
1239 {
1240 char *start;
1241
1242 /*
1243 * check to see if the file name is small enough to fit in the name
1244 * field. if so just return a pointer to the name.
1245 */
1246 if (len < TNMSZ)
1247 return(name);
1248 if (len > (TPFSZ + TNMSZ))
1249 return(NULL);
1250
1251 /*
1252 * we start looking at the biggest sized piece that fits in the name
1253 * field. We walk forward looking for a slash to split at. The idea is
1254 * to find the biggest piece to fit in the name field (or the smallest
1255 * prefix we can find) (the -1 is correct the biggest piece would
1256 * include the slash between the two parts that gets thrown away)
1257 */
1258 start = name + len - TNMSZ;
1259 while ((*start != '\0') && (*start != '/'))
1260 ++start;
1261
1262 /*
1263 * if we hit the end of the string, this name cannot be split, so we
1264 * cannot store this file.
1265 */
1266 if (*start == '\0')
1267 return(NULL);
1268 len = start - name;
1269
1270 /*
1271 * NOTE: /str where the length of str == TNMSZ cannot be stored under
1272 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1273 * the file would then expand on extract to //str. The len == 0 below
1274 * makes this special case follow the spec to the letter.
1275 */
1276 if ((len >= TPFSZ) || (len == 0))
1277 return(NULL);
1278
1279 /*
1280 * ok have a split point, return it to the caller
1281 */
1282 return(start);
1283 }
1284
/*
 * tar_gnutar_exclude_one()
 *	convert one glob pattern into a replacement expression of the form
 *	/^RE$// and add it to the list with rep_add().
 *	Only the first len bytes of line are examined, so line does not have
 *	to be NUL terminated. A single trailing newline, if present, is not
 *	part of the pattern. '*' is emitted as ".*", '?' as ".", and every
 *	other character that is neither alphanumeric nor blank is preceded
 *	by a backslash.
 * Return
 *	0 if the expression was added, -1 if the pattern is longer than
 *	MAXPATHLEN or rep_add() reported a failure.
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/*
	 * worst case every pattern byte expands to two bytes, plus the
	 * leading "/^", the trailing "$//" and the terminating NUL
	 */
	char sbuf[MAXPATHLEN * 2 + 1 + 5];
	size_t i, j;
	unsigned char ch;

	/*
	 * strip the newline, taking care not to look in front of an empty
	 * pattern
	 */
	if (len > 0 && line[len - 1] == '\n')
		len--;

	/*
	 * anything longer would run off the end of sbuf. The callers have
	 * already told the user about the over-long pattern.
	 */
	if (len > MAXPATHLEN)
		return (-1);

	j = 2;
	for (i = 0; i < len; i++) {
		/*
		 * convert glob to regexp, escaping everything. The ctype
		 * functions need a value in the unsigned char range.
		 */
		ch = (unsigned char)line[i];
		if (line[i] == '*')
			sbuf[j++] = '.';
		else if (line[i] == '?') {
			sbuf[j++] = '.';
			continue;
		} else if (!isalnum(ch) && !isblank(ch))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}

	/*
	 * wrap the expression: "/^" in front, "$//" (empty replacement) and
	 * the NUL behind
	 */
	sbuf[0] = '/';
	sbuf[1] = '^';
	sbuf[j] = '$';
	sbuf[j + 1] = '/';
	sbuf[j + 2] = '/';
	sbuf[j + 3] = '\0';

	if (rep_add(sbuf) < 0)
		return (-1);

	return (0);
}
1316
/*
 * deal with GNU tar -X/--exclude-from & --exclude switches. basically,
 * we go through each line of the file, building a string from the "glob"
 * lines in the file into RE lines, of the form `/^RE$//', which we pass
 * to rep_add(), which will add an empty replacement (exclusion), for the
 * named files.
 */

/*
 * tar_gnutar_minus_minus_exclude()
 *	add a single glob pattern to the exclusion list. path is the pattern
 *	itself, not the name of a file holding patterns.
 * Return
 *	-1 (after a warning) if the pattern is longer than MAXPATHLEN,
 *	otherwise whatever tar_gnutar_exclude_one() returns.
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	/*
	 * refuse an over-long pattern instead of only warning about it: the
	 * conversion buffer is sized for at most MAXPATHLEN pattern bytes.
	 */
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1335
/*
 * tar_gnutar_X_compat()
 *	read the file named by path and add every line of it to the
 *	exclusion list as a glob pattern (see tar_gnutar_exclude_one()).
 * Return
 *	0 if every line was added. -1 if the file cannot be opened or read,
 *	if a line holds a pattern longer than MAXPATHLEN, or if a pattern
 *	could not be added. Processing stops at the first failure, and the
 *	file is closed on every path.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len;
	size_t patlen;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;

		/*
		 * the newline that ends the line is not part of the pattern
		 * and must not count against the length limit
		 */
		patlen = len;
		if (patlen > 0 && line[patlen - 1] == '\n')
			patlen--;

		/*
		 * stop on an over-long pattern rather than process it: the
		 * conversion buffer only has room for MAXPATHLEN pattern
		 * bytes, and quietly dropping an exclusion could archive
		 * files the user asked to leave out.
		 */
		if (patlen > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rval = -1;
			break;
		}

		/*
		 * hand over the line with its newline still attached, the
		 * helper strips it itself
		 */
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/*
	 * fgetln() returns NULL both at end of file and on error; do not
	 * mistake a read error for a complete exclusion list
	 */
	if (rval == 0 && ferror(fp)) {
		tty_warn(1, "cannot read %s: %s", path, strerror(errno));
		rval = -1;
	}

	(void)fclose(fp);
	return (rval);
}
1363