Home | History | Annotate | Line # | Download | only in src
      1 /*	$NetBSD: is_tar.c,v 1.1.1.12 2026/06/10 15:59:13 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) Ian F. Darwin 1986-1995.
      5  * Software written by Ian F. Darwin and others;
      6  * maintained 1995-present by Christos Zoulas and others.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice immediately at the beginning of the file, without modification,
     13  *    this list of conditions, and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
     22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  * SUCH DAMAGE.
     29  */
     30 /*
     31  * is_tar() -- figure out whether file is a tar archive.
     32  *
 * Stolen (by the author!) from the public domain tar program:
     34  * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
     35  *
     36  * @(#)list.c 1.18 9/23/86 Public Domain - gnu
     37  *
     38  * Comments changed and some code/comments reformatted
     39  * for file command by Ian Darwin.
     40  */
     41 
     42 #include "file.h"
     43 
     44 #ifndef lint
     45 #if 0
     46 FILE_RCSID("@(#)$File: is_tar.c,v 1.51 2026/06/02 17:05:51 christos Exp $")
     47 #else
     48 __RCSID("$NetBSD: is_tar.c,v 1.1.1.12 2026/06/10 15:59:13 christos Exp $");
     49 #endif
     50 #endif
     51 
     52 #include "magic.h"
     53 #include <string.h>
     54 #include <ctype.h>
     55 #include "tar.h"
     56 
/*
 * True if c is an octal digit character ('0' through '7').
 * The argument is evaluated twice, so it must not have side effects.
 */
#define	isodigit(c)	( ((c) >= '0') && ((c) <= '7') )

/* Classify a buffer: 0 = not tar, 1 = old tar, 2 = POSIX tar, 3 = GNU tar */
file_private int is_tar(const unsigned char *, size_t);
/* Decode an octal number field of the given width; -1 if it is invalid */
file_private int from_oct(const char *, size_t);	/* Decode octal number */
     61 
/*
 * Descriptions printed by file_is_tar(), indexed by the result of
 * is_tar() minus one (1 = old tar, 2 = POSIX tar, 3 = GNU tar).
 * These should be equal to the messages found in
 * ../magic/Magdir/archive.
 */
static const char tartype[][32] = {
	"tar archive",
	"POSIX tar archive",
	"POSIX tar archive (GNU)",
};
     67 
     68 file_protected int
     69 file_is_tar(struct magic_set *ms, const struct buffer *b)
     70 {
     71 	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
     72 	size_t nbytes = b->flen;
     73 	/*
     74 	 * Do the tar test first, because if the first file in the tar
     75 	 * archive starts with a dot, we can confuse it with an nroff file.
     76 	 */
     77 	int tar;
     78 	int mime = ms->flags & MAGIC_MIME;
     79 
     80 	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
     81 		return 0;
     82 
     83 	tar = is_tar(buf, nbytes);
     84 	if (tar < 1 || tar > 3)
     85 		return 0;
     86 
     87 	if (mime == MAGIC_MIME_ENCODING)
     88 		return 1;
     89 
     90 	if (file_printf(ms, "%s", mime ? "application/x-tar" :
     91 	    tartype[tar - 1]) == -1)
     92 		return -1;
     93 
     94 	return 1;
     95 }
     96 
     97 /*
     98  * Return
     99  *	0 if the checksum is bad (i.e., probably not a tar archive),
    100  *	1 for old UNIX tar file,
    101  *	2 for Unix Std (POSIX) tar file,
    102  *	3 for GNU tar file.
    103  */
    104 file_private int
    105 is_tar(const unsigned char *buf, size_t nbytes)
    106 {
    107 	static const char gpkg_match[] = "/gpkg-1";
    108 
    109 	const union record *header = RCAST(const union record *,
    110 	    RCAST(const void *, buf));
    111 	size_t i;
    112 	int sum, recsum;
    113 	const unsigned char *p, *ep;
    114 	const char *nulp;
    115 
    116 	if (nbytes < sizeof(*header))
    117 		return 0;
    118 
    119 	/* If the file looks like Gentoo GLEP 78 binary package (GPKG),
    120 	 * don't waste time on further checks and fall back to magic rules.
    121 	 */
    122 	nulp = CAST(const char *,
    123 	    memchr(header->header.name, 0, sizeof(header->header.name)));
    124 	if (nulp != NULL && nulp >= header->header.name + sizeof(gpkg_match) &&
    125 	    memcmp(nulp - sizeof(gpkg_match) + 1, gpkg_match,
    126 	    sizeof(gpkg_match)) == 0)
    127 	    return 0;
    128 
    129 	recsum = from_oct(header->header.chksum, sizeof(header->header.chksum));
    130 
    131 	sum = 0;
    132 	p = header->charptr;
    133 	ep = header->charptr + sizeof(*header);
    134 	while (p < ep)
    135 		sum += *p++;
    136 
    137 	/* Adjust checksum to count the "chksum" field as blanks. */
    138 	for (i = 0; i < sizeof(header->header.chksum); i++)
    139 		sum -= header->header.chksum[i];
    140 	sum += ' ' * sizeof(header->header.chksum);
    141 
    142 	if (sum != recsum)
    143 		return 0;	/* Not a tar archive */
    144 
    145 	if (strncmp(header->header.magic, GNUTMAGIC,
    146 	    sizeof(header->header.magic)) == 0)
    147 		return 3;		/* GNU Unix Standard tar archive */
    148 
    149 	if (strncmp(header->header.magic, TMAGIC,
    150 	    sizeof(header->header.magic)) == 0)
    151 		return 2;		/* Unix Standard tar archive */
    152 
    153 	return 1;			/* Old fashioned tar archive */
    154 }
    155 
    156 
    157 /*
    158  * Quick and dirty octal conversion.
    159  *
    160  * Result is -1 if the field is invalid (all blank, or non-octal).
    161  */
    162 file_private int
    163 from_oct(const char *where, size_t digs)
    164 {
    165 	int	value;
    166 
    167 	while (digs > 0 && isspace(CAST(unsigned char, *where))) {	/* Skip spaces */
    168 		where++;
    169 		digs--;
    170 	}
    171 	if (digs == 0)
    172 		return -1;		/* All blank field */
    173 	value = 0;
    174 	while (digs > 0 && isodigit(*where)) {	/* Scan til non-octal */
    175 		value = (value << 3) | (*where++ - '0');
    176 		digs--;
    177 	}
    178 
    179 	if (digs > 0 && *where && !isspace(CAST(unsigned char, *where)))
    180 		return -1;			/* Ended on non-(space/NUL) */
    181 
    182 	return value;
    183 }
    184