Home | History | Annotate | Line # | Download | only in unzip
unzip.c revision 1.20
      1 /* $NetBSD: unzip.c,v 1.20 2015/12/03 20:00:12 christos Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2009, 2010 Joerg Sonnenberger <joerg (at) NetBSD.org>
      5  * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
      6  * All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer
     13  *    in this position and unchanged.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     28  * SUCH DAMAGE.
     29  *
     30  * $FreeBSD: revision 180124$
     31  *
     32  * This file would be much shorter if we didn't care about command-line
     33  * compatibility with Info-ZIP's UnZip, which requires us to duplicate
     34  * parts of libarchive in order to gain more detailed control of its
     35  * behaviour for the purpose of implementing the -n, -o, -L and -a
     36  * options.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __RCSID("$NetBSD: unzip.c,v 1.20 2015/12/03 20:00:12 christos Exp $");
     41 
     42 #include <sys/queue.h>
     43 #include <sys/stat.h>
     44 
     45 #include <ctype.h>
     46 #include <errno.h>
     47 #include <fcntl.h>
     48 #include <fnmatch.h>
     49 #include <stdarg.h>
     50 #include <stdio.h>
     51 #include <stdlib.h>
     52 #include <string.h>
     53 #include <unistd.h>
     54 
     55 #include <archive.h>
     56 #include <archive_entry.h>
     57 
/*
 * Command-line options.  All of them are set by getopts(); A/N answers in
 * handle_existing_file() may additionally turn on o_opt / n_opt.
 */
static int		 a_opt;		/* convert EOL */
static int		 C_opt;		/* match case-insensitively */
static int		 c_opt;		/* extract to stdout */
static const char	*d_arg;		/* directory */
static int		 f_opt;		/* update existing files only */
static int		 j_opt;		/* junk directories */
static int		 L_opt;		/* lowercase names */
static int		 n_opt;		/* never overwrite */
static int		 o_opt;		/* always overwrite */
static int		 p_opt;		/* extract to stdout, quiet */
static int		 q_opt;		/* quiet */
static int		 t_opt;		/* test */
static int		 u_opt;		/* update */
static int		 v_opt;		/* verbose/list: 1 for -l, 2 for -v */
/*
 * Empty by default; -y sets it to two spaces.  It is printed as filler in
 * the listing headers/footers and, when non-empty, makes list() print the
 * long form of the year.
 */
static const char *	 y_str = "";	/* 4 digit year */

/* time when unzip started; used as the access time of extracted files */
static time_t		 now;

/* debug flag; main() sets it when UNZIP_DEBUG is present in the environment */
static int		 unzip_debug;

/* running on tty?  main() sets it when standard output is a terminal */
static int		 tty;

/*
 * Convenience macro: evaluate a libarchive call exactly once and, if the
 * result is anything other than ARCHIVE_OK, report archive_error_string(a)
 * through errorx() (which terminates the program).  The expansion refers to
 * a variable named `a', so one must be in scope wherever the macro is used.
 */
/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
#define ac(call)						\
	do {							\
		int acret = (call);				\
		if (acret != ARCHIVE_OK)			\
			errorx("%s", archive_error_string(a));	\
	} while (0)

/*
 * Indicates that last info() did not end with EOL.  This helps error() et
 * al. avoid printing an error message on the same line as an incomplete
 * informational message.  Note that debug() updates this flag as well.
 */
static int noeol;
     99 
    100 /* fatal error message + errno */
    101 __dead __printflike(1, 2) static void
    102 error(const char *fmt, ...)
    103 {
    104 	va_list ap;
    105 
    106 	if (noeol)
    107 		fprintf(stdout, "\n");
    108 	fflush(stdout);
    109 	fprintf(stderr, "unzip: ");
    110 	va_start(ap, fmt);
    111 	vfprintf(stderr, fmt, ap);
    112 	va_end(ap);
    113 	fprintf(stderr, ": %s\n", strerror(errno));
    114 	exit(1);
    115 }
    116 
    117 /* fatal error message, no errno */
    118 __dead __printflike(1, 2) static void
    119 errorx(const char *fmt, ...)
    120 {
    121 	va_list ap;
    122 
    123 	if (noeol)
    124 		fprintf(stdout, "\n");
    125 	fflush(stdout);
    126 	fprintf(stderr, "unzip: ");
    127 	va_start(ap, fmt);
    128 	vfprintf(stderr, fmt, ap);
    129 	va_end(ap);
    130 	fprintf(stderr, "\n");
    131 	exit(1);
    132 }
    133 
    134 /* non-fatal error message + errno */
    135 __printflike(1, 2) static void
    136 warning(const char *fmt, ...)
    137 {
    138 	va_list ap;
    139 
    140 	if (noeol)
    141 		fprintf(stdout, "\n");
    142 	fflush(stdout);
    143 	fprintf(stderr, "unzip: ");
    144 	va_start(ap, fmt);
    145 	vfprintf(stderr, fmt, ap);
    146 	va_end(ap);
    147 	fprintf(stderr, ": %s\n", strerror(errno));
    148 }
    149 
    150 /* non-fatal error message, no errno */
    151 __printflike(1, 2) static void
    152 warningx(const char *fmt, ...)
    153 {
    154 	va_list ap;
    155 
    156 	if (noeol)
    157 		fprintf(stdout, "\n");
    158 	fflush(stdout);
    159 	fprintf(stderr, "unzip: ");
    160 	va_start(ap, fmt);
    161 	vfprintf(stderr, fmt, ap);
    162 	va_end(ap);
    163 	fprintf(stderr, "\n");
    164 }
    165 
    166 /* informational message (if not -q) */
    167 __printflike(1, 2) static void
    168 info(const char *fmt, ...)
    169 {
    170 	va_list ap;
    171 
    172 	if (q_opt && !unzip_debug)
    173 		return;
    174 	va_start(ap, fmt);
    175 	vfprintf(stdout, fmt, ap);
    176 	va_end(ap);
    177 	fflush(stdout);
    178 
    179 	if (*fmt == '\0')
    180 		noeol = 1;
    181 	else
    182 		noeol = fmt[strlen(fmt) - 1] != '\n';
    183 }
    184 
    185 /* debug message (if unzip_debug) */
    186 __printflike(1, 2) static void
    187 debug(const char *fmt, ...)
    188 {
    189 	va_list ap;
    190 
    191 	if (!unzip_debug)
    192 		return;
    193 	va_start(ap, fmt);
    194 	vfprintf(stderr, fmt, ap);
    195 	va_end(ap);
    196 	fflush(stderr);
    197 
    198 	if (*fmt == '\0')
    199 		noeol = 1;
    200 	else
    201 		noeol = fmt[strlen(fmt) - 1] != '\n';
    202 }
    203 
    204 /* duplicate a path name, possibly converting to lower case */
    205 static char *
    206 pathdup(const char *path)
    207 {
    208 	char *str;
    209 	size_t i, len;
    210 
    211 	len = strlen(path);
    212 	while (len && path[len - 1] == '/')
    213 		len--;
    214 	if ((str = malloc(len + 1)) == NULL) {
    215 		errno = ENOMEM;
    216 		error("malloc()");
    217 	}
    218 	if (L_opt) {
    219 		for (i = 0; i < len; ++i)
    220 			str[i] = tolower((unsigned char)path[i]);
    221 	} else {
    222 		memcpy(str, path, len);
    223 	}
    224 	str[len] = '\0';
    225 
    226 	return (str);
    227 }
    228 
/*
 * Build "<prefix>/<path>" in newly allocated memory; with a NULL prefix the
 * result is simply a copy of `path'.  Allocation failure is fatal.  The
 * caller owns (and frees) the result.
 */
static char *
pathcat(const char *prefix, const char *path)
{
	size_t headlen = 0;			/* prefix plus the '/' */
	size_t taillen = strlen(path) + 1;	/* path plus the NUL */
	char *joined;

	if (prefix != NULL)
		headlen = strlen(prefix) + 1;

	joined = malloc(headlen + taillen);
	if (joined == NULL) {
		errno = ENOMEM;
		error("malloc()");
	}

	if (prefix != NULL) {
		memcpy(joined, prefix, headlen - 1);
		joined[headlen - 1] = '/';
	}
	memcpy(joined + headlen, path, taillen);

	return (joined);
}
    250 
/*
 * Pattern lists for include / exclude processing
 *
 * Each list element carries its pattern text inline in a flexible array
 * member, so an element is allocated as sizeof(struct pattern) plus the
 * length of the string and its terminating NUL (see add_pattern()).
 */
struct pattern {
	STAILQ_ENTRY(pattern) link;	/* linkage within a pattern_list */
	char pattern[];			/* NUL-terminated fnmatch() pattern */
};

STAILQ_HEAD(pattern_list, pattern);
/* filled from the arguments following the zipfile name in main() */
static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
/* filled from -x arguments in getopts() */
static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
    262 
    263 /*
    264  * Add an entry to a pattern list
    265  */
    266 static void
    267 add_pattern(struct pattern_list *list, const char *pattern)
    268 {
    269 	struct pattern *entry;
    270 	size_t len;
    271 
    272 	debug("adding pattern '%s'\n", pattern);
    273 	len = strlen(pattern);
    274 	if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
    275 		errno = ENOMEM;
    276 		error("malloc()");
    277 	}
    278 	memcpy(entry->pattern, pattern, len + 1);
    279 	STAILQ_INSERT_TAIL(list, entry, link);
    280 }
    281 
    282 /*
    283  * Match a string against a list of patterns
    284  */
    285 static int
    286 match_pattern(struct pattern_list *list, const char *str)
    287 {
    288 	struct pattern *entry;
    289 
    290 	STAILQ_FOREACH(entry, list, link) {
    291 		if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
    292 			return (1);
    293 	}
    294 	return (0);
    295 }
    296 
    297 /*
    298  * Verify that a given pathname is in the include list and not in the
    299  * exclude list.
    300  */
    301 static int
    302 accept_pathname(const char *pathname)
    303 {
    304 
    305 	if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
    306 		return (0);
    307 	if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
    308 		return (0);
    309 	return (1);
    310 }
    311 
/*
 * Create the directory `path' with permissions `mode', taking certain
 * precautions on the way.  An existing directory is left untouched; an
 * existing non-directory of the same name is removed first.  Failure to
 * create the directory is fatal unless it already exists.
 */
static void
make_dir(const char *path, int mode)
{
	struct stat st;

	if (lstat(path, &st) == 0) {
		if (S_ISDIR(st.st_mode))
			return;

		/*
		 * Something that is not a directory is in the way.
		 * Ideally we would ask the user, or honour -n / -o, before
		 * removing it.  However, leaving it in place may lead to a
		 * later failure or even a compromise (if it happens to be
		 * a symlink to somewhere unsafe), so it is removed
		 * unconditionally.
		 *
		 * The unlink() result is deliberately ignored: if it
		 * failed, mkdir() below fails too and that is caught.
		 */
		(void)unlink(path);
	}

	if (mkdir(path, mode) == 0 || errno == EEXIST)
		return;
	error("mkdir('%s')", path);
}
    342 
/*
 * Ensure that all directories leading up to (but not including) the
 * specified path exist.
 *
 * The function recurses from the innermost missing component outwards and
 * creates each missing directory with mode 0755.  A non-directory found in
 * place of a parent component is removed first.  `path' is temporarily
 * modified in place (the separator is replaced by NUL) but is restored
 * before returning.
 *
 * A mkdir() failure other than EEXIST is fatal and is reported here, where
 * the real cause is known, instead of surfacing later as an unrelated
 * open()/symlink() error on the entry being extracted.
 *
 * XXX inefficient + modifies the file in-place
 */
static void
make_parent(char *path)
{
	struct stat sb;
	char *sep;

	sep = strrchr(path, '/');
	/* no parent component, or the parent is the root directory */
	if (sep == NULL || sep == path)
		return;
	*sep = '\0';
	if (lstat(path, &sb) == 0) {
		if (S_ISDIR(sb.st_mode)) {
			*sep = '/';
			return;
		}
		/* if this fails, mkdir() below reports EEXIST, which is tolerated */
		(void)unlink(path);
	}
	make_parent(path);
	if (mkdir(path, 0755) != 0 && errno != EEXIST)
		error("mkdir('%s')", path);
	*sep = '/';
}
    381 
/*
 * Extract a directory entry: announce it, create it at `path' and skip
 * whatever data the archive carries for the entry.
 */
static void
extract_dir(struct archive *a, struct archive_entry *e, const char *path)
{
	int perms = archive_entry_mode(e) & 0777;

	perms = (perms != 0) ? perms : 0755;

	/*
	 * Zipfiles sometimes record odd directory permissions such as 0644
	 * or 0444.  Taken literally these would prevent us from extracting
	 * files into the new directory, or force the user to chmod it
	 * before it can be removed.  So the owner always gets full access,
	 * and group / other get execute (search) permission whenever they
	 * have read permission: shifting the two read bits right by two
	 * positions yields exactly the matching execute bits.
	 */
	perms |= 0700 | ((perms & 0044) >> 2);

	info("   creating: %s/\n", path);
	make_dir(path, perms);
	ac(archive_read_data_skip(a));
}
    414 
/* scratch buffer shared by the routines that read entry data from the archive */
static unsigned char buffer[8192];
/* characters cycled through by the progress indicator in extract_file() */
static char spinner[] = { '|', '/', '-', '\\' };
    417 
    418 static int
    419 handle_existing_file(char **path)
    420 {
    421 	size_t alen;
    422 	ssize_t len;
    423 	char buf[4];
    424 
    425 	for (;;) {
    426 		fprintf(stderr,
    427 		    "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
    428 		    *path);
    429 		fgets(buf, 4, stdin);
    430 		switch (*buf) {
    431 		case 'A':
    432 			o_opt = 1;
    433 			/* FALL THROUGH */
    434 		case 'y':
    435 		case 'Y':
    436 			(void)unlink(*path);
    437 			return 1;
    438 		case 'N':
    439 			n_opt = 1;
    440 			/* FALL THROUGH */
    441 		case 'n':
    442 			return -1;
    443 		case 'r':
    444 		case 'R':
    445 			printf("New name: ");
    446 			fflush(stdout);
    447 			free(*path);
    448 			*path = NULL;
    449 			alen = 0;
    450 			len = getline(path, &alen, stdin);
    451 			if ((*path)[len - 1] == '\n')
    452 				(*path)[len - 1] = '\0';
    453 			return 0;
    454 		default:
    455 			break;
    456 		}
    457 	}
    458 }
    459 
/*
 * Detect binary files by a combination of character white list and
 * black list. NUL bytes and other control codes without use in text files
 * result directly in switching the file to binary mode. Otherwise, at least
 * one white-listed byte has to be found.
 *
 * Black-listed: 0..6, 14..25, 28..31
 * White-listed: 9..10, 13, >= 32
 *
 * See the proginfo/txtvsbin.txt in the zip sources for a detailed discussion.
 */
#define BYTE_IS_BINARY(x)	((x) < 32 && (0xf3ffc07fU & (1U << (x))))
#define	BYTE_IS_TEXT(x)		((x) >= 32 || (0x00002600U & (1U << (x))))

/*
 * Classify the first `len' bytes at `buf'.  Returns 1 (binary) as soon as a
 * black-listed byte is met, or if no white-listed byte occurs at all (which
 * includes the empty buffer); returns 0 (text) otherwise.
 */
static int
check_binary(const unsigned char *buf, size_t len)
{
	const unsigned char *const stop = buf + len;
	int seen_text = 0;

	while (buf != stop) {
		const unsigned char c = *buf++;

		if (BYTE_IS_BINARY(c))
			return 1;
		if (!seen_text && BYTE_IS_TEXT(c))
			seen_text = 1;
	}

	return !seen_text;
}
    487 
    488 /*
    489  * Extract a regular file.
    490  */
    491 static void
    492 extract_file(struct archive *a, struct archive_entry *e, char **path)
    493 {
    494 	int mode;
    495 	time_t mtime;
    496 	struct stat sb;
    497 	struct timeval tv[2];
    498 	int cr, fd, text, warn, check;
    499 	ssize_t len;
    500 	unsigned char *p, *q, *end;
    501 	const char *linkname;
    502 
    503 	mode = archive_entry_mode(e) & 0777;
    504 	if (mode == 0)
    505 		mode = 0644;
    506 	mtime = archive_entry_mtime(e);
    507 
    508 	/* look for existing file of same name */
    509 recheck:
    510 	if (lstat(*path, &sb) == 0) {
    511 		if (u_opt || f_opt) {
    512 			/* check if up-to-date */
    513 			if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime)
    514 				return;
    515 			(void)unlink(*path);
    516 		} else if (o_opt) {
    517 			/* overwrite */
    518 			(void)unlink(*path);
    519 		} else if (n_opt) {
    520 			/* do not overwrite */
    521 			return;
    522 		} else {
    523 			check = handle_existing_file(path);
    524 			if (check == 0)
    525 				goto recheck;
    526 			if (check == -1)
    527 				return; /* do not overwrite */
    528 		}
    529 	} else {
    530 		if (f_opt)
    531 			return;
    532 	}
    533 
    534 	/* process symlinks */
    535 	linkname = archive_entry_symlink(e);
    536 	if (linkname != NULL) {
    537 		if (symlink(linkname, *path) == -1)
    538 			error("symlink('%s', '%s')", linkname, *path);
    539 		info(" extracting: %s -> %s\n", *path, linkname);
    540 		if (lchmod(*path, mode) == -1)
    541 			warning("Cannot set mode for '%s'", *path);
    542 		tv[0].tv_sec = now;
    543 		tv[0].tv_usec = 0;
    544 		tv[1].tv_sec = mtime;
    545 		tv[1].tv_usec = 0;
    546 		if (lutimes(*path, tv) == -1)
    547 			warning("utimes('%s')", *path);
    548 		return;
    549 	}
    550 
    551 	/* process hardlinks */
    552 	linkname = archive_entry_hardlink(e);
    553 	if (linkname != NULL) {
    554 		if (link(linkname, *path) == -1)
    555 			error("link('%s', '%s')", linkname, *path);
    556 		info(" extracting: %s link to %s\n", *path, linkname);
    557 		return;
    558 	}
    559 
    560 	if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
    561 		error("open('%s')", *path);
    562 
    563 	/* loop over file contents and write to disk */
    564 	info(" extracting: %s", *path);
    565 	text = a_opt;
    566 	warn = 0;
    567 	cr = 0;
    568 	for (int n = 0; ; n++) {
    569 		if (tty && (n % 4) == 0)
    570 			info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
    571 
    572 		len = archive_read_data(a, buffer, sizeof buffer);
    573 
    574 		if (len < 0)
    575 			ac(len);
    576 
    577 		/* left over CR from previous buffer */
    578 		if (a_opt && cr) {
    579 			if (len == 0 || buffer[0] != '\n')
    580 				if (write(fd, "\r", 1) != 1)
    581 					error("write('%s')", *path);
    582 			cr = 0;
    583 		}
    584 
    585 		/* EOF */
    586 		if (len == 0)
    587 			break;
    588 		end = buffer + len;
    589 
    590 		/*
    591 		 * Detect whether this is a text file.  The correct way to
    592 		 * do this is to check the least significant bit of the
    593 		 * "internal file attributes" field of the corresponding
    594 		 * file header in the central directory, but libarchive
    595 		 * does not read the central directory, so we have to
    596 		 * guess by looking for non-ASCII characters in the
    597 		 * buffer.  Hopefully we won't guess wrong.  If we do
    598 		 * guess wrong, we print a warning message later.
    599 		 */
    600 		if (a_opt && n == 0) {
    601 			if (check_binary(buffer, len))
    602 				text = 0;
    603 		}
    604 
    605 		/* simple case */
    606 		if (!a_opt || !text) {
    607 			if (write(fd, buffer, len) != len)
    608 				error("write('%s')", *path);
    609 			continue;
    610 		}
    611 
    612 		/* hard case: convert \r\n to \n (sigh...) */
    613 		for (p = buffer; p < end; p = q + 1) {
    614 			for (q = p; q < end; q++) {
    615 				if (!warn && BYTE_IS_BINARY(*q)) {
    616 					warningx("%s may be corrupted due"
    617 					    " to weak text file detection"
    618 					    " heuristic", *path);
    619 					warn = 1;
    620 				}
    621 				if (q[0] != '\r')
    622 					continue;
    623 				if (&q[1] == end) {
    624 					cr = 1;
    625 					break;
    626 				}
    627 				if (q[1] == '\n')
    628 					break;
    629 			}
    630 			if (write(fd, p, q - p) != q - p)
    631 				error("write('%s')", *path);
    632 		}
    633 	}
    634 	if (tty)
    635 		info("  \b\b");
    636 	if (text)
    637 		info(" (text)");
    638 	info("\n");
    639 
    640 	/* set access and modification time */
    641 	tv[0].tv_sec = now;
    642 	tv[0].tv_usec = 0;
    643 	tv[1].tv_sec = mtime;
    644 	tv[1].tv_usec = 0;
    645 	if (futimes(fd, tv) != 0)
    646 		error("utimes('%s')", *path);
    647 	if (close(fd) != 0)
    648 		error("close('%s')", *path);
    649 }
    650 
    651 /*
    652  * Extract a zipfile entry: first perform some sanity checks to ensure
    653  * that it is either a directory or a regular file and that the path is
    654  * not absolute and does not try to break out of the current directory;
    655  * then call either extract_dir() or extract_file() as appropriate.
    656  *
    657  * This is complicated a bit by the various ways in which we need to
    658  * manipulate the path name.  Case conversion (if requested by the -L
    659  * option) happens first, but the include / exclude patterns are applied
    660  * to the full converted path name, before the directory part of the path
    661  * is removed in accordance with the -j option.  Sanity checks are
    662  * intentionally done earlier than they need to be, so the user will get a
    663  * warning about insecure paths even for files or directories which
    664  * wouldn't be extracted anyway.
    665  */
    666 static void
    667 extract(struct archive *a, struct archive_entry *e)
    668 {
    669 	char *pathname, *realpathname;
    670 	mode_t filetype;
    671 	char *p, *q;
    672 
    673 	pathname = pathdup(archive_entry_pathname(e));
    674 	filetype = archive_entry_filetype(e);
    675 
    676 	/* sanity checks */
    677 	if (pathname[0] == '/' ||
    678 	    strncmp(pathname, "../", 3) == 0 ||
    679 	    strstr(pathname, "/../") != NULL) {
    680 		warningx("skipping insecure entry '%s'", pathname);
    681 		ac(archive_read_data_skip(a));
    682 		free(pathname);
    683 		return;
    684 	}
    685 
    686 	/* I don't think this can happen in a zipfile.. */
    687 	if (!S_ISDIR(filetype) && !S_ISREG(filetype) && !S_ISLNK(filetype)) {
    688 		warningx("skipping non-regular entry '%s'", pathname);
    689 		ac(archive_read_data_skip(a));
    690 		free(pathname);
    691 		return;
    692 	}
    693 
    694 	/* skip directories in -j case */
    695 	if (S_ISDIR(filetype) && j_opt) {
    696 		ac(archive_read_data_skip(a));
    697 		free(pathname);
    698 		return;
    699 	}
    700 
    701 	/* apply include / exclude patterns */
    702 	if (!accept_pathname(pathname)) {
    703 		ac(archive_read_data_skip(a));
    704 		free(pathname);
    705 		return;
    706 	}
    707 
    708 	/* apply -j and -d */
    709 	if (j_opt) {
    710 		for (p = q = pathname; *p; ++p)
    711 			if (*p == '/')
    712 				q = p + 1;
    713 		realpathname = pathcat(d_arg, q);
    714 	} else {
    715 		realpathname = pathcat(d_arg, pathname);
    716 	}
    717 
    718 	/* ensure that parent directory exists */
    719 	make_parent(realpathname);
    720 
    721 	if (S_ISDIR(filetype))
    722 		extract_dir(a, e, realpathname);
    723 	else
    724 		extract_file(a, e, &realpathname);
    725 
    726 	free(realpathname);
    727 	free(pathname);
    728 }
    729 
    730 static void
    731 extract_stdout(struct archive *a, struct archive_entry *e)
    732 {
    733 	char *pathname;
    734 	mode_t filetype;
    735 	int cr, text, warn;
    736 	ssize_t len;
    737 	unsigned char *p, *q, *end;
    738 
    739 	pathname = pathdup(archive_entry_pathname(e));
    740 	filetype = archive_entry_filetype(e);
    741 
    742 	/* I don't think this can happen in a zipfile.. */
    743 	if (!S_ISDIR(filetype) && !S_ISREG(filetype) && !S_ISLNK(filetype)) {
    744 		warningx("skipping non-regular entry '%s'", pathname);
    745 		ac(archive_read_data_skip(a));
    746 		free(pathname);
    747 		return;
    748 	}
    749 
    750 	/* skip directories in -j case */
    751 	if (S_ISDIR(filetype)) {
    752 		ac(archive_read_data_skip(a));
    753 		free(pathname);
    754 		return;
    755 	}
    756 
    757 	/* apply include / exclude patterns */
    758 	if (!accept_pathname(pathname)) {
    759 		ac(archive_read_data_skip(a));
    760 		free(pathname);
    761 		return;
    762 	}
    763 
    764 	if (c_opt)
    765 		info("x %s\n", pathname);
    766 
    767 	text = a_opt;
    768 	warn = 0;
    769 	cr = 0;
    770 	for (int n = 0; ; n++) {
    771 		len = archive_read_data(a, buffer, sizeof buffer);
    772 
    773 		if (len < 0)
    774 			ac(len);
    775 
    776 		/* left over CR from previous buffer */
    777 		if (a_opt && cr) {
    778 			if (len == 0 || buffer[0] != '\n') {
    779 				if (fwrite("\r", 1, 1, stderr) != 1)
    780 					error("write('%s')", pathname);
    781 			}
    782 			cr = 0;
    783 		}
    784 
    785 		/* EOF */
    786 		if (len == 0)
    787 			break;
    788 		end = buffer + len;
    789 
    790 		/*
    791 		 * Detect whether this is a text file.  The correct way to
    792 		 * do this is to check the least significant bit of the
    793 		 * "internal file attributes" field of the corresponding
    794 		 * file header in the central directory, but libarchive
    795 		 * does not read the central directory, so we have to
    796 		 * guess by looking for non-ASCII characters in the
    797 		 * buffer.  Hopefully we won't guess wrong.  If we do
    798 		 * guess wrong, we print a warning message later.
    799 		 */
    800 		if (a_opt && n == 0) {
    801 			for (p = buffer; p < end; ++p) {
    802 				if (!isascii((unsigned char)*p)) {
    803 					text = 0;
    804 					break;
    805 				}
    806 			}
    807 		}
    808 
    809 		/* simple case */
    810 		if (!a_opt || !text) {
    811 			if (fwrite(buffer, 1, len, stdout) != (size_t)len)
    812 				error("write('%s')", pathname);
    813 			continue;
    814 		}
    815 
    816 		/* hard case: convert \r\n to \n (sigh...) */
    817 		for (p = buffer; p < end; p = q + 1) {
    818 			for (q = p; q < end; q++) {
    819 				if (!warn && !isascii(*q)) {
    820 					warningx("%s may be corrupted due"
    821 					    " to weak text file detection"
    822 					    " heuristic", pathname);
    823 					warn = 1;
    824 				}
    825 				if (q[0] != '\r')
    826 					continue;
    827 				if (&q[1] == end) {
    828 					cr = 1;
    829 					break;
    830 				}
    831 				if (q[1] == '\n')
    832 					break;
    833 			}
    834 			if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p))
    835 				error("write('%s')", pathname);
    836 		}
    837 	}
    838 
    839 	free(pathname);
    840 }
    841 
    842 /*
    843  * Print the name of an entry to stdout.
    844  */
    845 static void
    846 list(struct archive *a, struct archive_entry *e)
    847 {
    848 	char buf[20];
    849 	time_t mtime;
    850 	struct tm *tm;
    851 
    852 	mtime = archive_entry_mtime(e);
    853 	tm = localtime(&mtime);
    854 	if (*y_str)
    855 		strftime(buf, sizeof(buf), "%m-%d-%G %R", tm);
    856 	else
    857 		strftime(buf, sizeof(buf), "%m-%d-%g %R", tm);
    858 
    859 	if (v_opt == 1) {
    860 		printf(" %8ju  %s   %s\n",
    861 		    (uintmax_t)archive_entry_size(e),
    862 		    buf, archive_entry_pathname(e));
    863 	} else if (v_opt == 2) {
    864 		printf("%8ju  Stored  %7ju   0%%  %s  %08x  %s\n",
    865 		    (uintmax_t)archive_entry_size(e),
    866 		    (uintmax_t)archive_entry_size(e),
    867 		    buf,
    868 		    0U,
    869 		    archive_entry_pathname(e));
    870 	}
    871 	ac(archive_read_data_skip(a));
    872 }
    873 
    874 /*
    875  * Extract to memory to check CRC
    876  */
    877 static int
    878 test(struct archive *a, struct archive_entry *e)
    879 {
    880 	ssize_t len;
    881 	int error_count;
    882 
    883 	error_count = 0;
    884 	if (S_ISDIR(archive_entry_filetype(e)))
    885 		return 0;
    886 
    887 	info("    testing: %s\t", archive_entry_pathname(e));
    888 	while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
    889 		/* nothing */;
    890 	if (len < 0) {
    891 		info(" %s\n", archive_error_string(a));
    892 		++error_count;
    893 	} else {
    894 		info(" OK\n");
    895 	}
    896 
    897 	/* shouldn't be necessary, but it doesn't hurt */
    898 	ac(archive_read_data_skip(a));
    899 
    900 	return error_count;
    901 }
    902 
    903 
    904 /*
    905  * Main loop: open the zipfile, iterate over its contents and decide what
    906  * to do with each entry.
    907  */
    908 static void
    909 unzip(const char *fn)
    910 {
    911 	struct archive *a;
    912 	struct archive_entry *e;
    913 	int fd, ret;
    914 	uintmax_t total_size, file_count, error_count;
    915 
    916 	if ((fd = open(fn, O_RDONLY)) < 0)
    917 		error("%s", fn);
    918 
    919 	a = archive_read_new();
    920 	ac(archive_read_support_format_zip(a));
    921 	ac(archive_read_open_fd(a, fd, 8192));
    922 
    923 	if (!q_opt && !p_opt)
    924 	    printf("Archive:  %s\n", fn);
    925 
    926 	if (v_opt == 1) {
    927 		printf("  Length     %sDate   Time    Name\n", y_str);
    928 		printf(" --------    %s----   ----    ----\n", y_str);
    929 	} else if (v_opt == 2) {
    930 		printf(" Length   Method    Size  Ratio   %sDate   Time   CRC-32    Name\n", y_str);
    931 		printf("--------  ------  ------- -----   %s----   ----   ------    ----\n", y_str);
    932 	}
    933 
    934 	total_size = 0;
    935 	file_count = 0;
    936 	error_count = 0;
    937 	for (;;) {
    938 		ret = archive_read_next_header(a, &e);
    939 		if (ret == ARCHIVE_EOF)
    940 			break;
    941 		ac(ret);
    942 		if (t_opt)
    943 			error_count += test(a, e);
    944 		else if (v_opt)
    945 			list(a, e);
    946 		else if (p_opt || c_opt)
    947 			extract_stdout(a, e);
    948 		else
    949 			extract(a, e);
    950 
    951 		total_size += archive_entry_size(e);
    952 		++file_count;
    953 	}
    954 
    955 	if (v_opt == 1) {
    956 		printf(" --------                   %s-------\n", y_str);
    957 		printf(" %8ju                   %s%ju file%s\n",
    958 		    total_size, y_str, file_count, file_count != 1 ? "s" : "");
    959 	} else if (v_opt == 2) {
    960 		printf("--------          -------  ---                            %s-------\n", y_str);
    961 		printf("%8ju          %7ju   0%%                            %s%ju file%s\n",
    962 		    total_size, total_size, y_str, file_count,
    963 		    file_count != 1 ? "s" : "");
    964 	}
    965 
    966 	ac(archive_read_close(a));
    967 	(void)archive_read_finish(a);
    968 
    969 	if (close(fd) != 0)
    970 		error("%s", fn);
    971 
    972 	if (t_opt) {
    973 		if (error_count > 0) {
    974 			errorx("%ju checksum error(s) found.", error_count);
    975 		}
    976 		else {
    977 			printf("No errors detected in compressed data of %s.\n",
    978 			       fn);
    979 		}
    980 	}
    981 }
    982 
    983 static void __dead
    984 usage(void)
    985 {
    986 
    987 	fprintf(stderr, "Usage: %s [-aCcfjLlnopqtuvy] [-d dir] [-x pattern] "
    988 	    "zipfile\n", getprogname());
    989 	exit(1);
    990 }
    991 
    992 static int
    993 getopts(int argc, char *argv[])
    994 {
    995 	int opt;
    996 
    997 	optreset = optind = 1;
    998 	while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvyx:")) != -1)
    999 		switch (opt) {
   1000 		case 'a':
   1001 			a_opt = 1;
   1002 			break;
   1003 		case 'C':
   1004 			C_opt = 1;
   1005 			break;
   1006 		case 'c':
   1007 			c_opt = 1;
   1008 			break;
   1009 		case 'd':
   1010 			d_arg = optarg;
   1011 			break;
   1012 		case 'f':
   1013 			f_opt = 1;
   1014 			break;
   1015 		case 'j':
   1016 			j_opt = 1;
   1017 			break;
   1018 		case 'L':
   1019 			L_opt = 1;
   1020 			break;
   1021 		case 'l':
   1022 			if (v_opt == 0)
   1023 				v_opt = 1;
   1024 			break;
   1025 		case 'n':
   1026 			n_opt = 1;
   1027 			break;
   1028 		case 'o':
   1029 			o_opt = 1;
   1030 			q_opt = 1;
   1031 			break;
   1032 		case 'p':
   1033 			p_opt = 1;
   1034 			break;
   1035 		case 'q':
   1036 			q_opt = 1;
   1037 			break;
   1038 		case 't':
   1039 			t_opt = 1;
   1040 			break;
   1041 		case 'u':
   1042 			u_opt = 1;
   1043 			break;
   1044 		case 'v':
   1045 			v_opt = 2;
   1046 			break;
   1047 		case 'x':
   1048 			add_pattern(&exclude, optarg);
   1049 			break;
   1050 		case 'y':
   1051 			y_str = "  ";
   1052 			break;
   1053 		default:
   1054 			usage();
   1055 		}
   1056 
   1057 	return (optind);
   1058 }
   1059 
   1060 int
   1061 main(int argc, char *argv[])
   1062 {
   1063 	const char *zipfile;
   1064 	int nopts;
   1065 
   1066 	if (isatty(STDOUT_FILENO))
   1067 		tty = 1;
   1068 
   1069 	if (getenv("UNZIP_DEBUG") != NULL)
   1070 		unzip_debug = 1;
   1071 	for (int i = 0; i < argc; ++i)
   1072 		debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
   1073 
   1074 	/*
   1075 	 * Info-ZIP's unzip(1) expects certain options to come before the
   1076 	 * zipfile name, and others to come after - though it does not
   1077 	 * enforce this.  For simplicity, we accept *all* options both
   1078 	 * before and after the zipfile name.
   1079 	 */
   1080 	nopts = getopts(argc, argv);
   1081 
   1082 	if (argc <= nopts)
   1083 		usage();
   1084 	zipfile = argv[nopts++];
   1085 
   1086 	while (nopts < argc && *argv[nopts] != '-')
   1087 		add_pattern(&include, argv[nopts++]);
   1088 
   1089 	nopts--; /* fake argv[0] */
   1090 	nopts += getopts(argc - nopts, argv + nopts);
   1091 
   1092 	if (n_opt + o_opt + u_opt > 1)
   1093 		errorx("-n, -o and -u are contradictory");
   1094 
   1095 	time(&now);
   1096 
   1097 	unzip(zipfile);
   1098 
   1099 	exit(0);
   1100 }
   1101