/*	$NetBSD: gzip.c,v 1.3 2003/12/23 08:11:58 jdolecek Exp $	*/
2
3 /*
4 * Copyright (c) 1997, 1998, 2003 Matthew R. Green
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
/*
 * gzip.c -- GPL-free gzip using zlib.
 *
 * Very minor portions of this code are (very loosely) derived from
 * the minigzip.c in the zlib distribution.
 */
37
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 #include <sys/time.h>
41
42 #include <unistd.h>
43 #include <stdio.h>
44 #include <string.h>
45 #include <stdlib.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <zlib.h>
50 #include <fts.h>
51 #include <libgen.h>
52 #include <stdarg.h>
53 #include <getopt.h>
54
/* Default compressed-file suffix; a build may predefine GZ_SUFFIX. */
#ifndef GZ_SUFFIX
# define GZ_SUFFIX ".gz"
#endif

/* sizeof the default suffix literal, i.e. its length plus the NUL. */
#define SUFFIX_LEN sizeof(GZ_SUFFIX)

/* Size in bytes of the I/O buffers in gz_compress()/gz_uncompress(). */
#define BUFLEN 4096

/*
 * Bit tested against byte 3 of the compressed file's header by
 * file_uncompress() to decide whether a stored file name follows.
 */
#define ORIG_NAME 0x08

/* Define this if you have the NetBSD gzopenfull(3) extension to zlib(3) */
#define HAVE_ZLIB_GZOPENFULL 0
67
/* version banner printed by usage() and display_version() */
static const char gzip_version[] = "NetBSD gzip 2.0";

/* mode string handed to gzopen()/gzdopen(): `w' or `r', optionally [1-9] */
static char gzipflags[3];
static int cflag;			/* stdout mode (-c) */
static int dflag;			/* decompress mode (-d) */
static int fflag;			/* force mode (-f) */
static int nflag;			/* don't save name/timestamp (-n) */
static int Nflag;			/* restore stored name (-N) */
static int qflag;			/* quiet mode (-q) */
static int rflag;			/* recursive mode (-r) */
static int tflag;			/* test (-t) */
static int vflag;			/* verbose mode (-v) */
static const char *Sflag = GZ_SUFFIX;	/* suffix (.gz), changed by -S */

static int suffix_len = SUFFIX_LEN;	/* length of suffix; includes nul */
static char *newfile;			/* name of newly created file */
static char *infile;			/* name of file coming in */
85
86 static void maybe_err(int rv, const char *fmt, ...);
87 static void maybe_warn(const char *fmt, ...);
88 static void maybe_warnx(const char *fmt, ...);
89 static void usage(void);
90 static void display_version(void);
91 static void gz_compress(FILE *, gzFile);
92 static off_t gz_uncompress(gzFile, FILE *);
93 static void copymodes(const char *, struct stat *);
94 static ssize_t file_compress(char *);
95 static ssize_t file_uncompress(char *);
96 static void handle_pathname(char *);
97 static void handle_file(char *, struct stat *);
98 static void handle_dir(char *, struct stat *);
99 static void handle_stdin(void);
100 static void handle_stdout(void);
101 static void print_verbage(char *, char *, ssize_t, ssize_t);
102 static void print_test(char *, int);
103
104 int main(int, char *p[]);
105
/*
 * Long option table for getopt_long(3).  Each entry maps a long name to
 * the short option character handled in main().
 *
 * The table MUST end with an all-zero entry: getopt_long(3) walks the
 * array until it finds one and would otherwise read past the end.
 */
static const struct option longopts[] = {
	{ "stdout",		no_argument,		0,	'c' },
	{ "to-stdout",		no_argument,		0,	'c' },
	{ "decompress",		no_argument,		0,	'd' },
	{ "uncompress",		no_argument,		0,	'd' },
	{ "force",		no_argument,		0,	'f' },
	{ "help",		no_argument,		0,	'h' },
	{ "no-name",		no_argument,		0,	'n' },
	{ "name",		no_argument,		0,	'N' },
	{ "quiet",		no_argument,		0,	'q' },
	{ "recursive",		no_argument,		0,	'r' },
	{ "suffix",		required_argument,	0,	'S' },
	{ "test",		no_argument,		0,	't' },
	{ "verbose",		no_argument,		0,	'v' },
	{ "version",		no_argument,		0,	'V' },
	{ "fast",		no_argument,		0,	'1' },
	{ "best",		no_argument,		0,	'9' },
#if 0
	/*
	 * This is what else GNU gzip implements.  Maybe --list is
	 * useful, but --ascii isn't useful on NetBSD, and I don't
	 * care to have a --license.
	 */
	{ "ascii",		no_argument,		0,	'a' },
	{ "list",		no_argument,		0,	'l' },
	{ "license",		no_argument,		0,	'L' },
#endif
	{ NULL,			0,			0,	0 },	/* terminator */
};
134
135 int
136 main(int argc, char **argv)
137 {
138 const char *progname = getprogname();
139 int ch;
140
141 gzipflags[0] = 'w';
142 gzipflags[1] = '\0';
143
144 /*
145 * XXX
146 * handle being called `gunzip', `zcat' and `gzcat'
147 */
148 if (strcmp(progname, "gunzip") == 0)
149 dflag = 1;
150 else if (strcmp(progname, "zcat") == 0 ||
151 strcmp(progname, "gzcat") == 0)
152 dflag = cflag = 1;
153
154 while ((ch = getopt_long(argc, argv, "cdfhHlnNqrS:tvV123456789",
155 longopts, NULL)) != -1)
156 switch (ch) {
157 case 'c':
158 cflag = 1;
159 break;
160 case 'd':
161 dflag = 1;
162 break;
163 case 'f':
164 fflag = 1;
165 break;
166 case 'h':
167 case 'H':
168 usage();
169 /* NOTREACHED */
170 case 'n':
171 nflag = 1;
172 Nflag = 0;
173 break;
174 case 'N':
175 nflag = 0;
176 Nflag = 1;
177 break;
178 case 'q':
179 qflag = 1;
180 break;
181 case 'r':
182 rflag = 1;
183 break;
184 case 'S':
185 Sflag = optarg;
186 suffix_len = strlen(Sflag) + 1;
187 break;
188 case 't':
189 cflag = 1;
190 tflag = 1;
191 dflag = 1;
192 break;
193 case 'v':
194 vflag = 1;
195 break;
196 case 'V':
197 display_version();
198 /* NOTREACHED */
199 case '1': case '2': case '3':
200 case '4': case '5': case '6':
201 case '7': case '8': case '9':
202 gzipflags[1] = (char)ch;
203 gzipflags[2] = '\0';
204 break;
205 }
206 argv += optind;
207 argc -= optind;
208 if (dflag)
209 gzipflags[0] = 'r';
210
211 if (argc == 0) {
212 if (dflag) /* stdin mode */
213 handle_stdin();
214 else /* stdout mode */
215 handle_stdout();
216 } else {
217 do {
218 handle_pathname(argv[0]);
219 } while (argv++, --argc);
220 }
221 exit(0);
222 }
223
224 /* maybe print a warning */
225 void
226 maybe_warn(const char *fmt, ...)
227 {
228 va_list ap;
229
230 if (qflag == 0) {
231 va_start(ap, fmt);
232 vwarn(fmt, ap);
233 va_end(ap);
234 }
235 }
236
237 void
238 maybe_warnx(const char *fmt, ...)
239 {
240 va_list ap;
241
242 if (qflag == 0) {
243 va_start(ap, fmt);
244 vwarnx(fmt, ap);
245 va_end(ap);
246 }
247 }
248
249 /* maybe print a warning */
250 void
251 maybe_err(int rv, const char *fmt, ...)
252 {
253 va_list ap;
254
255 if (qflag == 0) {
256 va_start(ap, fmt);
257 vwarn(fmt, ap);
258 va_end(ap);
259 }
260 exit(rv);
261 }
262
263 /* compress input to output then close both files */
264 static void
265 gz_compress(FILE *in, gzFile out)
266 {
267 char buf[BUFLEN];
268 ssize_t len;
269 int i;
270
271 for (;;) {
272 len = fread(buf, 1, sizeof(buf), in);
273 if (ferror(in))
274 maybe_err(1, "fread");
275 if (len == 0)
276 break;
277
278 if ((ssize_t)gzwrite(out, buf, len) != len)
279 maybe_err(1, gzerror(out, &i));
280 }
281 if (fclose(in) < 0)
282 maybe_err(1, "failed fclose");
283 if (gzclose(out) != Z_OK)
284 maybe_err(1, "failed gzclose");
285 }
286
287 /* uncompress input to output then close the input */
288 static off_t
289 gz_uncompress(gzFile in, FILE *out)
290 {
291 char buf[BUFLEN];
292 off_t size;
293 ssize_t len;
294 int i;
295
296 for (size = 0;;) {
297 len = gzread(in, buf, sizeof(buf));
298
299 if (len < 0) {
300 if (tflag) {
301 print_test(infile, 0);
302 return (0);
303 } else
304 maybe_err(1, gzerror(in, &i));
305 } else if (len == 0) {
306 if (tflag)
307 print_test(infile, 1);
308 break;
309 }
310
311 size += len;
312
313 /* don't write anything with -t */
314 if (tflag)
315 continue;
316
317 if (fwrite(buf, 1, (unsigned)len, out) != (ssize_t)len)
318 maybe_err(1, "failed fwrite");
319 }
320 if (gzclose(in) != Z_OK)
321 maybe_err(1, "failed gzclose");
322
323 return (size);
324 }
325
326 /*
327 * set the owner, mode, flags & utimes for a file
328 */
329 static void
330 copymodes(const char *file, struct stat *sbp)
331 {
332 struct timeval times[2];
333
334 /*
335 * If we have no info on the input, give this file some
336 * default values and return..
337 */
338 if (sbp == NULL) {
339 mode_t mask = umask(022);
340
341 (void)chmod(file, DEFFILEMODE & ~mask);
342 (void)umask(mask);
343 return;
344 }
345
346 /* if the chown fails, remove set-id bits as-per compress(1) */
347 if (chown(file, sbp->st_uid, sbp->st_gid) < 0) {
348 if (errno != EPERM)
349 maybe_warn("couldn't chown: %s", file);
350 sbp->st_mode &= ~(S_ISUID|S_ISGID);
351 }
352
353 /* we only allow set-id and the 9 normal permission bits */
354 sbp->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO;
355 if (chmod(file, sbp->st_mode) < 0)
356 maybe_warn("couldn't chmod: %s", file);
357
358 /* only try flags if they exist already */
359 if (sbp->st_flags != 0 && chflags(file, sbp->st_flags) < 0)
360 maybe_warn("couldn't chflags: %s", file);
361
362 TIMESPEC_TO_TIMEVAL(×[0], &sbp->st_atimespec);
363 TIMESPEC_TO_TIMEVAL(×[1], &sbp->st_mtimespec);
364 if (utimes(file, times) < 0)
365 maybe_warn("couldn't utimes: %s", file);
366 }
367
368 /*
369 * compress the given file: create a corresponding .gz file and remove the
370 * original.
371 */
372 static ssize_t
373 file_compress(char *file)
374 {
375 char outfile[MAXPATHLEN];
376 FILE *in;
377 gzFile out;
378 struct stat isb, osb;
379 ssize_t size;
380 u_int32_t mtime = 0;
381
382 if (cflag == 0) {
383 (void)strncpy(outfile, file, MAXPATHLEN - suffix_len);
384 outfile[MAXPATHLEN - suffix_len] = '\0';
385 (void)strlcat(outfile, Sflag, sizeof(outfile));
386
387 if (fflag == 0) {
388 if (stat(outfile, &osb) == 0) {
389 maybe_warnx("%s already exists -- skipping",
390 outfile);
391 goto lose;
392 }
393 }
394 if (stat(file, &isb) == 0) {
395 if (isb.st_nlink > 1) {
396 maybe_warnx("%s has %d other link%s -- "
397 "skipping", file, isb.st_nlink-1,
398 isb.st_nlink == 1 ? "" : "s");
399 goto lose;
400 }
401 if (nflag == 0)
402 mtime = (u_int32_t)isb.st_mtime;
403 }
404 }
405 in = fopen(file, "r");
406 if (in == 0)
407 maybe_err(1, "can't fopen %s", file);
408
409 if (cflag == 0) {
410 #if HAVE_ZLIB_GZOPENFULL
411 char *savename;
412
413 if (nflag == 0)
414 savename = basename(file);
415 else
416 savename = NULL;
417 out = gzopenfull(outfile, gzipflags, savename, mtime);
418 #else
419 out = gzopen(outfile, gzipflags);
420 #endif
421 } else
422 out = gzdopen(STDOUT_FILENO, gzipflags);
423
424 if (out == 0)
425 maybe_err(1, "can't gz%sopen %s",
426 cflag ? "d" : "",
427 cflag ? "stdout" : outfile);
428
429 gz_compress(in, out);
430
431 /*
432 * if we compressed to stdout, we don't know the size and
433 * we don't know the new file name, punt. if we can't stat
434 * the file, whine, otherwise set the size from the stat
435 * buffer. we only blow away the file if we can stat the
436 * output, just in case.
437 */
438 if (cflag == 0) {
439 if (stat(outfile, &osb) < 0) {
440 maybe_warn("couldn't stat: %s", outfile);
441 maybe_warnx("leaving original %s", file);
442 size = 0;
443 } else {
444 unlink(file);
445 size = osb.st_size;
446 }
447 newfile = outfile;
448 copymodes(outfile, &isb);
449 } else {
450 lose:
451 size = 0;
452 newfile = 0;
453 }
454
455 return (size);
456 }
457
458 /* uncompress the given file and remove the original */
459 static ssize_t
460 file_uncompress(char *file)
461 {
462 struct stat isb, osb;
463 char buf[MAXPATHLEN];
464 char *outfile = buf, *s;
465 FILE *out;
466 gzFile in;
467 off_t size;
468 ssize_t len = strlen(file);
469
470 if (cflag == 0) {
471 s = &file[len - suffix_len + 1];
472 if (strncmp(s, Sflag, suffix_len) == 0) {
473 (void)strncpy(outfile, file, len - suffix_len + 1);
474 outfile[len - suffix_len + 1] = '\0';
475 } else
476 maybe_err(1, "unknown suffix %s", s);
477
478 /* gather the old name info */
479 if (Nflag) {
480 int fd;
481 char header1[10], name[PATH_MAX + 1];
482
483 fd = open(file, O_RDONLY);
484 if (fd < 0)
485 maybe_err(1, "can't open %s", file);
486 if (read(fd, header1, 10) != 10)
487 maybe_err(1, "can't read %s", file);
488
489 if (header1[3] & ORIG_NAME) {
490 size_t rbytes;
491 int i;
492
493 rbytes = read(fd, name, PATH_MAX + 1);
494 if (rbytes < 0)
495 maybe_err(1, "can't read %s", file);
496 for (i = 0; i < rbytes && name[i]; i++)
497 ;
498 if (i < rbytes) {
499 name[i] = 0;
500 /* now maybe merge old dirname */
501 if (strchr(outfile, '/') == 0)
502 outfile = name;
503 else {
504 char *dir = dirname(outfile);
505 if (asprintf(&outfile, "%s/%s",
506 dir, name) == -1)
507 maybe_err(1, "malloc");
508 }
509 }
510 }
511 close(fd);
512 }
513
514 if (fflag == 0) {
515 if (stat(outfile, &isb) == 0) {
516 maybe_warnx("%s already exists -- skipping",
517 outfile);
518 goto lose;
519 }
520 if (stat(file, &isb) == 0 && isb.st_nlink > 1) {
521 maybe_warnx("%s has %d other link%s -- "
522 "skipping", file, isb.st_nlink-1,
523 isb.st_nlink == 1 ? "" : "s");
524 goto lose;
525 }
526 }
527 }
528 in = gzopen(file, gzipflags);
529 if (in == NULL)
530 maybe_err(1, "can't gzopen %s", file);
531
532 if (cflag == 0) {
533 int fd;
534
535 /* Use open(2) directly to get a safe file. */
536 fd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600);
537 if (fd < 0)
538 maybe_err(1, "can't open %s", outfile);
539 out = fdopen(fd, "w");
540 if (out == NULL)
541 maybe_err(1, "can't fdopen %s", outfile);
542 } else
543 out = stdout;
544
545 if ((size = gz_uncompress(in, out)) == 0)
546 goto lose;
547
548 /* if testing, or we uncompressed to stdout, this is all we need */
549 if (tflag || cflag)
550 return (size);
551
552 /*
553 * if we create a file...
554 */
555 if (cflag == 0) {
556 /* close the file */
557 if (fclose(out))
558 maybe_err(1, "failed fclose");
559
560 /*
561 * if we can't stat the file, or we are uncompressing to
562 * stdin, don't remove the file.
563 */
564 if (stat(outfile, &osb) < 0) {
565 maybe_warn("couldn't stat (leaving original): %s",
566 outfile);
567 goto lose;
568 }
569 if (osb.st_size != size) {
570 maybe_warn("stat gave different size: %llu != %llu "
571 "(leaving original)",
572 (unsigned long long)size,
573 (unsigned long long)osb.st_size);
574 goto lose;
575 }
576 newfile = outfile;
577 unlink(file);
578 size = osb.st_size;
579 copymodes(outfile, &isb);
580 }
581 return (size);
582
583 lose:
584 newfile = 0;
585 return (0);
586 }
587
588 static void
589 handle_stdin(void)
590 {
591 gzFile *file;
592
593 if (fflag == 0 && isatty(STDIN_FILENO)) {
594 maybe_warnx("standard input is a terminal -- ignoring");
595 return;
596 }
597 file = gzdopen(STDIN_FILENO, gzipflags);
598 if (file == NULL)
599 maybe_err(1, "can't gzdopen stdin");
600 gz_uncompress(file, stdout);
601 }
602
603 static void
604 handle_stdout(void)
605 {
606 gzFile *file;
607
608 if (fflag == 0 && isatty(STDOUT_FILENO)) {
609 maybe_warnx("standard output is a terminal -- ignoring");
610 return;
611 }
612 file = gzdopen(STDOUT_FILENO, gzipflags);
613 if (file == NULL)
614 maybe_err(1, "can't gzdopen stdout");
615 gz_compress(stdin, file);
616 }
617
618 /* do what is asked for, for the path name */
619 static void
620 handle_pathname(char *path)
621 {
622 char *opath = path, *s = 0;
623 ssize_t len;
624 struct stat sb;
625
626 /* check for stdout/stdin */
627 if (path[0] == '-' && path[1] == '\0') {
628 if (dflag)
629 handle_stdin();
630 else
631 handle_stdout();
632 }
633
634 retry:
635 if (stat(path, &sb) < 0) {
636 /* lets try <path>.gz if we're decompressing */
637 if (dflag && s == 0 && errno == ENOENT) {
638 len = strlen(path);
639 s = malloc(len + suffix_len);
640 if (s == 0)
641 maybe_err(1, "malloc");
642 memmove(s, path, len);
643 memmove(&s[len], Sflag, suffix_len);
644 path = s;
645 goto retry;
646 }
647 maybe_warn("can't stat: %s", opath);
648 goto out;
649 }
650
651 if (S_ISDIR(sb.st_mode)) {
652 if (rflag)
653 handle_dir(path, &sb);
654 else
655 maybe_warn("%s is a directory", path);
656 goto out;
657 }
658
659 if (S_ISREG(sb.st_mode))
660 handle_file(path, &sb);
661
662 out:
663 if (s)
664 free(s);
665 return;
666 }
667
668 /* compress/decompress a file */
669 static void
670 handle_file(char *file, struct stat *sbp)
671 {
672 ssize_t usize, gsize;
673
674 infile = file;
675 if (dflag) {
676 usize = file_uncompress(file);
677 if (usize == 0)
678 return;
679 gsize = sbp->st_size;
680 } else {
681 gsize = file_compress(file);
682 if (gsize == 0)
683 return;
684 usize = sbp->st_size;
685 }
686
687 if (vflag && !tflag)
688 print_verbage(file, cflag == 0 ? newfile : 0, usize, gsize);
689 }
690
691 /* this is used with -r to recursively decend directories */
692 static void
693 handle_dir(char *dir, struct stat *sbp)
694 {
695 char *path_argv[2];
696 FTS *fts;
697 FTSENT *entry;
698
699 path_argv[0] = dir;
700 path_argv[1] = 0;
701 fts = fts_open(path_argv, FTS_PHYSICAL, NULL);
702 if (fts == NULL) {
703 warn("couldn't fts_open %s", dir);
704 return;
705 }
706
707 while ((entry = fts_read(fts))) {
708 switch(entry->fts_info) {
709 case FTS_D:
710 case FTS_DP:
711 continue;
712
713 case FTS_DNR:
714 case FTS_ERR:
715 case FTS_NS:
716 maybe_warn("%s", entry->fts_path);
717 continue;
718 case FTS_F:
719 handle_file(entry->fts_name, entry->fts_statp);
720 }
721 }
722 (void)fts_close(fts);
723 }
724
/*
 * Print compression statistics, and the new name (if there is one!),
 * as one line on stderr.
 *
 * file:    name of the file that was processed
 * newfile: name of the file that replaced it, or NULL for none
 * usize:   uncompressed size in bytes
 * gsize:   compressed size in bytes
 *
 * The figure printed is the space saved relative to usize.  An empty
 * input (usize == 0) is reported as 0.0% instead of dividing by zero.
 */
static void
print_verbage(char *file, char *newfile, ssize_t usize, ssize_t gsize)
{
	float percent = 0.0f;

	if (usize != 0)
		percent = (float)(100.0 - (100.0 * gsize / usize));

	/* short names get a second tab so the columns line up */
	fprintf(stderr, "%s:%s %4.1f%%", file,
	    strlen(file) < 7 ? "\t\t" : "\t", percent);
	if (newfile)
		fprintf(stderr, " -- replaced with %s", newfile);
	fprintf(stderr, "\n");
	fflush(stderr);
}
738
/*
 * Report the outcome of testing one file as a single line on stderr.
 *
 * file: name of the file that was tested
 * ok:   non-zero prints "OK", zero prints "NOT OK"
 */
static void
print_test(char *file, int ok)
{
	/* short names get a second tab so the columns line up */
	const char *pad = strlen(file) < 7 ? "\t\t" : "\t";
	const char *verdict = ok ? "OK" : "NOT OK";

	fprintf(stderr, "%s:%s %s\n", file, pad, verdict);
	fflush(stderr);
}
748
749 /* display the usage of NetBSD gzip */
750 static void
751 usage(void)
752 {
753
754 fprintf(stderr, "%s\n", gzip_version);
755 fprintf(stderr,
756 "Usage: %s [-cdfhnNqrStvV123456789] [<file> [<file> ...]]\n"
757 " -c --stdout write to stdout, keep original files\n"
758 " --to-stdout\n"
759 " -d --decompress uncompress files\n"
760 " --uncompress\n"
761 " -f --force force overwriting & compress links\n"
762 " -h --help display this help\n"
763 " -n --no-name don't save original file name or time stamp\n"
764 " -N --name save or restore original file name and time stamp\n"
765 " -q --quiet output no warnings\n"
766 " -r --recursive recursively compress files in directories\n"
767 " -S .suf use suffix .suf instead of .gz\n"
768 " --suffix .suf\n"
769 " -t --test test compressed file\n"
770 " -v --verbose print extra statistics\n"
771 " -V --version display program version\n"
772 " -1 --fast fastest (worst) compression\n"
773 " -2 .. -8 set compression level\n"
774 " -9 --best best (slowest) compression\n",
775 getprogname());
776 fflush(stderr);
777 exit(0);
778 }
779
780 /* display the version of NetBSD gzip */
781 static void
782 display_version(void)
783 {
784
785 fprintf(stderr, "%s\n", gzip_version);
786 fflush(stderr);
787 exit(0);
788 }
789