gzip.c revision 1.4 1 /* $NetBSD: gzip.c,v 1.4 2003/12/23 15:02:40 mrg Exp $ */
2
3 /*
4 * Copyright (c) 1997, 1998, 2003 Matthew R. Green
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 #ifndef lint
33 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003 Matthew R. Green\n\
34 All rights reserved.\n");
35 __RCSID("$NetBSD: gzip.c,v 1.4 2003/12/23 15:02:40 mrg Exp $");
36 #endif /* not lint */
37
38 /*
39 * gzip.c -- GPL free gzip using zlib.
40 *
41 * Very minor portions of this code are (very loosely) derived from
42 * the minigzip.c in the zlib distribution.
43 */
44
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 #include <sys/time.h>
48
49 #include <unistd.h>
50 #include <stdio.h>
51 #include <string.h>
52 #include <stdlib.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fcntl.h>
56 #include <zlib.h>
57 #include <fts.h>
58 #include <libgen.h>
59 #include <stdarg.h>
60 #include <getopt.h>
61
/* Default suffix for compressed files; can be overridden at build time. */
#ifndef GZ_SUFFIX
# define GZ_SUFFIX ".gz"
#endif

/* Size in bytes of the buffers used to move data through zlib. */
#define BUFLEN 4096

/* Bit in the gzip header flags byte: an original file name is stored. */
#define ORIG_NAME 0x08

/*
 * Set this to 1 (e.g. -DHAVE_ZLIB_GZOPENFULL=1) if you have the NetBSD
 * gzopenfull(3) extension to zlib(3).  Guarded so that a definition
 * supplied by the build does not clash with the default.
 */
#ifndef HAVE_ZLIB_GZOPENFULL
# define HAVE_ZLIB_GZOPENFULL 0
#endif
72
73 static const char gzip_version[] = "NetBSD gzip 2.0";
74
75 static char gzipflags[3]; /* `w' or `r', possible with [1-9] */
76 static int cflag; /* stdout mode */
77 static int dflag; /* decompress mode */
78 static int fflag; /* force mode */
79 static int nflag; /* don't save name/timestamp */
80 static int Nflag; /* don't restore name/timestamp */
81 static int qflag; /* quiet mode */
82 static int rflag; /* recursive mode */
83 static int tflag; /* test */
84 static int vflag; /* verbose mode */
85 static const char *Sflag = GZ_SUFFIX; /* suffix (.gz) */
86
87 static int suffix_len; /* length of suffix; includes nul */
88 static char *newfile; /* name of newly created file */
89 static char *infile; /* name of file coming in */
90
91 static void maybe_err(int rv, const char *fmt, ...);
92 static void maybe_warn(const char *fmt, ...);
93 static void maybe_warnx(const char *fmt, ...);
94 static void usage(void);
95 static void display_version(void);
96 static void gz_compress(FILE *, gzFile);
97 static off_t gz_uncompress(gzFile, FILE *);
98 static void copymodes(const char *, struct stat *);
99 static ssize_t file_compress(char *);
100 static ssize_t file_uncompress(char *);
101 static void handle_pathname(char *);
102 static void handle_file(char *, struct stat *);
103 static void handle_dir(char *, struct stat *);
104 static void handle_stdin(void);
105 static void handle_stdout(void);
106 static void print_verbage(char *, char *, ssize_t, ssize_t);
107 static void print_test(char *, int);
108
109 int main(int, char *p[]);
110
/*
 * Long option table for getopt_long(3).  Every long option maps onto the
 * short option character handled by the switch in main().
 */
static const struct option longopts[] = {
	{ "stdout",	no_argument,		0,	'c' },
	{ "to-stdout",	no_argument,		0,	'c' },
	{ "decompress",	no_argument,		0,	'd' },
	{ "uncompress",	no_argument,		0,	'd' },
	{ "force",	no_argument,		0,	'f' },
	{ "help",	no_argument,		0,	'h' },
	{ "no-name",	no_argument,		0,	'n' },
	{ "name",	no_argument,		0,	'N' },
	{ "quiet",	no_argument,		0,	'q' },
	{ "recursive",	no_argument,		0,	'r' },
	{ "suffix",	required_argument,	0,	'S' },
	{ "test",	no_argument,		0,	't' },
	{ "verbose",	no_argument,		0,	'v' },
	{ "version",	no_argument,		0,	'V' },
	{ "fast",	no_argument,		0,	'1' },
	{ "best",	no_argument,		0,	'9' },
#if 0
	/*
	 * This is what else GNU gzip implements.  Maybe --list is
	 * useful, but --ascii isn't useful on NetBSD, and I don't
	 * care to have a --license.
	 */
	{ "ascii",	no_argument,		0,	'a' },
	{ "list",	no_argument,		0,	'l' },
	{ "license",	no_argument,		0,	'L' },
#endif
	/* getopt_long() requires the array to end with an all-zero entry */
	{ NULL,		0,			NULL,	0 },
};
139
140 int
141 main(int argc, char **argv)
142 {
143 const char *progname = getprogname();
144 int ch;
145
146 gzipflags[0] = 'w';
147 gzipflags[1] = '\0';
148
149 /*
150 * XXX
151 * handle being called `gunzip', `zcat' and `gzcat'
152 */
153 if (strcmp(progname, "gunzip") == 0)
154 dflag = 1;
155 else if (strcmp(progname, "zcat") == 0 ||
156 strcmp(progname, "gzcat") == 0)
157 dflag = cflag = 1;
158
159 while ((ch = getopt_long(argc, argv, "cdfhHlnNqrS:tvV123456789",
160 longopts, NULL)) != -1)
161 switch (ch) {
162 case 'c':
163 cflag = 1;
164 break;
165 case 'd':
166 dflag = 1;
167 break;
168 case 'f':
169 fflag = 1;
170 break;
171 case 'h':
172 case 'H':
173 usage();
174 /* NOTREACHED */
175 case 'n':
176 nflag = 1;
177 Nflag = 0;
178 break;
179 case 'N':
180 nflag = 0;
181 Nflag = 1;
182 break;
183 case 'q':
184 qflag = 1;
185 break;
186 case 'r':
187 rflag = 1;
188 break;
189 case 'S':
190 Sflag = optarg;
191 break;
192 case 't':
193 cflag = 1;
194 tflag = 1;
195 dflag = 1;
196 break;
197 case 'v':
198 vflag = 1;
199 break;
200 case 'V':
201 display_version();
202 /* NOTREACHED */
203 case '1': case '2': case '3':
204 case '4': case '5': case '6':
205 case '7': case '8': case '9':
206 gzipflags[1] = (char)ch;
207 gzipflags[2] = '\0';
208 break;
209 }
210 argv += optind;
211 argc -= optind;
212 if (dflag)
213 gzipflags[0] = 'r';
214
215 suffix_len = strlen(Sflag) + 1;
216
217 if (argc == 0) {
218 if (dflag) /* stdin mode */
219 handle_stdin();
220 else /* stdout mode */
221 handle_stdout();
222 } else {
223 do {
224 handle_pathname(argv[0]);
225 } while (argv++, --argc);
226 }
227 exit(0);
228 }
229
230 /* maybe print a warning */
231 void
232 maybe_warn(const char *fmt, ...)
233 {
234 va_list ap;
235
236 if (qflag == 0) {
237 va_start(ap, fmt);
238 vwarn(fmt, ap);
239 va_end(ap);
240 }
241 }
242
243 void
244 maybe_warnx(const char *fmt, ...)
245 {
246 va_list ap;
247
248 if (qflag == 0) {
249 va_start(ap, fmt);
250 vwarnx(fmt, ap);
251 va_end(ap);
252 }
253 }
254
255 /* maybe print a warning */
256 void
257 maybe_err(int rv, const char *fmt, ...)
258 {
259 va_list ap;
260
261 if (qflag == 0) {
262 va_start(ap, fmt);
263 vwarn(fmt, ap);
264 va_end(ap);
265 }
266 exit(rv);
267 }
268
269 /* compress input to output then close both files */
270 static void
271 gz_compress(FILE *in, gzFile out)
272 {
273 char buf[BUFLEN];
274 ssize_t len;
275 int i;
276
277 for (;;) {
278 len = fread(buf, 1, sizeof(buf), in);
279 if (ferror(in))
280 maybe_err(1, "fread");
281 if (len == 0)
282 break;
283
284 if ((ssize_t)gzwrite(out, buf, len) != len)
285 maybe_err(1, gzerror(out, &i));
286 }
287 if (fclose(in) < 0)
288 maybe_err(1, "failed fclose");
289 if (gzclose(out) != Z_OK)
290 maybe_err(1, "failed gzclose");
291 }
292
293 /* uncompress input to output then close the input */
294 static off_t
295 gz_uncompress(gzFile in, FILE *out)
296 {
297 char buf[BUFLEN];
298 off_t size;
299 ssize_t len;
300 int i;
301
302 for (size = 0;;) {
303 len = gzread(in, buf, sizeof(buf));
304
305 if (len < 0) {
306 if (tflag) {
307 print_test(infile, 0);
308 return (0);
309 } else
310 maybe_err(1, gzerror(in, &i));
311 } else if (len == 0) {
312 if (tflag)
313 print_test(infile, 1);
314 break;
315 }
316
317 size += len;
318
319 /* don't write anything with -t */
320 if (tflag)
321 continue;
322
323 if (fwrite(buf, 1, (unsigned)len, out) != (ssize_t)len)
324 maybe_err(1, "failed fwrite");
325 }
326 if (gzclose(in) != Z_OK)
327 maybe_err(1, "failed gzclose");
328
329 return (size);
330 }
331
332 /*
333 * set the owner, mode, flags & utimes for a file
334 */
335 static void
336 copymodes(const char *file, struct stat *sbp)
337 {
338 struct timeval times[2];
339
340 /*
341 * If we have no info on the input, give this file some
342 * default values and return..
343 */
344 if (sbp == NULL) {
345 mode_t mask = umask(022);
346
347 (void)chmod(file, DEFFILEMODE & ~mask);
348 (void)umask(mask);
349 return;
350 }
351
352 /* if the chown fails, remove set-id bits as-per compress(1) */
353 if (chown(file, sbp->st_uid, sbp->st_gid) < 0) {
354 if (errno != EPERM)
355 maybe_warn("couldn't chown: %s", file);
356 sbp->st_mode &= ~(S_ISUID|S_ISGID);
357 }
358
359 /* we only allow set-id and the 9 normal permission bits */
360 sbp->st_mode &= S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO;
361 if (chmod(file, sbp->st_mode) < 0)
362 maybe_warn("couldn't chmod: %s", file);
363
364 /* only try flags if they exist already */
365 if (sbp->st_flags != 0 && chflags(file, sbp->st_flags) < 0)
366 maybe_warn("couldn't chflags: %s", file);
367
368 TIMESPEC_TO_TIMEVAL(×[0], &sbp->st_atimespec);
369 TIMESPEC_TO_TIMEVAL(×[1], &sbp->st_mtimespec);
370 if (utimes(file, times) < 0)
371 maybe_warn("couldn't utimes: %s", file);
372 }
373
374 /*
375 * compress the given file: create a corresponding .gz file and remove the
376 * original.
377 */
378 static ssize_t
379 file_compress(char *file)
380 {
381 char outfile[MAXPATHLEN];
382 FILE *in;
383 gzFile out;
384 struct stat isb, osb;
385 ssize_t size;
386 u_int32_t mtime = 0;
387
388 if (cflag == 0) {
389 (void)strncpy(outfile, file, MAXPATHLEN - suffix_len);
390 outfile[MAXPATHLEN - suffix_len] = '\0';
391 (void)strlcat(outfile, Sflag, sizeof(outfile));
392
393 if (fflag == 0) {
394 if (stat(outfile, &osb) == 0) {
395 maybe_warnx("%s already exists -- skipping",
396 outfile);
397 goto lose;
398 }
399 }
400 if (stat(file, &isb) == 0) {
401 if (isb.st_nlink > 1) {
402 maybe_warnx("%s has %d other link%s -- "
403 "skipping", file, isb.st_nlink-1,
404 isb.st_nlink == 1 ? "" : "s");
405 goto lose;
406 }
407 if (nflag == 0)
408 mtime = (u_int32_t)isb.st_mtime;
409 }
410 }
411 in = fopen(file, "r");
412 if (in == 0)
413 maybe_err(1, "can't fopen %s", file);
414
415 if (cflag == 0) {
416 #if HAVE_ZLIB_GZOPENFULL
417 char *savename;
418
419 if (nflag == 0)
420 savename = basename(file);
421 else
422 savename = NULL;
423 out = gzopenfull(outfile, gzipflags, savename, mtime);
424 #else
425 out = gzopen(outfile, gzipflags);
426 #endif
427 } else
428 out = gzdopen(STDOUT_FILENO, gzipflags);
429
430 if (out == 0)
431 maybe_err(1, "can't gz%sopen %s",
432 cflag ? "d" : "",
433 cflag ? "stdout" : outfile);
434
435 gz_compress(in, out);
436
437 /*
438 * if we compressed to stdout, we don't know the size and
439 * we don't know the new file name, punt. if we can't stat
440 * the file, whine, otherwise set the size from the stat
441 * buffer. we only blow away the file if we can stat the
442 * output, just in case.
443 */
444 if (cflag == 0) {
445 if (stat(outfile, &osb) < 0) {
446 maybe_warn("couldn't stat: %s", outfile);
447 maybe_warnx("leaving original %s", file);
448 size = 0;
449 } else {
450 unlink(file);
451 size = osb.st_size;
452 }
453 newfile = outfile;
454 copymodes(outfile, &isb);
455 } else {
456 lose:
457 size = 0;
458 newfile = 0;
459 }
460
461 return (size);
462 }
463
464 /* uncompress the given file and remove the original */
465 static ssize_t
466 file_uncompress(char *file)
467 {
468 struct stat isb, osb;
469 char buf[MAXPATHLEN];
470 char *outfile = buf, *s;
471 FILE *out;
472 gzFile in;
473 off_t size;
474 ssize_t len = strlen(file);
475
476 if (cflag == 0) {
477 s = &file[len - suffix_len + 1];
478 if (strncmp(s, Sflag, suffix_len) == 0) {
479 (void)strncpy(outfile, file, len - suffix_len + 1);
480 outfile[len - suffix_len + 1] = '\0';
481 } else
482 maybe_err(1, "unknown suffix %s", s);
483
484 /* gather the old name info */
485 if (Nflag) {
486 int fd;
487 char header1[10], name[PATH_MAX + 1];
488
489 fd = open(file, O_RDONLY);
490 if (fd < 0)
491 maybe_err(1, "can't open %s", file);
492 if (read(fd, header1, 10) != 10)
493 maybe_err(1, "can't read %s", file);
494
495 if (header1[3] & ORIG_NAME) {
496 size_t rbytes;
497 int i;
498
499 rbytes = read(fd, name, PATH_MAX + 1);
500 if (rbytes < 0)
501 maybe_err(1, "can't read %s", file);
502 for (i = 0; i < rbytes && name[i]; i++)
503 ;
504 if (i < rbytes) {
505 name[i] = 0;
506 /* now maybe merge old dirname */
507 if (strchr(outfile, '/') == 0)
508 outfile = name;
509 else {
510 char *dir = dirname(outfile);
511 if (asprintf(&outfile, "%s/%s",
512 dir, name) == -1)
513 maybe_err(1, "malloc");
514 }
515 }
516 }
517 close(fd);
518 }
519
520 if (fflag == 0) {
521 if (stat(outfile, &isb) == 0) {
522 maybe_warnx("%s already exists -- skipping",
523 outfile);
524 goto lose;
525 }
526 if (stat(file, &isb) == 0 && isb.st_nlink > 1) {
527 maybe_warnx("%s has %d other link%s -- "
528 "skipping", file, isb.st_nlink-1,
529 isb.st_nlink == 1 ? "" : "s");
530 goto lose;
531 }
532 }
533 }
534 in = gzopen(file, gzipflags);
535 if (in == NULL)
536 maybe_err(1, "can't gzopen %s", file);
537
538 if (cflag == 0) {
539 int fd;
540
541 /* Use open(2) directly to get a safe file. */
542 fd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600);
543 if (fd < 0)
544 maybe_err(1, "can't open %s", outfile);
545 out = fdopen(fd, "w");
546 if (out == NULL)
547 maybe_err(1, "can't fdopen %s", outfile);
548 } else
549 out = stdout;
550
551 if ((size = gz_uncompress(in, out)) == 0)
552 goto lose;
553
554 /* if testing, or we uncompressed to stdout, this is all we need */
555 if (tflag || cflag)
556 return (size);
557
558 /*
559 * if we create a file...
560 */
561 if (cflag == 0) {
562 /* close the file */
563 if (fclose(out))
564 maybe_err(1, "failed fclose");
565
566 /*
567 * if we can't stat the file, or we are uncompressing to
568 * stdin, don't remove the file.
569 */
570 if (stat(outfile, &osb) < 0) {
571 maybe_warn("couldn't stat (leaving original): %s",
572 outfile);
573 goto lose;
574 }
575 if (osb.st_size != size) {
576 maybe_warn("stat gave different size: %llu != %llu "
577 "(leaving original)",
578 (unsigned long long)size,
579 (unsigned long long)osb.st_size);
580 goto lose;
581 }
582 newfile = outfile;
583 unlink(file);
584 size = osb.st_size;
585 copymodes(outfile, &isb);
586 }
587 return (size);
588
589 lose:
590 newfile = 0;
591 return (0);
592 }
593
594 static void
595 handle_stdin(void)
596 {
597 gzFile *file;
598
599 if (fflag == 0 && isatty(STDIN_FILENO)) {
600 maybe_warnx("standard input is a terminal -- ignoring");
601 return;
602 }
603 file = gzdopen(STDIN_FILENO, gzipflags);
604 if (file == NULL)
605 maybe_err(1, "can't gzdopen stdin");
606 gz_uncompress(file, stdout);
607 }
608
609 static void
610 handle_stdout(void)
611 {
612 gzFile *file;
613
614 if (fflag == 0 && isatty(STDOUT_FILENO)) {
615 maybe_warnx("standard output is a terminal -- ignoring");
616 return;
617 }
618 file = gzdopen(STDOUT_FILENO, gzipflags);
619 if (file == NULL)
620 maybe_err(1, "can't gzdopen stdout");
621 gz_compress(stdin, file);
622 }
623
624 /* do what is asked for, for the path name */
625 static void
626 handle_pathname(char *path)
627 {
628 char *opath = path, *s = 0;
629 ssize_t len;
630 struct stat sb;
631
632 /* check for stdout/stdin */
633 if (path[0] == '-' && path[1] == '\0') {
634 if (dflag)
635 handle_stdin();
636 else
637 handle_stdout();
638 }
639
640 retry:
641 if (stat(path, &sb) < 0) {
642 /* lets try <path>.gz if we're decompressing */
643 if (dflag && s == 0 && errno == ENOENT) {
644 len = strlen(path);
645 s = malloc(len + suffix_len);
646 if (s == 0)
647 maybe_err(1, "malloc");
648 memmove(s, path, len);
649 memmove(&s[len], Sflag, suffix_len);
650 path = s;
651 goto retry;
652 }
653 maybe_warn("can't stat: %s", opath);
654 goto out;
655 }
656
657 if (S_ISDIR(sb.st_mode)) {
658 if (rflag)
659 handle_dir(path, &sb);
660 else
661 maybe_warn("%s is a directory", path);
662 goto out;
663 }
664
665 if (S_ISREG(sb.st_mode))
666 handle_file(path, &sb);
667
668 out:
669 if (s)
670 free(s);
671 return;
672 }
673
674 /* compress/decompress a file */
675 static void
676 handle_file(char *file, struct stat *sbp)
677 {
678 ssize_t usize, gsize;
679
680 infile = file;
681 if (dflag) {
682 usize = file_uncompress(file);
683 if (usize == 0)
684 return;
685 gsize = sbp->st_size;
686 } else {
687 gsize = file_compress(file);
688 if (gsize == 0)
689 return;
690 usize = sbp->st_size;
691 }
692
693 if (vflag && !tflag)
694 print_verbage(file, cflag == 0 ? newfile : 0, usize, gsize);
695 }
696
697 /* this is used with -r to recursively decend directories */
698 static void
699 handle_dir(char *dir, struct stat *sbp)
700 {
701 char *path_argv[2];
702 FTS *fts;
703 FTSENT *entry;
704
705 path_argv[0] = dir;
706 path_argv[1] = 0;
707 fts = fts_open(path_argv, FTS_PHYSICAL, NULL);
708 if (fts == NULL) {
709 warn("couldn't fts_open %s", dir);
710 return;
711 }
712
713 while ((entry = fts_read(fts))) {
714 switch(entry->fts_info) {
715 case FTS_D:
716 case FTS_DP:
717 continue;
718
719 case FTS_DNR:
720 case FTS_ERR:
721 case FTS_NS:
722 maybe_warn("%s", entry->fts_path);
723 continue;
724 case FTS_F:
725 handle_file(entry->fts_name, entry->fts_statp);
726 }
727 }
728 (void)fts_close(fts);
729 }
730
/*
 * Print compression statistics for `file' on stderr, followed by the
 * name of the file that replaced it when `nfile' is not NULL.
 *
 * usize is the uncompressed size and gsize the compressed size; the
 * figure printed is the percentage saved.  An empty input (usize == 0)
 * is reported as 0.0% instead of dividing by zero.
 */
static void
print_verbage(char *file, char *nfile, ssize_t usize, ssize_t gsize)
{
	float percent = 0.0;

	if (usize != 0)
		percent = 100.0 - (100.0 * gsize / usize);

	/* short names get a second tab so that the columns line up */
	fprintf(stderr, "%s:%s %4.1f%%", file,
	    strlen(file) < 7 ? "\t\t" : "\t", percent);
	if (nfile)
		fprintf(stderr, " -- replaced with %s", nfile);
	fprintf(stderr, "\n");
	fflush(stderr);
}
744
/*
 * Report the outcome of a -t integrity check for `file' on stderr:
 * "OK" when `ok' is non-zero, "NOT OK" otherwise.
 */
static void
print_test(char *file, int ok)
{
	/* short names get a second tab so that the columns line up */
	const char *pad = (strlen(file) >= 7) ? "\t" : "\t\t";
	const char *verdict = "NOT OK";

	if (ok)
		verdict = "OK";

	fprintf(stderr, "%s:%s %s\n", file, pad, verdict);
	fflush(stderr);
}
754
755 /* display the usage of NetBSD gzip */
756 static void
757 usage(void)
758 {
759
760 fprintf(stderr, "%s\n", gzip_version);
761 fprintf(stderr,
762 "Usage: %s [-cdfhnNqrStvV123456789] [<file> [<file> ...]]\n"
763 " -c --stdout write to stdout, keep original files\n"
764 " --to-stdout\n"
765 " -d --decompress uncompress files\n"
766 " --uncompress\n"
767 " -f --force force overwriting & compress links\n"
768 " -h --help display this help\n"
769 " -n --no-name don't save original file name or time stamp\n"
770 " -N --name save or restore original file name and time stamp\n"
771 " -q --quiet output no warnings\n"
772 " -r --recursive recursively compress files in directories\n"
773 " -S .suf use suffix .suf instead of .gz\n"
774 " --suffix .suf\n"
775 " -t --test test compressed file\n"
776 " -v --verbose print extra statistics\n"
777 " -V --version display program version\n"
778 " -1 --fast fastest (worst) compression\n"
779 " -2 .. -8 set compression level\n"
780 " -9 --best best (slowest) compression\n",
781 getprogname());
782 fflush(stderr);
783 exit(0);
784 }
785
786 /* display the version of NetBSD gzip */
787 static void
788 display_version(void)
789 {
790
791 fprintf(stderr, "%s\n", gzip_version);
792 fflush(stderr);
793 exit(0);
794 }
795