Home | History | Annotate | Line # | Download | only in examples
02_decompress.c revision 1.1
      1  1.1  christos ///////////////////////////////////////////////////////////////////////////////
      2  1.1  christos //
      3  1.1  christos /// \file       02_decompress.c
      4  1.1  christos /// \brief      Decompress .xz files to stdout
      5  1.1  christos ///
      6  1.1  christos /// Usage:      ./02_decompress INPUT_FILES... > OUTFILE
      7  1.1  christos ///
      8  1.1  christos /// Example:    ./02_decompress foo.xz bar.xz > foobar
      9  1.1  christos //
     10  1.1  christos //  Author:     Lasse Collin
     11  1.1  christos //
     12  1.1  christos //  This file has been put into the public domain.
     13  1.1  christos //  You can do whatever you want with this file.
     14  1.1  christos //
     15  1.1  christos ///////////////////////////////////////////////////////////////////////////////
     16  1.1  christos 
     17  1.1  christos #include <stdbool.h>
     18  1.1  christos #include <stdlib.h>
     19  1.1  christos #include <stdio.h>
     20  1.1  christos #include <string.h>
     21  1.1  christos #include <errno.h>
     22  1.1  christos #include <lzma.h>
     23  1.1  christos 
     24  1.1  christos 
     25  1.1  christos static bool
     26  1.1  christos init_decoder(lzma_stream *strm)
     27  1.1  christos {
     28  1.1  christos 	// Initialize a .xz decoder. The decoder supports a memory usage limit
     29  1.1  christos 	// and a set of flags.
     30  1.1  christos 	//
     31  1.1  christos 	// The memory usage of the decompressor depends on the settings used
     32  1.1  christos 	// to compress a .xz file. It can vary from less than a megabyte to
     33  1.1  christos 	// a few gigabytes, but in practice (at least for now) it rarely
     34  1.1  christos 	// exceeds 65 MiB because that's how much memory is required to
     35  1.1  christos 	// decompress files created with "xz -9". Settings requiring more
     36  1.1  christos 	// memory take extra effort to use and don't (at least for now)
     37  1.1  christos 	// provide significantly better compression in most cases.
     38  1.1  christos 	//
     39  1.1  christos 	// Memory usage limit is useful if it is important that the
     40  1.1  christos 	// decompressor won't consume gigabytes of memory. The need
     41  1.1  christos 	// for limiting depends on the application. In this example,
     42  1.1  christos 	// no memory usage limiting is used. This is done by setting
     43  1.1  christos 	// the limit to UINT64_MAX.
     44  1.1  christos 	//
     45  1.1  christos 	// The .xz format allows concatenating compressed files as is:
     46  1.1  christos 	//
     47  1.1  christos 	//     echo foo | xz > foobar.xz
     48  1.1  christos 	//     echo bar | xz >> foobar.xz
     49  1.1  christos 	//
     50  1.1  christos 	// When decompressing normal standalone .xz files, LZMA_CONCATENATED
     51  1.1  christos 	// should always be used to support decompression of concatenated
     52  1.1  christos 	// .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
     53  1.1  christos 	// after the first .xz stream. This can be useful when .xz data has
     54  1.1  christos 	// been embedded inside another file format.
     55  1.1  christos 	//
     56  1.1  christos 	// Flags other than LZMA_CONCATENATED are supported too, and can
     57  1.1  christos 	// be combined with bitwise-or. See lzma/container.h
     58  1.1  christos 	// (src/liblzma/api/lzma/container.h in the source package or e.g.
     59  1.1  christos 	// /usr/include/lzma/container.h depending on the install prefix)
     60  1.1  christos 	// for details.
     61  1.1  christos 	lzma_ret ret = lzma_stream_decoder(
     62  1.1  christos 			strm, UINT64_MAX, LZMA_CONCATENATED);
     63  1.1  christos 
     64  1.1  christos 	// Return successfully if the initialization went fine.
     65  1.1  christos 	if (ret == LZMA_OK)
     66  1.1  christos 		return true;
     67  1.1  christos 
     68  1.1  christos 	// Something went wrong. The possible errors are documented in
     69  1.1  christos 	// lzma/container.h (src/liblzma/api/lzma/container.h in the source
     70  1.1  christos 	// package or e.g. /usr/include/lzma/container.h depending on the
     71  1.1  christos 	// install prefix).
     72  1.1  christos 	//
     73  1.1  christos 	// Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
     74  1.1  christos 	// specify a very tiny limit, the error will be delayed until
     75  1.1  christos 	// the first headers have been parsed by a call to lzma_code().
     76  1.1  christos 	const char *msg;
     77  1.1  christos 	switch (ret) {
     78  1.1  christos 	case LZMA_MEM_ERROR:
     79  1.1  christos 		msg = "Memory allocation failed";
     80  1.1  christos 		break;
     81  1.1  christos 
     82  1.1  christos 	case LZMA_OPTIONS_ERROR:
     83  1.1  christos 		msg = "Unsupported decompressor flags";
     84  1.1  christos 		break;
     85  1.1  christos 
     86  1.1  christos 	default:
     87  1.1  christos 		// This is most likely LZMA_PROG_ERROR indicating a bug in
     88  1.1  christos 		// this program or in liblzma. It is inconvenient to have a
     89  1.1  christos 		// separate error message for errors that should be impossible
     90  1.1  christos 		// to occur, but knowing the error code is important for
     91  1.1  christos 		// debugging. That's why it is good to print the error code
     92  1.1  christos 		// at least when there is no good error message to show.
     93  1.1  christos 		msg = "Unknown error, possibly a bug";
     94  1.1  christos 		break;
     95  1.1  christos 	}
     96  1.1  christos 
     97  1.1  christos 	fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
     98  1.1  christos 			msg, ret);
     99  1.1  christos 	return false;
    100  1.1  christos }
    101  1.1  christos 
    102  1.1  christos 
    103  1.1  christos static bool
    104  1.1  christos decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
    105  1.1  christos {
    106  1.1  christos 	// When LZMA_CONCATENATED flag was used when initializing the decoder,
    107  1.1  christos 	// we need to tell lzma_code() when there will be no more input.
    108  1.1  christos 	// This is done by setting action to LZMA_FINISH instead of LZMA_RUN
    109  1.1  christos 	// in the same way as it is done when encoding.
    110  1.1  christos 	//
    111  1.1  christos 	// When LZMA_CONCATENATED isn't used, there is no need to use
    112  1.1  christos 	// LZMA_FINISH to tell when all the input has been read, but it
    113  1.1  christos 	// is still OK to use it if you want. When LZMA_CONCATENATED isn't
    114  1.1  christos 	// used, the decoder will stop after the first .xz stream. In that
    115  1.1  christos 	// case some unused data may be left in strm->next_in.
    116  1.1  christos 	lzma_action action = LZMA_RUN;
    117  1.1  christos 
    118  1.1  christos 	uint8_t inbuf[BUFSIZ];
    119  1.1  christos 	uint8_t outbuf[BUFSIZ];
    120  1.1  christos 
    121  1.1  christos 	strm->next_in = NULL;
    122  1.1  christos 	strm->avail_in = 0;
    123  1.1  christos 	strm->next_out = outbuf;
    124  1.1  christos 	strm->avail_out = sizeof(outbuf);
    125  1.1  christos 
    126  1.1  christos 	while (true) {
    127  1.1  christos 		if (strm->avail_in == 0 && !feof(infile)) {
    128  1.1  christos 			strm->next_in = inbuf;
    129  1.1  christos 			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
    130  1.1  christos 					infile);
    131  1.1  christos 
    132  1.1  christos 			if (ferror(infile)) {
    133  1.1  christos 				fprintf(stderr, "%s: Read error: %s\n",
    134  1.1  christos 						inname, strerror(errno));
    135  1.1  christos 				return false;
    136  1.1  christos 			}
    137  1.1  christos 
    138  1.1  christos 			// Once the end of the input file has been reached,
    139  1.1  christos 			// we need to tell lzma_code() that no more input
    140  1.1  christos 			// will be coming. As said before, this isn't required
    141  1.1  christos 			// if the LZMA_CONATENATED flag isn't used when
    142  1.1  christos 			// initializing the decoder.
    143  1.1  christos 			if (feof(infile))
    144  1.1  christos 				action = LZMA_FINISH;
    145  1.1  christos 		}
    146  1.1  christos 
    147  1.1  christos 		lzma_ret ret = lzma_code(strm, action);
    148  1.1  christos 
    149  1.1  christos 		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
    150  1.1  christos 			size_t write_size = sizeof(outbuf) - strm->avail_out;
    151  1.1  christos 
    152  1.1  christos 			if (fwrite(outbuf, 1, write_size, outfile)
    153  1.1  christos 					!= write_size) {
    154  1.1  christos 				fprintf(stderr, "Write error: %s\n",
    155  1.1  christos 						strerror(errno));
    156  1.1  christos 				return false;
    157  1.1  christos 			}
    158  1.1  christos 
    159  1.1  christos 			strm->next_out = outbuf;
    160  1.1  christos 			strm->avail_out = sizeof(outbuf);
    161  1.1  christos 		}
    162  1.1  christos 
    163  1.1  christos 		if (ret != LZMA_OK) {
    164  1.1  christos 			// Once everything has been decoded successfully, the
    165  1.1  christos 			// return value of lzma_code() will be LZMA_STREAM_END.
    166  1.1  christos 			//
    167  1.1  christos 			// It is important to check for LZMA_STREAM_END. Do not
    168  1.1  christos 			// assume that getting ret != LZMA_OK would mean that
    169  1.1  christos 			// everything has gone well or that when you aren't
    170  1.1  christos 			// getting more output it must have successfully
    171  1.1  christos 			// decoded everything.
    172  1.1  christos 			if (ret == LZMA_STREAM_END)
    173  1.1  christos 				return true;
    174  1.1  christos 
    175  1.1  christos 			// It's not LZMA_OK nor LZMA_STREAM_END,
    176  1.1  christos 			// so it must be an error code. See lzma/base.h
    177  1.1  christos 			// (src/liblzma/api/lzma/base.h in the source package
    178  1.1  christos 			// or e.g. /usr/include/lzma/base.h depending on the
    179  1.1  christos 			// install prefix) for the list and documentation of
    180  1.1  christos 			// possible values. Many values listen in lzma_ret
    181  1.1  christos 			// enumeration aren't possible in this example, but
    182  1.1  christos 			// can be made possible by enabling memory usage limit
    183  1.1  christos 			// or adding flags to the decoder initialization.
    184  1.1  christos 			const char *msg;
    185  1.1  christos 			switch (ret) {
    186  1.1  christos 			case LZMA_MEM_ERROR:
    187  1.1  christos 				msg = "Memory allocation failed";
    188  1.1  christos 				break;
    189  1.1  christos 
    190  1.1  christos 			case LZMA_FORMAT_ERROR:
    191  1.1  christos 				// .xz magic bytes weren't found.
    192  1.1  christos 				msg = "The input is not in the .xz format";
    193  1.1  christos 				break;
    194  1.1  christos 
    195  1.1  christos 			case LZMA_OPTIONS_ERROR:
    196  1.1  christos 				// For example, the headers specify a filter
    197  1.1  christos 				// that isn't supported by this liblzma
    198  1.1  christos 				// version (or it hasn't been enabled when
    199  1.1  christos 				// building liblzma, but no-one sane does
    200  1.1  christos 				// that unless building liblzma for an
    201  1.1  christos 				// embedded system). Upgrading to a newer
    202  1.1  christos 				// liblzma might help.
    203  1.1  christos 				//
    204  1.1  christos 				// Note that it is unlikely that the file has
    205  1.1  christos 				// accidentally became corrupt if you get this
    206  1.1  christos 				// error. The integrity of the .xz headers is
    207  1.1  christos 				// always verified with a CRC32, so
    208  1.1  christos 				// unintentionally corrupt files can be
    209  1.1  christos 				// distinguished from unsupported files.
    210  1.1  christos 				msg = "Unsupported compression options";
    211  1.1  christos 				break;
    212  1.1  christos 
    213  1.1  christos 			case LZMA_DATA_ERROR:
    214  1.1  christos 				msg = "Compressed file is corrupt";
    215  1.1  christos 				break;
    216  1.1  christos 
    217  1.1  christos 			case LZMA_BUF_ERROR:
    218  1.1  christos 				// Typically this error means that a valid
    219  1.1  christos 				// file has got truncated, but it might also
    220  1.1  christos 				// be a damaged part in the file that makes
    221  1.1  christos 				// the decoder think the file is truncated.
    222  1.1  christos 				// If you prefer, you can use the same error
    223  1.1  christos 				// message for this as for LZMA_DATA_ERROR.
    224  1.1  christos 				msg = "Compressed file is truncated or "
    225  1.1  christos 						"otherwise corrupt";
    226  1.1  christos 				break;
    227  1.1  christos 
    228  1.1  christos 			default:
    229  1.1  christos 				// This is most likely LZMA_PROG_ERROR.
    230  1.1  christos 				msg = "Unknown error, possibly a bug";
    231  1.1  christos 				break;
    232  1.1  christos 			}
    233  1.1  christos 
    234  1.1  christos 			fprintf(stderr, "%s: Decoder error: "
    235  1.1  christos 					"%s (error code %u)\n",
    236  1.1  christos 					inname, msg, ret);
    237  1.1  christos 			return false;
    238  1.1  christos 		}
    239  1.1  christos 	}
    240  1.1  christos }
    241  1.1  christos 
    242  1.1  christos 
    243  1.1  christos extern int
    244  1.1  christos main(int argc, char **argv)
    245  1.1  christos {
    246  1.1  christos 	if (argc <= 1) {
    247  1.1  christos 		fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
    248  1.1  christos 		return EXIT_FAILURE;
    249  1.1  christos 	}
    250  1.1  christos 
    251  1.1  christos 	lzma_stream strm = LZMA_STREAM_INIT;
    252  1.1  christos 
    253  1.1  christos 	bool success = true;
    254  1.1  christos 
    255  1.1  christos 	// Try to decompress all files.
    256  1.1  christos 	for (int i = 1; i < argc; ++i) {
    257  1.1  christos 		if (!init_decoder(&strm)) {
    258  1.1  christos 			// Decoder initialization failed. There's no point
    259  1.1  christos 			// to retry it so we need to exit.
    260  1.1  christos 			success = false;
    261  1.1  christos 			break;
    262  1.1  christos 		}
    263  1.1  christos 
    264  1.1  christos 		FILE *infile = fopen(argv[i], "rb");
    265  1.1  christos 
    266  1.1  christos 		if (infile == NULL) {
    267  1.1  christos 			fprintf(stderr, "%s: Error opening the "
    268  1.1  christos 					"input file: %s\n",
    269  1.1  christos 					argv[i], strerror(errno));
    270  1.1  christos 			success = false;
    271  1.1  christos 		} else {
    272  1.1  christos 			success &= decompress(&strm, argv[i], infile, stdout);
    273  1.1  christos 			fclose(infile);
    274  1.1  christos 		}
    275  1.1  christos 	}
    276  1.1  christos 
    277  1.1  christos 	// Free the memory allocated for the decoder. This only needs to be
    278  1.1  christos 	// done after the last file.
    279  1.1  christos 	lzma_end(&strm);
    280  1.1  christos 
    281  1.1  christos 	if (fclose(stdout)) {
    282  1.1  christos 		fprintf(stderr, "Write error: %s\n", strerror(errno));
    283  1.1  christos 		success = false;
    284  1.1  christos 	}
    285  1.1  christos 
    286  1.1  christos 	return success ? EXIT_SUCCESS : EXIT_FAILURE;
    287  1.1  christos }
    288