1 1.1 christos /////////////////////////////////////////////////////////////////////////////// 2 1.1 christos // 3 1.1 christos /// \file 02_decompress.c 4 1.1 christos /// \brief Decompress .xz files to stdout 5 1.1 christos /// 6 1.1 christos /// Usage: ./02_decompress INPUT_FILES... > OUTFILE 7 1.1 christos /// 8 1.1 christos /// Example: ./02_decompress foo.xz bar.xz > foobar 9 1.1 christos // 10 1.1 christos // Author: Lasse Collin 11 1.1 christos // 12 1.1 christos // This file has been put into the public domain. 13 1.1 christos // You can do whatever you want with this file. 14 1.1 christos // 15 1.1 christos /////////////////////////////////////////////////////////////////////////////// 16 1.1 christos 17 1.1 christos #include <stdbool.h> 18 1.1 christos #include <stdlib.h> 19 1.1 christos #include <stdio.h> 20 1.1 christos #include <string.h> 21 1.1 christos #include <errno.h> 22 1.1 christos #include <lzma.h> 23 1.1 christos 24 1.1 christos 25 1.1 christos static bool 26 1.1 christos init_decoder(lzma_stream *strm) 27 1.1 christos { 28 1.1 christos // Initialize a .xz decoder. The decoder supports a memory usage limit 29 1.1 christos // and a set of flags. 30 1.1 christos // 31 1.1 christos // The memory usage of the decompressor depends on the settings used 32 1.1 christos // to compress a .xz file. It can vary from less than a megabyte to 33 1.1 christos // a few gigabytes, but in practice (at least for now) it rarely 34 1.1 christos // exceeds 65 MiB because that's how much memory is required to 35 1.1 christos // decompress files created with "xz -9". Settings requiring more 36 1.1 christos // memory take extra effort to use and don't (at least for now) 37 1.1 christos // provide significantly better compression in most cases. 38 1.1 christos // 39 1.1 christos // Memory usage limit is useful if it is important that the 40 1.1 christos // decompressor won't consume gigabytes of memory. The need 41 1.1 christos // for limiting depends on the application. In this example, 42 1.1 christos // no memory usage limiting is used. This is done by setting 43 1.1 christos // the limit to UINT64_MAX. 44 1.1 christos // 45 1.1 christos // The .xz format allows concatenating compressed files as is: 46 1.1 christos // 47 1.1 christos // echo foo | xz > foobar.xz 48 1.1 christos // echo bar | xz >> foobar.xz 49 1.1 christos // 50 1.1 christos // When decompressing normal standalone .xz files, LZMA_CONCATENATED 51 1.1 christos // should always be used to support decompression of concatenated 52 1.1 christos // .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop 53 1.1 christos // after the first .xz stream. This can be useful when .xz data has 54 1.1 christos // been embedded inside another file format. 55 1.1 christos // 56 1.1 christos // Flags other than LZMA_CONCATENATED are supported too, and can 57 1.1 christos // be combined with bitwise-or. See lzma/container.h 58 1.1 christos // (src/liblzma/api/lzma/container.h in the source package or e.g. 59 1.1 christos // /usr/include/lzma/container.h depending on the install prefix) 60 1.1 christos // for details. 61 1.1 christos lzma_ret ret = lzma_stream_decoder( 62 1.1 christos strm, UINT64_MAX, LZMA_CONCATENATED); 63 1.1 christos 64 1.1 christos // Return successfully if the initialization went fine. 65 1.1 christos if (ret == LZMA_OK) 66 1.1 christos return true; 67 1.1 christos 68 1.1 christos // Something went wrong. The possible errors are documented in 69 1.1 christos // lzma/container.h (src/liblzma/api/lzma/container.h in the source 70 1.1 christos // package or e.g. /usr/include/lzma/container.h depending on the 71 1.1 christos // install prefix). 72 1.1 christos // 73 1.1 christos // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you 74 1.1 christos // specify a very tiny limit, the error will be delayed until 75 1.1 christos // the first headers have been parsed by a call to lzma_code(). 76 1.1 christos const char *msg; 77 1.1 christos switch (ret) { 78 1.1 christos case LZMA_MEM_ERROR: 79 1.1 christos msg = "Memory allocation failed"; 80 1.1 christos break; 81 1.1 christos 82 1.1 christos case LZMA_OPTIONS_ERROR: 83 1.1 christos msg = "Unsupported decompressor flags"; 84 1.1 christos break; 85 1.1 christos 86 1.1 christos default: 87 1.1 christos // This is most likely LZMA_PROG_ERROR indicating a bug in 88 1.1 christos // this program or in liblzma. It is inconvenient to have a 89 1.1 christos // separate error message for errors that should be impossible 90 1.1 christos // to occur, but knowing the error code is important for 91 1.1 christos // debugging. That's why it is good to print the error code 92 1.1 christos // at least when there is no good error message to show. 93 1.1 christos msg = "Unknown error, possibly a bug"; 94 1.1 christos break; 95 1.1 christos } 96 1.1 christos 97 1.1 christos fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n", 98 1.1 christos msg, ret); 99 1.1 christos return false; 100 1.1 christos } 101 1.1 christos 102 1.1 christos 103 1.1 christos static bool 104 1.1 christos decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile) 105 1.1 christos { 106 1.1 christos // When LZMA_CONCATENATED flag was used when initializing the decoder, 107 1.1 christos // we need to tell lzma_code() when there will be no more input. 108 1.1 christos // This is done by setting action to LZMA_FINISH instead of LZMA_RUN 109 1.1 christos // in the same way as it is done when encoding. 110 1.1 christos // 111 1.1 christos // When LZMA_CONCATENATED isn't used, there is no need to use 112 1.1 christos // LZMA_FINISH to tell when all the input has been read, but it 113 1.1 christos // is still OK to use it if you want. When LZMA_CONCATENATED isn't 114 1.1 christos // used, the decoder will stop after the first .xz stream. In that 115 1.1 christos // case some unused data may be left in strm->next_in. 116 1.1 christos lzma_action action = LZMA_RUN; 117 1.1 christos 118 1.1 christos uint8_t inbuf[BUFSIZ]; 119 1.1 christos uint8_t outbuf[BUFSIZ]; 120 1.1 christos 121 1.1 christos strm->next_in = NULL; 122 1.1 christos strm->avail_in = 0; 123 1.1 christos strm->next_out = outbuf; 124 1.1 christos strm->avail_out = sizeof(outbuf); 125 1.1 christos 126 1.1 christos while (true) { 127 1.1 christos if (strm->avail_in == 0 && !feof(infile)) { 128 1.1 christos strm->next_in = inbuf; 129 1.1 christos strm->avail_in = fread(inbuf, 1, sizeof(inbuf), 130 1.1 christos infile); 131 1.1 christos 132 1.1 christos if (ferror(infile)) { 133 1.1 christos fprintf(stderr, "%s: Read error: %s\n", 134 1.1 christos inname, strerror(errno)); 135 1.1 christos return false; 136 1.1 christos } 137 1.1 christos 138 1.1 christos // Once the end of the input file has been reached, 139 1.1 christos // we need to tell lzma_code() that no more input 140 1.1 christos // will be coming. As said before, this isn't required 141 1.1.1.2 joerg // if the LZMA_CONCATENATED flag isn't used when 142 1.1 christos // initializing the decoder. 143 1.1 christos if (feof(infile)) 144 1.1 christos action = LZMA_FINISH; 145 1.1 christos } 146 1.1 christos 147 1.1 christos lzma_ret ret = lzma_code(strm, action); 148 1.1 christos 149 1.1 christos if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { 150 1.1 christos size_t write_size = sizeof(outbuf) - strm->avail_out; 151 1.1 christos 152 1.1 christos if (fwrite(outbuf, 1, write_size, outfile) 153 1.1 christos != write_size) { 154 1.1 christos fprintf(stderr, "Write error: %s\n", 155 1.1 christos strerror(errno)); 156 1.1 christos return false; 157 1.1 christos } 158 1.1 christos 159 1.1 christos strm->next_out = outbuf; 160 1.1 christos strm->avail_out = sizeof(outbuf); 161 1.1 christos } 162 1.1 christos 163 1.1 christos if (ret != LZMA_OK) { 164 1.1 christos // Once everything has been decoded successfully, the 165 1.1 christos // return value of lzma_code() will be LZMA_STREAM_END. 166 1.1 christos // 167 1.1 christos // It is important to check for LZMA_STREAM_END. Do not 168 1.1 christos // assume that getting ret != LZMA_OK would mean that 169 1.1 christos // everything has gone well or that when you aren't 170 1.1 christos // getting more output it must have successfully 171 1.1 christos // decoded everything. 172 1.1 christos if (ret == LZMA_STREAM_END) 173 1.1 christos return true; 174 1.1 christos 175 1.1 christos // It's not LZMA_OK nor LZMA_STREAM_END, 176 1.1 christos // so it must be an error code. See lzma/base.h 177 1.1 christos // (src/liblzma/api/lzma/base.h in the source package 178 1.1 christos // or e.g. /usr/include/lzma/base.h depending on the 179 1.1 christos // install prefix) for the list and documentation of 180 1.1 christos // possible values. Many values listen in lzma_ret 181 1.1 christos // enumeration aren't possible in this example, but 182 1.1 christos // can be made possible by enabling memory usage limit 183 1.1 christos // or adding flags to the decoder initialization. 184 1.1 christos const char *msg; 185 1.1 christos switch (ret) { 186 1.1 christos case LZMA_MEM_ERROR: 187 1.1 christos msg = "Memory allocation failed"; 188 1.1 christos break; 189 1.1 christos 190 1.1 christos case LZMA_FORMAT_ERROR: 191 1.1 christos // .xz magic bytes weren't found. 192 1.1 christos msg = "The input is not in the .xz format"; 193 1.1 christos break; 194 1.1 christos 195 1.1 christos case LZMA_OPTIONS_ERROR: 196 1.1 christos // For example, the headers specify a filter 197 1.1 christos // that isn't supported by this liblzma 198 1.1 christos // version (or it hasn't been enabled when 199 1.1 christos // building liblzma, but no-one sane does 200 1.1 christos // that unless building liblzma for an 201 1.1 christos // embedded system). Upgrading to a newer 202 1.1 christos // liblzma might help. 203 1.1 christos // 204 1.1 christos // Note that it is unlikely that the file has 205 1.1 christos // accidentally became corrupt if you get this 206 1.1 christos // error. The integrity of the .xz headers is 207 1.1 christos // always verified with a CRC32, so 208 1.1 christos // unintentionally corrupt files can be 209 1.1 christos // distinguished from unsupported files. 210 1.1 christos msg = "Unsupported compression options"; 211 1.1 christos break; 212 1.1 christos 213 1.1 christos case LZMA_DATA_ERROR: 214 1.1 christos msg = "Compressed file is corrupt"; 215 1.1 christos break; 216 1.1 christos 217 1.1 christos case LZMA_BUF_ERROR: 218 1.1 christos // Typically this error means that a valid 219 1.1 christos // file has got truncated, but it might also 220 1.1 christos // be a damaged part in the file that makes 221 1.1 christos // the decoder think the file is truncated. 222 1.1 christos // If you prefer, you can use the same error 223 1.1 christos // message for this as for LZMA_DATA_ERROR. 224 1.1 christos msg = "Compressed file is truncated or " 225 1.1 christos "otherwise corrupt"; 226 1.1 christos break; 227 1.1 christos 228 1.1 christos default: 229 1.1 christos // This is most likely LZMA_PROG_ERROR. 230 1.1 christos msg = "Unknown error, possibly a bug"; 231 1.1 christos break; 232 1.1 christos } 233 1.1 christos 234 1.1 christos fprintf(stderr, "%s: Decoder error: " 235 1.1 christos "%s (error code %u)\n", 236 1.1 christos inname, msg, ret); 237 1.1 christos return false; 238 1.1 christos } 239 1.1 christos } 240 1.1 christos } 241 1.1 christos 242 1.1 christos 243 1.1 christos extern int 244 1.1 christos main(int argc, char **argv) 245 1.1 christos { 246 1.1 christos if (argc <= 1) { 247 1.1 christos fprintf(stderr, "Usage: %s FILES...\n", argv[0]); 248 1.1 christos return EXIT_FAILURE; 249 1.1 christos } 250 1.1 christos 251 1.1 christos lzma_stream strm = LZMA_STREAM_INIT; 252 1.1 christos 253 1.1 christos bool success = true; 254 1.1 christos 255 1.1 christos // Try to decompress all files. 256 1.1 christos for (int i = 1; i < argc; ++i) { 257 1.1 christos if (!init_decoder(&strm)) { 258 1.1 christos // Decoder initialization failed. There's no point 259 1.1 christos // to retry it so we need to exit. 260 1.1 christos success = false; 261 1.1 christos break; 262 1.1 christos } 263 1.1 christos 264 1.1 christos FILE *infile = fopen(argv[i], "rb"); 265 1.1 christos 266 1.1 christos if (infile == NULL) { 267 1.1 christos fprintf(stderr, "%s: Error opening the " 268 1.1 christos "input file: %s\n", 269 1.1 christos argv[i], strerror(errno)); 270 1.1 christos success = false; 271 1.1 christos } else { 272 1.1 christos success &= decompress(&strm, argv[i], infile, stdout); 273 1.1 christos fclose(infile); 274 1.1 christos } 275 1.1 christos } 276 1.1 christos 277 1.1 christos // Free the memory allocated for the decoder. This only needs to be 278 1.1 christos // done after the last file. 279 1.1 christos lzma_end(&strm); 280 1.1 christos 281 1.1 christos if (fclose(stdout)) { 282 1.1 christos fprintf(stderr, "Write error: %s\n", strerror(errno)); 283 1.1 christos success = false; 284 1.1 christos } 285 1.1 christos 286 1.1 christos return success ? EXIT_SUCCESS : EXIT_FAILURE; 287 1.1 christos } 288