1 1.9 christos /* $NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $ */ 2 1.5 christos 3 1.5 christos /*- 4 1.5 christos * Copyright (c) 2011 The NetBSD Foundation, Inc. 5 1.5 christos * All rights reserved. 6 1.5 christos * 7 1.5 christos * This code is derived from software contributed to The NetBSD Foundation 8 1.5 christos * by Christos Zoulas. 9 1.5 christos * 10 1.5 christos * Redistribution and use in source and binary forms, with or without 11 1.5 christos * modification, are permitted provided that the following conditions 12 1.5 christos * are met: 13 1.5 christos * 1. Redistributions of source code must retain the above copyright 14 1.5 christos * notice, this list of conditions and the following disclaimer. 15 1.5 christos * 2. Redistributions in binary form must reproduce the above copyright 16 1.5 christos * notice, this list of conditions and the following disclaimer in the 17 1.5 christos * documentation and/or other materials provided with the distribution. 18 1.5 christos * 19 1.5 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.5 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.5 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.5 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.5 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.5 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.5 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.5 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.5 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.5 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.5 christos * POSSIBILITY OF SUCH DAMAGE. 30 1.5 christos */ 31 1.5 christos #include <sys/cdefs.h> 32 1.9 christos __RCSID("$NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $"); 33 1.1 christos 34 1.1 christos #include <stdarg.h> 35 1.1 christos #include <errno.h> 36 1.1 christos #include <stdio.h> 37 1.1 christos #include <unistd.h> 38 1.1 christos #include <lzma.h> 39 1.1 christos 40 1.1 christos static off_t 41 1.1 christos unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in) 42 1.1 christos { 43 1.1 christos lzma_stream strm = LZMA_STREAM_INIT; 44 1.2 christos static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED; 45 1.1 christos lzma_ret ret; 46 1.3 christos lzma_action action = LZMA_RUN; 47 1.3 christos off_t bytes_out, bp; 48 1.1 christos uint8_t ibuf[BUFSIZ]; 49 1.1 christos uint8_t obuf[BUFSIZ]; 50 1.1 christos 51 1.3 christos if (bytes_in == NULL) 52 1.3 christos bytes_in = &bp; 53 1.3 christos 54 1.1 christos strm.next_in = ibuf; 55 1.2 christos memcpy(ibuf, pre, prelen); 56 1.1 christos strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen); 57 1.1 christos if (strm.avail_in == (size_t)-1) 58 1.3 christos maybe_err("read failed"); 59 1.7 mrg infile_newdata(strm.avail_in); 60 1.3 christos strm.avail_in += prelen; 61 1.3 christos *bytes_in = strm.avail_in; 62 1.1 christos 63 1.2 christos if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK) 64 1.2 christos maybe_errx("Can't initialize decoder (%d)", ret); 65 1.2 christos 66 1.2 christos strm.next_out = NULL; 67 1.2 christos strm.avail_out = 0; 68 1.2 christos if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK) 69 1.2 christos maybe_errx("Can't read headers (%d)", ret); 70 1.1 christos 71 1.3 christos bytes_out = 0; 72 1.1 christos strm.next_out = obuf; 73 1.1 christos strm.avail_out = sizeof(obuf); 74 1.1 christos 75 1.1 christos for (;;) { 76 1.7 mrg check_siginfo(); 77 1.1 christos if (strm.avail_in == 0) { 78 1.1 christos strm.next_in = ibuf; 79 1.1 christos strm.avail_in = read(i, ibuf, sizeof(ibuf)); 80 1.3 christos switch (strm.avail_in) { 81 1.3 christos case (size_t)-1: 82 1.3 christos maybe_err("read failed"); 83 1.3 christos /*NOTREACHED*/ 84 1.3 christos case 0: 85 1.3 christos action = LZMA_FINISH; 86 1.3 christos break; 87 1.3 christos default: 88 1.7 mrg infile_newdata(strm.avail_in); 89 1.3 christos *bytes_in += strm.avail_in; 90 1.3 christos break; 91 1.3 christos } 92 1.1 christos } 93 1.1 christos 94 1.3 christos ret = lzma_code(&strm, action); 95 1.1 christos 96 1.1 christos // Write and check write error before checking decoder error. 97 1.1 christos // This way as much data as possible gets written to output 98 1.1 christos // even if decoder detected an error. 99 1.1 christos if (strm.avail_out == 0 || ret != LZMA_OK) { 100 1.1 christos const size_t write_size = sizeof(obuf) - strm.avail_out; 101 1.1 christos 102 1.9 christos if (!tflag && 103 1.9 christos write(o, obuf, write_size) != (ssize_t)write_size) 104 1.1 christos maybe_err("write failed"); 105 1.1 christos 106 1.1 christos strm.next_out = obuf; 107 1.1 christos strm.avail_out = sizeof(obuf); 108 1.3 christos bytes_out += write_size; 109 1.1 christos } 110 1.1 christos 111 1.1 christos if (ret != LZMA_OK) { 112 1.1 christos if (ret == LZMA_STREAM_END) { 113 1.1 christos // Check that there's no trailing garbage. 114 1.1 christos if (strm.avail_in != 0 || read(i, ibuf, 1)) 115 1.1 christos ret = LZMA_DATA_ERROR; 116 1.1 christos else { 117 1.1 christos lzma_end(&strm); 118 1.3 christos return bytes_out; 119 1.1 christos } 120 1.1 christos } 121 1.1 christos 122 1.1 christos const char *msg; 123 1.1 christos switch (ret) { 124 1.1 christos case LZMA_MEM_ERROR: 125 1.1 christos msg = strerror(ENOMEM); 126 1.1 christos break; 127 1.1 christos 128 1.1 christos case LZMA_FORMAT_ERROR: 129 1.1 christos msg = "File format not recognized"; 130 1.1 christos break; 131 1.1 christos 132 1.1 christos case LZMA_OPTIONS_ERROR: 133 1.1 christos // FIXME: Better message? 134 1.1 christos msg = "Unsupported compression options"; 135 1.1 christos break; 136 1.1 christos 137 1.1 christos case LZMA_DATA_ERROR: 138 1.1 christos msg = "File is corrupt"; 139 1.1 christos break; 140 1.1 christos 141 1.1 christos case LZMA_BUF_ERROR: 142 1.1 christos msg = "Unexpected end of input"; 143 1.1 christos break; 144 1.1 christos 145 1.1 christos case LZMA_MEMLIMIT_ERROR: 146 1.1 christos msg = "Reached memory limit"; 147 1.1 christos break; 148 1.1 christos 149 1.1 christos default: 150 1.4 christos maybe_errx("Unknown error (%d)", ret); 151 1.1 christos break; 152 1.1 christos } 153 1.4 christos maybe_errx("%s", msg); 154 1.1 christos 155 1.1 christos } 156 1.1 christos } 157 1.1 christos } 158 1.8 martin 159 1.8 martin #include <stdbool.h> 160 1.8 martin 161 1.8 martin /* 162 1.8 martin * Copied various bits and pieces from xz support code or brute force 163 1.8 martin * replacements. 164 1.8 martin */ 165 1.8 martin 166 1.8 martin #define my_min(A,B) ((A)<(B)?(A):(B)) 167 1.8 martin 168 1.8 martin // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. 169 1.8 martin // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) 170 1.8 martin #if BUFSIZ <= 1024 171 1.8 martin # define IO_BUFFER_SIZE 8192 172 1.8 martin #else 173 1.8 martin # define IO_BUFFER_SIZE (BUFSIZ & ~7U) 174 1.8 martin #endif 175 1.8 martin 176 1.8 martin /// is_sparse() accesses the buffer as uint64_t for maximum speed. 177 1.8 martin /// Use an union to make sure that the buffer is properly aligned. 178 1.8 martin typedef union { 179 1.8 martin uint8_t u8[IO_BUFFER_SIZE]; 180 1.8 martin uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; 181 1.8 martin uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; 182 1.8 martin } io_buf; 183 1.8 martin 184 1.8 martin 185 1.8 martin static bool 186 1.8 martin io_pread(int fd, io_buf *buf, size_t size, off_t pos) 187 1.8 martin { 188 1.8 martin // Using lseek() and read() is more portable than pread() and 189 1.8 martin // for us it is as good as real pread(). 190 1.8 martin if (lseek(fd, pos, SEEK_SET) != pos) { 191 1.8 martin return true; 192 1.8 martin } 193 1.8 martin 194 1.8 martin const size_t amount = read(fd, buf, size); 195 1.8 martin if (amount == SIZE_MAX) 196 1.8 martin return true; 197 1.8 martin 198 1.8 martin if (amount != size) { 199 1.8 martin return true; 200 1.8 martin } 201 1.8 martin 202 1.8 martin return false; 203 1.8 martin } 204 1.8 martin 205 1.8 martin /* 206 1.8 martin * Most of the following is copied (mostly verbatim) from the xz 207 1.8 martin * distribution, from file src/xz/list.c 208 1.8 martin */ 209 1.8 martin 210 1.8 martin /////////////////////////////////////////////////////////////////////////////// 211 1.8 martin // 212 1.8 martin /// \file list.c 213 1.8 martin /// \brief Listing information about .xz files 214 1.8 martin // 215 1.8 martin // Author: Lasse Collin 216 1.8 martin // 217 1.8 martin // This file has been put into the public domain. 218 1.8 martin // You can do whatever you want with this file. 219 1.8 martin // 220 1.8 martin /////////////////////////////////////////////////////////////////////////////// 221 1.8 martin 222 1.8 martin 223 1.8 martin /// Information about a .xz file 224 1.8 martin typedef struct { 225 1.8 martin /// Combined Index of all Streams in the file 226 1.8 martin lzma_index *idx; 227 1.8 martin 228 1.8 martin /// Total amount of Stream Padding 229 1.8 martin uint64_t stream_padding; 230 1.8 martin 231 1.8 martin /// Highest memory usage so far 232 1.8 martin uint64_t memusage_max; 233 1.8 martin 234 1.8 martin /// True if all Blocks so far have Compressed Size and 235 1.8 martin /// Uncompressed Size fields 236 1.8 martin bool all_have_sizes; 237 1.8 martin 238 1.8 martin /// Oldest XZ Utils version that will decompress the file 239 1.8 martin uint32_t min_version; 240 1.8 martin 241 1.8 martin } xz_file_info; 242 1.8 martin 243 1.8 martin #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } 244 1.8 martin 245 1.8 martin 246 1.8 martin /// \brief Parse the Index(es) from the given .xz file 247 1.8 martin /// 248 1.8 martin /// \param xfi Pointer to structure where the decoded information 249 1.8 martin /// is stored. 250 1.8 martin /// \param pair Input file 251 1.8 martin /// 252 1.8 martin /// \return On success, false is returned. On error, true is returned. 253 1.8 martin /// 254 1.8 martin // TODO: This function is pretty big. liblzma should have a function that 255 1.8 martin // takes a callback function to parse the Index(es) from a .xz file to make 256 1.8 martin // it easy for applications. 257 1.8 martin static bool 258 1.8 martin parse_indexes(xz_file_info *xfi, int src_fd) 259 1.8 martin { 260 1.8 martin struct stat st; 261 1.8 martin 262 1.8 martin fstat(src_fd, &st); 263 1.8 martin if (st.st_size <= 0) { 264 1.8 martin return true; 265 1.8 martin } 266 1.8 martin 267 1.8 martin if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { 268 1.8 martin return true; 269 1.8 martin } 270 1.8 martin 271 1.8 martin io_buf buf; 272 1.8 martin lzma_stream_flags header_flags; 273 1.8 martin lzma_stream_flags footer_flags; 274 1.8 martin lzma_ret ret; 275 1.8 martin 276 1.8 martin // lzma_stream for the Index decoder 277 1.8 martin lzma_stream strm = LZMA_STREAM_INIT; 278 1.8 martin 279 1.8 martin // All Indexes decoded so far 280 1.8 martin lzma_index *combined_index = NULL; 281 1.8 martin 282 1.8 martin // The Index currently being decoded 283 1.8 martin lzma_index *this_index = NULL; 284 1.8 martin 285 1.8 martin // Current position in the file. We parse the file backwards so 286 1.8 martin // initialize it to point to the end of the file. 287 1.8 martin off_t pos = st.st_size; 288 1.8 martin 289 1.8 martin // Each loop iteration decodes one Index. 290 1.8 martin do { 291 1.8 martin // Check that there is enough data left to contain at least 292 1.8 martin // the Stream Header and Stream Footer. This check cannot 293 1.8 martin // fail in the first pass of this loop. 294 1.8 martin if (pos < 2 * LZMA_STREAM_HEADER_SIZE) { 295 1.8 martin goto error; 296 1.8 martin } 297 1.8 martin 298 1.8 martin pos -= LZMA_STREAM_HEADER_SIZE; 299 1.8 martin lzma_vli stream_padding = 0; 300 1.8 martin 301 1.8 martin // Locate the Stream Footer. There may be Stream Padding which 302 1.8 martin // we must skip when reading backwards. 303 1.8 martin while (true) { 304 1.8 martin if (pos < LZMA_STREAM_HEADER_SIZE) { 305 1.8 martin goto error; 306 1.8 martin } 307 1.8 martin 308 1.8 martin if (io_pread(src_fd, &buf, 309 1.8 martin LZMA_STREAM_HEADER_SIZE, pos)) 310 1.8 martin goto error; 311 1.8 martin 312 1.8 martin // Stream Padding is always a multiple of four bytes. 313 1.8 martin int i = 2; 314 1.8 martin if (buf.u32[i] != 0) 315 1.8 martin break; 316 1.8 martin 317 1.8 martin // To avoid calling io_pread() for every four bytes 318 1.8 martin // of Stream Padding, take advantage that we read 319 1.8 martin // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and 320 1.8 martin // check them too before calling io_pread() again. 321 1.8 martin do { 322 1.8 martin stream_padding += 4; 323 1.8 martin pos -= 4; 324 1.8 martin --i; 325 1.8 martin } while (i >= 0 && buf.u32[i] == 0); 326 1.8 martin } 327 1.8 martin 328 1.8 martin // Decode the Stream Footer. 329 1.8 martin ret = lzma_stream_footer_decode(&footer_flags, buf.u8); 330 1.8 martin if (ret != LZMA_OK) { 331 1.8 martin goto error; 332 1.8 martin } 333 1.8 martin 334 1.8 martin // Check that the Stream Footer doesn't specify something 335 1.8 martin // that we don't support. This can only happen if the xz 336 1.8 martin // version is older than liblzma and liblzma supports 337 1.8 martin // something new. 338 1.8 martin // 339 1.8 martin // It is enough to check Stream Footer. Stream Header must 340 1.8 martin // match when it is compared against Stream Footer with 341 1.8 martin // lzma_stream_flags_compare(). 342 1.8 martin if (footer_flags.version != 0) { 343 1.8 martin goto error; 344 1.8 martin } 345 1.8 martin 346 1.8 martin // Check that the size of the Index field looks sane. 347 1.8 martin lzma_vli index_size = footer_flags.backward_size; 348 1.8 martin if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) { 349 1.8 martin goto error; 350 1.8 martin } 351 1.8 martin 352 1.8 martin // Set pos to the beginning of the Index. 353 1.8 martin pos -= index_size; 354 1.8 martin 355 1.8 martin // Decode the Index. 356 1.8 martin ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX); 357 1.8 martin if (ret != LZMA_OK) { 358 1.8 martin goto error; 359 1.8 martin } 360 1.8 martin 361 1.8 martin do { 362 1.8 martin // Don't give the decoder more input than the 363 1.8 martin // Index size. 364 1.8 martin strm.avail_in = my_min(IO_BUFFER_SIZE, index_size); 365 1.8 martin if (io_pread(src_fd, &buf, strm.avail_in, pos)) 366 1.8 martin goto error; 367 1.8 martin 368 1.8 martin pos += strm.avail_in; 369 1.8 martin index_size -= strm.avail_in; 370 1.8 martin 371 1.8 martin strm.next_in = buf.u8; 372 1.8 martin ret = lzma_code(&strm, LZMA_RUN); 373 1.8 martin 374 1.8 martin } while (ret == LZMA_OK); 375 1.8 martin 376 1.8 martin // If the decoding seems to be successful, check also that 377 1.8 martin // the Index decoder consumed as much input as indicated 378 1.8 martin // by the Backward Size field. 379 1.8 martin if (ret == LZMA_STREAM_END) 380 1.8 martin if (index_size != 0 || strm.avail_in != 0) 381 1.8 martin ret = LZMA_DATA_ERROR; 382 1.8 martin 383 1.8 martin if (ret != LZMA_STREAM_END) { 384 1.8 martin // LZMA_BUFFER_ERROR means that the Index decoder 385 1.8 martin // would have liked more input than what the Index 386 1.8 martin // size should be according to Stream Footer. 387 1.8 martin // The message for LZMA_DATA_ERROR makes more 388 1.8 martin // sense in that case. 389 1.8 martin if (ret == LZMA_BUF_ERROR) 390 1.8 martin ret = LZMA_DATA_ERROR; 391 1.8 martin 392 1.8 martin goto error; 393 1.8 martin } 394 1.8 martin 395 1.8 martin // Decode the Stream Header and check that its Stream Flags 396 1.8 martin // match the Stream Footer. 397 1.8 martin pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE; 398 1.8 martin if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) { 399 1.8 martin goto error; 400 1.8 martin } 401 1.8 martin 402 1.8 martin pos -= lzma_index_total_size(this_index); 403 1.8 martin if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos)) 404 1.8 martin goto error; 405 1.8 martin 406 1.8 martin ret = lzma_stream_header_decode(&header_flags, buf.u8); 407 1.8 martin if (ret != LZMA_OK) { 408 1.8 martin goto error; 409 1.8 martin } 410 1.8 martin 411 1.8 martin ret = lzma_stream_flags_compare(&header_flags, &footer_flags); 412 1.8 martin if (ret != LZMA_OK) { 413 1.8 martin goto error; 414 1.8 martin } 415 1.8 martin 416 1.8 martin // Store the decoded Stream Flags into this_index. This is 417 1.8 martin // needed so that we can print which Check is used in each 418 1.8 martin // Stream. 419 1.8 martin ret = lzma_index_stream_flags(this_index, &footer_flags); 420 1.8 martin if (ret != LZMA_OK) 421 1.8 martin goto error; 422 1.8 martin 423 1.8 martin // Store also the size of the Stream Padding field. It is 424 1.8 martin // needed to show the offsets of the Streams correctly. 425 1.8 martin ret = lzma_index_stream_padding(this_index, stream_padding); 426 1.8 martin if (ret != LZMA_OK) 427 1.8 martin goto error; 428 1.8 martin 429 1.8 martin if (combined_index != NULL) { 430 1.8 martin // Append the earlier decoded Indexes 431 1.8 martin // after this_index. 432 1.8 martin ret = lzma_index_cat( 433 1.8 martin this_index, combined_index, NULL); 434 1.8 martin if (ret != LZMA_OK) { 435 1.8 martin goto error; 436 1.8 martin } 437 1.8 martin } 438 1.8 martin 439 1.8 martin combined_index = this_index; 440 1.8 martin this_index = NULL; 441 1.8 martin 442 1.8 martin xfi->stream_padding += stream_padding; 443 1.8 martin 444 1.8 martin } while (pos > 0); 445 1.8 martin 446 1.8 martin lzma_end(&strm); 447 1.8 martin 448 1.8 martin // All OK. Make combined_index available to the caller. 449 1.8 martin xfi->idx = combined_index; 450 1.8 martin return false; 451 1.8 martin 452 1.8 martin error: 453 1.8 martin // Something went wrong, free the allocated memory. 454 1.8 martin lzma_end(&strm); 455 1.8 martin lzma_index_end(combined_index, NULL); 456 1.8 martin lzma_index_end(this_index, NULL); 457 1.8 martin return true; 458 1.8 martin } 459 1.8 martin 460 1.8 martin /***************** end of copy form list.c *************************/ 461 1.8 martin 462 1.8 martin /* 463 1.8 martin * Small wrapper to extract total length of a file 464 1.8 martin */ 465 1.8 martin off_t 466 1.8 martin unxz_len(int fd) 467 1.8 martin { 468 1.8 martin xz_file_info xfi = XZ_FILE_INFO_INIT; 469 1.8 martin if (!parse_indexes(&xfi, fd)) { 470 1.8 martin off_t res = lzma_index_uncompressed_size(xfi.idx); 471 1.8 martin lzma_index_end(xfi.idx, NULL); 472 1.8 martin return res; 473 1.8 martin } 474 1.8 martin return 0; 475 1.8 martin } 476 1.8 martin 477