unxz.c revision 1.9 1 1.9 christos /* $NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $ */
2 1.5 christos
3 1.5 christos /*-
4 1.5 christos * Copyright (c) 2011 The NetBSD Foundation, Inc.
5 1.5 christos * All rights reserved.
6 1.5 christos *
7 1.5 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.5 christos * by Christos Zoulas.
9 1.5 christos *
10 1.5 christos * Redistribution and use in source and binary forms, with or without
11 1.5 christos * modification, are permitted provided that the following conditions
12 1.5 christos * are met:
13 1.5 christos * 1. Redistributions of source code must retain the above copyright
14 1.5 christos * notice, this list of conditions and the following disclaimer.
15 1.5 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.5 christos * notice, this list of conditions and the following disclaimer in the
17 1.5 christos * documentation and/or other materials provided with the distribution.
18 1.5 christos *
19 1.5 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.5 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.5 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.5 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.5 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.5 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.5 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.5 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.5 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.5 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.5 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.5 christos */
31 1.5 christos #include <sys/cdefs.h>
32 1.9 christos __RCSID("$NetBSD: unxz.c,v 1.9 2024/05/04 13:17:03 christos Exp $");
33 1.1 christos
34 1.1 christos #include <stdarg.h>
35 1.1 christos #include <errno.h>
36 1.1 christos #include <stdio.h>
37 1.1 christos #include <unistd.h>
38 1.1 christos #include <lzma.h>
39 1.1 christos
40 1.1 christos static off_t
41 1.1 christos unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
42 1.1 christos {
43 1.1 christos lzma_stream strm = LZMA_STREAM_INIT;
44 1.2 christos static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
45 1.1 christos lzma_ret ret;
46 1.3 christos lzma_action action = LZMA_RUN;
47 1.3 christos off_t bytes_out, bp;
48 1.1 christos uint8_t ibuf[BUFSIZ];
49 1.1 christos uint8_t obuf[BUFSIZ];
50 1.1 christos
51 1.3 christos if (bytes_in == NULL)
52 1.3 christos bytes_in = &bp;
53 1.3 christos
54 1.1 christos strm.next_in = ibuf;
55 1.2 christos memcpy(ibuf, pre, prelen);
56 1.1 christos strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
57 1.1 christos if (strm.avail_in == (size_t)-1)
58 1.3 christos maybe_err("read failed");
59 1.7 mrg infile_newdata(strm.avail_in);
60 1.3 christos strm.avail_in += prelen;
61 1.3 christos *bytes_in = strm.avail_in;
62 1.1 christos
63 1.2 christos if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
64 1.2 christos maybe_errx("Can't initialize decoder (%d)", ret);
65 1.2 christos
66 1.2 christos strm.next_out = NULL;
67 1.2 christos strm.avail_out = 0;
68 1.2 christos if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
69 1.2 christos maybe_errx("Can't read headers (%d)", ret);
70 1.1 christos
71 1.3 christos bytes_out = 0;
72 1.1 christos strm.next_out = obuf;
73 1.1 christos strm.avail_out = sizeof(obuf);
74 1.1 christos
75 1.1 christos for (;;) {
76 1.7 mrg check_siginfo();
77 1.1 christos if (strm.avail_in == 0) {
78 1.1 christos strm.next_in = ibuf;
79 1.1 christos strm.avail_in = read(i, ibuf, sizeof(ibuf));
80 1.3 christos switch (strm.avail_in) {
81 1.3 christos case (size_t)-1:
82 1.3 christos maybe_err("read failed");
83 1.3 christos /*NOTREACHED*/
84 1.3 christos case 0:
85 1.3 christos action = LZMA_FINISH;
86 1.3 christos break;
87 1.3 christos default:
88 1.7 mrg infile_newdata(strm.avail_in);
89 1.3 christos *bytes_in += strm.avail_in;
90 1.3 christos break;
91 1.3 christos }
92 1.1 christos }
93 1.1 christos
94 1.3 christos ret = lzma_code(&strm, action);
95 1.1 christos
96 1.1 christos // Write and check write error before checking decoder error.
97 1.1 christos // This way as much data as possible gets written to output
98 1.1 christos // even if decoder detected an error.
99 1.1 christos if (strm.avail_out == 0 || ret != LZMA_OK) {
100 1.1 christos const size_t write_size = sizeof(obuf) - strm.avail_out;
101 1.1 christos
102 1.9 christos if (!tflag &&
103 1.9 christos write(o, obuf, write_size) != (ssize_t)write_size)
104 1.1 christos maybe_err("write failed");
105 1.1 christos
106 1.1 christos strm.next_out = obuf;
107 1.1 christos strm.avail_out = sizeof(obuf);
108 1.3 christos bytes_out += write_size;
109 1.1 christos }
110 1.1 christos
111 1.1 christos if (ret != LZMA_OK) {
112 1.1 christos if (ret == LZMA_STREAM_END) {
113 1.1 christos // Check that there's no trailing garbage.
114 1.1 christos if (strm.avail_in != 0 || read(i, ibuf, 1))
115 1.1 christos ret = LZMA_DATA_ERROR;
116 1.1 christos else {
117 1.1 christos lzma_end(&strm);
118 1.3 christos return bytes_out;
119 1.1 christos }
120 1.1 christos }
121 1.1 christos
122 1.1 christos const char *msg;
123 1.1 christos switch (ret) {
124 1.1 christos case LZMA_MEM_ERROR:
125 1.1 christos msg = strerror(ENOMEM);
126 1.1 christos break;
127 1.1 christos
128 1.1 christos case LZMA_FORMAT_ERROR:
129 1.1 christos msg = "File format not recognized";
130 1.1 christos break;
131 1.1 christos
132 1.1 christos case LZMA_OPTIONS_ERROR:
133 1.1 christos // FIXME: Better message?
134 1.1 christos msg = "Unsupported compression options";
135 1.1 christos break;
136 1.1 christos
137 1.1 christos case LZMA_DATA_ERROR:
138 1.1 christos msg = "File is corrupt";
139 1.1 christos break;
140 1.1 christos
141 1.1 christos case LZMA_BUF_ERROR:
142 1.1 christos msg = "Unexpected end of input";
143 1.1 christos break;
144 1.1 christos
145 1.1 christos case LZMA_MEMLIMIT_ERROR:
146 1.1 christos msg = "Reached memory limit";
147 1.1 christos break;
148 1.1 christos
149 1.1 christos default:
150 1.4 christos maybe_errx("Unknown error (%d)", ret);
151 1.1 christos break;
152 1.1 christos }
153 1.4 christos maybe_errx("%s", msg);
154 1.1 christos
155 1.1 christos }
156 1.1 christos }
157 1.1 christos }
158 1.8 martin
159 1.8 martin #include <stdbool.h>
160 1.8 martin
161 1.8 martin /*
162 1.8 martin * Copied various bits and pieces from xz support code or brute force
163 1.8 martin * replacements.
164 1.8 martin */
165 1.8 martin
// Smaller of two values. This is a function-like macro, so an argument may
// be evaluated twice: pass only expressions without side effects.
#define my_min(A, B) ((B) > (A) ? (A) : (B))
167 1.8 martin
// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
// IO_BUFFER_SIZE also has to be a multiple of 8 (sizeof(uint64_t)) so that
// the word-sized views in io_buf cover the buffer exactly.
#if BUFSIZ <= 1024
#	define IO_BUFFER_SIZE 8192
#else
#	define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#endif

/// I/O buffer that can be viewed as bytes or as 32-bit/64-bit words
/// (parse_indexes() inspects Stream Padding through the u32 view).
/// Using a union makes sure that the buffer is properly aligned for
/// the widest member.
typedef union {
	uint8_t u8[IO_BUFFER_SIZE];
	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
	uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;


/// \brief      Read exactly size bytes from fd, starting at offset pos
///
/// Using lseek() and read() is more portable than pread() and for us it
/// is as good as real pread(). Note that the file offset of fd is changed.
///
/// \param      fd      File descriptor to read from; must be seekable
/// \param      buf     Destination buffer; the caller must make sure that
///                     size does not exceed IO_BUFFER_SIZE
/// \param      size    Number of bytes that must be read
/// \param      pos     Absolute file offset to start reading from
///
/// \return     On success, false is returned. If seeking or reading fails,
///             or if the file ends before size bytes were read, true is
///             returned.
static bool
io_pread(int fd, io_buf *buf, size_t size, off_t pos)
{
	if (lseek(fd, pos, SEEK_SET) != pos) {
		return true;
	}

	uint8_t *dst = buf->u8;
	size_t left = size;

	// read() is allowed to return fewer bytes than requested without
	// that being an error, so keep going until everything has arrived.
	while (left > 0) {
		const ssize_t got = read(fd, dst, left);

		if (got < 0) {
			// Interrupted before any data was transferred:
			// simply try again.
			if (errno == EINTR)
				continue;

			return true;
		}

		// End of file before the requested amount was read.
		if (got == 0)
			return true;

		dst += got;
		left -= (size_t)got;
	}

	return false;
}
204 1.8 martin
205 1.8 martin /*
206 1.8 martin * Most of the following is copied (mostly verbatim) from the xz
207 1.8 martin * distribution, from file src/xz/list.c
208 1.8 martin */
209 1.8 martin
210 1.8 martin ///////////////////////////////////////////////////////////////////////////////
211 1.8 martin //
212 1.8 martin /// \file list.c
213 1.8 martin /// \brief Listing information about .xz files
214 1.8 martin //
215 1.8 martin // Author: Lasse Collin
216 1.8 martin //
217 1.8 martin // This file has been put into the public domain.
218 1.8 martin // You can do whatever you want with this file.
219 1.8 martin //
220 1.8 martin ///////////////////////////////////////////////////////////////////////////////
221 1.8 martin
222 1.8 martin
223 1.8 martin /// Information about a .xz file
224 1.8 martin typedef struct {
225 1.8 martin /// Combined Index of all Streams in the file
226 1.8 martin lzma_index *idx;
227 1.8 martin
228 1.8 martin /// Total amount of Stream Padding
229 1.8 martin uint64_t stream_padding;
230 1.8 martin
231 1.8 martin /// Highest memory usage so far
232 1.8 martin uint64_t memusage_max;
233 1.8 martin
234 1.8 martin /// True if all Blocks so far have Compressed Size and
235 1.8 martin /// Uncompressed Size fields
236 1.8 martin bool all_have_sizes;
237 1.8 martin
238 1.8 martin /// Oldest XZ Utils version that will decompress the file
239 1.8 martin uint32_t min_version;
240 1.8 martin
241 1.8 martin } xz_file_info;
242 1.8 martin
243 1.8 martin #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
244 1.8 martin
245 1.8 martin
246 1.8 martin /// \brief Parse the Index(es) from the given .xz file
247 1.8 martin ///
248 1.8 martin /// \param xfi Pointer to structure where the decoded information
249 1.8 martin /// is stored.
250 1.8 martin /// \param pair Input file
251 1.8 martin ///
252 1.8 martin /// \return On success, false is returned. On error, true is returned.
253 1.8 martin ///
254 1.8 martin // TODO: This function is pretty big. liblzma should have a function that
255 1.8 martin // takes a callback function to parse the Index(es) from a .xz file to make
256 1.8 martin // it easy for applications.
257 1.8 martin static bool
258 1.8 martin parse_indexes(xz_file_info *xfi, int src_fd)
259 1.8 martin {
260 1.8 martin struct stat st;
261 1.8 martin
262 1.8 martin fstat(src_fd, &st);
263 1.8 martin if (st.st_size <= 0) {
264 1.8 martin return true;
265 1.8 martin }
266 1.8 martin
267 1.8 martin if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
268 1.8 martin return true;
269 1.8 martin }
270 1.8 martin
271 1.8 martin io_buf buf;
272 1.8 martin lzma_stream_flags header_flags;
273 1.8 martin lzma_stream_flags footer_flags;
274 1.8 martin lzma_ret ret;
275 1.8 martin
276 1.8 martin // lzma_stream for the Index decoder
277 1.8 martin lzma_stream strm = LZMA_STREAM_INIT;
278 1.8 martin
279 1.8 martin // All Indexes decoded so far
280 1.8 martin lzma_index *combined_index = NULL;
281 1.8 martin
282 1.8 martin // The Index currently being decoded
283 1.8 martin lzma_index *this_index = NULL;
284 1.8 martin
285 1.8 martin // Current position in the file. We parse the file backwards so
286 1.8 martin // initialize it to point to the end of the file.
287 1.8 martin off_t pos = st.st_size;
288 1.8 martin
289 1.8 martin // Each loop iteration decodes one Index.
290 1.8 martin do {
291 1.8 martin // Check that there is enough data left to contain at least
292 1.8 martin // the Stream Header and Stream Footer. This check cannot
293 1.8 martin // fail in the first pass of this loop.
294 1.8 martin if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
295 1.8 martin goto error;
296 1.8 martin }
297 1.8 martin
298 1.8 martin pos -= LZMA_STREAM_HEADER_SIZE;
299 1.8 martin lzma_vli stream_padding = 0;
300 1.8 martin
301 1.8 martin // Locate the Stream Footer. There may be Stream Padding which
302 1.8 martin // we must skip when reading backwards.
303 1.8 martin while (true) {
304 1.8 martin if (pos < LZMA_STREAM_HEADER_SIZE) {
305 1.8 martin goto error;
306 1.8 martin }
307 1.8 martin
308 1.8 martin if (io_pread(src_fd, &buf,
309 1.8 martin LZMA_STREAM_HEADER_SIZE, pos))
310 1.8 martin goto error;
311 1.8 martin
312 1.8 martin // Stream Padding is always a multiple of four bytes.
313 1.8 martin int i = 2;
314 1.8 martin if (buf.u32[i] != 0)
315 1.8 martin break;
316 1.8 martin
317 1.8 martin // To avoid calling io_pread() for every four bytes
318 1.8 martin // of Stream Padding, take advantage that we read
319 1.8 martin // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
320 1.8 martin // check them too before calling io_pread() again.
321 1.8 martin do {
322 1.8 martin stream_padding += 4;
323 1.8 martin pos -= 4;
324 1.8 martin --i;
325 1.8 martin } while (i >= 0 && buf.u32[i] == 0);
326 1.8 martin }
327 1.8 martin
328 1.8 martin // Decode the Stream Footer.
329 1.8 martin ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
330 1.8 martin if (ret != LZMA_OK) {
331 1.8 martin goto error;
332 1.8 martin }
333 1.8 martin
334 1.8 martin // Check that the Stream Footer doesn't specify something
335 1.8 martin // that we don't support. This can only happen if the xz
336 1.8 martin // version is older than liblzma and liblzma supports
337 1.8 martin // something new.
338 1.8 martin //
339 1.8 martin // It is enough to check Stream Footer. Stream Header must
340 1.8 martin // match when it is compared against Stream Footer with
341 1.8 martin // lzma_stream_flags_compare().
342 1.8 martin if (footer_flags.version != 0) {
343 1.8 martin goto error;
344 1.8 martin }
345 1.8 martin
346 1.8 martin // Check that the size of the Index field looks sane.
347 1.8 martin lzma_vli index_size = footer_flags.backward_size;
348 1.8 martin if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
349 1.8 martin goto error;
350 1.8 martin }
351 1.8 martin
352 1.8 martin // Set pos to the beginning of the Index.
353 1.8 martin pos -= index_size;
354 1.8 martin
355 1.8 martin // Decode the Index.
356 1.8 martin ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
357 1.8 martin if (ret != LZMA_OK) {
358 1.8 martin goto error;
359 1.8 martin }
360 1.8 martin
361 1.8 martin do {
362 1.8 martin // Don't give the decoder more input than the
363 1.8 martin // Index size.
364 1.8 martin strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
365 1.8 martin if (io_pread(src_fd, &buf, strm.avail_in, pos))
366 1.8 martin goto error;
367 1.8 martin
368 1.8 martin pos += strm.avail_in;
369 1.8 martin index_size -= strm.avail_in;
370 1.8 martin
371 1.8 martin strm.next_in = buf.u8;
372 1.8 martin ret = lzma_code(&strm, LZMA_RUN);
373 1.8 martin
374 1.8 martin } while (ret == LZMA_OK);
375 1.8 martin
376 1.8 martin // If the decoding seems to be successful, check also that
377 1.8 martin // the Index decoder consumed as much input as indicated
378 1.8 martin // by the Backward Size field.
379 1.8 martin if (ret == LZMA_STREAM_END)
380 1.8 martin if (index_size != 0 || strm.avail_in != 0)
381 1.8 martin ret = LZMA_DATA_ERROR;
382 1.8 martin
383 1.8 martin if (ret != LZMA_STREAM_END) {
384 1.8 martin // LZMA_BUFFER_ERROR means that the Index decoder
385 1.8 martin // would have liked more input than what the Index
386 1.8 martin // size should be according to Stream Footer.
387 1.8 martin // The message for LZMA_DATA_ERROR makes more
388 1.8 martin // sense in that case.
389 1.8 martin if (ret == LZMA_BUF_ERROR)
390 1.8 martin ret = LZMA_DATA_ERROR;
391 1.8 martin
392 1.8 martin goto error;
393 1.8 martin }
394 1.8 martin
395 1.8 martin // Decode the Stream Header and check that its Stream Flags
396 1.8 martin // match the Stream Footer.
397 1.8 martin pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
398 1.8 martin if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
399 1.8 martin goto error;
400 1.8 martin }
401 1.8 martin
402 1.8 martin pos -= lzma_index_total_size(this_index);
403 1.8 martin if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
404 1.8 martin goto error;
405 1.8 martin
406 1.8 martin ret = lzma_stream_header_decode(&header_flags, buf.u8);
407 1.8 martin if (ret != LZMA_OK) {
408 1.8 martin goto error;
409 1.8 martin }
410 1.8 martin
411 1.8 martin ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
412 1.8 martin if (ret != LZMA_OK) {
413 1.8 martin goto error;
414 1.8 martin }
415 1.8 martin
416 1.8 martin // Store the decoded Stream Flags into this_index. This is
417 1.8 martin // needed so that we can print which Check is used in each
418 1.8 martin // Stream.
419 1.8 martin ret = lzma_index_stream_flags(this_index, &footer_flags);
420 1.8 martin if (ret != LZMA_OK)
421 1.8 martin goto error;
422 1.8 martin
423 1.8 martin // Store also the size of the Stream Padding field. It is
424 1.8 martin // needed to show the offsets of the Streams correctly.
425 1.8 martin ret = lzma_index_stream_padding(this_index, stream_padding);
426 1.8 martin if (ret != LZMA_OK)
427 1.8 martin goto error;
428 1.8 martin
429 1.8 martin if (combined_index != NULL) {
430 1.8 martin // Append the earlier decoded Indexes
431 1.8 martin // after this_index.
432 1.8 martin ret = lzma_index_cat(
433 1.8 martin this_index, combined_index, NULL);
434 1.8 martin if (ret != LZMA_OK) {
435 1.8 martin goto error;
436 1.8 martin }
437 1.8 martin }
438 1.8 martin
439 1.8 martin combined_index = this_index;
440 1.8 martin this_index = NULL;
441 1.8 martin
442 1.8 martin xfi->stream_padding += stream_padding;
443 1.8 martin
444 1.8 martin } while (pos > 0);
445 1.8 martin
446 1.8 martin lzma_end(&strm);
447 1.8 martin
448 1.8 martin // All OK. Make combined_index available to the caller.
449 1.8 martin xfi->idx = combined_index;
450 1.8 martin return false;
451 1.8 martin
452 1.8 martin error:
453 1.8 martin // Something went wrong, free the allocated memory.
454 1.8 martin lzma_end(&strm);
455 1.8 martin lzma_index_end(combined_index, NULL);
456 1.8 martin lzma_index_end(this_index, NULL);
457 1.8 martin return true;
458 1.8 martin }
459 1.8 martin
460 1.8 martin /***************** end of copy from list.c *************************/
461 1.8 martin
462 1.8 martin /*
463 1.8 martin * Small wrapper to extract total length of a file
464 1.8 martin */
465 1.8 martin off_t
466 1.8 martin unxz_len(int fd)
467 1.8 martin {
468 1.8 martin xz_file_info xfi = XZ_FILE_INFO_INIT;
469 1.8 martin if (!parse_indexes(&xfi, fd)) {
470 1.8 martin off_t res = lzma_index_uncompressed_size(xfi.idx);
471 1.8 martin lzma_index_end(xfi.idx, NULL);
472 1.8 martin return res;
473 1.8 martin }
474 1.8 martin return 0;
475 1.8 martin }
476 1.8 martin
477