unxz.c revision 1.8 1 1.8 martin /* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
2 1.5 christos
3 1.5 christos /*-
4 1.5 christos * Copyright (c) 2011 The NetBSD Foundation, Inc.
5 1.5 christos * All rights reserved.
6 1.5 christos *
7 1.5 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.5 christos * by Christos Zoulas.
9 1.5 christos *
10 1.5 christos * Redistribution and use in source and binary forms, with or without
11 1.5 christos * modification, are permitted provided that the following conditions
12 1.5 christos * are met:
13 1.5 christos * 1. Redistributions of source code must retain the above copyright
14 1.5 christos * notice, this list of conditions and the following disclaimer.
15 1.5 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.5 christos * notice, this list of conditions and the following disclaimer in the
17 1.5 christos * documentation and/or other materials provided with the distribution.
18 1.5 christos *
19 1.5 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.5 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.5 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.5 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.5 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.5 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.5 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.5 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.5 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.5 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.5 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.5 christos */
31 1.5 christos #include <sys/cdefs.h>
32 1.8 martin __RCSID("$NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $");
33 1.1 christos
34 1.1 christos #include <stdarg.h>
35 1.1 christos #include <errno.h>
36 1.1 christos #include <stdio.h>
37 1.1 christos #include <unistd.h>
38 1.1 christos #include <lzma.h>
39 1.1 christos
40 1.1 christos static off_t
41 1.1 christos unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
42 1.1 christos {
43 1.1 christos lzma_stream strm = LZMA_STREAM_INIT;
44 1.2 christos static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
45 1.1 christos lzma_ret ret;
46 1.3 christos lzma_action action = LZMA_RUN;
47 1.3 christos off_t bytes_out, bp;
48 1.1 christos uint8_t ibuf[BUFSIZ];
49 1.1 christos uint8_t obuf[BUFSIZ];
50 1.1 christos
51 1.3 christos if (bytes_in == NULL)
52 1.3 christos bytes_in = &bp;
53 1.3 christos
54 1.1 christos strm.next_in = ibuf;
55 1.2 christos memcpy(ibuf, pre, prelen);
56 1.1 christos strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
57 1.1 christos if (strm.avail_in == (size_t)-1)
58 1.3 christos maybe_err("read failed");
59 1.7 mrg infile_newdata(strm.avail_in);
60 1.3 christos strm.avail_in += prelen;
61 1.3 christos *bytes_in = strm.avail_in;
62 1.1 christos
63 1.2 christos if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
64 1.2 christos maybe_errx("Can't initialize decoder (%d)", ret);
65 1.2 christos
66 1.2 christos strm.next_out = NULL;
67 1.2 christos strm.avail_out = 0;
68 1.2 christos if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
69 1.2 christos maybe_errx("Can't read headers (%d)", ret);
70 1.1 christos
71 1.3 christos bytes_out = 0;
72 1.1 christos strm.next_out = obuf;
73 1.1 christos strm.avail_out = sizeof(obuf);
74 1.1 christos
75 1.1 christos for (;;) {
76 1.7 mrg check_siginfo();
77 1.1 christos if (strm.avail_in == 0) {
78 1.1 christos strm.next_in = ibuf;
79 1.1 christos strm.avail_in = read(i, ibuf, sizeof(ibuf));
80 1.3 christos switch (strm.avail_in) {
81 1.3 christos case (size_t)-1:
82 1.3 christos maybe_err("read failed");
83 1.3 christos /*NOTREACHED*/
84 1.3 christos case 0:
85 1.3 christos action = LZMA_FINISH;
86 1.3 christos break;
87 1.3 christos default:
88 1.7 mrg infile_newdata(strm.avail_in);
89 1.3 christos *bytes_in += strm.avail_in;
90 1.3 christos break;
91 1.3 christos }
92 1.1 christos }
93 1.1 christos
94 1.3 christos ret = lzma_code(&strm, action);
95 1.1 christos
96 1.1 christos // Write and check write error before checking decoder error.
97 1.1 christos // This way as much data as possible gets written to output
98 1.1 christos // even if decoder detected an error.
99 1.1 christos if (strm.avail_out == 0 || ret != LZMA_OK) {
100 1.1 christos const size_t write_size = sizeof(obuf) - strm.avail_out;
101 1.1 christos
102 1.1 christos if (write(o, obuf, write_size) != (ssize_t)write_size)
103 1.1 christos maybe_err("write failed");
104 1.1 christos
105 1.1 christos strm.next_out = obuf;
106 1.1 christos strm.avail_out = sizeof(obuf);
107 1.3 christos bytes_out += write_size;
108 1.1 christos }
109 1.1 christos
110 1.1 christos if (ret != LZMA_OK) {
111 1.1 christos if (ret == LZMA_STREAM_END) {
112 1.1 christos // Check that there's no trailing garbage.
113 1.1 christos if (strm.avail_in != 0 || read(i, ibuf, 1))
114 1.1 christos ret = LZMA_DATA_ERROR;
115 1.1 christos else {
116 1.1 christos lzma_end(&strm);
117 1.3 christos return bytes_out;
118 1.1 christos }
119 1.1 christos }
120 1.1 christos
121 1.1 christos const char *msg;
122 1.1 christos switch (ret) {
123 1.1 christos case LZMA_MEM_ERROR:
124 1.1 christos msg = strerror(ENOMEM);
125 1.1 christos break;
126 1.1 christos
127 1.1 christos case LZMA_FORMAT_ERROR:
128 1.1 christos msg = "File format not recognized";
129 1.1 christos break;
130 1.1 christos
131 1.1 christos case LZMA_OPTIONS_ERROR:
132 1.1 christos // FIXME: Better message?
133 1.1 christos msg = "Unsupported compression options";
134 1.1 christos break;
135 1.1 christos
136 1.1 christos case LZMA_DATA_ERROR:
137 1.1 christos msg = "File is corrupt";
138 1.1 christos break;
139 1.1 christos
140 1.1 christos case LZMA_BUF_ERROR:
141 1.1 christos msg = "Unexpected end of input";
142 1.1 christos break;
143 1.1 christos
144 1.1 christos case LZMA_MEMLIMIT_ERROR:
145 1.1 christos msg = "Reached memory limit";
146 1.1 christos break;
147 1.1 christos
148 1.1 christos default:
149 1.4 christos maybe_errx("Unknown error (%d)", ret);
150 1.1 christos break;
151 1.1 christos }
152 1.4 christos maybe_errx("%s", msg);
153 1.1 christos
154 1.1 christos }
155 1.1 christos }
156 1.1 christos }
157 1.8 martin
158 1.8 martin #include <stdbool.h>
159 1.8 martin
160 1.8 martin /*
161 1.8 martin * Copied various bits and pieces from xz support code or brute force
162 1.8 martin * replacements.
163 1.8 martin */
164 1.8 martin
/* Smaller of two values.  Note that each argument may be evaluated twice. */
#define my_min(a, b) ((a) < (b) ? (a) : (b))

/*
 * Size of the I/O buffer in bytes.  BUFSIZ is used when it is larger than
 * 1024 (rounded down to a multiple of 8, i.e. sizeof(uint64_t)); otherwise
 * a fixed 8192 bytes is used because such a small BUFSIZ is suboptimal.
 */
#if BUFSIZ > 1024
#	define IO_BUFFER_SIZE (BUFSIZ & ~7U)
#else
#	define IO_BUFFER_SIZE 8192
#endif

/*
 * I/O buffer that can be viewed as bytes, 32-bit words or 64-bit words.
 * Being a union with a uint64_t member, it is aligned for all three views.
 */
typedef union {
	uint8_t u8[IO_BUFFER_SIZE];
	uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
	uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
182 1.8 martin
183 1.8 martin
184 1.8 martin static bool
185 1.8 martin io_pread(int fd, io_buf *buf, size_t size, off_t pos)
186 1.8 martin {
187 1.8 martin // Using lseek() and read() is more portable than pread() and
188 1.8 martin // for us it is as good as real pread().
189 1.8 martin if (lseek(fd, pos, SEEK_SET) != pos) {
190 1.8 martin return true;
191 1.8 martin }
192 1.8 martin
193 1.8 martin const size_t amount = read(fd, buf, size);
194 1.8 martin if (amount == SIZE_MAX)
195 1.8 martin return true;
196 1.8 martin
197 1.8 martin if (amount != size) {
198 1.8 martin return true;
199 1.8 martin }
200 1.8 martin
201 1.8 martin return false;
202 1.8 martin }
203 1.8 martin
204 1.8 martin /*
205 1.8 martin * Most of the following is copied (mostly verbatim) from the xz
206 1.8 martin * distribution, from file src/xz/list.c
207 1.8 martin */
208 1.8 martin
209 1.8 martin ///////////////////////////////////////////////////////////////////////////////
210 1.8 martin //
211 1.8 martin /// \file list.c
212 1.8 martin /// \brief Listing information about .xz files
213 1.8 martin //
214 1.8 martin // Author: Lasse Collin
215 1.8 martin //
216 1.8 martin // This file has been put into the public domain.
217 1.8 martin // You can do whatever you want with this file.
218 1.8 martin //
219 1.8 martin ///////////////////////////////////////////////////////////////////////////////
220 1.8 martin
221 1.8 martin
222 1.8 martin /// Information about a .xz file
223 1.8 martin typedef struct {
224 1.8 martin /// Combined Index of all Streams in the file
225 1.8 martin lzma_index *idx;
226 1.8 martin
227 1.8 martin /// Total amount of Stream Padding
228 1.8 martin uint64_t stream_padding;
229 1.8 martin
230 1.8 martin /// Highest memory usage so far
231 1.8 martin uint64_t memusage_max;
232 1.8 martin
233 1.8 martin /// True if all Blocks so far have Compressed Size and
234 1.8 martin /// Uncompressed Size fields
235 1.8 martin bool all_have_sizes;
236 1.8 martin
237 1.8 martin /// Oldest XZ Utils version that will decompress the file
238 1.8 martin uint32_t min_version;
239 1.8 martin
240 1.8 martin } xz_file_info;
241 1.8 martin
242 1.8 martin #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
243 1.8 martin
244 1.8 martin
245 1.8 martin /// \brief Parse the Index(es) from the given .xz file
246 1.8 martin ///
247 1.8 martin /// \param xfi Pointer to structure where the decoded information
248 1.8 martin /// is stored.
249 1.8 martin /// \param pair Input file
250 1.8 martin ///
251 1.8 martin /// \return On success, false is returned. On error, true is returned.
252 1.8 martin ///
253 1.8 martin // TODO: This function is pretty big. liblzma should have a function that
254 1.8 martin // takes a callback function to parse the Index(es) from a .xz file to make
255 1.8 martin // it easy for applications.
256 1.8 martin static bool
257 1.8 martin parse_indexes(xz_file_info *xfi, int src_fd)
258 1.8 martin {
259 1.8 martin struct stat st;
260 1.8 martin
261 1.8 martin fstat(src_fd, &st);
262 1.8 martin if (st.st_size <= 0) {
263 1.8 martin return true;
264 1.8 martin }
265 1.8 martin
266 1.8 martin if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
267 1.8 martin return true;
268 1.8 martin }
269 1.8 martin
270 1.8 martin io_buf buf;
271 1.8 martin lzma_stream_flags header_flags;
272 1.8 martin lzma_stream_flags footer_flags;
273 1.8 martin lzma_ret ret;
274 1.8 martin
275 1.8 martin // lzma_stream for the Index decoder
276 1.8 martin lzma_stream strm = LZMA_STREAM_INIT;
277 1.8 martin
278 1.8 martin // All Indexes decoded so far
279 1.8 martin lzma_index *combined_index = NULL;
280 1.8 martin
281 1.8 martin // The Index currently being decoded
282 1.8 martin lzma_index *this_index = NULL;
283 1.8 martin
284 1.8 martin // Current position in the file. We parse the file backwards so
285 1.8 martin // initialize it to point to the end of the file.
286 1.8 martin off_t pos = st.st_size;
287 1.8 martin
288 1.8 martin // Each loop iteration decodes one Index.
289 1.8 martin do {
290 1.8 martin // Check that there is enough data left to contain at least
291 1.8 martin // the Stream Header and Stream Footer. This check cannot
292 1.8 martin // fail in the first pass of this loop.
293 1.8 martin if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
294 1.8 martin goto error;
295 1.8 martin }
296 1.8 martin
297 1.8 martin pos -= LZMA_STREAM_HEADER_SIZE;
298 1.8 martin lzma_vli stream_padding = 0;
299 1.8 martin
300 1.8 martin // Locate the Stream Footer. There may be Stream Padding which
301 1.8 martin // we must skip when reading backwards.
302 1.8 martin while (true) {
303 1.8 martin if (pos < LZMA_STREAM_HEADER_SIZE) {
304 1.8 martin goto error;
305 1.8 martin }
306 1.8 martin
307 1.8 martin if (io_pread(src_fd, &buf,
308 1.8 martin LZMA_STREAM_HEADER_SIZE, pos))
309 1.8 martin goto error;
310 1.8 martin
311 1.8 martin // Stream Padding is always a multiple of four bytes.
312 1.8 martin int i = 2;
313 1.8 martin if (buf.u32[i] != 0)
314 1.8 martin break;
315 1.8 martin
316 1.8 martin // To avoid calling io_pread() for every four bytes
317 1.8 martin // of Stream Padding, take advantage that we read
318 1.8 martin // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
319 1.8 martin // check them too before calling io_pread() again.
320 1.8 martin do {
321 1.8 martin stream_padding += 4;
322 1.8 martin pos -= 4;
323 1.8 martin --i;
324 1.8 martin } while (i >= 0 && buf.u32[i] == 0);
325 1.8 martin }
326 1.8 martin
327 1.8 martin // Decode the Stream Footer.
328 1.8 martin ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
329 1.8 martin if (ret != LZMA_OK) {
330 1.8 martin goto error;
331 1.8 martin }
332 1.8 martin
333 1.8 martin // Check that the Stream Footer doesn't specify something
334 1.8 martin // that we don't support. This can only happen if the xz
335 1.8 martin // version is older than liblzma and liblzma supports
336 1.8 martin // something new.
337 1.8 martin //
338 1.8 martin // It is enough to check Stream Footer. Stream Header must
339 1.8 martin // match when it is compared against Stream Footer with
340 1.8 martin // lzma_stream_flags_compare().
341 1.8 martin if (footer_flags.version != 0) {
342 1.8 martin goto error;
343 1.8 martin }
344 1.8 martin
345 1.8 martin // Check that the size of the Index field looks sane.
346 1.8 martin lzma_vli index_size = footer_flags.backward_size;
347 1.8 martin if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
348 1.8 martin goto error;
349 1.8 martin }
350 1.8 martin
351 1.8 martin // Set pos to the beginning of the Index.
352 1.8 martin pos -= index_size;
353 1.8 martin
354 1.8 martin // Decode the Index.
355 1.8 martin ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
356 1.8 martin if (ret != LZMA_OK) {
357 1.8 martin goto error;
358 1.8 martin }
359 1.8 martin
360 1.8 martin do {
361 1.8 martin // Don't give the decoder more input than the
362 1.8 martin // Index size.
363 1.8 martin strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
364 1.8 martin if (io_pread(src_fd, &buf, strm.avail_in, pos))
365 1.8 martin goto error;
366 1.8 martin
367 1.8 martin pos += strm.avail_in;
368 1.8 martin index_size -= strm.avail_in;
369 1.8 martin
370 1.8 martin strm.next_in = buf.u8;
371 1.8 martin ret = lzma_code(&strm, LZMA_RUN);
372 1.8 martin
373 1.8 martin } while (ret == LZMA_OK);
374 1.8 martin
375 1.8 martin // If the decoding seems to be successful, check also that
376 1.8 martin // the Index decoder consumed as much input as indicated
377 1.8 martin // by the Backward Size field.
378 1.8 martin if (ret == LZMA_STREAM_END)
379 1.8 martin if (index_size != 0 || strm.avail_in != 0)
380 1.8 martin ret = LZMA_DATA_ERROR;
381 1.8 martin
382 1.8 martin if (ret != LZMA_STREAM_END) {
383 1.8 martin // LZMA_BUFFER_ERROR means that the Index decoder
384 1.8 martin // would have liked more input than what the Index
385 1.8 martin // size should be according to Stream Footer.
386 1.8 martin // The message for LZMA_DATA_ERROR makes more
387 1.8 martin // sense in that case.
388 1.8 martin if (ret == LZMA_BUF_ERROR)
389 1.8 martin ret = LZMA_DATA_ERROR;
390 1.8 martin
391 1.8 martin goto error;
392 1.8 martin }
393 1.8 martin
394 1.8 martin // Decode the Stream Header and check that its Stream Flags
395 1.8 martin // match the Stream Footer.
396 1.8 martin pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
397 1.8 martin if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
398 1.8 martin goto error;
399 1.8 martin }
400 1.8 martin
401 1.8 martin pos -= lzma_index_total_size(this_index);
402 1.8 martin if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
403 1.8 martin goto error;
404 1.8 martin
405 1.8 martin ret = lzma_stream_header_decode(&header_flags, buf.u8);
406 1.8 martin if (ret != LZMA_OK) {
407 1.8 martin goto error;
408 1.8 martin }
409 1.8 martin
410 1.8 martin ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
411 1.8 martin if (ret != LZMA_OK) {
412 1.8 martin goto error;
413 1.8 martin }
414 1.8 martin
415 1.8 martin // Store the decoded Stream Flags into this_index. This is
416 1.8 martin // needed so that we can print which Check is used in each
417 1.8 martin // Stream.
418 1.8 martin ret = lzma_index_stream_flags(this_index, &footer_flags);
419 1.8 martin if (ret != LZMA_OK)
420 1.8 martin goto error;
421 1.8 martin
422 1.8 martin // Store also the size of the Stream Padding field. It is
423 1.8 martin // needed to show the offsets of the Streams correctly.
424 1.8 martin ret = lzma_index_stream_padding(this_index, stream_padding);
425 1.8 martin if (ret != LZMA_OK)
426 1.8 martin goto error;
427 1.8 martin
428 1.8 martin if (combined_index != NULL) {
429 1.8 martin // Append the earlier decoded Indexes
430 1.8 martin // after this_index.
431 1.8 martin ret = lzma_index_cat(
432 1.8 martin this_index, combined_index, NULL);
433 1.8 martin if (ret != LZMA_OK) {
434 1.8 martin goto error;
435 1.8 martin }
436 1.8 martin }
437 1.8 martin
438 1.8 martin combined_index = this_index;
439 1.8 martin this_index = NULL;
440 1.8 martin
441 1.8 martin xfi->stream_padding += stream_padding;
442 1.8 martin
443 1.8 martin } while (pos > 0);
444 1.8 martin
445 1.8 martin lzma_end(&strm);
446 1.8 martin
447 1.8 martin // All OK. Make combined_index available to the caller.
448 1.8 martin xfi->idx = combined_index;
449 1.8 martin return false;
450 1.8 martin
451 1.8 martin error:
452 1.8 martin // Something went wrong, free the allocated memory.
453 1.8 martin lzma_end(&strm);
454 1.8 martin lzma_index_end(combined_index, NULL);
455 1.8 martin lzma_index_end(this_index, NULL);
456 1.8 martin return true;
457 1.8 martin }
458 1.8 martin
459 1.8 martin /***************** end of copy from list.c *************************/
460 1.8 martin
461 1.8 martin /*
462 1.8 martin * Small wrapper to extract total length of a file
463 1.8 martin */
464 1.8 martin off_t
465 1.8 martin unxz_len(int fd)
466 1.8 martin {
467 1.8 martin xz_file_info xfi = XZ_FILE_INFO_INIT;
468 1.8 martin if (!parse_indexes(&xfi, fd)) {
469 1.8 martin off_t res = lzma_index_uncompressed_size(xfi.idx);
470 1.8 martin lzma_index_end(xfi.idx, NULL);
471 1.8 martin return res;
472 1.8 martin }
473 1.8 martin return 0;
474 1.8 martin }
475 1.8 martin
476