1 /*- 2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org) 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 */ 25 26 #include "archive_platform.h" 27 #include "archive_endian.h" 28 29 #ifdef HAVE_ERRNO_H 30 #include <errno.h> 31 #endif 32 #include <time.h> 33 #ifdef HAVE_ZLIB_H 34 #include <zlib.h> /* crc32 */ 35 #endif 36 #ifdef HAVE_LIMITS_H 37 #include <limits.h> 38 #endif 39 40 #include "archive.h" 41 #ifndef HAVE_ZLIB_H 42 #include "archive_crc32.h" 43 #endif 44 45 #include "archive_entry.h" 46 #include "archive_entry_locale.h" 47 #include "archive_ppmd7_private.h" 48 #include "archive_entry_private.h" 49 #include "archive_time_private.h" 50 51 #ifdef HAVE_BLAKE2_H 52 #include <blake2.h> 53 #else 54 #include "archive_blake2.h" 55 #endif 56 57 /*#define CHECK_CRC_ON_SOLID_SKIP*/ 58 /*#define DONT_FAIL_ON_CRC_ERROR*/ 59 /*#define DEBUG*/ 60 61 #define rar5_min(a, b) (((a) > (b)) ? (b) : (a)) 62 #define rar5_max(a, b) (((a) > (b)) ? (a) : (b)) 63 #define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*X))) 64 65 #if defined DEBUG 66 #define DEBUG_CODE if(1) 67 #define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0) 68 #else 69 #define DEBUG_CODE if(0) 70 #endif 71 72 /* Real RAR5 magic number is: 73 * 74 * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00 75 * "Rar!\x00" 76 * 77 * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't 78 * want to put this magic sequence in each binary that uses libarchive, so 79 * applications that scan through the file for this marker won't trigger on 80 * this "false" one. 81 * 82 * The array itself is decrypted in `rar5_init` function. */ 83 84 static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 }; 85 static const size_t g_unpack_window_size = 0x20000; 86 87 /* These could have been static const's, but they aren't, because of 88 * Visual Studio. 
*/ 89 #define MAX_NAME_IN_CHARS 2048 90 #define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS) 91 92 struct file_header { 93 ssize_t bytes_remaining; 94 ssize_t unpacked_size; 95 int64_t last_offset; /* Used in sanity checks. */ 96 int64_t last_size; /* Used in sanity checks. */ 97 98 uint8_t solid : 1; /* Is this a solid stream? */ 99 uint8_t service : 1; /* Is this file a service data? */ 100 uint8_t eof : 1; /* Did we finish unpacking the file? */ 101 uint8_t dir : 1; /* Is this file entry a directory? */ 102 103 /* Optional time fields. */ 104 int64_t e_mtime; 105 int64_t e_ctime; 106 int64_t e_atime; 107 uint32_t e_mtime_ns; 108 uint32_t e_ctime_ns; 109 uint32_t e_atime_ns; 110 111 /* Optional hash fields. */ 112 uint32_t stored_crc32; 113 uint32_t calculated_crc32; 114 uint8_t blake2sp[32]; 115 blake2sp_state b2state; 116 char has_blake2; 117 118 /* Optional redir fields */ 119 uint64_t redir_type; 120 uint64_t redir_flags; 121 122 ssize_t solid_window_size; /* Used in file format check. */ 123 }; 124 125 enum EXTRA { 126 EX_CRYPT = 0x01, 127 EX_HASH = 0x02, 128 EX_HTIME = 0x03, 129 EX_VERSION = 0x04, 130 EX_REDIR = 0x05, 131 EX_UOWNER = 0x06, 132 EX_SUBDATA = 0x07 133 }; 134 135 #define REDIR_SYMLINK_IS_DIR 1 136 137 enum REDIR_TYPE { 138 REDIR_TYPE_NONE = 0, 139 REDIR_TYPE_UNIXSYMLINK = 1, 140 REDIR_TYPE_WINSYMLINK = 2, 141 REDIR_TYPE_JUNCTION = 3, 142 REDIR_TYPE_HARDLINK = 4, 143 REDIR_TYPE_FILECOPY = 5, 144 }; 145 146 #define OWNER_USER_NAME 0x01 147 #define OWNER_GROUP_NAME 0x02 148 #define OWNER_USER_UID 0x04 149 #define OWNER_GROUP_GID 0x08 150 #define OWNER_MAXNAMELEN 256 151 152 enum FILTER_TYPE { 153 FILTER_DELTA = 0, /* Generic pattern. */ 154 FILTER_E8 = 1, /* Intel x86 code. */ 155 FILTER_E8E9 = 2, /* Intel x86 code. */ 156 FILTER_ARM = 3, /* ARM code. */ 157 FILTER_AUDIO = 4, /* Audio filter, not used in RARv5. */ 158 FILTER_RGB = 5, /* Color palette, not used in RARv5. */ 159 FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. 
*/ 160 FILTER_PPM = 7, /* Predictive pattern matching, not used in 161 RARv5. */ 162 FILTER_NONE = 8, 163 }; 164 165 struct filter_info { 166 int type; 167 int channels; 168 int pos_r; 169 170 int64_t block_start; 171 ssize_t block_length; 172 uint16_t width; 173 }; 174 175 struct data_ready { 176 char used; 177 const uint8_t* buf; 178 size_t size; 179 int64_t offset; 180 }; 181 182 struct cdeque { 183 uint16_t beg_pos; 184 uint16_t end_pos; 185 uint16_t cap_mask; 186 uint16_t size; 187 size_t* arr; 188 }; 189 190 struct decode_table { 191 uint32_t size; 192 int32_t decode_len[16]; 193 uint32_t decode_pos[16]; 194 uint32_t quick_bits; 195 uint8_t quick_len[1 << 10]; 196 uint16_t quick_num[1 << 10]; 197 uint16_t decode_num[306]; 198 }; 199 200 struct comp_state { 201 /* Flag used to specify if unpacker needs to reinitialize the 202 uncompression context. */ 203 uint8_t initialized : 1; 204 205 /* Flag used when applying filters. */ 206 uint8_t all_filters_applied : 1; 207 208 /* Flag used to skip file context reinitialization, used when unpacker 209 is skipping through different multivolume archives. */ 210 uint8_t switch_multivolume : 1; 211 212 /* Flag used to specify if unpacker has processed the whole data block 213 or just a part of it. */ 214 uint8_t block_parsing_finished : 1; 215 216 /* Flag used to indicate that a previous file using this buffer was 217 encrypted, meaning no data in the buffer can be trusted */ 218 uint8_t data_encrypted : 1; 219 220 signed int notused : 3; 221 222 int flags; /* Uncompression flags. */ 223 int method; /* Uncompression algorithm method. */ 224 int version; /* Uncompression algorithm version. */ 225 ssize_t window_size; /* Size of window_buf. */ 226 uint8_t* window_buf; /* Circular buffer used during 227 decompression. */ 228 uint8_t* filtered_buf; /* Buffer used when applying filters. */ 229 const uint8_t* block_buf; /* Buffer used when merging blocks. */ 230 ssize_t window_mask; /* Convenience field; window_size - 1. 
*/ 231 int64_t write_ptr; /* This amount of data has been unpacked 232 in the window buffer. */ 233 int64_t last_write_ptr; /* This amount of data has been stored in 234 the output file. */ 235 int64_t last_unstore_ptr; /* Counter of bytes extracted during 236 unstoring. This is separate from 237 last_write_ptr because of how SERVICE 238 base blocks are handled during skipping 239 in solid multiarchive archives. */ 240 int64_t solid_offset; /* Additional offset inside the window 241 buffer, used in unpacking solid 242 archives. */ 243 ssize_t cur_block_size; /* Size of current data block. */ 244 int last_len; /* Flag used in lzss decompression. */ 245 246 /* Decode tables used during lzss uncompression. */ 247 248 #define HUFF_BC 20 249 struct decode_table bd; /* huffman bit lengths */ 250 #define HUFF_NC 306 251 struct decode_table ld; /* literals */ 252 #define HUFF_DC 64 253 struct decode_table dd; /* distances */ 254 #define HUFF_LDC 16 255 struct decode_table ldd; /* lower bits of distances */ 256 #define HUFF_RC 44 257 struct decode_table rd; /* repeating distances */ 258 #define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC) 259 260 /* Circular deque for storing filters. */ 261 struct cdeque filters; 262 int64_t last_block_start; /* Used for sanity checking. */ 263 ssize_t last_block_length; /* Used for sanity checking. */ 264 265 /* Distance cache used during lzss uncompression. */ 266 int dist_cache[4]; 267 268 /* Data buffer stack. */ 269 struct data_ready dready[2]; 270 }; 271 272 /* Bit reader state. */ 273 struct bit_reader { 274 int8_t bit_addr; /* Current bit pointer inside current byte. */ 275 int in_addr; /* Current byte pointer. */ 276 }; 277 278 /* RARv5 block header structure. Use bf_* functions to get values from 279 * block_flags_u8 field. I.e. bf_byte_count, etc. 
*/ 280 struct compressed_block_header { 281 /* block_flags_u8 contain fields encoded in little-endian bitfield: 282 * 283 * - table present flag (shr 7, and 1), 284 * - last block flag (shr 6, and 1), 285 * - byte_count (shr 3, and 7), 286 * - bit_size (shr 0, and 7). 287 */ 288 uint8_t block_flags_u8; 289 uint8_t block_cksum; 290 }; 291 292 /* RARv5 main header structure. */ 293 struct main_header { 294 /* Does the archive contain solid streams? */ 295 uint8_t solid : 1; 296 297 /* If this a multi-file archive? */ 298 uint8_t volume : 1; 299 uint8_t endarc : 1; 300 uint8_t notused : 5; 301 302 unsigned int vol_no; 303 }; 304 305 struct generic_header { 306 uint8_t split_after : 1; 307 uint8_t split_before : 1; 308 uint8_t padding : 6; 309 int size; 310 int last_header_id; 311 }; 312 313 struct multivolume { 314 unsigned int expected_vol_no; 315 uint8_t* push_buf; 316 }; 317 318 /* Main context structure. */ 319 struct rar5 { 320 int header_initialized; 321 322 /* Set to 1 if current file is positioned AFTER the magic value 323 * of the archive file. This is used in header reading functions. */ 324 int skipped_magic; 325 326 /* Set to not zero if we're in skip mode (either by calling 327 * rar5_data_skip function or when skipping over solid streams). 328 * Set to 0 when in * extraction mode. This is used during checksum 329 * calculation functions. */ 330 int skip_mode; 331 332 /* Set to not zero if we're in block merging mode (i.e. when switching 333 * to another file in multivolume archive, last block from 1st archive 334 * needs to be merged with 1st block from 2nd archive). This flag 335 * guards against recursive use of the merging function, which doesn't 336 * support recursive calls. */ 337 int merge_mode; 338 339 /* An offset to QuickOpen list. This is not supported by this unpacker, 340 * because we're focusing on streaming interface. QuickOpen is designed 341 * to make things quicker for non-stream interfaces, so it's not our 342 * use case. 
*/ 343 uint64_t qlist_offset; 344 345 /* An offset to additional Recovery data. This is not supported by this 346 * unpacker. Recovery data are additional Reed-Solomon codes that could 347 * be used to calculate bytes that are missing in archive or are 348 * corrupted. */ 349 uint64_t rr_offset; 350 351 /* Various context variables grouped to different structures. */ 352 struct generic_header generic; 353 struct main_header main; 354 struct comp_state cstate; 355 struct file_header file; 356 struct bit_reader bits; 357 struct multivolume vol; 358 359 /* The header of currently processed RARv5 block. Used in main 360 * decompression logic loop. */ 361 struct compressed_block_header last_block_hdr; 362 363 /* 364 * Custom field to denote that this archive contains encrypted entries 365 */ 366 int has_encrypted_entries; 367 int headers_are_encrypted; 368 }; 369 370 /* Forward function declarations. */ 371 372 static void rar5_signature(char *buf); 373 static int verify_global_checksums(struct archive_read* a); 374 static int rar5_read_data_skip(struct archive_read *a); 375 static int push_data_ready(struct archive_read* a, struct rar5* rar, 376 const uint8_t* buf, size_t size, int64_t offset); 377 static void clear_data_ready_stack(struct rar5* rar); 378 static void rar5_deinit(struct rar5* rar); 379 380 /* CDE_xxx = Circular Double Ended (Queue) return values. */ 381 enum CDE_RETURN_VALUES { 382 CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS, 383 }; 384 385 /* Clears the contents of this circular deque. */ 386 static void cdeque_clear(struct cdeque* d) { 387 d->size = 0; 388 d->beg_pos = 0; 389 d->end_pos = 0; 390 } 391 392 /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32, 393 * 64, 256, etc. When the user will add another item above current capacity, 394 * the circular deque will overwrite the oldest entry. 
*/ 395 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) { 396 if(d == NULL || max_capacity_power_of_2 == 0) 397 return CDE_PARAM; 398 399 d->cap_mask = max_capacity_power_of_2 - 1; 400 d->arr = NULL; 401 402 if((max_capacity_power_of_2 & d->cap_mask) != 0) 403 return CDE_PARAM; 404 405 cdeque_clear(d); 406 d->arr = malloc(sizeof(void*) * max_capacity_power_of_2); 407 408 return d->arr ? CDE_OK : CDE_ALLOC; 409 } 410 411 /* Return the current size (not capacity) of circular deque `d`. */ 412 static size_t cdeque_size(struct cdeque* d) { 413 return d->size; 414 } 415 416 /* Returns the first element of current circular deque. Note that this function 417 * doesn't perform any bounds checking. If you need bounds checking, use 418 * `cdeque_front()` function instead. */ 419 static void cdeque_front_fast(struct cdeque* d, void** value) { 420 *value = (void*) d->arr[d->beg_pos]; 421 } 422 423 /* Returns the first element of current circular deque. This function 424 * performs bounds checking. */ 425 static int cdeque_front(struct cdeque* d, void** value) { 426 if(d->size > 0) { 427 cdeque_front_fast(d, value); 428 return CDE_OK; 429 } else 430 return CDE_OUT_OF_BOUNDS; 431 } 432 433 /* Pushes a new element into the end of this circular deque object. */ 434 static int cdeque_push_back(struct cdeque* d, void* item) { 435 if(d == NULL) 436 return CDE_PARAM; 437 438 if(d->size == d->cap_mask + 1) 439 return CDE_OUT_OF_BOUNDS; 440 441 d->arr[d->end_pos] = (size_t) item; 442 d->end_pos = (d->end_pos + 1) & d->cap_mask; 443 d->size++; 444 445 return CDE_OK; 446 } 447 448 /* Pops a front element of this circular deque object and returns its value. 449 * This function doesn't perform any bounds checking. 
*/ 450 static void cdeque_pop_front_fast(struct cdeque* d, void** value) { 451 *value = (void*) d->arr[d->beg_pos]; 452 d->beg_pos = (d->beg_pos + 1) & d->cap_mask; 453 d->size--; 454 } 455 456 /* Pops a front element of this circular deque object and returns its value. 457 * This function performs bounds checking. */ 458 static int cdeque_pop_front(struct cdeque* d, void** value) { 459 if(!d || !value) 460 return CDE_PARAM; 461 462 if(d->size == 0) 463 return CDE_OUT_OF_BOUNDS; 464 465 cdeque_pop_front_fast(d, value); 466 return CDE_OK; 467 } 468 469 /* Convenience function to cast filter_info** to void **. */ 470 static void** cdeque_filter_p(struct filter_info** f) { 471 return (void**) (size_t) f; 472 } 473 474 /* Convenience function to cast filter_info* to void *. */ 475 static void* cdeque_filter(struct filter_info* f) { 476 return (void**) (size_t) f; 477 } 478 479 /* Destroys this circular deque object. Deallocates the memory of the 480 * collection buffer, but doesn't deallocate the memory of any pointer passed 481 * to this deque as a value. 
*/ 482 static void cdeque_free(struct cdeque* d) { 483 if(!d) 484 return; 485 486 if(!d->arr) 487 return; 488 489 free(d->arr); 490 491 d->arr = NULL; 492 d->beg_pos = -1; 493 d->end_pos = -1; 494 d->cap_mask = 0; 495 } 496 497 static inline 498 uint8_t bf_bit_size(const struct compressed_block_header* hdr) { 499 return hdr->block_flags_u8 & 7; 500 } 501 502 static inline 503 uint8_t bf_byte_count(const struct compressed_block_header* hdr) { 504 return (hdr->block_flags_u8 >> 3) & 7; 505 } 506 507 static inline 508 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) { 509 return (hdr->block_flags_u8 >> 7) & 1; 510 } 511 512 static inline 513 uint8_t bf_is_last_block(const struct compressed_block_header* hdr) { 514 return (hdr->block_flags_u8 >> 6) & 1; 515 } 516 517 static inline struct rar5* get_context(struct archive_read* a) { 518 return (struct rar5*) a->format->data; 519 } 520 521 /* Convenience functions used by filter implementations. */ 522 static void circular_memcpy(uint8_t* dst, uint8_t* window, const ssize_t mask, 523 int64_t start, int64_t end) 524 { 525 if((start & mask) > (end & mask)) { 526 ssize_t len1 = mask + 1 - (start & mask); 527 ssize_t len2 = end & mask; 528 529 memcpy(dst, &window[start & mask], len1); 530 memcpy(dst + len1, window, len2); 531 } else { 532 memcpy(dst, &window[start & mask], (size_t) (end - start)); 533 } 534 } 535 536 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) { 537 uint8_t linear_buf[4]; 538 circular_memcpy(linear_buf, rar->cstate.window_buf, 539 rar->cstate.window_mask, offset, offset + 4); 540 return archive_le32dec(linear_buf); 541 } 542 543 static void write_filter_data(struct rar5* rar, uint32_t offset, 544 uint32_t value) 545 { 546 archive_le32enc(&rar->cstate.filtered_buf[offset], value); 547 } 548 549 /* Allocates a new filter descriptor and adds it to the filter array. 
*/ 550 static struct filter_info* add_new_filter(struct rar5* rar) { 551 struct filter_info* f = calloc(1, sizeof(*f)); 552 553 if(!f) { 554 return NULL; 555 } 556 557 if (CDE_OK != cdeque_push_back(&rar->cstate.filters, cdeque_filter(f))) { 558 free(f); 559 return NULL; 560 } 561 562 return f; 563 } 564 565 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) { 566 int i; 567 ssize_t dest_pos, src_pos = 0; 568 569 for(i = 0; i < flt->channels; i++) { 570 uint8_t prev_byte = 0; 571 for(dest_pos = i; 572 dest_pos < flt->block_length; 573 dest_pos += flt->channels) 574 { 575 uint8_t byte; 576 577 byte = rar->cstate.window_buf[ 578 (rar->cstate.solid_offset + flt->block_start + 579 src_pos) & rar->cstate.window_mask]; 580 581 prev_byte -= byte; 582 rar->cstate.filtered_buf[dest_pos] = prev_byte; 583 src_pos++; 584 } 585 } 586 587 return ARCHIVE_OK; 588 } 589 590 static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt, 591 int extended) 592 { 593 const uint32_t file_size = 0x1000000; 594 ssize_t i; 595 596 circular_memcpy(rar->cstate.filtered_buf, 597 rar->cstate.window_buf, rar->cstate.window_mask, 598 rar->cstate.solid_offset + flt->block_start, 599 rar->cstate.solid_offset + flt->block_start + flt->block_length); 600 601 for(i = 0; i < flt->block_length - 4;) { 602 uint8_t b = rar->cstate.window_buf[ 603 (rar->cstate.solid_offset + flt->block_start + 604 i++) & rar->cstate.window_mask]; 605 606 /* 607 * 0xE8 = x86's call <relative_addr_uint32> (function call) 608 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump) 609 */ 610 if(b == 0xE8 || (extended && b == 0xE9)) { 611 612 uint32_t addr; 613 uint32_t offset = (i + flt->block_start) % file_size; 614 615 addr = read_filter_data(rar, 616 (uint32_t)(rar->cstate.solid_offset + 617 flt->block_start + i) & rar->cstate.window_mask); 618 619 if(addr & 0x80000000) { 620 if(((addr + offset) & 0x80000000) == 0) { 621 write_filter_data(rar, (uint32_t)i, 622 addr + file_size); 623 } 624 
} else { 625 if((addr - file_size) & 0x80000000) { 626 uint32_t naddr = addr - offset; 627 write_filter_data(rar, (uint32_t)i, 628 naddr); 629 } 630 } 631 632 i += 4; 633 } 634 } 635 636 return ARCHIVE_OK; 637 } 638 639 static int run_arm_filter(struct rar5* rar, struct filter_info* flt) { 640 ssize_t i = 0; 641 uint32_t offset; 642 643 circular_memcpy(rar->cstate.filtered_buf, 644 rar->cstate.window_buf, rar->cstate.window_mask, 645 rar->cstate.solid_offset + flt->block_start, 646 rar->cstate.solid_offset + flt->block_start + flt->block_length); 647 648 for(i = 0; i < flt->block_length - 3; i += 4) { 649 uint8_t* b = &rar->cstate.window_buf[ 650 (rar->cstate.solid_offset + 651 flt->block_start + i + 3) & rar->cstate.window_mask]; 652 653 if(*b == 0xEB) { 654 /* 0xEB = ARM's BL (branch + link) instruction. */ 655 offset = read_filter_data(rar, 656 (rar->cstate.solid_offset + flt->block_start + i) & 657 (uint32_t)rar->cstate.window_mask) & 0x00ffffff; 658 659 offset -= (uint32_t) ((i + flt->block_start) / 4); 660 offset = (offset & 0x00ffffff) | 0xeb000000; 661 write_filter_data(rar, (uint32_t)i, offset); 662 } 663 } 664 665 return ARCHIVE_OK; 666 } 667 668 static int run_filter(struct archive_read* a, struct filter_info* flt) { 669 int ret; 670 struct rar5* rar = get_context(a); 671 672 clear_data_ready_stack(rar); 673 free(rar->cstate.filtered_buf); 674 675 rar->cstate.filtered_buf = malloc(flt->block_length); 676 if(!rar->cstate.filtered_buf) { 677 archive_set_error(&a->archive, ENOMEM, 678 "Can't allocate memory for filter data"); 679 return ARCHIVE_FATAL; 680 } 681 682 switch(flt->type) { 683 case FILTER_DELTA: 684 ret = run_delta_filter(rar, flt); 685 break; 686 687 case FILTER_E8: 688 /* fallthrough */ 689 case FILTER_E8E9: 690 ret = run_e8e9_filter(rar, flt, 691 flt->type == FILTER_E8E9); 692 break; 693 694 case FILTER_ARM: 695 ret = run_arm_filter(rar, flt); 696 break; 697 698 default: 699 archive_set_error(&a->archive, 700 ARCHIVE_ERRNO_FILE_FORMAT, 701 
"Unsupported filter type: 0x%x", 702 (unsigned int)flt->type); 703 return ARCHIVE_FATAL; 704 } 705 706 if(ret != ARCHIVE_OK) { 707 /* Filter has failed. */ 708 return ret; 709 } 710 711 if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf, 712 flt->block_length, rar->cstate.last_write_ptr)) 713 { 714 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 715 "Stack overflow when submitting unpacked data"); 716 717 return ARCHIVE_FATAL; 718 } 719 720 rar->cstate.last_write_ptr += flt->block_length; 721 return ARCHIVE_OK; 722 } 723 724 /* The `push_data` function submits the selected data range to the user. 725 * Next call of `use_data` will use the pointer, size and offset arguments 726 * that are specified here. These arguments are pushed to the FIFO stack here, 727 * and popped from the stack by the `use_data` function. */ 728 static void push_data(struct archive_read* a, struct rar5* rar, 729 const uint8_t* buf, int64_t idx_begin, int64_t idx_end) 730 { 731 const ssize_t wmask = rar->cstate.window_mask; 732 const ssize_t solid_write_ptr = (rar->cstate.solid_offset + 733 rar->cstate.last_write_ptr) & wmask; 734 735 idx_begin += rar->cstate.solid_offset; 736 idx_end += rar->cstate.solid_offset; 737 738 /* Check if our unpacked data is wrapped inside the window circular 739 * buffer. If it's not wrapped, it can be copied out by using 740 * a single memcpy, but when it's wrapped, we need to copy the first 741 * part with one memcpy, and the second part with another memcpy. */ 742 743 if((idx_begin & wmask) > (idx_end & wmask)) { 744 /* The data is wrapped (begin offset sis bigger than end 745 * offset). */ 746 const ssize_t frag1_size = rar->cstate.window_size - 747 (idx_begin & wmask); 748 const ssize_t frag2_size = idx_end & wmask; 749 750 /* Copy the first part of the buffer first. */ 751 push_data_ready(a, rar, buf + solid_write_ptr, frag1_size, 752 rar->cstate.last_write_ptr); 753 754 /* Copy the second part of the buffer. 
*/ 755 push_data_ready(a, rar, buf, frag2_size, 756 rar->cstate.last_write_ptr + frag1_size); 757 758 rar->cstate.last_write_ptr += frag1_size + frag2_size; 759 } else { 760 /* Data is not wrapped, so we can just use one call to copy the 761 * data. */ 762 push_data_ready(a, rar, 763 buf + solid_write_ptr, (idx_end - idx_begin) & wmask, 764 rar->cstate.last_write_ptr); 765 766 rar->cstate.last_write_ptr += idx_end - idx_begin; 767 } 768 } 769 770 /* Convenience function that submits the data to the user. It uses the 771 * unpack window buffer as a source location. */ 772 static void push_window_data(struct archive_read* a, struct rar5* rar, 773 int64_t idx_begin, int64_t idx_end) 774 { 775 push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end); 776 } 777 778 static int apply_filters(struct archive_read* a) { 779 struct filter_info* flt; 780 struct rar5* rar = get_context(a); 781 int ret; 782 783 rar->cstate.all_filters_applied = 0; 784 785 /* Get the first filter that can be applied to our data. The data 786 * needs to be fully unpacked before the filter can be run. */ 787 if(CDE_OK == cdeque_front(&rar->cstate.filters, 788 cdeque_filter_p(&flt))) { 789 /* Check if our unpacked data fully covers this filter's 790 * range. */ 791 if(rar->cstate.write_ptr > flt->block_start && 792 rar->cstate.write_ptr >= flt->block_start + 793 flt->block_length) { 794 /* Check if we have some data pending to be written 795 * right before the filter's start offset. */ 796 if(rar->cstate.last_write_ptr == flt->block_start) { 797 /* Run the filter specified by descriptor 798 * `flt`. */ 799 ret = run_filter(a, flt); 800 if(ret != ARCHIVE_OK) { 801 /* Filter failure, return error. */ 802 return ret; 803 } 804 805 /* Filter descriptor won't be needed anymore 806 * after it's used, * so remove it from the 807 * filter list and free its memory. 
*/ 808 (void) cdeque_pop_front(&rar->cstate.filters, 809 cdeque_filter_p(&flt)); 810 811 free(flt); 812 } else { 813 /* We can't run filters yet, dump the memory 814 * right before the filter. */ 815 push_window_data(a, rar, 816 rar->cstate.last_write_ptr, 817 flt->block_start); 818 } 819 820 /* Return 'filter applied or not needed' state to the 821 * caller. */ 822 return ARCHIVE_RETRY; 823 } 824 } 825 826 rar->cstate.all_filters_applied = 1; 827 return ARCHIVE_OK; 828 } 829 830 static void dist_cache_push(struct rar5* rar, int value) { 831 int* q = rar->cstate.dist_cache; 832 833 q[3] = q[2]; 834 q[2] = q[1]; 835 q[1] = q[0]; 836 q[0] = value; 837 } 838 839 static int dist_cache_touch(struct rar5* rar, int idx) { 840 int* q = rar->cstate.dist_cache; 841 int i, dist = q[idx]; 842 843 for(i = idx; i > 0; i--) 844 q[i] = q[i - 1]; 845 846 q[0] = dist; 847 return dist; 848 } 849 850 static void free_filters(struct rar5* rar) { 851 struct cdeque* d = &rar->cstate.filters; 852 853 /* Free any remaining filters. All filters should be naturally 854 * consumed by the unpacking function, so remaining filters after 855 * unpacking normally mean that unpacking wasn't successful. 856 * But still of course we shouldn't leak memory in such case. */ 857 858 /* cdeque_size() is a fast operation, so we can use it as a loop 859 * expression. */ 860 while(cdeque_size(d) > 0) { 861 struct filter_info* f = NULL; 862 863 /* Pop_front will also decrease the collection's size. */ 864 if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f))) 865 free(f); 866 } 867 868 cdeque_clear(d); 869 870 /* Also clear out the variables needed for sanity checking. 
*/ 871 rar->cstate.last_block_start = 0; 872 rar->cstate.last_block_length = 0; 873 } 874 875 static void reset_file_context(struct rar5* rar) { 876 memset(&rar->file, 0, sizeof(rar->file)); 877 blake2sp_init(&rar->file.b2state, 32); 878 879 if(rar->main.solid) { 880 rar->cstate.solid_offset += rar->cstate.write_ptr; 881 } else { 882 rar->cstate.solid_offset = 0; 883 } 884 885 rar->cstate.write_ptr = 0; 886 rar->cstate.last_write_ptr = 0; 887 rar->cstate.last_unstore_ptr = 0; 888 889 rar->file.redir_type = REDIR_TYPE_NONE; 890 rar->file.redir_flags = 0; 891 892 free_filters(rar); 893 } 894 895 static inline int get_archive_read(struct archive* a, 896 struct archive_read** ar) 897 { 898 *ar = (struct archive_read*) a; 899 archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, 900 "archive_read_support_format_rar5"); 901 902 return ARCHIVE_OK; 903 } 904 905 static int read_ahead(struct archive_read* a, size_t how_many, 906 const uint8_t** ptr) 907 { 908 ssize_t avail = -1; 909 if(!ptr) 910 return 0; 911 912 *ptr = __archive_read_ahead(a, how_many, &avail); 913 if(*ptr == NULL) { 914 return 0; 915 } 916 917 return 1; 918 } 919 920 static int consume(struct archive_read* a, int64_t how_many) { 921 int ret; 922 923 ret = how_many == __archive_read_consume(a, how_many) 924 ? ARCHIVE_OK 925 : ARCHIVE_FATAL; 926 927 return ret; 928 } 929 930 /** 931 * Read a RAR5 variable sized numeric value. This value will be stored in 932 * `pvalue`. The `pvalue_len` argument points to a variable that will receive 933 * the byte count that was consumed in order to decode the `pvalue` value, plus 934 * one. 935 * 936 * pvalue_len is optional and can be NULL. 937 * 938 * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume 939 * the number of bytes that `pvalue_len` value contains. If the `pvalue_len` 940 * is NULL, this consuming operation is done automatically. 941 * 942 * Returns 1 if *pvalue was successfully read. 943 * Returns 0 if there was an error. 
In this case, *pvalue contains an 944 * invalid value. 945 */ 946 947 static int read_var(struct archive_read* a, uint64_t* pvalue, 948 uint64_t* pvalue_len) 949 { 950 uint64_t result = 0; 951 size_t shift, i; 952 const uint8_t* p; 953 uint8_t b; 954 955 /* We will read maximum of 8 bytes. We don't have to handle the 956 * situation to read the RAR5 variable-sized value stored at the end of 957 * the file, because such situation will never happen. */ 958 if(!read_ahead(a, 8, &p)) 959 return 0; 960 961 for(shift = 0, i = 0; i < 8; i++, shift += 7) { 962 b = p[i]; 963 964 /* Strip the MSB from the input byte and add the resulting 965 * number to the `result`. */ 966 result += (b & (uint64_t)0x7F) << shift; 967 968 /* MSB set to 1 means we need to continue decoding process. 969 * MSB set to 0 means we're done. 970 * 971 * This conditional checks for the second case. */ 972 if((b & 0x80) == 0) { 973 if(pvalue) { 974 *pvalue = result; 975 } 976 977 /* If the caller has passed the `pvalue_len` pointer, 978 * store the number of consumed bytes in it and do NOT 979 * consume those bytes, since the caller has all the 980 * information it needs to perform */ 981 if(pvalue_len) { 982 *pvalue_len = 1 + i; 983 } else { 984 /* If the caller did not provide the 985 * `pvalue_len` pointer, it will not have the 986 * possibility to advance the file pointer, 987 * because it will not know how many bytes it 988 * needs to consume. This is why we handle 989 * such situation here automatically. */ 990 if(ARCHIVE_OK != consume(a, 1 + i)) { 991 return 0; 992 } 993 } 994 995 /* End of decoding process, return success. */ 996 return 1; 997 } 998 } 999 1000 /* The decoded value takes the maximum number of 8 bytes. 1001 * It's a maximum number of bytes, so end decoding process here 1002 * even if the first bit of last byte is 1. 
*/ 1003 if(pvalue) { 1004 *pvalue = result; 1005 } 1006 1007 if(pvalue_len) { 1008 *pvalue_len = 9; 1009 } else { 1010 if(ARCHIVE_OK != consume(a, 9)) { 1011 return 0; 1012 } 1013 } 1014 1015 return 1; 1016 } 1017 1018 static int read_var_sized(struct archive_read* a, size_t* pvalue, 1019 size_t* pvalue_len) 1020 { 1021 uint64_t v; 1022 uint64_t v_size = 0; 1023 1024 const int ret = pvalue_len ? read_var(a, &v, &v_size) 1025 : read_var(a, &v, NULL); 1026 1027 if(ret == 1 && pvalue) { 1028 *pvalue = (size_t) v; 1029 } 1030 1031 if(pvalue_len) { 1032 /* Possible data truncation should be safe. */ 1033 *pvalue_len = (size_t) v_size; 1034 } 1035 1036 return ret; 1037 } 1038 1039 static int read_bits_32(struct archive_read* a, struct rar5* rar, 1040 const uint8_t* p, uint32_t* value) 1041 { 1042 if(rar->bits.in_addr >= rar->cstate.cur_block_size) { 1043 archive_set_error(&a->archive, 1044 ARCHIVE_ERRNO_PROGRAMMER, 1045 "Premature end of stream during extraction of data (#1)"); 1046 return ARCHIVE_FATAL; 1047 } 1048 1049 uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24; 1050 bits |= p[rar->bits.in_addr + 1] << 16; 1051 bits |= p[rar->bits.in_addr + 2] << 8; 1052 bits |= p[rar->bits.in_addr + 3]; 1053 bits <<= rar->bits.bit_addr; 1054 bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr); 1055 *value = bits; 1056 return ARCHIVE_OK; 1057 } 1058 1059 static int read_bits_16(struct archive_read* a, struct rar5* rar, 1060 const uint8_t* p, uint16_t* value) 1061 { 1062 if(rar->bits.in_addr >= rar->cstate.cur_block_size) { 1063 archive_set_error(&a->archive, 1064 ARCHIVE_ERRNO_PROGRAMMER, 1065 "Premature end of stream during extraction of data (#2)"); 1066 return ARCHIVE_FATAL; 1067 } 1068 1069 int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16; 1070 bits |= (int) p[rar->bits.in_addr + 1] << 8; 1071 bits |= (int) p[rar->bits.in_addr + 2]; 1072 bits >>= (8 - rar->bits.bit_addr); 1073 *value = bits & 0xffff; 1074 return ARCHIVE_OK; 1075 } 1076 1077 static void 
skip_bits(struct rar5* rar, int bits) { 1078 const int new_bits = rar->bits.bit_addr + bits; 1079 rar->bits.in_addr += new_bits >> 3; 1080 rar->bits.bit_addr = new_bits & 7; 1081 } 1082 1083 /* n = up to 16 */ 1084 static int read_consume_bits(struct archive_read* a, struct rar5* rar, 1085 const uint8_t* p, int n, int* value) 1086 { 1087 uint16_t v; 1088 int ret, num; 1089 1090 if(n == 0 || n > 16) { 1091 /* This is a programmer error and should never happen 1092 * in runtime. */ 1093 return ARCHIVE_FATAL; 1094 } 1095 1096 ret = read_bits_16(a, rar, p, &v); 1097 if(ret != ARCHIVE_OK) 1098 return ret; 1099 1100 num = (int) v; 1101 num >>= 16 - n; 1102 1103 skip_bits(rar, n); 1104 1105 if(value) 1106 *value = num; 1107 1108 return ARCHIVE_OK; 1109 } 1110 1111 static char read_u32(struct archive_read* a, uint32_t* pvalue) { 1112 const uint8_t* p; 1113 if(!read_ahead(a, 4, &p)) 1114 return 0; 1115 1116 *pvalue = archive_le32dec(p); 1117 return ARCHIVE_OK == consume(a, 4); 1118 } 1119 1120 static char read_u64(struct archive_read* a, uint64_t* pvalue) { 1121 const uint8_t* p; 1122 if(!read_ahead(a, 8, &p)) 1123 return 0; 1124 1125 *pvalue = archive_le64dec(p); 1126 return ARCHIVE_OK == consume(a, 8); 1127 } 1128 1129 static int bid_standard(struct archive_read* a) { 1130 const uint8_t* p; 1131 char signature[sizeof(rar5_signature_xor)]; 1132 1133 rar5_signature(signature); 1134 1135 if(!read_ahead(a, sizeof(rar5_signature_xor), &p)) 1136 return -1; 1137 1138 if(!memcmp(signature, p, sizeof(rar5_signature_xor))) 1139 return 30; 1140 1141 return -1; 1142 } 1143 1144 static int bid_sfx(struct archive_read *a) 1145 { 1146 const char *p; 1147 1148 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL) 1149 return -1; 1150 1151 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) { 1152 /* This is a PE file */ 1153 char signature[sizeof(rar5_signature_xor)]; 1154 ssize_t offset = 0x10000; 1155 ssize_t window = 4096; 1156 ssize_t bytes_avail; 1157 1158 
rar5_signature(signature); 1159 1160 while (offset + window <= (1024 * 512)) { 1161 const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail); 1162 if (buff == NULL) { 1163 /* Remaining bytes are less than window. */ 1164 window >>= 1; 1165 if (window < 0x40) 1166 return 0; 1167 continue; 1168 } 1169 p = buff + offset; 1170 while (p + 8 < buff + bytes_avail) { 1171 if (memcmp(p, signature, sizeof(signature)) == 0) 1172 return 30; 1173 p += 0x10; 1174 } 1175 offset = p - buff; 1176 } 1177 } 1178 1179 return 0; 1180 } 1181 1182 static int rar5_bid(struct archive_read* a, int best_bid) { 1183 int my_bid; 1184 1185 if(best_bid > 30) 1186 return -1; 1187 1188 my_bid = bid_standard(a); 1189 if(my_bid > -1) { 1190 return my_bid; 1191 } 1192 my_bid = bid_sfx(a); 1193 if (my_bid > -1) { 1194 return my_bid; 1195 } 1196 1197 return -1; 1198 } 1199 1200 static int rar5_options(struct archive_read *a, const char *key, 1201 const char *val) { 1202 (void) a; 1203 (void) key; 1204 (void) val; 1205 1206 /* No options supported in this version. Return the ARCHIVE_WARN code 1207 * to signal the options supervisor that the unpacker didn't handle 1208 * setting this option. 
*/ 1209 1210 return ARCHIVE_WARN; 1211 } 1212 1213 static void init_header(struct archive_read* a) { 1214 a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5; 1215 a->archive.archive_format_name = "RAR5"; 1216 } 1217 1218 static void init_window_mask(struct rar5* rar) { 1219 if (rar->cstate.window_size) 1220 rar->cstate.window_mask = rar->cstate.window_size - 1; 1221 else 1222 rar->cstate.window_mask = 0; 1223 } 1224 1225 enum HEADER_FLAGS { 1226 HFL_EXTRA_DATA = 0x0001, 1227 HFL_DATA = 0x0002, 1228 HFL_SKIP_IF_UNKNOWN = 0x0004, 1229 HFL_SPLIT_BEFORE = 0x0008, 1230 HFL_SPLIT_AFTER = 0x0010, 1231 HFL_CHILD = 0x0020, 1232 HFL_INHERITED = 0x0040 1233 }; 1234 1235 static int process_main_locator_extra_block(struct archive_read* a, 1236 struct rar5* rar) 1237 { 1238 uint64_t locator_flags; 1239 1240 enum LOCATOR_FLAGS { 1241 QLIST = 0x01, RECOVERY = 0x02, 1242 }; 1243 1244 if(!read_var(a, &locator_flags, NULL)) { 1245 return ARCHIVE_EOF; 1246 } 1247 1248 if(locator_flags & QLIST) { 1249 if(!read_var(a, &rar->qlist_offset, NULL)) { 1250 return ARCHIVE_EOF; 1251 } 1252 1253 /* qlist is not used */ 1254 } 1255 1256 if(locator_flags & RECOVERY) { 1257 if(!read_var(a, &rar->rr_offset, NULL)) { 1258 return ARCHIVE_EOF; 1259 } 1260 1261 /* rr is not used */ 1262 } 1263 1264 return ARCHIVE_OK; 1265 } 1266 1267 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar, 1268 int64_t* extra_data_size) 1269 { 1270 size_t hash_type = 0; 1271 size_t value_len; 1272 1273 enum HASH_TYPE { 1274 BLAKE2sp = 0x00 1275 }; 1276 1277 if(!read_var_sized(a, &hash_type, &value_len)) 1278 return ARCHIVE_EOF; 1279 1280 *extra_data_size -= value_len; 1281 if(ARCHIVE_OK != consume(a, value_len)) { 1282 return ARCHIVE_EOF; 1283 } 1284 1285 /* The file uses BLAKE2sp checksum algorithm instead of plain old 1286 * CRC32. 
*/ 1287 if(hash_type == BLAKE2sp) { 1288 const uint8_t* p; 1289 const int hash_size = sizeof(rar->file.blake2sp); 1290 1291 if(!read_ahead(a, hash_size, &p)) 1292 return ARCHIVE_EOF; 1293 1294 rar->file.has_blake2 = 1; 1295 memcpy(&rar->file.blake2sp, p, hash_size); 1296 1297 if(ARCHIVE_OK != consume(a, hash_size)) { 1298 return ARCHIVE_EOF; 1299 } 1300 1301 *extra_data_size -= hash_size; 1302 } else { 1303 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1304 "Unsupported hash type (0x%jx)", (uintmax_t)hash_type); 1305 return ARCHIVE_FATAL; 1306 } 1307 1308 return ARCHIVE_OK; 1309 } 1310 1311 static int parse_htime_item(struct archive_read* a, char unix_time, 1312 int64_t* sec, uint32_t* nsec, int64_t* extra_data_size) 1313 { 1314 if(unix_time) { 1315 uint32_t time_val; 1316 if(!read_u32(a, &time_val)) 1317 return ARCHIVE_EOF; 1318 1319 *extra_data_size -= 4; 1320 *sec = (int64_t) time_val; 1321 } else { 1322 uint64_t windows_time; 1323 if(!read_u64(a, &windows_time)) 1324 return ARCHIVE_EOF; 1325 1326 ntfs_to_unix(windows_time, sec, nsec); 1327 *extra_data_size -= 8; 1328 } 1329 1330 return ARCHIVE_OK; 1331 } 1332 1333 static int parse_file_extra_version(struct archive_read* a, 1334 struct archive_entry* e, int64_t* extra_data_size) 1335 { 1336 size_t flags = 0; 1337 size_t version = 0; 1338 size_t value_len = 0; 1339 struct archive_string version_string; 1340 struct archive_string name_utf8_string; 1341 const char* cur_filename; 1342 1343 /* Flags are ignored. */ 1344 if(!read_var_sized(a, &flags, &value_len)) 1345 return ARCHIVE_EOF; 1346 1347 *extra_data_size -= value_len; 1348 if(ARCHIVE_OK != consume(a, value_len)) 1349 return ARCHIVE_EOF; 1350 1351 if(!read_var_sized(a, &version, &value_len)) 1352 return ARCHIVE_EOF; 1353 1354 *extra_data_size -= value_len; 1355 if(ARCHIVE_OK != consume(a, value_len)) 1356 return ARCHIVE_EOF; 1357 1358 /* extra_data_size should be zero here. 
*/ 1359 1360 cur_filename = archive_entry_pathname_utf8(e); 1361 if(cur_filename == NULL) { 1362 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1363 "Version entry without file name"); 1364 return ARCHIVE_FATAL; 1365 } 1366 1367 archive_string_init(&version_string); 1368 archive_string_init(&name_utf8_string); 1369 1370 /* Prepare a ;123 suffix for the filename, where '123' is the version 1371 * value of this file. */ 1372 archive_string_sprintf(&version_string, ";%zu", version); 1373 1374 /* Build the new filename. */ 1375 archive_strcat(&name_utf8_string, cur_filename); 1376 archive_strcat(&name_utf8_string, version_string.s); 1377 1378 /* Apply the new filename into this file's context. */ 1379 archive_entry_update_pathname_utf8(e, name_utf8_string.s); 1380 1381 /* Free buffers. */ 1382 archive_string_free(&version_string); 1383 archive_string_free(&name_utf8_string); 1384 return ARCHIVE_OK; 1385 } 1386 1387 static int parse_file_extra_htime(struct archive_read* a, 1388 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) 1389 { 1390 char unix_time, has_unix_ns, has_mtime, has_ctime, has_atime; 1391 size_t flags = 0; 1392 size_t value_len; 1393 1394 enum HTIME_FLAGS { 1395 IS_UNIX = 0x01, 1396 HAS_MTIME = 0x02, 1397 HAS_CTIME = 0x04, 1398 HAS_ATIME = 0x08, 1399 HAS_UNIX_NS = 0x10, 1400 }; 1401 1402 if(!read_var_sized(a, &flags, &value_len)) 1403 return ARCHIVE_EOF; 1404 1405 *extra_data_size -= value_len; 1406 if(ARCHIVE_OK != consume(a, value_len)) { 1407 return ARCHIVE_EOF; 1408 } 1409 1410 unix_time = flags & IS_UNIX; 1411 has_unix_ns = unix_time && (flags & HAS_UNIX_NS); 1412 has_mtime = flags & HAS_MTIME; 1413 has_atime = flags & HAS_ATIME; 1414 has_ctime = flags & HAS_CTIME; 1415 rar->file.e_atime_ns = rar->file.e_ctime_ns = rar->file.e_mtime_ns = 0; 1416 1417 if(has_mtime) { 1418 parse_htime_item(a, unix_time, &rar->file.e_mtime, 1419 &rar->file.e_mtime_ns, extra_data_size); 1420 } 1421 1422 if(has_ctime) { 1423 
parse_htime_item(a, unix_time, &rar->file.e_ctime, 1424 &rar->file.e_ctime_ns, extra_data_size); 1425 } 1426 1427 if(has_atime) { 1428 parse_htime_item(a, unix_time, &rar->file.e_atime, 1429 &rar->file.e_atime_ns, extra_data_size); 1430 } 1431 1432 if(has_mtime && has_unix_ns) { 1433 if(!read_u32(a, &rar->file.e_mtime_ns)) 1434 return ARCHIVE_EOF; 1435 1436 *extra_data_size -= 4; 1437 } 1438 1439 if(has_ctime && has_unix_ns) { 1440 if(!read_u32(a, &rar->file.e_ctime_ns)) 1441 return ARCHIVE_EOF; 1442 1443 *extra_data_size -= 4; 1444 } 1445 1446 if(has_atime && has_unix_ns) { 1447 if(!read_u32(a, &rar->file.e_atime_ns)) 1448 return ARCHIVE_EOF; 1449 1450 *extra_data_size -= 4; 1451 } 1452 1453 /* The seconds and nanoseconds are either together, or separated in two 1454 * fields so we parse them, then set the archive_entry's times. */ 1455 if(has_mtime) { 1456 archive_entry_set_mtime(e, rar->file.e_mtime, rar->file.e_mtime_ns); 1457 } 1458 1459 if(has_ctime) { 1460 archive_entry_set_ctime(e, rar->file.e_ctime, rar->file.e_ctime_ns); 1461 } 1462 1463 if(has_atime) { 1464 archive_entry_set_atime(e, rar->file.e_atime, rar->file.e_atime_ns); 1465 } 1466 1467 return ARCHIVE_OK; 1468 } 1469 1470 static int parse_file_extra_redir(struct archive_read* a, 1471 struct archive_entry* e, struct rar5* rar, int64_t* extra_data_size) 1472 { 1473 uint64_t value_size = 0; 1474 size_t target_size = 0; 1475 char target_utf8_buf[MAX_NAME_IN_BYTES]; 1476 const uint8_t* p; 1477 1478 if(!read_var(a, &rar->file.redir_type, &value_size)) 1479 return ARCHIVE_EOF; 1480 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1481 return ARCHIVE_EOF; 1482 *extra_data_size -= value_size; 1483 1484 if(!read_var(a, &rar->file.redir_flags, &value_size)) 1485 return ARCHIVE_EOF; 1486 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1487 return ARCHIVE_EOF; 1488 *extra_data_size -= value_size; 1489 1490 if(!read_var_sized(a, &target_size, NULL)) 1491 return ARCHIVE_EOF; 1492 *extra_data_size -= target_size 
+ 1; 1493 1494 if(target_size > (MAX_NAME_IN_CHARS - 1)) { 1495 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1496 "Link target is too long"); 1497 return ARCHIVE_FATAL; 1498 } 1499 1500 if(target_size == 0) { 1501 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1502 "No link target specified"); 1503 return ARCHIVE_FATAL; 1504 } 1505 1506 if(!read_ahead(a, target_size, &p)) 1507 return ARCHIVE_EOF; 1508 1509 memcpy(target_utf8_buf, p, target_size); 1510 target_utf8_buf[target_size] = 0; 1511 1512 if(ARCHIVE_OK != consume(a, (int64_t)target_size)) 1513 return ARCHIVE_EOF; 1514 1515 switch(rar->file.redir_type) { 1516 case REDIR_TYPE_UNIXSYMLINK: 1517 case REDIR_TYPE_WINSYMLINK: 1518 archive_entry_set_filetype(e, AE_IFLNK); 1519 archive_entry_update_symlink_utf8(e, target_utf8_buf); 1520 if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) { 1521 archive_entry_set_symlink_type(e, 1522 AE_SYMLINK_TYPE_DIRECTORY); 1523 } else { 1524 archive_entry_set_symlink_type(e, 1525 AE_SYMLINK_TYPE_FILE); 1526 } 1527 break; 1528 1529 case REDIR_TYPE_HARDLINK: 1530 archive_entry_set_filetype(e, AE_IFREG); 1531 archive_entry_update_hardlink_utf8(e, target_utf8_buf); 1532 break; 1533 1534 default: 1535 /* Unknown redir type, skip it. 
*/ 1536 break; 1537 } 1538 return ARCHIVE_OK; 1539 } 1540 1541 static int parse_file_extra_owner(struct archive_read* a, 1542 struct archive_entry* e, int64_t* extra_data_size) 1543 { 1544 uint64_t flags = 0; 1545 uint64_t value_size = 0; 1546 uint64_t id = 0; 1547 size_t name_len = 0; 1548 size_t name_size = 0; 1549 char namebuf[OWNER_MAXNAMELEN]; 1550 const uint8_t* p; 1551 1552 if(!read_var(a, &flags, &value_size)) 1553 return ARCHIVE_EOF; 1554 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1555 return ARCHIVE_EOF; 1556 *extra_data_size -= value_size; 1557 1558 if ((flags & OWNER_USER_NAME) != 0) { 1559 if(!read_var_sized(a, &name_size, NULL)) 1560 return ARCHIVE_EOF; 1561 *extra_data_size -= name_size + 1; 1562 1563 if(!read_ahead(a, name_size, &p)) 1564 return ARCHIVE_EOF; 1565 1566 if (name_size >= OWNER_MAXNAMELEN) { 1567 name_len = OWNER_MAXNAMELEN - 1; 1568 } else { 1569 name_len = name_size; 1570 } 1571 1572 memcpy(namebuf, p, name_len); 1573 namebuf[name_len] = 0; 1574 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1575 return ARCHIVE_EOF; 1576 1577 archive_entry_set_uname(e, namebuf); 1578 } 1579 if ((flags & OWNER_GROUP_NAME) != 0) { 1580 if(!read_var_sized(a, &name_size, NULL)) 1581 return ARCHIVE_EOF; 1582 *extra_data_size -= name_size + 1; 1583 1584 if(!read_ahead(a, name_size, &p)) 1585 return ARCHIVE_EOF; 1586 1587 if (name_size >= OWNER_MAXNAMELEN) { 1588 name_len = OWNER_MAXNAMELEN - 1; 1589 } else { 1590 name_len = name_size; 1591 } 1592 1593 memcpy(namebuf, p, name_len); 1594 namebuf[name_len] = 0; 1595 if(ARCHIVE_OK != consume(a, (int64_t)name_size)) 1596 return ARCHIVE_EOF; 1597 1598 archive_entry_set_gname(e, namebuf); 1599 } 1600 if ((flags & OWNER_USER_UID) != 0) { 1601 if(!read_var(a, &id, &value_size)) 1602 return ARCHIVE_EOF; 1603 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1604 return ARCHIVE_EOF; 1605 *extra_data_size -= value_size; 1606 1607 archive_entry_set_uid(e, (la_int64_t)id); 1608 } 1609 if ((flags & 
OWNER_GROUP_GID) != 0) { 1610 if(!read_var(a, &id, &value_size)) 1611 return ARCHIVE_EOF; 1612 if(ARCHIVE_OK != consume(a, (int64_t)value_size)) 1613 return ARCHIVE_EOF; 1614 *extra_data_size -= value_size; 1615 1616 archive_entry_set_gid(e, (la_int64_t)id); 1617 } 1618 return ARCHIVE_OK; 1619 } 1620 1621 static int process_head_file_extra(struct archive_read* a, 1622 struct archive_entry* e, struct rar5* rar, int64_t extra_data_size) 1623 { 1624 uint64_t extra_field_size; 1625 uint64_t extra_field_id = 0; 1626 uint64_t var_size; 1627 1628 while(extra_data_size > 0) { 1629 /* Make sure we won't fail if the file declares only unsupported 1630 attributes. */ 1631 int ret = ARCHIVE_OK; 1632 1633 if(!read_var(a, &extra_field_size, &var_size)) 1634 return ARCHIVE_EOF; 1635 1636 extra_data_size -= var_size; 1637 if(ARCHIVE_OK != consume(a, var_size)) { 1638 return ARCHIVE_EOF; 1639 } 1640 1641 if(!read_var(a, &extra_field_id, &var_size)) 1642 return ARCHIVE_EOF; 1643 1644 extra_field_size -= var_size; 1645 extra_data_size -= var_size; 1646 if(ARCHIVE_OK != consume(a, var_size)) { 1647 return ARCHIVE_EOF; 1648 } 1649 1650 switch(extra_field_id) { 1651 case EX_HASH: 1652 ret = parse_file_extra_hash(a, rar, 1653 &extra_data_size); 1654 break; 1655 case EX_HTIME: 1656 ret = parse_file_extra_htime(a, e, rar, 1657 &extra_data_size); 1658 break; 1659 case EX_REDIR: 1660 ret = parse_file_extra_redir(a, e, rar, 1661 &extra_data_size); 1662 break; 1663 case EX_UOWNER: 1664 ret = parse_file_extra_owner(a, e, 1665 &extra_data_size); 1666 break; 1667 case EX_VERSION: 1668 ret = parse_file_extra_version(a, e, 1669 &extra_data_size); 1670 break; 1671 case EX_CRYPT: 1672 /* Mark the entry as encrypted */ 1673 archive_entry_set_is_data_encrypted(e, 1); 1674 rar->has_encrypted_entries = 1; 1675 rar->cstate.data_encrypted = 1; 1676 /* fallthrough */ 1677 case EX_SUBDATA: 1678 /* fallthrough */ 1679 default: 1680 /* Skip unsupported entry. 
*/ 1681 extra_data_size -= extra_field_size; 1682 if (ARCHIVE_OK != consume(a, extra_field_size)) { 1683 return ARCHIVE_EOF; 1684 } 1685 1686 /* Don't fail on unsupported attribute -- we've handled it 1687 by skipping over it. */ 1688 ret = ARCHIVE_OK; 1689 } 1690 1691 if (ret != ARCHIVE_OK) { 1692 /* Forward any errors signalled by the attribute parsing 1693 functions. */ 1694 return ret; 1695 } 1696 } 1697 1698 if (extra_data_size != 0) { 1699 /* We didn't skip everything, or we skipped too much; either way, 1700 there's an error in this parsing function. */ 1701 1702 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 1703 "unsupported structure of file header extra data"); 1704 return ARCHIVE_FATAL; 1705 } 1706 1707 return ARCHIVE_OK; 1708 } 1709 1710 static int file_entry_sanity_checks(struct archive_read* a, 1711 size_t block_flags, uint8_t is_dir, uint64_t unpacked_size, 1712 size_t packed_size) 1713 { 1714 if (is_dir) { 1715 const int declares_data_size = 1716 (int) (unpacked_size != 0 || packed_size != 0); 1717 1718 /* FILE entries for directories still declare HFL_DATA in block flags, 1719 even though attaching data to such blocks doesn't make much sense. 
*/ 1720 if (declares_data_size) { 1721 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1722 "directory entries cannot have any data"); 1723 return ARCHIVE_FATAL; 1724 } 1725 } else { 1726 const int declares_hfl_data = (int) ((block_flags & HFL_DATA) != 0); 1727 if (!declares_hfl_data) { 1728 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1729 "no data found in file/service block"); 1730 return ARCHIVE_FATAL; 1731 } 1732 } 1733 1734 return ARCHIVE_OK; 1735 } 1736 1737 static int process_head_file(struct archive_read* a, struct rar5* rar, 1738 struct archive_entry* entry, size_t block_flags) 1739 { 1740 int64_t extra_data_size = 0; 1741 size_t data_size = 0; 1742 size_t file_flags = 0; 1743 size_t file_attr = 0; 1744 size_t compression_info = 0; 1745 size_t host_os = 0; 1746 size_t name_size = 0; 1747 uint64_t unpacked_size, window_size; 1748 uint32_t mtime = 0, crc = 0; 1749 int c_method = 0, c_version = 0; 1750 char name_utf8_buf[MAX_NAME_IN_BYTES]; 1751 const uint8_t* p; 1752 int sanity_ret; 1753 1754 enum FILE_FLAGS { 1755 DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004, 1756 UNKNOWN_UNPACKED_SIZE = 0x0008, 1757 }; 1758 1759 enum FILE_ATTRS { 1760 ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4, 1761 ATTR_DIRECTORY = 0x10, 1762 }; 1763 1764 enum COMP_INFO_FLAGS { 1765 SOLID = 0x0040, 1766 }; 1767 1768 enum HOST_OS { 1769 HOST_WINDOWS = 0, 1770 HOST_UNIX = 1, 1771 }; 1772 1773 archive_entry_clear(entry); 1774 1775 /* Do not reset file context if we're switching archives. */ 1776 if(!rar->cstate.switch_multivolume) { 1777 reset_file_context(rar); 1778 } 1779 1780 if(block_flags & HFL_EXTRA_DATA) { 1781 uint64_t edata_size = 0; 1782 if(!read_var(a, &edata_size, NULL)) 1783 return ARCHIVE_EOF; 1784 1785 /* Intentional type cast from unsigned to signed. 
*/
		extra_data_size = (int64_t) edata_size;
	}

	/* The packed data size is only present when the block declares
	 * HFL_DATA; it becomes the number of bytes still to be read for this
	 * entry. */
	if(block_flags & HFL_DATA) {
		if(!read_var_sized(a, &data_size, NULL))
			return ARCHIVE_EOF;

		rar->file.bytes_remaining = data_size;
	} else {
		rar->file.bytes_remaining = 0;
	}

	if(!read_var_sized(a, &file_flags, NULL))
		return ARCHIVE_EOF;

	if(!read_var(a, &unpacked_size, NULL))
		return ARCHIVE_EOF;

	if(file_flags & UNKNOWN_UNPACKED_SIZE) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
		    "Files with unknown unpacked size are not supported");
		return ARCHIVE_FATAL;
	}

	rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);

	sanity_ret = file_entry_sanity_checks(a, block_flags, rar->file.dir,
	    unpacked_size, data_size);

	if (sanity_ret != ARCHIVE_OK) {
		return sanity_ret;
	}

	if(!read_var_sized(a, &file_attr, NULL))
		return ARCHIVE_EOF;

	/* mtime and CRC32 are optional 32-bit fields, present only when the
	 * matching file flag is set. */
	if(file_flags & UTIME) {
		if(!read_u32(a, &mtime))
			return ARCHIVE_EOF;
	}

	if(file_flags & CRC32) {
		if(!read_u32(a, &crc))
			return ARCHIVE_EOF;
	}

	if(!read_var_sized(a, &compression_info, NULL))
		return ARCHIVE_EOF;

	/* compression_info as decoded below: bits 0-5 hold the version,
	 * bit 6 the solid flag, bits 7-9 the method and bits 10-13 the shift
	 * applied to the base window size. */
	c_method = (int) (compression_info >> 7) & 0x7;
	c_version = (int) (compression_info & 0x3f);

	/* RAR5 seems to limit the dictionary size to 64MB. */
	window_size = (rar->file.dir > 0) ?
		0 :
		g_unpack_window_size << ((compression_info >> 10) & 15);
	rar->cstate.method = c_method;
	rar->cstate.version = c_version + 50;
	rar->file.solid = (compression_info & SOLID) > 0;

	/* Archives which declare solid files without initializing the window
	 * buffer first are invalid, unless previous data was encrypted, in
	 * which case we may never have had the chance */

	if(rar->file.solid > 0 && rar->cstate.data_encrypted == 0 &&
	    rar->cstate.window_buf == NULL) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				  "Declared solid file, but no window buffer "
				  "initialized yet");
		return ARCHIVE_FATAL;
	}

	/* Check if window_size is a sane value. Also, if the file is not
	 * declared as a directory, disallow window_size == 0. */
	if(window_size > (64 * 1024 * 1024) ||
	    (rar->file.dir == 0 && window_size == 0))
	{
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Declared dictionary size is not supported");
		return ARCHIVE_FATAL;
	}

	if(rar->file.solid > 0) {
		/* Re-check if current window size is the same as previous
		 * window size (for solid files only). */
		if(rar->file.solid_window_size > 0 &&
		    rar->file.solid_window_size != (ssize_t) window_size)
		{
			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
			    "Window size for this solid file doesn't match "
			    "the window size used in previous solid file");
			return ARCHIVE_FATAL;
		}
	}
	else
		rar->cstate.data_encrypted = 0; /* Reset for new buffer */

	/* Grow (never shrink) an already allocated window buffer. */
	if(rar->cstate.window_size < (ssize_t) window_size &&
	    rar->cstate.window_buf)
	{
		/* The `data_ready` stack contains pointers to the `window_buf` or
		 * `filtered_buf` buffers.  Since we're about to reallocate the first
		 * buffer, some of those pointers could become invalid. Therefore, we
		 * need to dispose of all entries from the stack before attempting the
		 * realloc. */
		clear_data_ready_stack(rar);

		/* If window_buf has been allocated before, reallocate it, so
		 * that its size will match new window_size. */

		uint8_t* new_window_buf =
			realloc(rar->cstate.window_buf, (size_t) window_size);

		if(!new_window_buf) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
				"Not enough memory when trying to realloc the window "
				"buffer");
			return ARCHIVE_FATAL;
		}

		rar->cstate.window_buf = new_window_buf;
	}

	/* Values up to 64M should fit into ssize_t on every
	 * architecture. */
	rar->cstate.window_size = (ssize_t) window_size;

	if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
		/* Solid files have to have the same window_size across
		   whole archive. Remember the window_size parameter
		   for first solid file found. */
		rar->file.solid_window_size = rar->cstate.window_size;
	}

	init_window_mask(rar);

	rar->file.service = 0;

	if(!read_var_sized(a, &host_os, NULL))
		return ARCHIVE_EOF;

	if(host_os == HOST_WINDOWS) {
		/* Host OS is Windows */

		__LA_MODE_T mode;

		/* Windows attributes carry no permission bits, so a mode is
		 * synthesized from the directory and read-only attributes. */
		if(file_attr & ATTR_DIRECTORY) {
			if (file_attr & ATTR_READONLY) {
				mode = 0555 | AE_IFDIR;
			} else {
				mode = 0755 | AE_IFDIR;
			}
		} else {
			if (file_attr & ATTR_READONLY) {
				mode = 0444 | AE_IFREG;
			} else {
				mode = 0644 | AE_IFREG;
			}
		}

		archive_entry_set_mode(entry, mode);

		if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
			char *fflags_text, *ptr;
			/* allocate for ",rdonly,hidden,system" */
			fflags_text = malloc(22 * sizeof(*fflags_text));
			/* An allocation failure is tolerated: the entry then
			 * simply gets no fflags text. */
			if (fflags_text != NULL) {
				ptr = fflags_text;
				/* Every name is written with a leading comma
				 * (7 bytes each); the very first comma is
				 * skipped when the text is handed over. */
				if (file_attr & ATTR_READONLY) {
					strcpy(ptr, ",rdonly");
					ptr = ptr + 7;
				}
				if (file_attr & ATTR_HIDDEN) {
					strcpy(ptr, ",hidden");
					ptr = ptr + 7;
				}
				if (file_attr & ATTR_SYSTEM) {
					strcpy(ptr, ",system");
					ptr = ptr + 7;
				}
				if (ptr > fflags_text) {
					archive_entry_copy_fflags_text(entry,
					    fflags_text + 1);
				}
				free(fflags_text);
			}
		}
	} else if(host_os == HOST_UNIX) {
		/* Host OS is Unix */
		archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
	} else {
		/* Unknown host OS */
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"Unsupported Host OS: 0x%jx",
				(uintmax_t)host_os);

		return ARCHIVE_FATAL;
	}

	if(!read_var_sized(a, &name_size, NULL))
		return ARCHIVE_EOF;

	/* The name is copied into name_utf8_buf together with a terminating
	 * NUL, so over-long and empty names are rejected first. */
	if(name_size > (MAX_NAME_IN_CHARS - 1)) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"Filename is too long");

		return ARCHIVE_FATAL;
	}

	if(name_size == 0) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
				"No filename specified");

		return ARCHIVE_FATAL;
	}

	if(!read_ahead(a, name_size, &p))
		return ARCHIVE_EOF;

	memcpy(name_utf8_buf, p, name_size);
	name_utf8_buf[name_size] = 0;
	if(ARCHIVE_OK != consume(a, name_size)) {
		return ARCHIVE_EOF;
	}

	archive_entry_update_pathname_utf8(entry, name_utf8_buf);

	if(extra_data_size > 0) {
		int ret = process_head_file_extra(a, entry, rar,
		    extra_data_size);

		/*
		 * TODO: rewrite or remove useless sanity check
		 *       as extra_data_size is not passed as a pointer
		 *
		if(extra_data_size < 0) {
			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
			    "File extra data size is not zero");
			return ARCHIVE_FATAL;
		}
		 */

		if(ret != ARCHIVE_OK)
			return ret;
	}

	if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
		rar->file.unpacked_size = (ssize_t) unpacked_size;
		/* The entry size is only reported when no redirection type
		 * was recorded for this file. */
		if(rar->file.redir_type == REDIR_TYPE_NONE)
			archive_entry_set_size(entry, unpacked_size);
	}

	if(file_flags & UTIME) {
		archive_entry_set_mtime(entry, (time_t) mtime, 0);
	}

	if(file_flags & CRC32) {
		rar->file.stored_crc32 = crc;
	}

	if(!rar->cstate.switch_multivolume) {
		/* Do not reinitialize unpacking state if we're switching
		 * archives. */
		rar->cstate.block_parsing_finished = 1;
		rar->cstate.all_filters_applied = 1;
		rar->cstate.initialized = 0;
	}

	if(rar->generic.split_before > 0) {
		/* If now we're standing on a header that has a 'split before'
		 * mark, it means we're standing on a 'continuation' file
		 * header. Signal the caller that if it wants to move to
		 * another file, it must call rar5_read_header() function
		 * again. */

		return ARCHIVE_RETRY;
	} else {
		return ARCHIVE_OK;
	}
}

static int process_head_service(struct archive_read* a, struct rar5* rar,
    struct archive_entry* entry, size_t block_flags)
{
	/* Process this SERVICE block the same way as FILE blocks. */
	int ret = process_head_file(a, rar, entry, block_flags);
	if(ret != ARCHIVE_OK)
		return ret;

	rar->file.service = 1;

	/* But skip the data part automatically. It's no use for the user
	 * anyway.  It contains only service data, not even needed to
	 * properly unpack the file. */
	ret = rar5_read_data_skip(a);
	if(ret != ARCHIVE_OK)
		return ret;

	/* After skipping, try parsing another block automatically.
*/ 2085 return ARCHIVE_RETRY; 2086 } 2087 2088 static int process_head_main(struct archive_read* a, struct rar5* rar, 2089 struct archive_entry* entry, size_t block_flags) 2090 { 2091 int ret; 2092 uint64_t extra_data_size = 0; 2093 size_t extra_field_size = 0; 2094 size_t extra_field_id = 0; 2095 size_t archive_flags = 0; 2096 2097 enum MAIN_FLAGS { 2098 VOLUME = 0x0001, /* multi-volume archive */ 2099 VOLUME_NUMBER = 0x0002, /* volume number, first vol doesn't 2100 * have it */ 2101 SOLID = 0x0004, /* solid archive */ 2102 PROTECT = 0x0008, /* contains Recovery info */ 2103 LOCK = 0x0010, /* readonly flag, not used */ 2104 }; 2105 2106 enum MAIN_EXTRA { 2107 // Just one attribute here. 2108 LOCATOR = 0x01, 2109 }; 2110 2111 (void) entry; 2112 2113 if(block_flags & HFL_EXTRA_DATA) { 2114 if(!read_var(a, &extra_data_size, NULL)) 2115 return ARCHIVE_EOF; 2116 } else { 2117 extra_data_size = 0; 2118 } 2119 2120 if(!read_var_sized(a, &archive_flags, NULL)) { 2121 return ARCHIVE_EOF; 2122 } 2123 2124 rar->main.volume = (archive_flags & VOLUME) > 0; 2125 rar->main.solid = (archive_flags & SOLID) > 0; 2126 2127 if(archive_flags & VOLUME_NUMBER) { 2128 size_t v = 0; 2129 if(!read_var_sized(a, &v, NULL)) { 2130 return ARCHIVE_EOF; 2131 } 2132 2133 if (v > UINT_MAX) { 2134 archive_set_error(&a->archive, 2135 ARCHIVE_ERRNO_FILE_FORMAT, 2136 "Invalid volume number"); 2137 return ARCHIVE_FATAL; 2138 } 2139 2140 rar->main.vol_no = (unsigned int) v; 2141 } else { 2142 rar->main.vol_no = 0; 2143 } 2144 2145 if(rar->vol.expected_vol_no > 0 && 2146 rar->main.vol_no != rar->vol.expected_vol_no) 2147 { 2148 /* Returning EOF instead of FATAL because of strange 2149 * libarchive behavior. When opening multiple files via 2150 * archive_read_open_filenames(), after reading up the whole 2151 * last file, the __archive_read_ahead function wraps up to 2152 * the first archive instead of returning EOF. 
*/ 2153 return ARCHIVE_EOF; 2154 } 2155 2156 if(extra_data_size == 0) { 2157 /* Early return. */ 2158 return ARCHIVE_OK; 2159 } 2160 2161 if(!read_var_sized(a, &extra_field_size, NULL)) { 2162 return ARCHIVE_EOF; 2163 } 2164 2165 if(!read_var_sized(a, &extra_field_id, NULL)) { 2166 return ARCHIVE_EOF; 2167 } 2168 2169 if(extra_field_size == 0) { 2170 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2171 "Invalid extra field size"); 2172 return ARCHIVE_FATAL; 2173 } 2174 2175 switch(extra_field_id) { 2176 case LOCATOR: 2177 ret = process_main_locator_extra_block(a, rar); 2178 if(ret != ARCHIVE_OK) { 2179 /* Error while parsing main locator extra 2180 * block. */ 2181 return ret; 2182 } 2183 2184 break; 2185 default: 2186 archive_set_error(&a->archive, 2187 ARCHIVE_ERRNO_FILE_FORMAT, 2188 "Unsupported extra type (0x%jx)", 2189 (uintmax_t)extra_field_id); 2190 return ARCHIVE_FATAL; 2191 } 2192 2193 return ARCHIVE_OK; 2194 } 2195 2196 static int skip_unprocessed_bytes(struct archive_read* a) { 2197 struct rar5* rar = get_context(a); 2198 int ret; 2199 2200 if(rar->file.bytes_remaining) { 2201 /* Use different skipping method in block merging mode than in 2202 * normal mode. If merge mode is active, rar5_read_data_skip 2203 * can't be used, because it could allow recursive use of 2204 * merge_block() * function, and this function doesn't support 2205 * recursive use. */ 2206 if(rar->merge_mode) { 2207 /* Discard whole merged block. This is valid in solid 2208 * mode as well, because the code will discard blocks 2209 * only if those blocks are safe to discard (i.e. 2210 * they're not FILE blocks). */ 2211 ret = consume(a, rar->file.bytes_remaining); 2212 if(ret != ARCHIVE_OK) { 2213 return ret; 2214 } 2215 rar->file.bytes_remaining = 0; 2216 } else { 2217 /* If we're not in merge mode, use safe skipping code. 2218 * This will ensure we'll handle solid archives 2219 * properly. 
*/
			ret = rar5_read_data_skip(a);
			if(ret != ARCHIVE_OK) {
				return ret;
			}
		}
	}

	return ARCHIVE_OK;
}

static int scan_for_signature(struct archive_read* a);

/* Base block processing function. A 'base block' is a RARv5 header block
 * that tells the reader what kind of data is stored inside the block.
 *
 * From a bird's-eye view a RAR file looks like this:
 *
 * <magic><base_block_1><base_block_2>...<base_block_n>
 *
 * There are a few types of base blocks. Those types are specified inside
 * the 'switch' statement in this function. For example purposes, here is
 * what a standard RARv5 file could look like:
 *
 * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
 *
 * The structure above could describe an archive file with 3 files in it,
 * one service "QuickOpen" block (that is ignored by this parser), and an
 * end of file base block marker.
 *
 * If the file is stored in multiple archive files ("multiarchive"), it might
 * look like this:
 *
 * .part01.rar: <magic><MAIN><FILE><ENDARC>
 * .part02.rar: <magic><MAIN><FILE><ENDARC>
 * .part03.rar: <magic><MAIN><FILE><ENDARC>
 *
 * This example could describe 3 RAR files that contain ONE archived file.
 * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends on what metadata is stored
 * in the headers of <FILE> blocks.
 *
 * Each <FILE> block contains info about its size, the name of the file it's
 * storing inside, whether this FILE block is a continuation block of the
 * previous archive ('split before'), and whether this FILE block should be
 * continued in another archive ('split after'). By parsing the 'split before'
 * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
 * are describing one file, or multiple files (with the same filename, for
 * example).
 *
 * One thing to note is that if we're parsing the first <FILE> block, and
 * we see 'split after' flag, then we need to jump over to another <FILE>
 * block to be able to decompress rest of the data. To do this, we need
 * to skip the <ENDARC> block, then switch to another file, then skip the
 * <magic> block, <MAIN> block, and then we're standing on the proper
 * <FILE> block.
 */

static int process_base_block(struct archive_read* a,
    struct archive_entry* entry)
{
	const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;

	struct rar5* rar = get_context(a);
	uint32_t hdr_crc, computed_crc;
	size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
	size_t header_id = 0;
	size_t header_flags = 0;
	const uint8_t* p;
	int ret;

	enum HEADER_TYPE {
		HEAD_MARK    = 0x00, HEAD_MAIN  = 0x01, HEAD_FILE   = 0x02,
		HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
		HEAD_UNKNOWN = 0xff,
	};

	/* Skip any unprocessed data for this file. */
	ret = skip_unprocessed_bytes(a);
	if(ret != ARCHIVE_OK)
		return ret;

	/* Read the expected CRC32 checksum. */
	if(!read_u32(a, &hdr_crc)) {
		return ARCHIVE_EOF;
	}

	/* Read header size. The size varint is only peeked here (a length
	 * pointer is passed), so it is still part of the data checksummed
	 * further down. */
	if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
		return ARCHIVE_EOF;
	}

	hdr_size = raw_hdr_size + hdr_size_len;

	/* Sanity check, maximum header size for RAR5 is 2MB. */
	if(hdr_size > (2 * 1024 * 1024)) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Base block header is too large");

		return ARCHIVE_FATAL;
	}

	/* Additional sanity checks to weed out invalid files.
*/ 2322 if(raw_hdr_size == 0 || hdr_size_len == 0 || 2323 hdr_size < SMALLEST_RAR5_BLOCK_SIZE) 2324 { 2325 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2326 "Too small block encountered (%zu bytes)", 2327 raw_hdr_size); 2328 2329 return ARCHIVE_FATAL; 2330 } 2331 2332 /* Read the whole header data into memory, maximum memory use here is 2333 * 2MB. */ 2334 if(!read_ahead(a, hdr_size, &p)) { 2335 return ARCHIVE_EOF; 2336 } 2337 2338 /* Verify the CRC32 of the header data. */ 2339 computed_crc = (uint32_t) crc32(0, p, (int) hdr_size); 2340 if(computed_crc != hdr_crc) { 2341 #ifndef DONT_FAIL_ON_CRC_ERROR 2342 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2343 "Header CRC error"); 2344 2345 return ARCHIVE_FATAL; 2346 #endif 2347 } 2348 2349 /* If the checksum is OK, we proceed with parsing. */ 2350 if(ARCHIVE_OK != consume(a, hdr_size_len)) { 2351 return ARCHIVE_EOF; 2352 } 2353 2354 if(!read_var_sized(a, &header_id, NULL)) 2355 return ARCHIVE_EOF; 2356 2357 if(!read_var_sized(a, &header_flags, NULL)) 2358 return ARCHIVE_EOF; 2359 2360 rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0; 2361 rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0; 2362 rar->generic.size = (int)hdr_size; 2363 rar->generic.last_header_id = (int)header_id; 2364 rar->main.endarc = 0; 2365 2366 /* Those are possible header ids in RARv5. */ 2367 switch(header_id) { 2368 case HEAD_MAIN: 2369 ret = process_head_main(a, rar, entry, header_flags); 2370 2371 /* Main header doesn't have any files in it, so it's 2372 * pointless to return to the caller. Retry to next 2373 * header, which should be HEAD_FILE/HEAD_SERVICE. 
*/ 2374 if(ret == ARCHIVE_OK) 2375 return ARCHIVE_RETRY; 2376 2377 return ret; 2378 case HEAD_SERVICE: 2379 ret = process_head_service(a, rar, entry, header_flags); 2380 return ret; 2381 case HEAD_FILE: 2382 ret = process_head_file(a, rar, entry, header_flags); 2383 return ret; 2384 case HEAD_CRYPT: 2385 archive_entry_set_is_metadata_encrypted(entry, 1); 2386 archive_entry_set_is_data_encrypted(entry, 1); 2387 rar->has_encrypted_entries = 1; 2388 rar->headers_are_encrypted = 1; 2389 archive_set_error(&a->archive, 2390 ARCHIVE_ERRNO_FILE_FORMAT, 2391 "Encryption is not supported"); 2392 return ARCHIVE_FATAL; 2393 case HEAD_ENDARC: 2394 rar->main.endarc = 1; 2395 2396 /* After encountering an end of file marker, we need 2397 * to take into consideration if this archive is 2398 * continued in another file (i.e. is it part01.rar: 2399 * is there a part02.rar?) */ 2400 if(rar->main.volume) { 2401 /* In case there is part02.rar, position the 2402 * read pointer in a proper place, so we can 2403 * resume parsing. */ 2404 ret = scan_for_signature(a); 2405 if(ret == ARCHIVE_FATAL) { 2406 return ARCHIVE_EOF; 2407 } else { 2408 if(rar->vol.expected_vol_no == 2409 UINT_MAX) { 2410 archive_set_error(&a->archive, 2411 ARCHIVE_ERRNO_FILE_FORMAT, 2412 "Header error"); 2413 return ARCHIVE_FATAL; 2414 } 2415 2416 rar->vol.expected_vol_no = 2417 rar->main.vol_no + 1; 2418 return ARCHIVE_OK; 2419 } 2420 } else { 2421 return ARCHIVE_EOF; 2422 } 2423 case HEAD_MARK: 2424 return ARCHIVE_EOF; 2425 default: 2426 if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) { 2427 archive_set_error(&a->archive, 2428 ARCHIVE_ERRNO_FILE_FORMAT, 2429 "Header type error"); 2430 return ARCHIVE_FATAL; 2431 } else { 2432 /* If the block is marked as 'skip if unknown', 2433 * do as the flag says: skip the block 2434 * instead on failing on it. */ 2435 return ARCHIVE_RETRY; 2436 } 2437 } 2438 2439 #if !defined WIN32 2440 // Not reached. 
2441 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 2442 "Internal unpacker error"); 2443 return ARCHIVE_FATAL; 2444 #endif 2445 } 2446 2447 static int skip_base_block(struct archive_read* a) { 2448 int ret; 2449 struct rar5* rar = get_context(a); 2450 2451 /* Create a new local archive_entry structure that will be operated on 2452 * by header reader; operations on this archive_entry will be discarded. 2453 */ 2454 struct archive_entry* entry = archive_entry_new(); 2455 ret = process_base_block(a, entry); 2456 2457 /* Discard operations on this archive_entry structure. */ 2458 archive_entry_free(entry); 2459 if(ret == ARCHIVE_FATAL) 2460 return ret; 2461 2462 if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0) 2463 return ARCHIVE_OK; 2464 2465 if(ret == ARCHIVE_OK) 2466 return ARCHIVE_RETRY; 2467 else 2468 return ret; 2469 } 2470 2471 static int try_skip_sfx(struct archive_read *a) 2472 { 2473 const char *p; 2474 2475 if ((p = __archive_read_ahead(a, 7, NULL)) == NULL) 2476 return ARCHIVE_EOF; 2477 2478 if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) 2479 { 2480 char signature[sizeof(rar5_signature_xor)]; 2481 const void *h; 2482 const char *q; 2483 size_t skip, total = 0; 2484 ssize_t bytes, window = 4096; 2485 2486 rar5_signature(signature); 2487 2488 while (total + window <= (1024 * 512)) { 2489 h = __archive_read_ahead(a, window, &bytes); 2490 if (h == NULL) { 2491 /* Remaining bytes are less than window. */ 2492 window >>= 1; 2493 if (window < 0x40) 2494 goto fatal; 2495 continue; 2496 } 2497 if (bytes < 0x40) 2498 goto fatal; 2499 p = h; 2500 q = p + bytes; 2501 2502 /* 2503 * Scan ahead until we find something that looks 2504 * like the RAR header. 
2505 */ 2506 while (p + 8 < q) { 2507 if (memcmp(p, signature, sizeof(signature)) == 0) { 2508 skip = p - (const char *)h; 2509 __archive_read_consume(a, skip); 2510 return (ARCHIVE_OK); 2511 } 2512 p += 0x10; 2513 } 2514 skip = p - (const char *)h; 2515 __archive_read_consume(a, skip); 2516 total += skip; 2517 } 2518 } 2519 2520 return ARCHIVE_OK; 2521 fatal: 2522 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2523 "Couldn't find out RAR header"); 2524 return (ARCHIVE_FATAL); 2525 } 2526 2527 static int rar5_read_header(struct archive_read *a, 2528 struct archive_entry *entry) 2529 { 2530 struct rar5* rar = get_context(a); 2531 int ret; 2532 2533 /* 2534 * It should be sufficient to call archive_read_next_header() for 2535 * a reader to determine if an entry is encrypted or not. 2536 */ 2537 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { 2538 rar->has_encrypted_entries = 0; 2539 } 2540 2541 if(rar->header_initialized == 0) { 2542 init_header(a); 2543 if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN) 2544 return ret; 2545 rar->header_initialized = 1; 2546 } 2547 2548 if(rar->skipped_magic == 0) { 2549 if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) { 2550 return ARCHIVE_EOF; 2551 } 2552 2553 rar->skipped_magic = 1; 2554 } 2555 2556 do { 2557 ret = process_base_block(a, entry); 2558 } while(ret == ARCHIVE_RETRY || 2559 (rar->main.endarc > 0 && ret == ARCHIVE_OK)); 2560 2561 return ret; 2562 } 2563 2564 static void init_unpack(struct rar5* rar) { 2565 rar->file.calculated_crc32 = 0; 2566 init_window_mask(rar); 2567 2568 free(rar->cstate.window_buf); 2569 free(rar->cstate.filtered_buf); 2570 2571 if(rar->cstate.window_size > 0) { 2572 rar->cstate.window_buf = calloc(1, rar->cstate.window_size); 2573 rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size); 2574 } else { 2575 rar->cstate.window_buf = NULL; 2576 rar->cstate.filtered_buf = NULL; 2577 } 2578 2579 clear_data_ready_stack(rar); 2580 2581 rar->cstate.write_ptr 
= 0; 2582 rar->cstate.last_write_ptr = 0; 2583 2584 memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd)); 2585 memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld)); 2586 memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd)); 2587 memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd)); 2588 memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd)); 2589 } 2590 2591 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) { 2592 int verify_crc; 2593 2594 if(rar->skip_mode) { 2595 #if defined CHECK_CRC_ON_SOLID_SKIP 2596 verify_crc = 1; 2597 #else 2598 verify_crc = 0; 2599 #endif 2600 } else 2601 verify_crc = 1; 2602 2603 if(verify_crc) { 2604 /* Don't update CRC32 if the file doesn't have the 2605 * `stored_crc32` info filled in. */ 2606 if(rar->file.stored_crc32 > 0) { 2607 rar->file.calculated_crc32 = 2608 crc32(rar->file.calculated_crc32, p, (unsigned int)to_read); 2609 } 2610 2611 /* Check if the file uses an optional BLAKE2sp checksum 2612 * algorithm. */ 2613 if(rar->file.has_blake2 > 0) { 2614 /* Return value of the `update` function is always 0, 2615 * so we can explicitly ignore it here. */ 2616 (void) blake2sp_update(&rar->file.b2state, p, to_read); 2617 } 2618 } 2619 } 2620 2621 static int create_decode_tables(uint8_t* bit_length, 2622 struct decode_table* table, int size) 2623 { 2624 int code, upper_limit = 0, i, lc[16]; 2625 uint32_t decode_pos_clone[rar5_countof(table->decode_pos)]; 2626 ssize_t cur_len, quick_data_size; 2627 2628 memset(&lc, 0, sizeof(lc)); 2629 memset(table->decode_num, 0, sizeof(table->decode_num)); 2630 table->size = size; 2631 table->quick_bits = size == HUFF_NC ? 
10 : 7; 2632 2633 for(i = 0; i < size; i++) { 2634 lc[bit_length[i] & 15]++; 2635 } 2636 2637 lc[0] = 0; 2638 table->decode_pos[0] = 0; 2639 table->decode_len[0] = 0; 2640 2641 for(i = 1; i < 16; i++) { 2642 upper_limit += lc[i]; 2643 2644 table->decode_len[i] = upper_limit << (16 - i); 2645 table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1]; 2646 2647 upper_limit <<= 1; 2648 } 2649 2650 memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone)); 2651 2652 for(i = 0; i < size; i++) { 2653 uint8_t clen = bit_length[i] & 15; 2654 if(clen > 0) { 2655 int last_pos = decode_pos_clone[clen]; 2656 table->decode_num[last_pos] = i; 2657 decode_pos_clone[clen]++; 2658 } 2659 } 2660 2661 quick_data_size = (int64_t)1 << table->quick_bits; 2662 cur_len = 1; 2663 for(code = 0; code < quick_data_size; code++) { 2664 int bit_field = code << (16 - table->quick_bits); 2665 int dist, pos; 2666 2667 while(cur_len < rar5_countof(table->decode_len) && 2668 bit_field >= table->decode_len[cur_len]) { 2669 cur_len++; 2670 } 2671 2672 table->quick_len[code] = (uint8_t) cur_len; 2673 2674 dist = bit_field - table->decode_len[cur_len - 1]; 2675 dist >>= (16 - cur_len); 2676 2677 pos = table->decode_pos[cur_len & 15] + dist; 2678 if(cur_len < rar5_countof(table->decode_pos) && pos < size) { 2679 table->quick_num[code] = table->decode_num[pos]; 2680 } else { 2681 table->quick_num[code] = 0; 2682 } 2683 } 2684 2685 return ARCHIVE_OK; 2686 } 2687 2688 static int decode_number(struct archive_read* a, struct decode_table* table, 2689 const uint8_t* p, uint16_t* num) 2690 { 2691 int i, bits, dist, ret; 2692 uint16_t bitfield; 2693 uint32_t pos; 2694 struct rar5* rar = get_context(a); 2695 2696 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) { 2697 return ret; 2698 } 2699 2700 bitfield &= 0xfffe; 2701 2702 if(bitfield < table->decode_len[table->quick_bits]) { 2703 int code = bitfield >> (16 - table->quick_bits); 2704 skip_bits(rar, table->quick_len[code]); 2705 *num = 
table->quick_num[code]; 2706 return ARCHIVE_OK; 2707 } 2708 2709 bits = 15; 2710 2711 for(i = table->quick_bits + 1; i < 15; i++) { 2712 if(bitfield < table->decode_len[i]) { 2713 bits = i; 2714 break; 2715 } 2716 } 2717 2718 skip_bits(rar, bits); 2719 2720 dist = bitfield - table->decode_len[bits - 1]; 2721 dist >>= (16 - bits); 2722 pos = table->decode_pos[bits] + dist; 2723 2724 if(pos >= table->size) 2725 pos = 0; 2726 2727 *num = table->decode_num[pos]; 2728 return ARCHIVE_OK; 2729 } 2730 2731 /* Reads and parses Huffman tables from the beginning of the block. */ 2732 static int parse_tables(struct archive_read* a, struct rar5* rar, 2733 const uint8_t* p) 2734 { 2735 int ret, value, i, w, idx = 0; 2736 uint8_t bit_length[HUFF_BC], 2737 table[HUFF_TABLE_SIZE], 2738 nibble_mask = 0xF0, 2739 nibble_shift = 4; 2740 2741 enum { ESCAPE = 15 }; 2742 2743 /* The data for table generation is compressed using a simple RLE-like 2744 * algorithm when storing zeroes, so we need to unpack it first. */ 2745 for(w = 0, i = 0; w < HUFF_BC;) { 2746 if(i >= rar->cstate.cur_block_size) { 2747 /* Truncated data, can't continue. */ 2748 archive_set_error(&a->archive, 2749 ARCHIVE_ERRNO_FILE_FORMAT, 2750 "Truncated data in huffman tables"); 2751 return ARCHIVE_FATAL; 2752 } 2753 2754 value = (p[i] & nibble_mask) >> nibble_shift; 2755 2756 if(nibble_mask == 0x0F) 2757 ++i; 2758 2759 nibble_mask ^= 0xFF; 2760 nibble_shift ^= 4; 2761 2762 /* Values smaller than 15 is data, so we write it directly. 2763 * Value 15 is a flag telling us that we need to unpack more 2764 * bytes. */ 2765 if(value == ESCAPE) { 2766 value = (p[i] & nibble_mask) >> nibble_shift; 2767 if(nibble_mask == 0x0F) 2768 ++i; 2769 nibble_mask ^= 0xFF; 2770 nibble_shift ^= 4; 2771 2772 if(value == 0) { 2773 /* We sometimes need to write the actual value 2774 * of 15, so this case handles that. */ 2775 bit_length[w++] = ESCAPE; 2776 } else { 2777 int k; 2778 2779 /* Fill zeroes. 
*/ 2780 for(k = 0; (k < value + 2) && (w < HUFF_BC); 2781 k++) { 2782 bit_length[w++] = 0; 2783 } 2784 } 2785 } else { 2786 bit_length[w++] = value; 2787 } 2788 } 2789 2790 rar->bits.in_addr = i; 2791 rar->bits.bit_addr = nibble_shift ^ 4; 2792 2793 ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC); 2794 if(ret != ARCHIVE_OK) { 2795 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2796 "Decoding huffman tables failed"); 2797 return ARCHIVE_FATAL; 2798 } 2799 2800 for(i = 0; i < HUFF_TABLE_SIZE;) { 2801 uint16_t num; 2802 2803 ret = decode_number(a, &rar->cstate.bd, p, &num); 2804 if(ret != ARCHIVE_OK) { 2805 archive_set_error(&a->archive, 2806 ARCHIVE_ERRNO_FILE_FORMAT, 2807 "Decoding huffman tables failed"); 2808 return ARCHIVE_FATAL; 2809 } 2810 2811 if(num < 16) { 2812 /* 0..15: store directly */ 2813 table[i] = (uint8_t) num; 2814 i++; 2815 } else if(num < 18) { 2816 /* 16..17: repeat previous code */ 2817 uint16_t n; 2818 2819 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n))) 2820 return ret; 2821 2822 if(num == 16) { 2823 n >>= 13; 2824 n += 3; 2825 skip_bits(rar, 3); 2826 } else { 2827 n >>= 9; 2828 n += 11; 2829 skip_bits(rar, 7); 2830 } 2831 2832 if(i > 0) { 2833 while(n-- > 0 && i < HUFF_TABLE_SIZE) { 2834 table[i] = table[i - 1]; 2835 i++; 2836 } 2837 } else { 2838 archive_set_error(&a->archive, 2839 ARCHIVE_ERRNO_FILE_FORMAT, 2840 "Unexpected error when decoding " 2841 "huffman tables"); 2842 return ARCHIVE_FATAL; 2843 } 2844 } else { 2845 /* other codes: fill with zeroes `n` times */ 2846 uint16_t n; 2847 2848 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n))) 2849 return ret; 2850 2851 if(num == 18) { 2852 n >>= 13; 2853 n += 3; 2854 skip_bits(rar, 3); 2855 } else { 2856 n >>= 9; 2857 n += 11; 2858 skip_bits(rar, 7); 2859 } 2860 2861 while(n-- > 0 && i < HUFF_TABLE_SIZE) 2862 table[i++] = 0; 2863 } 2864 } 2865 2866 ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC); 2867 if(ret != ARCHIVE_OK) { 2868 
archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2869 "Failed to create literal table"); 2870 return ARCHIVE_FATAL; 2871 } 2872 2873 idx += HUFF_NC; 2874 2875 ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC); 2876 if(ret != ARCHIVE_OK) { 2877 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2878 "Failed to create distance table"); 2879 return ARCHIVE_FATAL; 2880 } 2881 2882 idx += HUFF_DC; 2883 2884 ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC); 2885 if(ret != ARCHIVE_OK) { 2886 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2887 "Failed to create lower bits of distances table"); 2888 return ARCHIVE_FATAL; 2889 } 2890 2891 idx += HUFF_LDC; 2892 2893 ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC); 2894 if(ret != ARCHIVE_OK) { 2895 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2896 "Failed to create repeating distances table"); 2897 return ARCHIVE_FATAL; 2898 } 2899 2900 return ARCHIVE_OK; 2901 } 2902 2903 /* Parses the block header, verifies its CRC byte, and saves the header 2904 * fields inside the `hdr` pointer. */ 2905 static int parse_block_header(struct archive_read* a, const uint8_t* p, 2906 ssize_t* block_size, struct compressed_block_header* hdr) 2907 { 2908 uint8_t calculated_cksum; 2909 memcpy(hdr, p, sizeof(struct compressed_block_header)); 2910 2911 if(bf_byte_count(hdr) > 2) { 2912 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2913 "Unsupported block header size (was %d, max is 2)", 2914 bf_byte_count(hdr)); 2915 return ARCHIVE_FATAL; 2916 } 2917 2918 /* This should probably use bit reader interface in order to be more 2919 * future-proof. 
*/ 2920 *block_size = 0; 2921 switch(bf_byte_count(hdr)) { 2922 /* 1-byte block size */ 2923 case 0: 2924 *block_size = *(const uint8_t*) &p[2]; 2925 break; 2926 2927 /* 2-byte block size */ 2928 case 1: 2929 *block_size = archive_le16dec(&p[2]); 2930 break; 2931 2932 /* 3-byte block size */ 2933 case 2: 2934 *block_size = archive_le32dec(&p[2]); 2935 *block_size &= 0x00FFFFFF; 2936 break; 2937 2938 /* Other block sizes are not supported. This case is not 2939 * reached, because we have an 'if' guard before the switch 2940 * that makes sure of it. */ 2941 default: 2942 return ARCHIVE_FATAL; 2943 } 2944 2945 /* Verify the block header checksum. 0x5A is a magic value and is 2946 * always * constant. */ 2947 calculated_cksum = 0x5A 2948 ^ (uint8_t) hdr->block_flags_u8 2949 ^ (uint8_t) *block_size 2950 ^ (uint8_t) (*block_size >> 8) 2951 ^ (uint8_t) (*block_size >> 16); 2952 2953 if(calculated_cksum != hdr->block_cksum) { 2954 #ifndef DONT_FAIL_ON_CRC_ERROR 2955 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 2956 "Block checksum error: got 0x%x, expected 0x%x", 2957 hdr->block_cksum, calculated_cksum); 2958 2959 return ARCHIVE_FATAL; 2960 #endif 2961 } 2962 2963 return ARCHIVE_OK; 2964 } 2965 2966 /* Convenience function used during filter processing. */ 2967 static int parse_filter_data(struct archive_read* a, struct rar5* rar, 2968 const uint8_t* p, uint32_t* filter_data) 2969 { 2970 int i, bytes, ret; 2971 uint32_t data = 0; 2972 2973 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes))) 2974 return ret; 2975 2976 bytes++; 2977 2978 for(i = 0; i < bytes; i++) { 2979 uint16_t byte; 2980 2981 if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) { 2982 return ret; 2983 } 2984 2985 /* Cast to uint32_t will ensure the shift operation will not 2986 * produce undefined result. 
*/ 2987 data += ((uint32_t) byte >> 8) << (i * 8); 2988 skip_bits(rar, 8); 2989 } 2990 2991 *filter_data = data; 2992 return ARCHIVE_OK; 2993 } 2994 2995 /* Function is used during sanity checking. */ 2996 static int is_valid_filter_block_start(struct rar5* rar, 2997 uint32_t start) 2998 { 2999 const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr; 3000 const int64_t last_bs = rar->cstate.last_block_start; 3001 const ssize_t last_bl = rar->cstate.last_block_length; 3002 3003 if(last_bs == 0 || last_bl == 0) { 3004 /* We didn't have any filters yet, so accept this offset. */ 3005 return 1; 3006 } 3007 3008 if(block_start >= last_bs + last_bl) { 3009 /* Current offset is bigger than last block's end offset, so 3010 * accept current offset. */ 3011 return 1; 3012 } 3013 3014 /* Any other case is not a normal situation and we should fail. */ 3015 return 0; 3016 } 3017 3018 /* The function will create a new filter, read its parameters from the input 3019 * stream and add it to the filter collection. */ 3020 static int parse_filter(struct archive_read* ar, const uint8_t* p) { 3021 uint32_t block_start, block_length; 3022 uint16_t filter_type; 3023 struct filter_info* filt = NULL; 3024 struct rar5* rar = get_context(ar); 3025 int ret; 3026 3027 /* Read the parameters from the input stream. */ 3028 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start))) 3029 return ret; 3030 3031 if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length))) 3032 return ret; 3033 3034 if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type))) 3035 return ret; 3036 3037 filter_type >>= 13; 3038 skip_bits(rar, 3); 3039 3040 /* Perform some sanity checks on this filter parameters. Note that we 3041 * allow only DELTA, E8/E9 and ARM filters here, because rest of 3042 * filters are not used in RARv5. 
*/ 3043 3044 if(block_length < 4 || 3045 block_length > 0x400000 || 3046 filter_type > FILTER_ARM || 3047 !is_valid_filter_block_start(rar, block_start) || 3048 (rar->cstate.window_size > 0 && 3049 (ssize_t)block_length > rar->cstate.window_size >> 1)) 3050 { 3051 archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT, 3052 "Invalid filter encountered"); 3053 return ARCHIVE_FATAL; 3054 } 3055 3056 /* Allocate a new filter. */ 3057 filt = add_new_filter(rar); 3058 if(filt == NULL) { 3059 archive_set_error(&ar->archive, ENOMEM, 3060 "Can't allocate memory for a filter descriptor"); 3061 return ARCHIVE_FATAL; 3062 } 3063 3064 filt->type = filter_type; 3065 filt->block_start = rar->cstate.write_ptr + block_start; 3066 filt->block_length = block_length; 3067 3068 rar->cstate.last_block_start = filt->block_start; 3069 rar->cstate.last_block_length = filt->block_length; 3070 3071 /* Read some more data in case this is a DELTA filter. Other filter 3072 * types don't require any additional data over what was already 3073 * read. 
*/ 3074 if(filter_type == FILTER_DELTA) { 3075 int channels; 3076 3077 if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels))) 3078 return ret; 3079 3080 filt->channels = channels + 1; 3081 } 3082 3083 return ARCHIVE_OK; 3084 } 3085 3086 static int decode_code_length(struct archive_read* a, struct rar5* rar, 3087 const uint8_t* p, uint16_t code) 3088 { 3089 int lbits, length = 2; 3090 3091 if(code < 8) { 3092 lbits = 0; 3093 length += code; 3094 } else { 3095 lbits = code / 4 - 1; 3096 length += (4 | (code & 3)) << lbits; 3097 } 3098 3099 if(lbits > 0) { 3100 int add; 3101 3102 if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add)) 3103 return -1; 3104 3105 length += add; 3106 } 3107 3108 return length; 3109 } 3110 3111 static int copy_string(struct archive_read* a, int len, int dist) { 3112 struct rar5* rar = get_context(a); 3113 const ssize_t cmask = rar->cstate.window_mask; 3114 const uint64_t write_ptr = rar->cstate.write_ptr + 3115 rar->cstate.solid_offset; 3116 int i; 3117 3118 if (rar->cstate.window_buf == NULL) 3119 return ARCHIVE_FATAL; 3120 3121 /* The unpacker spends most of the time in this function. It would be 3122 * a good idea to introduce some optimizations here. 3123 * 3124 * Just remember that this loop treats buffers that overlap differently 3125 * than buffers that do not overlap. This is why a simple memcpy(3) 3126 * call will not be enough. 
*/ 3127 3128 for(i = 0; i < len; i++) { 3129 const ssize_t write_idx = (write_ptr + i) & cmask; 3130 const ssize_t read_idx = (write_ptr + i - dist) & cmask; 3131 rar->cstate.window_buf[write_idx] = 3132 rar->cstate.window_buf[read_idx]; 3133 } 3134 3135 rar->cstate.write_ptr += len; 3136 return ARCHIVE_OK; 3137 } 3138 3139 static int do_uncompress_block(struct archive_read* a, const uint8_t* p) { 3140 struct rar5* rar = get_context(a); 3141 uint16_t num; 3142 int ret; 3143 3144 const uint64_t cmask = rar->cstate.window_mask; 3145 const struct compressed_block_header* hdr = &rar->last_block_hdr; 3146 const uint8_t bit_size = 1 + bf_bit_size(hdr); 3147 3148 while(1) { 3149 if(rar->cstate.write_ptr - rar->cstate.last_write_ptr > 3150 (rar->cstate.window_size >> 1)) { 3151 /* Don't allow growing data by more than half of the 3152 * window size at a time. In such case, break the loop; 3153 * next call to this function will continue processing 3154 * from this moment. */ 3155 break; 3156 } 3157 3158 if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 || 3159 (rar->bits.in_addr == rar->cstate.cur_block_size - 1 && 3160 rar->bits.bit_addr >= bit_size)) 3161 { 3162 /* If the program counter is here, it means the 3163 * function has finished processing the block. */ 3164 rar->cstate.block_parsing_finished = 1; 3165 break; 3166 } 3167 3168 /* Decode the next literal. */ 3169 if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) { 3170 return ARCHIVE_EOF; 3171 } 3172 3173 /* Num holds a decompression literal, or 'command code'. 3174 * 3175 * - Values lower than 256 are just bytes. Those codes 3176 * can be stored in the output buffer directly. 3177 * 3178 * - Code 256 defines a new filter, which is later used to 3179 * transform the data block accordingly to the filter type. 3180 * The data block needs to be fully uncompressed first. 
3181 * 3182 * - Code bigger than 257 and smaller than 262 define 3183 * a repetition pattern that should be copied from 3184 * an already uncompressed chunk of data. 3185 */ 3186 3187 if(num < 256) { 3188 /* Directly store the byte. */ 3189 int64_t write_idx = rar->cstate.solid_offset + 3190 rar->cstate.write_ptr++; 3191 3192 rar->cstate.window_buf[write_idx & cmask] = 3193 (uint8_t) num; 3194 continue; 3195 } else if(num >= 262) { 3196 uint16_t dist_slot; 3197 int len = decode_code_length(a, rar, p, num - 262), 3198 dbits, 3199 dist = 1; 3200 3201 if(len == -1) { 3202 archive_set_error(&a->archive, 3203 ARCHIVE_ERRNO_PROGRAMMER, 3204 "Failed to decode the code length"); 3205 3206 return ARCHIVE_FATAL; 3207 } 3208 3209 if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p, 3210 &dist_slot)) 3211 { 3212 archive_set_error(&a->archive, 3213 ARCHIVE_ERRNO_PROGRAMMER, 3214 "Failed to decode the distance slot"); 3215 3216 return ARCHIVE_FATAL; 3217 } 3218 3219 if(dist_slot < 4) { 3220 dbits = 0; 3221 dist += dist_slot; 3222 } else { 3223 dbits = dist_slot / 2 - 1; 3224 3225 /* Cast to uint32_t will make sure the shift 3226 * left operation won't produce undefined 3227 * result. Then, the uint32_t type will 3228 * be implicitly casted to int. */ 3229 dist += (uint32_t) (2 | 3230 (dist_slot & 1)) << dbits; 3231 } 3232 3233 if(dbits > 0) { 3234 if(dbits >= 4) { 3235 uint32_t add = 0; 3236 uint16_t low_dist; 3237 3238 if(dbits > 4) { 3239 if(ARCHIVE_OK != (ret = read_bits_32( 3240 a, rar, p, &add))) { 3241 /* Return EOF if we 3242 * can't read more 3243 * data. 
*/ 3244 return ret; 3245 } 3246 3247 skip_bits(rar, dbits - 4); 3248 add = (add >> ( 3249 36 - dbits)) << 4; 3250 dist += add; 3251 } 3252 3253 if(ARCHIVE_OK != decode_number(a, 3254 &rar->cstate.ldd, p, &low_dist)) 3255 { 3256 archive_set_error(&a->archive, 3257 ARCHIVE_ERRNO_PROGRAMMER, 3258 "Failed to decode the " 3259 "distance slot"); 3260 3261 return ARCHIVE_FATAL; 3262 } 3263 3264 if(dist >= INT_MAX - low_dist - 1) { 3265 /* This only happens in 3266 * invalid archives. */ 3267 archive_set_error(&a->archive, 3268 ARCHIVE_ERRNO_FILE_FORMAT, 3269 "Distance pointer " 3270 "overflow"); 3271 return ARCHIVE_FATAL; 3272 } 3273 3274 dist += low_dist; 3275 } else { 3276 /* dbits is one of [0,1,2,3] */ 3277 int add; 3278 3279 if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, 3280 p, dbits, &add))) { 3281 /* Return EOF if we can't read 3282 * more data. */ 3283 return ret; 3284 } 3285 3286 dist += add; 3287 } 3288 } 3289 3290 if(dist > 0x100) { 3291 len++; 3292 3293 if(dist > 0x2000) { 3294 len++; 3295 3296 if(dist > 0x40000) { 3297 len++; 3298 } 3299 } 3300 } 3301 3302 dist_cache_push(rar, dist); 3303 rar->cstate.last_len = len; 3304 3305 if(ARCHIVE_OK != copy_string(a, len, dist)) 3306 return ARCHIVE_FATAL; 3307 3308 continue; 3309 } else if(num == 256) { 3310 /* Create a filter. 
*/ 3311 ret = parse_filter(a, p); 3312 if(ret != ARCHIVE_OK) 3313 return ret; 3314 3315 continue; 3316 } else if(num == 257) { 3317 if(rar->cstate.last_len != 0) { 3318 if(ARCHIVE_OK != copy_string(a, 3319 rar->cstate.last_len, 3320 rar->cstate.dist_cache[0])) 3321 { 3322 return ARCHIVE_FATAL; 3323 } 3324 } 3325 3326 continue; 3327 } else { 3328 /* num < 262 */ 3329 const int idx = num - 258; 3330 const int dist = dist_cache_touch(rar, idx); 3331 3332 uint16_t len_slot; 3333 int len; 3334 3335 if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p, 3336 &len_slot)) { 3337 return ARCHIVE_FATAL; 3338 } 3339 3340 len = decode_code_length(a, rar, p, len_slot); 3341 if (len == -1) { 3342 return ARCHIVE_FATAL; 3343 } 3344 3345 rar->cstate.last_len = len; 3346 3347 if(ARCHIVE_OK != copy_string(a, len, dist)) 3348 return ARCHIVE_FATAL; 3349 3350 continue; 3351 } 3352 } 3353 3354 return ARCHIVE_OK; 3355 } 3356 3357 /* Binary search for the RARv5 signature. */ 3358 static int scan_for_signature(struct archive_read* a) { 3359 const uint8_t* p; 3360 const int chunk_size = 512; 3361 ssize_t i; 3362 char signature[sizeof(rar5_signature_xor)]; 3363 3364 /* If we're here, it means we're on an 'unknown territory' data. 3365 * There's no indication what kind of data we're reading here. 3366 * It could be some text comment, any kind of binary data, 3367 * digital sign, dragons, etc. 3368 * 3369 * We want to find a valid RARv5 magic header inside this unknown 3370 * data. */ 3371 3372 /* Is it possible in libarchive to just skip everything until the 3373 * end of the file? If so, it would be a better approach than the 3374 * current implementation of this function. 
*/ 3375 3376 rar5_signature(signature); 3377 3378 while(1) { 3379 if(!read_ahead(a, chunk_size, &p)) 3380 return ARCHIVE_EOF; 3381 3382 for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor); 3383 i++) { 3384 if(memcmp(&p[i], signature, 3385 sizeof(rar5_signature_xor)) == 0) { 3386 /* Consume the number of bytes we've used to 3387 * search for the signature, as well as the 3388 * number of bytes used by the signature 3389 * itself. After this we should be standing 3390 * on a valid base block header. */ 3391 (void) consume(a, 3392 i + sizeof(rar5_signature_xor)); 3393 return ARCHIVE_OK; 3394 } 3395 } 3396 3397 consume(a, chunk_size); 3398 } 3399 3400 return ARCHIVE_FATAL; 3401 } 3402 3403 /* This function will switch the multivolume archive file to another file, 3404 * i.e. from part03 to part 04. */ 3405 static int advance_multivolume(struct archive_read* a) { 3406 int lret; 3407 struct rar5* rar = get_context(a); 3408 3409 /* A small state machine that will skip unnecessary data, needed to 3410 * switch from one multivolume to another. Such skipping is needed if 3411 * we want to be an stream-oriented (instead of file-oriented) 3412 * unpacker. 3413 * 3414 * The state machine starts with `rar->main.endarc` == 0. It also 3415 * assumes that current stream pointer points to some base block 3416 * header. 3417 * 3418 * The `endarc` field is being set when the base block parsing 3419 * function encounters the 'end of archive' marker. 3420 */ 3421 3422 while(1) { 3423 if(rar->main.endarc == 1) { 3424 int looping = 1; 3425 3426 rar->main.endarc = 0; 3427 3428 while(looping) { 3429 lret = skip_base_block(a); 3430 switch(lret) { 3431 case ARCHIVE_RETRY: 3432 /* Continue looping. */ 3433 break; 3434 case ARCHIVE_OK: 3435 /* Break loop. */ 3436 looping = 0; 3437 break; 3438 default: 3439 /* Forward any errors to the 3440 * caller. */ 3441 return lret; 3442 } 3443 } 3444 3445 break; 3446 } else { 3447 /* Skip current base block. 
In order to properly skip 3448 * it, we really need to simply parse it and discard 3449 * the results. */ 3450 3451 lret = skip_base_block(a); 3452 if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED) 3453 return lret; 3454 3455 /* The `skip_base_block` function tells us if we 3456 * should continue with skipping, or we should stop 3457 * skipping. We're trying to skip everything up to 3458 * a base FILE block. */ 3459 3460 if(lret != ARCHIVE_RETRY) { 3461 /* If there was an error during skipping, or we 3462 * have just skipped a FILE base block... */ 3463 3464 if(rar->main.endarc == 0) { 3465 return lret; 3466 } else { 3467 continue; 3468 } 3469 } 3470 } 3471 } 3472 3473 return ARCHIVE_OK; 3474 } 3475 3476 /* Merges the partial block from the first multivolume archive file, and 3477 * partial block from the second multivolume archive file. The result is 3478 * a chunk of memory containing the whole block, and the stream pointer 3479 * is advanced to the next block in the second multivolume archive file. */ 3480 static int merge_block(struct archive_read* a, ssize_t block_size, 3481 const uint8_t** p) 3482 { 3483 struct rar5* rar = get_context(a); 3484 ssize_t cur_block_size, partial_offset = 0; 3485 const uint8_t* lp; 3486 int ret; 3487 3488 if(rar->merge_mode) { 3489 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3490 "Recursive merge is not allowed"); 3491 3492 return ARCHIVE_FATAL; 3493 } 3494 3495 /* Set a flag that we're in the switching mode. */ 3496 rar->cstate.switch_multivolume = 1; 3497 3498 /* Reallocate the memory which will hold the whole block. */ 3499 if(rar->vol.push_buf) 3500 free((void*) rar->vol.push_buf); 3501 3502 /* Increasing the allocation block by 8 is due to bit reading functions, 3503 * which are using additional 2 or 4 bytes. Allocating the block size 3504 * by exact value would make bit reader perform reads from invalid 3505 * memory block when reading the last byte from the buffer. 
*/ 3506 rar->vol.push_buf = malloc(block_size + 8); 3507 if(!rar->vol.push_buf) { 3508 archive_set_error(&a->archive, ENOMEM, 3509 "Can't allocate memory for a merge block buffer"); 3510 return ARCHIVE_FATAL; 3511 } 3512 3513 /* Valgrind complains if the extension block for bit reader is not 3514 * initialized, so initialize it. */ 3515 memset(&rar->vol.push_buf[block_size], 0, 8); 3516 3517 /* A single block can span across multiple multivolume archive files, 3518 * so we use a loop here. This loop will consume enough multivolume 3519 * archive files until the whole block is read. */ 3520 3521 while(1) { 3522 /* Get the size of current block chunk in this multivolume 3523 * archive file and read it. */ 3524 cur_block_size = rar5_min(rar->file.bytes_remaining, 3525 block_size - partial_offset); 3526 3527 if(cur_block_size == 0) { 3528 archive_set_error(&a->archive, 3529 ARCHIVE_ERRNO_FILE_FORMAT, 3530 "Encountered block size == 0 during block merge"); 3531 return ARCHIVE_FATAL; 3532 } 3533 3534 if(!read_ahead(a, cur_block_size, &lp)) 3535 return ARCHIVE_EOF; 3536 3537 /* Sanity check; there should never be a situation where this 3538 * function reads more data than the block's size. */ 3539 if(partial_offset + cur_block_size > block_size) { 3540 archive_set_error(&a->archive, 3541 ARCHIVE_ERRNO_PROGRAMMER, 3542 "Consumed too much data when merging blocks"); 3543 return ARCHIVE_FATAL; 3544 } 3545 3546 /* Merge previous block chunk with current block chunk, 3547 * or create first block chunk if this is our first 3548 * iteration. */ 3549 memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size); 3550 3551 /* Advance the stream read pointer by this block chunk size. */ 3552 if(ARCHIVE_OK != consume(a, cur_block_size)) 3553 return ARCHIVE_EOF; 3554 3555 /* Update the pointers. `partial_offset` contains information 3556 * about the sum of merged block chunks. 
*/ 3557 partial_offset += cur_block_size; 3558 rar->file.bytes_remaining -= cur_block_size; 3559 3560 /* If `partial_offset` is the same as `block_size`, this means 3561 * we've merged all block chunks and we have a valid full 3562 * block. */ 3563 if(partial_offset == block_size) { 3564 break; 3565 } 3566 3567 /* If we don't have any bytes to read, this means we should 3568 * switch to another multivolume archive file. */ 3569 if(rar->file.bytes_remaining == 0) { 3570 rar->merge_mode++; 3571 ret = advance_multivolume(a); 3572 rar->merge_mode--; 3573 if(ret != ARCHIVE_OK) { 3574 return ret; 3575 } 3576 } 3577 } 3578 3579 *p = rar->vol.push_buf; 3580 3581 /* If we're here, we can resume unpacking by processing the block 3582 * pointed to by the `*p` memory pointer. */ 3583 3584 return ARCHIVE_OK; 3585 } 3586 3587 static int process_block(struct archive_read* a) { 3588 const uint8_t* p; 3589 struct rar5* rar = get_context(a); 3590 int ret; 3591 3592 /* If we don't have any data to be processed, this most probably means 3593 * we need to switch to the next volume. */ 3594 if(rar->main.volume && rar->file.bytes_remaining == 0) { 3595 ret = advance_multivolume(a); 3596 if(ret != ARCHIVE_OK) 3597 return ret; 3598 } 3599 3600 if(rar->cstate.block_parsing_finished) { 3601 ssize_t block_size; 3602 ssize_t to_skip; 3603 ssize_t cur_block_size; 3604 3605 /* The header size won't be bigger than 6 bytes. */ 3606 if(!read_ahead(a, 6, &p)) { 3607 /* Failed to prefetch data block header. */ 3608 return ARCHIVE_EOF; 3609 } 3610 3611 /* 3612 * Read block_size by parsing block header. Validate the header 3613 * by calculating CRC byte stored inside the header. Size of 3614 * the header is not constant (block size can be stored either 3615 * in 1 or 2 bytes), that's why block size is left out from the 3616 * `compressed_block_header` structure and returned by 3617 * `parse_block_header` as the second argument. 
*/ 3618 3619 ret = parse_block_header(a, p, &block_size, 3620 &rar->last_block_hdr); 3621 if(ret != ARCHIVE_OK) { 3622 return ret; 3623 } 3624 3625 /* Skip block header. Next data is huffman tables, 3626 * if present. */ 3627 to_skip = sizeof(struct compressed_block_header) + 3628 bf_byte_count(&rar->last_block_hdr) + 1; 3629 3630 if(ARCHIVE_OK != consume(a, to_skip)) 3631 return ARCHIVE_EOF; 3632 3633 rar->file.bytes_remaining -= to_skip; 3634 3635 /* The block size gives information about the whole block size, 3636 * but the block could be stored in split form when using 3637 * multi-volume archives. In this case, the block size will be 3638 * bigger than the actual data stored in this file. Remaining 3639 * part of the data will be in another file. */ 3640 3641 cur_block_size = 3642 rar5_min(rar->file.bytes_remaining, block_size); 3643 3644 if(block_size > rar->file.bytes_remaining) { 3645 /* If current blocks' size is bigger than our data 3646 * size, this means we have a multivolume archive. 3647 * In this case, skip all base headers until the end 3648 * of the file, proceed to next "partXXX.rar" volume, 3649 * find its signature, skip all headers up to the first 3650 * FILE base header, and continue from there. 3651 * 3652 * Note that `merge_block` will update the `rar` 3653 * context structure quite extensively. */ 3654 3655 ret = merge_block(a, block_size, &p); 3656 if(ret != ARCHIVE_OK) { 3657 return ret; 3658 } 3659 3660 cur_block_size = block_size; 3661 3662 /* Current stream pointer should be now directly 3663 * *after* the block that spanned through multiple 3664 * archive files. `p` pointer should have the data of 3665 * the *whole* block (merged from partial blocks 3666 * stored in multiple archives files). */ 3667 } else { 3668 rar->cstate.switch_multivolume = 0; 3669 3670 /* Read the whole block size into memory. This can take 3671 * up to 8 megabytes of memory in theoretical cases. 
3672 * Might be worth to optimize this and use a standard 3673 * chunk of 4kb's. */ 3674 if(!read_ahead(a, 4 + cur_block_size, &p)) { 3675 /* Failed to prefetch block data. */ 3676 return ARCHIVE_EOF; 3677 } 3678 } 3679 3680 rar->cstate.block_buf = p; 3681 rar->cstate.cur_block_size = cur_block_size; 3682 rar->cstate.block_parsing_finished = 0; 3683 3684 rar->bits.in_addr = 0; 3685 rar->bits.bit_addr = 0; 3686 3687 if(bf_is_table_present(&rar->last_block_hdr)) { 3688 /* Load Huffman tables. */ 3689 ret = parse_tables(a, rar, p); 3690 if(ret != ARCHIVE_OK) { 3691 /* Error during decompression of Huffman 3692 * tables. */ 3693 return ret; 3694 } 3695 } 3696 } else { 3697 /* Block parsing not finished, reuse previous memory buffer. */ 3698 p = rar->cstate.block_buf; 3699 } 3700 3701 /* Uncompress the block, or a part of it, depending on how many bytes 3702 * will be generated by uncompressing the block. 3703 * 3704 * In case too many bytes will be generated, calling this function 3705 * again will resume the uncompression operation. */ 3706 ret = do_uncompress_block(a, p); 3707 if(ret != ARCHIVE_OK) { 3708 return ret; 3709 } 3710 3711 if(rar->cstate.block_parsing_finished && 3712 rar->cstate.switch_multivolume == 0 && 3713 rar->cstate.cur_block_size > 0) 3714 { 3715 /* If we're processing a normal block, consume the whole 3716 * block. We can do this because we've already read the whole 3717 * block to memory. */ 3718 if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size)) 3719 return ARCHIVE_FATAL; 3720 3721 rar->file.bytes_remaining -= rar->cstate.cur_block_size; 3722 } else if(rar->cstate.switch_multivolume) { 3723 /* Don't consume the block if we're doing multivolume 3724 * processing. The volume switching function will consume 3725 * the proper count of bytes instead. */ 3726 rar->cstate.switch_multivolume = 0; 3727 } 3728 3729 return ARCHIVE_OK; 3730 } 3731 3732 /* Pops the `buf`, `size` and `offset` from the "data ready" stack. 
3733 * 3734 * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY 3735 * when there is no data on the stack. */ 3736 static int use_data(struct rar5* rar, const void** buf, size_t* size, 3737 int64_t* offset) 3738 { 3739 int i; 3740 3741 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3742 struct data_ready *d = &rar->cstate.dready[i]; 3743 3744 if(d->used) { 3745 if(buf) *buf = d->buf; 3746 if(size) *size = d->size; 3747 if(offset) *offset = d->offset; 3748 3749 d->used = 0; 3750 return ARCHIVE_OK; 3751 } 3752 } 3753 3754 return ARCHIVE_RETRY; 3755 } 3756 3757 static void clear_data_ready_stack(struct rar5* rar) { 3758 memset(&rar->cstate.dready, 0, sizeof(rar->cstate.dready)); 3759 } 3760 3761 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready 3762 * FIFO stack. Those values will be popped from this stack by the `use_data` 3763 * function. */ 3764 static int push_data_ready(struct archive_read* a, struct rar5* rar, 3765 const uint8_t* buf, size_t size, int64_t offset) 3766 { 3767 int i; 3768 3769 /* Don't push if we're in skip mode. This is needed because solid 3770 * streams need full processing even if we're skipping data. After 3771 * fully processing the stream, we need to discard the generated bytes, 3772 * because we're interested only in the side effect: building up the 3773 * internal window circular buffer. This window buffer will be used 3774 * later during unpacking of requested data. */ 3775 if(rar->skip_mode) 3776 return ARCHIVE_OK; 3777 3778 /* Sanity check. 
*/ 3779 if(offset != rar->file.last_offset + rar->file.last_size) { 3780 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3781 "Sanity check error: output stream is not continuous"); 3782 return ARCHIVE_FATAL; 3783 } 3784 3785 for(i = 0; i < rar5_countof(rar->cstate.dready); i++) { 3786 struct data_ready* d = &rar->cstate.dready[i]; 3787 if(!d->used) { 3788 d->used = 1; 3789 d->buf = buf; 3790 d->size = size; 3791 d->offset = offset; 3792 3793 /* These fields are used only in sanity checking. */ 3794 rar->file.last_offset = offset; 3795 rar->file.last_size = size; 3796 3797 /* Calculate the checksum of this new block before 3798 * submitting data to libarchive's engine. */ 3799 update_crc(rar, d->buf, d->size); 3800 3801 return ARCHIVE_OK; 3802 } 3803 } 3804 3805 /* Program counter will reach this code if the `rar->cstate.data_ready` 3806 * stack will be filled up so that no new entries will be allowed. The 3807 * code shouldn't allow such situation to occur. So we treat this case 3808 * as an internal error. */ 3809 3810 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 3811 "Premature end of data_ready stack"); 3812 return ARCHIVE_FATAL; 3813 } 3814 3815 /* This function uncompresses the data that is stored in the <FILE> base 3816 * block. 3817 * 3818 * The FILE base block looks like this: 3819 * 3820 * <header><huffman tables><block_1><block_2>...<block_n> 3821 * 3822 * The <header> is a block header, that is parsed in parse_block_header(). 3823 * It's a "compressed_block_header" structure, containing metadata needed 3824 * to know when we should stop looking for more <block_n> blocks. 3825 * 3826 * <huffman tables> contain data needed to set up the huffman tables, needed 3827 * for the actual decompression. 3828 * 3829 * Each <block_n> consists of series of literals: 3830 * 3831 * <literal><literal><literal>...<literal> 3832 * 3833 * Those literals generate the uncompression data. 
They operate on a circular 3834 * buffer, sometimes writing raw data into it, sometimes referencing 3835 * some previous data inside this buffer, and sometimes declaring a filter 3836 * that will need to be executed on the data stored in the circular buffer. 3837 * It all depends on the literal that is used. 3838 * 3839 * Sometimes blocks produce output data, sometimes they don't. For example, for 3840 * some huge files that use lots of filters, sometimes a block is filled with 3841 * only filter declaration literals. Such blocks won't produce any data in the 3842 * circular buffer. 3843 * 3844 * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte, 3845 * because a literal can reference previously decompressed data. For example, 3846 * there can be a literal that says: 'append a byte 0xFE here', and after 3847 * it another literal can say 'append 1 megabyte of data from circular buffer 3848 * offset 0x12345'. This is how RAR format handles compressing repeated 3849 * patterns. 3850 * 3851 * The RAR compressor creates those literals and the actual efficiency of 3852 * compression depends on what those literals are. The literals can also 3853 * be seen as a kind of a non-turing-complete virtual machine that simply 3854 * tells the decompressor what it should do. 3855 * */ 3856 3857 static int do_uncompress_file(struct archive_read* a) { 3858 struct rar5* rar = get_context(a); 3859 int ret; 3860 int64_t max_end_pos; 3861 3862 if(!rar->cstate.initialized) { 3863 /* Don't perform full context reinitialization if we're 3864 * processing a solid archive. */ 3865 if(!rar->main.solid || !rar->cstate.window_buf) { 3866 init_unpack(rar); 3867 } 3868 3869 rar->cstate.initialized = 1; 3870 } 3871 3872 /* Don't allow extraction if window_size is invalid. 
*/ 3873 if(rar->cstate.window_size == 0) { 3874 archive_set_error(&a->archive, 3875 ARCHIVE_ERRNO_FILE_FORMAT, 3876 "Invalid window size declaration in this file"); 3877 3878 /* This should never happen in valid files. */ 3879 return ARCHIVE_FATAL; 3880 } 3881 3882 if(rar->cstate.all_filters_applied == 1) { 3883 /* We use while(1) here, but standard case allows for just 1 3884 * iteration. The loop will iterate if process_block() didn't 3885 * generate any data at all. This can happen if the block 3886 * contains only filter definitions (this is common in big 3887 * files). */ 3888 while(1) { 3889 ret = process_block(a); 3890 if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL) 3891 return ret; 3892 3893 if(rar->cstate.last_write_ptr == 3894 rar->cstate.write_ptr) { 3895 /* The block didn't generate any new data, 3896 * so just process a new block if this one 3897 * wasn't the last block in the file. */ 3898 if (bf_is_last_block(&rar->last_block_hdr)) { 3899 return ARCHIVE_EOF; 3900 } 3901 3902 continue; 3903 } 3904 3905 /* The block has generated some new data, so break 3906 * the loop. */ 3907 break; 3908 } 3909 } 3910 3911 /* Try to run filters. If filters won't be applied, it means that 3912 * insufficient data was generated. */ 3913 ret = apply_filters(a); 3914 if(ret == ARCHIVE_RETRY) { 3915 return ARCHIVE_OK; 3916 } else if(ret == ARCHIVE_FATAL) { 3917 return ARCHIVE_FATAL; 3918 } 3919 3920 /* If apply_filters() will return ARCHIVE_OK, we can continue here. */ 3921 3922 if(cdeque_size(&rar->cstate.filters) > 0) { 3923 /* Check if we can write something before hitting first 3924 * filter. */ 3925 struct filter_info* flt; 3926 3927 /* Get the block_start offset from the first filter. 
*/ 3928 if(CDE_OK != cdeque_front(&rar->cstate.filters, 3929 cdeque_filter_p(&flt))) 3930 { 3931 archive_set_error(&a->archive, 3932 ARCHIVE_ERRNO_PROGRAMMER, 3933 "Can't read first filter"); 3934 return ARCHIVE_FATAL; 3935 } 3936 3937 max_end_pos = rar5_min(flt->block_start, 3938 rar->cstate.write_ptr); 3939 } else { 3940 /* There are no filters defined, or all filters were applied. 3941 * This means we can just store the data without any 3942 * postprocessing. */ 3943 max_end_pos = rar->cstate.write_ptr; 3944 } 3945 3946 if(max_end_pos == rar->cstate.last_write_ptr) { 3947 /* We can't write anything yet. The block uncompression 3948 * function did not generate enough data, and no filter can be 3949 * applied. At the same time we don't have any data that can be 3950 * stored without filter postprocessing. This means we need to 3951 * wait for more data to be generated, so we can apply the 3952 * filters. 3953 * 3954 * Signal the caller that we need more data to be able to do 3955 * anything. 3956 */ 3957 return ARCHIVE_RETRY; 3958 } else { 3959 /* We can write the data before hitting the first filter. 3960 * So let's do it. The push_window_data() function will 3961 * effectively return the selected data block to the user 3962 * application. */ 3963 push_window_data(a, rar, rar->cstate.last_write_ptr, 3964 max_end_pos); 3965 rar->cstate.last_write_ptr = max_end_pos; 3966 } 3967 3968 return ARCHIVE_OK; 3969 } 3970 3971 static int uncompress_file(struct archive_read* a) { 3972 int ret; 3973 3974 while(1) { 3975 /* Sometimes the uncompression function will return a 3976 * 'retry' signal. If this will happen, we have to retry 3977 * the function. 
*/ 3978 ret = do_uncompress_file(a); 3979 if(ret != ARCHIVE_RETRY) 3980 return ret; 3981 } 3982 } 3983 3984 3985 static int do_unstore_file(struct archive_read* a, 3986 struct rar5* rar, const void** buf, size_t* size, int64_t* offset) 3987 { 3988 size_t to_read; 3989 const uint8_t* p; 3990 3991 if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 && 3992 rar->generic.split_after > 0) 3993 { 3994 int ret; 3995 3996 rar->cstate.switch_multivolume = 1; 3997 ret = advance_multivolume(a); 3998 rar->cstate.switch_multivolume = 0; 3999 4000 if(ret != ARCHIVE_OK) { 4001 /* Failed to advance to next multivolume archive 4002 * file. */ 4003 return ret; 4004 } 4005 } 4006 4007 to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024); 4008 if(to_read == 0) { 4009 return ARCHIVE_EOF; 4010 } 4011 4012 if(!read_ahead(a, to_read, &p)) { 4013 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 4014 "I/O error when unstoring file"); 4015 return ARCHIVE_FATAL; 4016 } 4017 4018 if(ARCHIVE_OK != consume(a, to_read)) { 4019 return ARCHIVE_EOF; 4020 } 4021 4022 if(buf) *buf = p; 4023 if(size) *size = to_read; 4024 if(offset) *offset = rar->cstate.last_unstore_ptr; 4025 4026 rar->file.bytes_remaining -= to_read; 4027 rar->cstate.last_unstore_ptr += to_read; 4028 4029 update_crc(rar, p, to_read); 4030 return ARCHIVE_OK; 4031 } 4032 4033 static int do_unpack(struct archive_read* a, struct rar5* rar, 4034 const void** buf, size_t* size, int64_t* offset) 4035 { 4036 enum COMPRESSION_METHOD { 4037 STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4, 4038 BEST = 5 4039 }; 4040 4041 if(rar->file.service > 0) { 4042 return do_unstore_file(a, rar, buf, size, offset); 4043 } else { 4044 switch(rar->cstate.method) { 4045 case STORE: 4046 return do_unstore_file(a, rar, buf, size, 4047 offset); 4048 case FASTEST: 4049 /* fallthrough */ 4050 case FAST: 4051 /* fallthrough */ 4052 case NORMAL: 4053 /* fallthrough */ 4054 case GOOD: 4055 /* fallthrough */ 4056 case BEST: 4057 /* No data is 
returned here. But because a sparse-file aware 4058 * caller (like archive_read_data_into_fd) may treat zero-size 4059 * as a sparse file block, we need to update the offset 4060 * accordingly. At this point the decoder doesn't have any 4061 * pending uncompressed data blocks, so the current position in 4062 * the output file should be last_write_ptr. */ 4063 if (offset) *offset = rar->cstate.last_write_ptr; 4064 return uncompress_file(a); 4065 default: 4066 archive_set_error(&a->archive, 4067 ARCHIVE_ERRNO_FILE_FORMAT, 4068 "Compression method not supported: 0x%x", 4069 (unsigned int)rar->cstate.method); 4070 4071 return ARCHIVE_FATAL; 4072 } 4073 } 4074 4075 #if !defined WIN32 4076 /* Not reached. */ 4077 return ARCHIVE_OK; 4078 #endif 4079 } 4080 4081 static int verify_checksums(struct archive_read* a) { 4082 int verify_crc; 4083 struct rar5* rar = get_context(a); 4084 4085 /* Check checksums only when actually unpacking the data. There's no 4086 * need to calculate checksum when we're skipping data in solid archives 4087 * (skipping in solid archives is the same thing as unpacking compressed 4088 * data and discarding the result). */ 4089 4090 if(!rar->skip_mode) { 4091 /* Always check checksums if we're not in skip mode */ 4092 verify_crc = 1; 4093 } else { 4094 /* We can override the logic above with a compile-time option 4095 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging, 4096 * and it will check checksums of unpacked data even when 4097 * we're skipping it. */ 4098 4099 #if defined CHECK_CRC_ON_SOLID_SKIP 4100 /* Debug case */ 4101 verify_crc = 1; 4102 #else 4103 /* Normal case */ 4104 verify_crc = 0; 4105 #endif 4106 } 4107 4108 if(verify_crc) { 4109 /* During unpacking, on each unpacked block we're calling the 4110 * update_crc() function. Since we are here, the unpacking 4111 * process is already over and we can check if calculated 4112 * checksum (CRC32 or BLAKE2sp) is the same as what is stored 4113 * in the archive. 
*/ 4114 if(rar->file.stored_crc32 > 0) { 4115 /* Check CRC32 only when the file contains a CRC32 4116 * value for this file. */ 4117 4118 if(rar->file.calculated_crc32 != 4119 rar->file.stored_crc32) { 4120 /* Checksums do not match; the unpacked file 4121 * is corrupted. */ 4122 4123 DEBUG_CODE { 4124 printf("Checksum error: CRC32 " 4125 "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n", 4126 rar->file.calculated_crc32, 4127 rar->file.stored_crc32); 4128 } 4129 4130 #ifndef DONT_FAIL_ON_CRC_ERROR 4131 archive_set_error(&a->archive, 4132 ARCHIVE_ERRNO_FILE_FORMAT, 4133 "Checksum error: CRC32"); 4134 return ARCHIVE_FATAL; 4135 #endif 4136 } else { 4137 DEBUG_CODE { 4138 printf("Checksum OK: CRC32 " 4139 "(%08" PRIx32 "/%08" PRIx32 ")\n", 4140 rar->file.stored_crc32, 4141 rar->file.calculated_crc32); 4142 } 4143 } 4144 } 4145 4146 if(rar->file.has_blake2 > 0) { 4147 /* BLAKE2sp is an optional checksum algorithm that is 4148 * added to RARv5 archives when using the `-htb` switch 4149 * during creation of archive. 4150 * 4151 * We now finalize the hash calculation by calling the 4152 * `final` function. This will generate the final hash 4153 * value we can use to compare it with the BLAKE2sp 4154 * checksum that is stored in the archive. 4155 * 4156 * The return value of this `final` function is not 4157 * very helpful, as it guards only against improper use. 4158 * This is why we're explicitly ignoring it. */ 4159 4160 uint8_t b2_buf[32]; 4161 (void) blake2sp_final(&rar->file.b2state, b2_buf, 32); 4162 4163 if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) { 4164 #ifndef DONT_FAIL_ON_CRC_ERROR 4165 archive_set_error(&a->archive, 4166 ARCHIVE_ERRNO_FILE_FORMAT, 4167 "Checksum error: BLAKE2"); 4168 4169 return ARCHIVE_FATAL; 4170 #endif 4171 } 4172 } 4173 } 4174 4175 /* Finalization for this file has been successfully completed. 
*/ 4176 return ARCHIVE_OK; 4177 } 4178 4179 static int verify_global_checksums(struct archive_read* a) { 4180 return verify_checksums(a); 4181 } 4182 4183 /* 4184 * Decryption function for the magic signature pattern. Check the comment near 4185 * the `rar5_signature_xor` symbol to read the rationale behind this. 4186 */ 4187 static void rar5_signature(char *buf) { 4188 size_t i; 4189 4190 for(i = 0; i < sizeof(rar5_signature_xor); i++) { 4191 buf[i] = rar5_signature_xor[i] ^ 0xA1; 4192 } 4193 } 4194 4195 static int rar5_read_data(struct archive_read *a, const void **buff, 4196 size_t *size, int64_t *offset) { 4197 int ret; 4198 struct rar5* rar = get_context(a); 4199 4200 if (size) 4201 *size = 0; 4202 4203 if (rar->has_encrypted_entries == ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW) { 4204 rar->has_encrypted_entries = 0; 4205 } 4206 4207 if (rar->headers_are_encrypted || rar->cstate.data_encrypted) { 4208 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 4209 "Reading encrypted data is not currently supported"); 4210 return ARCHIVE_FATAL; 4211 } 4212 4213 if(rar->file.dir > 0) { 4214 /* Don't process any data if this file entry was declared 4215 * as a directory. This is needed, because entries marked as 4216 * directory doesn't have any dictionary buffer allocated, so 4217 * it's impossible to perform any decompression. 
*/ 4218 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 4219 "Can't decompress an entry marked as a directory"); 4220 return ARCHIVE_FATAL; 4221 } 4222 4223 if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) { 4224 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 4225 "Unpacker has written too many bytes"); 4226 return ARCHIVE_FATAL; 4227 } 4228 4229 ret = use_data(rar, buff, size, offset); 4230 if(ret == ARCHIVE_OK) { 4231 return ret; 4232 } 4233 4234 if(rar->file.eof == 1) { 4235 return ARCHIVE_EOF; 4236 } 4237 4238 ret = do_unpack(a, rar, buff, size, offset); 4239 if(ret != ARCHIVE_OK) { 4240 return ret; 4241 } 4242 4243 if(rar->file.bytes_remaining == 0 && 4244 rar->cstate.last_write_ptr == rar->file.unpacked_size) 4245 { 4246 /* If all bytes of current file were processed, run 4247 * finalization. 4248 * 4249 * Finalization will check checksum against proper values. If 4250 * some of the checksums will not match, we'll return an error 4251 * value in the last `archive_read_data` call to signal an error 4252 * to the user. */ 4253 4254 rar->file.eof = 1; 4255 return verify_global_checksums(a); 4256 } 4257 4258 return ARCHIVE_OK; 4259 } 4260 4261 static int rar5_read_data_skip(struct archive_read *a) { 4262 struct rar5* rar = get_context(a); 4263 4264 if(rar->main.solid && (rar->cstate.data_encrypted == 0)) { 4265 /* In solid archives, instead of skipping the data, we need to 4266 * extract it, and dispose the result. The side effect of this 4267 * operation will be setting up the initial window buffer state 4268 * needed to be able to extract the selected file. Note that 4269 * this is only possible when data withing this solid block is 4270 * not encrypted, in which case we'll skip and fail if the user 4271 * tries to read data. */ 4272 4273 int ret; 4274 4275 /* Make sure to process all blocks in the compressed stream. 
*/ 4276 while(rar->file.bytes_remaining > 0) { 4277 /* Setting the "skip mode" will allow us to skip 4278 * checksum checks during data skipping. Checking the 4279 * checksum of skipped data isn't really necessary and 4280 * it's only slowing things down. 4281 * 4282 * This is incremented instead of setting to 1 because 4283 * this data skipping function can be called 4284 * recursively. */ 4285 rar->skip_mode++; 4286 4287 /* We're disposing 1 block of data, so we use triple 4288 * NULLs in arguments. */ 4289 ret = rar5_read_data(a, NULL, NULL, NULL); 4290 4291 /* Turn off "skip mode". */ 4292 rar->skip_mode--; 4293 4294 if(ret < 0 || ret == ARCHIVE_EOF) { 4295 /* Propagate any potential error conditions 4296 * to the caller. */ 4297 return ret; 4298 } 4299 } 4300 } else { 4301 /* In standard archives, we can just jump over the compressed 4302 * stream. Each file in non-solid archives starts from an empty 4303 * window buffer. */ 4304 4305 if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) { 4306 return ARCHIVE_FATAL; 4307 } 4308 4309 rar->file.bytes_remaining = 0; 4310 } 4311 4312 return ARCHIVE_OK; 4313 } 4314 4315 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset, 4316 int whence) 4317 { 4318 (void) a; 4319 (void) offset; 4320 (void) whence; 4321 4322 /* We're a streaming unpacker, and we don't support seeking. 
*/ 4323 4324 return ARCHIVE_FATAL; 4325 } 4326 4327 static int rar5_cleanup(struct archive_read *a) { 4328 struct rar5* rar = get_context(a); 4329 4330 free(rar->cstate.window_buf); 4331 free(rar->cstate.filtered_buf); 4332 clear_data_ready_stack(rar); 4333 4334 free(rar->vol.push_buf); 4335 4336 free_filters(rar); 4337 rar5_deinit(rar); 4338 4339 free(rar); 4340 a->format->data = NULL; 4341 4342 return ARCHIVE_OK; 4343 } 4344 4345 static int rar5_capabilities(struct archive_read * a) { 4346 (void) a; 4347 return (ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_DATA 4348 | ARCHIVE_READ_FORMAT_CAPS_ENCRYPT_METADATA); 4349 } 4350 4351 static int rar5_has_encrypted_entries(struct archive_read *_a) { 4352 if (_a && _a->format) { 4353 struct rar5 *rar = (struct rar5 *)_a->format->data; 4354 if (rar) { 4355 return rar->has_encrypted_entries; 4356 } 4357 } 4358 4359 return ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; 4360 } 4361 4362 /* Must match deallocations in rar5_deinit */ 4363 static int rar5_init(struct rar5* rar) { 4364 memset(rar, 0, sizeof(struct rar5)); 4365 4366 if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192)) 4367 return ARCHIVE_FATAL; 4368 4369 /* 4370 * Until enough data has been read, we cannot tell about 4371 * any encrypted entries yet. 
4372 */ 4373 rar->has_encrypted_entries = ARCHIVE_READ_FORMAT_ENCRYPTION_DONT_KNOW; 4374 4375 return ARCHIVE_OK; 4376 } 4377 4378 /* Must match allocations in rar5_init */ 4379 static void rar5_deinit(struct rar5* rar) { 4380 cdeque_free(&rar->cstate.filters); 4381 } 4382 4383 int archive_read_support_format_rar5(struct archive *_a) { 4384 struct archive_read* ar; 4385 int ret; 4386 struct rar5* rar; 4387 4388 if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar))) 4389 return ret; 4390 4391 rar = malloc(sizeof(*rar)); 4392 if(rar == NULL) { 4393 archive_set_error(&ar->archive, ENOMEM, 4394 "Can't allocate rar5 data"); 4395 return ARCHIVE_FATAL; 4396 } 4397 4398 if(ARCHIVE_OK != rar5_init(rar)) { 4399 archive_set_error(&ar->archive, ENOMEM, 4400 "Can't allocate rar5 filter buffer"); 4401 free(rar); 4402 return ARCHIVE_FATAL; 4403 } 4404 4405 ret = __archive_read_register_format(ar, 4406 rar, 4407 "rar5", 4408 rar5_bid, 4409 rar5_options, 4410 rar5_read_header, 4411 rar5_read_data, 4412 rar5_read_data_skip, 4413 rar5_seek_data, 4414 rar5_cleanup, 4415 rar5_capabilities, 4416 rar5_has_encrypted_entries); 4417 4418 if(ret != ARCHIVE_OK) { 4419 rar5_deinit(rar); 4420 free(rar); 4421 } 4422 4423 return ARCHIVE_OK; 4424 } 4425