1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file test_microlzma.c 6 /// \brief Tests MicroLZMA encoding and decoding 7 // 8 // Author: Jia Tan 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "tests.h" 13 14 #define BUFFER_SIZE 1024 15 16 17 #ifdef HAVE_ENCODER_LZMA1 18 19 // MicroLZMA encoded "Hello\nWorld\n" output size in bytes. 20 #define ENCODED_OUTPUT_SIZE 17 21 22 // Byte array of "Hello\nWorld\n". This is used for various encoder tests. 23 static const uint8_t hello_world[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, 24 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A }; 25 26 // This is the CRC32 value of the MicroLZMA encoding of "Hello\nWorld\n". 27 // The settings used were based on LZMA_PRESET_DEFAULT as of liblzma 5.6.0. 28 // This assumes MicroLZMA is correct in liblzma 5.6.0, which is safe 29 // considering the encoded "Hello\nWorld\n" can successfully be decoded at 30 // this time. This is to test for regressions that cause MicroLZMA output 31 // to change. 32 static const uint32_t hello_world_encoded_crc = 0x3CDE40A8; 33 34 35 // Function implementation borrowed from lzma_decoder.c. It is needed to 36 // ensure the first byte of a MicroLZMA stream is set correctly with the 37 // negation of the LZMA properties. 38 static bool 39 lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte) 40 { 41 if (byte > (4 * 5 + 4) * 9 + 8) 42 return true; 43 44 // See the file format specification to understand this. 45 options->pb = byte / (9 * 5); 46 byte -= options->pb * 9 * 5; 47 options->lp = byte / 9; 48 options->lc = byte - options->lp * 9; 49 50 return options->lc + options->lp > LZMA_LCLP_MAX; 51 } 52 53 54 /////////////////// 55 // Encoder tests // 56 /////////////////// 57 58 // This tests a few of the basic options. These options are not unique to 59 // MicroLZMA in any way, its mostly ensuring that the options are actually 60 // being checked before initializing the decoder internals. 61 static void 62 test_encode_options(void) 63 { 64 lzma_stream strm = LZMA_STREAM_INIT; 65 lzma_options_lzma opt_lzma; 66 67 // Initialize with default options. 68 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); 69 70 // NULL stream 71 assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma), 72 LZMA_PROG_ERROR); 73 74 // lc/lp/pb = 5/0/2 (lc invalid) 75 opt_lzma.lc = 5; 76 opt_lzma.lp = 0; 77 opt_lzma.pb = 2; 78 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 79 LZMA_OPTIONS_ERROR); 80 81 // lc/lp/pb = 0/5/2 (lp invalid) 82 opt_lzma.lc = 0; 83 opt_lzma.lp = 5; 84 opt_lzma.pb = 2; 85 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 86 LZMA_OPTIONS_ERROR); 87 88 // lc/lp/pb = 3/2/2 (lc + lp invalid) 89 opt_lzma.lc = 3; 90 opt_lzma.lp = 2; 91 opt_lzma.pb = 2; 92 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 93 LZMA_OPTIONS_ERROR); 94 95 // lc/lp/pb = 3/0/5 (pb invalid) 96 opt_lzma.lc = 3; 97 opt_lzma.lp = 0; 98 opt_lzma.pb = 5; 99 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 100 LZMA_OPTIONS_ERROR); 101 102 // Zero out lp, pb, lc options to not interfere with later tests. 103 opt_lzma.lp = 0; 104 opt_lzma.pb = 0; 105 opt_lzma.lc = 0; 106 107 // Set invalid dictionary size. 108 opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1; 109 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 110 LZMA_OPTIONS_ERROR); 111 112 // Maximum dictionary size for the encoder, as described in lzma12.h 113 // is 1.5 GiB. 114 opt_lzma.dict_size = (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + 1; 115 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 116 LZMA_OPTIONS_ERROR); 117 118 lzma_end(&strm); 119 } 120 121 122 static void 123 test_encode_basic(void) 124 { 125 lzma_stream strm = LZMA_STREAM_INIT; 126 lzma_options_lzma opt_lzma; 127 128 // The lzma_lzma_preset return value is inverse of what it perhaps 129 // should be, that is, it returns false on success. 130 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); 131 132 // Initialize the encoder using the default options. 133 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); 134 135 uint8_t output[BUFFER_SIZE]; 136 137 strm.next_in = hello_world; 138 strm.avail_in = sizeof(hello_world); 139 strm.next_out = output; 140 strm.avail_out = sizeof(output); 141 142 // Everything must be encoded in one lzma_code() call. 143 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); 144 145 // Check that the entire input was consumed. 146 assert_uint_eq(strm.total_in, sizeof(hello_world)); 147 148 // Check that the first byte in the output stream is not 0x00. 149 // In a regular raw LZMA stream the first byte is always 0x00. 150 // In MicroLZMA the first byte replaced by the bitwise-negation 151 // of the LZMA properties. 152 assert_uint(output[0], !=, 0x00); 153 154 const uint8_t props = ~output[0]; 155 156 lzma_options_lzma test_options; 157 assert_false(lzma_lzma_lclppb_decode(&test_options, props)); 158 159 assert_uint_eq(opt_lzma.lc, test_options.lc); 160 assert_uint_eq(opt_lzma.lp, test_options.lp); 161 assert_uint_eq(opt_lzma.pb, test_options.pb); 162 163 // Compute the check over the output data. This is compared to 164 // the expected check value. 165 const uint32_t check_val = lzma_crc32(output, strm.total_out, 0); 166 167 assert_uint_eq(check_val, hello_world_encoded_crc); 168 169 lzma_end(&strm); 170 } 171 172 173 // This tests the behavior when strm.avail_out is so small it cannot hold 174 // the header plus 1 encoded byte (< 6). 175 static void 176 test_encode_small_out(void) 177 { 178 lzma_stream strm = LZMA_STREAM_INIT; 179 lzma_options_lzma opt_lzma; 180 181 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); 182 183 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); 184 185 uint8_t output[BUFFER_SIZE]; 186 187 strm.next_in = hello_world; 188 strm.avail_in = sizeof(hello_world); 189 strm.next_out = output; 190 strm.avail_out = 5; 191 192 // LZMA_PROG_ERROR is expected when strm.avail_out < 6 193 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR); 194 195 // The encoder must be reset because coders cannot be used again 196 // after returning LZMA_PROG_ERROR. 197 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK); 198 199 // Reset strm.avail_out to be > 6, but not enough to hold all of the 200 // compressed data. 201 strm.avail_out = ENCODED_OUTPUT_SIZE - 1; 202 203 // Encoding should not return an error now. 204 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); 205 assert_uint(strm.total_in, <, sizeof(hello_world)); 206 207 lzma_end(&strm); 208 } 209 210 211 // LZMA_FINISH is the only supported action. All others must 212 // return LZMA_PROG_ERROR. 213 static void 214 test_encode_actions(void) 215 { 216 lzma_stream strm = LZMA_STREAM_INIT; 217 lzma_options_lzma opt_lzma; 218 219 assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)); 220 221 const lzma_action actions[] = { 222 LZMA_RUN, 223 LZMA_SYNC_FLUSH, 224 LZMA_FULL_FLUSH, 225 LZMA_FULL_BARRIER, 226 }; 227 228 for (size_t i = 0; i < ARRAY_SIZE(actions); ++i) { 229 assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), 230 LZMA_OK); 231 232 uint8_t output[BUFFER_SIZE]; 233 234 strm.next_in = hello_world; 235 strm.avail_in = sizeof(hello_world); 236 strm.next_out = output; 237 strm.avail_out = sizeof(output); 238 239 assert_lzma_ret(lzma_code(&strm, actions[i]), 240 LZMA_PROG_ERROR); 241 } 242 243 lzma_end(&strm); 244 } 245 #endif // HAVE_ENCODER_LZMA1 246 247 248 /////////////////// 249 // Decoder tests // 250 /////////////////// 251 252 #if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1) 253 254 // Byte array of "Goodbye World!". This is used for various decoder tests. 255 static const uint8_t goodbye_world[] = { 0x47, 0x6F, 0x6F, 0x64, 0x62, 256 0x79, 0x65, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21 }; 257 258 static uint8_t *goodbye_world_encoded = NULL; 259 static size_t goodbye_world_encoded_size = 0; 260 261 262 // Helper function to encode data and return the compressed size. 263 static size_t 264 basic_microlzma_encode(const uint8_t *input, size_t in_size, 265 uint8_t **compressed) 266 { 267 lzma_stream strm = LZMA_STREAM_INIT; 268 lzma_options_lzma opt_lzma; 269 270 // Lazy way to set the output size since the input should never 271 // inflate by much in these simple test cases. This is tested to 272 // be large enough after encoding to fit the entire input, so if 273 // this assumption does not hold then this will fail. 274 const size_t out_size = in_size << 1; 275 276 *compressed = tuktest_malloc(out_size); 277 278 // Always encode with the default options for simplicity. 279 if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT)) 280 goto decoder_setup_error; 281 282 if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK) 283 goto decoder_setup_error; 284 285 strm.next_in = input; 286 strm.avail_in = in_size; 287 strm.next_out = *compressed; 288 strm.avail_out = out_size; 289 290 if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END) 291 goto decoder_setup_error; 292 293 // Check that the entire input was consumed and that it fit into 294 // the output buffer. 295 if (strm.total_in != in_size) 296 goto decoder_setup_error; 297 298 lzma_end(&strm); 299 300 // lzma_end() doesn't touch other members of lzma_stream than 301 // lzma_stream.internal so using strm.total_out here is fine. 302 return strm.total_out; 303 304 decoder_setup_error: 305 tuktest_error("Failed to initialize decoder tests"); 306 return 0; 307 } 308 309 310 static void 311 test_decode_options(void) 312 { 313 // NULL stream 314 assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE, 315 sizeof(hello_world), true, 316 LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR); 317 318 // Uncompressed size larger than max 319 lzma_stream strm = LZMA_STREAM_INIT; 320 assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE, 321 LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT), 322 LZMA_OPTIONS_ERROR); 323 } 324 325 326 // Test that decoding succeeds when uncomp_size is correct regardless of 327 // the value of uncomp_size_is_exact. 328 static void 329 test_decode_uncomp_size_is_exact(void) 330 { 331 lzma_stream strm = LZMA_STREAM_INIT; 332 333 assert_lzma_ret(lzma_microlzma_decoder(&strm, 334 goodbye_world_encoded_size, 335 sizeof(goodbye_world), true, 336 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 337 338 uint8_t output[BUFFER_SIZE]; 339 340 strm.next_in = goodbye_world_encoded; 341 strm.avail_in = goodbye_world_encoded_size; 342 strm.next_out = output; 343 strm.avail_out = sizeof(output); 344 345 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); 346 assert_uint_eq(strm.total_in, goodbye_world_encoded_size); 347 348 assert_uint_eq(strm.total_out, sizeof(goodbye_world)); 349 assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); 350 351 // Reset decoder with uncomp_size_is_exact set to false and 352 // uncomp_size set to correct value. Also test using the 353 // uncompressed size as the dictionary size. 354 assert_lzma_ret(lzma_microlzma_decoder(&strm, 355 goodbye_world_encoded_size, 356 sizeof(goodbye_world), false, 357 sizeof(goodbye_world)), LZMA_OK); 358 359 strm.next_in = goodbye_world_encoded; 360 strm.avail_in = goodbye_world_encoded_size; 361 strm.next_out = output; 362 strm.avail_out = sizeof(output); 363 364 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END); 365 assert_uint_eq(strm.total_in, goodbye_world_encoded_size); 366 367 assert_uint_eq(strm.total_out, sizeof(goodbye_world)); 368 assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); 369 370 lzma_end(&strm); 371 } 372 373 374 // This tests decoding when MicroLZMA decoder is called with 375 // an incorrect uncompressed size. 376 static void 377 test_decode_uncomp_size_wrong(void) 378 { 379 lzma_stream strm = LZMA_STREAM_INIT; 380 assert_lzma_ret(lzma_microlzma_decoder(&strm, 381 goodbye_world_encoded_size, 382 sizeof(goodbye_world) + 1, false, 383 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 384 385 uint8_t output[BUFFER_SIZE]; 386 387 strm.next_in = goodbye_world_encoded; 388 strm.avail_in = goodbye_world_encoded_size; 389 strm.next_out = output; 390 strm.avail_out = sizeof(output); 391 392 // LZMA_OK should be returned because the input size given was 393 // larger than the actual encoded size. The decoder is expecting 394 // more input to possibly fill the uncompressed size that was set. 395 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); 396 397 assert_uint_eq(strm.total_out, sizeof(goodbye_world)); 398 399 assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); 400 401 // Next, test with uncomp_size_is_exact set. 402 assert_lzma_ret(lzma_microlzma_decoder(&strm, 403 goodbye_world_encoded_size, 404 sizeof(goodbye_world) + 1, true, 405 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 406 407 strm.next_in = goodbye_world_encoded; 408 strm.avail_in = goodbye_world_encoded_size; 409 strm.next_out = output; 410 strm.avail_out = sizeof(output); 411 412 // No error detected, even though all input was consumed and there 413 // is more room in the output buffer. 414 // 415 // FIXME? LZMA_FINISH tells that no more input is coming and 416 // the MicroLZMA decoder knows the exact compressed size from 417 // the initialization as well. So should it return LZMA_DATA_ERROR 418 // on the first call instead of relying on the generic lzma_code() 419 // logic to eventually get LZMA_BUF_ERROR? 420 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); 421 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK); 422 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR); 423 424 assert_uint_eq(strm.total_out, sizeof(goodbye_world)); 425 assert_array_eq(goodbye_world, output, sizeof(goodbye_world)); 426 427 // Reset stream with uncomp_size smaller than the real 428 // uncompressed size. 429 assert_lzma_ret(lzma_microlzma_decoder(&strm, 430 goodbye_world_encoded_size, 431 ARRAY_SIZE(hello_world) - 1, true, 432 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 433 434 strm.next_in = goodbye_world_encoded; 435 strm.avail_in = goodbye_world_encoded_size; 436 strm.next_out = output; 437 strm.avail_out = sizeof(output); 438 439 // This case actually results in an error since it decodes the full 440 // uncompressed size but the range coder is not in the proper state 441 // for the stream to end. 442 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); 443 444 lzma_end(&strm); 445 } 446 447 448 static void 449 test_decode_comp_size_wrong(void) 450 { 451 lzma_stream strm = LZMA_STREAM_INIT; 452 453 // goodbye_world_encoded_size + 1 is safe because extra space was 454 // allocated for goodbye_world_encoded. The extra space isn't 455 // initialized but it shouldn't be read either, thus Valgrind 456 // has to remain happy with this code. 457 assert_lzma_ret(lzma_microlzma_decoder(&strm, 458 goodbye_world_encoded_size + 1, 459 sizeof(goodbye_world), true, 460 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 461 462 uint8_t output[BUFFER_SIZE]; 463 464 strm.next_in = goodbye_world_encoded; 465 strm.avail_in = goodbye_world_encoded_size; 466 strm.next_out = output; 467 strm.avail_out = sizeof(output); 468 469 // When uncomp_size_is_exact is set, the compressed size must be 470 // correct or else LZMA_DATA_ERROR is returned. 471 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR); 472 473 assert_lzma_ret(lzma_microlzma_decoder(&strm, 474 goodbye_world_encoded_size + 1, 475 sizeof(goodbye_world), false, 476 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 477 478 strm.next_in = goodbye_world_encoded; 479 strm.avail_in = goodbye_world_encoded_size; 480 strm.next_out = output; 481 strm.avail_out = sizeof(output); 482 483 // When uncomp_size_is_exact is not set, the decoder does not 484 // detect when the compressed size is wrong as long as all of the 485 // expected output has been decoded. This is because the decoder 486 // assumes that the real uncompressed size might be bigger than 487 // the specified value and in that case more input might be needed 488 // as well. 489 assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END); 490 491 lzma_end(&strm); 492 } 493 494 495 static void 496 test_decode_bad_lzma_properties(void) 497 { 498 // Alter first byte to encode invalid LZMA properties. 499 uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size); 500 memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size); 501 502 // lc=3, lp=2, pb=2 503 compressed[0] = (uint8_t)~0x6FU; 504 505 lzma_stream strm = LZMA_STREAM_INIT; 506 assert_lzma_ret(lzma_microlzma_decoder(&strm, 507 goodbye_world_encoded_size, 508 sizeof(goodbye_world), false, 509 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 510 511 uint8_t output[BUFFER_SIZE]; 512 513 strm.next_in = compressed; 514 strm.avail_in = goodbye_world_encoded_size; 515 strm.next_out = output; 516 strm.avail_out = sizeof(output); 517 518 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR); 519 520 // Use valid, but incorrect LZMA properties. 521 // lc=3, lp=1, pb=2 522 compressed[0] = (uint8_t)~0x66; 523 524 assert_lzma_ret(lzma_microlzma_decoder(&strm, 525 goodbye_world_encoded_size, 526 ARRAY_SIZE(goodbye_world), true, 527 LZMA_DICT_SIZE_DEFAULT), LZMA_OK); 528 529 strm.next_in = compressed; 530 strm.avail_in = goodbye_world_encoded_size; 531 strm.next_out = output; 532 strm.avail_out = sizeof(output); 533 534 assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR); 535 536 lzma_end(&strm); 537 } 538 #endif 539 540 541 extern int 542 main(int argc, char **argv) 543 { 544 tuktest_start(argc, argv); 545 546 #ifndef HAVE_ENCODER_LZMA1 547 tuktest_early_skip("LZMA1 encoder disabled"); 548 #else 549 tuktest_run(test_encode_options); 550 tuktest_run(test_encode_basic); 551 tuktest_run(test_encode_small_out); 552 tuktest_run(test_encode_actions); 553 554 // MicroLZMA decoder tests require the basic encoder functionality. 555 # ifdef HAVE_DECODER_LZMA1 556 goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world, 557 sizeof(goodbye_world), &goodbye_world_encoded); 558 559 tuktest_run(test_decode_options); 560 tuktest_run(test_decode_uncomp_size_is_exact); 561 tuktest_run(test_decode_uncomp_size_wrong); 562 tuktest_run(test_decode_comp_size_wrong); 563 tuktest_run(test_decode_bad_lzma_properties); 564 # endif 565 566 return tuktest_end(); 567 #endif 568 } 569