1 1.1 christos /* 2 1.1 christos * Copyright (c) Meta Platforms, Inc. and affiliates. 3 1.1 christos * All rights reserved. 4 1.1 christos * 5 1.1 christos * This source code is licensed under both the BSD-style license (found in the 6 1.1 christos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 1.1 christos * in the COPYING file in the root directory of this source tree). 8 1.1 christos * You may select, at your option, one of the above-listed licenses. 9 1.1 christos */ 10 1.1 christos 11 1.1 christos #include <limits.h> 12 1.1 christos #include <math.h> 13 1.1 christos #include <stddef.h> 14 1.1 christos #include <stdio.h> 15 1.1 christos #include <stdlib.h> 16 1.1 christos #include <string.h> 17 1.1 christos #include <time.h> /* time(), for seed random initialization */ 18 1.1 christos 19 1.1 christos #include "util.h" 20 1.1 christos #include "timefn.h" /* UTIL_clockSpanMicro, SEC_TO_MICRO, UTIL_TIME_INITIALIZER */ 21 1.1 christos #include "zstd.h" 22 1.1 christos #include "zstd_internal.h" 23 1.1 christos #include "mem.h" 24 1.1 christos #define ZDICT_STATIC_LINKING_ONLY 25 1.1 christos #include "zdict.h" 26 1.1 christos 27 1.1 christos /* Direct access to internal compression functions is required */ 28 1.1 christos #include "compress/zstd_compress.c" /* ZSTD_resetSeqStore, ZSTD_storeSeq, *_TO_OFFBASE, HIST_countFast_wksp, HIST_isError */ 29 1.1 christos #include "decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_deprecated */ 30 1.1 christos 31 1.1 christos #define XXH_STATIC_LINKING_ONLY 32 1.1 christos #include "xxhash.h" /* XXH64 */ 33 1.1 christos 34 1.1 christos #if !(defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) 35 1.1 christos # define inline /* disable */ 36 1.1 christos #endif 37 1.1 christos 38 1.1 christos /*-************************************ 39 1.1 christos * DISPLAY Macros 40 1.1 christos **************************************/ 41 1.1 christos #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 42 1.1 christos #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } 43 1.1 christos static U32 g_displayLevel = 2; 44 1.1 christos 45 1.1 christos #define DISPLAYUPDATE(...) \ 46 1.1 christos do { \ 47 1.1 christos if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || \ 48 1.1 christos (g_displayLevel >= 4)) { \ 49 1.1 christos g_displayClock = UTIL_getTime(); \ 50 1.1 christos DISPLAY(__VA_ARGS__); \ 51 1.1 christos if (g_displayLevel >= 4) fflush(stderr); \ 52 1.1 christos } \ 53 1.1 christos } while (0) 54 1.1 christos 55 1.1 christos static const U64 g_refreshRate = SEC_TO_MICRO / 6; 56 1.1 christos static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; 57 1.1 christos 58 1.1 christos #define CHECKERR(code) \ 59 1.1 christos do { \ 60 1.1 christos if (ZSTD_isError(code)) { \ 61 1.1 christos DISPLAY("Error occurred while generating data: %s\n", \ 62 1.1 christos ZSTD_getErrorName(code)); \ 63 1.1 christos exit(1); \ 64 1.1 christos } \ 65 1.1 christos } while (0) 66 1.1 christos 67 1.1 christos 68 1.1 christos /*-******************************************************* 69 1.1 christos * Random function 70 1.1 christos *********************************************************/ 71 1.1 christos static U32 RAND(U32* src) 72 1.1 christos { 73 1.1 christos #define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r))) 74 1.1 christos static const U32 prime1 = 2654435761U; 75 1.1 christos static const U32 prime2 = 2246822519U; 76 1.1 christos U32 rand32 = *src; 77 1.1 christos rand32 *= prime1; 78 1.1 christos rand32 += prime2; 79 1.1 christos rand32 = RAND_rotl32(rand32, 13); 80 1.1 christos *src = rand32; 81 1.1 christos return RAND_rotl32(rand32, 27); 82 1.1 christos #undef RAND_rotl32 83 1.1 christos } 84 1.1 christos 85 1.1 christos #define DISTSIZE (8192) 86 1.1 christos 87 1.1 christos /* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */ 88 1.1 christos static void RAND_bufferMaxSymb(U32* seed, void* ptr, size_t size, int maxSymb) 89 1.1 christos { 90 1.1 christos size_t i; 91 1.1 christos BYTE* op = ptr; 92 1.1 christos 93 1.1 christos for (i = 0; i < size; i++) { 94 1.1 christos op[i] = (BYTE) (RAND(seed) % (maxSymb + 1)); 95 1.1 christos } 96 1.1 christos } 97 1.1 christos 98 1.1 christos /* Write `size` random bytes into `ptr` */ 99 1.1 christos static void RAND_buffer(U32* seed, void* ptr, size_t size) 100 1.1 christos { 101 1.1 christos size_t i; 102 1.1 christos BYTE* op = ptr; 103 1.1 christos 104 1.1 christos for (i = 0; i + 4 <= size; i += 4) { 105 1.1 christos MEM_writeLE32(op + i, RAND(seed)); 106 1.1 christos } 107 1.1 christos for (; i < size; i++) { 108 1.1 christos op[i] = RAND(seed) & 0xff; 109 1.1 christos } 110 1.1 christos } 111 1.1 christos 112 1.1 christos /* Write `size` bytes into `ptr` following the distribution `dist` */ 113 1.1 christos static void RAND_bufferDist(U32* seed, BYTE* dist, void* ptr, size_t size) 114 1.1 christos { 115 1.1 christos size_t i; 116 1.1 christos BYTE* op = ptr; 117 1.1 christos 118 1.1 christos for (i = 0; i < size; i++) { 119 1.1 christos op[i] = dist[RAND(seed) % DISTSIZE]; 120 1.1 christos } 121 1.1 christos } 122 1.1 christos 123 1.1 christos /* Generate a random distribution where the frequency of each symbol follows a 124 1.1 christos * geometric distribution defined by `weight` 125 1.1 christos * `dist` should have size at least `DISTSIZE` */ 126 1.1 christos static void RAND_genDist(U32* seed, BYTE* dist, double weight) 127 1.1 christos { 128 1.1 christos size_t i = 0; 129 1.1 christos size_t statesLeft = DISTSIZE; 130 1.1 christos BYTE symb = (BYTE) (RAND(seed) % 256); 131 1.1 christos BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */ 132 1.1 christos 133 1.1 christos while (i < DISTSIZE) { 134 1.1 christos size_t states = ((size_t)(weight * (double)statesLeft)) + 1; 135 1.1 christos size_t j; 136 1.1 christos for (j = 0; j < states && i < DISTSIZE; j++, i++) { 137 1.1 christos dist[i] = symb; 138 1.1 christos } 139 1.1 christos 140 1.1 christos symb += step; 141 1.1 christos statesLeft -= states; 142 1.1 christos } 143 1.1 christos } 144 1.1 christos 145 1.1 christos /* Generates a random number in the range [min, max) */ 146 1.1 christos static inline U32 RAND_range(U32* seed, U32 min, U32 max) 147 1.1 christos { 148 1.1 christos return (RAND(seed) % (max-min)) + min; 149 1.1 christos } 150 1.1 christos 151 1.1 christos #define ROUND(x) ((U32)(x + 0.5)) 152 1.1 christos 153 1.1 christos /* Generates a random number in an exponential distribution with mean `mean` */ 154 1.1 christos static double RAND_exp(U32* seed, double mean) 155 1.1 christos { 156 1.1 christos double const u = RAND(seed) / (double) UINT_MAX; 157 1.1 christos return log(1-u) * (-mean); 158 1.1 christos } 159 1.1 christos 160 1.1 christos /*-******************************************************* 161 1.1 christos * Constants and Structs 162 1.1 christos *********************************************************/ 163 1.1 christos const char* BLOCK_TYPES[] = {"raw", "rle", "compressed"}; 164 1.1 christos 165 1.1 christos #define MAX_DECOMPRESSED_SIZE_LOG 20 166 1.1 christos #define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG) 167 1.1 christos 168 1.1 christos #define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */ 169 1.1 christos 170 1.1 christos #define MIN_SEQ_LEN (3) 171 1.1 christos #define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN) 172 1.1 christos 173 1.1 christos #ifndef MAX_PATH 174 1.1 christos #ifdef PATH_MAX 175 1.1 christos #define MAX_PATH PATH_MAX 176 1.1 christos #else 177 1.1 christos #define MAX_PATH 256 178 1.1 christos #endif 179 1.1 christos #endif 180 1.1 christos 181 1.1 christos BYTE CONTENT_BUFFER[MAX_DECOMPRESSED_SIZE]; 182 1.1 christos BYTE FRAME_BUFFER[MAX_DECOMPRESSED_SIZE * 2]; 183 1.1 christos BYTE LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; 184 1.1 christos 185 1.1 christos seqDef SEQUENCE_BUFFER[MAX_NB_SEQ]; 186 1.1 christos BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to copy literals to */ 187 1.1 christos BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX]; 188 1.1 christos BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX]; 189 1.1 christos BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX]; 190 1.1 christos 191 1.1 christos U64 WKSP[HUF_WORKSPACE_SIZE_U64]; 192 1.1 christos 193 1.1 christos typedef struct { 194 1.1 christos size_t contentSize; /* 0 means unknown (unless contentSize == windowSize == 0) */ 195 1.1 christos unsigned windowSize; /* contentSize >= windowSize means single segment */ 196 1.1 christos } frameHeader_t; 197 1.1 christos 198 1.1 christos /* For repeat modes */ 199 1.1 christos typedef struct { 200 1.1 christos U32 rep[ZSTD_REP_NUM]; 201 1.1 christos 202 1.1 christos int hufInit; 203 1.1 christos /* the distribution used in the previous block for repeat mode */ 204 1.1 christos BYTE hufDist[DISTSIZE]; 205 1.1 christos HUF_CElt hufTable [HUF_CTABLE_SIZE_ST(255)]; 206 1.1 christos 207 1.1 christos int fseInit; 208 1.1 christos FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; 209 1.1 christos FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; 210 1.1 christos FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; 211 1.1 christos 212 1.1 christos /* Symbols that were present in the previous distribution, for use with 213 1.1 christos * set_repeat */ 214 1.1 christos BYTE litlengthSymbolSet[36]; 215 1.1 christos BYTE offsetSymbolSet[29]; 216 1.1 christos BYTE matchlengthSymbolSet[53]; 217 1.1 christos } cblockStats_t; 218 1.1 christos 219 1.1 christos typedef struct { 220 1.1 christos void* data; 221 1.1 christos void* dataStart; 222 1.1 christos void* dataEnd; 223 1.1 christos 224 1.1 christos void* src; 225 1.1 christos void* srcStart; 226 1.1 christos void* srcEnd; 227 1.1 christos 228 1.1 christos frameHeader_t header; 229 1.1 christos 230 1.1 christos cblockStats_t stats; 231 1.1 christos cblockStats_t oldStats; /* so they can be rolled back if uncompressible */ 232 1.1 christos } frame_t; 233 1.1 christos 234 1.1 christos typedef struct { 235 1.1 christos int useDict; 236 1.1 christos U32 dictID; 237 1.1 christos size_t dictContentSize; 238 1.1 christos BYTE* dictContent; 239 1.1 christos } dictInfo; 240 1.1 christos 241 1.1 christos typedef enum { 242 1.1 christos gt_frame = 0, /* generate frames */ 243 1.1 christos gt_block, /* generate compressed blocks without block/frame headers */ 244 1.1 christos } genType_e; 245 1.1 christos 246 1.1 christos #ifndef MIN 247 1.1 christos #define MIN(a, b) ((a) < (b) ? (a) : (b)) 248 1.1 christos #endif 249 1.1 christos 250 1.1 christos /*-******************************************************* 251 1.1 christos * Global variables (set from command line) 252 1.1 christos *********************************************************/ 253 1.1 christos U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG; /* <= 20 */ 254 1.1 christos U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */ 255 1.1 christos 256 1.1 christos /*-******************************************************* 257 1.1 christos * Generator Functions 258 1.1 christos *********************************************************/ 259 1.1 christos 260 1.1 christos struct { 261 1.1 christos int contentSize; /* force the content size to be present */ 262 1.1 christos } opts; /* advanced options on generation */ 263 1.1 christos 264 1.1 christos /* Generate and write a random frame header */ 265 1.1 christos static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info) 266 1.1 christos { 267 1.1 christos BYTE* const op = frame->data; 268 1.1 christos size_t pos = 0; 269 1.1 christos frameHeader_t fh; 270 1.1 christos 271 1.1 christos BYTE windowByte = 0; 272 1.1 christos 273 1.1 christos int singleSegment = 0; 274 1.1 christos int contentSizeFlag = 0; 275 1.1 christos int fcsCode = 0; 276 1.1 christos 277 1.1 christos memset(&fh, 0, sizeof(fh)); 278 1.1 christos 279 1.1 christos /* generate window size */ 280 1.1 christos { 281 1.1 christos /* Follow window algorithm from specification */ 282 1.1 christos int const exponent = RAND(seed) % (MAX_WINDOW_LOG - 10); 283 1.1 christos int const mantissa = RAND(seed) % 8; 284 1.1 christos windowByte = (BYTE) ((exponent << 3) | mantissa); 285 1.1 christos fh.windowSize = (1U << (exponent + 10)); 286 1.1 christos fh.windowSize += fh.windowSize / 8 * mantissa; 287 1.1 christos } 288 1.1 christos 289 1.1 christos { 290 1.1 christos /* Generate random content size */ 291 1.1 christos size_t highBit; 292 1.1 christos if (RAND(seed) & 7 && g_maxDecompressedSizeLog > 7) { 293 1.1 christos /* do content of at least 128 bytes */ 294 1.1 christos highBit = 1ULL << RAND_range(seed, 7, g_maxDecompressedSizeLog); 295 1.1 christos } else if (RAND(seed) & 3) { 296 1.1 christos /* do small content */ 297 1.1 christos highBit = 1ULL << RAND_range(seed, 0, MIN(7, 1U << g_maxDecompressedSizeLog)); 298 1.1 christos } else { 299 1.1 christos /* 0 size frame */ 300 1.1 christos highBit = 0; 301 1.1 christos } 302 1.1 christos fh.contentSize = highBit ? highBit + (RAND(seed) % highBit) : 0; 303 1.1 christos 304 1.1 christos /* provide size sometimes */ 305 1.1 christos contentSizeFlag = opts.contentSize | (RAND(seed) & 1); 306 1.1 christos 307 1.1 christos if (contentSizeFlag && (fh.contentSize == 0 || !(RAND(seed) & 7))) { 308 1.1 christos /* do single segment sometimes */ 309 1.1 christos fh.windowSize = (U32) fh.contentSize; 310 1.1 christos singleSegment = 1; 311 1.1 christos } 312 1.1 christos } 313 1.1 christos 314 1.1 christos if (contentSizeFlag) { 315 1.1 christos /* Determine how large fcs field has to be */ 316 1.1 christos int minFcsCode = (fh.contentSize >= 256) + 317 1.1 christos (fh.contentSize >= 65536 + 256) + 318 1.1 christos (fh.contentSize > 0xFFFFFFFFU); 319 1.1 christos if (!singleSegment && !minFcsCode) { 320 1.1 christos minFcsCode = 1; 321 1.1 christos } 322 1.1 christos fcsCode = minFcsCode + (RAND(seed) % (4 - minFcsCode)); 323 1.1 christos if (fcsCode == 1 && fh.contentSize < 256) fcsCode++; 324 1.1 christos } 325 1.1 christos 326 1.1 christos /* write out the header */ 327 1.1 christos MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER); 328 1.1 christos pos += 4; 329 1.1 christos 330 1.1 christos { 331 1.1 christos /* 332 1.1 christos * fcsCode: 2-bit flag specifying how many bytes used to represent Frame_Content_Size (bits 7-6) 333 1.1 christos * singleSegment: 1-bit flag describing if data must be regenerated within a single continuous memory segment. (bit 5) 334 1.1 christos * contentChecksumFlag: 1-bit flag that is set if frame includes checksum at the end -- set to 1 below (bit 2) 335 1.1 christos * dictBits: 2-bit flag describing how many bytes Dictionary_ID uses -- set to 3 (bits 1-0) 336 1.1 christos * For more information: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header 337 1.1 christos */ 338 1.1 christos int const dictBits = info.useDict ? 3 : 0; 339 1.1 christos BYTE const frameHeaderDescriptor = 340 1.1 christos (BYTE) ((fcsCode << 6) | (singleSegment << 5) | (1 << 2) | dictBits); 341 1.1 christos op[pos++] = frameHeaderDescriptor; 342 1.1 christos } 343 1.1 christos 344 1.1 christos if (!singleSegment) { 345 1.1 christos op[pos++] = windowByte; 346 1.1 christos } 347 1.1 christos if (info.useDict) { 348 1.1 christos MEM_writeLE32(op + pos, (U32) info.dictID); 349 1.1 christos pos += 4; 350 1.1 christos } 351 1.1 christos if (contentSizeFlag) { 352 1.1 christos switch (fcsCode) { 353 1.1 christos default: /* Impossible */ 354 1.1 christos case 0: op[pos++] = (BYTE) fh.contentSize; break; 355 1.1 christos case 1: MEM_writeLE16(op + pos, (U16) (fh.contentSize - 256)); pos += 2; break; 356 1.1 christos case 2: MEM_writeLE32(op + pos, (U32) fh.contentSize); pos += 4; break; 357 1.1 christos case 3: MEM_writeLE64(op + pos, (U64) fh.contentSize); pos += 8; break; 358 1.1 christos } 359 1.1 christos } 360 1.1 christos 361 1.1 christos DISPLAYLEVEL(3, " frame content size:\t%u\n", (unsigned)fh.contentSize); 362 1.1 christos DISPLAYLEVEL(3, " frame window size:\t%u\n", fh.windowSize); 363 1.1 christos DISPLAYLEVEL(3, " content size flag:\t%d\n", contentSizeFlag); 364 1.1 christos DISPLAYLEVEL(3, " single segment flag:\t%d\n", singleSegment); 365 1.1 christos 366 1.1 christos frame->data = op + pos; 367 1.1 christos frame->header = fh; 368 1.1 christos } 369 1.1 christos 370 1.1 christos /* Write a literal block in either raw or RLE form, return the literals size */ 371 1.1 christos static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize) 372 1.1 christos { 373 1.1 christos BYTE* op = (BYTE*)frame->data; 374 1.1 christos int const type = RAND(seed) % 2; 375 1.1 christos int const sizeFormatDesc = RAND(seed) % 8; 376 1.1 christos size_t litSize; 377 1.1 christos size_t maxLitSize = MIN(contentSize, g_maxBlockSize); 378 1.1 christos 379 1.1 christos if (sizeFormatDesc == 0) { 380 1.1 christos /* Size_FormatDesc = ?0 */ 381 1.1 christos maxLitSize = MIN(maxLitSize, 31); 382 1.1 christos } else if (sizeFormatDesc <= 4) { 383 1.1 christos /* Size_FormatDesc = 01 */ 384 1.1 christos maxLitSize = MIN(maxLitSize, 4095); 385 1.1 christos } else { 386 1.1 christos /* Size_Format = 11 */ 387 1.1 christos maxLitSize = MIN(maxLitSize, 1048575); 388 1.1 christos } 389 1.1 christos 390 1.1 christos litSize = RAND(seed) % (maxLitSize + 1); 391 1.1 christos if (frame->src == frame->srcStart && litSize == 0) { 392 1.1 christos litSize = 1; /* no empty literals if there's nothing preceding this block */ 393 1.1 christos } 394 1.1 christos if (litSize + 3 > contentSize) { 395 1.1 christos litSize = contentSize; /* no matches shorter than 3 are allowed */ 396 1.1 christos } 397 1.1 christos /* use smallest size format that fits */ 398 1.1 christos if (litSize < 32) { 399 1.1 christos op[0] = (type | (0 << 2) | (litSize << 3)) & 0xff; 400 1.1 christos op += 1; 401 1.1 christos } else if (litSize < 4096) { 402 1.1 christos op[0] = (type | (1 << 2) | (litSize << 4)) & 0xff; 403 1.1 christos op[1] = (litSize >> 4) & 0xff; 404 1.1 christos op += 2; 405 1.1 christos } else { 406 1.1 christos op[0] = (type | (3 << 2) | (litSize << 4)) & 0xff; 407 1.1 christos op[1] = (litSize >> 4) & 0xff; 408 1.1 christos op[2] = (litSize >> 12) & 0xff; 409 1.1 christos op += 3; 410 1.1 christos } 411 1.1 christos 412 1.1 christos if (type == 0) { 413 1.1 christos /* Raw literals */ 414 1.1 christos DISPLAYLEVEL(4, " raw literals\n"); 415 1.1 christos 416 1.1 christos RAND_buffer(seed, LITERAL_BUFFER, litSize); 417 1.1 christos memcpy(op, LITERAL_BUFFER, litSize); 418 1.1 christos op += litSize; 419 1.1 christos } else { 420 1.1 christos /* RLE literals */ 421 1.1 christos BYTE const symb = (BYTE) (RAND(seed) % 256); 422 1.1 christos 423 1.1 christos DISPLAYLEVEL(4, " rle literals: 0x%02x\n", (unsigned)symb); 424 1.1 christos 425 1.1 christos memset(LITERAL_BUFFER, symb, litSize); 426 1.1 christos op[0] = symb; 427 1.1 christos op++; 428 1.1 christos } 429 1.1 christos 430 1.1 christos frame->data = op; 431 1.1 christos 432 1.1 christos return litSize; 433 1.1 christos } 434 1.1 christos 435 1.1 christos /* Generate a Huffman header for the given source */ 436 1.1 christos static size_t writeHufHeader(U32* seed, HUF_CElt* hufTable, void* dst, size_t dstSize, 437 1.1 christos const void* src, size_t srcSize) 438 1.1 christos { 439 1.1 christos BYTE* const ostart = (BYTE*)dst; 440 1.1 christos BYTE* op = ostart; 441 1.1 christos 442 1.1 christos unsigned huffLog = 11; 443 1.1 christos unsigned maxSymbolValue = 255; 444 1.1 christos 445 1.1 christos unsigned count[HUF_SYMBOLVALUE_MAX+1]; 446 1.1 christos 447 1.1 christos /* Scan input and build symbol stats */ 448 1.1 christos { size_t const largest = HIST_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, WKSP, sizeof(WKSP)); 449 1.1 christos assert(!HIST_isError(largest)); 450 1.1 christos if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 0; } /* single symbol, rle */ 451 1.1 christos if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ 452 1.1 christos } 453 1.1 christos 454 1.1 christos /* Build Huffman Tree */ 455 1.1 christos /* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */ 456 1.1 christos huffLog = RAND_range(seed, ZSTD_highbit32(maxSymbolValue)+1, huffLog+1); 457 1.1 christos DISPLAYLEVEL(6, " huffman log: %u\n", huffLog); 458 1.1 christos { size_t const maxBits = HUF_buildCTable_wksp (hufTable, count, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); 459 1.1 christos CHECKERR(maxBits); 460 1.1 christos huffLog = (U32)maxBits; 461 1.1 christos } 462 1.1 christos 463 1.1 christos /* Write table description header */ 464 1.1 christos { size_t const hSize = HUF_writeCTable_wksp (op, dstSize, hufTable, maxSymbolValue, huffLog, WKSP, sizeof(WKSP)); 465 1.1 christos if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ 466 1.1 christos op += hSize; 467 1.1 christos } 468 1.1 christos 469 1.1 christos return op - ostart; 470 1.1 christos } 471 1.1 christos 472 1.1 christos /* Write a Huffman coded literals block and return the literals size */ 473 1.1 christos static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t contentSize) 474 1.1 christos { 475 1.1 christos BYTE* origop = (BYTE*)frame->data; 476 1.1 christos BYTE* opend = (BYTE*)frame->dataEnd; 477 1.1 christos BYTE* op; 478 1.1 christos BYTE* const ostart = origop; 479 1.1 christos int const sizeFormat = RAND(seed) % 4; 480 1.1 christos size_t litSize; 481 1.1 christos size_t hufHeaderSize = 0; 482 1.1 christos size_t compressedSize = 0; 483 1.1 christos size_t maxLitSize = MIN(contentSize-3, g_maxBlockSize); 484 1.1 christos 485 1.1 christos symbolEncodingType_e hType; 486 1.1 christos 487 1.1 christos if (contentSize < 64) { 488 1.1 christos /* make sure we get reasonably-sized literals for compression */ 489 1.1 christos return ERROR(GENERIC); 490 1.1 christos } 491 1.1 christos 492 1.1 christos DISPLAYLEVEL(4, " compressed literals\n"); 493 1.1 christos 494 1.1 christos switch (sizeFormat) { 495 1.1 christos case 0: /* fall through, size is the same as case 1 */ 496 1.1 christos case 1: 497 1.1 christos maxLitSize = MIN(maxLitSize, 1023); 498 1.1 christos origop += 3; 499 1.1 christos break; 500 1.1 christos case 2: 501 1.1 christos maxLitSize = MIN(maxLitSize, 16383); 502 1.1 christos origop += 4; 503 1.1 christos break; 504 1.1 christos case 3: 505 1.1 christos maxLitSize = MIN(maxLitSize, 262143); 506 1.1 christos origop += 5; 507 1.1 christos break; 508 1.1 christos default:; /* impossible */ 509 1.1 christos } 510 1.1 christos 511 1.1 christos do { 512 1.1 christos op = origop; 513 1.1 christos do { 514 1.1 christos litSize = RAND(seed) % (maxLitSize + 1); 515 1.1 christos } while (litSize < 32); /* avoid small literal sizes */ 516 1.1 christos if (litSize + 3 > contentSize) { 517 1.1 christos litSize = contentSize; /* no matches shorter than 3 are allowed */ 518 1.1 christos } 519 1.1 christos 520 1.1 christos /* most of the time generate a new distribution */ 521 1.1 christos if ((RAND(seed) & 3) || !frame->stats.hufInit) { 522 1.1 christos do { 523 1.1 christos if (RAND(seed) & 3) { 524 1.1 christos /* add 10 to ensure some compressibility */ 525 1.1 christos double const weight = ((RAND(seed) % 90) + 10) / 100.0; 526 1.1 christos 527 1.1 christos DISPLAYLEVEL(5, " distribution weight: %d%%\n", 528 1.1 christos (int)(weight * 100)); 529 1.1 christos 530 1.1 christos RAND_genDist(seed, frame->stats.hufDist, weight); 531 1.1 christos } else { 532 1.1 christos /* sometimes do restricted range literals to force 533 1.1 christos * non-huffman headers */ 534 1.1 christos DISPLAYLEVEL(5, " small range literals\n"); 535 1.1 christos RAND_bufferMaxSymb(seed, frame->stats.hufDist, DISTSIZE, 536 1.1 christos 15); 537 1.1 christos } 538 1.1 christos RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 539 1.1 christos litSize); 540 1.1 christos 541 1.1 christos /* generate the header from the distribution instead of the 542 1.1 christos * actual data to avoid bugs with symbols that were in the 543 1.1 christos * distribution but never showed up in the output */ 544 1.1 christos hufHeaderSize = writeHufHeader( 545 1.1 christos seed, frame->stats.hufTable, op, opend - op, 546 1.1 christos frame->stats.hufDist, DISTSIZE); 547 1.1 christos CHECKERR(hufHeaderSize); 548 1.1 christos /* repeat until a valid header is written */ 549 1.1 christos } while (hufHeaderSize == 0); 550 1.1 christos op += hufHeaderSize; 551 1.1 christos hType = set_compressed; 552 1.1 christos 553 1.1 christos frame->stats.hufInit = 1; 554 1.1 christos } else { 555 1.1 christos /* repeat the distribution/table from last time */ 556 1.1 christos DISPLAYLEVEL(5, " huffman repeat stats\n"); 557 1.1 christos RAND_bufferDist(seed, frame->stats.hufDist, LITERAL_BUFFER, 558 1.1 christos litSize); 559 1.1 christos hufHeaderSize = 0; 560 1.1 christos hType = set_repeat; 561 1.1 christos } 562 1.1 christos 563 1.1 christos do { 564 1.1 christos compressedSize = 565 1.1 christos sizeFormat == 0 566 1.1 christos ? HUF_compress1X_usingCTable( 567 1.1 christos op, opend - op, LITERAL_BUFFER, litSize, 568 1.1 christos frame->stats.hufTable, /* flags */ 0) 569 1.1 christos : HUF_compress4X_usingCTable( 570 1.1 christos op, opend - op, LITERAL_BUFFER, litSize, 571 1.1 christos frame->stats.hufTable, /* flags */ 0); 572 1.1 christos CHECKERR(compressedSize); 573 1.1 christos /* this only occurs when it could not compress or similar */ 574 1.1 christos } while (compressedSize <= 0); 575 1.1 christos 576 1.1 christos op += compressedSize; 577 1.1 christos 578 1.1 christos compressedSize += hufHeaderSize; 579 1.1 christos DISPLAYLEVEL(5, " regenerated size: %u\n", (unsigned)litSize); 580 1.1 christos DISPLAYLEVEL(5, " compressed size: %u\n", (unsigned)compressedSize); 581 1.1 christos if (compressedSize >= litSize) { 582 1.1 christos DISPLAYLEVEL(5, " trying again\n"); 583 1.1 christos /* if we have to try again, reset the stats so we don't accidentally 584 1.1 christos * try to repeat a distribution we just made */ 585 1.1 christos frame->stats = frame->oldStats; 586 1.1 christos } else { 587 1.1 christos break; 588 1.1 christos } 589 1.1 christos } while (1); 590 1.1 christos 591 1.1 christos /* write header */ 592 1.1 christos switch (sizeFormat) { 593 1.1 christos case 0: /* fall through, size is the same as case 1 */ 594 1.1 christos case 1: { 595 1.1 christos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 596 1.1 christos ((U32)compressedSize << 14); 597 1.1 christos MEM_writeLE24(ostart, header); 598 1.1 christos break; 599 1.1 christos } 600 1.1 christos case 2: { 601 1.1 christos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 602 1.1 christos ((U32)compressedSize << 18); 603 1.1 christos MEM_writeLE32(ostart, header); 604 1.1 christos break; 605 1.1 christos } 606 1.1 christos case 3: { 607 1.1 christos U32 const header = hType | (sizeFormat << 2) | ((U32)litSize << 4) | 608 1.1 christos ((U32)compressedSize << 22); 609 1.1 christos MEM_writeLE32(ostart, header); 610 1.1 christos ostart[4] = (BYTE)(compressedSize >> 10); 611 1.1 christos break; 612 1.1 christos } 613 1.1 christos default:; /* impossible */ 614 1.1 christos } 615 1.1 christos 616 1.1 christos frame->data = op; 617 1.1 christos return litSize; 618 1.1 christos } 619 1.1 christos 620 1.1 christos static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize) 621 1.1 christos { 622 1.1 christos /* only do compressed for larger segments to avoid compressibility issues */ 623 1.1 christos if (RAND(seed) & 7 && contentSize >= 64) { 624 1.1 christos return writeLiteralsBlockCompressed(seed, frame, contentSize); 625 1.1 christos } else { 626 1.1 christos return writeLiteralsBlockSimple(seed, frame, contentSize); 627 1.1 christos } 628 1.1 christos } 629 1.1 christos 630 1.1 christos static inline void initSeqStore(seqStore_t *seqStore) { 631 1.1 christos seqStore->maxNbSeq = MAX_NB_SEQ; 632 1.1 christos seqStore->maxNbLit = ZSTD_BLOCKSIZE_MAX; 633 1.1 christos seqStore->sequencesStart = SEQUENCE_BUFFER; 634 1.1 christos seqStore->litStart = SEQUENCE_LITERAL_BUFFER; 635 1.1 christos seqStore->llCode = SEQUENCE_LLCODE; 636 1.1 christos seqStore->mlCode = SEQUENCE_MLCODE; 637 1.1 christos seqStore->ofCode = SEQUENCE_OFCODE; 638 1.1 christos 639 1.1 christos ZSTD_resetSeqStore(seqStore); 640 1.1 christos } 641 1.1 christos 642 1.1 christos /* Randomly generate sequence commands */ 643 1.1 christos static U32 644 1.1 christos generateSequences(U32* seed, frame_t* frame, seqStore_t* seqStore, 645 1.1 christos size_t contentSize, size_t literalsSize, dictInfo info) 646 1.1 christos { 647 1.1 christos /* The total length of all the matches */ 648 1.1 christos size_t const remainingMatch = contentSize - literalsSize; 649 1.1 christos size_t excessMatch = 0; 650 1.1 christos U32 numSequences = 0; 651 1.1 christos U32 i; 652 1.1 christos 653 1.1 christos const BYTE* literals = LITERAL_BUFFER; 654 1.1 christos BYTE* srcPtr = frame->src; 655 1.1 christos 656 1.1 christos if (literalsSize != contentSize) { 657 1.1 christos /* each match must be at least MIN_SEQ_LEN, so this is the maximum 658 1.1 christos * number of sequences we can have */ 659 1.1 christos U32 const maxSequences = (U32)remainingMatch / MIN_SEQ_LEN; 660 1.1 christos numSequences = (RAND(seed) % maxSequences) + 1; 661 1.1 christos 662 1.1 christos /* the extra match lengths we have to allocate to each sequence */ 663 1.1 christos excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN; 664 1.1 christos } 665 1.1 christos 666 1.1 christos DISPLAYLEVEL(5, " total match lengths: %u\n", (unsigned)remainingMatch); 667 1.1 christos for (i = 0; i < numSequences; i++) { 668 1.1 christos /* Generate match and literal lengths by exponential distribution to 669 1.1 christos * ensure nice numbers */ 670 1.1 christos U32 matchLen = 671 1.1 christos MIN_SEQ_LEN + 672 1.1 christos ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i))); 673 1.1 christos U32 literalLen = 674 1.1 christos (RAND(seed) & 7) 675 1.1 christos ? ROUND(RAND_exp(seed, 676 1.1 christos (double)literalsSize / 677 1.1 christos (double)(numSequences - i))) 678 1.1 christos : 0; 679 1.1 christos /* actual offset, code to send, and point to copy up to when shifting 680 1.1 christos * codes in the repeat offsets history */ 681 1.1 christos U32 offset, offBase, repIndex; 682 1.1 christos 683 1.1 christos /* bounds checks */ 684 1.1 christos matchLen = (U32) MIN(matchLen, excessMatch + MIN_SEQ_LEN); 685 1.1 christos literalLen = MIN(literalLen, (U32) literalsSize); 686 1.1 christos if (i == 0 && srcPtr == frame->srcStart && literalLen == 0) literalLen = 1; 687 1.1 christos if (i + 1 == numSequences) matchLen = MIN_SEQ_LEN + (U32) excessMatch; 688 1.1 christos 689 1.1 christos memcpy(srcPtr, literals, literalLen); 690 1.1 christos srcPtr += literalLen; 691 1.1 christos do { 692 1.1 christos if (RAND(seed) & 7) { 693 1.1 christos /* do a normal offset */ 694 1.1 christos U32 const dataDecompressed = (U32)((BYTE*)srcPtr-(BYTE*)frame->srcStart); 695 1.1 christos offset = (RAND(seed) % 696 1.1 christos MIN(frame->header.windowSize, 697 1.1 christos (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) + 698 1.1 christos 1; 699 1.1 christos if (info.useDict && (RAND(seed) & 1) && i + 1 != numSequences && dataDecompressed < frame->header.windowSize) { 700 1.1 christos /* need to occasionally generate offsets that go past the start */ 701 1.1 christos /* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */ 702 1.1 christos U32 lenPastStart = (RAND(seed) % info.dictContentSize) + 1; 703 1.1 christos offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart)+lenPastStart; 704 1.1 christos if (offset > frame->header.windowSize) { 705 1.1 christos if (lenPastStart < MIN_SEQ_LEN) { 706 1.1 christos /* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */ 707 1.1 christos /* this also means that lenPastStart must be greater than MIN_SEQ_LEN */ 708 1.1 christos /* make sure lenPastStart does not go past dictionary start though */ 709 1.1 christos lenPastStart = MIN(lenPastStart+MIN_SEQ_LEN, (U32)info.dictContentSize); 710 1.1 christos offset = (U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) + lenPastStart; 711 1.1 christos } 712 1.1 christos { U32 const matchLenBound = MIN(frame->header.windowSize, lenPastStart); 713 1.1 christos matchLen = MIN(matchLen, matchLenBound); 714 1.1 christos } 715 1.1 christos } 716 1.1 christos } 717 1.1 christos offBase = OFFSET_TO_OFFBASE(offset); 718 1.1 christos repIndex = 2; 719 1.1 christos } else { 720 1.1 christos /* do a repeat offset */ 721 1.1 christos U32 const randomRepIndex = RAND(seed) % 3; 722 1.1 christos offBase = REPCODE_TO_OFFBASE(randomRepIndex + 1); /* expects values between 1 & 3 */ 723 1.1 christos if (literalLen > 0) { 724 1.1 christos offset = frame->stats.rep[randomRepIndex]; 725 1.1 christos repIndex = randomRepIndex; 726 1.1 christos } else { 727 1.1 christos /* special case : literalLen == 0 */ 728 1.1 christos offset = randomRepIndex == 2 ? frame->stats.rep[0] - 1 729 1.1 christos : frame->stats.rep[randomRepIndex + 1]; 730 1.1 christos repIndex = MIN(2, randomRepIndex + 1); 731 1.1 christos } 732 1.1 christos } 733 1.1 christos } while (((!info.useDict) && (offset > (size_t)((BYTE*)srcPtr - (BYTE*)frame->srcStart))) || offset == 0); 734 1.1 christos 735 1.1 christos { BYTE* const dictEnd = ZSTD_maybeNullPtrAdd(info.dictContent, info.dictContentSize); 736 1.1 christos size_t j; 737 1.1 christos for (j = 0; j < matchLen; j++) { 738 1.1 christos if ((U32)((BYTE*)srcPtr - (BYTE*)frame->srcStart) < offset) { 739 1.1 christos /* copy from dictionary instead of literals */ 740 1.1 christos size_t const dictOffset = offset - (srcPtr - (BYTE*)frame->srcStart); 741 1.1 christos *srcPtr = *(dictEnd - dictOffset); 742 1.1 christos } 743 1.1 christos else { 744 1.1 christos *srcPtr = *(srcPtr-offset); 745 1.1 christos } 746 1.1 christos srcPtr++; 747 1.1 christos } } 748 1.1 christos 749 1.1 christos { int r; 750 1.1 christos for (r = repIndex; r > 0; r--) { 751 1.1 christos frame->stats.rep[r] = frame->stats.rep[r - 1]; 752 1.1 christos } 753 1.1 christos frame->stats.rep[0] = offset; 754 1.1 christos } 755 1.1 christos 756 1.1 christos DISPLAYLEVEL(6, " LL: %5u OF: %5u ML: %5u", 757 1.1 christos (unsigned)literalLen, (unsigned)offset, (unsigned)matchLen); 758 1.1 christos DISPLAYLEVEL(7, " srcPos: %8u seqNb: %3u", 759 1.1 christos (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart), (unsigned)i); 760 1.1 christos DISPLAYLEVEL(6, "\n"); 761 1.1 christos if (OFFBASE_IS_REPCODE(offBase)) { /* expects sumtype numeric representation of ZSTD_storeSeq() */ 762 1.1 christos DISPLAYLEVEL(7, " repeat offset: %d\n", (int)repIndex); 763 1.1 christos } 764 1.1 christos /* use libzstd sequence handling */ 765 1.1 christos ZSTD_storeSeq(seqStore, literalLen, literals, literals + literalLen, 766 1.1 christos offBase, matchLen); 767 1.1 christos 768 1.1 christos literalsSize -= literalLen; 769 1.1 christos excessMatch -= (matchLen - MIN_SEQ_LEN); 770 1.1 christos literals += literalLen; 771 1.1 christos } 772 1.1 christos 773 1.1 christos memcpy(srcPtr, literals, literalsSize); 774 1.1 christos srcPtr += literalsSize; 775 1.1 christos DISPLAYLEVEL(6, " excess literals: %5u ", (unsigned)literalsSize); 776 1.1 christos DISPLAYLEVEL(7, "srcPos: %8u ", (unsigned)((BYTE*)srcPtr - (BYTE*)frame->srcStart)); 777 1.1 christos DISPLAYLEVEL(6, "\n"); 778 1.1 christos 779 1.1 christos return numSequences; 780 1.1 christos } 781 1.1 christos 782 1.1 christos static void initSymbolSet(const BYTE* symbols, size_t len, BYTE* set, BYTE maxSymbolValue) 783 1.1 christos { 784 1.1 christos size_t i; 785 1.1 christos 786 1.1 christos memset(set, 0, (size_t)maxSymbolValue+1); 787 1.1 christos 788 1.1 christos for (i = 0; i < len; i++) { 789 1.1 christos set[symbols[i]] = 1; 790 1.1 christos } 791 1.1 christos } 792 1.1 christos 793 1.1 christos static int isSymbolSubset(const BYTE* symbols, size_t len, const BYTE* set, BYTE maxSymbolValue) 794 1.1 christos { 795 1.1 christos size_t i; 796 1.1 christos 797 1.1 christos for (i = 0; i < len; i++) { 798 1.1 christos if (symbols[i] > maxSymbolValue || !set[symbols[i]]) { 799 1.1 christos return 0; 800 1.1 christos } 801 1.1 christos } 802 1.1 christos return 1; 803 1.1 christos } 804 1.1 christos 805 1.1 christos static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, 806 1.1 christos size_t nbSeq) 807 1.1 christos { 808 1.1 christos /* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */ 809 1.1 christos unsigned count[MaxSeq+1]; 810 1.1 christos S16 norm[MaxSeq+1]; 811 1.1 christos FSE_CTable* CTable_LitLength = frame->stats.litlengthCTable; 812 1.1 christos FSE_CTable* CTable_OffsetBits = frame->stats.offcodeCTable; 813 1.1 christos FSE_CTable* CTable_MatchLength = frame->stats.matchlengthCTable; 814 1.1 christos U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ 815 1.1 christos const seqDef* const sequences = seqStorePtr->sequencesStart; 816 1.1 christos const BYTE* const ofCodeTable = seqStorePtr->ofCode; 817 1.1 christos const BYTE* const llCodeTable = seqStorePtr->llCode; 818 1.1 christos const BYTE* const mlCodeTable = seqStorePtr->mlCode; 819 1.1 christos BYTE* const oend = (BYTE*)frame->dataEnd; 820 1.1 christos BYTE* op = (BYTE*)frame->data; 821 1.1 christos BYTE* seqHead; 822 1.1 christos BYTE scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog)]; 823 1.1 christos 824 1.1 christos /* literals compressing block removed so that can be done separately */ 825 1.1 christos 826 1.1 christos /* Sequences Header */ 827 1.1 christos if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall); 828 1.1 christos if (nbSeq < 128) *op++ = (BYTE)nbSeq; 829 1.1 christos else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; 830 1.1 christos else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; 831 1.1 christos 832 1.1 christos if (nbSeq==0) { 833 1.1 christos frame->data = op; 834 1.1 christos return 0; 835 1.1 christos } 836 1.1 christos 837 1.1 christos /* seqHead : flags for FSE encoding type */ 838 1.1 christos seqHead = op++; 839 1.1 christos 840 1.1 christos /* convert length/distances into codes */ 841 1.1 christos ZSTD_seqToCodes(seqStorePtr); 842 1.1 christos 843 1.1 christos /* CTable for Literal Lengths */ 844 1.1 christos { unsigned max = MaxLL; 845 1.1 christos size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 846 1.1 christos assert(!HIST_isError(mostFrequent)); 847 1.1 christos if (frame->stats.fseInit && !(RAND(seed) & 3) && 848 1.1 christos isSymbolSubset(llCodeTable, nbSeq, 849 1.1 christos frame->stats.litlengthSymbolSet, 35)) { 850 1.1 christos /* maybe do repeat mode if we're allowed to */ 851 1.1 christos LLtype = set_repeat; 852 1.1 christos } else if (mostFrequent == nbSeq) { 853 1.1 christos /* do RLE if we have the chance */ 854 1.1 christos *op++ = llCodeTable[0]; 855 1.1 christos FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); 856 1.1 christos LLtype = set_rle; 857 1.1 christos } else if (!(RAND(seed) & 3)) { 858 1.1 christos /* maybe use the default distribution */ 859 1.1 christos CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer))); 860 1.1 christos LLtype = set_basic; 861 1.1 christos } else { 862 1.1 christos /* fall back on a full table */ 863 1.1 christos size_t nbSeq_1 = nbSeq; 864 1.1 christos const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); 865 1.1 christos if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; } 866 1.1 christos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 867 1.1 christos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 868 1.1 christos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 869 1.1 christos op += NCountSize; } 870 1.1 christos CHECKERR(FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer))); 871 1.1 christos LLtype = set_compressed; 872 1.1 christos } } 873 1.1 christos 874 1.1 christos /* CTable for Offsets */ 875 1.1 christos /* see Literal Lengths for descriptions of mode choices */ 876 1.1 christos { unsigned max = MaxOff; 877 1.1 christos size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 878 1.1 christos assert(!HIST_isError(mostFrequent)); 879 1.1 christos if (frame->stats.fseInit && !(RAND(seed) & 3) && 880 1.1 christos isSymbolSubset(ofCodeTable, nbSeq, 881 1.1 christos frame->stats.offsetSymbolSet, 28)) { 882 1.1 christos Offtype = set_repeat; 883 1.1 christos } else if (mostFrequent == nbSeq) { 884 1.1 christos *op++ = ofCodeTable[0]; 885 1.1 christos FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); 886 1.1 christos Offtype = set_rle; 887 1.1 christos } else if (!(RAND(seed) & 3)) { 888 1.1 christos FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 889 1.1 christos Offtype = set_basic; 890 1.1 christos } else { 891 1.1 christos size_t nbSeq_1 = nbSeq; 892 1.1 christos const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); 893 1.1 christos if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; } 894 1.1 christos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 895 1.1 christos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 896 1.1 christos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 897 1.1 christos op += NCountSize; } 898 1.1 christos FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 899 1.1 christos Offtype = set_compressed; 900 1.1 christos } } 901 1.1 christos 902 1.1 christos /* CTable for MatchLengths */ 903 1.1 christos /* see Literal Lengths for descriptions of mode choices */ 904 1.1 christos { unsigned max = MaxML; 905 1.1 christos size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, WKSP, sizeof(WKSP)); /* cannot fail */ 906 1.1 christos assert(!HIST_isError(mostFrequent)); 907 1.1 christos if (frame->stats.fseInit && !(RAND(seed) & 3) && 908 1.1 christos isSymbolSubset(mlCodeTable, nbSeq, 909 1.1 christos frame->stats.matchlengthSymbolSet, 52)) { 910 1.1 christos MLtype = set_repeat; 911 1.1 christos } else if (mostFrequent == nbSeq) { 912 1.1 christos *op++ = *mlCodeTable; 913 1.1 christos FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); 914 1.1 christos MLtype = set_rle; 915 1.1 christos } else if (!(RAND(seed) & 3)) { 916 1.1 christos /* sometimes do default distribution */ 917 1.1 christos FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); 918 1.1 christos MLtype = set_basic; 919 1.1 christos } else { 920 1.1 christos /* fall back on table */ 921 1.1 christos size_t nbSeq_1 = nbSeq; 922 1.1 christos const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); 923 1.1 christos if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; } 924 1.1 christos FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, nbSeq >= 2048); 925 1.1 christos { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ 926 1.1 christos if (FSE_isError(NCountSize)) return ERROR(GENERIC); 927 1.1 christos op += NCountSize; } 928 1.1 christos FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); 929 1.1 christos MLtype = set_compressed; 930 1.1 christos } } 931 1.1 christos frame->stats.fseInit = 1; 932 1.1 christos initSymbolSet(llCodeTable, nbSeq, frame->stats.litlengthSymbolSet, 35); 933 1.1 christos initSymbolSet(ofCodeTable, nbSeq, frame->stats.offsetSymbolSet, 28); 934 1.1 christos initSymbolSet(mlCodeTable, nbSeq, frame->stats.matchlengthSymbolSet, 52); 935 1.1 christos 936 1.1 christos DISPLAYLEVEL(5, " LL type: %d OF type: %d ML type: %d\n", (unsigned)LLtype, (unsigned)Offtype, (unsigned)MLtype); 937 1.1 christos 938 1.1 christos *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); 939 1.1 christos 940 1.1 christos /* Encoding Sequences */ 941 1.1 christos { BIT_CStream_t blockStream; 942 1.1 christos FSE_CState_t stateMatchLength; 943 1.1 christos FSE_CState_t stateOffsetBits; 944 1.1 christos FSE_CState_t stateLitLength; 945 1.1 christos 946 1.1 christos RETURN_ERROR_IF( 947 1.1 christos ERR_isError(BIT_initCStream(&blockStream, op, oend-op)), 948 1.1 christos dstSize_tooSmall, "not enough space remaining"); 949 1.1 christos 950 1.1 christos /* first symbols */ 951 1.1 christos FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); 952 1.1 christos FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); 953 1.1 christos FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); 954 1.1 christos BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); 955 1.1 christos if (MEM_32bits()) BIT_flushBits(&blockStream); 956 1.1 christos BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]); 957 1.1 christos if (MEM_32bits()) BIT_flushBits(&blockStream); 958 1.1 christos BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]); 959 1.1 christos BIT_flushBits(&blockStream); 960 1.1 christos 961 1.1 christos { size_t n; 962 1.1 christos for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ 963 1.1 christos BYTE const llCode = llCodeTable[n]; 964 1.1 christos BYTE const ofCode = ofCodeTable[n]; 965 1.1 christos BYTE const mlCode = mlCodeTable[n]; 966 1.1 christos U32 const llBits = LL_bits[llCode]; 967 1.1 christos U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ 968 1.1 christos U32 const mlBits = ML_bits[mlCode]; 969 1.1 christos /* (7)*/ /* (7)*/ 970 1.1 christos FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ 971 1.1 christos FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ 972 1.1 christos if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 973 1.1 christos FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ 974 1.1 christos if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) 975 1.1 christos BIT_flushBits(&blockStream); /* (7)*/ 976 1.1 christos BIT_addBits(&blockStream, sequences[n].litLength, llBits); 977 1.1 christos if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); 978 1.1 christos BIT_addBits(&blockStream, sequences[n].mlBase, mlBits); 979 1.1 christos if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ 980 1.1 christos BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ 981 1.1 christos BIT_flushBits(&blockStream); /* (7)*/ 982 1.1 christos } } 983 1.1 christos 984 1.1 christos FSE_flushCState(&blockStream, &stateMatchLength); 985 1.1 christos FSE_flushCState(&blockStream, &stateOffsetBits); 986 1.1 christos FSE_flushCState(&blockStream, &stateLitLength); 987 1.1 christos 988 1.1 christos { size_t const streamSize = BIT_closeCStream(&blockStream); 989 1.1 christos if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ 990 1.1 christos op += streamSize; 991 1.1 christos } } 992 1.1 christos 993 1.1 christos frame->data = op; 994 1.1 christos 995 1.1 christos return 0; 996 1.1 christos } 997 1.1 christos 998 1.1 christos static size_t writeSequencesBlock(U32* seed, frame_t* frame, size_t contentSize, 999 1.1 christos size_t literalsSize, dictInfo info) 1000 1.1 christos { 1001 1.1 christos seqStore_t seqStore; 1002 1.1 christos size_t numSequences; 1003 1.1 christos 1004 1.1 christos 1005 1.1 christos initSeqStore(&seqStore); 1006 1.1 christos 1007 1.1 christos /* randomly generate sequences */ 1008 1.1 christos numSequences = generateSequences(seed, frame, &seqStore, contentSize, literalsSize, info); 1009 1.1 christos /* write them out to the frame data */ 1010 1.1 christos CHECKERR(writeSequences(seed, frame, &seqStore, numSequences)); 1011 1.1 christos 1012 1.1 christos return numSequences; 1013 1.1 christos } 1014 1.1 christos 1015 1.1 christos static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize, dictInfo info) 1016 1.1 christos { 1017 1.1 christos BYTE* const blockStart = (BYTE*)frame->data; 1018 1.1 christos size_t literalsSize; 1019 1.1 christos size_t nbSeq; 1020 1.1 christos 1021 1.1 christos DISPLAYLEVEL(4, " compressed block:\n"); 1022 1.1 christos 1023 1.1 christos literalsSize = writeLiteralsBlock(seed, frame, contentSize); 1024 1.1 christos 1025 1.1 christos DISPLAYLEVEL(4, " literals size: %u\n", (unsigned)literalsSize); 1026 1.1 christos 1027 1.1 christos nbSeq = writeSequencesBlock(seed, frame, contentSize, literalsSize, info); 1028 1.1 christos 1029 1.1 christos DISPLAYLEVEL(4, " number of sequences: %u\n", (unsigned)nbSeq); 1030 1.1 christos 1031 1.1 christos return (BYTE*)frame->data - blockStart; 1032 1.1 christos } 1033 1.1 christos 1034 1.1 christos static void writeBlock(U32* seed, frame_t* frame, size_t contentSize, 1035 1.1 christos int lastBlock, dictInfo info) 1036 1.1 christos { 1037 1.1 christos int const blockTypeDesc = RAND(seed) % 8; 1038 1.1 christos size_t blockSize; 1039 1.1 christos int blockType; 1040 1.1 christos 1041 1.1 christos BYTE *const header = (BYTE*)frame->data; 1042 1.1 christos BYTE *op = header + 3; 1043 1.1 christos 1044 1.1 christos DISPLAYLEVEL(4, " block:\n"); 1045 1.1 christos DISPLAYLEVEL(4, " block content size: %u\n", (unsigned)contentSize); 1046 1.1 christos DISPLAYLEVEL(4, " last block: %s\n", lastBlock ? "yes" : "no"); 1047 1.1 christos 1048 1.1 christos if (blockTypeDesc == 0) { 1049 1.1 christos /* Raw data frame */ 1050 1.1 christos 1051 1.1 christos RAND_buffer(seed, frame->src, contentSize); 1052 1.1 christos memcpy(op, frame->src, contentSize); 1053 1.1 christos 1054 1.1 christos op += contentSize; 1055 1.1 christos blockType = 0; 1056 1.1 christos blockSize = contentSize; 1057 1.1 christos } else if (blockTypeDesc == 1 && frame->header.contentSize > 0) { 1058 1.1 christos /* RLE (Don't create RLE block if frame content is 0 since block size of 1 may exceed max block size)*/ 1059 1.1 christos BYTE const symbol = RAND(seed) & 0xff; 1060 1.1 christos 1061 1.1 christos op[0] = symbol; 1062 1.1 christos memset(frame->src, symbol, contentSize); 1063 1.1 christos 1064 1.1 christos op++; 1065 1.1 christos blockType = 1; 1066 1.1 christos blockSize = contentSize; 1067 1.1 christos } else { 1068 1.1 christos /* compressed, most common */ 1069 1.1 christos size_t compressedSize; 1070 1.1 christos blockType = 2; 1071 1.1 christos 1072 1.1 christos frame->oldStats = frame->stats; 1073 1.1 christos 1074 1.1 christos frame->data = op; 1075 1.1 christos compressedSize = writeCompressedBlock(seed, frame, contentSize, info); 1076 1.1 christos if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */ 1077 1.1 christos blockType = 0; 1078 1.1 christos memcpy(op, frame->src, contentSize); 1079 1.1 christos 1080 1.1 christos op += contentSize; 1081 1.1 christos blockSize = contentSize; /* fall back on raw block if data doesn't 1082 1.1 christos compress */ 1083 1.1 christos 1084 1.1 christos frame->stats = frame->oldStats; /* don't update the stats */ 1085 1.1 christos } else { 1086 1.1 christos op += compressedSize; 1087 1.1 christos blockSize = compressedSize; 1088 1.1 christos } 1089 1.1 christos } 1090 1.1 christos frame->src = (BYTE*)frame->src + contentSize; 1091 1.1 christos 1092 1.1 christos DISPLAYLEVEL(4, " block type: %s\n", BLOCK_TYPES[blockType]); 1093 1.1 christos DISPLAYLEVEL(4, " block size field: %u\n", (unsigned)blockSize); 1094 1.1 christos 1095 1.1 christos header[0] = (BYTE) ((lastBlock | (blockType << 1) | (blockSize << 3)) & 0xff); 1096 1.1 christos MEM_writeLE16(header + 1, (U16) (blockSize >> 5)); 1097 1.1 christos 1098 1.1 christos frame->data = op; 1099 1.1 christos } 1100 1.1 christos 1101 1.1 christos static void writeBlocks(U32* seed, frame_t* frame, dictInfo info) 1102 1.1 christos { 1103 1.1 christos size_t contentLeft = frame->header.contentSize; 1104 1.1 christos size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1105 1.1 christos while (1) { 1106 1.1 christos /* 1 in 4 chance of ending frame */ 1107 1.1 christos int const lastBlock = contentLeft > maxBlockSize ? 0 : !(RAND(seed) & 3); 1108 1.1 christos size_t blockContentSize; 1109 1.1 christos if (lastBlock) { 1110 1.1 christos blockContentSize = contentLeft; 1111 1.1 christos } else { 1112 1.1 christos if (contentLeft > 0 && (RAND(seed) & 7)) { 1113 1.1 christos /* some variable size block */ 1114 1.1 christos blockContentSize = RAND(seed) % (MIN(maxBlockSize, contentLeft)+1); 1115 1.1 christos } else if (contentLeft > maxBlockSize && (RAND(seed) & 1)) { 1116 1.1 christos /* some full size block */ 1117 1.1 christos blockContentSize = maxBlockSize; 1118 1.1 christos } else { 1119 1.1 christos /* some empty block */ 1120 1.1 christos blockContentSize = 0; 1121 1.1 christos } 1122 1.1 christos } 1123 1.1 christos 1124 1.1 christos writeBlock(seed, frame, blockContentSize, lastBlock, info); 1125 1.1 christos 1126 1.1 christos contentLeft -= blockContentSize; 1127 1.1 christos if (lastBlock) break; 1128 1.1 christos } 1129 1.1 christos } 1130 1.1 christos 1131 1.1 christos static void writeChecksum(frame_t* frame) 1132 1.1 christos { 1133 1.1 christos /* write checksum so implementations can verify their output */ 1134 1.1 christos U64 digest = XXH64(frame->srcStart, (BYTE*)frame->src-(BYTE*)frame->srcStart, 0); 1135 1.1 christos DISPLAYLEVEL(3, " checksum: %08x\n", (unsigned)digest); 1136 1.1 christos MEM_writeLE32(frame->data, (U32)digest); 1137 1.1 christos frame->data = (BYTE*)frame->data + 4; 1138 1.1 christos } 1139 1.1 christos 1140 1.1 christos static void outputBuffer(const void* buf, size_t size, const char* const path) 1141 1.1 christos { 1142 1.1 christos /* write data out to file */ 1143 1.1 christos const BYTE* ip = (const BYTE*)buf; 1144 1.1 christos FILE* out; 1145 1.1 christos if (path) { 1146 1.1 christos out = fopen(path, "wb"); 1147 1.1 christos } else { 1148 1.1 christos out = stdout; 1149 1.1 christos } 1150 1.1 christos if (!out) { 1151 1.1 christos fprintf(stderr, "Failed to open file at %s: ", path); 1152 1.1 christos perror(NULL); 1153 1.1 christos exit(1); 1154 1.1 christos } 1155 1.1 christos 1156 1.1 christos { size_t fsize = size; 1157 1.1 christos size_t written = 0; 1158 1.1 christos while (written < fsize) { 1159 1.1 christos written += fwrite(ip + written, 1, fsize - written, out); 1160 1.1 christos if (ferror(out)) { 1161 1.1 christos fprintf(stderr, "Failed to write to file at %s: ", path); 1162 1.1 christos perror(NULL); 1163 1.1 christos exit(1); 1164 1.1 christos } 1165 1.1 christos } 1166 1.1 christos } 1167 1.1 christos 1168 1.1 christos if (path) { 1169 1.1 christos fclose(out); 1170 1.1 christos } 1171 1.1 christos } 1172 1.1 christos 1173 1.1 christos static void initFrame(frame_t* fr) 1174 1.1 christos { 1175 1.1 christos memset(fr, 0, sizeof(*fr)); 1176 1.1 christos fr->data = fr->dataStart = FRAME_BUFFER; 1177 1.1 christos fr->dataEnd = FRAME_BUFFER + sizeof(FRAME_BUFFER); 1178 1.1 christos fr->src = fr->srcStart = CONTENT_BUFFER; 1179 1.1 christos fr->srcEnd = CONTENT_BUFFER + sizeof(CONTENT_BUFFER); 1180 1.1 christos 1181 1.1 christos /* init repeat codes */ 1182 1.1 christos fr->stats.rep[0] = 1; 1183 1.1 christos fr->stats.rep[1] = 4; 1184 1.1 christos fr->stats.rep[2] = 8; 1185 1.1 christos } 1186 1.1 christos 1187 1.1 christos /** 1188 1.1 christos * Generated a single zstd compressed block with no block/frame header. 1189 1.1 christos * Returns the final seed. 1190 1.1 christos */ 1191 1.1 christos static U32 generateCompressedBlock(U32 seed, frame_t* frame, dictInfo info) 1192 1.1 christos { 1193 1.1 christos size_t blockContentSize; 1194 1.1 christos int blockWritten = 0; 1195 1.1 christos BYTE* op; 1196 1.1 christos DISPLAYLEVEL(4, "block seed: %u\n", (unsigned)seed); 1197 1.1 christos initFrame(frame); 1198 1.1 christos op = (BYTE*)frame->data; 1199 1.1 christos 1200 1.1 christos while (!blockWritten) { 1201 1.1 christos size_t cSize; 1202 1.1 christos /* generate window size */ 1203 1.1 christos { int const exponent = RAND(&seed) % (MAX_WINDOW_LOG - 10); 1204 1.1 christos int const mantissa = RAND(&seed) % 8; 1205 1.1 christos frame->header.windowSize = (1U << (exponent + 10)); 1206 1.1 christos frame->header.windowSize += (frame->header.windowSize / 8) * mantissa; 1207 1.1 christos } 1208 1.1 christos 1209 1.1 christos /* generate content size */ 1210 1.1 christos { size_t const maxBlockSize = MIN(g_maxBlockSize, frame->header.windowSize); 1211 1.1 christos if (RAND(&seed) & 15) { 1212 1.1 christos /* some full size blocks */ 1213 1.1 christos blockContentSize = maxBlockSize; 1214 1.1 christos } else if (RAND(&seed) & 7 && g_maxBlockSize >= (1U << 7)) { 1215 1.1 christos /* some small blocks <= 128 bytes*/ 1216 1.1 christos blockContentSize = RAND(&seed) % (1U << 7); 1217 1.1 christos } else { 1218 1.1 christos /* some variable size blocks */ 1219 1.1 christos blockContentSize = RAND(&seed) % maxBlockSize; 1220 1.1 christos } 1221 1.1 christos } 1222 1.1 christos 1223 1.1 christos /* try generating a compressed block */ 1224 1.1 christos frame->oldStats = frame->stats; 1225 1.1 christos frame->data = op; 1226 1.1 christos cSize = writeCompressedBlock(&seed, frame, blockContentSize, info); 1227 1.1 christos if (cSize >= blockContentSize) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */ 1228 1.1 christos /* data doesn't compress -- try again */ 1229 1.1 christos frame->stats = frame->oldStats; /* don't update the stats */ 1230 1.1 christos DISPLAYLEVEL(5, " can't compress block : try again \n"); 1231 1.1 christos } else { 1232 1.1 christos blockWritten = 1; 1233 1.1 christos DISPLAYLEVEL(4, " block size: %u \n", (unsigned)cSize); 1234 1.1 christos frame->src = (BYTE*)frame->src + blockContentSize; 1235 1.1 christos } 1236 1.1 christos } 1237 1.1 christos return seed; 1238 1.1 christos } 1239 1.1 christos 1240 1.1 christos /* Return the final seed */ 1241 1.1 christos static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info) 1242 1.1 christos { 1243 1.1 christos /* generate a complete frame */ 1244 1.1 christos DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed); 1245 1.1 christos initFrame(fr); 1246 1.1 christos 1247 1.1 christos writeFrameHeader(&seed, fr, info); 1248 1.1 christos writeBlocks(&seed, fr, info); 1249 1.1 christos writeChecksum(fr); 1250 1.1 christos 1251 1.1 christos return seed; 1252 1.1 christos } 1253 1.1 christos 1254 1.1 christos /*_******************************************************* 1255 1.1 christos * Dictionary Helper Functions 1256 1.1 christos *********************************************************/ 1257 1.1 christos /* returns 0 if successful, otherwise returns 1 upon error */ 1258 1.1 christos static int genRandomDict(U32 dictID, U32 seed, size_t dictSize, BYTE* fullDict) 1259 1.1 christos { 1260 1.1 christos /* allocate space for samples */ 1261 1.1 christos int ret = 0; 1262 1.1 christos unsigned const numSamples = 4; 1263 1.1 christos size_t sampleSizes[4]; 1264 1.1 christos BYTE* const samples = malloc(5000*sizeof(BYTE)); 1265 1.1 christos if (samples == NULL) { 1266 1.1 christos DISPLAY("Error: could not allocate space for samples\n"); 1267 1.1 christos return 1; 1268 1.1 christos } 1269 1.1 christos 1270 1.1 christos /* generate samples */ 1271 1.1 christos { unsigned literalValue = 1; 1272 1.1 christos unsigned samplesPos = 0; 1273 1.1 christos size_t currSize = 1; 1274 1.1 christos while (literalValue <= 4) { 1275 1.1 christos sampleSizes[literalValue - 1] = currSize; 1276 1.1 christos { size_t k; 1277 1.1 christos for (k = 0; k < currSize; k++) { 1278 1.1 christos *(samples + (samplesPos++)) = (BYTE)literalValue; 1279 1.1 christos } } 1280 1.1 christos literalValue++; 1281 1.1 christos currSize *= 16; 1282 1.1 christos } } 1283 1.1 christos 1284 1.1 christos { size_t dictWriteSize = 0; 1285 1.1 christos ZDICT_params_t zdictParams; 1286 1.1 christos size_t const headerSize = MAX(dictSize/4, 256); 1287 1.1 christos size_t const dictContentSize = dictSize - headerSize; 1288 1.1 christos BYTE* const dictContent = fullDict + headerSize; 1289 1.1 christos if (dictContentSize < ZDICT_CONTENTSIZE_MIN || dictSize < ZDICT_DICTSIZE_MIN) { 1290 1.1 christos DISPLAY("Error: dictionary size is too small\n"); 1291 1.1 christos ret = 1; 1292 1.1 christos goto exitGenRandomDict; 1293 1.1 christos } 1294 1.1 christos 1295 1.1 christos /* init dictionary params */ 1296 1.1 christos memset(&zdictParams, 0, sizeof(zdictParams)); 1297 1.1 christos zdictParams.dictID = dictID; 1298 1.1 christos zdictParams.notificationLevel = 1; 1299 1.1 christos 1300 1.1 christos /* fill in dictionary content */ 1301 1.1 christos RAND_buffer(&seed, (void*)dictContent, dictContentSize); 1302 1.1 christos 1303 1.1 christos /* finalize dictionary with random samples */ 1304 1.1 christos dictWriteSize = ZDICT_finalizeDictionary(fullDict, dictSize, 1305 1.1 christos dictContent, dictContentSize, 1306 1.1 christos samples, sampleSizes, numSamples, 1307 1.1 christos zdictParams); 1308 1.1 christos 1309 1.1 christos if (ZDICT_isError(dictWriteSize)) { 1310 1.1 christos DISPLAY("Could not finalize dictionary: %s\n", ZDICT_getErrorName(dictWriteSize)); 1311 1.1 christos ret = 1; 1312 1.1 christos } 1313 1.1 christos } 1314 1.1 christos 1315 1.1 christos exitGenRandomDict: 1316 1.1 christos free(samples); 1317 1.1 christos return ret; 1318 1.1 christos } 1319 1.1 christos 1320 1.1 christos static dictInfo initDictInfo(int useDict, size_t dictContentSize, BYTE* dictContent, U32 dictID){ 1321 1.1 christos /* allocate space statically */ 1322 1.1 christos dictInfo dictOp; 1323 1.1 christos memset(&dictOp, 0, sizeof(dictOp)); 1324 1.1 christos dictOp.useDict = useDict; 1325 1.1 christos dictOp.dictContentSize = dictContentSize; 1326 1.1 christos dictOp.dictContent = dictContent; 1327 1.1 christos dictOp.dictID = dictID; 1328 1.1 christos return dictOp; 1329 1.1 christos } 1330 1.1 christos 1331 1.1 christos /*-******************************************************* 1332 1.1 christos * Test Mode 1333 1.1 christos *********************************************************/ 1334 1.1 christos 1335 1.1 christos BYTE DECOMPRESSED_BUFFER[MAX_DECOMPRESSED_SIZE]; 1336 1.1 christos 1337 1.1 christos static size_t testDecodeSimple(frame_t* fr) 1338 1.1 christos { 1339 1.1 christos /* test decoding the generated data with the simple API */ 1340 1.1 christos size_t const ret = ZSTD_decompress(DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1341 1.1 christos fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1342 1.1 christos 1343 1.1 christos if (ZSTD_isError(ret)) return ret; 1344 1.1 christos 1345 1.1 christos if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1346 1.1 christos (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1347 1.1 christos return ERROR(corruption_detected); 1348 1.1 christos } 1349 1.1 christos 1350 1.1 christos return ret; 1351 1.1 christos } 1352 1.1 christos 1353 1.1 christos static size_t testDecodeStreaming(frame_t* fr) 1354 1.1 christos { 1355 1.1 christos /* test decoding the generated data with the streaming API */ 1356 1.1 christos ZSTD_DStream* zd = ZSTD_createDStream(); 1357 1.1 christos ZSTD_inBuffer in; 1358 1.1 christos ZSTD_outBuffer out; 1359 1.1 christos size_t ret; 1360 1.1 christos 1361 1.1 christos if (!zd) return ERROR(memory_allocation); 1362 1.1 christos 1363 1.1 christos in.src = fr->dataStart; 1364 1.1 christos in.pos = 0; 1365 1.1 christos in.size = (BYTE*)fr->data - (BYTE*)fr->dataStart; 1366 1.1 christos 1367 1.1 christos out.dst = DECOMPRESSED_BUFFER; 1368 1.1 christos out.pos = 0; 1369 1.1 christos out.size = ZSTD_DStreamOutSize(); 1370 1.1 christos 1371 1.1 christos ZSTD_initDStream(zd); 1372 1.1 christos while (1) { 1373 1.1 christos ret = ZSTD_decompressStream(zd, &out, &in); 1374 1.1 christos if (ZSTD_isError(ret)) goto cleanup; /* error */ 1375 1.1 christos if (ret == 0) break; /* frame is done */ 1376 1.1 christos 1377 1.1 christos /* force decoding to be done in chunks */ 1378 1.1 christos out.size += MIN(ZSTD_DStreamOutSize(), MAX_DECOMPRESSED_SIZE - out.size); 1379 1.1 christos } 1380 1.1 christos 1381 1.1 christos ret = out.pos; 1382 1.1 christos 1383 1.1 christos if (memcmp(out.dst, fr->srcStart, out.pos) != 0) { 1384 1.1 christos return ERROR(corruption_detected); 1385 1.1 christos } 1386 1.1 christos 1387 1.1 christos cleanup: 1388 1.1 christos ZSTD_freeDStream(zd); 1389 1.1 christos return ret; 1390 1.1 christos } 1391 1.1 christos 1392 1.1 christos static size_t testDecodeWithDict(U32 seed, genType_e genType) 1393 1.1 christos { 1394 1.1 christos /* create variables */ 1395 1.1 christos size_t const dictSize = RAND(&seed) % (10 << 20) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN; 1396 1.1 christos U32 const dictID = RAND(&seed); 1397 1.1 christos size_t errorDetected = 0; 1398 1.1 christos BYTE* const fullDict = malloc(dictSize); 1399 1.1 christos if (fullDict == NULL) { 1400 1.1 christos return ERROR(GENERIC); 1401 1.1 christos } 1402 1.1 christos 1403 1.1 christos /* generate random dictionary */ 1404 1.1 christos if (genRandomDict(dictID, seed, dictSize, fullDict)) { /* return 0 on success */ 1405 1.1 christos errorDetected = ERROR(GENERIC); 1406 1.1 christos goto dictTestCleanup; 1407 1.1 christos } 1408 1.1 christos 1409 1.1 christos 1410 1.1 christos { frame_t fr; 1411 1.1 christos dictInfo info; 1412 1.1 christos ZSTD_DCtx* const dctx = ZSTD_createDCtx(); 1413 1.1 christos size_t ret; 1414 1.1 christos 1415 1.1 christos /* get dict info */ 1416 1.1 christos { size_t const headerSize = MAX(dictSize/4, 256); 1417 1.1 christos size_t const dictContentSize = dictSize-headerSize; 1418 1.1 christos BYTE* const dictContent = fullDict+headerSize; 1419 1.1 christos info = initDictInfo(1, dictContentSize, dictContent, dictID); 1420 1.1 christos } 1421 1.1 christos 1422 1.1 christos /* manually decompress and check difference */ 1423 1.1 christos if (genType == gt_frame) { 1424 1.1 christos /* Test frame */ 1425 1.1 christos generateFrame(seed, &fr, info); 1426 1.1 christos ret = ZSTD_decompress_usingDict(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1427 1.1 christos fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, 1428 1.1 christos fullDict, dictSize); 1429 1.1 christos } else { 1430 1.1 christos /* Test block */ 1431 1.1 christos generateCompressedBlock(seed, &fr, info); 1432 1.1 christos ret = ZSTD_decompressBegin_usingDict(dctx, fullDict, dictSize); 1433 1.1 christos if (ZSTD_isError(ret)) { 1434 1.1 christos errorDetected = ret; 1435 1.1 christos ZSTD_freeDCtx(dctx); 1436 1.1 christos goto dictTestCleanup; 1437 1.1 christos } 1438 1.1 christos ret = ZSTD_decompressBlock_deprecated(dctx, DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1439 1.1 christos fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart); 1440 1.1 christos } 1441 1.1 christos ZSTD_freeDCtx(dctx); 1442 1.1 christos 1443 1.1 christos if (ZSTD_isError(ret)) { 1444 1.1 christos errorDetected = ret; 1445 1.1 christos goto dictTestCleanup; 1446 1.1 christos } 1447 1.1 christos 1448 1.1 christos if (memcmp(DECOMPRESSED_BUFFER, fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart) != 0) { 1449 1.1 christos errorDetected = ERROR(corruption_detected); 1450 1.1 christos goto dictTestCleanup; 1451 1.1 christos } 1452 1.1 christos } 1453 1.1 christos 1454 1.1 christos dictTestCleanup: 1455 1.1 christos free(fullDict); 1456 1.1 christos return errorDetected; 1457 1.1 christos } 1458 1.1 christos 1459 1.1 christos static size_t testDecodeRawBlock(frame_t* fr) 1460 1.1 christos { 1461 1.1 christos ZSTD_DCtx* dctx = ZSTD_createDCtx(); 1462 1.1 christos size_t ret = ZSTD_decompressBegin(dctx); 1463 1.1 christos if (ZSTD_isError(ret)) return ret; 1464 1.1 christos 1465 1.1 christos ret = ZSTD_decompressBlock_deprecated( 1466 1.1 christos dctx, 1467 1.1 christos DECOMPRESSED_BUFFER, MAX_DECOMPRESSED_SIZE, 1468 1.1 christos fr->dataStart, (BYTE*)fr->data - (BYTE*)fr->dataStart); 1469 1.1 christos ZSTD_freeDCtx(dctx); 1470 1.1 christos if (ZSTD_isError(ret)) return ret; 1471 1.1 christos 1472 1.1 christos if (memcmp(DECOMPRESSED_BUFFER, fr->srcStart, 1473 1.1 christos (BYTE*)fr->src - (BYTE*)fr->srcStart) != 0) { 1474 1.1 christos return ERROR(corruption_detected); 1475 1.1 christos } 1476 1.1 christos 1477 1.1 christos return ret; 1478 1.1 christos } 1479 1.1 christos 1480 1.1 christos static int runBlockTest(U32* seed) 1481 1.1 christos { 1482 1.1 christos frame_t fr; 1483 1.1 christos U32 const seedCopy = *seed; 1484 1.1 christos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1485 1.1 christos *seed = generateCompressedBlock(*seed, &fr, info); 1486 1.1 christos } 1487 1.1 christos 1488 1.1 christos { size_t const r = testDecodeRawBlock(&fr); 1489 1.1 christos if (ZSTD_isError(r)) { 1490 1.1 christos DISPLAY("Error in block mode on test seed %u: %s\n", 1491 1.1 christos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1492 1.1 christos return 1; 1493 1.1 christos } 1494 1.1 christos } 1495 1.1 christos 1496 1.1 christos { size_t const r = testDecodeWithDict(*seed, gt_block); 1497 1.1 christos if (ZSTD_isError(r)) { 1498 1.1 christos DISPLAY("Error in block mode with dictionary on test seed %u: %s\n", 1499 1.1 christos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1500 1.1 christos return 1; 1501 1.1 christos } 1502 1.1 christos } 1503 1.1 christos return 0; 1504 1.1 christos } 1505 1.1 christos 1506 1.1 christos static int runFrameTest(U32* seed) 1507 1.1 christos { 1508 1.1 christos frame_t fr; 1509 1.1 christos U32 const seedCopy = *seed; 1510 1.1 christos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1511 1.1 christos *seed = generateFrame(*seed, &fr, info); 1512 1.1 christos } 1513 1.1 christos 1514 1.1 christos { size_t const r = testDecodeSimple(&fr); 1515 1.1 christos if (ZSTD_isError(r)) { 1516 1.1 christos DISPLAY("Error in simple mode on test seed %u: %s\n", 1517 1.1 christos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1518 1.1 christos return 1; 1519 1.1 christos } 1520 1.1 christos } 1521 1.1 christos { size_t const r = testDecodeStreaming(&fr); 1522 1.1 christos if (ZSTD_isError(r)) { 1523 1.1 christos DISPLAY("Error in streaming mode on test seed %u: %s\n", 1524 1.1 christos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1525 1.1 christos return 1; 1526 1.1 christos } 1527 1.1 christos } 1528 1.1 christos { size_t const r = testDecodeWithDict(*seed, gt_frame); /* avoid big dictionaries */ 1529 1.1 christos if (ZSTD_isError(r)) { 1530 1.1 christos DISPLAY("Error in dictionary mode on test seed %u: %s\n", 1531 1.1 christos (unsigned)seedCopy, ZSTD_getErrorName(r)); 1532 1.1 christos return 1; 1533 1.1 christos } 1534 1.1 christos } 1535 1.1 christos return 0; 1536 1.1 christos } 1537 1.1 christos 1538 1.1 christos static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS, 1539 1.1 christos genType_e genType) 1540 1.1 christos { 1541 1.1 christos unsigned fnum; 1542 1.1 christos 1543 1.1 christos UTIL_time_t const startClock = UTIL_getTime(); 1544 1.1 christos U64 const maxClockSpan = testDurationS * SEC_TO_MICRO; 1545 1.1 christos 1546 1.1 christos if (numFiles == 0 && !testDurationS) numFiles = 1; 1547 1.1 christos 1548 1.1 christos DISPLAY("seed: %u\n", (unsigned)seed); 1549 1.1 christos 1550 1.1 christos for (fnum = 0; fnum < numFiles || UTIL_clockSpanMicro(startClock) < maxClockSpan; fnum++) { 1551 1.1 christos if (fnum < numFiles) 1552 1.1 christos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1553 1.1 christos else 1554 1.1 christos DISPLAYUPDATE("\r%u ", fnum); 1555 1.1 christos 1556 1.1 christos { int const ret = (genType == gt_frame) ? 1557 1.1 christos runFrameTest(&seed) : 1558 1.1 christos runBlockTest(&seed); 1559 1.1 christos if (ret) return ret; 1560 1.1 christos } 1561 1.1 christos } 1562 1.1 christos 1563 1.1 christos DISPLAY("\r%u tests completed: ", fnum); 1564 1.1 christos DISPLAY("OK\n"); 1565 1.1 christos 1566 1.1 christos return 0; 1567 1.1 christos } 1568 1.1 christos 1569 1.1 christos /*-******************************************************* 1570 1.1 christos * File I/O 1571 1.1 christos *********************************************************/ 1572 1.1 christos 1573 1.1 christos static int generateFile(U32 seed, const char* const path, 1574 1.1 christos const char* const origPath, genType_e genType) 1575 1.1 christos { 1576 1.1 christos frame_t fr; 1577 1.1 christos 1578 1.1 christos DISPLAY("seed: %u\n", (unsigned)seed); 1579 1.1 christos 1580 1.1 christos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1581 1.1 christos if (genType == gt_frame) { 1582 1.1 christos generateFrame(seed, &fr, info); 1583 1.1 christos } else { 1584 1.1 christos generateCompressedBlock(seed, &fr, info); 1585 1.1 christos } 1586 1.1 christos } 1587 1.1 christos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1588 1.1 christos if (origPath) { 1589 1.1 christos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1590 1.1 christos } 1591 1.1 christos return 0; 1592 1.1 christos } 1593 1.1 christos 1594 1.1 christos static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, 1595 1.1 christos const char* const origPath, genType_e genType) 1596 1.1 christos { 1597 1.1 christos char outPath[MAX_PATH]; 1598 1.1 christos unsigned fnum; 1599 1.1 christos 1600 1.1 christos DISPLAY("seed: %u\n", (unsigned)seed); 1601 1.1 christos 1602 1.1 christos for (fnum = 0; fnum < numFiles; fnum++) { 1603 1.1 christos frame_t fr; 1604 1.1 christos 1605 1.1 christos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1606 1.1 christos 1607 1.1 christos { dictInfo const info = initDictInfo(0, 0, NULL, 0); 1608 1.1 christos if (genType == gt_frame) { 1609 1.1 christos seed = generateFrame(seed, &fr, info); 1610 1.1 christos } else { 1611 1.1 christos seed = generateCompressedBlock(seed, &fr, info); 1612 1.1 christos } 1613 1.1 christos } 1614 1.1 christos 1615 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1616 1.1 christos DISPLAY("Error: path too long\n"); 1617 1.1 christos return 1; 1618 1.1 christos } 1619 1.1 christos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1620 1.1 christos 1621 1.1 christos if (origPath) { 1622 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1623 1.1 christos DISPLAY("Error: path too long\n"); 1624 1.1 christos return 1; 1625 1.1 christos } 1626 1.1 christos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1627 1.1 christos } 1628 1.1 christos } 1629 1.1 christos 1630 1.1 christos DISPLAY("\r%u/%u \n", fnum, numFiles); 1631 1.1 christos 1632 1.1 christos return 0; 1633 1.1 christos } 1634 1.1 christos 1635 1.1 christos static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, 1636 1.1 christos const char* const origPath, const size_t dictSize, 1637 1.1 christos genType_e genType) 1638 1.1 christos { 1639 1.1 christos char outPath[MAX_PATH]; 1640 1.1 christos BYTE* fullDict; 1641 1.1 christos U32 const dictID = RAND(&seed); 1642 1.1 christos int errorDetected = 0; 1643 1.1 christos 1644 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1645 1.1 christos DISPLAY("Error: path too long\n"); 1646 1.1 christos return 1; 1647 1.1 christos } 1648 1.1 christos 1649 1.1 christos /* allocate space for the dictionary */ 1650 1.1 christos fullDict = malloc(dictSize); 1651 1.1 christos if (fullDict == NULL) { 1652 1.1 christos DISPLAY("Error: could not allocate space for full dictionary.\n"); 1653 1.1 christos return 1; 1654 1.1 christos } 1655 1.1 christos 1656 1.1 christos /* randomly generate the dictionary */ 1657 1.1 christos { int const ret = genRandomDict(dictID, seed, dictSize, fullDict); 1658 1.1 christos if (ret != 0) { 1659 1.1 christos errorDetected = ret; 1660 1.1 christos goto dictCleanup; 1661 1.1 christos } 1662 1.1 christos } 1663 1.1 christos 1664 1.1 christos /* write out dictionary */ 1665 1.1 christos if (numFiles != 0) { 1666 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { 1667 1.1 christos DISPLAY("Error: dictionary path too long\n"); 1668 1.1 christos errorDetected = 1; 1669 1.1 christos goto dictCleanup; 1670 1.1 christos } 1671 1.1 christos outputBuffer(fullDict, dictSize, outPath); 1672 1.1 christos } 1673 1.1 christos else { 1674 1.1 christos outputBuffer(fullDict, dictSize, "dictionary"); 1675 1.1 christos } 1676 1.1 christos 1677 1.1 christos /* generate random compressed/decompressed files */ 1678 1.1 christos { unsigned fnum; 1679 1.1 christos for (fnum = 0; fnum < MAX(numFiles, 1); fnum++) { 1680 1.1 christos frame_t fr; 1681 1.1 christos DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); 1682 1.1 christos { 1683 1.1 christos size_t const headerSize = MAX(dictSize/4, 256); 1684 1.1 christos size_t const dictContentSize = dictSize-headerSize; 1685 1.1 christos BYTE* const dictContent = fullDict+headerSize; 1686 1.1 christos dictInfo const info = initDictInfo(1, dictContentSize, dictContent, dictID); 1687 1.1 christos if (genType == gt_frame) { 1688 1.1 christos seed = generateFrame(seed, &fr, info); 1689 1.1 christos } else { 1690 1.1 christos seed = generateCompressedBlock(seed, &fr, info); 1691 1.1 christos } 1692 1.1 christos } 1693 1.1 christos 1694 1.1 christos if (numFiles != 0) { 1695 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { 1696 1.1 christos DISPLAY("Error: path too long\n"); 1697 1.1 christos errorDetected = 1; 1698 1.1 christos goto dictCleanup; 1699 1.1 christos } 1700 1.1 christos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); 1701 1.1 christos 1702 1.1 christos if (origPath) { 1703 1.1 christos if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { 1704 1.1 christos DISPLAY("Error: path too long\n"); 1705 1.1 christos errorDetected = 1; 1706 1.1 christos goto dictCleanup; 1707 1.1 christos } 1708 1.1 christos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); 1709 1.1 christos } 1710 1.1 christos } 1711 1.1 christos else { 1712 1.1 christos outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); 1713 1.1 christos if (origPath) { 1714 1.1 christos outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, origPath); 1715 1.1 christos } 1716 1.1 christos } 1717 1.1 christos } 1718 1.1 christos } 1719 1.1 christos 1720 1.1 christos dictCleanup: 1721 1.1 christos free(fullDict); 1722 1.1 christos return errorDetected; 1723 1.1 christos } 1724 1.1 christos 1725 1.1 christos 1726 1.1 christos /*_******************************************************* 1727 1.1 christos * Command line 1728 1.1 christos *********************************************************/ 1729 1.1 christos static U32 makeSeed(void) 1730 1.1 christos { 1731 1.1 christos U32 t = (U32) time(NULL); 1732 1.1 christos return XXH32(&t, sizeof(t), 0) % 65536; 1733 1.1 christos } 1734 1.1 christos 1735 1.1 christos static unsigned readInt(const char** argument) 1736 1.1 christos { 1737 1.1 christos unsigned val = 0; 1738 1.1 christos while ((**argument>='0') && (**argument<='9')) { 1739 1.1 christos val *= 10; 1740 1.1 christos val += **argument - '0'; 1741 1.1 christos (*argument)++; 1742 1.1 christos } 1743 1.1 christos return val; 1744 1.1 christos } 1745 1.1 christos 1746 1.1 christos static void usage(const char* programName) 1747 1.1 christos { 1748 1.1 christos DISPLAY( "Usage :\n"); 1749 1.1 christos DISPLAY( " %s [args]\n", programName); 1750 1.1 christos DISPLAY( "\n"); 1751 1.1 christos DISPLAY( "Arguments :\n"); 1752 1.1 christos DISPLAY( " -p<path> : select output path (default:stdout)\n"); 1753 1.1 christos DISPLAY( " in multiple files mode this should be a directory\n"); 1754 1.1 christos DISPLAY( " -o<path> : select path to output original file (default:no output)\n"); 1755 1.1 christos DISPLAY( " in multiple files mode this should be a directory\n"); 1756 1.1 christos DISPLAY( " -s# : select seed (default:random based on time)\n"); 1757 1.1 christos DISPLAY( " -n# : number of files to generate (default:1)\n"); 1758 1.1 christos DISPLAY( " -t : activate test mode (test files against libzstd instead of outputting them)\n"); 1759 1.1 christos DISPLAY( " -T# : length of time to run tests for\n"); 1760 1.1 christos DISPLAY( " -v : increase verbosity level (default:0, max:7)\n"); 1761 1.1 christos DISPLAY( " -h/H : display help/long help and exit\n"); 1762 1.1 christos } 1763 1.1 christos 1764 1.1 christos static void advancedUsage(const char* programName) 1765 1.1 christos { 1766 1.1 christos usage(programName); 1767 1.1 christos DISPLAY( "\n"); 1768 1.1 christos DISPLAY( "Advanced arguments :\n"); 1769 1.1 christos DISPLAY( " --content-size : always include the content size in the frame header\n"); 1770 1.1 christos DISPLAY( " --use-dict=# : include a dictionary used to decompress the corpus\n"); 1771 1.1 christos DISPLAY( " --gen-blocks : generate raw compressed blocks without block/frame headers\n"); 1772 1.1 christos DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n"); 1773 1.1 christos DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n"); 1774 1.1 christos DISPLAY( " (this is ignored with gen-blocks)\n"); 1775 1.1 christos } 1776 1.1 christos 1777 1.1 christos /*! readU32FromChar() : 1778 1.1 christos @return : unsigned integer value read from input in `char` format 1779 1.1 christos allows and interprets K, KB, KiB, M, MB and MiB suffix. 1780 1.1 christos Will also modify `*stringPtr`, advancing it to position where it stopped reading. 1781 1.1 christos Note : function result can overflow if digit string > MAX_UINT */ 1782 1.1 christos static unsigned readU32FromChar(const char** stringPtr) 1783 1.1 christos { 1784 1.1 christos unsigned result = 0; 1785 1.1 christos while ((**stringPtr >='0') && (**stringPtr <='9')) 1786 1.1 christos result *= 10, result += **stringPtr - '0', (*stringPtr)++ ; 1787 1.1 christos if ((**stringPtr=='K') || (**stringPtr=='M')) { 1788 1.1 christos result <<= 10; 1789 1.1 christos if (**stringPtr=='M') result <<= 10; 1790 1.1 christos (*stringPtr)++ ; 1791 1.1 christos if (**stringPtr=='i') (*stringPtr)++; 1792 1.1 christos if (**stringPtr=='B') (*stringPtr)++; 1793 1.1 christos } 1794 1.1 christos return result; 1795 1.1 christos } 1796 1.1 christos 1797 1.1 christos /** longCommandWArg() : 1798 1.1 christos * check if *stringPtr is the same as longCommand. 1799 1.1 christos * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. 1800 1.1 christos * @return 0 and doesn't modify *stringPtr otherwise. 1801 1.1 christos */ 1802 1.1 christos static unsigned longCommandWArg(const char** stringPtr, const char* longCommand) 1803 1.1 christos { 1804 1.1 christos size_t const comSize = strlen(longCommand); 1805 1.1 christos int const result = !strncmp(*stringPtr, longCommand, comSize); 1806 1.1 christos if (result) *stringPtr += comSize; 1807 1.1 christos return result; 1808 1.1 christos } 1809 1.1 christos 1810 1.1 christos int main(int argc, char** argv) 1811 1.1 christos { 1812 1.1 christos U32 seed = 0; 1813 1.1 christos int seedset = 0; 1814 1.1 christos unsigned numFiles = 0; 1815 1.1 christos unsigned testDuration = 0; 1816 1.1 christos int testMode = 0; 1817 1.1 christos const char* path = NULL; 1818 1.1 christos const char* origPath = NULL; 1819 1.1 christos int useDict = 0; 1820 1.1 christos unsigned dictSize = (10 << 10); /* 10 kB default */ 1821 1.1 christos genType_e genType = gt_frame; 1822 1.1 christos 1823 1.1 christos int argNb; 1824 1.1 christos 1825 1.1 christos /* Check command line */ 1826 1.1 christos for (argNb=1; argNb<argc; argNb++) { 1827 1.1 christos const char* argument = argv[argNb]; 1828 1.1 christos if(!argument) continue; /* Protection if argument empty */ 1829 1.1 christos 1830 1.1 christos /* Handle commands. Aggregated commands are allowed */ 1831 1.1 christos if (argument[0]=='-') { 1832 1.1 christos argument++; 1833 1.1 christos while (*argument!=0) { 1834 1.1 christos switch(*argument) 1835 1.1 christos { 1836 1.1 christos case 'h': 1837 1.1 christos usage(argv[0]); 1838 1.1 christos return 0; 1839 1.1 christos case 'H': 1840 1.1 christos advancedUsage(argv[0]); 1841 1.1 christos return 0; 1842 1.1 christos case 'v': 1843 1.1 christos argument++; 1844 1.1 christos g_displayLevel++; 1845 1.1 christos break; 1846 1.1 christos case 's': 1847 1.1 christos argument++; 1848 1.1 christos seedset=1; 1849 1.1 christos seed = readInt(&argument); 1850 1.1 christos break; 1851 1.1 christos case 'n': 1852 1.1 christos argument++; 1853 1.1 christos numFiles = readInt(&argument); 1854 1.1 christos break; 1855 1.1 christos case 'T': 1856 1.1 christos argument++; 1857 1.1 christos testDuration = readInt(&argument); 1858 1.1 christos if (*argument == 'm') { 1859 1.1 christos testDuration *= 60; 1860 1.1 christos argument++; 1861 1.1 christos if (*argument == 'n') argument++; 1862 1.1 christos } 1863 1.1 christos break; 1864 1.1 christos case 'o': 1865 1.1 christos argument++; 1866 1.1 christos origPath = argument; 1867 1.1 christos argument += strlen(argument); 1868 1.1 christos break; 1869 1.1 christos case 'p': 1870 1.1 christos argument++; 1871 1.1 christos path = argument; 1872 1.1 christos argument += strlen(argument); 1873 1.1 christos break; 1874 1.1 christos case 't': 1875 1.1 christos argument++; 1876 1.1 christos testMode = 1; 1877 1.1 christos break; 1878 1.1 christos case '-': 1879 1.1 christos argument++; 1880 1.1 christos if (strcmp(argument, "content-size") == 0) { 1881 1.1 christos opts.contentSize = 1; 1882 1.1 christos } else if (longCommandWArg(&argument, "use-dict=")) { 1883 1.1 christos dictSize = readU32FromChar(&argument); 1884 1.1 christos useDict = 1; 1885 1.1 christos } else if (strcmp(argument, "gen-blocks") == 0) { 1886 1.1 christos genType = gt_block; 1887 1.1 christos } else if (longCommandWArg(&argument, "max-block-size-log=")) { 1888 1.1 christos U32 value = readU32FromChar(&argument); 1889 1.1 christos if (value >= 2 && value <= ZSTD_BLOCKSIZE_MAX) { 1890 1.1 christos g_maxBlockSize = 1U << value; 1891 1.1 christos } 1892 1.1 christos } else if (longCommandWArg(&argument, "max-content-size-log=")) { 1893 1.1 christos U32 value = readU32FromChar(&argument); 1894 1.1 christos g_maxDecompressedSizeLog = 1895 1.1 christos MIN(MAX_DECOMPRESSED_SIZE_LOG, value); 1896 1.1 christos } else { 1897 1.1 christos advancedUsage(argv[0]); 1898 1.1 christos return 1; 1899 1.1 christos } 1900 1.1 christos argument += strlen(argument); 1901 1.1 christos break; 1902 1.1 christos default: 1903 1.1 christos usage(argv[0]); 1904 1.1 christos return 1; 1905 1.1 christos } } } } /* for (argNb=1; argNb<argc; argNb++) */ 1906 1.1 christos 1907 1.1 christos if (!seedset) { 1908 1.1 christos seed = makeSeed(); 1909 1.1 christos } 1910 1.1 christos 1911 1.1 christos if (testMode) { 1912 1.1 christos return runTestMode(seed, numFiles, testDuration, genType); 1913 1.1 christos } else { 1914 1.1 christos if (testDuration) { 1915 1.1 christos DISPLAY("Error: -T requires test mode (-t)\n\n"); 1916 1.1 christos usage(argv[0]); 1917 1.1 christos return 1; 1918 1.1 christos } 1919 1.1 christos } 1920 1.1 christos 1921 1.1 christos if (!path) { 1922 1.1 christos DISPLAY("Error: path is required in file generation mode\n"); 1923 1.1 christos usage(argv[0]); 1924 1.1 christos return 1; 1925 1.1 christos } 1926 1.1 christos 1927 1.1 christos if (numFiles == 0 && useDict == 0) { 1928 1.1 christos return generateFile(seed, path, origPath, genType); 1929 1.1 christos } else if (useDict == 0){ 1930 1.1 christos return generateCorpus(seed, numFiles, path, origPath, genType); 1931 1.1 christos } else { 1932 1.1 christos /* should generate files with a dictionary */ 1933 1.1 christos return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize, genType); 1934 1.1 christos } 1935 1.1 christos 1936 1.1 christos } 1937