Home | History | Annotate | Line # | Download | only in zfs
lz4.c revision 1.2
      1 /*
      2  * LZ4 - Fast LZ compression algorithm
      3  * Header File
      4  * Copyright (C) 2011-2013, Yann Collet.
      5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions are
      9  * met:
     10  *
     11  *     * Redistributions of source code must retain the above copyright
     12  * notice, this list of conditions and the following disclaimer.
     13  *     * Redistributions in binary form must reproduce the above
     14  * copyright notice, this list of conditions and the following disclaimer
     15  * in the documentation and/or other materials provided with the
     16  * distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  *
     30  * You can contact the author at :
     31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
     32  * - LZ4 source repository : http://code.google.com/p/lz4/
     33  */
     34 
     35 #include <sys/zfs_context.h>
     36 
     37 static int real_LZ4_compress(const char *source, char *dest, int isize,
     38     int osize);
     39 static int LZ4_compressBound(int isize);
     40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
     41     int isize, int maxOutputSize);
     42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
     43     int isize, int osize);
     44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
     45     int isize, int osize);
     46 
     47 #ifdef __FreeBSD__
     48 static kmem_cache_t *lz4_ctx_cache;
     49 #endif
     50 
/*ARGSUSED*/
/*
 * ZFS-facing compression entry point.
 *
 * Compresses s_len bytes from s_start into d_start (capacity d_len) and
 * prefixes the result with a 4-byte big-endian length header.
 *
 * Returns the total bytes stored (header + payload), or s_len when the
 * compressor could not fit the data, signalling "incompressible" to the
 * caller.
 */
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t csize;
	char *d = d_start;

	ASSERT(d_len >= sizeof (csize));

	csize = real_LZ4_compress(s_start, d + sizeof (csize), s_len,
	    d_len - sizeof (csize));

	/* A zero result means the compressor could not fit the output. */
	if (csize == 0)
		return (s_len);

	/*
	 * Record the compressed payload size (big-endian) in the first
	 * four bytes.  Decompression uses it to ignore any padding that
	 * may have been appended to the compressed buffer.
	 */
	*(uint32_t *)d = BE_32(csize);

	return (csize + sizeof (csize));
}
     77 
/*ARGSUSED*/
/*
 * ZFS-facing decompression entry point.
 *
 * Reads the 4-byte big-endian payload-size header written by
 * lz4_compress(), then decodes that many bytes into d_start.
 *
 * Returns 0 on success, non-zero on failure (bad header, or the decoder
 * reported a negative position).
 */
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *in = s_start;
	uint32_t csize = BE_IN32(in);

	/* Reject a header whose encoded size cannot fit in the input. */
	if (csize + sizeof (csize) > s_len)
		return (1);

	/* The decoder returns a negative value on malformed input. */
	return (LZ4_uncompress_unknownOutputSize(in + sizeof (csize),
	    d_start, csize, d_len) < 0);
}
     96 
     97 /*
     98  * LZ4 API Description:
     99  *
    100  * Simple Functions:
    101  * real_LZ4_compress() :
    102  * 	isize  : is the input size. Max supported value is ~1.9GB
    103  * 	return : the number of bytes written in buffer dest
    104  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
    105  * 	note : destination buffer must be already allocated.
    106  * 		destination buffer must be sized to handle worst cases
    107  * 		situations (input data not compressible) worst case size
    108  * 		evaluation is provided by function LZ4_compressBound().
    109  *
    110  * Advanced Functions
    111  *
    112  * LZ4_compressBound() :
    113  * 	Provides the maximum size that LZ4 may output in a "worst case"
    114  * 	scenario (input data not compressible) primarily useful for memory
    115  * 	allocation of output buffer.
    116  *
    117  * 	isize  : is the input size. Max supported value is ~1.9GB
    118  * 	return : maximum output size in a "worst case" scenario
    119  * 	note : this function is limited by "int" range (2^31-1)
    120  *
    121  * LZ4_uncompress_unknownOutputSize() :
    122  * 	isize  : is the input size, therefore the compressed size
    123  * 	maxOutputSize : is the size of the destination buffer (which must be
    124  * 		already allocated)
    125  * 	return : the number of bytes decoded in the destination buffer
    126  * 		(necessarily <= maxOutputSize). If the source stream is
    127  * 		malformed, the function will stop decoding and return a
    128  * 		negative result, indicating the byte position of the faulty
    129  * 		instruction. This function never writes beyond dest +
    130  * 		maxOutputSize, and is therefore protected against malicious
    131  * 		data packets.
    132  * 	note   : Destination buffer must be already allocated.
    133  *
    134  * LZ4_compressCtx() :
    135  * 	This function explicitly handles the CTX memory structure.
    136  *
    137  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
    138  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
    139  * 	isn't valid.
    140  *
    141  * LZ4_compress64kCtx() :
    142  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
    143  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
    144  *
    145  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
    146  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
    147  * 	isn't valid.
    148  */
    149 
    150 /*
    151  * Tuning parameters
    152  */
    153 
    154 /*
    155  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
    156  *	 Lowering this value reduces memory usage. Reduced memory usage
    157  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
    158  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
    159  *	(examples : 12 -> 16KB ; 17 -> 512KB)
    160  */
    161 #define	COMPRESSIONLEVEL 12
    162 
    163 /*
    164  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
    165  *	algorithm skip faster data segments considered "incompressible".
    166  *	This may decrease compression ratio dramatically, but will be
    167  *	faster on incompressible data. Increasing this value will make
    168  *	the algorithm search more before declaring a segment "incompressible".
    169  *	This could improve compression a bit, but will be slower on
    170  *	incompressible data. The default value (6) is recommended.
    171  */
    172 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
    173 
    174 /*
    175  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
    176  * performance for big endian cpu, but the resulting compressed stream
    177  * will be incompatible with little-endian CPU. You can set this option
    178  * to 1 in situations where data will stay within closed environment.
    179  * This option is useless on Little_Endian CPU (such as x86).
    180  */
    181 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
    182 
    183 /*
    184  * CPU Feature Detection
    185  */
    186 
    187 /* 32 or 64 bits ? */
    188 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    189     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    190     defined(__LP64__) || defined(_LP64))
    191 #define	LZ4_ARCH64 1
    192 #else
    193 #define	LZ4_ARCH64 0
    194 #endif
    195 
    196 /*
    197  * Limits the amount of stack space that the algorithm may consume to hold
    198  * the compression lookup table. The value `9' here means we'll never use
    199  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
    200  * If more memory is needed, it is allocated from the heap.
    201  */
    202 /* FreeBSD: Use heap for all platforms for now */
    203 #define	STACKLIMIT 0
    204 
    205 /*
    206  * Little Endian or Big Endian?
    207  * Note: overwrite the below #define if you know your architecture endianess.
    208  */
    209 #if BYTE_ORDER == BIG_ENDIAN
    210 #define	LZ4_BIG_ENDIAN 1
    211 #else
    212 /*
    213  * Little Endian assumed. PDP Endian and other very rare endian format
    214  * are unsupported.
    215  */
    216 #endif
    217 
    218 /*
    219  * Unaligned memory access is automatically enabled for "common" CPU,
    220  * such as x86. For others CPU, the compiler will be more cautious, and
    221  * insert extra code to ensure aligned access is respected. If you know
    222  * your target CPU supports unaligned memory access, you may want to
    223  * force this option manually to improve performance
    224  */
    225 #if defined(__ARM_FEATURE_UNALIGNED)
    226 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
    227 #endif
    228 
    229 /*
    230  * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
    231  * gcc currently rely on libcompiler_rt.
    232  *
    233  * TODO: revisit this when situation changes.
    234  */
    235 #if defined(__sparc64__)
    236 #define	LZ4_FORCE_SW_BITCOUNT
    237 #endif
    238 
    239 /*
    240  * Compiler Options
    241  */
    242 #if __STDC_VERSION__ >= 199901L	/* C99 */
    243 /* "restrict" is a known keyword */
    244 #else
    245 /* Disable restrict */
    246 #define	restrict
    247 #endif
    248 
    249 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
    250 	(((x) & 0xffu) << 8)))
    251 
    252 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
    253 
    254 #if defined(likely)
    255 #undef likely
    256 #endif
    257 #if defined(unlikely)
    258 #undef unlikely
    259 #endif
    260 
    261 #define	likely(expr)	expect((expr) != 0, 1)
    262 #define	unlikely(expr)	expect((expr) != 0, 0)
    263 
    264 /* Basic types */
    265 #define	BYTE	uint8_t
    266 #define	U16	uint16_t
    267 #define	U32	uint32_t
    268 #define	S32	int32_t
    269 #define	U64	uint64_t
    270 
    271 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
    272 #pragma pack(1)
    273 #endif
    274 
    275 typedef struct _U16_S {
    276 	U16 v;
    277 } U16_S;
    278 typedef struct _U32_S {
    279 	U32 v;
    280 } U32_S;
    281 typedef struct _U64_S {
    282 	U64 v;
    283 } U64_S;
    284 
    285 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
    286 #pragma pack()
    287 #endif
    288 
    289 #define	A64(x) (((U64_S *)(x))->v)
    290 #define	A32(x) (((U32_S *)(x))->v)
    291 #define	A16(x) (((U16_S *)(x))->v)
    292 
    293 /*
    294  * Constants
    295  */
    296 #define	MINMATCH 4
    297 
    298 #define	HASH_LOG COMPRESSIONLEVEL
    299 #define	HASHTABLESIZE (1 << HASH_LOG)
    300 #define	HASH_MASK (HASHTABLESIZE - 1)
    301 
    302 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
    303 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
    304 
    305 /*
    306  * Defines if memory is allocated into the stack (local variable),
    307  * or into the heap (kmem_alloc()).
    308  */
    309 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
    310 #define	COPYLENGTH 8
    311 #define	LASTLITERALS 5
    312 #define	MFLIMIT (COPYLENGTH + MINMATCH)
    313 #define	MINLENGTH (MFLIMIT + 1)
    314 
    315 #define	MAXD_LOG 16
    316 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
    317 
    318 #define	ML_BITS 4
    319 #define	ML_MASK ((1U<<ML_BITS)-1)
    320 #define	RUN_BITS (8-ML_BITS)
    321 #define	RUN_MASK ((1U<<RUN_BITS)-1)
    322 
    323 
    324 /*
    325  * Architecture-specific macros
    326  */
    327 #if LZ4_ARCH64
    328 #define	STEPSIZE 8
    329 #define	UARCH U64
    330 #define	AARCH A64
    331 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
    332 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
    333 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
    334 #define	HTYPE U32
    335 #define	INITBASE(base)		const BYTE* const base = ip
    336 #else /* !LZ4_ARCH64 */
    337 #define	STEPSIZE 4
    338 #define	UARCH U32
    339 #define	AARCH A32
    340 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
    341 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
    342 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
    343 #define	HTYPE const BYTE *
    344 #define	INITBASE(base)		const int base = 0
    345 #endif /* !LZ4_ARCH64 */
    346 
    347 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
    348 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
    349 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
    350 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
    351 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
    352 #else
    353 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
    354 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
    355 #endif
    356 
    357 
    358 /* Local structures */
/*
 * Compression working context: the match-candidate hash table.  Entries
 * are HTYPE values — U32 offsets from `base' on 64-bit builds, raw byte
 * pointers on 32-bit builds (see the architecture macros above).  The
 * 64KB variant reuses the same storage as an array of U16 offsets.
 */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};
    362 
    363 
    364 /* Macros */
    365 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
    366 	HASH_LOG))
    367 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
    368 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
    369 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
    370 	d = e; }
    371 
    372 
    373 /* Private functions */
    374 #if LZ4_ARCH64
    375 
/*
 * 64-bit variant: given the non-zero XOR difference of two 8-byte words,
 * return how many of their bytes (in memory order) are identical.  The
 * match-length loops use this to finish counting after hitting the first
 * differing word.
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	/* leading zero bits / 8 = leading equal bytes */
	return (__builtin_clzll(val) >> 3);
#else
	/* software fallback: locate the highest non-zero byte by halving */
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	/* trailing zero bits / 8 = trailing equal bytes (little-endian) */
	return (__builtin_ctzll(val) >> 3);
#else
	/* software fallback: De Bruijn multiply on the lowest set bit */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
    414 
    415 #else
    416 
/*
 * 32-bit variant: given the non-zero XOR difference of two 4-byte words,
 * return how many of their bytes (in memory order) are identical.
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	/* leading zero bits / 8 = leading equal bytes */
	return (__builtin_clz(val) >> 3);
#else
	/* software fallback: locate the highest non-zero byte by halving */
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if !defined(LZ4_FORCE_SW_BITCOUNT)
	/* trailing zero bits / 8 = trailing equal bytes (little-endian) */
	return (__builtin_ctz(val) >> 3);
#else
	/* software fallback: De Bruijn multiply on the lowest set bit */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}
    450 
    451 #endif
    452 
    453 /* Public functions */
    454 
/*
 * Worst-case output size for an isize-byte input (data that does not
 * compress at all): the input itself, one extra byte per 255 input
 * bytes of literal-run-length encoding, plus a fixed 16-byte margin.
 */
static int
LZ4_compressBound(int isize)
{
	int worst;

	worst = isize + (isize / 255) + 16;
	return (worst);
}
    460 
    461 /* Compression functions */
    462 
/*ARGSUSED*/
/*
 * General-purpose LZ4 block compressor.
 *
 * ctx    : caller-allocated, zeroed struct refTables used as the match
 *          hash table (only when HEAPMODE; see the API notes above)
 * source : input buffer, isize bytes
 * dest   : output buffer, osize bytes
 * return : number of bytes written to dest, or 0 when the encoded output
 *          would not fit in osize (every output-limit check below
 *          returns 0, and the caller then stores the data uncompressed).
 */
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);		/* reference point for hash-table entries */
	const BYTE *anchor = ip;	/* start of the pending literal run */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init: inputs below MINLENGTH are emitted as literals only */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match: probe the hash table, stepping further
		 * ahead after each failed attempt so that incompressible
		 * regions are skipped quickly.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over pending literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (2 bytes, little-endian) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			/* long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
    647 
    648 
    649 
    650 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
    651 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
    652 #define	HASHLOG64K (HASH_LOG + 1)
    653 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
    654 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
    655 	HASHLOG64K))
    656 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
    657 
/*ARGSUSED*/
/*
 * LZ4 block compressor specialized for inputs smaller than LZ4_64KLIMIT.
 * Because every position fits in 16 bits, the hash table holds U16
 * offsets (twice as many entries in the same refTables storage) and the
 * MAX_DISTANCE check of the general version is unnecessary.
 *
 * isize *must* be < LZ4_64KLIMIT, otherwise the output is corrupted
 * (see the API notes above).  Same parameters and return convention as
 * LZ4_compressCtx(): bytes written to dest, or 0 if osize is too small.
 */
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;	/* start of the pending literal run */
	const BYTE *const base = ip;	/* hash entries are offsets from here */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init: inputs below MINLENGTH are emitted as literals only */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match: probe the hash table, stepping further
		 * ahead after each failed attempt so that incompressible
		 * regions are skipped quickly.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up: extend the match backwards over pending literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (2 bytes, little-endian) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			/* long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
    839 
/*
 * Top-level compressor: allocates the hash-table context (HEAPMODE) and
 * dispatches to the 64KB-specialized encoder for small inputs, or the
 * general encoder otherwise.  Returns the compressed size, or 0 on
 * failure (no memory, or output does not fit in osize).
 */
static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#ifdef __FreeBSD__
	/* Per-call context from the dedicated cache (see lz4_init()). */
	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
#else
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	/*
	 * The hash table must start zeroed; cache allocations return
	 * dirty memory.  (Redundant after kmem_zalloc(), but harmless.)
	 */
	bzero(ctx, sizeof(struct refTables));
	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#ifdef __FreeBSD__
	kmem_cache_free(lz4_ctx_cache, ctx);
#else
	kmem_free(ctx, sizeof (struct refTables));
#endif
	return (result);
#else
	/* Stack mode: the encoders allocate their own table locally. */
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}
    876 
    877 /* Decompression functions */
    878 
    879 /*
    880  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
    881  *	against "buffer overflow" attack type. They will never write nor
    882  *	read outside of the provided output buffers.
    883  *	LZ4_uncompress_unknownOutputSize() also insures that it will never
    884  *	read outside of the input buffer.  A corrupted input will produce
    885  *	an error result, a negative int, indicating the position of the
    886  *	error within input stream.
    887  */
    888 
    889 static int
    890 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    891     int maxOutputSize)
    892 {
    893 	/* Local Variables */
    894 	const BYTE *restrict ip = (const BYTE *) source;
    895 	const BYTE *const iend = ip + isize;
    896 	const BYTE *ref;
    897 
    898 	BYTE *op = (BYTE *) dest;
    899 	BYTE *const oend = op + maxOutputSize;
    900 	BYTE *cpy;
    901 
    902 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
    903 #if LZ4_ARCH64
    904 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
    905 #endif
    906 
    907 	/* Main Loop */
    908 	while (ip < iend) {
    909 		unsigned token;
    910 		size_t length;
    911 
    912 		/* get runlength */
    913 		token = *ip++;
    914 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
    915 			int s = 255;
    916 			while ((ip < iend) && (s == 255)) {
    917 				s = *ip++;
    918 				length += s;
    919 			}
    920 		}
    921 		/* copy literals */
    922 		cpy = op + length;
    923 		/* CORNER-CASE: cpy might overflow. */
    924 		if (cpy < op)
    925 			goto _output_error;	/* cpy was overflowed, bail! */
    926 		if ((cpy > oend - COPYLENGTH) ||
    927 		    (ip + length > iend - COPYLENGTH)) {
    928 			if (cpy > oend)
    929 				/* Error: writes beyond output buffer */
    930 				goto _output_error;
    931 			if (ip + length != iend)
    932 				/*
    933 				 * Error: LZ4 format requires to consume all
    934 				 * input at this stage
    935 				 */
    936 				goto _output_error;
    937 			(void) memcpy(op, ip, length);
    938 			op += length;
    939 			/* Necessarily EOF, due to parsing restrictions */
    940 			break;
    941 		}
    942 		LZ4_WILDCOPY(ip, op, cpy);
    943 		ip -= (op - cpy);
    944 		op = cpy;
    945 
    946 		/* get offset */
    947 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
    948 		ip += 2;
    949 		if (ref < (BYTE * const) dest)
    950 			/*
    951 			 * Error: offset creates reference outside of
    952 			 * destination buffer
    953 			 */
    954 			goto _output_error;
    955 
    956 		/* get matchlength */
    957 		if ((length = (token & ML_MASK)) == ML_MASK) {
    958 			while (ip < iend) {
    959 				int s = *ip++;
    960 				length += s;
    961 				if (s == 255)
    962 					continue;
    963 				break;
    964 			}
    965 		}
    966 		/* copy repeated sequence */
    967 		if unlikely(op - ref < STEPSIZE) {
    968 #if LZ4_ARCH64
    969 			size_t dec64 = dec64table[op-ref];
    970 #else
    971 			const int dec64 = 0;
    972 #endif
    973 			op[0] = ref[0];
    974 			op[1] = ref[1];
    975 			op[2] = ref[2];
    976 			op[3] = ref[3];
    977 			op += 4;
    978 			ref += 4;
    979 			ref -= dec32table[op-ref];
    980 			A32(op) = A32(ref);
    981 			op += STEPSIZE - 4;
    982 			ref -= dec64;
    983 		} else {
    984 			LZ4_COPYSTEP(ref, op);
    985 		}
    986 		cpy = op + length - (STEPSIZE - 4);
    987 		if (cpy > oend - COPYLENGTH) {
    988 			if (cpy > oend)
    989 				/*
    990 				 * Error: request to write outside of
    991 				 * destination buffer
    992 				 */
    993 				goto _output_error;
    994 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
    995 			while (op < cpy)
    996 				*op++ = *ref++;
    997 			op = cpy;
    998 			if (op == oend)
    999 				/*
   1000 				 * Check EOF (should never happen, since
   1001 				 * last 5 bytes are supposed to be literals)
   1002 				 */
   1003 				goto _output_error;
   1004 			continue;
   1005 		}
   1006 		LZ4_SECURECOPY(ref, op, cpy);
   1007 		op = cpy;	/* correction */
   1008 	}
   1009 
   1010 	/* end of decoding */
   1011 	return (int)(((char *)op) - dest);
   1012 
   1013 	/* write overflow error detected */
   1014 	_output_error:
   1015 	return (int)(-(((char *)ip) - source));
   1016 }
   1017 
   1018 #ifdef __FreeBSD__
   1019 
/*
 * FreeBSD module hook: create the kmem cache that real_LZ4_compress()
 * draws its per-call refTables context from (HEAPMODE only).
 */
extern void
lz4_init(void)
{

#if HEAPMODE
	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
	    0, NULL, NULL, NULL, NULL, NULL, 0);
#endif
}
   1029 
/*
 * FreeBSD module hook: tear down the context cache created by
 * lz4_init().
 */
extern void
lz4_fini(void)
{

#if HEAPMODE
	kmem_cache_destroy(lz4_ctx_cache);
#endif
}
   1038 
   1039 #endif /* __FreeBSD__ */
   1040