Home | History | Annotate | Line # | Download | only in zfs
lz4.c revision 1.1.1.1
      1 /*
      2  * LZ4 - Fast LZ compression algorithm
      3  * Header File
      4  * Copyright (C) 2011-2013, Yann Collet.
      5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions are
      9  * met:
     10  *
     11  *     * Redistributions of source code must retain the above copyright
     12  * notice, this list of conditions and the following disclaimer.
     13  *     * Redistributions in binary form must reproduce the above
     14  * copyright notice, this list of conditions and the following disclaimer
     15  * in the documentation and/or other materials provided with the
     16  * distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  *
     30  * You can contact the author at :
     31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
     32  * - LZ4 source repository : http://code.google.com/p/lz4/
     33  */
     34 
     35 #include <sys/zfs_context.h>
     36 
     37 static int real_LZ4_compress(const char *source, char *dest, int isize,
     38     int osize);
     39 static int LZ4_compressBound(int isize);
     40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
     41     int isize, int maxOutputSize);
     42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
     43     int isize, int osize);
     44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
     45     int isize, int osize);
     46 
     47 static kmem_cache_t *lz4_ctx_cache;
     48 
     49 /*ARGSUSED*/
     50 size_t
     51 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
     52 {
     53 	uint32_t bufsiz;
     54 	char *dest = d_start;
     55 
     56 	ASSERT(d_len >= sizeof (bufsiz));
     57 
     58 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
     59 	    d_len - sizeof (bufsiz));
     60 
     61 	/* Signal an error if the compression routine returned zero. */
     62 	if (bufsiz == 0)
     63 		return (s_len);
     64 
     65 	/*
     66 	 * Encode the compresed buffer size at the start. We'll need this in
     67 	 * decompression to counter the effects of padding which might be
     68 	 * added to the compressed buffer and which, if unhandled, would
     69 	 * confuse the hell out of our decompression function.
     70 	 */
     71 	*(uint32_t *)dest = BE_32(bufsiz);
     72 
     73 	return (bufsiz + sizeof (bufsiz));
     74 }
     75 
     76 /*ARGSUSED*/
     77 int
     78 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
     79 {
     80 	const char *src = s_start;
     81 	uint32_t bufsiz = BE_IN32(src);
     82 
     83 	/* invalid compressed buffer size encoded at start */
     84 	if (bufsiz + sizeof (bufsiz) > s_len)
     85 		return (1);
     86 
     87 	/*
     88 	 * Returns 0 on success (decompression function returned non-negative)
     89 	 * and non-zero on failure (decompression function returned negative.
     90 	 */
     91 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
     92 	    d_start, bufsiz, d_len) < 0);
     93 }
     94 
     95 /*
     96  * LZ4 API Description:
     97  *
     98  * Simple Functions:
     99  * real_LZ4_compress() :
    100  * 	isize  : is the input size. Max supported value is ~1.9GB
    101  * 	return : the number of bytes written in buffer dest
    102  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
    103  * 	note : destination buffer must be already allocated.
    104  * 		destination buffer must be sized to handle worst cases
    105  * 		situations (input data not compressible) worst case size
    106  * 		evaluation is provided by function LZ4_compressBound().
    107  *
    108  * Advanced Functions
    109  *
    110  * LZ4_compressBound() :
    111  * 	Provides the maximum size that LZ4 may output in a "worst case"
    112  * 	scenario (input data not compressible) primarily useful for memory
    113  * 	allocation of output buffer.
    114  *
    115  * 	isize  : is the input size. Max supported value is ~1.9GB
    116  * 	return : maximum output size in a "worst case" scenario
    117  * 	note : this function is limited by "int" range (2^31-1)
    118  *
    119  * LZ4_uncompress_unknownOutputSize() :
    120  * 	isize  : is the input size, therefore the compressed size
    121  * 	maxOutputSize : is the size of the destination buffer (which must be
    122  * 		already allocated)
    123  * 	return : the number of bytes decoded in the destination buffer
    124  * 		(necessarily <= maxOutputSize). If the source stream is
    125  * 		malformed, the function will stop decoding and return a
    126  * 		negative result, indicating the byte position of the faulty
    127  * 		instruction. This function never writes beyond dest +
    128  * 		maxOutputSize, and is therefore protected against malicious
    129  * 		data packets.
    130  * 	note   : Destination buffer must be already allocated.
    131  *
    132  * LZ4_compressCtx() :
    133  * 	This function explicitly handles the CTX memory structure.
    134  *
    135  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
    136  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
    137  * 	isn't valid.
    138  *
    139  * LZ4_compress64kCtx() :
    140  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
    141  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
    142  *
    143  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
    144  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
    145  * 	isn't valid.
    146  */
    147 
    148 /*
    149  * Tuning parameters
    150  */
    151 
    152 /*
    153  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
    154  *	 Lowering this value reduces memory usage. Reduced memory usage
    155  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
    156  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
    157  *	(examples : 12 -> 16KB ; 17 -> 512KB)
    158  */
    159 #define	COMPRESSIONLEVEL 12
    160 
    161 /*
    162  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
    163  *	algorithm skip faster data segments considered "incompressible".
    164  *	This may decrease compression ratio dramatically, but will be
    165  *	faster on incompressible data. Increasing this value will make
    166  *	the algorithm search more before declaring a segment "incompressible".
    167  *	This could improve compression a bit, but will be slower on
    168  *	incompressible data. The default value (6) is recommended.
    169  */
    170 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
    171 
    172 /*
    173  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
    174  * performance for big endian cpu, but the resulting compressed stream
    175  * will be incompatible with little-endian CPU. You can set this option
    176  * to 1 in situations where data will stay within closed environment.
    177  * This option is useless on Little_Endian CPU (such as x86).
    178  */
    179 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
    180 
    181 /*
    182  * CPU Feature Detection
    183  */
    184 
    185 /* 32 or 64 bits ? */
    186 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    187     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    188     defined(__LP64__) || defined(_LP64))
    189 #define	LZ4_ARCH64 1
    190 #else
    191 #define	LZ4_ARCH64 0
    192 #endif
    193 
    194 /*
    195  * Limits the amount of stack space that the algorithm may consume to hold
    196  * the compression lookup table. The value `9' here means we'll never use
    197  * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
    198  * If more memory is needed, it is allocated from the heap.
    199  */
    200 /* FreeBSD: Use heap for all platforms for now */
    201 #define	STACKLIMIT 0
    202 
    203 /*
    204  * Little Endian or Big Endian?
    205  * Note: overwrite the below #define if you know your architecture endianess.
    206  */
    207 #if BYTE_ORDER == BIG_ENDIAN
    208 #define	LZ4_BIG_ENDIAN 1
    209 #else
    210 /*
    211  * Little Endian assumed. PDP Endian and other very rare endian format
    212  * are unsupported.
    213  */
    214 #endif
    215 
    216 /*
    217  * Unaligned memory access is automatically enabled for "common" CPU,
    218  * such as x86. For others CPU, the compiler will be more cautious, and
    219  * insert extra code to ensure aligned access is respected. If you know
    220  * your target CPU supports unaligned memory access, you may want to
    221  * force this option manually to improve performance
    222  */
    223 #if defined(__ARM_FEATURE_UNALIGNED)
    224 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
    225 #endif
    226 
    227 /*
    228  * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
    229  * gcc currently rely on libcompiler_rt.
    230  *
    231  * TODO: revisit this when situation changes.
    232  */
    233 #if defined(__sparc64__)
    234 #define	LZ4_FORCE_SW_BITCOUNT
    235 #endif
    236 
    237 /*
    238  * Compiler Options
    239  */
    240 #if __STDC_VERSION__ >= 199901L	/* C99 */
    241 /* "restrict" is a known keyword */
    242 #else
    243 /* Disable restrict */
    244 #define	restrict
    245 #endif
    246 
    247 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
    248 	(((x) & 0xffu) << 8)))
    249 
    250 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
    251 
    252 #if defined(likely)
    253 #undef likely
    254 #endif
    255 #if defined(unlikely)
    256 #undef unlikely
    257 #endif
    258 
    259 #define	likely(expr)	expect((expr) != 0, 1)
    260 #define	unlikely(expr)	expect((expr) != 0, 0)
    261 
    262 /* Basic types */
    263 #define	BYTE	uint8_t
    264 #define	U16	uint16_t
    265 #define	U32	uint32_t
    266 #define	S32	int32_t
    267 #define	U64	uint64_t
    268 
    269 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
    270 #pragma pack(1)
    271 #endif
    272 
    273 typedef struct _U16_S {
    274 	U16 v;
    275 } U16_S;
    276 typedef struct _U32_S {
    277 	U32 v;
    278 } U32_S;
    279 typedef struct _U64_S {
    280 	U64 v;
    281 } U64_S;
    282 
    283 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
    284 #pragma pack()
    285 #endif
    286 
    287 #define	A64(x) (((U64_S *)(x))->v)
    288 #define	A32(x) (((U32_S *)(x))->v)
    289 #define	A16(x) (((U16_S *)(x))->v)
    290 
    291 /*
    292  * Constants
    293  */
    294 #define	MINMATCH 4
    295 
    296 #define	HASH_LOG COMPRESSIONLEVEL
    297 #define	HASHTABLESIZE (1 << HASH_LOG)
    298 #define	HASH_MASK (HASHTABLESIZE - 1)
    299 
    300 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
    301 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
    302 
    303 /*
    304  * Defines if memory is allocated into the stack (local variable),
    305  * or into the heap (kmem_alloc()).
    306  */
    307 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
    308 #define	COPYLENGTH 8
    309 #define	LASTLITERALS 5
    310 #define	MFLIMIT (COPYLENGTH + MINMATCH)
    311 #define	MINLENGTH (MFLIMIT + 1)
    312 
    313 #define	MAXD_LOG 16
    314 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
    315 
    316 #define	ML_BITS 4
    317 #define	ML_MASK ((1U<<ML_BITS)-1)
    318 #define	RUN_BITS (8-ML_BITS)
    319 #define	RUN_MASK ((1U<<RUN_BITS)-1)
    320 
    321 
    322 /*
    323  * Architecture-specific macros
    324  */
    325 #if LZ4_ARCH64
    326 #define	STEPSIZE 8
    327 #define	UARCH U64
    328 #define	AARCH A64
    329 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
    330 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
    331 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
    332 #define	HTYPE U32
    333 #define	INITBASE(base)		const BYTE* const base = ip
    334 #else /* !LZ4_ARCH64 */
    335 #define	STEPSIZE 4
    336 #define	UARCH U32
    337 #define	AARCH A32
    338 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
    339 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
    340 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
    341 #define	HTYPE const BYTE *
    342 #define	INITBASE(base)		const int base = 0
    343 #endif /* !LZ4_ARCH64 */
    344 
    345 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
    346 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
    347 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
    348 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
    349 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
    350 #else
    351 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
    352 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
    353 #endif
    354 
    355 
    356 /* Local structures */
    357 struct refTables {
    358 	HTYPE hashTable[HASHTABLESIZE];
    359 };
    360 
    361 
    362 /* Macros */
    363 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
    364 	HASH_LOG))
    365 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
    366 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
    367 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
    368 	d = e; }
    369 
    370 
    371 /* Private functions */
    372 #if LZ4_ARCH64
    373 
    374 static inline int
    375 LZ4_NbCommonBytes(register U64 val)
    376 {
    377 #if defined(LZ4_BIG_ENDIAN)
    378 #if !defined(LZ4_FORCE_SW_BITCOUNT)
    379 	return (__builtin_clzll(val) >> 3);
    380 #else
    381 	int r;
    382 	if (!(val >> 32)) {
    383 		r = 4;
    384 	} else {
    385 		r = 0;
    386 		val >>= 32;
    387 	}
    388 	if (!(val >> 16)) {
    389 		r += 2;
    390 		val >>= 8;
    391 	} else {
    392 		val >>= 24;
    393 	}
    394 	r += (!val);
    395 	return (r);
    396 #endif
    397 #else
    398 #if !defined(LZ4_FORCE_SW_BITCOUNT)
    399 	return (__builtin_ctzll(val) >> 3);
    400 #else
    401 	static const int DeBruijnBytePos[64] =
    402 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
    403 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
    404 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
    405 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
    406 	};
    407 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
    408 	    58];
    409 #endif
    410 #endif
    411 }
    412 
    413 #else
    414 
    415 static inline int
    416 LZ4_NbCommonBytes(register U32 val)
    417 {
    418 #if defined(LZ4_BIG_ENDIAN)
    419 #if !defined(LZ4_FORCE_SW_BITCOUNT)
    420 	return (__builtin_clz(val) >> 3);
    421 #else
    422 	int r;
    423 	if (!(val >> 16)) {
    424 		r = 2;
    425 		val >>= 8;
    426 	} else {
    427 		r = 0;
    428 		val >>= 24;
    429 	}
    430 	r += (!val);
    431 	return (r);
    432 #endif
    433 #else
    434 #if !defined(LZ4_FORCE_SW_BITCOUNT)
    435 	return (__builtin_ctz(val) >> 3);
    436 #else
    437 	static const int DeBruijnBytePos[32] = {
    438 		0, 0, 3, 0, 3, 1, 3, 0,
    439 		3, 2, 2, 1, 3, 2, 0, 1,
    440 		3, 3, 1, 2, 2, 2, 2, 0,
    441 		3, 1, 2, 0, 1, 0, 1, 1
    442 	};
    443 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
    444 	    27];
    445 #endif
    446 #endif
    447 }
    448 
    449 #endif
    450 
    451 /* Public functions */
    452 
    453 static int
    454 LZ4_compressBound(int isize)
    455 {
    456 	return (isize + (isize / 255) + 16);
    457 }
    458 
    459 /* Compression functions */
    460 
    461 /*ARGSUSED*/
    462 static int
    463 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    464     int osize)
    465 {
    466 #if HEAPMODE
    467 	struct refTables *srt = (struct refTables *)ctx;
    468 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
    469 #else
    470 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
    471 #endif
    472 
    473 	const BYTE *ip = (BYTE *) source;
    474 	INITBASE(base);
    475 	const BYTE *anchor = ip;
    476 	const BYTE *const iend = ip + isize;
    477 	const BYTE *const oend = (BYTE *) dest + osize;
    478 	const BYTE *const mflimit = iend - MFLIMIT;
    479 #define	matchlimit (iend - LASTLITERALS)
    480 
    481 	BYTE *op = (BYTE *) dest;
    482 
    483 	int len, length;
    484 	const int skipStrength = SKIPSTRENGTH;
    485 	U32 forwardH;
    486 
    487 
    488 	/* Init */
    489 	if (isize < MINLENGTH)
    490 		goto _last_literals;
    491 
    492 	/* First Byte */
    493 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
    494 	ip++;
    495 	forwardH = LZ4_HASH_VALUE(ip);
    496 
    497 	/* Main Loop */
    498 	for (;;) {
    499 		int findMatchAttempts = (1U << skipStrength) + 3;
    500 		const BYTE *forwardIp = ip;
    501 		const BYTE *ref;
    502 		BYTE *token;
    503 
    504 		/* Find a match */
    505 		do {
    506 			U32 h = forwardH;
    507 			int step = findMatchAttempts++ >> skipStrength;
    508 			ip = forwardIp;
    509 			forwardIp = ip + step;
    510 
    511 			if unlikely(forwardIp > mflimit) {
    512 				goto _last_literals;
    513 			}
    514 
    515 			forwardH = LZ4_HASH_VALUE(forwardIp);
    516 			ref = base + HashTable[h];
    517 			HashTable[h] = ip - base;
    518 
    519 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
    520 
    521 		/* Catch up */
    522 		while ((ip > anchor) && (ref > (BYTE *) source) &&
    523 		    unlikely(ip[-1] == ref[-1])) {
    524 			ip--;
    525 			ref--;
    526 		}
    527 
    528 		/* Encode Literal length */
    529 		length = ip - anchor;
    530 		token = op++;
    531 
    532 		/* Check output limit */
    533 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
    534 		    (length >> 8) > oend)
    535 			return (0);
    536 
    537 		if (length >= (int)RUN_MASK) {
    538 			*token = (RUN_MASK << ML_BITS);
    539 			len = length - RUN_MASK;
    540 			for (; len > 254; len -= 255)
    541 				*op++ = 255;
    542 			*op++ = (BYTE)len;
    543 		} else
    544 			*token = (length << ML_BITS);
    545 
    546 		/* Copy Literals */
    547 		LZ4_BLINDCOPY(anchor, op, length);
    548 
    549 		_next_match:
    550 		/* Encode Offset */
    551 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
    552 
    553 		/* Start Counting */
    554 		ip += MINMATCH;
    555 		ref += MINMATCH;	/* MinMatch verified */
    556 		anchor = ip;
    557 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
    558 			UARCH diff = AARCH(ref) ^ AARCH(ip);
    559 			if (!diff) {
    560 				ip += STEPSIZE;
    561 				ref += STEPSIZE;
    562 				continue;
    563 			}
    564 			ip += LZ4_NbCommonBytes(diff);
    565 			goto _endCount;
    566 		}
    567 #if LZ4_ARCH64
    568 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
    569 			ip += 4;
    570 			ref += 4;
    571 		}
    572 #endif
    573 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
    574 			ip += 2;
    575 			ref += 2;
    576 		}
    577 		if ((ip < matchlimit) && (*ref == *ip))
    578 			ip++;
    579 		_endCount:
    580 
    581 		/* Encode MatchLength */
    582 		len = (ip - anchor);
    583 		/* Check output limit */
    584 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
    585 			return (0);
    586 		if (len >= (int)ML_MASK) {
    587 			*token += ML_MASK;
    588 			len -= ML_MASK;
    589 			for (; len > 509; len -= 510) {
    590 				*op++ = 255;
    591 				*op++ = 255;
    592 			}
    593 			if (len > 254) {
    594 				len -= 255;
    595 				*op++ = 255;
    596 			}
    597 			*op++ = (BYTE)len;
    598 		} else
    599 			*token += len;
    600 
    601 		/* Test end of chunk */
    602 		if (ip > mflimit) {
    603 			anchor = ip;
    604 			break;
    605 		}
    606 		/* Fill table */
    607 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
    608 
    609 		/* Test next position */
    610 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
    611 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
    612 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
    613 			token = op++;
    614 			*token = 0;
    615 			goto _next_match;
    616 		}
    617 		/* Prepare next loop */
    618 		anchor = ip++;
    619 		forwardH = LZ4_HASH_VALUE(ip);
    620 	}
    621 
    622 	_last_literals:
    623 	/* Encode Last Literals */
    624 	{
    625 		int lastRun = iend - anchor;
    626 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
    627 		    oend)
    628 			return (0);
    629 		if (lastRun >= (int)RUN_MASK) {
    630 			*op++ = (RUN_MASK << ML_BITS);
    631 			lastRun -= RUN_MASK;
    632 			for (; lastRun > 254; lastRun -= 255) {
    633 				*op++ = 255;
    634 			}
    635 			*op++ = (BYTE)lastRun;
    636 		} else
    637 			*op++ = (lastRun << ML_BITS);
    638 		(void) memcpy(op, anchor, iend - anchor);
    639 		op += iend - anchor;
    640 	}
    641 
    642 	/* End */
    643 	return (int)(((char *)op) - dest);
    644 }
    645 
    646 
    647 
    648 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
    649 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
    650 #define	HASHLOG64K (HASH_LOG + 1)
    651 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
    652 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
    653 	HASHLOG64K))
    654 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
    655 
    656 /*ARGSUSED*/
    657 static int
    658 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    659     int osize)
    660 {
    661 #if HEAPMODE
    662 	struct refTables *srt = (struct refTables *)ctx;
    663 	U16 *HashTable = (U16 *) (srt->hashTable);
    664 #else
    665 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
    666 #endif
    667 
    668 	const BYTE *ip = (BYTE *) source;
    669 	const BYTE *anchor = ip;
    670 	const BYTE *const base = ip;
    671 	const BYTE *const iend = ip + isize;
    672 	const BYTE *const oend = (BYTE *) dest + osize;
    673 	const BYTE *const mflimit = iend - MFLIMIT;
    674 #define	matchlimit (iend - LASTLITERALS)
    675 
    676 	BYTE *op = (BYTE *) dest;
    677 
    678 	int len, length;
    679 	const int skipStrength = SKIPSTRENGTH;
    680 	U32 forwardH;
    681 
    682 	/* Init */
    683 	if (isize < MINLENGTH)
    684 		goto _last_literals;
    685 
    686 	/* First Byte */
    687 	ip++;
    688 	forwardH = LZ4_HASH64K_VALUE(ip);
    689 
    690 	/* Main Loop */
    691 	for (;;) {
    692 		int findMatchAttempts = (1U << skipStrength) + 3;
    693 		const BYTE *forwardIp = ip;
    694 		const BYTE *ref;
    695 		BYTE *token;
    696 
    697 		/* Find a match */
    698 		do {
    699 			U32 h = forwardH;
    700 			int step = findMatchAttempts++ >> skipStrength;
    701 			ip = forwardIp;
    702 			forwardIp = ip + step;
    703 
    704 			if (forwardIp > mflimit) {
    705 				goto _last_literals;
    706 			}
    707 
    708 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
    709 			ref = base + HashTable[h];
    710 			HashTable[h] = ip - base;
    711 
    712 		} while (A32(ref) != A32(ip));
    713 
    714 		/* Catch up */
    715 		while ((ip > anchor) && (ref > (BYTE *) source) &&
    716 		    (ip[-1] == ref[-1])) {
    717 			ip--;
    718 			ref--;
    719 		}
    720 
    721 		/* Encode Literal length */
    722 		length = ip - anchor;
    723 		token = op++;
    724 
    725 		/* Check output limit */
    726 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
    727 		    (length >> 8) > oend)
    728 			return (0);
    729 
    730 		if (length >= (int)RUN_MASK) {
    731 			*token = (RUN_MASK << ML_BITS);
    732 			len = length - RUN_MASK;
    733 			for (; len > 254; len -= 255)
    734 				*op++ = 255;
    735 			*op++ = (BYTE)len;
    736 		} else
    737 			*token = (length << ML_BITS);
    738 
    739 		/* Copy Literals */
    740 		LZ4_BLINDCOPY(anchor, op, length);
    741 
    742 		_next_match:
    743 		/* Encode Offset */
    744 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
    745 
    746 		/* Start Counting */
    747 		ip += MINMATCH;
    748 		ref += MINMATCH;	/* MinMatch verified */
    749 		anchor = ip;
    750 		while (ip < matchlimit - (STEPSIZE - 1)) {
    751 			UARCH diff = AARCH(ref) ^ AARCH(ip);
    752 			if (!diff) {
    753 				ip += STEPSIZE;
    754 				ref += STEPSIZE;
    755 				continue;
    756 			}
    757 			ip += LZ4_NbCommonBytes(diff);
    758 			goto _endCount;
    759 		}
    760 #if LZ4_ARCH64
    761 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
    762 			ip += 4;
    763 			ref += 4;
    764 		}
    765 #endif
    766 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
    767 			ip += 2;
    768 			ref += 2;
    769 		}
    770 		if ((ip < matchlimit) && (*ref == *ip))
    771 			ip++;
    772 		_endCount:
    773 
    774 		/* Encode MatchLength */
    775 		len = (ip - anchor);
    776 		/* Check output limit */
    777 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
    778 			return (0);
    779 		if (len >= (int)ML_MASK) {
    780 			*token += ML_MASK;
    781 			len -= ML_MASK;
    782 			for (; len > 509; len -= 510) {
    783 				*op++ = 255;
    784 				*op++ = 255;
    785 			}
    786 			if (len > 254) {
    787 				len -= 255;
    788 				*op++ = 255;
    789 			}
    790 			*op++ = (BYTE)len;
    791 		} else
    792 			*token += len;
    793 
    794 		/* Test end of chunk */
    795 		if (ip > mflimit) {
    796 			anchor = ip;
    797 			break;
    798 		}
    799 		/* Fill table */
    800 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
    801 
    802 		/* Test next position */
    803 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
    804 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
    805 		if (A32(ref) == A32(ip)) {
    806 			token = op++;
    807 			*token = 0;
    808 			goto _next_match;
    809 		}
    810 		/* Prepare next loop */
    811 		anchor = ip++;
    812 		forwardH = LZ4_HASH64K_VALUE(ip);
    813 	}
    814 
    815 	_last_literals:
    816 	/* Encode Last Literals */
    817 	{
    818 		int lastRun = iend - anchor;
    819 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
    820 		    oend)
    821 			return (0);
    822 		if (lastRun >= (int)RUN_MASK) {
    823 			*op++ = (RUN_MASK << ML_BITS);
    824 			lastRun -= RUN_MASK;
    825 			for (; lastRun > 254; lastRun -= 255)
    826 				*op++ = 255;
    827 			*op++ = (BYTE)lastRun;
    828 		} else
    829 			*op++ = (lastRun << ML_BITS);
    830 		(void) memcpy(op, anchor, iend - anchor);
    831 		op += iend - anchor;
    832 	}
    833 
    834 	/* End */
    835 	return (int)(((char *)op) - dest);
    836 }
    837 
    838 static int
    839 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
    840 {
    841 #if HEAPMODE
    842 	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
    843 	int result;
    844 
    845 	/*
    846 	 * out of kernel memory, gently fall through - this will disable
    847 	 * compression in zio_compress_data
    848 	 */
    849 	if (ctx == NULL)
    850 		return (0);
    851 
    852 	bzero(ctx, sizeof(struct refTables));
    853 	if (isize < LZ4_64KLIMIT)
    854 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
    855 	else
    856 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
    857 
    858 	kmem_cache_free(lz4_ctx_cache, ctx);
    859 	return (result);
    860 #else
    861 	if (isize < (int)LZ4_64KLIMIT)
    862 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
    863 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
    864 #endif
    865 }
    866 
    867 /* Decompression functions */
    868 
    869 /*
    870  * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
    871  *	against "buffer overflow" attack type. They will never write nor
    872  *	read outside of the provided output buffers.
    873  *	LZ4_uncompress_unknownOutputSize() also insures that it will never
    874  *	read outside of the input buffer.  A corrupted input will produce
    875  *	an error result, a negative int, indicating the position of the
    876  *	error within input stream.
    877  */
    878 
    879 static int
    880 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    881     int maxOutputSize)
    882 {
    883 	/* Local Variables */
    884 	const BYTE *restrict ip = (const BYTE *) source;
    885 	const BYTE *const iend = ip + isize;
    886 	const BYTE *ref;
    887 
    888 	BYTE *op = (BYTE *) dest;
    889 	BYTE *const oend = op + maxOutputSize;
    890 	BYTE *cpy;
    891 
    892 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
    893 #if LZ4_ARCH64
    894 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
    895 #endif
    896 
    897 	/* Main Loop */
    898 	while (ip < iend) {
    899 		unsigned token;
    900 		size_t length;
    901 
    902 		/* get runlength */
    903 		token = *ip++;
    904 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
    905 			int s = 255;
    906 			while ((ip < iend) && (s == 255)) {
    907 				s = *ip++;
    908 				length += s;
    909 			}
    910 		}
    911 		/* copy literals */
    912 		cpy = op + length;
    913 		/* CORNER-CASE: cpy might overflow. */
    914 		if (cpy < op)
    915 			goto _output_error;	/* cpy was overflowed, bail! */
    916 		if ((cpy > oend - COPYLENGTH) ||
    917 		    (ip + length > iend - COPYLENGTH)) {
    918 			if (cpy > oend)
    919 				/* Error: writes beyond output buffer */
    920 				goto _output_error;
    921 			if (ip + length != iend)
    922 				/*
    923 				 * Error: LZ4 format requires to consume all
    924 				 * input at this stage
    925 				 */
    926 				goto _output_error;
    927 			(void) memcpy(op, ip, length);
    928 			op += length;
    929 			/* Necessarily EOF, due to parsing restrictions */
    930 			break;
    931 		}
    932 		LZ4_WILDCOPY(ip, op, cpy);
    933 		ip -= (op - cpy);
    934 		op = cpy;
    935 
    936 		/* get offset */
    937 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
    938 		ip += 2;
    939 		if (ref < (BYTE * const) dest)
    940 			/*
    941 			 * Error: offset creates reference outside of
    942 			 * destination buffer
    943 			 */
    944 			goto _output_error;
    945 
    946 		/* get matchlength */
    947 		if ((length = (token & ML_MASK)) == ML_MASK) {
    948 			while (ip < iend) {
    949 				int s = *ip++;
    950 				length += s;
    951 				if (s == 255)
    952 					continue;
    953 				break;
    954 			}
    955 		}
    956 		/* copy repeated sequence */
    957 		if unlikely(op - ref < STEPSIZE) {
    958 #if LZ4_ARCH64
    959 			size_t dec64 = dec64table[op-ref];
    960 #else
    961 			const int dec64 = 0;
    962 #endif
    963 			op[0] = ref[0];
    964 			op[1] = ref[1];
    965 			op[2] = ref[2];
    966 			op[3] = ref[3];
    967 			op += 4;
    968 			ref += 4;
    969 			ref -= dec32table[op-ref];
    970 			A32(op) = A32(ref);
    971 			op += STEPSIZE - 4;
    972 			ref -= dec64;
    973 		} else {
    974 			LZ4_COPYSTEP(ref, op);
    975 		}
    976 		cpy = op + length - (STEPSIZE - 4);
    977 		if (cpy > oend - COPYLENGTH) {
    978 			if (cpy > oend)
    979 				/*
    980 				 * Error: request to write outside of
    981 				 * destination buffer
    982 				 */
    983 				goto _output_error;
    984 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
    985 			while (op < cpy)
    986 				*op++ = *ref++;
    987 			op = cpy;
    988 			if (op == oend)
    989 				/*
    990 				 * Check EOF (should never happen, since
    991 				 * last 5 bytes are supposed to be literals)
    992 				 */
    993 				goto _output_error;
    994 			continue;
    995 		}
    996 		LZ4_SECURECOPY(ref, op, cpy);
    997 		op = cpy;	/* correction */
    998 	}
    999 
   1000 	/* end of decoding */
   1001 	return (int)(((char *)op) - dest);
   1002 
   1003 	/* write overflow error detected */
   1004 	_output_error:
   1005 	return (int)(-(((char *)ip) - source));
   1006 }
   1007 
   1008 extern void
   1009 lz4_init(void)
   1010 {
   1011 
   1012 #if HEAPMODE
   1013 	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
   1014 	    0, NULL, NULL, NULL, NULL, NULL, 0);
   1015 #endif
   1016 }
   1017 
   1018 extern void
   1019 lz4_fini(void)
   1020 {
   1021 
   1022 #if HEAPMODE
   1023 	kmem_cache_destroy(lz4_ctx_cache);
   1024 #endif
   1025 }
   1026