lz4.c revision 1.1.1.1 1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 */
34
35 #include <sys/zfs_context.h>
36
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38 int osize);
39 static int LZ4_compressBound(int isize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41 int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43 int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45 int isize, int osize);
46
47 static kmem_cache_t *lz4_ctx_cache;
48
49 /*ARGSUSED*/
50 size_t
51 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
52 {
53 uint32_t bufsiz;
54 char *dest = d_start;
55
56 ASSERT(d_len >= sizeof (bufsiz));
57
58 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
59 d_len - sizeof (bufsiz));
60
61 /* Signal an error if the compression routine returned zero. */
62 if (bufsiz == 0)
63 return (s_len);
64
65 /*
66 * Encode the compresed buffer size at the start. We'll need this in
67 * decompression to counter the effects of padding which might be
68 * added to the compressed buffer and which, if unhandled, would
69 * confuse the hell out of our decompression function.
70 */
71 *(uint32_t *)dest = BE_32(bufsiz);
72
73 return (bufsiz + sizeof (bufsiz));
74 }
75
76 /*ARGSUSED*/
77 int
78 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
79 {
80 const char *src = s_start;
81 uint32_t bufsiz = BE_IN32(src);
82
83 /* invalid compressed buffer size encoded at start */
84 if (bufsiz + sizeof (bufsiz) > s_len)
85 return (1);
86
87 /*
88 * Returns 0 on success (decompression function returned non-negative)
89 * and non-zero on failure (decompression function returned negative.
90 */
91 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
92 d_start, bufsiz, d_len) < 0);
93 }
94
95 /*
96 * LZ4 API Description:
97 *
98 * Simple Functions:
99 * real_LZ4_compress() :
100 * isize : is the input size. Max supported value is ~1.9GB
101 * return : the number of bytes written in buffer dest
102 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
103 * note : destination buffer must be already allocated.
104 * destination buffer must be sized to handle worst cases
105 * situations (input data not compressible) worst case size
106 * evaluation is provided by function LZ4_compressBound().
107 *
108 * Advanced Functions
109 *
110 * LZ4_compressBound() :
111 * Provides the maximum size that LZ4 may output in a "worst case"
112 * scenario (input data not compressible) primarily useful for memory
113 * allocation of output buffer.
114 *
115 * isize : is the input size. Max supported value is ~1.9GB
116 * return : maximum output size in a "worst case" scenario
117 * note : this function is limited by "int" range (2^31-1)
118 *
119 * LZ4_uncompress_unknownOutputSize() :
120 * isize : is the input size, therefore the compressed size
121 * maxOutputSize : is the size of the destination buffer (which must be
122 * already allocated)
123 * return : the number of bytes decoded in the destination buffer
124 * (necessarily <= maxOutputSize). If the source stream is
125 * malformed, the function will stop decoding and return a
126 * negative result, indicating the byte position of the faulty
127 * instruction. This function never writes beyond dest +
128 * maxOutputSize, and is therefore protected against malicious
129 * data packets.
130 * note : Destination buffer must be already allocated.
131 *
132 * LZ4_compressCtx() :
133 * This function explicitly handles the CTX memory structure.
134 *
135 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
136 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
137 * isn't valid.
138 *
139 * LZ4_compress64kCtx() :
140 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
141 * isize *Must* be <64KB, otherwise the output will be corrupted.
142 *
143 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
144 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
145 * isn't valid.
146 */
147
148 /*
149 * Tuning parameters
150 */
151
152 /*
153 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
154 * Lowering this value reduces memory usage. Reduced memory usage
155 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
156 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
157 * (examples : 12 -> 16KB ; 17 -> 512KB)
158 */
159 #define COMPRESSIONLEVEL 12
160
161 /*
162 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
163 * algorithm skip faster data segments considered "incompressible".
164 * This may decrease compression ratio dramatically, but will be
165 * faster on incompressible data. Increasing this value will make
166 * the algorithm search more before declaring a segment "incompressible".
167 * This could improve compression a bit, but will be slower on
168 * incompressible data. The default value (6) is recommended.
169 */
170 #define NOTCOMPRESSIBLE_CONFIRMATION 6
171
172 /*
173 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
174 * performance for big endian cpu, but the resulting compressed stream
175 * will be incompatible with little-endian CPU. You can set this option
176 * to 1 in situations where data will stay within closed environment.
177 * This option is useless on Little_Endian CPU (such as x86).
178 */
179 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
180
181 /*
182 * CPU Feature Detection
183 */
184
185 /* 32 or 64 bits ? */
186 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
187 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
188 defined(__LP64__) || defined(_LP64))
189 #define LZ4_ARCH64 1
190 #else
191 #define LZ4_ARCH64 0
192 #endif
193
194 /*
195 * Limits the amount of stack space that the algorithm may consume to hold
196 * the compression lookup table. The value `9' here means we'll never use
197 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
198 * If more memory is needed, it is allocated from the heap.
199 */
200 /* FreeBSD: Use heap for all platforms for now */
201 #define STACKLIMIT 0
202
203 /*
204 * Little Endian or Big Endian?
205 * Note: overwrite the below #define if you know your architecture endianess.
206 */
207 #if BYTE_ORDER == BIG_ENDIAN
208 #define LZ4_BIG_ENDIAN 1
209 #else
210 /*
211 * Little Endian assumed. PDP Endian and other very rare endian format
212 * are unsupported.
213 */
214 #endif
215
216 /*
217 * Unaligned memory access is automatically enabled for "common" CPU,
218 * such as x86. For others CPU, the compiler will be more cautious, and
219 * insert extra code to ensure aligned access is respected. If you know
220 * your target CPU supports unaligned memory access, you may want to
221 * force this option manually to improve performance
222 */
223 #if defined(__ARM_FEATURE_UNALIGNED)
224 #define LZ4_FORCE_UNALIGNED_ACCESS 1
225 #endif
226
227 /*
228 * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
229 * gcc currently rely on libcompiler_rt.
230 *
231 * TODO: revisit this when situation changes.
232 */
233 #if defined(__sparc64__)
234 #define LZ4_FORCE_SW_BITCOUNT
235 #endif
236
237 /*
238 * Compiler Options
239 */
240 #if __STDC_VERSION__ >= 199901L /* C99 */
241 /* "restrict" is a known keyword */
242 #else
243 /* Disable restrict */
244 #define restrict
245 #endif
246
247 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
248 (((x) & 0xffu) << 8)))
249
250 #define expect(expr, value) (__builtin_expect((expr), (value)))
251
252 #if defined(likely)
253 #undef likely
254 #endif
255 #if defined(unlikely)
256 #undef unlikely
257 #endif
258
259 #define likely(expr) expect((expr) != 0, 1)
260 #define unlikely(expr) expect((expr) != 0, 0)
261
262 /* Basic types */
263 #define BYTE uint8_t
264 #define U16 uint16_t
265 #define U32 uint32_t
266 #define S32 int32_t
267 #define U64 uint64_t
268
269 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
270 #pragma pack(1)
271 #endif
272
273 typedef struct _U16_S {
274 U16 v;
275 } U16_S;
276 typedef struct _U32_S {
277 U32 v;
278 } U32_S;
279 typedef struct _U64_S {
280 U64 v;
281 } U64_S;
282
283 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
284 #pragma pack()
285 #endif
286
287 #define A64(x) (((U64_S *)(x))->v)
288 #define A32(x) (((U32_S *)(x))->v)
289 #define A16(x) (((U16_S *)(x))->v)
290
291 /*
292 * Constants
293 */
294 #define MINMATCH 4
295
296 #define HASH_LOG COMPRESSIONLEVEL
297 #define HASHTABLESIZE (1 << HASH_LOG)
298 #define HASH_MASK (HASHTABLESIZE - 1)
299
300 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
301 NOTCOMPRESSIBLE_CONFIRMATION : 2)
302
303 /*
304 * Defines if memory is allocated into the stack (local variable),
305 * or into the heap (kmem_alloc()).
306 */
307 #define HEAPMODE (HASH_LOG > STACKLIMIT)
308 #define COPYLENGTH 8
309 #define LASTLITERALS 5
310 #define MFLIMIT (COPYLENGTH + MINMATCH)
311 #define MINLENGTH (MFLIMIT + 1)
312
313 #define MAXD_LOG 16
314 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
315
316 #define ML_BITS 4
317 #define ML_MASK ((1U<<ML_BITS)-1)
318 #define RUN_BITS (8-ML_BITS)
319 #define RUN_MASK ((1U<<RUN_BITS)-1)
320
321
322 /*
323 * Architecture-specific macros
324 */
325 #if LZ4_ARCH64
326 #define STEPSIZE 8
327 #define UARCH U64
328 #define AARCH A64
329 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
330 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
331 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
332 #define HTYPE U32
333 #define INITBASE(base) const BYTE* const base = ip
334 #else /* !LZ4_ARCH64 */
335 #define STEPSIZE 4
336 #define UARCH U32
337 #define AARCH A32
338 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
339 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
340 #define LZ4_SECURECOPY LZ4_WILDCOPY
341 #define HTYPE const BYTE *
342 #define INITBASE(base) const int base = 0
343 #endif /* !LZ4_ARCH64 */
344
345 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
346 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
347 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
348 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
349 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
350 #else
351 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
352 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
353 #endif
354
355
356 /* Local structures */
357 struct refTables {
358 HTYPE hashTable[HASHTABLESIZE];
359 };
360
361
362 /* Macros */
363 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
364 HASH_LOG))
365 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
366 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
367 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
368 d = e; }
369
370
371 /* Private functions */
372 #if LZ4_ARCH64
373
374 static inline int
375 LZ4_NbCommonBytes(register U64 val)
376 {
377 #if defined(LZ4_BIG_ENDIAN)
378 #if !defined(LZ4_FORCE_SW_BITCOUNT)
379 return (__builtin_clzll(val) >> 3);
380 #else
381 int r;
382 if (!(val >> 32)) {
383 r = 4;
384 } else {
385 r = 0;
386 val >>= 32;
387 }
388 if (!(val >> 16)) {
389 r += 2;
390 val >>= 8;
391 } else {
392 val >>= 24;
393 }
394 r += (!val);
395 return (r);
396 #endif
397 #else
398 #if !defined(LZ4_FORCE_SW_BITCOUNT)
399 return (__builtin_ctzll(val) >> 3);
400 #else
401 static const int DeBruijnBytePos[64] =
402 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
403 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
404 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
405 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
406 };
407 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
408 58];
409 #endif
410 #endif
411 }
412
413 #else
414
415 static inline int
416 LZ4_NbCommonBytes(register U32 val)
417 {
418 #if defined(LZ4_BIG_ENDIAN)
419 #if !defined(LZ4_FORCE_SW_BITCOUNT)
420 return (__builtin_clz(val) >> 3);
421 #else
422 int r;
423 if (!(val >> 16)) {
424 r = 2;
425 val >>= 8;
426 } else {
427 r = 0;
428 val >>= 24;
429 }
430 r += (!val);
431 return (r);
432 #endif
433 #else
434 #if !defined(LZ4_FORCE_SW_BITCOUNT)
435 return (__builtin_ctz(val) >> 3);
436 #else
437 static const int DeBruijnBytePos[32] = {
438 0, 0, 3, 0, 3, 1, 3, 0,
439 3, 2, 2, 1, 3, 2, 0, 1,
440 3, 3, 1, 2, 2, 2, 2, 0,
441 3, 1, 2, 0, 1, 0, 1, 1
442 };
443 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
444 27];
445 #endif
446 #endif
447 }
448
449 #endif
450
451 /* Public functions */
452
453 static int
454 LZ4_compressBound(int isize)
455 {
456 return (isize + (isize / 255) + 16);
457 }
458
459 /* Compression functions */
460
461 /*ARGSUSED*/
462 static int
463 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
464 int osize)
465 {
466 #if HEAPMODE
467 struct refTables *srt = (struct refTables *)ctx;
468 HTYPE *HashTable = (HTYPE *) (srt->hashTable);
469 #else
470 HTYPE HashTable[HASHTABLESIZE] = { 0 };
471 #endif
472
473 const BYTE *ip = (BYTE *) source;
474 INITBASE(base);
475 const BYTE *anchor = ip;
476 const BYTE *const iend = ip + isize;
477 const BYTE *const oend = (BYTE *) dest + osize;
478 const BYTE *const mflimit = iend - MFLIMIT;
479 #define matchlimit (iend - LASTLITERALS)
480
481 BYTE *op = (BYTE *) dest;
482
483 int len, length;
484 const int skipStrength = SKIPSTRENGTH;
485 U32 forwardH;
486
487
488 /* Init */
489 if (isize < MINLENGTH)
490 goto _last_literals;
491
492 /* First Byte */
493 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
494 ip++;
495 forwardH = LZ4_HASH_VALUE(ip);
496
497 /* Main Loop */
498 for (;;) {
499 int findMatchAttempts = (1U << skipStrength) + 3;
500 const BYTE *forwardIp = ip;
501 const BYTE *ref;
502 BYTE *token;
503
504 /* Find a match */
505 do {
506 U32 h = forwardH;
507 int step = findMatchAttempts++ >> skipStrength;
508 ip = forwardIp;
509 forwardIp = ip + step;
510
511 if unlikely(forwardIp > mflimit) {
512 goto _last_literals;
513 }
514
515 forwardH = LZ4_HASH_VALUE(forwardIp);
516 ref = base + HashTable[h];
517 HashTable[h] = ip - base;
518
519 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
520
521 /* Catch up */
522 while ((ip > anchor) && (ref > (BYTE *) source) &&
523 unlikely(ip[-1] == ref[-1])) {
524 ip--;
525 ref--;
526 }
527
528 /* Encode Literal length */
529 length = ip - anchor;
530 token = op++;
531
532 /* Check output limit */
533 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
534 (length >> 8) > oend)
535 return (0);
536
537 if (length >= (int)RUN_MASK) {
538 *token = (RUN_MASK << ML_BITS);
539 len = length - RUN_MASK;
540 for (; len > 254; len -= 255)
541 *op++ = 255;
542 *op++ = (BYTE)len;
543 } else
544 *token = (length << ML_BITS);
545
546 /* Copy Literals */
547 LZ4_BLINDCOPY(anchor, op, length);
548
549 _next_match:
550 /* Encode Offset */
551 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
552
553 /* Start Counting */
554 ip += MINMATCH;
555 ref += MINMATCH; /* MinMatch verified */
556 anchor = ip;
557 while likely(ip < matchlimit - (STEPSIZE - 1)) {
558 UARCH diff = AARCH(ref) ^ AARCH(ip);
559 if (!diff) {
560 ip += STEPSIZE;
561 ref += STEPSIZE;
562 continue;
563 }
564 ip += LZ4_NbCommonBytes(diff);
565 goto _endCount;
566 }
567 #if LZ4_ARCH64
568 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
569 ip += 4;
570 ref += 4;
571 }
572 #endif
573 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
574 ip += 2;
575 ref += 2;
576 }
577 if ((ip < matchlimit) && (*ref == *ip))
578 ip++;
579 _endCount:
580
581 /* Encode MatchLength */
582 len = (ip - anchor);
583 /* Check output limit */
584 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
585 return (0);
586 if (len >= (int)ML_MASK) {
587 *token += ML_MASK;
588 len -= ML_MASK;
589 for (; len > 509; len -= 510) {
590 *op++ = 255;
591 *op++ = 255;
592 }
593 if (len > 254) {
594 len -= 255;
595 *op++ = 255;
596 }
597 *op++ = (BYTE)len;
598 } else
599 *token += len;
600
601 /* Test end of chunk */
602 if (ip > mflimit) {
603 anchor = ip;
604 break;
605 }
606 /* Fill table */
607 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
608
609 /* Test next position */
610 ref = base + HashTable[LZ4_HASH_VALUE(ip)];
611 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
612 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
613 token = op++;
614 *token = 0;
615 goto _next_match;
616 }
617 /* Prepare next loop */
618 anchor = ip++;
619 forwardH = LZ4_HASH_VALUE(ip);
620 }
621
622 _last_literals:
623 /* Encode Last Literals */
624 {
625 int lastRun = iend - anchor;
626 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
627 oend)
628 return (0);
629 if (lastRun >= (int)RUN_MASK) {
630 *op++ = (RUN_MASK << ML_BITS);
631 lastRun -= RUN_MASK;
632 for (; lastRun > 254; lastRun -= 255) {
633 *op++ = 255;
634 }
635 *op++ = (BYTE)lastRun;
636 } else
637 *op++ = (lastRun << ML_BITS);
638 (void) memcpy(op, anchor, iend - anchor);
639 op += iend - anchor;
640 }
641
642 /* End */
643 return (int)(((char *)op) - dest);
644 }
645
646
647
648 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
649 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
650 #define HASHLOG64K (HASH_LOG + 1)
651 #define HASH64KTABLESIZE (1U << HASHLOG64K)
652 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
653 HASHLOG64K))
654 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
655
656 /*ARGSUSED*/
657 static int
658 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
659 int osize)
660 {
661 #if HEAPMODE
662 struct refTables *srt = (struct refTables *)ctx;
663 U16 *HashTable = (U16 *) (srt->hashTable);
664 #else
665 U16 HashTable[HASH64KTABLESIZE] = { 0 };
666 #endif
667
668 const BYTE *ip = (BYTE *) source;
669 const BYTE *anchor = ip;
670 const BYTE *const base = ip;
671 const BYTE *const iend = ip + isize;
672 const BYTE *const oend = (BYTE *) dest + osize;
673 const BYTE *const mflimit = iend - MFLIMIT;
674 #define matchlimit (iend - LASTLITERALS)
675
676 BYTE *op = (BYTE *) dest;
677
678 int len, length;
679 const int skipStrength = SKIPSTRENGTH;
680 U32 forwardH;
681
682 /* Init */
683 if (isize < MINLENGTH)
684 goto _last_literals;
685
686 /* First Byte */
687 ip++;
688 forwardH = LZ4_HASH64K_VALUE(ip);
689
690 /* Main Loop */
691 for (;;) {
692 int findMatchAttempts = (1U << skipStrength) + 3;
693 const BYTE *forwardIp = ip;
694 const BYTE *ref;
695 BYTE *token;
696
697 /* Find a match */
698 do {
699 U32 h = forwardH;
700 int step = findMatchAttempts++ >> skipStrength;
701 ip = forwardIp;
702 forwardIp = ip + step;
703
704 if (forwardIp > mflimit) {
705 goto _last_literals;
706 }
707
708 forwardH = LZ4_HASH64K_VALUE(forwardIp);
709 ref = base + HashTable[h];
710 HashTable[h] = ip - base;
711
712 } while (A32(ref) != A32(ip));
713
714 /* Catch up */
715 while ((ip > anchor) && (ref > (BYTE *) source) &&
716 (ip[-1] == ref[-1])) {
717 ip--;
718 ref--;
719 }
720
721 /* Encode Literal length */
722 length = ip - anchor;
723 token = op++;
724
725 /* Check output limit */
726 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
727 (length >> 8) > oend)
728 return (0);
729
730 if (length >= (int)RUN_MASK) {
731 *token = (RUN_MASK << ML_BITS);
732 len = length - RUN_MASK;
733 for (; len > 254; len -= 255)
734 *op++ = 255;
735 *op++ = (BYTE)len;
736 } else
737 *token = (length << ML_BITS);
738
739 /* Copy Literals */
740 LZ4_BLINDCOPY(anchor, op, length);
741
742 _next_match:
743 /* Encode Offset */
744 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
745
746 /* Start Counting */
747 ip += MINMATCH;
748 ref += MINMATCH; /* MinMatch verified */
749 anchor = ip;
750 while (ip < matchlimit - (STEPSIZE - 1)) {
751 UARCH diff = AARCH(ref) ^ AARCH(ip);
752 if (!diff) {
753 ip += STEPSIZE;
754 ref += STEPSIZE;
755 continue;
756 }
757 ip += LZ4_NbCommonBytes(diff);
758 goto _endCount;
759 }
760 #if LZ4_ARCH64
761 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
762 ip += 4;
763 ref += 4;
764 }
765 #endif
766 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
767 ip += 2;
768 ref += 2;
769 }
770 if ((ip < matchlimit) && (*ref == *ip))
771 ip++;
772 _endCount:
773
774 /* Encode MatchLength */
775 len = (ip - anchor);
776 /* Check output limit */
777 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
778 return (0);
779 if (len >= (int)ML_MASK) {
780 *token += ML_MASK;
781 len -= ML_MASK;
782 for (; len > 509; len -= 510) {
783 *op++ = 255;
784 *op++ = 255;
785 }
786 if (len > 254) {
787 len -= 255;
788 *op++ = 255;
789 }
790 *op++ = (BYTE)len;
791 } else
792 *token += len;
793
794 /* Test end of chunk */
795 if (ip > mflimit) {
796 anchor = ip;
797 break;
798 }
799 /* Fill table */
800 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
801
802 /* Test next position */
803 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
804 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
805 if (A32(ref) == A32(ip)) {
806 token = op++;
807 *token = 0;
808 goto _next_match;
809 }
810 /* Prepare next loop */
811 anchor = ip++;
812 forwardH = LZ4_HASH64K_VALUE(ip);
813 }
814
815 _last_literals:
816 /* Encode Last Literals */
817 {
818 int lastRun = iend - anchor;
819 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
820 oend)
821 return (0);
822 if (lastRun >= (int)RUN_MASK) {
823 *op++ = (RUN_MASK << ML_BITS);
824 lastRun -= RUN_MASK;
825 for (; lastRun > 254; lastRun -= 255)
826 *op++ = 255;
827 *op++ = (BYTE)lastRun;
828 } else
829 *op++ = (lastRun << ML_BITS);
830 (void) memcpy(op, anchor, iend - anchor);
831 op += iend - anchor;
832 }
833
834 /* End */
835 return (int)(((char *)op) - dest);
836 }
837
838 static int
839 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
840 {
841 #if HEAPMODE
842 void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
843 int result;
844
845 /*
846 * out of kernel memory, gently fall through - this will disable
847 * compression in zio_compress_data
848 */
849 if (ctx == NULL)
850 return (0);
851
852 bzero(ctx, sizeof(struct refTables));
853 if (isize < LZ4_64KLIMIT)
854 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
855 else
856 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
857
858 kmem_cache_free(lz4_ctx_cache, ctx);
859 return (result);
860 #else
861 if (isize < (int)LZ4_64KLIMIT)
862 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
863 return (LZ4_compressCtx(NULL, source, dest, isize, osize));
864 #endif
865 }
866
867 /* Decompression functions */
868
869 /*
870 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
871 * against "buffer overflow" attack type. They will never write nor
872 * read outside of the provided output buffers.
873 * LZ4_uncompress_unknownOutputSize() also insures that it will never
874 * read outside of the input buffer. A corrupted input will produce
875 * an error result, a negative int, indicating the position of the
876 * error within input stream.
877 */
878
879 static int
880 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
881 int maxOutputSize)
882 {
883 /* Local Variables */
884 const BYTE *restrict ip = (const BYTE *) source;
885 const BYTE *const iend = ip + isize;
886 const BYTE *ref;
887
888 BYTE *op = (BYTE *) dest;
889 BYTE *const oend = op + maxOutputSize;
890 BYTE *cpy;
891
892 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
893 #if LZ4_ARCH64
894 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
895 #endif
896
897 /* Main Loop */
898 while (ip < iend) {
899 unsigned token;
900 size_t length;
901
902 /* get runlength */
903 token = *ip++;
904 if ((length = (token >> ML_BITS)) == RUN_MASK) {
905 int s = 255;
906 while ((ip < iend) && (s == 255)) {
907 s = *ip++;
908 length += s;
909 }
910 }
911 /* copy literals */
912 cpy = op + length;
913 /* CORNER-CASE: cpy might overflow. */
914 if (cpy < op)
915 goto _output_error; /* cpy was overflowed, bail! */
916 if ((cpy > oend - COPYLENGTH) ||
917 (ip + length > iend - COPYLENGTH)) {
918 if (cpy > oend)
919 /* Error: writes beyond output buffer */
920 goto _output_error;
921 if (ip + length != iend)
922 /*
923 * Error: LZ4 format requires to consume all
924 * input at this stage
925 */
926 goto _output_error;
927 (void) memcpy(op, ip, length);
928 op += length;
929 /* Necessarily EOF, due to parsing restrictions */
930 break;
931 }
932 LZ4_WILDCOPY(ip, op, cpy);
933 ip -= (op - cpy);
934 op = cpy;
935
936 /* get offset */
937 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
938 ip += 2;
939 if (ref < (BYTE * const) dest)
940 /*
941 * Error: offset creates reference outside of
942 * destination buffer
943 */
944 goto _output_error;
945
946 /* get matchlength */
947 if ((length = (token & ML_MASK)) == ML_MASK) {
948 while (ip < iend) {
949 int s = *ip++;
950 length += s;
951 if (s == 255)
952 continue;
953 break;
954 }
955 }
956 /* copy repeated sequence */
957 if unlikely(op - ref < STEPSIZE) {
958 #if LZ4_ARCH64
959 size_t dec64 = dec64table[op-ref];
960 #else
961 const int dec64 = 0;
962 #endif
963 op[0] = ref[0];
964 op[1] = ref[1];
965 op[2] = ref[2];
966 op[3] = ref[3];
967 op += 4;
968 ref += 4;
969 ref -= dec32table[op-ref];
970 A32(op) = A32(ref);
971 op += STEPSIZE - 4;
972 ref -= dec64;
973 } else {
974 LZ4_COPYSTEP(ref, op);
975 }
976 cpy = op + length - (STEPSIZE - 4);
977 if (cpy > oend - COPYLENGTH) {
978 if (cpy > oend)
979 /*
980 * Error: request to write outside of
981 * destination buffer
982 */
983 goto _output_error;
984 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
985 while (op < cpy)
986 *op++ = *ref++;
987 op = cpy;
988 if (op == oend)
989 /*
990 * Check EOF (should never happen, since
991 * last 5 bytes are supposed to be literals)
992 */
993 goto _output_error;
994 continue;
995 }
996 LZ4_SECURECOPY(ref, op, cpy);
997 op = cpy; /* correction */
998 }
999
1000 /* end of decoding */
1001 return (int)(((char *)op) - dest);
1002
1003 /* write overflow error detected */
1004 _output_error:
1005 return (int)(-(((char *)ip) - source));
1006 }
1007
1008 extern void
1009 lz4_init(void)
1010 {
1011
1012 #if HEAPMODE
1013 lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
1014 0, NULL, NULL, NULL, NULL, NULL, 0);
1015 #endif
1016 }
1017
1018 extern void
1019 lz4_fini(void)
1020 {
1021
1022 #if HEAPMODE
1023 kmem_cache_destroy(lz4_ctx_cache);
1024 #endif
1025 }
1026