Home | History | Annotate | Line # | Download | only in linux
      1 /*
      2  * xxHash - Extremely Fast Hash algorithm
      3  * Copyright (C) 2012-2016, Yann Collet.
      4  *
      5  * BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions are
      9  * met:
     10  *
     11  *   * Redistributions of source code must retain the above copyright
     12  *     notice, this list of conditions and the following disclaimer.
     13  *   * Redistributions in binary form must reproduce the above
     14  *     copyright notice, this list of conditions and the following disclaimer
     15  *     in the documentation and/or other materials provided with the
     16  *     distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29  *
     30  * This program is free software; you can redistribute it and/or modify it under
     31  * the terms of the GNU General Public License version 2 as published by the
     32  * Free Software Foundation. This program is dual-licensed; you may select
     33  * either version 2 of the GNU General Public License ("GPL") or BSD license
     34  * ("BSD").
     35  *
     36  * You can contact the author at:
     37  * - xxHash homepage: https://cyan4973.github.io/xxHash/
     38  * - xxHash source repository: https://github.com/Cyan4973/xxHash
     39  */
     40 
     41 /*
     42  * Notice extracted from xxHash homepage:
     43  *
     44  * xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
     45  * It also successfully passes all tests from the SMHasher suite.
     46  *
     47  * Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
     48  * Duo @3GHz)
     49  *
     50  * Name            Speed       Q.Score   Author
     51  * xxHash          5.4 GB/s     10
     52  * CrapWow         3.2 GB/s      2       Andrew
     53  * MumurHash 3a    2.7 GB/s     10       Austin Appleby
     54  * SpookyHash      2.0 GB/s     10       Bob Jenkins
     55  * SBox            1.4 GB/s      9       Bret Mulvey
     56  * Lookup3         1.2 GB/s      9       Bob Jenkins
     57  * SuperFastHash   1.2 GB/s      1       Paul Hsieh
     58  * CityHash64      1.05 GB/s    10       Pike & Alakuijala
     59  * FNV             0.55 GB/s     5       Fowler, Noll, Vo
     60  * CRC32           0.43 GB/s     9
     61  * MD5-32          0.33 GB/s    10       Ronald L. Rivest
     62  * SHA1-32         0.28 GB/s    10
     63  *
     64  * Q.Score is a measure of quality of the hash function.
     65  * It depends on successfully passing SMHasher test set.
     66  * 10 is a perfect score.
     67  *
     68  * A 64-bits version, named xxh64 offers much better speed,
     69  * but for 64-bits applications only.
     70  * Name     Speed on 64 bits    Speed on 32 bits
     71  * xxh64       13.8 GB/s            1.9 GB/s
     72  * xxh32        6.8 GB/s            6.0 GB/s
     73  */
     74 
     75 #ifndef XXHASH_H
     76 #define XXHASH_H
     77 
     78 #include <linux/types.h>
     79 
     80 #define XXH_API static inline __attribute__((unused))
     81 /*-****************************
     82  * Simple Hash Functions
     83  *****************************/
     84 
     85 /**
     86  * xxh32() - calculate the 32-bit hash of the input with a given seed.
     87  *
     88  * @input:  The data to hash.
     89  * @length: The length of the data to hash.
     90  * @seed:   The seed can be used to alter the result predictably.
     91  *
     92  * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
     93  *
     94  * Return:  The 32-bit hash of the data.
     95  */
     96 XXH_API uint32_t xxh32(const void *input, size_t length, uint32_t seed);
     97 
     98 /**
     99  * xxh64() - calculate the 64-bit hash of the input with a given seed.
    100  *
    101  * @input:  The data to hash.
    102  * @length: The length of the data to hash.
    103  * @seed:   The seed can be used to alter the result predictably.
    104  *
    105  * This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
    106  *
    107  * Return:  The 64-bit hash of the data.
    108  */
    109 XXH_API uint64_t xxh64(const void *input, size_t length, uint64_t seed);
    110 
    111 /**
    112  * xxhash() - calculate wordsize hash of the input with a given seed
    113  * @input:  The data to hash.
    114  * @length: The length of the data to hash.
    115  * @seed:   The seed can be used to alter the result predictably.
    116  *
    117  * If the hash does not need to be comparable between machines with
    118  * different word sizes, this function will call whichever of xxh32()
    119  * or xxh64() is faster.
    120  *
    121  * Return:  wordsize hash of the data.
    122  */
    123 
    124 static inline unsigned long xxhash(const void *input, size_t length,
    125 				   uint64_t seed)
    126 {
    127 	if (sizeof(size_t) == 8)
    128 		return xxh64(input, length, seed);
    129 	else
    130 		return xxh32(input, length, seed);
    131 }
    132 
    133 /*-****************************
    134  * Streaming Hash Functions
    135  *****************************/
    136 
    137 /*
    138  * These definitions are only meant to allow allocation of XXH state
    139  * statically, on stack, or in a struct for example.
    140  * Do not use members directly.
    141  */
    142 
    143 /**
    144  * struct xxh32_state - private xxh32 state, do not use members directly
    145  */
    146 struct xxh32_state {
    147 	uint32_t total_len_32;
    148 	uint32_t large_len;
    149 	uint32_t v1;
    150 	uint32_t v2;
    151 	uint32_t v3;
    152 	uint32_t v4;
    153 	uint32_t mem32[4];
    154 	uint32_t memsize;
    155 };
    156 
    157 /**
    158  * struct xxh32_state - private xxh64 state, do not use members directly
    159  */
    160 struct xxh64_state {
    161 	uint64_t total_len;
    162 	uint64_t v1;
    163 	uint64_t v2;
    164 	uint64_t v3;
    165 	uint64_t v4;
    166 	uint64_t mem64[4];
    167 	uint32_t memsize;
    168 };
    169 
    170 /**
    171  * xxh32_reset() - reset the xxh32 state to start a new hashing operation
    172  *
    173  * @state: The xxh32 state to reset.
    174  * @seed:  Initialize the hash state with this seed.
    175  *
    176  * Call this function on any xxh32_state to prepare for a new hashing operation.
    177  */
    178 XXH_API void xxh32_reset(struct xxh32_state *state, uint32_t seed);
    179 
    180 /**
    181  * xxh32_update() - hash the data given and update the xxh32 state
    182  *
    183  * @state:  The xxh32 state to update.
    184  * @input:  The data to hash.
    185  * @length: The length of the data to hash.
    186  *
    187  * After calling xxh32_reset() call xxh32_update() as many times as necessary.
    188  *
    189  * Return:  Zero on success, otherwise an error code.
    190  */
    191 XXH_API int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
    192 
    193 /**
    194  * xxh32_digest() - produce the current xxh32 hash
    195  *
    196  * @state: Produce the current xxh32 hash of this state.
    197  *
    198  * A hash value can be produced at any time. It is still possible to continue
    199  * inserting input into the hash state after a call to xxh32_digest(), and
    200  * generate new hashes later on, by calling xxh32_digest() again.
    201  *
    202  * Return: The xxh32 hash stored in the state.
    203  */
    204 XXH_API uint32_t xxh32_digest(const struct xxh32_state *state);
    205 
    206 /**
    207  * xxh64_reset() - reset the xxh64 state to start a new hashing operation
    208  *
    209  * @state: The xxh64 state to reset.
    210  * @seed:  Initialize the hash state with this seed.
    211  */
    212 XXH_API void xxh64_reset(struct xxh64_state *state, uint64_t seed);
    213 
    214 /**
    215  * xxh64_update() - hash the data given and update the xxh64 state
    216  * @state:  The xxh64 state to update.
    217  * @input:  The data to hash.
    218  * @length: The length of the data to hash.
    219  *
    220  * After calling xxh64_reset() call xxh64_update() as many times as necessary.
    221  *
    222  * Return:  Zero on success, otherwise an error code.
    223  */
    224 XXH_API int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
    225 
    226 /**
    227  * xxh64_digest() - produce the current xxh64 hash
    228  *
    229  * @state: Produce the current xxh64 hash of this state.
    230  *
    231  * A hash value can be produced at any time. It is still possible to continue
    232  * inserting input into the hash state after a call to xxh64_digest(), and
    233  * generate new hashes later on, by calling xxh64_digest() again.
    234  *
    235  * Return: The xxh64 hash stored in the state.
    236  */
    237 XXH_API uint64_t xxh64_digest(const struct xxh64_state *state);
    238 
    239 /*-**************************
    240  * Utils
    241  ***************************/
    242 
    243 /**
    244  * xxh32_copy_state() - copy the source state into the destination state
    245  *
    246  * @src: The source xxh32 state.
    247  * @dst: The destination xxh32 state.
    248  */
    249 XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
    250 
    251 /**
    252  * xxh64_copy_state() - copy the source state into the destination state
    253  *
    254  * @src: The source xxh64 state.
    255  * @dst: The destination xxh64 state.
    256  */
    257 XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
    258 
    259 /*
    260  * xxHash - Extremely Fast Hash algorithm
    261  * Copyright (C) 2012-2016, Yann Collet.
    262  *
    263  * BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
    264  *
    265  * Redistribution and use in source and binary forms, with or without
    266  * modification, are permitted provided that the following conditions are
    267  * met:
    268  *
    269  *   * Redistributions of source code must retain the above copyright
    270  *     notice, this list of conditions and the following disclaimer.
    271  *   * Redistributions in binary form must reproduce the above
    272  *     copyright notice, this list of conditions and the following disclaimer
    273  *     in the documentation and/or other materials provided with the
    274  *     distribution.
    275  *
    276  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    277  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    278  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    279  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    280  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    281  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    282  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    283  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    284  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    285  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    286  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    287  *
    288  * This program is free software; you can redistribute it and/or modify it under
    289  * the terms of the GNU General Public License version 2 as published by the
    290  * Free Software Foundation. This program is dual-licensed; you may select
    291  * either version 2 of the GNU General Public License ("GPL") or BSD license
    292  * ("BSD").
    293  *
    294  * You can contact the author at:
    295  * - xxHash homepage: https://cyan4973.github.io/xxHash/
    296  * - xxHash source repository: https://github.com/Cyan4973/xxHash
    297  */
    298 
    299 #include <asm/unaligned.h>
    300 #include <linux/errno.h>
    301 #include <linux/kernel.h>
    302 #include <linux/module.h>
    303 #include <linux/xxhash.h>
    304 
    305 /*-*************************************
    306  * Macros
    307  **************************************/
    308 #define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
    309 #define xxh_rotl64(x, r) ((x << r) | (x >> (64 - r)))
    310 
    311 #ifdef __LITTLE_ENDIAN
    312 # define XXH_CPU_LITTLE_ENDIAN 1
    313 #else
    314 # define XXH_CPU_LITTLE_ENDIAN 0
    315 #endif
    316 
    317 /*-*************************************
    318  * Constants
    319  **************************************/
    320 static const uint32_t PRIME32_1 = 2654435761U;
    321 static const uint32_t PRIME32_2 = 2246822519U;
    322 static const uint32_t PRIME32_3 = 3266489917U;
    323 static const uint32_t PRIME32_4 =  668265263U;
    324 static const uint32_t PRIME32_5 =  374761393U;
    325 
    326 static const uint64_t PRIME64_1 = 11400714785074694791ULL;
    327 static const uint64_t PRIME64_2 = 14029467366897019727ULL;
    328 static const uint64_t PRIME64_3 =  1609587929392839161ULL;
    329 static const uint64_t PRIME64_4 =  9650029242287828579ULL;
    330 static const uint64_t PRIME64_5 =  2870177450012600261ULL;
    331 
    332 /*-**************************
    333  *  Utils
    334  ***************************/
    335 XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
    336 {
    337 	__builtin_memcpy(dst, src, sizeof(*dst));
    338 }
    339 
    340 XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
    341 {
    342 	__builtin_memcpy(dst, src, sizeof(*dst));
    343 }
    344 
    345 /*-***************************
    346  * Simple Hash Functions
    347  ****************************/
    348 static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
    349 {
    350 	seed += input * PRIME32_2;
    351 	seed = xxh_rotl32(seed, 13);
    352 	seed *= PRIME32_1;
    353 	return seed;
    354 }
    355 
    356 XXH_API uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
    357 {
    358 	const uint8_t *p = (const uint8_t *)input;
    359 	const uint8_t *b_end = p + len;
    360 	uint32_t h32;
    361 
    362 	if (len >= 16) {
    363 		const uint8_t *const limit = b_end - 16;
    364 		uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
    365 		uint32_t v2 = seed + PRIME32_2;
    366 		uint32_t v3 = seed + 0;
    367 		uint32_t v4 = seed - PRIME32_1;
    368 
    369 		do {
    370 			v1 = xxh32_round(v1, get_unaligned_le32(p));
    371 			p += 4;
    372 			v2 = xxh32_round(v2, get_unaligned_le32(p));
    373 			p += 4;
    374 			v3 = xxh32_round(v3, get_unaligned_le32(p));
    375 			p += 4;
    376 			v4 = xxh32_round(v4, get_unaligned_le32(p));
    377 			p += 4;
    378 		} while (p <= limit);
    379 
    380 		h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
    381 			xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
    382 	} else {
    383 		h32 = seed + PRIME32_5;
    384 	}
    385 
    386 	h32 += (uint32_t)len;
    387 
    388 	while (p + 4 <= b_end) {
    389 		h32 += get_unaligned_le32(p) * PRIME32_3;
    390 		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
    391 		p += 4;
    392 	}
    393 
    394 	while (p < b_end) {
    395 		h32 += (*p) * PRIME32_5;
    396 		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
    397 		p++;
    398 	}
    399 
    400 	h32 ^= h32 >> 15;
    401 	h32 *= PRIME32_2;
    402 	h32 ^= h32 >> 13;
    403 	h32 *= PRIME32_3;
    404 	h32 ^= h32 >> 16;
    405 
    406 	return h32;
    407 }
    408 
    409 static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
    410 {
    411 	acc += input * PRIME64_2;
    412 	acc = xxh_rotl64(acc, 31);
    413 	acc *= PRIME64_1;
    414 	return acc;
    415 }
    416 
    417 static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
    418 {
    419 	val = xxh64_round(0, val);
    420 	acc ^= val;
    421 	acc = acc * PRIME64_1 + PRIME64_4;
    422 	return acc;
    423 }
    424 
    425 XXH_API uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
    426 {
    427 	const uint8_t *p = (const uint8_t *)input;
    428 	const uint8_t *const b_end = p + len;
    429 	uint64_t h64;
    430 
    431 	if (len >= 32) {
    432 		const uint8_t *const limit = b_end - 32;
    433 		uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
    434 		uint64_t v2 = seed + PRIME64_2;
    435 		uint64_t v3 = seed + 0;
    436 		uint64_t v4 = seed - PRIME64_1;
    437 
    438 		do {
    439 			v1 = xxh64_round(v1, get_unaligned_le64(p));
    440 			p += 8;
    441 			v2 = xxh64_round(v2, get_unaligned_le64(p));
    442 			p += 8;
    443 			v3 = xxh64_round(v3, get_unaligned_le64(p));
    444 			p += 8;
    445 			v4 = xxh64_round(v4, get_unaligned_le64(p));
    446 			p += 8;
    447 		} while (p <= limit);
    448 
    449 		h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
    450 			xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
    451 		h64 = xxh64_merge_round(h64, v1);
    452 		h64 = xxh64_merge_round(h64, v2);
    453 		h64 = xxh64_merge_round(h64, v3);
    454 		h64 = xxh64_merge_round(h64, v4);
    455 
    456 	} else {
    457 		h64  = seed + PRIME64_5;
    458 	}
    459 
    460 	h64 += (uint64_t)len;
    461 
    462 	while (p + 8 <= b_end) {
    463 		const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
    464 
    465 		h64 ^= k1;
    466 		h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
    467 		p += 8;
    468 	}
    469 
    470 	if (p + 4 <= b_end) {
    471 		h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
    472 		h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
    473 		p += 4;
    474 	}
    475 
    476 	while (p < b_end) {
    477 		h64 ^= (*p) * PRIME64_5;
    478 		h64 = xxh_rotl64(h64, 11) * PRIME64_1;
    479 		p++;
    480 	}
    481 
    482 	h64 ^= h64 >> 33;
    483 	h64 *= PRIME64_2;
    484 	h64 ^= h64 >> 29;
    485 	h64 *= PRIME64_3;
    486 	h64 ^= h64 >> 32;
    487 
    488 	return h64;
    489 }
    490 
    491 /*-**************************************************
    492  * Advanced Hash Functions
    493  ***************************************************/
    494 XXH_API void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
    495 {
    496 	/* use a local state for memcpy() to avoid strict-aliasing warnings */
    497 	struct xxh32_state state;
    498 
    499 	__builtin_memset(&state, 0, sizeof(state));
    500 	state.v1 = seed + PRIME32_1 + PRIME32_2;
    501 	state.v2 = seed + PRIME32_2;
    502 	state.v3 = seed + 0;
    503 	state.v4 = seed - PRIME32_1;
    504 	__builtin_memcpy(statePtr, &state, sizeof(state));
    505 }
    506 
    507 XXH_API void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
    508 {
    509 	/* use a local state for memcpy() to avoid strict-aliasing warnings */
    510 	struct xxh64_state state;
    511 
    512 	__builtin_memset(&state, 0, sizeof(state));
    513 	state.v1 = seed + PRIME64_1 + PRIME64_2;
    514 	state.v2 = seed + PRIME64_2;
    515 	state.v3 = seed + 0;
    516 	state.v4 = seed - PRIME64_1;
    517 	__builtin_memcpy(statePtr, &state, sizeof(state));
    518 }
    519 
    520 XXH_API int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
    521 {
    522 	const uint8_t *p = (const uint8_t *)input;
    523 	const uint8_t *const b_end = p + len;
    524 
    525 	if (input == NULL)
    526 		return -EINVAL;
    527 
    528 	state->total_len_32 += (uint32_t)len;
    529 	state->large_len |= (len >= 16) | (state->total_len_32 >= 16);
    530 
    531 	if (state->memsize + len < 16) { /* fill in tmp buffer */
    532 		__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
    533 		state->memsize += (uint32_t)len;
    534 		return 0;
    535 	}
    536 
    537 	if (state->memsize) { /* some data left from previous update */
    538 		const uint32_t *p32 = state->mem32;
    539 
    540 		__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input,
    541 			16 - state->memsize);
    542 
    543 		state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
    544 		p32++;
    545 		state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
    546 		p32++;
    547 		state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
    548 		p32++;
    549 		state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));
    550 		p32++;
    551 
    552 		p += 16-state->memsize;
    553 		state->memsize = 0;
    554 	}
    555 
    556 	if (p <= b_end - 16) {
    557 		const uint8_t *const limit = b_end - 16;
    558 		uint32_t v1 = state->v1;
    559 		uint32_t v2 = state->v2;
    560 		uint32_t v3 = state->v3;
    561 		uint32_t v4 = state->v4;
    562 
    563 		do {
    564 			v1 = xxh32_round(v1, get_unaligned_le32(p));
    565 			p += 4;
    566 			v2 = xxh32_round(v2, get_unaligned_le32(p));
    567 			p += 4;
    568 			v3 = xxh32_round(v3, get_unaligned_le32(p));
    569 			p += 4;
    570 			v4 = xxh32_round(v4, get_unaligned_le32(p));
    571 			p += 4;
    572 		} while (p <= limit);
    573 
    574 		state->v1 = v1;
    575 		state->v2 = v2;
    576 		state->v3 = v3;
    577 		state->v4 = v4;
    578 	}
    579 
    580 	if (p < b_end) {
    581 		__builtin_memcpy(state->mem32, p, (size_t)(b_end-p));
    582 		state->memsize = (uint32_t)(b_end-p);
    583 	}
    584 
    585 	return 0;
    586 }
    587 
    588 XXH_API uint32_t xxh32_digest(const struct xxh32_state *state)
    589 {
    590 	const uint8_t *p = (const uint8_t *)state->mem32;
    591 	const uint8_t *const b_end = (const uint8_t *)(state->mem32) +
    592 		state->memsize;
    593 	uint32_t h32;
    594 
    595 	if (state->large_len) {
    596 		h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
    597 			xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
    598 	} else {
    599 		h32 = state->v3 /* == seed */ + PRIME32_5;
    600 	}
    601 
    602 	h32 += state->total_len_32;
    603 
    604 	while (p + 4 <= b_end) {
    605 		h32 += get_unaligned_le32(p) * PRIME32_3;
    606 		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
    607 		p += 4;
    608 	}
    609 
    610 	while (p < b_end) {
    611 		h32 += (*p) * PRIME32_5;
    612 		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
    613 		p++;
    614 	}
    615 
    616 	h32 ^= h32 >> 15;
    617 	h32 *= PRIME32_2;
    618 	h32 ^= h32 >> 13;
    619 	h32 *= PRIME32_3;
    620 	h32 ^= h32 >> 16;
    621 
    622 	return h32;
    623 }
    624 
    625 XXH_API int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
    626 {
    627 	const uint8_t *p = (const uint8_t *)input;
    628 	const uint8_t *const b_end = p + len;
    629 
    630 	if (input == NULL)
    631 		return -EINVAL;
    632 
    633 	state->total_len += len;
    634 
    635 	if (state->memsize + len < 32) { /* fill in tmp buffer */
    636 		__builtin_memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
    637 		state->memsize += (uint32_t)len;
    638 		return 0;
    639 	}
    640 
    641 	if (state->memsize) { /* tmp buffer is full */
    642 		uint64_t *p64 = state->mem64;
    643 
    644 		__builtin_memcpy(((uint8_t *)p64) + state->memsize, input,
    645 			32 - state->memsize);
    646 
    647 		state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
    648 		p64++;
    649 		state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
    650 		p64++;
    651 		state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
    652 		p64++;
    653 		state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));
    654 
    655 		p += 32 - state->memsize;
    656 		state->memsize = 0;
    657 	}
    658 
    659 	if (p + 32 <= b_end) {
    660 		const uint8_t *const limit = b_end - 32;
    661 		uint64_t v1 = state->v1;
    662 		uint64_t v2 = state->v2;
    663 		uint64_t v3 = state->v3;
    664 		uint64_t v4 = state->v4;
    665 
    666 		do {
    667 			v1 = xxh64_round(v1, get_unaligned_le64(p));
    668 			p += 8;
    669 			v2 = xxh64_round(v2, get_unaligned_le64(p));
    670 			p += 8;
    671 			v3 = xxh64_round(v3, get_unaligned_le64(p));
    672 			p += 8;
    673 			v4 = xxh64_round(v4, get_unaligned_le64(p));
    674 			p += 8;
    675 		} while (p <= limit);
    676 
    677 		state->v1 = v1;
    678 		state->v2 = v2;
    679 		state->v3 = v3;
    680 		state->v4 = v4;
    681 	}
    682 
    683 	if (p < b_end) {
    684 		__builtin_memcpy(state->mem64, p, (size_t)(b_end-p));
    685 		state->memsize = (uint32_t)(b_end - p);
    686 	}
    687 
    688 	return 0;
    689 }
    690 
    691 XXH_API uint64_t xxh64_digest(const struct xxh64_state *state)
    692 {
    693 	const uint8_t *p = (const uint8_t *)state->mem64;
    694 	const uint8_t *const b_end = (const uint8_t *)state->mem64 +
    695 		state->memsize;
    696 	uint64_t h64;
    697 
    698 	if (state->total_len >= 32) {
    699 		const uint64_t v1 = state->v1;
    700 		const uint64_t v2 = state->v2;
    701 		const uint64_t v3 = state->v3;
    702 		const uint64_t v4 = state->v4;
    703 
    704 		h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
    705 			xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
    706 		h64 = xxh64_merge_round(h64, v1);
    707 		h64 = xxh64_merge_round(h64, v2);
    708 		h64 = xxh64_merge_round(h64, v3);
    709 		h64 = xxh64_merge_round(h64, v4);
    710 	} else {
    711 		h64  = state->v3 + PRIME64_5;
    712 	}
    713 
    714 	h64 += (uint64_t)state->total_len;
    715 
    716 	while (p + 8 <= b_end) {
    717 		const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));
    718 
    719 		h64 ^= k1;
    720 		h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
    721 		p += 8;
    722 	}
    723 
    724 	if (p + 4 <= b_end) {
    725 		h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
    726 		h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
    727 		p += 4;
    728 	}
    729 
    730 	while (p < b_end) {
    731 		h64 ^= (*p) * PRIME64_5;
    732 		h64 = xxh_rotl64(h64, 11) * PRIME64_1;
    733 		p++;
    734 	}
    735 
    736 	h64 ^= h64 >> 33;
    737 	h64 *= PRIME64_2;
    738 	h64 ^= h64 >> 29;
    739 	h64 *= PRIME64_3;
    740 	h64 ^= h64 >> 32;
    741 
    742 	return h64;
    743 }
    744 
    745 #endif /* XXHASH_H */
    746