126fa459cSmrg/* NOLINT(build/header_guard) */ 226fa459cSmrg/* Copyright 2018 Google Inc. All Rights Reserved. 326fa459cSmrg 426fa459cSmrg Distributed under MIT license. 526fa459cSmrg See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 626fa459cSmrg*/ 726fa459cSmrg 826fa459cSmrg/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */ 926fa459cSmrg/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */ 1026fa459cSmrg/* JUMP = skip bytes for speedup */ 1126fa459cSmrg 1226fa459cSmrg/* Rolling hash for long distance long string matches. Stores one position 1326fa459cSmrg per bucket, bucket key is computed over a long region. */ 1426fa459cSmrg 1526fa459cSmrg#define HashRolling HASHER() 1626fa459cSmrg 1726fa459cSmrgstatic const uint32_t FN(kRollingHashMul32) = 69069; 1826fa459cSmrgstatic const uint32_t FN(kInvalidPos) = 0xffffffff; 1926fa459cSmrg 2026fa459cSmrg/* This hasher uses a longer forward length, but returning a higher value here 2126fa459cSmrg will hurt compression by the main hasher when combined with a composite 2226fa459cSmrg hasher. The hasher tests for forward itself instead. */ 2326fa459cSmrgstatic BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } 2426fa459cSmrgstatic BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } 2526fa459cSmrg 2626fa459cSmrg/* Computes a code from a single byte. A lookup table of 256 values could be 2726fa459cSmrg used, but simply adding 1 works about as good. */ 2826fa459cSmrgstatic uint32_t FN(HashByte)(uint8_t byte) { 2926fa459cSmrg return (uint32_t)byte + 1u; 3026fa459cSmrg} 3126fa459cSmrg 3226fa459cSmrgstatic uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add, 3326fa459cSmrg uint32_t factor) { 3426fa459cSmrg return (uint32_t)(factor * state + FN(HashByte)(add)); 3526fa459cSmrg} 3626fa459cSmrg 3726fa459cSmrgstatic uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add, 3826fa459cSmrg uint8_t rem, uint32_t factor, 3926fa459cSmrg uint32_t factor_remove) { 4026fa459cSmrg return (uint32_t)(factor * state + 4126fa459cSmrg FN(HashByte)(add) - factor_remove * FN(HashByte)(rem)); 4226fa459cSmrg} 4326fa459cSmrg 4426fa459cSmrgtypedef struct HashRolling { 4526fa459cSmrg uint32_t state; 4626fa459cSmrg uint32_t* table; 4726fa459cSmrg size_t next_ix; 4826fa459cSmrg 4926fa459cSmrg uint32_t chunk_len; 5026fa459cSmrg uint32_t factor; 5126fa459cSmrg uint32_t factor_remove; 5226fa459cSmrg} HashRolling; 5326fa459cSmrg 5426fa459cSmrgstatic void FN(Initialize)( 5526fa459cSmrg HasherCommon* common, HashRolling* BROTLI_RESTRICT self, 5626fa459cSmrg const BrotliEncoderParams* params) { 5726fa459cSmrg size_t i; 5826fa459cSmrg self->state = 0; 5926fa459cSmrg self->next_ix = 0; 6026fa459cSmrg 6126fa459cSmrg self->factor = FN(kRollingHashMul32); 6226fa459cSmrg 6326fa459cSmrg /* Compute the factor of the oldest byte to remove: factor**steps modulo 6426fa459cSmrg 0xffffffff (the multiplications rely on 32-bit overflow) */ 6526fa459cSmrg self->factor_remove = 1; 6626fa459cSmrg for (i = 0; i < CHUNKLEN; i += JUMP) { 6726fa459cSmrg self->factor_remove *= self->factor; 6826fa459cSmrg } 6926fa459cSmrg 7026fa459cSmrg self->table = (uint32_t*)common->extra; 7126fa459cSmrg for (i = 0; i < NUMBUCKETS; i++) { 7226fa459cSmrg self->table[i] = FN(kInvalidPos); 7326fa459cSmrg } 7426fa459cSmrg 7526fa459cSmrg BROTLI_UNUSED(params); 7626fa459cSmrg} 7726fa459cSmrg 7826fa459cSmrgstatic void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot, 7926fa459cSmrg size_t input_size, const uint8_t* BROTLI_RESTRICT data) { 8026fa459cSmrg size_t i; 8126fa459cSmrg /* Too small size, cannot use this hasher. */ 8226fa459cSmrg if (input_size < CHUNKLEN) return; 8326fa459cSmrg self->state = 0; 8426fa459cSmrg for (i = 0; i < CHUNKLEN; i += JUMP) { 8526fa459cSmrg self->state = FN(HashRollingFunctionInitial)( 8626fa459cSmrg self->state, data[i], self->factor); 8726fa459cSmrg } 8826fa459cSmrg BROTLI_UNUSED(one_shot); 8926fa459cSmrg} 9026fa459cSmrg 9126fa459cSmrgstatic BROTLI_INLINE size_t FN(HashMemAllocInBytes)( 9226fa459cSmrg const BrotliEncoderParams* params, BROTLI_BOOL one_shot, 9326fa459cSmrg size_t input_size) { 9426fa459cSmrg return NUMBUCKETS * sizeof(uint32_t); 9526fa459cSmrg BROTLI_UNUSED(params); 9626fa459cSmrg BROTLI_UNUSED(one_shot); 9726fa459cSmrg BROTLI_UNUSED(input_size); 9826fa459cSmrg} 9926fa459cSmrg 10026fa459cSmrgstatic BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self, 10126fa459cSmrg const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { 10226fa459cSmrg BROTLI_UNUSED(self); 10326fa459cSmrg BROTLI_UNUSED(data); 10426fa459cSmrg BROTLI_UNUSED(mask); 10526fa459cSmrg BROTLI_UNUSED(ix); 10626fa459cSmrg} 10726fa459cSmrg 10826fa459cSmrgstatic BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self, 10926fa459cSmrg const uint8_t* BROTLI_RESTRICT data, const size_t mask, 11026fa459cSmrg const size_t ix_start, const size_t ix_end) { 11126fa459cSmrg BROTLI_UNUSED(self); 11226fa459cSmrg BROTLI_UNUSED(data); 11326fa459cSmrg BROTLI_UNUSED(mask); 11426fa459cSmrg BROTLI_UNUSED(ix_start); 11526fa459cSmrg BROTLI_UNUSED(ix_end); 11626fa459cSmrg} 11726fa459cSmrg 11826fa459cSmrgstatic BROTLI_INLINE void FN(StitchToPreviousBlock)( 11926fa459cSmrg HashRolling* BROTLI_RESTRICT self, 12026fa459cSmrg size_t num_bytes, size_t position, const uint8_t* ringbuffer, 12126fa459cSmrg size_t ring_buffer_mask) { 12226fa459cSmrg /* In this case we must re-initialize the hasher from scratch from the 12326fa459cSmrg current position. */ 12426fa459cSmrg size_t position_masked; 12526fa459cSmrg size_t available = num_bytes; 12626fa459cSmrg if ((position & (JUMP - 1)) != 0) { 12726fa459cSmrg size_t diff = JUMP - (position & (JUMP - 1)); 12826fa459cSmrg available = (diff > available) ? 0 : (available - diff); 12926fa459cSmrg position += diff; 13026fa459cSmrg } 13126fa459cSmrg position_masked = position & ring_buffer_mask; 13226fa459cSmrg /* wrapping around ringbuffer not handled. */ 13326fa459cSmrg if (available > ring_buffer_mask - position_masked) { 13426fa459cSmrg available = ring_buffer_mask - position_masked; 13526fa459cSmrg } 13626fa459cSmrg 13726fa459cSmrg FN(Prepare)(self, BROTLI_FALSE, available, 13826fa459cSmrg ringbuffer + (position & ring_buffer_mask)); 13926fa459cSmrg self->next_ix = position; 14026fa459cSmrg BROTLI_UNUSED(num_bytes); 14126fa459cSmrg} 14226fa459cSmrg 14326fa459cSmrgstatic BROTLI_INLINE void FN(PrepareDistanceCache)( 14426fa459cSmrg HashRolling* BROTLI_RESTRICT self, 14526fa459cSmrg int* BROTLI_RESTRICT distance_cache) { 14626fa459cSmrg BROTLI_UNUSED(self); 14726fa459cSmrg BROTLI_UNUSED(distance_cache); 14826fa459cSmrg} 14926fa459cSmrg 15026fa459cSmrgstatic BROTLI_INLINE void FN(FindLongestMatch)( 15126fa459cSmrg HashRolling* BROTLI_RESTRICT self, 15226fa459cSmrg const BrotliEncoderDictionary* dictionary, 15326fa459cSmrg const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, 15426fa459cSmrg const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, 15526fa459cSmrg const size_t max_length, const size_t max_backward, 15626fa459cSmrg const size_t dictionary_distance, const size_t max_distance, 15726fa459cSmrg HasherSearchResult* BROTLI_RESTRICT out) { 15826fa459cSmrg const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 15926fa459cSmrg size_t pos; 16026fa459cSmrg 16126fa459cSmrg if ((cur_ix & (JUMP - 1)) != 0) return; 16226fa459cSmrg 16326fa459cSmrg /* Not enough lookahead */ 16426fa459cSmrg if (max_length < CHUNKLEN) return; 16526fa459cSmrg 16626fa459cSmrg for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) { 16726fa459cSmrg uint32_t code = self->state & MASK; 16826fa459cSmrg 16926fa459cSmrg uint8_t rem = data[pos & ring_buffer_mask]; 17026fa459cSmrg uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask]; 17126fa459cSmrg size_t found_ix = FN(kInvalidPos); 17226fa459cSmrg 17326fa459cSmrg self->state = FN(HashRollingFunction)( 17426fa459cSmrg self->state, add, rem, self->factor, self->factor_remove); 17526fa459cSmrg 17626fa459cSmrg if (code < NUMBUCKETS) { 17726fa459cSmrg found_ix = self->table[code]; 17826fa459cSmrg self->table[code] = (uint32_t)pos; 17926fa459cSmrg if (pos == cur_ix && found_ix != FN(kInvalidPos)) { 18026fa459cSmrg /* The cast to 32-bit makes backward distances up to 4GB work even 18126fa459cSmrg if cur_ix is above 4GB, despite using 32-bit values in the table. */ 18226fa459cSmrg size_t backward = (uint32_t)(cur_ix - found_ix); 18326fa459cSmrg if (backward <= max_backward) { 18426fa459cSmrg const size_t found_ix_masked = found_ix & ring_buffer_mask; 18526fa459cSmrg const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked], 18626fa459cSmrg &data[cur_ix_masked], 18726fa459cSmrg max_length); 18826fa459cSmrg if (len >= 4 && len > out->len) { 18926fa459cSmrg score_t score = BackwardReferenceScore(len, backward); 19026fa459cSmrg if (score > out->score) { 19126fa459cSmrg out->len = len; 19226fa459cSmrg out->distance = backward; 19326fa459cSmrg out->score = score; 19426fa459cSmrg out->len_code_delta = 0; 19526fa459cSmrg } 19626fa459cSmrg } 19726fa459cSmrg } 19826fa459cSmrg } 19926fa459cSmrg } 20026fa459cSmrg } 20126fa459cSmrg 20226fa459cSmrg self->next_ix = cur_ix + JUMP; 20326fa459cSmrg 20426fa459cSmrg /* NOTE: this hasher does not search in the dictionary. It is used as 20526fa459cSmrg backup-hasher, the main hasher already searches in it. */ 20626fa459cSmrg BROTLI_UNUSED(dictionary); 20726fa459cSmrg BROTLI_UNUSED(distance_cache); 20826fa459cSmrg BROTLI_UNUSED(dictionary_distance); 20926fa459cSmrg BROTLI_UNUSED(max_distance); 21026fa459cSmrg} 21126fa459cSmrg 21226fa459cSmrg#undef HashRolling 213