126fa459cSmrg/* NOLINT(build/header_guard) */
226fa459cSmrg/* Copyright 2018 Google Inc. All Rights Reserved.
326fa459cSmrg
426fa459cSmrg   Distributed under MIT license.
526fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
626fa459cSmrg*/
726fa459cSmrg
826fa459cSmrg/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
926fa459cSmrg/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
1026fa459cSmrg/* JUMP = skip bytes for speedup */
1126fa459cSmrg
1226fa459cSmrg/* Rolling hash for long distance long string matches. Stores one position
1326fa459cSmrg   per bucket, bucket key is computed over a long region. */
1426fa459cSmrg
1526fa459cSmrg#define HashRolling HASHER()
1626fa459cSmrg
1726fa459cSmrgstatic const uint32_t FN(kRollingHashMul32) = 69069;
1826fa459cSmrgstatic const uint32_t FN(kInvalidPos) = 0xffffffff;
1926fa459cSmrg
2026fa459cSmrg/* This hasher uses a longer forward length, but returning a higher value here
2126fa459cSmrg   will hurt compression by the main hasher when combined with a composite
2226fa459cSmrg   hasher. The hasher tests for forward itself instead. */
2326fa459cSmrgstatic BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
2426fa459cSmrgstatic BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
2526fa459cSmrg
2626fa459cSmrg/* Computes a code from a single byte. A lookup table of 256 values could be
2726fa459cSmrg   used, but simply adding 1 works about as good. */
2826fa459cSmrgstatic uint32_t FN(HashByte)(uint8_t byte) {
2926fa459cSmrg  return (uint32_t)byte + 1u;
3026fa459cSmrg}
3126fa459cSmrg
3226fa459cSmrgstatic uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
3326fa459cSmrg                                               uint32_t factor) {
3426fa459cSmrg  return (uint32_t)(factor * state + FN(HashByte)(add));
3526fa459cSmrg}
3626fa459cSmrg
3726fa459cSmrgstatic uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
3826fa459cSmrg                                        uint8_t rem, uint32_t factor,
3926fa459cSmrg                                        uint32_t factor_remove) {
4026fa459cSmrg  return (uint32_t)(factor * state +
4126fa459cSmrg      FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
4226fa459cSmrg}
4326fa459cSmrg
4426fa459cSmrgtypedef struct HashRolling {
4526fa459cSmrg  uint32_t state;
4626fa459cSmrg  uint32_t* table;
4726fa459cSmrg  size_t next_ix;
4826fa459cSmrg
4926fa459cSmrg  uint32_t chunk_len;
5026fa459cSmrg  uint32_t factor;
5126fa459cSmrg  uint32_t factor_remove;
5226fa459cSmrg} HashRolling;
5326fa459cSmrg
5426fa459cSmrgstatic void FN(Initialize)(
5526fa459cSmrg    HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
5626fa459cSmrg    const BrotliEncoderParams* params) {
5726fa459cSmrg  size_t i;
5826fa459cSmrg  self->state = 0;
5926fa459cSmrg  self->next_ix = 0;
6026fa459cSmrg
6126fa459cSmrg  self->factor = FN(kRollingHashMul32);
6226fa459cSmrg
6326fa459cSmrg  /* Compute the factor of the oldest byte to remove: factor**steps modulo
6426fa459cSmrg     0xffffffff (the multiplications rely on 32-bit overflow) */
6526fa459cSmrg  self->factor_remove = 1;
6626fa459cSmrg  for (i = 0; i < CHUNKLEN; i += JUMP) {
6726fa459cSmrg    self->factor_remove *= self->factor;
6826fa459cSmrg  }
6926fa459cSmrg
7026fa459cSmrg  self->table = (uint32_t*)common->extra;
7126fa459cSmrg  for (i = 0; i < NUMBUCKETS; i++) {
7226fa459cSmrg    self->table[i] = FN(kInvalidPos);
7326fa459cSmrg  }
7426fa459cSmrg
7526fa459cSmrg  BROTLI_UNUSED(params);
7626fa459cSmrg}
7726fa459cSmrg
7826fa459cSmrgstatic void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
7926fa459cSmrg    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
8026fa459cSmrg  size_t i;
8126fa459cSmrg  /* Too small size, cannot use this hasher. */
8226fa459cSmrg  if (input_size < CHUNKLEN) return;
8326fa459cSmrg  self->state = 0;
8426fa459cSmrg  for (i = 0; i < CHUNKLEN; i += JUMP) {
8526fa459cSmrg    self->state = FN(HashRollingFunctionInitial)(
8626fa459cSmrg        self->state, data[i], self->factor);
8726fa459cSmrg  }
8826fa459cSmrg  BROTLI_UNUSED(one_shot);
8926fa459cSmrg}
9026fa459cSmrg
9126fa459cSmrgstatic BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
9226fa459cSmrg    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
9326fa459cSmrg    size_t input_size) {
9426fa459cSmrg  return NUMBUCKETS * sizeof(uint32_t);
9526fa459cSmrg  BROTLI_UNUSED(params);
9626fa459cSmrg  BROTLI_UNUSED(one_shot);
9726fa459cSmrg  BROTLI_UNUSED(input_size);
9826fa459cSmrg}
9926fa459cSmrg
10026fa459cSmrgstatic BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
10126fa459cSmrg    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
10226fa459cSmrg  BROTLI_UNUSED(self);
10326fa459cSmrg  BROTLI_UNUSED(data);
10426fa459cSmrg  BROTLI_UNUSED(mask);
10526fa459cSmrg  BROTLI_UNUSED(ix);
10626fa459cSmrg}
10726fa459cSmrg
10826fa459cSmrgstatic BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
10926fa459cSmrg    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
11026fa459cSmrg    const size_t ix_start, const size_t ix_end) {
11126fa459cSmrg  BROTLI_UNUSED(self);
11226fa459cSmrg  BROTLI_UNUSED(data);
11326fa459cSmrg  BROTLI_UNUSED(mask);
11426fa459cSmrg  BROTLI_UNUSED(ix_start);
11526fa459cSmrg  BROTLI_UNUSED(ix_end);
11626fa459cSmrg}
11726fa459cSmrg
11826fa459cSmrgstatic BROTLI_INLINE void FN(StitchToPreviousBlock)(
11926fa459cSmrg    HashRolling* BROTLI_RESTRICT self,
12026fa459cSmrg    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
12126fa459cSmrg    size_t ring_buffer_mask) {
12226fa459cSmrg  /* In this case we must re-initialize the hasher from scratch from the
12326fa459cSmrg     current position. */
12426fa459cSmrg  size_t position_masked;
12526fa459cSmrg  size_t available = num_bytes;
12626fa459cSmrg  if ((position & (JUMP - 1)) != 0) {
12726fa459cSmrg    size_t diff = JUMP - (position & (JUMP - 1));
12826fa459cSmrg    available = (diff > available) ? 0 : (available - diff);
12926fa459cSmrg    position += diff;
13026fa459cSmrg  }
13126fa459cSmrg  position_masked = position & ring_buffer_mask;
13226fa459cSmrg  /* wrapping around ringbuffer not handled. */
13326fa459cSmrg  if (available > ring_buffer_mask - position_masked) {
13426fa459cSmrg    available = ring_buffer_mask - position_masked;
13526fa459cSmrg  }
13626fa459cSmrg
13726fa459cSmrg  FN(Prepare)(self, BROTLI_FALSE, available,
13826fa459cSmrg      ringbuffer + (position & ring_buffer_mask));
13926fa459cSmrg  self->next_ix = position;
14026fa459cSmrg  BROTLI_UNUSED(num_bytes);
14126fa459cSmrg}
14226fa459cSmrg
14326fa459cSmrgstatic BROTLI_INLINE void FN(PrepareDistanceCache)(
14426fa459cSmrg    HashRolling* BROTLI_RESTRICT self,
14526fa459cSmrg    int* BROTLI_RESTRICT distance_cache) {
14626fa459cSmrg  BROTLI_UNUSED(self);
14726fa459cSmrg  BROTLI_UNUSED(distance_cache);
14826fa459cSmrg}
14926fa459cSmrg
15026fa459cSmrgstatic BROTLI_INLINE void FN(FindLongestMatch)(
15126fa459cSmrg    HashRolling* BROTLI_RESTRICT self,
15226fa459cSmrg    const BrotliEncoderDictionary* dictionary,
15326fa459cSmrg    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
15426fa459cSmrg    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
15526fa459cSmrg    const size_t max_length, const size_t max_backward,
15626fa459cSmrg    const size_t dictionary_distance, const size_t max_distance,
15726fa459cSmrg    HasherSearchResult* BROTLI_RESTRICT out) {
15826fa459cSmrg  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
15926fa459cSmrg  size_t pos;
16026fa459cSmrg
16126fa459cSmrg  if ((cur_ix & (JUMP - 1)) != 0) return;
16226fa459cSmrg
16326fa459cSmrg  /* Not enough lookahead */
16426fa459cSmrg  if (max_length < CHUNKLEN) return;
16526fa459cSmrg
16626fa459cSmrg  for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
16726fa459cSmrg    uint32_t code = self->state & MASK;
16826fa459cSmrg
16926fa459cSmrg    uint8_t rem = data[pos & ring_buffer_mask];
17026fa459cSmrg    uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
17126fa459cSmrg    size_t found_ix = FN(kInvalidPos);
17226fa459cSmrg
17326fa459cSmrg    self->state = FN(HashRollingFunction)(
17426fa459cSmrg        self->state, add, rem, self->factor, self->factor_remove);
17526fa459cSmrg
17626fa459cSmrg    if (code < NUMBUCKETS) {
17726fa459cSmrg      found_ix = self->table[code];
17826fa459cSmrg      self->table[code] = (uint32_t)pos;
17926fa459cSmrg      if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
18026fa459cSmrg        /* The cast to 32-bit makes backward distances up to 4GB work even
18126fa459cSmrg           if cur_ix is above 4GB, despite using 32-bit values in the table. */
18226fa459cSmrg        size_t backward = (uint32_t)(cur_ix - found_ix);
18326fa459cSmrg        if (backward <= max_backward) {
18426fa459cSmrg          const size_t found_ix_masked = found_ix & ring_buffer_mask;
18526fa459cSmrg          const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
18626fa459cSmrg                                                      &data[cur_ix_masked],
18726fa459cSmrg                                                      max_length);
18826fa459cSmrg          if (len >= 4 && len > out->len) {
18926fa459cSmrg            score_t score = BackwardReferenceScore(len, backward);
19026fa459cSmrg            if (score > out->score) {
19126fa459cSmrg              out->len = len;
19226fa459cSmrg              out->distance = backward;
19326fa459cSmrg              out->score = score;
19426fa459cSmrg              out->len_code_delta = 0;
19526fa459cSmrg            }
19626fa459cSmrg          }
19726fa459cSmrg        }
19826fa459cSmrg      }
19926fa459cSmrg    }
20026fa459cSmrg  }
20126fa459cSmrg
20226fa459cSmrg  self->next_ix = cur_ix + JUMP;
20326fa459cSmrg
20426fa459cSmrg  /* NOTE: this hasher does not search in the dictionary. It is used as
20526fa459cSmrg     backup-hasher, the main hasher already searches in it. */
20626fa459cSmrg  BROTLI_UNUSED(dictionary);
20726fa459cSmrg  BROTLI_UNUSED(distance_cache);
20826fa459cSmrg  BROTLI_UNUSED(dictionary_distance);
20926fa459cSmrg  BROTLI_UNUSED(max_distance);
21026fa459cSmrg}
21126fa459cSmrg
21226fa459cSmrg#undef HashRolling
213