1/* NOLINT(build/header_guard) */
2/* Copyright 2010 Google Inc. All Rights Reserved.
3
4   Distributed under MIT license.
5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
9                        USE_DICTIONARY
10 */
11
12#define HashLongestMatchQuickly HASHER()
13
14#define BUCKET_SIZE (1 << BUCKET_BITS)
15#define BUCKET_MASK (BUCKET_SIZE - 1)
16#define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)
17#define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)
18
19static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
20static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
21
22/* HashBytes is the function that chooses the bucket to place
23   the address in. The HashLongestMatch and HashLongestMatchQuickly
24   classes have separate, different implementations of hashing. */
25static uint32_t FN(HashBytes)(const uint8_t* data) {
26  const uint64_t h = ((BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN)) *
27                      kHashMul64);
28  /* The higher bits contain more mixture from the multiplication,
29     so we take our results from there. */
30  return (uint32_t)(h >> (64 - BUCKET_BITS));
31}
32
33/* A (forgetful) hash table to the data seen by the compressor, to
34   help create backward references to previous data.
35
36   This is a hash map of fixed size (BUCKET_SIZE). */
37typedef struct HashLongestMatchQuickly {
38  /* Shortcuts. */
39  HasherCommon* common;
40
41  /* --- Dynamic size members --- */
42
43  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */
44} HashLongestMatchQuickly;
45
46static void FN(Initialize)(
47    HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
48    const BrotliEncoderParams* params) {
49  self->common = common;
50
51  BROTLI_UNUSED(params);
52  self->buckets_ = (uint32_t*)common->extra;
53}
54
55static void FN(Prepare)(
56    HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
57    size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
58  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
59  /* Partial preparation is 100 times slower (per socket). */
60  size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
61  if (one_shot && input_size <= partial_prepare_threshold) {
62    size_t i;
63    for (i = 0; i < input_size; ++i) {
64      const uint32_t key = FN(HashBytes)(&data[i]);
65      if (BUCKET_SWEEP == 1) {
66        buckets[key] = 0;
67      } else {
68        uint32_t j;
69        for (j = 0; j < BUCKET_SWEEP; ++j) {
70          buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
71        }
72      }
73    }
74  } else {
75    /* It is not strictly necessary to fill this buffer here, but
76       not filling will make the results of the compression stochastic
77       (but correct). This is because random data would cause the
78       system to find accidentally good backward references here and there. */
79    memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
80  }
81}
82
83static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
84    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
85    size_t input_size) {
86  BROTLI_UNUSED(params);
87  BROTLI_UNUSED(one_shot);
88  BROTLI_UNUSED(input_size);
89  return sizeof(uint32_t) * BUCKET_SIZE;
90}
91
92/* Look at 5 bytes at &data[ix & mask].
93   Compute a hash from these, and store the value somewhere within
94   [ix .. ix+3]. */
95static BROTLI_INLINE void FN(Store)(
96    HashLongestMatchQuickly* BROTLI_RESTRICT self,
97    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
98  const uint32_t key = FN(HashBytes)(&data[ix & mask]);
99  if (BUCKET_SWEEP == 1) {
100    self->buckets_[key] = (uint32_t)ix;
101  } else {
102    /* Wiggle the value with the bucket sweep range. */
103    const uint32_t off = ix & BUCKET_SWEEP_MASK;
104    self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
105  }
106}
107
108static BROTLI_INLINE void FN(StoreRange)(
109    HashLongestMatchQuickly* BROTLI_RESTRICT self,
110    const uint8_t* BROTLI_RESTRICT data, const size_t mask,
111    const size_t ix_start, const size_t ix_end) {
112  size_t i;
113  for (i = ix_start; i < ix_end; ++i) {
114    FN(Store)(self, data, mask, i);
115  }
116}
117
118static BROTLI_INLINE void FN(StitchToPreviousBlock)(
119    HashLongestMatchQuickly* BROTLI_RESTRICT self,
120    size_t num_bytes, size_t position,
121    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
122  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
123    /* Prepare the hashes for three last bytes of the last write.
124       These could not be calculated before, since they require knowledge
125       of both the previous and the current block. */
126    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
127    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
128    FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
129  }
130}
131
132static BROTLI_INLINE void FN(PrepareDistanceCache)(
133    HashLongestMatchQuickly* BROTLI_RESTRICT self,
134    int* BROTLI_RESTRICT distance_cache) {
135  BROTLI_UNUSED(self);
136  BROTLI_UNUSED(distance_cache);
137}
138
139/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
140   up to the length of max_length and stores the position cur_ix in the
141   hash table.
142
143   Does not look for matches longer than max_length.
144   Does not look for matches further away than max_backward.
145   Writes the best match into |out|.
146   |out|->score is updated only if a better match is found. */
147static BROTLI_INLINE void FN(FindLongestMatch)(
148    HashLongestMatchQuickly* BROTLI_RESTRICT self,
149    const BrotliEncoderDictionary* dictionary,
150    const uint8_t* BROTLI_RESTRICT data,
151    const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
152    const size_t cur_ix, const size_t max_length, const size_t max_backward,
153    const size_t dictionary_distance, const size_t max_distance,
154    HasherSearchResult* BROTLI_RESTRICT out) {
155  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
156  const size_t best_len_in = out->len;
157  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
158  int compare_char = data[cur_ix_masked + best_len_in];
159  size_t key = FN(HashBytes)(&data[cur_ix_masked]);
160  size_t key_out;
161  score_t min_score = out->score;
162  score_t best_score = out->score;
163  size_t best_len = best_len_in;
164  size_t cached_backward = (size_t)distance_cache[0];
165  size_t prev_ix = cur_ix - cached_backward;
166  out->len_code_delta = 0;
167  if (prev_ix < cur_ix) {
168    prev_ix &= (uint32_t)ring_buffer_mask;
169    if (compare_char == data[prev_ix + best_len]) {
170      const size_t len = FindMatchLengthWithLimit(
171          &data[prev_ix], &data[cur_ix_masked], max_length);
172      if (len >= 4) {
173        const score_t score = BackwardReferenceScoreUsingLastDistance(len);
174        if (best_score < score) {
175          out->len = len;
176          out->distance = cached_backward;
177          out->score = score;
178          if (BUCKET_SWEEP == 1) {
179            buckets[key] = (uint32_t)cur_ix;
180            return;
181          } else {
182            best_len = len;
183            best_score = score;
184            compare_char = data[cur_ix_masked + len];
185          }
186        }
187      }
188    }
189  }
190  if (BUCKET_SWEEP == 1) {
191    size_t backward;
192    size_t len;
193    /* Only one to look for, don't bother to prepare for a loop. */
194    prev_ix = buckets[key];
195    buckets[key] = (uint32_t)cur_ix;
196    backward = cur_ix - prev_ix;
197    prev_ix &= (uint32_t)ring_buffer_mask;
198    if (compare_char != data[prev_ix + best_len_in]) {
199      return;
200    }
201    if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
202      return;
203    }
204    len = FindMatchLengthWithLimit(&data[prev_ix],
205                                   &data[cur_ix_masked],
206                                   max_length);
207    if (len >= 4) {
208      const score_t score = BackwardReferenceScore(len, backward);
209      if (best_score < score) {
210        out->len = len;
211        out->distance = backward;
212        out->score = score;
213        return;
214      }
215    }
216  } else {
217    size_t keys[BUCKET_SWEEP];
218    size_t i;
219    for (i = 0; i < BUCKET_SWEEP; ++i) {
220      keys[i] = (key + (i << 3)) & BUCKET_MASK;
221    }
222    key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];
223    for (i = 0; i < BUCKET_SWEEP; ++i) {
224      size_t len;
225      size_t backward;
226      prev_ix = buckets[keys[i]];
227      backward = cur_ix - prev_ix;
228      prev_ix &= (uint32_t)ring_buffer_mask;
229      if (compare_char != data[prev_ix + best_len]) {
230        continue;
231      }
232      if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
233        continue;
234      }
235      len = FindMatchLengthWithLimit(&data[prev_ix],
236                                     &data[cur_ix_masked],
237                                     max_length);
238      if (len >= 4) {
239        const score_t score = BackwardReferenceScore(len, backward);
240        if (best_score < score) {
241          best_len = len;
242          out->len = len;
243          compare_char = data[cur_ix_masked + len];
244          best_score = score;
245          out->score = score;
246          out->distance = backward;
247        }
248      }
249    }
250  }
251  if (USE_DICTIONARY && min_score == out->score) {
252    SearchInStaticDictionary(dictionary,
253        self->common, &data[cur_ix_masked], max_length, dictionary_distance,
254        max_distance, out, BROTLI_TRUE);
255  }
256  if (BUCKET_SWEEP != 1) {
257    buckets[key_out] = (uint32_t)cur_ix;
258  }
259}
260
261#undef BUCKET_SWEEP_MASK
262#undef BUCKET_SWEEP
263#undef BUCKET_MASK
264#undef BUCKET_SIZE
265
266#undef HashLongestMatchQuickly
267