/* Copyright 2013 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
626fa459cSmrg
726fa459cSmrg#include "./static_dict.h"
826fa459cSmrg
926fa459cSmrg#include "../common/dictionary.h"
1026fa459cSmrg#include "../common/platform.h"
1126fa459cSmrg#include "../common/transform.h"
1226fa459cSmrg#include "./encoder_dict.h"
1326fa459cSmrg#include "./find_match_length.h"
1426fa459cSmrg
1526fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
1626fa459cSmrgextern "C" {
1726fa459cSmrg#endif
1826fa459cSmrg
1926fa459cSmrgstatic BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
2026fa459cSmrg  uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
2126fa459cSmrg  /* The higher bits contain more mixture from the multiplication,
2226fa459cSmrg     so we take our results from there. */
2326fa459cSmrg  return h >> (32 - kDictNumBits);
2426fa459cSmrg}
2526fa459cSmrg
2626fa459cSmrgstatic BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
2726fa459cSmrg                                   uint32_t* matches) {
2826fa459cSmrg  uint32_t match = (uint32_t)((distance << 5) + len_code);
2926fa459cSmrg  matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
3026fa459cSmrg}
3126fa459cSmrg
3226fa459cSmrgstatic BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
3326fa459cSmrg                                            const uint8_t* data,
3426fa459cSmrg                                            size_t id,
3526fa459cSmrg                                            size_t len,
3626fa459cSmrg                                            size_t maxlen) {
3726fa459cSmrg  const size_t offset = dictionary->offsets_by_length[len] + len * id;
3826fa459cSmrg  return FindMatchLengthWithLimit(&dictionary->data[offset], data,
3926fa459cSmrg                                  BROTLI_MIN(size_t, len, maxlen));
4026fa459cSmrg}
4126fa459cSmrg
4226fa459cSmrgstatic BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
4326fa459cSmrg    DictWord w, const uint8_t* data, size_t max_length) {
4426fa459cSmrg  if (w.len > max_length) {
4526fa459cSmrg    return BROTLI_FALSE;
4626fa459cSmrg  } else {
4726fa459cSmrg    const size_t offset = dictionary->offsets_by_length[w.len] +
4826fa459cSmrg        (size_t)w.len * (size_t)w.idx;
4926fa459cSmrg    const uint8_t* dict = &dictionary->data[offset];
5026fa459cSmrg    if (w.transform == 0) {
5126fa459cSmrg      /* Match against base dictionary word. */
5226fa459cSmrg      return
5326fa459cSmrg          TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
5426fa459cSmrg    } else if (w.transform == 10) {
5526fa459cSmrg      /* Match against uppercase first transform.
5626fa459cSmrg         Note that there are only ASCII uppercase words in the lookup table. */
5726fa459cSmrg      return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
5826fa459cSmrg              (dict[0] ^ 32) == data[0] &&
5926fa459cSmrg              FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
6026fa459cSmrg              w.len - 1u);
6126fa459cSmrg    } else {
6226fa459cSmrg      /* Match against uppercase all transform.
6326fa459cSmrg         Note that there are only ASCII uppercase words in the lookup table. */
6426fa459cSmrg      size_t i;
6526fa459cSmrg      for (i = 0; i < w.len; ++i) {
6626fa459cSmrg        if (dict[i] >= 'a' && dict[i] <= 'z') {
6726fa459cSmrg          if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
6826fa459cSmrg        } else {
6926fa459cSmrg          if (dict[i] != data[i]) return BROTLI_FALSE;
7026fa459cSmrg        }
7126fa459cSmrg      }
7226fa459cSmrg      return BROTLI_TRUE;
7326fa459cSmrg    }
7426fa459cSmrg  }
7526fa459cSmrg}
7626fa459cSmrg
7726fa459cSmrgBROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
7826fa459cSmrg    const BrotliEncoderDictionary* dictionary, const uint8_t* data,
7926fa459cSmrg    size_t min_length, size_t max_length, uint32_t* matches) {
8026fa459cSmrg  BROTLI_BOOL has_found_match = BROTLI_FALSE;
8126fa459cSmrg  {
8226fa459cSmrg    size_t offset = dictionary->buckets[Hash(data)];
8326fa459cSmrg    BROTLI_BOOL end = !offset;
8426fa459cSmrg    while (!end) {
8526fa459cSmrg      DictWord w = dictionary->dict_words[offset++];
8626fa459cSmrg      const size_t l = w.len & 0x1F;
8726fa459cSmrg      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
8826fa459cSmrg      const size_t id = w.idx;
8926fa459cSmrg      end = !!(w.len & 0x80);
9026fa459cSmrg      w.len = (uint8_t)l;
9126fa459cSmrg      if (w.transform == 0) {
9226fa459cSmrg        const size_t matchlen =
9326fa459cSmrg            DictMatchLength(dictionary->words, data, id, l, max_length);
9426fa459cSmrg        const uint8_t* s;
9526fa459cSmrg        size_t minlen;
9626fa459cSmrg        size_t maxlen;
9726fa459cSmrg        size_t len;
9826fa459cSmrg        /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
9926fa459cSmrg        if (matchlen == l) {
10026fa459cSmrg          AddMatch(id, l, l, matches);
10126fa459cSmrg          has_found_match = BROTLI_TRUE;
10226fa459cSmrg        }
10326fa459cSmrg        /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
10426fa459cSmrg                      "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
10526fa459cSmrg        if (matchlen >= l - 1) {
10626fa459cSmrg          AddMatch(id + 12 * n, l - 1, l, matches);
10726fa459cSmrg          if (l + 2 < max_length &&
10826fa459cSmrg              data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
10926fa459cSmrg              data[l + 2] == ' ') {
11026fa459cSmrg            AddMatch(id + 49 * n, l + 3, l, matches);
11126fa459cSmrg          }
11226fa459cSmrg          has_found_match = BROTLI_TRUE;
11326fa459cSmrg        }
11426fa459cSmrg        /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
11526fa459cSmrg        minlen = min_length;
11626fa459cSmrg        if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
11726fa459cSmrg        maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
11826fa459cSmrg        for (len = minlen; len <= maxlen; ++len) {
11926fa459cSmrg          size_t cut = l - len;
12026fa459cSmrg          size_t transform_id = (cut << 2) +
12126fa459cSmrg              (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
12226fa459cSmrg          AddMatch(id + transform_id * n, len, l, matches);
12326fa459cSmrg          has_found_match = BROTLI_TRUE;
12426fa459cSmrg        }
12526fa459cSmrg        if (matchlen < l || l + 6 >= max_length) {
12626fa459cSmrg          continue;
12726fa459cSmrg        }
12826fa459cSmrg        s = &data[l];
12926fa459cSmrg        /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
13026fa459cSmrg        if (s[0] == ' ') {
13126fa459cSmrg          AddMatch(id + n, l + 1, l, matches);
13226fa459cSmrg          if (s[1] == 'a') {
13326fa459cSmrg            if (s[2] == ' ') {
13426fa459cSmrg              AddMatch(id + 28 * n, l + 3, l, matches);
13526fa459cSmrg            } else if (s[2] == 's') {
13626fa459cSmrg              if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
13726fa459cSmrg            } else if (s[2] == 't') {
13826fa459cSmrg              if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
13926fa459cSmrg            } else if (s[2] == 'n') {
14026fa459cSmrg              if (s[3] == 'd' && s[4] == ' ') {
14126fa459cSmrg                AddMatch(id + 10 * n, l + 5, l, matches);
14226fa459cSmrg              }
14326fa459cSmrg            }
14426fa459cSmrg          } else if (s[1] == 'b') {
14526fa459cSmrg            if (s[2] == 'y' && s[3] == ' ') {
14626fa459cSmrg              AddMatch(id + 38 * n, l + 4, l, matches);
14726fa459cSmrg            }
14826fa459cSmrg          } else if (s[1] == 'i') {
14926fa459cSmrg            if (s[2] == 'n') {
15026fa459cSmrg              if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
15126fa459cSmrg            } else if (s[2] == 's') {
15226fa459cSmrg              if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
15326fa459cSmrg            }
15426fa459cSmrg          } else if (s[1] == 'f') {
15526fa459cSmrg            if (s[2] == 'o') {
15626fa459cSmrg              if (s[3] == 'r' && s[4] == ' ') {
15726fa459cSmrg                AddMatch(id + 25 * n, l + 5, l, matches);
15826fa459cSmrg              }
15926fa459cSmrg            } else if (s[2] == 'r') {
16026fa459cSmrg              if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
16126fa459cSmrg                AddMatch(id + 37 * n, l + 6, l, matches);
16226fa459cSmrg              }
16326fa459cSmrg            }
16426fa459cSmrg          } else if (s[1] == 'o') {
16526fa459cSmrg            if (s[2] == 'f') {
16626fa459cSmrg              if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
16726fa459cSmrg            } else if (s[2] == 'n') {
16826fa459cSmrg              if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
16926fa459cSmrg            }
17026fa459cSmrg          } else if (s[1] == 'n') {
17126fa459cSmrg            if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
17226fa459cSmrg              AddMatch(id + 80 * n, l + 5, l, matches);
17326fa459cSmrg            }
17426fa459cSmrg          } else if (s[1] == 't') {
17526fa459cSmrg            if (s[2] == 'h') {
17626fa459cSmrg              if (s[3] == 'e') {
17726fa459cSmrg                if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
17826fa459cSmrg              } else if (s[3] == 'a') {
17926fa459cSmrg                if (s[4] == 't' && s[5] == ' ') {
18026fa459cSmrg                  AddMatch(id + 29 * n, l + 6, l, matches);
18126fa459cSmrg                }
18226fa459cSmrg              }
18326fa459cSmrg            } else if (s[2] == 'o') {
18426fa459cSmrg              if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
18526fa459cSmrg            }
18626fa459cSmrg          } else if (s[1] == 'w') {
18726fa459cSmrg            if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
18826fa459cSmrg              AddMatch(id + 35 * n, l + 6, l, matches);
18926fa459cSmrg            }
19026fa459cSmrg          }
19126fa459cSmrg        } else if (s[0] == '"') {
19226fa459cSmrg          AddMatch(id + 19 * n, l + 1, l, matches);
19326fa459cSmrg          if (s[1] == '>') {
19426fa459cSmrg            AddMatch(id + 21 * n, l + 2, l, matches);
19526fa459cSmrg          }
19626fa459cSmrg        } else if (s[0] == '.') {
19726fa459cSmrg          AddMatch(id + 20 * n, l + 1, l, matches);
19826fa459cSmrg          if (s[1] == ' ') {
19926fa459cSmrg            AddMatch(id + 31 * n, l + 2, l, matches);
20026fa459cSmrg            if (s[2] == 'T' && s[3] == 'h') {
20126fa459cSmrg              if (s[4] == 'e') {
20226fa459cSmrg                if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
20326fa459cSmrg              } else if (s[4] == 'i') {
20426fa459cSmrg                if (s[5] == 's' && s[6] == ' ') {
20526fa459cSmrg                  AddMatch(id + 75 * n, l + 7, l, matches);
20626fa459cSmrg                }
20726fa459cSmrg              }
20826fa459cSmrg            }
20926fa459cSmrg          }
21026fa459cSmrg        } else if (s[0] == ',') {
21126fa459cSmrg          AddMatch(id + 76 * n, l + 1, l, matches);
21226fa459cSmrg          if (s[1] == ' ') {
21326fa459cSmrg            AddMatch(id + 14 * n, l + 2, l, matches);
21426fa459cSmrg          }
21526fa459cSmrg        } else if (s[0] == '\n') {
21626fa459cSmrg          AddMatch(id + 22 * n, l + 1, l, matches);
21726fa459cSmrg          if (s[1] == '\t') {
21826fa459cSmrg            AddMatch(id + 50 * n, l + 2, l, matches);
21926fa459cSmrg          }
22026fa459cSmrg        } else if (s[0] == ']') {
22126fa459cSmrg          AddMatch(id + 24 * n, l + 1, l, matches);
22226fa459cSmrg        } else if (s[0] == '\'') {
22326fa459cSmrg          AddMatch(id + 36 * n, l + 1, l, matches);
22426fa459cSmrg        } else if (s[0] == ':') {
22526fa459cSmrg          AddMatch(id + 51 * n, l + 1, l, matches);
22626fa459cSmrg        } else if (s[0] == '(') {
22726fa459cSmrg          AddMatch(id + 57 * n, l + 1, l, matches);
22826fa459cSmrg        } else if (s[0] == '=') {
22926fa459cSmrg          if (s[1] == '"') {
23026fa459cSmrg            AddMatch(id + 70 * n, l + 2, l, matches);
23126fa459cSmrg          } else if (s[1] == '\'') {
23226fa459cSmrg            AddMatch(id + 86 * n, l + 2, l, matches);
23326fa459cSmrg          }
23426fa459cSmrg        } else if (s[0] == 'a') {
23526fa459cSmrg          if (s[1] == 'l' && s[2] == ' ') {
23626fa459cSmrg            AddMatch(id + 84 * n, l + 3, l, matches);
23726fa459cSmrg          }
23826fa459cSmrg        } else if (s[0] == 'e') {
23926fa459cSmrg          if (s[1] == 'd') {
24026fa459cSmrg            if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
24126fa459cSmrg          } else if (s[1] == 'r') {
24226fa459cSmrg            if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
24326fa459cSmrg          } else if (s[1] == 's') {
24426fa459cSmrg            if (s[2] == 't' && s[3] == ' ') {
24526fa459cSmrg              AddMatch(id + 95 * n, l + 4, l, matches);
24626fa459cSmrg            }
24726fa459cSmrg          }
24826fa459cSmrg        } else if (s[0] == 'f') {
24926fa459cSmrg          if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
25026fa459cSmrg            AddMatch(id + 90 * n, l + 4, l, matches);
25126fa459cSmrg          }
25226fa459cSmrg        } else if (s[0] == 'i') {
25326fa459cSmrg          if (s[1] == 'v') {
25426fa459cSmrg            if (s[2] == 'e' && s[3] == ' ') {
25526fa459cSmrg              AddMatch(id + 92 * n, l + 4, l, matches);
25626fa459cSmrg            }
25726fa459cSmrg          } else if (s[1] == 'z') {
25826fa459cSmrg            if (s[2] == 'e' && s[3] == ' ') {
25926fa459cSmrg              AddMatch(id + 100 * n, l + 4, l, matches);
26026fa459cSmrg            }
26126fa459cSmrg          }
26226fa459cSmrg        } else if (s[0] == 'l') {
26326fa459cSmrg          if (s[1] == 'e') {
26426fa459cSmrg            if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
26526fa459cSmrg              AddMatch(id + 93 * n, l + 5, l, matches);
26626fa459cSmrg            }
26726fa459cSmrg          } else if (s[1] == 'y') {
26826fa459cSmrg            if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
26926fa459cSmrg          }
27026fa459cSmrg        } else if (s[0] == 'o') {
27126fa459cSmrg          if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
27226fa459cSmrg            AddMatch(id + 106 * n, l + 4, l, matches);
27326fa459cSmrg          }
27426fa459cSmrg        }
27526fa459cSmrg      } else {
27626fa459cSmrg        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
27726fa459cSmrg               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
27826fa459cSmrg           transform. */
27926fa459cSmrg        const BROTLI_BOOL is_all_caps =
28026fa459cSmrg            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
28126fa459cSmrg        const uint8_t* s;
28226fa459cSmrg        if (!IsMatch(dictionary->words, w, data, max_length)) {
28326fa459cSmrg          continue;
28426fa459cSmrg        }
28526fa459cSmrg        /* Transform "" + kUppercase{First,All} + "" */
28626fa459cSmrg        AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
28726fa459cSmrg        has_found_match = BROTLI_TRUE;
28826fa459cSmrg        if (l + 1 >= max_length) {
28926fa459cSmrg          continue;
29026fa459cSmrg        }
29126fa459cSmrg        /* Transforms "" + kUppercase{First,All} + <suffix> */
29226fa459cSmrg        s = &data[l];
29326fa459cSmrg        if (s[0] == ' ') {
29426fa459cSmrg          AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
29526fa459cSmrg        } else if (s[0] == '"') {
29626fa459cSmrg          AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
29726fa459cSmrg          if (s[1] == '>') {
29826fa459cSmrg            AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
29926fa459cSmrg          }
30026fa459cSmrg        } else if (s[0] == '.') {
30126fa459cSmrg          AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
30226fa459cSmrg          if (s[1] == ' ') {
30326fa459cSmrg            AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
30426fa459cSmrg          }
30526fa459cSmrg        } else if (s[0] == ',') {
30626fa459cSmrg          AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
30726fa459cSmrg          if (s[1] == ' ') {
30826fa459cSmrg            AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
30926fa459cSmrg          }
31026fa459cSmrg        } else if (s[0] == '\'') {
31126fa459cSmrg          AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
31226fa459cSmrg        } else if (s[0] == '(') {
31326fa459cSmrg          AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
31426fa459cSmrg        } else if (s[0] == '=') {
31526fa459cSmrg          if (s[1] == '"') {
31626fa459cSmrg            AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
31726fa459cSmrg          } else if (s[1] == '\'') {
31826fa459cSmrg            AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
31926fa459cSmrg          }
32026fa459cSmrg        }
32126fa459cSmrg      }
32226fa459cSmrg    }
32326fa459cSmrg  }
32426fa459cSmrg  /* Transforms with prefixes " " and "." */
32526fa459cSmrg  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
32626fa459cSmrg    BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
32726fa459cSmrg    size_t offset = dictionary->buckets[Hash(&data[1])];
32826fa459cSmrg    BROTLI_BOOL end = !offset;
32926fa459cSmrg    while (!end) {
33026fa459cSmrg      DictWord w = dictionary->dict_words[offset++];
33126fa459cSmrg      const size_t l = w.len & 0x1F;
33226fa459cSmrg      const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
33326fa459cSmrg      const size_t id = w.idx;
33426fa459cSmrg      end = !!(w.len & 0x80);
33526fa459cSmrg      w.len = (uint8_t)l;
33626fa459cSmrg      if (w.transform == 0) {
33726fa459cSmrg        const uint8_t* s;
33826fa459cSmrg        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
33926fa459cSmrg          continue;
34026fa459cSmrg        }
34126fa459cSmrg        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
34226fa459cSmrg                      "." + BROTLI_TRANSFORM_IDENTITY + "" */
34326fa459cSmrg        AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
34426fa459cSmrg        has_found_match = BROTLI_TRUE;
34526fa459cSmrg        if (l + 2 >= max_length) {
34626fa459cSmrg          continue;
34726fa459cSmrg        }
34826fa459cSmrg        /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
34926fa459cSmrg                      "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
35026fa459cSmrg        */
35126fa459cSmrg        s = &data[l + 1];
35226fa459cSmrg        if (s[0] == ' ') {
35326fa459cSmrg          AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
35426fa459cSmrg        } else if (s[0] == '(') {
35526fa459cSmrg          AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
35626fa459cSmrg        } else if (is_space) {
35726fa459cSmrg          if (s[0] == ',') {
35826fa459cSmrg            AddMatch(id + 103 * n, l + 2, l, matches);
35926fa459cSmrg            if (s[1] == ' ') {
36026fa459cSmrg              AddMatch(id + 33 * n, l + 3, l, matches);
36126fa459cSmrg            }
36226fa459cSmrg          } else if (s[0] == '.') {
36326fa459cSmrg            AddMatch(id + 71 * n, l + 2, l, matches);
36426fa459cSmrg            if (s[1] == ' ') {
36526fa459cSmrg              AddMatch(id + 52 * n, l + 3, l, matches);
36626fa459cSmrg            }
36726fa459cSmrg          } else if (s[0] == '=') {
36826fa459cSmrg            if (s[1] == '"') {
36926fa459cSmrg              AddMatch(id + 81 * n, l + 3, l, matches);
37026fa459cSmrg            } else if (s[1] == '\'') {
37126fa459cSmrg              AddMatch(id + 98 * n, l + 3, l, matches);
37226fa459cSmrg            }
37326fa459cSmrg          }
37426fa459cSmrg        }
37526fa459cSmrg      } else if (is_space) {
37626fa459cSmrg        /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
37726fa459cSmrg               is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
37826fa459cSmrg           transform. */
37926fa459cSmrg        const BROTLI_BOOL is_all_caps =
38026fa459cSmrg            TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
38126fa459cSmrg        const uint8_t* s;
38226fa459cSmrg        if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
38326fa459cSmrg          continue;
38426fa459cSmrg        }
38526fa459cSmrg        /* Transforms " " + kUppercase{First,All} + "" */
38626fa459cSmrg        AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
38726fa459cSmrg        has_found_match = BROTLI_TRUE;
38826fa459cSmrg        if (l + 2 >= max_length) {
38926fa459cSmrg          continue;
39026fa459cSmrg        }
39126fa459cSmrg        /* Transforms " " + kUppercase{First,All} + <suffix> */
39226fa459cSmrg        s = &data[l + 1];
39326fa459cSmrg        if (s[0] == ' ') {
39426fa459cSmrg          AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
39526fa459cSmrg        } else if (s[0] == ',') {
39626fa459cSmrg          if (!is_all_caps) {
39726fa459cSmrg            AddMatch(id + 109 * n, l + 2, l, matches);
39826fa459cSmrg          }
39926fa459cSmrg          if (s[1] == ' ') {
40026fa459cSmrg            AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
40126fa459cSmrg          }
40226fa459cSmrg        } else if (s[0] == '.') {
40326fa459cSmrg          AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
40426fa459cSmrg          if (s[1] == ' ') {
40526fa459cSmrg            AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
40626fa459cSmrg          }
40726fa459cSmrg        } else if (s[0] == '=') {
40826fa459cSmrg          if (s[1] == '"') {
40926fa459cSmrg            AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
41026fa459cSmrg          } else if (s[1] == '\'') {
41126fa459cSmrg            AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
41226fa459cSmrg          }
41326fa459cSmrg        }
41426fa459cSmrg      }
41526fa459cSmrg    }
41626fa459cSmrg  }
41726fa459cSmrg  if (max_length >= 6) {
41826fa459cSmrg    /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
41926fa459cSmrg    if ((data[1] == ' ' &&
42026fa459cSmrg         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
42126fa459cSmrg        (data[0] == 0xC2 && data[1] == 0xA0)) {
42226fa459cSmrg      size_t offset = dictionary->buckets[Hash(&data[2])];
42326fa459cSmrg      BROTLI_BOOL end = !offset;
42426fa459cSmrg      while (!end) {
42526fa459cSmrg        DictWord w = dictionary->dict_words[offset++];
42626fa459cSmrg        const size_t l = w.len & 0x1F;
42726fa459cSmrg        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
42826fa459cSmrg        const size_t id = w.idx;
42926fa459cSmrg        end = !!(w.len & 0x80);
43026fa459cSmrg        w.len = (uint8_t)l;
43126fa459cSmrg        if (w.transform == 0 &&
43226fa459cSmrg            IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
43326fa459cSmrg          if (data[0] == 0xC2) {
43426fa459cSmrg            AddMatch(id + 102 * n, l + 2, l, matches);
43526fa459cSmrg            has_found_match = BROTLI_TRUE;
43626fa459cSmrg          } else if (l + 2 < max_length && data[l + 2] == ' ') {
43726fa459cSmrg            size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
43826fa459cSmrg            AddMatch(id + t * n, l + 3, l, matches);
43926fa459cSmrg            has_found_match = BROTLI_TRUE;
44026fa459cSmrg          }
44126fa459cSmrg        }
44226fa459cSmrg      }
44326fa459cSmrg    }
44426fa459cSmrg  }
44526fa459cSmrg  if (max_length >= 9) {
44626fa459cSmrg    /* Transforms with prefixes " the " and ".com/" */
44726fa459cSmrg    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
44826fa459cSmrg         data[3] == 'e' && data[4] == ' ') ||
44926fa459cSmrg        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
45026fa459cSmrg         data[3] == 'm' && data[4] == '/')) {
45126fa459cSmrg      size_t offset = dictionary->buckets[Hash(&data[5])];
45226fa459cSmrg      BROTLI_BOOL end = !offset;
45326fa459cSmrg      while (!end) {
45426fa459cSmrg        DictWord w = dictionary->dict_words[offset++];
45526fa459cSmrg        const size_t l = w.len & 0x1F;
45626fa459cSmrg        const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
45726fa459cSmrg        const size_t id = w.idx;
45826fa459cSmrg        end = !!(w.len & 0x80);
45926fa459cSmrg        w.len = (uint8_t)l;
46026fa459cSmrg        if (w.transform == 0 &&
46126fa459cSmrg            IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
46226fa459cSmrg          AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
46326fa459cSmrg          has_found_match = BROTLI_TRUE;
46426fa459cSmrg          if (l + 5 < max_length) {
46526fa459cSmrg            const uint8_t* s = &data[l + 5];
46626fa459cSmrg            if (data[0] == ' ') {
46726fa459cSmrg              if (l + 8 < max_length &&
46826fa459cSmrg                  s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
46926fa459cSmrg                AddMatch(id + 62 * n, l + 9, l, matches);
47026fa459cSmrg                if (l + 12 < max_length &&
47126fa459cSmrg                    s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
47226fa459cSmrg                  AddMatch(id + 73 * n, l + 13, l, matches);
47326fa459cSmrg                }
47426fa459cSmrg              }
47526fa459cSmrg            }
47626fa459cSmrg          }
47726fa459cSmrg        }
47826fa459cSmrg      }
47926fa459cSmrg    }
48026fa459cSmrg  }
48126fa459cSmrg  return has_found_match;
48226fa459cSmrg}
48326fa459cSmrg
48426fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
48526fa459cSmrg}  /* extern "C" */
48626fa459cSmrg#endif
487