126fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved. 226fa459cSmrg 326fa459cSmrg Distributed under MIT license. 426fa459cSmrg See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 526fa459cSmrg*/ 626fa459cSmrg 726fa459cSmrg#include "./static_dict.h" 826fa459cSmrg 926fa459cSmrg#include "../common/dictionary.h" 1026fa459cSmrg#include "../common/platform.h" 1126fa459cSmrg#include "../common/transform.h" 1226fa459cSmrg#include "./encoder_dict.h" 1326fa459cSmrg#include "./find_match_length.h" 1426fa459cSmrg 1526fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus) 1626fa459cSmrgextern "C" { 1726fa459cSmrg#endif 1826fa459cSmrg 1926fa459cSmrgstatic BROTLI_INLINE uint32_t Hash(const uint8_t* data) { 2026fa459cSmrg uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32; 2126fa459cSmrg /* The higher bits contain more mixture from the multiplication, 2226fa459cSmrg so we take our results from there. */ 2326fa459cSmrg return h >> (32 - kDictNumBits); 2426fa459cSmrg} 2526fa459cSmrg 2626fa459cSmrgstatic BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code, 2726fa459cSmrg uint32_t* matches) { 2826fa459cSmrg uint32_t match = (uint32_t)((distance << 5) + len_code); 2926fa459cSmrg matches[len] = BROTLI_MIN(uint32_t, matches[len], match); 3026fa459cSmrg} 3126fa459cSmrg 3226fa459cSmrgstatic BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary, 3326fa459cSmrg const uint8_t* data, 3426fa459cSmrg size_t id, 3526fa459cSmrg size_t len, 3626fa459cSmrg size_t maxlen) { 3726fa459cSmrg const size_t offset = dictionary->offsets_by_length[len] + len * id; 3826fa459cSmrg return FindMatchLengthWithLimit(&dictionary->data[offset], data, 3926fa459cSmrg BROTLI_MIN(size_t, len, maxlen)); 4026fa459cSmrg} 4126fa459cSmrg 4226fa459cSmrgstatic BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary, 4326fa459cSmrg DictWord w, const uint8_t* data, size_t max_length) { 4426fa459cSmrg if (w.len > max_length) { 4526fa459cSmrg return BROTLI_FALSE; 4626fa459cSmrg } else { 4726fa459cSmrg const size_t offset = dictionary->offsets_by_length[w.len] + 4826fa459cSmrg (size_t)w.len * (size_t)w.idx; 4926fa459cSmrg const uint8_t* dict = &dictionary->data[offset]; 5026fa459cSmrg if (w.transform == 0) { 5126fa459cSmrg /* Match against base dictionary word. */ 5226fa459cSmrg return 5326fa459cSmrg TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len); 5426fa459cSmrg } else if (w.transform == 10) { 5526fa459cSmrg /* Match against uppercase first transform. 5626fa459cSmrg Note that there are only ASCII uppercase words in the lookup table. */ 5726fa459cSmrg return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' && 5826fa459cSmrg (dict[0] ^ 32) == data[0] && 5926fa459cSmrg FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == 6026fa459cSmrg w.len - 1u); 6126fa459cSmrg } else { 6226fa459cSmrg /* Match against uppercase all transform. 6326fa459cSmrg Note that there are only ASCII uppercase words in the lookup table. */ 6426fa459cSmrg size_t i; 6526fa459cSmrg for (i = 0; i < w.len; ++i) { 6626fa459cSmrg if (dict[i] >= 'a' && dict[i] <= 'z') { 6726fa459cSmrg if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE; 6826fa459cSmrg } else { 6926fa459cSmrg if (dict[i] != data[i]) return BROTLI_FALSE; 7026fa459cSmrg } 7126fa459cSmrg } 7226fa459cSmrg return BROTLI_TRUE; 7326fa459cSmrg } 7426fa459cSmrg } 7526fa459cSmrg} 7626fa459cSmrg 7726fa459cSmrgBROTLI_BOOL BrotliFindAllStaticDictionaryMatches( 7826fa459cSmrg const BrotliEncoderDictionary* dictionary, const uint8_t* data, 7926fa459cSmrg size_t min_length, size_t max_length, uint32_t* matches) { 8026fa459cSmrg BROTLI_BOOL has_found_match = BROTLI_FALSE; 8126fa459cSmrg { 8226fa459cSmrg size_t offset = dictionary->buckets[Hash(data)]; 8326fa459cSmrg BROTLI_BOOL end = !offset; 8426fa459cSmrg while (!end) { 8526fa459cSmrg DictWord w = dictionary->dict_words[offset++]; 8626fa459cSmrg const size_t l = w.len & 0x1F; 8726fa459cSmrg const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 8826fa459cSmrg const size_t id = w.idx; 8926fa459cSmrg end = !!(w.len & 0x80); 9026fa459cSmrg w.len = (uint8_t)l; 9126fa459cSmrg if (w.transform == 0) { 9226fa459cSmrg const size_t matchlen = 9326fa459cSmrg DictMatchLength(dictionary->words, data, id, l, max_length); 9426fa459cSmrg const uint8_t* s; 9526fa459cSmrg size_t minlen; 9626fa459cSmrg size_t maxlen; 9726fa459cSmrg size_t len; 9826fa459cSmrg /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */ 9926fa459cSmrg if (matchlen == l) { 10026fa459cSmrg AddMatch(id, l, l, matches); 10126fa459cSmrg has_found_match = BROTLI_TRUE; 10226fa459cSmrg } 10326fa459cSmrg /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and 10426fa459cSmrg "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */ 10526fa459cSmrg if (matchlen >= l - 1) { 10626fa459cSmrg AddMatch(id + 12 * n, l - 1, l, matches); 10726fa459cSmrg if (l + 2 < max_length && 10826fa459cSmrg data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' && 10926fa459cSmrg data[l + 2] == ' ') { 11026fa459cSmrg AddMatch(id + 49 * n, l + 3, l, matches); 11126fa459cSmrg } 11226fa459cSmrg has_found_match = BROTLI_TRUE; 11326fa459cSmrg } 11426fa459cSmrg /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */ 11526fa459cSmrg minlen = min_length; 11626fa459cSmrg if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9); 11726fa459cSmrg maxlen = BROTLI_MIN(size_t, matchlen, l - 2); 11826fa459cSmrg for (len = minlen; len <= maxlen; ++len) { 11926fa459cSmrg size_t cut = l - len; 12026fa459cSmrg size_t transform_id = (cut << 2) + 12126fa459cSmrg (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); 12226fa459cSmrg AddMatch(id + transform_id * n, len, l, matches); 12326fa459cSmrg has_found_match = BROTLI_TRUE; 12426fa459cSmrg } 12526fa459cSmrg if (matchlen < l || l + 6 >= max_length) { 12626fa459cSmrg continue; 12726fa459cSmrg } 12826fa459cSmrg s = &data[l]; 12926fa459cSmrg /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */ 13026fa459cSmrg if (s[0] == ' ') { 13126fa459cSmrg AddMatch(id + n, l + 1, l, matches); 13226fa459cSmrg if (s[1] == 'a') { 13326fa459cSmrg if (s[2] == ' ') { 13426fa459cSmrg AddMatch(id + 28 * n, l + 3, l, matches); 13526fa459cSmrg } else if (s[2] == 's') { 13626fa459cSmrg if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches); 13726fa459cSmrg } else if (s[2] == 't') { 13826fa459cSmrg if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches); 13926fa459cSmrg } else if (s[2] == 'n') { 14026fa459cSmrg if (s[3] == 'd' && s[4] == ' ') { 14126fa459cSmrg AddMatch(id + 10 * n, l + 5, l, matches); 14226fa459cSmrg } 14326fa459cSmrg } 14426fa459cSmrg } else if (s[1] == 'b') { 14526fa459cSmrg if (s[2] == 'y' && s[3] == ' ') { 14626fa459cSmrg AddMatch(id + 38 * n, l + 4, l, matches); 14726fa459cSmrg } 14826fa459cSmrg } else if (s[1] == 'i') { 14926fa459cSmrg if (s[2] == 'n') { 15026fa459cSmrg if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); 15126fa459cSmrg } else if (s[2] == 's') { 15226fa459cSmrg if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches); 15326fa459cSmrg } 15426fa459cSmrg } else if (s[1] == 'f') { 15526fa459cSmrg if (s[2] == 'o') { 15626fa459cSmrg if (s[3] == 'r' && s[4] == ' ') { 15726fa459cSmrg AddMatch(id + 25 * n, l + 5, l, matches); 15826fa459cSmrg } 15926fa459cSmrg } else if (s[2] == 'r') { 16026fa459cSmrg if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') { 16126fa459cSmrg AddMatch(id + 37 * n, l + 6, l, matches); 16226fa459cSmrg } 16326fa459cSmrg } 16426fa459cSmrg } else if (s[1] == 'o') { 16526fa459cSmrg if (s[2] == 'f') { 16626fa459cSmrg if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches); 16726fa459cSmrg } else if (s[2] == 'n') { 16826fa459cSmrg if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches); 16926fa459cSmrg } 17026fa459cSmrg } else if (s[1] == 'n') { 17126fa459cSmrg if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') { 17226fa459cSmrg AddMatch(id + 80 * n, l + 5, l, matches); 17326fa459cSmrg } 17426fa459cSmrg } else if (s[1] == 't') { 17526fa459cSmrg if (s[2] == 'h') { 17626fa459cSmrg if (s[3] == 'e') { 17726fa459cSmrg if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches); 17826fa459cSmrg } else if (s[3] == 'a') { 17926fa459cSmrg if (s[4] == 't' && s[5] == ' ') { 18026fa459cSmrg AddMatch(id + 29 * n, l + 6, l, matches); 18126fa459cSmrg } 18226fa459cSmrg } 18326fa459cSmrg } else if (s[2] == 'o') { 18426fa459cSmrg if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches); 18526fa459cSmrg } 18626fa459cSmrg } else if (s[1] == 'w') { 18726fa459cSmrg if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') { 18826fa459cSmrg AddMatch(id + 35 * n, l + 6, l, matches); 18926fa459cSmrg } 19026fa459cSmrg } 19126fa459cSmrg } else if (s[0] == '"') { 19226fa459cSmrg AddMatch(id + 19 * n, l + 1, l, matches); 19326fa459cSmrg if (s[1] == '>') { 19426fa459cSmrg AddMatch(id + 21 * n, l + 2, l, matches); 19526fa459cSmrg } 19626fa459cSmrg } else if (s[0] == '.') { 19726fa459cSmrg AddMatch(id + 20 * n, l + 1, l, matches); 19826fa459cSmrg if (s[1] == ' ') { 19926fa459cSmrg AddMatch(id + 31 * n, l + 2, l, matches); 20026fa459cSmrg if (s[2] == 'T' && s[3] == 'h') { 20126fa459cSmrg if (s[4] == 'e') { 20226fa459cSmrg if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches); 20326fa459cSmrg } else if (s[4] == 'i') { 20426fa459cSmrg if (s[5] == 's' && s[6] == ' ') { 20526fa459cSmrg AddMatch(id + 75 * n, l + 7, l, matches); 20626fa459cSmrg } 20726fa459cSmrg } 20826fa459cSmrg } 20926fa459cSmrg } 21026fa459cSmrg } else if (s[0] == ',') { 21126fa459cSmrg AddMatch(id + 76 * n, l + 1, l, matches); 21226fa459cSmrg if (s[1] == ' ') { 21326fa459cSmrg AddMatch(id + 14 * n, l + 2, l, matches); 21426fa459cSmrg } 21526fa459cSmrg } else if (s[0] == '\n') { 21626fa459cSmrg AddMatch(id + 22 * n, l + 1, l, matches); 21726fa459cSmrg if (s[1] == '\t') { 21826fa459cSmrg AddMatch(id + 50 * n, l + 2, l, matches); 21926fa459cSmrg } 22026fa459cSmrg } else if (s[0] == ']') { 22126fa459cSmrg AddMatch(id + 24 * n, l + 1, l, matches); 22226fa459cSmrg } else if (s[0] == '\'') { 22326fa459cSmrg AddMatch(id + 36 * n, l + 1, l, matches); 22426fa459cSmrg } else if (s[0] == ':') { 22526fa459cSmrg AddMatch(id + 51 * n, l + 1, l, matches); 22626fa459cSmrg } else if (s[0] == '(') { 22726fa459cSmrg AddMatch(id + 57 * n, l + 1, l, matches); 22826fa459cSmrg } else if (s[0] == '=') { 22926fa459cSmrg if (s[1] == '"') { 23026fa459cSmrg AddMatch(id + 70 * n, l + 2, l, matches); 23126fa459cSmrg } else if (s[1] == '\'') { 23226fa459cSmrg AddMatch(id + 86 * n, l + 2, l, matches); 23326fa459cSmrg } 23426fa459cSmrg } else if (s[0] == 'a') { 23526fa459cSmrg if (s[1] == 'l' && s[2] == ' ') { 23626fa459cSmrg AddMatch(id + 84 * n, l + 3, l, matches); 23726fa459cSmrg } 23826fa459cSmrg } else if (s[0] == 'e') { 23926fa459cSmrg if (s[1] == 'd') { 24026fa459cSmrg if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches); 24126fa459cSmrg } else if (s[1] == 'r') { 24226fa459cSmrg if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches); 24326fa459cSmrg } else if (s[1] == 's') { 24426fa459cSmrg if (s[2] == 't' && s[3] == ' ') { 24526fa459cSmrg AddMatch(id + 95 * n, l + 4, l, matches); 24626fa459cSmrg } 24726fa459cSmrg } 24826fa459cSmrg } else if (s[0] == 'f') { 24926fa459cSmrg if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') { 25026fa459cSmrg AddMatch(id + 90 * n, l + 4, l, matches); 25126fa459cSmrg } 25226fa459cSmrg } else if (s[0] == 'i') { 25326fa459cSmrg if (s[1] == 'v') { 25426fa459cSmrg if (s[2] == 'e' && s[3] == ' ') { 25526fa459cSmrg AddMatch(id + 92 * n, l + 4, l, matches); 25626fa459cSmrg } 25726fa459cSmrg } else if (s[1] == 'z') { 25826fa459cSmrg if (s[2] == 'e' && s[3] == ' ') { 25926fa459cSmrg AddMatch(id + 100 * n, l + 4, l, matches); 26026fa459cSmrg } 26126fa459cSmrg } 26226fa459cSmrg } else if (s[0] == 'l') { 26326fa459cSmrg if (s[1] == 'e') { 26426fa459cSmrg if (s[2] == 's' && s[3] == 's' && s[4] == ' ') { 26526fa459cSmrg AddMatch(id + 93 * n, l + 5, l, matches); 26626fa459cSmrg } 26726fa459cSmrg } else if (s[1] == 'y') { 26826fa459cSmrg if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches); 26926fa459cSmrg } 27026fa459cSmrg } else if (s[0] == 'o') { 27126fa459cSmrg if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') { 27226fa459cSmrg AddMatch(id + 106 * n, l + 4, l, matches); 27326fa459cSmrg } 27426fa459cSmrg } 27526fa459cSmrg } else { 27626fa459cSmrg /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and 27726fa459cSmrg is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) 27826fa459cSmrg transform. */ 27926fa459cSmrg const BROTLI_BOOL is_all_caps = 28026fa459cSmrg TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); 28126fa459cSmrg const uint8_t* s; 28226fa459cSmrg if (!IsMatch(dictionary->words, w, data, max_length)) { 28326fa459cSmrg continue; 28426fa459cSmrg } 28526fa459cSmrg /* Transform "" + kUppercase{First,All} + "" */ 28626fa459cSmrg AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches); 28726fa459cSmrg has_found_match = BROTLI_TRUE; 28826fa459cSmrg if (l + 1 >= max_length) { 28926fa459cSmrg continue; 29026fa459cSmrg } 29126fa459cSmrg /* Transforms "" + kUppercase{First,All} + <suffix> */ 29226fa459cSmrg s = &data[l]; 29326fa459cSmrg if (s[0] == ' ') { 29426fa459cSmrg AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches); 29526fa459cSmrg } else if (s[0] == '"') { 29626fa459cSmrg AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches); 29726fa459cSmrg if (s[1] == '>') { 29826fa459cSmrg AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches); 29926fa459cSmrg } 30026fa459cSmrg } else if (s[0] == '.') { 30126fa459cSmrg AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches); 30226fa459cSmrg if (s[1] == ' ') { 30326fa459cSmrg AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches); 30426fa459cSmrg } 30526fa459cSmrg } else if (s[0] == ',') { 30626fa459cSmrg AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches); 30726fa459cSmrg if (s[1] == ' ') { 30826fa459cSmrg AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches); 30926fa459cSmrg } 31026fa459cSmrg } else if (s[0] == '\'') { 31126fa459cSmrg AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches); 31226fa459cSmrg } else if (s[0] == '(') { 31326fa459cSmrg AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches); 31426fa459cSmrg } else if (s[0] == '=') { 31526fa459cSmrg if (s[1] == '"') { 31626fa459cSmrg AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches); 31726fa459cSmrg } else if (s[1] == '\'') { 31826fa459cSmrg AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches); 31926fa459cSmrg } 32026fa459cSmrg } 32126fa459cSmrg } 32226fa459cSmrg } 32326fa459cSmrg } 32426fa459cSmrg /* Transforms with prefixes " " and "." */ 32526fa459cSmrg if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { 32626fa459cSmrg BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' '); 32726fa459cSmrg size_t offset = dictionary->buckets[Hash(&data[1])]; 32826fa459cSmrg BROTLI_BOOL end = !offset; 32926fa459cSmrg while (!end) { 33026fa459cSmrg DictWord w = dictionary->dict_words[offset++]; 33126fa459cSmrg const size_t l = w.len & 0x1F; 33226fa459cSmrg const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 33326fa459cSmrg const size_t id = w.idx; 33426fa459cSmrg end = !!(w.len & 0x80); 33526fa459cSmrg w.len = (uint8_t)l; 33626fa459cSmrg if (w.transform == 0) { 33726fa459cSmrg const uint8_t* s; 33826fa459cSmrg if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { 33926fa459cSmrg continue; 34026fa459cSmrg } 34126fa459cSmrg /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and 34226fa459cSmrg "." + BROTLI_TRANSFORM_IDENTITY + "" */ 34326fa459cSmrg AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); 34426fa459cSmrg has_found_match = BROTLI_TRUE; 34526fa459cSmrg if (l + 2 >= max_length) { 34626fa459cSmrg continue; 34726fa459cSmrg } 34826fa459cSmrg /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and 34926fa459cSmrg "." + BROTLI_TRANSFORM_IDENTITY + <suffix> 35026fa459cSmrg */ 35126fa459cSmrg s = &data[l + 1]; 35226fa459cSmrg if (s[0] == ' ') { 35326fa459cSmrg AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); 35426fa459cSmrg } else if (s[0] == '(') { 35526fa459cSmrg AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches); 35626fa459cSmrg } else if (is_space) { 35726fa459cSmrg if (s[0] == ',') { 35826fa459cSmrg AddMatch(id + 103 * n, l + 2, l, matches); 35926fa459cSmrg if (s[1] == ' ') { 36026fa459cSmrg AddMatch(id + 33 * n, l + 3, l, matches); 36126fa459cSmrg } 36226fa459cSmrg } else if (s[0] == '.') { 36326fa459cSmrg AddMatch(id + 71 * n, l + 2, l, matches); 36426fa459cSmrg if (s[1] == ' ') { 36526fa459cSmrg AddMatch(id + 52 * n, l + 3, l, matches); 36626fa459cSmrg } 36726fa459cSmrg } else if (s[0] == '=') { 36826fa459cSmrg if (s[1] == '"') { 36926fa459cSmrg AddMatch(id + 81 * n, l + 3, l, matches); 37026fa459cSmrg } else if (s[1] == '\'') { 37126fa459cSmrg AddMatch(id + 98 * n, l + 3, l, matches); 37226fa459cSmrg } 37326fa459cSmrg } 37426fa459cSmrg } 37526fa459cSmrg } else if (is_space) { 37626fa459cSmrg /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and 37726fa459cSmrg is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL) 37826fa459cSmrg transform. */ 37926fa459cSmrg const BROTLI_BOOL is_all_caps = 38026fa459cSmrg TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST); 38126fa459cSmrg const uint8_t* s; 38226fa459cSmrg if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) { 38326fa459cSmrg continue; 38426fa459cSmrg } 38526fa459cSmrg /* Transforms " " + kUppercase{First,All} + "" */ 38626fa459cSmrg AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches); 38726fa459cSmrg has_found_match = BROTLI_TRUE; 38826fa459cSmrg if (l + 2 >= max_length) { 38926fa459cSmrg continue; 39026fa459cSmrg } 39126fa459cSmrg /* Transforms " " + kUppercase{First,All} + <suffix> */ 39226fa459cSmrg s = &data[l + 1]; 39326fa459cSmrg if (s[0] == ' ') { 39426fa459cSmrg AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches); 39526fa459cSmrg } else if (s[0] == ',') { 39626fa459cSmrg if (!is_all_caps) { 39726fa459cSmrg AddMatch(id + 109 * n, l + 2, l, matches); 39826fa459cSmrg } 39926fa459cSmrg if (s[1] == ' ') { 40026fa459cSmrg AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches); 40126fa459cSmrg } 40226fa459cSmrg } else if (s[0] == '.') { 40326fa459cSmrg AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches); 40426fa459cSmrg if (s[1] == ' ') { 40526fa459cSmrg AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches); 40626fa459cSmrg } 40726fa459cSmrg } else if (s[0] == '=') { 40826fa459cSmrg if (s[1] == '"') { 40926fa459cSmrg AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches); 41026fa459cSmrg } else if (s[1] == '\'') { 41126fa459cSmrg AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches); 41226fa459cSmrg } 41326fa459cSmrg } 41426fa459cSmrg } 41526fa459cSmrg } 41626fa459cSmrg } 41726fa459cSmrg if (max_length >= 6) { 41826fa459cSmrg /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */ 41926fa459cSmrg if ((data[1] == ' ' && 42026fa459cSmrg (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || 42126fa459cSmrg (data[0] == 0xC2 && data[1] == 0xA0)) { 42226fa459cSmrg size_t offset = dictionary->buckets[Hash(&data[2])]; 42326fa459cSmrg BROTLI_BOOL end = !offset; 42426fa459cSmrg while (!end) { 42526fa459cSmrg DictWord w = dictionary->dict_words[offset++]; 42626fa459cSmrg const size_t l = w.len & 0x1F; 42726fa459cSmrg const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 42826fa459cSmrg const size_t id = w.idx; 42926fa459cSmrg end = !!(w.len & 0x80); 43026fa459cSmrg w.len = (uint8_t)l; 43126fa459cSmrg if (w.transform == 0 && 43226fa459cSmrg IsMatch(dictionary->words, w, &data[2], max_length - 2)) { 43326fa459cSmrg if (data[0] == 0xC2) { 43426fa459cSmrg AddMatch(id + 102 * n, l + 2, l, matches); 43526fa459cSmrg has_found_match = BROTLI_TRUE; 43626fa459cSmrg } else if (l + 2 < max_length && data[l + 2] == ' ') { 43726fa459cSmrg size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13); 43826fa459cSmrg AddMatch(id + t * n, l + 3, l, matches); 43926fa459cSmrg has_found_match = BROTLI_TRUE; 44026fa459cSmrg } 44126fa459cSmrg } 44226fa459cSmrg } 44326fa459cSmrg } 44426fa459cSmrg } 44526fa459cSmrg if (max_length >= 9) { 44626fa459cSmrg /* Transforms with prefixes " the " and ".com/" */ 44726fa459cSmrg if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && 44826fa459cSmrg data[3] == 'e' && data[4] == ' ') || 44926fa459cSmrg (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && 45026fa459cSmrg data[3] == 'm' && data[4] == '/')) { 45126fa459cSmrg size_t offset = dictionary->buckets[Hash(&data[5])]; 45226fa459cSmrg BROTLI_BOOL end = !offset; 45326fa459cSmrg while (!end) { 45426fa459cSmrg DictWord w = dictionary->dict_words[offset++]; 45526fa459cSmrg const size_t l = w.len & 0x1F; 45626fa459cSmrg const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l]; 45726fa459cSmrg const size_t id = w.idx; 45826fa459cSmrg end = !!(w.len & 0x80); 45926fa459cSmrg w.len = (uint8_t)l; 46026fa459cSmrg if (w.transform == 0 && 46126fa459cSmrg IsMatch(dictionary->words, w, &data[5], max_length - 5)) { 46226fa459cSmrg AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches); 46326fa459cSmrg has_found_match = BROTLI_TRUE; 46426fa459cSmrg if (l + 5 < max_length) { 46526fa459cSmrg const uint8_t* s = &data[l + 5]; 46626fa459cSmrg if (data[0] == ' ') { 46726fa459cSmrg if (l + 8 < max_length && 46826fa459cSmrg s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') { 46926fa459cSmrg AddMatch(id + 62 * n, l + 9, l, matches); 47026fa459cSmrg if (l + 12 < max_length && 47126fa459cSmrg s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') { 47226fa459cSmrg AddMatch(id + 73 * n, l + 13, l, matches); 47326fa459cSmrg } 47426fa459cSmrg } 47526fa459cSmrg } 47626fa459cSmrg } 47726fa459cSmrg } 47826fa459cSmrg } 47926fa459cSmrg } 48026fa459cSmrg } 48126fa459cSmrg return has_found_match; 48226fa459cSmrg} 48326fa459cSmrg 48426fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus) 48526fa459cSmrg} /* extern "C" */ 48626fa459cSmrg#endif 487