126fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved. 226fa459cSmrg 326fa459cSmrg Distributed under MIT license. 426fa459cSmrg See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 526fa459cSmrg*/ 626fa459cSmrg 726fa459cSmrg#include "./transform.h" 826fa459cSmrg 926fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus) 1026fa459cSmrgextern "C" { 1126fa459cSmrg#endif 1226fa459cSmrg 1326fa459cSmrg/* RFC 7932 transforms string data */ 1426fa459cSmrgstatic const char kPrefixSuffix[217] = 1526fa459cSmrg "\1 \2, \10 of the \4 of \2s \1.\5 and \4 " 1626fa459cSmrg/* 0x _0 _2 __5 _E _3 _6 _8 _E */ 1726fa459cSmrg "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 " 1826fa459cSmrg/* 2x _3_ _5 _A_ _D_ _F _2 _4 _A _E */ 1926fa459cSmrg "that \1\'\6 with \6 from \4 by \1(\6. T" 2026fa459cSmrg/* 4x _5_ _7 _E _5 _A _C */ 2126fa459cSmrg "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed " 2226fa459cSmrg/* 6x _3 _8 _D _2 _7_ _ _A _C */ 2326fa459cSmrg "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5" 2426fa459cSmrg/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */ 2526fa459cSmrg " not \3er \3al \4ful \4ive \5less \4es" 2626fa459cSmrg/* Ax _5 _9 _D _2 _7 _D */ 2726fa459cSmrg "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */ 2826fa459cSmrg/* Cx _2 _7___ ___ _A _F _5 _8 */ 2926fa459cSmrg 3026fa459cSmrgstatic const uint16_t kPrefixSuffixMap[50] = { 3126fa459cSmrg 0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25, 3226fa459cSmrg 0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E, 3326fa459cSmrg 0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C, 3426fa459cSmrg 0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9, 3526fa459cSmrg 0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8 3626fa459cSmrg}; 3726fa459cSmrg 3826fa459cSmrg/* RFC 7932 transforms */ 3926fa459cSmrgstatic const uint8_t kTransformsData[] = { 4026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 49, 4126fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 0, 4226fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 0, 4326fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49, 4426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, 4526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 47, 4626fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 49, 4726fa459cSmrg 4, BROTLI_TRANSFORM_IDENTITY, 0, 4826fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 3, 4926fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, 5026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 6, 5126fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49, 5226fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49, 5326fa459cSmrg 1, BROTLI_TRANSFORM_IDENTITY, 0, 5426fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 1, 5526fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0, 5626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 7, 5726fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 9, 5826fa459cSmrg 48, BROTLI_TRANSFORM_IDENTITY, 0, 5926fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 8, 6026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 5, 6126fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 10, 6226fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 11, 6326fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49, 6426fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 13, 6526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 14, 6626fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49, 6726fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49, 6826fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 15, 6926fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 16, 7026fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49, 7126fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 12, 7226fa459cSmrg 5, BROTLI_TRANSFORM_IDENTITY, 49, 7326fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 1, 7426fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49, 7526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 18, 7626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 17, 7726fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 19, 7826fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 20, 7926fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49, 8026fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49, 8126fa459cSmrg 47, BROTLI_TRANSFORM_IDENTITY, 49, 8226fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49, 8326fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 22, 8426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, 8526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 23, 8626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 24, 8726fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 25, 8826fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49, 8926fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26, 9026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 27, 9126fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 28, 9226fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 12, 9326fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 29, 9426fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49, 9526fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49, 9626fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49, 9726fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 21, 9826fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, 9926fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49, 10026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 31, 10126fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 32, 10226fa459cSmrg 47, BROTLI_TRANSFORM_IDENTITY, 3, 10326fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49, 10426fa459cSmrg 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49, 10526fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1, 10626fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8, 10726fa459cSmrg 5, BROTLI_TRANSFORM_IDENTITY, 21, 10826fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, 10926fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10, 11026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 30, 11126fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 5, 11226fa459cSmrg 35, BROTLI_TRANSFORM_IDENTITY, 49, 11326fa459cSmrg 47, BROTLI_TRANSFORM_IDENTITY, 2, 11426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17, 11526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 36, 11626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 33, 11726fa459cSmrg 5, BROTLI_TRANSFORM_IDENTITY, 0, 11826fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21, 11926fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, 12026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 37, 12126fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 30, 12226fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 38, 12326fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0, 12426fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 39, 12526fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49, 12626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 34, 12726fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8, 12826fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, 12926fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 21, 13026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 40, 13126fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12, 13226fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 41, 13326fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 42, 13426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17, 13526fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 43, 13626fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5, 13726fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10, 13826fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 34, 13926fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, 14026fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 44, 14126fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, 14226fa459cSmrg 45, BROTLI_TRANSFORM_IDENTITY, 49, 14326fa459cSmrg 0, BROTLI_TRANSFORM_IDENTITY, 33, 14426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, 14526fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, 14626fa459cSmrg 49, BROTLI_TRANSFORM_IDENTITY, 46, 14726fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, 14826fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, 14926fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33, 15026fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30, 15126fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1, 15226fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33, 15326fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21, 15426fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, 15526fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5, 15626fa459cSmrg 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, 15726fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12, 15826fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30, 15926fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34, 16026fa459cSmrg 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34, 16126fa459cSmrg}; 16226fa459cSmrg 16326fa459cSmrgstatic const BrotliTransforms kBrotliTransforms = { 16426fa459cSmrg sizeof(kPrefixSuffix), 16526fa459cSmrg (const uint8_t*)kPrefixSuffix, 16626fa459cSmrg kPrefixSuffixMap, 16726fa459cSmrg sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])), 16826fa459cSmrg kTransformsData, 16926fa459cSmrg NULL, /* no extra parameters */ 17026fa459cSmrg {0, 12, 27, 23, 42, 63, 56, 48, 59, 64} 17126fa459cSmrg}; 17226fa459cSmrg 17326fa459cSmrgconst BrotliTransforms* BrotliGetTransforms(void) { 17426fa459cSmrg return &kBrotliTransforms; 17526fa459cSmrg} 17626fa459cSmrg 17726fa459cSmrgstatic int ToUpperCase(uint8_t* p) { 17826fa459cSmrg if (p[0] < 0xC0) { 17926fa459cSmrg if (p[0] >= 'a' && p[0] <= 'z') { 18026fa459cSmrg p[0] ^= 32; 18126fa459cSmrg } 18226fa459cSmrg return 1; 18326fa459cSmrg } 18426fa459cSmrg /* An overly simplified uppercasing model for UTF-8. */ 18526fa459cSmrg if (p[0] < 0xE0) { 18626fa459cSmrg p[1] ^= 32; 18726fa459cSmrg return 2; 18826fa459cSmrg } 18926fa459cSmrg /* An arbitrary transform for three byte characters. */ 19026fa459cSmrg p[2] ^= 5; 19126fa459cSmrg return 3; 19226fa459cSmrg} 19326fa459cSmrg 19426fa459cSmrgstatic int Shift(uint8_t* word, int word_len, uint16_t parameter) { 19526fa459cSmrg /* Limited sign extension: scalar < (1 << 24). */ 19626fa459cSmrg uint32_t scalar = 19726fa459cSmrg (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u)); 19826fa459cSmrg if (word[0] < 0x80) { 19926fa459cSmrg /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */ 20026fa459cSmrg scalar += (uint32_t)word[0]; 20126fa459cSmrg word[0] = (uint8_t)(scalar & 0x7Fu); 20226fa459cSmrg return 1; 20326fa459cSmrg } else if (word[0] < 0xC0) { 20426fa459cSmrg /* Continuation / 10AAAAAA. */ 20526fa459cSmrg return 1; 20626fa459cSmrg } else if (word[0] < 0xE0) { 20726fa459cSmrg /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */ 20826fa459cSmrg if (word_len < 2) return 1; 20926fa459cSmrg scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u)); 21026fa459cSmrg word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F)); 21126fa459cSmrg word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F)); 21226fa459cSmrg return 2; 21326fa459cSmrg } else if (word[0] < 0xF0) { 21426fa459cSmrg /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */ 21526fa459cSmrg if (word_len < 3) return word_len; 21626fa459cSmrg scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) | 21726fa459cSmrg ((word[0] & 0x0Fu) << 12u)); 21826fa459cSmrg word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F)); 21926fa459cSmrg word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F)); 22026fa459cSmrg word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F)); 22126fa459cSmrg return 3; 22226fa459cSmrg } else if (word[0] < 0xF8) { 22326fa459cSmrg /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */ 22426fa459cSmrg if (word_len < 4) return word_len; 22526fa459cSmrg scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) | 22626fa459cSmrg ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u)); 22726fa459cSmrg word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07)); 22826fa459cSmrg word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F)); 22926fa459cSmrg word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F)); 23026fa459cSmrg word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F)); 23126fa459cSmrg return 4; 23226fa459cSmrg } 23326fa459cSmrg return 1; 23426fa459cSmrg} 23526fa459cSmrg 23626fa459cSmrgint BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len, 23726fa459cSmrg const BrotliTransforms* transforms, int transform_idx) { 23826fa459cSmrg int idx = 0; 23926fa459cSmrg const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx); 24026fa459cSmrg uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx); 24126fa459cSmrg const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx); 24226fa459cSmrg { 24326fa459cSmrg int prefix_len = *prefix++; 24426fa459cSmrg while (prefix_len--) { dst[idx++] = *prefix++; } 24526fa459cSmrg } 24626fa459cSmrg { 24726fa459cSmrg const int t = type; 24826fa459cSmrg int i = 0; 24926fa459cSmrg if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) { 25026fa459cSmrg len -= t; 25126fa459cSmrg } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1 25226fa459cSmrg && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) { 25326fa459cSmrg int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1); 25426fa459cSmrg word += skip; 25526fa459cSmrg len -= skip; 25626fa459cSmrg } 25726fa459cSmrg while (i < len) { dst[idx++] = word[i++]; } 25826fa459cSmrg if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { 25926fa459cSmrg ToUpperCase(&dst[idx - len]); 26026fa459cSmrg } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { 26126fa459cSmrg uint8_t* uppercase = &dst[idx - len]; 26226fa459cSmrg while (len > 0) { 26326fa459cSmrg int step = ToUpperCase(uppercase); 26426fa459cSmrg uppercase += step; 26526fa459cSmrg len -= step; 26626fa459cSmrg } 26726fa459cSmrg } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) { 26826fa459cSmrg uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] 26926fa459cSmrg + (transforms->params[transform_idx * 2 + 1] << 8u)); 27026fa459cSmrg Shift(&dst[idx - len], len, param); 27126fa459cSmrg } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) { 27226fa459cSmrg uint16_t param = (uint16_t)(transforms->params[transform_idx * 2] 27326fa459cSmrg + (transforms->params[transform_idx * 2 + 1] << 8u)); 27426fa459cSmrg uint8_t* shift = &dst[idx - len]; 27526fa459cSmrg while (len > 0) { 27626fa459cSmrg int step = Shift(shift, len, param); 27726fa459cSmrg shift += step; 27826fa459cSmrg len -= step; 27926fa459cSmrg } 28026fa459cSmrg } 28126fa459cSmrg } 28226fa459cSmrg { 28326fa459cSmrg int suffix_len = *suffix++; 28426fa459cSmrg while (suffix_len--) { dst[idx++] = *suffix++; } 28526fa459cSmrg return idx; 28626fa459cSmrg } 28726fa459cSmrg} 28826fa459cSmrg 28926fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus) 29026fa459cSmrg} /* extern "C" */ 29126fa459cSmrg#endif 292