126fa459cSmrg/* Copyright 2013 Google Inc. All Rights Reserved.
226fa459cSmrg
326fa459cSmrg   Distributed under MIT license.
426fa459cSmrg   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
526fa459cSmrg*/
626fa459cSmrg
726fa459cSmrg#include "./transform.h"
826fa459cSmrg
926fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
1026fa459cSmrgextern "C" {
1126fa459cSmrg#endif
1226fa459cSmrg
1326fa459cSmrg/* RFC 7932 transforms string data */
1426fa459cSmrgstatic const char kPrefixSuffix[217] =
1526fa459cSmrg      "\1 \2, \10 of the \4 of \2s \1.\5 and \4 "
1626fa459cSmrg/* 0x  _0 _2  __5        _E    _3  _6 _8     _E */
1726fa459cSmrg      "in \1\"\4 to \2\">\1\n\2. \1]\5 for \3 a \6 "
1826fa459cSmrg/* 2x     _3_ _5    _A_  _D_ _F  _2 _4     _A   _E */
1926fa459cSmrg      "that \1\'\6 with \6 from \4 by \1(\6. T"
2026fa459cSmrg/* 4x       _5_ _7      _E      _5    _A _C */
2126fa459cSmrg      "he \4 on \4 as \4 is \4ing \2\n\t\1:\3ed "
2226fa459cSmrg/* 6x     _3    _8    _D    _2    _7_ _ _A _C */
2326fa459cSmrg      "\2=\"\4 at \3ly \1,\2=\'\5.com/\7. This \5"
2426fa459cSmrg/* 8x  _0 _ _3    _8   _C _E _ _1     _7       _F */
2526fa459cSmrg      " not \3er \3al \4ful \4ive \5less \4es"
2626fa459cSmrg/* Ax       _5   _9   _D    _2    _7     _D */
2726fa459cSmrg      "t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
2826fa459cSmrg/* Cx    _2    _7___ ___ _A    _F     _5        _8 */
2926fa459cSmrg
3026fa459cSmrgstatic const uint16_t kPrefixSuffixMap[50] = {
3126fa459cSmrg  0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,
3226fa459cSmrg  0x2A, 0x2D, 0x2F, 0x32, 0x34, 0x3A, 0x3E, 0x45, 0x47, 0x4E,
3326fa459cSmrg  0x55, 0x5A, 0x5C, 0x63, 0x68, 0x6D, 0x72, 0x77, 0x7A, 0x7C,
3426fa459cSmrg  0x80, 0x83, 0x88, 0x8C, 0x8E, 0x91, 0x97, 0x9F, 0xA5, 0xA9,
3526fa459cSmrg  0xAD, 0xB2, 0xB7, 0xBD, 0xC2, 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
3626fa459cSmrg};
3726fa459cSmrg
3826fa459cSmrg/* RFC 7932 transforms */
3926fa459cSmrgstatic const uint8_t kTransformsData[] = {
4026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 49,
4126fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 0,
4226fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 0,
4326fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
4426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
4526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 47,
4626fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 49,
4726fa459cSmrg   4, BROTLI_TRANSFORM_IDENTITY, 0,
4826fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 3,
4926fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
5026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 6,
5126fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
5226fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
5326fa459cSmrg   1, BROTLI_TRANSFORM_IDENTITY, 0,
5426fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 1,
5526fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
5626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 7,
5726fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 9,
5826fa459cSmrg  48, BROTLI_TRANSFORM_IDENTITY, 0,
5926fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 8,
6026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 5,
6126fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 10,
6226fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 11,
6326fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
6426fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 13,
6526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 14,
6626fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
6726fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
6826fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 15,
6926fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 16,
7026fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
7126fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 12,
7226fa459cSmrg   5, BROTLI_TRANSFORM_IDENTITY, 49,
7326fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 1,
7426fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
7526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 18,
7626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 17,
7726fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 19,
7826fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 20,
7926fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
8026fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
8126fa459cSmrg  47, BROTLI_TRANSFORM_IDENTITY, 49,
8226fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
8326fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 22,
8426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
8526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 23,
8626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 24,
8726fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 25,
8826fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
8926fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
9026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 27,
9126fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 28,
9226fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 12,
9326fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 29,
9426fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
9526fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
9626fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
9726fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 21,
9826fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
9926fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
10026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 31,
10126fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 32,
10226fa459cSmrg  47, BROTLI_TRANSFORM_IDENTITY, 3,
10326fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
10426fa459cSmrg  49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
10526fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
10626fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
10726fa459cSmrg   5, BROTLI_TRANSFORM_IDENTITY, 21,
10826fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
10926fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
11026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 30,
11126fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 5,
11226fa459cSmrg  35, BROTLI_TRANSFORM_IDENTITY, 49,
11326fa459cSmrg  47, BROTLI_TRANSFORM_IDENTITY, 2,
11426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
11526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 36,
11626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 33,
11726fa459cSmrg   5, BROTLI_TRANSFORM_IDENTITY, 0,
11826fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
11926fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
12026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 37,
12126fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 30,
12226fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 38,
12326fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
12426fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 39,
12526fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
12626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 34,
12726fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
12826fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
12926fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 21,
13026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 40,
13126fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
13226fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 41,
13326fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 42,
13426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
13526fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 43,
13626fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
13726fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
13826fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 34,
13926fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
14026fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 44,
14126fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
14226fa459cSmrg  45, BROTLI_TRANSFORM_IDENTITY, 49,
14326fa459cSmrg   0, BROTLI_TRANSFORM_IDENTITY, 33,
14426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
14526fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
14626fa459cSmrg  49, BROTLI_TRANSFORM_IDENTITY, 46,
14726fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
14826fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
14926fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
15026fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
15126fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
15226fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
15326fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
15426fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
15526fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
15626fa459cSmrg  49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
15726fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
15826fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
15926fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
16026fa459cSmrg   0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
16126fa459cSmrg};
16226fa459cSmrg
16326fa459cSmrgstatic const BrotliTransforms kBrotliTransforms = {
16426fa459cSmrg  sizeof(kPrefixSuffix),
16526fa459cSmrg  (const uint8_t*)kPrefixSuffix,
16626fa459cSmrg  kPrefixSuffixMap,
16726fa459cSmrg  sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
16826fa459cSmrg  kTransformsData,
16926fa459cSmrg  NULL,  /* no extra parameters */
17026fa459cSmrg  {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
17126fa459cSmrg};
17226fa459cSmrg
17326fa459cSmrgconst BrotliTransforms* BrotliGetTransforms(void) {
17426fa459cSmrg  return &kBrotliTransforms;
17526fa459cSmrg}
17626fa459cSmrg
17726fa459cSmrgstatic int ToUpperCase(uint8_t* p) {
17826fa459cSmrg  if (p[0] < 0xC0) {
17926fa459cSmrg    if (p[0] >= 'a' && p[0] <= 'z') {
18026fa459cSmrg      p[0] ^= 32;
18126fa459cSmrg    }
18226fa459cSmrg    return 1;
18326fa459cSmrg  }
18426fa459cSmrg  /* An overly simplified uppercasing model for UTF-8. */
18526fa459cSmrg  if (p[0] < 0xE0) {
18626fa459cSmrg    p[1] ^= 32;
18726fa459cSmrg    return 2;
18826fa459cSmrg  }
18926fa459cSmrg  /* An arbitrary transform for three byte characters. */
19026fa459cSmrg  p[2] ^= 5;
19126fa459cSmrg  return 3;
19226fa459cSmrg}
19326fa459cSmrg
19426fa459cSmrgstatic int Shift(uint8_t* word, int word_len, uint16_t parameter) {
19526fa459cSmrg  /* Limited sign extension: scalar < (1 << 24). */
19626fa459cSmrg  uint32_t scalar =
19726fa459cSmrg      (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));
19826fa459cSmrg  if (word[0] < 0x80) {
19926fa459cSmrg    /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
20026fa459cSmrg    scalar += (uint32_t)word[0];
20126fa459cSmrg    word[0] = (uint8_t)(scalar & 0x7Fu);
20226fa459cSmrg    return 1;
20326fa459cSmrg  } else if (word[0] < 0xC0) {
20426fa459cSmrg    /* Continuation / 10AAAAAA. */
20526fa459cSmrg    return 1;
20626fa459cSmrg  } else if (word[0] < 0xE0) {
20726fa459cSmrg    /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
20826fa459cSmrg    if (word_len < 2) return 1;
20926fa459cSmrg    scalar += (uint32_t)((word[1] & 0x3Fu) | ((word[0] & 0x1Fu) << 6u));
21026fa459cSmrg    word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
21126fa459cSmrg    word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
21226fa459cSmrg    return 2;
21326fa459cSmrg  } else if (word[0] < 0xF0) {
21426fa459cSmrg    /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
21526fa459cSmrg    if (word_len < 3) return word_len;
21626fa459cSmrg    scalar += (uint32_t)((word[2] & 0x3Fu) | ((word[1] & 0x3Fu) << 6u) |
21726fa459cSmrg        ((word[0] & 0x0Fu) << 12u));
21826fa459cSmrg    word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
21926fa459cSmrg    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
22026fa459cSmrg    word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
22126fa459cSmrg    return 3;
22226fa459cSmrg  } else if (word[0] < 0xF8) {
22326fa459cSmrg    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
22426fa459cSmrg    if (word_len < 4) return word_len;
22526fa459cSmrg    scalar += (uint32_t)((word[3] & 0x3Fu) | ((word[2] & 0x3Fu) << 6u) |
22626fa459cSmrg        ((word[1] & 0x3Fu) << 12u) | ((word[0] & 0x07u) << 18u));
22726fa459cSmrg    word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
22826fa459cSmrg    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
22926fa459cSmrg    word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
23026fa459cSmrg    word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
23126fa459cSmrg    return 4;
23226fa459cSmrg  }
23326fa459cSmrg  return 1;
23426fa459cSmrg}
23526fa459cSmrg
23626fa459cSmrgint BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
23726fa459cSmrg    const BrotliTransforms* transforms, int transform_idx) {
23826fa459cSmrg  int idx = 0;
23926fa459cSmrg  const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
24026fa459cSmrg  uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
24126fa459cSmrg  const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
24226fa459cSmrg  {
24326fa459cSmrg    int prefix_len = *prefix++;
24426fa459cSmrg    while (prefix_len--) { dst[idx++] = *prefix++; }
24526fa459cSmrg  }
24626fa459cSmrg  {
24726fa459cSmrg    const int t = type;
24826fa459cSmrg    int i = 0;
24926fa459cSmrg    if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
25026fa459cSmrg      len -= t;
25126fa459cSmrg    } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
25226fa459cSmrg        && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
25326fa459cSmrg      int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
25426fa459cSmrg      word += skip;
25526fa459cSmrg      len -= skip;
25626fa459cSmrg    }
25726fa459cSmrg    while (i < len) { dst[idx++] = word[i++]; }
25826fa459cSmrg    if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
25926fa459cSmrg      ToUpperCase(&dst[idx - len]);
26026fa459cSmrg    } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
26126fa459cSmrg      uint8_t* uppercase = &dst[idx - len];
26226fa459cSmrg      while (len > 0) {
26326fa459cSmrg        int step = ToUpperCase(uppercase);
26426fa459cSmrg        uppercase += step;
26526fa459cSmrg        len -= step;
26626fa459cSmrg      }
26726fa459cSmrg    } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
26826fa459cSmrg      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
26926fa459cSmrg          + (transforms->params[transform_idx * 2 + 1] << 8u));
27026fa459cSmrg      Shift(&dst[idx - len], len, param);
27126fa459cSmrg    } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
27226fa459cSmrg      uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
27326fa459cSmrg          + (transforms->params[transform_idx * 2 + 1] << 8u));
27426fa459cSmrg      uint8_t* shift = &dst[idx - len];
27526fa459cSmrg      while (len > 0) {
27626fa459cSmrg        int step = Shift(shift, len, param);
27726fa459cSmrg        shift += step;
27826fa459cSmrg        len -= step;
27926fa459cSmrg      }
28026fa459cSmrg    }
28126fa459cSmrg  }
28226fa459cSmrg  {
28326fa459cSmrg    int suffix_len = *suffix++;
28426fa459cSmrg    while (suffix_len--) { dst[idx++] = *suffix++; }
28526fa459cSmrg    return idx;
28626fa459cSmrg  }
28726fa459cSmrg}
28826fa459cSmrg
28926fa459cSmrg#if defined(__cplusplus) || defined(c_plusplus)
29026fa459cSmrg}  /* extern "C" */
29126fa459cSmrg#endif
292