11ab64890Smrg
21ab64890Smrg/*
31ab64890Smrg * BIG5-0 and BIG5-1
41ab64890Smrg */
51ab64890Smrg
61ab64890Smrg/*
71ab64890Smrg   BIG5 with its 13494 characters doesn't fit in a single 94x94 or 96x96
81ab64890Smrg   block. Therefore Emacs/Mule developers, in a typically Japanese way of
91ab64890Smrg   thinking, have developed an alternative encoding of BIG5 in two 94x94
101ab64890Smrg   planes, very similar to the SHIFT_JIS encoding for JISX0208.
111ab64890Smrg
121ab64890Smrg   Conversion between BIG5 codes (s1,s2) and BIG5-0 codes (c1,c2):
131ab64890Smrg   Example. (s1,s2) = 0xA140, (c1,c2) = 0x2121.
141ab64890Smrg   0xA1 <= s1 <= 0xC7, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE,
151ab64890Smrg   0x21 <= c1 <= 0x62, 0x21 <= c2 <= 0x7E.
161ab64890Smrg   Invariant:
171ab64890Smrg     157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
181ab64890Smrg     = 94*(c1-0x21)+(c2-0x21)
191ab64890Smrg   Conversion (s1,s2) -> (c1,c2):
201ab64890Smrg     t := 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
211ab64890Smrg     c1 := (t div 94) + 0x21
221ab64890Smrg     c2 := (t mod 94) + 0x21
231ab64890Smrg   Conversion (c1,c2) -> (s1,s2):
241ab64890Smrg     t := 94*(c1-0x21)+(c2-0x21)
251ab64890Smrg     t2 := t mod 157
261ab64890Smrg     s1 := (t div 157) + 0xA1
271ab64890Smrg     s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62)
281ab64890Smrg
291ab64890Smrg   Conversion between BIG5 codes (s1,s2) and BIG5-1 codes (c1,c2):
301ab64890Smrg   Example. (s1,s2) = 0xC940, (c1,c2) = 0x2121.
311ab64890Smrg   0xC9 <= s1 <= 0xF9, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE,
321ab64890Smrg   0x21 <= c1 <= 0x72, 0x21 <= c2 <= 0x7E.
331ab64890Smrg   Invariant:
341ab64890Smrg     157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
351ab64890Smrg     = 94*(c1-0x21)+(c2-0x21)
361ab64890Smrg   Conversion (s1,s2) -> (c1,c2):
371ab64890Smrg     t := 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
381ab64890Smrg     c1 := (t div 94) + 0x21
391ab64890Smrg     c2 := (t mod 94) + 0x21
401ab64890Smrg   Conversion (c1,c2) -> (s1,s2):
411ab64890Smrg     t := 94*(c1-0x21)+(c2-0x21)
421ab64890Smrg     t2 := t mod 157
431ab64890Smrg     s1 := (t div 157) + 0xC9
441ab64890Smrg     s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62)
451ab64890Smrg */
461ab64890Smrg
471ab64890Smrgstatic int
481ab64890Smrgbig5_0_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
491ab64890Smrg{
501ab64890Smrg  unsigned char c1 = s[0];
511ab64890Smrg  if (c1 >= 0x21 && c1 <= 0x62) {
521ab64890Smrg    if (n >= 2) {
531ab64890Smrg      unsigned char c2 = s[1];
541ab64890Smrg      if (c2 >= 0x21 && c2 <= 0x7e) {
551ab64890Smrg        unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21);
561ab64890Smrg        if (0) {
571ab64890Smrg          /* Unoptimized. */
581ab64890Smrg          unsigned char buf[2];
591ab64890Smrg          buf[0] = (i / 157) + 0xa1;
601ab64890Smrg          i = i % 157;
611ab64890Smrg          buf[1] = i + (i < 0x3f ? 0x40 : 0x62);
621ab64890Smrg          return big5_mbtowc(conv,pwc,buf,2);
631ab64890Smrg        } else {
641ab64890Smrg          /* Inline the implementation of big5_mbtowc. */
651ab64890Smrg          if (i < 6121) {
661ab64890Smrg            unsigned short wc = big5_2uni_pagea1[i];
671ab64890Smrg            if (wc != 0xfffd) {
681ab64890Smrg              *pwc = (ucs4_t) wc;
691ab64890Smrg              return 2;
701ab64890Smrg            }
711ab64890Smrg          }
721ab64890Smrg        }
731ab64890Smrg      }
741ab64890Smrg      return RET_ILSEQ;
751ab64890Smrg    }
761ab64890Smrg    return RET_TOOFEW(0);
771ab64890Smrg  }
781ab64890Smrg  return RET_ILSEQ;
791ab64890Smrg}
801ab64890Smrg
811ab64890Smrgstatic int
821ab64890Smrgbig5_1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
831ab64890Smrg{
841ab64890Smrg  unsigned char c1 = s[0];
851ab64890Smrg  if (c1 >= 0x21 && c1 <= 0x72) {
861ab64890Smrg    if (n >= 2) {
871ab64890Smrg      unsigned char c2 = s[1];
881ab64890Smrg      if (c2 >= 0x21 && c2 <= 0x7e) {
891ab64890Smrg        unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21);
901ab64890Smrg        if (0) {
911ab64890Smrg          /* Unoptimized. */
921ab64890Smrg          unsigned char buf[2];
931ab64890Smrg          buf[0] = (i / 157) + 0xc9;
941ab64890Smrg          i = i % 157;
951ab64890Smrg          buf[1] = i + (i < 0x3f ? 0x40 : 0x62);
961ab64890Smrg          return big5_mbtowc(conv,pwc,buf,2);
971ab64890Smrg        } else {
981ab64890Smrg          /* Inline the implementation of big5_mbtowc. */
991ab64890Smrg          if (i < 7652) {
1001ab64890Smrg            unsigned short wc = big5_2uni_pagec9[i];
1011ab64890Smrg            if (wc != 0xfffd) {
1021ab64890Smrg              *pwc = (ucs4_t) wc;
1031ab64890Smrg              return 2;
1041ab64890Smrg            }
1051ab64890Smrg          }
1061ab64890Smrg        }
1071ab64890Smrg      }
1081ab64890Smrg      return RET_ILSEQ;
1091ab64890Smrg    }
1101ab64890Smrg    return RET_TOOFEW(0);
1111ab64890Smrg  }
1121ab64890Smrg  return RET_ILSEQ;
1131ab64890Smrg}
1141ab64890Smrg
1151ab64890Smrgstatic int
1161ab64890Smrgbig5_0_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
1171ab64890Smrg{
1181ab64890Smrg  if (n >= 2) {
1191ab64890Smrg    unsigned char buf[2];
1201ab64890Smrg    int ret = big5_wctomb(conv,buf,wc,2);
1211ab64890Smrg    if (ret != RET_ILSEQ) {
1221ab64890Smrg      unsigned char s1, s2;
1231ab64890Smrg      if (ret != 2) abort();
1241ab64890Smrg      s1 = buf[0];
1251ab64890Smrg      s2 = buf[1];
1261ab64890Smrg      if (!(s1 >= 0xa1)) abort();
1271ab64890Smrg      if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort();
1281ab64890Smrg      if (s1 < 0xc9) {
1291ab64890Smrg        unsigned int t = 157 * (s1 - 0xa1) + s2 - (s2 < 0x80 ? 0x40 : 0x62);
1301ab64890Smrg        r[0] = (t / 94) + 0x21;
1311ab64890Smrg        r[1] = (t % 94) + 0x21;
1321ab64890Smrg        return 2;
1331ab64890Smrg      }
1341ab64890Smrg    }
1351ab64890Smrg    return RET_ILSEQ;
1361ab64890Smrg  }
1371ab64890Smrg  return RET_TOOSMALL;
1381ab64890Smrg}
1391ab64890Smrg
1401ab64890Smrgstatic int
1411ab64890Smrgbig5_1_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
1421ab64890Smrg{
1431ab64890Smrg  if (n >= 2) {
1441ab64890Smrg    unsigned char buf[2];
1451ab64890Smrg    int ret = big5_wctomb(conv,buf,wc,2);
1461ab64890Smrg    if (ret != RET_ILSEQ) {
1471ab64890Smrg      unsigned char s1, s2;
1481ab64890Smrg      if (ret != 2) abort();
1491ab64890Smrg      s1 = buf[0];
1501ab64890Smrg      s2 = buf[1];
1511ab64890Smrg      if (!(s1 <= 0xf9)) abort();
1521ab64890Smrg      if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort();
1531ab64890Smrg      if (s1 >= 0xc9) {
1541ab64890Smrg        unsigned int t = 157 * (s1 - 0xc9) + s2 - (s2 < 0x80 ? 0x40 : 0x62);
1551ab64890Smrg        r[0] = (t / 94) + 0x21;
1561ab64890Smrg        r[1] = (t % 94) + 0x21;
1571ab64890Smrg        return 2;
1581ab64890Smrg      }
1591ab64890Smrg    }
1601ab64890Smrg    return RET_ILSEQ;
1611ab64890Smrg  }
1621ab64890Smrg  return RET_TOOSMALL;
1631ab64890Smrg}
164