big5_emacs.h revision b4ee4795
1
2/*
3 * BIG5-0 and BIG5-1
4 */
5
6/*
7   BIG5 with its 13494 characters doesn't fit in a single 94x94 or 96x96
8   block. Therefore Emacs/Mule developers, in a typically Japanese way of
9   thinking, have developed an alternative encoding of BIG5 in two 94x94
10   planes, very similar to the SHIFT_JIS encoding for JISX0208.
11
12   Conversion between BIG5 codes (s1,s2) and BIG5-0 codes (c1,c2):
13   Example. (s1,s2) = 0xA140, (c1,c2) = 0x2121.
14   0xA1 <= s1 <= 0xC7, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE,
15   0x21 <= c1 <= 0x62, 0x21 <= c2 <= 0x7E.
16   Invariant:
17     157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
18     = 94*(c1-0x21)+(c2-0x21)
19   Conversion (s1,s2) -> (c1,c2):
20     t := 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
21     c1 := (t div 94) + 0x21
22     c2 := (t mod 94) + 0x21
23   Conversion (c1,c2) -> (s1,s2):
24     t := 94*(c1-0x21)+(c2-0x21)
25     t2 := t mod 157
26     s1 := (t div 157) + 0xA1
27     s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62)
28
29   Conversion between BIG5 codes (s1,s2) and BIG5-1 codes (c1,c2):
30   Example. (s1,s2) = 0xC940, (c1,c2) = 0x2121.
31   0xC9 <= s1 <= 0xF9, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE,
32   0x21 <= c1 <= 0x72, 0x21 <= c2 <= 0x7E.
33   Invariant:
34     157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
35     = 94*(c1-0x21)+(c2-0x21)
36   Conversion (s1,s2) -> (c1,c2):
37     t := 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62)
38     c1 := (t div 94) + 0x21
39     c2 := (t mod 94) + 0x21
40   Conversion (c1,c2) -> (s1,s2):
41     t := 94*(c1-0x21)+(c2-0x21)
42     t2 := t mod 157
43     s1 := (t div 157) + 0xC9
44     s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62)
45 */
46
47static int
48big5_0_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
49{
50  unsigned char c1 = s[0];
51  if (c1 >= 0x21 && c1 <= 0x62) {
52    if (n >= 2) {
53      unsigned char c2 = s[1];
54      if (c2 >= 0x21 && c2 <= 0x7e) {
55        unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21);
56        if (0) {
57          /* Unoptimized. */
58          unsigned char buf[2];
59          buf[0] = (i / 157) + 0xa1;
60          i = i % 157;
61          buf[1] = i + (i < 0x3f ? 0x40 : 0x62);
62          return big5_mbtowc(conv,pwc,buf,2);
63        } else {
64          /* Inline the implementation of big5_mbtowc. */
65          if (i < 6121) {
66            unsigned short wc = big5_2uni_pagea1[i];
67            if (wc != 0xfffd) {
68              *pwc = (ucs4_t) wc;
69              return 2;
70            }
71          }
72        }
73      }
74      return RET_ILSEQ;
75    }
76    return RET_TOOFEW(0);
77  }
78  return RET_ILSEQ;
79}
80
81static int
82big5_1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
83{
84  unsigned char c1 = s[0];
85  if (c1 >= 0x21 && c1 <= 0x72) {
86    if (n >= 2) {
87      unsigned char c2 = s[1];
88      if (c2 >= 0x21 && c2 <= 0x7e) {
89        unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21);
90        if (0) {
91          /* Unoptimized. */
92          unsigned char buf[2];
93          buf[0] = (i / 157) + 0xc9;
94          i = i % 157;
95          buf[1] = i + (i < 0x3f ? 0x40 : 0x62);
96          return big5_mbtowc(conv,pwc,buf,2);
97        } else {
98          /* Inline the implementation of big5_mbtowc. */
99          if (i < 7652) {
100            unsigned short wc = big5_2uni_pagec9[i];
101            if (wc != 0xfffd) {
102              *pwc = (ucs4_t) wc;
103              return 2;
104            }
105          }
106        }
107      }
108      return RET_ILSEQ;
109    }
110    return RET_TOOFEW(0);
111  }
112  return RET_ILSEQ;
113}
114
115static int
116big5_0_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
117{
118  if (n >= 2) {
119    unsigned char buf[2];
120    int ret = big5_wctomb(conv,buf,wc,2);
121    if (ret != RET_ILSEQ) {
122      unsigned char s1, s2;
123      if (ret != 2) abort();
124      s1 = buf[0];
125      s2 = buf[1];
126      if (!(s1 >= 0xa1)) abort();
127      if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort();
128      if (s1 < 0xc9) {
129        unsigned int t = 157 * (s1 - 0xa1) + s2 - (s2 < 0x80 ? 0x40 : 0x62);
130        r[0] = (t / 94) + 0x21;
131        r[1] = (t % 94) + 0x21;
132        return 2;
133      }
134    }
135    return RET_ILSEQ;
136  }
137  return RET_TOOSMALL;
138}
139
140static int
141big5_1_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
142{
143  if (n >= 2) {
144    unsigned char buf[2];
145    int ret = big5_wctomb(conv,buf,wc,2);
146    if (ret != RET_ILSEQ) {
147      unsigned char s1, s2;
148      if (ret != 2) abort();
149      s1 = buf[0];
150      s2 = buf[1];
151      if (!(s1 <= 0xf9)) abort();
152      if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort();
153      if (s1 >= 0xc9) {
154        unsigned int t = 157 * (s1 - 0xc9) + s2 - (s2 < 0x80 ? 0x40 : 0x62);
155        r[0] = (t / 94) + 0x21;
156        r[1] = (t % 94) + 0x21;
157        return 2;
158      }
159    }
160    return RET_ILSEQ;
161  }
162  return RET_TOOSMALL;
163}
164