/* big5_emacs.h revision b4ee4795 */
1 2/* 3 * BIG5-0 and BIG5-1 4 */ 5 6/* 7 BIG5 with its 13494 characters doesn't fit in a single 94x94 or 96x96 8 block. Therefore Emacs/Mule developers, in a typically Japanese way of 9 thinking, have developed an alternative encoding of BIG5 in two 94x94 10 planes, very similar to the SHIFT_JIS encoding for JISX0208. 11 12 Conversion between BIG5 codes (s1,s2) and BIG5-0 codes (c1,c2): 13 Example. (s1,s2) = 0xA140, (c1,c2) = 0x2121. 14 0xA1 <= s1 <= 0xC7, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 15 0x21 <= c1 <= 0x62, 0x21 <= c2 <= 0x7E. 16 Invariant: 17 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 18 = 94*(c1-0x21)+(c2-0x21) 19 Conversion (s1,s2) -> (c1,c2): 20 t := 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 21 c1 := (t div 94) + 0x21 22 c2 := (t mod 94) + 0x21 23 Conversion (c1,c2) -> (s1,s2): 24 t := 94*(c1-0x21)+(c2-0x21) 25 t2 := t mod 157 26 s1 := (t div 157) + 0xA1 27 s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 28 29 Conversion between BIG5 codes (s1,s2) and BIG5-1 codes (c1,c2): 30 Example. (s1,s2) = 0xC940, (c1,c2) = 0x2121. 31 0xC9 <= s1 <= 0xF9, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 32 0x21 <= c1 <= 0x72, 0x21 <= c2 <= 0x7E. 33 Invariant: 34 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 35 = 94*(c1-0x21)+(c2-0x21) 36 Conversion (s1,s2) -> (c1,c2): 37 t := 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 38 c1 := (t div 94) + 0x21 39 c2 := (t mod 94) + 0x21 40 Conversion (c1,c2) -> (s1,s2): 41 t := 94*(c1-0x21)+(c2-0x21) 42 t2 := t mod 157 43 s1 := (t div 157) + 0xC9 44 s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 45 */ 46 47static int 48big5_0_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 49{ 50 unsigned char c1 = s[0]; 51 if (c1 >= 0x21 && c1 <= 0x62) { 52 if (n >= 2) { 53 unsigned char c2 = s[1]; 54 if (c2 >= 0x21 && c2 <= 0x7e) { 55 unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 56 if (0) { 57 /* Unoptimized. */ 58 unsigned char buf[2]; 59 buf[0] = (i / 157) + 0xa1; 60 i = i % 157; 61 buf[1] = i + (i < 0x3f ? 
0x40 : 0x62); 62 return big5_mbtowc(conv,pwc,buf,2); 63 } else { 64 /* Inline the implementation of big5_mbtowc. */ 65 if (i < 6121) { 66 unsigned short wc = big5_2uni_pagea1[i]; 67 if (wc != 0xfffd) { 68 *pwc = (ucs4_t) wc; 69 return 2; 70 } 71 } 72 } 73 } 74 return RET_ILSEQ; 75 } 76 return RET_TOOFEW(0); 77 } 78 return RET_ILSEQ; 79} 80 81static int 82big5_1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 83{ 84 unsigned char c1 = s[0]; 85 if (c1 >= 0x21 && c1 <= 0x72) { 86 if (n >= 2) { 87 unsigned char c2 = s[1]; 88 if (c2 >= 0x21 && c2 <= 0x7e) { 89 unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 90 if (0) { 91 /* Unoptimized. */ 92 unsigned char buf[2]; 93 buf[0] = (i / 157) + 0xc9; 94 i = i % 157; 95 buf[1] = i + (i < 0x3f ? 0x40 : 0x62); 96 return big5_mbtowc(conv,pwc,buf,2); 97 } else { 98 /* Inline the implementation of big5_mbtowc. */ 99 if (i < 7652) { 100 unsigned short wc = big5_2uni_pagec9[i]; 101 if (wc != 0xfffd) { 102 *pwc = (ucs4_t) wc; 103 return 2; 104 } 105 } 106 } 107 } 108 return RET_ILSEQ; 109 } 110 return RET_TOOFEW(0); 111 } 112 return RET_ILSEQ; 113} 114 115static int 116big5_0_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 117{ 118 if (n >= 2) { 119 unsigned char buf[2]; 120 int ret = big5_wctomb(conv,buf,wc,2); 121 if (ret != RET_ILSEQ) { 122 unsigned char s1, s2; 123 if (ret != 2) abort(); 124 s1 = buf[0]; 125 s2 = buf[1]; 126 if (!(s1 >= 0xa1)) abort(); 127 if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 128 if (s1 < 0xc9) { 129 unsigned int t = 157 * (s1 - 0xa1) + s2 - (s2 < 0x80 ? 
0x40 : 0x62); 130 r[0] = (t / 94) + 0x21; 131 r[1] = (t % 94) + 0x21; 132 return 2; 133 } 134 } 135 return RET_ILSEQ; 136 } 137 return RET_TOOSMALL; 138} 139 140static int 141big5_1_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 142{ 143 if (n >= 2) { 144 unsigned char buf[2]; 145 int ret = big5_wctomb(conv,buf,wc,2); 146 if (ret != RET_ILSEQ) { 147 unsigned char s1, s2; 148 if (ret != 2) abort(); 149 s1 = buf[0]; 150 s2 = buf[1]; 151 if (!(s1 <= 0xf9)) abort(); 152 if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 153 if (s1 >= 0xc9) { 154 unsigned int t = 157 * (s1 - 0xc9) + s2 - (s2 < 0x80 ? 0x40 : 0x62); 155 r[0] = (t / 94) + 0x21; 156 r[1] = (t % 94) + 0x21; 157 return 2; 158 } 159 } 160 return RET_ILSEQ; 161 } 162 return RET_TOOSMALL; 163} 164