big5_emacs.h revision 1ab64890
1/* $XFree86$ */ 2 3/* 4 * BIG5-0 and BIG5-1 5 */ 6 7/* 8 BIG5 with its 13494 characters doesn't fit in a single 94x94 or 96x96 9 block. Therefore Emacs/Mule developers, in a typically Japanese way of 10 thinking, have developed an alternative encoding of BIG5 in two 94x94 11 planes, very similar to the SHIFT_JIS encoding for JISX0208. 12 13 Conversion between BIG5 codes (s1,s2) and BIG5-0 codes (c1,c2): 14 Example. (s1,s2) = 0xA140, (c1,c2) = 0x2121. 15 0xA1 <= s1 <= 0xC7, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 16 0x21 <= c1 <= 0x62, 0x21 <= c2 <= 0x7E. 17 Invariant: 18 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 19 = 94*(c1-0x21)+(c2-0x21) 20 Conversion (s1,s2) -> (c1,c2): 21 t := 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 22 c1 := (t div 94) + 0x21 23 c2 := (t mod 94) + 0x21 24 Conversion (c1,c2) -> (s1,s2): 25 t := 94*(c1-0x21)+(c2-0x21) 26 t2 := t mod 157 27 s1 := (t div 157) + 0xA1 28 s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 29 30 Conversion between BIG5 codes (s1,s2) and BIG5-1 codes (c1,c2): 31 Example. (s1,s2) = 0xC940, (c1,c2) = 0x2121. 32 0xC9 <= s1 <= 0xF9, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 33 0x21 <= c1 <= 0x72, 0x21 <= c2 <= 0x7E. 34 Invariant: 35 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 36 = 94*(c1-0x21)+(c2-0x21) 37 Conversion (s1,s2) -> (c1,c2): 38 t := 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 39 c1 := (t div 94) + 0x21 40 c2 := (t mod 94) + 0x21 41 Conversion (c1,c2) -> (s1,s2): 42 t := 94*(c1-0x21)+(c2-0x21) 43 t2 := t mod 157 44 s1 := (t div 157) + 0xC9 45 s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 46 */ 47 48static int 49big5_0_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 50{ 51 unsigned char c1 = s[0]; 52 if (c1 >= 0x21 && c1 <= 0x62) { 53 if (n >= 2) { 54 unsigned char c2 = s[1]; 55 if (c2 >= 0x21 && c2 <= 0x7e) { 56 unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 57 if (0) { 58 /* Unoptimized. */ 59 unsigned char buf[2]; 60 buf[0] = (i / 157) + 0xa1; 61 i = i % 157; 62 buf[1] = i + (i < 0x3f ? 0x40 : 0x62); 63 return big5_mbtowc(conv,pwc,buf,2); 64 } else { 65 /* Inline the implementation of big5_mbtowc. */ 66 if (i < 6121) { 67 unsigned short wc = big5_2uni_pagea1[i]; 68 if (wc != 0xfffd) { 69 *pwc = (ucs4_t) wc; 70 return 2; 71 } 72 } 73 } 74 } 75 return RET_ILSEQ; 76 } 77 return RET_TOOFEW(0); 78 } 79 return RET_ILSEQ; 80} 81 82static int 83big5_1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 84{ 85 unsigned char c1 = s[0]; 86 if (c1 >= 0x21 && c1 <= 0x72) { 87 if (n >= 2) { 88 unsigned char c2 = s[1]; 89 if (c2 >= 0x21 && c2 <= 0x7e) { 90 unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 91 if (0) { 92 /* Unoptimized. */ 93 unsigned char buf[2]; 94 buf[0] = (i / 157) + 0xc9; 95 i = i % 157; 96 buf[1] = i + (i < 0x3f ? 0x40 : 0x62); 97 return big5_mbtowc(conv,pwc,buf,2); 98 } else { 99 /* Inline the implementation of big5_mbtowc. */ 100 if (i < 7652) { 101 unsigned short wc = big5_2uni_pagec9[i]; 102 if (wc != 0xfffd) { 103 *pwc = (ucs4_t) wc; 104 return 2; 105 } 106 } 107 } 108 } 109 return RET_ILSEQ; 110 } 111 return RET_TOOFEW(0); 112 } 113 return RET_ILSEQ; 114} 115 116static int 117big5_0_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 118{ 119 if (n >= 2) { 120 unsigned char buf[2]; 121 int ret = big5_wctomb(conv,buf,wc,2); 122 if (ret != RET_ILSEQ) { 123 unsigned char s1, s2; 124 if (ret != 2) abort(); 125 s1 = buf[0]; 126 s2 = buf[1]; 127 if (!(s1 >= 0xa1)) abort(); 128 if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 129 if (s1 < 0xc9) { 130 unsigned int t = 157 * (s1 - 0xa1) + s2 - (s2 < 0x80 ? 0x40 : 0x62); 131 r[0] = (t / 94) + 0x21; 132 r[1] = (t % 94) + 0x21; 133 return 2; 134 } 135 } 136 return RET_ILSEQ; 137 } 138 return RET_TOOSMALL; 139} 140 141static int 142big5_1_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 143{ 144 if (n >= 2) { 145 unsigned char buf[2]; 146 int ret = big5_wctomb(conv,buf,wc,2); 147 if (ret != RET_ILSEQ) { 148 unsigned char s1, s2; 149 if (ret != 2) abort(); 150 s1 = buf[0]; 151 s2 = buf[1]; 152 if (!(s1 <= 0xf9)) abort(); 153 if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 154 if (s1 >= 0xc9) { 155 unsigned int t = 157 * (s1 - 0xc9) + s2 - (s2 < 0x80 ? 0x40 : 0x62); 156 r[0] = (t / 94) + 0x21; 157 r[1] = (t % 94) + 0x21; 158 return 2; 159 } 160 } 161 return RET_ILSEQ; 162 } 163 return RET_TOOSMALL; 164} 165