11ab64890Smrg 21ab64890Smrg/* 31ab64890Smrg * BIG5-0 and BIG5-1 41ab64890Smrg */ 51ab64890Smrg 61ab64890Smrg/* 71ab64890Smrg BIG5 with its 13494 characters doesn't fit in a single 94x94 or 96x96 81ab64890Smrg block. Therefore Emacs/Mule developers, in a typically Japanese way of 91ab64890Smrg thinking, have developed an alternative encoding of BIG5 in two 94x94 101ab64890Smrg planes, very similar to the SHIFT_JIS encoding for JISX0208. 111ab64890Smrg 121ab64890Smrg Conversion between BIG5 codes (s1,s2) and BIG5-0 codes (c1,c2): 131ab64890Smrg Example. (s1,s2) = 0xA140, (c1,c2) = 0x2121. 141ab64890Smrg 0xA1 <= s1 <= 0xC7, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 151ab64890Smrg 0x21 <= c1 <= 0x62, 0x21 <= c2 <= 0x7E. 161ab64890Smrg Invariant: 171ab64890Smrg 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 181ab64890Smrg = 94*(c1-0x21)+(c2-0x21) 191ab64890Smrg Conversion (s1,s2) -> (c1,c2): 201ab64890Smrg t := 157*(s1-0xA1) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 211ab64890Smrg c1 := (t div 94) + 0x21 221ab64890Smrg c2 := (t mod 94) + 0x21 231ab64890Smrg Conversion (c1,c2) -> (s1,s2): 241ab64890Smrg t := 94*(c1-0x21)+(c2-0x21) 251ab64890Smrg t2 := t mod 157 261ab64890Smrg s1 := (t div 157) + 0xA1 271ab64890Smrg s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 281ab64890Smrg 291ab64890Smrg Conversion between BIG5 codes (s1,s2) and BIG5-1 codes (c1,c2): 301ab64890Smrg Example. (s1,s2) = 0xC940, (c1,c2) = 0x2121. 311ab64890Smrg 0xC9 <= s1 <= 0xF9, 0x40 <= s2 <= 0x7E || 0xA1 <= s2 <= 0xFE, 321ab64890Smrg 0x21 <= c1 <= 0x72, 0x21 <= c2 <= 0x7E. 331ab64890Smrg Invariant: 341ab64890Smrg 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 351ab64890Smrg = 94*(c1-0x21)+(c2-0x21) 361ab64890Smrg Conversion (s1,s2) -> (c1,c2): 371ab64890Smrg t := 157*(s1-0xC9) + (s2 < 0x80 ? s2-0x40 : s2-0x62) 381ab64890Smrg c1 := (t div 94) + 0x21 391ab64890Smrg c2 := (t mod 94) + 0x21 401ab64890Smrg Conversion (c1,c2) -> (s1,s2): 411ab64890Smrg t := 94*(c1-0x21)+(c2-0x21) 421ab64890Smrg t2 := t mod 157 431ab64890Smrg s1 := (t div 157) + 0xC9 441ab64890Smrg s2 := (t2 < 0x3F ? t2+0x40 : t2+0x62) 451ab64890Smrg */ 461ab64890Smrg 471ab64890Smrgstatic int 481ab64890Smrgbig5_0_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 491ab64890Smrg{ 501ab64890Smrg unsigned char c1 = s[0]; 511ab64890Smrg if (c1 >= 0x21 && c1 <= 0x62) { 521ab64890Smrg if (n >= 2) { 531ab64890Smrg unsigned char c2 = s[1]; 541ab64890Smrg if (c2 >= 0x21 && c2 <= 0x7e) { 551ab64890Smrg unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 561ab64890Smrg if (0) { 571ab64890Smrg /* Unoptimized. */ 581ab64890Smrg unsigned char buf[2]; 591ab64890Smrg buf[0] = (i / 157) + 0xa1; 601ab64890Smrg i = i % 157; 611ab64890Smrg buf[1] = i + (i < 0x3f ? 0x40 : 0x62); 621ab64890Smrg return big5_mbtowc(conv,pwc,buf,2); 631ab64890Smrg } else { 641ab64890Smrg /* Inline the implementation of big5_mbtowc. */ 651ab64890Smrg if (i < 6121) { 661ab64890Smrg unsigned short wc = big5_2uni_pagea1[i]; 671ab64890Smrg if (wc != 0xfffd) { 681ab64890Smrg *pwc = (ucs4_t) wc; 691ab64890Smrg return 2; 701ab64890Smrg } 711ab64890Smrg } 721ab64890Smrg } 731ab64890Smrg } 741ab64890Smrg return RET_ILSEQ; 751ab64890Smrg } 761ab64890Smrg return RET_TOOFEW(0); 771ab64890Smrg } 781ab64890Smrg return RET_ILSEQ; 791ab64890Smrg} 801ab64890Smrg 811ab64890Smrgstatic int 821ab64890Smrgbig5_1_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 831ab64890Smrg{ 841ab64890Smrg unsigned char c1 = s[0]; 851ab64890Smrg if (c1 >= 0x21 && c1 <= 0x72) { 861ab64890Smrg if (n >= 2) { 871ab64890Smrg unsigned char c2 = s[1]; 881ab64890Smrg if (c2 >= 0x21 && c2 <= 0x7e) { 891ab64890Smrg unsigned int i = 94 * (c1 - 0x21) + (c2 - 0x21); 901ab64890Smrg if (0) { 911ab64890Smrg /* Unoptimized. */ 921ab64890Smrg unsigned char buf[2]; 931ab64890Smrg buf[0] = (i / 157) + 0xc9; 941ab64890Smrg i = i % 157; 951ab64890Smrg buf[1] = i + (i < 0x3f ? 0x40 : 0x62); 961ab64890Smrg return big5_mbtowc(conv,pwc,buf,2); 971ab64890Smrg } else { 981ab64890Smrg /* Inline the implementation of big5_mbtowc. */ 991ab64890Smrg if (i < 7652) { 1001ab64890Smrg unsigned short wc = big5_2uni_pagec9[i]; 1011ab64890Smrg if (wc != 0xfffd) { 1021ab64890Smrg *pwc = (ucs4_t) wc; 1031ab64890Smrg return 2; 1041ab64890Smrg } 1051ab64890Smrg } 1061ab64890Smrg } 1071ab64890Smrg } 1081ab64890Smrg return RET_ILSEQ; 1091ab64890Smrg } 1101ab64890Smrg return RET_TOOFEW(0); 1111ab64890Smrg } 1121ab64890Smrg return RET_ILSEQ; 1131ab64890Smrg} 1141ab64890Smrg 1151ab64890Smrgstatic int 1161ab64890Smrgbig5_0_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 1171ab64890Smrg{ 1181ab64890Smrg if (n >= 2) { 1191ab64890Smrg unsigned char buf[2]; 1201ab64890Smrg int ret = big5_wctomb(conv,buf,wc,2); 1211ab64890Smrg if (ret != RET_ILSEQ) { 1221ab64890Smrg unsigned char s1, s2; 1231ab64890Smrg if (ret != 2) abort(); 1241ab64890Smrg s1 = buf[0]; 1251ab64890Smrg s2 = buf[1]; 1261ab64890Smrg if (!(s1 >= 0xa1)) abort(); 1271ab64890Smrg if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 1281ab64890Smrg if (s1 < 0xc9) { 1291ab64890Smrg unsigned int t = 157 * (s1 - 0xa1) + s2 - (s2 < 0x80 ? 0x40 : 0x62); 1301ab64890Smrg r[0] = (t / 94) + 0x21; 1311ab64890Smrg r[1] = (t % 94) + 0x21; 1321ab64890Smrg return 2; 1331ab64890Smrg } 1341ab64890Smrg } 1351ab64890Smrg return RET_ILSEQ; 1361ab64890Smrg } 1371ab64890Smrg return RET_TOOSMALL; 1381ab64890Smrg} 1391ab64890Smrg 1401ab64890Smrgstatic int 1411ab64890Smrgbig5_1_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 1421ab64890Smrg{ 1431ab64890Smrg if (n >= 2) { 1441ab64890Smrg unsigned char buf[2]; 1451ab64890Smrg int ret = big5_wctomb(conv,buf,wc,2); 1461ab64890Smrg if (ret != RET_ILSEQ) { 1471ab64890Smrg unsigned char s1, s2; 1481ab64890Smrg if (ret != 2) abort(); 1491ab64890Smrg s1 = buf[0]; 1501ab64890Smrg s2 = buf[1]; 1511ab64890Smrg if (!(s1 <= 0xf9)) abort(); 1521ab64890Smrg if (!((s2 >= 0x40 && s2 <= 0x7e) || (s2 >= 0xa1 && s2 <= 0xfe))) abort(); 1531ab64890Smrg if (s1 >= 0xc9) { 1541ab64890Smrg unsigned int t = 157 * (s1 - 0xc9) + s2 - (s2 < 0x80 ? 0x40 : 0x62); 1551ab64890Smrg r[0] = (t / 94) + 0x21; 1561ab64890Smrg r[1] = (t % 94) + 0x21; 1571ab64890Smrg return 2; 1581ab64890Smrg } 1591ab64890Smrg } 1601ab64890Smrg return RET_ILSEQ; 1611ab64890Smrg } 1621ab64890Smrg return RET_TOOSMALL; 1631ab64890Smrg} 164