/* other.c revision a8fdb4bc */
1/* 2Copyright (c) 2002 by Tomohiro KUBOTA 3 4Permission is hereby granted, free of charge, to any person obtaining a copy 5of this software and associated documentation files (the "Software"), to deal 6in the Software without restriction, including without limitation the rights 7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8copies of the Software, and to permit persons to whom the Software is 9furnished to do so, subject to the following conditions: 10 11The above copyright notice and this permission notice shall be included in 12all copies or substantial portions of the Software. 13 14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20THE SOFTWARE. 
21*/ 22/* $XFree86: xc/programs/luit/other.c,v 1.1 2002/10/17 01:06:09 dawes Exp $ */ 23 24#include <stdlib.h> 25#include <stdio.h> 26#include <string.h> 27#include <ctype.h> 28#include <X11/fonts/fontenc.h> 29#include "other.h" 30#include "charset.h" 31 32#ifndef NULL 33#define NULL 0 34#endif 35 36#define EURO_10646 0x20AC 37 38int 39init_gbk(OtherStatePtr s) 40{ 41 s->gbk.mapping = 42 FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 43 if(!s->gbk.mapping) return 0; 44 45 s->gbk.reverse = FontMapReverse(s->gbk.mapping); 46 if(!s->gbk.reverse) return 0; 47 48 s->gbk.buf = -1; 49 return 1; 50} 51 52unsigned int 53mapping_gbk(unsigned int n, OtherStatePtr s) 54{ 55 unsigned int r; 56 if(n < 128) return n; 57 if(n == 128) return EURO_10646; 58 r = FontEncRecode(n, s->gbk.mapping); 59 return r; 60} 61 62unsigned int 63reverse_gbk(unsigned int n, OtherStatePtr s) 64{ 65 if(n < 128) return n; 66 if(n == EURO_10646) return 128; 67 return s->gbk.reverse->reverse(n, s->gbk.reverse->data); 68} 69 70int 71stack_gbk(unsigned char c, OtherStatePtr s) 72{ 73 if(s->gbk.buf < 0) { 74 if(c < 129) return c; 75 s->gbk.buf = c; 76 return -1; 77 } else { 78 int b; 79 if(c < 0x40 || c == 0x7F) { 80 s->gbk.buf = -1; 81 return c; 82 } 83 if(s->gbk.buf < 0xFF && c < 0xFF) 84 b = (s->gbk.buf << 8) + c; 85 else 86 b = -1; 87 s->gbk.buf = -1; 88 return b; 89 } 90} 91 92int 93init_utf8(OtherStatePtr s) 94{ 95 s->utf8.buf_ptr = 0; 96 return 1; 97} 98 99unsigned int 100mapping_utf8(unsigned int n, OtherStatePtr s) 101{ 102 return n; 103} 104 105unsigned int 106reverse_utf8(unsigned int n, OtherStatePtr s) 107{ 108 if(n < 0x80) 109 return n; 110 if(n < 0x800) 111 return 0xC080 + ((n&0x7C0)<<2) + (n&0x3F); 112 if(n < 0x10000) 113 return 0xE08080 + ((n&0xF000)<<4) + ((n&0xFC0)<<2) + (n&0x3F); 114 return 0xF0808080 + ((n&0x1C0000)<<6) + ((n&0x3F000)<<4) + 115 ((n&0xFC0)<<2) + (n&0x3F); 116} 117 118int 119stack_utf8(unsigned char c, OtherStatePtr s) 120{ 121 int u; 122 123 if(c < 0x80) 
{ 124 s->utf8.buf_ptr = 0; 125 return c; 126 } 127 if(s->utf8.buf_ptr == 0) { 128 if((c & 0x40) == 0) return -1; 129 s->utf8.buf[s->utf8.buf_ptr++] = c; 130 if((c & 0x60) == 0x40) s->utf8.len = 2; 131 else if((c & 0x70) == 0x60) s->utf8.len = 3; 132 else if((c & 0x78) == 0x70) s->utf8.len = 4; 133 else s->utf8.buf_ptr = 0; 134 return -1; 135 } 136 if((c & 0x40) != 0) { 137 s->utf8.buf_ptr = 0; 138 return -1; 139 } 140 s->utf8.buf[s->utf8.buf_ptr++] = c; 141 if(s->utf8.buf_ptr < s->utf8.len) return -1; 142 switch(s->utf8.len) { 143 case 2: 144 u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F); 145 s->utf8.buf_ptr = 0; 146 if(u < 0x80) return -1; else return u; 147 case 3: 148 u = ((s->utf8.buf[0] & 0x0F) << 12) 149 | ((s->utf8.buf[1] & 0x3F) << 6) 150 | (s->utf8.buf[2] & 0x3F); 151 s->utf8.buf_ptr = 0; 152 if(u < 0x800) return -1; else return u; 153 case 4: 154 u = ((s->utf8.buf[0] & 0x03) << 18) 155 | ((s->utf8.buf[1] & 0x3F) << 12) 156 | ((s->utf8.buf[2] & 0x3F) << 6) 157 | ((s->utf8.buf[3] & 0x3F)); 158 s->utf8.buf_ptr = 0; 159 if(u < 0x10000) return -1; else return u; 160 } 161 s->utf8.buf_ptr = 0; 162 return -1; 163} 164 165 166#define HALFWIDTH_10646 0xFF61 167#define YEN_SJIS 0x5C 168#define YEN_10646 0x00A5 169#define OVERLINE_SJIS 0x7E 170#define OVERLINE_10646 0x203E 171 172int 173init_sjis(OtherStatePtr s) 174{ 175 s->sjis.x0208mapping = 176 FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 177 if(!s->sjis.x0208mapping) return 0; 178 179 s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping); 180 if(!s->sjis.x0208reverse) return 0; 181 182 s->sjis.x0201mapping = 183 FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 184 if(!s->sjis.x0201mapping) return 0; 185 186 s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping); 187 if(!s->sjis.x0201reverse) return 0; 188 189 s->sjis.buf = -1; 190 return 1; 191} 192 193unsigned int 194mapping_sjis(unsigned int n, OtherStatePtr s) 195{ 196 unsigned int 
j1, j2, s1, s2; 197 if(n == YEN_SJIS) return YEN_10646; 198 if(n == OVERLINE_SJIS) return OVERLINE_10646; 199 if(n < 0x80) return n; 200 if(n >= 0xA0 && n <= 0xDF) return FontEncRecode(n, s->sjis.x0201mapping); 201 s1 = ((n>>8)&0xFF); 202 s2 = (n&0xFF); 203 j1 = (s1 << 1) - (s1 <= 0x9F ? 0xE0 : 0x160) - (s2 < 0x9F ? 1 : 0); 204 j2 = s2 - 0x1F - (s2 >= 0x7F ? 1 : 0) - (s2 >= 0x9F ? 0x5E : 0); 205 return FontEncRecode((j1<<8) + j2, s->sjis.x0208mapping); 206} 207 208unsigned int 209reverse_sjis(unsigned int n, OtherStatePtr s) 210{ 211 unsigned int j, j1, j2, s1, s2; 212 if(n == YEN_10646) return YEN_SJIS; 213 if(n == OVERLINE_10646) return OVERLINE_SJIS; 214 if(n < 0x80) return n; 215 if(n >= HALFWIDTH_10646) 216 return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data); 217 j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data); 218 j1 = ((j>>8)&0xFF); 219 j2 = (j&0xFF); 220 s1 = ((j1 - 1) >> 1) + ((j1 <= 0x5E) ? 0x71 : 0xB1); 221 s2 = j2 + ((j1 & 1) ? ((j2 < 0x60) ? 
0x1F : 0x20) : 0x7E); 222 return (s1<<8) + s2; 223} 224 225int 226stack_sjis(unsigned char c, OtherStatePtr s) 227{ 228 if(s->sjis.buf < 0) { 229 if(c < 128 || (c >= 0xA0 && c <= 0xDF)) return c; 230 s->sjis.buf = c; 231 return -1; 232 } else { 233 int b; 234 if(c < 0x40 || c == 0x7F) { 235 s->sjis.buf = -1; 236 return c; 237 } 238 if(s->sjis.buf < 0xFF && c < 0xFF) 239 b = (s->sjis.buf << 8) + c; 240 else 241 b = -1; 242 s->sjis.buf = -1; 243 return b; 244 } 245} 246 247int 248init_hkscs(OtherStatePtr s) 249{ 250 s->hkscs.mapping = 251 FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 252 if(!s->hkscs.mapping) return 0; 253 254 s->hkscs.reverse = FontMapReverse(s->hkscs.mapping); 255 if(!s->hkscs.reverse) return 0; 256 257 s->hkscs.buf = -1; 258 return 1; 259} 260 261unsigned int 262mapping_hkscs(unsigned int n, OtherStatePtr s) 263{ 264 unsigned int r; 265 if(n < 128) return n; 266 if(n == 128) return EURO_10646; 267 r = FontEncRecode(n, s->hkscs.mapping); 268 return r; 269} 270 271unsigned int 272reverse_hkscs(unsigned int n, OtherStatePtr s) 273{ 274 if(n < 128) return n; 275 if(n == EURO_10646) return 128; 276 return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data); 277} 278 279int 280stack_hkscs(unsigned char c, OtherStatePtr s) 281{ 282 if(s->hkscs.buf < 0) { 283 if(c < 129) return c; 284 s->hkscs.buf = c; 285 return -1; 286 } else { 287 int b; 288 if(c < 0x40 || c == 0x7F) { 289 s->hkscs.buf = -1; 290 return c; 291 } 292 if(s->hkscs.buf < 0xFF && c < 0xFF) 293 b = (s->hkscs.buf << 8) + c; 294 else 295 b = -1; 296 s->hkscs.buf = -1; 297 return b; 298 } 299} 300 301 302/* 303 * Because of the 1 ~ 4 multi-bytes nature of GB18030. 304 * CharSet encoding is split to 2 subset (besides latin) 305 * The 2Bytes MB char is defined in gb18030.2000-0 306 * The 4Bytes MB char is defined in gb18030.2000-1 307 * Please note that the mapping in 2000-1 is not a 4Bytes seq => 2Bytes value 308 * mapping. 
309 * To use the 2000-1 we need to 'linear' the 4Bytes sequence and 'lookup' the 310 * unicode value after that. 311 * 312 * For more info on GB18030 standard pls check: 313 * http://oss.software.ibm.com/icu/docs/papers/gb18030.html 314 * 315 * For more info on GB18030 implementation issues in XFree86 pls check: 316 * http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1 317 */ 318int 319init_gb18030(OtherStatePtr s) 320{ 321 s->gb18030.cs0_mapping = 322 FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 323 if(!s->gb18030.cs0_mapping) return 0; 324 325 s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping); 326 if(!s->gb18030.cs0_reverse) return 0; 327 328 s->gb18030.cs1_mapping = 329 FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL); 330 if(!s->gb18030.cs1_mapping) return 0; 331 332 s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping); 333 if(!s->gb18030.cs1_reverse) return 0; 334 335 s->gb18030.linear = 0; 336 s->gb18030.buf_ptr = 0; 337 return 1; 338} 339 340unsigned int 341mapping_gb18030(unsigned int n, OtherStatePtr s) 342{ 343 if(n <= 0x80) return n; /* 0x80 is valid but unassigned codepoint */ 344 if(n >= 0xFFFF) return '?'; 345 346 return FontEncRecode(n, 347 (s->gb18030.linear)?s->gb18030.cs1_mapping:s->gb18030.cs0_mapping); 348} 349 350unsigned int 351reverse_gb18030(unsigned int n, OtherStatePtr s) 352{ 353 /* when lookup in 2000-0 failed. 
*/ 354 /* lookup in 2000-1 and then try to unlinear'd */ 355 unsigned int r; 356 if(n <= 0x80) return n; 357 358 r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data); 359 if (r != 0) 360 return r; 361 362 r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data); 363 if (r != 0) { 364 unsigned char bytes[4]; 365 366 bytes[3] = 0x30 + r % 10; r /= 10; 367 bytes[2] = 0x81 + r % 126; r /= 126; 368 bytes[1] = 0x30 + r % 10; r /= 10; 369 bytes[0] = 0x81 + r; 370 371 r = (unsigned int)bytes[0] << 24; 372 r |= (unsigned int)bytes[1] << 16; 373 r |= (unsigned int)bytes[2] << 8; 374 r |= (unsigned int)bytes[3]; 375 } 376 return r; 377} 378 379int 380stack_gb18030(unsigned char c, OtherStatePtr s) 381{ 382 /* if set gb18030.linear => True. the return value is "linear'd" */ 383 if(s->gb18030.buf_ptr == 0) { 384 if(c <= 0x80) return c; 385 if (c == 0xFF) return -1; 386 s->gb18030.linear = 0; 387 s->gb18030.buf[s->gb18030.buf_ptr++] = c; 388 return -1; 389 } else if (s->gb18030.buf_ptr == 1) { 390 if (c >= 0x40) { 391 s->gb18030.buf_ptr = 0; 392 if ((c == 0x80) || (c == 0xFF)) 393 return -1; 394 else 395 return (s->gb18030.buf[0] << 8) + c; 396 } else if (c >= 30) { /* 2Byte is (0x30 -> 0x39) */ 397 s->gb18030.buf[s->gb18030.buf_ptr++] = c; 398 return -1; 399 } else { 400 s->gb18030.buf_ptr = 0; 401 return c; 402 } 403 } else if (s->gb18030.buf_ptr == 2) { 404 if ((c >= 0x81) && (c <= 0xFE)) { 405 s->gb18030.buf[s->gb18030.buf_ptr++] = c; 406 return -1; 407 } else { 408 s->gb18030.buf_ptr = 0; 409 return c; 410 } 411 } else { 412 int r = 0; 413 s->gb18030.buf_ptr = 0; 414 if ((c >= 0x30) && (c <= 0x39)) { 415 s->gb18030.linear = 1; 416 r = (((s->gb18030.buf[0] - 0x81) * 10 417 + (s->gb18030.buf[1] - 0x30)) * 126 418 + (s->gb18030.buf[2] - 0x81)) * 10 419 + (c - 0x30); 420 return r; 421 } 422 return -1; 423 } 424} 425 426