other.c revision 77683534
1/* 2Copyright (c) 2002 by Tomohiro KUBOTA 3 4Permission is hereby granted, free of charge, to any person obtaining a copy 5of this software and associated documentation files (the "Software"), to deal 6in the Software without restriction, including without limitation the rights 7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8copies of the Software, and to permit persons to whom the Software is 9furnished to do so, subject to the following conditions: 10 11The above copyright notice and this permission notice shall be included in 12all copies or substantial portions of the Software. 13 14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20THE SOFTWARE. 21*/ 22 23#ifdef HAVE_CONFIG_H 24# include "config.h" 25#endif 26 27#include <stdlib.h> 28#include <stdio.h> 29#include <string.h> 30#include <ctype.h> 31#include "other.h" 32 33#ifndef NULL 34#define NULL 0 35#endif 36 37#define EURO_10646 0x20AC 38 39int 40init_gbk(OtherStatePtr s) 41{ 42 s->gbk.mapping = 43 FontEncMapFind("gbk-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 44 if (!s->gbk.mapping) 45 return 0; 46 47 s->gbk.reverse = FontMapReverse(s->gbk.mapping); 48 if (!s->gbk.reverse) 49 return 0; 50 51 s->gbk.buf = -1; 52 return 1; 53} 54 55unsigned int 56mapping_gbk(unsigned int n, OtherStatePtr s) 57{ 58 unsigned int r; 59 if (n < 128) 60 return n; 61 if (n == 128) 62 return EURO_10646; 63 r = FontEncRecode(n, s->gbk.mapping); 64 return r; 65} 66 67unsigned int 68reverse_gbk(unsigned int n, OtherStatePtr s) 69{ 70 if (n < 128) 71 return n; 72 if (n == EURO_10646) 73 return 128; 74 return s->gbk.reverse->reverse(n, s->gbk.reverse->data); 75} 76 77int 78stack_gbk(unsigned c, OtherStatePtr s) 79{ 80 if (s->gbk.buf < 0) { 81 if (c < 129) 82 return (int) c; 83 s->gbk.buf = (int) c; 84 return -1; 85 } else { 86 int b; 87 if (c < 0x40 || c == 0x7F) { 88 s->gbk.buf = -1; 89 return (int) c; 90 } 91 if (s->gbk.buf < 0xFF && c < 0xFF) 92 b = (int) ((unsigned) (s->gbk.buf << 8) + c); 93 else 94 b = -1; 95 s->gbk.buf = -1; 96 return b; 97 } 98} 99 100int 101init_utf8(OtherStatePtr s) 102{ 103 s->utf8.buf_ptr = 0; 104 return 1; 105} 106 107unsigned int 108mapping_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED) 109{ 110 return n; 111} 112 113unsigned int 114reverse_utf8(unsigned int n, OtherStatePtr s GCC_UNUSED) 115{ 116 if (n < 0x80) 117 return n; 118 if (n < 0x800) 119 return 0xC080 + ((n & 0x7C0) << 2) + (n & 0x3F); 120 if (n < 0x10000) 121 return 0xE08080 + ((n & 0xF000) << 4) + ((n & 0xFC0) << 2) + (n & 0x3F); 122 return 0xF0808080 + ((n & 0x1C0000) << 6) + ((n & 0x3F000) << 4) + 123 ((n & 0xFC0) << 2) + (n & 0x3F); 124} 125 126int 127stack_utf8(unsigned c, OtherStatePtr s) 128{ 129 int u; 130 131 if (c < 0x80) { 132 s->utf8.buf_ptr = 0; 133 return (int) c; 134 } 135 if (s->utf8.buf_ptr == 0) { 136 if ((c & 0x40) == 0) 137 return -1; 138 s->utf8.buf[s->utf8.buf_ptr++] = UChar(c); 139 if ((c & 0x60) == 0x40) 140 s->utf8.len = 2; 141 else if ((c & 0x70) == 0x60) 142 s->utf8.len = 3; 143 else if ((c & 0x78) == 0x70) 144 s->utf8.len = 4; 145 else 146 s->utf8.buf_ptr = 0; 147 return -1; 148 } 149 if ((c & 0x40) != 0) { 150 s->utf8.buf_ptr = 0; 151 return -1; 152 } 153 s->utf8.buf[s->utf8.buf_ptr++] = UChar(c); 154 if (s->utf8.buf_ptr < s->utf8.len) 155 return -1; 156 switch (s->utf8.len) { 157 case 2: 158 u = ((s->utf8.buf[0] & 0x1F) << 6) | (s->utf8.buf[1] & 0x3F); 159 s->utf8.buf_ptr = 0; 160 if (u < 0x80) 161 return -1; 162 else 163 return u; 164 case 3: 165 u = ((s->utf8.buf[0] & 0x0F) << 12) 166 | ((s->utf8.buf[1] & 0x3F) << 6) 167 | (s->utf8.buf[2] & 0x3F); 168 s->utf8.buf_ptr = 0; 169 if (u < 0x800) 170 return -1; 171 else 172 return u; 173 case 4: 174 u = ((s->utf8.buf[0] & 0x03) << 18) 175 | ((s->utf8.buf[1] & 0x3F) << 12) 176 | ((s->utf8.buf[2] & 0x3F) << 6) 177 | ((s->utf8.buf[3] & 0x3F)); 178 s->utf8.buf_ptr = 0; 179 if (u < 0x10000) 180 return -1; 181 else 182 return u; 183 } 184 s->utf8.buf_ptr = 0; 185 return -1; 186} 187 188#define HALFWIDTH_10646 0xFF61 189#define YEN_SJIS 0x5C 190#define YEN_10646 0x00A5 191#define OVERLINE_SJIS 0x7E 192#define OVERLINE_10646 0x203E 193 194int 195init_sjis(OtherStatePtr s) 196{ 197 s->sjis.x0208mapping = 198 FontEncMapFind("jisx0208.1990-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 199 if (!s->sjis.x0208mapping) 200 return 0; 201 202 s->sjis.x0208reverse = FontMapReverse(s->sjis.x0208mapping); 203 if (!s->sjis.x0208reverse) 204 return 0; 205 206 s->sjis.x0201mapping = 207 FontEncMapFind("jisx0201.1976-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 208 if (!s->sjis.x0201mapping) 209 return 0; 210 211 s->sjis.x0201reverse = FontMapReverse(s->sjis.x0201mapping); 212 if (!s->sjis.x0201reverse) 213 return 0; 214 215 s->sjis.buf = -1; 216 return 1; 217} 218 219unsigned int 220mapping_sjis(unsigned int n, OtherStatePtr s) 221{ 222 unsigned int j1, j2, s1, s2; 223 if (n == YEN_SJIS) 224 return YEN_10646; 225 if (n == OVERLINE_SJIS) 226 return OVERLINE_10646; 227 if (n < 0x80) 228 return n; 229 if (n >= 0xA0 && n <= 0xDF) 230 return FontEncRecode(n, s->sjis.x0201mapping); 231 s1 = ((n >> 8) & 0xFF); 232 s2 = (n & 0xFF); 233 j1 = (s1 << 1) 234 - (unsigned) (s1 <= 0x9F ? 0xE0 : 0x160) 235 - (unsigned) (s2 < 0x9F ? 1 : 0); 236 j2 = s2 237 - 0x1F 238 - (unsigned) (s2 >= 0x7F ? 1 : 0) 239 - (unsigned) (s2 >= 0x9F ? 0x5E : 0); 240 return FontEncRecode((j1 << 8) + j2, s->sjis.x0208mapping); 241} 242 243unsigned int 244reverse_sjis(unsigned int n, OtherStatePtr s) 245{ 246 unsigned int j, j1, j2, s1, s2; 247 if (n == YEN_10646) 248 return YEN_SJIS; 249 if (n == OVERLINE_10646) 250 return OVERLINE_SJIS; 251 if (n < 0x80) 252 return n; 253 if (n >= HALFWIDTH_10646) 254 return s->sjis.x0201reverse->reverse(n, s->sjis.x0201reverse->data); 255 j = s->sjis.x0208reverse->reverse(n, s->sjis.x0208reverse->data); 256 j1 = ((j >> 8) & 0xFF); 257 j2 = (j & 0xFF); 258 s1 = ((j1 - 1) >> 1) 259 + (unsigned) ((j1 <= 0x5E) ? 0x71 : 0xB1); 260 s2 = j2 261 + (unsigned) ((j1 & 1) ? ((j2 < 0x60) ? 0x1F : 0x20) : 0x7E); 262 return (s1 << 8) + s2; 263} 264 265int 266stack_sjis(unsigned c, OtherStatePtr s) 267{ 268 if (s->sjis.buf < 0) { 269 if (c < 128 || (c >= 0xA0 && c <= 0xDF)) 270 return (int) c; 271 s->sjis.buf = (int) c; 272 return -1; 273 } else { 274 int b; 275 if (c < 0x40 || c == 0x7F) { 276 s->sjis.buf = -1; 277 return (int) c; 278 } 279 if (s->sjis.buf < 0xFF && c < 0xFF) 280 b = (int) ((unsigned) (s->sjis.buf << 8) + c); 281 else 282 b = -1; 283 s->sjis.buf = -1; 284 return b; 285 } 286} 287 288int 289init_hkscs(OtherStatePtr s) 290{ 291 s->hkscs.mapping = 292 FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 293 if (!s->hkscs.mapping) 294 return 0; 295 296 s->hkscs.reverse = FontMapReverse(s->hkscs.mapping); 297 if (!s->hkscs.reverse) 298 return 0; 299 300 s->hkscs.buf = -1; 301 return 1; 302} 303 304unsigned int 305mapping_hkscs(unsigned int n, OtherStatePtr s) 306{ 307 unsigned int r; 308 if (n < 128) 309 return n; 310 if (n == 128) 311 return EURO_10646; 312 r = FontEncRecode(n, s->hkscs.mapping); 313 return r; 314} 315 316unsigned int 317reverse_hkscs(unsigned int n, OtherStatePtr s) 318{ 319 if (n < 128) 320 return n; 321 if (n == EURO_10646) 322 return 128; 323 return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data); 324} 325 326int 327stack_hkscs(unsigned c, OtherStatePtr s) 328{ 329 if (s->hkscs.buf < 0) { 330 if (c < 129) 331 return (int) c; 332 s->hkscs.buf = (int) c; 333 return -1; 334 } else { 335 int b; 336 if (c < 0x40 || c == 0x7F) { 337 s->hkscs.buf = -1; 338 return (int) c; 339 } 340 if (s->hkscs.buf < 0xFF && c < 0xFF) 341 b = (int) ((unsigned) (s->hkscs.buf << 8) + c); 342 else 343 b = -1; 344 s->hkscs.buf = -1; 345 return b; 346 } 347} 348 349/* 350 * Because of the 1 ~ 4 multi-bytes nature of GB18030. 351 * CharSet encoding is split to 2 subset (besides latin) 352 * The 2Bytes MB char is defined in gb18030.2000-0 353 * The 4Bytes MB char is defined in gb18030.2000-1 354 * Please note that the mapping in 2000-1 is not a 4Bytes seq => 2Bytes value 355 * mapping. 356 * To use the 2000-1 we need to 'linear' the 4Bytes sequence and 'lookup' the 357 * unicode value after that. 358 * 359 * For more info on GB18030 standard pls check: 360 * http://oss.software.ibm.com/icu/docs/papers/gb18030.html 361 * 362 * For more info on GB18030 implementation issues in XFree86 pls check: 363 * http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1 364 */ 365int 366init_gb18030(OtherStatePtr s) 367{ 368 s->gb18030.cs0_mapping = 369 FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL); 370 if (!s->gb18030.cs0_mapping) 371 return 0; 372 373 s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping); 374 if (!s->gb18030.cs0_reverse) 375 return 0; 376 377 s->gb18030.cs1_mapping = 378 FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL); 379 if (!s->gb18030.cs1_mapping) 380 return 0; 381 382 s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping); 383 if (!s->gb18030.cs1_reverse) 384 return 0; 385 386 s->gb18030.linear = 0; 387 s->gb18030.buf_ptr = 0; 388 return 1; 389} 390 391unsigned int 392mapping_gb18030(unsigned int n, OtherStatePtr s) 393{ 394 if (n <= 0x80) 395 return n; /* 0x80 is valid but unassigned codepoint */ 396 if (n >= 0xFFFF) 397 return '?'; 398 399 return FontEncRecode(n, 400 (s->gb18030.linear) ? s->gb18030.cs1_mapping : s->gb18030.cs0_mapping); 401} 402 403unsigned int 404reverse_gb18030(unsigned int n, OtherStatePtr s) 405{ 406 /* when lookup in 2000-0 failed. */ 407 /* lookup in 2000-1 and then try to unlinear'd */ 408 unsigned int r; 409 if (n <= 0x80) 410 return n; 411 412 r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data); 413 if (r != 0) 414 return r; 415 416 r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data); 417 if (r != 0) { 418 unsigned char bytes[4]; 419 420 bytes[3] = UChar(0x30 + r % 10); 421 r /= 10; 422 bytes[2] = UChar(0x81 + r % 126); 423 r /= 126; 424 bytes[1] = UChar(0x30 + r % 10); 425 r /= 10; 426 bytes[0] = UChar(0x81 + r); 427 428 r = (unsigned int) bytes[0] << 24; 429 r |= (unsigned int) bytes[1] << 16; 430 r |= (unsigned int) bytes[2] << 8; 431 r |= (unsigned int) bytes[3]; 432 } 433 return r; 434} 435 436int 437stack_gb18030(unsigned c, OtherStatePtr s) 438{ 439 /* if set gb18030.linear => True. the return value is "linear'd" */ 440 if (s->gb18030.buf_ptr == 0) { 441 if (c <= 0x80) 442 return (int) c; 443 if (c == 0xFF) 444 return -1; 445 s->gb18030.linear = 0; 446 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c; 447 return -1; 448 } else if (s->gb18030.buf_ptr == 1) { 449 if (c >= 0x40) { 450 s->gb18030.buf_ptr = 0; 451 if ((c == 0x80) || (c == 0xFF)) 452 return -1; 453 else 454 return (int) ((unsigned) (s->gb18030.buf[0] << 8) + c); 455 } else if (c >= 30) { /* 2Byte is (0x30 -> 0x39) */ 456 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c; 457 return -1; 458 } else { 459 s->gb18030.buf_ptr = 0; 460 return (int) c; 461 } 462 } else if (s->gb18030.buf_ptr == 2) { 463 if ((c >= 0x81) && (c <= 0xFE)) { 464 s->gb18030.buf[s->gb18030.buf_ptr++] = (int) c; 465 return -1; 466 } else { 467 s->gb18030.buf_ptr = 0; 468 return (int) c; 469 } 470 } else { 471 int r = 0; 472 s->gb18030.buf_ptr = 0; 473 if ((c >= 0x30) && (c <= 0x39)) { 474 s->gb18030.linear = 1; 475 r = (((s->gb18030.buf[0] - 0x81) * 10 476 + (s->gb18030.buf[1] - 0x30)) * 126 477 + (s->gb18030.buf[2] - 0x81)) * 10 478 + ((int) c - 0x30); 479 return r; 480 } 481 return -1; 482 } 483} 484