lcUTF8.c revision 88de56cc
1/****************************************************************** 2 3 Copyright 1993 by SunSoft, Inc. 4 Copyright 1999-2000 by Bruno Haible 5 6Permission to use, copy, modify, distribute, and sell this software 7and its documentation for any purpose is hereby granted without fee, 8provided that the above copyright notice appear in all copies and 9that both that copyright notice and this permission notice appear 10in supporting documentation, and that the names of SunSoft, Inc. and 11Bruno Haible not be used in advertising or publicity pertaining to 12distribution of the software without specific, written prior 13permission. SunSoft, Inc. and Bruno Haible make no representations 14about the suitability of this software for any purpose. It is 15provided "as is" without express or implied warranty. 16 17SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD 18TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 19AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE 20FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 21WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 22ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 23OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 24 25******************************************************************/ 26 27/* 28 * This file contains: 29 * 30 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 31 * 32 * Used for three purposes: 33 * 1. The UTF-8 locales, see below. 34 * 2. Unicode aware applications for which the use of 8-bit character 35 * sets is an anachronism. 36 * 3. For conversion from keysym to locale encoding. 37 * 38 * II. Conversion files for an UTF-8 locale loader. 39 * Supports: all locales with codeset UTF-8. 40 * How: Provides converters for UTF-8. 41 * Platforms: all systems. 42 * 43 * The loader itself is located in lcUTF8.c. 44 */ 45 46/* 47 * The conversion from UTF-8 to CompoundText is realized in a very 48 * conservative way. Recall that CompoundText data is used for inter-client 49 * communication purposes. We distinguish three classes of clients: 50 * - Clients which accept only those pieces of CompoundText which belong to 51 * the character set understood by the current locale. 52 * (Example: clients which are linked to an older X11 library.) 53 * - Clients which accept CompoundText with multiple character sets and parse 54 * it themselves. 55 * (Example: emacs, xemacs.) 56 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList 57 * functions for the conversion of CompoundText to their current locale's 58 * multi-byte/wide-character format. 59 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as 60 * follows. For every character, it first tests whether the character is 61 * representable in the current locale's original (non-UTF-8) character set. 62 * If not, it goes through the list of predefined character sets for 63 * CompoundText and tests if the character is representable in that character 64 * set. If so, it encodes the character using its code within that character 65 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since 66 * clients of the first and second kind ignore such encapsulated text, 67 * this encapsulation is kept to a minimum and terminated as early as possible. 68 * 69 * In a distant future, when clients of the first and second kind will have 70 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText 71 * without first going through the list of predefined character sets. 72 */ 73 74#ifdef HAVE_CONFIG_H 75#include <config.h> 76#endif 77#include <stdio.h> 78#include "Xlibint.h" 79#include "XlcPubI.h" 80#include "XlcGeneric.h" 81 82static XlcConv 83create_conv( 84 XLCd lcd, 85 XlcConvMethods methods) 86{ 87 XlcConv conv; 88 89 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)); 90 if (conv == (XlcConv) NULL) 91 return (XlcConv) NULL; 92 93 conv->methods = methods; 94 conv->state = NULL; 95 96 return conv; 97} 98 99static void 100close_converter( 101 XlcConv conv) 102{ 103 Xfree((char *) conv); 104} 105 106/* Replacement character for invalid multibyte sequence or wide character. */ 107#define BAD_WCHAR ((ucs4_t) 0xfffd) 108#define BAD_CHAR '?' 109 110/***************************************************************************/ 111/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 112 * 113 * Note that this code works in any locale. We store Unicode values in 114 * `ucs4_t' variables, but don't pass them to the user. 115 * 116 * This code has to support all character sets that are used for CompoundText, 117 * nothing more, nothing less. See the table in lcCT.c. 118 * Since the conversion _to_ CompoundText is likely to need the tables for all 119 * character sets at once, we don't use dynamic loading (of tables or shared 120 * libraries through iconv()). Use a fixed set of tables instead. 121 * 122 * We use statically computed tables, not dynamically allocated arrays, 123 * because it's more memory efficient: Different processes using the same 124 * libX11 shared library share the "text" and read-only "data" sections. 125 */ 126 127typedef unsigned int ucs4_t; 128#define conv_t XlcConv 129 130typedef struct _Utf8ConvRec { 131 const char *name; 132 XrmQuark xrm_name; 133 int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int); 134 int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int); 135} Utf8ConvRec, *Utf8Conv; 136 137/* 138 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n) 139 * converts the byte sequence starting at s to a wide character. Up to n bytes 140 * are available at s. n is >= 1. 141 * Result is number of bytes consumed (if a wide character was read), 142 * or 0 if invalid, or -1 if n too small. 143 * 144 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n) 145 * converts the wide character wc to the character set xxx, and stores the 146 * result beginning at r. Up to n bytes may be written at r. n is >= 1. 147 * Result is number of bytes written, or 0 if invalid, or -1 if n too small. 148 */ 149 150/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */ 151#define RET_ILSEQ 0 152/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 153#define RET_TOOFEW(n) (-1-(n)) 154/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 155#define RET_TOOSMALL -1 156 157/* 158 * The tables below are bijective. It would be possible to extend the 159 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22) 160 * but *only* with characters not contained in any other table, and *only* 161 * when the current locale is not an UTF-8 locale. 162 */ 163 164#include "lcUniConv/utf8.h" 165#include "lcUniConv/ucs2be.h" 166#ifdef notused 167#include "lcUniConv/ascii.h" 168#endif 169#include "lcUniConv/iso8859_1.h" 170#include "lcUniConv/iso8859_2.h" 171#include "lcUniConv/iso8859_3.h" 172#include "lcUniConv/iso8859_4.h" 173#include "lcUniConv/iso8859_5.h" 174#include "lcUniConv/iso8859_6.h" 175#include "lcUniConv/iso8859_7.h" 176#include "lcUniConv/iso8859_8.h" 177#include "lcUniConv/iso8859_9.h" 178#include "lcUniConv/iso8859_10.h" 179#include "lcUniConv/iso8859_11.h" 180#include "lcUniConv/iso8859_13.h" 181#include "lcUniConv/iso8859_14.h" 182#include "lcUniConv/iso8859_15.h" 183#include "lcUniConv/iso8859_16.h" 184#include "lcUniConv/iso8859_9e.h" 185#include "lcUniConv/jisx0201.h" 186#include "lcUniConv/tis620.h" 187#include "lcUniConv/koi8_r.h" 188#include "lcUniConv/koi8_u.h" 189#include "lcUniConv/koi8_c.h" 190#include "lcUniConv/armscii_8.h" 191#include "lcUniConv/cp1133.h" 192#include "lcUniConv/mulelao.h" 193#include "lcUniConv/viscii.h" 194#include "lcUniConv/tcvn.h" 195#include "lcUniConv/georgian_academy.h" 196#include "lcUniConv/georgian_ps.h" 197#include "lcUniConv/cp1251.h" 198#include "lcUniConv/cp1255.h" 199#include "lcUniConv/cp1256.h" 200#include "lcUniConv/tatar_cyr.h" 201 202typedef struct { 203 unsigned short indx; /* index into big table */ 204 unsigned short used; /* bitmask of used entries */ 205} Summary16; 206 207#include "lcUniConv/gb2312.h" 208#include "lcUniConv/jisx0208.h" 209#include "lcUniConv/jisx0212.h" 210#include "lcUniConv/ksc5601.h" 211#include "lcUniConv/big5.h" 212#include "lcUniConv/big5_emacs.h" 213#include "lcUniConv/big5hkscs.h" 214#include "lcUniConv/gbk.h" 215 216static Utf8ConvRec all_charsets[] = { 217 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 218 (for lookup speed), once at the end (as a fallback). */ 219 { "ISO10646-1", NULLQUARK, 220 utf8_mbtowc, utf8_wctomb 221 }, 222 223 { "ISO8859-1", NULLQUARK, 224 iso8859_1_mbtowc, iso8859_1_wctomb 225 }, 226 { "ISO8859-2", NULLQUARK, 227 iso8859_2_mbtowc, iso8859_2_wctomb 228 }, 229 { "ISO8859-3", NULLQUARK, 230 iso8859_3_mbtowc, iso8859_3_wctomb 231 }, 232 { "ISO8859-4", NULLQUARK, 233 iso8859_4_mbtowc, iso8859_4_wctomb 234 }, 235 { "ISO8859-5", NULLQUARK, 236 iso8859_5_mbtowc, iso8859_5_wctomb 237 }, 238 { "ISO8859-6", NULLQUARK, 239 iso8859_6_mbtowc, iso8859_6_wctomb 240 }, 241 { "ISO8859-7", NULLQUARK, 242 iso8859_7_mbtowc, iso8859_7_wctomb 243 }, 244 { "ISO8859-8", NULLQUARK, 245 iso8859_8_mbtowc, iso8859_8_wctomb 246 }, 247 { "ISO8859-9", NULLQUARK, 248 iso8859_9_mbtowc, iso8859_9_wctomb 249 }, 250 { "ISO8859-10", NULLQUARK, 251 iso8859_10_mbtowc, iso8859_10_wctomb 252 }, 253 { "ISO8859-11", NULLQUARK, 254 iso8859_11_mbtowc, iso8859_11_wctomb 255 }, 256 { "ISO8859-13", NULLQUARK, 257 iso8859_13_mbtowc, iso8859_13_wctomb 258 }, 259 { "ISO8859-14", NULLQUARK, 260 iso8859_14_mbtowc, iso8859_14_wctomb 261 }, 262 { "ISO8859-15", NULLQUARK, 263 iso8859_15_mbtowc, iso8859_15_wctomb 264 }, 265 { "ISO8859-16", NULLQUARK, 266 iso8859_16_mbtowc, iso8859_16_wctomb 267 }, 268 { "JISX0201.1976-0", NULLQUARK, 269 jisx0201_mbtowc, jisx0201_wctomb 270 }, 271 { "TIS620-0", NULLQUARK, 272 tis620_mbtowc, tis620_wctomb 273 }, 274 { "GB2312.1980-0", NULLQUARK, 275 gb2312_mbtowc, gb2312_wctomb 276 }, 277 { "JISX0208.1983-0", NULLQUARK, 278 jisx0208_mbtowc, jisx0208_wctomb 279 }, 280 { "JISX0208.1990-0", NULLQUARK, 281 jisx0208_mbtowc, jisx0208_wctomb 282 }, 283 { "JISX0212.1990-0", NULLQUARK, 284 jisx0212_mbtowc, jisx0212_wctomb 285 }, 286 { "KSC5601.1987-0", NULLQUARK, 287 ksc5601_mbtowc, ksc5601_wctomb 288 }, 289 { "KOI8-R", NULLQUARK, 290 koi8_r_mbtowc, koi8_r_wctomb 291 }, 292 { "KOI8-U", NULLQUARK, 293 koi8_u_mbtowc, koi8_u_wctomb 294 }, 295 { "KOI8-C", NULLQUARK, 296 koi8_c_mbtowc, koi8_c_wctomb 297 }, 298 { "TATAR-CYR", NULLQUARK, 299 tatar_cyr_mbtowc, tatar_cyr_wctomb 300 }, 301 { "ARMSCII-8", NULLQUARK, 302 armscii_8_mbtowc, armscii_8_wctomb 303 }, 304 { "IBM-CP1133", NULLQUARK, 305 cp1133_mbtowc, cp1133_wctomb 306 }, 307 { "MULELAO-1", NULLQUARK, 308 mulelao_mbtowc, mulelao_wctomb 309 }, 310 { "VISCII1.1-1", NULLQUARK, 311 viscii_mbtowc, viscii_wctomb 312 }, 313 { "TCVN-5712", NULLQUARK, 314 tcvn_mbtowc, tcvn_wctomb 315 }, 316 { "GEORGIAN-ACADEMY", NULLQUARK, 317 georgian_academy_mbtowc, georgian_academy_wctomb 318 }, 319 { "GEORGIAN-PS", NULLQUARK, 320 georgian_ps_mbtowc, georgian_ps_wctomb 321 }, 322 { "ISO8859-9E", NULLQUARK, 323 iso8859_9e_mbtowc, iso8859_9e_wctomb 324 }, 325 { "MICROSOFT-CP1251", NULLQUARK, 326 cp1251_mbtowc, cp1251_wctomb 327 }, 328 { "MICROSOFT-CP1255", NULLQUARK, 329 cp1255_mbtowc, cp1255_wctomb 330 }, 331 { "MICROSOFT-CP1256", NULLQUARK, 332 cp1256_mbtowc, cp1256_wctomb 333 }, 334 { "BIG5-0", NULLQUARK, 335 big5_mbtowc, big5_wctomb 336 }, 337 { "BIG5-E0", NULLQUARK, 338 big5_0_mbtowc, big5_0_wctomb 339 }, 340 { "BIG5-E1", NULLQUARK, 341 big5_1_mbtowc, big5_1_wctomb 342 }, 343 { "GBK-0", NULLQUARK, 344 gbk_mbtowc, gbk_wctomb 345 }, 346 { "BIG5HKSCS-0", NULLQUARK, 347 big5hkscs_mbtowc, big5hkscs_wctomb 348 }, 349 350 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 351 (for lookup speed), once at the end (as a fallback). */ 352 { "ISO10646-1", NULLQUARK, 353 utf8_mbtowc, utf8_wctomb 354 }, 355 356 /* Encoding ISO10646-1 for fonts means UCS2-like encoding 357 so for conversion to FontCharSet we need this record */ 358 { "ISO10646-1", NULLQUARK, 359 ucs2be_mbtowc, ucs2be_wctomb 360 } 361}; 362 363#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0])) 364#define all_charsets_count (charsets_table_size - 1) 365#define ucs2_conv_index (charsets_table_size - 1) 366 367static void 368init_all_charsets (void) 369{ 370 Utf8Conv convptr; 371 int i; 372 373 for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--) 374 convptr->xrm_name = XrmStringToQuark(convptr->name); 375} 376 377#define lazy_init_all_charsets() \ 378 do { \ 379 if (all_charsets[0].xrm_name == NULLQUARK) \ 380 init_all_charsets(); \ 381 } while (0) 382 383/* from XlcNCharSet to XlcNUtf8String */ 384 385static int 386cstoutf8( 387 XlcConv conv, 388 XPointer *from, 389 int *from_left, 390 XPointer *to, 391 int *to_left, 392 XPointer *args, 393 int num_args) 394{ 395 XlcCharSet charset; 396 const char *name; 397 Utf8Conv convptr; 398 int i; 399 unsigned char const *src; 400 unsigned char const *srcend; 401 unsigned char *dst; 402 unsigned char *dstend; 403 int unconv_num; 404 405 if (from == NULL || *from == NULL) 406 return 0; 407 408 if (num_args < 1) 409 return -1; 410 411 charset = (XlcCharSet) args[0]; 412 name = charset->encoding_name; 413 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 414 415 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 416 if (!strcmp(convptr->name, name)) 417 break; 418 if (i == 0) 419 return -1; 420 421 src = (unsigned char const *) *from; 422 srcend = src + *from_left; 423 dst = (unsigned char *) *to; 424 dstend = dst + *to_left; 425 unconv_num = 0; 426 427 while (src < srcend) { 428 ucs4_t wc; 429 int consumed; 430 int count; 431 432 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 433 if (consumed == RET_ILSEQ) 434 return -1; 435 if (consumed == RET_TOOFEW(0)) 436 break; 437 438 count = utf8_wctomb(NULL, dst, wc, dstend-dst); 439 if (count == RET_TOOSMALL) 440 break; 441 if (count == RET_ILSEQ) { 442 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 443 if (count == RET_TOOSMALL) 444 break; 445 unconv_num++; 446 } 447 src += consumed; 448 dst += count; 449 } 450 451 *from = (XPointer) src; 452 *from_left = srcend - src; 453 *to = (XPointer) dst; 454 *to_left = dstend - dst; 455 456 return unconv_num; 457} 458 459static XlcConvMethodsRec methods_cstoutf8 = { 460 close_converter, 461 cstoutf8, 462 NULL 463}; 464 465static XlcConv 466open_cstoutf8( 467 XLCd from_lcd, 468 const char *from_type, 469 XLCd to_lcd, 470 const char *to_type) 471{ 472 lazy_init_all_charsets(); 473 return create_conv(from_lcd, &methods_cstoutf8); 474} 475 476/* from XlcNUtf8String to XlcNCharSet */ 477 478static XlcConv 479create_tocs_conv( 480 XLCd lcd, 481 XlcConvMethods methods) 482{ 483 XlcConv conv; 484 CodeSet *codeset_list; 485 int codeset_num; 486 int charset_num; 487 int i, j, k; 488 Utf8Conv *preferred; 489 490 lazy_init_all_charsets(); 491 492 codeset_list = XLC_GENERIC(lcd, codeset_list); 493 codeset_num = XLC_GENERIC(lcd, codeset_num); 494 495 charset_num = 0; 496 for (i = 0; i < codeset_num; i++) 497 charset_num += codeset_list[i]->num_charsets; 498 if (charset_num > all_charsets_count-1) 499 charset_num = all_charsets_count-1; 500 501 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) 502 + (charset_num + 1) * sizeof(Utf8Conv)); 503 if (conv == (XlcConv) NULL) 504 return (XlcConv) NULL; 505 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 506 507 /* Loop through all codesets mentioned in the locale. */ 508 charset_num = 0; 509 for (i = 0; i < codeset_num; i++) { 510 XlcCharSet *charsets = codeset_list[i]->charset_list; 511 int num_charsets = codeset_list[i]->num_charsets; 512 for (j = 0; j < num_charsets; j++) { 513 const char *name = charsets[j]->encoding_name; 514 /* If it wasn't already encountered... */ 515 for (k = charset_num-1; k >= 0; k--) 516 if (!strcmp(preferred[k]->name, name)) 517 break; 518 if (k < 0) { 519 /* Look it up in all_charsets[]. */ 520 for (k = 0; k < all_charsets_count-1; k++) 521 if (!strcmp(all_charsets[k].name, name)) { 522 /* Add it to the preferred set. */ 523 preferred[charset_num++] = &all_charsets[k]; 524 break; 525 } 526 } 527 } 528 } 529 preferred[charset_num] = (Utf8Conv) NULL; 530 531 conv->methods = methods; 532 conv->state = (XPointer) preferred; 533 534 return conv; 535} 536 537static void 538close_tocs_converter( 539 XlcConv conv) 540{ 541 /* conv->state is allocated together with conv, free both at once. */ 542 Xfree((char *) conv); 543} 544 545/* 546 * Converts a Unicode character to an appropriate character set. The NULL 547 * terminated array of preferred character sets is passed as first argument. 548 * If successful, *charsetp is set to the character set that was used, and 549 * *sidep is set to the character set side (XlcGL or XlcGR). 550 */ 551static int 552charset_wctocs( 553 Utf8Conv *preferred, 554 Utf8Conv *charsetp, 555 XlcSide *sidep, 556 XlcConv conv, 557 unsigned char *r, 558 ucs4_t wc, 559 int n) 560{ 561 int count; 562 Utf8Conv convptr; 563 int i; 564 565 for (; *preferred != (Utf8Conv) NULL; preferred++) { 566 convptr = *preferred; 567 count = convptr->wctocs(conv, r, wc, n); 568 if (count == RET_TOOSMALL) 569 return RET_TOOSMALL; 570 if (count != RET_ILSEQ) { 571 *charsetp = convptr; 572 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 573 return count; 574 } 575 } 576 for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) { 577 count = convptr->wctocs(conv, r, wc, n); 578 if (count == RET_TOOSMALL) 579 return RET_TOOSMALL; 580 if (count != RET_ILSEQ) { 581 *charsetp = convptr; 582 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 583 return count; 584 } 585 } 586 return RET_ILSEQ; 587} 588 589static int 590utf8tocs( 591 XlcConv conv, 592 XPointer *from, 593 int *from_left, 594 XPointer *to, 595 int *to_left, 596 XPointer *args, 597 int num_args) 598{ 599 Utf8Conv *preferred_charsets; 600 XlcCharSet last_charset = NULL; 601 unsigned char const *src; 602 unsigned char const *srcend; 603 unsigned char *dst; 604 unsigned char *dstend; 605 int unconv_num; 606 607 if (from == NULL || *from == NULL) 608 return 0; 609 610 preferred_charsets = (Utf8Conv *) conv->state; 611 src = (unsigned char const *) *from; 612 srcend = src + *from_left; 613 dst = (unsigned char *) *to; 614 dstend = dst + *to_left; 615 unconv_num = 0; 616 617 while (src < srcend && dst < dstend) { 618 Utf8Conv chosen_charset = NULL; 619 XlcSide chosen_side = XlcNONE; 620 ucs4_t wc; 621 int consumed; 622 int count; 623 624 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 625 if (consumed == RET_TOOFEW(0)) 626 break; 627 if (consumed == RET_ILSEQ) { 628 src++; 629 unconv_num++; 630 continue; 631 } 632 633 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 634 if (count == RET_TOOSMALL) 635 break; 636 if (count == RET_ILSEQ) { 637 src += consumed; 638 unconv_num++; 639 continue; 640 } 641 642 if (last_charset == NULL) { 643 last_charset = 644 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 645 if (last_charset == NULL) { 646 src += consumed; 647 unconv_num++; 648 continue; 649 } 650 } else { 651 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 652 && (last_charset->side == XlcGLGR 653 || last_charset->side == chosen_side))) 654 break; 655 } 656 src += consumed; 657 dst += count; 658 } 659 660 if (last_charset == NULL) 661 return -1; 662 663 *from = (XPointer) src; 664 *from_left = srcend - src; 665 *to = (XPointer) dst; 666 *to_left = dstend - dst; 667 668 if (num_args >= 1) 669 *((XlcCharSet *)args[0]) = last_charset; 670 671 return unconv_num; 672} 673 674static XlcConvMethodsRec methods_utf8tocs = { 675 close_tocs_converter, 676 utf8tocs, 677 NULL 678}; 679 680static XlcConv 681open_utf8tocs( 682 XLCd from_lcd, 683 const char *from_type, 684 XLCd to_lcd, 685 const char *to_type) 686{ 687 return create_tocs_conv(from_lcd, &methods_utf8tocs); 688} 689 690/* from XlcNUtf8String to XlcNChar */ 691 692static int 693utf8tocs1( 694 XlcConv conv, 695 XPointer *from, 696 int *from_left, 697 XPointer *to, 698 int *to_left, 699 XPointer *args, 700 int num_args) 701{ 702 Utf8Conv *preferred_charsets; 703 XlcCharSet last_charset = NULL; 704 unsigned char const *src; 705 unsigned char const *srcend; 706 unsigned char *dst; 707 unsigned char *dstend; 708 int unconv_num; 709 710 if (from == NULL || *from == NULL) 711 return 0; 712 713 preferred_charsets = (Utf8Conv *) conv->state; 714 src = (unsigned char const *) *from; 715 srcend = src + *from_left; 716 dst = (unsigned char *) *to; 717 dstend = dst + *to_left; 718 unconv_num = 0; 719 720 while (src < srcend && dst < dstend) { 721 Utf8Conv chosen_charset = NULL; 722 XlcSide chosen_side = XlcNONE; 723 ucs4_t wc; 724 int consumed; 725 int count; 726 727 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 728 if (consumed == RET_TOOFEW(0)) 729 break; 730 if (consumed == RET_ILSEQ) { 731 src++; 732 unconv_num++; 733 continue; 734 } 735 736 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 737 if (count == RET_TOOSMALL) 738 break; 739 if (count == RET_ILSEQ) { 740 src += consumed; 741 unconv_num++; 742 continue; 743 } 744 745 if (last_charset == NULL) { 746 last_charset = 747 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 748 if (last_charset == NULL) { 749 src += consumed; 750 unconv_num++; 751 continue; 752 } 753 } else { 754 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 755 && (last_charset->side == XlcGLGR 756 || last_charset->side == chosen_side))) 757 break; 758 } 759 src += consumed; 760 dst += count; 761 break; 762 } 763 764 if (last_charset == NULL) 765 return -1; 766 767 *from = (XPointer) src; 768 *from_left = srcend - src; 769 *to = (XPointer) dst; 770 *to_left = dstend - dst; 771 772 if (num_args >= 1) 773 *((XlcCharSet *)args[0]) = last_charset; 774 775 return unconv_num; 776} 777 778static XlcConvMethodsRec methods_utf8tocs1 = { 779 close_tocs_converter, 780 utf8tocs1, 781 NULL 782}; 783 784static XlcConv 785open_utf8tocs1( 786 XLCd from_lcd, 787 const char *from_type, 788 XLCd to_lcd, 789 const char *to_type) 790{ 791 return create_tocs_conv(from_lcd, &methods_utf8tocs1); 792} 793 794/* from XlcNUtf8String to XlcNString */ 795 796static int 797utf8tostr( 798 XlcConv conv, 799 XPointer *from, 800 int *from_left, 801 XPointer *to, 802 int *to_left, 803 XPointer *args, 804 int num_args) 805{ 806 unsigned char const *src; 807 unsigned char const *srcend; 808 unsigned char *dst; 809 unsigned char *dstend; 810 int unconv_num; 811 812 if (from == NULL || *from == NULL) 813 return 0; 814 815 src = (unsigned char const *) *from; 816 srcend = src + *from_left; 817 dst = (unsigned char *) *to; 818 dstend = dst + *to_left; 819 unconv_num = 0; 820 821 while (src < srcend) { 822 unsigned char c; 823 ucs4_t wc; 824 int consumed; 825 826 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 827 if (consumed == RET_TOOFEW(0)) 828 break; 829 if (dst == dstend) 830 break; 831 if (consumed == RET_ILSEQ) { 832 consumed = 1; 833 c = BAD_CHAR; 834 unconv_num++; 835 } else { 836 if ((wc & ~(ucs4_t)0xff) != 0) { 837 c = BAD_CHAR; 838 unconv_num++; 839 } else 840 c = (unsigned char) wc; 841 } 842 *dst++ = c; 843 src += consumed; 844 } 845 846 *from = (XPointer) src; 847 *from_left = srcend - src; 848 *to = (XPointer) dst; 849 *to_left = dstend - dst; 850 851 return unconv_num; 852} 853 854static XlcConvMethodsRec methods_utf8tostr = { 855 close_converter, 856 utf8tostr, 857 NULL 858}; 859 860static XlcConv 861open_utf8tostr( 862 XLCd from_lcd, 863 const char *from_type, 864 XLCd to_lcd, 865 const char *to_type) 866{ 867 return create_conv(from_lcd, &methods_utf8tostr); 868} 869 870/* from XlcNString to XlcNUtf8String */ 871 872static int 873strtoutf8( 874 XlcConv conv, 875 XPointer *from, 876 int *from_left, 877 XPointer *to, 878 int *to_left, 879 XPointer *args, 880 int num_args) 881{ 882 unsigned char const *src; 883 unsigned char const *srcend; 884 unsigned char *dst; 885 unsigned char *dstend; 886 887 if (from == NULL || *from == NULL) 888 return 0; 889 890 src = (unsigned char const *) *from; 891 srcend = src + *from_left; 892 dst = (unsigned char *) *to; 893 dstend = dst + *to_left; 894 895 while (src < srcend) { 896 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 897 if (count == RET_TOOSMALL) 898 break; 899 dst += count; 900 src++; 901 } 902 903 *from = (XPointer) src; 904 *from_left = srcend - src; 905 *to = (XPointer) dst; 906 *to_left = dstend - dst; 907 908 return 0; 909} 910 911static XlcConvMethodsRec methods_strtoutf8 = { 912 close_converter, 913 strtoutf8, 914 NULL 915}; 916 917static XlcConv 918open_strtoutf8( 919 XLCd from_lcd, 920 const char *from_type, 921 XLCd to_lcd, 922 const char *to_type) 923{ 924 return create_conv(from_lcd, &methods_strtoutf8); 925} 926 927/* Support for the input methods. */ 928 929XPointer 930_Utf8GetConvByName( 931 const char *name) 932{ 933 XrmQuark xrm_name; 934 Utf8Conv convptr; 935 int i; 936 937 if (name == NULL) 938 return (XPointer) NULL; 939 940 lazy_init_all_charsets(); 941 xrm_name = XrmStringToQuark(name); 942 943 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 944 if (convptr->xrm_name == xrm_name) 945 return (XPointer) convptr->wctocs; 946 return (XPointer) NULL; 947} 948 949/* from XlcNUcsChar to XlcNChar, needed for input methods */ 950 951static XlcConv 952create_ucstocs_conv( 953 XLCd lcd, 954 XlcConvMethods methods) 955{ 956 957 if (XLC_PUBLIC_PART(lcd)->codeset 958 && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) { 959 XlcConv conv; 960 Utf8Conv *preferred; 961 962 lazy_init_all_charsets(); 963 964 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv)); 965 if (conv == (XlcConv) NULL) 966 return (XlcConv) NULL; 967 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 968 969 preferred[0] = &all_charsets[0]; /* ISO10646 */ 970 preferred[1] = (Utf8Conv) NULL; 971 972 conv->methods = methods; 973 conv->state = (XPointer) preferred; 974 975 return conv; 976 } else { 977 return create_tocs_conv(lcd, methods); 978 } 979} 980 981static int 982charset_wctocs_exactly( 983 Utf8Conv *preferred, 984 Utf8Conv *charsetp, 985 XlcSide *sidep, 986 XlcConv conv, 987 unsigned char *r, 988 ucs4_t wc, 989 int n) 990{ 991 int count; 992 Utf8Conv convptr; 993 994 for (; *preferred != (Utf8Conv) NULL; preferred++) { 995 convptr = *preferred; 996 count = convptr->wctocs(conv, r, wc, n); 997 if (count == RET_TOOSMALL) 998 return RET_TOOSMALL; 999 if (count != RET_ILSEQ) { 1000 *charsetp = convptr; 1001 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 1002 return count; 1003 } 1004 } 1005 return RET_ILSEQ; 1006} 1007 1008static int 1009ucstocs1( 1010 XlcConv conv, 1011 XPointer *from, 1012 int *from_left, 1013 XPointer *to, 1014 int *to_left, 1015 XPointer *args, 1016 int num_args) 1017{ 1018 ucs4_t const *src = (ucs4_t const *) *from; 1019 unsigned char *dst = (unsigned char *) *to; 1020 int unconv_num = 0; 1021 Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state; 1022 Utf8Conv chosen_charset = NULL; 1023 XlcSide chosen_side = XlcNONE; 1024 XlcCharSet charset = NULL; 1025 int count; 1026 1027 if (from == NULL || *from == NULL) 1028 return 0; 1029 1030 count = charset_wctocs_exactly(preferred_charsets, &chosen_charset, 1031 &chosen_side, conv, dst, *src, *to_left); 1032 if (count < 1) { 1033 unconv_num++; 1034 count = 0; 1035 } else { 1036 charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1037 } 1038 if (charset == NULL) 1039 return -1; 1040 1041 *from = (XPointer) ++src; 1042 (*from_left)--; 1043 *to = (XPointer) dst; 1044 *to_left -= count; 1045 1046 if (num_args >= 1) 1047 *((XlcCharSet *)args[0]) = charset; 1048 1049 return unconv_num; 1050} 1051 1052static XlcConvMethodsRec methods_ucstocs1 = { 1053 close_tocs_converter, 1054 ucstocs1, 1055 NULL 1056}; 1057 1058static XlcConv 1059open_ucstocs1( 1060 XLCd from_lcd, 1061 const char *from_type, 1062 XLCd to_lcd, 1063 const char *to_type) 1064{ 1065 return create_ucstocs_conv(from_lcd, &methods_ucstocs1); 1066} 1067 1068/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */ 1069 1070static int 1071ucstoutf8( 1072 XlcConv conv, 1073 XPointer *from, 1074 int *from_left, 1075 XPointer *to, 1076 int *to_left, 1077 XPointer *args, 1078 int num_args) 1079{ 1080 const ucs4_t *src; 1081 const ucs4_t *srcend; 1082 unsigned char *dst; 1083 unsigned char *dstend; 1084 int unconv_num; 1085 1086 if (from == NULL || *from == NULL) 1087 return 0; 1088 1089 src = (const ucs4_t *) *from; 1090 srcend = src + *from_left; 1091 dst = (unsigned char *) *to; 1092 dstend = dst + *to_left; 1093 unconv_num = 0; 1094 1095 while (src < srcend) { 1096 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1097 if (count == RET_TOOSMALL) 1098 break; 1099 if (count == RET_ILSEQ) 1100 unconv_num++; 1101 src++; 1102 dst += count; 1103 } 1104 1105 *from = (XPointer) src; 1106 *from_left = srcend - src; 1107 *to = (XPointer) dst; 1108 *to_left = dstend - dst; 1109 1110 return unconv_num; 1111} 1112 1113static XlcConvMethodsRec methods_ucstoutf8 = { 1114 close_converter, 1115 ucstoutf8, 1116 NULL 1117}; 1118 1119static XlcConv 1120open_ucstoutf8( 1121 XLCd from_lcd, 1122 const char *from_type, 1123 XLCd to_lcd, 1124 const char *to_type) 1125{ 1126 return create_conv(from_lcd, &methods_ucstoutf8); 1127} 1128 1129/* Registers UTF-8 converters for a non-UTF-8 locale. */ 1130void 1131_XlcAddUtf8Converters( 1132 XLCd lcd) 1133{ 1134 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8); 1135 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs); 1136 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1); 1137 _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8); 1138 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr); 1139 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1); 1140 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8); 1141} 1142 1143/***************************************************************************/ 1144/* Part II: UTF-8 locale loader conversion files 1145 * 1146 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode. 1147 */ 1148 1149/* from XlcNMultiByte to XlcNWideChar */ 1150 1151static int 1152utf8towcs( 1153 XlcConv conv, 1154 XPointer *from, 1155 int *from_left, 1156 XPointer *to, 1157 int *to_left, 1158 XPointer *args, 1159 int num_args) 1160{ 1161 unsigned char const *src; 1162 unsigned char const *srcend; 1163 wchar_t *dst; 1164 wchar_t *dstend; 1165 int unconv_num; 1166 1167 if (from == NULL || *from == NULL) 1168 return 0; 1169 1170 src = (unsigned char const *) *from; 1171 srcend = src + *from_left; 1172 dst = (wchar_t *) *to; 1173 dstend = dst + *to_left; 1174 unconv_num = 0; 1175 1176 while (src < srcend && dst < dstend) { 1177 ucs4_t wc; 1178 int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 1179 if (consumed == RET_TOOFEW(0)) 1180 break; 1181 if (consumed == RET_ILSEQ) { 1182 src++; 1183 *dst = BAD_WCHAR; 1184 unconv_num++; 1185 } else { 1186 src += consumed; 1187 *dst = wc; 1188 } 1189 dst++; 1190 } 1191 1192 *from = (XPointer) src; 1193 *from_left = srcend - src; 1194 *to = (XPointer) dst; 1195 *to_left = dstend - dst; 1196 1197 return unconv_num; 1198} 1199 1200static XlcConvMethodsRec methods_utf8towcs = { 1201 close_converter, 1202 utf8towcs, 1203 NULL 1204}; 1205 1206static XlcConv 1207open_utf8towcs( 1208 XLCd from_lcd, 1209 const char *from_type, 1210 XLCd to_lcd, 1211 const char *to_type) 1212{ 1213 return create_conv(from_lcd, &methods_utf8towcs); 1214} 1215 1216/* from XlcNWideChar to XlcNMultiByte */ 1217 1218static int 1219wcstoutf8( 1220 XlcConv conv, 1221 XPointer *from, 1222 int *from_left, 1223 XPointer *to, 1224 int *to_left, 1225 XPointer *args, 1226 int num_args) 1227{ 1228 wchar_t const *src; 1229 wchar_t const *srcend; 1230 unsigned char *dst; 1231 unsigned char *dstend; 1232 int unconv_num; 1233 1234 if (from == NULL || *from == NULL) 1235 return 0; 1236 1237 src = (wchar_t const *) *from; 1238 srcend = src + *from_left; 1239 dst = (unsigned char *) *to; 1240 dstend = dst + *to_left; 1241 unconv_num = 0; 1242 1243 while (src < srcend) { 1244 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1245 if (count == RET_TOOSMALL) 1246 break; 1247 if (count == RET_ILSEQ) { 1248 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 1249 if (count == RET_TOOSMALL) 1250 break; 1251 unconv_num++; 1252 } 1253 dst += count; 1254 src++; 1255 } 1256 1257 *from = (XPointer) src; 1258 *from_left = srcend - src; 1259 *to = (XPointer) dst; 1260 *to_left = dstend - dst; 1261 1262 return unconv_num; 1263} 1264 1265static XlcConvMethodsRec methods_wcstoutf8 = { 1266 close_converter, 1267 wcstoutf8, 1268 NULL 1269}; 1270 1271static XlcConv 1272open_wcstoutf8( 1273 XLCd from_lcd, 1274 const char *from_type, 1275 XLCd to_lcd, 1276 const char *to_type) 1277{ 1278 return create_conv(from_lcd, &methods_wcstoutf8); 1279} 1280 1281/* from XlcNString to XlcNWideChar */ 1282 1283static int 1284our_strtowcs( 1285 XlcConv conv, 1286 XPointer *from, 1287 int *from_left, 1288 XPointer *to, 1289 int *to_left, 1290 XPointer *args, 1291 int num_args) 1292{ 1293 unsigned char const *src; 1294 unsigned char const *srcend; 1295 wchar_t *dst; 1296 wchar_t *dstend; 1297 1298 if (from == NULL || *from == NULL) 1299 return 0; 1300 1301 src = (unsigned char const *) *from; 1302 srcend = src + *from_left; 1303 dst = (wchar_t *) *to; 1304 dstend = dst + *to_left; 1305 1306 while (src < srcend && dst < dstend) 1307 *dst++ = (wchar_t) *src++; 1308 1309 *from = (XPointer) src; 1310 *from_left = srcend - src; 1311 *to = (XPointer) dst; 1312 *to_left = dstend - dst; 1313 1314 return 0; 1315} 1316 1317static XlcConvMethodsRec methods_strtowcs = { 1318 close_converter, 1319 our_strtowcs, 1320 NULL 1321}; 1322 1323static XlcConv 1324open_strtowcs( 1325 XLCd from_lcd, 1326 const char *from_type, 1327 XLCd to_lcd, 1328 const char *to_type) 1329{ 1330 return create_conv(from_lcd, &methods_strtowcs); 1331} 1332 1333/* from XlcNWideChar to XlcNString */ 1334 1335static int 1336our_wcstostr( 1337 XlcConv conv, 1338 XPointer *from, 1339 int *from_left, 1340 XPointer *to, 1341 int *to_left, 1342 XPointer *args, 1343 int num_args) 1344{ 1345 wchar_t const *src; 1346 wchar_t const *srcend; 1347 unsigned char *dst; 1348 unsigned char *dstend; 1349 int unconv_num; 1350 1351 if (from == NULL || *from == NULL) 1352 return 0; 1353 1354 src = (wchar_t const *) *from; 1355 srcend = src + *from_left; 1356 dst = (unsigned char *) *to; 1357 dstend = dst + *to_left; 1358 unconv_num = 0; 1359 1360 while (src < srcend && dst < dstend) { 1361 unsigned int wc = *src++; 1362 if (wc < 0x80) 1363 *dst = wc; 1364 else { 1365 *dst = BAD_CHAR; 1366 unconv_num++; 1367 } 1368 dst++; 1369 } 1370 1371 *from = (XPointer) src; 1372 *from_left = srcend - src; 1373 *to = (XPointer) dst; 1374 *to_left = dstend - dst; 1375 1376 return unconv_num; 1377} 1378 1379static XlcConvMethodsRec methods_wcstostr = { 1380 close_converter, 1381 our_wcstostr, 1382 NULL 1383}; 1384 1385static XlcConv 1386open_wcstostr( 1387 XLCd from_lcd, 1388 const char *from_type, 1389 XLCd to_lcd, 1390 const char *to_type) 1391{ 1392 return create_conv(from_lcd, &methods_wcstostr); 1393} 1394 1395/* from XlcNCharSet to XlcNWideChar */ 1396 1397static int 1398cstowcs( 1399 XlcConv conv, 1400 XPointer *from, 1401 int *from_left, 1402 XPointer *to, 1403 int *to_left, 1404 XPointer *args, 1405 int num_args) 1406{ 1407 XlcCharSet charset; 1408 const char *name; 1409 Utf8Conv convptr; 1410 int i; 1411 unsigned char const *src; 1412 unsigned char const *srcend; 1413 wchar_t *dst; 1414 wchar_t *dstend; 1415 int unconv_num; 1416 1417 if (from == NULL || *from == NULL) 1418 return 0; 1419 1420 if (num_args < 1) 1421 return -1; 1422 1423 charset = (XlcCharSet) args[0]; 1424 name = charset->encoding_name; 1425 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1426 1427 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1428 if (!strcmp(convptr->name, name)) 1429 break; 1430 if (i == 0) 1431 return -1; 1432 1433 src = (unsigned char const *) *from; 1434 srcend = src + *from_left; 1435 dst = (wchar_t *) *to; 1436 dstend = dst + *to_left; 1437 unconv_num = 0; 1438 1439 while (src < srcend && dst < dstend) { 1440 unsigned int wc; 1441 int consumed; 1442 1443 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1444 if (consumed == RET_ILSEQ) 1445 return -1; 1446 if (consumed == RET_TOOFEW(0)) 1447 break; 1448 1449 *dst++ = wc; 1450 src += consumed; 1451 } 1452 1453 *from = (XPointer) src; 1454 *from_left = srcend - src; 1455 *to = (XPointer) dst; 1456 *to_left = dstend - dst; 1457 1458 return unconv_num; 1459} 1460 1461static XlcConvMethodsRec methods_cstowcs = { 1462 close_converter, 1463 cstowcs, 1464 NULL 1465}; 1466 1467static XlcConv 1468open_cstowcs( 1469 XLCd from_lcd, 1470 const char *from_type, 1471 XLCd to_lcd, 1472 const char *to_type) 1473{ 1474 lazy_init_all_charsets(); 1475 return create_conv(from_lcd, &methods_cstowcs); 1476} 1477 1478/* from XlcNWideChar to XlcNCharSet */ 1479 1480static int 1481wcstocs( 1482 XlcConv conv, 1483 XPointer *from, 1484 int *from_left, 1485 XPointer *to, 1486 int *to_left, 1487 XPointer *args, 1488 int num_args) 1489{ 1490 Utf8Conv *preferred_charsets; 1491 XlcCharSet last_charset = NULL; 1492 wchar_t const *src; 1493 wchar_t const *srcend; 1494 unsigned char *dst; 1495 unsigned char *dstend; 1496 int unconv_num; 1497 1498 if (from == NULL || *from == NULL) 1499 return 0; 1500 1501 preferred_charsets = (Utf8Conv *) conv->state; 1502 src = (wchar_t const *) *from; 1503 srcend = src + *from_left; 1504 dst = (unsigned char *) *to; 1505 dstend = dst + *to_left; 1506 unconv_num = 0; 1507 1508 while (src < srcend && dst < dstend) { 1509 Utf8Conv chosen_charset = NULL; 1510 XlcSide chosen_side = XlcNONE; 1511 wchar_t wc = *src; 1512 int count; 1513 1514 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1515 if (count == RET_TOOSMALL) 1516 break; 1517 if (count == RET_ILSEQ) { 1518 src++; 1519 unconv_num++; 1520 continue; 1521 } 1522 1523 if (last_charset == NULL) { 1524 last_charset = 1525 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1526 if (last_charset == NULL) { 1527 src++; 1528 unconv_num++; 1529 continue; 1530 } 1531 } else { 1532 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1533 && (last_charset->side == XlcGLGR 1534 || last_charset->side == chosen_side))) 1535 break; 1536 } 1537 src++; 1538 dst += count; 1539 } 1540 1541 if (last_charset == NULL) 1542 return -1; 1543 1544 *from = (XPointer) src; 1545 *from_left = srcend - src; 1546 *to = (XPointer) dst; 1547 *to_left = dstend - dst; 1548 1549 if (num_args >= 1) 1550 *((XlcCharSet *)args[0]) = last_charset; 1551 1552 return unconv_num; 1553} 1554 1555static XlcConvMethodsRec methods_wcstocs = { 1556 close_tocs_converter, 1557 wcstocs, 1558 NULL 1559}; 1560 1561static XlcConv 1562open_wcstocs( 1563 XLCd from_lcd, 1564 const char *from_type, 1565 XLCd to_lcd, 1566 const char *to_type) 1567{ 1568 return create_tocs_conv(from_lcd, &methods_wcstocs); 1569} 1570 1571/* from XlcNWideChar to XlcNChar */ 1572 1573static int 1574wcstocs1( 1575 XlcConv conv, 1576 XPointer *from, 1577 int *from_left, 1578 XPointer *to, 1579 int *to_left, 1580 XPointer *args, 1581 int num_args) 1582{ 1583 Utf8Conv *preferred_charsets; 1584 XlcCharSet last_charset = NULL; 1585 wchar_t const *src; 1586 wchar_t const *srcend; 1587 unsigned char *dst; 1588 unsigned char *dstend; 1589 int unconv_num; 1590 1591 if (from == NULL || *from == NULL) 1592 return 0; 1593 1594 preferred_charsets = (Utf8Conv *) conv->state; 1595 src = (wchar_t const *) *from; 1596 srcend = src + *from_left; 1597 dst = (unsigned char *) *to; 1598 dstend = dst + *to_left; 1599 unconv_num = 0; 1600 1601 while (src < srcend && dst < dstend) { 1602 Utf8Conv chosen_charset = NULL; 1603 XlcSide chosen_side = XlcNONE; 1604 wchar_t wc = *src; 1605 int count; 1606 1607 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1608 if (count == RET_TOOSMALL) 1609 break; 1610 if (count == RET_ILSEQ) { 1611 src++; 1612 unconv_num++; 1613 continue; 1614 } 1615 1616 if (last_charset == NULL) { 1617 last_charset = 1618 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1619 if (last_charset == NULL) { 1620 src++; 1621 unconv_num++; 1622 continue; 1623 } 1624 } else { 1625 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1626 && (last_charset->side == XlcGLGR 1627 || last_charset->side == chosen_side))) 1628 break; 1629 } 1630 src++; 1631 dst += count; 1632 break; 1633 } 1634 1635 if (last_charset == NULL) 1636 return -1; 1637 1638 *from = (XPointer) src; 1639 *from_left = srcend - src; 1640 *to = (XPointer) dst; 1641 *to_left = dstend - dst; 1642 1643 if (num_args >= 1) 1644 *((XlcCharSet *)args[0]) = last_charset; 1645 1646 return unconv_num; 1647} 1648 1649static XlcConvMethodsRec methods_wcstocs1 = { 1650 close_tocs_converter, 1651 wcstocs1, 1652 NULL 1653}; 1654 1655static XlcConv 1656open_wcstocs1( 1657 XLCd from_lcd, 1658 const char *from_type, 1659 XLCd to_lcd, 1660 const char *to_type) 1661{ 1662 return create_tocs_conv(from_lcd, &methods_wcstocs1); 1663} 1664 1665/* trivial, no conversion */ 1666 1667static int 1668identity( 1669 XlcConv conv, 1670 XPointer *from, 1671 int *from_left, 1672 XPointer *to, 1673 int *to_left, 1674 XPointer *args, 1675 int num_args) 1676{ 1677 unsigned char const *src; 1678 unsigned char const *srcend; 1679 unsigned char *dst; 1680 unsigned char *dstend; 1681 1682 if (from == NULL || *from == NULL) 1683 return 0; 1684 1685 src = (unsigned char const *) *from; 1686 srcend = src + *from_left; 1687 dst = (unsigned char *) *to; 1688 dstend = dst + *to_left; 1689 1690 while (src < srcend && dst < dstend) 1691 *dst++ = *src++; 1692 1693 *from = (XPointer) src; 1694 *from_left = srcend - src; 1695 *to = (XPointer) dst; 1696 *to_left = dstend - dst; 1697 1698 return 0; 1699} 1700 1701static XlcConvMethodsRec methods_identity = { 1702 close_converter, 1703 identity, 1704 NULL 1705}; 1706 1707static XlcConv 1708open_identity( 1709 XLCd from_lcd, 1710 const char *from_type, 1711 XLCd to_lcd, 1712 const char *to_type) 1713{ 1714 return create_conv(from_lcd, &methods_identity); 1715} 1716 1717/* from MultiByte/WideChar to FontCharSet. */ 1718/* They really use converters to CharSet 1719 * but with different create_conv procedure. */ 1720 1721static XlcConv 1722create_tofontcs_conv( 1723 XLCd lcd, 1724 XlcConvMethods methods) 1725{ 1726 XlcConv conv; 1727 int i, num, k, count; 1728 char **value, buf[20]; 1729 Utf8Conv *preferred; 1730 1731 lazy_init_all_charsets(); 1732 1733 for (i = 0, num = 0;; i++) { 1734 sprintf(buf, "fs%d.charset.name", i); 1735 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1736 if (count < 1) { 1737 sprintf(buf, "fs%d.charset", i); 1738 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1739 if (count < 1) 1740 break; 1741 } 1742 num += count; 1743 } 1744 1745 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv)); 1746 if (conv == (XlcConv) NULL) 1747 return (XlcConv) NULL; 1748 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 1749 1750 /* Loop through all fontsets mentioned in the locale. */ 1751 for (i = 0, num = 0;; i++) { 1752 sprintf(buf, "fs%d.charset.name", i); 1753 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1754 if (count < 1) { 1755 sprintf(buf, "fs%d.charset", i); 1756 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1757 if (count < 1) 1758 break; 1759 } 1760 while (count-- > 0) { 1761 XlcCharSet charset = _XlcGetCharSet(*value++); 1762 const char *name; 1763 1764 if (charset == (XlcCharSet) NULL) 1765 continue; 1766 1767 name = charset->encoding_name; 1768 /* If it wasn't already encountered... */ 1769 for (k = num - 1; k >= 0; k--) 1770 if (!strcmp(preferred[k]->name, name)) 1771 break; 1772 if (k < 0) { 1773 /* For fonts "ISO10646-1" means ucs2, not utf8.*/ 1774 if (!strcmp("ISO10646-1", name)) { 1775 preferred[num++] = &all_charsets[ucs2_conv_index]; 1776 continue; 1777 } 1778 /* Look it up in all_charsets[]. */ 1779 for (k = 0; k < all_charsets_count-1; k++) 1780 if (!strcmp(all_charsets[k].name, name)) { 1781 /* Add it to the preferred set. */ 1782 preferred[num++] = &all_charsets[k]; 1783 break; 1784 } 1785 } 1786 } 1787 } 1788 preferred[num] = (Utf8Conv) NULL; 1789 1790 conv->methods = methods; 1791 conv->state = (XPointer) preferred; 1792 1793 return conv; 1794} 1795 1796static XlcConv 1797open_wcstofcs( 1798 XLCd from_lcd, 1799 const char *from_type, 1800 XLCd to_lcd, 1801 const char *to_type) 1802{ 1803 return create_tofontcs_conv(from_lcd, &methods_wcstocs); 1804} 1805 1806static XlcConv 1807open_utf8tofcs( 1808 XLCd from_lcd, 1809 const char *from_type, 1810 XLCd to_lcd, 1811 const char *to_type) 1812{ 1813 return create_tofontcs_conv(from_lcd, &methods_utf8tocs); 1814} 1815 1816/* ========================== iconv Stuff ================================ */ 1817 1818/* from XlcNCharSet to XlcNMultiByte */ 1819 1820static int 1821iconv_cstombs(XlcConv conv, XPointer *from, int *from_left, 1822 XPointer *to, int *to_left, XPointer *args, int num_args) 1823{ 1824 XlcCharSet charset; 1825 char const *name; 1826 Utf8Conv convptr; 1827 int i; 1828 unsigned char const *src; 1829 unsigned char const *srcend; 1830 unsigned char *dst; 1831 unsigned char *dstend; 1832 int unconv_num; 1833 1834 if (from == NULL || *from == NULL) 1835 return 0; 1836 1837 if (num_args < 1) 1838 return -1; 1839 1840 charset = (XlcCharSet) args[0]; 1841 name = charset->encoding_name; 1842 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1843 1844 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1845 if (!strcmp(convptr->name, name)) 1846 break; 1847 if (i == 0) 1848 return -1; 1849 1850 src = (unsigned char const *) *from; 1851 srcend = src + *from_left; 1852 dst = (unsigned char *) *to; 1853 dstend = dst + *to_left; 1854 unconv_num = 0; 1855 1856 while (src < srcend) { 1857 ucs4_t wc; 1858 int consumed; 1859 int count; 1860 1861 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1862 if (consumed == RET_ILSEQ) 1863 return -1; 1864 if (consumed == RET_TOOFEW(0)) 1865 break; 1866 1867 /* Use stdc iconv to convert widechar -> multibyte */ 1868 1869 count = wctomb((char *)dst, wc); 1870 if (count == 0) 1871 break; 1872 if (count == -1) { 1873 count = wctomb((char *)dst, BAD_WCHAR); 1874 if (count == 0) 1875 break; 1876 unconv_num++; 1877 } 1878 src += consumed; 1879 dst += count; 1880 } 1881 1882 *from = (XPointer) src; 1883 *from_left = srcend - src; 1884 *to = (XPointer) dst; 1885 *to_left = dstend - dst; 1886 1887 return unconv_num; 1888 1889} 1890 1891static XlcConvMethodsRec iconv_cstombs_methods = { 1892 close_converter, 1893 iconv_cstombs, 1894 NULL 1895}; 1896 1897static XlcConv 1898open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1899{ 1900 lazy_init_all_charsets(); 1901 return create_conv(from_lcd, &iconv_cstombs_methods); 1902} 1903 1904static int 1905iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left, 1906 XPointer *to, int *to_left, XPointer *args, int num_args) 1907{ 1908 Utf8Conv *preferred_charsets; 1909 XlcCharSet last_charset = NULL; 1910 unsigned char const *src; 1911 unsigned char const *srcend; 1912 unsigned char *dst; 1913 unsigned char *dstend; 1914 int unconv_num; 1915 1916 if (from == NULL || *from == NULL) 1917 return 0; 1918 1919 preferred_charsets = (Utf8Conv *) conv->state; 1920 src = (unsigned char const *) *from; 1921 srcend = src + *from_left; 1922 dst = (unsigned char *) *to; 1923 dstend = dst + *to_left; 1924 unconv_num = 0; 1925 1926 while (src < srcend && dst < dstend) { 1927 Utf8Conv chosen_charset = NULL; 1928 XlcSide chosen_side = XlcNONE; 1929 wchar_t wc; 1930 int consumed; 1931 int count; 1932 1933 /* Uses stdc iconv to convert multibyte -> widechar */ 1934 1935 consumed = mbtowc(&wc, (const char *)src, srcend-src); 1936 if (consumed == 0) 1937 break; 1938 if (consumed == -1) { 1939 src++; 1940 unconv_num++; 1941 continue; 1942 } 1943 1944 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1945 1946 if (count == RET_TOOSMALL) 1947 break; 1948 if (count == RET_ILSEQ) { 1949 src += consumed; 1950 unconv_num++; 1951 continue; 1952 } 1953 1954 if (last_charset == NULL) { 1955 last_charset = 1956 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1957 if (last_charset == NULL) { 1958 src += consumed; 1959 unconv_num++; 1960 continue; 1961 } 1962 } else { 1963 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1964 && (last_charset->side == XlcGLGR 1965 || last_charset->side == chosen_side))) 1966 break; 1967 } 1968 src += consumed; 1969 dst += count; 1970 } 1971 1972 if (last_charset == NULL) 1973 return -1; 1974 1975 *from = (XPointer) src; 1976 *from_left = srcend - src; 1977 *to = (XPointer) dst; 1978 *to_left = dstend - dst; 1979 1980 if (num_args >= 1) 1981 *((XlcCharSet *)args[0]) = last_charset; 1982 1983 return unconv_num; 1984} 1985 1986static XlcConvMethodsRec iconv_mbstocs_methods = { 1987 close_tocs_converter, 1988 iconv_mbstocs, 1989 NULL 1990}; 1991 1992static XlcConv 1993open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1994{ 1995 return create_tocs_conv(from_lcd, &iconv_mbstocs_methods); 1996} 1997 1998/* from XlcNMultiByte to XlcNChar */ 1999 2000static int 2001iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left, 2002 XPointer *to, int *to_left, XPointer *args, int num_args) 2003{ 2004 Utf8Conv *preferred_charsets; 2005 XlcCharSet last_charset = NULL; 2006 unsigned char const *src; 2007 unsigned char const *srcend; 2008 unsigned char *dst; 2009 unsigned char *dstend; 2010 int unconv_num; 2011 2012 if (from == NULL || *from == NULL) 2013 return 0; 2014 2015 preferred_charsets = (Utf8Conv *) conv->state; 2016 src = (unsigned char const *) *from; 2017 srcend = src + *from_left; 2018 dst = (unsigned char *) *to; 2019 dstend = dst + *to_left; 2020 unconv_num = 0; 2021 2022 while (src < srcend && dst < dstend) { 2023 Utf8Conv chosen_charset = NULL; 2024 XlcSide chosen_side = XlcNONE; 2025 wchar_t wc; 2026 int consumed; 2027 int count; 2028 2029 /* Uses stdc iconv to convert multibyte -> widechar */ 2030 2031 consumed = mbtowc(&wc, (const char *)src, srcend-src); 2032 if (consumed == 0) 2033 break; 2034 if (consumed == -1) { 2035 src++; 2036 unconv_num++; 2037 continue; 2038 } 2039 2040 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 2041 if (count == RET_TOOSMALL) 2042 break; 2043 if (count == RET_ILSEQ) { 2044 src += consumed; 2045 unconv_num++; 2046 continue; 2047 } 2048 2049 if (last_charset == NULL) { 2050 last_charset = 2051 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 2052 if (last_charset == NULL) { 2053 src += consumed; 2054 unconv_num++; 2055 continue; 2056 } 2057 } else { 2058 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 2059 && (last_charset->side == XlcGLGR 2060 || last_charset->side == chosen_side))) 2061 break; 2062 } 2063 src += consumed; 2064 dst += count; 2065 } 2066 2067 if (last_charset == NULL) 2068 return -1; 2069 2070 *from = (XPointer) src; 2071 *from_left = srcend - src; 2072 *to = (XPointer) dst; 2073 *to_left = dstend - dst; 2074 2075 if (num_args >= 1) 2076 *((XlcCharSet *)args[0]) = last_charset; 2077 2078 return unconv_num; 2079} 2080 2081static XlcConvMethodsRec iconv_mbtocs_methods = { 2082 close_tocs_converter, 2083 iconv_mbtocs, 2084 NULL 2085}; 2086 2087static XlcConv 2088open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2089{ 2090 return create_tocs_conv(from_lcd, &iconv_mbtocs_methods ); 2091} 2092 2093/* from XlcNMultiByte to XlcNString */ 2094 2095static int 2096iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left, 2097 XPointer *to, int *to_left, XPointer *args, int num_args) 2098{ 2099 unsigned char const *src; 2100 unsigned char const *srcend; 2101 unsigned char *dst; 2102 unsigned char *dstend; 2103 int unconv_num; 2104 2105 if (from == NULL || *from == NULL) 2106 return 0; 2107 2108 src = (unsigned char const *) *from; 2109 srcend = src + *from_left; 2110 dst = (unsigned char *) *to; 2111 dstend = dst + *to_left; 2112 unconv_num = 0; 2113 2114 while (src < srcend) { 2115 unsigned char c; 2116 wchar_t wc; 2117 int consumed; 2118 2119 /* Uses stdc iconv to convert multibyte -> widechar */ 2120 2121 consumed = mbtowc(&wc, (const char *)src, srcend-src); 2122 if (consumed == 0) 2123 break; 2124 if (dst == dstend) 2125 break; 2126 if (consumed == -1) { 2127 consumed = 1; 2128 c = BAD_CHAR; 2129 unconv_num++; 2130 } else { 2131 if ((wc & ~(wchar_t)0xff) != 0) { 2132 c = BAD_CHAR; 2133 unconv_num++; 2134 } else 2135 c = (unsigned char) wc; 2136 } 2137 *dst++ = c; 2138 src += consumed; 2139 } 2140 2141 *from = (XPointer) src; 2142 *from_left = srcend - src; 2143 *to = (XPointer) dst; 2144 *to_left = dstend - dst; 2145 2146 return unconv_num; 2147} 2148 2149static XlcConvMethodsRec iconv_mbstostr_methods = { 2150 close_converter, 2151 iconv_mbstostr, 2152 NULL 2153}; 2154 2155static XlcConv 2156open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2157{ 2158 return create_conv(from_lcd, &iconv_mbstostr_methods); 2159} 2160 2161/* from XlcNString to XlcNMultiByte */ 2162static int 2163iconv_strtombs(XlcConv conv, XPointer *from, int *from_left, 2164 XPointer *to, int *to_left, XPointer *args, int num_args) 2165{ 2166 unsigned char const *src; 2167 unsigned char const *srcend; 2168 unsigned char *dst; 2169 unsigned char *dstend; 2170 2171 if (from == NULL || *from == NULL) 2172 return 0; 2173 2174 src = (unsigned char const *) *from; 2175 srcend = src + *from_left; 2176 dst = (unsigned char *) *to; 2177 dstend = dst + *to_left; 2178 2179 while (src < srcend) { 2180 int count = wctomb((char *)dst, *src); 2181 if (count < 0) 2182 break; 2183 dst += count; 2184 src++; 2185 } 2186 2187 *from = (XPointer) src; 2188 *from_left = srcend - src; 2189 *to = (XPointer) dst; 2190 *to_left = dstend - dst; 2191 2192 return 0; 2193} 2194 2195static XlcConvMethodsRec iconv_strtombs_methods= { 2196 close_converter, 2197 iconv_strtombs, 2198 NULL 2199}; 2200 2201static XlcConv 2202open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2203{ 2204 return create_conv(from_lcd, &iconv_strtombs_methods); 2205} 2206 2207/***************************************************************************/ 2208/* Part II: An iconv locale loader. 2209 * 2210 *Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode. 2211 */ 2212 2213/* from XlcNMultiByte to XlcNWideChar */ 2214static int 2215iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left, 2216 XPointer *to, int *to_left, XPointer *args, int num_args) 2217{ 2218 char *src = *((char **) from); 2219 wchar_t *dst = *((wchar_t **) to); 2220 int src_left = *from_left; 2221 int dst_left = *to_left; 2222 int length, unconv_num = 0; 2223 2224 while (src_left > 0 && dst_left > 0) { 2225 length = mbtowc(dst, src, src_left); 2226 2227 if (length > 0) { 2228 src += length; 2229 src_left -= length; 2230 if (dst) 2231 dst++; 2232 dst_left--; 2233 } else if (length < 0) { 2234 src++; 2235 src_left--; 2236 unconv_num++; 2237 } else { 2238 /* null ? */ 2239 src++; 2240 src_left--; 2241 if (dst) 2242 *dst++ = L'\0'; 2243 dst_left--; 2244 } 2245 } 2246 2247 *from = (XPointer) src; 2248 if (dst) 2249 *to = (XPointer) dst; 2250 *from_left = src_left; 2251 *to_left = dst_left; 2252 2253 return unconv_num; 2254} 2255 2256static XlcConvMethodsRec iconv_mbstowcs_methods = { 2257 close_converter, 2258 iconv_mbstowcs, 2259 NULL 2260} ; 2261 2262static XlcConv 2263open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2264{ 2265 return create_conv(from_lcd, &iconv_mbstowcs_methods); 2266} 2267 2268static int 2269iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left, 2270 XPointer *to, int *to_left, XPointer *args, int num_args) 2271{ 2272 wchar_t *src = *((wchar_t **) from); 2273 char *dst = *((char **) to); 2274 int src_left = *from_left; 2275 int dst_left = *to_left; 2276 int length, unconv_num = 0; 2277 2278 while (src_left > 0 && dst_left >= MB_CUR_MAX) { 2279 length = wctomb(dst, *src); /* XXX */ 2280 2281 if (length > 0) { 2282 src++; 2283 src_left--; 2284 if (dst) 2285 dst += length; 2286 dst_left -= length; 2287 } else if (length < 0) { 2288 src++; 2289 src_left--; 2290 unconv_num++; 2291 } 2292 } 2293 2294 *from = (XPointer) src; 2295 if (dst) 2296 *to = (XPointer) dst; 2297 *from_left = src_left; 2298 *to_left = dst_left; 2299 2300 return unconv_num; 2301} 2302 2303static XlcConvMethodsRec iconv_wcstombs_methods = { 2304 close_converter, 2305 iconv_wcstombs, 2306 NULL 2307} ; 2308 2309static XlcConv 2310open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2311{ 2312 return create_conv(from_lcd, &iconv_wcstombs_methods); 2313} 2314 2315static XlcConv 2316open_iconv_mbstofcs( 2317 XLCd from_lcd, 2318 const char *from_type, 2319 XLCd to_lcd, 2320 const char *to_type) 2321{ 2322 return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods); 2323} 2324 2325/* Registers UTF-8 converters for a UTF-8 locale. */ 2326 2327void 2328_XlcAddUtf8LocaleConverters( 2329 XLCd lcd) 2330{ 2331 /* Register elementary converters. */ 2332 2333 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs); 2334 2335 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8); 2336 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2337 2338 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2339 2340 /* Register converters for XlcNCharSet. This implicitly provides 2341 * converters from and to XlcNCompoundText. */ 2342 2343 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8); 2344 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs); 2345 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1); 2346 2347 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2348 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2349 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2350 2351 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8); 2352 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr); 2353 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity); 2354 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity); 2355 2356 /* Register converters for XlcNFontCharSet */ 2357 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs); 2358 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2359} 2360 2361void 2362_XlcAddGB18030LocaleConverters( 2363 XLCd lcd) 2364{ 2365 2366 /* Register elementary converters. */ 2367 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs); 2368 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs); 2369 2370 /* Register converters for XlcNCharSet. This implicitly provides 2371 * converters from and to XlcNCompoundText. */ 2372 2373 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs); 2374 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs); 2375 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs); 2376 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs); 2377 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr); 2378 2379 /* Register converters for XlcNFontCharSet */ 2380 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs); 2381 2382 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2383 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2384 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2385 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2386 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2387 2388 /* Register converters for XlcNFontCharSet */ 2389 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2390} 2391