lcUTF8.c revision e9628295
1/****************************************************************** 2 3 Copyright 1993 by SunSoft, Inc. 4 Copyright 1999-2000 by Bruno Haible 5 6Permission to use, copy, modify, distribute, and sell this software 7and its documentation for any purpose is hereby granted without fee, 8provided that the above copyright notice appear in all copies and 9that both that copyright notice and this permission notice appear 10in supporting documentation, and that the names of SunSoft, Inc. and 11Bruno Haible not be used in advertising or publicity pertaining to 12distribution of the software without specific, written prior 13permission. SunSoft, Inc. and Bruno Haible make no representations 14about the suitability of this software for any purpose. It is 15provided "as is" without express or implied warranty. 16 17SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD 18TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 19AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE 20FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 21WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 22ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 23OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 24 25******************************************************************/ 26 27/* 28 * This file contains: 29 * 30 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 31 * 32 * Used for three purposes: 33 * 1. The UTF-8 locales, see below. 34 * 2. Unicode aware applications for which the use of 8-bit character 35 * sets is an anachronism. 36 * 3. For conversion from keysym to locale encoding. 37 * 38 * II. Conversion files for an UTF-8 locale loader. 39 * Supports: all locales with codeset UTF-8. 40 * How: Provides converters for UTF-8. 41 * Platforms: all systems. 42 * 43 * The loader itself is located in lcUTF8.c. 
44 */ 45 46/* 47 * The conversion from UTF-8 to CompoundText is realized in a very 48 * conservative way. Recall that CompoundText data is used for inter-client 49 * communication purposes. We distinguish three classes of clients: 50 * - Clients which accept only those pieces of CompoundText which belong to 51 * the character set understood by the current locale. 52 * (Example: clients which are linked to an older X11 library.) 53 * - Clients which accept CompoundText with multiple character sets and parse 54 * it themselves. 55 * (Example: emacs, xemacs.) 56 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList 57 * functions for the conversion of CompoundText to their current locale's 58 * multi-byte/wide-character format. 59 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as 60 * follows. For every character, it first tests whether the character is 61 * representable in the current locale's original (non-UTF-8) character set. 62 * If not, it goes through the list of predefined character sets for 63 * CompoundText and tests if the character is representable in that character 64 * set. If so, it encodes the character using its code within that character 65 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since 66 * clients of the first and second kind ignore such encapsulated text, 67 * this encapsulation is kept to a minimum and terminated as early as possible. 68 * 69 * In a distant future, when clients of the first and second kind will have 70 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText 71 * without first going through the list of predefined character sets. 
72 */ 73 74#ifdef HAVE_CONFIG_H 75#include <config.h> 76#endif 77#include <stdio.h> 78#include "Xlibint.h" 79#include "XlcPubI.h" 80#include "XlcGeneric.h" 81 82static XlcConv 83create_conv( 84 XLCd lcd, 85 XlcConvMethods methods) 86{ 87 XlcConv conv; 88 89 conv = Xmalloc(sizeof(XlcConvRec)); 90 if (conv == (XlcConv) NULL) 91 return (XlcConv) NULL; 92 93 conv->methods = methods; 94 conv->state = NULL; 95 96 return conv; 97} 98 99static void 100close_converter( 101 XlcConv conv) 102{ 103 Xfree(conv); 104} 105 106/* Replacement character for invalid multibyte sequence or wide character. */ 107#define BAD_WCHAR ((ucs4_t) 0xfffd) 108#define BAD_CHAR '?' 109 110/***************************************************************************/ 111/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 112 * 113 * Note that this code works in any locale. We store Unicode values in 114 * `ucs4_t' variables, but don't pass them to the user. 115 * 116 * This code has to support all character sets that are used for CompoundText, 117 * nothing more, nothing less. See the table in lcCT.c. 118 * Since the conversion _to_ CompoundText is likely to need the tables for all 119 * character sets at once, we don't use dynamic loading (of tables or shared 120 * libraries through iconv()). Use a fixed set of tables instead. 121 * 122 * We use statically computed tables, not dynamically allocated arrays, 123 * because it's more memory efficient: Different processes using the same 124 * libX11 shared library share the "text" and read-only "data" sections. 
 */

/* Holds one Unicode code point. */
typedef unsigned int ucs4_t;
/* Alias for XlcConv. */
#define conv_t XlcConv

/*
 * One entry of the character set table: a character set name together
 * with the two routines converting between that set and Unicode.
 */
typedef struct _Utf8ConvRec {
    const char *name;       /* character set name, compared against an
                               XlcCharSet's encoding_name */
    XrmQuark xrm_name;      /* quark of `name'; NULLQUARK in the static
                               initializer, filled in by init_all_charsets() */
    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
                            /* character set bytes -> Unicode */
    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
                            /* Unicode -> character set bytes */
} Utf8ConvRec, *Utf8Conv;

/*
 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
 * converts the byte sequence starting at s to a wide character. Up to n bytes
 * are available at s. n is >= 1.
 * Result is number of bytes consumed (if a wide character was read),
 * or 0 if invalid, or -1 if n too small.
 *
 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
 * converts the wide character wc to the character set xxx, and stores the
 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
 */

/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
   Note that RET_TOOFEW(0) has the same value as RET_TOOSMALL. */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   -1

/*
 * The tables below are bijective. It would be possible to extend the
 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
 * but *only* with characters not contained in any other table, and *only*
 * when the current locale is not an UTF-8 locale.
162 */ 163 164#include "lcUniConv/utf8.h" 165#include "lcUniConv/ucs2be.h" 166#ifdef notused 167#include "lcUniConv/ascii.h" 168#endif 169#include "lcUniConv/iso8859_1.h" 170#include "lcUniConv/iso8859_2.h" 171#include "lcUniConv/iso8859_3.h" 172#include "lcUniConv/iso8859_4.h" 173#include "lcUniConv/iso8859_5.h" 174#include "lcUniConv/iso8859_6.h" 175#include "lcUniConv/iso8859_7.h" 176#include "lcUniConv/iso8859_8.h" 177#include "lcUniConv/iso8859_9.h" 178#include "lcUniConv/iso8859_10.h" 179#include "lcUniConv/iso8859_11.h" 180#include "lcUniConv/iso8859_13.h" 181#include "lcUniConv/iso8859_14.h" 182#include "lcUniConv/iso8859_15.h" 183#include "lcUniConv/iso8859_16.h" 184#include "lcUniConv/iso8859_9e.h" 185#include "lcUniConv/jisx0201.h" 186#include "lcUniConv/tis620.h" 187#include "lcUniConv/koi8_r.h" 188#include "lcUniConv/koi8_u.h" 189#include "lcUniConv/koi8_c.h" 190#include "lcUniConv/armscii_8.h" 191#include "lcUniConv/cp1133.h" 192#include "lcUniConv/mulelao.h" 193#include "lcUniConv/viscii.h" 194#include "lcUniConv/tcvn.h" 195#include "lcUniConv/georgian_academy.h" 196#include "lcUniConv/georgian_ps.h" 197#include "lcUniConv/cp1251.h" 198#include "lcUniConv/cp1255.h" 199#include "lcUniConv/cp1256.h" 200#include "lcUniConv/tatar_cyr.h" 201 202typedef struct { 203 unsigned short indx; /* index into big table */ 204 unsigned short used; /* bitmask of used entries */ 205} Summary16; 206 207#include "lcUniConv/gb2312.h" 208#include "lcUniConv/jisx0208.h" 209#include "lcUniConv/jisx0212.h" 210#include "lcUniConv/ksc5601.h" 211#include "lcUniConv/big5.h" 212#include "lcUniConv/big5_emacs.h" 213#include "lcUniConv/big5hkscs.h" 214#include "lcUniConv/gbk.h" 215 216static Utf8ConvRec all_charsets[] = { 217 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 218 (for lookup speed), once at the end (as a fallback). 
*/ 219 { "ISO10646-1", NULLQUARK, 220 utf8_mbtowc, utf8_wctomb 221 }, 222 223 { "ISO8859-1", NULLQUARK, 224 iso8859_1_mbtowc, iso8859_1_wctomb 225 }, 226 { "ISO8859-2", NULLQUARK, 227 iso8859_2_mbtowc, iso8859_2_wctomb 228 }, 229 { "ISO8859-3", NULLQUARK, 230 iso8859_3_mbtowc, iso8859_3_wctomb 231 }, 232 { "ISO8859-4", NULLQUARK, 233 iso8859_4_mbtowc, iso8859_4_wctomb 234 }, 235 { "ISO8859-5", NULLQUARK, 236 iso8859_5_mbtowc, iso8859_5_wctomb 237 }, 238 { "ISO8859-6", NULLQUARK, 239 iso8859_6_mbtowc, iso8859_6_wctomb 240 }, 241 { "ISO8859-7", NULLQUARK, 242 iso8859_7_mbtowc, iso8859_7_wctomb 243 }, 244 { "ISO8859-8", NULLQUARK, 245 iso8859_8_mbtowc, iso8859_8_wctomb 246 }, 247 { "ISO8859-9", NULLQUARK, 248 iso8859_9_mbtowc, iso8859_9_wctomb 249 }, 250 { "ISO8859-10", NULLQUARK, 251 iso8859_10_mbtowc, iso8859_10_wctomb 252 }, 253 { "ISO8859-11", NULLQUARK, 254 iso8859_11_mbtowc, iso8859_11_wctomb 255 }, 256 { "ISO8859-13", NULLQUARK, 257 iso8859_13_mbtowc, iso8859_13_wctomb 258 }, 259 { "ISO8859-14", NULLQUARK, 260 iso8859_14_mbtowc, iso8859_14_wctomb 261 }, 262 { "ISO8859-15", NULLQUARK, 263 iso8859_15_mbtowc, iso8859_15_wctomb 264 }, 265 { "ISO8859-16", NULLQUARK, 266 iso8859_16_mbtowc, iso8859_16_wctomb 267 }, 268 { "JISX0201.1976-0", NULLQUARK, 269 jisx0201_mbtowc, jisx0201_wctomb 270 }, 271 { "TIS620-0", NULLQUARK, 272 tis620_mbtowc, tis620_wctomb 273 }, 274 { "GB2312.1980-0", NULLQUARK, 275 gb2312_mbtowc, gb2312_wctomb 276 }, 277 { "JISX0208.1983-0", NULLQUARK, 278 jisx0208_mbtowc, jisx0208_wctomb 279 }, 280 { "JISX0208.1990-0", NULLQUARK, 281 jisx0208_mbtowc, jisx0208_wctomb 282 }, 283 { "JISX0212.1990-0", NULLQUARK, 284 jisx0212_mbtowc, jisx0212_wctomb 285 }, 286 { "KSC5601.1987-0", NULLQUARK, 287 ksc5601_mbtowc, ksc5601_wctomb 288 }, 289 { "KOI8-R", NULLQUARK, 290 koi8_r_mbtowc, koi8_r_wctomb 291 }, 292 { "KOI8-U", NULLQUARK, 293 koi8_u_mbtowc, koi8_u_wctomb 294 }, 295 { "KOI8-C", NULLQUARK, 296 koi8_c_mbtowc, koi8_c_wctomb 297 }, 298 { "TATAR-CYR", 
NULLQUARK, 299 tatar_cyr_mbtowc, tatar_cyr_wctomb 300 }, 301 { "ARMSCII-8", NULLQUARK, 302 armscii_8_mbtowc, armscii_8_wctomb 303 }, 304 { "IBM-CP1133", NULLQUARK, 305 cp1133_mbtowc, cp1133_wctomb 306 }, 307 { "MULELAO-1", NULLQUARK, 308 mulelao_mbtowc, mulelao_wctomb 309 }, 310 { "VISCII1.1-1", NULLQUARK, 311 viscii_mbtowc, viscii_wctomb 312 }, 313 { "TCVN-5712", NULLQUARK, 314 tcvn_mbtowc, tcvn_wctomb 315 }, 316 { "GEORGIAN-ACADEMY", NULLQUARK, 317 georgian_academy_mbtowc, georgian_academy_wctomb 318 }, 319 { "GEORGIAN-PS", NULLQUARK, 320 georgian_ps_mbtowc, georgian_ps_wctomb 321 }, 322 { "ISO8859-9E", NULLQUARK, 323 iso8859_9e_mbtowc, iso8859_9e_wctomb 324 }, 325 { "MICROSOFT-CP1251", NULLQUARK, 326 cp1251_mbtowc, cp1251_wctomb 327 }, 328 { "MICROSOFT-CP1255", NULLQUARK, 329 cp1255_mbtowc, cp1255_wctomb 330 }, 331 { "MICROSOFT-CP1256", NULLQUARK, 332 cp1256_mbtowc, cp1256_wctomb 333 }, 334 { "BIG5-0", NULLQUARK, 335 big5_mbtowc, big5_wctomb 336 }, 337 { "BIG5-E0", NULLQUARK, 338 big5_0_mbtowc, big5_0_wctomb 339 }, 340 { "BIG5-E1", NULLQUARK, 341 big5_1_mbtowc, big5_1_wctomb 342 }, 343 { "GBK-0", NULLQUARK, 344 gbk_mbtowc, gbk_wctomb 345 }, 346 { "BIG5HKSCS-0", NULLQUARK, 347 big5hkscs_mbtowc, big5hkscs_wctomb 348 }, 349 350 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 351 (for lookup speed), once at the end (as a fallback). 
*/ 352 { "ISO10646-1", NULLQUARK, 353 utf8_mbtowc, utf8_wctomb 354 }, 355 356 /* Encoding ISO10646-1 for fonts means UCS2-like encoding 357 so for conversion to FontCharSet we need this record */ 358 { "ISO10646-1", NULLQUARK, 359 ucs2be_mbtowc, ucs2be_wctomb 360 } 361}; 362 363#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0])) 364#define all_charsets_count (charsets_table_size - 1) 365#define ucs2_conv_index (charsets_table_size - 1) 366 367static void 368init_all_charsets (void) 369{ 370 Utf8Conv convptr; 371 int i; 372 373 for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--) 374 convptr->xrm_name = XrmStringToQuark(convptr->name); 375} 376 377#define lazy_init_all_charsets() \ 378 do { \ 379 if (all_charsets[0].xrm_name == NULLQUARK) \ 380 init_all_charsets(); \ 381 } while (0) 382 383/* from XlcNCharSet to XlcNUtf8String */ 384 385static int 386cstoutf8( 387 XlcConv conv, 388 XPointer *from, 389 int *from_left, 390 XPointer *to, 391 int *to_left, 392 XPointer *args, 393 int num_args) 394{ 395 XlcCharSet charset; 396 const char *name; 397 Utf8Conv convptr; 398 int i; 399 unsigned char const *src; 400 unsigned char const *srcend; 401 unsigned char *dst; 402 unsigned char *dstend; 403 int unconv_num; 404 405 if (from == NULL || *from == NULL) 406 return 0; 407 408 if (num_args < 1) 409 return -1; 410 411 charset = (XlcCharSet) args[0]; 412 name = charset->encoding_name; 413 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 414 415 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 416 if (!strcmp(convptr->name, name)) 417 break; 418 if (i == 0) 419 return -1; 420 421 src = (unsigned char const *) *from; 422 srcend = src + *from_left; 423 dst = (unsigned char *) *to; 424 dstend = dst + *to_left; 425 unconv_num = 0; 426 427 while (src < srcend) { 428 ucs4_t wc; 429 int consumed; 430 int count; 431 432 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 433 if (consumed 
== RET_ILSEQ) 434 return -1; 435 if (consumed == RET_TOOFEW(0)) 436 break; 437 438 count = utf8_wctomb(NULL, dst, wc, dstend-dst); 439 if (count == RET_TOOSMALL) 440 break; 441 if (count == RET_ILSEQ) { 442 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 443 if (count == RET_TOOSMALL) 444 break; 445 unconv_num++; 446 } 447 src += consumed; 448 dst += count; 449 } 450 451 *from = (XPointer) src; 452 *from_left = srcend - src; 453 *to = (XPointer) dst; 454 *to_left = dstend - dst; 455 456 return unconv_num; 457} 458 459static XlcConvMethodsRec methods_cstoutf8 = { 460 close_converter, 461 cstoutf8, 462 NULL 463}; 464 465static XlcConv 466open_cstoutf8( 467 XLCd from_lcd, 468 const char *from_type, 469 XLCd to_lcd, 470 const char *to_type) 471{ 472 lazy_init_all_charsets(); 473 return create_conv(from_lcd, &methods_cstoutf8); 474} 475 476/* from XlcNUtf8String to XlcNCharSet */ 477 478static XlcConv 479create_tocs_conv( 480 XLCd lcd, 481 XlcConvMethods methods) 482{ 483 XlcConv conv; 484 CodeSet *codeset_list; 485 int codeset_num; 486 int charset_num; 487 int i, j, k; 488 Utf8Conv *preferred; 489 490 lazy_init_all_charsets(); 491 492 codeset_list = XLC_GENERIC(lcd, codeset_list); 493 codeset_num = XLC_GENERIC(lcd, codeset_num); 494 495 charset_num = 0; 496 for (i = 0; i < codeset_num; i++) 497 charset_num += codeset_list[i]->num_charsets; 498 if (charset_num > all_charsets_count-1) 499 charset_num = all_charsets_count-1; 500 501 conv = Xmalloc(sizeof(XlcConvRec) 502 + (charset_num + 1) * sizeof(Utf8Conv)); 503 if (conv == (XlcConv) NULL) 504 return (XlcConv) NULL; 505 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 506 507 /* Loop through all codesets mentioned in the locale. 
*/ 508 charset_num = 0; 509 for (i = 0; i < codeset_num; i++) { 510 XlcCharSet *charsets = codeset_list[i]->charset_list; 511 int num_charsets = codeset_list[i]->num_charsets; 512 for (j = 0; j < num_charsets; j++) { 513 const char *name = charsets[j]->encoding_name; 514 /* If it wasn't already encountered... */ 515 for (k = charset_num-1; k >= 0; k--) 516 if (!strcmp(preferred[k]->name, name)) 517 break; 518 if (k < 0) { 519 /* Look it up in all_charsets[]. */ 520 for (k = 0; k < all_charsets_count-1; k++) 521 if (!strcmp(all_charsets[k].name, name)) { 522 /* Add it to the preferred set. */ 523 preferred[charset_num++] = &all_charsets[k]; 524 break; 525 } 526 } 527 } 528 } 529 preferred[charset_num] = (Utf8Conv) NULL; 530 531 conv->methods = methods; 532 conv->state = (XPointer) preferred; 533 534 return conv; 535} 536 537static void 538close_tocs_converter( 539 XlcConv conv) 540{ 541 /* conv->state is allocated together with conv, free both at once. */ 542 Xfree(conv); 543} 544 545/* 546 * Converts a Unicode character to an appropriate character set. The NULL 547 * terminated array of preferred character sets is passed as first argument. 548 * If successful, *charsetp is set to the character set that was used, and 549 * *sidep is set to the character set side (XlcGL or XlcGR). 550 */ 551static int 552charset_wctocs( 553 Utf8Conv *preferred, 554 Utf8Conv *charsetp, 555 XlcSide *sidep, 556 XlcConv conv, 557 unsigned char *r, 558 ucs4_t wc, 559 int n) 560{ 561 int count; 562 Utf8Conv convptr; 563 int i; 564 565 for (; *preferred != (Utf8Conv) NULL; preferred++) { 566 convptr = *preferred; 567 count = convptr->wctocs(conv, r, wc, n); 568 if (count == RET_TOOSMALL) 569 return RET_TOOSMALL; 570 if (count != RET_ILSEQ) { 571 *charsetp = convptr; 572 *sidep = (*r < 0x80 ? 
XlcGL : XlcGR); 573 return count; 574 } 575 } 576 for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) { 577 count = convptr->wctocs(conv, r, wc, n); 578 if (count == RET_TOOSMALL) 579 return RET_TOOSMALL; 580 if (count != RET_ILSEQ) { 581 *charsetp = convptr; 582 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 583 return count; 584 } 585 } 586 return RET_ILSEQ; 587} 588 589static int 590utf8tocs( 591 XlcConv conv, 592 XPointer *from, 593 int *from_left, 594 XPointer *to, 595 int *to_left, 596 XPointer *args, 597 int num_args) 598{ 599 Utf8Conv *preferred_charsets; 600 XlcCharSet last_charset = NULL; 601 unsigned char const *src; 602 unsigned char const *srcend; 603 unsigned char *dst; 604 unsigned char *dstend; 605 int unconv_num; 606 607 if (from == NULL || *from == NULL) 608 return 0; 609 610 preferred_charsets = (Utf8Conv *) conv->state; 611 src = (unsigned char const *) *from; 612 srcend = src + *from_left; 613 dst = (unsigned char *) *to; 614 dstend = dst + *to_left; 615 unconv_num = 0; 616 617 while (src < srcend && dst < dstend) { 618 Utf8Conv chosen_charset = NULL; 619 XlcSide chosen_side = XlcNONE; 620 ucs4_t wc; 621 int consumed; 622 int count; 623 624 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 625 if (consumed == RET_TOOFEW(0)) 626 break; 627 if (consumed == RET_ILSEQ) { 628 src++; 629 unconv_num++; 630 continue; 631 } 632 633 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 634 if (count == RET_TOOSMALL) 635 break; 636 if (count == RET_ILSEQ) { 637 src += consumed; 638 unconv_num++; 639 continue; 640 } 641 642 if (last_charset == NULL) { 643 last_charset = 644 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 645 if (last_charset == NULL) { 646 src += consumed; 647 unconv_num++; 648 continue; 649 } 650 } else { 651 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 652 && (last_charset->side == XlcGLGR 653 || last_charset->side == chosen_side))) 654 
break; 655 } 656 src += consumed; 657 dst += count; 658 } 659 660 if (last_charset == NULL) 661 return -1; 662 663 *from = (XPointer) src; 664 *from_left = srcend - src; 665 *to = (XPointer) dst; 666 *to_left = dstend - dst; 667 668 if (num_args >= 1) 669 *((XlcCharSet *)args[0]) = last_charset; 670 671 return unconv_num; 672} 673 674static XlcConvMethodsRec methods_utf8tocs = { 675 close_tocs_converter, 676 utf8tocs, 677 NULL 678}; 679 680static XlcConv 681open_utf8tocs( 682 XLCd from_lcd, 683 const char *from_type, 684 XLCd to_lcd, 685 const char *to_type) 686{ 687 return create_tocs_conv(from_lcd, &methods_utf8tocs); 688} 689 690/* from XlcNUtf8String to XlcNChar */ 691 692static int 693utf8tocs1( 694 XlcConv conv, 695 XPointer *from, 696 int *from_left, 697 XPointer *to, 698 int *to_left, 699 XPointer *args, 700 int num_args) 701{ 702 Utf8Conv *preferred_charsets; 703 XlcCharSet last_charset = NULL; 704 unsigned char const *src; 705 unsigned char const *srcend; 706 unsigned char *dst; 707 unsigned char *dstend; 708 int unconv_num; 709 710 if (from == NULL || *from == NULL) 711 return 0; 712 713 preferred_charsets = (Utf8Conv *) conv->state; 714 src = (unsigned char const *) *from; 715 srcend = src + *from_left; 716 dst = (unsigned char *) *to; 717 dstend = dst + *to_left; 718 unconv_num = 0; 719 720 while (src < srcend && dst < dstend) { 721 Utf8Conv chosen_charset = NULL; 722 XlcSide chosen_side = XlcNONE; 723 ucs4_t wc; 724 int consumed; 725 int count; 726 727 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 728 if (consumed == RET_TOOFEW(0)) 729 break; 730 if (consumed == RET_ILSEQ) { 731 src++; 732 unconv_num++; 733 continue; 734 } 735 736 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 737 if (count == RET_TOOSMALL) 738 break; 739 if (count == RET_ILSEQ) { 740 src += consumed; 741 unconv_num++; 742 continue; 743 } 744 745 last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 746 
747 if (last_charset == NULL) { 748 src += consumed; 749 unconv_num++; 750 continue; 751 } 752 753 src += consumed; 754 dst += count; 755 break; 756 } 757 758 if (last_charset == NULL) 759 return -1; 760 761 *from = (XPointer) src; 762 *from_left = srcend - src; 763 *to = (XPointer) dst; 764 *to_left = dstend - dst; 765 766 if (num_args >= 1) 767 *((XlcCharSet *)args[0]) = last_charset; 768 769 return unconv_num; 770} 771 772static XlcConvMethodsRec methods_utf8tocs1 = { 773 close_tocs_converter, 774 utf8tocs1, 775 NULL 776}; 777 778static XlcConv 779open_utf8tocs1( 780 XLCd from_lcd, 781 const char *from_type, 782 XLCd to_lcd, 783 const char *to_type) 784{ 785 return create_tocs_conv(from_lcd, &methods_utf8tocs1); 786} 787 788/* from XlcNUtf8String to XlcNString */ 789 790static int 791utf8tostr( 792 XlcConv conv, 793 XPointer *from, 794 int *from_left, 795 XPointer *to, 796 int *to_left, 797 XPointer *args, 798 int num_args) 799{ 800 unsigned char const *src; 801 unsigned char const *srcend; 802 unsigned char *dst; 803 unsigned char *dstend; 804 int unconv_num; 805 806 if (from == NULL || *from == NULL) 807 return 0; 808 809 src = (unsigned char const *) *from; 810 srcend = src + *from_left; 811 dst = (unsigned char *) *to; 812 dstend = dst + *to_left; 813 unconv_num = 0; 814 815 while (src < srcend) { 816 unsigned char c; 817 ucs4_t wc; 818 int consumed; 819 820 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 821 if (consumed == RET_TOOFEW(0)) 822 break; 823 if (dst == dstend) 824 break; 825 if (consumed == RET_ILSEQ) { 826 consumed = 1; 827 c = BAD_CHAR; 828 unconv_num++; 829 } else { 830 if ((wc & ~(ucs4_t)0xff) != 0) { 831 c = BAD_CHAR; 832 unconv_num++; 833 } else 834 c = (unsigned char) wc; 835 } 836 *dst++ = c; 837 src += consumed; 838 } 839 840 *from = (XPointer) src; 841 *from_left = srcend - src; 842 *to = (XPointer) dst; 843 *to_left = dstend - dst; 844 845 return unconv_num; 846} 847 848static XlcConvMethodsRec methods_utf8tostr = { 849 
close_converter, 850 utf8tostr, 851 NULL 852}; 853 854static XlcConv 855open_utf8tostr( 856 XLCd from_lcd, 857 const char *from_type, 858 XLCd to_lcd, 859 const char *to_type) 860{ 861 return create_conv(from_lcd, &methods_utf8tostr); 862} 863 864/* from XlcNString to XlcNUtf8String */ 865 866static int 867strtoutf8( 868 XlcConv conv, 869 XPointer *from, 870 int *from_left, 871 XPointer *to, 872 int *to_left, 873 XPointer *args, 874 int num_args) 875{ 876 unsigned char const *src; 877 unsigned char const *srcend; 878 unsigned char *dst; 879 unsigned char *dstend; 880 881 if (from == NULL || *from == NULL) 882 return 0; 883 884 src = (unsigned char const *) *from; 885 srcend = src + *from_left; 886 dst = (unsigned char *) *to; 887 dstend = dst + *to_left; 888 889 while (src < srcend) { 890 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 891 if (count == RET_TOOSMALL) 892 break; 893 dst += count; 894 src++; 895 } 896 897 *from = (XPointer) src; 898 *from_left = srcend - src; 899 *to = (XPointer) dst; 900 *to_left = dstend - dst; 901 902 return 0; 903} 904 905static XlcConvMethodsRec methods_strtoutf8 = { 906 close_converter, 907 strtoutf8, 908 NULL 909}; 910 911static XlcConv 912open_strtoutf8( 913 XLCd from_lcd, 914 const char *from_type, 915 XLCd to_lcd, 916 const char *to_type) 917{ 918 return create_conv(from_lcd, &methods_strtoutf8); 919} 920 921/* Support for the input methods. 
*/ 922 923XPointer 924_Utf8GetConvByName( 925 const char *name) 926{ 927 XrmQuark xrm_name; 928 Utf8Conv convptr; 929 int i; 930 931 if (name == NULL) 932 return (XPointer) NULL; 933 934 lazy_init_all_charsets(); 935 xrm_name = XrmStringToQuark(name); 936 937 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 938 if (convptr->xrm_name == xrm_name) 939 return (XPointer) convptr->wctocs; 940 return (XPointer) NULL; 941} 942 943/* from XlcNUcsChar to XlcNChar, needed for input methods */ 944 945static XlcConv 946create_ucstocs_conv( 947 XLCd lcd, 948 XlcConvMethods methods) 949{ 950 951 if (XLC_PUBLIC_PART(lcd)->codeset 952 && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) { 953 XlcConv conv; 954 Utf8Conv *preferred; 955 956 lazy_init_all_charsets(); 957 958 conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv)); 959 if (conv == (XlcConv) NULL) 960 return (XlcConv) NULL; 961 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 962 963 preferred[0] = &all_charsets[0]; /* ISO10646 */ 964 preferred[1] = (Utf8Conv) NULL; 965 966 conv->methods = methods; 967 conv->state = (XPointer) preferred; 968 969 return conv; 970 } else { 971 return create_tocs_conv(lcd, methods); 972 } 973} 974 975static int 976charset_wctocs_exactly( 977 Utf8Conv *preferred, 978 Utf8Conv *charsetp, 979 XlcSide *sidep, 980 XlcConv conv, 981 unsigned char *r, 982 ucs4_t wc, 983 int n) 984{ 985 int count; 986 Utf8Conv convptr; 987 988 for (; *preferred != (Utf8Conv) NULL; preferred++) { 989 convptr = *preferred; 990 count = convptr->wctocs(conv, r, wc, n); 991 if (count == RET_TOOSMALL) 992 return RET_TOOSMALL; 993 if (count != RET_ILSEQ) { 994 *charsetp = convptr; 995 *sidep = (*r < 0x80 ? 
XlcGL : XlcGR); 996 return count; 997 } 998 } 999 return RET_ILSEQ; 1000} 1001 1002static int 1003ucstocs1( 1004 XlcConv conv, 1005 XPointer *from, 1006 int *from_left, 1007 XPointer *to, 1008 int *to_left, 1009 XPointer *args, 1010 int num_args) 1011{ 1012 ucs4_t const *src; 1013 unsigned char *dst = (unsigned char *) *to; 1014 int unconv_num = 0; 1015 Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state; 1016 Utf8Conv chosen_charset = NULL; 1017 XlcSide chosen_side = XlcNONE; 1018 XlcCharSet charset = NULL; 1019 int count; 1020 1021 if (from == NULL || *from == NULL) 1022 return 0; 1023 1024 src = (ucs4_t const *) *from; 1025 1026 count = charset_wctocs_exactly(preferred_charsets, &chosen_charset, 1027 &chosen_side, conv, dst, *src, *to_left); 1028 if (count < 1) { 1029 unconv_num++; 1030 count = 0; 1031 } else { 1032 charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1033 } 1034 if (charset == NULL) 1035 return -1; 1036 1037 *from = (XPointer) ++src; 1038 (*from_left)--; 1039 *to = (XPointer) dst; 1040 *to_left -= count; 1041 1042 if (num_args >= 1) 1043 *((XlcCharSet *)args[0]) = charset; 1044 1045 return unconv_num; 1046} 1047 1048static XlcConvMethodsRec methods_ucstocs1 = { 1049 close_tocs_converter, 1050 ucstocs1, 1051 NULL 1052}; 1053 1054static XlcConv 1055open_ucstocs1( 1056 XLCd from_lcd, 1057 const char *from_type, 1058 XLCd to_lcd, 1059 const char *to_type) 1060{ 1061 return create_ucstocs_conv(from_lcd, &methods_ucstocs1); 1062} 1063 1064/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */ 1065 1066static int 1067ucstoutf8( 1068 XlcConv conv, 1069 XPointer *from, 1070 int *from_left, 1071 XPointer *to, 1072 int *to_left, 1073 XPointer *args, 1074 int num_args) 1075{ 1076 const ucs4_t *src; 1077 const ucs4_t *srcend; 1078 unsigned char *dst; 1079 unsigned char *dstend; 1080 int unconv_num; 1081 1082 if (from == NULL || *from == NULL) 1083 return 0; 1084 1085 src = (const ucs4_t *) *from; 1086 srcend = src + 
*from_left; 1087 dst = (unsigned char *) *to; 1088 dstend = dst + *to_left; 1089 unconv_num = 0; 1090 1091 while (src < srcend) { 1092 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1093 if (count == RET_TOOSMALL) 1094 break; 1095 if (count == RET_ILSEQ) 1096 unconv_num++; 1097 src++; 1098 dst += count; 1099 } 1100 1101 *from = (XPointer) src; 1102 *from_left = srcend - src; 1103 *to = (XPointer) dst; 1104 *to_left = dstend - dst; 1105 1106 return unconv_num; 1107} 1108 1109static XlcConvMethodsRec methods_ucstoutf8 = { 1110 close_converter, 1111 ucstoutf8, 1112 NULL 1113}; 1114 1115static XlcConv 1116open_ucstoutf8( 1117 XLCd from_lcd, 1118 const char *from_type, 1119 XLCd to_lcd, 1120 const char *to_type) 1121{ 1122 return create_conv(from_lcd, &methods_ucstoutf8); 1123} 1124 1125/* Registers UTF-8 converters for a non-UTF-8 locale. */ 1126void 1127_XlcAddUtf8Converters( 1128 XLCd lcd) 1129{ 1130 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8); 1131 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs); 1132 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1); 1133 _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8); 1134 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr); 1135 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1); 1136 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8); 1137} 1138 1139/***************************************************************************/ 1140/* Part II: UTF-8 locale loader conversion files 1141 * 1142 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode. 
1143 */ 1144 1145/* from XlcNMultiByte to XlcNWideChar */ 1146 1147static int 1148utf8towcs( 1149 XlcConv conv, 1150 XPointer *from, 1151 int *from_left, 1152 XPointer *to, 1153 int *to_left, 1154 XPointer *args, 1155 int num_args) 1156{ 1157 unsigned char const *src; 1158 unsigned char const *srcend; 1159 wchar_t *dst; 1160 wchar_t *dstend; 1161 int unconv_num; 1162 1163 if (from == NULL || *from == NULL) 1164 return 0; 1165 1166 src = (unsigned char const *) *from; 1167 srcend = src + *from_left; 1168 dst = (wchar_t *) *to; 1169 dstend = dst + *to_left; 1170 unconv_num = 0; 1171 1172 while (src < srcend && dst < dstend) { 1173 ucs4_t wc; 1174 int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 1175 if (consumed == RET_TOOFEW(0)) 1176 break; 1177 if (consumed == RET_ILSEQ) { 1178 src++; 1179 *dst = BAD_WCHAR; 1180 unconv_num++; 1181 } else { 1182 src += consumed; 1183 *dst = wc; 1184 } 1185 dst++; 1186 } 1187 1188 *from = (XPointer) src; 1189 *from_left = srcend - src; 1190 *to = (XPointer) dst; 1191 *to_left = dstend - dst; 1192 1193 return unconv_num; 1194} 1195 1196static XlcConvMethodsRec methods_utf8towcs = { 1197 close_converter, 1198 utf8towcs, 1199 NULL 1200}; 1201 1202static XlcConv 1203open_utf8towcs( 1204 XLCd from_lcd, 1205 const char *from_type, 1206 XLCd to_lcd, 1207 const char *to_type) 1208{ 1209 return create_conv(from_lcd, &methods_utf8towcs); 1210} 1211 1212/* from XlcNWideChar to XlcNMultiByte */ 1213 1214static int 1215wcstoutf8( 1216 XlcConv conv, 1217 XPointer *from, 1218 int *from_left, 1219 XPointer *to, 1220 int *to_left, 1221 XPointer *args, 1222 int num_args) 1223{ 1224 wchar_t const *src; 1225 wchar_t const *srcend; 1226 unsigned char *dst; 1227 unsigned char *dstend; 1228 int unconv_num; 1229 1230 if (from == NULL || *from == NULL) 1231 return 0; 1232 1233 src = (wchar_t const *) *from; 1234 srcend = src + *from_left; 1235 dst = (unsigned char *) *to; 1236 dstend = dst + *to_left; 1237 unconv_num = 0; 1238 1239 while (src < 
srcend) { 1240 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1241 if (count == RET_TOOSMALL) 1242 break; 1243 if (count == RET_ILSEQ) { 1244 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 1245 if (count == RET_TOOSMALL) 1246 break; 1247 unconv_num++; 1248 } 1249 dst += count; 1250 src++; 1251 } 1252 1253 *from = (XPointer) src; 1254 *from_left = srcend - src; 1255 *to = (XPointer) dst; 1256 *to_left = dstend - dst; 1257 1258 return unconv_num; 1259} 1260 1261static XlcConvMethodsRec methods_wcstoutf8 = { 1262 close_converter, 1263 wcstoutf8, 1264 NULL 1265}; 1266 1267static XlcConv 1268open_wcstoutf8( 1269 XLCd from_lcd, 1270 const char *from_type, 1271 XLCd to_lcd, 1272 const char *to_type) 1273{ 1274 return create_conv(from_lcd, &methods_wcstoutf8); 1275} 1276 1277/* from XlcNString to XlcNWideChar */ 1278 1279static int 1280our_strtowcs( 1281 XlcConv conv, 1282 XPointer *from, 1283 int *from_left, 1284 XPointer *to, 1285 int *to_left, 1286 XPointer *args, 1287 int num_args) 1288{ 1289 unsigned char const *src; 1290 unsigned char const *srcend; 1291 wchar_t *dst; 1292 wchar_t *dstend; 1293 1294 if (from == NULL || *from == NULL) 1295 return 0; 1296 1297 src = (unsigned char const *) *from; 1298 srcend = src + *from_left; 1299 dst = (wchar_t *) *to; 1300 dstend = dst + *to_left; 1301 1302 while (src < srcend && dst < dstend) 1303 *dst++ = (wchar_t) *src++; 1304 1305 *from = (XPointer) src; 1306 *from_left = srcend - src; 1307 *to = (XPointer) dst; 1308 *to_left = dstend - dst; 1309 1310 return 0; 1311} 1312 1313static XlcConvMethodsRec methods_strtowcs = { 1314 close_converter, 1315 our_strtowcs, 1316 NULL 1317}; 1318 1319static XlcConv 1320open_strtowcs( 1321 XLCd from_lcd, 1322 const char *from_type, 1323 XLCd to_lcd, 1324 const char *to_type) 1325{ 1326 return create_conv(from_lcd, &methods_strtowcs); 1327} 1328 1329/* from XlcNWideChar to XlcNString */ 1330 1331static int 1332our_wcstostr( 1333 XlcConv conv, 1334 XPointer *from, 1335 int 
*from_left, 1336 XPointer *to, 1337 int *to_left, 1338 XPointer *args, 1339 int num_args) 1340{ 1341 wchar_t const *src; 1342 wchar_t const *srcend; 1343 unsigned char *dst; 1344 unsigned char *dstend; 1345 int unconv_num; 1346 1347 if (from == NULL || *from == NULL) 1348 return 0; 1349 1350 src = (wchar_t const *) *from; 1351 srcend = src + *from_left; 1352 dst = (unsigned char *) *to; 1353 dstend = dst + *to_left; 1354 unconv_num = 0; 1355 1356 while (src < srcend && dst < dstend) { 1357 unsigned int wc = *src++; 1358 if (wc < 0x80) 1359 *dst = wc; 1360 else { 1361 *dst = BAD_CHAR; 1362 unconv_num++; 1363 } 1364 dst++; 1365 } 1366 1367 *from = (XPointer) src; 1368 *from_left = srcend - src; 1369 *to = (XPointer) dst; 1370 *to_left = dstend - dst; 1371 1372 return unconv_num; 1373} 1374 1375static XlcConvMethodsRec methods_wcstostr = { 1376 close_converter, 1377 our_wcstostr, 1378 NULL 1379}; 1380 1381static XlcConv 1382open_wcstostr( 1383 XLCd from_lcd, 1384 const char *from_type, 1385 XLCd to_lcd, 1386 const char *to_type) 1387{ 1388 return create_conv(from_lcd, &methods_wcstostr); 1389} 1390 1391/* from XlcNCharSet to XlcNWideChar */ 1392 1393static int 1394cstowcs( 1395 XlcConv conv, 1396 XPointer *from, 1397 int *from_left, 1398 XPointer *to, 1399 int *to_left, 1400 XPointer *args, 1401 int num_args) 1402{ 1403 XlcCharSet charset; 1404 const char *name; 1405 Utf8Conv convptr; 1406 int i; 1407 unsigned char const *src; 1408 unsigned char const *srcend; 1409 wchar_t *dst; 1410 wchar_t *dstend; 1411 int unconv_num; 1412 1413 if (from == NULL || *from == NULL) 1414 return 0; 1415 1416 if (num_args < 1) 1417 return -1; 1418 1419 charset = (XlcCharSet) args[0]; 1420 name = charset->encoding_name; 1421 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1422 1423 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1424 if (!strcmp(convptr->name, name)) 1425 break; 1426 if (i == 0) 1427 return -1; 1428 1429 src = (unsigned 
char const *) *from; 1430 srcend = src + *from_left; 1431 dst = (wchar_t *) *to; 1432 dstend = dst + *to_left; 1433 unconv_num = 0; 1434 1435 while (src < srcend && dst < dstend) { 1436 unsigned int wc; 1437 int consumed; 1438 1439 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1440 if (consumed == RET_ILSEQ) 1441 return -1; 1442 if (consumed == RET_TOOFEW(0)) 1443 break; 1444 1445 *dst++ = wc; 1446 src += consumed; 1447 } 1448 1449 *from = (XPointer) src; 1450 *from_left = srcend - src; 1451 *to = (XPointer) dst; 1452 *to_left = dstend - dst; 1453 1454 return unconv_num; 1455} 1456 1457static XlcConvMethodsRec methods_cstowcs = { 1458 close_converter, 1459 cstowcs, 1460 NULL 1461}; 1462 1463static XlcConv 1464open_cstowcs( 1465 XLCd from_lcd, 1466 const char *from_type, 1467 XLCd to_lcd, 1468 const char *to_type) 1469{ 1470 lazy_init_all_charsets(); 1471 return create_conv(from_lcd, &methods_cstowcs); 1472} 1473 1474/* from XlcNWideChar to XlcNCharSet */ 1475 1476static int 1477wcstocs( 1478 XlcConv conv, 1479 XPointer *from, 1480 int *from_left, 1481 XPointer *to, 1482 int *to_left, 1483 XPointer *args, 1484 int num_args) 1485{ 1486 Utf8Conv *preferred_charsets; 1487 XlcCharSet last_charset = NULL; 1488 wchar_t const *src; 1489 wchar_t const *srcend; 1490 unsigned char *dst; 1491 unsigned char *dstend; 1492 int unconv_num; 1493 1494 if (from == NULL || *from == NULL) 1495 return 0; 1496 1497 preferred_charsets = (Utf8Conv *) conv->state; 1498 src = (wchar_t const *) *from; 1499 srcend = src + *from_left; 1500 dst = (unsigned char *) *to; 1501 dstend = dst + *to_left; 1502 unconv_num = 0; 1503 1504 while (src < srcend && dst < dstend) { 1505 Utf8Conv chosen_charset = NULL; 1506 XlcSide chosen_side = XlcNONE; 1507 wchar_t wc = *src; 1508 int count; 1509 1510 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1511 if (count == RET_TOOSMALL) 1512 break; 1513 if (count == RET_ILSEQ) { 1514 src++; 1515 
unconv_num++; 1516 continue; 1517 } 1518 1519 if (last_charset == NULL) { 1520 last_charset = 1521 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1522 if (last_charset == NULL) { 1523 src++; 1524 unconv_num++; 1525 continue; 1526 } 1527 } else { 1528 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1529 && (last_charset->side == XlcGLGR 1530 || last_charset->side == chosen_side))) 1531 break; 1532 } 1533 src++; 1534 dst += count; 1535 } 1536 1537 if (last_charset == NULL) 1538 return -1; 1539 1540 *from = (XPointer) src; 1541 *from_left = srcend - src; 1542 *to = (XPointer) dst; 1543 *to_left = dstend - dst; 1544 1545 if (num_args >= 1) 1546 *((XlcCharSet *)args[0]) = last_charset; 1547 1548 return unconv_num; 1549} 1550 1551static XlcConvMethodsRec methods_wcstocs = { 1552 close_tocs_converter, 1553 wcstocs, 1554 NULL 1555}; 1556 1557static XlcConv 1558open_wcstocs( 1559 XLCd from_lcd, 1560 const char *from_type, 1561 XLCd to_lcd, 1562 const char *to_type) 1563{ 1564 return create_tocs_conv(from_lcd, &methods_wcstocs); 1565} 1566 1567/* from XlcNWideChar to XlcNChar */ 1568 1569static int 1570wcstocs1( 1571 XlcConv conv, 1572 XPointer *from, 1573 int *from_left, 1574 XPointer *to, 1575 int *to_left, 1576 XPointer *args, 1577 int num_args) 1578{ 1579 Utf8Conv *preferred_charsets; 1580 XlcCharSet last_charset = NULL; 1581 wchar_t const *src; 1582 wchar_t const *srcend; 1583 unsigned char *dst; 1584 unsigned char *dstend; 1585 int unconv_num; 1586 1587 if (from == NULL || *from == NULL) 1588 return 0; 1589 1590 preferred_charsets = (Utf8Conv *) conv->state; 1591 src = (wchar_t const *) *from; 1592 srcend = src + *from_left; 1593 dst = (unsigned char *) *to; 1594 dstend = dst + *to_left; 1595 unconv_num = 0; 1596 1597 while (src < srcend && dst < dstend) { 1598 Utf8Conv chosen_charset = NULL; 1599 XlcSide chosen_side = XlcNONE; 1600 wchar_t wc = *src; 1601 int count; 1602 1603 count = charset_wctocs(preferred_charsets, &chosen_charset, 
&chosen_side, conv, dst, wc, dstend-dst); 1604 if (count == RET_TOOSMALL) 1605 break; 1606 if (count == RET_ILSEQ) { 1607 src++; 1608 unconv_num++; 1609 continue; 1610 } 1611 1612 last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1613 1614 if (last_charset == NULL) { 1615 src++; 1616 unconv_num++; 1617 continue; 1618 } 1619 1620 src++; 1621 dst += count; 1622 break; 1623 } 1624 1625 if (last_charset == NULL) 1626 return -1; 1627 1628 *from = (XPointer) src; 1629 *from_left = srcend - src; 1630 *to = (XPointer) dst; 1631 *to_left = dstend - dst; 1632 1633 if (num_args >= 1) 1634 *((XlcCharSet *)args[0]) = last_charset; 1635 1636 return unconv_num; 1637} 1638 1639static XlcConvMethodsRec methods_wcstocs1 = { 1640 close_tocs_converter, 1641 wcstocs1, 1642 NULL 1643}; 1644 1645static XlcConv 1646open_wcstocs1( 1647 XLCd from_lcd, 1648 const char *from_type, 1649 XLCd to_lcd, 1650 const char *to_type) 1651{ 1652 return create_tocs_conv(from_lcd, &methods_wcstocs1); 1653} 1654 1655/* trivial, no conversion */ 1656 1657static int 1658identity( 1659 XlcConv conv, 1660 XPointer *from, 1661 int *from_left, 1662 XPointer *to, 1663 int *to_left, 1664 XPointer *args, 1665 int num_args) 1666{ 1667 unsigned char const *src; 1668 unsigned char const *srcend; 1669 unsigned char *dst; 1670 unsigned char *dstend; 1671 1672 if (from == NULL || *from == NULL) 1673 return 0; 1674 1675 src = (unsigned char const *) *from; 1676 srcend = src + *from_left; 1677 dst = (unsigned char *) *to; 1678 dstend = dst + *to_left; 1679 1680 while (src < srcend && dst < dstend) 1681 *dst++ = *src++; 1682 1683 *from = (XPointer) src; 1684 *from_left = srcend - src; 1685 *to = (XPointer) dst; 1686 *to_left = dstend - dst; 1687 1688 return 0; 1689} 1690 1691static XlcConvMethodsRec methods_identity = { 1692 close_converter, 1693 identity, 1694 NULL 1695}; 1696 1697static XlcConv 1698open_identity( 1699 XLCd from_lcd, 1700 const char *from_type, 1701 XLCd to_lcd, 1702 const char 
*to_type) 1703{ 1704 return create_conv(from_lcd, &methods_identity); 1705} 1706 1707/* from MultiByte/WideChar to FontCharSet. */ 1708/* They really use converters to CharSet 1709 * but with different create_conv procedure. */ 1710 1711static XlcConv 1712create_tofontcs_conv( 1713 XLCd lcd, 1714 XlcConvMethods methods) 1715{ 1716 XlcConv conv; 1717 int i, num, k, count; 1718 char **value, buf[32]; 1719 Utf8Conv *preferred; 1720 1721 lazy_init_all_charsets(); 1722 1723 for (i = 0, num = 0;; i++) { 1724 snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 1725 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1726 if (count < 1) { 1727 snprintf(buf, sizeof(buf), "fs%d.charset", i); 1728 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1729 if (count < 1) 1730 break; 1731 } 1732 num += count; 1733 } 1734 1735 conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv)); 1736 if (conv == (XlcConv) NULL) 1737 return (XlcConv) NULL; 1738 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 1739 1740 /* Loop through all fontsets mentioned in the locale. */ 1741 for (i = 0, num = 0;; i++) { 1742 snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 1743 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1744 if (count < 1) { 1745 snprintf(buf, sizeof(buf), "fs%d.charset", i); 1746 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1747 if (count < 1) 1748 break; 1749 } 1750 while (count-- > 0) { 1751 XlcCharSet charset = _XlcGetCharSet(*value++); 1752 const char *name; 1753 1754 if (charset == (XlcCharSet) NULL) 1755 continue; 1756 1757 name = charset->encoding_name; 1758 /* If it wasn't already encountered... */ 1759 for (k = num - 1; k >= 0; k--) 1760 if (!strcmp(preferred[k]->name, name)) 1761 break; 1762 if (k < 0) { 1763 /* For fonts "ISO10646-1" means ucs2, not utf8.*/ 1764 if (!strcmp("ISO10646-1", name)) { 1765 preferred[num++] = &all_charsets[ucs2_conv_index]; 1766 continue; 1767 } 1768 /* Look it up in all_charsets[]. 
*/ 1769 for (k = 0; k < all_charsets_count-1; k++) 1770 if (!strcmp(all_charsets[k].name, name)) { 1771 /* Add it to the preferred set. */ 1772 preferred[num++] = &all_charsets[k]; 1773 break; 1774 } 1775 } 1776 } 1777 } 1778 preferred[num] = (Utf8Conv) NULL; 1779 1780 conv->methods = methods; 1781 conv->state = (XPointer) preferred; 1782 1783 return conv; 1784} 1785 1786static XlcConv 1787open_wcstofcs( 1788 XLCd from_lcd, 1789 const char *from_type, 1790 XLCd to_lcd, 1791 const char *to_type) 1792{ 1793 return create_tofontcs_conv(from_lcd, &methods_wcstocs); 1794} 1795 1796static XlcConv 1797open_utf8tofcs( 1798 XLCd from_lcd, 1799 const char *from_type, 1800 XLCd to_lcd, 1801 const char *to_type) 1802{ 1803 return create_tofontcs_conv(from_lcd, &methods_utf8tocs); 1804} 1805 1806/* ========================== iconv Stuff ================================ */ 1807 1808/* from XlcNCharSet to XlcNMultiByte */ 1809 1810static int 1811iconv_cstombs(XlcConv conv, XPointer *from, int *from_left, 1812 XPointer *to, int *to_left, XPointer *args, int num_args) 1813{ 1814 XlcCharSet charset; 1815 char const *name; 1816 Utf8Conv convptr; 1817 int i; 1818 unsigned char const *src; 1819 unsigned char const *srcend; 1820 unsigned char *dst; 1821 unsigned char *dstend; 1822 int unconv_num; 1823 1824 if (from == NULL || *from == NULL) 1825 return 0; 1826 1827 if (num_args < 1) 1828 return -1; 1829 1830 charset = (XlcCharSet) args[0]; 1831 name = charset->encoding_name; 1832 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1833 1834 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1835 if (!strcmp(convptr->name, name)) 1836 break; 1837 if (i == 0) 1838 return -1; 1839 1840 src = (unsigned char const *) *from; 1841 srcend = src + *from_left; 1842 dst = (unsigned char *) *to; 1843 dstend = dst + *to_left; 1844 unconv_num = 0; 1845 1846 while (src < srcend) { 1847 ucs4_t wc; 1848 int consumed; 1849 int count; 1850 1851 consumed = 
convptr->cstowc(conv, &wc, src, srcend-src); 1852 if (consumed == RET_ILSEQ) 1853 return -1; 1854 if (consumed == RET_TOOFEW(0)) 1855 break; 1856 1857 /* Use stdc iconv to convert widechar -> multibyte */ 1858 1859 count = wctomb((char *)dst, wc); 1860 if (count == 0) 1861 break; 1862 if (count == -1) { 1863 count = wctomb((char *)dst, BAD_WCHAR); 1864 if (count == 0) 1865 break; 1866 unconv_num++; 1867 } 1868 src += consumed; 1869 dst += count; 1870 } 1871 1872 *from = (XPointer) src; 1873 *from_left = srcend - src; 1874 *to = (XPointer) dst; 1875 *to_left = dstend - dst; 1876 1877 return unconv_num; 1878 1879} 1880 1881static XlcConvMethodsRec iconv_cstombs_methods = { 1882 close_converter, 1883 iconv_cstombs, 1884 NULL 1885}; 1886 1887static XlcConv 1888open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1889{ 1890 lazy_init_all_charsets(); 1891 return create_conv(from_lcd, &iconv_cstombs_methods); 1892} 1893 1894static int 1895iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left, 1896 XPointer *to, int *to_left, XPointer *args, int num_args) 1897{ 1898 Utf8Conv *preferred_charsets; 1899 XlcCharSet last_charset = NULL; 1900 unsigned char const *src; 1901 unsigned char const *srcend; 1902 unsigned char *dst; 1903 unsigned char *dstend; 1904 int unconv_num; 1905 1906 if (from == NULL || *from == NULL) 1907 return 0; 1908 1909 preferred_charsets = (Utf8Conv *) conv->state; 1910 src = (unsigned char const *) *from; 1911 srcend = src + *from_left; 1912 dst = (unsigned char *) *to; 1913 dstend = dst + *to_left; 1914 unconv_num = 0; 1915 1916 while (src < srcend && dst < dstend) { 1917 Utf8Conv chosen_charset = NULL; 1918 XlcSide chosen_side = XlcNONE; 1919 wchar_t wc; 1920 int consumed; 1921 int count; 1922 1923 /* Uses stdc iconv to convert multibyte -> widechar */ 1924 1925 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 1926 if (consumed == 0) 1927 break; 1928 if (consumed == -1) { 1929 src++; 1930 
unconv_num++; 1931 continue; 1932 } 1933 1934 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1935 1936 if (count == RET_TOOSMALL) 1937 break; 1938 if (count == RET_ILSEQ) { 1939 src += consumed; 1940 unconv_num++; 1941 continue; 1942 } 1943 1944 if (last_charset == NULL) { 1945 last_charset = 1946 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1947 if (last_charset == NULL) { 1948 src += consumed; 1949 unconv_num++; 1950 continue; 1951 } 1952 } else { 1953 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1954 && (last_charset->side == XlcGLGR 1955 || last_charset->side == chosen_side))) 1956 break; 1957 } 1958 src += consumed; 1959 dst += count; 1960 } 1961 1962 if (last_charset == NULL) 1963 return -1; 1964 1965 *from = (XPointer) src; 1966 *from_left = srcend - src; 1967 *to = (XPointer) dst; 1968 *to_left = dstend - dst; 1969 1970 if (num_args >= 1) 1971 *((XlcCharSet *)args[0]) = last_charset; 1972 1973 return unconv_num; 1974} 1975 1976static XlcConvMethodsRec iconv_mbstocs_methods = { 1977 close_tocs_converter, 1978 iconv_mbstocs, 1979 NULL 1980}; 1981 1982static XlcConv 1983open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1984{ 1985 return create_tocs_conv(from_lcd, &iconv_mbstocs_methods); 1986} 1987 1988/* from XlcNMultiByte to XlcNChar */ 1989 1990static int 1991iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left, 1992 XPointer *to, int *to_left, XPointer *args, int num_args) 1993{ 1994 Utf8Conv *preferred_charsets; 1995 XlcCharSet last_charset = NULL; 1996 unsigned char const *src; 1997 unsigned char const *srcend; 1998 unsigned char *dst; 1999 unsigned char *dstend; 2000 int unconv_num; 2001 2002 if (from == NULL || *from == NULL) 2003 return 0; 2004 2005 preferred_charsets = (Utf8Conv *) conv->state; 2006 src = (unsigned char const *) *from; 2007 srcend = src + *from_left; 2008 dst = (unsigned char *) *to; 2009 dstend = 
dst + *to_left; 2010 unconv_num = 0; 2011 2012 while (src < srcend && dst < dstend) { 2013 Utf8Conv chosen_charset = NULL; 2014 XlcSide chosen_side = XlcNONE; 2015 wchar_t wc; 2016 int consumed; 2017 int count; 2018 2019 /* Uses stdc iconv to convert multibyte -> widechar */ 2020 2021 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 2022 if (consumed == 0) 2023 break; 2024 if (consumed == -1) { 2025 src++; 2026 unconv_num++; 2027 continue; 2028 } 2029 2030 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 2031 if (count == RET_TOOSMALL) 2032 break; 2033 if (count == RET_ILSEQ) { 2034 src += consumed; 2035 unconv_num++; 2036 continue; 2037 } 2038 2039 if (last_charset == NULL) { 2040 last_charset = 2041 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 2042 if (last_charset == NULL) { 2043 src += consumed; 2044 unconv_num++; 2045 continue; 2046 } 2047 } else { 2048 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 2049 && (last_charset->side == XlcGLGR 2050 || last_charset->side == chosen_side))) 2051 break; 2052 } 2053 src += consumed; 2054 dst += count; 2055 } 2056 2057 if (last_charset == NULL) 2058 return -1; 2059 2060 *from = (XPointer) src; 2061 *from_left = srcend - src; 2062 *to = (XPointer) dst; 2063 *to_left = dstend - dst; 2064 2065 if (num_args >= 1) 2066 *((XlcCharSet *)args[0]) = last_charset; 2067 2068 return unconv_num; 2069} 2070 2071static XlcConvMethodsRec iconv_mbtocs_methods = { 2072 close_tocs_converter, 2073 iconv_mbtocs, 2074 NULL 2075}; 2076 2077static XlcConv 2078open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2079{ 2080 return create_tocs_conv(from_lcd, &iconv_mbtocs_methods ); 2081} 2082 2083/* from XlcNMultiByte to XlcNString */ 2084 2085static int 2086iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left, 2087 XPointer *to, int *to_left, XPointer *args, int num_args) 2088{ 2089 unsigned char 
const *src; 2090 unsigned char const *srcend; 2091 unsigned char *dst; 2092 unsigned char *dstend; 2093 int unconv_num; 2094 2095 if (from == NULL || *from == NULL) 2096 return 0; 2097 2098 src = (unsigned char const *) *from; 2099 srcend = src + *from_left; 2100 dst = (unsigned char *) *to; 2101 dstend = dst + *to_left; 2102 unconv_num = 0; 2103 2104 while (src < srcend) { 2105 unsigned char c; 2106 wchar_t wc; 2107 int consumed; 2108 2109 /* Uses stdc iconv to convert multibyte -> widechar */ 2110 2111 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 2112 if (consumed == 0) 2113 break; 2114 if (dst == dstend) 2115 break; 2116 if (consumed == -1) { 2117 consumed = 1; 2118 c = BAD_CHAR; 2119 unconv_num++; 2120 } else { 2121 if ((wc & ~(wchar_t)0xff) != 0) { 2122 c = BAD_CHAR; 2123 unconv_num++; 2124 } else 2125 c = (unsigned char) wc; 2126 } 2127 *dst++ = c; 2128 src += consumed; 2129 } 2130 2131 *from = (XPointer) src; 2132 *from_left = srcend - src; 2133 *to = (XPointer) dst; 2134 *to_left = dstend - dst; 2135 2136 return unconv_num; 2137} 2138 2139static XlcConvMethodsRec iconv_mbstostr_methods = { 2140 close_converter, 2141 iconv_mbstostr, 2142 NULL 2143}; 2144 2145static XlcConv 2146open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2147{ 2148 return create_conv(from_lcd, &iconv_mbstostr_methods); 2149} 2150 2151/* from XlcNString to XlcNMultiByte */ 2152static int 2153iconv_strtombs(XlcConv conv, XPointer *from, int *from_left, 2154 XPointer *to, int *to_left, XPointer *args, int num_args) 2155{ 2156 unsigned char const *src; 2157 unsigned char const *srcend; 2158 unsigned char *dst; 2159 unsigned char *dstend; 2160 2161 if (from == NULL || *from == NULL) 2162 return 0; 2163 2164 src = (unsigned char const *) *from; 2165 srcend = src + *from_left; 2166 dst = (unsigned char *) *to; 2167 dstend = dst + *to_left; 2168 2169 while (src < srcend) { 2170 int count = wctomb((char *)dst, *src); 2171 if (count 
< 0) 2172 break; 2173 dst += count; 2174 src++; 2175 } 2176 2177 *from = (XPointer) src; 2178 *from_left = srcend - src; 2179 *to = (XPointer) dst; 2180 *to_left = dstend - dst; 2181 2182 return 0; 2183} 2184 2185static XlcConvMethodsRec iconv_strtombs_methods= { 2186 close_converter, 2187 iconv_strtombs, 2188 NULL 2189}; 2190 2191static XlcConv 2192open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2193{ 2194 return create_conv(from_lcd, &iconv_strtombs_methods); 2195} 2196 2197/***************************************************************************/ 2198/* Part II: An iconv locale loader. 2199 * 2200 *Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode. 2201 */ 2202 2203/* from XlcNMultiByte to XlcNWideChar */ 2204static int 2205iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left, 2206 XPointer *to, int *to_left, XPointer *args, int num_args) 2207{ 2208 char *src = *((char **) from); 2209 wchar_t *dst = *((wchar_t **) to); 2210 int src_left = *from_left; 2211 int dst_left = *to_left; 2212 int length, unconv_num = 0; 2213 2214 while (src_left > 0 && dst_left > 0) { 2215 length = mbtowc(dst, src, (size_t) src_left); 2216 2217 if (length > 0) { 2218 src += length; 2219 src_left -= length; 2220 if (dst) 2221 dst++; 2222 dst_left--; 2223 } else if (length < 0) { 2224 src++; 2225 src_left--; 2226 unconv_num++; 2227 } else { 2228 /* null ? 
*/ 2229 src++; 2230 src_left--; 2231 if (dst) 2232 *dst++ = L'\0'; 2233 dst_left--; 2234 } 2235 } 2236 2237 *from = (XPointer) src; 2238 if (dst) 2239 *to = (XPointer) dst; 2240 *from_left = src_left; 2241 *to_left = dst_left; 2242 2243 return unconv_num; 2244} 2245 2246static XlcConvMethodsRec iconv_mbstowcs_methods = { 2247 close_converter, 2248 iconv_mbstowcs, 2249 NULL 2250} ; 2251 2252static XlcConv 2253open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2254{ 2255 return create_conv(from_lcd, &iconv_mbstowcs_methods); 2256} 2257 2258static int 2259iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left, 2260 XPointer *to, int *to_left, XPointer *args, int num_args) 2261{ 2262 wchar_t *src = *((wchar_t **) from); 2263 char *dst = *((char **) to); 2264 int src_left = *from_left; 2265 int dst_left = *to_left; 2266 int length, unconv_num = 0; 2267 2268 while (src_left > 0 && dst_left >= MB_CUR_MAX) { 2269 length = wctomb(dst, *src); /* XXX */ 2270 2271 if (length > 0) { 2272 src++; 2273 src_left--; 2274 if (dst) 2275 dst += length; 2276 dst_left -= length; 2277 } else if (length < 0) { 2278 src++; 2279 src_left--; 2280 unconv_num++; 2281 } 2282 } 2283 2284 *from = (XPointer) src; 2285 if (dst) 2286 *to = (XPointer) dst; 2287 *from_left = src_left; 2288 *to_left = dst_left; 2289 2290 return unconv_num; 2291} 2292 2293static XlcConvMethodsRec iconv_wcstombs_methods = { 2294 close_converter, 2295 iconv_wcstombs, 2296 NULL 2297} ; 2298 2299static XlcConv 2300open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2301{ 2302 return create_conv(from_lcd, &iconv_wcstombs_methods); 2303} 2304 2305static XlcConv 2306open_iconv_mbstofcs( 2307 XLCd from_lcd, 2308 const char *from_type, 2309 XLCd to_lcd, 2310 const char *to_type) 2311{ 2312 return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods); 2313} 2314 2315/* Registers UTF-8 converters for a UTF-8 locale. 
*/ 2316 2317void 2318_XlcAddUtf8LocaleConverters( 2319 XLCd lcd) 2320{ 2321 /* Register elementary converters. */ 2322 2323 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs); 2324 2325 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8); 2326 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2327 2328 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2329 2330 /* Register converters for XlcNCharSet. This implicitly provides 2331 * converters from and to XlcNCompoundText. */ 2332 2333 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8); 2334 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs); 2335 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1); 2336 2337 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2338 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2339 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2340 2341 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8); 2342 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr); 2343 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity); 2344 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity); 2345 2346 /* Register converters for XlcNFontCharSet */ 2347 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs); 2348 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2349 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNFontCharSet, open_utf8tofcs); 2350} 2351 2352void 2353_XlcAddGB18030LocaleConverters( 2354 XLCd lcd) 2355{ 2356 2357 /* Register elementary converters. */ 2358 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs); 2359 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs); 2360 2361 /* Register converters for XlcNCharSet. 
This implicitly provides 2362 * converters from and to XlcNCompoundText. */ 2363 2364 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs); 2365 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs); 2366 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs); 2367 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs); 2368 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr); 2369 2370 /* Register converters for XlcNFontCharSet */ 2371 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs); 2372 2373 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2374 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2375 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2376 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2377 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2378 2379 /* Register converters for XlcNFontCharSet */ 2380 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2381} 2382