lcUTF8.c revision 61b2299d
1/* $TOG: $ */ 2/****************************************************************** 3 4 Copyright 1993 by SunSoft, Inc. 5 Copyright 1999-2000 by Bruno Haible 6 7Permission to use, copy, modify, distribute, and sell this software 8and its documentation for any purpose is hereby granted without fee, 9provided that the above copyright notice appear in all copies and 10that both that copyright notice and this permission notice appear 11in supporting documentation, and that the names of SunSoft, Inc. and 12Bruno Haible not be used in advertising or publicity pertaining to 13distribution of the software without specific, written prior 14permission. SunSoft, Inc. and Bruno Haible make no representations 15about the suitability of this software for any purpose. It is 16provided "as is" without express or implied warranty. 17 18SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD 19TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 20AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE 21FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 22WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 23ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 24OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 25 26******************************************************************/ 27/* $XFree86: xc/lib/X11/lcUTF8.c,v 1.15 2002/10/08 23:31:36 dawes Exp $ */ 28 29/* 30 * This file contains: 31 * 32 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 33 * 34 * Used for three purposes: 35 * 1. The UTF-8 locales, see below. 36 * 2. Unicode aware applications for which the use of 8-bit character 37 * sets is an anachronism. 38 * 3. For conversion from keysym to locale encoding. 39 * 40 * II. Conversion files for an UTF-8 locale loader. 41 * Supports: all locales with codeset UTF-8. 42 * How: Provides converters for UTF-8. 43 * Platforms: all systems. 44 * 45 * The loader itself is located in lcUTF8.c. 46 */ 47 48/* 49 * The conversion from UTF-8 to CompoundText is realized in a very 50 * conservative way. Recall that CompoundText data is used for inter-client 51 * communication purposes. We distinguish three classes of clients: 52 * - Clients which accept only those pieces of CompoundText which belong to 53 * the character set understood by the current locale. 54 * (Example: clients which are linked to an older X11 library.) 55 * - Clients which accept CompoundText with multiple character sets and parse 56 * it themselves. 57 * (Example: emacs, xemacs.) 58 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList 59 * functions for the conversion of CompoundText to their current locale's 60 * multi-byte/wide-character format. 61 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as 62 * follows. For every character, it first tests whether the character is 63 * representable in the current locale's original (non-UTF-8) character set. 64 * If not, it goes through the list of predefined character sets for 65 * CompoundText and tests if the character is representable in that character 66 * set. If so, it encodes the character using its code within that character 67 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since 68 * clients of the first and second kind ignore such encapsulated text, 69 * this encapsulation is kept to a minimum and terminated as early as possible. 70 * 71 * In a distant future, when clients of the first and second kind will have 72 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText 73 * without first going through the list of predefined character sets. 74 */ 75 76#ifdef HAVE_CONFIG_H 77#include <config.h> 78#endif 79#include <stdio.h> 80#include "Xlibint.h" 81#include "XlcPubI.h" 82#include "XlcGeneric.h" 83 84static XlcConv 85create_conv( 86 XLCd lcd, 87 XlcConvMethods methods) 88{ 89 XlcConv conv; 90 91 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)); 92 if (conv == (XlcConv) NULL) 93 return (XlcConv) NULL; 94 95 conv->methods = methods; 96 conv->state = NULL; 97 98 return conv; 99} 100 101static void 102close_converter( 103 XlcConv conv) 104{ 105 Xfree((char *) conv); 106} 107 108/* Replacement character for invalid multibyte sequence or wide character. */ 109#define BAD_WCHAR ((ucs4_t) 0xfffd) 110#define BAD_CHAR '?' 111 112/***************************************************************************/ 113/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 114 * 115 * Note that this code works in any locale. We store Unicode values in 116 * `ucs4_t' variables, but don't pass them to the user. 117 * 118 * This code has to support all character sets that are used for CompoundText, 119 * nothing more, nothing less. See the table in lcCT.c. 120 * Since the conversion _to_ CompoundText is likely to need the tables for all 121 * character sets at once, we don't use dynamic loading (of tables or shared 122 * libraries through iconv()). Use a fixed set of tables instead. 123 * 124 * We use statically computed tables, not dynamically allocated arrays, 125 * because it's more memory efficient: Different processes using the same 126 * libX11 shared library share the "text" and read-only "data" sections. 127 */ 128 129typedef unsigned int ucs4_t; 130#define conv_t XlcConv 131 132typedef struct _Utf8ConvRec { 133 const char *name; 134 XrmQuark xrm_name; 135 int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int); 136 int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int); 137} Utf8ConvRec, *Utf8Conv; 138 139/* 140 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n) 141 * converts the byte sequence starting at s to a wide character. Up to n bytes 142 * are available at s. n is >= 1. 143 * Result is number of bytes consumed (if a wide character was read), 144 * or 0 if invalid, or -1 if n too small. 145 * 146 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n) 147 * converts the wide character wc to the character set xxx, and stores the 148 * result beginning at r. Up to n bytes may be written at r. n is >= 1. 149 * Result is number of bytes written, or 0 if invalid, or -1 if n too small. 150 */ 151 152/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */ 153#define RET_ILSEQ 0 154/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 155#define RET_TOOFEW(n) (-1-(n)) 156/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 157#define RET_TOOSMALL -1 158 159/* 160 * The tables below are bijective. It would be possible to extend the 161 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22) 162 * but *only* with characters not contained in any other table, and *only* 163 * when the current locale is not an UTF-8 locale. 164 */ 165 166#include "lcUniConv/utf8.h" 167#include "lcUniConv/ucs2be.h" 168#ifdef notused 169#include "lcUniConv/ascii.h" 170#endif 171#include "lcUniConv/iso8859_1.h" 172#include "lcUniConv/iso8859_2.h" 173#include "lcUniConv/iso8859_3.h" 174#include "lcUniConv/iso8859_4.h" 175#include "lcUniConv/iso8859_5.h" 176#include "lcUniConv/iso8859_6.h" 177#include "lcUniConv/iso8859_7.h" 178#include "lcUniConv/iso8859_8.h" 179#include "lcUniConv/iso8859_9.h" 180#include "lcUniConv/iso8859_10.h" 181#include "lcUniConv/iso8859_11.h" 182#include "lcUniConv/iso8859_13.h" 183#include "lcUniConv/iso8859_14.h" 184#include "lcUniConv/iso8859_15.h" 185#include "lcUniConv/iso8859_16.h" 186#include "lcUniConv/iso8859_9e.h" 187#include "lcUniConv/jisx0201.h" 188#include "lcUniConv/tis620.h" 189#include "lcUniConv/koi8_r.h" 190#include "lcUniConv/koi8_u.h" 191#include "lcUniConv/koi8_c.h" 192#include "lcUniConv/armscii_8.h" 193#include "lcUniConv/cp1133.h" 194#include "lcUniConv/mulelao.h" 195#include "lcUniConv/viscii.h" 196#include "lcUniConv/tcvn.h" 197#include "lcUniConv/georgian_academy.h" 198#include "lcUniConv/georgian_ps.h" 199#include "lcUniConv/cp1251.h" 200#include "lcUniConv/cp1255.h" 201#include "lcUniConv/cp1256.h" 202#include "lcUniConv/tatar_cyr.h" 203 204typedef struct { 205 unsigned short indx; /* index into big table */ 206 unsigned short used; /* bitmask of used entries */ 207} Summary16; 208 209#include "lcUniConv/gb2312.h" 210#include "lcUniConv/jisx0208.h" 211#include "lcUniConv/jisx0212.h" 212#include "lcUniConv/ksc5601.h" 213#include "lcUniConv/big5.h" 214#include "lcUniConv/big5_emacs.h" 215#include "lcUniConv/big5hkscs.h" 216#include "lcUniConv/gbk.h" 217 218static Utf8ConvRec all_charsets[] = { 219 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 220 (for lookup speed), once at the end (as a fallback). */ 221 { "ISO10646-1", NULLQUARK, 222 utf8_mbtowc, utf8_wctomb 223 }, 224 225 { "ISO8859-1", NULLQUARK, 226 iso8859_1_mbtowc, iso8859_1_wctomb 227 }, 228 { "ISO8859-2", NULLQUARK, 229 iso8859_2_mbtowc, iso8859_2_wctomb 230 }, 231 { "ISO8859-3", NULLQUARK, 232 iso8859_3_mbtowc, iso8859_3_wctomb 233 }, 234 { "ISO8859-4", NULLQUARK, 235 iso8859_4_mbtowc, iso8859_4_wctomb 236 }, 237 { "ISO8859-5", NULLQUARK, 238 iso8859_5_mbtowc, iso8859_5_wctomb 239 }, 240 { "ISO8859-6", NULLQUARK, 241 iso8859_6_mbtowc, iso8859_6_wctomb 242 }, 243 { "ISO8859-7", NULLQUARK, 244 iso8859_7_mbtowc, iso8859_7_wctomb 245 }, 246 { "ISO8859-8", NULLQUARK, 247 iso8859_8_mbtowc, iso8859_8_wctomb 248 }, 249 { "ISO8859-9", NULLQUARK, 250 iso8859_9_mbtowc, iso8859_9_wctomb 251 }, 252 { "ISO8859-10", NULLQUARK, 253 iso8859_10_mbtowc, iso8859_10_wctomb 254 }, 255 { "ISO8859-11", NULLQUARK, 256 iso8859_11_mbtowc, iso8859_11_wctomb 257 }, 258 { "ISO8859-13", NULLQUARK, 259 iso8859_13_mbtowc, iso8859_13_wctomb 260 }, 261 { "ISO8859-14", NULLQUARK, 262 iso8859_14_mbtowc, iso8859_14_wctomb 263 }, 264 { "ISO8859-15", NULLQUARK, 265 iso8859_15_mbtowc, iso8859_15_wctomb 266 }, 267 { "ISO8859-16", NULLQUARK, 268 iso8859_16_mbtowc, iso8859_16_wctomb 269 }, 270 { "JISX0201.1976-0", NULLQUARK, 271 jisx0201_mbtowc, jisx0201_wctomb 272 }, 273 { "TIS620-0", NULLQUARK, 274 tis620_mbtowc, tis620_wctomb 275 }, 276 { "GB2312.1980-0", NULLQUARK, 277 gb2312_mbtowc, gb2312_wctomb 278 }, 279 { "JISX0208.1983-0", NULLQUARK, 280 jisx0208_mbtowc, jisx0208_wctomb 281 }, 282 { "JISX0208.1990-0", NULLQUARK, 283 jisx0208_mbtowc, jisx0208_wctomb 284 }, 285 { "JISX0212.1990-0", NULLQUARK, 286 jisx0212_mbtowc, jisx0212_wctomb 287 }, 288 { "KSC5601.1987-0", NULLQUARK, 289 ksc5601_mbtowc, ksc5601_wctomb 290 }, 291 { "KOI8-R", NULLQUARK, 292 koi8_r_mbtowc, koi8_r_wctomb 293 }, 294 { "KOI8-U", NULLQUARK, 295 koi8_u_mbtowc, koi8_u_wctomb 296 }, 297 { "KOI8-C", NULLQUARK, 298 koi8_c_mbtowc, koi8_c_wctomb 299 }, 300 { "TATAR-CYR", NULLQUARK, 301 tatar_cyr_mbtowc, tatar_cyr_wctomb 302 }, 303 { "ARMSCII-8", NULLQUARK, 304 armscii_8_mbtowc, armscii_8_wctomb 305 }, 306 { "IBM-CP1133", NULLQUARK, 307 cp1133_mbtowc, cp1133_wctomb 308 }, 309 { "MULELAO-1", NULLQUARK, 310 mulelao_mbtowc, mulelao_wctomb 311 }, 312 { "VISCII1.1-1", NULLQUARK, 313 viscii_mbtowc, viscii_wctomb 314 }, 315 { "TCVN-5712", NULLQUARK, 316 tcvn_mbtowc, tcvn_wctomb 317 }, 318 { "GEORGIAN-ACADEMY", NULLQUARK, 319 georgian_academy_mbtowc, georgian_academy_wctomb 320 }, 321 { "GEORGIAN-PS", NULLQUARK, 322 georgian_ps_mbtowc, georgian_ps_wctomb 323 }, 324 { "ISO8859-9E", NULLQUARK, 325 iso8859_9e_mbtowc, iso8859_9e_wctomb 326 }, 327 { "MICROSOFT-CP1251", NULLQUARK, 328 cp1251_mbtowc, cp1251_wctomb 329 }, 330 { "MICROSOFT-CP1255", NULLQUARK, 331 cp1255_mbtowc, cp1255_wctomb 332 }, 333 { "MICROSOFT-CP1256", NULLQUARK, 334 cp1256_mbtowc, cp1256_wctomb 335 }, 336 { "BIG5-0", NULLQUARK, 337 big5_mbtowc, big5_wctomb 338 }, 339 { "BIG5-E0", NULLQUARK, 340 big5_0_mbtowc, big5_0_wctomb 341 }, 342 { "BIG5-E1", NULLQUARK, 343 big5_1_mbtowc, big5_1_wctomb 344 }, 345 { "GBK-0", NULLQUARK, 346 gbk_mbtowc, gbk_wctomb 347 }, 348 { "BIG5HKSCS-0", NULLQUARK, 349 big5hkscs_mbtowc, big5hkscs_wctomb 350 }, 351 352 /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 353 (for lookup speed), once at the end (as a fallback). */ 354 { "ISO10646-1", NULLQUARK, 355 utf8_mbtowc, utf8_wctomb 356 }, 357 358 /* Encoding ISO10646-1 for fonts means UCS2-like encoding 359 so for conversion to FontCharSet we need this record */ 360 { "ISO10646-1", NULLQUARK, 361 ucs2be_mbtowc, ucs2be_wctomb 362 } 363}; 364 365#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0])) 366#define all_charsets_count (charsets_table_size - 1) 367#define ucs2_conv_index (charsets_table_size - 1) 368 369static void 370init_all_charsets (void) 371{ 372 Utf8Conv convptr; 373 int i; 374 375 for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--) 376 convptr->xrm_name = XrmStringToQuark(convptr->name); 377} 378 379#define lazy_init_all_charsets() \ 380 do { \ 381 if (all_charsets[0].xrm_name == NULLQUARK) \ 382 init_all_charsets(); \ 383 } while (0) 384 385/* from XlcNCharSet to XlcNUtf8String */ 386 387static int 388cstoutf8( 389 XlcConv conv, 390 XPointer *from, 391 int *from_left, 392 XPointer *to, 393 int *to_left, 394 XPointer *args, 395 int num_args) 396{ 397 XlcCharSet charset; 398 const char *name; 399 Utf8Conv convptr; 400 int i; 401 unsigned char const *src; 402 unsigned char const *srcend; 403 unsigned char *dst; 404 unsigned char *dstend; 405 int unconv_num; 406 407 if (from == NULL || *from == NULL) 408 return 0; 409 410 if (num_args < 1) 411 return -1; 412 413 charset = (XlcCharSet) args[0]; 414 name = charset->encoding_name; 415 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 416 417 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 418 if (!strcmp(convptr->name, name)) 419 break; 420 if (i == 0) 421 return -1; 422 423 src = (unsigned char const *) *from; 424 srcend = src + *from_left; 425 dst = (unsigned char *) *to; 426 dstend = dst + *to_left; 427 unconv_num = 0; 428 429 while (src < srcend) { 430 ucs4_t wc; 431 int consumed; 432 int count; 433 434 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 435 if (consumed == RET_ILSEQ) 436 return -1; 437 if (consumed == RET_TOOFEW(0)) 438 break; 439 440 count = utf8_wctomb(NULL, dst, wc, dstend-dst); 441 if (count == RET_TOOSMALL) 442 break; 443 if (count == RET_ILSEQ) { 444 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 445 if (count == RET_TOOSMALL) 446 break; 447 unconv_num++; 448 } 449 src += consumed; 450 dst += count; 451 } 452 453 *from = (XPointer) src; 454 *from_left = srcend - src; 455 *to = (XPointer) dst; 456 *to_left = dstend - dst; 457 458 return unconv_num; 459} 460 461static XlcConvMethodsRec methods_cstoutf8 = { 462 close_converter, 463 cstoutf8, 464 NULL 465}; 466 467static XlcConv 468open_cstoutf8( 469 XLCd from_lcd, 470 const char *from_type, 471 XLCd to_lcd, 472 const char *to_type) 473{ 474 lazy_init_all_charsets(); 475 return create_conv(from_lcd, &methods_cstoutf8); 476} 477 478/* from XlcNUtf8String to XlcNCharSet */ 479 480static XlcConv 481create_tocs_conv( 482 XLCd lcd, 483 XlcConvMethods methods) 484{ 485 XlcConv conv; 486 CodeSet *codeset_list; 487 int codeset_num; 488 int charset_num; 489 int i, j, k; 490 Utf8Conv *preferred; 491 492 lazy_init_all_charsets(); 493 494 codeset_list = XLC_GENERIC(lcd, codeset_list); 495 codeset_num = XLC_GENERIC(lcd, codeset_num); 496 497 charset_num = 0; 498 for (i = 0; i < codeset_num; i++) 499 charset_num += codeset_list[i]->num_charsets; 500 if (charset_num > all_charsets_count-1) 501 charset_num = all_charsets_count-1; 502 503 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) 504 + (charset_num + 1) * sizeof(Utf8Conv)); 505 if (conv == (XlcConv) NULL) 506 return (XlcConv) NULL; 507 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 508 509 /* Loop through all codesets mentioned in the locale. */ 510 charset_num = 0; 511 for (i = 0; i < codeset_num; i++) { 512 XlcCharSet *charsets = codeset_list[i]->charset_list; 513 int num_charsets = codeset_list[i]->num_charsets; 514 for (j = 0; j < num_charsets; j++) { 515 const char *name = charsets[j]->encoding_name; 516 /* If it wasn't already encountered... */ 517 for (k = charset_num-1; k >= 0; k--) 518 if (!strcmp(preferred[k]->name, name)) 519 break; 520 if (k < 0) { 521 /* Look it up in all_charsets[]. */ 522 for (k = 0; k < all_charsets_count-1; k++) 523 if (!strcmp(all_charsets[k].name, name)) { 524 /* Add it to the preferred set. */ 525 preferred[charset_num++] = &all_charsets[k]; 526 break; 527 } 528 } 529 } 530 } 531 preferred[charset_num] = (Utf8Conv) NULL; 532 533 conv->methods = methods; 534 conv->state = (XPointer) preferred; 535 536 return conv; 537} 538 539static void 540close_tocs_converter( 541 XlcConv conv) 542{ 543 /* conv->state is allocated together with conv, free both at once. */ 544 Xfree((char *) conv); 545} 546 547/* 548 * Converts a Unicode character to an appropriate character set. The NULL 549 * terminated array of preferred character sets is passed as first argument. 550 * If successful, *charsetp is set to the character set that was used, and 551 * *sidep is set to the character set side (XlcGL or XlcGR). 552 */ 553static int 554charset_wctocs( 555 Utf8Conv *preferred, 556 Utf8Conv *charsetp, 557 XlcSide *sidep, 558 XlcConv conv, 559 unsigned char *r, 560 ucs4_t wc, 561 int n) 562{ 563 int count; 564 Utf8Conv convptr; 565 int i; 566 567 for (; *preferred != (Utf8Conv) NULL; preferred++) { 568 convptr = *preferred; 569 count = convptr->wctocs(conv, r, wc, n); 570 if (count == RET_TOOSMALL) 571 return RET_TOOSMALL; 572 if (count != RET_ILSEQ) { 573 *charsetp = convptr; 574 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 575 return count; 576 } 577 } 578 for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) { 579 count = convptr->wctocs(conv, r, wc, n); 580 if (count == RET_TOOSMALL) 581 return RET_TOOSMALL; 582 if (count != RET_ILSEQ) { 583 *charsetp = convptr; 584 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 585 return count; 586 } 587 } 588 return RET_ILSEQ; 589} 590 591static int 592utf8tocs( 593 XlcConv conv, 594 XPointer *from, 595 int *from_left, 596 XPointer *to, 597 int *to_left, 598 XPointer *args, 599 int num_args) 600{ 601 Utf8Conv *preferred_charsets; 602 XlcCharSet last_charset = NULL; 603 unsigned char const *src; 604 unsigned char const *srcend; 605 unsigned char *dst; 606 unsigned char *dstend; 607 int unconv_num; 608 609 if (from == NULL || *from == NULL) 610 return 0; 611 612 preferred_charsets = (Utf8Conv *) conv->state; 613 src = (unsigned char const *) *from; 614 srcend = src + *from_left; 615 dst = (unsigned char *) *to; 616 dstend = dst + *to_left; 617 unconv_num = 0; 618 619 while (src < srcend && dst < dstend) { 620 Utf8Conv chosen_charset = NULL; 621 XlcSide chosen_side = XlcNONE; 622 ucs4_t wc; 623 int consumed; 624 int count; 625 626 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 627 if (consumed == RET_TOOFEW(0)) 628 break; 629 if (consumed == RET_ILSEQ) { 630 src++; 631 unconv_num++; 632 continue; 633 } 634 635 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 636 if (count == RET_TOOSMALL) 637 break; 638 if (count == RET_ILSEQ) { 639 src += consumed; 640 unconv_num++; 641 continue; 642 } 643 644 if (last_charset == NULL) { 645 last_charset = 646 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 647 if (last_charset == NULL) { 648 src += consumed; 649 unconv_num++; 650 continue; 651 } 652 } else { 653 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 654 && (last_charset->side == XlcGLGR 655 || last_charset->side == chosen_side))) 656 break; 657 } 658 src += consumed; 659 dst += count; 660 } 661 662 if (last_charset == NULL) 663 return -1; 664 665 *from = (XPointer) src; 666 *from_left = srcend - src; 667 *to = (XPointer) dst; 668 *to_left = dstend - dst; 669 670 if (num_args >= 1) 671 *((XlcCharSet *)args[0]) = last_charset; 672 673 return unconv_num; 674} 675 676static XlcConvMethodsRec methods_utf8tocs = { 677 close_tocs_converter, 678 utf8tocs, 679 NULL 680}; 681 682static XlcConv 683open_utf8tocs( 684 XLCd from_lcd, 685 const char *from_type, 686 XLCd to_lcd, 687 const char *to_type) 688{ 689 return create_tocs_conv(from_lcd, &methods_utf8tocs); 690} 691 692/* from XlcNUtf8String to XlcNChar */ 693 694static int 695utf8tocs1( 696 XlcConv conv, 697 XPointer *from, 698 int *from_left, 699 XPointer *to, 700 int *to_left, 701 XPointer *args, 702 int num_args) 703{ 704 Utf8Conv *preferred_charsets; 705 XlcCharSet last_charset = NULL; 706 unsigned char const *src; 707 unsigned char const *srcend; 708 unsigned char *dst; 709 unsigned char *dstend; 710 int unconv_num; 711 712 if (from == NULL || *from == NULL) 713 return 0; 714 715 preferred_charsets = (Utf8Conv *) conv->state; 716 src = (unsigned char const *) *from; 717 srcend = src + *from_left; 718 dst = (unsigned char *) *to; 719 dstend = dst + *to_left; 720 unconv_num = 0; 721 722 while (src < srcend && dst < dstend) { 723 Utf8Conv chosen_charset = NULL; 724 XlcSide chosen_side = XlcNONE; 725 ucs4_t wc; 726 int consumed; 727 int count; 728 729 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 730 if (consumed == RET_TOOFEW(0)) 731 break; 732 if (consumed == RET_ILSEQ) { 733 src++; 734 unconv_num++; 735 continue; 736 } 737 738 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 739 if (count == RET_TOOSMALL) 740 break; 741 if (count == RET_ILSEQ) { 742 src += consumed; 743 unconv_num++; 744 continue; 745 } 746 747 if (last_charset == NULL) { 748 last_charset = 749 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 750 if (last_charset == NULL) { 751 src += consumed; 752 unconv_num++; 753 continue; 754 } 755 } else { 756 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 757 && (last_charset->side == XlcGLGR 758 || last_charset->side == chosen_side))) 759 break; 760 } 761 src += consumed; 762 dst += count; 763 break; 764 } 765 766 if (last_charset == NULL) 767 return -1; 768 769 *from = (XPointer) src; 770 *from_left = srcend - src; 771 *to = (XPointer) dst; 772 *to_left = dstend - dst; 773 774 if (num_args >= 1) 775 *((XlcCharSet *)args[0]) = last_charset; 776 777 return unconv_num; 778} 779 780static XlcConvMethodsRec methods_utf8tocs1 = { 781 close_tocs_converter, 782 utf8tocs1, 783 NULL 784}; 785 786static XlcConv 787open_utf8tocs1( 788 XLCd from_lcd, 789 const char *from_type, 790 XLCd to_lcd, 791 const char *to_type) 792{ 793 return create_tocs_conv(from_lcd, &methods_utf8tocs1); 794} 795 796/* from XlcNUtf8String to XlcNString */ 797 798static int 799utf8tostr( 800 XlcConv conv, 801 XPointer *from, 802 int *from_left, 803 XPointer *to, 804 int *to_left, 805 XPointer *args, 806 int num_args) 807{ 808 unsigned char const *src; 809 unsigned char const *srcend; 810 unsigned char *dst; 811 unsigned char *dstend; 812 int unconv_num; 813 814 if (from == NULL || *from == NULL) 815 return 0; 816 817 src = (unsigned char const *) *from; 818 srcend = src + *from_left; 819 dst = (unsigned char *) *to; 820 dstend = dst + *to_left; 821 unconv_num = 0; 822 823 while (src < srcend) { 824 unsigned char c; 825 ucs4_t wc; 826 int consumed; 827 828 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 829 if (consumed == RET_TOOFEW(0)) 830 break; 831 if (dst == dstend) 832 break; 833 if (consumed == RET_ILSEQ) { 834 consumed = 1; 835 c = BAD_CHAR; 836 unconv_num++; 837 } else { 838 if ((wc & ~(ucs4_t)0xff) != 0) { 839 c = BAD_CHAR; 840 unconv_num++; 841 } else 842 c = (unsigned char) wc; 843 } 844 *dst++ = c; 845 src += consumed; 846 } 847 848 *from = (XPointer) src; 849 *from_left = srcend - src; 850 *to = (XPointer) dst; 851 *to_left = dstend - dst; 852 853 return unconv_num; 854} 855 856static XlcConvMethodsRec methods_utf8tostr = { 857 close_converter, 858 utf8tostr, 859 NULL 860}; 861 862static XlcConv 863open_utf8tostr( 864 XLCd from_lcd, 865 const char *from_type, 866 XLCd to_lcd, 867 const char *to_type) 868{ 869 return create_conv(from_lcd, &methods_utf8tostr); 870} 871 872/* from XlcNString to XlcNUtf8String */ 873 874static int 875strtoutf8( 876 XlcConv conv, 877 XPointer *from, 878 int *from_left, 879 XPointer *to, 880 int *to_left, 881 XPointer *args, 882 int num_args) 883{ 884 unsigned char const *src; 885 unsigned char const *srcend; 886 unsigned char *dst; 887 unsigned char *dstend; 888 889 if (from == NULL || *from == NULL) 890 return 0; 891 892 src = (unsigned char const *) *from; 893 srcend = src + *from_left; 894 dst = (unsigned char *) *to; 895 dstend = dst + *to_left; 896 897 while (src < srcend) { 898 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 899 if (count == RET_TOOSMALL) 900 break; 901 dst += count; 902 src++; 903 } 904 905 *from = (XPointer) src; 906 *from_left = srcend - src; 907 *to = (XPointer) dst; 908 *to_left = dstend - dst; 909 910 return 0; 911} 912 913static XlcConvMethodsRec methods_strtoutf8 = { 914 close_converter, 915 strtoutf8, 916 NULL 917}; 918 919static XlcConv 920open_strtoutf8( 921 XLCd from_lcd, 922 const char *from_type, 923 XLCd to_lcd, 924 const char *to_type) 925{ 926 return create_conv(from_lcd, &methods_strtoutf8); 927} 928 929/* Support for the input methods. */ 930 931XPointer 932_Utf8GetConvByName( 933 const char *name) 934{ 935 XrmQuark xrm_name; 936 Utf8Conv convptr; 937 int i; 938 939 if (name == NULL) 940 return (XPointer) NULL; 941 942 lazy_init_all_charsets(); 943 xrm_name = XrmStringToQuark(name); 944 945 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 946 if (convptr->xrm_name == xrm_name) 947 return (XPointer) convptr->wctocs; 948 return (XPointer) NULL; 949} 950 951/* from XlcNUcsChar to XlcNChar, needed for input methods */ 952 953static XlcConv 954create_ucstocs_conv( 955 XLCd lcd, 956 XlcConvMethods methods) 957{ 958 959 if (XLC_PUBLIC_PART(lcd)->codeset 960 && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) { 961 XlcConv conv; 962 Utf8Conv *preferred; 963 964 lazy_init_all_charsets(); 965 966 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv)); 967 if (conv == (XlcConv) NULL) 968 return (XlcConv) NULL; 969 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 970 971 preferred[0] = &all_charsets[0]; /* ISO10646 */ 972 preferred[1] = (Utf8Conv) NULL; 973 974 conv->methods = methods; 975 conv->state = (XPointer) preferred; 976 977 return conv; 978 } else { 979 return create_tocs_conv(lcd, methods); 980 } 981} 982 983static int 984charset_wctocs_exactly( 985 Utf8Conv *preferred, 986 Utf8Conv *charsetp, 987 XlcSide *sidep, 988 XlcConv conv, 989 unsigned char *r, 990 ucs4_t wc, 991 int n) 992{ 993 int count; 994 Utf8Conv convptr; 995 996 for (; *preferred != (Utf8Conv) NULL; preferred++) { 997 convptr = *preferred; 998 count = convptr->wctocs(conv, r, wc, n); 999 if (count == RET_TOOSMALL) 1000 return RET_TOOSMALL; 1001 if (count != RET_ILSEQ) { 1002 *charsetp = convptr; 1003 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 1004 return count; 1005 } 1006 } 1007 return RET_ILSEQ; 1008} 1009 1010static int 1011ucstocs1( 1012 XlcConv conv, 1013 XPointer *from, 1014 int *from_left, 1015 XPointer *to, 1016 int *to_left, 1017 XPointer *args, 1018 int num_args) 1019{ 1020 ucs4_t const *src = (ucs4_t const *) *from; 1021 unsigned char *dst = (unsigned char *) *to; 1022 int unconv_num = 0; 1023 Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state; 1024 Utf8Conv chosen_charset = NULL; 1025 XlcSide chosen_side = XlcNONE; 1026 XlcCharSet charset = NULL; 1027 int count; 1028 1029 if (from == NULL || *from == NULL) 1030 return 0; 1031 1032 count = charset_wctocs_exactly(preferred_charsets, &chosen_charset, 1033 &chosen_side, conv, dst, *src, *to_left); 1034 if (count < 1) { 1035 unconv_num++; 1036 count = 0; 1037 } else { 1038 charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1039 } 1040 if (charset == NULL) 1041 return -1; 1042 1043 *from = (XPointer) ++src; 1044 (*from_left)--; 1045 *to = (XPointer) dst; 1046 *to_left -= count; 1047 1048 if (num_args >= 1) 1049 *((XlcCharSet *)args[0]) = charset; 1050 1051 return unconv_num; 1052} 1053 1054static XlcConvMethodsRec methods_ucstocs1 = { 1055 close_tocs_converter, 1056 ucstocs1, 1057 NULL 1058}; 1059 1060static XlcConv 1061open_ucstocs1( 1062 XLCd from_lcd, 1063 const char *from_type, 1064 XLCd to_lcd, 1065 const char *to_type) 1066{ 1067 return create_ucstocs_conv(from_lcd, &methods_ucstocs1); 1068} 1069 1070/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */ 1071 1072static int 1073ucstoutf8( 1074 XlcConv conv, 1075 XPointer *from, 1076 int *from_left, 1077 XPointer *to, 1078 int *to_left, 1079 XPointer *args, 1080 int num_args) 1081{ 1082 const ucs4_t *src; 1083 const ucs4_t *srcend; 1084 unsigned char *dst; 1085 unsigned char *dstend; 1086 int unconv_num; 1087 1088 if (from == NULL || *from == NULL) 1089 return 0; 1090 1091 src = (const ucs4_t *) *from; 1092 srcend = src + *from_left; 1093 dst = (unsigned char *) *to; 1094 dstend = dst + *to_left; 1095 unconv_num = 0; 1096 1097 while (src < srcend) { 1098 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1099 if (count == RET_TOOSMALL) 1100 break; 1101 if (count == RET_ILSEQ) 1102 unconv_num++; 1103 src++; 1104 dst += count; 1105 } 1106 1107 *from = (XPointer) src; 1108 *from_left = srcend - src; 1109 *to = (XPointer) dst; 1110 *to_left = dstend - dst; 1111 1112 return unconv_num; 1113} 1114 1115static XlcConvMethodsRec methods_ucstoutf8 = { 1116 close_converter, 1117 ucstoutf8, 1118 NULL 1119}; 1120 1121static XlcConv 1122open_ucstoutf8( 1123 XLCd from_lcd, 1124 const char *from_type, 1125 XLCd to_lcd, 1126 const char *to_type) 1127{ 1128 return create_conv(from_lcd, &methods_ucstoutf8); 1129} 1130 1131/* Registers UTF-8 converters for a non-UTF-8 locale. */ 1132void 1133_XlcAddUtf8Converters( 1134 XLCd lcd) 1135{ 1136 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8); 1137 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs); 1138 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1); 1139 _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8); 1140 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr); 1141 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1); 1142 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8); 1143} 1144 1145/***************************************************************************/ 1146/* Part II: UTF-8 locale loader conversion files 1147 * 1148 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode. 1149 */ 1150 1151/* from XlcNMultiByte to XlcNWideChar */ 1152 1153static int 1154utf8towcs( 1155 XlcConv conv, 1156 XPointer *from, 1157 int *from_left, 1158 XPointer *to, 1159 int *to_left, 1160 XPointer *args, 1161 int num_args) 1162{ 1163 unsigned char const *src; 1164 unsigned char const *srcend; 1165 wchar_t *dst; 1166 wchar_t *dstend; 1167 int unconv_num; 1168 1169 if (from == NULL || *from == NULL) 1170 return 0; 1171 1172 src = (unsigned char const *) *from; 1173 srcend = src + *from_left; 1174 dst = (wchar_t *) *to; 1175 dstend = dst + *to_left; 1176 unconv_num = 0; 1177 1178 while (src < srcend && dst < dstend) { 1179 ucs4_t wc; 1180 int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 1181 if (consumed == RET_TOOFEW(0)) 1182 break; 1183 if (consumed == RET_ILSEQ) { 1184 src++; 1185 *dst = BAD_WCHAR; 1186 unconv_num++; 1187 } else { 1188 src += consumed; 1189 *dst = wc; 1190 } 1191 dst++; 1192 } 1193 1194 *from = (XPointer) src; 1195 *from_left = srcend - src; 1196 *to = (XPointer) dst; 1197 *to_left = dstend - dst; 1198 1199 return unconv_num; 1200} 1201 1202static XlcConvMethodsRec methods_utf8towcs = { 1203 close_converter, 1204 utf8towcs, 1205 NULL 1206}; 1207 1208static XlcConv 1209open_utf8towcs( 1210 XLCd from_lcd, 1211 const char *from_type, 1212 XLCd to_lcd, 1213 const char *to_type) 1214{ 1215 return create_conv(from_lcd, &methods_utf8towcs); 1216} 1217 1218/* from XlcNWideChar to XlcNMultiByte */ 1219 1220static int 1221wcstoutf8( 1222 XlcConv conv, 1223 XPointer *from, 1224 int *from_left, 1225 XPointer *to, 1226 int *to_left, 1227 XPointer *args, 1228 int num_args) 1229{ 1230 wchar_t const *src; 1231 wchar_t const *srcend; 1232 unsigned char *dst; 1233 unsigned char *dstend; 1234 int unconv_num; 1235 1236 if (from == NULL || *from == NULL) 1237 return 0; 1238 1239 src = (wchar_t const *) *from; 1240 srcend = src + *from_left; 1241 dst = (unsigned char *) *to; 1242 dstend = dst + *to_left; 1243 unconv_num = 0; 1244 1245 while (src < srcend) { 1246 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1247 if (count == RET_TOOSMALL) 1248 break; 1249 if (count == RET_ILSEQ) { 1250 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 1251 if (count == RET_TOOSMALL) 1252 break; 1253 unconv_num++; 1254 } 1255 dst += count; 1256 src++; 1257 } 1258 1259 *from = (XPointer) src; 1260 *from_left = srcend - src; 1261 *to = (XPointer) dst; 1262 *to_left = dstend - dst; 1263 1264 return unconv_num; 1265} 1266 1267static XlcConvMethodsRec methods_wcstoutf8 = { 1268 close_converter, 1269 wcstoutf8, 1270 NULL 1271}; 1272 1273static XlcConv 1274open_wcstoutf8( 1275 XLCd from_lcd, 1276 const char *from_type, 1277 XLCd to_lcd, 1278 const char *to_type) 1279{ 1280 return create_conv(from_lcd, &methods_wcstoutf8); 1281} 1282 1283/* from XlcNString to XlcNWideChar */ 1284 1285static int 1286our_strtowcs( 1287 XlcConv conv, 1288 XPointer *from, 1289 int *from_left, 1290 XPointer *to, 1291 int *to_left, 1292 XPointer *args, 1293 int num_args) 1294{ 1295 unsigned char const *src; 1296 unsigned char const *srcend; 1297 wchar_t *dst; 1298 wchar_t *dstend; 1299 1300 if (from == NULL || *from == NULL) 1301 return 0; 1302 1303 src = (unsigned char const *) *from; 1304 srcend = src + *from_left; 1305 dst = (wchar_t *) *to; 1306 dstend = dst + *to_left; 1307 1308 while (src < srcend && dst < dstend) 1309 *dst++ = (wchar_t) *src++; 1310 1311 *from = (XPointer) src; 1312 *from_left = srcend - src; 1313 *to = (XPointer) dst; 1314 *to_left = dstend - dst; 1315 1316 return 0; 1317} 1318 1319static XlcConvMethodsRec methods_strtowcs = { 1320 close_converter, 1321 our_strtowcs, 1322 NULL 1323}; 1324 1325static XlcConv 1326open_strtowcs( 1327 XLCd from_lcd, 1328 const char *from_type, 1329 XLCd to_lcd, 1330 const char *to_type) 1331{ 1332 return create_conv(from_lcd, &methods_strtowcs); 1333} 1334 1335/* from XlcNWideChar to XlcNString */ 1336 1337static int 1338our_wcstostr( 1339 XlcConv conv, 1340 XPointer *from, 1341 int *from_left, 1342 XPointer *to, 1343 int *to_left, 1344 XPointer *args, 1345 int num_args) 1346{ 1347 wchar_t const *src; 1348 wchar_t const *srcend; 1349 unsigned char *dst; 1350 unsigned char *dstend; 1351 int unconv_num; 1352 1353 if (from == NULL || *from == NULL) 1354 return 0; 1355 1356 src = (wchar_t const *) *from; 1357 srcend = src + *from_left; 1358 dst = (unsigned char *) *to; 1359 dstend = dst + *to_left; 1360 unconv_num = 0; 1361 1362 while (src < srcend && dst < dstend) { 1363 unsigned int wc = *src++; 1364 if (wc < 0x80) 1365 *dst = wc; 1366 else { 1367 *dst = BAD_CHAR; 1368 unconv_num++; 1369 } 1370 dst++; 1371 } 1372 1373 *from = (XPointer) src; 1374 *from_left = srcend - src; 1375 *to = (XPointer) dst; 1376 *to_left = dstend - dst; 1377 1378 return unconv_num; 1379} 1380 1381static XlcConvMethodsRec methods_wcstostr = { 1382 close_converter, 1383 our_wcstostr, 1384 NULL 1385}; 1386 1387static XlcConv 1388open_wcstostr( 1389 XLCd from_lcd, 1390 const char *from_type, 1391 XLCd to_lcd, 1392 const char *to_type) 1393{ 1394 return create_conv(from_lcd, &methods_wcstostr); 1395} 1396 1397/* from XlcNCharSet to XlcNWideChar */ 1398 1399static int 1400cstowcs( 1401 XlcConv conv, 1402 XPointer *from, 1403 int *from_left, 1404 XPointer *to, 1405 int *to_left, 1406 XPointer *args, 1407 int num_args) 1408{ 1409 XlcCharSet charset; 1410 const char *name; 1411 Utf8Conv convptr; 1412 int i; 1413 unsigned char const *src; 1414 unsigned char const *srcend; 1415 wchar_t *dst; 1416 wchar_t *dstend; 1417 int unconv_num; 1418 1419 if (from == NULL || *from == NULL) 1420 return 0; 1421 1422 if (num_args < 1) 1423 return -1; 1424 1425 charset = (XlcCharSet) args[0]; 1426 name = charset->encoding_name; 1427 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1428 1429 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1430 if (!strcmp(convptr->name, name)) 1431 break; 1432 if (i == 0) 1433 return -1; 1434 1435 src = (unsigned char const *) *from; 1436 srcend = src + *from_left; 1437 dst = (wchar_t *) *to; 1438 dstend = dst + *to_left; 1439 unconv_num = 0; 1440 1441 while (src < srcend && dst < dstend) { 1442 unsigned int wc; 1443 int consumed; 1444 1445 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1446 if (consumed == RET_ILSEQ) 1447 return -1; 1448 if (consumed == RET_TOOFEW(0)) 1449 break; 1450 1451 *dst++ = wc; 1452 src += consumed; 1453 } 1454 1455 *from = (XPointer) src; 1456 *from_left = srcend - src; 1457 *to = (XPointer) dst; 1458 *to_left = dstend - dst; 1459 1460 return unconv_num; 1461} 1462 1463static XlcConvMethodsRec methods_cstowcs = { 1464 close_converter, 1465 cstowcs, 1466 NULL 1467}; 1468 1469static XlcConv 1470open_cstowcs( 1471 XLCd from_lcd, 1472 const char *from_type, 1473 XLCd to_lcd, 1474 const char *to_type) 1475{ 1476 lazy_init_all_charsets(); 1477 return create_conv(from_lcd, &methods_cstowcs); 1478} 1479 1480/* from XlcNWideChar to XlcNCharSet */ 1481 1482static int 1483wcstocs( 1484 XlcConv conv, 1485 XPointer *from, 1486 int *from_left, 1487 XPointer *to, 1488 int *to_left, 1489 XPointer *args, 1490 int num_args) 1491{ 1492 Utf8Conv *preferred_charsets; 1493 XlcCharSet last_charset = NULL; 1494 wchar_t const *src; 1495 wchar_t const *srcend; 1496 unsigned char *dst; 1497 unsigned char *dstend; 1498 int unconv_num; 1499 1500 if (from == NULL || *from == NULL) 1501 return 0; 1502 1503 preferred_charsets = (Utf8Conv *) conv->state; 1504 src = (wchar_t const *) *from; 1505 srcend = src + *from_left; 1506 dst = (unsigned char *) *to; 1507 dstend = dst + *to_left; 1508 unconv_num = 0; 1509 1510 while (src < srcend && dst < dstend) { 1511 Utf8Conv chosen_charset = NULL; 1512 XlcSide chosen_side = XlcNONE; 1513 wchar_t wc = *src; 1514 int count; 1515 1516 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1517 if (count == RET_TOOSMALL) 1518 break; 1519 if (count == RET_ILSEQ) { 1520 src++; 1521 unconv_num++; 1522 continue; 1523 } 1524 1525 if (last_charset == NULL) { 1526 last_charset = 1527 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1528 if (last_charset == NULL) { 1529 src++; 1530 unconv_num++; 1531 continue; 1532 } 1533 } else { 1534 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1535 && (last_charset->side == XlcGLGR 1536 || last_charset->side == chosen_side))) 1537 break; 1538 } 1539 src++; 1540 dst += count; 1541 } 1542 1543 if (last_charset == NULL) 1544 return -1; 1545 1546 *from = (XPointer) src; 1547 *from_left = srcend - src; 1548 *to = (XPointer) dst; 1549 *to_left = dstend - dst; 1550 1551 if (num_args >= 1) 1552 *((XlcCharSet *)args[0]) = last_charset; 1553 1554 return unconv_num; 1555} 1556 1557static XlcConvMethodsRec methods_wcstocs = { 1558 close_tocs_converter, 1559 wcstocs, 1560 NULL 1561}; 1562 1563static XlcConv 1564open_wcstocs( 1565 XLCd from_lcd, 1566 const char *from_type, 1567 XLCd to_lcd, 1568 const char *to_type) 1569{ 1570 return create_tocs_conv(from_lcd, &methods_wcstocs); 1571} 1572 1573/* from XlcNWideChar to XlcNChar */ 1574 1575static int 1576wcstocs1( 1577 XlcConv conv, 1578 XPointer *from, 1579 int *from_left, 1580 XPointer *to, 1581 int *to_left, 1582 XPointer *args, 1583 int num_args) 1584{ 1585 Utf8Conv *preferred_charsets; 1586 XlcCharSet last_charset = NULL; 1587 wchar_t const *src; 1588 wchar_t const *srcend; 1589 unsigned char *dst; 1590 unsigned char *dstend; 1591 int unconv_num; 1592 1593 if (from == NULL || *from == NULL) 1594 return 0; 1595 1596 preferred_charsets = (Utf8Conv *) conv->state; 1597 src = (wchar_t const *) *from; 1598 srcend = src + *from_left; 1599 dst = (unsigned char *) *to; 1600 dstend = dst + *to_left; 1601 unconv_num = 0; 1602 1603 while (src < srcend && dst < dstend) { 1604 Utf8Conv chosen_charset = NULL; 1605 XlcSide chosen_side = XlcNONE; 1606 wchar_t wc = *src; 1607 int count; 1608 1609 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1610 if (count == RET_TOOSMALL) 1611 break; 1612 if (count == RET_ILSEQ) { 1613 src++; 1614 unconv_num++; 1615 continue; 1616 } 1617 1618 if (last_charset == NULL) { 1619 last_charset = 1620 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1621 if (last_charset == NULL) { 1622 src++; 1623 unconv_num++; 1624 continue; 1625 } 1626 } else { 1627 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1628 && (last_charset->side == XlcGLGR 1629 || last_charset->side == chosen_side))) 1630 break; 1631 } 1632 src++; 1633 dst += count; 1634 break; 1635 } 1636 1637 if (last_charset == NULL) 1638 return -1; 1639 1640 *from = (XPointer) src; 1641 *from_left = srcend - src; 1642 *to = (XPointer) dst; 1643 *to_left = dstend - dst; 1644 1645 if (num_args >= 1) 1646 *((XlcCharSet *)args[0]) = last_charset; 1647 1648 return unconv_num; 1649} 1650 1651static XlcConvMethodsRec methods_wcstocs1 = { 1652 close_tocs_converter, 1653 wcstocs1, 1654 NULL 1655}; 1656 1657static XlcConv 1658open_wcstocs1( 1659 XLCd from_lcd, 1660 const char *from_type, 1661 XLCd to_lcd, 1662 const char *to_type) 1663{ 1664 return create_tocs_conv(from_lcd, &methods_wcstocs1); 1665} 1666 1667/* trivial, no conversion */ 1668 1669static int 1670identity( 1671 XlcConv conv, 1672 XPointer *from, 1673 int *from_left, 1674 XPointer *to, 1675 int *to_left, 1676 XPointer *args, 1677 int num_args) 1678{ 1679 unsigned char const *src; 1680 unsigned char const *srcend; 1681 unsigned char *dst; 1682 unsigned char *dstend; 1683 1684 if (from == NULL || *from == NULL) 1685 return 0; 1686 1687 src = (unsigned char const *) *from; 1688 srcend = src + *from_left; 1689 dst = (unsigned char *) *to; 1690 dstend = dst + *to_left; 1691 1692 while (src < srcend && dst < dstend) 1693 *dst++ = *src++; 1694 1695 *from = (XPointer) src; 1696 *from_left = srcend - src; 1697 *to = (XPointer) dst; 1698 *to_left = dstend - dst; 1699 1700 return 0; 1701} 1702 1703static XlcConvMethodsRec methods_identity = { 1704 close_converter, 1705 identity, 1706 NULL 1707}; 1708 1709static XlcConv 1710open_identity( 1711 XLCd from_lcd, 1712 const char *from_type, 1713 XLCd to_lcd, 1714 const char *to_type) 1715{ 1716 return create_conv(from_lcd, &methods_identity); 1717} 1718 1719/* from MultiByte/WideChar to FontCharSet. */ 1720/* They really use converters to CharSet 1721 * but with different create_conv procedure. */ 1722 1723static XlcConv 1724create_tofontcs_conv( 1725 XLCd lcd, 1726 XlcConvMethods methods) 1727{ 1728 XlcConv conv; 1729 int i, num, k, count; 1730 char **value, buf[20]; 1731 Utf8Conv *preferred; 1732 1733 lazy_init_all_charsets(); 1734 1735 for (i = 0, num = 0;; i++) { 1736 sprintf(buf, "fs%d.charset.name", i); 1737 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1738 if (count < 1) { 1739 sprintf(buf, "fs%d.charset", i); 1740 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1741 if (count < 1) 1742 break; 1743 } 1744 num += count; 1745 } 1746 1747 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv)); 1748 if (conv == (XlcConv) NULL) 1749 return (XlcConv) NULL; 1750 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 1751 1752 /* Loop through all fontsets mentioned in the locale. */ 1753 for (i = 0, num = 0;; i++) { 1754 sprintf(buf, "fs%d.charset.name", i); 1755 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1756 if (count < 1) { 1757 sprintf(buf, "fs%d.charset", i); 1758 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1759 if (count < 1) 1760 break; 1761 } 1762 while (count-- > 0) { 1763 XlcCharSet charset = _XlcGetCharSet(*value++); 1764 const char *name; 1765 1766 if (charset == (XlcCharSet) NULL) 1767 continue; 1768 1769 name = charset->encoding_name; 1770 /* If it wasn't already encountered... */ 1771 for (k = num - 1; k >= 0; k--) 1772 if (!strcmp(preferred[k]->name, name)) 1773 break; 1774 if (k < 0) { 1775 /* For fonts "ISO10646-1" means ucs2, not utf8.*/ 1776 if (!strcmp("ISO10646-1", name)) { 1777 preferred[num++] = &all_charsets[ucs2_conv_index]; 1778 continue; 1779 } 1780 /* Look it up in all_charsets[]. */ 1781 for (k = 0; k < all_charsets_count-1; k++) 1782 if (!strcmp(all_charsets[k].name, name)) { 1783 /* Add it to the preferred set. */ 1784 preferred[num++] = &all_charsets[k]; 1785 break; 1786 } 1787 } 1788 } 1789 } 1790 preferred[num] = (Utf8Conv) NULL; 1791 1792 conv->methods = methods; 1793 conv->state = (XPointer) preferred; 1794 1795 return conv; 1796} 1797 1798static XlcConv 1799open_wcstofcs( 1800 XLCd from_lcd, 1801 const char *from_type, 1802 XLCd to_lcd, 1803 const char *to_type) 1804{ 1805 return create_tofontcs_conv(from_lcd, &methods_wcstocs); 1806} 1807 1808static XlcConv 1809open_utf8tofcs( 1810 XLCd from_lcd, 1811 const char *from_type, 1812 XLCd to_lcd, 1813 const char *to_type) 1814{ 1815 return create_tofontcs_conv(from_lcd, &methods_utf8tocs); 1816} 1817 1818/* ========================== iconv Stuff ================================ */ 1819 1820/* from XlcNCharSet to XlcNMultiByte */ 1821 1822static int 1823iconv_cstombs(XlcConv conv, XPointer *from, int *from_left, 1824 XPointer *to, int *to_left, XPointer *args, int num_args) 1825{ 1826 XlcCharSet charset; 1827 char *name; 1828 Utf8Conv convptr; 1829 int i; 1830 unsigned char const *src; 1831 unsigned char const *srcend; 1832 unsigned char *dst; 1833 unsigned char *dstend; 1834 int unconv_num; 1835 1836 if (from == NULL || *from == NULL) 1837 return 0; 1838 1839 if (num_args < 1) 1840 return -1; 1841 1842 charset = (XlcCharSet) args[0]; 1843 name = charset->encoding_name; 1844 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1845 1846 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1847 if (!strcmp(convptr->name, name)) 1848 break; 1849 if (i == 0) 1850 return -1; 1851 1852 src = (unsigned char const *) *from; 1853 srcend = src + *from_left; 1854 dst = (unsigned char *) *to; 1855 dstend = dst + *to_left; 1856 unconv_num = 0; 1857 1858 while (src < srcend) { 1859 ucs4_t wc; 1860 int consumed; 1861 int count; 1862 1863 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1864 if (consumed == RET_ILSEQ) 1865 return -1; 1866 if (consumed == RET_TOOFEW(0)) 1867 break; 1868 1869 /* Use stdc iconv to convert widechar -> multibyte */ 1870 1871 count = wctomb(dst, wc); 1872 if (count == 0) 1873 break; 1874 if (count == -1) { 1875 count = wctomb(dst, BAD_WCHAR); 1876 if (count == 0) 1877 break; 1878 unconv_num++; 1879 } 1880 src += consumed; 1881 dst += count; 1882 } 1883 1884 *from = (XPointer) src; 1885 *from_left = srcend - src; 1886 *to = (XPointer) dst; 1887 *to_left = dstend - dst; 1888 1889 return unconv_num; 1890 1891} 1892 1893static XlcConvMethodsRec iconv_cstombs_methods = { 1894 close_converter, 1895 iconv_cstombs, 1896 NULL 1897}; 1898 1899static XlcConv 1900open_iconv_cstombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 1901{ 1902 lazy_init_all_charsets(); 1903 return create_conv(from_lcd, &iconv_cstombs_methods); 1904} 1905 1906static int 1907iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left, 1908 XPointer *to, int *to_left, XPointer *args, int num_args) 1909{ 1910 Utf8Conv *preferred_charsets; 1911 XlcCharSet last_charset = NULL; 1912 unsigned char const *src; 1913 unsigned char const *srcend; 1914 unsigned char *dst; 1915 unsigned char *dstend; 1916 int unconv_num; 1917 1918 if (from == NULL || *from == NULL) 1919 return 0; 1920 1921 preferred_charsets = (Utf8Conv *) conv->state; 1922 src = (unsigned char const *) *from; 1923 srcend = src + *from_left; 1924 dst = (unsigned char *) *to; 1925 dstend = dst + *to_left; 1926 unconv_num = 0; 1927 1928 while (src < srcend && dst < dstend) { 1929 Utf8Conv chosen_charset = NULL; 1930 XlcSide chosen_side = XlcNONE; 1931 wchar_t wc; 1932 int consumed; 1933 int count; 1934 1935 /* Uses stdc iconv to convert multibyte -> widechar */ 1936 1937 consumed = mbtowc(&wc, src, srcend-src); 1938 if (consumed == 0) 1939 break; 1940 if (consumed == -1) { 1941 src++; 1942 unconv_num++; 1943 continue; 1944 } 1945 1946 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1947 1948 if (count == RET_TOOSMALL) 1949 break; 1950 if (count == RET_ILSEQ) { 1951 src += consumed; 1952 unconv_num++; 1953 continue; 1954 } 1955 1956 if (last_charset == NULL) { 1957 last_charset = 1958 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1959 if (last_charset == NULL) { 1960 src += consumed; 1961 unconv_num++; 1962 continue; 1963 } 1964 } else { 1965 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1966 && (last_charset->side == XlcGLGR 1967 || last_charset->side == chosen_side))) 1968 break; 1969 } 1970 src += consumed; 1971 dst += count; 1972 } 1973 1974 if (last_charset == NULL) 1975 return -1; 1976 1977 *from = (XPointer) src; 1978 *from_left = srcend - src; 1979 *to = (XPointer) dst; 1980 *to_left = dstend - dst; 1981 1982 if (num_args >= 1) 1983 *((XlcCharSet *)args[0]) = last_charset; 1984 1985 return unconv_num; 1986} 1987 1988static XlcConvMethodsRec iconv_mbstocs_methods = { 1989 close_tocs_converter, 1990 iconv_mbstocs, 1991 NULL 1992}; 1993 1994static XlcConv 1995open_iconv_mbstocs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 1996{ 1997 return create_tocs_conv(from_lcd, &iconv_mbstocs_methods); 1998} 1999 2000/* from XlcNMultiByte to XlcNChar */ 2001 2002static int 2003iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left, 2004 XPointer *to, int *to_left, XPointer *args, int num_args) 2005{ 2006 Utf8Conv *preferred_charsets; 2007 XlcCharSet last_charset = NULL; 2008 unsigned char const *src; 2009 unsigned char const *srcend; 2010 unsigned char *dst; 2011 unsigned char *dstend; 2012 int unconv_num; 2013 2014 if (from == NULL || *from == NULL) 2015 return 0; 2016 2017 preferred_charsets = (Utf8Conv *) conv->state; 2018 src = (unsigned char const *) *from; 2019 srcend = src + *from_left; 2020 dst = (unsigned char *) *to; 2021 dstend = dst + *to_left; 2022 unconv_num = 0; 2023 2024 while (src < srcend && dst < dstend) { 2025 Utf8Conv chosen_charset = NULL; 2026 XlcSide chosen_side = XlcNONE; 2027 wchar_t wc; 2028 int consumed; 2029 int count; 2030 2031 /* Uses stdc iconv to convert multibyte -> widechar */ 2032 2033 consumed = mbtowc(&wc, src, srcend-src); 2034 if (consumed == 0) 2035 break; 2036 if (consumed == -1) { 2037 src++; 2038 unconv_num++; 2039 continue; 2040 } 2041 2042 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 2043 if (count == RET_TOOSMALL) 2044 break; 2045 if (count == RET_ILSEQ) { 2046 src += consumed; 2047 unconv_num++; 2048 continue; 2049 } 2050 2051 if (last_charset == NULL) { 2052 last_charset = 2053 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 2054 if (last_charset == NULL) { 2055 src += consumed; 2056 unconv_num++; 2057 continue; 2058 } 2059 } else { 2060 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 2061 && (last_charset->side == XlcGLGR 2062 || last_charset->side == chosen_side))) 2063 break; 2064 } 2065 src += consumed; 2066 dst += count; 2067 } 2068 2069 if (last_charset == NULL) 2070 return -1; 2071 2072 *from = (XPointer) src; 2073 *from_left = srcend - src; 2074 *to = (XPointer) dst; 2075 *to_left = dstend - dst; 2076 2077 if (num_args >= 1) 2078 *((XlcCharSet *)args[0]) = last_charset; 2079 2080 return unconv_num; 2081} 2082 2083static XlcConvMethodsRec iconv_mbtocs_methods = { 2084 close_tocs_converter, 2085 iconv_mbtocs, 2086 NULL 2087}; 2088 2089static XlcConv 2090open_iconv_mbtocs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 2091{ 2092 return create_tocs_conv(from_lcd, &iconv_mbtocs_methods ); 2093} 2094 2095/* from XlcNMultiByte to XlcNString */ 2096 2097static int 2098iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left, 2099 XPointer *to, int *to_left, XPointer *args, int num_args) 2100{ 2101 unsigned char const *src; 2102 unsigned char const *srcend; 2103 unsigned char *dst; 2104 unsigned char *dstend; 2105 int unconv_num; 2106 2107 if (from == NULL || *from == NULL) 2108 return 0; 2109 2110 src = (unsigned char const *) *from; 2111 srcend = src + *from_left; 2112 dst = (unsigned char *) *to; 2113 dstend = dst + *to_left; 2114 unconv_num = 0; 2115 2116 while (src < srcend) { 2117 unsigned char c; 2118 wchar_t wc; 2119 int consumed; 2120 2121 /* Uses stdc iconv to convert multibyte -> widechar */ 2122 2123 consumed = mbtowc(&wc, src, srcend-src); 2124 if (consumed == 0) 2125 break; 2126 if (dst == dstend) 2127 break; 2128 if (consumed == -1) { 2129 consumed = 1; 2130 c = BAD_CHAR; 2131 unconv_num++; 2132 } else { 2133 if ((wc & ~(wchar_t)0xff) != 0) { 2134 c = BAD_CHAR; 2135 unconv_num++; 2136 } else 2137 c = (unsigned char) wc; 2138 } 2139 *dst++ = c; 2140 src += consumed; 2141 } 2142 2143 *from = (XPointer) src; 2144 *from_left = srcend - src; 2145 *to = (XPointer) dst; 2146 *to_left = dstend - dst; 2147 2148 return unconv_num; 2149} 2150 2151static XlcConvMethodsRec iconv_mbstostr_methods = { 2152 close_converter, 2153 iconv_mbstostr, 2154 NULL 2155}; 2156 2157static XlcConv 2158open_iconv_mbstostr(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 2159{ 2160 return create_conv(from_lcd, &iconv_mbstostr_methods); 2161} 2162 2163/* from XlcNString to XlcNMultiByte */ 2164static int 2165iconv_strtombs(XlcConv conv, XPointer *from, int *from_left, 2166 XPointer *to, int *to_left, XPointer *args, int num_args) 2167{ 2168 unsigned char const *src; 2169 unsigned char const *srcend; 2170 unsigned char *dst; 2171 unsigned char *dstend; 2172 2173 if (from == NULL || *from == NULL) 2174 return 0; 2175 2176 src = (unsigned char const *) *from; 2177 srcend = src + *from_left; 2178 dst = (unsigned char *) *to; 2179 dstend = dst + *to_left; 2180 2181 while (src < srcend) { 2182 int count = wctomb(dst, *src); 2183 if (count < 0) 2184 break; 2185 dst += count; 2186 src++; 2187 } 2188 2189 *from = (XPointer) src; 2190 *from_left = srcend - src; 2191 *to = (XPointer) dst; 2192 *to_left = dstend - dst; 2193 2194 return 0; 2195} 2196 2197static XlcConvMethodsRec iconv_strtombs_methods= { 2198 close_converter, 2199 iconv_strtombs, 2200 NULL 2201}; 2202 2203static XlcConv 2204open_iconv_strtombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 2205{ 2206 return create_conv(from_lcd, &iconv_strtombs_methods); 2207} 2208 2209/***************************************************************************/ 2210/* Part II: An iconv locale loader. 2211 * 2212 *Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode. 2213 */ 2214 2215/* from XlcNMultiByte to XlcNWideChar */ 2216static int 2217iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left, 2218 XPointer *to, int *to_left, XPointer *args, int num_args) 2219{ 2220 char *src = *((char **) from); 2221 wchar_t *dst = *((wchar_t **) to); 2222 int src_left = *from_left; 2223 int dst_left = *to_left; 2224 int length, unconv_num = 0; 2225 2226 while (src_left > 0 && dst_left > 0) { 2227 length = mbtowc(dst, src, src_left); 2228 2229 if (length > 0) { 2230 src += length; 2231 src_left -= length; 2232 if (dst) 2233 dst++; 2234 dst_left--; 2235 } else if (length < 0) { 2236 src++; 2237 src_left--; 2238 unconv_num++; 2239 } else { 2240 /* null ? */ 2241 src++; 2242 src_left--; 2243 if (dst) 2244 *dst++ = L'\0'; 2245 dst_left--; 2246 } 2247 } 2248 2249 *from = (XPointer) src; 2250 if (dst) 2251 *to = (XPointer) dst; 2252 *from_left = src_left; 2253 *to_left = dst_left; 2254 2255 return unconv_num; 2256} 2257 2258static XlcConvMethodsRec iconv_mbstowcs_methods = { 2259 close_converter, 2260 iconv_mbstowcs, 2261 NULL 2262} ; 2263 2264static XlcConv 2265open_iconv_mbstowcs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 2266{ 2267 return create_conv(from_lcd, &iconv_mbstowcs_methods); 2268} 2269 2270static int 2271iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left, 2272 XPointer *to, int *to_left, XPointer *args, int num_args) 2273{ 2274 wchar_t *src = *((wchar_t **) from); 2275 char *dst = *((char **) to); 2276 int src_left = *from_left; 2277 int dst_left = *to_left; 2278 int length, unconv_num = 0; 2279 2280 while (src_left > 0 && dst_left >= MB_CUR_MAX) { 2281 length = wctomb(dst, *src); /* XXX */ 2282 2283 if (length > 0) { 2284 src++; 2285 src_left--; 2286 if (dst) 2287 dst += length; 2288 dst_left -= length; 2289 } else if (length < 0) { 2290 src++; 2291 src_left--; 2292 unconv_num++; 2293 } 2294 } 2295 2296 *from = (XPointer) src; 2297 if (dst) 2298 *to = (XPointer) dst; 2299 *from_left = src_left; 2300 *to_left = dst_left; 2301 2302 return unconv_num; 2303} 2304 2305static XlcConvMethodsRec iconv_wcstombs_methods = { 2306 close_converter, 2307 iconv_wcstombs, 2308 NULL 2309} ; 2310 2311static XlcConv 2312open_iconv_wcstombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type) 2313{ 2314 return create_conv(from_lcd, &iconv_wcstombs_methods); 2315} 2316 2317static XlcConv 2318open_iconv_mbstofcs( 2319 XLCd from_lcd, 2320 const char *from_type, 2321 XLCd to_lcd, 2322 const char *to_type) 2323{ 2324 return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods); 2325} 2326 2327/* Registers UTF-8 converters for a UTF-8 locale. */ 2328 2329void 2330_XlcAddUtf8LocaleConverters( 2331 XLCd lcd) 2332{ 2333 /* Register elementary converters. */ 2334 2335 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs); 2336 2337 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8); 2338 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2339 2340 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2341 2342 /* Register converters for XlcNCharSet. This implicitly provides 2343 * converters from and to XlcNCompoundText. */ 2344 2345 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8); 2346 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs); 2347 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1); 2348 2349 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2350 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2351 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2352 2353 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8); 2354 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr); 2355 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity); 2356 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity); 2357 2358 /* Register converters for XlcNFontCharSet */ 2359 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs); 2360 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2361} 2362 2363void 2364_XlcAddGB18030LocaleConverters( 2365 XLCd lcd) 2366{ 2367 2368 /* Register elementary converters. */ 2369 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs); 2370 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs); 2371 2372 /* Register converters for XlcNCharSet. This implicitly provides 2373 * converters from and to XlcNCompoundText. */ 2374 2375 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs); 2376 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs); 2377 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs); 2378 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs); 2379 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr); 2380 2381 /* Register converters for XlcNFontCharSet */ 2382 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs); 2383 2384 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2385 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2386 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2387 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2388 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2389 2390 /* Register converters for XlcNFontCharSet */ 2391 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2392} 2393