/******************************************************************

              Copyright 1993 by SunSoft, Inc.
              Copyright 1999-2000 by Bruno Haible

Permission to use, copy, modify, distribute, and sell this software
and its documentation for any purpose is hereby granted without fee,
provided that the above copyright notice appear in all copies and
that both that copyright notice and this permission notice appear
in supporting documentation, and that the names of SunSoft, Inc. and
Bruno Haible not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission. SunSoft, Inc. and Bruno Haible make no representations
about the suitability of this software for any purpose. It is
provided "as is" without express or implied warranty.

SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/

/*
 * This file contains:
 *
 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
 *
 *    Used for three purposes:
 *      1. The UTF-8 locales, see below.
 *      2.
 *         Unicode-aware applications for which the use of 8-bit character
 *         sets is an anachronism.
 *      3. For conversion from keysym to locale encoding.
 *
 * II. Conversion files for a UTF-8 locale loader.
 *     Supports: all locales with codeset UTF-8.
 *     How: Provides converters for UTF-8.
 *     Platforms: all systems.
 *
 * The loader itself is located in lcUTF8.c.
 */

/*
 * The conversion from UTF-8 to CompoundText is realized in a very
 * conservative way. Recall that CompoundText data is used for inter-client
 * communication purposes. We distinguish three classes of clients:
 * - Clients which accept only those pieces of CompoundText which belong to
 *   the character set understood by the current locale.
 *   (Example: clients which are linked to an older X11 library.)
 * - Clients which accept CompoundText with multiple character sets and parse
 *   it themselves.
 *   (Example: emacs, xemacs.)
 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
 *   functions for the conversion of CompoundText to their current locale's
 *   multi-byte/wide-character format.
 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
 * follows. For every character, it first tests whether the character is
 * representable in the current locale's original (non-UTF-8) character set.
 * If not, it goes through the list of predefined character sets for
 * CompoundText and tests if the character is representable in that character
 * set. If so, it encodes the character using its code within that character
 * set.
If not, it uses an UTF-8-in-CompoundText encapsulation. Since 661ab64890Smrg * clients of the first and second kind ignore such encapsulated text, 671ab64890Smrg * this encapsulation is kept to a minimum and terminated as early as possible. 681ab64890Smrg * 691ab64890Smrg * In a distant future, when clients of the first and second kind will have 701ab64890Smrg * disappeared, we will be able to stuff UTF-8 data directly in CompoundText 711ab64890Smrg * without first going through the list of predefined character sets. 721ab64890Smrg */ 731ab64890Smrg 741ab64890Smrg#ifdef HAVE_CONFIG_H 751ab64890Smrg#include <config.h> 761ab64890Smrg#endif 771ab64890Smrg#include <stdio.h> 781ab64890Smrg#include "Xlibint.h" 791ab64890Smrg#include "XlcPubI.h" 801ab64890Smrg#include "XlcGeneric.h" 811ab64890Smrg 821ab64890Smrgstatic XlcConv 831ab64890Smrgcreate_conv( 841ab64890Smrg XLCd lcd, 851ab64890Smrg XlcConvMethods methods) 861ab64890Smrg{ 871ab64890Smrg XlcConv conv; 881ab64890Smrg 89818534a1Smrg conv = Xmalloc(sizeof(XlcConvRec)); 901ab64890Smrg if (conv == (XlcConv) NULL) 911ab64890Smrg return (XlcConv) NULL; 921ab64890Smrg 931ab64890Smrg conv->methods = methods; 941ab64890Smrg conv->state = NULL; 951ab64890Smrg 961ab64890Smrg return conv; 971ab64890Smrg} 981ab64890Smrg 991ab64890Smrgstatic void 1001ab64890Smrgclose_converter( 1011ab64890Smrg XlcConv conv) 1021ab64890Smrg{ 103818534a1Smrg Xfree(conv); 1041ab64890Smrg} 1051ab64890Smrg 1061ab64890Smrg/* Replacement character for invalid multibyte sequence or wide character. */ 1071ab64890Smrg#define BAD_WCHAR ((ucs4_t) 0xfffd) 1081ab64890Smrg#define BAD_CHAR '?' 1091ab64890Smrg 1101ab64890Smrg/***************************************************************************/ 1111ab64890Smrg/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 1121ab64890Smrg * 1131ab64890Smrg * Note that this code works in any locale. We store Unicode values in 1141ab64890Smrg * `ucs4_t' variables, but don't pass them to the user. 
1151ab64890Smrg * 1161ab64890Smrg * This code has to support all character sets that are used for CompoundText, 1171ab64890Smrg * nothing more, nothing less. See the table in lcCT.c. 1181ab64890Smrg * Since the conversion _to_ CompoundText is likely to need the tables for all 1191ab64890Smrg * character sets at once, we don't use dynamic loading (of tables or shared 1201ab64890Smrg * libraries through iconv()). Use a fixed set of tables instead. 1211ab64890Smrg * 1221ab64890Smrg * We use statically computed tables, not dynamically allocated arrays, 1231ab64890Smrg * because it's more memory efficient: Different processes using the same 1241ab64890Smrg * libX11 shared library share the "text" and read-only "data" sections. 1251ab64890Smrg */ 1261ab64890Smrg 1271ab64890Smrgtypedef unsigned int ucs4_t; 1281ab64890Smrg#define conv_t XlcConv 1291ab64890Smrg 1301ab64890Smrgtypedef struct _Utf8ConvRec { 1311ab64890Smrg const char *name; 1321ab64890Smrg XrmQuark xrm_name; 1331ab64890Smrg int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int); 1341ab64890Smrg int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int); 1351ab64890Smrg} Utf8ConvRec, *Utf8Conv; 1361ab64890Smrg 1371ab64890Smrg/* 1381ab64890Smrg * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n) 1391ab64890Smrg * converts the byte sequence starting at s to a wide character. Up to n bytes 1401ab64890Smrg * are available at s. n is >= 1. 1411ab64890Smrg * Result is number of bytes consumed (if a wide character was read), 1421ab64890Smrg * or 0 if invalid, or -1 if n too small. 1431ab64890Smrg * 1441ab64890Smrg * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n) 1451ab64890Smrg * converts the wide character wc to the character set xxx, and stores the 1461ab64890Smrg * result beginning at r. Up to n bytes may be written at r. n is >= 1. 1471ab64890Smrg * Result is number of bytes written, or 0 if invalid, or -1 if n too small. 
1481ab64890Smrg */ 1491ab64890Smrg 1501ab64890Smrg/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */ 1511ab64890Smrg#define RET_ILSEQ 0 1521ab64890Smrg/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 1531ab64890Smrg#define RET_TOOFEW(n) (-1-(n)) 1541ab64890Smrg/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 1551ab64890Smrg#define RET_TOOSMALL -1 1561ab64890Smrg 1571ab64890Smrg/* 1581ab64890Smrg * The tables below are bijective. It would be possible to extend the 1591ab64890Smrg * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22) 1601ab64890Smrg * but *only* with characters not contained in any other table, and *only* 1611ab64890Smrg * when the current locale is not an UTF-8 locale. 1621ab64890Smrg */ 1631ab64890Smrg 1641ab64890Smrg#include "lcUniConv/utf8.h" 1651ab64890Smrg#include "lcUniConv/ucs2be.h" 1661ab64890Smrg#ifdef notused 1671ab64890Smrg#include "lcUniConv/ascii.h" 1681ab64890Smrg#endif 1691ab64890Smrg#include "lcUniConv/iso8859_1.h" 1701ab64890Smrg#include "lcUniConv/iso8859_2.h" 1711ab64890Smrg#include "lcUniConv/iso8859_3.h" 1721ab64890Smrg#include "lcUniConv/iso8859_4.h" 1731ab64890Smrg#include "lcUniConv/iso8859_5.h" 1741ab64890Smrg#include "lcUniConv/iso8859_6.h" 1751ab64890Smrg#include "lcUniConv/iso8859_7.h" 1761ab64890Smrg#include "lcUniConv/iso8859_8.h" 1771ab64890Smrg#include "lcUniConv/iso8859_9.h" 1781ab64890Smrg#include "lcUniConv/iso8859_10.h" 1791ab64890Smrg#include "lcUniConv/iso8859_11.h" 1801ab64890Smrg#include "lcUniConv/iso8859_13.h" 1811ab64890Smrg#include "lcUniConv/iso8859_14.h" 1821ab64890Smrg#include "lcUniConv/iso8859_15.h" 1831ab64890Smrg#include "lcUniConv/iso8859_16.h" 1841ab64890Smrg#include "lcUniConv/iso8859_9e.h" 1851ab64890Smrg#include "lcUniConv/jisx0201.h" 1861ab64890Smrg#include "lcUniConv/tis620.h" 1871ab64890Smrg#include "lcUniConv/koi8_r.h" 1881ab64890Smrg#include "lcUniConv/koi8_u.h" 1891ab64890Smrg#include "lcUniConv/koi8_c.h" 
1901ab64890Smrg#include "lcUniConv/armscii_8.h" 1911ab64890Smrg#include "lcUniConv/cp1133.h" 1921ab64890Smrg#include "lcUniConv/mulelao.h" 1931ab64890Smrg#include "lcUniConv/viscii.h" 1941ab64890Smrg#include "lcUniConv/tcvn.h" 1951ab64890Smrg#include "lcUniConv/georgian_academy.h" 1961ab64890Smrg#include "lcUniConv/georgian_ps.h" 1971ab64890Smrg#include "lcUniConv/cp1251.h" 1981ab64890Smrg#include "lcUniConv/cp1255.h" 1991ab64890Smrg#include "lcUniConv/cp1256.h" 2001ab64890Smrg#include "lcUniConv/tatar_cyr.h" 2011ab64890Smrg 2021ab64890Smrgtypedef struct { 2031ab64890Smrg unsigned short indx; /* index into big table */ 2041ab64890Smrg unsigned short used; /* bitmask of used entries */ 2051ab64890Smrg} Summary16; 2061ab64890Smrg 2071ab64890Smrg#include "lcUniConv/gb2312.h" 2081ab64890Smrg#include "lcUniConv/jisx0208.h" 2091ab64890Smrg#include "lcUniConv/jisx0212.h" 2101ab64890Smrg#include "lcUniConv/ksc5601.h" 2111ab64890Smrg#include "lcUniConv/big5.h" 2121ab64890Smrg#include "lcUniConv/big5_emacs.h" 21361b2299dSmrg#include "lcUniConv/big5hkscs.h" 21461b2299dSmrg#include "lcUniConv/gbk.h" 2151ab64890Smrg 2161ab64890Smrgstatic Utf8ConvRec all_charsets[] = { 2171ab64890Smrg /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 2181ab64890Smrg (for lookup speed), once at the end (as a fallback). 
*/ 2191ab64890Smrg { "ISO10646-1", NULLQUARK, 2201ab64890Smrg utf8_mbtowc, utf8_wctomb 2211ab64890Smrg }, 2221ab64890Smrg 2231ab64890Smrg { "ISO8859-1", NULLQUARK, 2241ab64890Smrg iso8859_1_mbtowc, iso8859_1_wctomb 2251ab64890Smrg }, 2261ab64890Smrg { "ISO8859-2", NULLQUARK, 2271ab64890Smrg iso8859_2_mbtowc, iso8859_2_wctomb 2281ab64890Smrg }, 2291ab64890Smrg { "ISO8859-3", NULLQUARK, 2301ab64890Smrg iso8859_3_mbtowc, iso8859_3_wctomb 2311ab64890Smrg }, 2321ab64890Smrg { "ISO8859-4", NULLQUARK, 2331ab64890Smrg iso8859_4_mbtowc, iso8859_4_wctomb 2341ab64890Smrg }, 2351ab64890Smrg { "ISO8859-5", NULLQUARK, 2361ab64890Smrg iso8859_5_mbtowc, iso8859_5_wctomb 2371ab64890Smrg }, 2381ab64890Smrg { "ISO8859-6", NULLQUARK, 2391ab64890Smrg iso8859_6_mbtowc, iso8859_6_wctomb 2401ab64890Smrg }, 2411ab64890Smrg { "ISO8859-7", NULLQUARK, 2421ab64890Smrg iso8859_7_mbtowc, iso8859_7_wctomb 2431ab64890Smrg }, 2441ab64890Smrg { "ISO8859-8", NULLQUARK, 2451ab64890Smrg iso8859_8_mbtowc, iso8859_8_wctomb 2461ab64890Smrg }, 2471ab64890Smrg { "ISO8859-9", NULLQUARK, 2481ab64890Smrg iso8859_9_mbtowc, iso8859_9_wctomb 2491ab64890Smrg }, 2501ab64890Smrg { "ISO8859-10", NULLQUARK, 2511ab64890Smrg iso8859_10_mbtowc, iso8859_10_wctomb 2521ab64890Smrg }, 2531ab64890Smrg { "ISO8859-11", NULLQUARK, 2541ab64890Smrg iso8859_11_mbtowc, iso8859_11_wctomb 2551ab64890Smrg }, 2561ab64890Smrg { "ISO8859-13", NULLQUARK, 2571ab64890Smrg iso8859_13_mbtowc, iso8859_13_wctomb 2581ab64890Smrg }, 2591ab64890Smrg { "ISO8859-14", NULLQUARK, 2601ab64890Smrg iso8859_14_mbtowc, iso8859_14_wctomb 2611ab64890Smrg }, 2621ab64890Smrg { "ISO8859-15", NULLQUARK, 2631ab64890Smrg iso8859_15_mbtowc, iso8859_15_wctomb 2641ab64890Smrg }, 2651ab64890Smrg { "ISO8859-16", NULLQUARK, 2661ab64890Smrg iso8859_16_mbtowc, iso8859_16_wctomb 2671ab64890Smrg }, 2681ab64890Smrg { "JISX0201.1976-0", NULLQUARK, 2691ab64890Smrg jisx0201_mbtowc, jisx0201_wctomb 2701ab64890Smrg }, 2711ab64890Smrg { "TIS620-0", NULLQUARK, 2721ab64890Smrg 
tis620_mbtowc, tis620_wctomb 2731ab64890Smrg }, 2741ab64890Smrg { "GB2312.1980-0", NULLQUARK, 2751ab64890Smrg gb2312_mbtowc, gb2312_wctomb 2761ab64890Smrg }, 2771ab64890Smrg { "JISX0208.1983-0", NULLQUARK, 2781ab64890Smrg jisx0208_mbtowc, jisx0208_wctomb 2791ab64890Smrg }, 2801ab64890Smrg { "JISX0208.1990-0", NULLQUARK, 2811ab64890Smrg jisx0208_mbtowc, jisx0208_wctomb 2821ab64890Smrg }, 2831ab64890Smrg { "JISX0212.1990-0", NULLQUARK, 2841ab64890Smrg jisx0212_mbtowc, jisx0212_wctomb 2851ab64890Smrg }, 2861ab64890Smrg { "KSC5601.1987-0", NULLQUARK, 2871ab64890Smrg ksc5601_mbtowc, ksc5601_wctomb 2881ab64890Smrg }, 2891ab64890Smrg { "KOI8-R", NULLQUARK, 2901ab64890Smrg koi8_r_mbtowc, koi8_r_wctomb 2911ab64890Smrg }, 2921ab64890Smrg { "KOI8-U", NULLQUARK, 2931ab64890Smrg koi8_u_mbtowc, koi8_u_wctomb 2941ab64890Smrg }, 2951ab64890Smrg { "KOI8-C", NULLQUARK, 2961ab64890Smrg koi8_c_mbtowc, koi8_c_wctomb 2971ab64890Smrg }, 2981ab64890Smrg { "TATAR-CYR", NULLQUARK, 2991ab64890Smrg tatar_cyr_mbtowc, tatar_cyr_wctomb 3001ab64890Smrg }, 3011ab64890Smrg { "ARMSCII-8", NULLQUARK, 3021ab64890Smrg armscii_8_mbtowc, armscii_8_wctomb 3031ab64890Smrg }, 3041ab64890Smrg { "IBM-CP1133", NULLQUARK, 3051ab64890Smrg cp1133_mbtowc, cp1133_wctomb 3061ab64890Smrg }, 3071ab64890Smrg { "MULELAO-1", NULLQUARK, 3081ab64890Smrg mulelao_mbtowc, mulelao_wctomb 3091ab64890Smrg }, 3101ab64890Smrg { "VISCII1.1-1", NULLQUARK, 3111ab64890Smrg viscii_mbtowc, viscii_wctomb 3121ab64890Smrg }, 3131ab64890Smrg { "TCVN-5712", NULLQUARK, 3141ab64890Smrg tcvn_mbtowc, tcvn_wctomb 3151ab64890Smrg }, 3161ab64890Smrg { "GEORGIAN-ACADEMY", NULLQUARK, 3171ab64890Smrg georgian_academy_mbtowc, georgian_academy_wctomb 3181ab64890Smrg }, 3191ab64890Smrg { "GEORGIAN-PS", NULLQUARK, 3201ab64890Smrg georgian_ps_mbtowc, georgian_ps_wctomb 3211ab64890Smrg }, 3221ab64890Smrg { "ISO8859-9E", NULLQUARK, 3231ab64890Smrg iso8859_9e_mbtowc, iso8859_9e_wctomb 3241ab64890Smrg }, 3251ab64890Smrg { "MICROSOFT-CP1251", NULLQUARK, 
3261ab64890Smrg cp1251_mbtowc, cp1251_wctomb 3271ab64890Smrg }, 3281ab64890Smrg { "MICROSOFT-CP1255", NULLQUARK, 3291ab64890Smrg cp1255_mbtowc, cp1255_wctomb 3301ab64890Smrg }, 3311ab64890Smrg { "MICROSOFT-CP1256", NULLQUARK, 3321ab64890Smrg cp1256_mbtowc, cp1256_wctomb 3331ab64890Smrg }, 3341ab64890Smrg { "BIG5-0", NULLQUARK, 33561b2299dSmrg big5_mbtowc, big5_wctomb 33661b2299dSmrg }, 3371ab64890Smrg { "BIG5-E0", NULLQUARK, 3381ab64890Smrg big5_0_mbtowc, big5_0_wctomb 3391ab64890Smrg }, 3401ab64890Smrg { "BIG5-E1", NULLQUARK, 3411ab64890Smrg big5_1_mbtowc, big5_1_wctomb 3421ab64890Smrg }, 34361b2299dSmrg { "GBK-0", NULLQUARK, 34461b2299dSmrg gbk_mbtowc, gbk_wctomb 34561b2299dSmrg }, 34661b2299dSmrg { "BIG5HKSCS-0", NULLQUARK, 34761b2299dSmrg big5hkscs_mbtowc, big5hkscs_wctomb 34861b2299dSmrg }, 3491ab64890Smrg 3501ab64890Smrg /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 3511ab64890Smrg (for lookup speed), once at the end (as a fallback). */ 3521ab64890Smrg { "ISO10646-1", NULLQUARK, 3531ab64890Smrg utf8_mbtowc, utf8_wctomb 3541ab64890Smrg }, 3551ab64890Smrg 3561ab64890Smrg /* Encoding ISO10646-1 for fonts means UCS2-like encoding 3571ab64890Smrg so for conversion to FontCharSet we need this record */ 3581ab64890Smrg { "ISO10646-1", NULLQUARK, 3591ab64890Smrg ucs2be_mbtowc, ucs2be_wctomb 3601ab64890Smrg } 3611ab64890Smrg}; 3621ab64890Smrg 3631ab64890Smrg#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0])) 3641ab64890Smrg#define all_charsets_count (charsets_table_size - 1) 3651ab64890Smrg#define ucs2_conv_index (charsets_table_size - 1) 3661ab64890Smrg 3671ab64890Smrgstatic void 3681ab64890Smrginit_all_charsets (void) 3691ab64890Smrg{ 3701ab64890Smrg Utf8Conv convptr; 3711ab64890Smrg int i; 3721ab64890Smrg 3731ab64890Smrg for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--) 3741ab64890Smrg convptr->xrm_name = XrmStringToQuark(convptr->name); 3751ab64890Smrg} 3761ab64890Smrg 3771ab64890Smrg#define 
lazy_init_all_charsets() \ 3781ab64890Smrg do { \ 3791ab64890Smrg if (all_charsets[0].xrm_name == NULLQUARK) \ 3801ab64890Smrg init_all_charsets(); \ 3811ab64890Smrg } while (0) 3821ab64890Smrg 3831ab64890Smrg/* from XlcNCharSet to XlcNUtf8String */ 3841ab64890Smrg 3851ab64890Smrgstatic int 3861ab64890Smrgcstoutf8( 3871ab64890Smrg XlcConv conv, 3881ab64890Smrg XPointer *from, 3891ab64890Smrg int *from_left, 3901ab64890Smrg XPointer *to, 3911ab64890Smrg int *to_left, 3921ab64890Smrg XPointer *args, 3931ab64890Smrg int num_args) 3941ab64890Smrg{ 3951ab64890Smrg XlcCharSet charset; 3961ab64890Smrg const char *name; 3971ab64890Smrg Utf8Conv convptr; 3981ab64890Smrg int i; 3991ab64890Smrg unsigned char const *src; 4001ab64890Smrg unsigned char const *srcend; 4011ab64890Smrg unsigned char *dst; 4021ab64890Smrg unsigned char *dstend; 4031ab64890Smrg int unconv_num; 4041ab64890Smrg 4051ab64890Smrg if (from == NULL || *from == NULL) 4061ab64890Smrg return 0; 4071ab64890Smrg 4081ab64890Smrg if (num_args < 1) 4091ab64890Smrg return -1; 4101ab64890Smrg 4111ab64890Smrg charset = (XlcCharSet) args[0]; 4121ab64890Smrg name = charset->encoding_name; 4131ab64890Smrg /* not charset->name because the latter has a ":GL"/":GR" suffix */ 4141ab64890Smrg 4151ab64890Smrg for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 4161ab64890Smrg if (!strcmp(convptr->name, name)) 4171ab64890Smrg break; 4181ab64890Smrg if (i == 0) 4191ab64890Smrg return -1; 4201ab64890Smrg 4211ab64890Smrg src = (unsigned char const *) *from; 4221ab64890Smrg srcend = src + *from_left; 4231ab64890Smrg dst = (unsigned char *) *to; 4241ab64890Smrg dstend = dst + *to_left; 4251ab64890Smrg unconv_num = 0; 4261ab64890Smrg 4271ab64890Smrg while (src < srcend) { 4281ab64890Smrg ucs4_t wc; 4291ab64890Smrg int consumed; 4301ab64890Smrg int count; 4311ab64890Smrg 4321ab64890Smrg consumed = convptr->cstowc(conv, &wc, src, srcend-src); 4331ab64890Smrg if (consumed == RET_ILSEQ) 4341ab64890Smrg return 
-1; 4351ab64890Smrg if (consumed == RET_TOOFEW(0)) 4361ab64890Smrg break; 4371ab64890Smrg 4381ab64890Smrg count = utf8_wctomb(NULL, dst, wc, dstend-dst); 4391ab64890Smrg if (count == RET_TOOSMALL) 4401ab64890Smrg break; 4411ab64890Smrg if (count == RET_ILSEQ) { 4421ab64890Smrg count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 4431ab64890Smrg if (count == RET_TOOSMALL) 4441ab64890Smrg break; 4451ab64890Smrg unconv_num++; 4461ab64890Smrg } 4471ab64890Smrg src += consumed; 4481ab64890Smrg dst += count; 4491ab64890Smrg } 4501ab64890Smrg 4511ab64890Smrg *from = (XPointer) src; 4521ab64890Smrg *from_left = srcend - src; 4531ab64890Smrg *to = (XPointer) dst; 4541ab64890Smrg *to_left = dstend - dst; 4551ab64890Smrg 4561ab64890Smrg return unconv_num; 4571ab64890Smrg} 4581ab64890Smrg 4591ab64890Smrgstatic XlcConvMethodsRec methods_cstoutf8 = { 4601ab64890Smrg close_converter, 4611ab64890Smrg cstoutf8, 4621ab64890Smrg NULL 4631ab64890Smrg}; 4641ab64890Smrg 4651ab64890Smrgstatic XlcConv 4661ab64890Smrgopen_cstoutf8( 4671ab64890Smrg XLCd from_lcd, 4681ab64890Smrg const char *from_type, 4691ab64890Smrg XLCd to_lcd, 4701ab64890Smrg const char *to_type) 4711ab64890Smrg{ 4721ab64890Smrg lazy_init_all_charsets(); 4731ab64890Smrg return create_conv(from_lcd, &methods_cstoutf8); 4741ab64890Smrg} 4751ab64890Smrg 4761ab64890Smrg/* from XlcNUtf8String to XlcNCharSet */ 4771ab64890Smrg 4781ab64890Smrgstatic XlcConv 4791ab64890Smrgcreate_tocs_conv( 4801ab64890Smrg XLCd lcd, 4811ab64890Smrg XlcConvMethods methods) 4821ab64890Smrg{ 4831ab64890Smrg XlcConv conv; 4841ab64890Smrg CodeSet *codeset_list; 4851ab64890Smrg int codeset_num; 4861ab64890Smrg int charset_num; 4871ab64890Smrg int i, j, k; 4881ab64890Smrg Utf8Conv *preferred; 4891ab64890Smrg 4901ab64890Smrg lazy_init_all_charsets(); 4911ab64890Smrg 4921ab64890Smrg codeset_list = XLC_GENERIC(lcd, codeset_list); 4931ab64890Smrg codeset_num = XLC_GENERIC(lcd, codeset_num); 4941ab64890Smrg 4951ab64890Smrg charset_num = 0; 4961ab64890Smrg 
for (i = 0; i < codeset_num; i++) 4971ab64890Smrg charset_num += codeset_list[i]->num_charsets; 4981ab64890Smrg if (charset_num > all_charsets_count-1) 4991ab64890Smrg charset_num = all_charsets_count-1; 5001ab64890Smrg 501818534a1Smrg conv = Xmalloc(sizeof(XlcConvRec) 5021ab64890Smrg + (charset_num + 1) * sizeof(Utf8Conv)); 5031ab64890Smrg if (conv == (XlcConv) NULL) 5041ab64890Smrg return (XlcConv) NULL; 5051ab64890Smrg preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 5061ab64890Smrg 5071ab64890Smrg /* Loop through all codesets mentioned in the locale. */ 5081ab64890Smrg charset_num = 0; 5091ab64890Smrg for (i = 0; i < codeset_num; i++) { 5101ab64890Smrg XlcCharSet *charsets = codeset_list[i]->charset_list; 5111ab64890Smrg int num_charsets = codeset_list[i]->num_charsets; 5121ab64890Smrg for (j = 0; j < num_charsets; j++) { 5131ab64890Smrg const char *name = charsets[j]->encoding_name; 5141ab64890Smrg /* If it wasn't already encountered... */ 5151ab64890Smrg for (k = charset_num-1; k >= 0; k--) 5161ab64890Smrg if (!strcmp(preferred[k]->name, name)) 5171ab64890Smrg break; 5181ab64890Smrg if (k < 0) { 5191ab64890Smrg /* Look it up in all_charsets[]. */ 5201ab64890Smrg for (k = 0; k < all_charsets_count-1; k++) 5211ab64890Smrg if (!strcmp(all_charsets[k].name, name)) { 5221ab64890Smrg /* Add it to the preferred set. */ 5231ab64890Smrg preferred[charset_num++] = &all_charsets[k]; 5241ab64890Smrg break; 5251ab64890Smrg } 5261ab64890Smrg } 5271ab64890Smrg } 5281ab64890Smrg } 5291ab64890Smrg preferred[charset_num] = (Utf8Conv) NULL; 5301ab64890Smrg 5311ab64890Smrg conv->methods = methods; 5321ab64890Smrg conv->state = (XPointer) preferred; 5331ab64890Smrg 5341ab64890Smrg return conv; 5351ab64890Smrg} 5361ab64890Smrg 5371ab64890Smrgstatic void 5381ab64890Smrgclose_tocs_converter( 5391ab64890Smrg XlcConv conv) 5401ab64890Smrg{ 5411ab64890Smrg /* conv->state is allocated together with conv, free both at once. 
*/ 542818534a1Smrg Xfree(conv); 5431ab64890Smrg} 5441ab64890Smrg 5451ab64890Smrg/* 5461ab64890Smrg * Converts a Unicode character to an appropriate character set. The NULL 5471ab64890Smrg * terminated array of preferred character sets is passed as first argument. 5481ab64890Smrg * If successful, *charsetp is set to the character set that was used, and 5491ab64890Smrg * *sidep is set to the character set side (XlcGL or XlcGR). 5501ab64890Smrg */ 5511ab64890Smrgstatic int 5521ab64890Smrgcharset_wctocs( 5531ab64890Smrg Utf8Conv *preferred, 5541ab64890Smrg Utf8Conv *charsetp, 5551ab64890Smrg XlcSide *sidep, 5561ab64890Smrg XlcConv conv, 5571ab64890Smrg unsigned char *r, 5581ab64890Smrg ucs4_t wc, 5591ab64890Smrg int n) 5601ab64890Smrg{ 5611ab64890Smrg int count; 5621ab64890Smrg Utf8Conv convptr; 5631ab64890Smrg int i; 5641ab64890Smrg 5651ab64890Smrg for (; *preferred != (Utf8Conv) NULL; preferred++) { 5661ab64890Smrg convptr = *preferred; 5671ab64890Smrg count = convptr->wctocs(conv, r, wc, n); 5681ab64890Smrg if (count == RET_TOOSMALL) 5691ab64890Smrg return RET_TOOSMALL; 5701ab64890Smrg if (count != RET_ILSEQ) { 5711ab64890Smrg *charsetp = convptr; 5721ab64890Smrg *sidep = (*r < 0x80 ? XlcGL : XlcGR); 5731ab64890Smrg return count; 5741ab64890Smrg } 5751ab64890Smrg } 5761ab64890Smrg for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) { 5771ab64890Smrg count = convptr->wctocs(conv, r, wc, n); 5781ab64890Smrg if (count == RET_TOOSMALL) 5791ab64890Smrg return RET_TOOSMALL; 5801ab64890Smrg if (count != RET_ILSEQ) { 5811ab64890Smrg *charsetp = convptr; 5821ab64890Smrg *sidep = (*r < 0x80 ? 
XlcGL : XlcGR); 5831ab64890Smrg return count; 5841ab64890Smrg } 5851ab64890Smrg } 5861ab64890Smrg return RET_ILSEQ; 5871ab64890Smrg} 5881ab64890Smrg 5891ab64890Smrgstatic int 5901ab64890Smrgutf8tocs( 5911ab64890Smrg XlcConv conv, 5921ab64890Smrg XPointer *from, 5931ab64890Smrg int *from_left, 5941ab64890Smrg XPointer *to, 5951ab64890Smrg int *to_left, 5961ab64890Smrg XPointer *args, 5971ab64890Smrg int num_args) 5981ab64890Smrg{ 5991ab64890Smrg Utf8Conv *preferred_charsets; 6001ab64890Smrg XlcCharSet last_charset = NULL; 6011ab64890Smrg unsigned char const *src; 6021ab64890Smrg unsigned char const *srcend; 6031ab64890Smrg unsigned char *dst; 6041ab64890Smrg unsigned char *dstend; 6051ab64890Smrg int unconv_num; 6061ab64890Smrg 6071ab64890Smrg if (from == NULL || *from == NULL) 6081ab64890Smrg return 0; 6091ab64890Smrg 6101ab64890Smrg preferred_charsets = (Utf8Conv *) conv->state; 6111ab64890Smrg src = (unsigned char const *) *from; 6121ab64890Smrg srcend = src + *from_left; 6131ab64890Smrg dst = (unsigned char *) *to; 6141ab64890Smrg dstend = dst + *to_left; 6151ab64890Smrg unconv_num = 0; 6161ab64890Smrg 6171ab64890Smrg while (src < srcend && dst < dstend) { 6181ab64890Smrg Utf8Conv chosen_charset = NULL; 6191ab64890Smrg XlcSide chosen_side = XlcNONE; 6201ab64890Smrg ucs4_t wc; 6211ab64890Smrg int consumed; 6221ab64890Smrg int count; 6231ab64890Smrg 6241ab64890Smrg consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 6251ab64890Smrg if (consumed == RET_TOOFEW(0)) 6261ab64890Smrg break; 6271ab64890Smrg if (consumed == RET_ILSEQ) { 6281ab64890Smrg src++; 6291ab64890Smrg unconv_num++; 6301ab64890Smrg continue; 6311ab64890Smrg } 6321ab64890Smrg 6331ab64890Smrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 6341ab64890Smrg if (count == RET_TOOSMALL) 6351ab64890Smrg break; 6361ab64890Smrg if (count == RET_ILSEQ) { 6371ab64890Smrg src += consumed; 6381ab64890Smrg unconv_num++; 6391ab64890Smrg continue; 6401ab64890Smrg 
} 6411ab64890Smrg 6421ab64890Smrg if (last_charset == NULL) { 6431ab64890Smrg last_charset = 6441ab64890Smrg _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 6451ab64890Smrg if (last_charset == NULL) { 6461ab64890Smrg src += consumed; 6471ab64890Smrg unconv_num++; 6481ab64890Smrg continue; 6491ab64890Smrg } 6501ab64890Smrg } else { 6511ab64890Smrg if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 6521ab64890Smrg && (last_charset->side == XlcGLGR 6531ab64890Smrg || last_charset->side == chosen_side))) 6541ab64890Smrg break; 6551ab64890Smrg } 6561ab64890Smrg src += consumed; 6571ab64890Smrg dst += count; 6581ab64890Smrg } 6591ab64890Smrg 6601ab64890Smrg if (last_charset == NULL) 6611ab64890Smrg return -1; 6621ab64890Smrg 6631ab64890Smrg *from = (XPointer) src; 6641ab64890Smrg *from_left = srcend - src; 6651ab64890Smrg *to = (XPointer) dst; 6661ab64890Smrg *to_left = dstend - dst; 6671ab64890Smrg 6681ab64890Smrg if (num_args >= 1) 6691ab64890Smrg *((XlcCharSet *)args[0]) = last_charset; 6701ab64890Smrg 6711ab64890Smrg return unconv_num; 6721ab64890Smrg} 6731ab64890Smrg 6741ab64890Smrgstatic XlcConvMethodsRec methods_utf8tocs = { 6751ab64890Smrg close_tocs_converter, 6761ab64890Smrg utf8tocs, 6771ab64890Smrg NULL 6781ab64890Smrg}; 6791ab64890Smrg 6801ab64890Smrgstatic XlcConv 6811ab64890Smrgopen_utf8tocs( 6821ab64890Smrg XLCd from_lcd, 6831ab64890Smrg const char *from_type, 6841ab64890Smrg XLCd to_lcd, 6851ab64890Smrg const char *to_type) 6861ab64890Smrg{ 6871ab64890Smrg return create_tocs_conv(from_lcd, &methods_utf8tocs); 6881ab64890Smrg} 6891ab64890Smrg 6901ab64890Smrg/* from XlcNUtf8String to XlcNChar */ 6911ab64890Smrg 6921ab64890Smrgstatic int 6931ab64890Smrgutf8tocs1( 6941ab64890Smrg XlcConv conv, 6951ab64890Smrg XPointer *from, 6961ab64890Smrg int *from_left, 6971ab64890Smrg XPointer *to, 6981ab64890Smrg int *to_left, 6991ab64890Smrg XPointer *args, 7001ab64890Smrg int num_args) 7011ab64890Smrg{ 7021ab64890Smrg Utf8Conv 
*preferred_charsets; 7031ab64890Smrg XlcCharSet last_charset = NULL; 7041ab64890Smrg unsigned char const *src; 7051ab64890Smrg unsigned char const *srcend; 7061ab64890Smrg unsigned char *dst; 7071ab64890Smrg unsigned char *dstend; 7081ab64890Smrg int unconv_num; 7091ab64890Smrg 7101ab64890Smrg if (from == NULL || *from == NULL) 7111ab64890Smrg return 0; 7121ab64890Smrg 7131ab64890Smrg preferred_charsets = (Utf8Conv *) conv->state; 7141ab64890Smrg src = (unsigned char const *) *from; 7151ab64890Smrg srcend = src + *from_left; 7161ab64890Smrg dst = (unsigned char *) *to; 7171ab64890Smrg dstend = dst + *to_left; 7181ab64890Smrg unconv_num = 0; 7191ab64890Smrg 7201ab64890Smrg while (src < srcend && dst < dstend) { 7211ab64890Smrg Utf8Conv chosen_charset = NULL; 7221ab64890Smrg XlcSide chosen_side = XlcNONE; 7231ab64890Smrg ucs4_t wc; 7241ab64890Smrg int consumed; 7251ab64890Smrg int count; 7261ab64890Smrg 7271ab64890Smrg consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 7281ab64890Smrg if (consumed == RET_TOOFEW(0)) 7291ab64890Smrg break; 7301ab64890Smrg if (consumed == RET_ILSEQ) { 7311ab64890Smrg src++; 7321ab64890Smrg unconv_num++; 7331ab64890Smrg continue; 7341ab64890Smrg } 7351ab64890Smrg 7361ab64890Smrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 7371ab64890Smrg if (count == RET_TOOSMALL) 7381ab64890Smrg break; 7391ab64890Smrg if (count == RET_ILSEQ) { 7401ab64890Smrg src += consumed; 7411ab64890Smrg unconv_num++; 7421ab64890Smrg continue; 7431ab64890Smrg } 7441ab64890Smrg 745e9628295Smrg last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 746e9628295Smrg 7471ab64890Smrg if (last_charset == NULL) { 748e9628295Smrg src += consumed; 749e9628295Smrg unconv_num++; 750e9628295Smrg continue; 7511ab64890Smrg } 752e9628295Smrg 7531ab64890Smrg src += consumed; 7541ab64890Smrg dst += count; 7551ab64890Smrg break; 7561ab64890Smrg } 7571ab64890Smrg 7581ab64890Smrg if (last_charset == NULL) 
7591ab64890Smrg return -1; 7601ab64890Smrg 7611ab64890Smrg *from = (XPointer) src; 7621ab64890Smrg *from_left = srcend - src; 7631ab64890Smrg *to = (XPointer) dst; 7641ab64890Smrg *to_left = dstend - dst; 7651ab64890Smrg 7661ab64890Smrg if (num_args >= 1) 7671ab64890Smrg *((XlcCharSet *)args[0]) = last_charset; 7681ab64890Smrg 7691ab64890Smrg return unconv_num; 7701ab64890Smrg} 7711ab64890Smrg 7721ab64890Smrgstatic XlcConvMethodsRec methods_utf8tocs1 = { 7731ab64890Smrg close_tocs_converter, 7741ab64890Smrg utf8tocs1, 7751ab64890Smrg NULL 7761ab64890Smrg}; 7771ab64890Smrg 7781ab64890Smrgstatic XlcConv 7791ab64890Smrgopen_utf8tocs1( 7801ab64890Smrg XLCd from_lcd, 7811ab64890Smrg const char *from_type, 7821ab64890Smrg XLCd to_lcd, 7831ab64890Smrg const char *to_type) 7841ab64890Smrg{ 7851ab64890Smrg return create_tocs_conv(from_lcd, &methods_utf8tocs1); 7861ab64890Smrg} 7871ab64890Smrg 7881ab64890Smrg/* from XlcNUtf8String to XlcNString */ 7891ab64890Smrg 7901ab64890Smrgstatic int 7911ab64890Smrgutf8tostr( 7921ab64890Smrg XlcConv conv, 7931ab64890Smrg XPointer *from, 7941ab64890Smrg int *from_left, 7951ab64890Smrg XPointer *to, 7961ab64890Smrg int *to_left, 7971ab64890Smrg XPointer *args, 7981ab64890Smrg int num_args) 7991ab64890Smrg{ 8001ab64890Smrg unsigned char const *src; 8011ab64890Smrg unsigned char const *srcend; 8021ab64890Smrg unsigned char *dst; 8031ab64890Smrg unsigned char *dstend; 8041ab64890Smrg int unconv_num; 8051ab64890Smrg 8061ab64890Smrg if (from == NULL || *from == NULL) 8071ab64890Smrg return 0; 8081ab64890Smrg 8091ab64890Smrg src = (unsigned char const *) *from; 8101ab64890Smrg srcend = src + *from_left; 8111ab64890Smrg dst = (unsigned char *) *to; 8121ab64890Smrg dstend = dst + *to_left; 8131ab64890Smrg unconv_num = 0; 8141ab64890Smrg 8151ab64890Smrg while (src < srcend) { 8161ab64890Smrg unsigned char c; 8171ab64890Smrg ucs4_t wc; 8181ab64890Smrg int consumed; 8191ab64890Smrg 8201ab64890Smrg consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 
8211ab64890Smrg if (consumed == RET_TOOFEW(0)) 8221ab64890Smrg break; 8231ab64890Smrg if (dst == dstend) 8241ab64890Smrg break; 8251ab64890Smrg if (consumed == RET_ILSEQ) { 8261ab64890Smrg consumed = 1; 8271ab64890Smrg c = BAD_CHAR; 8281ab64890Smrg unconv_num++; 8291ab64890Smrg } else { 8301ab64890Smrg if ((wc & ~(ucs4_t)0xff) != 0) { 8311ab64890Smrg c = BAD_CHAR; 8321ab64890Smrg unconv_num++; 8331ab64890Smrg } else 8341ab64890Smrg c = (unsigned char) wc; 8351ab64890Smrg } 8361ab64890Smrg *dst++ = c; 8371ab64890Smrg src += consumed; 8381ab64890Smrg } 8391ab64890Smrg 8401ab64890Smrg *from = (XPointer) src; 8411ab64890Smrg *from_left = srcend - src; 8421ab64890Smrg *to = (XPointer) dst; 8431ab64890Smrg *to_left = dstend - dst; 8441ab64890Smrg 8451ab64890Smrg return unconv_num; 8461ab64890Smrg} 8471ab64890Smrg 8481ab64890Smrgstatic XlcConvMethodsRec methods_utf8tostr = { 8491ab64890Smrg close_converter, 8501ab64890Smrg utf8tostr, 8511ab64890Smrg NULL 8521ab64890Smrg}; 8531ab64890Smrg 8541ab64890Smrgstatic XlcConv 8551ab64890Smrgopen_utf8tostr( 8561ab64890Smrg XLCd from_lcd, 8571ab64890Smrg const char *from_type, 8581ab64890Smrg XLCd to_lcd, 8591ab64890Smrg const char *to_type) 8601ab64890Smrg{ 8611ab64890Smrg return create_conv(from_lcd, &methods_utf8tostr); 8621ab64890Smrg} 8631ab64890Smrg 8641ab64890Smrg/* from XlcNString to XlcNUtf8String */ 8651ab64890Smrg 8661ab64890Smrgstatic int 8671ab64890Smrgstrtoutf8( 8681ab64890Smrg XlcConv conv, 8691ab64890Smrg XPointer *from, 8701ab64890Smrg int *from_left, 8711ab64890Smrg XPointer *to, 8721ab64890Smrg int *to_left, 8731ab64890Smrg XPointer *args, 8741ab64890Smrg int num_args) 8751ab64890Smrg{ 8761ab64890Smrg unsigned char const *src; 8771ab64890Smrg unsigned char const *srcend; 8781ab64890Smrg unsigned char *dst; 8791ab64890Smrg unsigned char *dstend; 8801ab64890Smrg 8811ab64890Smrg if (from == NULL || *from == NULL) 8821ab64890Smrg return 0; 8831ab64890Smrg 8841ab64890Smrg src = (unsigned char const *) *from; 
8851ab64890Smrg srcend = src + *from_left; 8861ab64890Smrg dst = (unsigned char *) *to; 8871ab64890Smrg dstend = dst + *to_left; 8881ab64890Smrg 8891ab64890Smrg while (src < srcend) { 8901ab64890Smrg int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 8911ab64890Smrg if (count == RET_TOOSMALL) 8921ab64890Smrg break; 8931ab64890Smrg dst += count; 8941ab64890Smrg src++; 8951ab64890Smrg } 8961ab64890Smrg 8971ab64890Smrg *from = (XPointer) src; 8981ab64890Smrg *from_left = srcend - src; 8991ab64890Smrg *to = (XPointer) dst; 9001ab64890Smrg *to_left = dstend - dst; 9011ab64890Smrg 9021ab64890Smrg return 0; 9031ab64890Smrg} 9041ab64890Smrg 9051ab64890Smrgstatic XlcConvMethodsRec methods_strtoutf8 = { 9061ab64890Smrg close_converter, 9071ab64890Smrg strtoutf8, 9081ab64890Smrg NULL 9091ab64890Smrg}; 9101ab64890Smrg 9111ab64890Smrgstatic XlcConv 9121ab64890Smrgopen_strtoutf8( 9131ab64890Smrg XLCd from_lcd, 9141ab64890Smrg const char *from_type, 9151ab64890Smrg XLCd to_lcd, 9161ab64890Smrg const char *to_type) 9171ab64890Smrg{ 9181ab64890Smrg return create_conv(from_lcd, &methods_strtoutf8); 9191ab64890Smrg} 9201ab64890Smrg 9211ab64890Smrg/* Support for the input methods. 
*/ 9221ab64890Smrg 9231ab64890SmrgXPointer 9241ab64890Smrg_Utf8GetConvByName( 9251ab64890Smrg const char *name) 9261ab64890Smrg{ 9271ab64890Smrg XrmQuark xrm_name; 9281ab64890Smrg Utf8Conv convptr; 9291ab64890Smrg int i; 9301ab64890Smrg 9311ab64890Smrg if (name == NULL) 9321ab64890Smrg return (XPointer) NULL; 9331ab64890Smrg 9341ab64890Smrg lazy_init_all_charsets(); 9351ab64890Smrg xrm_name = XrmStringToQuark(name); 9361ab64890Smrg 9371ab64890Smrg for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 9381ab64890Smrg if (convptr->xrm_name == xrm_name) 9391ab64890Smrg return (XPointer) convptr->wctocs; 9401ab64890Smrg return (XPointer) NULL; 9411ab64890Smrg} 9421ab64890Smrg 9431ab64890Smrg/* from XlcNUcsChar to XlcNChar, needed for input methods */ 9441ab64890Smrg 9451ab64890Smrgstatic XlcConv 9461ab64890Smrgcreate_ucstocs_conv( 9471ab64890Smrg XLCd lcd, 9481ab64890Smrg XlcConvMethods methods) 9491ab64890Smrg{ 9501ab64890Smrg 9511ab64890Smrg if (XLC_PUBLIC_PART(lcd)->codeset 9521ab64890Smrg && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) { 9531ab64890Smrg XlcConv conv; 9541ab64890Smrg Utf8Conv *preferred; 9551ab64890Smrg 9561ab64890Smrg lazy_init_all_charsets(); 9571ab64890Smrg 958818534a1Smrg conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv)); 9591ab64890Smrg if (conv == (XlcConv) NULL) 9601ab64890Smrg return (XlcConv) NULL; 9611ab64890Smrg preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 9621ab64890Smrg 9631ab64890Smrg preferred[0] = &all_charsets[0]; /* ISO10646 */ 9641ab64890Smrg preferred[1] = (Utf8Conv) NULL; 9651ab64890Smrg 9661ab64890Smrg conv->methods = methods; 9671ab64890Smrg conv->state = (XPointer) preferred; 9681ab64890Smrg 9691ab64890Smrg return conv; 9701ab64890Smrg } else { 9711ab64890Smrg return create_tocs_conv(lcd, methods); 9721ab64890Smrg } 9731ab64890Smrg} 9741ab64890Smrg 9751ab64890Smrgstatic int 9761ab64890Smrgcharset_wctocs_exactly( 9771ab64890Smrg Utf8Conv *preferred, 
9781ab64890Smrg Utf8Conv *charsetp, 9791ab64890Smrg XlcSide *sidep, 9801ab64890Smrg XlcConv conv, 9811ab64890Smrg unsigned char *r, 9821ab64890Smrg ucs4_t wc, 9831ab64890Smrg int n) 9841ab64890Smrg{ 9851ab64890Smrg int count; 9861ab64890Smrg Utf8Conv convptr; 9871ab64890Smrg 9881ab64890Smrg for (; *preferred != (Utf8Conv) NULL; preferred++) { 9891ab64890Smrg convptr = *preferred; 9901ab64890Smrg count = convptr->wctocs(conv, r, wc, n); 9911ab64890Smrg if (count == RET_TOOSMALL) 9921ab64890Smrg return RET_TOOSMALL; 9931ab64890Smrg if (count != RET_ILSEQ) { 9941ab64890Smrg *charsetp = convptr; 9951ab64890Smrg *sidep = (*r < 0x80 ? XlcGL : XlcGR); 9961ab64890Smrg return count; 9971ab64890Smrg } 9981ab64890Smrg } 9991ab64890Smrg return RET_ILSEQ; 10001ab64890Smrg} 10011ab64890Smrg 10021ab64890Smrgstatic int 10031ab64890Smrgucstocs1( 10041ab64890Smrg XlcConv conv, 10051ab64890Smrg XPointer *from, 10061ab64890Smrg int *from_left, 10071ab64890Smrg XPointer *to, 10081ab64890Smrg int *to_left, 10091ab64890Smrg XPointer *args, 10101ab64890Smrg int num_args) 10111ab64890Smrg{ 10120f8248bfSmrg ucs4_t const *src; 10131ab64890Smrg unsigned char *dst = (unsigned char *) *to; 10141ab64890Smrg int unconv_num = 0; 10151ab64890Smrg Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state; 10161ab64890Smrg Utf8Conv chosen_charset = NULL; 10171ab64890Smrg XlcSide chosen_side = XlcNONE; 10181ab64890Smrg XlcCharSet charset = NULL; 10191ab64890Smrg int count; 10201ab64890Smrg 10211ab64890Smrg if (from == NULL || *from == NULL) 10221ab64890Smrg return 0; 10231ab64890Smrg 10240f8248bfSmrg src = (ucs4_t const *) *from; 10250f8248bfSmrg 10261ab64890Smrg count = charset_wctocs_exactly(preferred_charsets, &chosen_charset, 10271ab64890Smrg &chosen_side, conv, dst, *src, *to_left); 10281ab64890Smrg if (count < 1) { 10291ab64890Smrg unconv_num++; 10301ab64890Smrg count = 0; 10311ab64890Smrg } else { 10321ab64890Smrg charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 
10331ab64890Smrg } 10341ab64890Smrg if (charset == NULL) 10351ab64890Smrg return -1; 10361ab64890Smrg 10371ab64890Smrg *from = (XPointer) ++src; 10381ab64890Smrg (*from_left)--; 10391ab64890Smrg *to = (XPointer) dst; 10401ab64890Smrg *to_left -= count; 10411ab64890Smrg 10421ab64890Smrg if (num_args >= 1) 10431ab64890Smrg *((XlcCharSet *)args[0]) = charset; 10441ab64890Smrg 10451ab64890Smrg return unconv_num; 10461ab64890Smrg} 10471ab64890Smrg 10481ab64890Smrgstatic XlcConvMethodsRec methods_ucstocs1 = { 10491ab64890Smrg close_tocs_converter, 10501ab64890Smrg ucstocs1, 10511ab64890Smrg NULL 10521ab64890Smrg}; 10531ab64890Smrg 10541ab64890Smrgstatic XlcConv 10551ab64890Smrgopen_ucstocs1( 10561ab64890Smrg XLCd from_lcd, 10571ab64890Smrg const char *from_type, 10581ab64890Smrg XLCd to_lcd, 10591ab64890Smrg const char *to_type) 10601ab64890Smrg{ 10611ab64890Smrg return create_ucstocs_conv(from_lcd, &methods_ucstocs1); 10621ab64890Smrg} 10631ab64890Smrg 10641ab64890Smrg/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */ 10651ab64890Smrg 10661ab64890Smrgstatic int 10671ab64890Smrgucstoutf8( 10681ab64890Smrg XlcConv conv, 10691ab64890Smrg XPointer *from, 10701ab64890Smrg int *from_left, 10711ab64890Smrg XPointer *to, 10721ab64890Smrg int *to_left, 10731ab64890Smrg XPointer *args, 10741ab64890Smrg int num_args) 10751ab64890Smrg{ 10761ab64890Smrg const ucs4_t *src; 10771ab64890Smrg const ucs4_t *srcend; 10781ab64890Smrg unsigned char *dst; 10791ab64890Smrg unsigned char *dstend; 10801ab64890Smrg int unconv_num; 10811ab64890Smrg 10821ab64890Smrg if (from == NULL || *from == NULL) 10831ab64890Smrg return 0; 10841ab64890Smrg 10851ab64890Smrg src = (const ucs4_t *) *from; 10861ab64890Smrg srcend = src + *from_left; 10871ab64890Smrg dst = (unsigned char *) *to; 10881ab64890Smrg dstend = dst + *to_left; 10891ab64890Smrg unconv_num = 0; 10901ab64890Smrg 10911ab64890Smrg while (src < srcend) { 10921ab64890Smrg int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 
10931ab64890Smrg if (count == RET_TOOSMALL) 10941ab64890Smrg break; 10951ab64890Smrg if (count == RET_ILSEQ) 10961ab64890Smrg unconv_num++; 10971ab64890Smrg src++; 10981ab64890Smrg dst += count; 10991ab64890Smrg } 11001ab64890Smrg 11011ab64890Smrg *from = (XPointer) src; 11021ab64890Smrg *from_left = srcend - src; 11031ab64890Smrg *to = (XPointer) dst; 11041ab64890Smrg *to_left = dstend - dst; 11051ab64890Smrg 11061ab64890Smrg return unconv_num; 11071ab64890Smrg} 11081ab64890Smrg 11091ab64890Smrgstatic XlcConvMethodsRec methods_ucstoutf8 = { 11101ab64890Smrg close_converter, 11111ab64890Smrg ucstoutf8, 11121ab64890Smrg NULL 11131ab64890Smrg}; 11141ab64890Smrg 11151ab64890Smrgstatic XlcConv 11161ab64890Smrgopen_ucstoutf8( 11171ab64890Smrg XLCd from_lcd, 11181ab64890Smrg const char *from_type, 11191ab64890Smrg XLCd to_lcd, 11201ab64890Smrg const char *to_type) 11211ab64890Smrg{ 11221ab64890Smrg return create_conv(from_lcd, &methods_ucstoutf8); 11231ab64890Smrg} 11241ab64890Smrg 11251ab64890Smrg/* Registers UTF-8 converters for a non-UTF-8 locale. 
*/ 11261ab64890Smrgvoid 11271ab64890Smrg_XlcAddUtf8Converters( 11281ab64890Smrg XLCd lcd) 11291ab64890Smrg{ 11301ab64890Smrg _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8); 11311ab64890Smrg _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs); 11321ab64890Smrg _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1); 11331ab64890Smrg _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8); 11341ab64890Smrg _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr); 11351ab64890Smrg _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1); 11361ab64890Smrg _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8); 11371ab64890Smrg} 11381ab64890Smrg 11391ab64890Smrg/***************************************************************************/ 11401ab64890Smrg/* Part II: UTF-8 locale loader conversion files 11411ab64890Smrg * 11421ab64890Smrg * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode. 
11431ab64890Smrg */ 11441ab64890Smrg 11451ab64890Smrg/* from XlcNMultiByte to XlcNWideChar */ 11461ab64890Smrg 11471ab64890Smrgstatic int 11481ab64890Smrgutf8towcs( 11491ab64890Smrg XlcConv conv, 11501ab64890Smrg XPointer *from, 11511ab64890Smrg int *from_left, 11521ab64890Smrg XPointer *to, 11531ab64890Smrg int *to_left, 11541ab64890Smrg XPointer *args, 11551ab64890Smrg int num_args) 11561ab64890Smrg{ 11571ab64890Smrg unsigned char const *src; 11581ab64890Smrg unsigned char const *srcend; 11591ab64890Smrg wchar_t *dst; 11601ab64890Smrg wchar_t *dstend; 11611ab64890Smrg int unconv_num; 11621ab64890Smrg 11631ab64890Smrg if (from == NULL || *from == NULL) 11641ab64890Smrg return 0; 11651ab64890Smrg 11661ab64890Smrg src = (unsigned char const *) *from; 11671ab64890Smrg srcend = src + *from_left; 11681ab64890Smrg dst = (wchar_t *) *to; 11691ab64890Smrg dstend = dst + *to_left; 11701ab64890Smrg unconv_num = 0; 11711ab64890Smrg 11721ab64890Smrg while (src < srcend && dst < dstend) { 11731ab64890Smrg ucs4_t wc; 11741ab64890Smrg int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 11751ab64890Smrg if (consumed == RET_TOOFEW(0)) 11761ab64890Smrg break; 11771ab64890Smrg if (consumed == RET_ILSEQ) { 11781ab64890Smrg src++; 11791ab64890Smrg *dst = BAD_WCHAR; 11801ab64890Smrg unconv_num++; 11811ab64890Smrg } else { 11821ab64890Smrg src += consumed; 11831ab64890Smrg *dst = wc; 11841ab64890Smrg } 11851ab64890Smrg dst++; 11861ab64890Smrg } 11871ab64890Smrg 11881ab64890Smrg *from = (XPointer) src; 11891ab64890Smrg *from_left = srcend - src; 11901ab64890Smrg *to = (XPointer) dst; 11911ab64890Smrg *to_left = dstend - dst; 11921ab64890Smrg 11931ab64890Smrg return unconv_num; 11941ab64890Smrg} 11951ab64890Smrg 11961ab64890Smrgstatic XlcConvMethodsRec methods_utf8towcs = { 11971ab64890Smrg close_converter, 11981ab64890Smrg utf8towcs, 11991ab64890Smrg NULL 12001ab64890Smrg}; 12011ab64890Smrg 12021ab64890Smrgstatic XlcConv 12031ab64890Smrgopen_utf8towcs( 12041ab64890Smrg XLCd from_lcd, 
12051ab64890Smrg const char *from_type, 12061ab64890Smrg XLCd to_lcd, 12071ab64890Smrg const char *to_type) 12081ab64890Smrg{ 12091ab64890Smrg return create_conv(from_lcd, &methods_utf8towcs); 12101ab64890Smrg} 12111ab64890Smrg 12121ab64890Smrg/* from XlcNWideChar to XlcNMultiByte */ 12131ab64890Smrg 12141ab64890Smrgstatic int 12151ab64890Smrgwcstoutf8( 12161ab64890Smrg XlcConv conv, 12171ab64890Smrg XPointer *from, 12181ab64890Smrg int *from_left, 12191ab64890Smrg XPointer *to, 12201ab64890Smrg int *to_left, 12211ab64890Smrg XPointer *args, 12221ab64890Smrg int num_args) 12231ab64890Smrg{ 12241ab64890Smrg wchar_t const *src; 12251ab64890Smrg wchar_t const *srcend; 12261ab64890Smrg unsigned char *dst; 12271ab64890Smrg unsigned char *dstend; 12281ab64890Smrg int unconv_num; 12291ab64890Smrg 12301ab64890Smrg if (from == NULL || *from == NULL) 12311ab64890Smrg return 0; 12321ab64890Smrg 12331ab64890Smrg src = (wchar_t const *) *from; 12341ab64890Smrg srcend = src + *from_left; 12351ab64890Smrg dst = (unsigned char *) *to; 12361ab64890Smrg dstend = dst + *to_left; 12371ab64890Smrg unconv_num = 0; 12381ab64890Smrg 12391ab64890Smrg while (src < srcend) { 12401ab64890Smrg int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 12411ab64890Smrg if (count == RET_TOOSMALL) 12421ab64890Smrg break; 12431ab64890Smrg if (count == RET_ILSEQ) { 12441ab64890Smrg count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 12451ab64890Smrg if (count == RET_TOOSMALL) 12461ab64890Smrg break; 12471ab64890Smrg unconv_num++; 12481ab64890Smrg } 12491ab64890Smrg dst += count; 12501ab64890Smrg src++; 12511ab64890Smrg } 12521ab64890Smrg 12531ab64890Smrg *from = (XPointer) src; 12541ab64890Smrg *from_left = srcend - src; 12551ab64890Smrg *to = (XPointer) dst; 12561ab64890Smrg *to_left = dstend - dst; 12571ab64890Smrg 12581ab64890Smrg return unconv_num; 12591ab64890Smrg} 12601ab64890Smrg 12611ab64890Smrgstatic XlcConvMethodsRec methods_wcstoutf8 = { 12621ab64890Smrg close_converter, 12631ab64890Smrg 
wcstoutf8, 12641ab64890Smrg NULL 12651ab64890Smrg}; 12661ab64890Smrg 12671ab64890Smrgstatic XlcConv 12681ab64890Smrgopen_wcstoutf8( 12691ab64890Smrg XLCd from_lcd, 12701ab64890Smrg const char *from_type, 12711ab64890Smrg XLCd to_lcd, 12721ab64890Smrg const char *to_type) 12731ab64890Smrg{ 12741ab64890Smrg return create_conv(from_lcd, &methods_wcstoutf8); 12751ab64890Smrg} 12761ab64890Smrg 12771ab64890Smrg/* from XlcNString to XlcNWideChar */ 12781ab64890Smrg 12791ab64890Smrgstatic int 12801ab64890Smrgour_strtowcs( 12811ab64890Smrg XlcConv conv, 12821ab64890Smrg XPointer *from, 12831ab64890Smrg int *from_left, 12841ab64890Smrg XPointer *to, 12851ab64890Smrg int *to_left, 12861ab64890Smrg XPointer *args, 12871ab64890Smrg int num_args) 12881ab64890Smrg{ 12891ab64890Smrg unsigned char const *src; 12901ab64890Smrg unsigned char const *srcend; 12911ab64890Smrg wchar_t *dst; 12921ab64890Smrg wchar_t *dstend; 12931ab64890Smrg 12941ab64890Smrg if (from == NULL || *from == NULL) 12951ab64890Smrg return 0; 12961ab64890Smrg 12971ab64890Smrg src = (unsigned char const *) *from; 12981ab64890Smrg srcend = src + *from_left; 12991ab64890Smrg dst = (wchar_t *) *to; 13001ab64890Smrg dstend = dst + *to_left; 13011ab64890Smrg 13021ab64890Smrg while (src < srcend && dst < dstend) 13031ab64890Smrg *dst++ = (wchar_t) *src++; 13041ab64890Smrg 13051ab64890Smrg *from = (XPointer) src; 13061ab64890Smrg *from_left = srcend - src; 13071ab64890Smrg *to = (XPointer) dst; 13081ab64890Smrg *to_left = dstend - dst; 13091ab64890Smrg 13101ab64890Smrg return 0; 13111ab64890Smrg} 13121ab64890Smrg 13131ab64890Smrgstatic XlcConvMethodsRec methods_strtowcs = { 13141ab64890Smrg close_converter, 13151ab64890Smrg our_strtowcs, 13161ab64890Smrg NULL 13171ab64890Smrg}; 13181ab64890Smrg 13191ab64890Smrgstatic XlcConv 13201ab64890Smrgopen_strtowcs( 13211ab64890Smrg XLCd from_lcd, 13221ab64890Smrg const char *from_type, 13231ab64890Smrg XLCd to_lcd, 13241ab64890Smrg const char *to_type) 13251ab64890Smrg{ 
13261ab64890Smrg return create_conv(from_lcd, &methods_strtowcs); 13271ab64890Smrg} 13281ab64890Smrg 13291ab64890Smrg/* from XlcNWideChar to XlcNString */ 13301ab64890Smrg 13311ab64890Smrgstatic int 13321ab64890Smrgour_wcstostr( 13331ab64890Smrg XlcConv conv, 13341ab64890Smrg XPointer *from, 13351ab64890Smrg int *from_left, 13361ab64890Smrg XPointer *to, 13371ab64890Smrg int *to_left, 13381ab64890Smrg XPointer *args, 13391ab64890Smrg int num_args) 13401ab64890Smrg{ 13411ab64890Smrg wchar_t const *src; 13421ab64890Smrg wchar_t const *srcend; 13431ab64890Smrg unsigned char *dst; 13441ab64890Smrg unsigned char *dstend; 13451ab64890Smrg int unconv_num; 13461ab64890Smrg 13471ab64890Smrg if (from == NULL || *from == NULL) 13481ab64890Smrg return 0; 13491ab64890Smrg 13501ab64890Smrg src = (wchar_t const *) *from; 13511ab64890Smrg srcend = src + *from_left; 13521ab64890Smrg dst = (unsigned char *) *to; 13531ab64890Smrg dstend = dst + *to_left; 13541ab64890Smrg unconv_num = 0; 13551ab64890Smrg 13561ab64890Smrg while (src < srcend && dst < dstend) { 13571ab64890Smrg unsigned int wc = *src++; 13581ab64890Smrg if (wc < 0x80) 13591ab64890Smrg *dst = wc; 13601ab64890Smrg else { 13611ab64890Smrg *dst = BAD_CHAR; 13621ab64890Smrg unconv_num++; 13631ab64890Smrg } 13641ab64890Smrg dst++; 13651ab64890Smrg } 13661ab64890Smrg 13671ab64890Smrg *from = (XPointer) src; 13681ab64890Smrg *from_left = srcend - src; 13691ab64890Smrg *to = (XPointer) dst; 13701ab64890Smrg *to_left = dstend - dst; 13711ab64890Smrg 13721ab64890Smrg return unconv_num; 13731ab64890Smrg} 13741ab64890Smrg 13751ab64890Smrgstatic XlcConvMethodsRec methods_wcstostr = { 13761ab64890Smrg close_converter, 13771ab64890Smrg our_wcstostr, 13781ab64890Smrg NULL 13791ab64890Smrg}; 13801ab64890Smrg 13811ab64890Smrgstatic XlcConv 13821ab64890Smrgopen_wcstostr( 13831ab64890Smrg XLCd from_lcd, 13841ab64890Smrg const char *from_type, 13851ab64890Smrg XLCd to_lcd, 13861ab64890Smrg const char *to_type) 13871ab64890Smrg{ 
13881ab64890Smrg return create_conv(from_lcd, &methods_wcstostr); 13891ab64890Smrg} 13901ab64890Smrg 13911ab64890Smrg/* from XlcNCharSet to XlcNWideChar */ 13921ab64890Smrg 13931ab64890Smrgstatic int 13941ab64890Smrgcstowcs( 13951ab64890Smrg XlcConv conv, 13961ab64890Smrg XPointer *from, 13971ab64890Smrg int *from_left, 13981ab64890Smrg XPointer *to, 13991ab64890Smrg int *to_left, 14001ab64890Smrg XPointer *args, 14011ab64890Smrg int num_args) 14021ab64890Smrg{ 14031ab64890Smrg XlcCharSet charset; 14041ab64890Smrg const char *name; 14051ab64890Smrg Utf8Conv convptr; 14061ab64890Smrg int i; 14071ab64890Smrg unsigned char const *src; 14081ab64890Smrg unsigned char const *srcend; 14091ab64890Smrg wchar_t *dst; 14101ab64890Smrg wchar_t *dstend; 14111ab64890Smrg int unconv_num; 14121ab64890Smrg 14131ab64890Smrg if (from == NULL || *from == NULL) 14141ab64890Smrg return 0; 14151ab64890Smrg 14161ab64890Smrg if (num_args < 1) 14171ab64890Smrg return -1; 14181ab64890Smrg 14191ab64890Smrg charset = (XlcCharSet) args[0]; 14201ab64890Smrg name = charset->encoding_name; 14211ab64890Smrg /* not charset->name because the latter has a ":GL"/":GR" suffix */ 14221ab64890Smrg 14231ab64890Smrg for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 14241ab64890Smrg if (!strcmp(convptr->name, name)) 14251ab64890Smrg break; 14261ab64890Smrg if (i == 0) 14271ab64890Smrg return -1; 14281ab64890Smrg 14291ab64890Smrg src = (unsigned char const *) *from; 14301ab64890Smrg srcend = src + *from_left; 14311ab64890Smrg dst = (wchar_t *) *to; 14321ab64890Smrg dstend = dst + *to_left; 14331ab64890Smrg unconv_num = 0; 14341ab64890Smrg 14351ab64890Smrg while (src < srcend && dst < dstend) { 14361ab64890Smrg unsigned int wc; 14371ab64890Smrg int consumed; 14381ab64890Smrg 14391ab64890Smrg consumed = convptr->cstowc(conv, &wc, src, srcend-src); 14401ab64890Smrg if (consumed == RET_ILSEQ) 14411ab64890Smrg return -1; 14421ab64890Smrg if (consumed == RET_TOOFEW(0)) 14431ab64890Smrg 
break; 14441ab64890Smrg 14451ab64890Smrg *dst++ = wc; 14461ab64890Smrg src += consumed; 14471ab64890Smrg } 14481ab64890Smrg 14491ab64890Smrg *from = (XPointer) src; 14501ab64890Smrg *from_left = srcend - src; 14511ab64890Smrg *to = (XPointer) dst; 14521ab64890Smrg *to_left = dstend - dst; 14531ab64890Smrg 14541ab64890Smrg return unconv_num; 14551ab64890Smrg} 14561ab64890Smrg 14571ab64890Smrgstatic XlcConvMethodsRec methods_cstowcs = { 14581ab64890Smrg close_converter, 14591ab64890Smrg cstowcs, 14601ab64890Smrg NULL 14611ab64890Smrg}; 14621ab64890Smrg 14631ab64890Smrgstatic XlcConv 14641ab64890Smrgopen_cstowcs( 14651ab64890Smrg XLCd from_lcd, 14661ab64890Smrg const char *from_type, 14671ab64890Smrg XLCd to_lcd, 14681ab64890Smrg const char *to_type) 14691ab64890Smrg{ 14701ab64890Smrg lazy_init_all_charsets(); 14711ab64890Smrg return create_conv(from_lcd, &methods_cstowcs); 14721ab64890Smrg} 14731ab64890Smrg 14741ab64890Smrg/* from XlcNWideChar to XlcNCharSet */ 14751ab64890Smrg 14761ab64890Smrgstatic int 14771ab64890Smrgwcstocs( 14781ab64890Smrg XlcConv conv, 14791ab64890Smrg XPointer *from, 14801ab64890Smrg int *from_left, 14811ab64890Smrg XPointer *to, 14821ab64890Smrg int *to_left, 14831ab64890Smrg XPointer *args, 14841ab64890Smrg int num_args) 14851ab64890Smrg{ 14861ab64890Smrg Utf8Conv *preferred_charsets; 14871ab64890Smrg XlcCharSet last_charset = NULL; 14881ab64890Smrg wchar_t const *src; 14891ab64890Smrg wchar_t const *srcend; 14901ab64890Smrg unsigned char *dst; 14911ab64890Smrg unsigned char *dstend; 14921ab64890Smrg int unconv_num; 14931ab64890Smrg 14941ab64890Smrg if (from == NULL || *from == NULL) 14951ab64890Smrg return 0; 14961ab64890Smrg 14971ab64890Smrg preferred_charsets = (Utf8Conv *) conv->state; 14981ab64890Smrg src = (wchar_t const *) *from; 14991ab64890Smrg srcend = src + *from_left; 15001ab64890Smrg dst = (unsigned char *) *to; 15011ab64890Smrg dstend = dst + *to_left; 15021ab64890Smrg unconv_num = 0; 15031ab64890Smrg 15041ab64890Smrg while 
(src < srcend && dst < dstend) { 15051ab64890Smrg Utf8Conv chosen_charset = NULL; 15061ab64890Smrg XlcSide chosen_side = XlcNONE; 15071ab64890Smrg wchar_t wc = *src; 15081ab64890Smrg int count; 15091ab64890Smrg 15101ab64890Smrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 15111ab64890Smrg if (count == RET_TOOSMALL) 15121ab64890Smrg break; 15131ab64890Smrg if (count == RET_ILSEQ) { 15141ab64890Smrg src++; 15151ab64890Smrg unconv_num++; 15161ab64890Smrg continue; 15171ab64890Smrg } 15181ab64890Smrg 15191ab64890Smrg if (last_charset == NULL) { 15201ab64890Smrg last_charset = 15211ab64890Smrg _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 15221ab64890Smrg if (last_charset == NULL) { 15231ab64890Smrg src++; 15241ab64890Smrg unconv_num++; 15251ab64890Smrg continue; 15261ab64890Smrg } 15271ab64890Smrg } else { 15281ab64890Smrg if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 15291ab64890Smrg && (last_charset->side == XlcGLGR 15301ab64890Smrg || last_charset->side == chosen_side))) 15311ab64890Smrg break; 15321ab64890Smrg } 15331ab64890Smrg src++; 15341ab64890Smrg dst += count; 15351ab64890Smrg } 15361ab64890Smrg 15371ab64890Smrg if (last_charset == NULL) 15381ab64890Smrg return -1; 15391ab64890Smrg 15401ab64890Smrg *from = (XPointer) src; 15411ab64890Smrg *from_left = srcend - src; 15421ab64890Smrg *to = (XPointer) dst; 15431ab64890Smrg *to_left = dstend - dst; 15441ab64890Smrg 15451ab64890Smrg if (num_args >= 1) 15461ab64890Smrg *((XlcCharSet *)args[0]) = last_charset; 15471ab64890Smrg 15481ab64890Smrg return unconv_num; 15491ab64890Smrg} 15501ab64890Smrg 15511ab64890Smrgstatic XlcConvMethodsRec methods_wcstocs = { 15521ab64890Smrg close_tocs_converter, 15531ab64890Smrg wcstocs, 15541ab64890Smrg NULL 15551ab64890Smrg}; 15561ab64890Smrg 15571ab64890Smrgstatic XlcConv 15581ab64890Smrgopen_wcstocs( 15591ab64890Smrg XLCd from_lcd, 15601ab64890Smrg const char *from_type, 15611ab64890Smrg 
XLCd to_lcd, 15621ab64890Smrg const char *to_type) 15631ab64890Smrg{ 15641ab64890Smrg return create_tocs_conv(from_lcd, &methods_wcstocs); 15651ab64890Smrg} 15661ab64890Smrg 15671ab64890Smrg/* from XlcNWideChar to XlcNChar */ 15681ab64890Smrg 15691ab64890Smrgstatic int 15701ab64890Smrgwcstocs1( 15711ab64890Smrg XlcConv conv, 15721ab64890Smrg XPointer *from, 15731ab64890Smrg int *from_left, 15741ab64890Smrg XPointer *to, 15751ab64890Smrg int *to_left, 15761ab64890Smrg XPointer *args, 15771ab64890Smrg int num_args) 15781ab64890Smrg{ 15791ab64890Smrg Utf8Conv *preferred_charsets; 15801ab64890Smrg XlcCharSet last_charset = NULL; 15811ab64890Smrg wchar_t const *src; 15821ab64890Smrg wchar_t const *srcend; 15831ab64890Smrg unsigned char *dst; 15841ab64890Smrg unsigned char *dstend; 15851ab64890Smrg int unconv_num; 15861ab64890Smrg 15871ab64890Smrg if (from == NULL || *from == NULL) 15881ab64890Smrg return 0; 15891ab64890Smrg 15901ab64890Smrg preferred_charsets = (Utf8Conv *) conv->state; 15911ab64890Smrg src = (wchar_t const *) *from; 15921ab64890Smrg srcend = src + *from_left; 15931ab64890Smrg dst = (unsigned char *) *to; 15941ab64890Smrg dstend = dst + *to_left; 15951ab64890Smrg unconv_num = 0; 15961ab64890Smrg 15971ab64890Smrg while (src < srcend && dst < dstend) { 15981ab64890Smrg Utf8Conv chosen_charset = NULL; 15991ab64890Smrg XlcSide chosen_side = XlcNONE; 16001ab64890Smrg wchar_t wc = *src; 16011ab64890Smrg int count; 16021ab64890Smrg 16031ab64890Smrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 16041ab64890Smrg if (count == RET_TOOSMALL) 16051ab64890Smrg break; 16061ab64890Smrg if (count == RET_ILSEQ) { 16071ab64890Smrg src++; 16081ab64890Smrg unconv_num++; 16091ab64890Smrg continue; 16101ab64890Smrg } 16111ab64890Smrg 1612e9628295Smrg last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1613e9628295Smrg 16141ab64890Smrg if (last_charset == NULL) { 1615e9628295Smrg src++; 
1616e9628295Smrg unconv_num++; 1617e9628295Smrg continue; 16181ab64890Smrg } 1619e9628295Smrg 16201ab64890Smrg src++; 16211ab64890Smrg dst += count; 16221ab64890Smrg break; 16231ab64890Smrg } 16241ab64890Smrg 16251ab64890Smrg if (last_charset == NULL) 16261ab64890Smrg return -1; 16271ab64890Smrg 16281ab64890Smrg *from = (XPointer) src; 16291ab64890Smrg *from_left = srcend - src; 16301ab64890Smrg *to = (XPointer) dst; 16311ab64890Smrg *to_left = dstend - dst; 16321ab64890Smrg 16331ab64890Smrg if (num_args >= 1) 16341ab64890Smrg *((XlcCharSet *)args[0]) = last_charset; 16351ab64890Smrg 16361ab64890Smrg return unconv_num; 16371ab64890Smrg} 16381ab64890Smrg 16391ab64890Smrgstatic XlcConvMethodsRec methods_wcstocs1 = { 16401ab64890Smrg close_tocs_converter, 16411ab64890Smrg wcstocs1, 16421ab64890Smrg NULL 16431ab64890Smrg}; 16441ab64890Smrg 16451ab64890Smrgstatic XlcConv 16461ab64890Smrgopen_wcstocs1( 16471ab64890Smrg XLCd from_lcd, 16481ab64890Smrg const char *from_type, 16491ab64890Smrg XLCd to_lcd, 16501ab64890Smrg const char *to_type) 16511ab64890Smrg{ 16521ab64890Smrg return create_tocs_conv(from_lcd, &methods_wcstocs1); 16531ab64890Smrg} 16541ab64890Smrg 16551ab64890Smrg/* trivial, no conversion */ 16561ab64890Smrg 16571ab64890Smrgstatic int 16581ab64890Smrgidentity( 16591ab64890Smrg XlcConv conv, 16601ab64890Smrg XPointer *from, 16611ab64890Smrg int *from_left, 16621ab64890Smrg XPointer *to, 16631ab64890Smrg int *to_left, 16641ab64890Smrg XPointer *args, 16651ab64890Smrg int num_args) 16661ab64890Smrg{ 16671ab64890Smrg unsigned char const *src; 16681ab64890Smrg unsigned char const *srcend; 16691ab64890Smrg unsigned char *dst; 16701ab64890Smrg unsigned char *dstend; 16711ab64890Smrg 16721ab64890Smrg if (from == NULL || *from == NULL) 16731ab64890Smrg return 0; 16741ab64890Smrg 16751ab64890Smrg src = (unsigned char const *) *from; 16761ab64890Smrg srcend = src + *from_left; 16771ab64890Smrg dst = (unsigned char *) *to; 16781ab64890Smrg dstend = dst + *to_left; 
16791ab64890Smrg 16801ab64890Smrg while (src < srcend && dst < dstend) 16811ab64890Smrg *dst++ = *src++; 16821ab64890Smrg 16831ab64890Smrg *from = (XPointer) src; 16841ab64890Smrg *from_left = srcend - src; 16851ab64890Smrg *to = (XPointer) dst; 16861ab64890Smrg *to_left = dstend - dst; 16871ab64890Smrg 16881ab64890Smrg return 0; 16891ab64890Smrg} 16901ab64890Smrg 16911ab64890Smrgstatic XlcConvMethodsRec methods_identity = { 16921ab64890Smrg close_converter, 16931ab64890Smrg identity, 16941ab64890Smrg NULL 16951ab64890Smrg}; 16961ab64890Smrg 16971ab64890Smrgstatic XlcConv 16981ab64890Smrgopen_identity( 16991ab64890Smrg XLCd from_lcd, 17001ab64890Smrg const char *from_type, 17011ab64890Smrg XLCd to_lcd, 17021ab64890Smrg const char *to_type) 17031ab64890Smrg{ 17041ab64890Smrg return create_conv(from_lcd, &methods_identity); 17051ab64890Smrg} 17061ab64890Smrg 17071ab64890Smrg/* from MultiByte/WideChar to FontCharSet. */ 17081ab64890Smrg/* They really use converters to CharSet 17091ab64890Smrg * but with different create_conv procedure. 
*/ 17101ab64890Smrg 17111ab64890Smrgstatic XlcConv 17121ab64890Smrgcreate_tofontcs_conv( 17131ab64890Smrg XLCd lcd, 17141ab64890Smrg XlcConvMethods methods) 17151ab64890Smrg{ 17161ab64890Smrg XlcConv conv; 17171ab64890Smrg int i, num, k, count; 1718d4a3aaf4Smrg char **value, buf[32]; 17191ab64890Smrg Utf8Conv *preferred; 17201ab64890Smrg 17211ab64890Smrg lazy_init_all_charsets(); 17221ab64890Smrg 17231ab64890Smrg for (i = 0, num = 0;; i++) { 1724818534a1Smrg snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 17251ab64890Smrg _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 17261ab64890Smrg if (count < 1) { 1727818534a1Smrg snprintf(buf, sizeof(buf), "fs%d.charset", i); 17281ab64890Smrg _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 17291ab64890Smrg if (count < 1) 17301ab64890Smrg break; 17311ab64890Smrg } 17321ab64890Smrg num += count; 17331ab64890Smrg } 17341ab64890Smrg 1735818534a1Smrg conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv)); 17361ab64890Smrg if (conv == (XlcConv) NULL) 17371ab64890Smrg return (XlcConv) NULL; 17381ab64890Smrg preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 17391ab64890Smrg 17401ab64890Smrg /* Loop through all fontsets mentioned in the locale. 
*/ 17411ab64890Smrg for (i = 0, num = 0;; i++) { 1742818534a1Smrg snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 17431ab64890Smrg _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 17441ab64890Smrg if (count < 1) { 1745818534a1Smrg snprintf(buf, sizeof(buf), "fs%d.charset", i); 17461ab64890Smrg _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 17471ab64890Smrg if (count < 1) 17481ab64890Smrg break; 17491ab64890Smrg } 17501ab64890Smrg while (count-- > 0) { 17511ab64890Smrg XlcCharSet charset = _XlcGetCharSet(*value++); 17521ab64890Smrg const char *name; 17531ab64890Smrg 17541ab64890Smrg if (charset == (XlcCharSet) NULL) 17551ab64890Smrg continue; 17561ab64890Smrg 17571ab64890Smrg name = charset->encoding_name; 17581ab64890Smrg /* If it wasn't already encountered... */ 17591ab64890Smrg for (k = num - 1; k >= 0; k--) 17601ab64890Smrg if (!strcmp(preferred[k]->name, name)) 17611ab64890Smrg break; 17621ab64890Smrg if (k < 0) { 17631ab64890Smrg /* For fonts "ISO10646-1" means ucs2, not utf8.*/ 17641ab64890Smrg if (!strcmp("ISO10646-1", name)) { 17651ab64890Smrg preferred[num++] = &all_charsets[ucs2_conv_index]; 17661ab64890Smrg continue; 17671ab64890Smrg } 17681ab64890Smrg /* Look it up in all_charsets[]. */ 17691ab64890Smrg for (k = 0; k < all_charsets_count-1; k++) 17701ab64890Smrg if (!strcmp(all_charsets[k].name, name)) { 17711ab64890Smrg /* Add it to the preferred set. 
*/ 17721ab64890Smrg preferred[num++] = &all_charsets[k]; 17731ab64890Smrg break; 17741ab64890Smrg } 17751ab64890Smrg } 17761ab64890Smrg } 17771ab64890Smrg } 17781ab64890Smrg preferred[num] = (Utf8Conv) NULL; 17791ab64890Smrg 17801ab64890Smrg conv->methods = methods; 17811ab64890Smrg conv->state = (XPointer) preferred; 17821ab64890Smrg 17831ab64890Smrg return conv; 17841ab64890Smrg} 17851ab64890Smrg 17861ab64890Smrgstatic XlcConv 17871ab64890Smrgopen_wcstofcs( 17881ab64890Smrg XLCd from_lcd, 17891ab64890Smrg const char *from_type, 17901ab64890Smrg XLCd to_lcd, 17911ab64890Smrg const char *to_type) 17921ab64890Smrg{ 17931ab64890Smrg return create_tofontcs_conv(from_lcd, &methods_wcstocs); 17941ab64890Smrg} 17951ab64890Smrg 17961ab64890Smrgstatic XlcConv 17971ab64890Smrgopen_utf8tofcs( 17981ab64890Smrg XLCd from_lcd, 17991ab64890Smrg const char *from_type, 18001ab64890Smrg XLCd to_lcd, 18011ab64890Smrg const char *to_type) 18021ab64890Smrg{ 18031ab64890Smrg return create_tofontcs_conv(from_lcd, &methods_utf8tocs); 18041ab64890Smrg} 18051ab64890Smrg 180661b2299dSmrg/* ========================== iconv Stuff ================================ */ 180761b2299dSmrg 180861b2299dSmrg/* from XlcNCharSet to XlcNMultiByte */ 180961b2299dSmrg 181061b2299dSmrgstatic int 181161b2299dSmrgiconv_cstombs(XlcConv conv, XPointer *from, int *from_left, 181261b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 181361b2299dSmrg{ 181461b2299dSmrg XlcCharSet charset; 181588de56ccSmrg char const *name; 181661b2299dSmrg Utf8Conv convptr; 181761b2299dSmrg int i; 181861b2299dSmrg unsigned char const *src; 181961b2299dSmrg unsigned char const *srcend; 182061b2299dSmrg unsigned char *dst; 182161b2299dSmrg unsigned char *dstend; 182261b2299dSmrg int unconv_num; 182361b2299dSmrg 182461b2299dSmrg if (from == NULL || *from == NULL) 182561b2299dSmrg return 0; 182661b2299dSmrg 182761b2299dSmrg if (num_args < 1) 182861b2299dSmrg return -1; 182961b2299dSmrg 183061b2299dSmrg charset = 
(XlcCharSet) args[0]; 183161b2299dSmrg name = charset->encoding_name; 183261b2299dSmrg /* not charset->name because the latter has a ":GL"/":GR" suffix */ 183361b2299dSmrg 183461b2299dSmrg for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 183561b2299dSmrg if (!strcmp(convptr->name, name)) 183661b2299dSmrg break; 183761b2299dSmrg if (i == 0) 183861b2299dSmrg return -1; 183961b2299dSmrg 184061b2299dSmrg src = (unsigned char const *) *from; 184161b2299dSmrg srcend = src + *from_left; 184261b2299dSmrg dst = (unsigned char *) *to; 184361b2299dSmrg dstend = dst + *to_left; 184461b2299dSmrg unconv_num = 0; 184561b2299dSmrg 184661b2299dSmrg while (src < srcend) { 184761b2299dSmrg ucs4_t wc; 184861b2299dSmrg int consumed; 184961b2299dSmrg int count; 185061b2299dSmrg 185161b2299dSmrg consumed = convptr->cstowc(conv, &wc, src, srcend-src); 185261b2299dSmrg if (consumed == RET_ILSEQ) 185361b2299dSmrg return -1; 185461b2299dSmrg if (consumed == RET_TOOFEW(0)) 185561b2299dSmrg break; 185661b2299dSmrg 185761b2299dSmrg /* Use stdc iconv to convert widechar -> multibyte */ 185861b2299dSmrg 185988de56ccSmrg count = wctomb((char *)dst, wc); 186061b2299dSmrg if (count == 0) 186161b2299dSmrg break; 186261b2299dSmrg if (count == -1) { 186388de56ccSmrg count = wctomb((char *)dst, BAD_WCHAR); 186461b2299dSmrg if (count == 0) 186561b2299dSmrg break; 186661b2299dSmrg unconv_num++; 186761b2299dSmrg } 186861b2299dSmrg src += consumed; 186961b2299dSmrg dst += count; 187061b2299dSmrg } 187161b2299dSmrg 187261b2299dSmrg *from = (XPointer) src; 187361b2299dSmrg *from_left = srcend - src; 187461b2299dSmrg *to = (XPointer) dst; 187561b2299dSmrg *to_left = dstend - dst; 187661b2299dSmrg 187761b2299dSmrg return unconv_num; 187861b2299dSmrg 187961b2299dSmrg} 188061b2299dSmrg 188161b2299dSmrgstatic XlcConvMethodsRec iconv_cstombs_methods = { 188261b2299dSmrg close_converter, 188361b2299dSmrg iconv_cstombs, 188461b2299dSmrg NULL 188561b2299dSmrg}; 188661b2299dSmrg 
188761b2299dSmrgstatic XlcConv 188888de56ccSmrgopen_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 188961b2299dSmrg{ 189061b2299dSmrg lazy_init_all_charsets(); 189161b2299dSmrg return create_conv(from_lcd, &iconv_cstombs_methods); 189261b2299dSmrg} 189361b2299dSmrg 189461b2299dSmrgstatic int 189561b2299dSmrgiconv_mbstocs(XlcConv conv, XPointer *from, int *from_left, 189661b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 189761b2299dSmrg{ 189861b2299dSmrg Utf8Conv *preferred_charsets; 189961b2299dSmrg XlcCharSet last_charset = NULL; 190061b2299dSmrg unsigned char const *src; 190161b2299dSmrg unsigned char const *srcend; 190261b2299dSmrg unsigned char *dst; 190361b2299dSmrg unsigned char *dstend; 190461b2299dSmrg int unconv_num; 190561b2299dSmrg 190661b2299dSmrg if (from == NULL || *from == NULL) 190761b2299dSmrg return 0; 190861b2299dSmrg 190961b2299dSmrg preferred_charsets = (Utf8Conv *) conv->state; 191061b2299dSmrg src = (unsigned char const *) *from; 191161b2299dSmrg srcend = src + *from_left; 191261b2299dSmrg dst = (unsigned char *) *to; 191361b2299dSmrg dstend = dst + *to_left; 191461b2299dSmrg unconv_num = 0; 191561b2299dSmrg 191661b2299dSmrg while (src < srcend && dst < dstend) { 191761b2299dSmrg Utf8Conv chosen_charset = NULL; 191861b2299dSmrg XlcSide chosen_side = XlcNONE; 191961b2299dSmrg wchar_t wc; 192061b2299dSmrg int consumed; 192161b2299dSmrg int count; 192261b2299dSmrg 192361b2299dSmrg /* Uses stdc iconv to convert multibyte -> widechar */ 192461b2299dSmrg 19259c019ec5Smaya consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 192661b2299dSmrg if (consumed == 0) 192761b2299dSmrg break; 192861b2299dSmrg if (consumed == -1) { 192961b2299dSmrg src++; 193061b2299dSmrg unconv_num++; 193161b2299dSmrg continue; 193261b2299dSmrg } 193361b2299dSmrg 193461b2299dSmrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 193561b2299dSmrg 
193661b2299dSmrg if (count == RET_TOOSMALL) 193761b2299dSmrg break; 193861b2299dSmrg if (count == RET_ILSEQ) { 193961b2299dSmrg src += consumed; 194061b2299dSmrg unconv_num++; 194161b2299dSmrg continue; 194261b2299dSmrg } 194361b2299dSmrg 194461b2299dSmrg if (last_charset == NULL) { 194561b2299dSmrg last_charset = 194661b2299dSmrg _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 194761b2299dSmrg if (last_charset == NULL) { 194861b2299dSmrg src += consumed; 194961b2299dSmrg unconv_num++; 195061b2299dSmrg continue; 195161b2299dSmrg } 195261b2299dSmrg } else { 195361b2299dSmrg if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 195461b2299dSmrg && (last_charset->side == XlcGLGR 195561b2299dSmrg || last_charset->side == chosen_side))) 195661b2299dSmrg break; 195761b2299dSmrg } 195861b2299dSmrg src += consumed; 195961b2299dSmrg dst += count; 196061b2299dSmrg } 196161b2299dSmrg 196261b2299dSmrg if (last_charset == NULL) 196361b2299dSmrg return -1; 196461b2299dSmrg 196561b2299dSmrg *from = (XPointer) src; 196661b2299dSmrg *from_left = srcend - src; 196761b2299dSmrg *to = (XPointer) dst; 196861b2299dSmrg *to_left = dstend - dst; 196961b2299dSmrg 197061b2299dSmrg if (num_args >= 1) 197161b2299dSmrg *((XlcCharSet *)args[0]) = last_charset; 197261b2299dSmrg 197361b2299dSmrg return unconv_num; 197461b2299dSmrg} 197561b2299dSmrg 197661b2299dSmrgstatic XlcConvMethodsRec iconv_mbstocs_methods = { 197761b2299dSmrg close_tocs_converter, 197861b2299dSmrg iconv_mbstocs, 197961b2299dSmrg NULL 198061b2299dSmrg}; 198161b2299dSmrg 198261b2299dSmrgstatic XlcConv 198388de56ccSmrgopen_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 198461b2299dSmrg{ 198561b2299dSmrg return create_tocs_conv(from_lcd, &iconv_mbstocs_methods); 198661b2299dSmrg} 198761b2299dSmrg 198861b2299dSmrg/* from XlcNMultiByte to XlcNChar */ 198961b2299dSmrg 199061b2299dSmrgstatic int 199161b2299dSmrgiconv_mbtocs(XlcConv conv, XPointer *from, int *from_left, 
199261b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 199361b2299dSmrg{ 199461b2299dSmrg Utf8Conv *preferred_charsets; 199561b2299dSmrg XlcCharSet last_charset = NULL; 199661b2299dSmrg unsigned char const *src; 199761b2299dSmrg unsigned char const *srcend; 199861b2299dSmrg unsigned char *dst; 199961b2299dSmrg unsigned char *dstend; 200061b2299dSmrg int unconv_num; 200161b2299dSmrg 200261b2299dSmrg if (from == NULL || *from == NULL) 200361b2299dSmrg return 0; 200461b2299dSmrg 200561b2299dSmrg preferred_charsets = (Utf8Conv *) conv->state; 200661b2299dSmrg src = (unsigned char const *) *from; 200761b2299dSmrg srcend = src + *from_left; 200861b2299dSmrg dst = (unsigned char *) *to; 200961b2299dSmrg dstend = dst + *to_left; 201061b2299dSmrg unconv_num = 0; 201161b2299dSmrg 201261b2299dSmrg while (src < srcend && dst < dstend) { 201361b2299dSmrg Utf8Conv chosen_charset = NULL; 201461b2299dSmrg XlcSide chosen_side = XlcNONE; 201561b2299dSmrg wchar_t wc; 201661b2299dSmrg int consumed; 201761b2299dSmrg int count; 201861b2299dSmrg 201961b2299dSmrg /* Uses stdc iconv to convert multibyte -> widechar */ 202061b2299dSmrg 20219c019ec5Smaya consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 202261b2299dSmrg if (consumed == 0) 202361b2299dSmrg break; 202461b2299dSmrg if (consumed == -1) { 202561b2299dSmrg src++; 202661b2299dSmrg unconv_num++; 202761b2299dSmrg continue; 202861b2299dSmrg } 202961b2299dSmrg 203061b2299dSmrg count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 203161b2299dSmrg if (count == RET_TOOSMALL) 203261b2299dSmrg break; 203361b2299dSmrg if (count == RET_ILSEQ) { 203461b2299dSmrg src += consumed; 203561b2299dSmrg unconv_num++; 203661b2299dSmrg continue; 203761b2299dSmrg } 203861b2299dSmrg 203961b2299dSmrg if (last_charset == NULL) { 204061b2299dSmrg last_charset = 204161b2299dSmrg _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 204261b2299dSmrg if (last_charset == 
NULL) { 204361b2299dSmrg src += consumed; 204461b2299dSmrg unconv_num++; 204561b2299dSmrg continue; 204661b2299dSmrg } 204761b2299dSmrg } else { 204861b2299dSmrg if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 204961b2299dSmrg && (last_charset->side == XlcGLGR 205061b2299dSmrg || last_charset->side == chosen_side))) 205161b2299dSmrg break; 205261b2299dSmrg } 205361b2299dSmrg src += consumed; 205461b2299dSmrg dst += count; 205561b2299dSmrg } 205661b2299dSmrg 205761b2299dSmrg if (last_charset == NULL) 205861b2299dSmrg return -1; 205961b2299dSmrg 206061b2299dSmrg *from = (XPointer) src; 206161b2299dSmrg *from_left = srcend - src; 206261b2299dSmrg *to = (XPointer) dst; 206361b2299dSmrg *to_left = dstend - dst; 206461b2299dSmrg 206561b2299dSmrg if (num_args >= 1) 206661b2299dSmrg *((XlcCharSet *)args[0]) = last_charset; 206761b2299dSmrg 206861b2299dSmrg return unconv_num; 206961b2299dSmrg} 207061b2299dSmrg 207161b2299dSmrgstatic XlcConvMethodsRec iconv_mbtocs_methods = { 207261b2299dSmrg close_tocs_converter, 207361b2299dSmrg iconv_mbtocs, 207461b2299dSmrg NULL 207561b2299dSmrg}; 207661b2299dSmrg 207761b2299dSmrgstatic XlcConv 207888de56ccSmrgopen_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 207961b2299dSmrg{ 208061b2299dSmrg return create_tocs_conv(from_lcd, &iconv_mbtocs_methods ); 208161b2299dSmrg} 208261b2299dSmrg 208361b2299dSmrg/* from XlcNMultiByte to XlcNString */ 208461b2299dSmrg 208561b2299dSmrgstatic int 208661b2299dSmrgiconv_mbstostr(XlcConv conv, XPointer *from, int *from_left, 208761b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 208861b2299dSmrg{ 208961b2299dSmrg unsigned char const *src; 209061b2299dSmrg unsigned char const *srcend; 209161b2299dSmrg unsigned char *dst; 209261b2299dSmrg unsigned char *dstend; 209361b2299dSmrg int unconv_num; 209461b2299dSmrg 209561b2299dSmrg if (from == NULL || *from == NULL) 209661b2299dSmrg return 0; 209761b2299dSmrg 209861b2299dSmrg src = 
(unsigned char const *) *from; 209961b2299dSmrg srcend = src + *from_left; 210061b2299dSmrg dst = (unsigned char *) *to; 210161b2299dSmrg dstend = dst + *to_left; 210261b2299dSmrg unconv_num = 0; 210361b2299dSmrg 210461b2299dSmrg while (src < srcend) { 210561b2299dSmrg unsigned char c; 210661b2299dSmrg wchar_t wc; 210761b2299dSmrg int consumed; 210861b2299dSmrg 210961b2299dSmrg /* Uses stdc iconv to convert multibyte -> widechar */ 211061b2299dSmrg 21119c019ec5Smaya consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 211261b2299dSmrg if (consumed == 0) 211361b2299dSmrg break; 211461b2299dSmrg if (dst == dstend) 211561b2299dSmrg break; 211661b2299dSmrg if (consumed == -1) { 211761b2299dSmrg consumed = 1; 211861b2299dSmrg c = BAD_CHAR; 211961b2299dSmrg unconv_num++; 212061b2299dSmrg } else { 212161b2299dSmrg if ((wc & ~(wchar_t)0xff) != 0) { 212261b2299dSmrg c = BAD_CHAR; 212361b2299dSmrg unconv_num++; 212461b2299dSmrg } else 212561b2299dSmrg c = (unsigned char) wc; 212661b2299dSmrg } 212761b2299dSmrg *dst++ = c; 212861b2299dSmrg src += consumed; 212961b2299dSmrg } 213061b2299dSmrg 213161b2299dSmrg *from = (XPointer) src; 213261b2299dSmrg *from_left = srcend - src; 213361b2299dSmrg *to = (XPointer) dst; 213461b2299dSmrg *to_left = dstend - dst; 213561b2299dSmrg 213661b2299dSmrg return unconv_num; 213761b2299dSmrg} 213861b2299dSmrg 213961b2299dSmrgstatic XlcConvMethodsRec iconv_mbstostr_methods = { 214061b2299dSmrg close_converter, 214161b2299dSmrg iconv_mbstostr, 214261b2299dSmrg NULL 214361b2299dSmrg}; 214461b2299dSmrg 214561b2299dSmrgstatic XlcConv 214688de56ccSmrgopen_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 214761b2299dSmrg{ 214861b2299dSmrg return create_conv(from_lcd, &iconv_mbstostr_methods); 214961b2299dSmrg} 215061b2299dSmrg 215161b2299dSmrg/* from XlcNString to XlcNMultiByte */ 215261b2299dSmrgstatic int 215361b2299dSmrgiconv_strtombs(XlcConv conv, XPointer *from, int *from_left, 215461b2299dSmrg 
XPointer *to, int *to_left, XPointer *args, int num_args) 215561b2299dSmrg{ 215661b2299dSmrg unsigned char const *src; 215761b2299dSmrg unsigned char const *srcend; 215861b2299dSmrg unsigned char *dst; 215961b2299dSmrg unsigned char *dstend; 216061b2299dSmrg 216161b2299dSmrg if (from == NULL || *from == NULL) 216261b2299dSmrg return 0; 216361b2299dSmrg 216461b2299dSmrg src = (unsigned char const *) *from; 216561b2299dSmrg srcend = src + *from_left; 216661b2299dSmrg dst = (unsigned char *) *to; 216761b2299dSmrg dstend = dst + *to_left; 216861b2299dSmrg 216961b2299dSmrg while (src < srcend) { 217088de56ccSmrg int count = wctomb((char *)dst, *src); 217161b2299dSmrg if (count < 0) 217261b2299dSmrg break; 217361b2299dSmrg dst += count; 217461b2299dSmrg src++; 217561b2299dSmrg } 217661b2299dSmrg 217761b2299dSmrg *from = (XPointer) src; 217861b2299dSmrg *from_left = srcend - src; 217961b2299dSmrg *to = (XPointer) dst; 218061b2299dSmrg *to_left = dstend - dst; 218161b2299dSmrg 218261b2299dSmrg return 0; 218361b2299dSmrg} 218461b2299dSmrg 218561b2299dSmrgstatic XlcConvMethodsRec iconv_strtombs_methods= { 218661b2299dSmrg close_converter, 218761b2299dSmrg iconv_strtombs, 218861b2299dSmrg NULL 218961b2299dSmrg}; 219061b2299dSmrg 219161b2299dSmrgstatic XlcConv 219288de56ccSmrgopen_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 219361b2299dSmrg{ 219461b2299dSmrg return create_conv(from_lcd, &iconv_strtombs_methods); 219561b2299dSmrg} 219661b2299dSmrg 219761b2299dSmrg/***************************************************************************/ 219861b2299dSmrg/* Part II: An iconv locale loader. 219961b2299dSmrg * 220061b2299dSmrg *Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode. 
220161b2299dSmrg */ 220261b2299dSmrg 220361b2299dSmrg/* from XlcNMultiByte to XlcNWideChar */ 220461b2299dSmrgstatic int 220561b2299dSmrgiconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left, 220661b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 220761b2299dSmrg{ 220861b2299dSmrg char *src = *((char **) from); 220961b2299dSmrg wchar_t *dst = *((wchar_t **) to); 221061b2299dSmrg int src_left = *from_left; 221161b2299dSmrg int dst_left = *to_left; 221261b2299dSmrg int length, unconv_num = 0; 221361b2299dSmrg 221461b2299dSmrg while (src_left > 0 && dst_left > 0) { 22159c019ec5Smaya length = mbtowc(dst, src, (size_t) src_left); 221661b2299dSmrg 221761b2299dSmrg if (length > 0) { 221861b2299dSmrg src += length; 221961b2299dSmrg src_left -= length; 222061b2299dSmrg if (dst) 222161b2299dSmrg dst++; 222261b2299dSmrg dst_left--; 222361b2299dSmrg } else if (length < 0) { 222461b2299dSmrg src++; 222561b2299dSmrg src_left--; 222661b2299dSmrg unconv_num++; 222761b2299dSmrg } else { 222861b2299dSmrg /* null ? 
*/ 222961b2299dSmrg src++; 223061b2299dSmrg src_left--; 223161b2299dSmrg if (dst) 223261b2299dSmrg *dst++ = L'\0'; 223361b2299dSmrg dst_left--; 223461b2299dSmrg } 223561b2299dSmrg } 223661b2299dSmrg 223761b2299dSmrg *from = (XPointer) src; 223861b2299dSmrg if (dst) 223961b2299dSmrg *to = (XPointer) dst; 224061b2299dSmrg *from_left = src_left; 224161b2299dSmrg *to_left = dst_left; 224261b2299dSmrg 224361b2299dSmrg return unconv_num; 224461b2299dSmrg} 224561b2299dSmrg 224661b2299dSmrgstatic XlcConvMethodsRec iconv_mbstowcs_methods = { 224761b2299dSmrg close_converter, 224861b2299dSmrg iconv_mbstowcs, 224961b2299dSmrg NULL 225061b2299dSmrg} ; 225161b2299dSmrg 225261b2299dSmrgstatic XlcConv 225388de56ccSmrgopen_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 225461b2299dSmrg{ 225561b2299dSmrg return create_conv(from_lcd, &iconv_mbstowcs_methods); 225661b2299dSmrg} 225761b2299dSmrg 225861b2299dSmrgstatic int 225961b2299dSmrgiconv_wcstombs(XlcConv conv, XPointer *from, int *from_left, 226061b2299dSmrg XPointer *to, int *to_left, XPointer *args, int num_args) 226161b2299dSmrg{ 226261b2299dSmrg wchar_t *src = *((wchar_t **) from); 226361b2299dSmrg char *dst = *((char **) to); 226461b2299dSmrg int src_left = *from_left; 226561b2299dSmrg int dst_left = *to_left; 226661b2299dSmrg int length, unconv_num = 0; 226761b2299dSmrg 226861b2299dSmrg while (src_left > 0 && dst_left >= MB_CUR_MAX) { 226961b2299dSmrg length = wctomb(dst, *src); /* XXX */ 227061b2299dSmrg 227161b2299dSmrg if (length > 0) { 227261b2299dSmrg src++; 227361b2299dSmrg src_left--; 227461b2299dSmrg if (dst) 227561b2299dSmrg dst += length; 227661b2299dSmrg dst_left -= length; 227761b2299dSmrg } else if (length < 0) { 227861b2299dSmrg src++; 227961b2299dSmrg src_left--; 228061b2299dSmrg unconv_num++; 228161b2299dSmrg } 228261b2299dSmrg } 228361b2299dSmrg 228461b2299dSmrg *from = (XPointer) src; 228561b2299dSmrg if (dst) 228661b2299dSmrg *to = (XPointer) dst; 228761b2299dSmrg 
*from_left = src_left; 228861b2299dSmrg *to_left = dst_left; 228961b2299dSmrg 229061b2299dSmrg return unconv_num; 229161b2299dSmrg} 229261b2299dSmrg 229361b2299dSmrgstatic XlcConvMethodsRec iconv_wcstombs_methods = { 229461b2299dSmrg close_converter, 229561b2299dSmrg iconv_wcstombs, 229661b2299dSmrg NULL 229761b2299dSmrg} ; 229861b2299dSmrg 229961b2299dSmrgstatic XlcConv 230088de56ccSmrgopen_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 230161b2299dSmrg{ 230261b2299dSmrg return create_conv(from_lcd, &iconv_wcstombs_methods); 230361b2299dSmrg} 230461b2299dSmrg 230561b2299dSmrgstatic XlcConv 230661b2299dSmrgopen_iconv_mbstofcs( 230761b2299dSmrg XLCd from_lcd, 230861b2299dSmrg const char *from_type, 230961b2299dSmrg XLCd to_lcd, 231061b2299dSmrg const char *to_type) 231161b2299dSmrg{ 231261b2299dSmrg return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods); 231361b2299dSmrg} 231461b2299dSmrg 23151ab64890Smrg/* Registers UTF-8 converters for a UTF-8 locale. */ 23161ab64890Smrg 23171ab64890Smrgvoid 23181ab64890Smrg_XlcAddUtf8LocaleConverters( 23191ab64890Smrg XLCd lcd) 23201ab64890Smrg{ 23211ab64890Smrg /* Register elementary converters. */ 23221ab64890Smrg 23231ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs); 23241ab64890Smrg 23251ab64890Smrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8); 23261ab64890Smrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 23271ab64890Smrg 23281ab64890Smrg _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 23291ab64890Smrg 23301ab64890Smrg /* Register converters for XlcNCharSet. This implicitly provides 23311ab64890Smrg * converters from and to XlcNCompoundText. 
*/ 23321ab64890Smrg 23331ab64890Smrg _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8); 23341ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs); 23351ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1); 23361ab64890Smrg 23371ab64890Smrg _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 23381ab64890Smrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 23391ab64890Smrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 23401ab64890Smrg 23411ab64890Smrg _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8); 23421ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr); 23431ab64890Smrg _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity); 23441ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity); 23451ab64890Smrg 23461ab64890Smrg /* Register converters for XlcNFontCharSet */ 23471ab64890Smrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs); 23481ab64890Smrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2349e9628295Smrg _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNFontCharSet, open_utf8tofcs); 23501ab64890Smrg} 235161b2299dSmrg 235261b2299dSmrgvoid 235361b2299dSmrg_XlcAddGB18030LocaleConverters( 235461b2299dSmrg XLCd lcd) 235561b2299dSmrg{ 235661b2299dSmrg 235761b2299dSmrg /* Register elementary converters. */ 235861b2299dSmrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs); 235961b2299dSmrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs); 236061b2299dSmrg 236161b2299dSmrg /* Register converters for XlcNCharSet. This implicitly provides 236261b2299dSmrg * converters from and to XlcNCompoundText. 
*/ 236361b2299dSmrg 236461b2299dSmrg _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs); 236561b2299dSmrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs); 236661b2299dSmrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs); 236761b2299dSmrg _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs); 236861b2299dSmrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr); 236961b2299dSmrg 237061b2299dSmrg /* Register converters for XlcNFontCharSet */ 237161b2299dSmrg _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs); 237261b2299dSmrg 237361b2299dSmrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 237461b2299dSmrg _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 237561b2299dSmrg _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 237661b2299dSmrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 237761b2299dSmrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 237861b2299dSmrg 237961b2299dSmrg /* Register converters for XlcNFontCharSet */ 238061b2299dSmrg _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 238161b2299dSmrg} 2382