lcUTF8.c revision 9c019ec5
/******************************************************************

              Copyright 1993 by SunSoft, Inc.
              Copyright 1999-2000 by Bruno Haible

Permission to use, copy, modify, distribute, and sell this software
and its documentation for any purpose is hereby granted without fee,
provided that the above copyright notice appear in all copies and
that both that copyright notice and this permission notice appear
in supporting documentation, and that the names of SunSoft, Inc. and
Bruno Haible not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission. SunSoft, Inc. and Bruno Haible make no representations
about the suitability of this software for any purpose. It is
provided "as is" without express or implied warranty.

SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/

/*
 * This file contains:
 *
 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
 *
 *    Used for three purposes:
 *      1. The UTF-8 locales, see below.
 *      2. Unicode aware applications for which the use of 8-bit character
 *         sets is an anachronism.
 *      3. For conversion from keysym to locale encoding.
 *
 * II. Conversion files for an UTF-8 locale loader.
 *     Supports: all locales with codeset UTF-8.
 *     How: Provides converters for UTF-8.
 *     Platforms: all systems.
 *
 * The loader itself is located in lcUTF8.c.
 */

/*
 * The conversion from UTF-8 to CompoundText is realized in a very
 * conservative way. Recall that CompoundText data is used for inter-client
 * communication purposes. We distinguish three classes of clients:
 * - Clients which accept only those pieces of CompoundText which belong to
 *   the character set understood by the current locale.
 *   (Example: clients which are linked to an older X11 library.)
 * - Clients which accept CompoundText with multiple character sets and parse
 *   it themselves.
 *   (Example: emacs, xemacs.)
 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
 *   functions for the conversion of CompoundText to their current locale's
 *   multi-byte/wide-character format.
 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
 * follows. For every character, it first tests whether the character is
 * representable in the current locale's original (non-UTF-8) character set.
 * If not, it goes through the list of predefined character sets for
 * CompoundText and tests if the character is representable in that character
 * set. If so, it encodes the character using its code within that character
 * set.
If not, it uses an UTF-8-in-CompoundText encapsulation. Since 66b8e80941Smrg * clients of the first and second kind ignore such encapsulated text, 67b8e80941Smrg * this encapsulation is kept to a minimum and terminated as early as possible. 68b8e80941Smrg * 69b8e80941Smrg * In a distant future, when clients of the first and second kind will have 70b8e80941Smrg * disappeared, we will be able to stuff UTF-8 data directly in CompoundText 71b8e80941Smrg * without first going through the list of predefined character sets. 72b8e80941Smrg */ 73b8e80941Smrg 74b8e80941Smrg#ifdef HAVE_CONFIG_H 75b8e80941Smrg#include <config.h> 76b8e80941Smrg#endif 77b8e80941Smrg#include <stdio.h> 78b8e80941Smrg#include "Xlibint.h" 79b8e80941Smrg#include "XlcPubI.h" 80b8e80941Smrg#include "XlcGeneric.h" 81b8e80941Smrg 82b8e80941Smrgstatic XlcConv 83b8e80941Smrgcreate_conv( 84b8e80941Smrg XLCd lcd, 85b8e80941Smrg XlcConvMethods methods) 86b8e80941Smrg{ 87b8e80941Smrg XlcConv conv; 88b8e80941Smrg 89b8e80941Smrg conv = Xmalloc(sizeof(XlcConvRec)); 90b8e80941Smrg if (conv == (XlcConv) NULL) 91b8e80941Smrg return (XlcConv) NULL; 92b8e80941Smrg 93b8e80941Smrg conv->methods = methods; 94b8e80941Smrg conv->state = NULL; 95b8e80941Smrg 96b8e80941Smrg return conv; 97b8e80941Smrg} 98b8e80941Smrg 99b8e80941Smrgstatic void 100b8e80941Smrgclose_converter( 101b8e80941Smrg XlcConv conv) 102b8e80941Smrg{ 103b8e80941Smrg Xfree(conv); 104b8e80941Smrg} 105b8e80941Smrg 106b8e80941Smrg/* Replacement character for invalid multibyte sequence or wide character. */ 107b8e80941Smrg#define BAD_WCHAR ((ucs4_t) 0xfffd) 108b8e80941Smrg#define BAD_CHAR '?' 109b8e80941Smrg 110b8e80941Smrg/***************************************************************************/ 111b8e80941Smrg/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8. 112b8e80941Smrg * 113b8e80941Smrg * Note that this code works in any locale. We store Unicode values in 114b8e80941Smrg * `ucs4_t' variables, but don't pass them to the user. 
115b8e80941Smrg * 116b8e80941Smrg * This code has to support all character sets that are used for CompoundText, 117b8e80941Smrg * nothing more, nothing less. See the table in lcCT.c. 118b8e80941Smrg * Since the conversion _to_ CompoundText is likely to need the tables for all 119b8e80941Smrg * character sets at once, we don't use dynamic loading (of tables or shared 120b8e80941Smrg * libraries through iconv()). Use a fixed set of tables instead. 121b8e80941Smrg * 122b8e80941Smrg * We use statically computed tables, not dynamically allocated arrays, 123b8e80941Smrg * because it's more memory efficient: Different processes using the same 124b8e80941Smrg * libX11 shared library share the "text" and read-only "data" sections. 125b8e80941Smrg */ 126b8e80941Smrg 127b8e80941Smrgtypedef unsigned int ucs4_t; 128b8e80941Smrg#define conv_t XlcConv 129b8e80941Smrg 130b8e80941Smrgtypedef struct _Utf8ConvRec { 131b8e80941Smrg const char *name; 132b8e80941Smrg XrmQuark xrm_name; 133b8e80941Smrg int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int); 134b8e80941Smrg int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int); 135b8e80941Smrg} Utf8ConvRec, *Utf8Conv; 136b8e80941Smrg 137b8e80941Smrg/* 138b8e80941Smrg * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n) 139b8e80941Smrg * converts the byte sequence starting at s to a wide character. Up to n bytes 140b8e80941Smrg * are available at s. n is >= 1. 141b8e80941Smrg * Result is number of bytes consumed (if a wide character was read), 142b8e80941Smrg * or 0 if invalid, or -1 if n too small. 143b8e80941Smrg * 144b8e80941Smrg * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n) 145b8e80941Smrg * converts the wide character wc to the character set xxx, and stores the 146b8e80941Smrg * result beginning at r. Up to n bytes may be written at r. n is >= 1. 147b8e80941Smrg * Result is number of bytes written, or 0 if invalid, or -1 if n too small. 
148b8e80941Smrg */ 149b8e80941Smrg 150b8e80941Smrg/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */ 151b8e80941Smrg#define RET_ILSEQ 0 152b8e80941Smrg/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 153b8e80941Smrg#define RET_TOOFEW(n) (-1-(n)) 154b8e80941Smrg/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 155b8e80941Smrg#define RET_TOOSMALL -1 156b8e80941Smrg 157b8e80941Smrg/* 158b8e80941Smrg * The tables below are bijective. It would be possible to extend the 159b8e80941Smrg * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22) 160b8e80941Smrg * but *only* with characters not contained in any other table, and *only* 161b8e80941Smrg * when the current locale is not an UTF-8 locale. 162b8e80941Smrg */ 163b8e80941Smrg 164b8e80941Smrg#include "lcUniConv/utf8.h" 165b8e80941Smrg#include "lcUniConv/ucs2be.h" 166b8e80941Smrg#ifdef notused 167b8e80941Smrg#include "lcUniConv/ascii.h" 168b8e80941Smrg#endif 169b8e80941Smrg#include "lcUniConv/iso8859_1.h" 170b8e80941Smrg#include "lcUniConv/iso8859_2.h" 171b8e80941Smrg#include "lcUniConv/iso8859_3.h" 172b8e80941Smrg#include "lcUniConv/iso8859_4.h" 173b8e80941Smrg#include "lcUniConv/iso8859_5.h" 174b8e80941Smrg#include "lcUniConv/iso8859_6.h" 175b8e80941Smrg#include "lcUniConv/iso8859_7.h" 176b8e80941Smrg#include "lcUniConv/iso8859_8.h" 177b8e80941Smrg#include "lcUniConv/iso8859_9.h" 178b8e80941Smrg#include "lcUniConv/iso8859_10.h" 179b8e80941Smrg#include "lcUniConv/iso8859_11.h" 180b8e80941Smrg#include "lcUniConv/iso8859_13.h" 181b8e80941Smrg#include "lcUniConv/iso8859_14.h" 182b8e80941Smrg#include "lcUniConv/iso8859_15.h" 183b8e80941Smrg#include "lcUniConv/iso8859_16.h" 184b8e80941Smrg#include "lcUniConv/iso8859_9e.h" 185b8e80941Smrg#include "lcUniConv/jisx0201.h" 186b8e80941Smrg#include "lcUniConv/tis620.h" 187b8e80941Smrg#include "lcUniConv/koi8_r.h" 188b8e80941Smrg#include "lcUniConv/koi8_u.h" 189b8e80941Smrg#include "lcUniConv/koi8_c.h" 
190b8e80941Smrg#include "lcUniConv/armscii_8.h" 191b8e80941Smrg#include "lcUniConv/cp1133.h" 192b8e80941Smrg#include "lcUniConv/mulelao.h" 193b8e80941Smrg#include "lcUniConv/viscii.h" 194b8e80941Smrg#include "lcUniConv/tcvn.h" 195b8e80941Smrg#include "lcUniConv/georgian_academy.h" 196b8e80941Smrg#include "lcUniConv/georgian_ps.h" 197b8e80941Smrg#include "lcUniConv/cp1251.h" 198b8e80941Smrg#include "lcUniConv/cp1255.h" 199b8e80941Smrg#include "lcUniConv/cp1256.h" 200b8e80941Smrg#include "lcUniConv/tatar_cyr.h" 201b8e80941Smrg 202b8e80941Smrgtypedef struct { 203b8e80941Smrg unsigned short indx; /* index into big table */ 204b8e80941Smrg unsigned short used; /* bitmask of used entries */ 205b8e80941Smrg} Summary16; 206b8e80941Smrg 207b8e80941Smrg#include "lcUniConv/gb2312.h" 208b8e80941Smrg#include "lcUniConv/jisx0208.h" 209b8e80941Smrg#include "lcUniConv/jisx0212.h" 210b8e80941Smrg#include "lcUniConv/ksc5601.h" 211b8e80941Smrg#include "lcUniConv/big5.h" 212b8e80941Smrg#include "lcUniConv/big5_emacs.h" 213b8e80941Smrg#include "lcUniConv/big5hkscs.h" 214b8e80941Smrg#include "lcUniConv/gbk.h" 215b8e80941Smrg 216b8e80941Smrgstatic Utf8ConvRec all_charsets[] = { 217b8e80941Smrg /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 218b8e80941Smrg (for lookup speed), once at the end (as a fallback). 
*/ 219b8e80941Smrg { "ISO10646-1", NULLQUARK, 220b8e80941Smrg utf8_mbtowc, utf8_wctomb 221b8e80941Smrg }, 222b8e80941Smrg 223b8e80941Smrg { "ISO8859-1", NULLQUARK, 224b8e80941Smrg iso8859_1_mbtowc, iso8859_1_wctomb 225b8e80941Smrg }, 226b8e80941Smrg { "ISO8859-2", NULLQUARK, 227b8e80941Smrg iso8859_2_mbtowc, iso8859_2_wctomb 228b8e80941Smrg }, 229b8e80941Smrg { "ISO8859-3", NULLQUARK, 230b8e80941Smrg iso8859_3_mbtowc, iso8859_3_wctomb 231b8e80941Smrg }, 232b8e80941Smrg { "ISO8859-4", NULLQUARK, 233b8e80941Smrg iso8859_4_mbtowc, iso8859_4_wctomb 234b8e80941Smrg }, 235b8e80941Smrg { "ISO8859-5", NULLQUARK, 236b8e80941Smrg iso8859_5_mbtowc, iso8859_5_wctomb 237b8e80941Smrg }, 238b8e80941Smrg { "ISO8859-6", NULLQUARK, 239b8e80941Smrg iso8859_6_mbtowc, iso8859_6_wctomb 240b8e80941Smrg }, 241b8e80941Smrg { "ISO8859-7", NULLQUARK, 242b8e80941Smrg iso8859_7_mbtowc, iso8859_7_wctomb 243b8e80941Smrg }, 244b8e80941Smrg { "ISO8859-8", NULLQUARK, 245b8e80941Smrg iso8859_8_mbtowc, iso8859_8_wctomb 246b8e80941Smrg }, 247b8e80941Smrg { "ISO8859-9", NULLQUARK, 248b8e80941Smrg iso8859_9_mbtowc, iso8859_9_wctomb 249b8e80941Smrg }, 250b8e80941Smrg { "ISO8859-10", NULLQUARK, 251b8e80941Smrg iso8859_10_mbtowc, iso8859_10_wctomb 252b8e80941Smrg }, 253b8e80941Smrg { "ISO8859-11", NULLQUARK, 254b8e80941Smrg iso8859_11_mbtowc, iso8859_11_wctomb 255b8e80941Smrg }, 256b8e80941Smrg { "ISO8859-13", NULLQUARK, 257b8e80941Smrg iso8859_13_mbtowc, iso8859_13_wctomb 258b8e80941Smrg }, 259b8e80941Smrg { "ISO8859-14", NULLQUARK, 260b8e80941Smrg iso8859_14_mbtowc, iso8859_14_wctomb 261b8e80941Smrg }, 262b8e80941Smrg { "ISO8859-15", NULLQUARK, 263b8e80941Smrg iso8859_15_mbtowc, iso8859_15_wctomb 264b8e80941Smrg }, 265b8e80941Smrg { "ISO8859-16", NULLQUARK, 266b8e80941Smrg iso8859_16_mbtowc, iso8859_16_wctomb 267b8e80941Smrg }, 268b8e80941Smrg { "JISX0201.1976-0", NULLQUARK, 269b8e80941Smrg jisx0201_mbtowc, jisx0201_wctomb 270b8e80941Smrg }, 271b8e80941Smrg { "TIS620-0", NULLQUARK, 272b8e80941Smrg 
tis620_mbtowc, tis620_wctomb 273b8e80941Smrg }, 274b8e80941Smrg { "GB2312.1980-0", NULLQUARK, 275b8e80941Smrg gb2312_mbtowc, gb2312_wctomb 276b8e80941Smrg }, 277b8e80941Smrg { "JISX0208.1983-0", NULLQUARK, 278b8e80941Smrg jisx0208_mbtowc, jisx0208_wctomb 279b8e80941Smrg }, 280b8e80941Smrg { "JISX0208.1990-0", NULLQUARK, 281b8e80941Smrg jisx0208_mbtowc, jisx0208_wctomb 282b8e80941Smrg }, 283b8e80941Smrg { "JISX0212.1990-0", NULLQUARK, 284b8e80941Smrg jisx0212_mbtowc, jisx0212_wctomb 285b8e80941Smrg }, 286b8e80941Smrg { "KSC5601.1987-0", NULLQUARK, 287b8e80941Smrg ksc5601_mbtowc, ksc5601_wctomb 288b8e80941Smrg }, 289b8e80941Smrg { "KOI8-R", NULLQUARK, 290b8e80941Smrg koi8_r_mbtowc, koi8_r_wctomb 291b8e80941Smrg }, 292b8e80941Smrg { "KOI8-U", NULLQUARK, 293b8e80941Smrg koi8_u_mbtowc, koi8_u_wctomb 294b8e80941Smrg }, 295b8e80941Smrg { "KOI8-C", NULLQUARK, 296b8e80941Smrg koi8_c_mbtowc, koi8_c_wctomb 297b8e80941Smrg }, 298b8e80941Smrg { "TATAR-CYR", NULLQUARK, 299b8e80941Smrg tatar_cyr_mbtowc, tatar_cyr_wctomb 300b8e80941Smrg }, 301b8e80941Smrg { "ARMSCII-8", NULLQUARK, 302b8e80941Smrg armscii_8_mbtowc, armscii_8_wctomb 303b8e80941Smrg }, 304b8e80941Smrg { "IBM-CP1133", NULLQUARK, 305b8e80941Smrg cp1133_mbtowc, cp1133_wctomb 306b8e80941Smrg }, 307b8e80941Smrg { "MULELAO-1", NULLQUARK, 308b8e80941Smrg mulelao_mbtowc, mulelao_wctomb 309b8e80941Smrg }, 310b8e80941Smrg { "VISCII1.1-1", NULLQUARK, 311b8e80941Smrg viscii_mbtowc, viscii_wctomb 312b8e80941Smrg }, 313b8e80941Smrg { "TCVN-5712", NULLQUARK, 314b8e80941Smrg tcvn_mbtowc, tcvn_wctomb 315b8e80941Smrg }, 316b8e80941Smrg { "GEORGIAN-ACADEMY", NULLQUARK, 317b8e80941Smrg georgian_academy_mbtowc, georgian_academy_wctomb 318b8e80941Smrg }, 319b8e80941Smrg { "GEORGIAN-PS", NULLQUARK, 320b8e80941Smrg georgian_ps_mbtowc, georgian_ps_wctomb 321b8e80941Smrg }, 322b8e80941Smrg { "ISO8859-9E", NULLQUARK, 323b8e80941Smrg iso8859_9e_mbtowc, iso8859_9e_wctomb 324b8e80941Smrg }, 325b8e80941Smrg { "MICROSOFT-CP1251", NULLQUARK, 
326b8e80941Smrg cp1251_mbtowc, cp1251_wctomb 327b8e80941Smrg }, 328b8e80941Smrg { "MICROSOFT-CP1255", NULLQUARK, 329b8e80941Smrg cp1255_mbtowc, cp1255_wctomb 330b8e80941Smrg }, 331b8e80941Smrg { "MICROSOFT-CP1256", NULLQUARK, 332b8e80941Smrg cp1256_mbtowc, cp1256_wctomb 333b8e80941Smrg }, 334b8e80941Smrg { "BIG5-0", NULLQUARK, 335b8e80941Smrg big5_mbtowc, big5_wctomb 336b8e80941Smrg }, 337b8e80941Smrg { "BIG5-E0", NULLQUARK, 338b8e80941Smrg big5_0_mbtowc, big5_0_wctomb 339b8e80941Smrg }, 340b8e80941Smrg { "BIG5-E1", NULLQUARK, 341b8e80941Smrg big5_1_mbtowc, big5_1_wctomb 342b8e80941Smrg }, 343b8e80941Smrg { "GBK-0", NULLQUARK, 344b8e80941Smrg gbk_mbtowc, gbk_wctomb 345b8e80941Smrg }, 346b8e80941Smrg { "BIG5HKSCS-0", NULLQUARK, 347b8e80941Smrg big5hkscs_mbtowc, big5hkscs_wctomb 348b8e80941Smrg }, 349b8e80941Smrg 350b8e80941Smrg /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning 351b8e80941Smrg (for lookup speed), once at the end (as a fallback). */ 352b8e80941Smrg { "ISO10646-1", NULLQUARK, 353b8e80941Smrg utf8_mbtowc, utf8_wctomb 354b8e80941Smrg }, 355b8e80941Smrg 356b8e80941Smrg /* Encoding ISO10646-1 for fonts means UCS2-like encoding 357b8e80941Smrg so for conversion to FontCharSet we need this record */ 358b8e80941Smrg { "ISO10646-1", NULLQUARK, 359b8e80941Smrg ucs2be_mbtowc, ucs2be_wctomb 360b8e80941Smrg } 361b8e80941Smrg}; 362b8e80941Smrg 363b8e80941Smrg#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0])) 364b8e80941Smrg#define all_charsets_count (charsets_table_size - 1) 365b8e80941Smrg#define ucs2_conv_index (charsets_table_size - 1) 366b8e80941Smrg 367b8e80941Smrgstatic void 368b8e80941Smrginit_all_charsets (void) 369b8e80941Smrg{ 370b8e80941Smrg Utf8Conv convptr; 371b8e80941Smrg int i; 372b8e80941Smrg 373b8e80941Smrg for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--) 374b8e80941Smrg convptr->xrm_name = XrmStringToQuark(convptr->name); 375b8e80941Smrg} 376b8e80941Smrg 377b8e80941Smrg#define 
lazy_init_all_charsets() \ 378b8e80941Smrg do { \ 379b8e80941Smrg if (all_charsets[0].xrm_name == NULLQUARK) \ 380b8e80941Smrg init_all_charsets(); \ 381b8e80941Smrg } while (0) 382b8e80941Smrg 383b8e80941Smrg/* from XlcNCharSet to XlcNUtf8String */ 384b8e80941Smrg 385b8e80941Smrgstatic int 386b8e80941Smrgcstoutf8( 387b8e80941Smrg XlcConv conv, 388b8e80941Smrg XPointer *from, 389b8e80941Smrg int *from_left, 390b8e80941Smrg XPointer *to, 391b8e80941Smrg int *to_left, 392b8e80941Smrg XPointer *args, 393b8e80941Smrg int num_args) 394b8e80941Smrg{ 395b8e80941Smrg XlcCharSet charset; 396b8e80941Smrg const char *name; 397b8e80941Smrg Utf8Conv convptr; 398b8e80941Smrg int i; 399b8e80941Smrg unsigned char const *src; 400b8e80941Smrg unsigned char const *srcend; 401b8e80941Smrg unsigned char *dst; 402b8e80941Smrg unsigned char *dstend; 403b8e80941Smrg int unconv_num; 404b8e80941Smrg 405b8e80941Smrg if (from == NULL || *from == NULL) 406b8e80941Smrg return 0; 407b8e80941Smrg 408b8e80941Smrg if (num_args < 1) 409b8e80941Smrg return -1; 410b8e80941Smrg 411b8e80941Smrg charset = (XlcCharSet) args[0]; 412b8e80941Smrg name = charset->encoding_name; 413b8e80941Smrg /* not charset->name because the latter has a ":GL"/":GR" suffix */ 414b8e80941Smrg 415b8e80941Smrg for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 416b8e80941Smrg if (!strcmp(convptr->name, name)) 417b8e80941Smrg break; 418b8e80941Smrg if (i == 0) 419b8e80941Smrg return -1; 420b8e80941Smrg 421b8e80941Smrg src = (unsigned char const *) *from; 422b8e80941Smrg srcend = src + *from_left; 423b8e80941Smrg dst = (unsigned char *) *to; 424b8e80941Smrg dstend = dst + *to_left; 425b8e80941Smrg unconv_num = 0; 426b8e80941Smrg 427b8e80941Smrg while (src < srcend) { 428b8e80941Smrg ucs4_t wc; 429b8e80941Smrg int consumed; 430b8e80941Smrg int count; 431b8e80941Smrg 432b8e80941Smrg consumed = convptr->cstowc(conv, &wc, src, srcend-src); 433b8e80941Smrg if (consumed == RET_ILSEQ) 434b8e80941Smrg return 
-1; 435b8e80941Smrg if (consumed == RET_TOOFEW(0)) 436b8e80941Smrg break; 437b8e80941Smrg 438b8e80941Smrg count = utf8_wctomb(NULL, dst, wc, dstend-dst); 439b8e80941Smrg if (count == RET_TOOSMALL) 440b8e80941Smrg break; 441b8e80941Smrg if (count == RET_ILSEQ) { 442 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 443 if (count == RET_TOOSMALL) 444 break; 445 unconv_num++; 446 } 447 src += consumed; 448 dst += count; 449 } 450 451 *from = (XPointer) src; 452 *from_left = srcend - src; 453 *to = (XPointer) dst; 454 *to_left = dstend - dst; 455 456 return unconv_num; 457} 458 459static XlcConvMethodsRec methods_cstoutf8 = { 460 close_converter, 461 cstoutf8, 462 NULL 463}; 464 465static XlcConv 466open_cstoutf8( 467 XLCd from_lcd, 468 const char *from_type, 469 XLCd to_lcd, 470 const char *to_type) 471{ 472 lazy_init_all_charsets(); 473 return create_conv(from_lcd, &methods_cstoutf8); 474} 475 476/* from XlcNUtf8String to XlcNCharSet */ 477 478static XlcConv 479create_tocs_conv( 480 XLCd lcd, 481 XlcConvMethods methods) 482{ 483 XlcConv conv; 484 CodeSet *codeset_list; 485 int codeset_num; 486 int charset_num; 487 int i, j, k; 488 Utf8Conv *preferred; 489 490 lazy_init_all_charsets(); 491 492 codeset_list = XLC_GENERIC(lcd, codeset_list); 493 codeset_num = XLC_GENERIC(lcd, codeset_num); 494 495 charset_num = 0; 496 for (i = 0; i < codeset_num; i++) 497 charset_num += codeset_list[i]->num_charsets; 498 if (charset_num > all_charsets_count-1) 499 charset_num = all_charsets_count-1; 500 501 conv = Xmalloc(sizeof(XlcConvRec) 502 + (charset_num + 1) * sizeof(Utf8Conv)); 503 if (conv == (XlcConv) NULL) 504 return (XlcConv) NULL; 505 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 506 507 /* Loop through all codesets mentioned in the locale. 
*/ 508 charset_num = 0; 509 for (i = 0; i < codeset_num; i++) { 510 XlcCharSet *charsets = codeset_list[i]->charset_list; 511 int num_charsets = codeset_list[i]->num_charsets; 512 for (j = 0; j < num_charsets; j++) { 513 const char *name = charsets[j]->encoding_name; 514 /* If it wasn't already encountered... */ 515 for (k = charset_num-1; k >= 0; k--) 516 if (!strcmp(preferred[k]->name, name)) 517 break; 518 if (k < 0) { 519 /* Look it up in all_charsets[]. */ 520 for (k = 0; k < all_charsets_count-1; k++) 521 if (!strcmp(all_charsets[k].name, name)) { 522 /* Add it to the preferred set. */ 523 preferred[charset_num++] = &all_charsets[k]; 524 break; 525 } 526 } 527 } 528 } 529 preferred[charset_num] = (Utf8Conv) NULL; 530 531 conv->methods = methods; 532 conv->state = (XPointer) preferred; 533 534 return conv; 535} 536 537static void 538close_tocs_converter( 539 XlcConv conv) 540{ 541 /* conv->state is allocated together with conv, free both at once. */ 542 Xfree(conv); 543} 544 545/* 546 * Converts a Unicode character to an appropriate character set. The NULL 547 * terminated array of preferred character sets is passed as first argument. 548 * If successful, *charsetp is set to the character set that was used, and 549 * *sidep is set to the character set side (XlcGL or XlcGR). 550 */ 551static int 552charset_wctocs( 553 Utf8Conv *preferred, 554 Utf8Conv *charsetp, 555 XlcSide *sidep, 556 XlcConv conv, 557 unsigned char *r, 558 ucs4_t wc, 559 int n) 560{ 561 int count; 562 Utf8Conv convptr; 563 int i; 564 565 for (; *preferred != (Utf8Conv) NULL; preferred++) { 566 convptr = *preferred; 567 count = convptr->wctocs(conv, r, wc, n); 568 if (count == RET_TOOSMALL) 569 return RET_TOOSMALL; 570 if (count != RET_ILSEQ) { 571 *charsetp = convptr; 572 *sidep = (*r < 0x80 ? 
XlcGL : XlcGR); 573 return count; 574 } 575 } 576 for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) { 577 count = convptr->wctocs(conv, r, wc, n); 578 if (count == RET_TOOSMALL) 579 return RET_TOOSMALL; 580 if (count != RET_ILSEQ) { 581 *charsetp = convptr; 582 *sidep = (*r < 0x80 ? XlcGL : XlcGR); 583 return count; 584 } 585 } 586 return RET_ILSEQ; 587} 588 589static int 590utf8tocs( 591 XlcConv conv, 592 XPointer *from, 593 int *from_left, 594 XPointer *to, 595 int *to_left, 596 XPointer *args, 597 int num_args) 598{ 599 Utf8Conv *preferred_charsets; 600 XlcCharSet last_charset = NULL; 601 unsigned char const *src; 602 unsigned char const *srcend; 603 unsigned char *dst; 604 unsigned char *dstend; 605 int unconv_num; 606 607 if (from == NULL || *from == NULL) 608 return 0; 609 610 preferred_charsets = (Utf8Conv *) conv->state; 611 src = (unsigned char const *) *from; 612 srcend = src + *from_left; 613 dst = (unsigned char *) *to; 614 dstend = dst + *to_left; 615 unconv_num = 0; 616 617 while (src < srcend && dst < dstend) { 618 Utf8Conv chosen_charset = NULL; 619 XlcSide chosen_side = XlcNONE; 620 ucs4_t wc; 621 int consumed; 622 int count; 623 624 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 625 if (consumed == RET_TOOFEW(0)) 626 break; 627 if (consumed == RET_ILSEQ) { 628 src++; 629 unconv_num++; 630 continue; 631 } 632 633 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 634 if (count == RET_TOOSMALL) 635 break; 636 if (count == RET_ILSEQ) { 637 src += consumed; 638 unconv_num++; 639 continue; 640 } 641 642 if (last_charset == NULL) { 643 last_charset = 644 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 645 if (last_charset == NULL) { 646 src += consumed; 647 unconv_num++; 648 continue; 649 } 650 } else { 651 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 652 && (last_charset->side == XlcGLGR 653 || last_charset->side == chosen_side))) 654 
break; 655 } 656 src += consumed; 657 dst += count; 658 } 659 660 if (last_charset == NULL) 661 return -1; 662 663 *from = (XPointer) src; 664 *from_left = srcend - src; 665 *to = (XPointer) dst; 666 *to_left = dstend - dst; 667 668 if (num_args >= 1) 669 *((XlcCharSet *)args[0]) = last_charset; 670 671 return unconv_num; 672} 673 674static XlcConvMethodsRec methods_utf8tocs = { 675 close_tocs_converter, 676 utf8tocs, 677 NULL 678}; 679 680static XlcConv 681open_utf8tocs( 682 XLCd from_lcd, 683 const char *from_type, 684 XLCd to_lcd, 685 const char *to_type) 686{ 687 return create_tocs_conv(from_lcd, &methods_utf8tocs); 688} 689 690/* from XlcNUtf8String to XlcNChar */ 691 692static int 693utf8tocs1( 694 XlcConv conv, 695 XPointer *from, 696 int *from_left, 697 XPointer *to, 698 int *to_left, 699 XPointer *args, 700 int num_args) 701{ 702 Utf8Conv *preferred_charsets; 703 XlcCharSet last_charset = NULL; 704 unsigned char const *src; 705 unsigned char const *srcend; 706 unsigned char *dst; 707 unsigned char *dstend; 708 int unconv_num; 709 710 if (from == NULL || *from == NULL) 711 return 0; 712 713 preferred_charsets = (Utf8Conv *) conv->state; 714 src = (unsigned char const *) *from; 715 srcend = src + *from_left; 716 dst = (unsigned char *) *to; 717 dstend = dst + *to_left; 718 unconv_num = 0; 719 720 while (src < srcend && dst < dstend) { 721 Utf8Conv chosen_charset = NULL; 722 XlcSide chosen_side = XlcNONE; 723 ucs4_t wc; 724 int consumed; 725 int count; 726 727 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 728 if (consumed == RET_TOOFEW(0)) 729 break; 730 if (consumed == RET_ILSEQ) { 731 src++; 732 unconv_num++; 733 continue; 734 } 735 736 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 737 if (count == RET_TOOSMALL) 738 break; 739 if (count == RET_ILSEQ) { 740 src += consumed; 741 unconv_num++; 742 continue; 743 } 744 745 if (last_charset == NULL) { 746 last_charset = 747 
_XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 748 if (last_charset == NULL) { 749 src += consumed; 750 unconv_num++; 751 continue; 752 } 753 } else { 754 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 755 && (last_charset->side == XlcGLGR 756 || last_charset->side == chosen_side))) 757 break; 758 } 759 src += consumed; 760 dst += count; 761 break; 762 } 763 764 if (last_charset == NULL) 765 return -1; 766 767 *from = (XPointer) src; 768 *from_left = srcend - src; 769 *to = (XPointer) dst; 770 *to_left = dstend - dst; 771 772 if (num_args >= 1) 773 *((XlcCharSet *)args[0]) = last_charset; 774 775 return unconv_num; 776} 777 778static XlcConvMethodsRec methods_utf8tocs1 = { 779 close_tocs_converter, 780 utf8tocs1, 781 NULL 782}; 783 784static XlcConv 785open_utf8tocs1( 786 XLCd from_lcd, 787 const char *from_type, 788 XLCd to_lcd, 789 const char *to_type) 790{ 791 return create_tocs_conv(from_lcd, &methods_utf8tocs1); 792} 793 794/* from XlcNUtf8String to XlcNString */ 795 796static int 797utf8tostr( 798 XlcConv conv, 799 XPointer *from, 800 int *from_left, 801 XPointer *to, 802 int *to_left, 803 XPointer *args, 804 int num_args) 805{ 806 unsigned char const *src; 807 unsigned char const *srcend; 808 unsigned char *dst; 809 unsigned char *dstend; 810 int unconv_num; 811 812 if (from == NULL || *from == NULL) 813 return 0; 814 815 src = (unsigned char const *) *from; 816 srcend = src + *from_left; 817 dst = (unsigned char *) *to; 818 dstend = dst + *to_left; 819 unconv_num = 0; 820 821 while (src < srcend) { 822 unsigned char c; 823 ucs4_t wc; 824 int consumed; 825 826 consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 827 if (consumed == RET_TOOFEW(0)) 828 break; 829 if (dst == dstend) 830 break; 831 if (consumed == RET_ILSEQ) { 832 consumed = 1; 833 c = BAD_CHAR; 834 unconv_num++; 835 } else { 836 if ((wc & ~(ucs4_t)0xff) != 0) { 837 c = BAD_CHAR; 838 unconv_num++; 839 } else 840 c = (unsigned char) wc; 841 } 842 *dst++ = c; 843 src 
+= consumed; 844 } 845 846 *from = (XPointer) src; 847 *from_left = srcend - src; 848 *to = (XPointer) dst; 849 *to_left = dstend - dst; 850 851 return unconv_num; 852} 853 854static XlcConvMethodsRec methods_utf8tostr = { 855 close_converter, 856 utf8tostr, 857 NULL 858}; 859 860static XlcConv 861open_utf8tostr( 862 XLCd from_lcd, 863 const char *from_type, 864 XLCd to_lcd, 865 const char *to_type) 866{ 867 return create_conv(from_lcd, &methods_utf8tostr); 868} 869 870/* from XlcNString to XlcNUtf8String */ 871 872static int 873strtoutf8( 874 XlcConv conv, 875 XPointer *from, 876 int *from_left, 877 XPointer *to, 878 int *to_left, 879 XPointer *args, 880 int num_args) 881{ 882 unsigned char const *src; 883 unsigned char const *srcend; 884 unsigned char *dst; 885 unsigned char *dstend; 886 887 if (from == NULL || *from == NULL) 888 return 0; 889 890 src = (unsigned char const *) *from; 891 srcend = src + *from_left; 892 dst = (unsigned char *) *to; 893 dstend = dst + *to_left; 894 895 while (src < srcend) { 896 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 897 if (count == RET_TOOSMALL) 898 break; 899 dst += count; 900 src++; 901 } 902 903 *from = (XPointer) src; 904 *from_left = srcend - src; 905 *to = (XPointer) dst; 906 *to_left = dstend - dst; 907 908 return 0; 909} 910 911static XlcConvMethodsRec methods_strtoutf8 = { 912 close_converter, 913 strtoutf8, 914 NULL 915}; 916 917static XlcConv 918open_strtoutf8( 919 XLCd from_lcd, 920 const char *from_type, 921 XLCd to_lcd, 922 const char *to_type) 923{ 924 return create_conv(from_lcd, &methods_strtoutf8); 925} 926 927/* Support for the input methods. 
*/ 928 929XPointer 930_Utf8GetConvByName( 931 const char *name) 932{ 933 XrmQuark xrm_name; 934 Utf8Conv convptr; 935 int i; 936 937 if (name == NULL) 938 return (XPointer) NULL; 939 940 lazy_init_all_charsets(); 941 xrm_name = XrmStringToQuark(name); 942 943 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 944 if (convptr->xrm_name == xrm_name) 945 return (XPointer) convptr->wctocs; 946 return (XPointer) NULL; 947} 948 949/* from XlcNUcsChar to XlcNChar, needed for input methods */ 950 951static XlcConv 952create_ucstocs_conv( 953 XLCd lcd, 954 XlcConvMethods methods) 955{ 956 957 if (XLC_PUBLIC_PART(lcd)->codeset 958 && _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) { 959 XlcConv conv; 960 Utf8Conv *preferred; 961 962 lazy_init_all_charsets(); 963 964 conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv)); 965 if (conv == (XlcConv) NULL) 966 return (XlcConv) NULL; 967 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 968 969 preferred[0] = &all_charsets[0]; /* ISO10646 */ 970 preferred[1] = (Utf8Conv) NULL; 971 972 conv->methods = methods; 973 conv->state = (XPointer) preferred; 974 975 return conv; 976 } else { 977 return create_tocs_conv(lcd, methods); 978 } 979} 980 981static int 982charset_wctocs_exactly( 983 Utf8Conv *preferred, 984 Utf8Conv *charsetp, 985 XlcSide *sidep, 986 XlcConv conv, 987 unsigned char *r, 988 ucs4_t wc, 989 int n) 990{ 991 int count; 992 Utf8Conv convptr; 993 994 for (; *preferred != (Utf8Conv) NULL; preferred++) { 995 convptr = *preferred; 996 count = convptr->wctocs(conv, r, wc, n); 997 if (count == RET_TOOSMALL) 998 return RET_TOOSMALL; 999 if (count != RET_ILSEQ) { 1000 *charsetp = convptr; 1001 *sidep = (*r < 0x80 ? 
XlcGL : XlcGR); 1002 return count; 1003 } 1004 } 1005 return RET_ILSEQ; 1006} 1007 1008static int 1009ucstocs1( 1010 XlcConv conv, 1011 XPointer *from, 1012 int *from_left, 1013 XPointer *to, 1014 int *to_left, 1015 XPointer *args, 1016 int num_args) 1017{ 1018 ucs4_t const *src; 1019 unsigned char *dst = (unsigned char *) *to; 1020 int unconv_num = 0; 1021 Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state; 1022 Utf8Conv chosen_charset = NULL; 1023 XlcSide chosen_side = XlcNONE; 1024 XlcCharSet charset = NULL; 1025 int count; 1026 1027 if (from == NULL || *from == NULL) 1028 return 0; 1029 1030 src = (ucs4_t const *) *from; 1031 1032 count = charset_wctocs_exactly(preferred_charsets, &chosen_charset, 1033 &chosen_side, conv, dst, *src, *to_left); 1034 if (count < 1) { 1035 unconv_num++; 1036 count = 0; 1037 } else { 1038 charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1039 } 1040 if (charset == NULL) 1041 return -1; 1042 1043 *from = (XPointer) ++src; 1044 (*from_left)--; 1045 *to = (XPointer) dst; 1046 *to_left -= count; 1047 1048 if (num_args >= 1) 1049 *((XlcCharSet *)args[0]) = charset; 1050 1051 return unconv_num; 1052} 1053 1054static XlcConvMethodsRec methods_ucstocs1 = { 1055 close_tocs_converter, 1056 ucstocs1, 1057 NULL 1058}; 1059 1060static XlcConv 1061open_ucstocs1( 1062 XLCd from_lcd, 1063 const char *from_type, 1064 XLCd to_lcd, 1065 const char *to_type) 1066{ 1067 return create_ucstocs_conv(from_lcd, &methods_ucstocs1); 1068} 1069 1070/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */ 1071 1072static int 1073ucstoutf8( 1074 XlcConv conv, 1075 XPointer *from, 1076 int *from_left, 1077 XPointer *to, 1078 int *to_left, 1079 XPointer *args, 1080 int num_args) 1081{ 1082 const ucs4_t *src; 1083 const ucs4_t *srcend; 1084 unsigned char *dst; 1085 unsigned char *dstend; 1086 int unconv_num; 1087 1088 if (from == NULL || *from == NULL) 1089 return 0; 1090 1091 src = (const ucs4_t *) *from; 1092 srcend = src + 
*from_left; 1093 dst = (unsigned char *) *to; 1094 dstend = dst + *to_left; 1095 unconv_num = 0; 1096 1097 while (src < srcend) { 1098 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1099 if (count == RET_TOOSMALL) 1100 break; 1101 if (count == RET_ILSEQ) 1102 unconv_num++; 1103 src++; 1104 dst += count; 1105 } 1106 1107 *from = (XPointer) src; 1108 *from_left = srcend - src; 1109 *to = (XPointer) dst; 1110 *to_left = dstend - dst; 1111 1112 return unconv_num; 1113} 1114 1115static XlcConvMethodsRec methods_ucstoutf8 = { 1116 close_converter, 1117 ucstoutf8, 1118 NULL 1119}; 1120 1121static XlcConv 1122open_ucstoutf8( 1123 XLCd from_lcd, 1124 const char *from_type, 1125 XLCd to_lcd, 1126 const char *to_type) 1127{ 1128 return create_conv(from_lcd, &methods_ucstoutf8); 1129} 1130 1131/* Registers UTF-8 converters for a non-UTF-8 locale. */ 1132void 1133_XlcAddUtf8Converters( 1134 XLCd lcd) 1135{ 1136 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8); 1137 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs); 1138 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1); 1139 _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8); 1140 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr); 1141 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNChar, open_ucstocs1); 1142 _XlcSetConverter(lcd, XlcNUcsChar, lcd, XlcNUtf8String, open_ucstoutf8); 1143} 1144 1145/***************************************************************************/ 1146/* Part II: UTF-8 locale loader conversion files 1147 * 1148 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode. 
1149 */ 1150 1151/* from XlcNMultiByte to XlcNWideChar */ 1152 1153static int 1154utf8towcs( 1155 XlcConv conv, 1156 XPointer *from, 1157 int *from_left, 1158 XPointer *to, 1159 int *to_left, 1160 XPointer *args, 1161 int num_args) 1162{ 1163 unsigned char const *src; 1164 unsigned char const *srcend; 1165 wchar_t *dst; 1166 wchar_t *dstend; 1167 int unconv_num; 1168 1169 if (from == NULL || *from == NULL) 1170 return 0; 1171 1172 src = (unsigned char const *) *from; 1173 srcend = src + *from_left; 1174 dst = (wchar_t *) *to; 1175 dstend = dst + *to_left; 1176 unconv_num = 0; 1177 1178 while (src < srcend && dst < dstend) { 1179 ucs4_t wc; 1180 int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src); 1181 if (consumed == RET_TOOFEW(0)) 1182 break; 1183 if (consumed == RET_ILSEQ) { 1184 src++; 1185 *dst = BAD_WCHAR; 1186 unconv_num++; 1187 } else { 1188 src += consumed; 1189 *dst = wc; 1190 } 1191 dst++; 1192 } 1193 1194 *from = (XPointer) src; 1195 *from_left = srcend - src; 1196 *to = (XPointer) dst; 1197 *to_left = dstend - dst; 1198 1199 return unconv_num; 1200} 1201 1202static XlcConvMethodsRec methods_utf8towcs = { 1203 close_converter, 1204 utf8towcs, 1205 NULL 1206}; 1207 1208static XlcConv 1209open_utf8towcs( 1210 XLCd from_lcd, 1211 const char *from_type, 1212 XLCd to_lcd, 1213 const char *to_type) 1214{ 1215 return create_conv(from_lcd, &methods_utf8towcs); 1216} 1217 1218/* from XlcNWideChar to XlcNMultiByte */ 1219 1220static int 1221wcstoutf8( 1222 XlcConv conv, 1223 XPointer *from, 1224 int *from_left, 1225 XPointer *to, 1226 int *to_left, 1227 XPointer *args, 1228 int num_args) 1229{ 1230 wchar_t const *src; 1231 wchar_t const *srcend; 1232 unsigned char *dst; 1233 unsigned char *dstend; 1234 int unconv_num; 1235 1236 if (from == NULL || *from == NULL) 1237 return 0; 1238 1239 src = (wchar_t const *) *from; 1240 srcend = src + *from_left; 1241 dst = (unsigned char *) *to; 1242 dstend = dst + *to_left; 1243 unconv_num = 0; 1244 1245 while (src < 
srcend) { 1246 int count = utf8_wctomb(NULL, dst, *src, dstend-dst); 1247 if (count == RET_TOOSMALL) 1248 break; 1249 if (count == RET_ILSEQ) { 1250 count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst); 1251 if (count == RET_TOOSMALL) 1252 break; 1253 unconv_num++; 1254 } 1255 dst += count; 1256 src++; 1257 } 1258 1259 *from = (XPointer) src; 1260 *from_left = srcend - src; 1261 *to = (XPointer) dst; 1262 *to_left = dstend - dst; 1263 1264 return unconv_num; 1265} 1266 1267static XlcConvMethodsRec methods_wcstoutf8 = { 1268 close_converter, 1269 wcstoutf8, 1270 NULL 1271}; 1272 1273static XlcConv 1274open_wcstoutf8( 1275 XLCd from_lcd, 1276 const char *from_type, 1277 XLCd to_lcd, 1278 const char *to_type) 1279{ 1280 return create_conv(from_lcd, &methods_wcstoutf8); 1281} 1282 1283/* from XlcNString to XlcNWideChar */ 1284 1285static int 1286our_strtowcs( 1287 XlcConv conv, 1288 XPointer *from, 1289 int *from_left, 1290 XPointer *to, 1291 int *to_left, 1292 XPointer *args, 1293 int num_args) 1294{ 1295 unsigned char const *src; 1296 unsigned char const *srcend; 1297 wchar_t *dst; 1298 wchar_t *dstend; 1299 1300 if (from == NULL || *from == NULL) 1301 return 0; 1302 1303 src = (unsigned char const *) *from; 1304 srcend = src + *from_left; 1305 dst = (wchar_t *) *to; 1306 dstend = dst + *to_left; 1307 1308 while (src < srcend && dst < dstend) 1309 *dst++ = (wchar_t) *src++; 1310 1311 *from = (XPointer) src; 1312 *from_left = srcend - src; 1313 *to = (XPointer) dst; 1314 *to_left = dstend - dst; 1315 1316 return 0; 1317} 1318 1319static XlcConvMethodsRec methods_strtowcs = { 1320 close_converter, 1321 our_strtowcs, 1322 NULL 1323}; 1324 1325static XlcConv 1326open_strtowcs( 1327 XLCd from_lcd, 1328 const char *from_type, 1329 XLCd to_lcd, 1330 const char *to_type) 1331{ 1332 return create_conv(from_lcd, &methods_strtowcs); 1333} 1334 1335/* from XlcNWideChar to XlcNString */ 1336 1337static int 1338our_wcstostr( 1339 XlcConv conv, 1340 XPointer *from, 1341 int 
*from_left, 1342 XPointer *to, 1343 int *to_left, 1344 XPointer *args, 1345 int num_args) 1346{ 1347 wchar_t const *src; 1348 wchar_t const *srcend; 1349 unsigned char *dst; 1350 unsigned char *dstend; 1351 int unconv_num; 1352 1353 if (from == NULL || *from == NULL) 1354 return 0; 1355 1356 src = (wchar_t const *) *from; 1357 srcend = src + *from_left; 1358 dst = (unsigned char *) *to; 1359 dstend = dst + *to_left; 1360 unconv_num = 0; 1361 1362 while (src < srcend && dst < dstend) { 1363 unsigned int wc = *src++; 1364 if (wc < 0x80) 1365 *dst = wc; 1366 else { 1367 *dst = BAD_CHAR; 1368 unconv_num++; 1369 } 1370 dst++; 1371 } 1372 1373 *from = (XPointer) src; 1374 *from_left = srcend - src; 1375 *to = (XPointer) dst; 1376 *to_left = dstend - dst; 1377 1378 return unconv_num; 1379} 1380 1381static XlcConvMethodsRec methods_wcstostr = { 1382 close_converter, 1383 our_wcstostr, 1384 NULL 1385}; 1386 1387static XlcConv 1388open_wcstostr( 1389 XLCd from_lcd, 1390 const char *from_type, 1391 XLCd to_lcd, 1392 const char *to_type) 1393{ 1394 return create_conv(from_lcd, &methods_wcstostr); 1395} 1396 1397/* from XlcNCharSet to XlcNWideChar */ 1398 1399static int 1400cstowcs( 1401 XlcConv conv, 1402 XPointer *from, 1403 int *from_left, 1404 XPointer *to, 1405 int *to_left, 1406 XPointer *args, 1407 int num_args) 1408{ 1409 XlcCharSet charset; 1410 const char *name; 1411 Utf8Conv convptr; 1412 int i; 1413 unsigned char const *src; 1414 unsigned char const *srcend; 1415 wchar_t *dst; 1416 wchar_t *dstend; 1417 int unconv_num; 1418 1419 if (from == NULL || *from == NULL) 1420 return 0; 1421 1422 if (num_args < 1) 1423 return -1; 1424 1425 charset = (XlcCharSet) args[0]; 1426 name = charset->encoding_name; 1427 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1428 1429 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1430 if (!strcmp(convptr->name, name)) 1431 break; 1432 if (i == 0) 1433 return -1; 1434 1435 src = (unsigned 
char const *) *from; 1436 srcend = src + *from_left; 1437 dst = (wchar_t *) *to; 1438 dstend = dst + *to_left; 1439 unconv_num = 0; 1440 1441 while (src < srcend && dst < dstend) { 1442 unsigned int wc; 1443 int consumed; 1444 1445 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1446 if (consumed == RET_ILSEQ) 1447 return -1; 1448 if (consumed == RET_TOOFEW(0)) 1449 break; 1450 1451 *dst++ = wc; 1452 src += consumed; 1453 } 1454 1455 *from = (XPointer) src; 1456 *from_left = srcend - src; 1457 *to = (XPointer) dst; 1458 *to_left = dstend - dst; 1459 1460 return unconv_num; 1461} 1462 1463static XlcConvMethodsRec methods_cstowcs = { 1464 close_converter, 1465 cstowcs, 1466 NULL 1467}; 1468 1469static XlcConv 1470open_cstowcs( 1471 XLCd from_lcd, 1472 const char *from_type, 1473 XLCd to_lcd, 1474 const char *to_type) 1475{ 1476 lazy_init_all_charsets(); 1477 return create_conv(from_lcd, &methods_cstowcs); 1478} 1479 1480/* from XlcNWideChar to XlcNCharSet */ 1481 1482static int 1483wcstocs( 1484 XlcConv conv, 1485 XPointer *from, 1486 int *from_left, 1487 XPointer *to, 1488 int *to_left, 1489 XPointer *args, 1490 int num_args) 1491{ 1492 Utf8Conv *preferred_charsets; 1493 XlcCharSet last_charset = NULL; 1494 wchar_t const *src; 1495 wchar_t const *srcend; 1496 unsigned char *dst; 1497 unsigned char *dstend; 1498 int unconv_num; 1499 1500 if (from == NULL || *from == NULL) 1501 return 0; 1502 1503 preferred_charsets = (Utf8Conv *) conv->state; 1504 src = (wchar_t const *) *from; 1505 srcend = src + *from_left; 1506 dst = (unsigned char *) *to; 1507 dstend = dst + *to_left; 1508 unconv_num = 0; 1509 1510 while (src < srcend && dst < dstend) { 1511 Utf8Conv chosen_charset = NULL; 1512 XlcSide chosen_side = XlcNONE; 1513 wchar_t wc = *src; 1514 int count; 1515 1516 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1517 if (count == RET_TOOSMALL) 1518 break; 1519 if (count == RET_ILSEQ) { 1520 src++; 1521 
unconv_num++; 1522 continue; 1523 } 1524 1525 if (last_charset == NULL) { 1526 last_charset = 1527 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1528 if (last_charset == NULL) { 1529 src++; 1530 unconv_num++; 1531 continue; 1532 } 1533 } else { 1534 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1535 && (last_charset->side == XlcGLGR 1536 || last_charset->side == chosen_side))) 1537 break; 1538 } 1539 src++; 1540 dst += count; 1541 } 1542 1543 if (last_charset == NULL) 1544 return -1; 1545 1546 *from = (XPointer) src; 1547 *from_left = srcend - src; 1548 *to = (XPointer) dst; 1549 *to_left = dstend - dst; 1550 1551 if (num_args >= 1) 1552 *((XlcCharSet *)args[0]) = last_charset; 1553 1554 return unconv_num; 1555} 1556 1557static XlcConvMethodsRec methods_wcstocs = { 1558 close_tocs_converter, 1559 wcstocs, 1560 NULL 1561}; 1562 1563static XlcConv 1564open_wcstocs( 1565 XLCd from_lcd, 1566 const char *from_type, 1567 XLCd to_lcd, 1568 const char *to_type) 1569{ 1570 return create_tocs_conv(from_lcd, &methods_wcstocs); 1571} 1572 1573/* from XlcNWideChar to XlcNChar */ 1574 1575static int 1576wcstocs1( 1577 XlcConv conv, 1578 XPointer *from, 1579 int *from_left, 1580 XPointer *to, 1581 int *to_left, 1582 XPointer *args, 1583 int num_args) 1584{ 1585 Utf8Conv *preferred_charsets; 1586 XlcCharSet last_charset = NULL; 1587 wchar_t const *src; 1588 wchar_t const *srcend; 1589 unsigned char *dst; 1590 unsigned char *dstend; 1591 int unconv_num; 1592 1593 if (from == NULL || *from == NULL) 1594 return 0; 1595 1596 preferred_charsets = (Utf8Conv *) conv->state; 1597 src = (wchar_t const *) *from; 1598 srcend = src + *from_left; 1599 dst = (unsigned char *) *to; 1600 dstend = dst + *to_left; 1601 unconv_num = 0; 1602 1603 while (src < srcend && dst < dstend) { 1604 Utf8Conv chosen_charset = NULL; 1605 XlcSide chosen_side = XlcNONE; 1606 wchar_t wc = *src; 1607 int count; 1608 1609 count = charset_wctocs(preferred_charsets, &chosen_charset, 
&chosen_side, conv, dst, wc, dstend-dst); 1610 if (count == RET_TOOSMALL) 1611 break; 1612 if (count == RET_ILSEQ) { 1613 src++; 1614 unconv_num++; 1615 continue; 1616 } 1617 1618 if (last_charset == NULL) { 1619 last_charset = 1620 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1621 if (last_charset == NULL) { 1622 src++; 1623 unconv_num++; 1624 continue; 1625 } 1626 } else { 1627 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1628 && (last_charset->side == XlcGLGR 1629 || last_charset->side == chosen_side))) 1630 break; 1631 } 1632 src++; 1633 dst += count; 1634 break; 1635 } 1636 1637 if (last_charset == NULL) 1638 return -1; 1639 1640 *from = (XPointer) src; 1641 *from_left = srcend - src; 1642 *to = (XPointer) dst; 1643 *to_left = dstend - dst; 1644 1645 if (num_args >= 1) 1646 *((XlcCharSet *)args[0]) = last_charset; 1647 1648 return unconv_num; 1649} 1650 1651static XlcConvMethodsRec methods_wcstocs1 = { 1652 close_tocs_converter, 1653 wcstocs1, 1654 NULL 1655}; 1656 1657static XlcConv 1658open_wcstocs1( 1659 XLCd from_lcd, 1660 const char *from_type, 1661 XLCd to_lcd, 1662 const char *to_type) 1663{ 1664 return create_tocs_conv(from_lcd, &methods_wcstocs1); 1665} 1666 1667/* trivial, no conversion */ 1668 1669static int 1670identity( 1671 XlcConv conv, 1672 XPointer *from, 1673 int *from_left, 1674 XPointer *to, 1675 int *to_left, 1676 XPointer *args, 1677 int num_args) 1678{ 1679 unsigned char const *src; 1680 unsigned char const *srcend; 1681 unsigned char *dst; 1682 unsigned char *dstend; 1683 1684 if (from == NULL || *from == NULL) 1685 return 0; 1686 1687 src = (unsigned char const *) *from; 1688 srcend = src + *from_left; 1689 dst = (unsigned char *) *to; 1690 dstend = dst + *to_left; 1691 1692 while (src < srcend && dst < dstend) 1693 *dst++ = *src++; 1694 1695 *from = (XPointer) src; 1696 *from_left = srcend - src; 1697 *to = (XPointer) dst; 1698 *to_left = dstend - dst; 1699 1700 return 0; 1701} 1702 1703static 
XlcConvMethodsRec methods_identity = { 1704 close_converter, 1705 identity, 1706 NULL 1707}; 1708 1709static XlcConv 1710open_identity( 1711 XLCd from_lcd, 1712 const char *from_type, 1713 XLCd to_lcd, 1714 const char *to_type) 1715{ 1716 return create_conv(from_lcd, &methods_identity); 1717} 1718 1719/* from MultiByte/WideChar to FontCharSet. */ 1720/* They really use converters to CharSet 1721 * but with different create_conv procedure. */ 1722 1723static XlcConv 1724create_tofontcs_conv( 1725 XLCd lcd, 1726 XlcConvMethods methods) 1727{ 1728 XlcConv conv; 1729 int i, num, k, count; 1730 char **value, buf[20]; 1731 Utf8Conv *preferred; 1732 1733 lazy_init_all_charsets(); 1734 1735 for (i = 0, num = 0;; i++) { 1736 snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 1737 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1738 if (count < 1) { 1739 snprintf(buf, sizeof(buf), "fs%d.charset", i); 1740 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1741 if (count < 1) 1742 break; 1743 } 1744 num += count; 1745 } 1746 1747 conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv)); 1748 if (conv == (XlcConv) NULL) 1749 return (XlcConv) NULL; 1750 preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec)); 1751 1752 /* Loop through all fontsets mentioned in the locale. */ 1753 for (i = 0, num = 0;; i++) { 1754 snprintf(buf, sizeof(buf), "fs%d.charset.name", i); 1755 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1756 if (count < 1) { 1757 snprintf(buf, sizeof(buf), "fs%d.charset", i); 1758 _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count); 1759 if (count < 1) 1760 break; 1761 } 1762 while (count-- > 0) { 1763 XlcCharSet charset = _XlcGetCharSet(*value++); 1764 const char *name; 1765 1766 if (charset == (XlcCharSet) NULL) 1767 continue; 1768 1769 name = charset->encoding_name; 1770 /* If it wasn't already encountered... 
*/ 1771 for (k = num - 1; k >= 0; k--) 1772 if (!strcmp(preferred[k]->name, name)) 1773 break; 1774 if (k < 0) { 1775 /* For fonts "ISO10646-1" means ucs2, not utf8.*/ 1776 if (!strcmp("ISO10646-1", name)) { 1777 preferred[num++] = &all_charsets[ucs2_conv_index]; 1778 continue; 1779 } 1780 /* Look it up in all_charsets[]. */ 1781 for (k = 0; k < all_charsets_count-1; k++) 1782 if (!strcmp(all_charsets[k].name, name)) { 1783 /* Add it to the preferred set. */ 1784 preferred[num++] = &all_charsets[k]; 1785 break; 1786 } 1787 } 1788 } 1789 } 1790 preferred[num] = (Utf8Conv) NULL; 1791 1792 conv->methods = methods; 1793 conv->state = (XPointer) preferred; 1794 1795 return conv; 1796} 1797 1798static XlcConv 1799open_wcstofcs( 1800 XLCd from_lcd, 1801 const char *from_type, 1802 XLCd to_lcd, 1803 const char *to_type) 1804{ 1805 return create_tofontcs_conv(from_lcd, &methods_wcstocs); 1806} 1807 1808static XlcConv 1809open_utf8tofcs( 1810 XLCd from_lcd, 1811 const char *from_type, 1812 XLCd to_lcd, 1813 const char *to_type) 1814{ 1815 return create_tofontcs_conv(from_lcd, &methods_utf8tocs); 1816} 1817 1818/* ========================== iconv Stuff ================================ */ 1819 1820/* from XlcNCharSet to XlcNMultiByte */ 1821 1822static int 1823iconv_cstombs(XlcConv conv, XPointer *from, int *from_left, 1824 XPointer *to, int *to_left, XPointer *args, int num_args) 1825{ 1826 XlcCharSet charset; 1827 char const *name; 1828 Utf8Conv convptr; 1829 int i; 1830 unsigned char const *src; 1831 unsigned char const *srcend; 1832 unsigned char *dst; 1833 unsigned char *dstend; 1834 int unconv_num; 1835 1836 if (from == NULL || *from == NULL) 1837 return 0; 1838 1839 if (num_args < 1) 1840 return -1; 1841 1842 charset = (XlcCharSet) args[0]; 1843 name = charset->encoding_name; 1844 /* not charset->name because the latter has a ":GL"/":GR" suffix */ 1845 1846 for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--) 1847 if (!strcmp(convptr->name, 
name)) 1848 break; 1849 if (i == 0) 1850 return -1; 1851 1852 src = (unsigned char const *) *from; 1853 srcend = src + *from_left; 1854 dst = (unsigned char *) *to; 1855 dstend = dst + *to_left; 1856 unconv_num = 0; 1857 1858 while (src < srcend) { 1859 ucs4_t wc; 1860 int consumed; 1861 int count; 1862 1863 consumed = convptr->cstowc(conv, &wc, src, srcend-src); 1864 if (consumed == RET_ILSEQ) 1865 return -1; 1866 if (consumed == RET_TOOFEW(0)) 1867 break; 1868 1869 /* Use stdc iconv to convert widechar -> multibyte */ 1870 1871 count = wctomb((char *)dst, wc); 1872 if (count == 0) 1873 break; 1874 if (count == -1) { 1875 count = wctomb((char *)dst, BAD_WCHAR); 1876 if (count == 0) 1877 break; 1878 unconv_num++; 1879 } 1880 src += consumed; 1881 dst += count; 1882 } 1883 1884 *from = (XPointer) src; 1885 *from_left = srcend - src; 1886 *to = (XPointer) dst; 1887 *to_left = dstend - dst; 1888 1889 return unconv_num; 1890 1891} 1892 1893static XlcConvMethodsRec iconv_cstombs_methods = { 1894 close_converter, 1895 iconv_cstombs, 1896 NULL 1897}; 1898 1899static XlcConv 1900open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1901{ 1902 lazy_init_all_charsets(); 1903 return create_conv(from_lcd, &iconv_cstombs_methods); 1904} 1905 1906static int 1907iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left, 1908 XPointer *to, int *to_left, XPointer *args, int num_args) 1909{ 1910 Utf8Conv *preferred_charsets; 1911 XlcCharSet last_charset = NULL; 1912 unsigned char const *src; 1913 unsigned char const *srcend; 1914 unsigned char *dst; 1915 unsigned char *dstend; 1916 int unconv_num; 1917 1918 if (from == NULL || *from == NULL) 1919 return 0; 1920 1921 preferred_charsets = (Utf8Conv *) conv->state; 1922 src = (unsigned char const *) *from; 1923 srcend = src + *from_left; 1924 dst = (unsigned char *) *to; 1925 dstend = dst + *to_left; 1926 unconv_num = 0; 1927 1928 while (src < srcend && dst < dstend) { 1929 Utf8Conv chosen_charset 
= NULL; 1930 XlcSide chosen_side = XlcNONE; 1931 wchar_t wc; 1932 int consumed; 1933 int count; 1934 1935 /* Uses stdc iconv to convert multibyte -> widechar */ 1936 1937 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 1938 if (consumed == 0) 1939 break; 1940 if (consumed == -1) { 1941 src++; 1942 unconv_num++; 1943 continue; 1944 } 1945 1946 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 1947 1948 if (count == RET_TOOSMALL) 1949 break; 1950 if (count == RET_ILSEQ) { 1951 src += consumed; 1952 unconv_num++; 1953 continue; 1954 } 1955 1956 if (last_charset == NULL) { 1957 last_charset = 1958 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 1959 if (last_charset == NULL) { 1960 src += consumed; 1961 unconv_num++; 1962 continue; 1963 } 1964 } else { 1965 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 1966 && (last_charset->side == XlcGLGR 1967 || last_charset->side == chosen_side))) 1968 break; 1969 } 1970 src += consumed; 1971 dst += count; 1972 } 1973 1974 if (last_charset == NULL) 1975 return -1; 1976 1977 *from = (XPointer) src; 1978 *from_left = srcend - src; 1979 *to = (XPointer) dst; 1980 *to_left = dstend - dst; 1981 1982 if (num_args >= 1) 1983 *((XlcCharSet *)args[0]) = last_charset; 1984 1985 return unconv_num; 1986} 1987 1988static XlcConvMethodsRec iconv_mbstocs_methods = { 1989 close_tocs_converter, 1990 iconv_mbstocs, 1991 NULL 1992}; 1993 1994static XlcConv 1995open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 1996{ 1997 return create_tocs_conv(from_lcd, &iconv_mbstocs_methods); 1998} 1999 2000/* from XlcNMultiByte to XlcNChar */ 2001 2002static int 2003iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left, 2004 XPointer *to, int *to_left, XPointer *args, int num_args) 2005{ 2006 Utf8Conv *preferred_charsets; 2007 XlcCharSet last_charset = NULL; 2008 unsigned char const *src; 2009 unsigned char const 
*srcend; 2010 unsigned char *dst; 2011 unsigned char *dstend; 2012 int unconv_num; 2013 2014 if (from == NULL || *from == NULL) 2015 return 0; 2016 2017 preferred_charsets = (Utf8Conv *) conv->state; 2018 src = (unsigned char const *) *from; 2019 srcend = src + *from_left; 2020 dst = (unsigned char *) *to; 2021 dstend = dst + *to_left; 2022 unconv_num = 0; 2023 2024 while (src < srcend && dst < dstend) { 2025 Utf8Conv chosen_charset = NULL; 2026 XlcSide chosen_side = XlcNONE; 2027 wchar_t wc; 2028 int consumed; 2029 int count; 2030 2031 /* Uses stdc iconv to convert multibyte -> widechar */ 2032 2033 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 2034 if (consumed == 0) 2035 break; 2036 if (consumed == -1) { 2037 src++; 2038 unconv_num++; 2039 continue; 2040 } 2041 2042 count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst); 2043 if (count == RET_TOOSMALL) 2044 break; 2045 if (count == RET_ILSEQ) { 2046 src += consumed; 2047 unconv_num++; 2048 continue; 2049 } 2050 2051 if (last_charset == NULL) { 2052 last_charset = 2053 _XlcGetCharSetWithSide(chosen_charset->name, chosen_side); 2054 if (last_charset == NULL) { 2055 src += consumed; 2056 unconv_num++; 2057 continue; 2058 } 2059 } else { 2060 if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name 2061 && (last_charset->side == XlcGLGR 2062 || last_charset->side == chosen_side))) 2063 break; 2064 } 2065 src += consumed; 2066 dst += count; 2067 } 2068 2069 if (last_charset == NULL) 2070 return -1; 2071 2072 *from = (XPointer) src; 2073 *from_left = srcend - src; 2074 *to = (XPointer) dst; 2075 *to_left = dstend - dst; 2076 2077 if (num_args >= 1) 2078 *((XlcCharSet *)args[0]) = last_charset; 2079 2080 return unconv_num; 2081} 2082 2083static XlcConvMethodsRec iconv_mbtocs_methods = { 2084 close_tocs_converter, 2085 iconv_mbtocs, 2086 NULL 2087}; 2088 2089static XlcConv 2090open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, 
const char *to_type) 2091{ 2092 return create_tocs_conv(from_lcd, &iconv_mbtocs_methods ); 2093} 2094 2095/* from XlcNMultiByte to XlcNString */ 2096 2097static int 2098iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left, 2099 XPointer *to, int *to_left, XPointer *args, int num_args) 2100{ 2101 unsigned char const *src; 2102 unsigned char const *srcend; 2103 unsigned char *dst; 2104 unsigned char *dstend; 2105 int unconv_num; 2106 2107 if (from == NULL || *from == NULL) 2108 return 0; 2109 2110 src = (unsigned char const *) *from; 2111 srcend = src + *from_left; 2112 dst = (unsigned char *) *to; 2113 dstend = dst + *to_left; 2114 unconv_num = 0; 2115 2116 while (src < srcend) { 2117 unsigned char c; 2118 wchar_t wc; 2119 int consumed; 2120 2121 /* Uses stdc iconv to convert multibyte -> widechar */ 2122 2123 consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src)); 2124 if (consumed == 0) 2125 break; 2126 if (dst == dstend) 2127 break; 2128 if (consumed == -1) { 2129 consumed = 1; 2130 c = BAD_CHAR; 2131 unconv_num++; 2132 } else { 2133 if ((wc & ~(wchar_t)0xff) != 0) { 2134 c = BAD_CHAR; 2135 unconv_num++; 2136 } else 2137 c = (unsigned char) wc; 2138 } 2139 *dst++ = c; 2140 src += consumed; 2141 } 2142 2143 *from = (XPointer) src; 2144 *from_left = srcend - src; 2145 *to = (XPointer) dst; 2146 *to_left = dstend - dst; 2147 2148 return unconv_num; 2149} 2150 2151static XlcConvMethodsRec iconv_mbstostr_methods = { 2152 close_converter, 2153 iconv_mbstostr, 2154 NULL 2155}; 2156 2157static XlcConv 2158open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2159{ 2160 return create_conv(from_lcd, &iconv_mbstostr_methods); 2161} 2162 2163/* from XlcNString to XlcNMultiByte */ 2164static int 2165iconv_strtombs(XlcConv conv, XPointer *from, int *from_left, 2166 XPointer *to, int *to_left, XPointer *args, int num_args) 2167{ 2168 unsigned char const *src; 2169 unsigned char const *srcend; 2170 unsigned char *dst; 2171 
unsigned char *dstend; 2172 2173 if (from == NULL || *from == NULL) 2174 return 0; 2175 2176 src = (unsigned char const *) *from; 2177 srcend = src + *from_left; 2178 dst = (unsigned char *) *to; 2179 dstend = dst + *to_left; 2180 2181 while (src < srcend) { 2182 int count = wctomb((char *)dst, *src); 2183 if (count < 0) 2184 break; 2185 dst += count; 2186 src++; 2187 } 2188 2189 *from = (XPointer) src; 2190 *from_left = srcend - src; 2191 *to = (XPointer) dst; 2192 *to_left = dstend - dst; 2193 2194 return 0; 2195} 2196 2197static XlcConvMethodsRec iconv_strtombs_methods= { 2198 close_converter, 2199 iconv_strtombs, 2200 NULL 2201}; 2202 2203static XlcConv 2204open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2205{ 2206 return create_conv(from_lcd, &iconv_strtombs_methods); 2207} 2208 2209/***************************************************************************/ 2210/* Part II: An iconv locale loader. 2211 * 2212 *Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode. 2213 */ 2214 2215/* from XlcNMultiByte to XlcNWideChar */ 2216static int 2217iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left, 2218 XPointer *to, int *to_left, XPointer *args, int num_args) 2219{ 2220 char *src = *((char **) from); 2221 wchar_t *dst = *((wchar_t **) to); 2222 int src_left = *from_left; 2223 int dst_left = *to_left; 2224 int length, unconv_num = 0; 2225 2226 while (src_left > 0 && dst_left > 0) { 2227 length = mbtowc(dst, src, (size_t) src_left); 2228 2229 if (length > 0) { 2230 src += length; 2231 src_left -= length; 2232 if (dst) 2233 dst++; 2234 dst_left--; 2235 } else if (length < 0) { 2236 src++; 2237 src_left--; 2238 unconv_num++; 2239 } else { 2240 /* null ? 
*/ 2241 src++; 2242 src_left--; 2243 if (dst) 2244 *dst++ = L'\0'; 2245 dst_left--; 2246 } 2247 } 2248 2249 *from = (XPointer) src; 2250 if (dst) 2251 *to = (XPointer) dst; 2252 *from_left = src_left; 2253 *to_left = dst_left; 2254 2255 return unconv_num; 2256} 2257 2258static XlcConvMethodsRec iconv_mbstowcs_methods = { 2259 close_converter, 2260 iconv_mbstowcs, 2261 NULL 2262} ; 2263 2264static XlcConv 2265open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2266{ 2267 return create_conv(from_lcd, &iconv_mbstowcs_methods); 2268} 2269 2270static int 2271iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left, 2272 XPointer *to, int *to_left, XPointer *args, int num_args) 2273{ 2274 wchar_t *src = *((wchar_t **) from); 2275 char *dst = *((char **) to); 2276 int src_left = *from_left; 2277 int dst_left = *to_left; 2278 int length, unconv_num = 0; 2279 2280 while (src_left > 0 && dst_left >= MB_CUR_MAX) { 2281 length = wctomb(dst, *src); /* XXX */ 2282 2283 if (length > 0) { 2284 src++; 2285 src_left--; 2286 if (dst) 2287 dst += length; 2288 dst_left -= length; 2289 } else if (length < 0) { 2290 src++; 2291 src_left--; 2292 unconv_num++; 2293 } 2294 } 2295 2296 *from = (XPointer) src; 2297 if (dst) 2298 *to = (XPointer) dst; 2299 *from_left = src_left; 2300 *to_left = dst_left; 2301 2302 return unconv_num; 2303} 2304 2305static XlcConvMethodsRec iconv_wcstombs_methods = { 2306 close_converter, 2307 iconv_wcstombs, 2308 NULL 2309} ; 2310 2311static XlcConv 2312open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type) 2313{ 2314 return create_conv(from_lcd, &iconv_wcstombs_methods); 2315} 2316 2317static XlcConv 2318open_iconv_mbstofcs( 2319 XLCd from_lcd, 2320 const char *from_type, 2321 XLCd to_lcd, 2322 const char *to_type) 2323{ 2324 return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods); 2325} 2326 2327/* Registers UTF-8 converters for a UTF-8 locale. 
*/ 2328 2329void 2330_XlcAddUtf8LocaleConverters( 2331 XLCd lcd) 2332{ 2333 /* Register elementary converters. */ 2334 2335 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs); 2336 2337 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8); 2338 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2339 2340 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2341 2342 /* Register converters for XlcNCharSet. This implicitly provides 2343 * converters from and to XlcNCompoundText. */ 2344 2345 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8); 2346 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs); 2347 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1); 2348 2349 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2350 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2351 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2352 2353 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8); 2354 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr); 2355 _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity); 2356 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity); 2357 2358 /* Register converters for XlcNFontCharSet */ 2359 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs); 2360 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2361} 2362 2363void 2364_XlcAddGB18030LocaleConverters( 2365 XLCd lcd) 2366{ 2367 2368 /* Register elementary converters. */ 2369 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs); 2370 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs); 2371 2372 /* Register converters for XlcNCharSet. This implicitly provides 2373 * converters from and to XlcNCompoundText. 
*/ 2374 2375 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs); 2376 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs); 2377 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs); 2378 _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs); 2379 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr); 2380 2381 /* Register converters for XlcNFontCharSet */ 2382 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs); 2383 2384 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr); 2385 _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs); 2386 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs); 2387 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs); 2388 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1); 2389 2390 /* Register converters for XlcNFontCharSet */ 2391 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs); 2392} 2393