lcUTF8.c revision e9628295
1/******************************************************************
2
3              Copyright 1993 by SunSoft, Inc.
4              Copyright 1999-2000 by Bruno Haible
5
6Permission to use, copy, modify, distribute, and sell this software
7and its documentation for any purpose is hereby granted without fee,
8provided that the above copyright notice appear in all copies and
9that both that copyright notice and this permission notice appear
10in supporting documentation, and that the names of SunSoft, Inc. and
11Bruno Haible not be used in advertising or publicity pertaining to
12distribution of the software without specific, written prior
13permission.  SunSoft, Inc. and Bruno Haible make no representations
14about the suitability of this software for any purpose.  It is
15provided "as is" without express or implied warranty.
16
17SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
18TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
19AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
20FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
22ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
23OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24
25******************************************************************/
26
/*
 * This file contains:
 *
 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
 *
 *    Used for three purposes:
 *      1. The UTF-8 locales, see below.
 *      2. Unicode-aware applications for which the use of 8-bit character
 *         sets is an anachronism.
 *      3. For conversion from keysym to locale encoding.
 *
 * II. Conversion files for a UTF-8 locale loader.
 *     Supports: all locales with codeset UTF-8.
 *     How: Provides converters for UTF-8.
 *     Platforms: all systems.
 *
 * The loader itself is located in lcUTF8.c.
 */
45
/*
 * The conversion from UTF-8 to CompoundText is realized in a very
 * conservative way. Recall that CompoundText data is used for inter-client
 * communication purposes. We distinguish three classes of clients:
 * - Clients which accept only those pieces of CompoundText which belong to
 *   the character set understood by the current locale.
 *   (Example: clients which are linked to an older X11 library.)
 * - Clients which accept CompoundText with multiple character sets and parse
 *   it themselves.
 *   (Example: emacs, xemacs.)
 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
 *   functions for the conversion of CompoundText to their current locale's
 *   multi-byte/wide-character format.
 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
 * follows. For every character, it first tests whether the character is
 * representable in the current locale's original (non-UTF-8) character set.
 * If not, it goes through the list of predefined character sets for
 * CompoundText and tests if the character is representable in that character
 * set. If so, it encodes the character using its code within that character
 * set. If not, it uses a UTF-8-in-CompoundText encapsulation. Since
 * clients of the first and second kind ignore such encapsulated text,
 * this encapsulation is kept to a minimum and terminated as early as possible.
 *
 * In a distant future, when clients of the first and second kind will have
 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
 * without first going through the list of predefined character sets.
 */
73
74#ifdef HAVE_CONFIG_H
75#include <config.h>
76#endif
77#include <stdio.h>
78#include "Xlibint.h"
79#include "XlcPubI.h"
80#include "XlcGeneric.h"
81
82static XlcConv
83create_conv(
84    XLCd lcd,
85    XlcConvMethods methods)
86{
87    XlcConv conv;
88
89    conv = Xmalloc(sizeof(XlcConvRec));
90    if (conv == (XlcConv) NULL)
91	return (XlcConv) NULL;
92
93    conv->methods = methods;
94    conv->state = NULL;
95
96    return conv;
97}
98
99static void
100close_converter(
101    XlcConv conv)
102{
103    Xfree(conv);
104}
105
/*
 * Substitutes written in place of an invalid multibyte sequence or an
 * unrepresentable wide character: BAD_WCHAR where the output is a Unicode
 * code point, BAD_CHAR where the output is a single byte.
 */
#define BAD_WCHAR ((ucs4_t) 0xFFFD)
#define BAD_CHAR  '?'
109
110/***************************************************************************/
111/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
112 *
113 * Note that this code works in any locale. We store Unicode values in
114 * `ucs4_t' variables, but don't pass them to the user.
115 *
116 * This code has to support all character sets that are used for CompoundText,
117 * nothing more, nothing less. See the table in lcCT.c.
118 * Since the conversion _to_ CompoundText is likely to need the tables for all
119 * character sets at once, we don't use dynamic loading (of tables or shared
120 * libraries through iconv()). Use a fixed set of tables instead.
121 *
122 * We use statically computed tables, not dynamically allocated arrays,
123 * because it's more memory efficient: Different processes using the same
124 * libX11 shared library share the "text" and read-only "data" sections.
125 */
126
127typedef unsigned int ucs4_t;
128#define conv_t XlcConv
129
130typedef struct _Utf8ConvRec {
131    const char *name;
132    XrmQuark xrm_name;
133    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
134    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
135} Utf8ConvRec, *Utf8Conv;
136
137/*
138 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
139 * converts the byte sequence starting at s to a wide character. Up to n bytes
140 * are available at s. n is >= 1.
141 * Result is number of bytes consumed (if a wide character was read),
142 * or 0 if invalid, or -1 if n too small.
143 *
144 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
145 * converts the wide character wc to the character set xxx, and stores the
146 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
147 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
148 */
149
/*
 * Status codes the xxx_mbtowc / xxx_wctomb conversion functions return in
 * place of a byte count.
 */
/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
   Note that RET_TOOFEW(0) has the same value as RET_TOOSMALL. */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset)
   Parenthesized so the expansion stays a single operand in any context. */
#define RET_TOOSMALL   (-1)
156
/*
 * The tables below are bijective. It would be possible to extend the
 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
 * but *only* with characters not contained in any other table, and *only*
 * when the current locale is not a UTF-8 locale.
 */
163
164#include "lcUniConv/utf8.h"
165#include "lcUniConv/ucs2be.h"
166#ifdef notused
167#include "lcUniConv/ascii.h"
168#endif
169#include "lcUniConv/iso8859_1.h"
170#include "lcUniConv/iso8859_2.h"
171#include "lcUniConv/iso8859_3.h"
172#include "lcUniConv/iso8859_4.h"
173#include "lcUniConv/iso8859_5.h"
174#include "lcUniConv/iso8859_6.h"
175#include "lcUniConv/iso8859_7.h"
176#include "lcUniConv/iso8859_8.h"
177#include "lcUniConv/iso8859_9.h"
178#include "lcUniConv/iso8859_10.h"
179#include "lcUniConv/iso8859_11.h"
180#include "lcUniConv/iso8859_13.h"
181#include "lcUniConv/iso8859_14.h"
182#include "lcUniConv/iso8859_15.h"
183#include "lcUniConv/iso8859_16.h"
184#include "lcUniConv/iso8859_9e.h"
185#include "lcUniConv/jisx0201.h"
186#include "lcUniConv/tis620.h"
187#include "lcUniConv/koi8_r.h"
188#include "lcUniConv/koi8_u.h"
189#include "lcUniConv/koi8_c.h"
190#include "lcUniConv/armscii_8.h"
191#include "lcUniConv/cp1133.h"
192#include "lcUniConv/mulelao.h"
193#include "lcUniConv/viscii.h"
194#include "lcUniConv/tcvn.h"
195#include "lcUniConv/georgian_academy.h"
196#include "lcUniConv/georgian_ps.h"
197#include "lcUniConv/cp1251.h"
198#include "lcUniConv/cp1255.h"
199#include "lcUniConv/cp1256.h"
200#include "lcUniConv/tatar_cyr.h"
201
/* Summary record type declared ahead of the multi-byte charset tables
   included right after it. */
typedef struct {
    /* index into big table */
    unsigned short indx;
    /* bitmask of used entries */
    unsigned short used;
} Summary16;
206
207#include "lcUniConv/gb2312.h"
208#include "lcUniConv/jisx0208.h"
209#include "lcUniConv/jisx0212.h"
210#include "lcUniConv/ksc5601.h"
211#include "lcUniConv/big5.h"
212#include "lcUniConv/big5_emacs.h"
213#include "lcUniConv/big5hkscs.h"
214#include "lcUniConv/gbk.h"
215
216static Utf8ConvRec all_charsets[] = {
217    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
218       (for lookup speed), once at the end (as a fallback).  */
219    { "ISO10646-1", NULLQUARK,
220	utf8_mbtowc, utf8_wctomb
221    },
222
223    { "ISO8859-1", NULLQUARK,
224	iso8859_1_mbtowc, iso8859_1_wctomb
225    },
226    { "ISO8859-2", NULLQUARK,
227	iso8859_2_mbtowc, iso8859_2_wctomb
228    },
229    { "ISO8859-3", NULLQUARK,
230	iso8859_3_mbtowc, iso8859_3_wctomb
231    },
232    { "ISO8859-4", NULLQUARK,
233	iso8859_4_mbtowc, iso8859_4_wctomb
234    },
235    { "ISO8859-5", NULLQUARK,
236	iso8859_5_mbtowc, iso8859_5_wctomb
237    },
238    { "ISO8859-6", NULLQUARK,
239	iso8859_6_mbtowc, iso8859_6_wctomb
240    },
241    { "ISO8859-7", NULLQUARK,
242	iso8859_7_mbtowc, iso8859_7_wctomb
243    },
244    { "ISO8859-8", NULLQUARK,
245	iso8859_8_mbtowc, iso8859_8_wctomb
246    },
247    { "ISO8859-9", NULLQUARK,
248	iso8859_9_mbtowc, iso8859_9_wctomb
249    },
250    { "ISO8859-10", NULLQUARK,
251	iso8859_10_mbtowc, iso8859_10_wctomb
252    },
253    { "ISO8859-11", NULLQUARK,
254	iso8859_11_mbtowc, iso8859_11_wctomb
255    },
256    { "ISO8859-13", NULLQUARK,
257	iso8859_13_mbtowc, iso8859_13_wctomb
258    },
259    { "ISO8859-14", NULLQUARK,
260	iso8859_14_mbtowc, iso8859_14_wctomb
261    },
262    { "ISO8859-15", NULLQUARK,
263	iso8859_15_mbtowc, iso8859_15_wctomb
264    },
265    { "ISO8859-16", NULLQUARK,
266	iso8859_16_mbtowc, iso8859_16_wctomb
267    },
268    { "JISX0201.1976-0", NULLQUARK,
269	jisx0201_mbtowc, jisx0201_wctomb
270    },
271    { "TIS620-0", NULLQUARK,
272	tis620_mbtowc, tis620_wctomb
273    },
274    { "GB2312.1980-0", NULLQUARK,
275	gb2312_mbtowc, gb2312_wctomb
276    },
277    { "JISX0208.1983-0", NULLQUARK,
278	jisx0208_mbtowc, jisx0208_wctomb
279    },
280    { "JISX0208.1990-0", NULLQUARK,
281	jisx0208_mbtowc, jisx0208_wctomb
282    },
283    { "JISX0212.1990-0", NULLQUARK,
284	jisx0212_mbtowc, jisx0212_wctomb
285    },
286    { "KSC5601.1987-0", NULLQUARK,
287	ksc5601_mbtowc, ksc5601_wctomb
288    },
289    { "KOI8-R", NULLQUARK,
290	koi8_r_mbtowc, koi8_r_wctomb
291    },
292    { "KOI8-U", NULLQUARK,
293	koi8_u_mbtowc, koi8_u_wctomb
294    },
295    { "KOI8-C", NULLQUARK,
296	koi8_c_mbtowc, koi8_c_wctomb
297    },
298    { "TATAR-CYR", NULLQUARK,
299	tatar_cyr_mbtowc, tatar_cyr_wctomb
300    },
301    { "ARMSCII-8", NULLQUARK,
302	armscii_8_mbtowc, armscii_8_wctomb
303    },
304    { "IBM-CP1133", NULLQUARK,
305	cp1133_mbtowc, cp1133_wctomb
306    },
307    { "MULELAO-1", NULLQUARK,
308	mulelao_mbtowc, mulelao_wctomb
309    },
310    { "VISCII1.1-1", NULLQUARK,
311	viscii_mbtowc, viscii_wctomb
312    },
313    { "TCVN-5712", NULLQUARK,
314	tcvn_mbtowc, tcvn_wctomb
315    },
316    { "GEORGIAN-ACADEMY", NULLQUARK,
317	georgian_academy_mbtowc, georgian_academy_wctomb
318    },
319    { "GEORGIAN-PS", NULLQUARK,
320	georgian_ps_mbtowc, georgian_ps_wctomb
321    },
322    { "ISO8859-9E", NULLQUARK,
323	iso8859_9e_mbtowc, iso8859_9e_wctomb
324    },
325    { "MICROSOFT-CP1251", NULLQUARK,
326	cp1251_mbtowc, cp1251_wctomb
327    },
328    { "MICROSOFT-CP1255", NULLQUARK,
329	cp1255_mbtowc, cp1255_wctomb
330    },
331    { "MICROSOFT-CP1256", NULLQUARK,
332	cp1256_mbtowc, cp1256_wctomb
333    },
334    { "BIG5-0", NULLQUARK,
335	big5_mbtowc, big5_wctomb
336    },
337    { "BIG5-E0", NULLQUARK,
338	big5_0_mbtowc, big5_0_wctomb
339    },
340    { "BIG5-E1", NULLQUARK,
341	big5_1_mbtowc, big5_1_wctomb
342    },
343    { "GBK-0", NULLQUARK,
344	gbk_mbtowc, gbk_wctomb
345    },
346    { "BIG5HKSCS-0", NULLQUARK,
347	big5hkscs_mbtowc, big5hkscs_wctomb
348    },
349
350    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
351       (for lookup speed), once at the end (as a fallback).  */
352    { "ISO10646-1", NULLQUARK,
353	utf8_mbtowc, utf8_wctomb
354    },
355
356    /* Encoding ISO10646-1 for fonts means UCS2-like encoding
357       so for conversion to FontCharSet we need this record */
358    { "ISO10646-1", NULLQUARK,
359	ucs2be_mbtowc, ucs2be_wctomb
360    }
361};
362
/* Number of records in all_charsets[], the trailing UCS-2 record included. */
#define charsets_table_size (sizeof(all_charsets) / sizeof(*all_charsets))
/* Record count with the trailing UCS-2 record left out. */
#define all_charsets_count  (charsets_table_size - 1)
/* Position of the trailing UCS-2 record. */
#define ucs2_conv_index     (charsets_table_size - 1)
366
367static void
368init_all_charsets (void)
369{
370    Utf8Conv convptr;
371    int i;
372
373    for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
374	convptr->xrm_name = XrmStringToQuark(convptr->name);
375}
376
/*
 * Runs init_all_charsets() unless it has run already.  The quark of the
 * first table record serves as the "already initialized" marker.
 */
#define lazy_init_all_charsets()                                        \
    do {                                                                \
        if (NULLQUARK == all_charsets[0].xrm_name) {                    \
            init_all_charsets();                                        \
        }                                                               \
    } while (0)
382
383/* from XlcNCharSet to XlcNUtf8String */
384
385static int
386cstoutf8(
387    XlcConv conv,
388    XPointer *from,
389    int *from_left,
390    XPointer *to,
391    int *to_left,
392    XPointer *args,
393    int num_args)
394{
395    XlcCharSet charset;
396    const char *name;
397    Utf8Conv convptr;
398    int i;
399    unsigned char const *src;
400    unsigned char const *srcend;
401    unsigned char *dst;
402    unsigned char *dstend;
403    int unconv_num;
404
405    if (from == NULL || *from == NULL)
406	return 0;
407
408    if (num_args < 1)
409	return -1;
410
411    charset = (XlcCharSet) args[0];
412    name = charset->encoding_name;
413    /* not charset->name because the latter has a ":GL"/":GR" suffix */
414
415    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
416	if (!strcmp(convptr->name, name))
417	    break;
418    if (i == 0)
419	return -1;
420
421    src = (unsigned char const *) *from;
422    srcend = src + *from_left;
423    dst = (unsigned char *) *to;
424    dstend = dst + *to_left;
425    unconv_num = 0;
426
427    while (src < srcend) {
428	ucs4_t wc;
429	int consumed;
430	int count;
431
432	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
433	if (consumed == RET_ILSEQ)
434	    return -1;
435	if (consumed == RET_TOOFEW(0))
436	    break;
437
438	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
439	if (count == RET_TOOSMALL)
440	    break;
441	if (count == RET_ILSEQ) {
442	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
443	    if (count == RET_TOOSMALL)
444		break;
445	    unconv_num++;
446	}
447	src += consumed;
448	dst += count;
449    }
450
451    *from = (XPointer) src;
452    *from_left = srcend - src;
453    *to = (XPointer) dst;
454    *to_left = dstend - dst;
455
456    return unconv_num;
457}
458
459static XlcConvMethodsRec methods_cstoutf8 = {
460    close_converter,
461    cstoutf8,
462    NULL
463};
464
465static XlcConv
466open_cstoutf8(
467    XLCd from_lcd,
468    const char *from_type,
469    XLCd to_lcd,
470    const char *to_type)
471{
472    lazy_init_all_charsets();
473    return create_conv(from_lcd, &methods_cstoutf8);
474}
475
476/* from XlcNUtf8String to XlcNCharSet */
477
478static XlcConv
479create_tocs_conv(
480    XLCd lcd,
481    XlcConvMethods methods)
482{
483    XlcConv conv;
484    CodeSet *codeset_list;
485    int codeset_num;
486    int charset_num;
487    int i, j, k;
488    Utf8Conv *preferred;
489
490    lazy_init_all_charsets();
491
492    codeset_list = XLC_GENERIC(lcd, codeset_list);
493    codeset_num = XLC_GENERIC(lcd, codeset_num);
494
495    charset_num = 0;
496    for (i = 0; i < codeset_num; i++)
497	charset_num += codeset_list[i]->num_charsets;
498    if (charset_num > all_charsets_count-1)
499	charset_num = all_charsets_count-1;
500
501    conv = Xmalloc(sizeof(XlcConvRec)
502			     + (charset_num + 1) * sizeof(Utf8Conv));
503    if (conv == (XlcConv) NULL)
504	return (XlcConv) NULL;
505    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
506
507    /* Loop through all codesets mentioned in the locale. */
508    charset_num = 0;
509    for (i = 0; i < codeset_num; i++) {
510	XlcCharSet *charsets = codeset_list[i]->charset_list;
511	int num_charsets = codeset_list[i]->num_charsets;
512	for (j = 0; j < num_charsets; j++) {
513	    const char *name = charsets[j]->encoding_name;
514	    /* If it wasn't already encountered... */
515	    for (k = charset_num-1; k >= 0; k--)
516		if (!strcmp(preferred[k]->name, name))
517		    break;
518	    if (k < 0) {
519		/* Look it up in all_charsets[]. */
520		for (k = 0; k < all_charsets_count-1; k++)
521		    if (!strcmp(all_charsets[k].name, name)) {
522			/* Add it to the preferred set. */
523			preferred[charset_num++] = &all_charsets[k];
524			break;
525		    }
526	    }
527	}
528    }
529    preferred[charset_num] = (Utf8Conv) NULL;
530
531    conv->methods = methods;
532    conv->state = (XPointer) preferred;
533
534    return conv;
535}
536
537static void
538close_tocs_converter(
539    XlcConv conv)
540{
541    /* conv->state is allocated together with conv, free both at once.  */
542    Xfree(conv);
543}
544
545/*
546 * Converts a Unicode character to an appropriate character set. The NULL
547 * terminated array of preferred character sets is passed as first argument.
548 * If successful, *charsetp is set to the character set that was used, and
549 * *sidep is set to the character set side (XlcGL or XlcGR).
550 */
551static int
552charset_wctocs(
553    Utf8Conv *preferred,
554    Utf8Conv *charsetp,
555    XlcSide *sidep,
556    XlcConv conv,
557    unsigned char *r,
558    ucs4_t wc,
559    int n)
560{
561    int count;
562    Utf8Conv convptr;
563    int i;
564
565    for (; *preferred != (Utf8Conv) NULL; preferred++) {
566	convptr = *preferred;
567	count = convptr->wctocs(conv, r, wc, n);
568	if (count == RET_TOOSMALL)
569	    return RET_TOOSMALL;
570	if (count != RET_ILSEQ) {
571	    *charsetp = convptr;
572	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
573	    return count;
574	}
575    }
576    for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
577	count = convptr->wctocs(conv, r, wc, n);
578	if (count == RET_TOOSMALL)
579	    return RET_TOOSMALL;
580	if (count != RET_ILSEQ) {
581	    *charsetp = convptr;
582	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
583	    return count;
584	}
585    }
586    return RET_ILSEQ;
587}
588
589static int
590utf8tocs(
591    XlcConv conv,
592    XPointer *from,
593    int *from_left,
594    XPointer *to,
595    int *to_left,
596    XPointer *args,
597    int num_args)
598{
599    Utf8Conv *preferred_charsets;
600    XlcCharSet last_charset = NULL;
601    unsigned char const *src;
602    unsigned char const *srcend;
603    unsigned char *dst;
604    unsigned char *dstend;
605    int unconv_num;
606
607    if (from == NULL || *from == NULL)
608	return 0;
609
610    preferred_charsets = (Utf8Conv *) conv->state;
611    src = (unsigned char const *) *from;
612    srcend = src + *from_left;
613    dst = (unsigned char *) *to;
614    dstend = dst + *to_left;
615    unconv_num = 0;
616
617    while (src < srcend && dst < dstend) {
618	Utf8Conv chosen_charset = NULL;
619	XlcSide chosen_side = XlcNONE;
620	ucs4_t wc;
621	int consumed;
622	int count;
623
624	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
625	if (consumed == RET_TOOFEW(0))
626	    break;
627	if (consumed == RET_ILSEQ) {
628	    src++;
629	    unconv_num++;
630	    continue;
631	}
632
633	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
634	if (count == RET_TOOSMALL)
635	    break;
636	if (count == RET_ILSEQ) {
637	    src += consumed;
638	    unconv_num++;
639	    continue;
640	}
641
642	if (last_charset == NULL) {
643	    last_charset =
644	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
645	    if (last_charset == NULL) {
646		src += consumed;
647		unconv_num++;
648		continue;
649	    }
650	} else {
651	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
652	          && (last_charset->side == XlcGLGR
653	              || last_charset->side == chosen_side)))
654		break;
655	}
656	src += consumed;
657	dst += count;
658    }
659
660    if (last_charset == NULL)
661	return -1;
662
663    *from = (XPointer) src;
664    *from_left = srcend - src;
665    *to = (XPointer) dst;
666    *to_left = dstend - dst;
667
668    if (num_args >= 1)
669	*((XlcCharSet *)args[0]) = last_charset;
670
671    return unconv_num;
672}
673
674static XlcConvMethodsRec methods_utf8tocs = {
675    close_tocs_converter,
676    utf8tocs,
677    NULL
678};
679
680static XlcConv
681open_utf8tocs(
682    XLCd from_lcd,
683    const char *from_type,
684    XLCd to_lcd,
685    const char *to_type)
686{
687    return create_tocs_conv(from_lcd, &methods_utf8tocs);
688}
689
690/* from XlcNUtf8String to XlcNChar */
691
692static int
693utf8tocs1(
694    XlcConv conv,
695    XPointer *from,
696    int *from_left,
697    XPointer *to,
698    int *to_left,
699    XPointer *args,
700    int num_args)
701{
702    Utf8Conv *preferred_charsets;
703    XlcCharSet last_charset = NULL;
704    unsigned char const *src;
705    unsigned char const *srcend;
706    unsigned char *dst;
707    unsigned char *dstend;
708    int unconv_num;
709
710    if (from == NULL || *from == NULL)
711	return 0;
712
713    preferred_charsets = (Utf8Conv *) conv->state;
714    src = (unsigned char const *) *from;
715    srcend = src + *from_left;
716    dst = (unsigned char *) *to;
717    dstend = dst + *to_left;
718    unconv_num = 0;
719
720    while (src < srcend && dst < dstend) {
721	Utf8Conv chosen_charset = NULL;
722	XlcSide chosen_side = XlcNONE;
723	ucs4_t wc;
724	int consumed;
725	int count;
726
727	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
728	if (consumed == RET_TOOFEW(0))
729	    break;
730	if (consumed == RET_ILSEQ) {
731	    src++;
732	    unconv_num++;
733	    continue;
734	}
735
736	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
737	if (count == RET_TOOSMALL)
738	    break;
739	if (count == RET_ILSEQ) {
740	    src += consumed;
741	    unconv_num++;
742	    continue;
743	}
744
745	last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
746
747	if (last_charset == NULL) {
748	    src += consumed;
749	    unconv_num++;
750	    continue;
751	}
752
753	src += consumed;
754	dst += count;
755	break;
756    }
757
758    if (last_charset == NULL)
759	return -1;
760
761    *from = (XPointer) src;
762    *from_left = srcend - src;
763    *to = (XPointer) dst;
764    *to_left = dstend - dst;
765
766    if (num_args >= 1)
767	*((XlcCharSet *)args[0]) = last_charset;
768
769    return unconv_num;
770}
771
772static XlcConvMethodsRec methods_utf8tocs1 = {
773    close_tocs_converter,
774    utf8tocs1,
775    NULL
776};
777
778static XlcConv
779open_utf8tocs1(
780    XLCd from_lcd,
781    const char *from_type,
782    XLCd to_lcd,
783    const char *to_type)
784{
785    return create_tocs_conv(from_lcd, &methods_utf8tocs1);
786}
787
788/* from XlcNUtf8String to XlcNString */
789
790static int
791utf8tostr(
792    XlcConv conv,
793    XPointer *from,
794    int *from_left,
795    XPointer *to,
796    int *to_left,
797    XPointer *args,
798    int num_args)
799{
800    unsigned char const *src;
801    unsigned char const *srcend;
802    unsigned char *dst;
803    unsigned char *dstend;
804    int unconv_num;
805
806    if (from == NULL || *from == NULL)
807	return 0;
808
809    src = (unsigned char const *) *from;
810    srcend = src + *from_left;
811    dst = (unsigned char *) *to;
812    dstend = dst + *to_left;
813    unconv_num = 0;
814
815    while (src < srcend) {
816	unsigned char c;
817	ucs4_t wc;
818	int consumed;
819
820	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
821	if (consumed == RET_TOOFEW(0))
822	    break;
823	if (dst == dstend)
824	    break;
825	if (consumed == RET_ILSEQ) {
826	    consumed = 1;
827	    c = BAD_CHAR;
828	    unconv_num++;
829	} else {
830	    if ((wc & ~(ucs4_t)0xff) != 0) {
831		c = BAD_CHAR;
832		unconv_num++;
833	    } else
834		c = (unsigned char) wc;
835	}
836	*dst++ = c;
837	src += consumed;
838    }
839
840    *from = (XPointer) src;
841    *from_left = srcend - src;
842    *to = (XPointer) dst;
843    *to_left = dstend - dst;
844
845    return unconv_num;
846}
847
848static XlcConvMethodsRec methods_utf8tostr = {
849    close_converter,
850    utf8tostr,
851    NULL
852};
853
854static XlcConv
855open_utf8tostr(
856    XLCd from_lcd,
857    const char *from_type,
858    XLCd to_lcd,
859    const char *to_type)
860{
861    return create_conv(from_lcd, &methods_utf8tostr);
862}
863
864/* from XlcNString to XlcNUtf8String */
865
866static int
867strtoutf8(
868    XlcConv conv,
869    XPointer *from,
870    int *from_left,
871    XPointer *to,
872    int *to_left,
873    XPointer *args,
874    int num_args)
875{
876    unsigned char const *src;
877    unsigned char const *srcend;
878    unsigned char *dst;
879    unsigned char *dstend;
880
881    if (from == NULL || *from == NULL)
882	return 0;
883
884    src = (unsigned char const *) *from;
885    srcend = src + *from_left;
886    dst = (unsigned char *) *to;
887    dstend = dst + *to_left;
888
889    while (src < srcend) {
890	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
891	if (count == RET_TOOSMALL)
892	    break;
893	dst += count;
894	src++;
895    }
896
897    *from = (XPointer) src;
898    *from_left = srcend - src;
899    *to = (XPointer) dst;
900    *to_left = dstend - dst;
901
902    return 0;
903}
904
905static XlcConvMethodsRec methods_strtoutf8 = {
906    close_converter,
907    strtoutf8,
908    NULL
909};
910
911static XlcConv
912open_strtoutf8(
913    XLCd from_lcd,
914    const char *from_type,
915    XLCd to_lcd,
916    const char *to_type)
917{
918    return create_conv(from_lcd, &methods_strtoutf8);
919}
920
921/* Support for the input methods. */
922
923XPointer
924_Utf8GetConvByName(
925    const char *name)
926{
927    XrmQuark xrm_name;
928    Utf8Conv convptr;
929    int i;
930
931    if (name == NULL)
932        return (XPointer) NULL;
933
934    lazy_init_all_charsets();
935    xrm_name = XrmStringToQuark(name);
936
937    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
938	if (convptr->xrm_name == xrm_name)
939	    return (XPointer) convptr->wctocs;
940    return (XPointer) NULL;
941}
942
943/* from XlcNUcsChar to XlcNChar, needed for input methods */
944
945static XlcConv
946create_ucstocs_conv(
947    XLCd lcd,
948    XlcConvMethods methods)
949{
950
951    if (XLC_PUBLIC_PART(lcd)->codeset
952	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
953	XlcConv conv;
954	Utf8Conv *preferred;
955
956	lazy_init_all_charsets();
957
958	conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
959	if (conv == (XlcConv) NULL)
960	    return (XlcConv) NULL;
961	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
962
963	preferred[0] = &all_charsets[0]; /* ISO10646 */
964	preferred[1] = (Utf8Conv) NULL;
965
966	conv->methods = methods;
967	conv->state = (XPointer) preferred;
968
969	return conv;
970    } else {
971	return create_tocs_conv(lcd, methods);
972    }
973}
974
975static int
976charset_wctocs_exactly(
977    Utf8Conv *preferred,
978    Utf8Conv *charsetp,
979    XlcSide *sidep,
980    XlcConv conv,
981    unsigned char *r,
982    ucs4_t wc,
983    int n)
984{
985    int count;
986    Utf8Conv convptr;
987
988    for (; *preferred != (Utf8Conv) NULL; preferred++) {
989	convptr = *preferred;
990	count = convptr->wctocs(conv, r, wc, n);
991	if (count == RET_TOOSMALL)
992	    return RET_TOOSMALL;
993	if (count != RET_ILSEQ) {
994	    *charsetp = convptr;
995	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
996	    return count;
997	}
998    }
999    return RET_ILSEQ;
1000}
1001
1002static int
1003ucstocs1(
1004    XlcConv conv,
1005    XPointer *from,
1006    int *from_left,
1007    XPointer *to,
1008    int *to_left,
1009    XPointer *args,
1010    int num_args)
1011{
1012    ucs4_t const *src;
1013    unsigned char *dst = (unsigned char *) *to;
1014    int unconv_num = 0;
1015    Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1016    Utf8Conv chosen_charset = NULL;
1017    XlcSide chosen_side = XlcNONE;
1018    XlcCharSet charset = NULL;
1019    int count;
1020
1021    if (from == NULL || *from == NULL)
1022	return 0;
1023
1024    src = (ucs4_t const *) *from;
1025
1026    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1027                                   &chosen_side, conv, dst, *src, *to_left);
1028    if (count < 1) {
1029        unconv_num++;
1030        count = 0;
1031    } else {
1032        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1033    }
1034    if (charset == NULL)
1035	return -1;
1036
1037    *from = (XPointer) ++src;
1038    (*from_left)--;
1039    *to = (XPointer) dst;
1040    *to_left -= count;
1041
1042    if (num_args >= 1)
1043	*((XlcCharSet *)args[0]) = charset;
1044
1045    return unconv_num;
1046}
1047
1048static XlcConvMethodsRec methods_ucstocs1 = {
1049    close_tocs_converter,
1050    ucstocs1,
1051    NULL
1052};
1053
1054static XlcConv
1055open_ucstocs1(
1056    XLCd from_lcd,
1057    const char *from_type,
1058    XLCd to_lcd,
1059    const char *to_type)
1060{
1061    return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1062}
1063
1064/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1065
1066static int
1067ucstoutf8(
1068    XlcConv conv,
1069    XPointer *from,
1070    int *from_left,
1071    XPointer *to,
1072    int *to_left,
1073    XPointer *args,
1074    int num_args)
1075{
1076    const ucs4_t *src;
1077    const ucs4_t *srcend;
1078    unsigned char *dst;
1079    unsigned char *dstend;
1080    int unconv_num;
1081
1082    if (from == NULL || *from == NULL)
1083	return 0;
1084
1085    src = (const ucs4_t *) *from;
1086    srcend = src + *from_left;
1087    dst = (unsigned char *) *to;
1088    dstend = dst + *to_left;
1089    unconv_num = 0;
1090
1091    while (src < srcend) {
1092	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1093	if (count == RET_TOOSMALL)
1094	    break;
1095	if (count == RET_ILSEQ)
1096	    unconv_num++;
1097	src++;
1098	dst += count;
1099    }
1100
1101    *from = (XPointer) src;
1102    *from_left = srcend - src;
1103    *to = (XPointer) dst;
1104    *to_left = dstend - dst;
1105
1106    return unconv_num;
1107}
1108
1109static XlcConvMethodsRec methods_ucstoutf8 = {
1110    close_converter,
1111    ucstoutf8,
1112    NULL
1113};
1114
1115static XlcConv
1116open_ucstoutf8(
1117    XLCd from_lcd,
1118    const char *from_type,
1119    XLCd to_lcd,
1120    const char *to_type)
1121{
1122    return create_conv(from_lcd, &methods_ucstoutf8);
1123}
1124
1125/* Registers UTF-8 converters for a non-UTF-8 locale. */
1126void
1127_XlcAddUtf8Converters(
1128    XLCd lcd)
1129{
1130    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
1131    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
1132    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
1133    _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
1134    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
1135    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
1136    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
1137}
1138
1139/***************************************************************************/
1140/* Part II: UTF-8 locale loader conversion files
1141 *
1142 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1143 */
1144
1145/* from XlcNMultiByte to XlcNWideChar */
1146
1147static int
1148utf8towcs(
1149    XlcConv conv,
1150    XPointer *from,
1151    int *from_left,
1152    XPointer *to,
1153    int *to_left,
1154    XPointer *args,
1155    int num_args)
1156{
1157    unsigned char const *src;
1158    unsigned char const *srcend;
1159    wchar_t *dst;
1160    wchar_t *dstend;
1161    int unconv_num;
1162
1163    if (from == NULL || *from == NULL)
1164	return 0;
1165
1166    src = (unsigned char const *) *from;
1167    srcend = src + *from_left;
1168    dst = (wchar_t *) *to;
1169    dstend = dst + *to_left;
1170    unconv_num = 0;
1171
1172    while (src < srcend && dst < dstend) {
1173	ucs4_t wc;
1174	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1175	if (consumed == RET_TOOFEW(0))
1176	    break;
1177	if (consumed == RET_ILSEQ) {
1178	    src++;
1179	    *dst = BAD_WCHAR;
1180	    unconv_num++;
1181	} else {
1182	    src += consumed;
1183	    *dst = wc;
1184	}
1185	dst++;
1186    }
1187
1188    *from = (XPointer) src;
1189    *from_left = srcend - src;
1190    *to = (XPointer) dst;
1191    *to_left = dstend - dst;
1192
1193    return unconv_num;
1194}
1195
1196static XlcConvMethodsRec methods_utf8towcs = {
1197    close_converter,
1198    utf8towcs,
1199    NULL
1200};
1201
1202static XlcConv
1203open_utf8towcs(
1204    XLCd from_lcd,
1205    const char *from_type,
1206    XLCd to_lcd,
1207    const char *to_type)
1208{
1209    return create_conv(from_lcd, &methods_utf8towcs);
1210}
1211
1212/* from XlcNWideChar to XlcNMultiByte */
1213
1214static int
1215wcstoutf8(
1216    XlcConv conv,
1217    XPointer *from,
1218    int *from_left,
1219    XPointer *to,
1220    int *to_left,
1221    XPointer *args,
1222    int num_args)
1223{
1224    wchar_t const *src;
1225    wchar_t const *srcend;
1226    unsigned char *dst;
1227    unsigned char *dstend;
1228    int unconv_num;
1229
1230    if (from == NULL || *from == NULL)
1231	return 0;
1232
1233    src = (wchar_t const *) *from;
1234    srcend = src + *from_left;
1235    dst = (unsigned char *) *to;
1236    dstend = dst + *to_left;
1237    unconv_num = 0;
1238
1239    while (src < srcend) {
1240	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1241	if (count == RET_TOOSMALL)
1242	    break;
1243	if (count == RET_ILSEQ) {
1244	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1245	    if (count == RET_TOOSMALL)
1246		break;
1247	    unconv_num++;
1248	}
1249	dst += count;
1250	src++;
1251    }
1252
1253    *from = (XPointer) src;
1254    *from_left = srcend - src;
1255    *to = (XPointer) dst;
1256    *to_left = dstend - dst;
1257
1258    return unconv_num;
1259}
1260
1261static XlcConvMethodsRec methods_wcstoutf8 = {
1262    close_converter,
1263    wcstoutf8,
1264    NULL
1265};
1266
1267static XlcConv
1268open_wcstoutf8(
1269    XLCd from_lcd,
1270    const char *from_type,
1271    XLCd to_lcd,
1272    const char *to_type)
1273{
1274    return create_conv(from_lcd, &methods_wcstoutf8);
1275}
1276
1277/* from XlcNString to XlcNWideChar */
1278
1279static int
1280our_strtowcs(
1281    XlcConv conv,
1282    XPointer *from,
1283    int *from_left,
1284    XPointer *to,
1285    int *to_left,
1286    XPointer *args,
1287    int num_args)
1288{
1289    unsigned char const *src;
1290    unsigned char const *srcend;
1291    wchar_t *dst;
1292    wchar_t *dstend;
1293
1294    if (from == NULL || *from == NULL)
1295	return 0;
1296
1297    src = (unsigned char const *) *from;
1298    srcend = src + *from_left;
1299    dst = (wchar_t *) *to;
1300    dstend = dst + *to_left;
1301
1302    while (src < srcend && dst < dstend)
1303	*dst++ = (wchar_t) *src++;
1304
1305    *from = (XPointer) src;
1306    *from_left = srcend - src;
1307    *to = (XPointer) dst;
1308    *to_left = dstend - dst;
1309
1310    return 0;
1311}
1312
1313static XlcConvMethodsRec methods_strtowcs = {
1314    close_converter,
1315    our_strtowcs,
1316    NULL
1317};
1318
1319static XlcConv
1320open_strtowcs(
1321    XLCd from_lcd,
1322    const char *from_type,
1323    XLCd to_lcd,
1324    const char *to_type)
1325{
1326    return create_conv(from_lcd, &methods_strtowcs);
1327}
1328
1329/* from XlcNWideChar to XlcNString */
1330
1331static int
1332our_wcstostr(
1333    XlcConv conv,
1334    XPointer *from,
1335    int *from_left,
1336    XPointer *to,
1337    int *to_left,
1338    XPointer *args,
1339    int num_args)
1340{
1341    wchar_t const *src;
1342    wchar_t const *srcend;
1343    unsigned char *dst;
1344    unsigned char *dstend;
1345    int unconv_num;
1346
1347    if (from == NULL || *from == NULL)
1348	return 0;
1349
1350    src = (wchar_t const *) *from;
1351    srcend = src + *from_left;
1352    dst = (unsigned char *) *to;
1353    dstend = dst + *to_left;
1354    unconv_num = 0;
1355
1356    while (src < srcend && dst < dstend) {
1357	unsigned int wc = *src++;
1358	if (wc < 0x80)
1359	    *dst = wc;
1360	else {
1361	    *dst = BAD_CHAR;
1362	    unconv_num++;
1363	}
1364	dst++;
1365    }
1366
1367    *from = (XPointer) src;
1368    *from_left = srcend - src;
1369    *to = (XPointer) dst;
1370    *to_left = dstend - dst;
1371
1372    return unconv_num;
1373}
1374
1375static XlcConvMethodsRec methods_wcstostr = {
1376    close_converter,
1377    our_wcstostr,
1378    NULL
1379};
1380
1381static XlcConv
1382open_wcstostr(
1383    XLCd from_lcd,
1384    const char *from_type,
1385    XLCd to_lcd,
1386    const char *to_type)
1387{
1388    return create_conv(from_lcd, &methods_wcstostr);
1389}
1390
1391/* from XlcNCharSet to XlcNWideChar */
1392
1393static int
1394cstowcs(
1395    XlcConv conv,
1396    XPointer *from,
1397    int *from_left,
1398    XPointer *to,
1399    int *to_left,
1400    XPointer *args,
1401    int num_args)
1402{
1403    XlcCharSet charset;
1404    const char *name;
1405    Utf8Conv convptr;
1406    int i;
1407    unsigned char const *src;
1408    unsigned char const *srcend;
1409    wchar_t *dst;
1410    wchar_t *dstend;
1411    int unconv_num;
1412
1413    if (from == NULL || *from == NULL)
1414	return 0;
1415
1416    if (num_args < 1)
1417	return -1;
1418
1419    charset = (XlcCharSet) args[0];
1420    name = charset->encoding_name;
1421    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1422
1423    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1424	if (!strcmp(convptr->name, name))
1425	    break;
1426    if (i == 0)
1427	return -1;
1428
1429    src = (unsigned char const *) *from;
1430    srcend = src + *from_left;
1431    dst = (wchar_t *) *to;
1432    dstend = dst + *to_left;
1433    unconv_num = 0;
1434
1435    while (src < srcend && dst < dstend) {
1436	unsigned int wc;
1437	int consumed;
1438
1439	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1440	if (consumed == RET_ILSEQ)
1441	    return -1;
1442	if (consumed == RET_TOOFEW(0))
1443	    break;
1444
1445	*dst++ = wc;
1446	src += consumed;
1447    }
1448
1449    *from = (XPointer) src;
1450    *from_left = srcend - src;
1451    *to = (XPointer) dst;
1452    *to_left = dstend - dst;
1453
1454    return unconv_num;
1455}
1456
1457static XlcConvMethodsRec methods_cstowcs = {
1458    close_converter,
1459    cstowcs,
1460    NULL
1461};
1462
1463static XlcConv
1464open_cstowcs(
1465    XLCd from_lcd,
1466    const char *from_type,
1467    XLCd to_lcd,
1468    const char *to_type)
1469{
1470    lazy_init_all_charsets();
1471    return create_conv(from_lcd, &methods_cstowcs);
1472}
1473
1474/* from XlcNWideChar to XlcNCharSet */
1475
1476static int
1477wcstocs(
1478    XlcConv conv,
1479    XPointer *from,
1480    int *from_left,
1481    XPointer *to,
1482    int *to_left,
1483    XPointer *args,
1484    int num_args)
1485{
1486    Utf8Conv *preferred_charsets;
1487    XlcCharSet last_charset = NULL;
1488    wchar_t const *src;
1489    wchar_t const *srcend;
1490    unsigned char *dst;
1491    unsigned char *dstend;
1492    int unconv_num;
1493
1494    if (from == NULL || *from == NULL)
1495	return 0;
1496
1497    preferred_charsets = (Utf8Conv *) conv->state;
1498    src = (wchar_t const *) *from;
1499    srcend = src + *from_left;
1500    dst = (unsigned char *) *to;
1501    dstend = dst + *to_left;
1502    unconv_num = 0;
1503
1504    while (src < srcend && dst < dstend) {
1505	Utf8Conv chosen_charset = NULL;
1506	XlcSide chosen_side = XlcNONE;
1507	wchar_t wc = *src;
1508	int count;
1509
1510	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1511	if (count == RET_TOOSMALL)
1512	    break;
1513	if (count == RET_ILSEQ) {
1514	    src++;
1515	    unconv_num++;
1516	    continue;
1517	}
1518
1519	if (last_charset == NULL) {
1520	    last_charset =
1521	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1522	    if (last_charset == NULL) {
1523		src++;
1524		unconv_num++;
1525		continue;
1526	    }
1527	} else {
1528	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1529	          && (last_charset->side == XlcGLGR
1530	              || last_charset->side == chosen_side)))
1531		break;
1532	}
1533	src++;
1534	dst += count;
1535    }
1536
1537    if (last_charset == NULL)
1538	return -1;
1539
1540    *from = (XPointer) src;
1541    *from_left = srcend - src;
1542    *to = (XPointer) dst;
1543    *to_left = dstend - dst;
1544
1545    if (num_args >= 1)
1546	*((XlcCharSet *)args[0]) = last_charset;
1547
1548    return unconv_num;
1549}
1550
1551static XlcConvMethodsRec methods_wcstocs = {
1552    close_tocs_converter,
1553    wcstocs,
1554    NULL
1555};
1556
1557static XlcConv
1558open_wcstocs(
1559    XLCd from_lcd,
1560    const char *from_type,
1561    XLCd to_lcd,
1562    const char *to_type)
1563{
1564    return create_tocs_conv(from_lcd, &methods_wcstocs);
1565}
1566
1567/* from XlcNWideChar to XlcNChar */
1568
1569static int
1570wcstocs1(
1571    XlcConv conv,
1572    XPointer *from,
1573    int *from_left,
1574    XPointer *to,
1575    int *to_left,
1576    XPointer *args,
1577    int num_args)
1578{
1579    Utf8Conv *preferred_charsets;
1580    XlcCharSet last_charset = NULL;
1581    wchar_t const *src;
1582    wchar_t const *srcend;
1583    unsigned char *dst;
1584    unsigned char *dstend;
1585    int unconv_num;
1586
1587    if (from == NULL || *from == NULL)
1588	return 0;
1589
1590    preferred_charsets = (Utf8Conv *) conv->state;
1591    src = (wchar_t const *) *from;
1592    srcend = src + *from_left;
1593    dst = (unsigned char *) *to;
1594    dstend = dst + *to_left;
1595    unconv_num = 0;
1596
1597    while (src < srcend && dst < dstend) {
1598	Utf8Conv chosen_charset = NULL;
1599	XlcSide chosen_side = XlcNONE;
1600	wchar_t wc = *src;
1601	int count;
1602
1603	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1604	if (count == RET_TOOSMALL)
1605	    break;
1606	if (count == RET_ILSEQ) {
1607	    src++;
1608	    unconv_num++;
1609	    continue;
1610	}
1611
1612	last_charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1613
1614	if (last_charset == NULL) {
1615	    src++;
1616	    unconv_num++;
1617	    continue;
1618	}
1619
1620	src++;
1621	dst += count;
1622	break;
1623    }
1624
1625    if (last_charset == NULL)
1626	return -1;
1627
1628    *from = (XPointer) src;
1629    *from_left = srcend - src;
1630    *to = (XPointer) dst;
1631    *to_left = dstend - dst;
1632
1633    if (num_args >= 1)
1634	*((XlcCharSet *)args[0]) = last_charset;
1635
1636    return unconv_num;
1637}
1638
1639static XlcConvMethodsRec methods_wcstocs1 = {
1640    close_tocs_converter,
1641    wcstocs1,
1642    NULL
1643};
1644
1645static XlcConv
1646open_wcstocs1(
1647    XLCd from_lcd,
1648    const char *from_type,
1649    XLCd to_lcd,
1650    const char *to_type)
1651{
1652    return create_tocs_conv(from_lcd, &methods_wcstocs1);
1653}
1654
1655/* trivial, no conversion */
1656
1657static int
1658identity(
1659    XlcConv conv,
1660    XPointer *from,
1661    int *from_left,
1662    XPointer *to,
1663    int *to_left,
1664    XPointer *args,
1665    int num_args)
1666{
1667    unsigned char const *src;
1668    unsigned char const *srcend;
1669    unsigned char *dst;
1670    unsigned char *dstend;
1671
1672    if (from == NULL || *from == NULL)
1673	return 0;
1674
1675    src = (unsigned char const *) *from;
1676    srcend = src + *from_left;
1677    dst = (unsigned char *) *to;
1678    dstend = dst + *to_left;
1679
1680    while (src < srcend && dst < dstend)
1681	*dst++ = *src++;
1682
1683    *from = (XPointer) src;
1684    *from_left = srcend - src;
1685    *to = (XPointer) dst;
1686    *to_left = dstend - dst;
1687
1688    return 0;
1689}
1690
1691static XlcConvMethodsRec methods_identity = {
1692    close_converter,
1693    identity,
1694    NULL
1695};
1696
1697static XlcConv
1698open_identity(
1699    XLCd from_lcd,
1700    const char *from_type,
1701    XLCd to_lcd,
1702    const char *to_type)
1703{
1704    return create_conv(from_lcd, &methods_identity);
1705}
1706
1707/* from MultiByte/WideChar to FontCharSet. */
1708/* They really use converters to CharSet
1709 * but with different create_conv procedure. */
1710
1711static XlcConv
1712create_tofontcs_conv(
1713    XLCd lcd,
1714    XlcConvMethods methods)
1715{
1716    XlcConv conv;
1717    int i, num, k, count;
1718    char **value, buf[32];
1719    Utf8Conv *preferred;
1720
1721    lazy_init_all_charsets();
1722
1723    for (i = 0, num = 0;; i++) {
1724	snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1725	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1726	if (count < 1) {
1727	    snprintf(buf, sizeof(buf), "fs%d.charset", i);
1728	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1729	    if (count < 1)
1730		break;
1731	}
1732	num += count;
1733    }
1734
1735    conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1736    if (conv == (XlcConv) NULL)
1737	return (XlcConv) NULL;
1738    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1739
1740    /* Loop through all fontsets mentioned in the locale. */
1741    for (i = 0, num = 0;; i++) {
1742        snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1743        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1744        if (count < 1) {
1745            snprintf(buf, sizeof(buf), "fs%d.charset", i);
1746            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1747            if (count < 1)
1748                break;
1749        }
1750	while (count-- > 0) {
1751	    XlcCharSet charset = _XlcGetCharSet(*value++);
1752	    const char *name;
1753
1754	    if (charset == (XlcCharSet) NULL)
1755		continue;
1756
1757	    name = charset->encoding_name;
1758	    /* If it wasn't already encountered... */
1759	    for (k = num - 1; k >= 0; k--)
1760		if (!strcmp(preferred[k]->name, name))
1761		    break;
1762	    if (k < 0) {
1763                /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1764                if (!strcmp("ISO10646-1", name)) {
1765                    preferred[num++] = &all_charsets[ucs2_conv_index];
1766                    continue;
1767                }
1768		/* Look it up in all_charsets[]. */
1769		for (k = 0; k < all_charsets_count-1; k++)
1770		    if (!strcmp(all_charsets[k].name, name)) {
1771			/* Add it to the preferred set. */
1772			preferred[num++] = &all_charsets[k];
1773			break;
1774		    }
1775	    }
1776        }
1777    }
1778    preferred[num] = (Utf8Conv) NULL;
1779
1780    conv->methods = methods;
1781    conv->state = (XPointer) preferred;
1782
1783    return conv;
1784}
1785
1786static XlcConv
1787open_wcstofcs(
1788    XLCd from_lcd,
1789    const char *from_type,
1790    XLCd to_lcd,
1791    const char *to_type)
1792{
1793    return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1794}
1795
1796static XlcConv
1797open_utf8tofcs(
1798    XLCd from_lcd,
1799    const char *from_type,
1800    XLCd to_lcd,
1801    const char *to_type)
1802{
1803    return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1804}
1805
1806/* ========================== iconv Stuff ================================ */
1807
1808/* from XlcNCharSet to XlcNMultiByte */
1809
1810static int
1811iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1812	      XPointer *to, int *to_left, XPointer *args, int num_args)
1813{
1814    XlcCharSet charset;
1815    char const *name;
1816    Utf8Conv convptr;
1817    int i;
1818    unsigned char const *src;
1819    unsigned char const *srcend;
1820    unsigned char *dst;
1821    unsigned char *dstend;
1822    int unconv_num;
1823
1824    if (from == NULL || *from == NULL)
1825	return 0;
1826
1827    if (num_args < 1)
1828	return -1;
1829
1830    charset = (XlcCharSet) args[0];
1831    name = charset->encoding_name;
1832    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1833
1834    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1835	if (!strcmp(convptr->name, name))
1836	    break;
1837    if (i == 0)
1838	return -1;
1839
1840    src = (unsigned char const *) *from;
1841    srcend = src + *from_left;
1842    dst = (unsigned char *) *to;
1843    dstend = dst + *to_left;
1844    unconv_num = 0;
1845
1846    while (src < srcend) {
1847	ucs4_t wc;
1848	int consumed;
1849	int count;
1850
1851	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1852	if (consumed == RET_ILSEQ)
1853	    return -1;
1854	if (consumed == RET_TOOFEW(0))
1855	    break;
1856
1857    /* Use stdc iconv to convert widechar -> multibyte */
1858
1859	count = wctomb((char *)dst, wc);
1860	if (count == 0)
1861	    break;
1862	if (count == -1) {
1863	    count = wctomb((char *)dst, BAD_WCHAR);
1864	    if (count == 0)
1865		break;
1866	    unconv_num++;
1867	}
1868	src += consumed;
1869	dst += count;
1870    }
1871
1872    *from = (XPointer) src;
1873    *from_left = srcend - src;
1874    *to = (XPointer) dst;
1875    *to_left = dstend - dst;
1876
1877    return unconv_num;
1878
1879}
1880
1881static XlcConvMethodsRec iconv_cstombs_methods = {
1882    close_converter,
1883    iconv_cstombs,
1884    NULL
1885};
1886
1887static XlcConv
1888open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1889{
1890    lazy_init_all_charsets();
1891    return create_conv(from_lcd, &iconv_cstombs_methods);
1892}
1893
1894static int
1895iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1896	      XPointer *to, int *to_left, XPointer *args, int num_args)
1897{
1898    Utf8Conv *preferred_charsets;
1899    XlcCharSet last_charset = NULL;
1900    unsigned char const *src;
1901    unsigned char const *srcend;
1902    unsigned char *dst;
1903    unsigned char *dstend;
1904    int unconv_num;
1905
1906    if (from == NULL || *from == NULL)
1907	return 0;
1908
1909    preferred_charsets = (Utf8Conv *) conv->state;
1910    src = (unsigned char const *) *from;
1911    srcend = src + *from_left;
1912    dst = (unsigned char *) *to;
1913    dstend = dst + *to_left;
1914    unconv_num = 0;
1915
1916    while (src < srcend && dst < dstend) {
1917	Utf8Conv chosen_charset = NULL;
1918	XlcSide chosen_side = XlcNONE;
1919	wchar_t wc;
1920	int consumed;
1921	int count;
1922
1923    /* Uses stdc iconv to convert multibyte -> widechar */
1924
1925	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
1926	if (consumed == 0)
1927	    break;
1928	if (consumed == -1) {
1929	    src++;
1930	    unconv_num++;
1931	    continue;
1932	}
1933
1934	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1935
1936	if (count == RET_TOOSMALL)
1937	    break;
1938	if (count == RET_ILSEQ) {
1939	    src += consumed;
1940	    unconv_num++;
1941	    continue;
1942	}
1943
1944	if (last_charset == NULL) {
1945	    last_charset =
1946	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1947	    if (last_charset == NULL) {
1948		src += consumed;
1949		unconv_num++;
1950		continue;
1951	    }
1952	} else {
1953	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1954	          && (last_charset->side == XlcGLGR
1955	              || last_charset->side == chosen_side)))
1956		break;
1957	}
1958	src += consumed;
1959	dst += count;
1960    }
1961
1962    if (last_charset == NULL)
1963	return -1;
1964
1965    *from = (XPointer) src;
1966    *from_left = srcend - src;
1967    *to = (XPointer) dst;
1968    *to_left = dstend - dst;
1969
1970    if (num_args >= 1)
1971	*((XlcCharSet *)args[0]) = last_charset;
1972
1973    return unconv_num;
1974}
1975
1976static XlcConvMethodsRec iconv_mbstocs_methods = {
1977    close_tocs_converter,
1978    iconv_mbstocs,
1979    NULL
1980};
1981
1982static XlcConv
1983open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1984{
1985    return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1986}
1987
1988/* from XlcNMultiByte to XlcNChar */
1989
1990static int
1991iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
1992	     XPointer *to, int *to_left, XPointer *args, int num_args)
1993{
1994    Utf8Conv *preferred_charsets;
1995    XlcCharSet last_charset = NULL;
1996    unsigned char const *src;
1997    unsigned char const *srcend;
1998    unsigned char *dst;
1999    unsigned char *dstend;
2000    int unconv_num;
2001
2002    if (from == NULL || *from == NULL)
2003	return 0;
2004
2005    preferred_charsets = (Utf8Conv *) conv->state;
2006    src = (unsigned char const *) *from;
2007    srcend = src + *from_left;
2008    dst = (unsigned char *) *to;
2009    dstend = dst + *to_left;
2010    unconv_num = 0;
2011
2012    while (src < srcend && dst < dstend) {
2013	Utf8Conv chosen_charset = NULL;
2014	XlcSide chosen_side = XlcNONE;
2015	wchar_t wc;
2016	int consumed;
2017	int count;
2018
2019    /* Uses stdc iconv to convert multibyte -> widechar */
2020
2021	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2022	if (consumed == 0)
2023	    break;
2024	if (consumed == -1) {
2025	    src++;
2026	    unconv_num++;
2027	    continue;
2028	}
2029
2030	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2031	if (count == RET_TOOSMALL)
2032	    break;
2033	if (count == RET_ILSEQ) {
2034	    src += consumed;
2035	    unconv_num++;
2036	    continue;
2037	}
2038
2039	if (last_charset == NULL) {
2040	    last_charset =
2041		_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2042	    if (last_charset == NULL) {
2043		src += consumed;
2044		unconv_num++;
2045		continue;
2046	    }
2047	} else {
2048	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2049		  && (last_charset->side == XlcGLGR
2050		      || last_charset->side == chosen_side)))
2051		break;
2052	}
2053	src += consumed;
2054	dst += count;
2055    }
2056
2057    if (last_charset == NULL)
2058	return -1;
2059
2060    *from = (XPointer) src;
2061    *from_left = srcend - src;
2062    *to = (XPointer) dst;
2063    *to_left = dstend - dst;
2064
2065    if (num_args >= 1)
2066	*((XlcCharSet *)args[0]) = last_charset;
2067
2068    return unconv_num;
2069}
2070
2071static XlcConvMethodsRec iconv_mbtocs_methods = {
2072    close_tocs_converter,
2073    iconv_mbtocs,
2074    NULL
2075};
2076
2077static XlcConv
2078open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2079{
2080    return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2081}
2082
2083/* from XlcNMultiByte to XlcNString */
2084
2085static int
2086iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2087	       XPointer *to, int *to_left, XPointer *args, int num_args)
2088{
2089    unsigned char const *src;
2090    unsigned char const *srcend;
2091    unsigned char *dst;
2092    unsigned char *dstend;
2093    int unconv_num;
2094
2095    if (from == NULL || *from == NULL)
2096	return 0;
2097
2098    src = (unsigned char const *) *from;
2099    srcend = src + *from_left;
2100    dst = (unsigned char *) *to;
2101    dstend = dst + *to_left;
2102    unconv_num = 0;
2103
2104    while (src < srcend) {
2105	unsigned char c;
2106	wchar_t wc;
2107	int consumed;
2108
2109    /* Uses stdc iconv to convert multibyte -> widechar */
2110
2111	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2112	if (consumed == 0)
2113	    break;
2114	if (dst == dstend)
2115	    break;
2116	if (consumed == -1) {
2117	    consumed = 1;
2118	    c = BAD_CHAR;
2119	    unconv_num++;
2120	} else {
2121	    if ((wc & ~(wchar_t)0xff) != 0) {
2122		c = BAD_CHAR;
2123		unconv_num++;
2124	    } else
2125		c = (unsigned char) wc;
2126	}
2127	*dst++ = c;
2128	src += consumed;
2129    }
2130
2131    *from = (XPointer) src;
2132    *from_left = srcend - src;
2133    *to = (XPointer) dst;
2134    *to_left = dstend - dst;
2135
2136    return unconv_num;
2137}
2138
2139static XlcConvMethodsRec iconv_mbstostr_methods = {
2140    close_converter,
2141    iconv_mbstostr,
2142    NULL
2143};
2144
2145static XlcConv
2146open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2147{
2148    return create_conv(from_lcd, &iconv_mbstostr_methods);
2149}
2150
2151/* from XlcNString to XlcNMultiByte */
2152static int
2153iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2154	       XPointer *to, int *to_left, XPointer *args, int num_args)
2155{
2156    unsigned char const *src;
2157    unsigned char const *srcend;
2158    unsigned char *dst;
2159    unsigned char *dstend;
2160
2161    if (from == NULL || *from == NULL)
2162	return 0;
2163
2164    src = (unsigned char const *) *from;
2165    srcend = src + *from_left;
2166    dst = (unsigned char *) *to;
2167    dstend = dst + *to_left;
2168
2169    while (src < srcend) {
2170	int count = wctomb((char *)dst, *src);
2171	if (count < 0)
2172	    break;
2173	dst += count;
2174	src++;
2175    }
2176
2177    *from = (XPointer) src;
2178    *from_left = srcend - src;
2179    *to = (XPointer) dst;
2180    *to_left = dstend - dst;
2181
2182    return 0;
2183}
2184
2185static XlcConvMethodsRec iconv_strtombs_methods= {
2186    close_converter,
2187    iconv_strtombs,
2188    NULL
2189};
2190
2191static XlcConv
2192open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2193{
2194    return create_conv(from_lcd, &iconv_strtombs_methods);
2195}
2196
2197/***************************************************************************/
/* Part III: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is the encoding of the current C
 * locale, as handled by mbtowc()/wctomb(), and that `wchar_t' is Unicode.
 */
2202
2203/* from XlcNMultiByte to XlcNWideChar */
2204static int
2205iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2206	       XPointer *to, int *to_left, XPointer *args,  int num_args)
2207{
2208    char *src = *((char **) from);
2209    wchar_t *dst = *((wchar_t **) to);
2210    int src_left = *from_left;
2211    int dst_left = *to_left;
2212    int length, unconv_num = 0;
2213
2214    while (src_left > 0 && dst_left > 0) {
2215	length = mbtowc(dst, src, (size_t) src_left);
2216
2217	if (length > 0) {
2218	    src += length;
2219	    src_left -= length;
2220	    if (dst)
2221	        dst++;
2222	    dst_left--;
2223	} else if (length < 0) {
2224	    src++;
2225	    src_left--;
2226	    unconv_num++;
2227        } else {
2228            /* null ? */
2229            src++;
2230            src_left--;
2231            if (dst)
2232                *dst++ = L'\0';
2233            dst_left--;
2234        }
2235    }
2236
2237    *from = (XPointer) src;
2238    if (dst)
2239	*to = (XPointer) dst;
2240    *from_left = src_left;
2241    *to_left = dst_left;
2242
2243    return unconv_num;
2244}
2245
2246static XlcConvMethodsRec iconv_mbstowcs_methods = {
2247    close_converter,
2248    iconv_mbstowcs,
2249    NULL
2250} ;
2251
2252static XlcConv
2253open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2254{
2255    return create_conv(from_lcd, &iconv_mbstowcs_methods);
2256}
2257
2258static int
2259iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2260	       XPointer *to, int *to_left, XPointer *args, int num_args)
2261{
2262    wchar_t *src = *((wchar_t **) from);
2263    char *dst = *((char **) to);
2264    int src_left = *from_left;
2265    int dst_left = *to_left;
2266    int length, unconv_num = 0;
2267
2268    while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2269	length = wctomb(dst, *src);		/* XXX */
2270
2271        if (length > 0) {
2272	    src++;
2273	    src_left--;
2274	    if (dst)
2275		dst += length;
2276	    dst_left -= length;
2277	} else if (length < 0) {
2278	    src++;
2279	    src_left--;
2280	    unconv_num++;
2281	}
2282    }
2283
2284    *from = (XPointer) src;
2285    if (dst)
2286      *to = (XPointer) dst;
2287    *from_left = src_left;
2288    *to_left = dst_left;
2289
2290    return unconv_num;
2291}
2292
2293static XlcConvMethodsRec iconv_wcstombs_methods = {
2294    close_converter,
2295    iconv_wcstombs,
2296    NULL
2297} ;
2298
2299static XlcConv
2300open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2301{
2302    return create_conv(from_lcd, &iconv_wcstombs_methods);
2303}
2304
2305static XlcConv
2306open_iconv_mbstofcs(
2307    XLCd from_lcd,
2308    const char *from_type,
2309    XLCd to_lcd,
2310    const char *to_type)
2311{
2312    return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2313}
2314
2315/* Registers UTF-8 converters for a UTF-8 locale. */
2316
2317void
2318_XlcAddUtf8LocaleConverters(
2319    XLCd lcd)
2320{
2321    /* Register elementary converters. */
2322
2323    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
2324
2325    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
2326    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2327
2328    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2329
2330    /* Register converters for XlcNCharSet. This implicitly provides
2331     * converters from and to XlcNCompoundText. */
2332
2333    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
2334    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
2335    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
2336
2337    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2338    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2339    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2340
2341    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
2342    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
2343    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
2344    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
2345
2346    /* Register converters for XlcNFontCharSet */
2347    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
2348    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2349    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNFontCharSet, open_utf8tofcs);
2350}
2351
2352void
2353_XlcAddGB18030LocaleConverters(
2354    XLCd lcd)
2355{
2356
2357    /* Register elementary converters. */
2358    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
2359    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
2360
2361    /* Register converters for XlcNCharSet. This implicitly provides
2362     * converters from and to XlcNCompoundText. */
2363
2364    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
2365    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
2366    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
2367    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
2368    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
2369
2370    /* Register converters for XlcNFontCharSet */
2371    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
2372
2373    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2374    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2375    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2376    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2377    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2378
2379    /* Register converters for XlcNFontCharSet */
2380    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2381}
2382