lcUTF8.c revision 88de56cc
1/******************************************************************
2
3              Copyright 1993 by SunSoft, Inc.
4              Copyright 1999-2000 by Bruno Haible
5
6Permission to use, copy, modify, distribute, and sell this software
7and its documentation for any purpose is hereby granted without fee,
8provided that the above copyright notice appear in all copies and
9that both that copyright notice and this permission notice appear
10in supporting documentation, and that the names of SunSoft, Inc. and
11Bruno Haible not be used in advertising or publicity pertaining to
12distribution of the software without specific, written prior
13permission.  SunSoft, Inc. and Bruno Haible make no representations
14about the suitability of this software for any purpose.  It is
15provided "as is" without express or implied warranty.
16
17SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
18TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
19AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
20FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
22ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
23OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24
25******************************************************************/
26
27/*
28 * This file contains:
29 *
30 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
31 *
32 *    Used for three purposes:
33 *      1. The UTF-8 locales, see below.
34 *      2. Unicode aware applications for which the use of 8-bit character
35 *         sets is an anachronism.
36 *      3. For conversion from keysym to locale encoding.
37 *
38 * II. Conversion files for an UTF-8 locale loader.
39 *     Supports: all locales with codeset UTF-8.
40 *     How: Provides converters for UTF-8.
41 *     Platforms: all systems.
42 *
 * The loader itself is located in lcUTF8Load.c.
44 */
45
46/*
47 * The conversion from UTF-8 to CompoundText is realized in a very
48 * conservative way. Recall that CompoundText data is used for inter-client
49 * communication purposes. We distinguish three classes of clients:
50 * - Clients which accept only those pieces of CompoundText which belong to
51 *   the character set understood by the current locale.
52 *   (Example: clients which are linked to an older X11 library.)
53 * - Clients which accept CompoundText with multiple character sets and parse
54 *   it themselves.
55 *   (Example: emacs, xemacs.)
56 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
57 *   functions for the conversion of CompoundText to their current locale's
58 *   multi-byte/wide-character format.
59 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
60 * follows. For every character, it first tests whether the character is
61 * representable in the current locale's original (non-UTF-8) character set.
62 * If not, it goes through the list of predefined character sets for
63 * CompoundText and tests if the character is representable in that character
64 * set. If so, it encodes the character using its code within that character
65 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since
66 * clients of the first and second kind ignore such encapsulated text,
67 * this encapsulation is kept to a minimum and terminated as early as possible.
68 *
69 * In a distant future, when clients of the first and second kind will have
70 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
71 * without first going through the list of predefined character sets.
72 */
73
74#ifdef HAVE_CONFIG_H
75#include <config.h>
76#endif
77#include <stdio.h>
78#include "Xlibint.h"
79#include "XlcPubI.h"
80#include "XlcGeneric.h"
81
82static XlcConv
83create_conv(
84    XLCd lcd,
85    XlcConvMethods methods)
86{
87    XlcConv conv;
88
89    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec));
90    if (conv == (XlcConv) NULL)
91	return (XlcConv) NULL;
92
93    conv->methods = methods;
94    conv->state = NULL;
95
96    return conv;
97}
98
99static void
100close_converter(
101    XlcConv conv)
102{
103    Xfree((char *) conv);
104}
105
/*
 * Substitutes emitted in place of an invalid multibyte sequence or an
 * unrepresentable wide character:
 *   BAD_WCHAR - wide-character form, U+FFFD.
 *   BAD_CHAR  - single-byte form, '?'.
 */
#define BAD_WCHAR ((ucs4_t) 0xFFFD)
#define BAD_CHAR  '?'
109
110/***************************************************************************/
111/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
112 *
113 * Note that this code works in any locale. We store Unicode values in
114 * `ucs4_t' variables, but don't pass them to the user.
115 *
116 * This code has to support all character sets that are used for CompoundText,
117 * nothing more, nothing less. See the table in lcCT.c.
118 * Since the conversion _to_ CompoundText is likely to need the tables for all
119 * character sets at once, we don't use dynamic loading (of tables or shared
120 * libraries through iconv()). Use a fixed set of tables instead.
121 *
122 * We use statically computed tables, not dynamically allocated arrays,
123 * because it's more memory efficient: Different processes using the same
124 * libX11 shared library share the "text" and read-only "data" sections.
125 */
126
127typedef unsigned int ucs4_t;
128#define conv_t XlcConv
129
130typedef struct _Utf8ConvRec {
131    const char *name;
132    XrmQuark xrm_name;
133    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
134    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
135} Utf8ConvRec, *Utf8Conv;
136
137/*
138 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
139 * converts the byte sequence starting at s to a wide character. Up to n bytes
140 * are available at s. n is >= 1.
141 * Result is number of bytes consumed (if a wide character was read),
142 * or 0 if invalid, or -1 if n too small.
143 *
144 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
145 * converts the wide character wc to the character set xxx, and stores the
146 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
147 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
148 */
149
/*
 * Return codes shared by the xxx_mbtowc / xxx_wctomb conversion routines.
 *
 * Note that RET_TOOFEW(0) and RET_TOOSMALL are both -1; which one applies
 * depends on the direction of the routine that returned it.
 */

/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset)
   Parenthesised so the expansion is safe inside any larger expression. */
#define RET_TOOSMALL   (-1)
156
157/*
158 * The tables below are bijective. It would be possible to extend the
159 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
160 * but *only* with characters not contained in any other table, and *only*
161 * when the current locale is not an UTF-8 locale.
162 */
163
164#include "lcUniConv/utf8.h"
165#include "lcUniConv/ucs2be.h"
166#ifdef notused
167#include "lcUniConv/ascii.h"
168#endif
169#include "lcUniConv/iso8859_1.h"
170#include "lcUniConv/iso8859_2.h"
171#include "lcUniConv/iso8859_3.h"
172#include "lcUniConv/iso8859_4.h"
173#include "lcUniConv/iso8859_5.h"
174#include "lcUniConv/iso8859_6.h"
175#include "lcUniConv/iso8859_7.h"
176#include "lcUniConv/iso8859_8.h"
177#include "lcUniConv/iso8859_9.h"
178#include "lcUniConv/iso8859_10.h"
179#include "lcUniConv/iso8859_11.h"
180#include "lcUniConv/iso8859_13.h"
181#include "lcUniConv/iso8859_14.h"
182#include "lcUniConv/iso8859_15.h"
183#include "lcUniConv/iso8859_16.h"
184#include "lcUniConv/iso8859_9e.h"
185#include "lcUniConv/jisx0201.h"
186#include "lcUniConv/tis620.h"
187#include "lcUniConv/koi8_r.h"
188#include "lcUniConv/koi8_u.h"
189#include "lcUniConv/koi8_c.h"
190#include "lcUniConv/armscii_8.h"
191#include "lcUniConv/cp1133.h"
192#include "lcUniConv/mulelao.h"
193#include "lcUniConv/viscii.h"
194#include "lcUniConv/tcvn.h"
195#include "lcUniConv/georgian_academy.h"
196#include "lcUniConv/georgian_ps.h"
197#include "lcUniConv/cp1251.h"
198#include "lcUniConv/cp1255.h"
199#include "lcUniConv/cp1256.h"
200#include "lcUniConv/tatar_cyr.h"
201
/*
 * Summary record for a group of table entries: where the group starts in
 * the big table and which of its entries are in use.
 */
typedef struct {
    /* index into big table */
    unsigned short indx;
    /* bitmask of used entries */
    unsigned short used;
} Summary16;
206
207#include "lcUniConv/gb2312.h"
208#include "lcUniConv/jisx0208.h"
209#include "lcUniConv/jisx0212.h"
210#include "lcUniConv/ksc5601.h"
211#include "lcUniConv/big5.h"
212#include "lcUniConv/big5_emacs.h"
213#include "lcUniConv/big5hkscs.h"
214#include "lcUniConv/gbk.h"
215
216static Utf8ConvRec all_charsets[] = {
217    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
218       (for lookup speed), once at the end (as a fallback).  */
219    { "ISO10646-1", NULLQUARK,
220	utf8_mbtowc, utf8_wctomb
221    },
222
223    { "ISO8859-1", NULLQUARK,
224	iso8859_1_mbtowc, iso8859_1_wctomb
225    },
226    { "ISO8859-2", NULLQUARK,
227	iso8859_2_mbtowc, iso8859_2_wctomb
228    },
229    { "ISO8859-3", NULLQUARK,
230	iso8859_3_mbtowc, iso8859_3_wctomb
231    },
232    { "ISO8859-4", NULLQUARK,
233	iso8859_4_mbtowc, iso8859_4_wctomb
234    },
235    { "ISO8859-5", NULLQUARK,
236	iso8859_5_mbtowc, iso8859_5_wctomb
237    },
238    { "ISO8859-6", NULLQUARK,
239	iso8859_6_mbtowc, iso8859_6_wctomb
240    },
241    { "ISO8859-7", NULLQUARK,
242	iso8859_7_mbtowc, iso8859_7_wctomb
243    },
244    { "ISO8859-8", NULLQUARK,
245	iso8859_8_mbtowc, iso8859_8_wctomb
246    },
247    { "ISO8859-9", NULLQUARK,
248	iso8859_9_mbtowc, iso8859_9_wctomb
249    },
250    { "ISO8859-10", NULLQUARK,
251	iso8859_10_mbtowc, iso8859_10_wctomb
252    },
253    { "ISO8859-11", NULLQUARK,
254	iso8859_11_mbtowc, iso8859_11_wctomb
255    },
256    { "ISO8859-13", NULLQUARK,
257	iso8859_13_mbtowc, iso8859_13_wctomb
258    },
259    { "ISO8859-14", NULLQUARK,
260	iso8859_14_mbtowc, iso8859_14_wctomb
261    },
262    { "ISO8859-15", NULLQUARK,
263	iso8859_15_mbtowc, iso8859_15_wctomb
264    },
265    { "ISO8859-16", NULLQUARK,
266	iso8859_16_mbtowc, iso8859_16_wctomb
267    },
268    { "JISX0201.1976-0", NULLQUARK,
269	jisx0201_mbtowc, jisx0201_wctomb
270    },
271    { "TIS620-0", NULLQUARK,
272	tis620_mbtowc, tis620_wctomb
273    },
274    { "GB2312.1980-0", NULLQUARK,
275	gb2312_mbtowc, gb2312_wctomb
276    },
277    { "JISX0208.1983-0", NULLQUARK,
278	jisx0208_mbtowc, jisx0208_wctomb
279    },
280    { "JISX0208.1990-0", NULLQUARK,
281	jisx0208_mbtowc, jisx0208_wctomb
282    },
283    { "JISX0212.1990-0", NULLQUARK,
284	jisx0212_mbtowc, jisx0212_wctomb
285    },
286    { "KSC5601.1987-0", NULLQUARK,
287	ksc5601_mbtowc, ksc5601_wctomb
288    },
289    { "KOI8-R", NULLQUARK,
290	koi8_r_mbtowc, koi8_r_wctomb
291    },
292    { "KOI8-U", NULLQUARK,
293	koi8_u_mbtowc, koi8_u_wctomb
294    },
295    { "KOI8-C", NULLQUARK,
296	koi8_c_mbtowc, koi8_c_wctomb
297    },
298    { "TATAR-CYR", NULLQUARK,
299	tatar_cyr_mbtowc, tatar_cyr_wctomb
300    },
301    { "ARMSCII-8", NULLQUARK,
302	armscii_8_mbtowc, armscii_8_wctomb
303    },
304    { "IBM-CP1133", NULLQUARK,
305	cp1133_mbtowc, cp1133_wctomb
306    },
307    { "MULELAO-1", NULLQUARK,
308	mulelao_mbtowc, mulelao_wctomb
309    },
310    { "VISCII1.1-1", NULLQUARK,
311	viscii_mbtowc, viscii_wctomb
312    },
313    { "TCVN-5712", NULLQUARK,
314	tcvn_mbtowc, tcvn_wctomb
315    },
316    { "GEORGIAN-ACADEMY", NULLQUARK,
317	georgian_academy_mbtowc, georgian_academy_wctomb
318    },
319    { "GEORGIAN-PS", NULLQUARK,
320	georgian_ps_mbtowc, georgian_ps_wctomb
321    },
322    { "ISO8859-9E", NULLQUARK,
323	iso8859_9e_mbtowc, iso8859_9e_wctomb
324    },
325    { "MICROSOFT-CP1251", NULLQUARK,
326	cp1251_mbtowc, cp1251_wctomb
327    },
328    { "MICROSOFT-CP1255", NULLQUARK,
329	cp1255_mbtowc, cp1255_wctomb
330    },
331    { "MICROSOFT-CP1256", NULLQUARK,
332	cp1256_mbtowc, cp1256_wctomb
333    },
334    { "BIG5-0", NULLQUARK,
335	big5_mbtowc, big5_wctomb
336    },
337    { "BIG5-E0", NULLQUARK,
338	big5_0_mbtowc, big5_0_wctomb
339    },
340    { "BIG5-E1", NULLQUARK,
341	big5_1_mbtowc, big5_1_wctomb
342    },
343    { "GBK-0", NULLQUARK,
344	gbk_mbtowc, gbk_wctomb
345    },
346    { "BIG5HKSCS-0", NULLQUARK,
347	big5hkscs_mbtowc, big5hkscs_wctomb
348    },
349
350    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
351       (for lookup speed), once at the end (as a fallback).  */
352    { "ISO10646-1", NULLQUARK,
353	utf8_mbtowc, utf8_wctomb
354    },
355
356    /* Encoding ISO10646-1 for fonts means UCS2-like encoding
357       so for conversion to FontCharSet we need this record */
358    { "ISO10646-1", NULLQUARK,
359	ucs2be_mbtowc, ucs2be_wctomb
360    }
361};
362
/* Total number of rows in all_charsets[], the trailing UCS-2 row included. */
#define charsets_table_size (sizeof(all_charsets) / sizeof(all_charsets[0]))
/* Number of rows excluding the trailing UCS-2 row. */
#define all_charsets_count  (charsets_table_size - 1)
/* Index of the trailing UCS-2 row (the last row of the table). */
#define ucs2_conv_index     (charsets_table_size - 1)
366
367static void
368init_all_charsets (void)
369{
370    Utf8Conv convptr;
371    int i;
372
373    for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
374	convptr->xrm_name = XrmStringToQuark(convptr->name);
375}
376
/*
 * Runs init_all_charsets() the first time it is needed.  The quark of the
 * first table row doubles as the "already initialised" marker: it is
 * NULLQUARK until init_all_charsets() has filled the table in.
 */
#define lazy_init_all_charsets()					\
    do {								\
	if (all_charsets[0].xrm_name == NULLQUARK) {			\
	    init_all_charsets();					\
	}								\
    } while (0)
382
383/* from XlcNCharSet to XlcNUtf8String */
384
385static int
386cstoutf8(
387    XlcConv conv,
388    XPointer *from,
389    int *from_left,
390    XPointer *to,
391    int *to_left,
392    XPointer *args,
393    int num_args)
394{
395    XlcCharSet charset;
396    const char *name;
397    Utf8Conv convptr;
398    int i;
399    unsigned char const *src;
400    unsigned char const *srcend;
401    unsigned char *dst;
402    unsigned char *dstend;
403    int unconv_num;
404
405    if (from == NULL || *from == NULL)
406	return 0;
407
408    if (num_args < 1)
409	return -1;
410
411    charset = (XlcCharSet) args[0];
412    name = charset->encoding_name;
413    /* not charset->name because the latter has a ":GL"/":GR" suffix */
414
415    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
416	if (!strcmp(convptr->name, name))
417	    break;
418    if (i == 0)
419	return -1;
420
421    src = (unsigned char const *) *from;
422    srcend = src + *from_left;
423    dst = (unsigned char *) *to;
424    dstend = dst + *to_left;
425    unconv_num = 0;
426
427    while (src < srcend) {
428	ucs4_t wc;
429	int consumed;
430	int count;
431
432	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
433	if (consumed == RET_ILSEQ)
434	    return -1;
435	if (consumed == RET_TOOFEW(0))
436	    break;
437
438	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
439	if (count == RET_TOOSMALL)
440	    break;
441	if (count == RET_ILSEQ) {
442	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
443	    if (count == RET_TOOSMALL)
444		break;
445	    unconv_num++;
446	}
447	src += consumed;
448	dst += count;
449    }
450
451    *from = (XPointer) src;
452    *from_left = srcend - src;
453    *to = (XPointer) dst;
454    *to_left = dstend - dst;
455
456    return unconv_num;
457}
458
459static XlcConvMethodsRec methods_cstoutf8 = {
460    close_converter,
461    cstoutf8,
462    NULL
463};
464
465static XlcConv
466open_cstoutf8(
467    XLCd from_lcd,
468    const char *from_type,
469    XLCd to_lcd,
470    const char *to_type)
471{
472    lazy_init_all_charsets();
473    return create_conv(from_lcd, &methods_cstoutf8);
474}
475
476/* from XlcNUtf8String to XlcNCharSet */
477
478static XlcConv
479create_tocs_conv(
480    XLCd lcd,
481    XlcConvMethods methods)
482{
483    XlcConv conv;
484    CodeSet *codeset_list;
485    int codeset_num;
486    int charset_num;
487    int i, j, k;
488    Utf8Conv *preferred;
489
490    lazy_init_all_charsets();
491
492    codeset_list = XLC_GENERIC(lcd, codeset_list);
493    codeset_num = XLC_GENERIC(lcd, codeset_num);
494
495    charset_num = 0;
496    for (i = 0; i < codeset_num; i++)
497	charset_num += codeset_list[i]->num_charsets;
498    if (charset_num > all_charsets_count-1)
499	charset_num = all_charsets_count-1;
500
501    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)
502			     + (charset_num + 1) * sizeof(Utf8Conv));
503    if (conv == (XlcConv) NULL)
504	return (XlcConv) NULL;
505    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
506
507    /* Loop through all codesets mentioned in the locale. */
508    charset_num = 0;
509    for (i = 0; i < codeset_num; i++) {
510	XlcCharSet *charsets = codeset_list[i]->charset_list;
511	int num_charsets = codeset_list[i]->num_charsets;
512	for (j = 0; j < num_charsets; j++) {
513	    const char *name = charsets[j]->encoding_name;
514	    /* If it wasn't already encountered... */
515	    for (k = charset_num-1; k >= 0; k--)
516		if (!strcmp(preferred[k]->name, name))
517		    break;
518	    if (k < 0) {
519		/* Look it up in all_charsets[]. */
520		for (k = 0; k < all_charsets_count-1; k++)
521		    if (!strcmp(all_charsets[k].name, name)) {
522			/* Add it to the preferred set. */
523			preferred[charset_num++] = &all_charsets[k];
524			break;
525		    }
526	    }
527	}
528    }
529    preferred[charset_num] = (Utf8Conv) NULL;
530
531    conv->methods = methods;
532    conv->state = (XPointer) preferred;
533
534    return conv;
535}
536
537static void
538close_tocs_converter(
539    XlcConv conv)
540{
541    /* conv->state is allocated together with conv, free both at once.  */
542    Xfree((char *) conv);
543}
544
545/*
546 * Converts a Unicode character to an appropriate character set. The NULL
547 * terminated array of preferred character sets is passed as first argument.
548 * If successful, *charsetp is set to the character set that was used, and
549 * *sidep is set to the character set side (XlcGL or XlcGR).
550 */
551static int
552charset_wctocs(
553    Utf8Conv *preferred,
554    Utf8Conv *charsetp,
555    XlcSide *sidep,
556    XlcConv conv,
557    unsigned char *r,
558    ucs4_t wc,
559    int n)
560{
561    int count;
562    Utf8Conv convptr;
563    int i;
564
565    for (; *preferred != (Utf8Conv) NULL; preferred++) {
566	convptr = *preferred;
567	count = convptr->wctocs(conv, r, wc, n);
568	if (count == RET_TOOSMALL)
569	    return RET_TOOSMALL;
570	if (count != RET_ILSEQ) {
571	    *charsetp = convptr;
572	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
573	    return count;
574	}
575    }
576    for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
577	count = convptr->wctocs(conv, r, wc, n);
578	if (count == RET_TOOSMALL)
579	    return RET_TOOSMALL;
580	if (count != RET_ILSEQ) {
581	    *charsetp = convptr;
582	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
583	    return count;
584	}
585    }
586    return RET_ILSEQ;
587}
588
589static int
590utf8tocs(
591    XlcConv conv,
592    XPointer *from,
593    int *from_left,
594    XPointer *to,
595    int *to_left,
596    XPointer *args,
597    int num_args)
598{
599    Utf8Conv *preferred_charsets;
600    XlcCharSet last_charset = NULL;
601    unsigned char const *src;
602    unsigned char const *srcend;
603    unsigned char *dst;
604    unsigned char *dstend;
605    int unconv_num;
606
607    if (from == NULL || *from == NULL)
608	return 0;
609
610    preferred_charsets = (Utf8Conv *) conv->state;
611    src = (unsigned char const *) *from;
612    srcend = src + *from_left;
613    dst = (unsigned char *) *to;
614    dstend = dst + *to_left;
615    unconv_num = 0;
616
617    while (src < srcend && dst < dstend) {
618	Utf8Conv chosen_charset = NULL;
619	XlcSide chosen_side = XlcNONE;
620	ucs4_t wc;
621	int consumed;
622	int count;
623
624	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
625	if (consumed == RET_TOOFEW(0))
626	    break;
627	if (consumed == RET_ILSEQ) {
628	    src++;
629	    unconv_num++;
630	    continue;
631	}
632
633	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
634	if (count == RET_TOOSMALL)
635	    break;
636	if (count == RET_ILSEQ) {
637	    src += consumed;
638	    unconv_num++;
639	    continue;
640	}
641
642	if (last_charset == NULL) {
643	    last_charset =
644	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
645	    if (last_charset == NULL) {
646		src += consumed;
647		unconv_num++;
648		continue;
649	    }
650	} else {
651	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
652	          && (last_charset->side == XlcGLGR
653	              || last_charset->side == chosen_side)))
654		break;
655	}
656	src += consumed;
657	dst += count;
658    }
659
660    if (last_charset == NULL)
661	return -1;
662
663    *from = (XPointer) src;
664    *from_left = srcend - src;
665    *to = (XPointer) dst;
666    *to_left = dstend - dst;
667
668    if (num_args >= 1)
669	*((XlcCharSet *)args[0]) = last_charset;
670
671    return unconv_num;
672}
673
674static XlcConvMethodsRec methods_utf8tocs = {
675    close_tocs_converter,
676    utf8tocs,
677    NULL
678};
679
680static XlcConv
681open_utf8tocs(
682    XLCd from_lcd,
683    const char *from_type,
684    XLCd to_lcd,
685    const char *to_type)
686{
687    return create_tocs_conv(from_lcd, &methods_utf8tocs);
688}
689
690/* from XlcNUtf8String to XlcNChar */
691
692static int
693utf8tocs1(
694    XlcConv conv,
695    XPointer *from,
696    int *from_left,
697    XPointer *to,
698    int *to_left,
699    XPointer *args,
700    int num_args)
701{
702    Utf8Conv *preferred_charsets;
703    XlcCharSet last_charset = NULL;
704    unsigned char const *src;
705    unsigned char const *srcend;
706    unsigned char *dst;
707    unsigned char *dstend;
708    int unconv_num;
709
710    if (from == NULL || *from == NULL)
711	return 0;
712
713    preferred_charsets = (Utf8Conv *) conv->state;
714    src = (unsigned char const *) *from;
715    srcend = src + *from_left;
716    dst = (unsigned char *) *to;
717    dstend = dst + *to_left;
718    unconv_num = 0;
719
720    while (src < srcend && dst < dstend) {
721	Utf8Conv chosen_charset = NULL;
722	XlcSide chosen_side = XlcNONE;
723	ucs4_t wc;
724	int consumed;
725	int count;
726
727	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
728	if (consumed == RET_TOOFEW(0))
729	    break;
730	if (consumed == RET_ILSEQ) {
731	    src++;
732	    unconv_num++;
733	    continue;
734	}
735
736	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
737	if (count == RET_TOOSMALL)
738	    break;
739	if (count == RET_ILSEQ) {
740	    src += consumed;
741	    unconv_num++;
742	    continue;
743	}
744
745	if (last_charset == NULL) {
746	    last_charset =
747	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
748	    if (last_charset == NULL) {
749		src += consumed;
750		unconv_num++;
751		continue;
752	    }
753	} else {
754	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
755	          && (last_charset->side == XlcGLGR
756	              || last_charset->side == chosen_side)))
757		break;
758	}
759	src += consumed;
760	dst += count;
761	break;
762    }
763
764    if (last_charset == NULL)
765	return -1;
766
767    *from = (XPointer) src;
768    *from_left = srcend - src;
769    *to = (XPointer) dst;
770    *to_left = dstend - dst;
771
772    if (num_args >= 1)
773	*((XlcCharSet *)args[0]) = last_charset;
774
775    return unconv_num;
776}
777
778static XlcConvMethodsRec methods_utf8tocs1 = {
779    close_tocs_converter,
780    utf8tocs1,
781    NULL
782};
783
784static XlcConv
785open_utf8tocs1(
786    XLCd from_lcd,
787    const char *from_type,
788    XLCd to_lcd,
789    const char *to_type)
790{
791    return create_tocs_conv(from_lcd, &methods_utf8tocs1);
792}
793
794/* from XlcNUtf8String to XlcNString */
795
796static int
797utf8tostr(
798    XlcConv conv,
799    XPointer *from,
800    int *from_left,
801    XPointer *to,
802    int *to_left,
803    XPointer *args,
804    int num_args)
805{
806    unsigned char const *src;
807    unsigned char const *srcend;
808    unsigned char *dst;
809    unsigned char *dstend;
810    int unconv_num;
811
812    if (from == NULL || *from == NULL)
813	return 0;
814
815    src = (unsigned char const *) *from;
816    srcend = src + *from_left;
817    dst = (unsigned char *) *to;
818    dstend = dst + *to_left;
819    unconv_num = 0;
820
821    while (src < srcend) {
822	unsigned char c;
823	ucs4_t wc;
824	int consumed;
825
826	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
827	if (consumed == RET_TOOFEW(0))
828	    break;
829	if (dst == dstend)
830	    break;
831	if (consumed == RET_ILSEQ) {
832	    consumed = 1;
833	    c = BAD_CHAR;
834	    unconv_num++;
835	} else {
836	    if ((wc & ~(ucs4_t)0xff) != 0) {
837		c = BAD_CHAR;
838		unconv_num++;
839	    } else
840		c = (unsigned char) wc;
841	}
842	*dst++ = c;
843	src += consumed;
844    }
845
846    *from = (XPointer) src;
847    *from_left = srcend - src;
848    *to = (XPointer) dst;
849    *to_left = dstend - dst;
850
851    return unconv_num;
852}
853
854static XlcConvMethodsRec methods_utf8tostr = {
855    close_converter,
856    utf8tostr,
857    NULL
858};
859
860static XlcConv
861open_utf8tostr(
862    XLCd from_lcd,
863    const char *from_type,
864    XLCd to_lcd,
865    const char *to_type)
866{
867    return create_conv(from_lcd, &methods_utf8tostr);
868}
869
870/* from XlcNString to XlcNUtf8String */
871
872static int
873strtoutf8(
874    XlcConv conv,
875    XPointer *from,
876    int *from_left,
877    XPointer *to,
878    int *to_left,
879    XPointer *args,
880    int num_args)
881{
882    unsigned char const *src;
883    unsigned char const *srcend;
884    unsigned char *dst;
885    unsigned char *dstend;
886
887    if (from == NULL || *from == NULL)
888	return 0;
889
890    src = (unsigned char const *) *from;
891    srcend = src + *from_left;
892    dst = (unsigned char *) *to;
893    dstend = dst + *to_left;
894
895    while (src < srcend) {
896	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
897	if (count == RET_TOOSMALL)
898	    break;
899	dst += count;
900	src++;
901    }
902
903    *from = (XPointer) src;
904    *from_left = srcend - src;
905    *to = (XPointer) dst;
906    *to_left = dstend - dst;
907
908    return 0;
909}
910
911static XlcConvMethodsRec methods_strtoutf8 = {
912    close_converter,
913    strtoutf8,
914    NULL
915};
916
917static XlcConv
918open_strtoutf8(
919    XLCd from_lcd,
920    const char *from_type,
921    XLCd to_lcd,
922    const char *to_type)
923{
924    return create_conv(from_lcd, &methods_strtoutf8);
925}
926
927/* Support for the input methods. */
928
929XPointer
930_Utf8GetConvByName(
931    const char *name)
932{
933    XrmQuark xrm_name;
934    Utf8Conv convptr;
935    int i;
936
937    if (name == NULL)
938        return (XPointer) NULL;
939
940    lazy_init_all_charsets();
941    xrm_name = XrmStringToQuark(name);
942
943    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
944	if (convptr->xrm_name == xrm_name)
945	    return (XPointer) convptr->wctocs;
946    return (XPointer) NULL;
947}
948
949/* from XlcNUcsChar to XlcNChar, needed for input methods */
950
951static XlcConv
952create_ucstocs_conv(
953    XLCd lcd,
954    XlcConvMethods methods)
955{
956
957    if (XLC_PUBLIC_PART(lcd)->codeset
958	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
959	XlcConv conv;
960	Utf8Conv *preferred;
961
962	lazy_init_all_charsets();
963
964	conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
965	if (conv == (XlcConv) NULL)
966	    return (XlcConv) NULL;
967	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
968
969	preferred[0] = &all_charsets[0]; /* ISO10646 */
970	preferred[1] = (Utf8Conv) NULL;
971
972	conv->methods = methods;
973	conv->state = (XPointer) preferred;
974
975	return conv;
976    } else {
977	return create_tocs_conv(lcd, methods);
978    }
979}
980
981static int
982charset_wctocs_exactly(
983    Utf8Conv *preferred,
984    Utf8Conv *charsetp,
985    XlcSide *sidep,
986    XlcConv conv,
987    unsigned char *r,
988    ucs4_t wc,
989    int n)
990{
991    int count;
992    Utf8Conv convptr;
993
994    for (; *preferred != (Utf8Conv) NULL; preferred++) {
995	convptr = *preferred;
996	count = convptr->wctocs(conv, r, wc, n);
997	if (count == RET_TOOSMALL)
998	    return RET_TOOSMALL;
999	if (count != RET_ILSEQ) {
1000	    *charsetp = convptr;
1001	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
1002	    return count;
1003	}
1004    }
1005    return RET_ILSEQ;
1006}
1007
1008static int
1009ucstocs1(
1010    XlcConv conv,
1011    XPointer *from,
1012    int *from_left,
1013    XPointer *to,
1014    int *to_left,
1015    XPointer *args,
1016    int num_args)
1017{
1018    ucs4_t const *src = (ucs4_t const *) *from;
1019    unsigned char *dst = (unsigned char *) *to;
1020    int unconv_num = 0;
1021    Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1022    Utf8Conv chosen_charset = NULL;
1023    XlcSide chosen_side = XlcNONE;
1024    XlcCharSet charset = NULL;
1025    int count;
1026
1027    if (from == NULL || *from == NULL)
1028	return 0;
1029
1030    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1031                                   &chosen_side, conv, dst, *src, *to_left);
1032    if (count < 1) {
1033        unconv_num++;
1034        count = 0;
1035    } else {
1036        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1037    }
1038    if (charset == NULL)
1039	return -1;
1040
1041    *from = (XPointer) ++src;
1042    (*from_left)--;
1043    *to = (XPointer) dst;
1044    *to_left -= count;
1045
1046    if (num_args >= 1)
1047	*((XlcCharSet *)args[0]) = charset;
1048
1049    return unconv_num;
1050}
1051
1052static XlcConvMethodsRec methods_ucstocs1 = {
1053    close_tocs_converter,
1054    ucstocs1,
1055    NULL
1056};
1057
1058static XlcConv
1059open_ucstocs1(
1060    XLCd from_lcd,
1061    const char *from_type,
1062    XLCd to_lcd,
1063    const char *to_type)
1064{
1065    return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1066}
1067
1068/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1069
1070static int
1071ucstoutf8(
1072    XlcConv conv,
1073    XPointer *from,
1074    int *from_left,
1075    XPointer *to,
1076    int *to_left,
1077    XPointer *args,
1078    int num_args)
1079{
1080    const ucs4_t *src;
1081    const ucs4_t *srcend;
1082    unsigned char *dst;
1083    unsigned char *dstend;
1084    int unconv_num;
1085
1086    if (from == NULL || *from == NULL)
1087	return 0;
1088
1089    src = (const ucs4_t *) *from;
1090    srcend = src + *from_left;
1091    dst = (unsigned char *) *to;
1092    dstend = dst + *to_left;
1093    unconv_num = 0;
1094
1095    while (src < srcend) {
1096	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1097	if (count == RET_TOOSMALL)
1098	    break;
1099	if (count == RET_ILSEQ)
1100	    unconv_num++;
1101	src++;
1102	dst += count;
1103    }
1104
1105    *from = (XPointer) src;
1106    *from_left = srcend - src;
1107    *to = (XPointer) dst;
1108    *to_left = dstend - dst;
1109
1110    return unconv_num;
1111}
1112
1113static XlcConvMethodsRec methods_ucstoutf8 = {
1114    close_converter,
1115    ucstoutf8,
1116    NULL
1117};
1118
1119static XlcConv
1120open_ucstoutf8(
1121    XLCd from_lcd,
1122    const char *from_type,
1123    XLCd to_lcd,
1124    const char *to_type)
1125{
1126    return create_conv(from_lcd, &methods_ucstoutf8);
1127}
1128
1129/* Registers UTF-8 converters for a non-UTF-8 locale. */
1130void
1131_XlcAddUtf8Converters(
1132    XLCd lcd)
1133{
1134    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
1135    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
1136    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
1137    _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
1138    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
1139    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
1140    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
1141}
1142
1143/***************************************************************************/
1144/* Part II: UTF-8 locale loader conversion files
1145 *
1146 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1147 */
1148
1149/* from XlcNMultiByte to XlcNWideChar */
1150
1151static int
1152utf8towcs(
1153    XlcConv conv,
1154    XPointer *from,
1155    int *from_left,
1156    XPointer *to,
1157    int *to_left,
1158    XPointer *args,
1159    int num_args)
1160{
1161    unsigned char const *src;
1162    unsigned char const *srcend;
1163    wchar_t *dst;
1164    wchar_t *dstend;
1165    int unconv_num;
1166
1167    if (from == NULL || *from == NULL)
1168	return 0;
1169
1170    src = (unsigned char const *) *from;
1171    srcend = src + *from_left;
1172    dst = (wchar_t *) *to;
1173    dstend = dst + *to_left;
1174    unconv_num = 0;
1175
1176    while (src < srcend && dst < dstend) {
1177	ucs4_t wc;
1178	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1179	if (consumed == RET_TOOFEW(0))
1180	    break;
1181	if (consumed == RET_ILSEQ) {
1182	    src++;
1183	    *dst = BAD_WCHAR;
1184	    unconv_num++;
1185	} else {
1186	    src += consumed;
1187	    *dst = wc;
1188	}
1189	dst++;
1190    }
1191
1192    *from = (XPointer) src;
1193    *from_left = srcend - src;
1194    *to = (XPointer) dst;
1195    *to_left = dstend - dst;
1196
1197    return unconv_num;
1198}
1199
1200static XlcConvMethodsRec methods_utf8towcs = {
1201    close_converter,
1202    utf8towcs,
1203    NULL
1204};
1205
1206static XlcConv
1207open_utf8towcs(
1208    XLCd from_lcd,
1209    const char *from_type,
1210    XLCd to_lcd,
1211    const char *to_type)
1212{
1213    return create_conv(from_lcd, &methods_utf8towcs);
1214}
1215
1216/* from XlcNWideChar to XlcNMultiByte */
1217
1218static int
1219wcstoutf8(
1220    XlcConv conv,
1221    XPointer *from,
1222    int *from_left,
1223    XPointer *to,
1224    int *to_left,
1225    XPointer *args,
1226    int num_args)
1227{
1228    wchar_t const *src;
1229    wchar_t const *srcend;
1230    unsigned char *dst;
1231    unsigned char *dstend;
1232    int unconv_num;
1233
1234    if (from == NULL || *from == NULL)
1235	return 0;
1236
1237    src = (wchar_t const *) *from;
1238    srcend = src + *from_left;
1239    dst = (unsigned char *) *to;
1240    dstend = dst + *to_left;
1241    unconv_num = 0;
1242
1243    while (src < srcend) {
1244	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1245	if (count == RET_TOOSMALL)
1246	    break;
1247	if (count == RET_ILSEQ) {
1248	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1249	    if (count == RET_TOOSMALL)
1250		break;
1251	    unconv_num++;
1252	}
1253	dst += count;
1254	src++;
1255    }
1256
1257    *from = (XPointer) src;
1258    *from_left = srcend - src;
1259    *to = (XPointer) dst;
1260    *to_left = dstend - dst;
1261
1262    return unconv_num;
1263}
1264
1265static XlcConvMethodsRec methods_wcstoutf8 = {
1266    close_converter,
1267    wcstoutf8,
1268    NULL
1269};
1270
1271static XlcConv
1272open_wcstoutf8(
1273    XLCd from_lcd,
1274    const char *from_type,
1275    XLCd to_lcd,
1276    const char *to_type)
1277{
1278    return create_conv(from_lcd, &methods_wcstoutf8);
1279}
1280
1281/* from XlcNString to XlcNWideChar */
1282
1283static int
1284our_strtowcs(
1285    XlcConv conv,
1286    XPointer *from,
1287    int *from_left,
1288    XPointer *to,
1289    int *to_left,
1290    XPointer *args,
1291    int num_args)
1292{
1293    unsigned char const *src;
1294    unsigned char const *srcend;
1295    wchar_t *dst;
1296    wchar_t *dstend;
1297
1298    if (from == NULL || *from == NULL)
1299	return 0;
1300
1301    src = (unsigned char const *) *from;
1302    srcend = src + *from_left;
1303    dst = (wchar_t *) *to;
1304    dstend = dst + *to_left;
1305
1306    while (src < srcend && dst < dstend)
1307	*dst++ = (wchar_t) *src++;
1308
1309    *from = (XPointer) src;
1310    *from_left = srcend - src;
1311    *to = (XPointer) dst;
1312    *to_left = dstend - dst;
1313
1314    return 0;
1315}
1316
1317static XlcConvMethodsRec methods_strtowcs = {
1318    close_converter,
1319    our_strtowcs,
1320    NULL
1321};
1322
1323static XlcConv
1324open_strtowcs(
1325    XLCd from_lcd,
1326    const char *from_type,
1327    XLCd to_lcd,
1328    const char *to_type)
1329{
1330    return create_conv(from_lcd, &methods_strtowcs);
1331}
1332
1333/* from XlcNWideChar to XlcNString */
1334
1335static int
1336our_wcstostr(
1337    XlcConv conv,
1338    XPointer *from,
1339    int *from_left,
1340    XPointer *to,
1341    int *to_left,
1342    XPointer *args,
1343    int num_args)
1344{
1345    wchar_t const *src;
1346    wchar_t const *srcend;
1347    unsigned char *dst;
1348    unsigned char *dstend;
1349    int unconv_num;
1350
1351    if (from == NULL || *from == NULL)
1352	return 0;
1353
1354    src = (wchar_t const *) *from;
1355    srcend = src + *from_left;
1356    dst = (unsigned char *) *to;
1357    dstend = dst + *to_left;
1358    unconv_num = 0;
1359
1360    while (src < srcend && dst < dstend) {
1361	unsigned int wc = *src++;
1362	if (wc < 0x80)
1363	    *dst = wc;
1364	else {
1365	    *dst = BAD_CHAR;
1366	    unconv_num++;
1367	}
1368	dst++;
1369    }
1370
1371    *from = (XPointer) src;
1372    *from_left = srcend - src;
1373    *to = (XPointer) dst;
1374    *to_left = dstend - dst;
1375
1376    return unconv_num;
1377}
1378
1379static XlcConvMethodsRec methods_wcstostr = {
1380    close_converter,
1381    our_wcstostr,
1382    NULL
1383};
1384
1385static XlcConv
1386open_wcstostr(
1387    XLCd from_lcd,
1388    const char *from_type,
1389    XLCd to_lcd,
1390    const char *to_type)
1391{
1392    return create_conv(from_lcd, &methods_wcstostr);
1393}
1394
1395/* from XlcNCharSet to XlcNWideChar */
1396
1397static int
1398cstowcs(
1399    XlcConv conv,
1400    XPointer *from,
1401    int *from_left,
1402    XPointer *to,
1403    int *to_left,
1404    XPointer *args,
1405    int num_args)
1406{
1407    XlcCharSet charset;
1408    const char *name;
1409    Utf8Conv convptr;
1410    int i;
1411    unsigned char const *src;
1412    unsigned char const *srcend;
1413    wchar_t *dst;
1414    wchar_t *dstend;
1415    int unconv_num;
1416
1417    if (from == NULL || *from == NULL)
1418	return 0;
1419
1420    if (num_args < 1)
1421	return -1;
1422
1423    charset = (XlcCharSet) args[0];
1424    name = charset->encoding_name;
1425    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1426
1427    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1428	if (!strcmp(convptr->name, name))
1429	    break;
1430    if (i == 0)
1431	return -1;
1432
1433    src = (unsigned char const *) *from;
1434    srcend = src + *from_left;
1435    dst = (wchar_t *) *to;
1436    dstend = dst + *to_left;
1437    unconv_num = 0;
1438
1439    while (src < srcend && dst < dstend) {
1440	unsigned int wc;
1441	int consumed;
1442
1443	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1444	if (consumed == RET_ILSEQ)
1445	    return -1;
1446	if (consumed == RET_TOOFEW(0))
1447	    break;
1448
1449	*dst++ = wc;
1450	src += consumed;
1451    }
1452
1453    *from = (XPointer) src;
1454    *from_left = srcend - src;
1455    *to = (XPointer) dst;
1456    *to_left = dstend - dst;
1457
1458    return unconv_num;
1459}
1460
1461static XlcConvMethodsRec methods_cstowcs = {
1462    close_converter,
1463    cstowcs,
1464    NULL
1465};
1466
1467static XlcConv
1468open_cstowcs(
1469    XLCd from_lcd,
1470    const char *from_type,
1471    XLCd to_lcd,
1472    const char *to_type)
1473{
1474    lazy_init_all_charsets();
1475    return create_conv(from_lcd, &methods_cstowcs);
1476}
1477
1478/* from XlcNWideChar to XlcNCharSet */
1479
1480static int
1481wcstocs(
1482    XlcConv conv,
1483    XPointer *from,
1484    int *from_left,
1485    XPointer *to,
1486    int *to_left,
1487    XPointer *args,
1488    int num_args)
1489{
1490    Utf8Conv *preferred_charsets;
1491    XlcCharSet last_charset = NULL;
1492    wchar_t const *src;
1493    wchar_t const *srcend;
1494    unsigned char *dst;
1495    unsigned char *dstend;
1496    int unconv_num;
1497
1498    if (from == NULL || *from == NULL)
1499	return 0;
1500
1501    preferred_charsets = (Utf8Conv *) conv->state;
1502    src = (wchar_t const *) *from;
1503    srcend = src + *from_left;
1504    dst = (unsigned char *) *to;
1505    dstend = dst + *to_left;
1506    unconv_num = 0;
1507
1508    while (src < srcend && dst < dstend) {
1509	Utf8Conv chosen_charset = NULL;
1510	XlcSide chosen_side = XlcNONE;
1511	wchar_t wc = *src;
1512	int count;
1513
1514	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1515	if (count == RET_TOOSMALL)
1516	    break;
1517	if (count == RET_ILSEQ) {
1518	    src++;
1519	    unconv_num++;
1520	    continue;
1521	}
1522
1523	if (last_charset == NULL) {
1524	    last_charset =
1525	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1526	    if (last_charset == NULL) {
1527		src++;
1528		unconv_num++;
1529		continue;
1530	    }
1531	} else {
1532	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1533	          && (last_charset->side == XlcGLGR
1534	              || last_charset->side == chosen_side)))
1535		break;
1536	}
1537	src++;
1538	dst += count;
1539    }
1540
1541    if (last_charset == NULL)
1542	return -1;
1543
1544    *from = (XPointer) src;
1545    *from_left = srcend - src;
1546    *to = (XPointer) dst;
1547    *to_left = dstend - dst;
1548
1549    if (num_args >= 1)
1550	*((XlcCharSet *)args[0]) = last_charset;
1551
1552    return unconv_num;
1553}
1554
1555static XlcConvMethodsRec methods_wcstocs = {
1556    close_tocs_converter,
1557    wcstocs,
1558    NULL
1559};
1560
1561static XlcConv
1562open_wcstocs(
1563    XLCd from_lcd,
1564    const char *from_type,
1565    XLCd to_lcd,
1566    const char *to_type)
1567{
1568    return create_tocs_conv(from_lcd, &methods_wcstocs);
1569}
1570
1571/* from XlcNWideChar to XlcNChar */
1572
1573static int
1574wcstocs1(
1575    XlcConv conv,
1576    XPointer *from,
1577    int *from_left,
1578    XPointer *to,
1579    int *to_left,
1580    XPointer *args,
1581    int num_args)
1582{
1583    Utf8Conv *preferred_charsets;
1584    XlcCharSet last_charset = NULL;
1585    wchar_t const *src;
1586    wchar_t const *srcend;
1587    unsigned char *dst;
1588    unsigned char *dstend;
1589    int unconv_num;
1590
1591    if (from == NULL || *from == NULL)
1592	return 0;
1593
1594    preferred_charsets = (Utf8Conv *) conv->state;
1595    src = (wchar_t const *) *from;
1596    srcend = src + *from_left;
1597    dst = (unsigned char *) *to;
1598    dstend = dst + *to_left;
1599    unconv_num = 0;
1600
1601    while (src < srcend && dst < dstend) {
1602	Utf8Conv chosen_charset = NULL;
1603	XlcSide chosen_side = XlcNONE;
1604	wchar_t wc = *src;
1605	int count;
1606
1607	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1608	if (count == RET_TOOSMALL)
1609	    break;
1610	if (count == RET_ILSEQ) {
1611	    src++;
1612	    unconv_num++;
1613	    continue;
1614	}
1615
1616	if (last_charset == NULL) {
1617	    last_charset =
1618	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1619	    if (last_charset == NULL) {
1620		src++;
1621		unconv_num++;
1622		continue;
1623	    }
1624	} else {
1625	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1626	          && (last_charset->side == XlcGLGR
1627	              || last_charset->side == chosen_side)))
1628		break;
1629	}
1630	src++;
1631	dst += count;
1632	break;
1633    }
1634
1635    if (last_charset == NULL)
1636	return -1;
1637
1638    *from = (XPointer) src;
1639    *from_left = srcend - src;
1640    *to = (XPointer) dst;
1641    *to_left = dstend - dst;
1642
1643    if (num_args >= 1)
1644	*((XlcCharSet *)args[0]) = last_charset;
1645
1646    return unconv_num;
1647}
1648
1649static XlcConvMethodsRec methods_wcstocs1 = {
1650    close_tocs_converter,
1651    wcstocs1,
1652    NULL
1653};
1654
1655static XlcConv
1656open_wcstocs1(
1657    XLCd from_lcd,
1658    const char *from_type,
1659    XLCd to_lcd,
1660    const char *to_type)
1661{
1662    return create_tocs_conv(from_lcd, &methods_wcstocs1);
1663}
1664
1665/* trivial, no conversion */
1666
1667static int
1668identity(
1669    XlcConv conv,
1670    XPointer *from,
1671    int *from_left,
1672    XPointer *to,
1673    int *to_left,
1674    XPointer *args,
1675    int num_args)
1676{
1677    unsigned char const *src;
1678    unsigned char const *srcend;
1679    unsigned char *dst;
1680    unsigned char *dstend;
1681
1682    if (from == NULL || *from == NULL)
1683	return 0;
1684
1685    src = (unsigned char const *) *from;
1686    srcend = src + *from_left;
1687    dst = (unsigned char *) *to;
1688    dstend = dst + *to_left;
1689
1690    while (src < srcend && dst < dstend)
1691	*dst++ = *src++;
1692
1693    *from = (XPointer) src;
1694    *from_left = srcend - src;
1695    *to = (XPointer) dst;
1696    *to_left = dstend - dst;
1697
1698    return 0;
1699}
1700
1701static XlcConvMethodsRec methods_identity = {
1702    close_converter,
1703    identity,
1704    NULL
1705};
1706
1707static XlcConv
1708open_identity(
1709    XLCd from_lcd,
1710    const char *from_type,
1711    XLCd to_lcd,
1712    const char *to_type)
1713{
1714    return create_conv(from_lcd, &methods_identity);
1715}
1716
1717/* from MultiByte/WideChar to FontCharSet. */
1718/* They really use converters to CharSet
1719 * but with different create_conv procedure. */
1720
1721static XlcConv
1722create_tofontcs_conv(
1723    XLCd lcd,
1724    XlcConvMethods methods)
1725{
1726    XlcConv conv;
1727    int i, num, k, count;
1728    char **value, buf[20];
1729    Utf8Conv *preferred;
1730
1731    lazy_init_all_charsets();
1732
1733    for (i = 0, num = 0;; i++) {
1734	sprintf(buf, "fs%d.charset.name", i);
1735	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1736	if (count < 1) {
1737	    sprintf(buf, "fs%d.charset", i);
1738	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1739	    if (count < 1)
1740		break;
1741	}
1742	num += count;
1743    }
1744
1745    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1746    if (conv == (XlcConv) NULL)
1747	return (XlcConv) NULL;
1748    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1749
1750    /* Loop through all fontsets mentioned in the locale. */
1751    for (i = 0, num = 0;; i++) {
1752        sprintf(buf, "fs%d.charset.name", i);
1753        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1754        if (count < 1) {
1755            sprintf(buf, "fs%d.charset", i);
1756            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1757            if (count < 1)
1758                break;
1759        }
1760	while (count-- > 0) {
1761	    XlcCharSet charset = _XlcGetCharSet(*value++);
1762	    const char *name;
1763
1764	    if (charset == (XlcCharSet) NULL)
1765		continue;
1766
1767	    name = charset->encoding_name;
1768	    /* If it wasn't already encountered... */
1769	    for (k = num - 1; k >= 0; k--)
1770		if (!strcmp(preferred[k]->name, name))
1771		    break;
1772	    if (k < 0) {
1773                /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1774                if (!strcmp("ISO10646-1", name)) {
1775                    preferred[num++] = &all_charsets[ucs2_conv_index];
1776                    continue;
1777                }
1778		/* Look it up in all_charsets[]. */
1779		for (k = 0; k < all_charsets_count-1; k++)
1780		    if (!strcmp(all_charsets[k].name, name)) {
1781			/* Add it to the preferred set. */
1782			preferred[num++] = &all_charsets[k];
1783			break;
1784		    }
1785	    }
1786        }
1787    }
1788    preferred[num] = (Utf8Conv) NULL;
1789
1790    conv->methods = methods;
1791    conv->state = (XPointer) preferred;
1792
1793    return conv;
1794}
1795
1796static XlcConv
1797open_wcstofcs(
1798    XLCd from_lcd,
1799    const char *from_type,
1800    XLCd to_lcd,
1801    const char *to_type)
1802{
1803    return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1804}
1805
1806static XlcConv
1807open_utf8tofcs(
1808    XLCd from_lcd,
1809    const char *from_type,
1810    XLCd to_lcd,
1811    const char *to_type)
1812{
1813    return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1814}
1815
1816/* ========================== iconv Stuff ================================ */
1817
1818/* from XlcNCharSet to XlcNMultiByte */
1819
1820static int
1821iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1822	      XPointer *to, int *to_left, XPointer *args, int num_args)
1823{
1824    XlcCharSet charset;
1825    char const *name;
1826    Utf8Conv convptr;
1827    int i;
1828    unsigned char const *src;
1829    unsigned char const *srcend;
1830    unsigned char *dst;
1831    unsigned char *dstend;
1832    int unconv_num;
1833
1834    if (from == NULL || *from == NULL)
1835	return 0;
1836
1837    if (num_args < 1)
1838	return -1;
1839
1840    charset = (XlcCharSet) args[0];
1841    name = charset->encoding_name;
1842    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1843
1844    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1845	if (!strcmp(convptr->name, name))
1846	    break;
1847    if (i == 0)
1848	return -1;
1849
1850    src = (unsigned char const *) *from;
1851    srcend = src + *from_left;
1852    dst = (unsigned char *) *to;
1853    dstend = dst + *to_left;
1854    unconv_num = 0;
1855
1856    while (src < srcend) {
1857	ucs4_t wc;
1858	int consumed;
1859	int count;
1860
1861	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1862	if (consumed == RET_ILSEQ)
1863	    return -1;
1864	if (consumed == RET_TOOFEW(0))
1865	    break;
1866
1867    /* Use stdc iconv to convert widechar -> multibyte */
1868
1869	count = wctomb((char *)dst, wc);
1870	if (count == 0)
1871	    break;
1872	if (count == -1) {
1873	    count = wctomb((char *)dst, BAD_WCHAR);
1874	    if (count == 0)
1875		break;
1876	    unconv_num++;
1877	}
1878	src += consumed;
1879	dst += count;
1880    }
1881
1882    *from = (XPointer) src;
1883    *from_left = srcend - src;
1884    *to = (XPointer) dst;
1885    *to_left = dstend - dst;
1886
1887    return unconv_num;
1888
1889}
1890
1891static XlcConvMethodsRec iconv_cstombs_methods = {
1892    close_converter,
1893    iconv_cstombs,
1894    NULL
1895};
1896
1897static XlcConv
1898open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1899{
1900    lazy_init_all_charsets();
1901    return create_conv(from_lcd, &iconv_cstombs_methods);
1902}
1903
1904static int
1905iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1906	      XPointer *to, int *to_left, XPointer *args, int num_args)
1907{
1908    Utf8Conv *preferred_charsets;
1909    XlcCharSet last_charset = NULL;
1910    unsigned char const *src;
1911    unsigned char const *srcend;
1912    unsigned char *dst;
1913    unsigned char *dstend;
1914    int unconv_num;
1915
1916    if (from == NULL || *from == NULL)
1917	return 0;
1918
1919    preferred_charsets = (Utf8Conv *) conv->state;
1920    src = (unsigned char const *) *from;
1921    srcend = src + *from_left;
1922    dst = (unsigned char *) *to;
1923    dstend = dst + *to_left;
1924    unconv_num = 0;
1925
1926    while (src < srcend && dst < dstend) {
1927	Utf8Conv chosen_charset = NULL;
1928	XlcSide chosen_side = XlcNONE;
1929	wchar_t wc;
1930	int consumed;
1931	int count;
1932
1933    /* Uses stdc iconv to convert multibyte -> widechar */
1934
1935	consumed = mbtowc(&wc, (const char *)src, srcend-src);
1936	if (consumed == 0)
1937	    break;
1938	if (consumed == -1) {
1939	    src++;
1940	    unconv_num++;
1941	    continue;
1942	}
1943
1944	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1945
1946	if (count == RET_TOOSMALL)
1947	    break;
1948	if (count == RET_ILSEQ) {
1949	    src += consumed;
1950	    unconv_num++;
1951	    continue;
1952	}
1953
1954	if (last_charset == NULL) {
1955	    last_charset =
1956	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1957	    if (last_charset == NULL) {
1958		src += consumed;
1959		unconv_num++;
1960		continue;
1961	    }
1962	} else {
1963	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1964	          && (last_charset->side == XlcGLGR
1965	              || last_charset->side == chosen_side)))
1966		break;
1967	}
1968	src += consumed;
1969	dst += count;
1970    }
1971
1972    if (last_charset == NULL)
1973	return -1;
1974
1975    *from = (XPointer) src;
1976    *from_left = srcend - src;
1977    *to = (XPointer) dst;
1978    *to_left = dstend - dst;
1979
1980    if (num_args >= 1)
1981	*((XlcCharSet *)args[0]) = last_charset;
1982
1983    return unconv_num;
1984}
1985
1986static XlcConvMethodsRec iconv_mbstocs_methods = {
1987    close_tocs_converter,
1988    iconv_mbstocs,
1989    NULL
1990};
1991
1992static XlcConv
1993open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1994{
1995    return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1996}
1997
1998/* from XlcNMultiByte to XlcNChar */
1999
2000static int
2001iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
2002	     XPointer *to, int *to_left, XPointer *args, int num_args)
2003{
2004    Utf8Conv *preferred_charsets;
2005    XlcCharSet last_charset = NULL;
2006    unsigned char const *src;
2007    unsigned char const *srcend;
2008    unsigned char *dst;
2009    unsigned char *dstend;
2010    int unconv_num;
2011
2012    if (from == NULL || *from == NULL)
2013	return 0;
2014
2015    preferred_charsets = (Utf8Conv *) conv->state;
2016    src = (unsigned char const *) *from;
2017    srcend = src + *from_left;
2018    dst = (unsigned char *) *to;
2019    dstend = dst + *to_left;
2020    unconv_num = 0;
2021
2022    while (src < srcend && dst < dstend) {
2023	Utf8Conv chosen_charset = NULL;
2024	XlcSide chosen_side = XlcNONE;
2025	wchar_t wc;
2026	int consumed;
2027	int count;
2028
2029    /* Uses stdc iconv to convert multibyte -> widechar */
2030
2031	consumed = mbtowc(&wc, (const char *)src, srcend-src);
2032	if (consumed == 0)
2033	    break;
2034	if (consumed == -1) {
2035	    src++;
2036	    unconv_num++;
2037	    continue;
2038	}
2039
2040	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2041	if (count == RET_TOOSMALL)
2042	    break;
2043	if (count == RET_ILSEQ) {
2044	    src += consumed;
2045	    unconv_num++;
2046	    continue;
2047	}
2048
2049	if (last_charset == NULL) {
2050	    last_charset =
2051		_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2052	    if (last_charset == NULL) {
2053		src += consumed;
2054		unconv_num++;
2055		continue;
2056	    }
2057	} else {
2058	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2059		  && (last_charset->side == XlcGLGR
2060		      || last_charset->side == chosen_side)))
2061		break;
2062	}
2063	src += consumed;
2064	dst += count;
2065    }
2066
2067    if (last_charset == NULL)
2068	return -1;
2069
2070    *from = (XPointer) src;
2071    *from_left = srcend - src;
2072    *to = (XPointer) dst;
2073    *to_left = dstend - dst;
2074
2075    if (num_args >= 1)
2076	*((XlcCharSet *)args[0]) = last_charset;
2077
2078    return unconv_num;
2079}
2080
2081static XlcConvMethodsRec iconv_mbtocs_methods = {
2082    close_tocs_converter,
2083    iconv_mbtocs,
2084    NULL
2085};
2086
2087static XlcConv
2088open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2089{
2090    return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2091}
2092
2093/* from XlcNMultiByte to XlcNString */
2094
2095static int
2096iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2097	       XPointer *to, int *to_left, XPointer *args, int num_args)
2098{
2099    unsigned char const *src;
2100    unsigned char const *srcend;
2101    unsigned char *dst;
2102    unsigned char *dstend;
2103    int unconv_num;
2104
2105    if (from == NULL || *from == NULL)
2106	return 0;
2107
2108    src = (unsigned char const *) *from;
2109    srcend = src + *from_left;
2110    dst = (unsigned char *) *to;
2111    dstend = dst + *to_left;
2112    unconv_num = 0;
2113
2114    while (src < srcend) {
2115	unsigned char c;
2116	wchar_t wc;
2117	int consumed;
2118
2119    /* Uses stdc iconv to convert multibyte -> widechar */
2120
2121	consumed = mbtowc(&wc, (const char *)src, srcend-src);
2122	if (consumed == 0)
2123	    break;
2124	if (dst == dstend)
2125	    break;
2126	if (consumed == -1) {
2127	    consumed = 1;
2128	    c = BAD_CHAR;
2129	    unconv_num++;
2130	} else {
2131	    if ((wc & ~(wchar_t)0xff) != 0) {
2132		c = BAD_CHAR;
2133		unconv_num++;
2134	    } else
2135		c = (unsigned char) wc;
2136	}
2137	*dst++ = c;
2138	src += consumed;
2139    }
2140
2141    *from = (XPointer) src;
2142    *from_left = srcend - src;
2143    *to = (XPointer) dst;
2144    *to_left = dstend - dst;
2145
2146    return unconv_num;
2147}
2148
2149static XlcConvMethodsRec iconv_mbstostr_methods = {
2150    close_converter,
2151    iconv_mbstostr,
2152    NULL
2153};
2154
2155static XlcConv
2156open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2157{
2158    return create_conv(from_lcd, &iconv_mbstostr_methods);
2159}
2160
2161/* from XlcNString to XlcNMultiByte */
2162static int
2163iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2164	       XPointer *to, int *to_left, XPointer *args, int num_args)
2165{
2166    unsigned char const *src;
2167    unsigned char const *srcend;
2168    unsigned char *dst;
2169    unsigned char *dstend;
2170
2171    if (from == NULL || *from == NULL)
2172	return 0;
2173
2174    src = (unsigned char const *) *from;
2175    srcend = src + *from_left;
2176    dst = (unsigned char *) *to;
2177    dstend = dst + *to_left;
2178
2179    while (src < srcend) {
2180	int count = wctomb((char *)dst, *src);
2181	if (count < 0)
2182	    break;
2183	dst += count;
2184	src++;
2185    }
2186
2187    *from = (XPointer) src;
2188    *from_left = srcend - src;
2189    *to = (XPointer) dst;
2190    *to_left = dstend - dst;
2191
2192    return 0;
2193}
2194
2195static XlcConvMethodsRec iconv_strtombs_methods= {
2196    close_converter,
2197    iconv_strtombs,
2198    NULL
2199};
2200
2201static XlcConv
2202open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2203{
2204    return create_conv(from_lcd, &iconv_strtombs_methods);
2205}
2206
/***************************************************************************/
/* Part III: Converters for a locale loader based on the C library.
 *
 * Here we can assume that "multi-byte" is whatever encoding the C library's
 * mbtowc()/wctomb() handle in the current locale, and that `wchar_t' is
 * Unicode.
 */
2212
2213/* from XlcNMultiByte to XlcNWideChar */
2214static int
2215iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2216	       XPointer *to, int *to_left, XPointer *args,  int num_args)
2217{
2218    char *src = *((char **) from);
2219    wchar_t *dst = *((wchar_t **) to);
2220    int src_left = *from_left;
2221    int dst_left = *to_left;
2222    int length, unconv_num = 0;
2223
2224    while (src_left > 0 && dst_left > 0) {
2225	length = mbtowc(dst, src, src_left);
2226
2227	if (length > 0) {
2228	    src += length;
2229	    src_left -= length;
2230	    if (dst)
2231	        dst++;
2232	    dst_left--;
2233	} else if (length < 0) {
2234	    src++;
2235	    src_left--;
2236	    unconv_num++;
2237        } else {
2238            /* null ? */
2239            src++;
2240            src_left--;
2241            if (dst)
2242                *dst++ = L'\0';
2243            dst_left--;
2244        }
2245    }
2246
2247    *from = (XPointer) src;
2248    if (dst)
2249	*to = (XPointer) dst;
2250    *from_left = src_left;
2251    *to_left = dst_left;
2252
2253    return unconv_num;
2254}
2255
2256static XlcConvMethodsRec iconv_mbstowcs_methods = {
2257    close_converter,
2258    iconv_mbstowcs,
2259    NULL
2260} ;
2261
2262static XlcConv
2263open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2264{
2265    return create_conv(from_lcd, &iconv_mbstowcs_methods);
2266}
2267
2268static int
2269iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2270	       XPointer *to, int *to_left, XPointer *args, int num_args)
2271{
2272    wchar_t *src = *((wchar_t **) from);
2273    char *dst = *((char **) to);
2274    int src_left = *from_left;
2275    int dst_left = *to_left;
2276    int length, unconv_num = 0;
2277
2278    while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2279	length = wctomb(dst, *src);		/* XXX */
2280
2281        if (length > 0) {
2282	    src++;
2283	    src_left--;
2284	    if (dst)
2285		dst += length;
2286	    dst_left -= length;
2287	} else if (length < 0) {
2288	    src++;
2289	    src_left--;
2290	    unconv_num++;
2291	}
2292    }
2293
2294    *from = (XPointer) src;
2295    if (dst)
2296      *to = (XPointer) dst;
2297    *from_left = src_left;
2298    *to_left = dst_left;
2299
2300    return unconv_num;
2301}
2302
2303static XlcConvMethodsRec iconv_wcstombs_methods = {
2304    close_converter,
2305    iconv_wcstombs,
2306    NULL
2307} ;
2308
2309static XlcConv
2310open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2311{
2312    return create_conv(from_lcd, &iconv_wcstombs_methods);
2313}
2314
2315static XlcConv
2316open_iconv_mbstofcs(
2317    XLCd from_lcd,
2318    const char *from_type,
2319    XLCd to_lcd,
2320    const char *to_type)
2321{
2322    return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2323}
2324
2325/* Registers UTF-8 converters for a UTF-8 locale. */
2326
2327void
2328_XlcAddUtf8LocaleConverters(
2329    XLCd lcd)
2330{
2331    /* Register elementary converters. */
2332
2333    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
2334
2335    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
2336    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2337
2338    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2339
2340    /* Register converters for XlcNCharSet. This implicitly provides
2341     * converters from and to XlcNCompoundText. */
2342
2343    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
2344    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
2345    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
2346
2347    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2348    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2349    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2350
2351    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
2352    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
2353    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
2354    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
2355
2356    /* Register converters for XlcNFontCharSet */
2357    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
2358    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2359}
2360
2361void
2362_XlcAddGB18030LocaleConverters(
2363    XLCd lcd)
2364{
2365
2366    /* Register elementary converters. */
2367    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
2368    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
2369
2370    /* Register converters for XlcNCharSet. This implicitly provides
2371     * converters from and to XlcNCompoundText. */
2372
2373    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
2374    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
2375    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
2376    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
2377    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
2378
2379    /* Register converters for XlcNFontCharSet */
2380    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
2381
2382    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2383    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2384    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2385    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2386    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2387
2388    /* Register converters for XlcNFontCharSet */
2389    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2390}
2391