lcUTF8.c revision 61b2299d
1/* $TOG:  $ */
2/******************************************************************
3
4              Copyright 1993 by SunSoft, Inc.
5              Copyright 1999-2000 by Bruno Haible
6
7Permission to use, copy, modify, distribute, and sell this software
8and its documentation for any purpose is hereby granted without fee,
9provided that the above copyright notice appear in all copies and
10that both that copyright notice and this permission notice appear
11in supporting documentation, and that the names of SunSoft, Inc. and
12Bruno Haible not be used in advertising or publicity pertaining to
13distribution of the software without specific, written prior
14permission.  SunSoft, Inc. and Bruno Haible make no representations
15about the suitability of this software for any purpose.  It is
16provided "as is" without express or implied warranty.
17
18SunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
19TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
20AND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
21FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
22WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
23ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
24OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
25
26******************************************************************/
27/* $XFree86: xc/lib/X11/lcUTF8.c,v 1.15 2002/10/08 23:31:36 dawes Exp $ */
28
29/*
30 * This file contains:
31 *
32 * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
33 *
34 *    Used for three purposes:
35 *      1. The UTF-8 locales, see below.
36 *      2. Unicode aware applications for which the use of 8-bit character
37 *         sets is an anachronism.
38 *      3. For conversion from keysym to locale encoding.
39 *
40 * II. Conversion files for an UTF-8 locale loader.
41 *     Supports: all locales with codeset UTF-8.
42 *     How: Provides converters for UTF-8.
43 *     Platforms: all systems.
44 *
 * The loader itself is located in lcUTF8Load.c.
46 */
47
48/*
49 * The conversion from UTF-8 to CompoundText is realized in a very
50 * conservative way. Recall that CompoundText data is used for inter-client
51 * communication purposes. We distinguish three classes of clients:
52 * - Clients which accept only those pieces of CompoundText which belong to
53 *   the character set understood by the current locale.
54 *   (Example: clients which are linked to an older X11 library.)
55 * - Clients which accept CompoundText with multiple character sets and parse
56 *   it themselves.
57 *   (Example: emacs, xemacs.)
58 * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
59 *   functions for the conversion of CompoundText to their current locale's
60 *   multi-byte/wide-character format.
61 * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
62 * follows. For every character, it first tests whether the character is
63 * representable in the current locale's original (non-UTF-8) character set.
64 * If not, it goes through the list of predefined character sets for
65 * CompoundText and tests if the character is representable in that character
66 * set. If so, it encodes the character using its code within that character
67 * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since
68 * clients of the first and second kind ignore such encapsulated text,
69 * this encapsulation is kept to a minimum and terminated as early as possible.
70 *
71 * In a distant future, when clients of the first and second kind will have
72 * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
73 * without first going through the list of predefined character sets.
74 */
75
76#ifdef HAVE_CONFIG_H
77#include <config.h>
78#endif
79#include <stdio.h>
80#include "Xlibint.h"
81#include "XlcPubI.h"
82#include "XlcGeneric.h"
83
84static XlcConv
85create_conv(
86    XLCd lcd,
87    XlcConvMethods methods)
88{
89    XlcConv conv;
90
91    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec));
92    if (conv == (XlcConv) NULL)
93	return (XlcConv) NULL;
94
95    conv->methods = methods;
96    conv->state = NULL;
97
98    return conv;
99}
100
101static void
102close_converter(
103    XlcConv conv)
104{
105    Xfree((char *) conv);
106}
107
/*
 * Replacement characters for an invalid multibyte sequence or wide
 * character: BAD_WCHAR (U+FFFD) is emitted where the output is Unicode,
 * BAD_CHAR ('?') where the output is a single-byte string.
 */
#define BAD_WCHAR ((ucs4_t) 0xfffd)
#define BAD_CHAR '?'
111
112/***************************************************************************/
113/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
114 *
115 * Note that this code works in any locale. We store Unicode values in
116 * `ucs4_t' variables, but don't pass them to the user.
117 *
118 * This code has to support all character sets that are used for CompoundText,
119 * nothing more, nothing less. See the table in lcCT.c.
120 * Since the conversion _to_ CompoundText is likely to need the tables for all
121 * character sets at once, we don't use dynamic loading (of tables or shared
122 * libraries through iconv()). Use a fixed set of tables instead.
123 *
124 * We use statically computed tables, not dynamically allocated arrays,
125 * because it's more memory efficient: Different processes using the same
126 * libX11 shared library share the "text" and read-only "data" sections.
127 */
128
/* Holds one Unicode code point. */
typedef unsigned int ucs4_t;
/* NOTE(review): presumably the converter-handle name expected by the
   lcUniConv headers included below -- confirm against those headers. */
#define conv_t XlcConv

/* One entry of the charset table: a charset name plus its two converters. */
typedef struct _Utf8ConvRec {
    /* Charset encoding name, e.g. "ISO8859-1". */
    const char *name;
    /* Quark of name; NULLQUARK until init_all_charsets() has run. */
    XrmQuark xrm_name;
    /* Decodes bytes of this charset into one Unicode character. */
    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
    /* Encodes one Unicode character into bytes of this charset. */
    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
} Utf8ConvRec, *Utf8Conv;
138
139/*
140 * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
141 * converts the byte sequence starting at s to a wide character. Up to n bytes
142 * are available at s. n is >= 1.
143 * Result is number of bytes consumed (if a wide character was read),
144 * or 0 if invalid, or -1 if n too small.
145 *
146 * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
147 * converts the wide character wc to the character set xxx, and stores the
148 * result beginning at r. Up to n bytes may be written at r. n is >= 1.
149 * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
150 */
151
/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
   RET_TOOFEW(0) evaluates to -1; the converters in this file compare the
   consumed count against it to detect that too few input bytes remain. */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset)
   Has the same numeric value (-1) as RET_TOOFEW(0). */
#define RET_TOOSMALL   -1
158
159/*
160 * The tables below are bijective. It would be possible to extend the
161 * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
162 * but *only* with characters not contained in any other table, and *only*
163 * when the current locale is not an UTF-8 locale.
164 */
165
166#include "lcUniConv/utf8.h"
167#include "lcUniConv/ucs2be.h"
168#ifdef notused
169#include "lcUniConv/ascii.h"
170#endif
171#include "lcUniConv/iso8859_1.h"
172#include "lcUniConv/iso8859_2.h"
173#include "lcUniConv/iso8859_3.h"
174#include "lcUniConv/iso8859_4.h"
175#include "lcUniConv/iso8859_5.h"
176#include "lcUniConv/iso8859_6.h"
177#include "lcUniConv/iso8859_7.h"
178#include "lcUniConv/iso8859_8.h"
179#include "lcUniConv/iso8859_9.h"
180#include "lcUniConv/iso8859_10.h"
181#include "lcUniConv/iso8859_11.h"
182#include "lcUniConv/iso8859_13.h"
183#include "lcUniConv/iso8859_14.h"
184#include "lcUniConv/iso8859_15.h"
185#include "lcUniConv/iso8859_16.h"
186#include "lcUniConv/iso8859_9e.h"
187#include "lcUniConv/jisx0201.h"
188#include "lcUniConv/tis620.h"
189#include "lcUniConv/koi8_r.h"
190#include "lcUniConv/koi8_u.h"
191#include "lcUniConv/koi8_c.h"
192#include "lcUniConv/armscii_8.h"
193#include "lcUniConv/cp1133.h"
194#include "lcUniConv/mulelao.h"
195#include "lcUniConv/viscii.h"
196#include "lcUniConv/tcvn.h"
197#include "lcUniConv/georgian_academy.h"
198#include "lcUniConv/georgian_ps.h"
199#include "lcUniConv/cp1251.h"
200#include "lcUniConv/cp1255.h"
201#include "lcUniConv/cp1256.h"
202#include "lcUniConv/tatar_cyr.h"
203
/* NOTE(review): appears to be the summary record used by the multi-byte
   charset tables included right after this typedef -- confirm against the
   lcUniConv headers. */
typedef struct {
    unsigned short indx; /* index into big table */
    unsigned short used; /* bitmask of used entries */
} Summary16;
208
209#include "lcUniConv/gb2312.h"
210#include "lcUniConv/jisx0208.h"
211#include "lcUniConv/jisx0212.h"
212#include "lcUniConv/ksc5601.h"
213#include "lcUniConv/big5.h"
214#include "lcUniConv/big5_emacs.h"
215#include "lcUniConv/big5hkscs.h"
216#include "lcUniConv/gbk.h"
217
/*
 * Table of every charset this file can convert to and from Unicode.
 * The layout is relied upon by the size/index macros and by the search
 * loops in this file:
 *   - first entry:        ISO10646-1 handled by the utf8 converters,
 *   - middle entries:     the legacy charsets,
 *   - next-to-last entry: ISO10646-1 via utf8 again, as a fallback,
 *   - last entry:         ISO10646-1 via the ucs2be converters, for fonts.
 * The xrm_name fields start out as NULLQUARK and are filled in by
 * init_all_charsets(), which is why the table is not const.
 */
static Utf8ConvRec all_charsets[] = {
    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
       (for lookup speed), once at the end (as a fallback).  */
    { "ISO10646-1", NULLQUARK,
	utf8_mbtowc, utf8_wctomb
    },

    { "ISO8859-1", NULLQUARK,
	iso8859_1_mbtowc, iso8859_1_wctomb
    },
    { "ISO8859-2", NULLQUARK,
	iso8859_2_mbtowc, iso8859_2_wctomb
    },
    { "ISO8859-3", NULLQUARK,
	iso8859_3_mbtowc, iso8859_3_wctomb
    },
    { "ISO8859-4", NULLQUARK,
	iso8859_4_mbtowc, iso8859_4_wctomb
    },
    { "ISO8859-5", NULLQUARK,
	iso8859_5_mbtowc, iso8859_5_wctomb
    },
    { "ISO8859-6", NULLQUARK,
	iso8859_6_mbtowc, iso8859_6_wctomb
    },
    { "ISO8859-7", NULLQUARK,
	iso8859_7_mbtowc, iso8859_7_wctomb
    },
    { "ISO8859-8", NULLQUARK,
	iso8859_8_mbtowc, iso8859_8_wctomb
    },
    { "ISO8859-9", NULLQUARK,
	iso8859_9_mbtowc, iso8859_9_wctomb
    },
    { "ISO8859-10", NULLQUARK,
	iso8859_10_mbtowc, iso8859_10_wctomb
    },
    { "ISO8859-11", NULLQUARK,
	iso8859_11_mbtowc, iso8859_11_wctomb
    },
    { "ISO8859-13", NULLQUARK,
	iso8859_13_mbtowc, iso8859_13_wctomb
    },
    { "ISO8859-14", NULLQUARK,
	iso8859_14_mbtowc, iso8859_14_wctomb
    },
    { "ISO8859-15", NULLQUARK,
	iso8859_15_mbtowc, iso8859_15_wctomb
    },
    { "ISO8859-16", NULLQUARK,
	iso8859_16_mbtowc, iso8859_16_wctomb
    },
    { "JISX0201.1976-0", NULLQUARK,
	jisx0201_mbtowc, jisx0201_wctomb
    },
    { "TIS620-0", NULLQUARK,
	tis620_mbtowc, tis620_wctomb
    },
    { "GB2312.1980-0", NULLQUARK,
	gb2312_mbtowc, gb2312_wctomb
    },
    { "JISX0208.1983-0", NULLQUARK,
	jisx0208_mbtowc, jisx0208_wctomb
    },
    { "JISX0208.1990-0", NULLQUARK,
	jisx0208_mbtowc, jisx0208_wctomb
    },
    { "JISX0212.1990-0", NULLQUARK,
	jisx0212_mbtowc, jisx0212_wctomb
    },
    { "KSC5601.1987-0", NULLQUARK,
	ksc5601_mbtowc, ksc5601_wctomb
    },
    { "KOI8-R", NULLQUARK,
	koi8_r_mbtowc, koi8_r_wctomb
    },
    { "KOI8-U", NULLQUARK,
	koi8_u_mbtowc, koi8_u_wctomb
    },
    { "KOI8-C", NULLQUARK,
	koi8_c_mbtowc, koi8_c_wctomb
    },
    { "TATAR-CYR", NULLQUARK,
	tatar_cyr_mbtowc, tatar_cyr_wctomb
    },
    { "ARMSCII-8", NULLQUARK,
	armscii_8_mbtowc, armscii_8_wctomb
    },
    { "IBM-CP1133", NULLQUARK,
	cp1133_mbtowc, cp1133_wctomb
    },
    { "MULELAO-1", NULLQUARK,
	mulelao_mbtowc, mulelao_wctomb
    },
    { "VISCII1.1-1", NULLQUARK,
	viscii_mbtowc, viscii_wctomb
    },
    { "TCVN-5712", NULLQUARK,
	tcvn_mbtowc, tcvn_wctomb
    },
    { "GEORGIAN-ACADEMY", NULLQUARK,
	georgian_academy_mbtowc, georgian_academy_wctomb
    },
    { "GEORGIAN-PS", NULLQUARK,
	georgian_ps_mbtowc, georgian_ps_wctomb
    },
    { "ISO8859-9E", NULLQUARK,
	iso8859_9e_mbtowc, iso8859_9e_wctomb
    },
    { "MICROSOFT-CP1251", NULLQUARK,
	cp1251_mbtowc, cp1251_wctomb
    },
    { "MICROSOFT-CP1255", NULLQUARK,
	cp1255_mbtowc, cp1255_wctomb
    },
    { "MICROSOFT-CP1256", NULLQUARK,
	cp1256_mbtowc, cp1256_wctomb
    },
    { "BIG5-0", NULLQUARK,
	big5_mbtowc, big5_wctomb
    },
    { "BIG5-E0", NULLQUARK,
	big5_0_mbtowc, big5_0_wctomb
    },
    { "BIG5-E1", NULLQUARK,
	big5_1_mbtowc, big5_1_wctomb
    },
    { "GBK-0", NULLQUARK,
	gbk_mbtowc, gbk_wctomb
    },
    { "BIG5HKSCS-0", NULLQUARK,
	big5hkscs_mbtowc, big5hkscs_wctomb
    },

    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
       (for lookup speed), once at the end (as a fallback).  */
    { "ISO10646-1", NULLQUARK,
	utf8_mbtowc, utf8_wctomb
    },

    /* Encoding ISO10646-1 for fonts means UCS2-like encoding
       so for conversion to FontCharSet we need this record */
    { "ISO10646-1", NULLQUARK,
	ucs2be_mbtowc, ucs2be_wctomb
    }
};
364
/* Number of entries in all_charsets[], including the trailing UCS-2 record. */
#define charsets_table_size (sizeof(all_charsets)/sizeof(all_charsets[0]))
/* Number of entries excluding the trailing UCS-2 record.  Several search
   loops in this file use all_charsets_count-1 instead, which additionally
   leaves out the fallback ISO10646-1 entry that precedes the UCS-2 record. */
#define all_charsets_count  (charsets_table_size - 1)
/* Index of the trailing UCS-2 record within all_charsets[]. */
#define ucs2_conv_index     (charsets_table_size - 1)
368
369static void
370init_all_charsets (void)
371{
372    Utf8Conv convptr;
373    int i;
374
375    for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
376	convptr->xrm_name = XrmStringToQuark(convptr->name);
377}
378
/* Runs init_all_charsets() unless the first table entry already carries a
   quark, i.e. unless the table has been initialized before. */
#define lazy_init_all_charsets()					\
    do {								\
	if (all_charsets[0].xrm_name == NULLQUARK)			\
	    init_all_charsets();					\
    } while (0)
384
385/* from XlcNCharSet to XlcNUtf8String */
386
387static int
388cstoutf8(
389    XlcConv conv,
390    XPointer *from,
391    int *from_left,
392    XPointer *to,
393    int *to_left,
394    XPointer *args,
395    int num_args)
396{
397    XlcCharSet charset;
398    const char *name;
399    Utf8Conv convptr;
400    int i;
401    unsigned char const *src;
402    unsigned char const *srcend;
403    unsigned char *dst;
404    unsigned char *dstend;
405    int unconv_num;
406
407    if (from == NULL || *from == NULL)
408	return 0;
409
410    if (num_args < 1)
411	return -1;
412
413    charset = (XlcCharSet) args[0];
414    name = charset->encoding_name;
415    /* not charset->name because the latter has a ":GL"/":GR" suffix */
416
417    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
418	if (!strcmp(convptr->name, name))
419	    break;
420    if (i == 0)
421	return -1;
422
423    src = (unsigned char const *) *from;
424    srcend = src + *from_left;
425    dst = (unsigned char *) *to;
426    dstend = dst + *to_left;
427    unconv_num = 0;
428
429    while (src < srcend) {
430	ucs4_t wc;
431	int consumed;
432	int count;
433
434	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
435	if (consumed == RET_ILSEQ)
436	    return -1;
437	if (consumed == RET_TOOFEW(0))
438	    break;
439
440	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
441	if (count == RET_TOOSMALL)
442	    break;
443	if (count == RET_ILSEQ) {
444	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
445	    if (count == RET_TOOSMALL)
446		break;
447	    unconv_num++;
448	}
449	src += consumed;
450	dst += count;
451    }
452
453    *from = (XPointer) src;
454    *from_left = srcend - src;
455    *to = (XPointer) dst;
456    *to_left = dstend - dst;
457
458    return unconv_num;
459}
460
/* Method table handed to create_conv() by open_cstoutf8().
   NOTE(review): the three slots presumably are close, convert and reset --
   confirm against the XlcConvMethodsRec declaration. */
static XlcConvMethodsRec methods_cstoutf8 = {
    close_converter,
    cstoutf8,
    NULL
};
466
467static XlcConv
468open_cstoutf8(
469    XLCd from_lcd,
470    const char *from_type,
471    XLCd to_lcd,
472    const char *to_type)
473{
474    lazy_init_all_charsets();
475    return create_conv(from_lcd, &methods_cstoutf8);
476}
477
478/* from XlcNUtf8String to XlcNCharSet */
479
480static XlcConv
481create_tocs_conv(
482    XLCd lcd,
483    XlcConvMethods methods)
484{
485    XlcConv conv;
486    CodeSet *codeset_list;
487    int codeset_num;
488    int charset_num;
489    int i, j, k;
490    Utf8Conv *preferred;
491
492    lazy_init_all_charsets();
493
494    codeset_list = XLC_GENERIC(lcd, codeset_list);
495    codeset_num = XLC_GENERIC(lcd, codeset_num);
496
497    charset_num = 0;
498    for (i = 0; i < codeset_num; i++)
499	charset_num += codeset_list[i]->num_charsets;
500    if (charset_num > all_charsets_count-1)
501	charset_num = all_charsets_count-1;
502
503    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)
504			     + (charset_num + 1) * sizeof(Utf8Conv));
505    if (conv == (XlcConv) NULL)
506	return (XlcConv) NULL;
507    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
508
509    /* Loop through all codesets mentioned in the locale. */
510    charset_num = 0;
511    for (i = 0; i < codeset_num; i++) {
512	XlcCharSet *charsets = codeset_list[i]->charset_list;
513	int num_charsets = codeset_list[i]->num_charsets;
514	for (j = 0; j < num_charsets; j++) {
515	    const char *name = charsets[j]->encoding_name;
516	    /* If it wasn't already encountered... */
517	    for (k = charset_num-1; k >= 0; k--)
518		if (!strcmp(preferred[k]->name, name))
519		    break;
520	    if (k < 0) {
521		/* Look it up in all_charsets[]. */
522		for (k = 0; k < all_charsets_count-1; k++)
523		    if (!strcmp(all_charsets[k].name, name)) {
524			/* Add it to the preferred set. */
525			preferred[charset_num++] = &all_charsets[k];
526			break;
527		    }
528	    }
529	}
530    }
531    preferred[charset_num] = (Utf8Conv) NULL;
532
533    conv->methods = methods;
534    conv->state = (XPointer) preferred;
535
536    return conv;
537}
538
539static void
540close_tocs_converter(
541    XlcConv conv)
542{
543    /* conv->state is allocated together with conv, free both at once.  */
544    Xfree((char *) conv);
545}
546
547/*
548 * Converts a Unicode character to an appropriate character set. The NULL
549 * terminated array of preferred character sets is passed as first argument.
550 * If successful, *charsetp is set to the character set that was used, and
551 * *sidep is set to the character set side (XlcGL or XlcGR).
552 */
553static int
554charset_wctocs(
555    Utf8Conv *preferred,
556    Utf8Conv *charsetp,
557    XlcSide *sidep,
558    XlcConv conv,
559    unsigned char *r,
560    ucs4_t wc,
561    int n)
562{
563    int count;
564    Utf8Conv convptr;
565    int i;
566
567    for (; *preferred != (Utf8Conv) NULL; preferred++) {
568	convptr = *preferred;
569	count = convptr->wctocs(conv, r, wc, n);
570	if (count == RET_TOOSMALL)
571	    return RET_TOOSMALL;
572	if (count != RET_ILSEQ) {
573	    *charsetp = convptr;
574	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
575	    return count;
576	}
577    }
578    for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
579	count = convptr->wctocs(conv, r, wc, n);
580	if (count == RET_TOOSMALL)
581	    return RET_TOOSMALL;
582	if (count != RET_ILSEQ) {
583	    *charsetp = convptr;
584	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
585	    return count;
586	}
587    }
588    return RET_ILSEQ;
589}
590
591static int
592utf8tocs(
593    XlcConv conv,
594    XPointer *from,
595    int *from_left,
596    XPointer *to,
597    int *to_left,
598    XPointer *args,
599    int num_args)
600{
601    Utf8Conv *preferred_charsets;
602    XlcCharSet last_charset = NULL;
603    unsigned char const *src;
604    unsigned char const *srcend;
605    unsigned char *dst;
606    unsigned char *dstend;
607    int unconv_num;
608
609    if (from == NULL || *from == NULL)
610	return 0;
611
612    preferred_charsets = (Utf8Conv *) conv->state;
613    src = (unsigned char const *) *from;
614    srcend = src + *from_left;
615    dst = (unsigned char *) *to;
616    dstend = dst + *to_left;
617    unconv_num = 0;
618
619    while (src < srcend && dst < dstend) {
620	Utf8Conv chosen_charset = NULL;
621	XlcSide chosen_side = XlcNONE;
622	ucs4_t wc;
623	int consumed;
624	int count;
625
626	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
627	if (consumed == RET_TOOFEW(0))
628	    break;
629	if (consumed == RET_ILSEQ) {
630	    src++;
631	    unconv_num++;
632	    continue;
633	}
634
635	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
636	if (count == RET_TOOSMALL)
637	    break;
638	if (count == RET_ILSEQ) {
639	    src += consumed;
640	    unconv_num++;
641	    continue;
642	}
643
644	if (last_charset == NULL) {
645	    last_charset =
646	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
647	    if (last_charset == NULL) {
648		src += consumed;
649		unconv_num++;
650		continue;
651	    }
652	} else {
653	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
654	          && (last_charset->side == XlcGLGR
655	              || last_charset->side == chosen_side)))
656		break;
657	}
658	src += consumed;
659	dst += count;
660    }
661
662    if (last_charset == NULL)
663	return -1;
664
665    *from = (XPointer) src;
666    *from_left = srcend - src;
667    *to = (XPointer) dst;
668    *to_left = dstend - dst;
669
670    if (num_args >= 1)
671	*((XlcCharSet *)args[0]) = last_charset;
672
673    return unconv_num;
674}
675
/* Method table handed to create_tocs_conv() by open_utf8tocs(). */
static XlcConvMethodsRec methods_utf8tocs = {
    close_tocs_converter,
    utf8tocs,
    NULL
};
681
682static XlcConv
683open_utf8tocs(
684    XLCd from_lcd,
685    const char *from_type,
686    XLCd to_lcd,
687    const char *to_type)
688{
689    return create_tocs_conv(from_lcd, &methods_utf8tocs);
690}
691
692/* from XlcNUtf8String to XlcNChar */
693
694static int
695utf8tocs1(
696    XlcConv conv,
697    XPointer *from,
698    int *from_left,
699    XPointer *to,
700    int *to_left,
701    XPointer *args,
702    int num_args)
703{
704    Utf8Conv *preferred_charsets;
705    XlcCharSet last_charset = NULL;
706    unsigned char const *src;
707    unsigned char const *srcend;
708    unsigned char *dst;
709    unsigned char *dstend;
710    int unconv_num;
711
712    if (from == NULL || *from == NULL)
713	return 0;
714
715    preferred_charsets = (Utf8Conv *) conv->state;
716    src = (unsigned char const *) *from;
717    srcend = src + *from_left;
718    dst = (unsigned char *) *to;
719    dstend = dst + *to_left;
720    unconv_num = 0;
721
722    while (src < srcend && dst < dstend) {
723	Utf8Conv chosen_charset = NULL;
724	XlcSide chosen_side = XlcNONE;
725	ucs4_t wc;
726	int consumed;
727	int count;
728
729	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
730	if (consumed == RET_TOOFEW(0))
731	    break;
732	if (consumed == RET_ILSEQ) {
733	    src++;
734	    unconv_num++;
735	    continue;
736	}
737
738	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
739	if (count == RET_TOOSMALL)
740	    break;
741	if (count == RET_ILSEQ) {
742	    src += consumed;
743	    unconv_num++;
744	    continue;
745	}
746
747	if (last_charset == NULL) {
748	    last_charset =
749	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
750	    if (last_charset == NULL) {
751		src += consumed;
752		unconv_num++;
753		continue;
754	    }
755	} else {
756	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
757	          && (last_charset->side == XlcGLGR
758	              || last_charset->side == chosen_side)))
759		break;
760	}
761	src += consumed;
762	dst += count;
763	break;
764    }
765
766    if (last_charset == NULL)
767	return -1;
768
769    *from = (XPointer) src;
770    *from_left = srcend - src;
771    *to = (XPointer) dst;
772    *to_left = dstend - dst;
773
774    if (num_args >= 1)
775	*((XlcCharSet *)args[0]) = last_charset;
776
777    return unconv_num;
778}
779
/* Method table handed to create_tocs_conv() by open_utf8tocs1(). */
static XlcConvMethodsRec methods_utf8tocs1 = {
    close_tocs_converter,
    utf8tocs1,
    NULL
};
785
786static XlcConv
787open_utf8tocs1(
788    XLCd from_lcd,
789    const char *from_type,
790    XLCd to_lcd,
791    const char *to_type)
792{
793    return create_tocs_conv(from_lcd, &methods_utf8tocs1);
794}
795
796/* from XlcNUtf8String to XlcNString */
797
798static int
799utf8tostr(
800    XlcConv conv,
801    XPointer *from,
802    int *from_left,
803    XPointer *to,
804    int *to_left,
805    XPointer *args,
806    int num_args)
807{
808    unsigned char const *src;
809    unsigned char const *srcend;
810    unsigned char *dst;
811    unsigned char *dstend;
812    int unconv_num;
813
814    if (from == NULL || *from == NULL)
815	return 0;
816
817    src = (unsigned char const *) *from;
818    srcend = src + *from_left;
819    dst = (unsigned char *) *to;
820    dstend = dst + *to_left;
821    unconv_num = 0;
822
823    while (src < srcend) {
824	unsigned char c;
825	ucs4_t wc;
826	int consumed;
827
828	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
829	if (consumed == RET_TOOFEW(0))
830	    break;
831	if (dst == dstend)
832	    break;
833	if (consumed == RET_ILSEQ) {
834	    consumed = 1;
835	    c = BAD_CHAR;
836	    unconv_num++;
837	} else {
838	    if ((wc & ~(ucs4_t)0xff) != 0) {
839		c = BAD_CHAR;
840		unconv_num++;
841	    } else
842		c = (unsigned char) wc;
843	}
844	*dst++ = c;
845	src += consumed;
846    }
847
848    *from = (XPointer) src;
849    *from_left = srcend - src;
850    *to = (XPointer) dst;
851    *to_left = dstend - dst;
852
853    return unconv_num;
854}
855
/* Method table handed to create_conv() by open_utf8tostr(). */
static XlcConvMethodsRec methods_utf8tostr = {
    close_converter,
    utf8tostr,
    NULL
};
861
862static XlcConv
863open_utf8tostr(
864    XLCd from_lcd,
865    const char *from_type,
866    XLCd to_lcd,
867    const char *to_type)
868{
869    return create_conv(from_lcd, &methods_utf8tostr);
870}
871
872/* from XlcNString to XlcNUtf8String */
873
874static int
875strtoutf8(
876    XlcConv conv,
877    XPointer *from,
878    int *from_left,
879    XPointer *to,
880    int *to_left,
881    XPointer *args,
882    int num_args)
883{
884    unsigned char const *src;
885    unsigned char const *srcend;
886    unsigned char *dst;
887    unsigned char *dstend;
888
889    if (from == NULL || *from == NULL)
890	return 0;
891
892    src = (unsigned char const *) *from;
893    srcend = src + *from_left;
894    dst = (unsigned char *) *to;
895    dstend = dst + *to_left;
896
897    while (src < srcend) {
898	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
899	if (count == RET_TOOSMALL)
900	    break;
901	dst += count;
902	src++;
903    }
904
905    *from = (XPointer) src;
906    *from_left = srcend - src;
907    *to = (XPointer) dst;
908    *to_left = dstend - dst;
909
910    return 0;
911}
912
/* Method table handed to create_conv() by open_strtoutf8(). */
static XlcConvMethodsRec methods_strtoutf8 = {
    close_converter,
    strtoutf8,
    NULL
};
918
919static XlcConv
920open_strtoutf8(
921    XLCd from_lcd,
922    const char *from_type,
923    XLCd to_lcd,
924    const char *to_type)
925{
926    return create_conv(from_lcd, &methods_strtoutf8);
927}
928
929/* Support for the input methods. */
930
931XPointer
932_Utf8GetConvByName(
933    const char *name)
934{
935    XrmQuark xrm_name;
936    Utf8Conv convptr;
937    int i;
938
939    if (name == NULL)
940        return (XPointer) NULL;
941
942    lazy_init_all_charsets();
943    xrm_name = XrmStringToQuark(name);
944
945    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
946	if (convptr->xrm_name == xrm_name)
947	    return (XPointer) convptr->wctocs;
948    return (XPointer) NULL;
949}
950
951/* from XlcNUcsChar to XlcNChar, needed for input methods */
952
953static XlcConv
954create_ucstocs_conv(
955    XLCd lcd,
956    XlcConvMethods methods)
957{
958
959    if (XLC_PUBLIC_PART(lcd)->codeset
960	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
961	XlcConv conv;
962	Utf8Conv *preferred;
963
964	lazy_init_all_charsets();
965
966	conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
967	if (conv == (XlcConv) NULL)
968	    return (XlcConv) NULL;
969	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
970
971	preferred[0] = &all_charsets[0]; /* ISO10646 */
972	preferred[1] = (Utf8Conv) NULL;
973
974	conv->methods = methods;
975	conv->state = (XPointer) preferred;
976
977	return conv;
978    } else {
979	return create_tocs_conv(lcd, methods);
980    }
981}
982
983static int
984charset_wctocs_exactly(
985    Utf8Conv *preferred,
986    Utf8Conv *charsetp,
987    XlcSide *sidep,
988    XlcConv conv,
989    unsigned char *r,
990    ucs4_t wc,
991    int n)
992{
993    int count;
994    Utf8Conv convptr;
995
996    for (; *preferred != (Utf8Conv) NULL; preferred++) {
997	convptr = *preferred;
998	count = convptr->wctocs(conv, r, wc, n);
999	if (count == RET_TOOSMALL)
1000	    return RET_TOOSMALL;
1001	if (count != RET_ILSEQ) {
1002	    *charsetp = convptr;
1003	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
1004	    return count;
1005	}
1006    }
1007    return RET_ILSEQ;
1008}
1009
1010static int
1011ucstocs1(
1012    XlcConv conv,
1013    XPointer *from,
1014    int *from_left,
1015    XPointer *to,
1016    int *to_left,
1017    XPointer *args,
1018    int num_args)
1019{
1020    ucs4_t const *src = (ucs4_t const *) *from;
1021    unsigned char *dst = (unsigned char *) *to;
1022    int unconv_num = 0;
1023    Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1024    Utf8Conv chosen_charset = NULL;
1025    XlcSide chosen_side = XlcNONE;
1026    XlcCharSet charset = NULL;
1027    int count;
1028
1029    if (from == NULL || *from == NULL)
1030	return 0;
1031
1032    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1033                                   &chosen_side, conv, dst, *src, *to_left);
1034    if (count < 1) {
1035        unconv_num++;
1036        count = 0;
1037    } else {
1038        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1039    }
1040    if (charset == NULL)
1041	return -1;
1042
1043    *from = (XPointer) ++src;
1044    (*from_left)--;
1045    *to = (XPointer) dst;
1046    *to_left -= count;
1047
1048    if (num_args >= 1)
1049	*((XlcCharSet *)args[0]) = charset;
1050
1051    return unconv_num;
1052}
1053
/* Method table handed to create_ucstocs_conv() by open_ucstocs1(). */
static XlcConvMethodsRec methods_ucstocs1 = {
    close_tocs_converter,
    ucstocs1,
    NULL
};
1059
1060static XlcConv
1061open_ucstocs1(
1062    XLCd from_lcd,
1063    const char *from_type,
1064    XLCd to_lcd,
1065    const char *to_type)
1066{
1067    return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1068}
1069
1070/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1071
1072static int
1073ucstoutf8(
1074    XlcConv conv,
1075    XPointer *from,
1076    int *from_left,
1077    XPointer *to,
1078    int *to_left,
1079    XPointer *args,
1080    int num_args)
1081{
1082    const ucs4_t *src;
1083    const ucs4_t *srcend;
1084    unsigned char *dst;
1085    unsigned char *dstend;
1086    int unconv_num;
1087
1088    if (from == NULL || *from == NULL)
1089	return 0;
1090
1091    src = (const ucs4_t *) *from;
1092    srcend = src + *from_left;
1093    dst = (unsigned char *) *to;
1094    dstend = dst + *to_left;
1095    unconv_num = 0;
1096
1097    while (src < srcend) {
1098	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1099	if (count == RET_TOOSMALL)
1100	    break;
1101	if (count == RET_ILSEQ)
1102	    unconv_num++;
1103	src++;
1104	dst += count;
1105    }
1106
1107    *from = (XPointer) src;
1108    *from_left = srcend - src;
1109    *to = (XPointer) dst;
1110    *to_left = dstend - dst;
1111
1112    return unconv_num;
1113}
1114
/* Method table handed to create_conv() by open_ucstoutf8(). */
static XlcConvMethodsRec methods_ucstoutf8 = {
    close_converter,
    ucstoutf8,
    NULL
};
1120
1121static XlcConv
1122open_ucstoutf8(
1123    XLCd from_lcd,
1124    const char *from_type,
1125    XLCd to_lcd,
1126    const char *to_type)
1127{
1128    return create_conv(from_lcd, &methods_ucstoutf8);
1129}
1130
/*
 * Registers UTF-8 converters for a non-UTF-8 locale.
 * Every converter is registered with lcd as both the source and the
 * destination locale.
 */
void
_XlcAddUtf8Converters(
    XLCd lcd)
{
    /* Charset-tagged text <-> UTF-8. */
    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
    /* UTF-8 -> charset, one character per call. */
    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
    /* Single-byte string <-> UTF-8. */
    _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
    /* UCS-4 input, needed for input methods. */
    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
}
1144
1145/***************************************************************************/
1146/* Part II: UTF-8 locale loader conversion files
1147 *
1148 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1149 */
1150
1151/* from XlcNMultiByte to XlcNWideChar */
1152
1153static int
1154utf8towcs(
1155    XlcConv conv,
1156    XPointer *from,
1157    int *from_left,
1158    XPointer *to,
1159    int *to_left,
1160    XPointer *args,
1161    int num_args)
1162{
1163    unsigned char const *src;
1164    unsigned char const *srcend;
1165    wchar_t *dst;
1166    wchar_t *dstend;
1167    int unconv_num;
1168
1169    if (from == NULL || *from == NULL)
1170	return 0;
1171
1172    src = (unsigned char const *) *from;
1173    srcend = src + *from_left;
1174    dst = (wchar_t *) *to;
1175    dstend = dst + *to_left;
1176    unconv_num = 0;
1177
1178    while (src < srcend && dst < dstend) {
1179	ucs4_t wc;
1180	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1181	if (consumed == RET_TOOFEW(0))
1182	    break;
1183	if (consumed == RET_ILSEQ) {
1184	    src++;
1185	    *dst = BAD_WCHAR;
1186	    unconv_num++;
1187	} else {
1188	    src += consumed;
1189	    *dst = wc;
1190	}
1191	dst++;
1192    }
1193
1194    *from = (XPointer) src;
1195    *from_left = srcend - src;
1196    *to = (XPointer) dst;
1197    *to_left = dstend - dst;
1198
1199    return unconv_num;
1200}
1201
1202static XlcConvMethodsRec methods_utf8towcs = {
1203    close_converter,
1204    utf8towcs,
1205    NULL
1206};
1207
1208static XlcConv
1209open_utf8towcs(
1210    XLCd from_lcd,
1211    const char *from_type,
1212    XLCd to_lcd,
1213    const char *to_type)
1214{
1215    return create_conv(from_lcd, &methods_utf8towcs);
1216}
1217
1218/* from XlcNWideChar to XlcNMultiByte */
1219
1220static int
1221wcstoutf8(
1222    XlcConv conv,
1223    XPointer *from,
1224    int *from_left,
1225    XPointer *to,
1226    int *to_left,
1227    XPointer *args,
1228    int num_args)
1229{
1230    wchar_t const *src;
1231    wchar_t const *srcend;
1232    unsigned char *dst;
1233    unsigned char *dstend;
1234    int unconv_num;
1235
1236    if (from == NULL || *from == NULL)
1237	return 0;
1238
1239    src = (wchar_t const *) *from;
1240    srcend = src + *from_left;
1241    dst = (unsigned char *) *to;
1242    dstend = dst + *to_left;
1243    unconv_num = 0;
1244
1245    while (src < srcend) {
1246	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1247	if (count == RET_TOOSMALL)
1248	    break;
1249	if (count == RET_ILSEQ) {
1250	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1251	    if (count == RET_TOOSMALL)
1252		break;
1253	    unconv_num++;
1254	}
1255	dst += count;
1256	src++;
1257    }
1258
1259    *from = (XPointer) src;
1260    *from_left = srcend - src;
1261    *to = (XPointer) dst;
1262    *to_left = dstend - dst;
1263
1264    return unconv_num;
1265}
1266
1267static XlcConvMethodsRec methods_wcstoutf8 = {
1268    close_converter,
1269    wcstoutf8,
1270    NULL
1271};
1272
1273static XlcConv
1274open_wcstoutf8(
1275    XLCd from_lcd,
1276    const char *from_type,
1277    XLCd to_lcd,
1278    const char *to_type)
1279{
1280    return create_conv(from_lcd, &methods_wcstoutf8);
1281}
1282
1283/* from XlcNString to XlcNWideChar */
1284
1285static int
1286our_strtowcs(
1287    XlcConv conv,
1288    XPointer *from,
1289    int *from_left,
1290    XPointer *to,
1291    int *to_left,
1292    XPointer *args,
1293    int num_args)
1294{
1295    unsigned char const *src;
1296    unsigned char const *srcend;
1297    wchar_t *dst;
1298    wchar_t *dstend;
1299
1300    if (from == NULL || *from == NULL)
1301	return 0;
1302
1303    src = (unsigned char const *) *from;
1304    srcend = src + *from_left;
1305    dst = (wchar_t *) *to;
1306    dstend = dst + *to_left;
1307
1308    while (src < srcend && dst < dstend)
1309	*dst++ = (wchar_t) *src++;
1310
1311    *from = (XPointer) src;
1312    *from_left = srcend - src;
1313    *to = (XPointer) dst;
1314    *to_left = dstend - dst;
1315
1316    return 0;
1317}
1318
1319static XlcConvMethodsRec methods_strtowcs = {
1320    close_converter,
1321    our_strtowcs,
1322    NULL
1323};
1324
1325static XlcConv
1326open_strtowcs(
1327    XLCd from_lcd,
1328    const char *from_type,
1329    XLCd to_lcd,
1330    const char *to_type)
1331{
1332    return create_conv(from_lcd, &methods_strtowcs);
1333}
1334
1335/* from XlcNWideChar to XlcNString */
1336
1337static int
1338our_wcstostr(
1339    XlcConv conv,
1340    XPointer *from,
1341    int *from_left,
1342    XPointer *to,
1343    int *to_left,
1344    XPointer *args,
1345    int num_args)
1346{
1347    wchar_t const *src;
1348    wchar_t const *srcend;
1349    unsigned char *dst;
1350    unsigned char *dstend;
1351    int unconv_num;
1352
1353    if (from == NULL || *from == NULL)
1354	return 0;
1355
1356    src = (wchar_t const *) *from;
1357    srcend = src + *from_left;
1358    dst = (unsigned char *) *to;
1359    dstend = dst + *to_left;
1360    unconv_num = 0;
1361
1362    while (src < srcend && dst < dstend) {
1363	unsigned int wc = *src++;
1364	if (wc < 0x80)
1365	    *dst = wc;
1366	else {
1367	    *dst = BAD_CHAR;
1368	    unconv_num++;
1369	}
1370	dst++;
1371    }
1372
1373    *from = (XPointer) src;
1374    *from_left = srcend - src;
1375    *to = (XPointer) dst;
1376    *to_left = dstend - dst;
1377
1378    return unconv_num;
1379}
1380
1381static XlcConvMethodsRec methods_wcstostr = {
1382    close_converter,
1383    our_wcstostr,
1384    NULL
1385};
1386
1387static XlcConv
1388open_wcstostr(
1389    XLCd from_lcd,
1390    const char *from_type,
1391    XLCd to_lcd,
1392    const char *to_type)
1393{
1394    return create_conv(from_lcd, &methods_wcstostr);
1395}
1396
1397/* from XlcNCharSet to XlcNWideChar */
1398
1399static int
1400cstowcs(
1401    XlcConv conv,
1402    XPointer *from,
1403    int *from_left,
1404    XPointer *to,
1405    int *to_left,
1406    XPointer *args,
1407    int num_args)
1408{
1409    XlcCharSet charset;
1410    const char *name;
1411    Utf8Conv convptr;
1412    int i;
1413    unsigned char const *src;
1414    unsigned char const *srcend;
1415    wchar_t *dst;
1416    wchar_t *dstend;
1417    int unconv_num;
1418
1419    if (from == NULL || *from == NULL)
1420	return 0;
1421
1422    if (num_args < 1)
1423	return -1;
1424
1425    charset = (XlcCharSet) args[0];
1426    name = charset->encoding_name;
1427    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1428
1429    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1430	if (!strcmp(convptr->name, name))
1431	    break;
1432    if (i == 0)
1433	return -1;
1434
1435    src = (unsigned char const *) *from;
1436    srcend = src + *from_left;
1437    dst = (wchar_t *) *to;
1438    dstend = dst + *to_left;
1439    unconv_num = 0;
1440
1441    while (src < srcend && dst < dstend) {
1442	unsigned int wc;
1443	int consumed;
1444
1445	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1446	if (consumed == RET_ILSEQ)
1447	    return -1;
1448	if (consumed == RET_TOOFEW(0))
1449	    break;
1450
1451	*dst++ = wc;
1452	src += consumed;
1453    }
1454
1455    *from = (XPointer) src;
1456    *from_left = srcend - src;
1457    *to = (XPointer) dst;
1458    *to_left = dstend - dst;
1459
1460    return unconv_num;
1461}
1462
1463static XlcConvMethodsRec methods_cstowcs = {
1464    close_converter,
1465    cstowcs,
1466    NULL
1467};
1468
1469static XlcConv
1470open_cstowcs(
1471    XLCd from_lcd,
1472    const char *from_type,
1473    XLCd to_lcd,
1474    const char *to_type)
1475{
1476    lazy_init_all_charsets();
1477    return create_conv(from_lcd, &methods_cstowcs);
1478}
1479
1480/* from XlcNWideChar to XlcNCharSet */
1481
1482static int
1483wcstocs(
1484    XlcConv conv,
1485    XPointer *from,
1486    int *from_left,
1487    XPointer *to,
1488    int *to_left,
1489    XPointer *args,
1490    int num_args)
1491{
1492    Utf8Conv *preferred_charsets;
1493    XlcCharSet last_charset = NULL;
1494    wchar_t const *src;
1495    wchar_t const *srcend;
1496    unsigned char *dst;
1497    unsigned char *dstend;
1498    int unconv_num;
1499
1500    if (from == NULL || *from == NULL)
1501	return 0;
1502
1503    preferred_charsets = (Utf8Conv *) conv->state;
1504    src = (wchar_t const *) *from;
1505    srcend = src + *from_left;
1506    dst = (unsigned char *) *to;
1507    dstend = dst + *to_left;
1508    unconv_num = 0;
1509
1510    while (src < srcend && dst < dstend) {
1511	Utf8Conv chosen_charset = NULL;
1512	XlcSide chosen_side = XlcNONE;
1513	wchar_t wc = *src;
1514	int count;
1515
1516	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1517	if (count == RET_TOOSMALL)
1518	    break;
1519	if (count == RET_ILSEQ) {
1520	    src++;
1521	    unconv_num++;
1522	    continue;
1523	}
1524
1525	if (last_charset == NULL) {
1526	    last_charset =
1527	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1528	    if (last_charset == NULL) {
1529		src++;
1530		unconv_num++;
1531		continue;
1532	    }
1533	} else {
1534	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1535	          && (last_charset->side == XlcGLGR
1536	              || last_charset->side == chosen_side)))
1537		break;
1538	}
1539	src++;
1540	dst += count;
1541    }
1542
1543    if (last_charset == NULL)
1544	return -1;
1545
1546    *from = (XPointer) src;
1547    *from_left = srcend - src;
1548    *to = (XPointer) dst;
1549    *to_left = dstend - dst;
1550
1551    if (num_args >= 1)
1552	*((XlcCharSet *)args[0]) = last_charset;
1553
1554    return unconv_num;
1555}
1556
1557static XlcConvMethodsRec methods_wcstocs = {
1558    close_tocs_converter,
1559    wcstocs,
1560    NULL
1561};
1562
1563static XlcConv
1564open_wcstocs(
1565    XLCd from_lcd,
1566    const char *from_type,
1567    XLCd to_lcd,
1568    const char *to_type)
1569{
1570    return create_tocs_conv(from_lcd, &methods_wcstocs);
1571}
1572
1573/* from XlcNWideChar to XlcNChar */
1574
1575static int
1576wcstocs1(
1577    XlcConv conv,
1578    XPointer *from,
1579    int *from_left,
1580    XPointer *to,
1581    int *to_left,
1582    XPointer *args,
1583    int num_args)
1584{
1585    Utf8Conv *preferred_charsets;
1586    XlcCharSet last_charset = NULL;
1587    wchar_t const *src;
1588    wchar_t const *srcend;
1589    unsigned char *dst;
1590    unsigned char *dstend;
1591    int unconv_num;
1592
1593    if (from == NULL || *from == NULL)
1594	return 0;
1595
1596    preferred_charsets = (Utf8Conv *) conv->state;
1597    src = (wchar_t const *) *from;
1598    srcend = src + *from_left;
1599    dst = (unsigned char *) *to;
1600    dstend = dst + *to_left;
1601    unconv_num = 0;
1602
1603    while (src < srcend && dst < dstend) {
1604	Utf8Conv chosen_charset = NULL;
1605	XlcSide chosen_side = XlcNONE;
1606	wchar_t wc = *src;
1607	int count;
1608
1609	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1610	if (count == RET_TOOSMALL)
1611	    break;
1612	if (count == RET_ILSEQ) {
1613	    src++;
1614	    unconv_num++;
1615	    continue;
1616	}
1617
1618	if (last_charset == NULL) {
1619	    last_charset =
1620	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1621	    if (last_charset == NULL) {
1622		src++;
1623		unconv_num++;
1624		continue;
1625	    }
1626	} else {
1627	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1628	          && (last_charset->side == XlcGLGR
1629	              || last_charset->side == chosen_side)))
1630		break;
1631	}
1632	src++;
1633	dst += count;
1634	break;
1635    }
1636
1637    if (last_charset == NULL)
1638	return -1;
1639
1640    *from = (XPointer) src;
1641    *from_left = srcend - src;
1642    *to = (XPointer) dst;
1643    *to_left = dstend - dst;
1644
1645    if (num_args >= 1)
1646	*((XlcCharSet *)args[0]) = last_charset;
1647
1648    return unconv_num;
1649}
1650
1651static XlcConvMethodsRec methods_wcstocs1 = {
1652    close_tocs_converter,
1653    wcstocs1,
1654    NULL
1655};
1656
1657static XlcConv
1658open_wcstocs1(
1659    XLCd from_lcd,
1660    const char *from_type,
1661    XLCd to_lcd,
1662    const char *to_type)
1663{
1664    return create_tocs_conv(from_lcd, &methods_wcstocs1);
1665}
1666
1667/* trivial, no conversion */
1668
1669static int
1670identity(
1671    XlcConv conv,
1672    XPointer *from,
1673    int *from_left,
1674    XPointer *to,
1675    int *to_left,
1676    XPointer *args,
1677    int num_args)
1678{
1679    unsigned char const *src;
1680    unsigned char const *srcend;
1681    unsigned char *dst;
1682    unsigned char *dstend;
1683
1684    if (from == NULL || *from == NULL)
1685	return 0;
1686
1687    src = (unsigned char const *) *from;
1688    srcend = src + *from_left;
1689    dst = (unsigned char *) *to;
1690    dstend = dst + *to_left;
1691
1692    while (src < srcend && dst < dstend)
1693	*dst++ = *src++;
1694
1695    *from = (XPointer) src;
1696    *from_left = srcend - src;
1697    *to = (XPointer) dst;
1698    *to_left = dstend - dst;
1699
1700    return 0;
1701}
1702
1703static XlcConvMethodsRec methods_identity = {
1704    close_converter,
1705    identity,
1706    NULL
1707};
1708
1709static XlcConv
1710open_identity(
1711    XLCd from_lcd,
1712    const char *from_type,
1713    XLCd to_lcd,
1714    const char *to_type)
1715{
1716    return create_conv(from_lcd, &methods_identity);
1717}
1718
1719/* from MultiByte/WideChar to FontCharSet. */
1720/* They really use converters to CharSet
1721 * but with different create_conv procedure. */
1722
1723static XlcConv
1724create_tofontcs_conv(
1725    XLCd lcd,
1726    XlcConvMethods methods)
1727{
1728    XlcConv conv;
1729    int i, num, k, count;
1730    char **value, buf[20];
1731    Utf8Conv *preferred;
1732
1733    lazy_init_all_charsets();
1734
1735    for (i = 0, num = 0;; i++) {
1736	sprintf(buf, "fs%d.charset.name", i);
1737	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1738	if (count < 1) {
1739	    sprintf(buf, "fs%d.charset", i);
1740	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1741	    if (count < 1)
1742		break;
1743	}
1744	num += count;
1745    }
1746
1747    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1748    if (conv == (XlcConv) NULL)
1749	return (XlcConv) NULL;
1750    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1751
1752    /* Loop through all fontsets mentioned in the locale. */
1753    for (i = 0, num = 0;; i++) {
1754        sprintf(buf, "fs%d.charset.name", i);
1755        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1756        if (count < 1) {
1757            sprintf(buf, "fs%d.charset", i);
1758            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1759            if (count < 1)
1760                break;
1761        }
1762	while (count-- > 0) {
1763	    XlcCharSet charset = _XlcGetCharSet(*value++);
1764	    const char *name;
1765
1766	    if (charset == (XlcCharSet) NULL)
1767		continue;
1768
1769	    name = charset->encoding_name;
1770	    /* If it wasn't already encountered... */
1771	    for (k = num - 1; k >= 0; k--)
1772		if (!strcmp(preferred[k]->name, name))
1773		    break;
1774	    if (k < 0) {
1775                /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1776                if (!strcmp("ISO10646-1", name)) {
1777                    preferred[num++] = &all_charsets[ucs2_conv_index];
1778                    continue;
1779                }
1780		/* Look it up in all_charsets[]. */
1781		for (k = 0; k < all_charsets_count-1; k++)
1782		    if (!strcmp(all_charsets[k].name, name)) {
1783			/* Add it to the preferred set. */
1784			preferred[num++] = &all_charsets[k];
1785			break;
1786		    }
1787	    }
1788        }
1789    }
1790    preferred[num] = (Utf8Conv) NULL;
1791
1792    conv->methods = methods;
1793    conv->state = (XPointer) preferred;
1794
1795    return conv;
1796}
1797
1798static XlcConv
1799open_wcstofcs(
1800    XLCd from_lcd,
1801    const char *from_type,
1802    XLCd to_lcd,
1803    const char *to_type)
1804{
1805    return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1806}
1807
1808static XlcConv
1809open_utf8tofcs(
1810    XLCd from_lcd,
1811    const char *from_type,
1812    XLCd to_lcd,
1813    const char *to_type)
1814{
1815    return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1816}
1817
1818/* ========================== iconv Stuff ================================ */
1819
1820/* from XlcNCharSet to XlcNMultiByte */
1821
1822static int
1823iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1824	      XPointer *to, int *to_left, XPointer *args, int num_args)
1825{
1826    XlcCharSet charset;
1827    char *name;
1828    Utf8Conv convptr;
1829    int i;
1830    unsigned char const *src;
1831    unsigned char const *srcend;
1832    unsigned char *dst;
1833    unsigned char *dstend;
1834    int unconv_num;
1835
1836    if (from == NULL || *from == NULL)
1837	return 0;
1838
1839    if (num_args < 1)
1840	return -1;
1841
1842    charset = (XlcCharSet) args[0];
1843    name = charset->encoding_name;
1844    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1845
1846    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1847	if (!strcmp(convptr->name, name))
1848	    break;
1849    if (i == 0)
1850	return -1;
1851
1852    src = (unsigned char const *) *from;
1853    srcend = src + *from_left;
1854    dst = (unsigned char *) *to;
1855    dstend = dst + *to_left;
1856    unconv_num = 0;
1857
1858    while (src < srcend) {
1859	ucs4_t wc;
1860	int consumed;
1861	int count;
1862
1863	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1864	if (consumed == RET_ILSEQ)
1865	    return -1;
1866	if (consumed == RET_TOOFEW(0))
1867	    break;
1868
1869    /* Use stdc iconv to convert widechar -> multibyte */
1870
1871	count = wctomb(dst, wc);
1872	if (count == 0)
1873	    break;
1874	if (count == -1) {
1875	    count = wctomb(dst, BAD_WCHAR);
1876	    if (count == 0)
1877		break;
1878	    unconv_num++;
1879	}
1880	src += consumed;
1881	dst += count;
1882    }
1883
1884    *from = (XPointer) src;
1885    *from_left = srcend - src;
1886    *to = (XPointer) dst;
1887    *to_left = dstend - dst;
1888
1889    return unconv_num;
1890
1891}
1892
1893static XlcConvMethodsRec iconv_cstombs_methods = {
1894    close_converter,
1895    iconv_cstombs,
1896    NULL
1897};
1898
1899static XlcConv
1900open_iconv_cstombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
1901{
1902    lazy_init_all_charsets();
1903    return create_conv(from_lcd, &iconv_cstombs_methods);
1904}
1905
1906static int
1907iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1908	      XPointer *to, int *to_left, XPointer *args, int num_args)
1909{
1910    Utf8Conv *preferred_charsets;
1911    XlcCharSet last_charset = NULL;
1912    unsigned char const *src;
1913    unsigned char const *srcend;
1914    unsigned char *dst;
1915    unsigned char *dstend;
1916    int unconv_num;
1917
1918    if (from == NULL || *from == NULL)
1919	return 0;
1920
1921    preferred_charsets = (Utf8Conv *) conv->state;
1922    src = (unsigned char const *) *from;
1923    srcend = src + *from_left;
1924    dst = (unsigned char *) *to;
1925    dstend = dst + *to_left;
1926    unconv_num = 0;
1927
1928    while (src < srcend && dst < dstend) {
1929	Utf8Conv chosen_charset = NULL;
1930	XlcSide chosen_side = XlcNONE;
1931	wchar_t wc;
1932	int consumed;
1933	int count;
1934
1935    /* Uses stdc iconv to convert multibyte -> widechar */
1936
1937	consumed = mbtowc(&wc, src, srcend-src);
1938	if (consumed == 0)
1939	    break;
1940	if (consumed == -1) {
1941	    src++;
1942	    unconv_num++;
1943	    continue;
1944	}
1945
1946	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1947
1948	if (count == RET_TOOSMALL)
1949	    break;
1950	if (count == RET_ILSEQ) {
1951	    src += consumed;
1952	    unconv_num++;
1953	    continue;
1954	}
1955
1956	if (last_charset == NULL) {
1957	    last_charset =
1958	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1959	    if (last_charset == NULL) {
1960		src += consumed;
1961		unconv_num++;
1962		continue;
1963	    }
1964	} else {
1965	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1966	          && (last_charset->side == XlcGLGR
1967	              || last_charset->side == chosen_side)))
1968		break;
1969	}
1970	src += consumed;
1971	dst += count;
1972    }
1973
1974    if (last_charset == NULL)
1975	return -1;
1976
1977    *from = (XPointer) src;
1978    *from_left = srcend - src;
1979    *to = (XPointer) dst;
1980    *to_left = dstend - dst;
1981
1982    if (num_args >= 1)
1983	*((XlcCharSet *)args[0]) = last_charset;
1984
1985    return unconv_num;
1986}
1987
1988static XlcConvMethodsRec iconv_mbstocs_methods = {
1989    close_tocs_converter,
1990    iconv_mbstocs,
1991    NULL
1992};
1993
1994static XlcConv
1995open_iconv_mbstocs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
1996{
1997    return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1998}
1999
2000/* from XlcNMultiByte to XlcNChar */
2001
2002static int
2003iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
2004	     XPointer *to, int *to_left, XPointer *args, int num_args)
2005{
2006    Utf8Conv *preferred_charsets;
2007    XlcCharSet last_charset = NULL;
2008    unsigned char const *src;
2009    unsigned char const *srcend;
2010    unsigned char *dst;
2011    unsigned char *dstend;
2012    int unconv_num;
2013
2014    if (from == NULL || *from == NULL)
2015	return 0;
2016
2017    preferred_charsets = (Utf8Conv *) conv->state;
2018    src = (unsigned char const *) *from;
2019    srcend = src + *from_left;
2020    dst = (unsigned char *) *to;
2021    dstend = dst + *to_left;
2022    unconv_num = 0;
2023
2024    while (src < srcend && dst < dstend) {
2025	Utf8Conv chosen_charset = NULL;
2026	XlcSide chosen_side = XlcNONE;
2027	wchar_t wc;
2028	int consumed;
2029	int count;
2030
2031    /* Uses stdc iconv to convert multibyte -> widechar */
2032
2033	consumed = mbtowc(&wc, src, srcend-src);
2034	if (consumed == 0)
2035	    break;
2036	if (consumed == -1) {
2037	    src++;
2038	    unconv_num++;
2039	    continue;
2040	}
2041
2042	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2043	if (count == RET_TOOSMALL)
2044	    break;
2045	if (count == RET_ILSEQ) {
2046	    src += consumed;
2047	    unconv_num++;
2048	    continue;
2049	}
2050
2051	if (last_charset == NULL) {
2052	    last_charset =
2053		_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2054	    if (last_charset == NULL) {
2055		src += consumed;
2056		unconv_num++;
2057		continue;
2058	    }
2059	} else {
2060	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2061		  && (last_charset->side == XlcGLGR
2062		      || last_charset->side == chosen_side)))
2063		break;
2064	}
2065	src += consumed;
2066	dst += count;
2067    }
2068
2069    if (last_charset == NULL)
2070	return -1;
2071
2072    *from = (XPointer) src;
2073    *from_left = srcend - src;
2074    *to = (XPointer) dst;
2075    *to_left = dstend - dst;
2076
2077    if (num_args >= 1)
2078	*((XlcCharSet *)args[0]) = last_charset;
2079
2080    return unconv_num;
2081}
2082
2083static XlcConvMethodsRec iconv_mbtocs_methods = {
2084    close_tocs_converter,
2085    iconv_mbtocs,
2086    NULL
2087};
2088
2089static XlcConv
2090open_iconv_mbtocs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
2091{
2092    return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2093}
2094
2095/* from XlcNMultiByte to XlcNString */
2096
2097static int
2098iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2099	       XPointer *to, int *to_left, XPointer *args, int num_args)
2100{
2101    unsigned char const *src;
2102    unsigned char const *srcend;
2103    unsigned char *dst;
2104    unsigned char *dstend;
2105    int unconv_num;
2106
2107    if (from == NULL || *from == NULL)
2108	return 0;
2109
2110    src = (unsigned char const *) *from;
2111    srcend = src + *from_left;
2112    dst = (unsigned char *) *to;
2113    dstend = dst + *to_left;
2114    unconv_num = 0;
2115
2116    while (src < srcend) {
2117	unsigned char c;
2118	wchar_t wc;
2119	int consumed;
2120
2121    /* Uses stdc iconv to convert multibyte -> widechar */
2122
2123	consumed = mbtowc(&wc, src, srcend-src);
2124	if (consumed == 0)
2125	    break;
2126	if (dst == dstend)
2127	    break;
2128	if (consumed == -1) {
2129	    consumed = 1;
2130	    c = BAD_CHAR;
2131	    unconv_num++;
2132	} else {
2133	    if ((wc & ~(wchar_t)0xff) != 0) {
2134		c = BAD_CHAR;
2135		unconv_num++;
2136	    } else
2137		c = (unsigned char) wc;
2138	}
2139	*dst++ = c;
2140	src += consumed;
2141    }
2142
2143    *from = (XPointer) src;
2144    *from_left = srcend - src;
2145    *to = (XPointer) dst;
2146    *to_left = dstend - dst;
2147
2148    return unconv_num;
2149}
2150
2151static XlcConvMethodsRec iconv_mbstostr_methods = {
2152    close_converter,
2153    iconv_mbstostr,
2154    NULL
2155};
2156
2157static XlcConv
2158open_iconv_mbstostr(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
2159{
2160    return create_conv(from_lcd, &iconv_mbstostr_methods);
2161}
2162
2163/* from XlcNString to XlcNMultiByte */
2164static int
2165iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2166	       XPointer *to, int *to_left, XPointer *args, int num_args)
2167{
2168    unsigned char const *src;
2169    unsigned char const *srcend;
2170    unsigned char *dst;
2171    unsigned char *dstend;
2172
2173    if (from == NULL || *from == NULL)
2174	return 0;
2175
2176    src = (unsigned char const *) *from;
2177    srcend = src + *from_left;
2178    dst = (unsigned char *) *to;
2179    dstend = dst + *to_left;
2180
2181    while (src < srcend) {
2182	int count = wctomb(dst, *src);
2183	if (count < 0)
2184	    break;
2185	dst += count;
2186	src++;
2187    }
2188
2189    *from = (XPointer) src;
2190    *from_left = srcend - src;
2191    *to = (XPointer) dst;
2192    *to_left = dstend - dst;
2193
2194    return 0;
2195}
2196
2197static XlcConvMethodsRec iconv_strtombs_methods= {
2198    close_converter,
2199    iconv_strtombs,
2200    NULL
2201};
2202
2203static XlcConv
2204open_iconv_strtombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
2205{
2206    return create_conv(from_lcd, &iconv_strtombs_methods);
2207}
2208
2209/***************************************************************************/
/* Part II: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode.
 */
2214
2215/* from XlcNMultiByte to XlcNWideChar */
2216static int
2217iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2218	       XPointer *to, int *to_left, XPointer *args,  int num_args)
2219{
2220    char *src = *((char **) from);
2221    wchar_t *dst = *((wchar_t **) to);
2222    int src_left = *from_left;
2223    int dst_left = *to_left;
2224    int length, unconv_num = 0;
2225
2226    while (src_left > 0 && dst_left > 0) {
2227	length = mbtowc(dst, src, src_left);
2228
2229	if (length > 0) {
2230	    src += length;
2231	    src_left -= length;
2232	    if (dst)
2233	        dst++;
2234	    dst_left--;
2235	} else if (length < 0) {
2236	    src++;
2237	    src_left--;
2238	    unconv_num++;
2239        } else {
2240            /* null ? */
2241            src++;
2242            src_left--;
2243            if (dst)
2244                *dst++ = L'\0';
2245            dst_left--;
2246        }
2247    }
2248
2249    *from = (XPointer) src;
2250    if (dst)
2251	*to = (XPointer) dst;
2252    *from_left = src_left;
2253    *to_left = dst_left;
2254
2255    return unconv_num;
2256}
2257
2258static XlcConvMethodsRec iconv_mbstowcs_methods = {
2259    close_converter,
2260    iconv_mbstowcs,
2261    NULL
2262} ;
2263
2264static XlcConv
2265open_iconv_mbstowcs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
2266{
2267    return create_conv(from_lcd, &iconv_mbstowcs_methods);
2268}
2269
2270static int
2271iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2272	       XPointer *to, int *to_left, XPointer *args, int num_args)
2273{
2274    wchar_t *src = *((wchar_t **) from);
2275    char *dst = *((char **) to);
2276    int src_left = *from_left;
2277    int dst_left = *to_left;
2278    int length, unconv_num = 0;
2279
2280    while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2281	length = wctomb(dst, *src);		/* XXX */
2282
2283        if (length > 0) {
2284	    src++;
2285	    src_left--;
2286	    if (dst)
2287		dst += length;
2288	    dst_left -= length;
2289	} else if (length < 0) {
2290	    src++;
2291	    src_left--;
2292	    unconv_num++;
2293	}
2294    }
2295
2296    *from = (XPointer) src;
2297    if (dst)
2298      *to = (XPointer) dst;
2299    *from_left = src_left;
2300    *to_left = dst_left;
2301
2302    return unconv_num;
2303}
2304
2305static XlcConvMethodsRec iconv_wcstombs_methods = {
2306    close_converter,
2307    iconv_wcstombs,
2308    NULL
2309} ;
2310
2311static XlcConv
2312open_iconv_wcstombs(XLCd from_lcd, char *from_type, XLCd to_lcd, char *to_type)
2313{
2314    return create_conv(from_lcd, &iconv_wcstombs_methods);
2315}
2316
2317static XlcConv
2318open_iconv_mbstofcs(
2319    XLCd from_lcd,
2320    const char *from_type,
2321    XLCd to_lcd,
2322    const char *to_type)
2323{
2324    return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2325}
2326
2327/* Registers UTF-8 converters for a UTF-8 locale. */
2328
2329void
2330_XlcAddUtf8LocaleConverters(
2331    XLCd lcd)
2332{
2333    /* Register elementary converters. */
2334
2335    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
2336
2337    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
2338    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2339
2340    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2341
2342    /* Register converters for XlcNCharSet. This implicitly provides
2343     * converters from and to XlcNCompoundText. */
2344
2345    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
2346    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
2347    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
2348
2349    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2350    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2351    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2352
2353    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
2354    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
2355    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
2356    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
2357
2358    /* Register converters for XlcNFontCharSet */
2359    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
2360    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2361}
2362
2363void
2364_XlcAddGB18030LocaleConverters(
2365    XLCd lcd)
2366{
2367
2368    /* Register elementary converters. */
2369    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
2370    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
2371
2372    /* Register converters for XlcNCharSet. This implicitly provides
2373     * converters from and to XlcNCompoundText. */
2374
2375    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
2376    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
2377    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
2378    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
2379    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
2380
2381    /* Register converters for XlcNFontCharSet */
2382    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
2383
2384    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2385    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2386    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2387    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2388    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2389
2390    /* Register converters for XlcNFontCharSet */
2391    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2392}
2393