lcUTF8.c revision 9c019ec5
1b8e80941Smrg/******************************************************************
2b8e80941Smrg
3b8e80941Smrg              Copyright 1993 by SunSoft, Inc.
4b8e80941Smrg              Copyright 1999-2000 by Bruno Haible
5b8e80941Smrg
6b8e80941SmrgPermission to use, copy, modify, distribute, and sell this software
7b8e80941Smrgand its documentation for any purpose is hereby granted without fee,
8b8e80941Smrgprovided that the above copyright notice appear in all copies and
9b8e80941Smrgthat both that copyright notice and this permission notice appear
10b8e80941Smrgin supporting documentation, and that the names of SunSoft, Inc. and
11b8e80941SmrgBruno Haible not be used in advertising or publicity pertaining to
12b8e80941Smrgdistribution of the software without specific, written prior
13b8e80941Smrgpermission.  SunSoft, Inc. and Bruno Haible make no representations
14b8e80941Smrgabout the suitability of this software for any purpose.  It is
15b8e80941Smrgprovided "as is" without express or implied warranty.
16b8e80941Smrg
17b8e80941SmrgSunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
18b8e80941SmrgTO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
19b8e80941SmrgAND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
20b8e80941SmrgFOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
21b8e80941SmrgWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
22b8e80941SmrgACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
23b8e80941SmrgOF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24b8e80941Smrg
25b8e80941Smrg******************************************************************/
26b8e80941Smrg
27b8e80941Smrg/*
28b8e80941Smrg * This file contains:
29b8e80941Smrg *
30b8e80941Smrg * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
31b8e80941Smrg *
32b8e80941Smrg *    Used for three purposes:
33b8e80941Smrg *      1. The UTF-8 locales, see below.
34b8e80941Smrg *      2. Unicode aware applications for which the use of 8-bit character
35b8e80941Smrg *         sets is an anachronism.
36b8e80941Smrg *      3. For conversion from keysym to locale encoding.
37b8e80941Smrg *
 * II. Conversion files for a UTF-8 locale loader.
39b8e80941Smrg *     Supports: all locales with codeset UTF-8.
40b8e80941Smrg *     How: Provides converters for UTF-8.
41b8e80941Smrg *     Platforms: all systems.
42b8e80941Smrg *
43b8e80941Smrg * The loader itself is located in lcUTF8.c.
44b8e80941Smrg */
45b8e80941Smrg
46b8e80941Smrg/*
47b8e80941Smrg * The conversion from UTF-8 to CompoundText is realized in a very
48b8e80941Smrg * conservative way. Recall that CompoundText data is used for inter-client
49b8e80941Smrg * communication purposes. We distinguish three classes of clients:
50b8e80941Smrg * - Clients which accept only those pieces of CompoundText which belong to
51b8e80941Smrg *   the character set understood by the current locale.
52b8e80941Smrg *   (Example: clients which are linked to an older X11 library.)
53b8e80941Smrg * - Clients which accept CompoundText with multiple character sets and parse
54b8e80941Smrg *   it themselves.
55b8e80941Smrg *   (Example: emacs, xemacs.)
56b8e80941Smrg * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
57b8e80941Smrg *   functions for the conversion of CompoundText to their current locale's
58b8e80941Smrg *   multi-byte/wide-character format.
59b8e80941Smrg * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
60b8e80941Smrg * follows. For every character, it first tests whether the character is
61b8e80941Smrg * representable in the current locale's original (non-UTF-8) character set.
62b8e80941Smrg * If not, it goes through the list of predefined character sets for
63b8e80941Smrg * CompoundText and tests if the character is representable in that character
64b8e80941Smrg * set. If so, it encodes the character using its code within that character
 * set. If not, it uses a UTF-8-in-CompoundText encapsulation. Since
66b8e80941Smrg * clients of the first and second kind ignore such encapsulated text,
67b8e80941Smrg * this encapsulation is kept to a minimum and terminated as early as possible.
68b8e80941Smrg *
69b8e80941Smrg * In a distant future, when clients of the first and second kind will have
70b8e80941Smrg * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
71b8e80941Smrg * without first going through the list of predefined character sets.
72b8e80941Smrg */
73b8e80941Smrg
74b8e80941Smrg#ifdef HAVE_CONFIG_H
75b8e80941Smrg#include <config.h>
76b8e80941Smrg#endif
77b8e80941Smrg#include <stdio.h>
78b8e80941Smrg#include "Xlibint.h"
79b8e80941Smrg#include "XlcPubI.h"
80b8e80941Smrg#include "XlcGeneric.h"
81b8e80941Smrg
82b8e80941Smrgstatic XlcConv
83b8e80941Smrgcreate_conv(
84b8e80941Smrg    XLCd lcd,
85b8e80941Smrg    XlcConvMethods methods)
86b8e80941Smrg{
87b8e80941Smrg    XlcConv conv;
88b8e80941Smrg
89b8e80941Smrg    conv = Xmalloc(sizeof(XlcConvRec));
90b8e80941Smrg    if (conv == (XlcConv) NULL)
91b8e80941Smrg	return (XlcConv) NULL;
92b8e80941Smrg
93b8e80941Smrg    conv->methods = methods;
94b8e80941Smrg    conv->state = NULL;
95b8e80941Smrg
96b8e80941Smrg    return conv;
97b8e80941Smrg}
98b8e80941Smrg
99b8e80941Smrgstatic void
100b8e80941Smrgclose_converter(
101b8e80941Smrg    XlcConv conv)
102b8e80941Smrg{
103b8e80941Smrg    Xfree(conv);
104b8e80941Smrg}
105b8e80941Smrg
/*
 * Substitutes emitted in place of input that cannot be converted:
 * BAD_WCHAR is the Unicode value 0xfffd, used when producing UTF-8;
 * BAD_CHAR is the single byte '?', used when producing 8-bit strings.
 */
#define BAD_WCHAR ((ucs4_t) 0xfffd)
#define BAD_CHAR '?'
109b8e80941Smrg
110b8e80941Smrg/***************************************************************************/
111b8e80941Smrg/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
112b8e80941Smrg *
113b8e80941Smrg * Note that this code works in any locale. We store Unicode values in
114b8e80941Smrg * `ucs4_t' variables, but don't pass them to the user.
115b8e80941Smrg *
116b8e80941Smrg * This code has to support all character sets that are used for CompoundText,
117b8e80941Smrg * nothing more, nothing less. See the table in lcCT.c.
118b8e80941Smrg * Since the conversion _to_ CompoundText is likely to need the tables for all
119b8e80941Smrg * character sets at once, we don't use dynamic loading (of tables or shared
120b8e80941Smrg * libraries through iconv()). Use a fixed set of tables instead.
121b8e80941Smrg *
122b8e80941Smrg * We use statically computed tables, not dynamically allocated arrays,
123b8e80941Smrg * because it's more memory efficient: Different processes using the same
124b8e80941Smrg * libX11 shared library share the "text" and read-only "data" sections.
125b8e80941Smrg */
126b8e80941Smrg
127b8e80941Smrgtypedef unsigned int ucs4_t;
128b8e80941Smrg#define conv_t XlcConv
129b8e80941Smrg
130b8e80941Smrgtypedef struct _Utf8ConvRec {
131b8e80941Smrg    const char *name;
132b8e80941Smrg    XrmQuark xrm_name;
133b8e80941Smrg    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
134b8e80941Smrg    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
135b8e80941Smrg} Utf8ConvRec, *Utf8Conv;
136b8e80941Smrg
137b8e80941Smrg/*
138b8e80941Smrg * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
139b8e80941Smrg * converts the byte sequence starting at s to a wide character. Up to n bytes
140b8e80941Smrg * are available at s. n is >= 1.
141b8e80941Smrg * Result is number of bytes consumed (if a wide character was read),
142b8e80941Smrg * or 0 if invalid, or -1 if n too small.
143b8e80941Smrg *
144b8e80941Smrg * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
145b8e80941Smrg * converts the wide character wc to the character set xxx, and stores the
146b8e80941Smrg * result beginning at r. Up to n bytes may be written at r. n is >= 1.
147b8e80941Smrg * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
148b8e80941Smrg */
149b8e80941Smrg
/*
 * Return codes of the xxx_mbtowc / xxx_wctomb conversion routines.
 * RET_TOOFEW(0) and RET_TOOSMALL share the value -1; which one is meant
 * follows from the direction of the routine that returned it.
 */
/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
/* Parenthesized so the expansion stays a single operand in any expression. */
#define RET_TOOSMALL   (-1)
156b8e80941Smrg
157b8e80941Smrg/*
158b8e80941Smrg * The tables below are bijective. It would be possible to extend the
159b8e80941Smrg * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
160b8e80941Smrg * but *only* with characters not contained in any other table, and *only*
 * when the current locale is not a UTF-8 locale.
162b8e80941Smrg */
163b8e80941Smrg
164b8e80941Smrg#include "lcUniConv/utf8.h"
165b8e80941Smrg#include "lcUniConv/ucs2be.h"
166b8e80941Smrg#ifdef notused
167b8e80941Smrg#include "lcUniConv/ascii.h"
168b8e80941Smrg#endif
169b8e80941Smrg#include "lcUniConv/iso8859_1.h"
170b8e80941Smrg#include "lcUniConv/iso8859_2.h"
171b8e80941Smrg#include "lcUniConv/iso8859_3.h"
172b8e80941Smrg#include "lcUniConv/iso8859_4.h"
173b8e80941Smrg#include "lcUniConv/iso8859_5.h"
174b8e80941Smrg#include "lcUniConv/iso8859_6.h"
175b8e80941Smrg#include "lcUniConv/iso8859_7.h"
176b8e80941Smrg#include "lcUniConv/iso8859_8.h"
177b8e80941Smrg#include "lcUniConv/iso8859_9.h"
178b8e80941Smrg#include "lcUniConv/iso8859_10.h"
179b8e80941Smrg#include "lcUniConv/iso8859_11.h"
180b8e80941Smrg#include "lcUniConv/iso8859_13.h"
181b8e80941Smrg#include "lcUniConv/iso8859_14.h"
182b8e80941Smrg#include "lcUniConv/iso8859_15.h"
183b8e80941Smrg#include "lcUniConv/iso8859_16.h"
184b8e80941Smrg#include "lcUniConv/iso8859_9e.h"
185b8e80941Smrg#include "lcUniConv/jisx0201.h"
186b8e80941Smrg#include "lcUniConv/tis620.h"
187b8e80941Smrg#include "lcUniConv/koi8_r.h"
188b8e80941Smrg#include "lcUniConv/koi8_u.h"
189b8e80941Smrg#include "lcUniConv/koi8_c.h"
190b8e80941Smrg#include "lcUniConv/armscii_8.h"
191b8e80941Smrg#include "lcUniConv/cp1133.h"
192b8e80941Smrg#include "lcUniConv/mulelao.h"
193b8e80941Smrg#include "lcUniConv/viscii.h"
194b8e80941Smrg#include "lcUniConv/tcvn.h"
195b8e80941Smrg#include "lcUniConv/georgian_academy.h"
196b8e80941Smrg#include "lcUniConv/georgian_ps.h"
197b8e80941Smrg#include "lcUniConv/cp1251.h"
198b8e80941Smrg#include "lcUniConv/cp1255.h"
199b8e80941Smrg#include "lcUniConv/cp1256.h"
200b8e80941Smrg#include "lcUniConv/tatar_cyr.h"
201b8e80941Smrg
/* Summary record used by the multi-byte character set tables included below. */
typedef struct {
    /* index into big table */
    unsigned short indx;
    /* bitmask of used entries */
    unsigned short used;
} Summary16;
206b8e80941Smrg
207b8e80941Smrg#include "lcUniConv/gb2312.h"
208b8e80941Smrg#include "lcUniConv/jisx0208.h"
209b8e80941Smrg#include "lcUniConv/jisx0212.h"
210b8e80941Smrg#include "lcUniConv/ksc5601.h"
211b8e80941Smrg#include "lcUniConv/big5.h"
212b8e80941Smrg#include "lcUniConv/big5_emacs.h"
213b8e80941Smrg#include "lcUniConv/big5hkscs.h"
214b8e80941Smrg#include "lcUniConv/gbk.h"
215b8e80941Smrg
216b8e80941Smrgstatic Utf8ConvRec all_charsets[] = {
217b8e80941Smrg    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
218b8e80941Smrg       (for lookup speed), once at the end (as a fallback).  */
219b8e80941Smrg    { "ISO10646-1", NULLQUARK,
220b8e80941Smrg	utf8_mbtowc, utf8_wctomb
221b8e80941Smrg    },
222b8e80941Smrg
223b8e80941Smrg    { "ISO8859-1", NULLQUARK,
224b8e80941Smrg	iso8859_1_mbtowc, iso8859_1_wctomb
225b8e80941Smrg    },
226b8e80941Smrg    { "ISO8859-2", NULLQUARK,
227b8e80941Smrg	iso8859_2_mbtowc, iso8859_2_wctomb
228b8e80941Smrg    },
229b8e80941Smrg    { "ISO8859-3", NULLQUARK,
230b8e80941Smrg	iso8859_3_mbtowc, iso8859_3_wctomb
231b8e80941Smrg    },
232b8e80941Smrg    { "ISO8859-4", NULLQUARK,
233b8e80941Smrg	iso8859_4_mbtowc, iso8859_4_wctomb
234b8e80941Smrg    },
235b8e80941Smrg    { "ISO8859-5", NULLQUARK,
236b8e80941Smrg	iso8859_5_mbtowc, iso8859_5_wctomb
237b8e80941Smrg    },
238b8e80941Smrg    { "ISO8859-6", NULLQUARK,
239b8e80941Smrg	iso8859_6_mbtowc, iso8859_6_wctomb
240b8e80941Smrg    },
241b8e80941Smrg    { "ISO8859-7", NULLQUARK,
242b8e80941Smrg	iso8859_7_mbtowc, iso8859_7_wctomb
243b8e80941Smrg    },
244b8e80941Smrg    { "ISO8859-8", NULLQUARK,
245b8e80941Smrg	iso8859_8_mbtowc, iso8859_8_wctomb
246b8e80941Smrg    },
247b8e80941Smrg    { "ISO8859-9", NULLQUARK,
248b8e80941Smrg	iso8859_9_mbtowc, iso8859_9_wctomb
249b8e80941Smrg    },
250b8e80941Smrg    { "ISO8859-10", NULLQUARK,
251b8e80941Smrg	iso8859_10_mbtowc, iso8859_10_wctomb
252b8e80941Smrg    },
253b8e80941Smrg    { "ISO8859-11", NULLQUARK,
254b8e80941Smrg	iso8859_11_mbtowc, iso8859_11_wctomb
255b8e80941Smrg    },
256b8e80941Smrg    { "ISO8859-13", NULLQUARK,
257b8e80941Smrg	iso8859_13_mbtowc, iso8859_13_wctomb
258b8e80941Smrg    },
259b8e80941Smrg    { "ISO8859-14", NULLQUARK,
260b8e80941Smrg	iso8859_14_mbtowc, iso8859_14_wctomb
261b8e80941Smrg    },
262b8e80941Smrg    { "ISO8859-15", NULLQUARK,
263b8e80941Smrg	iso8859_15_mbtowc, iso8859_15_wctomb
264b8e80941Smrg    },
265b8e80941Smrg    { "ISO8859-16", NULLQUARK,
266b8e80941Smrg	iso8859_16_mbtowc, iso8859_16_wctomb
267b8e80941Smrg    },
268b8e80941Smrg    { "JISX0201.1976-0", NULLQUARK,
269b8e80941Smrg	jisx0201_mbtowc, jisx0201_wctomb
270b8e80941Smrg    },
271b8e80941Smrg    { "TIS620-0", NULLQUARK,
272b8e80941Smrg	tis620_mbtowc, tis620_wctomb
273b8e80941Smrg    },
274b8e80941Smrg    { "GB2312.1980-0", NULLQUARK,
275b8e80941Smrg	gb2312_mbtowc, gb2312_wctomb
276b8e80941Smrg    },
277b8e80941Smrg    { "JISX0208.1983-0", NULLQUARK,
278b8e80941Smrg	jisx0208_mbtowc, jisx0208_wctomb
279b8e80941Smrg    },
280b8e80941Smrg    { "JISX0208.1990-0", NULLQUARK,
281b8e80941Smrg	jisx0208_mbtowc, jisx0208_wctomb
282b8e80941Smrg    },
283b8e80941Smrg    { "JISX0212.1990-0", NULLQUARK,
284b8e80941Smrg	jisx0212_mbtowc, jisx0212_wctomb
285b8e80941Smrg    },
286b8e80941Smrg    { "KSC5601.1987-0", NULLQUARK,
287b8e80941Smrg	ksc5601_mbtowc, ksc5601_wctomb
288b8e80941Smrg    },
289b8e80941Smrg    { "KOI8-R", NULLQUARK,
290b8e80941Smrg	koi8_r_mbtowc, koi8_r_wctomb
291b8e80941Smrg    },
292b8e80941Smrg    { "KOI8-U", NULLQUARK,
293b8e80941Smrg	koi8_u_mbtowc, koi8_u_wctomb
294b8e80941Smrg    },
295b8e80941Smrg    { "KOI8-C", NULLQUARK,
296b8e80941Smrg	koi8_c_mbtowc, koi8_c_wctomb
297b8e80941Smrg    },
298b8e80941Smrg    { "TATAR-CYR", NULLQUARK,
299b8e80941Smrg	tatar_cyr_mbtowc, tatar_cyr_wctomb
300b8e80941Smrg    },
301b8e80941Smrg    { "ARMSCII-8", NULLQUARK,
302b8e80941Smrg	armscii_8_mbtowc, armscii_8_wctomb
303b8e80941Smrg    },
304b8e80941Smrg    { "IBM-CP1133", NULLQUARK,
305b8e80941Smrg	cp1133_mbtowc, cp1133_wctomb
306b8e80941Smrg    },
307b8e80941Smrg    { "MULELAO-1", NULLQUARK,
308b8e80941Smrg	mulelao_mbtowc, mulelao_wctomb
309b8e80941Smrg    },
310b8e80941Smrg    { "VISCII1.1-1", NULLQUARK,
311b8e80941Smrg	viscii_mbtowc, viscii_wctomb
312b8e80941Smrg    },
313b8e80941Smrg    { "TCVN-5712", NULLQUARK,
314b8e80941Smrg	tcvn_mbtowc, tcvn_wctomb
315b8e80941Smrg    },
316b8e80941Smrg    { "GEORGIAN-ACADEMY", NULLQUARK,
317b8e80941Smrg	georgian_academy_mbtowc, georgian_academy_wctomb
318b8e80941Smrg    },
319b8e80941Smrg    { "GEORGIAN-PS", NULLQUARK,
320b8e80941Smrg	georgian_ps_mbtowc, georgian_ps_wctomb
321b8e80941Smrg    },
322b8e80941Smrg    { "ISO8859-9E", NULLQUARK,
323b8e80941Smrg	iso8859_9e_mbtowc, iso8859_9e_wctomb
324b8e80941Smrg    },
325b8e80941Smrg    { "MICROSOFT-CP1251", NULLQUARK,
326b8e80941Smrg	cp1251_mbtowc, cp1251_wctomb
327b8e80941Smrg    },
328b8e80941Smrg    { "MICROSOFT-CP1255", NULLQUARK,
329b8e80941Smrg	cp1255_mbtowc, cp1255_wctomb
330b8e80941Smrg    },
331b8e80941Smrg    { "MICROSOFT-CP1256", NULLQUARK,
332b8e80941Smrg	cp1256_mbtowc, cp1256_wctomb
333b8e80941Smrg    },
334b8e80941Smrg    { "BIG5-0", NULLQUARK,
335b8e80941Smrg	big5_mbtowc, big5_wctomb
336b8e80941Smrg    },
337b8e80941Smrg    { "BIG5-E0", NULLQUARK,
338b8e80941Smrg	big5_0_mbtowc, big5_0_wctomb
339b8e80941Smrg    },
340b8e80941Smrg    { "BIG5-E1", NULLQUARK,
341b8e80941Smrg	big5_1_mbtowc, big5_1_wctomb
342b8e80941Smrg    },
343b8e80941Smrg    { "GBK-0", NULLQUARK,
344b8e80941Smrg	gbk_mbtowc, gbk_wctomb
345b8e80941Smrg    },
346b8e80941Smrg    { "BIG5HKSCS-0", NULLQUARK,
347b8e80941Smrg	big5hkscs_mbtowc, big5hkscs_wctomb
348b8e80941Smrg    },
349b8e80941Smrg
350b8e80941Smrg    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
351b8e80941Smrg       (for lookup speed), once at the end (as a fallback).  */
352b8e80941Smrg    { "ISO10646-1", NULLQUARK,
353b8e80941Smrg	utf8_mbtowc, utf8_wctomb
354b8e80941Smrg    },
355b8e80941Smrg
356b8e80941Smrg    /* Encoding ISO10646-1 for fonts means UCS2-like encoding
357b8e80941Smrg       so for conversion to FontCharSet we need this record */
358b8e80941Smrg    { "ISO10646-1", NULLQUARK,
359b8e80941Smrg	ucs2be_mbtowc, ucs2be_wctomb
360b8e80941Smrg    }
361b8e80941Smrg};
362b8e80941Smrg
/* Total number of entries in all_charsets[]. */
#define charsets_table_size (sizeof(all_charsets) / sizeof(*all_charsets))
/* Entry count without the trailing UCS-2 record. */
#define all_charsets_count  (charsets_table_size - 1)
/* Index of the last entry, the UCS-2 (ucs2be) record. */
#define ucs2_conv_index     (charsets_table_size - 1)
366b8e80941Smrg
367b8e80941Smrgstatic void
368b8e80941Smrginit_all_charsets (void)
369b8e80941Smrg{
370b8e80941Smrg    Utf8Conv convptr;
371b8e80941Smrg    int i;
372b8e80941Smrg
373b8e80941Smrg    for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
374b8e80941Smrg	convptr->xrm_name = XrmStringToQuark(convptr->name);
375b8e80941Smrg}
376b8e80941Smrg
/*
 * Runs init_all_charsets() unless it appears to have run already; the
 * quark of the first table entry, initially NULLQUARK, is used as the
 * indicator.
 */
#define lazy_init_all_charsets() \
    do { \
        if (NULLQUARK == all_charsets[0].xrm_name) \
            init_all_charsets(); \
    } while (0)
382b8e80941Smrg
383b8e80941Smrg/* from XlcNCharSet to XlcNUtf8String */
384b8e80941Smrg
385b8e80941Smrgstatic int
386b8e80941Smrgcstoutf8(
387b8e80941Smrg    XlcConv conv,
388b8e80941Smrg    XPointer *from,
389b8e80941Smrg    int *from_left,
390b8e80941Smrg    XPointer *to,
391b8e80941Smrg    int *to_left,
392b8e80941Smrg    XPointer *args,
393b8e80941Smrg    int num_args)
394b8e80941Smrg{
395b8e80941Smrg    XlcCharSet charset;
396b8e80941Smrg    const char *name;
397b8e80941Smrg    Utf8Conv convptr;
398b8e80941Smrg    int i;
399b8e80941Smrg    unsigned char const *src;
400b8e80941Smrg    unsigned char const *srcend;
401b8e80941Smrg    unsigned char *dst;
402b8e80941Smrg    unsigned char *dstend;
403b8e80941Smrg    int unconv_num;
404b8e80941Smrg
405b8e80941Smrg    if (from == NULL || *from == NULL)
406b8e80941Smrg	return 0;
407b8e80941Smrg
408b8e80941Smrg    if (num_args < 1)
409b8e80941Smrg	return -1;
410b8e80941Smrg
411b8e80941Smrg    charset = (XlcCharSet) args[0];
412b8e80941Smrg    name = charset->encoding_name;
413b8e80941Smrg    /* not charset->name because the latter has a ":GL"/":GR" suffix */
414b8e80941Smrg
415b8e80941Smrg    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
416b8e80941Smrg	if (!strcmp(convptr->name, name))
417b8e80941Smrg	    break;
418b8e80941Smrg    if (i == 0)
419b8e80941Smrg	return -1;
420b8e80941Smrg
421b8e80941Smrg    src = (unsigned char const *) *from;
422b8e80941Smrg    srcend = src + *from_left;
423b8e80941Smrg    dst = (unsigned char *) *to;
424b8e80941Smrg    dstend = dst + *to_left;
425b8e80941Smrg    unconv_num = 0;
426b8e80941Smrg
427b8e80941Smrg    while (src < srcend) {
428b8e80941Smrg	ucs4_t wc;
429b8e80941Smrg	int consumed;
430b8e80941Smrg	int count;
431b8e80941Smrg
432b8e80941Smrg	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
433b8e80941Smrg	if (consumed == RET_ILSEQ)
434b8e80941Smrg	    return -1;
435b8e80941Smrg	if (consumed == RET_TOOFEW(0))
436b8e80941Smrg	    break;
437b8e80941Smrg
438b8e80941Smrg	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
439b8e80941Smrg	if (count == RET_TOOSMALL)
440b8e80941Smrg	    break;
441b8e80941Smrg	if (count == RET_ILSEQ) {
442	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
443	    if (count == RET_TOOSMALL)
444		break;
445	    unconv_num++;
446	}
447	src += consumed;
448	dst += count;
449    }
450
451    *from = (XPointer) src;
452    *from_left = srcend - src;
453    *to = (XPointer) dst;
454    *to_left = dstend - dst;
455
456    return unconv_num;
457}
458
459static XlcConvMethodsRec methods_cstoutf8 = {
460    close_converter,
461    cstoutf8,
462    NULL
463};
464
465static XlcConv
466open_cstoutf8(
467    XLCd from_lcd,
468    const char *from_type,
469    XLCd to_lcd,
470    const char *to_type)
471{
472    lazy_init_all_charsets();
473    return create_conv(from_lcd, &methods_cstoutf8);
474}
475
476/* from XlcNUtf8String to XlcNCharSet */
477
478static XlcConv
479create_tocs_conv(
480    XLCd lcd,
481    XlcConvMethods methods)
482{
483    XlcConv conv;
484    CodeSet *codeset_list;
485    int codeset_num;
486    int charset_num;
487    int i, j, k;
488    Utf8Conv *preferred;
489
490    lazy_init_all_charsets();
491
492    codeset_list = XLC_GENERIC(lcd, codeset_list);
493    codeset_num = XLC_GENERIC(lcd, codeset_num);
494
495    charset_num = 0;
496    for (i = 0; i < codeset_num; i++)
497	charset_num += codeset_list[i]->num_charsets;
498    if (charset_num > all_charsets_count-1)
499	charset_num = all_charsets_count-1;
500
501    conv = Xmalloc(sizeof(XlcConvRec)
502			     + (charset_num + 1) * sizeof(Utf8Conv));
503    if (conv == (XlcConv) NULL)
504	return (XlcConv) NULL;
505    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
506
507    /* Loop through all codesets mentioned in the locale. */
508    charset_num = 0;
509    for (i = 0; i < codeset_num; i++) {
510	XlcCharSet *charsets = codeset_list[i]->charset_list;
511	int num_charsets = codeset_list[i]->num_charsets;
512	for (j = 0; j < num_charsets; j++) {
513	    const char *name = charsets[j]->encoding_name;
514	    /* If it wasn't already encountered... */
515	    for (k = charset_num-1; k >= 0; k--)
516		if (!strcmp(preferred[k]->name, name))
517		    break;
518	    if (k < 0) {
519		/* Look it up in all_charsets[]. */
520		for (k = 0; k < all_charsets_count-1; k++)
521		    if (!strcmp(all_charsets[k].name, name)) {
522			/* Add it to the preferred set. */
523			preferred[charset_num++] = &all_charsets[k];
524			break;
525		    }
526	    }
527	}
528    }
529    preferred[charset_num] = (Utf8Conv) NULL;
530
531    conv->methods = methods;
532    conv->state = (XPointer) preferred;
533
534    return conv;
535}
536
537static void
538close_tocs_converter(
539    XlcConv conv)
540{
541    /* conv->state is allocated together with conv, free both at once.  */
542    Xfree(conv);
543}
544
545/*
546 * Converts a Unicode character to an appropriate character set. The NULL
547 * terminated array of preferred character sets is passed as first argument.
548 * If successful, *charsetp is set to the character set that was used, and
549 * *sidep is set to the character set side (XlcGL or XlcGR).
550 */
551static int
552charset_wctocs(
553    Utf8Conv *preferred,
554    Utf8Conv *charsetp,
555    XlcSide *sidep,
556    XlcConv conv,
557    unsigned char *r,
558    ucs4_t wc,
559    int n)
560{
561    int count;
562    Utf8Conv convptr;
563    int i;
564
565    for (; *preferred != (Utf8Conv) NULL; preferred++) {
566	convptr = *preferred;
567	count = convptr->wctocs(conv, r, wc, n);
568	if (count == RET_TOOSMALL)
569	    return RET_TOOSMALL;
570	if (count != RET_ILSEQ) {
571	    *charsetp = convptr;
572	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
573	    return count;
574	}
575    }
576    for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
577	count = convptr->wctocs(conv, r, wc, n);
578	if (count == RET_TOOSMALL)
579	    return RET_TOOSMALL;
580	if (count != RET_ILSEQ) {
581	    *charsetp = convptr;
582	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
583	    return count;
584	}
585    }
586    return RET_ILSEQ;
587}
588
589static int
590utf8tocs(
591    XlcConv conv,
592    XPointer *from,
593    int *from_left,
594    XPointer *to,
595    int *to_left,
596    XPointer *args,
597    int num_args)
598{
599    Utf8Conv *preferred_charsets;
600    XlcCharSet last_charset = NULL;
601    unsigned char const *src;
602    unsigned char const *srcend;
603    unsigned char *dst;
604    unsigned char *dstend;
605    int unconv_num;
606
607    if (from == NULL || *from == NULL)
608	return 0;
609
610    preferred_charsets = (Utf8Conv *) conv->state;
611    src = (unsigned char const *) *from;
612    srcend = src + *from_left;
613    dst = (unsigned char *) *to;
614    dstend = dst + *to_left;
615    unconv_num = 0;
616
617    while (src < srcend && dst < dstend) {
618	Utf8Conv chosen_charset = NULL;
619	XlcSide chosen_side = XlcNONE;
620	ucs4_t wc;
621	int consumed;
622	int count;
623
624	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
625	if (consumed == RET_TOOFEW(0))
626	    break;
627	if (consumed == RET_ILSEQ) {
628	    src++;
629	    unconv_num++;
630	    continue;
631	}
632
633	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
634	if (count == RET_TOOSMALL)
635	    break;
636	if (count == RET_ILSEQ) {
637	    src += consumed;
638	    unconv_num++;
639	    continue;
640	}
641
642	if (last_charset == NULL) {
643	    last_charset =
644	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
645	    if (last_charset == NULL) {
646		src += consumed;
647		unconv_num++;
648		continue;
649	    }
650	} else {
651	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
652	          && (last_charset->side == XlcGLGR
653	              || last_charset->side == chosen_side)))
654		break;
655	}
656	src += consumed;
657	dst += count;
658    }
659
660    if (last_charset == NULL)
661	return -1;
662
663    *from = (XPointer) src;
664    *from_left = srcend - src;
665    *to = (XPointer) dst;
666    *to_left = dstend - dst;
667
668    if (num_args >= 1)
669	*((XlcCharSet *)args[0]) = last_charset;
670
671    return unconv_num;
672}
673
674static XlcConvMethodsRec methods_utf8tocs = {
675    close_tocs_converter,
676    utf8tocs,
677    NULL
678};
679
680static XlcConv
681open_utf8tocs(
682    XLCd from_lcd,
683    const char *from_type,
684    XLCd to_lcd,
685    const char *to_type)
686{
687    return create_tocs_conv(from_lcd, &methods_utf8tocs);
688}
689
690/* from XlcNUtf8String to XlcNChar */
691
692static int
693utf8tocs1(
694    XlcConv conv,
695    XPointer *from,
696    int *from_left,
697    XPointer *to,
698    int *to_left,
699    XPointer *args,
700    int num_args)
701{
702    Utf8Conv *preferred_charsets;
703    XlcCharSet last_charset = NULL;
704    unsigned char const *src;
705    unsigned char const *srcend;
706    unsigned char *dst;
707    unsigned char *dstend;
708    int unconv_num;
709
710    if (from == NULL || *from == NULL)
711	return 0;
712
713    preferred_charsets = (Utf8Conv *) conv->state;
714    src = (unsigned char const *) *from;
715    srcend = src + *from_left;
716    dst = (unsigned char *) *to;
717    dstend = dst + *to_left;
718    unconv_num = 0;
719
720    while (src < srcend && dst < dstend) {
721	Utf8Conv chosen_charset = NULL;
722	XlcSide chosen_side = XlcNONE;
723	ucs4_t wc;
724	int consumed;
725	int count;
726
727	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
728	if (consumed == RET_TOOFEW(0))
729	    break;
730	if (consumed == RET_ILSEQ) {
731	    src++;
732	    unconv_num++;
733	    continue;
734	}
735
736	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
737	if (count == RET_TOOSMALL)
738	    break;
739	if (count == RET_ILSEQ) {
740	    src += consumed;
741	    unconv_num++;
742	    continue;
743	}
744
745	if (last_charset == NULL) {
746	    last_charset =
747	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
748	    if (last_charset == NULL) {
749		src += consumed;
750		unconv_num++;
751		continue;
752	    }
753	} else {
754	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
755	          && (last_charset->side == XlcGLGR
756	              || last_charset->side == chosen_side)))
757		break;
758	}
759	src += consumed;
760	dst += count;
761	break;
762    }
763
764    if (last_charset == NULL)
765	return -1;
766
767    *from = (XPointer) src;
768    *from_left = srcend - src;
769    *to = (XPointer) dst;
770    *to_left = dstend - dst;
771
772    if (num_args >= 1)
773	*((XlcCharSet *)args[0]) = last_charset;
774
775    return unconv_num;
776}
777
778static XlcConvMethodsRec methods_utf8tocs1 = {
779    close_tocs_converter,
780    utf8tocs1,
781    NULL
782};
783
784static XlcConv
785open_utf8tocs1(
786    XLCd from_lcd,
787    const char *from_type,
788    XLCd to_lcd,
789    const char *to_type)
790{
791    return create_tocs_conv(from_lcd, &methods_utf8tocs1);
792}
793
794/* from XlcNUtf8String to XlcNString */
795
796static int
797utf8tostr(
798    XlcConv conv,
799    XPointer *from,
800    int *from_left,
801    XPointer *to,
802    int *to_left,
803    XPointer *args,
804    int num_args)
805{
806    unsigned char const *src;
807    unsigned char const *srcend;
808    unsigned char *dst;
809    unsigned char *dstend;
810    int unconv_num;
811
812    if (from == NULL || *from == NULL)
813	return 0;
814
815    src = (unsigned char const *) *from;
816    srcend = src + *from_left;
817    dst = (unsigned char *) *to;
818    dstend = dst + *to_left;
819    unconv_num = 0;
820
821    while (src < srcend) {
822	unsigned char c;
823	ucs4_t wc;
824	int consumed;
825
826	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
827	if (consumed == RET_TOOFEW(0))
828	    break;
829	if (dst == dstend)
830	    break;
831	if (consumed == RET_ILSEQ) {
832	    consumed = 1;
833	    c = BAD_CHAR;
834	    unconv_num++;
835	} else {
836	    if ((wc & ~(ucs4_t)0xff) != 0) {
837		c = BAD_CHAR;
838		unconv_num++;
839	    } else
840		c = (unsigned char) wc;
841	}
842	*dst++ = c;
843	src += consumed;
844    }
845
846    *from = (XPointer) src;
847    *from_left = srcend - src;
848    *to = (XPointer) dst;
849    *to_left = dstend - dst;
850
851    return unconv_num;
852}
853
854static XlcConvMethodsRec methods_utf8tostr = {
855    close_converter,
856    utf8tostr,
857    NULL
858};
859
860static XlcConv
861open_utf8tostr(
862    XLCd from_lcd,
863    const char *from_type,
864    XLCd to_lcd,
865    const char *to_type)
866{
867    return create_conv(from_lcd, &methods_utf8tostr);
868}
869
870/* from XlcNString to XlcNUtf8String */
871
872static int
873strtoutf8(
874    XlcConv conv,
875    XPointer *from,
876    int *from_left,
877    XPointer *to,
878    int *to_left,
879    XPointer *args,
880    int num_args)
881{
882    unsigned char const *src;
883    unsigned char const *srcend;
884    unsigned char *dst;
885    unsigned char *dstend;
886
887    if (from == NULL || *from == NULL)
888	return 0;
889
890    src = (unsigned char const *) *from;
891    srcend = src + *from_left;
892    dst = (unsigned char *) *to;
893    dstend = dst + *to_left;
894
895    while (src < srcend) {
896	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
897	if (count == RET_TOOSMALL)
898	    break;
899	dst += count;
900	src++;
901    }
902
903    *from = (XPointer) src;
904    *from_left = srcend - src;
905    *to = (XPointer) dst;
906    *to_left = dstend - dst;
907
908    return 0;
909}
910
911static XlcConvMethodsRec methods_strtoutf8 = {
912    close_converter,
913    strtoutf8,
914    NULL
915};
916
917static XlcConv
918open_strtoutf8(
919    XLCd from_lcd,
920    const char *from_type,
921    XLCd to_lcd,
922    const char *to_type)
923{
924    return create_conv(from_lcd, &methods_strtoutf8);
925}
926
927/* Support for the input methods. */
928
929XPointer
930_Utf8GetConvByName(
931    const char *name)
932{
933    XrmQuark xrm_name;
934    Utf8Conv convptr;
935    int i;
936
937    if (name == NULL)
938        return (XPointer) NULL;
939
940    lazy_init_all_charsets();
941    xrm_name = XrmStringToQuark(name);
942
943    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
944	if (convptr->xrm_name == xrm_name)
945	    return (XPointer) convptr->wctocs;
946    return (XPointer) NULL;
947}
948
949/* from XlcNUcsChar to XlcNChar, needed for input methods */
950
951static XlcConv
952create_ucstocs_conv(
953    XLCd lcd,
954    XlcConvMethods methods)
955{
956
957    if (XLC_PUBLIC_PART(lcd)->codeset
958	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
959	XlcConv conv;
960	Utf8Conv *preferred;
961
962	lazy_init_all_charsets();
963
964	conv = Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
965	if (conv == (XlcConv) NULL)
966	    return (XlcConv) NULL;
967	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
968
969	preferred[0] = &all_charsets[0]; /* ISO10646 */
970	preferred[1] = (Utf8Conv) NULL;
971
972	conv->methods = methods;
973	conv->state = (XPointer) preferred;
974
975	return conv;
976    } else {
977	return create_tocs_conv(lcd, methods);
978    }
979}
980
981static int
982charset_wctocs_exactly(
983    Utf8Conv *preferred,
984    Utf8Conv *charsetp,
985    XlcSide *sidep,
986    XlcConv conv,
987    unsigned char *r,
988    ucs4_t wc,
989    int n)
990{
991    int count;
992    Utf8Conv convptr;
993
994    for (; *preferred != (Utf8Conv) NULL; preferred++) {
995	convptr = *preferred;
996	count = convptr->wctocs(conv, r, wc, n);
997	if (count == RET_TOOSMALL)
998	    return RET_TOOSMALL;
999	if (count != RET_ILSEQ) {
1000	    *charsetp = convptr;
1001	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
1002	    return count;
1003	}
1004    }
1005    return RET_ILSEQ;
1006}
1007
1008static int
1009ucstocs1(
1010    XlcConv conv,
1011    XPointer *from,
1012    int *from_left,
1013    XPointer *to,
1014    int *to_left,
1015    XPointer *args,
1016    int num_args)
1017{
1018    ucs4_t const *src;
1019    unsigned char *dst = (unsigned char *) *to;
1020    int unconv_num = 0;
1021    Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
1022    Utf8Conv chosen_charset = NULL;
1023    XlcSide chosen_side = XlcNONE;
1024    XlcCharSet charset = NULL;
1025    int count;
1026
1027    if (from == NULL || *from == NULL)
1028	return 0;
1029
1030    src = (ucs4_t const *) *from;
1031
1032    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
1033                                   &chosen_side, conv, dst, *src, *to_left);
1034    if (count < 1) {
1035        unconv_num++;
1036        count = 0;
1037    } else {
1038        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1039    }
1040    if (charset == NULL)
1041	return -1;
1042
1043    *from = (XPointer) ++src;
1044    (*from_left)--;
1045    *to = (XPointer) dst;
1046    *to_left -= count;
1047
1048    if (num_args >= 1)
1049	*((XlcCharSet *)args[0]) = charset;
1050
1051    return unconv_num;
1052}
1053
1054static XlcConvMethodsRec methods_ucstocs1 = {
1055    close_tocs_converter,
1056    ucstocs1,
1057    NULL
1058};
1059
1060static XlcConv
1061open_ucstocs1(
1062    XLCd from_lcd,
1063    const char *from_type,
1064    XLCd to_lcd,
1065    const char *to_type)
1066{
1067    return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
1068}
1069
1070/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
1071
1072static int
1073ucstoutf8(
1074    XlcConv conv,
1075    XPointer *from,
1076    int *from_left,
1077    XPointer *to,
1078    int *to_left,
1079    XPointer *args,
1080    int num_args)
1081{
1082    const ucs4_t *src;
1083    const ucs4_t *srcend;
1084    unsigned char *dst;
1085    unsigned char *dstend;
1086    int unconv_num;
1087
1088    if (from == NULL || *from == NULL)
1089	return 0;
1090
1091    src = (const ucs4_t *) *from;
1092    srcend = src + *from_left;
1093    dst = (unsigned char *) *to;
1094    dstend = dst + *to_left;
1095    unconv_num = 0;
1096
1097    while (src < srcend) {
1098	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1099	if (count == RET_TOOSMALL)
1100	    break;
1101	if (count == RET_ILSEQ)
1102	    unconv_num++;
1103	src++;
1104	dst += count;
1105    }
1106
1107    *from = (XPointer) src;
1108    *from_left = srcend - src;
1109    *to = (XPointer) dst;
1110    *to_left = dstend - dst;
1111
1112    return unconv_num;
1113}
1114
1115static XlcConvMethodsRec methods_ucstoutf8 = {
1116    close_converter,
1117    ucstoutf8,
1118    NULL
1119};
1120
1121static XlcConv
1122open_ucstoutf8(
1123    XLCd from_lcd,
1124    const char *from_type,
1125    XLCd to_lcd,
1126    const char *to_type)
1127{
1128    return create_conv(from_lcd, &methods_ucstoutf8);
1129}
1130
1131/* Registers UTF-8 converters for a non-UTF-8 locale. */
1132void
1133_XlcAddUtf8Converters(
1134    XLCd lcd)
1135{
1136    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
1137    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
1138    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
1139    _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
1140    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
1141    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
1142    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
1143}
1144
1145/***************************************************************************/
1146/* Part II: UTF-8 locale loader conversion files
1147 *
1148 * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
1149 */
1150
1151/* from XlcNMultiByte to XlcNWideChar */
1152
1153static int
1154utf8towcs(
1155    XlcConv conv,
1156    XPointer *from,
1157    int *from_left,
1158    XPointer *to,
1159    int *to_left,
1160    XPointer *args,
1161    int num_args)
1162{
1163    unsigned char const *src;
1164    unsigned char const *srcend;
1165    wchar_t *dst;
1166    wchar_t *dstend;
1167    int unconv_num;
1168
1169    if (from == NULL || *from == NULL)
1170	return 0;
1171
1172    src = (unsigned char const *) *from;
1173    srcend = src + *from_left;
1174    dst = (wchar_t *) *to;
1175    dstend = dst + *to_left;
1176    unconv_num = 0;
1177
1178    while (src < srcend && dst < dstend) {
1179	ucs4_t wc;
1180	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
1181	if (consumed == RET_TOOFEW(0))
1182	    break;
1183	if (consumed == RET_ILSEQ) {
1184	    src++;
1185	    *dst = BAD_WCHAR;
1186	    unconv_num++;
1187	} else {
1188	    src += consumed;
1189	    *dst = wc;
1190	}
1191	dst++;
1192    }
1193
1194    *from = (XPointer) src;
1195    *from_left = srcend - src;
1196    *to = (XPointer) dst;
1197    *to_left = dstend - dst;
1198
1199    return unconv_num;
1200}
1201
1202static XlcConvMethodsRec methods_utf8towcs = {
1203    close_converter,
1204    utf8towcs,
1205    NULL
1206};
1207
1208static XlcConv
1209open_utf8towcs(
1210    XLCd from_lcd,
1211    const char *from_type,
1212    XLCd to_lcd,
1213    const char *to_type)
1214{
1215    return create_conv(from_lcd, &methods_utf8towcs);
1216}
1217
1218/* from XlcNWideChar to XlcNMultiByte */
1219
1220static int
1221wcstoutf8(
1222    XlcConv conv,
1223    XPointer *from,
1224    int *from_left,
1225    XPointer *to,
1226    int *to_left,
1227    XPointer *args,
1228    int num_args)
1229{
1230    wchar_t const *src;
1231    wchar_t const *srcend;
1232    unsigned char *dst;
1233    unsigned char *dstend;
1234    int unconv_num;
1235
1236    if (from == NULL || *from == NULL)
1237	return 0;
1238
1239    src = (wchar_t const *) *from;
1240    srcend = src + *from_left;
1241    dst = (unsigned char *) *to;
1242    dstend = dst + *to_left;
1243    unconv_num = 0;
1244
1245    while (src < srcend) {
1246	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
1247	if (count == RET_TOOSMALL)
1248	    break;
1249	if (count == RET_ILSEQ) {
1250	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
1251	    if (count == RET_TOOSMALL)
1252		break;
1253	    unconv_num++;
1254	}
1255	dst += count;
1256	src++;
1257    }
1258
1259    *from = (XPointer) src;
1260    *from_left = srcend - src;
1261    *to = (XPointer) dst;
1262    *to_left = dstend - dst;
1263
1264    return unconv_num;
1265}
1266
1267static XlcConvMethodsRec methods_wcstoutf8 = {
1268    close_converter,
1269    wcstoutf8,
1270    NULL
1271};
1272
1273static XlcConv
1274open_wcstoutf8(
1275    XLCd from_lcd,
1276    const char *from_type,
1277    XLCd to_lcd,
1278    const char *to_type)
1279{
1280    return create_conv(from_lcd, &methods_wcstoutf8);
1281}
1282
1283/* from XlcNString to XlcNWideChar */
1284
1285static int
1286our_strtowcs(
1287    XlcConv conv,
1288    XPointer *from,
1289    int *from_left,
1290    XPointer *to,
1291    int *to_left,
1292    XPointer *args,
1293    int num_args)
1294{
1295    unsigned char const *src;
1296    unsigned char const *srcend;
1297    wchar_t *dst;
1298    wchar_t *dstend;
1299
1300    if (from == NULL || *from == NULL)
1301	return 0;
1302
1303    src = (unsigned char const *) *from;
1304    srcend = src + *from_left;
1305    dst = (wchar_t *) *to;
1306    dstend = dst + *to_left;
1307
1308    while (src < srcend && dst < dstend)
1309	*dst++ = (wchar_t) *src++;
1310
1311    *from = (XPointer) src;
1312    *from_left = srcend - src;
1313    *to = (XPointer) dst;
1314    *to_left = dstend - dst;
1315
1316    return 0;
1317}
1318
1319static XlcConvMethodsRec methods_strtowcs = {
1320    close_converter,
1321    our_strtowcs,
1322    NULL
1323};
1324
1325static XlcConv
1326open_strtowcs(
1327    XLCd from_lcd,
1328    const char *from_type,
1329    XLCd to_lcd,
1330    const char *to_type)
1331{
1332    return create_conv(from_lcd, &methods_strtowcs);
1333}
1334
1335/* from XlcNWideChar to XlcNString */
1336
1337static int
1338our_wcstostr(
1339    XlcConv conv,
1340    XPointer *from,
1341    int *from_left,
1342    XPointer *to,
1343    int *to_left,
1344    XPointer *args,
1345    int num_args)
1346{
1347    wchar_t const *src;
1348    wchar_t const *srcend;
1349    unsigned char *dst;
1350    unsigned char *dstend;
1351    int unconv_num;
1352
1353    if (from == NULL || *from == NULL)
1354	return 0;
1355
1356    src = (wchar_t const *) *from;
1357    srcend = src + *from_left;
1358    dst = (unsigned char *) *to;
1359    dstend = dst + *to_left;
1360    unconv_num = 0;
1361
1362    while (src < srcend && dst < dstend) {
1363	unsigned int wc = *src++;
1364	if (wc < 0x80)
1365	    *dst = wc;
1366	else {
1367	    *dst = BAD_CHAR;
1368	    unconv_num++;
1369	}
1370	dst++;
1371    }
1372
1373    *from = (XPointer) src;
1374    *from_left = srcend - src;
1375    *to = (XPointer) dst;
1376    *to_left = dstend - dst;
1377
1378    return unconv_num;
1379}
1380
1381static XlcConvMethodsRec methods_wcstostr = {
1382    close_converter,
1383    our_wcstostr,
1384    NULL
1385};
1386
1387static XlcConv
1388open_wcstostr(
1389    XLCd from_lcd,
1390    const char *from_type,
1391    XLCd to_lcd,
1392    const char *to_type)
1393{
1394    return create_conv(from_lcd, &methods_wcstostr);
1395}
1396
1397/* from XlcNCharSet to XlcNWideChar */
1398
1399static int
1400cstowcs(
1401    XlcConv conv,
1402    XPointer *from,
1403    int *from_left,
1404    XPointer *to,
1405    int *to_left,
1406    XPointer *args,
1407    int num_args)
1408{
1409    XlcCharSet charset;
1410    const char *name;
1411    Utf8Conv convptr;
1412    int i;
1413    unsigned char const *src;
1414    unsigned char const *srcend;
1415    wchar_t *dst;
1416    wchar_t *dstend;
1417    int unconv_num;
1418
1419    if (from == NULL || *from == NULL)
1420	return 0;
1421
1422    if (num_args < 1)
1423	return -1;
1424
1425    charset = (XlcCharSet) args[0];
1426    name = charset->encoding_name;
1427    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1428
1429    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1430	if (!strcmp(convptr->name, name))
1431	    break;
1432    if (i == 0)
1433	return -1;
1434
1435    src = (unsigned char const *) *from;
1436    srcend = src + *from_left;
1437    dst = (wchar_t *) *to;
1438    dstend = dst + *to_left;
1439    unconv_num = 0;
1440
1441    while (src < srcend && dst < dstend) {
1442	unsigned int wc;
1443	int consumed;
1444
1445	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1446	if (consumed == RET_ILSEQ)
1447	    return -1;
1448	if (consumed == RET_TOOFEW(0))
1449	    break;
1450
1451	*dst++ = wc;
1452	src += consumed;
1453    }
1454
1455    *from = (XPointer) src;
1456    *from_left = srcend - src;
1457    *to = (XPointer) dst;
1458    *to_left = dstend - dst;
1459
1460    return unconv_num;
1461}
1462
1463static XlcConvMethodsRec methods_cstowcs = {
1464    close_converter,
1465    cstowcs,
1466    NULL
1467};
1468
1469static XlcConv
1470open_cstowcs(
1471    XLCd from_lcd,
1472    const char *from_type,
1473    XLCd to_lcd,
1474    const char *to_type)
1475{
1476    lazy_init_all_charsets();
1477    return create_conv(from_lcd, &methods_cstowcs);
1478}
1479
1480/* from XlcNWideChar to XlcNCharSet */
1481
1482static int
1483wcstocs(
1484    XlcConv conv,
1485    XPointer *from,
1486    int *from_left,
1487    XPointer *to,
1488    int *to_left,
1489    XPointer *args,
1490    int num_args)
1491{
1492    Utf8Conv *preferred_charsets;
1493    XlcCharSet last_charset = NULL;
1494    wchar_t const *src;
1495    wchar_t const *srcend;
1496    unsigned char *dst;
1497    unsigned char *dstend;
1498    int unconv_num;
1499
1500    if (from == NULL || *from == NULL)
1501	return 0;
1502
1503    preferred_charsets = (Utf8Conv *) conv->state;
1504    src = (wchar_t const *) *from;
1505    srcend = src + *from_left;
1506    dst = (unsigned char *) *to;
1507    dstend = dst + *to_left;
1508    unconv_num = 0;
1509
1510    while (src < srcend && dst < dstend) {
1511	Utf8Conv chosen_charset = NULL;
1512	XlcSide chosen_side = XlcNONE;
1513	wchar_t wc = *src;
1514	int count;
1515
1516	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1517	if (count == RET_TOOSMALL)
1518	    break;
1519	if (count == RET_ILSEQ) {
1520	    src++;
1521	    unconv_num++;
1522	    continue;
1523	}
1524
1525	if (last_charset == NULL) {
1526	    last_charset =
1527	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1528	    if (last_charset == NULL) {
1529		src++;
1530		unconv_num++;
1531		continue;
1532	    }
1533	} else {
1534	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1535	          && (last_charset->side == XlcGLGR
1536	              || last_charset->side == chosen_side)))
1537		break;
1538	}
1539	src++;
1540	dst += count;
1541    }
1542
1543    if (last_charset == NULL)
1544	return -1;
1545
1546    *from = (XPointer) src;
1547    *from_left = srcend - src;
1548    *to = (XPointer) dst;
1549    *to_left = dstend - dst;
1550
1551    if (num_args >= 1)
1552	*((XlcCharSet *)args[0]) = last_charset;
1553
1554    return unconv_num;
1555}
1556
1557static XlcConvMethodsRec methods_wcstocs = {
1558    close_tocs_converter,
1559    wcstocs,
1560    NULL
1561};
1562
1563static XlcConv
1564open_wcstocs(
1565    XLCd from_lcd,
1566    const char *from_type,
1567    XLCd to_lcd,
1568    const char *to_type)
1569{
1570    return create_tocs_conv(from_lcd, &methods_wcstocs);
1571}
1572
1573/* from XlcNWideChar to XlcNChar */
1574
1575static int
1576wcstocs1(
1577    XlcConv conv,
1578    XPointer *from,
1579    int *from_left,
1580    XPointer *to,
1581    int *to_left,
1582    XPointer *args,
1583    int num_args)
1584{
1585    Utf8Conv *preferred_charsets;
1586    XlcCharSet last_charset = NULL;
1587    wchar_t const *src;
1588    wchar_t const *srcend;
1589    unsigned char *dst;
1590    unsigned char *dstend;
1591    int unconv_num;
1592
1593    if (from == NULL || *from == NULL)
1594	return 0;
1595
1596    preferred_charsets = (Utf8Conv *) conv->state;
1597    src = (wchar_t const *) *from;
1598    srcend = src + *from_left;
1599    dst = (unsigned char *) *to;
1600    dstend = dst + *to_left;
1601    unconv_num = 0;
1602
1603    while (src < srcend && dst < dstend) {
1604	Utf8Conv chosen_charset = NULL;
1605	XlcSide chosen_side = XlcNONE;
1606	wchar_t wc = *src;
1607	int count;
1608
1609	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1610	if (count == RET_TOOSMALL)
1611	    break;
1612	if (count == RET_ILSEQ) {
1613	    src++;
1614	    unconv_num++;
1615	    continue;
1616	}
1617
1618	if (last_charset == NULL) {
1619	    last_charset =
1620	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1621	    if (last_charset == NULL) {
1622		src++;
1623		unconv_num++;
1624		continue;
1625	    }
1626	} else {
1627	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1628	          && (last_charset->side == XlcGLGR
1629	              || last_charset->side == chosen_side)))
1630		break;
1631	}
1632	src++;
1633	dst += count;
1634	break;
1635    }
1636
1637    if (last_charset == NULL)
1638	return -1;
1639
1640    *from = (XPointer) src;
1641    *from_left = srcend - src;
1642    *to = (XPointer) dst;
1643    *to_left = dstend - dst;
1644
1645    if (num_args >= 1)
1646	*((XlcCharSet *)args[0]) = last_charset;
1647
1648    return unconv_num;
1649}
1650
1651static XlcConvMethodsRec methods_wcstocs1 = {
1652    close_tocs_converter,
1653    wcstocs1,
1654    NULL
1655};
1656
1657static XlcConv
1658open_wcstocs1(
1659    XLCd from_lcd,
1660    const char *from_type,
1661    XLCd to_lcd,
1662    const char *to_type)
1663{
1664    return create_tocs_conv(from_lcd, &methods_wcstocs1);
1665}
1666
1667/* trivial, no conversion */
1668
1669static int
1670identity(
1671    XlcConv conv,
1672    XPointer *from,
1673    int *from_left,
1674    XPointer *to,
1675    int *to_left,
1676    XPointer *args,
1677    int num_args)
1678{
1679    unsigned char const *src;
1680    unsigned char const *srcend;
1681    unsigned char *dst;
1682    unsigned char *dstend;
1683
1684    if (from == NULL || *from == NULL)
1685	return 0;
1686
1687    src = (unsigned char const *) *from;
1688    srcend = src + *from_left;
1689    dst = (unsigned char *) *to;
1690    dstend = dst + *to_left;
1691
1692    while (src < srcend && dst < dstend)
1693	*dst++ = *src++;
1694
1695    *from = (XPointer) src;
1696    *from_left = srcend - src;
1697    *to = (XPointer) dst;
1698    *to_left = dstend - dst;
1699
1700    return 0;
1701}
1702
1703static XlcConvMethodsRec methods_identity = {
1704    close_converter,
1705    identity,
1706    NULL
1707};
1708
1709static XlcConv
1710open_identity(
1711    XLCd from_lcd,
1712    const char *from_type,
1713    XLCd to_lcd,
1714    const char *to_type)
1715{
1716    return create_conv(from_lcd, &methods_identity);
1717}
1718
1719/* from MultiByte/WideChar to FontCharSet. */
1720/* They really use converters to CharSet
1721 * but with different create_conv procedure. */
1722
1723static XlcConv
1724create_tofontcs_conv(
1725    XLCd lcd,
1726    XlcConvMethods methods)
1727{
1728    XlcConv conv;
1729    int i, num, k, count;
1730    char **value, buf[20];
1731    Utf8Conv *preferred;
1732
1733    lazy_init_all_charsets();
1734
1735    for (i = 0, num = 0;; i++) {
1736	snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1737	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1738	if (count < 1) {
1739	    snprintf(buf, sizeof(buf), "fs%d.charset", i);
1740	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1741	    if (count < 1)
1742		break;
1743	}
1744	num += count;
1745    }
1746
1747    conv = Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
1748    if (conv == (XlcConv) NULL)
1749	return (XlcConv) NULL;
1750    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
1751
1752    /* Loop through all fontsets mentioned in the locale. */
1753    for (i = 0, num = 0;; i++) {
1754        snprintf(buf, sizeof(buf), "fs%d.charset.name", i);
1755        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1756        if (count < 1) {
1757            snprintf(buf, sizeof(buf), "fs%d.charset", i);
1758            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
1759            if (count < 1)
1760                break;
1761        }
1762	while (count-- > 0) {
1763	    XlcCharSet charset = _XlcGetCharSet(*value++);
1764	    const char *name;
1765
1766	    if (charset == (XlcCharSet) NULL)
1767		continue;
1768
1769	    name = charset->encoding_name;
1770	    /* If it wasn't already encountered... */
1771	    for (k = num - 1; k >= 0; k--)
1772		if (!strcmp(preferred[k]->name, name))
1773		    break;
1774	    if (k < 0) {
1775                /* For fonts "ISO10646-1" means ucs2, not utf8.*/
1776                if (!strcmp("ISO10646-1", name)) {
1777                    preferred[num++] = &all_charsets[ucs2_conv_index];
1778                    continue;
1779                }
1780		/* Look it up in all_charsets[]. */
1781		for (k = 0; k < all_charsets_count-1; k++)
1782		    if (!strcmp(all_charsets[k].name, name)) {
1783			/* Add it to the preferred set. */
1784			preferred[num++] = &all_charsets[k];
1785			break;
1786		    }
1787	    }
1788        }
1789    }
1790    preferred[num] = (Utf8Conv) NULL;
1791
1792    conv->methods = methods;
1793    conv->state = (XPointer) preferred;
1794
1795    return conv;
1796}
1797
1798static XlcConv
1799open_wcstofcs(
1800    XLCd from_lcd,
1801    const char *from_type,
1802    XLCd to_lcd,
1803    const char *to_type)
1804{
1805    return create_tofontcs_conv(from_lcd, &methods_wcstocs);
1806}
1807
1808static XlcConv
1809open_utf8tofcs(
1810    XLCd from_lcd,
1811    const char *from_type,
1812    XLCd to_lcd,
1813    const char *to_type)
1814{
1815    return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
1816}
1817
1818/* ========================== iconv Stuff ================================ */
1819
1820/* from XlcNCharSet to XlcNMultiByte */
1821
1822static int
1823iconv_cstombs(XlcConv conv, XPointer *from, int *from_left,
1824	      XPointer *to, int *to_left, XPointer *args, int num_args)
1825{
1826    XlcCharSet charset;
1827    char const *name;
1828    Utf8Conv convptr;
1829    int i;
1830    unsigned char const *src;
1831    unsigned char const *srcend;
1832    unsigned char *dst;
1833    unsigned char *dstend;
1834    int unconv_num;
1835
1836    if (from == NULL || *from == NULL)
1837	return 0;
1838
1839    if (num_args < 1)
1840	return -1;
1841
1842    charset = (XlcCharSet) args[0];
1843    name = charset->encoding_name;
1844    /* not charset->name because the latter has a ":GL"/":GR" suffix */
1845
1846    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
1847	if (!strcmp(convptr->name, name))
1848	    break;
1849    if (i == 0)
1850	return -1;
1851
1852    src = (unsigned char const *) *from;
1853    srcend = src + *from_left;
1854    dst = (unsigned char *) *to;
1855    dstend = dst + *to_left;
1856    unconv_num = 0;
1857
1858    while (src < srcend) {
1859	ucs4_t wc;
1860	int consumed;
1861	int count;
1862
1863	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
1864	if (consumed == RET_ILSEQ)
1865	    return -1;
1866	if (consumed == RET_TOOFEW(0))
1867	    break;
1868
1869    /* Use stdc iconv to convert widechar -> multibyte */
1870
1871	count = wctomb((char *)dst, wc);
1872	if (count == 0)
1873	    break;
1874	if (count == -1) {
1875	    count = wctomb((char *)dst, BAD_WCHAR);
1876	    if (count == 0)
1877		break;
1878	    unconv_num++;
1879	}
1880	src += consumed;
1881	dst += count;
1882    }
1883
1884    *from = (XPointer) src;
1885    *from_left = srcend - src;
1886    *to = (XPointer) dst;
1887    *to_left = dstend - dst;
1888
1889    return unconv_num;
1890
1891}
1892
1893static XlcConvMethodsRec iconv_cstombs_methods = {
1894    close_converter,
1895    iconv_cstombs,
1896    NULL
1897};
1898
1899static XlcConv
1900open_iconv_cstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1901{
1902    lazy_init_all_charsets();
1903    return create_conv(from_lcd, &iconv_cstombs_methods);
1904}
1905
1906static int
1907iconv_mbstocs(XlcConv conv, XPointer *from, int *from_left,
1908	      XPointer *to, int *to_left, XPointer *args, int num_args)
1909{
1910    Utf8Conv *preferred_charsets;
1911    XlcCharSet last_charset = NULL;
1912    unsigned char const *src;
1913    unsigned char const *srcend;
1914    unsigned char *dst;
1915    unsigned char *dstend;
1916    int unconv_num;
1917
1918    if (from == NULL || *from == NULL)
1919	return 0;
1920
1921    preferred_charsets = (Utf8Conv *) conv->state;
1922    src = (unsigned char const *) *from;
1923    srcend = src + *from_left;
1924    dst = (unsigned char *) *to;
1925    dstend = dst + *to_left;
1926    unconv_num = 0;
1927
1928    while (src < srcend && dst < dstend) {
1929	Utf8Conv chosen_charset = NULL;
1930	XlcSide chosen_side = XlcNONE;
1931	wchar_t wc;
1932	int consumed;
1933	int count;
1934
1935    /* Uses stdc iconv to convert multibyte -> widechar */
1936
1937	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
1938	if (consumed == 0)
1939	    break;
1940	if (consumed == -1) {
1941	    src++;
1942	    unconv_num++;
1943	    continue;
1944	}
1945
1946	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
1947
1948	if (count == RET_TOOSMALL)
1949	    break;
1950	if (count == RET_ILSEQ) {
1951	    src += consumed;
1952	    unconv_num++;
1953	    continue;
1954	}
1955
1956	if (last_charset == NULL) {
1957	    last_charset =
1958	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
1959	    if (last_charset == NULL) {
1960		src += consumed;
1961		unconv_num++;
1962		continue;
1963	    }
1964	} else {
1965	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
1966	          && (last_charset->side == XlcGLGR
1967	              || last_charset->side == chosen_side)))
1968		break;
1969	}
1970	src += consumed;
1971	dst += count;
1972    }
1973
1974    if (last_charset == NULL)
1975	return -1;
1976
1977    *from = (XPointer) src;
1978    *from_left = srcend - src;
1979    *to = (XPointer) dst;
1980    *to_left = dstend - dst;
1981
1982    if (num_args >= 1)
1983	*((XlcCharSet *)args[0]) = last_charset;
1984
1985    return unconv_num;
1986}
1987
1988static XlcConvMethodsRec iconv_mbstocs_methods = {
1989    close_tocs_converter,
1990    iconv_mbstocs,
1991    NULL
1992};
1993
1994static XlcConv
1995open_iconv_mbstocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
1996{
1997    return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
1998}
1999
2000/* from XlcNMultiByte to XlcNChar */
2001
2002static int
2003iconv_mbtocs(XlcConv conv, XPointer *from, int *from_left,
2004	     XPointer *to, int *to_left, XPointer *args, int num_args)
2005{
2006    Utf8Conv *preferred_charsets;
2007    XlcCharSet last_charset = NULL;
2008    unsigned char const *src;
2009    unsigned char const *srcend;
2010    unsigned char *dst;
2011    unsigned char *dstend;
2012    int unconv_num;
2013
2014    if (from == NULL || *from == NULL)
2015	return 0;
2016
2017    preferred_charsets = (Utf8Conv *) conv->state;
2018    src = (unsigned char const *) *from;
2019    srcend = src + *from_left;
2020    dst = (unsigned char *) *to;
2021    dstend = dst + *to_left;
2022    unconv_num = 0;
2023
2024    while (src < srcend && dst < dstend) {
2025	Utf8Conv chosen_charset = NULL;
2026	XlcSide chosen_side = XlcNONE;
2027	wchar_t wc;
2028	int consumed;
2029	int count;
2030
2031    /* Uses stdc iconv to convert multibyte -> widechar */
2032
2033	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2034	if (consumed == 0)
2035	    break;
2036	if (consumed == -1) {
2037	    src++;
2038	    unconv_num++;
2039	    continue;
2040	}
2041
2042	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
2043	if (count == RET_TOOSMALL)
2044	    break;
2045	if (count == RET_ILSEQ) {
2046	    src += consumed;
2047	    unconv_num++;
2048	    continue;
2049	}
2050
2051	if (last_charset == NULL) {
2052	    last_charset =
2053		_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
2054	    if (last_charset == NULL) {
2055		src += consumed;
2056		unconv_num++;
2057		continue;
2058	    }
2059	} else {
2060	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
2061		  && (last_charset->side == XlcGLGR
2062		      || last_charset->side == chosen_side)))
2063		break;
2064	}
2065	src += consumed;
2066	dst += count;
2067    }
2068
2069    if (last_charset == NULL)
2070	return -1;
2071
2072    *from = (XPointer) src;
2073    *from_left = srcend - src;
2074    *to = (XPointer) dst;
2075    *to_left = dstend - dst;
2076
2077    if (num_args >= 1)
2078	*((XlcCharSet *)args[0]) = last_charset;
2079
2080    return unconv_num;
2081}
2082
2083static XlcConvMethodsRec iconv_mbtocs_methods = {
2084    close_tocs_converter,
2085    iconv_mbtocs,
2086    NULL
2087};
2088
2089static XlcConv
2090open_iconv_mbtocs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2091{
2092    return create_tocs_conv(from_lcd, &iconv_mbtocs_methods );
2093}
2094
2095/* from XlcNMultiByte to XlcNString */
2096
2097static int
2098iconv_mbstostr(XlcConv conv, XPointer *from, int *from_left,
2099	       XPointer *to, int *to_left, XPointer *args, int num_args)
2100{
2101    unsigned char const *src;
2102    unsigned char const *srcend;
2103    unsigned char *dst;
2104    unsigned char *dstend;
2105    int unconv_num;
2106
2107    if (from == NULL || *from == NULL)
2108	return 0;
2109
2110    src = (unsigned char const *) *from;
2111    srcend = src + *from_left;
2112    dst = (unsigned char *) *to;
2113    dstend = dst + *to_left;
2114    unconv_num = 0;
2115
2116    while (src < srcend) {
2117	unsigned char c;
2118	wchar_t wc;
2119	int consumed;
2120
2121    /* Uses stdc iconv to convert multibyte -> widechar */
2122
2123	consumed = mbtowc(&wc, (const char *)src, (size_t) (srcend - src));
2124	if (consumed == 0)
2125	    break;
2126	if (dst == dstend)
2127	    break;
2128	if (consumed == -1) {
2129	    consumed = 1;
2130	    c = BAD_CHAR;
2131	    unconv_num++;
2132	} else {
2133	    if ((wc & ~(wchar_t)0xff) != 0) {
2134		c = BAD_CHAR;
2135		unconv_num++;
2136	    } else
2137		c = (unsigned char) wc;
2138	}
2139	*dst++ = c;
2140	src += consumed;
2141    }
2142
2143    *from = (XPointer) src;
2144    *from_left = srcend - src;
2145    *to = (XPointer) dst;
2146    *to_left = dstend - dst;
2147
2148    return unconv_num;
2149}
2150
2151static XlcConvMethodsRec iconv_mbstostr_methods = {
2152    close_converter,
2153    iconv_mbstostr,
2154    NULL
2155};
2156
2157static XlcConv
2158open_iconv_mbstostr(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2159{
2160    return create_conv(from_lcd, &iconv_mbstostr_methods);
2161}
2162
2163/* from XlcNString to XlcNMultiByte */
2164static int
2165iconv_strtombs(XlcConv conv, XPointer *from, int *from_left,
2166	       XPointer *to, int *to_left, XPointer *args, int num_args)
2167{
2168    unsigned char const *src;
2169    unsigned char const *srcend;
2170    unsigned char *dst;
2171    unsigned char *dstend;
2172
2173    if (from == NULL || *from == NULL)
2174	return 0;
2175
2176    src = (unsigned char const *) *from;
2177    srcend = src + *from_left;
2178    dst = (unsigned char *) *to;
2179    dstend = dst + *to_left;
2180
2181    while (src < srcend) {
2182	int count = wctomb((char *)dst, *src);
2183	if (count < 0)
2184	    break;
2185	dst += count;
2186	src++;
2187    }
2188
2189    *from = (XPointer) src;
2190    *from_left = srcend - src;
2191    *to = (XPointer) dst;
2192    *to_left = dstend - dst;
2193
2194    return 0;
2195}
2196
2197static XlcConvMethodsRec iconv_strtombs_methods= {
2198    close_converter,
2199    iconv_strtombs,
2200    NULL
2201};
2202
2203static XlcConv
2204open_iconv_strtombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2205{
2206    return create_conv(from_lcd, &iconv_strtombs_methods);
2207}
2208
/***************************************************************************/
/* Part II: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is iconv and that `wchar_t' is Unicode.
 */
2214
2215/* from XlcNMultiByte to XlcNWideChar */
2216static int
2217iconv_mbstowcs(XlcConv conv, XPointer *from, int *from_left,
2218	       XPointer *to, int *to_left, XPointer *args,  int num_args)
2219{
2220    char *src = *((char **) from);
2221    wchar_t *dst = *((wchar_t **) to);
2222    int src_left = *from_left;
2223    int dst_left = *to_left;
2224    int length, unconv_num = 0;
2225
2226    while (src_left > 0 && dst_left > 0) {
2227	length = mbtowc(dst, src, (size_t) src_left);
2228
2229	if (length > 0) {
2230	    src += length;
2231	    src_left -= length;
2232	    if (dst)
2233	        dst++;
2234	    dst_left--;
2235	} else if (length < 0) {
2236	    src++;
2237	    src_left--;
2238	    unconv_num++;
2239        } else {
2240            /* null ? */
2241            src++;
2242            src_left--;
2243            if (dst)
2244                *dst++ = L'\0';
2245            dst_left--;
2246        }
2247    }
2248
2249    *from = (XPointer) src;
2250    if (dst)
2251	*to = (XPointer) dst;
2252    *from_left = src_left;
2253    *to_left = dst_left;
2254
2255    return unconv_num;
2256}
2257
2258static XlcConvMethodsRec iconv_mbstowcs_methods = {
2259    close_converter,
2260    iconv_mbstowcs,
2261    NULL
2262} ;
2263
2264static XlcConv
2265open_iconv_mbstowcs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2266{
2267    return create_conv(from_lcd, &iconv_mbstowcs_methods);
2268}
2269
2270static int
2271iconv_wcstombs(XlcConv conv, XPointer *from, int *from_left,
2272	       XPointer *to, int *to_left, XPointer *args, int num_args)
2273{
2274    wchar_t *src = *((wchar_t **) from);
2275    char *dst = *((char **) to);
2276    int src_left = *from_left;
2277    int dst_left = *to_left;
2278    int length, unconv_num = 0;
2279
2280    while (src_left > 0 && dst_left >= MB_CUR_MAX) {
2281	length = wctomb(dst, *src);		/* XXX */
2282
2283        if (length > 0) {
2284	    src++;
2285	    src_left--;
2286	    if (dst)
2287		dst += length;
2288	    dst_left -= length;
2289	} else if (length < 0) {
2290	    src++;
2291	    src_left--;
2292	    unconv_num++;
2293	}
2294    }
2295
2296    *from = (XPointer) src;
2297    if (dst)
2298      *to = (XPointer) dst;
2299    *from_left = src_left;
2300    *to_left = dst_left;
2301
2302    return unconv_num;
2303}
2304
2305static XlcConvMethodsRec iconv_wcstombs_methods = {
2306    close_converter,
2307    iconv_wcstombs,
2308    NULL
2309} ;
2310
2311static XlcConv
2312open_iconv_wcstombs(XLCd from_lcd, const char *from_type, XLCd to_lcd, const char *to_type)
2313{
2314    return create_conv(from_lcd, &iconv_wcstombs_methods);
2315}
2316
2317static XlcConv
2318open_iconv_mbstofcs(
2319    XLCd from_lcd,
2320    const char *from_type,
2321    XLCd to_lcd,
2322    const char *to_type)
2323{
2324    return create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
2325}
2326
2327/* Registers UTF-8 converters for a UTF-8 locale. */
2328
2329void
2330_XlcAddUtf8LocaleConverters(
2331    XLCd lcd)
2332{
2333    /* Register elementary converters. */
2334
2335    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
2336
2337    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
2338    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2339
2340    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2341
2342    /* Register converters for XlcNCharSet. This implicitly provides
2343     * converters from and to XlcNCompoundText. */
2344
2345    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
2346    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
2347    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
2348
2349    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2350    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2351    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2352
2353    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
2354    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
2355    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
2356    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
2357
2358    /* Register converters for XlcNFontCharSet */
2359    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
2360    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2361}
2362
2363void
2364_XlcAddGB18030LocaleConverters(
2365    XLCd lcd)
2366{
2367
2368    /* Register elementary converters. */
2369    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
2370    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
2371
2372    /* Register converters for XlcNCharSet. This implicitly provides
2373     * converters from and to XlcNCompoundText. */
2374
2375    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
2376    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
2377    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
2378    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
2379    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
2380
2381    /* Register converters for XlcNFontCharSet */
2382    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
2383
2384    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
2385    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
2386    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
2387    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
2388    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
2389
2390    /* Register converters for XlcNFontCharSet */
2391    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
2392}
2393