lcUTF8.c revision 1ab64890
11ab64890Smrg/* $TOG:  $ */
21ab64890Smrg/******************************************************************
31ab64890Smrg
41ab64890Smrg              Copyright 1993 by SunSoft, Inc.
51ab64890Smrg              Copyright 1999-2000 by Bruno Haible
61ab64890Smrg
71ab64890SmrgPermission to use, copy, modify, distribute, and sell this software
81ab64890Smrgand its documentation for any purpose is hereby granted without fee,
91ab64890Smrgprovided that the above copyright notice appear in all copies and
101ab64890Smrgthat both that copyright notice and this permission notice appear
111ab64890Smrgin supporting documentation, and that the names of SunSoft, Inc. and
121ab64890SmrgBruno Haible not be used in advertising or publicity pertaining to
131ab64890Smrgdistribution of the software without specific, written prior
141ab64890Smrgpermission.  SunSoft, Inc. and Bruno Haible make no representations
151ab64890Smrgabout the suitability of this software for any purpose.  It is
161ab64890Smrgprovided "as is" without express or implied warranty.
171ab64890Smrg
181ab64890SmrgSunSoft Inc. AND Bruno Haible DISCLAIM ALL WARRANTIES WITH REGARD
191ab64890SmrgTO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
201ab64890SmrgAND FITNESS, IN NO EVENT SHALL SunSoft, Inc. OR Bruno Haible BE LIABLE
211ab64890SmrgFOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
221ab64890SmrgWHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
231ab64890SmrgACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
241ab64890SmrgOF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
251ab64890Smrg
261ab64890Smrg******************************************************************/
271ab64890Smrg/* $XFree86: xc/lib/X11/lcUTF8.c,v 1.15 2002/10/08 23:31:36 dawes Exp $ */
281ab64890Smrg
291ab64890Smrg/*
301ab64890Smrg * This file contains:
311ab64890Smrg *
321ab64890Smrg * I. Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
331ab64890Smrg *
341ab64890Smrg *    Used for three purposes:
351ab64890Smrg *      1. The UTF-8 locales, see below.
361ab64890Smrg *      2. Unicode aware applications for which the use of 8-bit character
371ab64890Smrg *         sets is an anachronism.
381ab64890Smrg *      3. For conversion from keysym to locale encoding.
391ab64890Smrg *
401ab64890Smrg * II. Conversion files for an UTF-8 locale loader.
411ab64890Smrg *     Supports: all locales with codeset UTF-8.
421ab64890Smrg *     How: Provides converters for UTF-8.
431ab64890Smrg *     Platforms: all systems.
441ab64890Smrg *
451ab64890Smrg * The loader itself is located in lcUTF8Load.c.
461ab64890Smrg */
471ab64890Smrg
481ab64890Smrg/*
491ab64890Smrg * The conversion from UTF-8 to CompoundText is realized in a very
501ab64890Smrg * conservative way. Recall that CompoundText data is used for inter-client
511ab64890Smrg * communication purposes. We distinguish three classes of clients:
521ab64890Smrg * - Clients which accept only those pieces of CompoundText which belong to
531ab64890Smrg *   the character set understood by the current locale.
541ab64890Smrg *   (Example: clients which are linked to an older X11 library.)
551ab64890Smrg * - Clients which accept CompoundText with multiple character sets and parse
561ab64890Smrg *   it themselves.
571ab64890Smrg *   (Example: emacs, xemacs.)
581ab64890Smrg * - Clients which rely entirely on the X{mb,wc}TextPropertyToTextList
591ab64890Smrg *   functions for the conversion of CompoundText to their current locale's
601ab64890Smrg *   multi-byte/wide-character format.
611ab64890Smrg * For best interoperation, the UTF-8 to CompoundText conversion proceeds as
621ab64890Smrg * follows. For every character, it first tests whether the character is
631ab64890Smrg * representable in the current locale's original (non-UTF-8) character set.
641ab64890Smrg * If not, it goes through the list of predefined character sets for
651ab64890Smrg * CompoundText and tests if the character is representable in that character
661ab64890Smrg * set. If so, it encodes the character using its code within that character
671ab64890Smrg * set. If not, it uses an UTF-8-in-CompoundText encapsulation. Since
681ab64890Smrg * clients of the first and second kind ignore such encapsulated text,
691ab64890Smrg * this encapsulation is kept to a minimum and terminated as early as possible.
701ab64890Smrg *
711ab64890Smrg * In a distant future, when clients of the first and second kind will have
721ab64890Smrg * disappeared, we will be able to stuff UTF-8 data directly in CompoundText
731ab64890Smrg * without first going through the list of predefined character sets.
741ab64890Smrg */
751ab64890Smrg
761ab64890Smrg#ifdef HAVE_CONFIG_H
771ab64890Smrg#include <config.h>
781ab64890Smrg#endif
791ab64890Smrg#include <stdio.h>
801ab64890Smrg#include "Xlibint.h"
811ab64890Smrg#include "XlcPubI.h"
821ab64890Smrg#include "XlcGeneric.h"
831ab64890Smrg
841ab64890Smrgstatic XlcConv
851ab64890Smrgcreate_conv(
861ab64890Smrg    XLCd lcd,
871ab64890Smrg    XlcConvMethods methods)
881ab64890Smrg{
891ab64890Smrg    XlcConv conv;
901ab64890Smrg
911ab64890Smrg    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec));
921ab64890Smrg    if (conv == (XlcConv) NULL)
931ab64890Smrg	return (XlcConv) NULL;
941ab64890Smrg
951ab64890Smrg    conv->methods = methods;
961ab64890Smrg    conv->state = NULL;
971ab64890Smrg
981ab64890Smrg    return conv;
991ab64890Smrg}
1001ab64890Smrg
1011ab64890Smrgstatic void
1021ab64890Smrgclose_converter(
1031ab64890Smrg    XlcConv conv)
1041ab64890Smrg{
1051ab64890Smrg    Xfree((char *) conv);
1061ab64890Smrg}
1071ab64890Smrg
/*
 * Substitutes emitted in place of input that cannot be converted:
 * BAD_WCHAR (U+FFFD) when the output is Unicode, BAD_CHAR when the
 * output is a byte string.
 */
#define BAD_WCHAR ((ucs4_t) 0xfffd)
#define BAD_CHAR '?'
1111ab64890Smrg
1121ab64890Smrg/***************************************************************************/
1131ab64890Smrg/* Part I: Conversion routines CompoundText/CharSet <--> Unicode/UTF-8.
1141ab64890Smrg *
1151ab64890Smrg * Note that this code works in any locale. We store Unicode values in
1161ab64890Smrg * `ucs4_t' variables, but don't pass them to the user.
1171ab64890Smrg *
1181ab64890Smrg * This code has to support all character sets that are used for CompoundText,
1191ab64890Smrg * nothing more, nothing less. See the table in lcCT.c.
1201ab64890Smrg * Since the conversion _to_ CompoundText is likely to need the tables for all
1211ab64890Smrg * character sets at once, we don't use dynamic loading (of tables or shared
1221ab64890Smrg * libraries through iconv()). Use a fixed set of tables instead.
1231ab64890Smrg *
1241ab64890Smrg * We use statically computed tables, not dynamically allocated arrays,
1251ab64890Smrg * because it's more memory efficient: Different processes using the same
1261ab64890Smrg * libX11 shared library share the "text" and read-only "data" sections.
1271ab64890Smrg */
1281ab64890Smrg
1291ab64890Smrgtypedef unsigned int ucs4_t;
1301ab64890Smrg#define conv_t XlcConv
1311ab64890Smrg
1321ab64890Smrgtypedef struct _Utf8ConvRec {
1331ab64890Smrg    const char *name;
1341ab64890Smrg    XrmQuark xrm_name;
1351ab64890Smrg    int (* cstowc) (XlcConv, ucs4_t *, unsigned char const *, int);
1361ab64890Smrg    int (* wctocs) (XlcConv, unsigned char *, ucs4_t, int);
1371ab64890Smrg} Utf8ConvRec, *Utf8Conv;
1381ab64890Smrg
1391ab64890Smrg/*
1401ab64890Smrg * int xxx_cstowc (XlcConv conv, ucs4_t *pwc, unsigned char const *s, int n)
1411ab64890Smrg * converts the byte sequence starting at s to a wide character. Up to n bytes
1421ab64890Smrg * are available at s. n is >= 1.
1431ab64890Smrg * Result is number of bytes consumed (if a wide character was read),
1441ab64890Smrg * or 0 if invalid, or -1 if n too small.
1451ab64890Smrg *
1461ab64890Smrg * int xxx_wctocs (XlcConv conv, unsigned char *r, ucs4_t wc, int n)
1471ab64890Smrg * converts the wide character wc to the character set xxx, and stores the
1481ab64890Smrg * result beginning at r. Up to n bytes may be written at r. n is >= 1.
1491ab64890Smrg * Result is number of bytes written, or 0 if invalid, or -1 if n too small.
1501ab64890Smrg */
1511ab64890Smrg
/*
 * Return codes shared by the xxx_mbtowc / xxx_wctomb conversion functions.
 * Note that RET_TOOFEW(0) and RET_TOOSMALL have the same value (-1).
 * Every expansion is parenthesized so that the macros are safe inside
 * any larger expression.
 */

/* Return code if invalid. (xxx_mbtowc, xxx_wctomb) */
#define RET_ILSEQ      0
/* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */
#define RET_TOOFEW(n)  (-1-(n))
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   (-1)
1581ab64890Smrg
1591ab64890Smrg/*
1601ab64890Smrg * The tables below are bijective. It would be possible to extend the
1611ab64890Smrg * xxx_wctocs tables to do some transliteration (e.g. U+201C,U+201D -> 0x22)
1621ab64890Smrg * but *only* with characters not contained in any other table, and *only*
1631ab64890Smrg * when the current locale is not an UTF-8 locale.
1641ab64890Smrg */
1651ab64890Smrg
1661ab64890Smrg#include "lcUniConv/utf8.h"
1671ab64890Smrg#include "lcUniConv/ucs2be.h"
1681ab64890Smrg#ifdef notused
1691ab64890Smrg#include "lcUniConv/ascii.h"
1701ab64890Smrg#endif
1711ab64890Smrg#include "lcUniConv/iso8859_1.h"
1721ab64890Smrg#include "lcUniConv/iso8859_2.h"
1731ab64890Smrg#include "lcUniConv/iso8859_3.h"
1741ab64890Smrg#include "lcUniConv/iso8859_4.h"
1751ab64890Smrg#include "lcUniConv/iso8859_5.h"
1761ab64890Smrg#include "lcUniConv/iso8859_6.h"
1771ab64890Smrg#include "lcUniConv/iso8859_7.h"
1781ab64890Smrg#include "lcUniConv/iso8859_8.h"
1791ab64890Smrg#include "lcUniConv/iso8859_9.h"
1801ab64890Smrg#include "lcUniConv/iso8859_10.h"
1811ab64890Smrg#include "lcUniConv/iso8859_11.h"
1821ab64890Smrg#include "lcUniConv/iso8859_13.h"
1831ab64890Smrg#include "lcUniConv/iso8859_14.h"
1841ab64890Smrg#include "lcUniConv/iso8859_15.h"
1851ab64890Smrg#include "lcUniConv/iso8859_16.h"
1861ab64890Smrg#include "lcUniConv/iso8859_9e.h"
1871ab64890Smrg#include "lcUniConv/jisx0201.h"
1881ab64890Smrg#include "lcUniConv/tis620.h"
1891ab64890Smrg#include "lcUniConv/koi8_r.h"
1901ab64890Smrg#include "lcUniConv/koi8_u.h"
1911ab64890Smrg#include "lcUniConv/koi8_c.h"
1921ab64890Smrg#include "lcUniConv/armscii_8.h"
1931ab64890Smrg#include "lcUniConv/cp1133.h"
1941ab64890Smrg#include "lcUniConv/mulelao.h"
1951ab64890Smrg#include "lcUniConv/viscii.h"
1961ab64890Smrg#include "lcUniConv/tcvn.h"
1971ab64890Smrg#include "lcUniConv/georgian_academy.h"
1981ab64890Smrg#include "lcUniConv/georgian_ps.h"
1991ab64890Smrg#include "lcUniConv/cp1251.h"
2001ab64890Smrg#include "lcUniConv/cp1255.h"
2011ab64890Smrg#include "lcUniConv/cp1256.h"
2021ab64890Smrg#include "lcUniConv/tatar_cyr.h"
2031ab64890Smrg
/*
 * Summary record for a group of table entries: where the group starts in
 * the big table, and which of its entries are in use.
 * The member order must not change (NOTE(review): the table headers
 * included below presumably initialize these records positionally).
 */
typedef struct {
    unsigned short indx;    /* index into big table */
    unsigned short used;    /* bitmask of used entries */
} Summary16;
2081ab64890Smrg
2091ab64890Smrg#include "lcUniConv/gb2312.h"
2101ab64890Smrg#include "lcUniConv/jisx0208.h"
2111ab64890Smrg#include "lcUniConv/jisx0212.h"
2121ab64890Smrg#include "lcUniConv/ksc5601.h"
2131ab64890Smrg#include "lcUniConv/big5.h"
2141ab64890Smrg#include "lcUniConv/big5_emacs.h"
2151ab64890Smrg
2161ab64890Smrgstatic Utf8ConvRec all_charsets[] = {
2171ab64890Smrg    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
2181ab64890Smrg       (for lookup speed), once at the end (as a fallback).  */
2191ab64890Smrg    { "ISO10646-1", NULLQUARK,
2201ab64890Smrg	utf8_mbtowc, utf8_wctomb
2211ab64890Smrg    },
2221ab64890Smrg
2231ab64890Smrg    { "ISO8859-1", NULLQUARK,
2241ab64890Smrg	iso8859_1_mbtowc, iso8859_1_wctomb
2251ab64890Smrg    },
2261ab64890Smrg    { "ISO8859-2", NULLQUARK,
2271ab64890Smrg	iso8859_2_mbtowc, iso8859_2_wctomb
2281ab64890Smrg    },
2291ab64890Smrg    { "ISO8859-3", NULLQUARK,
2301ab64890Smrg	iso8859_3_mbtowc, iso8859_3_wctomb
2311ab64890Smrg    },
2321ab64890Smrg    { "ISO8859-4", NULLQUARK,
2331ab64890Smrg	iso8859_4_mbtowc, iso8859_4_wctomb
2341ab64890Smrg    },
2351ab64890Smrg    { "ISO8859-5", NULLQUARK,
2361ab64890Smrg	iso8859_5_mbtowc, iso8859_5_wctomb
2371ab64890Smrg    },
2381ab64890Smrg    { "ISO8859-6", NULLQUARK,
2391ab64890Smrg	iso8859_6_mbtowc, iso8859_6_wctomb
2401ab64890Smrg    },
2411ab64890Smrg    { "ISO8859-7", NULLQUARK,
2421ab64890Smrg	iso8859_7_mbtowc, iso8859_7_wctomb
2431ab64890Smrg    },
2441ab64890Smrg    { "ISO8859-8", NULLQUARK,
2451ab64890Smrg	iso8859_8_mbtowc, iso8859_8_wctomb
2461ab64890Smrg    },
2471ab64890Smrg    { "ISO8859-9", NULLQUARK,
2481ab64890Smrg	iso8859_9_mbtowc, iso8859_9_wctomb
2491ab64890Smrg    },
2501ab64890Smrg    { "ISO8859-10", NULLQUARK,
2511ab64890Smrg	iso8859_10_mbtowc, iso8859_10_wctomb
2521ab64890Smrg    },
2531ab64890Smrg    { "ISO8859-11", NULLQUARK,
2541ab64890Smrg	iso8859_11_mbtowc, iso8859_11_wctomb
2551ab64890Smrg    },
2561ab64890Smrg    { "ISO8859-13", NULLQUARK,
2571ab64890Smrg	iso8859_13_mbtowc, iso8859_13_wctomb
2581ab64890Smrg    },
2591ab64890Smrg    { "ISO8859-14", NULLQUARK,
2601ab64890Smrg	iso8859_14_mbtowc, iso8859_14_wctomb
2611ab64890Smrg    },
2621ab64890Smrg    { "ISO8859-15", NULLQUARK,
2631ab64890Smrg	iso8859_15_mbtowc, iso8859_15_wctomb
2641ab64890Smrg    },
2651ab64890Smrg    { "ISO8859-16", NULLQUARK,
2661ab64890Smrg	iso8859_16_mbtowc, iso8859_16_wctomb
2671ab64890Smrg    },
2681ab64890Smrg    { "JISX0201.1976-0", NULLQUARK,
2691ab64890Smrg	jisx0201_mbtowc, jisx0201_wctomb
2701ab64890Smrg    },
2711ab64890Smrg    { "TIS620-0", NULLQUARK,
2721ab64890Smrg	tis620_mbtowc, tis620_wctomb
2731ab64890Smrg    },
2741ab64890Smrg    { "GB2312.1980-0", NULLQUARK,
2751ab64890Smrg	gb2312_mbtowc, gb2312_wctomb
2761ab64890Smrg    },
2771ab64890Smrg    { "JISX0208.1983-0", NULLQUARK,
2781ab64890Smrg	jisx0208_mbtowc, jisx0208_wctomb
2791ab64890Smrg    },
2801ab64890Smrg    { "JISX0208.1990-0", NULLQUARK,
2811ab64890Smrg	jisx0208_mbtowc, jisx0208_wctomb
2821ab64890Smrg    },
2831ab64890Smrg    { "JISX0212.1990-0", NULLQUARK,
2841ab64890Smrg	jisx0212_mbtowc, jisx0212_wctomb
2851ab64890Smrg    },
2861ab64890Smrg    { "KSC5601.1987-0", NULLQUARK,
2871ab64890Smrg	ksc5601_mbtowc, ksc5601_wctomb
2881ab64890Smrg    },
2891ab64890Smrg    { "KOI8-R", NULLQUARK,
2901ab64890Smrg	koi8_r_mbtowc, koi8_r_wctomb
2911ab64890Smrg    },
2921ab64890Smrg    { "KOI8-U", NULLQUARK,
2931ab64890Smrg	koi8_u_mbtowc, koi8_u_wctomb
2941ab64890Smrg    },
2951ab64890Smrg    { "KOI8-C", NULLQUARK,
2961ab64890Smrg	koi8_c_mbtowc, koi8_c_wctomb
2971ab64890Smrg    },
2981ab64890Smrg    { "TATAR-CYR", NULLQUARK,
2991ab64890Smrg	tatar_cyr_mbtowc, tatar_cyr_wctomb
3001ab64890Smrg    },
3011ab64890Smrg    { "ARMSCII-8", NULLQUARK,
3021ab64890Smrg	armscii_8_mbtowc, armscii_8_wctomb
3031ab64890Smrg    },
3041ab64890Smrg    { "IBM-CP1133", NULLQUARK,
3051ab64890Smrg	cp1133_mbtowc, cp1133_wctomb
3061ab64890Smrg    },
3071ab64890Smrg    { "MULELAO-1", NULLQUARK,
3081ab64890Smrg	mulelao_mbtowc, mulelao_wctomb
3091ab64890Smrg    },
3101ab64890Smrg    { "VISCII1.1-1", NULLQUARK,
3111ab64890Smrg	viscii_mbtowc, viscii_wctomb
3121ab64890Smrg    },
3131ab64890Smrg    { "TCVN-5712", NULLQUARK,
3141ab64890Smrg	tcvn_mbtowc, tcvn_wctomb
3151ab64890Smrg    },
3161ab64890Smrg    { "GEORGIAN-ACADEMY", NULLQUARK,
3171ab64890Smrg	georgian_academy_mbtowc, georgian_academy_wctomb
3181ab64890Smrg    },
3191ab64890Smrg    { "GEORGIAN-PS", NULLQUARK,
3201ab64890Smrg	georgian_ps_mbtowc, georgian_ps_wctomb
3211ab64890Smrg    },
3221ab64890Smrg    { "ISO8859-9E", NULLQUARK,
3231ab64890Smrg	iso8859_9e_mbtowc, iso8859_9e_wctomb
3241ab64890Smrg    },
3251ab64890Smrg    { "MICROSOFT-CP1251", NULLQUARK,
3261ab64890Smrg	cp1251_mbtowc, cp1251_wctomb
3271ab64890Smrg    },
3281ab64890Smrg    { "MICROSOFT-CP1255", NULLQUARK,
3291ab64890Smrg	cp1255_mbtowc, cp1255_wctomb
3301ab64890Smrg    },
3311ab64890Smrg    { "MICROSOFT-CP1256", NULLQUARK,
3321ab64890Smrg	cp1256_mbtowc, cp1256_wctomb
3331ab64890Smrg    },
3341ab64890Smrg    { "BIG5-0", NULLQUARK,
3351ab64890Smrg    big5_mbtowc, big5_wctomb
3361ab64890Smrg	},
3371ab64890Smrg    { "BIG5-E0", NULLQUARK,
3381ab64890Smrg	big5_0_mbtowc, big5_0_wctomb
3391ab64890Smrg    },
3401ab64890Smrg    { "BIG5-E1", NULLQUARK,
3411ab64890Smrg	big5_1_mbtowc, big5_1_wctomb
3421ab64890Smrg    },
3431ab64890Smrg
3441ab64890Smrg    /* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
3451ab64890Smrg       (for lookup speed), once at the end (as a fallback).  */
3461ab64890Smrg    { "ISO10646-1", NULLQUARK,
3471ab64890Smrg	utf8_mbtowc, utf8_wctomb
3481ab64890Smrg    },
3491ab64890Smrg
3501ab64890Smrg    /* Encoding ISO10646-1 for fonts means UCS2-like encoding
3511ab64890Smrg       so for conversion to FontCharSet we need this record */
3521ab64890Smrg    { "ISO10646-1", NULLQUARK,
3531ab64890Smrg	ucs2be_mbtowc, ucs2be_wctomb
3541ab64890Smrg    }
3551ab64890Smrg};
3561ab64890Smrg
/* Number of rows in all_charsets[], including the trailing UCS-2 row. */
#define charsets_table_size (sizeof(all_charsets) / sizeof(*all_charsets))
/* Number of rows not counting the final (UCS-2) row. */
#define all_charsets_count  (charsets_table_size - 1)
/* Position of the final (UCS-2) row. */
#define ucs2_conv_index     (charsets_table_size - 1)
3601ab64890Smrg
3611ab64890Smrgstatic void
3621ab64890Smrginit_all_charsets (void)
3631ab64890Smrg{
3641ab64890Smrg    Utf8Conv convptr;
3651ab64890Smrg    int i;
3661ab64890Smrg
3671ab64890Smrg    for (convptr = all_charsets, i = charsets_table_size; i > 0; convptr++, i--)
3681ab64890Smrg	convptr->xrm_name = XrmStringToQuark(convptr->name);
3691ab64890Smrg}
3701ab64890Smrg
/*
 * Run init_all_charsets() the first time it is needed.  The quark of
 * row 0 serves as the "already done" marker: it is still NULLQUARK only
 * while the table has not been initialized.
 */
#define lazy_init_all_charsets()					\
    do {								\
        if (all_charsets[0].xrm_name == NULLQUARK) {			\
            init_all_charsets();					\
        }								\
    } while (0)
3761ab64890Smrg
3771ab64890Smrg/* from XlcNCharSet to XlcNUtf8String */
3781ab64890Smrg
3791ab64890Smrgstatic int
3801ab64890Smrgcstoutf8(
3811ab64890Smrg    XlcConv conv,
3821ab64890Smrg    XPointer *from,
3831ab64890Smrg    int *from_left,
3841ab64890Smrg    XPointer *to,
3851ab64890Smrg    int *to_left,
3861ab64890Smrg    XPointer *args,
3871ab64890Smrg    int num_args)
3881ab64890Smrg{
3891ab64890Smrg    XlcCharSet charset;
3901ab64890Smrg    const char *name;
3911ab64890Smrg    Utf8Conv convptr;
3921ab64890Smrg    int i;
3931ab64890Smrg    unsigned char const *src;
3941ab64890Smrg    unsigned char const *srcend;
3951ab64890Smrg    unsigned char *dst;
3961ab64890Smrg    unsigned char *dstend;
3971ab64890Smrg    int unconv_num;
3981ab64890Smrg
3991ab64890Smrg    if (from == NULL || *from == NULL)
4001ab64890Smrg	return 0;
4011ab64890Smrg
4021ab64890Smrg    if (num_args < 1)
4031ab64890Smrg	return -1;
4041ab64890Smrg
4051ab64890Smrg    charset = (XlcCharSet) args[0];
4061ab64890Smrg    name = charset->encoding_name;
4071ab64890Smrg    /* not charset->name because the latter has a ":GL"/":GR" suffix */
4081ab64890Smrg
4091ab64890Smrg    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
4101ab64890Smrg	if (!strcmp(convptr->name, name))
4111ab64890Smrg	    break;
4121ab64890Smrg    if (i == 0)
4131ab64890Smrg	return -1;
4141ab64890Smrg
4151ab64890Smrg    src = (unsigned char const *) *from;
4161ab64890Smrg    srcend = src + *from_left;
4171ab64890Smrg    dst = (unsigned char *) *to;
4181ab64890Smrg    dstend = dst + *to_left;
4191ab64890Smrg    unconv_num = 0;
4201ab64890Smrg
4211ab64890Smrg    while (src < srcend) {
4221ab64890Smrg	ucs4_t wc;
4231ab64890Smrg	int consumed;
4241ab64890Smrg	int count;
4251ab64890Smrg
4261ab64890Smrg	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
4271ab64890Smrg	if (consumed == RET_ILSEQ)
4281ab64890Smrg	    return -1;
4291ab64890Smrg	if (consumed == RET_TOOFEW(0))
4301ab64890Smrg	    break;
4311ab64890Smrg
4321ab64890Smrg	count = utf8_wctomb(NULL, dst, wc, dstend-dst);
4331ab64890Smrg	if (count == RET_TOOSMALL)
4341ab64890Smrg	    break;
4351ab64890Smrg	if (count == RET_ILSEQ) {
4361ab64890Smrg	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
4371ab64890Smrg	    if (count == RET_TOOSMALL)
4381ab64890Smrg		break;
4391ab64890Smrg	    unconv_num++;
4401ab64890Smrg	}
4411ab64890Smrg	src += consumed;
4421ab64890Smrg	dst += count;
4431ab64890Smrg    }
4441ab64890Smrg
4451ab64890Smrg    *from = (XPointer) src;
4461ab64890Smrg    *from_left = srcend - src;
4471ab64890Smrg    *to = (XPointer) dst;
4481ab64890Smrg    *to_left = dstend - dst;
4491ab64890Smrg
4501ab64890Smrg    return unconv_num;
4511ab64890Smrg}
4521ab64890Smrg
4531ab64890Smrgstatic XlcConvMethodsRec methods_cstoutf8 = {
4541ab64890Smrg    close_converter,
4551ab64890Smrg    cstoutf8,
4561ab64890Smrg    NULL
4571ab64890Smrg};
4581ab64890Smrg
4591ab64890Smrgstatic XlcConv
4601ab64890Smrgopen_cstoutf8(
4611ab64890Smrg    XLCd from_lcd,
4621ab64890Smrg    const char *from_type,
4631ab64890Smrg    XLCd to_lcd,
4641ab64890Smrg    const char *to_type)
4651ab64890Smrg{
4661ab64890Smrg    lazy_init_all_charsets();
4671ab64890Smrg    return create_conv(from_lcd, &methods_cstoutf8);
4681ab64890Smrg}
4691ab64890Smrg
4701ab64890Smrg/* from XlcNUtf8String to XlcNCharSet */
4711ab64890Smrg
4721ab64890Smrgstatic XlcConv
4731ab64890Smrgcreate_tocs_conv(
4741ab64890Smrg    XLCd lcd,
4751ab64890Smrg    XlcConvMethods methods)
4761ab64890Smrg{
4771ab64890Smrg    XlcConv conv;
4781ab64890Smrg    CodeSet *codeset_list;
4791ab64890Smrg    int codeset_num;
4801ab64890Smrg    int charset_num;
4811ab64890Smrg    int i, j, k;
4821ab64890Smrg    Utf8Conv *preferred;
4831ab64890Smrg
4841ab64890Smrg    lazy_init_all_charsets();
4851ab64890Smrg
4861ab64890Smrg    codeset_list = XLC_GENERIC(lcd, codeset_list);
4871ab64890Smrg    codeset_num = XLC_GENERIC(lcd, codeset_num);
4881ab64890Smrg
4891ab64890Smrg    charset_num = 0;
4901ab64890Smrg    for (i = 0; i < codeset_num; i++)
4911ab64890Smrg	charset_num += codeset_list[i]->num_charsets;
4921ab64890Smrg    if (charset_num > all_charsets_count-1)
4931ab64890Smrg	charset_num = all_charsets_count-1;
4941ab64890Smrg
4951ab64890Smrg    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec)
4961ab64890Smrg			     + (charset_num + 1) * sizeof(Utf8Conv));
4971ab64890Smrg    if (conv == (XlcConv) NULL)
4981ab64890Smrg	return (XlcConv) NULL;
4991ab64890Smrg    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
5001ab64890Smrg
5011ab64890Smrg    /* Loop through all codesets mentioned in the locale. */
5021ab64890Smrg    charset_num = 0;
5031ab64890Smrg    for (i = 0; i < codeset_num; i++) {
5041ab64890Smrg	XlcCharSet *charsets = codeset_list[i]->charset_list;
5051ab64890Smrg	int num_charsets = codeset_list[i]->num_charsets;
5061ab64890Smrg	for (j = 0; j < num_charsets; j++) {
5071ab64890Smrg	    const char *name = charsets[j]->encoding_name;
5081ab64890Smrg	    /* If it wasn't already encountered... */
5091ab64890Smrg	    for (k = charset_num-1; k >= 0; k--)
5101ab64890Smrg		if (!strcmp(preferred[k]->name, name))
5111ab64890Smrg		    break;
5121ab64890Smrg	    if (k < 0) {
5131ab64890Smrg		/* Look it up in all_charsets[]. */
5141ab64890Smrg		for (k = 0; k < all_charsets_count-1; k++)
5151ab64890Smrg		    if (!strcmp(all_charsets[k].name, name)) {
5161ab64890Smrg			/* Add it to the preferred set. */
5171ab64890Smrg			preferred[charset_num++] = &all_charsets[k];
5181ab64890Smrg			break;
5191ab64890Smrg		    }
5201ab64890Smrg	    }
5211ab64890Smrg	}
5221ab64890Smrg    }
5231ab64890Smrg    preferred[charset_num] = (Utf8Conv) NULL;
5241ab64890Smrg
5251ab64890Smrg    conv->methods = methods;
5261ab64890Smrg    conv->state = (XPointer) preferred;
5271ab64890Smrg
5281ab64890Smrg    return conv;
5291ab64890Smrg}
5301ab64890Smrg
5311ab64890Smrgstatic void
5321ab64890Smrgclose_tocs_converter(
5331ab64890Smrg    XlcConv conv)
5341ab64890Smrg{
5351ab64890Smrg    /* conv->state is allocated together with conv, free both at once.  */
5361ab64890Smrg    Xfree((char *) conv);
5371ab64890Smrg}
5381ab64890Smrg
5391ab64890Smrg/*
5401ab64890Smrg * Converts a Unicode character to an appropriate character set. The NULL
5411ab64890Smrg * terminated array of preferred character sets is passed as first argument.
5421ab64890Smrg * If successful, *charsetp is set to the character set that was used, and
5431ab64890Smrg * *sidep is set to the character set side (XlcGL or XlcGR).
5441ab64890Smrg */
5451ab64890Smrgstatic int
5461ab64890Smrgcharset_wctocs(
5471ab64890Smrg    Utf8Conv *preferred,
5481ab64890Smrg    Utf8Conv *charsetp,
5491ab64890Smrg    XlcSide *sidep,
5501ab64890Smrg    XlcConv conv,
5511ab64890Smrg    unsigned char *r,
5521ab64890Smrg    ucs4_t wc,
5531ab64890Smrg    int n)
5541ab64890Smrg{
5551ab64890Smrg    int count;
5561ab64890Smrg    Utf8Conv convptr;
5571ab64890Smrg    int i;
5581ab64890Smrg
5591ab64890Smrg    for (; *preferred != (Utf8Conv) NULL; preferred++) {
5601ab64890Smrg	convptr = *preferred;
5611ab64890Smrg	count = convptr->wctocs(conv, r, wc, n);
5621ab64890Smrg	if (count == RET_TOOSMALL)
5631ab64890Smrg	    return RET_TOOSMALL;
5641ab64890Smrg	if (count != RET_ILSEQ) {
5651ab64890Smrg	    *charsetp = convptr;
5661ab64890Smrg	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
5671ab64890Smrg	    return count;
5681ab64890Smrg	}
5691ab64890Smrg    }
5701ab64890Smrg    for (convptr = all_charsets+1, i = all_charsets_count-1; i > 0; convptr++, i--) {
5711ab64890Smrg	count = convptr->wctocs(conv, r, wc, n);
5721ab64890Smrg	if (count == RET_TOOSMALL)
5731ab64890Smrg	    return RET_TOOSMALL;
5741ab64890Smrg	if (count != RET_ILSEQ) {
5751ab64890Smrg	    *charsetp = convptr;
5761ab64890Smrg	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
5771ab64890Smrg	    return count;
5781ab64890Smrg	}
5791ab64890Smrg    }
5801ab64890Smrg    return RET_ILSEQ;
5811ab64890Smrg}
5821ab64890Smrg
5831ab64890Smrgstatic int
5841ab64890Smrgutf8tocs(
5851ab64890Smrg    XlcConv conv,
5861ab64890Smrg    XPointer *from,
5871ab64890Smrg    int *from_left,
5881ab64890Smrg    XPointer *to,
5891ab64890Smrg    int *to_left,
5901ab64890Smrg    XPointer *args,
5911ab64890Smrg    int num_args)
5921ab64890Smrg{
5931ab64890Smrg    Utf8Conv *preferred_charsets;
5941ab64890Smrg    XlcCharSet last_charset = NULL;
5951ab64890Smrg    unsigned char const *src;
5961ab64890Smrg    unsigned char const *srcend;
5971ab64890Smrg    unsigned char *dst;
5981ab64890Smrg    unsigned char *dstend;
5991ab64890Smrg    int unconv_num;
6001ab64890Smrg
6011ab64890Smrg    if (from == NULL || *from == NULL)
6021ab64890Smrg	return 0;
6031ab64890Smrg
6041ab64890Smrg    preferred_charsets = (Utf8Conv *) conv->state;
6051ab64890Smrg    src = (unsigned char const *) *from;
6061ab64890Smrg    srcend = src + *from_left;
6071ab64890Smrg    dst = (unsigned char *) *to;
6081ab64890Smrg    dstend = dst + *to_left;
6091ab64890Smrg    unconv_num = 0;
6101ab64890Smrg
6111ab64890Smrg    while (src < srcend && dst < dstend) {
6121ab64890Smrg	Utf8Conv chosen_charset = NULL;
6131ab64890Smrg	XlcSide chosen_side = XlcNONE;
6141ab64890Smrg	ucs4_t wc;
6151ab64890Smrg	int consumed;
6161ab64890Smrg	int count;
6171ab64890Smrg
6181ab64890Smrg	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
6191ab64890Smrg	if (consumed == RET_TOOFEW(0))
6201ab64890Smrg	    break;
6211ab64890Smrg	if (consumed == RET_ILSEQ) {
6221ab64890Smrg	    src++;
6231ab64890Smrg	    unconv_num++;
6241ab64890Smrg	    continue;
6251ab64890Smrg	}
6261ab64890Smrg
6271ab64890Smrg	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
6281ab64890Smrg	if (count == RET_TOOSMALL)
6291ab64890Smrg	    break;
6301ab64890Smrg	if (count == RET_ILSEQ) {
6311ab64890Smrg	    src += consumed;
6321ab64890Smrg	    unconv_num++;
6331ab64890Smrg	    continue;
6341ab64890Smrg	}
6351ab64890Smrg
6361ab64890Smrg	if (last_charset == NULL) {
6371ab64890Smrg	    last_charset =
6381ab64890Smrg	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
6391ab64890Smrg	    if (last_charset == NULL) {
6401ab64890Smrg		src += consumed;
6411ab64890Smrg		unconv_num++;
6421ab64890Smrg		continue;
6431ab64890Smrg	    }
6441ab64890Smrg	} else {
6451ab64890Smrg	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
6461ab64890Smrg	          && (last_charset->side == XlcGLGR
6471ab64890Smrg	              || last_charset->side == chosen_side)))
6481ab64890Smrg		break;
6491ab64890Smrg	}
6501ab64890Smrg	src += consumed;
6511ab64890Smrg	dst += count;
6521ab64890Smrg    }
6531ab64890Smrg
6541ab64890Smrg    if (last_charset == NULL)
6551ab64890Smrg	return -1;
6561ab64890Smrg
6571ab64890Smrg    *from = (XPointer) src;
6581ab64890Smrg    *from_left = srcend - src;
6591ab64890Smrg    *to = (XPointer) dst;
6601ab64890Smrg    *to_left = dstend - dst;
6611ab64890Smrg
6621ab64890Smrg    if (num_args >= 1)
6631ab64890Smrg	*((XlcCharSet *)args[0]) = last_charset;
6641ab64890Smrg
6651ab64890Smrg    return unconv_num;
6661ab64890Smrg}
6671ab64890Smrg
6681ab64890Smrgstatic XlcConvMethodsRec methods_utf8tocs = {
6691ab64890Smrg    close_tocs_converter,
6701ab64890Smrg    utf8tocs,
6711ab64890Smrg    NULL
6721ab64890Smrg};
6731ab64890Smrg
6741ab64890Smrgstatic XlcConv
6751ab64890Smrgopen_utf8tocs(
6761ab64890Smrg    XLCd from_lcd,
6771ab64890Smrg    const char *from_type,
6781ab64890Smrg    XLCd to_lcd,
6791ab64890Smrg    const char *to_type)
6801ab64890Smrg{
6811ab64890Smrg    return create_tocs_conv(from_lcd, &methods_utf8tocs);
6821ab64890Smrg}
6831ab64890Smrg
6841ab64890Smrg/* from XlcNUtf8String to XlcNChar */
6851ab64890Smrg
6861ab64890Smrgstatic int
6871ab64890Smrgutf8tocs1(
6881ab64890Smrg    XlcConv conv,
6891ab64890Smrg    XPointer *from,
6901ab64890Smrg    int *from_left,
6911ab64890Smrg    XPointer *to,
6921ab64890Smrg    int *to_left,
6931ab64890Smrg    XPointer *args,
6941ab64890Smrg    int num_args)
6951ab64890Smrg{
6961ab64890Smrg    Utf8Conv *preferred_charsets;
6971ab64890Smrg    XlcCharSet last_charset = NULL;
6981ab64890Smrg    unsigned char const *src;
6991ab64890Smrg    unsigned char const *srcend;
7001ab64890Smrg    unsigned char *dst;
7011ab64890Smrg    unsigned char *dstend;
7021ab64890Smrg    int unconv_num;
7031ab64890Smrg
7041ab64890Smrg    if (from == NULL || *from == NULL)
7051ab64890Smrg	return 0;
7061ab64890Smrg
7071ab64890Smrg    preferred_charsets = (Utf8Conv *) conv->state;
7081ab64890Smrg    src = (unsigned char const *) *from;
7091ab64890Smrg    srcend = src + *from_left;
7101ab64890Smrg    dst = (unsigned char *) *to;
7111ab64890Smrg    dstend = dst + *to_left;
7121ab64890Smrg    unconv_num = 0;
7131ab64890Smrg
7141ab64890Smrg    while (src < srcend && dst < dstend) {
7151ab64890Smrg	Utf8Conv chosen_charset = NULL;
7161ab64890Smrg	XlcSide chosen_side = XlcNONE;
7171ab64890Smrg	ucs4_t wc;
7181ab64890Smrg	int consumed;
7191ab64890Smrg	int count;
7201ab64890Smrg
7211ab64890Smrg	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
7221ab64890Smrg	if (consumed == RET_TOOFEW(0))
7231ab64890Smrg	    break;
7241ab64890Smrg	if (consumed == RET_ILSEQ) {
7251ab64890Smrg	    src++;
7261ab64890Smrg	    unconv_num++;
7271ab64890Smrg	    continue;
7281ab64890Smrg	}
7291ab64890Smrg
7301ab64890Smrg	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
7311ab64890Smrg	if (count == RET_TOOSMALL)
7321ab64890Smrg	    break;
7331ab64890Smrg	if (count == RET_ILSEQ) {
7341ab64890Smrg	    src += consumed;
7351ab64890Smrg	    unconv_num++;
7361ab64890Smrg	    continue;
7371ab64890Smrg	}
7381ab64890Smrg
7391ab64890Smrg	if (last_charset == NULL) {
7401ab64890Smrg	    last_charset =
7411ab64890Smrg	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
7421ab64890Smrg	    if (last_charset == NULL) {
7431ab64890Smrg		src += consumed;
7441ab64890Smrg		unconv_num++;
7451ab64890Smrg		continue;
7461ab64890Smrg	    }
7471ab64890Smrg	} else {
7481ab64890Smrg	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
7491ab64890Smrg	          && (last_charset->side == XlcGLGR
7501ab64890Smrg	              || last_charset->side == chosen_side)))
7511ab64890Smrg		break;
7521ab64890Smrg	}
7531ab64890Smrg	src += consumed;
7541ab64890Smrg	dst += count;
7551ab64890Smrg	break;
7561ab64890Smrg    }
7571ab64890Smrg
7581ab64890Smrg    if (last_charset == NULL)
7591ab64890Smrg	return -1;
7601ab64890Smrg
7611ab64890Smrg    *from = (XPointer) src;
7621ab64890Smrg    *from_left = srcend - src;
7631ab64890Smrg    *to = (XPointer) dst;
7641ab64890Smrg    *to_left = dstend - dst;
7651ab64890Smrg
7661ab64890Smrg    if (num_args >= 1)
7671ab64890Smrg	*((XlcCharSet *)args[0]) = last_charset;
7681ab64890Smrg
7691ab64890Smrg    return unconv_num;
7701ab64890Smrg}
7711ab64890Smrg
7721ab64890Smrgstatic XlcConvMethodsRec methods_utf8tocs1 = {
7731ab64890Smrg    close_tocs_converter,
7741ab64890Smrg    utf8tocs1,
7751ab64890Smrg    NULL
7761ab64890Smrg};
7771ab64890Smrg
7781ab64890Smrgstatic XlcConv
7791ab64890Smrgopen_utf8tocs1(
7801ab64890Smrg    XLCd from_lcd,
7811ab64890Smrg    const char *from_type,
7821ab64890Smrg    XLCd to_lcd,
7831ab64890Smrg    const char *to_type)
7841ab64890Smrg{
7851ab64890Smrg    return create_tocs_conv(from_lcd, &methods_utf8tocs1);
7861ab64890Smrg}
7871ab64890Smrg
7881ab64890Smrg/* from XlcNUtf8String to XlcNString */
7891ab64890Smrg
7901ab64890Smrgstatic int
7911ab64890Smrgutf8tostr(
7921ab64890Smrg    XlcConv conv,
7931ab64890Smrg    XPointer *from,
7941ab64890Smrg    int *from_left,
7951ab64890Smrg    XPointer *to,
7961ab64890Smrg    int *to_left,
7971ab64890Smrg    XPointer *args,
7981ab64890Smrg    int num_args)
7991ab64890Smrg{
8001ab64890Smrg    unsigned char const *src;
8011ab64890Smrg    unsigned char const *srcend;
8021ab64890Smrg    unsigned char *dst;
8031ab64890Smrg    unsigned char *dstend;
8041ab64890Smrg    int unconv_num;
8051ab64890Smrg
8061ab64890Smrg    if (from == NULL || *from == NULL)
8071ab64890Smrg	return 0;
8081ab64890Smrg
8091ab64890Smrg    src = (unsigned char const *) *from;
8101ab64890Smrg    srcend = src + *from_left;
8111ab64890Smrg    dst = (unsigned char *) *to;
8121ab64890Smrg    dstend = dst + *to_left;
8131ab64890Smrg    unconv_num = 0;
8141ab64890Smrg
8151ab64890Smrg    while (src < srcend) {
8161ab64890Smrg	unsigned char c;
8171ab64890Smrg	ucs4_t wc;
8181ab64890Smrg	int consumed;
8191ab64890Smrg
8201ab64890Smrg	consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
8211ab64890Smrg	if (consumed == RET_TOOFEW(0))
8221ab64890Smrg	    break;
8231ab64890Smrg	if (dst == dstend)
8241ab64890Smrg	    break;
8251ab64890Smrg	if (consumed == RET_ILSEQ) {
8261ab64890Smrg	    consumed = 1;
8271ab64890Smrg	    c = BAD_CHAR;
8281ab64890Smrg	    unconv_num++;
8291ab64890Smrg	} else {
8301ab64890Smrg	    if ((wc & ~(ucs4_t)0xff) != 0) {
8311ab64890Smrg		c = BAD_CHAR;
8321ab64890Smrg		unconv_num++;
8331ab64890Smrg	    } else
8341ab64890Smrg		c = (unsigned char) wc;
8351ab64890Smrg	}
8361ab64890Smrg	*dst++ = c;
8371ab64890Smrg	src += consumed;
8381ab64890Smrg    }
8391ab64890Smrg
8401ab64890Smrg    *from = (XPointer) src;
8411ab64890Smrg    *from_left = srcend - src;
8421ab64890Smrg    *to = (XPointer) dst;
8431ab64890Smrg    *to_left = dstend - dst;
8441ab64890Smrg
8451ab64890Smrg    return unconv_num;
8461ab64890Smrg}
8471ab64890Smrg
8481ab64890Smrgstatic XlcConvMethodsRec methods_utf8tostr = {
8491ab64890Smrg    close_converter,
8501ab64890Smrg    utf8tostr,
8511ab64890Smrg    NULL
8521ab64890Smrg};
8531ab64890Smrg
8541ab64890Smrgstatic XlcConv
8551ab64890Smrgopen_utf8tostr(
8561ab64890Smrg    XLCd from_lcd,
8571ab64890Smrg    const char *from_type,
8581ab64890Smrg    XLCd to_lcd,
8591ab64890Smrg    const char *to_type)
8601ab64890Smrg{
8611ab64890Smrg    return create_conv(from_lcd, &methods_utf8tostr);
8621ab64890Smrg}
8631ab64890Smrg
8641ab64890Smrg/* from XlcNString to XlcNUtf8String */
8651ab64890Smrg
8661ab64890Smrgstatic int
8671ab64890Smrgstrtoutf8(
8681ab64890Smrg    XlcConv conv,
8691ab64890Smrg    XPointer *from,
8701ab64890Smrg    int *from_left,
8711ab64890Smrg    XPointer *to,
8721ab64890Smrg    int *to_left,
8731ab64890Smrg    XPointer *args,
8741ab64890Smrg    int num_args)
8751ab64890Smrg{
8761ab64890Smrg    unsigned char const *src;
8771ab64890Smrg    unsigned char const *srcend;
8781ab64890Smrg    unsigned char *dst;
8791ab64890Smrg    unsigned char *dstend;
8801ab64890Smrg
8811ab64890Smrg    if (from == NULL || *from == NULL)
8821ab64890Smrg	return 0;
8831ab64890Smrg
8841ab64890Smrg    src = (unsigned char const *) *from;
8851ab64890Smrg    srcend = src + *from_left;
8861ab64890Smrg    dst = (unsigned char *) *to;
8871ab64890Smrg    dstend = dst + *to_left;
8881ab64890Smrg
8891ab64890Smrg    while (src < srcend) {
8901ab64890Smrg	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
8911ab64890Smrg	if (count == RET_TOOSMALL)
8921ab64890Smrg	    break;
8931ab64890Smrg	dst += count;
8941ab64890Smrg	src++;
8951ab64890Smrg    }
8961ab64890Smrg
8971ab64890Smrg    *from = (XPointer) src;
8981ab64890Smrg    *from_left = srcend - src;
8991ab64890Smrg    *to = (XPointer) dst;
9001ab64890Smrg    *to_left = dstend - dst;
9011ab64890Smrg
9021ab64890Smrg    return 0;
9031ab64890Smrg}
9041ab64890Smrg
9051ab64890Smrgstatic XlcConvMethodsRec methods_strtoutf8 = {
9061ab64890Smrg    close_converter,
9071ab64890Smrg    strtoutf8,
9081ab64890Smrg    NULL
9091ab64890Smrg};
9101ab64890Smrg
9111ab64890Smrgstatic XlcConv
9121ab64890Smrgopen_strtoutf8(
9131ab64890Smrg    XLCd from_lcd,
9141ab64890Smrg    const char *from_type,
9151ab64890Smrg    XLCd to_lcd,
9161ab64890Smrg    const char *to_type)
9171ab64890Smrg{
9181ab64890Smrg    return create_conv(from_lcd, &methods_strtoutf8);
9191ab64890Smrg}
9201ab64890Smrg
9211ab64890Smrg/* Support for the input methods. */
9221ab64890Smrg
9231ab64890SmrgXPointer
9241ab64890Smrg_Utf8GetConvByName(
9251ab64890Smrg    const char *name)
9261ab64890Smrg{
9271ab64890Smrg    XrmQuark xrm_name;
9281ab64890Smrg    Utf8Conv convptr;
9291ab64890Smrg    int i;
9301ab64890Smrg
9311ab64890Smrg    if (name == NULL)
9321ab64890Smrg        return (XPointer) NULL;
9331ab64890Smrg
9341ab64890Smrg    lazy_init_all_charsets();
9351ab64890Smrg    xrm_name = XrmStringToQuark(name);
9361ab64890Smrg
9371ab64890Smrg    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
9381ab64890Smrg	if (convptr->xrm_name == xrm_name)
9391ab64890Smrg	    return (XPointer) convptr->wctocs;
9401ab64890Smrg    return (XPointer) NULL;
9411ab64890Smrg}
9421ab64890Smrg
9431ab64890Smrg/* from XlcNUcsChar to XlcNChar, needed for input methods */
9441ab64890Smrg
9451ab64890Smrgstatic XlcConv
9461ab64890Smrgcreate_ucstocs_conv(
9471ab64890Smrg    XLCd lcd,
9481ab64890Smrg    XlcConvMethods methods)
9491ab64890Smrg{
9501ab64890Smrg
9511ab64890Smrg    if (XLC_PUBLIC_PART(lcd)->codeset
9521ab64890Smrg	&& _XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8") == 0) {
9531ab64890Smrg	XlcConv conv;
9541ab64890Smrg	Utf8Conv *preferred;
9551ab64890Smrg
9561ab64890Smrg	lazy_init_all_charsets();
9571ab64890Smrg
9581ab64890Smrg	conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + 2 * sizeof(Utf8Conv));
9591ab64890Smrg	if (conv == (XlcConv) NULL)
9601ab64890Smrg	    return (XlcConv) NULL;
9611ab64890Smrg	preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
9621ab64890Smrg
9631ab64890Smrg	preferred[0] = &all_charsets[0]; /* ISO10646 */
9641ab64890Smrg	preferred[1] = (Utf8Conv) NULL;
9651ab64890Smrg
9661ab64890Smrg	conv->methods = methods;
9671ab64890Smrg	conv->state = (XPointer) preferred;
9681ab64890Smrg
9691ab64890Smrg	return conv;
9701ab64890Smrg    } else {
9711ab64890Smrg	return create_tocs_conv(lcd, methods);
9721ab64890Smrg    }
9731ab64890Smrg}
9741ab64890Smrg
9751ab64890Smrgstatic int
9761ab64890Smrgcharset_wctocs_exactly(
9771ab64890Smrg    Utf8Conv *preferred,
9781ab64890Smrg    Utf8Conv *charsetp,
9791ab64890Smrg    XlcSide *sidep,
9801ab64890Smrg    XlcConv conv,
9811ab64890Smrg    unsigned char *r,
9821ab64890Smrg    ucs4_t wc,
9831ab64890Smrg    int n)
9841ab64890Smrg{
9851ab64890Smrg    int count;
9861ab64890Smrg    Utf8Conv convptr;
9871ab64890Smrg
9881ab64890Smrg    for (; *preferred != (Utf8Conv) NULL; preferred++) {
9891ab64890Smrg	convptr = *preferred;
9901ab64890Smrg	count = convptr->wctocs(conv, r, wc, n);
9911ab64890Smrg	if (count == RET_TOOSMALL)
9921ab64890Smrg	    return RET_TOOSMALL;
9931ab64890Smrg	if (count != RET_ILSEQ) {
9941ab64890Smrg	    *charsetp = convptr;
9951ab64890Smrg	    *sidep = (*r < 0x80 ? XlcGL : XlcGR);
9961ab64890Smrg	    return count;
9971ab64890Smrg	}
9981ab64890Smrg    }
9991ab64890Smrg    return RET_ILSEQ;
10001ab64890Smrg}
10011ab64890Smrg
10021ab64890Smrgstatic int
10031ab64890Smrgucstocs1(
10041ab64890Smrg    XlcConv conv,
10051ab64890Smrg    XPointer *from,
10061ab64890Smrg    int *from_left,
10071ab64890Smrg    XPointer *to,
10081ab64890Smrg    int *to_left,
10091ab64890Smrg    XPointer *args,
10101ab64890Smrg    int num_args)
10111ab64890Smrg{
10121ab64890Smrg    ucs4_t const *src = (ucs4_t const *) *from;
10131ab64890Smrg    unsigned char *dst = (unsigned char *) *to;
10141ab64890Smrg    int unconv_num = 0;
10151ab64890Smrg    Utf8Conv *preferred_charsets = (Utf8Conv *) conv->state;
10161ab64890Smrg    Utf8Conv chosen_charset = NULL;
10171ab64890Smrg    XlcSide chosen_side = XlcNONE;
10181ab64890Smrg    XlcCharSet charset = NULL;
10191ab64890Smrg    int count;
10201ab64890Smrg
10211ab64890Smrg    if (from == NULL || *from == NULL)
10221ab64890Smrg	return 0;
10231ab64890Smrg
10241ab64890Smrg    count = charset_wctocs_exactly(preferred_charsets, &chosen_charset,
10251ab64890Smrg                                   &chosen_side, conv, dst, *src, *to_left);
10261ab64890Smrg    if (count < 1) {
10271ab64890Smrg        unconv_num++;
10281ab64890Smrg        count = 0;
10291ab64890Smrg    } else {
10301ab64890Smrg        charset = _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
10311ab64890Smrg    }
10321ab64890Smrg    if (charset == NULL)
10331ab64890Smrg	return -1;
10341ab64890Smrg
10351ab64890Smrg    *from = (XPointer) ++src;
10361ab64890Smrg    (*from_left)--;
10371ab64890Smrg    *to = (XPointer) dst;
10381ab64890Smrg    *to_left -= count;
10391ab64890Smrg
10401ab64890Smrg    if (num_args >= 1)
10411ab64890Smrg	*((XlcCharSet *)args[0]) = charset;
10421ab64890Smrg
10431ab64890Smrg    return unconv_num;
10441ab64890Smrg}
10451ab64890Smrg
10461ab64890Smrgstatic XlcConvMethodsRec methods_ucstocs1 = {
10471ab64890Smrg    close_tocs_converter,
10481ab64890Smrg    ucstocs1,
10491ab64890Smrg    NULL
10501ab64890Smrg};
10511ab64890Smrg
10521ab64890Smrgstatic XlcConv
10531ab64890Smrgopen_ucstocs1(
10541ab64890Smrg    XLCd from_lcd,
10551ab64890Smrg    const char *from_type,
10561ab64890Smrg    XLCd to_lcd,
10571ab64890Smrg    const char *to_type)
10581ab64890Smrg{
10591ab64890Smrg    return create_ucstocs_conv(from_lcd, &methods_ucstocs1);
10601ab64890Smrg}
10611ab64890Smrg
10621ab64890Smrg/* from XlcNUcsChar to XlcNUtf8String, needed for input methods */
10631ab64890Smrg
10641ab64890Smrgstatic int
10651ab64890Smrgucstoutf8(
10661ab64890Smrg    XlcConv conv,
10671ab64890Smrg    XPointer *from,
10681ab64890Smrg    int *from_left,
10691ab64890Smrg    XPointer *to,
10701ab64890Smrg    int *to_left,
10711ab64890Smrg    XPointer *args,
10721ab64890Smrg    int num_args)
10731ab64890Smrg{
10741ab64890Smrg    const ucs4_t *src;
10751ab64890Smrg    const ucs4_t *srcend;
10761ab64890Smrg    unsigned char *dst;
10771ab64890Smrg    unsigned char *dstend;
10781ab64890Smrg    int unconv_num;
10791ab64890Smrg
10801ab64890Smrg    if (from == NULL || *from == NULL)
10811ab64890Smrg	return 0;
10821ab64890Smrg
10831ab64890Smrg    src = (const ucs4_t *) *from;
10841ab64890Smrg    srcend = src + *from_left;
10851ab64890Smrg    dst = (unsigned char *) *to;
10861ab64890Smrg    dstend = dst + *to_left;
10871ab64890Smrg    unconv_num = 0;
10881ab64890Smrg
10891ab64890Smrg    while (src < srcend) {
10901ab64890Smrg	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
10911ab64890Smrg	if (count == RET_TOOSMALL)
10921ab64890Smrg	    break;
10931ab64890Smrg	if (count == RET_ILSEQ)
10941ab64890Smrg	    unconv_num++;
10951ab64890Smrg	src++;
10961ab64890Smrg	dst += count;
10971ab64890Smrg    }
10981ab64890Smrg
10991ab64890Smrg    *from = (XPointer) src;
11001ab64890Smrg    *from_left = srcend - src;
11011ab64890Smrg    *to = (XPointer) dst;
11021ab64890Smrg    *to_left = dstend - dst;
11031ab64890Smrg
11041ab64890Smrg    return unconv_num;
11051ab64890Smrg}
11061ab64890Smrg
11071ab64890Smrgstatic XlcConvMethodsRec methods_ucstoutf8 = {
11081ab64890Smrg    close_converter,
11091ab64890Smrg    ucstoutf8,
11101ab64890Smrg    NULL
11111ab64890Smrg};
11121ab64890Smrg
11131ab64890Smrgstatic XlcConv
11141ab64890Smrgopen_ucstoutf8(
11151ab64890Smrg    XLCd from_lcd,
11161ab64890Smrg    const char *from_type,
11171ab64890Smrg    XLCd to_lcd,
11181ab64890Smrg    const char *to_type)
11191ab64890Smrg{
11201ab64890Smrg    return create_conv(from_lcd, &methods_ucstoutf8);
11211ab64890Smrg}
11221ab64890Smrg
11231ab64890Smrg/* Registers UTF-8 converters for a non-UTF-8 locale. */
11241ab64890Smrgvoid
11251ab64890Smrg_XlcAddUtf8Converters(
11261ab64890Smrg    XLCd lcd)
11271ab64890Smrg{
11281ab64890Smrg    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNUtf8String, open_cstoutf8);
11291ab64890Smrg    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNCharSet, open_utf8tocs);
11301ab64890Smrg    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNChar, open_utf8tocs1);
11311ab64890Smrg    _XlcSetConverter(lcd, XlcNString, lcd, XlcNUtf8String, open_strtoutf8);
11321ab64890Smrg    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNString, open_utf8tostr);
11331ab64890Smrg    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNChar, open_ucstocs1);
11341ab64890Smrg    _XlcSetConverter(lcd, XlcNUcsChar,    lcd, XlcNUtf8String, open_ucstoutf8);
11351ab64890Smrg}
11361ab64890Smrg
11371ab64890Smrg/***************************************************************************/
11381ab64890Smrg/* Part II: UTF-8 locale loader conversion files
11391ab64890Smrg *
11401ab64890Smrg * Here we can assume that "multi-byte" is UTF-8 and that `wchar_t' is Unicode.
11411ab64890Smrg */
11421ab64890Smrg
11431ab64890Smrg/* from XlcNMultiByte to XlcNWideChar */
11441ab64890Smrg
11451ab64890Smrgstatic int
11461ab64890Smrgutf8towcs(
11471ab64890Smrg    XlcConv conv,
11481ab64890Smrg    XPointer *from,
11491ab64890Smrg    int *from_left,
11501ab64890Smrg    XPointer *to,
11511ab64890Smrg    int *to_left,
11521ab64890Smrg    XPointer *args,
11531ab64890Smrg    int num_args)
11541ab64890Smrg{
11551ab64890Smrg    unsigned char const *src;
11561ab64890Smrg    unsigned char const *srcend;
11571ab64890Smrg    wchar_t *dst;
11581ab64890Smrg    wchar_t *dstend;
11591ab64890Smrg    int unconv_num;
11601ab64890Smrg
11611ab64890Smrg    if (from == NULL || *from == NULL)
11621ab64890Smrg	return 0;
11631ab64890Smrg
11641ab64890Smrg    src = (unsigned char const *) *from;
11651ab64890Smrg    srcend = src + *from_left;
11661ab64890Smrg    dst = (wchar_t *) *to;
11671ab64890Smrg    dstend = dst + *to_left;
11681ab64890Smrg    unconv_num = 0;
11691ab64890Smrg
11701ab64890Smrg    while (src < srcend && dst < dstend) {
11711ab64890Smrg	ucs4_t wc;
11721ab64890Smrg	int consumed = utf8_mbtowc(NULL, &wc, src, srcend-src);
11731ab64890Smrg	if (consumed == RET_TOOFEW(0))
11741ab64890Smrg	    break;
11751ab64890Smrg	if (consumed == RET_ILSEQ) {
11761ab64890Smrg	    src++;
11771ab64890Smrg	    *dst = BAD_WCHAR;
11781ab64890Smrg	    unconv_num++;
11791ab64890Smrg	} else {
11801ab64890Smrg	    src += consumed;
11811ab64890Smrg	    *dst = wc;
11821ab64890Smrg	}
11831ab64890Smrg	dst++;
11841ab64890Smrg    }
11851ab64890Smrg
11861ab64890Smrg    *from = (XPointer) src;
11871ab64890Smrg    *from_left = srcend - src;
11881ab64890Smrg    *to = (XPointer) dst;
11891ab64890Smrg    *to_left = dstend - dst;
11901ab64890Smrg
11911ab64890Smrg    return unconv_num;
11921ab64890Smrg}
11931ab64890Smrg
11941ab64890Smrgstatic XlcConvMethodsRec methods_utf8towcs = {
11951ab64890Smrg    close_converter,
11961ab64890Smrg    utf8towcs,
11971ab64890Smrg    NULL
11981ab64890Smrg};
11991ab64890Smrg
12001ab64890Smrgstatic XlcConv
12011ab64890Smrgopen_utf8towcs(
12021ab64890Smrg    XLCd from_lcd,
12031ab64890Smrg    const char *from_type,
12041ab64890Smrg    XLCd to_lcd,
12051ab64890Smrg    const char *to_type)
12061ab64890Smrg{
12071ab64890Smrg    return create_conv(from_lcd, &methods_utf8towcs);
12081ab64890Smrg}
12091ab64890Smrg
12101ab64890Smrg/* from XlcNWideChar to XlcNMultiByte */
12111ab64890Smrg
12121ab64890Smrgstatic int
12131ab64890Smrgwcstoutf8(
12141ab64890Smrg    XlcConv conv,
12151ab64890Smrg    XPointer *from,
12161ab64890Smrg    int *from_left,
12171ab64890Smrg    XPointer *to,
12181ab64890Smrg    int *to_left,
12191ab64890Smrg    XPointer *args,
12201ab64890Smrg    int num_args)
12211ab64890Smrg{
12221ab64890Smrg    wchar_t const *src;
12231ab64890Smrg    wchar_t const *srcend;
12241ab64890Smrg    unsigned char *dst;
12251ab64890Smrg    unsigned char *dstend;
12261ab64890Smrg    int unconv_num;
12271ab64890Smrg
12281ab64890Smrg    if (from == NULL || *from == NULL)
12291ab64890Smrg	return 0;
12301ab64890Smrg
12311ab64890Smrg    src = (wchar_t const *) *from;
12321ab64890Smrg    srcend = src + *from_left;
12331ab64890Smrg    dst = (unsigned char *) *to;
12341ab64890Smrg    dstend = dst + *to_left;
12351ab64890Smrg    unconv_num = 0;
12361ab64890Smrg
12371ab64890Smrg    while (src < srcend) {
12381ab64890Smrg	int count = utf8_wctomb(NULL, dst, *src, dstend-dst);
12391ab64890Smrg	if (count == RET_TOOSMALL)
12401ab64890Smrg	    break;
12411ab64890Smrg	if (count == RET_ILSEQ) {
12421ab64890Smrg	    count = utf8_wctomb(NULL, dst, BAD_WCHAR, dstend-dst);
12431ab64890Smrg	    if (count == RET_TOOSMALL)
12441ab64890Smrg		break;
12451ab64890Smrg	    unconv_num++;
12461ab64890Smrg	}
12471ab64890Smrg	dst += count;
12481ab64890Smrg	src++;
12491ab64890Smrg    }
12501ab64890Smrg
12511ab64890Smrg    *from = (XPointer) src;
12521ab64890Smrg    *from_left = srcend - src;
12531ab64890Smrg    *to = (XPointer) dst;
12541ab64890Smrg    *to_left = dstend - dst;
12551ab64890Smrg
12561ab64890Smrg    return unconv_num;
12571ab64890Smrg}
12581ab64890Smrg
12591ab64890Smrgstatic XlcConvMethodsRec methods_wcstoutf8 = {
12601ab64890Smrg    close_converter,
12611ab64890Smrg    wcstoutf8,
12621ab64890Smrg    NULL
12631ab64890Smrg};
12641ab64890Smrg
12651ab64890Smrgstatic XlcConv
12661ab64890Smrgopen_wcstoutf8(
12671ab64890Smrg    XLCd from_lcd,
12681ab64890Smrg    const char *from_type,
12691ab64890Smrg    XLCd to_lcd,
12701ab64890Smrg    const char *to_type)
12711ab64890Smrg{
12721ab64890Smrg    return create_conv(from_lcd, &methods_wcstoutf8);
12731ab64890Smrg}
12741ab64890Smrg
12751ab64890Smrg/* from XlcNString to XlcNWideChar */
12761ab64890Smrg
12771ab64890Smrgstatic int
12781ab64890Smrgour_strtowcs(
12791ab64890Smrg    XlcConv conv,
12801ab64890Smrg    XPointer *from,
12811ab64890Smrg    int *from_left,
12821ab64890Smrg    XPointer *to,
12831ab64890Smrg    int *to_left,
12841ab64890Smrg    XPointer *args,
12851ab64890Smrg    int num_args)
12861ab64890Smrg{
12871ab64890Smrg    unsigned char const *src;
12881ab64890Smrg    unsigned char const *srcend;
12891ab64890Smrg    wchar_t *dst;
12901ab64890Smrg    wchar_t *dstend;
12911ab64890Smrg
12921ab64890Smrg    if (from == NULL || *from == NULL)
12931ab64890Smrg	return 0;
12941ab64890Smrg
12951ab64890Smrg    src = (unsigned char const *) *from;
12961ab64890Smrg    srcend = src + *from_left;
12971ab64890Smrg    dst = (wchar_t *) *to;
12981ab64890Smrg    dstend = dst + *to_left;
12991ab64890Smrg
13001ab64890Smrg    while (src < srcend && dst < dstend)
13011ab64890Smrg	*dst++ = (wchar_t) *src++;
13021ab64890Smrg
13031ab64890Smrg    *from = (XPointer) src;
13041ab64890Smrg    *from_left = srcend - src;
13051ab64890Smrg    *to = (XPointer) dst;
13061ab64890Smrg    *to_left = dstend - dst;
13071ab64890Smrg
13081ab64890Smrg    return 0;
13091ab64890Smrg}
13101ab64890Smrg
13111ab64890Smrgstatic XlcConvMethodsRec methods_strtowcs = {
13121ab64890Smrg    close_converter,
13131ab64890Smrg    our_strtowcs,
13141ab64890Smrg    NULL
13151ab64890Smrg};
13161ab64890Smrg
13171ab64890Smrgstatic XlcConv
13181ab64890Smrgopen_strtowcs(
13191ab64890Smrg    XLCd from_lcd,
13201ab64890Smrg    const char *from_type,
13211ab64890Smrg    XLCd to_lcd,
13221ab64890Smrg    const char *to_type)
13231ab64890Smrg{
13241ab64890Smrg    return create_conv(from_lcd, &methods_strtowcs);
13251ab64890Smrg}
13261ab64890Smrg
13271ab64890Smrg/* from XlcNWideChar to XlcNString */
13281ab64890Smrg
13291ab64890Smrgstatic int
13301ab64890Smrgour_wcstostr(
13311ab64890Smrg    XlcConv conv,
13321ab64890Smrg    XPointer *from,
13331ab64890Smrg    int *from_left,
13341ab64890Smrg    XPointer *to,
13351ab64890Smrg    int *to_left,
13361ab64890Smrg    XPointer *args,
13371ab64890Smrg    int num_args)
13381ab64890Smrg{
13391ab64890Smrg    wchar_t const *src;
13401ab64890Smrg    wchar_t const *srcend;
13411ab64890Smrg    unsigned char *dst;
13421ab64890Smrg    unsigned char *dstend;
13431ab64890Smrg    int unconv_num;
13441ab64890Smrg
13451ab64890Smrg    if (from == NULL || *from == NULL)
13461ab64890Smrg	return 0;
13471ab64890Smrg
13481ab64890Smrg    src = (wchar_t const *) *from;
13491ab64890Smrg    srcend = src + *from_left;
13501ab64890Smrg    dst = (unsigned char *) *to;
13511ab64890Smrg    dstend = dst + *to_left;
13521ab64890Smrg    unconv_num = 0;
13531ab64890Smrg
13541ab64890Smrg    while (src < srcend && dst < dstend) {
13551ab64890Smrg	unsigned int wc = *src++;
13561ab64890Smrg	if (wc < 0x80)
13571ab64890Smrg	    *dst = wc;
13581ab64890Smrg	else {
13591ab64890Smrg	    *dst = BAD_CHAR;
13601ab64890Smrg	    unconv_num++;
13611ab64890Smrg	}
13621ab64890Smrg	dst++;
13631ab64890Smrg    }
13641ab64890Smrg
13651ab64890Smrg    *from = (XPointer) src;
13661ab64890Smrg    *from_left = srcend - src;
13671ab64890Smrg    *to = (XPointer) dst;
13681ab64890Smrg    *to_left = dstend - dst;
13691ab64890Smrg
13701ab64890Smrg    return unconv_num;
13711ab64890Smrg}
13721ab64890Smrg
13731ab64890Smrgstatic XlcConvMethodsRec methods_wcstostr = {
13741ab64890Smrg    close_converter,
13751ab64890Smrg    our_wcstostr,
13761ab64890Smrg    NULL
13771ab64890Smrg};
13781ab64890Smrg
13791ab64890Smrgstatic XlcConv
13801ab64890Smrgopen_wcstostr(
13811ab64890Smrg    XLCd from_lcd,
13821ab64890Smrg    const char *from_type,
13831ab64890Smrg    XLCd to_lcd,
13841ab64890Smrg    const char *to_type)
13851ab64890Smrg{
13861ab64890Smrg    return create_conv(from_lcd, &methods_wcstostr);
13871ab64890Smrg}
13881ab64890Smrg
13891ab64890Smrg/* from XlcNCharSet to XlcNWideChar */
13901ab64890Smrg
13911ab64890Smrgstatic int
13921ab64890Smrgcstowcs(
13931ab64890Smrg    XlcConv conv,
13941ab64890Smrg    XPointer *from,
13951ab64890Smrg    int *from_left,
13961ab64890Smrg    XPointer *to,
13971ab64890Smrg    int *to_left,
13981ab64890Smrg    XPointer *args,
13991ab64890Smrg    int num_args)
14001ab64890Smrg{
14011ab64890Smrg    XlcCharSet charset;
14021ab64890Smrg    const char *name;
14031ab64890Smrg    Utf8Conv convptr;
14041ab64890Smrg    int i;
14051ab64890Smrg    unsigned char const *src;
14061ab64890Smrg    unsigned char const *srcend;
14071ab64890Smrg    wchar_t *dst;
14081ab64890Smrg    wchar_t *dstend;
14091ab64890Smrg    int unconv_num;
14101ab64890Smrg
14111ab64890Smrg    if (from == NULL || *from == NULL)
14121ab64890Smrg	return 0;
14131ab64890Smrg
14141ab64890Smrg    if (num_args < 1)
14151ab64890Smrg	return -1;
14161ab64890Smrg
14171ab64890Smrg    charset = (XlcCharSet) args[0];
14181ab64890Smrg    name = charset->encoding_name;
14191ab64890Smrg    /* not charset->name because the latter has a ":GL"/":GR" suffix */
14201ab64890Smrg
14211ab64890Smrg    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
14221ab64890Smrg	if (!strcmp(convptr->name, name))
14231ab64890Smrg	    break;
14241ab64890Smrg    if (i == 0)
14251ab64890Smrg	return -1;
14261ab64890Smrg
14271ab64890Smrg    src = (unsigned char const *) *from;
14281ab64890Smrg    srcend = src + *from_left;
14291ab64890Smrg    dst = (wchar_t *) *to;
14301ab64890Smrg    dstend = dst + *to_left;
14311ab64890Smrg    unconv_num = 0;
14321ab64890Smrg
14331ab64890Smrg    while (src < srcend && dst < dstend) {
14341ab64890Smrg	unsigned int wc;
14351ab64890Smrg	int consumed;
14361ab64890Smrg
14371ab64890Smrg	consumed = convptr->cstowc(conv, &wc, src, srcend-src);
14381ab64890Smrg	if (consumed == RET_ILSEQ)
14391ab64890Smrg	    return -1;
14401ab64890Smrg	if (consumed == RET_TOOFEW(0))
14411ab64890Smrg	    break;
14421ab64890Smrg
14431ab64890Smrg	*dst++ = wc;
14441ab64890Smrg	src += consumed;
14451ab64890Smrg    }
14461ab64890Smrg
14471ab64890Smrg    *from = (XPointer) src;
14481ab64890Smrg    *from_left = srcend - src;
14491ab64890Smrg    *to = (XPointer) dst;
14501ab64890Smrg    *to_left = dstend - dst;
14511ab64890Smrg
14521ab64890Smrg    return unconv_num;
14531ab64890Smrg}
14541ab64890Smrg
14551ab64890Smrgstatic XlcConvMethodsRec methods_cstowcs = {
14561ab64890Smrg    close_converter,
14571ab64890Smrg    cstowcs,
14581ab64890Smrg    NULL
14591ab64890Smrg};
14601ab64890Smrg
14611ab64890Smrgstatic XlcConv
14621ab64890Smrgopen_cstowcs(
14631ab64890Smrg    XLCd from_lcd,
14641ab64890Smrg    const char *from_type,
14651ab64890Smrg    XLCd to_lcd,
14661ab64890Smrg    const char *to_type)
14671ab64890Smrg{
14681ab64890Smrg    lazy_init_all_charsets();
14691ab64890Smrg    return create_conv(from_lcd, &methods_cstowcs);
14701ab64890Smrg}
14711ab64890Smrg
14721ab64890Smrg/* from XlcNWideChar to XlcNCharSet */
14731ab64890Smrg
14741ab64890Smrgstatic int
14751ab64890Smrgwcstocs(
14761ab64890Smrg    XlcConv conv,
14771ab64890Smrg    XPointer *from,
14781ab64890Smrg    int *from_left,
14791ab64890Smrg    XPointer *to,
14801ab64890Smrg    int *to_left,
14811ab64890Smrg    XPointer *args,
14821ab64890Smrg    int num_args)
14831ab64890Smrg{
14841ab64890Smrg    Utf8Conv *preferred_charsets;
14851ab64890Smrg    XlcCharSet last_charset = NULL;
14861ab64890Smrg    wchar_t const *src;
14871ab64890Smrg    wchar_t const *srcend;
14881ab64890Smrg    unsigned char *dst;
14891ab64890Smrg    unsigned char *dstend;
14901ab64890Smrg    int unconv_num;
14911ab64890Smrg
14921ab64890Smrg    if (from == NULL || *from == NULL)
14931ab64890Smrg	return 0;
14941ab64890Smrg
14951ab64890Smrg    preferred_charsets = (Utf8Conv *) conv->state;
14961ab64890Smrg    src = (wchar_t const *) *from;
14971ab64890Smrg    srcend = src + *from_left;
14981ab64890Smrg    dst = (unsigned char *) *to;
14991ab64890Smrg    dstend = dst + *to_left;
15001ab64890Smrg    unconv_num = 0;
15011ab64890Smrg
15021ab64890Smrg    while (src < srcend && dst < dstend) {
15031ab64890Smrg	Utf8Conv chosen_charset = NULL;
15041ab64890Smrg	XlcSide chosen_side = XlcNONE;
15051ab64890Smrg	wchar_t wc = *src;
15061ab64890Smrg	int count;
15071ab64890Smrg
15081ab64890Smrg	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
15091ab64890Smrg	if (count == RET_TOOSMALL)
15101ab64890Smrg	    break;
15111ab64890Smrg	if (count == RET_ILSEQ) {
15121ab64890Smrg	    src++;
15131ab64890Smrg	    unconv_num++;
15141ab64890Smrg	    continue;
15151ab64890Smrg	}
15161ab64890Smrg
15171ab64890Smrg	if (last_charset == NULL) {
15181ab64890Smrg	    last_charset =
15191ab64890Smrg	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
15201ab64890Smrg	    if (last_charset == NULL) {
15211ab64890Smrg		src++;
15221ab64890Smrg		unconv_num++;
15231ab64890Smrg		continue;
15241ab64890Smrg	    }
15251ab64890Smrg	} else {
15261ab64890Smrg	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
15271ab64890Smrg	          && (last_charset->side == XlcGLGR
15281ab64890Smrg	              || last_charset->side == chosen_side)))
15291ab64890Smrg		break;
15301ab64890Smrg	}
15311ab64890Smrg	src++;
15321ab64890Smrg	dst += count;
15331ab64890Smrg    }
15341ab64890Smrg
15351ab64890Smrg    if (last_charset == NULL)
15361ab64890Smrg	return -1;
15371ab64890Smrg
15381ab64890Smrg    *from = (XPointer) src;
15391ab64890Smrg    *from_left = srcend - src;
15401ab64890Smrg    *to = (XPointer) dst;
15411ab64890Smrg    *to_left = dstend - dst;
15421ab64890Smrg
15431ab64890Smrg    if (num_args >= 1)
15441ab64890Smrg	*((XlcCharSet *)args[0]) = last_charset;
15451ab64890Smrg
15461ab64890Smrg    return unconv_num;
15471ab64890Smrg}
15481ab64890Smrg
15491ab64890Smrgstatic XlcConvMethodsRec methods_wcstocs = {
15501ab64890Smrg    close_tocs_converter,
15511ab64890Smrg    wcstocs,
15521ab64890Smrg    NULL
15531ab64890Smrg};
15541ab64890Smrg
15551ab64890Smrgstatic XlcConv
15561ab64890Smrgopen_wcstocs(
15571ab64890Smrg    XLCd from_lcd,
15581ab64890Smrg    const char *from_type,
15591ab64890Smrg    XLCd to_lcd,
15601ab64890Smrg    const char *to_type)
15611ab64890Smrg{
15621ab64890Smrg    return create_tocs_conv(from_lcd, &methods_wcstocs);
15631ab64890Smrg}
15641ab64890Smrg
15651ab64890Smrg/* from XlcNWideChar to XlcNChar */
15661ab64890Smrg
15671ab64890Smrgstatic int
15681ab64890Smrgwcstocs1(
15691ab64890Smrg    XlcConv conv,
15701ab64890Smrg    XPointer *from,
15711ab64890Smrg    int *from_left,
15721ab64890Smrg    XPointer *to,
15731ab64890Smrg    int *to_left,
15741ab64890Smrg    XPointer *args,
15751ab64890Smrg    int num_args)
15761ab64890Smrg{
15771ab64890Smrg    Utf8Conv *preferred_charsets;
15781ab64890Smrg    XlcCharSet last_charset = NULL;
15791ab64890Smrg    wchar_t const *src;
15801ab64890Smrg    wchar_t const *srcend;
15811ab64890Smrg    unsigned char *dst;
15821ab64890Smrg    unsigned char *dstend;
15831ab64890Smrg    int unconv_num;
15841ab64890Smrg
15851ab64890Smrg    if (from == NULL || *from == NULL)
15861ab64890Smrg	return 0;
15871ab64890Smrg
15881ab64890Smrg    preferred_charsets = (Utf8Conv *) conv->state;
15891ab64890Smrg    src = (wchar_t const *) *from;
15901ab64890Smrg    srcend = src + *from_left;
15911ab64890Smrg    dst = (unsigned char *) *to;
15921ab64890Smrg    dstend = dst + *to_left;
15931ab64890Smrg    unconv_num = 0;
15941ab64890Smrg
15951ab64890Smrg    while (src < srcend && dst < dstend) {
15961ab64890Smrg	Utf8Conv chosen_charset = NULL;
15971ab64890Smrg	XlcSide chosen_side = XlcNONE;
15981ab64890Smrg	wchar_t wc = *src;
15991ab64890Smrg	int count;
16001ab64890Smrg
16011ab64890Smrg	count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
16021ab64890Smrg	if (count == RET_TOOSMALL)
16031ab64890Smrg	    break;
16041ab64890Smrg	if (count == RET_ILSEQ) {
16051ab64890Smrg	    src++;
16061ab64890Smrg	    unconv_num++;
16071ab64890Smrg	    continue;
16081ab64890Smrg	}
16091ab64890Smrg
16101ab64890Smrg	if (last_charset == NULL) {
16111ab64890Smrg	    last_charset =
16121ab64890Smrg	        _XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
16131ab64890Smrg	    if (last_charset == NULL) {
16141ab64890Smrg		src++;
16151ab64890Smrg		unconv_num++;
16161ab64890Smrg		continue;
16171ab64890Smrg	    }
16181ab64890Smrg	} else {
16191ab64890Smrg	    if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
16201ab64890Smrg	          && (last_charset->side == XlcGLGR
16211ab64890Smrg	              || last_charset->side == chosen_side)))
16221ab64890Smrg		break;
16231ab64890Smrg	}
16241ab64890Smrg	src++;
16251ab64890Smrg	dst += count;
16261ab64890Smrg	break;
16271ab64890Smrg    }
16281ab64890Smrg
16291ab64890Smrg    if (last_charset == NULL)
16301ab64890Smrg	return -1;
16311ab64890Smrg
16321ab64890Smrg    *from = (XPointer) src;
16331ab64890Smrg    *from_left = srcend - src;
16341ab64890Smrg    *to = (XPointer) dst;
16351ab64890Smrg    *to_left = dstend - dst;
16361ab64890Smrg
16371ab64890Smrg    if (num_args >= 1)
16381ab64890Smrg	*((XlcCharSet *)args[0]) = last_charset;
16391ab64890Smrg
16401ab64890Smrg    return unconv_num;
16411ab64890Smrg}
16421ab64890Smrg
16431ab64890Smrgstatic XlcConvMethodsRec methods_wcstocs1 = {
16441ab64890Smrg    close_tocs_converter,
16451ab64890Smrg    wcstocs1,
16461ab64890Smrg    NULL
16471ab64890Smrg};
16481ab64890Smrg
16491ab64890Smrgstatic XlcConv
16501ab64890Smrgopen_wcstocs1(
16511ab64890Smrg    XLCd from_lcd,
16521ab64890Smrg    const char *from_type,
16531ab64890Smrg    XLCd to_lcd,
16541ab64890Smrg    const char *to_type)
16551ab64890Smrg{
16561ab64890Smrg    return create_tocs_conv(from_lcd, &methods_wcstocs1);
16571ab64890Smrg}
16581ab64890Smrg
16591ab64890Smrg/* trivial, no conversion */
16601ab64890Smrg
16611ab64890Smrgstatic int
16621ab64890Smrgidentity(
16631ab64890Smrg    XlcConv conv,
16641ab64890Smrg    XPointer *from,
16651ab64890Smrg    int *from_left,
16661ab64890Smrg    XPointer *to,
16671ab64890Smrg    int *to_left,
16681ab64890Smrg    XPointer *args,
16691ab64890Smrg    int num_args)
16701ab64890Smrg{
16711ab64890Smrg    unsigned char const *src;
16721ab64890Smrg    unsigned char const *srcend;
16731ab64890Smrg    unsigned char *dst;
16741ab64890Smrg    unsigned char *dstend;
16751ab64890Smrg
16761ab64890Smrg    if (from == NULL || *from == NULL)
16771ab64890Smrg	return 0;
16781ab64890Smrg
16791ab64890Smrg    src = (unsigned char const *) *from;
16801ab64890Smrg    srcend = src + *from_left;
16811ab64890Smrg    dst = (unsigned char *) *to;
16821ab64890Smrg    dstend = dst + *to_left;
16831ab64890Smrg
16841ab64890Smrg    while (src < srcend && dst < dstend)
16851ab64890Smrg	*dst++ = *src++;
16861ab64890Smrg
16871ab64890Smrg    *from = (XPointer) src;
16881ab64890Smrg    *from_left = srcend - src;
16891ab64890Smrg    *to = (XPointer) dst;
16901ab64890Smrg    *to_left = dstend - dst;
16911ab64890Smrg
16921ab64890Smrg    return 0;
16931ab64890Smrg}
16941ab64890Smrg
16951ab64890Smrgstatic XlcConvMethodsRec methods_identity = {
16961ab64890Smrg    close_converter,
16971ab64890Smrg    identity,
16981ab64890Smrg    NULL
16991ab64890Smrg};
17001ab64890Smrg
17011ab64890Smrgstatic XlcConv
17021ab64890Smrgopen_identity(
17031ab64890Smrg    XLCd from_lcd,
17041ab64890Smrg    const char *from_type,
17051ab64890Smrg    XLCd to_lcd,
17061ab64890Smrg    const char *to_type)
17071ab64890Smrg{
17081ab64890Smrg    return create_conv(from_lcd, &methods_identity);
17091ab64890Smrg}
17101ab64890Smrg
17111ab64890Smrg/* from MultiByte/WideChar to FontCharSet. */
17121ab64890Smrg/* They really use converters to CharSet
17131ab64890Smrg * but with different create_conv procedure. */
17141ab64890Smrg
17151ab64890Smrgstatic XlcConv
17161ab64890Smrgcreate_tofontcs_conv(
17171ab64890Smrg    XLCd lcd,
17181ab64890Smrg    XlcConvMethods methods)
17191ab64890Smrg{
17201ab64890Smrg    XlcConv conv;
17211ab64890Smrg    int i, num, k, count;
17221ab64890Smrg    char **value, buf[20];
17231ab64890Smrg    Utf8Conv *preferred;
17241ab64890Smrg
17251ab64890Smrg    lazy_init_all_charsets();
17261ab64890Smrg
17271ab64890Smrg    for (i = 0, num = 0;; i++) {
17281ab64890Smrg	sprintf(buf, "fs%d.charset.name", i);
17291ab64890Smrg	_XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
17301ab64890Smrg	if (count < 1) {
17311ab64890Smrg	    sprintf(buf, "fs%d.charset", i);
17321ab64890Smrg	    _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
17331ab64890Smrg	    if (count < 1)
17341ab64890Smrg		break;
17351ab64890Smrg	}
17361ab64890Smrg	num += count;
17371ab64890Smrg    }
17381ab64890Smrg
17391ab64890Smrg    conv = (XlcConv) Xmalloc(sizeof(XlcConvRec) + (num + 1) * sizeof(Utf8Conv));
17401ab64890Smrg    if (conv == (XlcConv) NULL)
17411ab64890Smrg	return (XlcConv) NULL;
17421ab64890Smrg    preferred = (Utf8Conv *) ((char *) conv + sizeof(XlcConvRec));
17431ab64890Smrg
17441ab64890Smrg    /* Loop through all fontsets mentioned in the locale. */
17451ab64890Smrg    for (i = 0, num = 0;; i++) {
17461ab64890Smrg        sprintf(buf, "fs%d.charset.name", i);
17471ab64890Smrg        _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
17481ab64890Smrg        if (count < 1) {
17491ab64890Smrg            sprintf(buf, "fs%d.charset", i);
17501ab64890Smrg            _XlcGetResource(lcd, "XLC_FONTSET", buf, &value, &count);
17511ab64890Smrg            if (count < 1)
17521ab64890Smrg                break;
17531ab64890Smrg        }
17541ab64890Smrg	while (count-- > 0) {
17551ab64890Smrg	    XlcCharSet charset = _XlcGetCharSet(*value++);
17561ab64890Smrg	    const char *name;
17571ab64890Smrg
17581ab64890Smrg	    if (charset == (XlcCharSet) NULL)
17591ab64890Smrg		continue;
17601ab64890Smrg
17611ab64890Smrg	    name = charset->encoding_name;
17621ab64890Smrg	    /* If it wasn't already encountered... */
17631ab64890Smrg	    for (k = num - 1; k >= 0; k--)
17641ab64890Smrg		if (!strcmp(preferred[k]->name, name))
17651ab64890Smrg		    break;
17661ab64890Smrg	    if (k < 0) {
17671ab64890Smrg                /* For fonts "ISO10646-1" means ucs2, not utf8.*/
17681ab64890Smrg                if (!strcmp("ISO10646-1", name)) {
17691ab64890Smrg                    preferred[num++] = &all_charsets[ucs2_conv_index];
17701ab64890Smrg                    continue;
17711ab64890Smrg                }
17721ab64890Smrg		/* Look it up in all_charsets[]. */
17731ab64890Smrg		for (k = 0; k < all_charsets_count-1; k++)
17741ab64890Smrg		    if (!strcmp(all_charsets[k].name, name)) {
17751ab64890Smrg			/* Add it to the preferred set. */
17761ab64890Smrg			preferred[num++] = &all_charsets[k];
17771ab64890Smrg			break;
17781ab64890Smrg		    }
17791ab64890Smrg	    }
17801ab64890Smrg        }
17811ab64890Smrg    }
17821ab64890Smrg    preferred[num] = (Utf8Conv) NULL;
17831ab64890Smrg
17841ab64890Smrg    conv->methods = methods;
17851ab64890Smrg    conv->state = (XPointer) preferred;
17861ab64890Smrg
17871ab64890Smrg    return conv;
17881ab64890Smrg}
17891ab64890Smrg
17901ab64890Smrgstatic XlcConv
17911ab64890Smrgopen_wcstofcs(
17921ab64890Smrg    XLCd from_lcd,
17931ab64890Smrg    const char *from_type,
17941ab64890Smrg    XLCd to_lcd,
17951ab64890Smrg    const char *to_type)
17961ab64890Smrg{
17971ab64890Smrg    return create_tofontcs_conv(from_lcd, &methods_wcstocs);
17981ab64890Smrg}
17991ab64890Smrg
18001ab64890Smrgstatic XlcConv
18011ab64890Smrgopen_utf8tofcs(
18021ab64890Smrg    XLCd from_lcd,
18031ab64890Smrg    const char *from_type,
18041ab64890Smrg    XLCd to_lcd,
18051ab64890Smrg    const char *to_type)
18061ab64890Smrg{
18071ab64890Smrg    return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
18081ab64890Smrg}
18091ab64890Smrg
18101ab64890Smrg/* Registers UTF-8 converters for a UTF-8 locale. */
18111ab64890Smrg
18121ab64890Smrgvoid
18131ab64890Smrg_XlcAddUtf8LocaleConverters(
18141ab64890Smrg    XLCd lcd)
18151ab64890Smrg{
18161ab64890Smrg    /* Register elementary converters. */
18171ab64890Smrg
18181ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_utf8towcs);
18191ab64890Smrg
18201ab64890Smrg    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstoutf8);
18211ab64890Smrg    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
18221ab64890Smrg
18231ab64890Smrg    _XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
18241ab64890Smrg
18251ab64890Smrg    /* Register converters for XlcNCharSet. This implicitly provides
18261ab64890Smrg     * converters from and to XlcNCompoundText. */
18271ab64890Smrg
18281ab64890Smrg    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstoutf8);
18291ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_utf8tocs);
18301ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_utf8tocs1);
18311ab64890Smrg
18321ab64890Smrg    _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
18331ab64890Smrg    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
18341ab64890Smrg    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
18351ab64890Smrg
18361ab64890Smrg    _XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_strtoutf8);
18371ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_utf8tostr);
18381ab64890Smrg    _XlcSetConverter(lcd, XlcNUtf8String, lcd, XlcNMultiByte, open_identity);
18391ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNUtf8String, open_identity);
18401ab64890Smrg
18411ab64890Smrg    /* Register converters for XlcNFontCharSet */
18421ab64890Smrg    _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
18431ab64890Smrg    _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
18441ab64890Smrg}
1845