Home | History | Annotate | Line # | Download | only in binutils
winduni.c revision 1.1
      1 /* winduni.c -- unicode support for the windres program.
      2    Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
      3    Free Software Foundation, Inc.
      4    Written by Ian Lance Taylor, Cygnus Support.
      5    Rewritten by Kai Tietz, Onevision.
      6 
      7    This file is part of GNU Binutils.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 3 of the License, or
     12    (at your option) any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program; if not, write to the Free Software
     21    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     22    02110-1301, USA.  */
     23 
     24 
     25 /* This file contains unicode support routines for the windres
     26    program.  Ideally, we would have generic unicode support which
     27    would work on all systems.  However, we don't.  Instead, on a
     28    Windows host, we are prepared to call some Windows routines.  This
     29    means that we will generate different output on Windows and Unix
     30    hosts, but that seems better than not really supporting unicode at
     31    all.  */
     32 
     33 #include "sysdep.h"
     34 #include "bfd.h"
     35 #include "libiberty.h" /* for xstrdup */
     36 #include "bucomm.h"
     37 /* Must be included before windows.h and winnls.h.  */
     38 #if defined (_WIN32) || defined (__CYGWIN__)
     39 #include <windows.h>
     40 #include <winnls.h>
     41 #endif
     42 #include "winduni.h"
     43 #include "safe-ctype.h"
     44 
     45 #if HAVE_ICONV
     46 #include <iconv.h>
     47 #endif
     48 
     49 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
     50 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
     51 static int unichar_isascii (const unichar *, rc_uint_type);
     52 
     53 /* Convert an ASCII string to a unicode string.  We just copy it,
     54    expanding chars to shorts, rather than doing something intelligent.  */
     55 
     56 #if !defined (_WIN32) && !defined (__CYGWIN__)
     57 
     58 /* Codepages mapped.  */
     59 static local_iconv_map codepages[] =
     60 {
     61   { 0, "MS-ANSI" },
     62   { 1, "WINDOWS-1252" },
     63   { 437, "MS-ANSI" },
     64   { 737, "MS-GREEK" },
     65   { 775, "WINBALTRIM" },
     66   { 850, "MS-ANSI" },
     67   { 852, "MS-EE" },
     68   { 857, "MS-TURK" },
     69   { 862, "CP862" },
     70   { 864, "CP864" },
     71   { 866, "MS-CYRL" },
     72   { 874, "WINDOWS-874" },
     73   { 932, "CP932" },
     74   { 936, "CP936" },
     75   { 949, "CP949" },
     76   { 950, "CP950" },
     77   { 1250, "WINDOWS-1250" },
     78   { 1251, "WINDOWS-1251" },
     79   { 1252, "WINDOWS-1252" },
     80   { 1253, "WINDOWS-1253" },
     81   { 1254, "WINDOWS-1254" },
     82   { 1255, "WINDOWS-1255" },
     83   { 1256, "WINDOWS-1256" },
     84   { 1257, "WINDOWS-1257" },
     85   { 1258, "WINDOWS-1258" },
     86   { CP_UTF7, "UTF-7" },
     87   { CP_UTF8, "UTF-8" },
     88   { CP_UTF16, "UTF-16" },
     89   { (rc_uint_type) -1, NULL }
     90 };
     91 
     92 /* Languages supported.  */
     93 static const wind_language_t languages[] =
     94 {
     95   { 0x0000, 437, 1252, "Neutral", "Neutral" },
     96   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
     97   { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
     98   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
     99   { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
    100   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
    101   { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
    102   { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
    103   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
    104   { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
    105   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokml)", "Norway" },
    106   { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
    107   { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
    108   { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
    109   { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
    110   { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
    111   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
    112   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
    113   { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
    114   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
    115   { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
    116   { 0x042D, 850, 1252, "Basque", "Spain" },
    117   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
    118   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
    119   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
    120   { 0x043C, 437, 1252, "Irish", "Ireland" },
    121   { 0x043E, 850, 1252, "Malay", "Malaysia" },
    122   { 0x0801, 864, 1256, "Arabic", "Iraq" },
    123   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
    124   { 0x0807, 850, 1252, "German", "Switzerland" },
    125   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
    126   { 0x080C, 850, 1252, "French", "Belgium" },
    127   { 0x0810, 850, 1252, "Italian", "Switzerland" },
    128   { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
    129   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
    130   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
    131   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
    132   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
    133   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
    134   { 0x0C07, 850, 1252, "German", "Austria" },
    135   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
    136   { 0x0C0C, 850, 1252, "French", "Canada"},
    137   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
    138   { 0x1001, 864, 1256, "Arabic", "Libya" },
    139   { 0x1004, 936,  936, "Chinese", "Singapore" },
    140   { 0x1007, 850, 1252, "German", "Luxembourg" },
    141   { 0x1009, 850, 1252, "English", "Canada" },
    142   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
    143   { 0x100C, 850, 1252, "French", "Switzerland" },
    144   { 0x1401, 864, 1256, "Arabic", "Algeria" },
    145   { 0x1407, 850, 1252, "German", "Liechtenstein" },
    146   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
    147   { 0x140C, 850, 1252, "French", "Luxembourg" },
    148   { 0x1801, 864, 1256, "Arabic", "Morocco" },
    149   { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
    150   { 0x180C, 850, 1252, "French", "Monaco" },
    151   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
    152   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
    153   { 0x2001, 864, 1256, "Arabic", "Oman" },
    154   { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
    155   { 0x2401, 864, 1256, "Arabic", "Yemen" },
    156   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
    157   { 0x2801, 864, 1256, "Arabic", "Syria" },
    158   { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
    159   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
    160   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
    161   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
    162   { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
    163   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
    164   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
    165   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
    166   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
    167   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
    168   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
    169   { 0x4001, 864, 1256, "Arabic", "Qatar" },
    170   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
    171   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
    172   { 0x480A, 850, 1252, "Spanish", "Honduras" },
    173   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
    174   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
    175   { (unsigned) -1,  0,      0, NULL, NULL }
    176 };
    177 
    178 #endif
    179 
    180 /* Specifies the default codepage to be used for unicode
    181    transformations.  Initialized to CP_ACP; not read in this file.  */
    182 rc_uint_type wind_default_codepage = CP_ACP;
    183 
    184 /* Specifies the codepage currently used by unicode_from_ascii,
    185    unicode_from_ascii_len and ascii_from_unicode.  Initially CP_ACP.  */
    186 rc_uint_type wind_current_codepage = CP_ACP;
    187 
    188 /* Convert an ASCII string to a unicode string.  We just copy it,
    189    expanding chars to shorts, rather than doing something intelligent.  */
    190 
    191 void
    192 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
    193 {
    194   unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
    195 }
    196 
    197 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
    198    copy it, expanding chars to shorts, rather than doing something intelligent.
    199    This routine converts also \0 within a string.  */
    200 
    201 void
    202 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
    203 {
    204   char *tmp, *p;
    205   rc_uint_type tlen, elen, idx = 0;
    206 
    207   *unicode = NULL;
    208 
    209   if (!a_length)
    210     {
    211       if (length)
    212         *length = 0;
    213       return;
    214     }
    215 
    216   /* Make sure we have zero terminated string.  */
    217   p = tmp = (char *) alloca (a_length + 1);
    218   memcpy (tmp, ascii, a_length);
    219   tmp[a_length] = 0;
    220 
    221   while (a_length > 0)
    222     {
    223       unichar *utmp, *up;
    224 
    225       tlen = strlen (p);
    226 
    227       if (tlen > a_length)
    228         tlen = a_length;
    229       if (*p == 0)
    230         {
    231 	  /* Make room for one more character.  */
    232 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    233 	  if (idx > 0)
    234 	    {
    235 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    236 	    }
    237 	  *unicode = utmp;
    238 	  utmp[idx++] = 0;
    239 	  --a_length;
    240 	  p++;
    241 	  continue;
    242 	}
    243       utmp = NULL;
    244       elen = 0;
    245       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
    246       if (elen)
    247 	{
    248 	  utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
    249 	  wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
    250 	  elen /= sizeof (unichar);
    251 	  elen --;
    252 	}
    253       else
    254         {
    255 	  /* Make room for one more character.  */
    256 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
    257 	  if (idx > 0)
    258 	    {
    259 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
    260 	    }
    261 	  *unicode = utmp;
    262 	  utmp[idx++] = ((unichar) *p) & 0xff;
    263 	  --a_length;
    264 	  p++;
    265 	  continue;
    266 	}
    267       p += tlen;
    268       a_length -= tlen;
    269 
    270       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
    271       if (idx > 0)
    272 	memcpy (up, *unicode, idx * sizeof (unichar));
    273 
    274       *unicode = up;
    275       if (elen)
    276 	memcpy (&up[idx], utmp, sizeof (unichar) * elen);
    277 
    278       idx += elen;
    279     }
    280 
    281   if (length)
    282     *length = idx;
    283 }
    284 
    285 /* Convert an unicode string to an ASCII string.  We just copy it,
    286    shrink shorts to chars, rather than doing something intelligent.
    287    Shorts with not within the char range are replaced by '_'.  */
    288 
    289 void
    290 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
    291 {
    292   codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
    293 }
    294 
    295 /* Print the unicode string UNICODE to the file E.  LENGTH is the
    296    number of characters to print, or -1 if we should print until the
    297    end of the string.  FIXME: On a Windows host, we should be calling
    298    some Windows function, probably WideCharToMultiByte.  */
    299 
    300 void
    301 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
    302 {
    303   while (1)
    304     {
    305       unichar ch;
    306 
    307       if (length == 0)
    308 	return;
    309       if ((bfd_signed_vma) length > 0)
    310 	--length;
    311 
    312       ch = *unicode;
    313 
    314       if (ch == 0 && (bfd_signed_vma) length < 0)
    315 	return;
    316 
    317       ++unicode;
    318 
    319       if ((ch & 0x7f) == ch)
    320 	{
    321 	  if (ch == '\\')
    322 	    fputs ("\\\\", e);
    323 	  else if (ch == '"')
    324 	    fputs ("\"\"", e);
    325 	  else if (ISPRINT (ch))
    326 	    putc (ch, e);
    327 	  else
    328 	    {
    329 	      switch (ch)
    330 		{
    331 		case ESCAPE_A:
    332 		  fputs ("\\a", e);
    333 		  break;
    334 
    335 		case ESCAPE_B:
    336 		  fputs ("\\b", e);
    337 		  break;
    338 
    339 		case ESCAPE_F:
    340 		  fputs ("\\f", e);
    341 		  break;
    342 
    343 		case ESCAPE_N:
    344 		  fputs ("\\n", e);
    345 		  break;
    346 
    347 		case ESCAPE_R:
    348 		  fputs ("\\r", e);
    349 		  break;
    350 
    351 		case ESCAPE_T:
    352 		  fputs ("\\t", e);
    353 		  break;
    354 
    355 		case ESCAPE_V:
    356 		  fputs ("\\v", e);
    357 		  break;
    358 
    359 		default:
    360 		  fprintf (e, "\\%03o", (unsigned int) ch);
    361 		  break;
    362 		}
    363 	    }
    364 	}
    365       else if ((ch & 0xff) == ch)
    366 	fprintf (e, "\\%03o", (unsigned int) ch);
    367       else
    368 	fprintf (e, "\\x%04x", (unsigned int) ch);
    369     }
    370 }
    371 
    372 /* Print a unicode string to a file.  */
    373 
    374 void
    375 ascii_print (FILE *e, const char *s, rc_uint_type length)
    376 {
    377   while (1)
    378     {
    379       char ch;
    380 
    381       if (length == 0)
    382 	return;
    383       if ((bfd_signed_vma) length > 0)
    384 	--length;
    385 
    386       ch = *s;
    387 
    388       if (ch == 0 && (bfd_signed_vma) length < 0)
    389 	return;
    390 
    391       ++s;
    392 
    393       if ((ch & 0x7f) == ch)
    394 	{
    395 	  if (ch == '\\')
    396 	    fputs ("\\\\", e);
    397 	  else if (ch == '"')
    398 	    fputs ("\"\"", e);
    399 	  else if (ISPRINT (ch))
    400 	    putc (ch, e);
    401 	  else
    402 	    {
    403 	      switch (ch)
    404 		{
    405 		case ESCAPE_A:
    406 		  fputs ("\\a", e);
    407 		  break;
    408 
    409 		case ESCAPE_B:
    410 		  fputs ("\\b", e);
    411 		  break;
    412 
    413 		case ESCAPE_F:
    414 		  fputs ("\\f", e);
    415 		  break;
    416 
    417 		case ESCAPE_N:
    418 		  fputs ("\\n", e);
    419 		  break;
    420 
    421 		case ESCAPE_R:
    422 		  fputs ("\\r", e);
    423 		  break;
    424 
    425 		case ESCAPE_T:
    426 		  fputs ("\\t", e);
    427 		  break;
    428 
    429 		case ESCAPE_V:
    430 		  fputs ("\\v", e);
    431 		  break;
    432 
    433 		default:
    434 		  fprintf (e, "\\%03o", (unsigned int) ch);
    435 		  break;
    436 		}
    437 	    }
    438 	}
    439       else
    440 	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
    441     }
    442 }
    443 
    444 rc_uint_type
    445 unichar_len (const unichar *unicode)
    446 {
    447   rc_uint_type r = 0;
    448 
    449   if (unicode)
    450     while (unicode[r] != 0)
    451       r++;
    452   else
    453     --r;
    454   return r;
    455 }
    456 
    457 unichar *
    458 unichar_dup (const unichar *unicode)
    459 {
    460   unichar *r;
    461   int len;
    462 
    463   if (! unicode)
    464     return NULL;
    465   for (len = 0; unicode[len] != 0; ++len)
    466     ;
    467   ++len;
    468   r = ((unichar *) res_alloc (len * sizeof (unichar)));
    469   memcpy (r, unicode, len * sizeof (unichar));
    470   return r;
    471 }
    472 
    473 unichar *
    474 unichar_dup_uppercase (const unichar *u)
    475 {
    476   unichar *r = unichar_dup (u);
    477   int i;
    478 
    479   if (! r)
    480     return NULL;
    481 
    482   for (i = 0; r[i] != 0; ++i)
    483     {
    484       if (r[i] >= 'a' && r[i] <= 'z')
    485 	r[i] &= 0xdf;
    486     }
    487   return r;
    488 }
    489 
    490 static int
    491 unichar_isascii (const unichar *u, rc_uint_type len)
    492 {
    493   rc_uint_type i;
    494 
    495   if ((bfd_signed_vma) len < 0)
    496     {
    497       if (u)
    498 	len = (rc_uint_type) unichar_len (u);
    499       else
    500 	len = 0;
    501     }
    502 
    503   for (i = 0; i < len; i++)
    504     if ((u[i] & 0xff80) != 0)
    505       return 0;
    506   return 1;
    507 }
    508 
    509 void
    510 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
    511 {
    512   if (! unichar_isascii (u, len))
    513     fputc ('L', e);
    514   fputc ('"', e);
    515   unicode_print (e, u, len);
    516   fputc ('"', e);
    517 }
    518 
    519 int
    520 unicode_is_valid_codepage (rc_uint_type cp)
    521 {
    522   if ((cp & 0xffff) != cp)
    523     return 0;
    524   if (cp == CP_UTF16 || cp == CP_ACP)
    525     return 1;
    526 
    527 #if !defined (_WIN32) && !defined (__CYGWIN__)
    528   if (! wind_find_codepage_info (cp))
    529     return 0;
    530   return 1;
    531 #else
    532   return !! IsValidCodePage ((UINT) cp);
    533 #endif
    534 }
    535 
    536 #if defined (_WIN32) || defined (__CYGWIN__)
    537 
    538 #define max_cp_string_len 6
    539 
    540 static unsigned int
    541 codepage_from_langid (unsigned short langid)
    542 {
    543   char cp_string [max_cp_string_len];
    544   int c;
    545 
    546   memset (cp_string, 0, max_cp_string_len);
    547   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    548      but is unavailable on Win95.  */
    549   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    550   		      LOCALE_IDEFAULTANSICODEPAGE,
    551   		      cp_string, max_cp_string_len);
    552   /* If codepage data for an LCID is not installed on users's system,
    553      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    554      default. */
    555   if (c == 0)
    556     return CP_ACP;
    557   return strtoul (cp_string, 0, 10);
    558 }
    559 
    560 static unsigned int
    561 wincodepage_from_langid (unsigned short langid)
    562 {
    563   char cp_string [max_cp_string_len];
    564   int c;
    565 
    566   memset (cp_string, 0, max_cp_string_len);
    567   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
    568      but is unavailable on Win95.  */
    569   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    570 		      LOCALE_IDEFAULTCODEPAGE,
    571 		      cp_string, max_cp_string_len);
    572   /* If codepage data for an LCID is not installed on users's system,
    573      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    574      default. */
    575   if (c == 0)
    576     return CP_OEM;
    577   return strtoul (cp_string, 0, 10);
    578 }
    579 
    580 static char *
    581 lang_from_langid (unsigned short langid)
    582 {
    583   char cp_string[261];
    584   int c;
    585 
    586   memset (cp_string, 0, 261);
    587   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    588   		      LOCALE_SENGLANGUAGE,
    589   		      cp_string, 260);
    590   /* If codepage data for an LCID is not installed on users's system,
    591      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    592      default. */
    593   if (c == 0)
    594     strcpy (cp_string, "Neutral");
    595   return xstrdup (cp_string);
    596 }
    597 
    598 static char *
    599 country_from_langid (unsigned short langid)
    600 {
    601   char cp_string[261];
    602   int c;
    603 
    604   memset (cp_string, 0, 261);
    605   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
    606   		      LOCALE_SENGCOUNTRY,
    607   		      cp_string, 260);
    608   /* If codepage data for an LCID is not installed on users's system,
    609      GetLocaleInfo returns an empty string.  Fall back to system ANSI
    610      default. */
    611   if (c == 0)
    612     strcpy (cp_string, "Neutral");
    613   return xstrdup (cp_string);
    614 }
    615 
    616 #endif
    617 
    618 const wind_language_t *
    619 wind_find_language_by_id (unsigned id)
    620 {
    621 #if !defined (_WIN32) && !defined (__CYGWIN__)
    622   int i;
    623 
    624   if (! id)
    625     return NULL;
    626   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
    627     ;
    628   if (languages[i].id == id)
    629     return &languages[i];
    630   return NULL;
    631 #else
    632   static wind_language_t wl;
    633 
    634   wl.id = id;
    635   wl.doscp = codepage_from_langid ((unsigned short) id);
    636   wl.wincp = wincodepage_from_langid ((unsigned short) id);
    637   wl.name = lang_from_langid ((unsigned short) id);
    638   wl.country = country_from_langid ((unsigned short) id);
    639 
    640   return & wl;
    641 #endif
    642 }
    643 
    644 const local_iconv_map *
    645 wind_find_codepage_info (unsigned cp)
    646 {
    647 #if !defined (_WIN32) && !defined (__CYGWIN__)
    648   int i;
    649 
    650   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
    651     ;
    652   if (codepages[i].codepage == (rc_uint_type) -1)
    653     return NULL;
    654   return &codepages[i];
    655 #else
    656   static local_iconv_map lim;
    657   if (!unicode_is_valid_codepage (cp))
    658   	return NULL;
    659   lim.codepage = cp;
    660   lim.iconv_name = "";
    661   return & lim;
    662 #endif
    663 }
    664 
    665 /* Convert an Codepage string to a unicode string.  */
    666 
    667 void
    668 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
    669 {
    670   rc_uint_type len;
    671 
    672   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
    673   if (len)
    674     {
    675       *u = ((unichar *) res_alloc (len));
    676       wind_MultiByteToWideChar (cp, src, *u, len);
    677     }
    678   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    679      this will set *length to -1.  */
    680   len -= sizeof (unichar);
    681 
    682   if (length != NULL)
    683     *length = len / sizeof (unichar);
    684 }
    685 
    686 /* Convert an unicode string to an codepage string.  */
    687 
    688 void
    689 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
    690 {
    691   rc_uint_type len;
    692 
    693   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
    694   if (len)
    695     {
    696       *ascii = (char *) res_alloc (len * sizeof (char));
    697       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
    698     }
    699   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
    700      this will set *length to -1.  */
    701   len--;
    702 
    703   if (length != NULL)
    704     *length = len;
    705 }
    706 
    707 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
    708 static int
    709 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
    710 {
    711   int i;
    712 
    713   for (i = 1; i <= 32; i++)
    714     {
    715       char *tmp_d = d;
    716       ICONV_CONST char *tmp_s = s;
    717       size_t ret;
    718       size_t s_left = (size_t) i;
    719       size_t d_left = (size_t) d_len;
    720 
    721       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
    722 
    723       if (ret != (size_t) -1)
    724 	{
    725 	  *n_s = tmp_s;
    726 	  *n_d = tmp_d;
    727 	  return 0;
    728 	}
    729     }
    730 
    731   return 1;
    732 }
    733 
    734 static const char *
    735 wind_iconv_cp (rc_uint_type cp)
    736 {
    737   const local_iconv_map *lim = wind_find_codepage_info (cp);
    738 
    739   if (!lim)
    740     return NULL;
    741   return lim->iconv_name;
    742 }
    743 #endif /* HAVE_ICONV */
    744 
    745 static rc_uint_type
    746 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
    747 			  unichar *u, rc_uint_type u_len)
    748 {
    749   rc_uint_type ret = 0;
    750 
    751 #if defined (_WIN32) || defined (__CYGWIN__)
    752   rc_uint_type conv_flags = MB_PRECOMPOSED;
    753 
    754   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
    755      MultiByteToWideChar will set the last error to
    756      ERROR_INVALID_FLAGS if we do. */
    757   if (cp == CP_UTF8 || cp == CP_UTF7)
    758     conv_flags = 0;
    759 
    760   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
    761 					    mb, -1, u, u_len);
    762   /* Convert to bytes. */
    763   ret *= sizeof (unichar);
    764 
    765 #elif defined (HAVE_ICONV)
    766   int first = 1;
    767   char tmp[32];
    768   char *p_tmp;
    769   const char *iconv_name = wind_iconv_cp (cp);
    770 
    771   if (!mb || !iconv_name)
    772     return 0;
    773   iconv_t cd = iconv_open ("UTF-16", iconv_name);
    774 
    775   while (1)
    776     {
    777       int iret;
    778       const char *n_mb = "";
    779       char *n_tmp = "";
    780 
    781       p_tmp = tmp;
    782       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
    783       if (first)
    784 	{
    785 	  first = 0;
    786 	  continue;
    787 	}
    788       if (!iret)
    789 	{
    790 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    791 
    792 	  if (u)
    793 	    {
    794 	      if ((size_t) u_len < l_tmp)
    795 		break;
    796 	      memcpy (u, tmp, l_tmp);
    797 	      u += l_tmp/2;
    798 	      u_len -= l_tmp;
    799 	    }
    800 	  ret += l_tmp;
    801 	}
    802       else
    803 	break;
    804       if (tmp[0] == 0 && tmp[1] == 0)
    805 	break;
    806       mb = n_mb;
    807     }
    808   iconv_close (cd);
    809 #else
    810   if (cp)
    811     ret = 0;
    812   ret = strlen (mb) + 1;
    813   ret *= sizeof (unichar);
    814   if (u != NULL && u_len != 0)
    815     {
    816       do
    817 	{
    818 	  *u++ = ((unichar) *mb) & 0xff;
    819 	  --u_len; mb++;
    820 	}
    821       while (u_len != 0 && mb[-1] != 0);
    822     }
    823   if (u != NULL && u_len != 0)
    824     *u = 0;
    825 #endif
    826   return ret;
    827 }
    828 
    829 static rc_uint_type
    830 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
    831 {
    832   rc_uint_type ret = 0;
    833 #if defined (_WIN32) || defined (__CYGWIN__)
    834   WINBOOL used_def = FALSE;
    835 
    836   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
    837 				      	    NULL, & used_def);
    838 #elif defined (HAVE_ICONV)
    839   int first = 1;
    840   char tmp[32];
    841   char *p_tmp;
    842   const char *iconv_name = wind_iconv_cp (cp);
    843 
    844   if (!u || !iconv_name)
    845     return 0;
    846   iconv_t cd = iconv_open (iconv_name, "UTF-16");
    847 
    848   while (1)
    849     {
    850       int iret;
    851       const char *n_u = "";
    852       char *n_tmp = "";
    853 
    854       p_tmp = tmp;
    855       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
    856       if (first)
    857 	{
    858 	  first = 0;
    859 	  continue;
    860 	}
    861       if (!iret)
    862 	{
    863 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
    864 
    865 	  if (mb)
    866 	    {
    867 	      if ((size_t) mb_len < l_tmp)
    868 		break;
    869 	      memcpy (mb, tmp, l_tmp);
    870 	      mb += l_tmp;
    871 	      mb_len -= l_tmp;
    872 	    }
    873 	  ret += l_tmp;
    874 	}
    875       else
    876 	break;
    877       if (u[0] == 0)
    878 	break;
    879       u = (const unichar *) n_u;
    880     }
    881   iconv_close (cd);
    882 #else
    883   if (cp)
    884     ret = 0;
    885 
    886   while (u[ret] != 0)
    887     ++ret;
    888 
    889   ++ret;
    890 
    891   if (mb)
    892     {
    893       while (*u != 0 && mb_len != 0)
    894 	{
    895 	  if (u[0] == (u[0] & 0x7f))
    896 	    *mb++ = (char) u[0];
    897 	  else
    898 	    *mb++ = '_';
    899 	  ++u; --mb_len;
    900 	}
    901       if (mb_len != 0)
    902 	*mb = 0;
    903     }
    904 #endif
    905   return ret;
    906 }
    907