winduni.c revision 1.6 1 1.1 christos /* winduni.c -- unicode support for the windres program.
2 1.6 christos Copyright (C) 1997-2018 Free Software Foundation, Inc.
3 1.1 christos Written by Ian Lance Taylor, Cygnus Support.
4 1.1 christos Rewritten by Kai Tietz, Onevision.
5 1.1 christos
6 1.1 christos This file is part of GNU Binutils.
7 1.1 christos
8 1.1 christos This program is free software; you can redistribute it and/or modify
9 1.1 christos it under the terms of the GNU General Public License as published by
10 1.1 christos the Free Software Foundation; either version 3 of the License, or
11 1.1 christos (at your option) any later version.
12 1.1 christos
13 1.1 christos This program is distributed in the hope that it will be useful,
14 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
15 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 1.1 christos GNU General Public License for more details.
17 1.1 christos
18 1.1 christos You should have received a copy of the GNU General Public License
19 1.1 christos along with this program; if not, write to the Free Software
20 1.1 christos Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 1.1 christos 02110-1301, USA. */
22 1.1 christos
23 1.1 christos
24 1.1 christos /* This file contains unicode support routines for the windres
25 1.1 christos program. Ideally, we would have generic unicode support which
26 1.1 christos would work on all systems. However, we don't. Instead, on a
27 1.1 christos Windows host, we are prepared to call some Windows routines. This
28 1.1 christos means that we will generate different output on Windows and Unix
29 1.1 christos hosts, but that seems better than not really supporting unicode at
30 1.1 christos all. */
31 1.1 christos
32 1.1 christos #include "sysdep.h"
33 1.1 christos #include "bfd.h"
34 1.1 christos #include "libiberty.h" /* for xstrdup */
35 1.1 christos #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
37 1.1 christos #if defined (_WIN32) || defined (__CYGWIN__)
38 1.1 christos #include <windows.h>
39 1.1 christos #include <winnls.h>
40 1.1 christos #endif
41 1.1 christos #include "winduni.h"
42 1.1 christos #include "safe-ctype.h"
43 1.1 christos
44 1.1 christos #if HAVE_ICONV
45 1.1 christos #include <iconv.h>
46 1.1 christos #endif
47 1.1 christos
48 1.1 christos static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
49 1.1 christos static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
50 1.1 christos static int unichar_isascii (const unichar *, rc_uint_type);
51 1.1 christos
/* Static codepage and language tables.  They are only compiled on
   hosts other than Windows and Cygwin; on those two hosts the lookup
   functions further down ask the system instead.  */
54 1.3 christos
55 1.1 christos #if !defined (_WIN32) && !defined (__CYGWIN__)
56 1.1 christos
57 1.1 christos /* Codepages mapped. */
58 1.1 christos static local_iconv_map codepages[] =
59 1.1 christos {
60 1.6 christos { 0, "cp1252" },
61 1.1 christos { 1, "WINDOWS-1252" },
62 1.1 christos { 437, "MS-ANSI" },
63 1.1 christos { 737, "MS-GREEK" },
64 1.1 christos { 775, "WINBALTRIM" },
65 1.1 christos { 850, "MS-ANSI" },
66 1.1 christos { 852, "MS-EE" },
67 1.1 christos { 857, "MS-TURK" },
68 1.1 christos { 862, "CP862" },
69 1.1 christos { 864, "CP864" },
70 1.1 christos { 866, "MS-CYRL" },
71 1.1 christos { 874, "WINDOWS-874" },
72 1.1 christos { 932, "CP932" },
73 1.1 christos { 936, "CP936" },
74 1.1 christos { 949, "CP949" },
75 1.1 christos { 950, "CP950" },
76 1.1 christos { 1250, "WINDOWS-1250" },
77 1.1 christos { 1251, "WINDOWS-1251" },
78 1.1 christos { 1252, "WINDOWS-1252" },
79 1.1 christos { 1253, "WINDOWS-1253" },
80 1.1 christos { 1254, "WINDOWS-1254" },
81 1.1 christos { 1255, "WINDOWS-1255" },
82 1.1 christos { 1256, "WINDOWS-1256" },
83 1.1 christos { 1257, "WINDOWS-1257" },
84 1.1 christos { 1258, "WINDOWS-1258" },
85 1.1 christos { CP_UTF7, "UTF-7" },
86 1.1 christos { CP_UTF8, "UTF-8" },
87 1.3 christos { CP_UTF16, "UTF-16LE" },
88 1.1 christos { (rc_uint_type) -1, NULL }
89 1.1 christos };
90 1.1 christos
91 1.1 christos /* Languages supported. */
92 1.1 christos static const wind_language_t languages[] =
93 1.1 christos {
94 1.1 christos { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 1.1 christos { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 1.1 christos { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 1.1 christos { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 1.1 christos { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 1.1 christos { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 1.1 christos { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 1.1 christos { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 1.1 christos { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 1.1 christos { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 1.3 christos { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
105 1.1 christos { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 1.1 christos { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 1.1 christos { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 1.1 christos { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 1.1 christos { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 1.1 christos { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 1.1 christos { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 1.1 christos { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 1.1 christos { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 1.1 christos { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 1.1 christos { 0x042D, 850, 1252, "Basque", "Spain" },
116 1.1 christos { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 1.1 christos { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 1.1 christos { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 1.1 christos { 0x043C, 437, 1252, "Irish", "Ireland" },
120 1.1 christos { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 1.1 christos { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 1.1 christos { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 1.1 christos { 0x0807, 850, 1252, "German", "Switzerland" },
124 1.1 christos { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 1.1 christos { 0x080C, 850, 1252, "French", "Belgium" },
126 1.1 christos { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 1.1 christos { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 1.1 christos { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 1.1 christos { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 1.1 christos { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 1.1 christos { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 1.1 christos { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 1.1 christos { 0x0C07, 850, 1252, "German", "Austria" },
134 1.1 christos { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 1.1 christos { 0x0C0C, 850, 1252, "French", "Canada"},
136 1.1 christos { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 1.1 christos { 0x1001, 864, 1256, "Arabic", "Libya" },
138 1.1 christos { 0x1004, 936, 936, "Chinese", "Singapore" },
139 1.1 christos { 0x1007, 850, 1252, "German", "Luxembourg" },
140 1.1 christos { 0x1009, 850, 1252, "English", "Canada" },
141 1.1 christos { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 1.1 christos { 0x100C, 850, 1252, "French", "Switzerland" },
143 1.1 christos { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 1.1 christos { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 1.1 christos { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 1.1 christos { 0x140C, 850, 1252, "French", "Luxembourg" },
147 1.1 christos { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 1.1 christos { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 1.1 christos { 0x180C, 850, 1252, "French", "Monaco" },
150 1.1 christos { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 1.1 christos { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 1.1 christos { 0x2001, 864, 1256, "Arabic", "Oman" },
153 1.1 christos { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 1.1 christos { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 1.1 christos { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 1.1 christos { 0x2801, 864, 1256, "Arabic", "Syria" },
157 1.1 christos { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 1.1 christos { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 1.1 christos { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 1.1 christos { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 1.1 christos { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 1.1 christos { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 1.1 christos { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 1.1 christos { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 1.1 christos { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 1.1 christos { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 1.1 christos { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 1.1 christos { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 1.1 christos { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 1.1 christos { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 1.1 christos { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 1.1 christos { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 1.1 christos { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 1.1 christos { (unsigned) -1, 0, 0, NULL, NULL }
175 1.1 christos };
176 1.1 christos
177 1.1 christos #endif
178 1.1 christos
179 1.1 christos /* Specifies the default codepage to be used for unicode
180 1.1 christos transformations. By default this is CP_ACP. */
181 1.1 christos rc_uint_type wind_default_codepage = CP_ACP;
182 1.1 christos
183 1.1 christos /* Specifies the currently used codepage for unicode
184 1.1 christos transformations. By default this is CP_ACP. */
185 1.1 christos rc_uint_type wind_current_codepage = CP_ACP;
186 1.1 christos
187 1.1 christos /* Convert an ASCII string to a unicode string. We just copy it,
188 1.1 christos expanding chars to shorts, rather than doing something intelligent. */
189 1.1 christos
190 1.1 christos void
191 1.1 christos unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
192 1.1 christos {
193 1.1 christos unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
194 1.1 christos }
195 1.1 christos
196 1.1 christos /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197 1.1 christos copy it, expanding chars to shorts, rather than doing something intelligent.
198 1.1 christos This routine converts also \0 within a string. */
199 1.1 christos
200 1.1 christos void
201 1.1 christos unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
202 1.1 christos {
203 1.1 christos char *tmp, *p;
204 1.1 christos rc_uint_type tlen, elen, idx = 0;
205 1.1 christos
206 1.1 christos *unicode = NULL;
207 1.1 christos
208 1.1 christos if (!a_length)
209 1.1 christos {
210 1.1 christos if (length)
211 1.1 christos *length = 0;
212 1.1 christos return;
213 1.1 christos }
214 1.1 christos
215 1.1 christos /* Make sure we have zero terminated string. */
216 1.5 christos p = tmp = (char *) xmalloc (a_length + 1);
217 1.1 christos memcpy (tmp, ascii, a_length);
218 1.1 christos tmp[a_length] = 0;
219 1.1 christos
220 1.1 christos while (a_length > 0)
221 1.1 christos {
222 1.1 christos unichar *utmp, *up;
223 1.1 christos
224 1.1 christos tlen = strlen (p);
225 1.1 christos
226 1.1 christos if (tlen > a_length)
227 1.1 christos tlen = a_length;
228 1.1 christos if (*p == 0)
229 1.1 christos {
230 1.1 christos /* Make room for one more character. */
231 1.1 christos utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
232 1.1 christos if (idx > 0)
233 1.1 christos {
234 1.1 christos memcpy (utmp, *unicode, idx * sizeof (unichar));
235 1.1 christos }
236 1.1 christos *unicode = utmp;
237 1.1 christos utmp[idx++] = 0;
238 1.1 christos --a_length;
239 1.1 christos p++;
240 1.1 christos continue;
241 1.1 christos }
242 1.1 christos utmp = NULL;
243 1.1 christos elen = 0;
244 1.1 christos elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
245 1.1 christos if (elen)
246 1.1 christos {
247 1.1 christos utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
248 1.1 christos wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
249 1.1 christos elen /= sizeof (unichar);
250 1.1 christos elen --;
251 1.1 christos }
252 1.1 christos else
253 1.1 christos {
254 1.1 christos /* Make room for one more character. */
255 1.1 christos utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
256 1.1 christos if (idx > 0)
257 1.1 christos {
258 1.1 christos memcpy (utmp, *unicode, idx * sizeof (unichar));
259 1.1 christos }
260 1.1 christos *unicode = utmp;
261 1.1 christos utmp[idx++] = ((unichar) *p) & 0xff;
262 1.1 christos --a_length;
263 1.1 christos p++;
264 1.1 christos continue;
265 1.1 christos }
266 1.1 christos p += tlen;
267 1.1 christos a_length -= tlen;
268 1.1 christos
269 1.1 christos up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
270 1.1 christos if (idx > 0)
271 1.1 christos memcpy (up, *unicode, idx * sizeof (unichar));
272 1.1 christos
273 1.1 christos *unicode = up;
274 1.1 christos if (elen)
275 1.1 christos memcpy (&up[idx], utmp, sizeof (unichar) * elen);
276 1.1 christos
277 1.1 christos idx += elen;
278 1.1 christos }
279 1.1 christos
280 1.1 christos if (length)
281 1.1 christos *length = idx;
282 1.5 christos
283 1.5 christos free (tmp);
284 1.1 christos }
285 1.1 christos
286 1.1 christos /* Convert an unicode string to an ASCII string. We just copy it,
287 1.1 christos shrink shorts to chars, rather than doing something intelligent.
288 1.1 christos Shorts with not within the char range are replaced by '_'. */
289 1.1 christos
290 1.1 christos void
291 1.1 christos ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
292 1.1 christos {
293 1.1 christos codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
294 1.1 christos }
295 1.1 christos
296 1.1 christos /* Print the unicode string UNICODE to the file E. LENGTH is the
297 1.1 christos number of characters to print, or -1 if we should print until the
298 1.1 christos end of the string. FIXME: On a Windows host, we should be calling
299 1.1 christos some Windows function, probably WideCharToMultiByte. */
300 1.1 christos
301 1.1 christos void
302 1.1 christos unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
303 1.1 christos {
304 1.1 christos while (1)
305 1.1 christos {
306 1.1 christos unichar ch;
307 1.1 christos
308 1.1 christos if (length == 0)
309 1.1 christos return;
310 1.1 christos if ((bfd_signed_vma) length > 0)
311 1.1 christos --length;
312 1.1 christos
313 1.1 christos ch = *unicode;
314 1.1 christos
315 1.1 christos if (ch == 0 && (bfd_signed_vma) length < 0)
316 1.1 christos return;
317 1.1 christos
318 1.1 christos ++unicode;
319 1.1 christos
320 1.1 christos if ((ch & 0x7f) == ch)
321 1.1 christos {
322 1.1 christos if (ch == '\\')
323 1.1 christos fputs ("\\\\", e);
324 1.1 christos else if (ch == '"')
325 1.1 christos fputs ("\"\"", e);
326 1.1 christos else if (ISPRINT (ch))
327 1.1 christos putc (ch, e);
328 1.1 christos else
329 1.1 christos {
330 1.1 christos switch (ch)
331 1.1 christos {
332 1.1 christos case ESCAPE_A:
333 1.1 christos fputs ("\\a", e);
334 1.1 christos break;
335 1.1 christos
336 1.1 christos case ESCAPE_B:
337 1.1 christos fputs ("\\b", e);
338 1.1 christos break;
339 1.1 christos
340 1.1 christos case ESCAPE_F:
341 1.1 christos fputs ("\\f", e);
342 1.1 christos break;
343 1.1 christos
344 1.1 christos case ESCAPE_N:
345 1.1 christos fputs ("\\n", e);
346 1.1 christos break;
347 1.1 christos
348 1.1 christos case ESCAPE_R:
349 1.1 christos fputs ("\\r", e);
350 1.1 christos break;
351 1.1 christos
352 1.1 christos case ESCAPE_T:
353 1.1 christos fputs ("\\t", e);
354 1.1 christos break;
355 1.1 christos
356 1.1 christos case ESCAPE_V:
357 1.1 christos fputs ("\\v", e);
358 1.1 christos break;
359 1.1 christos
360 1.1 christos default:
361 1.1 christos fprintf (e, "\\%03o", (unsigned int) ch);
362 1.1 christos break;
363 1.1 christos }
364 1.1 christos }
365 1.1 christos }
366 1.1 christos else if ((ch & 0xff) == ch)
367 1.1 christos fprintf (e, "\\%03o", (unsigned int) ch);
368 1.1 christos else
369 1.1 christos fprintf (e, "\\x%04x", (unsigned int) ch);
370 1.1 christos }
371 1.1 christos }
372 1.1 christos
373 1.1 christos /* Print a unicode string to a file. */
374 1.1 christos
375 1.1 christos void
376 1.1 christos ascii_print (FILE *e, const char *s, rc_uint_type length)
377 1.1 christos {
378 1.1 christos while (1)
379 1.1 christos {
380 1.1 christos char ch;
381 1.1 christos
382 1.1 christos if (length == 0)
383 1.1 christos return;
384 1.1 christos if ((bfd_signed_vma) length > 0)
385 1.1 christos --length;
386 1.1 christos
387 1.1 christos ch = *s;
388 1.1 christos
389 1.1 christos if (ch == 0 && (bfd_signed_vma) length < 0)
390 1.1 christos return;
391 1.1 christos
392 1.1 christos ++s;
393 1.1 christos
394 1.1 christos if ((ch & 0x7f) == ch)
395 1.1 christos {
396 1.1 christos if (ch == '\\')
397 1.1 christos fputs ("\\\\", e);
398 1.1 christos else if (ch == '"')
399 1.1 christos fputs ("\"\"", e);
400 1.1 christos else if (ISPRINT (ch))
401 1.1 christos putc (ch, e);
402 1.1 christos else
403 1.1 christos {
404 1.1 christos switch (ch)
405 1.1 christos {
406 1.1 christos case ESCAPE_A:
407 1.1 christos fputs ("\\a", e);
408 1.1 christos break;
409 1.1 christos
410 1.1 christos case ESCAPE_B:
411 1.1 christos fputs ("\\b", e);
412 1.1 christos break;
413 1.1 christos
414 1.1 christos case ESCAPE_F:
415 1.1 christos fputs ("\\f", e);
416 1.1 christos break;
417 1.1 christos
418 1.1 christos case ESCAPE_N:
419 1.1 christos fputs ("\\n", e);
420 1.1 christos break;
421 1.1 christos
422 1.1 christos case ESCAPE_R:
423 1.1 christos fputs ("\\r", e);
424 1.1 christos break;
425 1.1 christos
426 1.1 christos case ESCAPE_T:
427 1.1 christos fputs ("\\t", e);
428 1.1 christos break;
429 1.1 christos
430 1.1 christos case ESCAPE_V:
431 1.1 christos fputs ("\\v", e);
432 1.1 christos break;
433 1.1 christos
434 1.1 christos default:
435 1.1 christos fprintf (e, "\\%03o", (unsigned int) ch);
436 1.1 christos break;
437 1.1 christos }
438 1.1 christos }
439 1.1 christos }
440 1.1 christos else
441 1.1 christos fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
442 1.1 christos }
443 1.1 christos }
444 1.1 christos
445 1.1 christos rc_uint_type
446 1.1 christos unichar_len (const unichar *unicode)
447 1.1 christos {
448 1.1 christos rc_uint_type r = 0;
449 1.1 christos
450 1.1 christos if (unicode)
451 1.1 christos while (unicode[r] != 0)
452 1.1 christos r++;
453 1.1 christos else
454 1.1 christos --r;
455 1.1 christos return r;
456 1.1 christos }
457 1.1 christos
458 1.1 christos unichar *
459 1.1 christos unichar_dup (const unichar *unicode)
460 1.1 christos {
461 1.1 christos unichar *r;
462 1.1 christos int len;
463 1.1 christos
464 1.1 christos if (! unicode)
465 1.1 christos return NULL;
466 1.1 christos for (len = 0; unicode[len] != 0; ++len)
467 1.1 christos ;
468 1.1 christos ++len;
469 1.1 christos r = ((unichar *) res_alloc (len * sizeof (unichar)));
470 1.1 christos memcpy (r, unicode, len * sizeof (unichar));
471 1.1 christos return r;
472 1.1 christos }
473 1.1 christos
474 1.1 christos unichar *
475 1.1 christos unichar_dup_uppercase (const unichar *u)
476 1.1 christos {
477 1.1 christos unichar *r = unichar_dup (u);
478 1.1 christos int i;
479 1.1 christos
480 1.1 christos if (! r)
481 1.1 christos return NULL;
482 1.1 christos
483 1.1 christos for (i = 0; r[i] != 0; ++i)
484 1.1 christos {
485 1.1 christos if (r[i] >= 'a' && r[i] <= 'z')
486 1.1 christos r[i] &= 0xdf;
487 1.1 christos }
488 1.1 christos return r;
489 1.1 christos }
490 1.1 christos
491 1.1 christos static int
492 1.1 christos unichar_isascii (const unichar *u, rc_uint_type len)
493 1.1 christos {
494 1.1 christos rc_uint_type i;
495 1.1 christos
496 1.1 christos if ((bfd_signed_vma) len < 0)
497 1.1 christos {
498 1.1 christos if (u)
499 1.1 christos len = (rc_uint_type) unichar_len (u);
500 1.1 christos else
501 1.1 christos len = 0;
502 1.1 christos }
503 1.1 christos
504 1.1 christos for (i = 0; i < len; i++)
505 1.1 christos if ((u[i] & 0xff80) != 0)
506 1.1 christos return 0;
507 1.1 christos return 1;
508 1.1 christos }
509 1.1 christos
510 1.1 christos void
511 1.1 christos unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
512 1.1 christos {
513 1.1 christos if (! unichar_isascii (u, len))
514 1.1 christos fputc ('L', e);
515 1.1 christos fputc ('"', e);
516 1.1 christos unicode_print (e, u, len);
517 1.1 christos fputc ('"', e);
518 1.1 christos }
519 1.1 christos
520 1.1 christos int
521 1.1 christos unicode_is_valid_codepage (rc_uint_type cp)
522 1.1 christos {
523 1.1 christos if ((cp & 0xffff) != cp)
524 1.1 christos return 0;
525 1.1 christos if (cp == CP_UTF16 || cp == CP_ACP)
526 1.1 christos return 1;
527 1.1 christos
528 1.1 christos #if !defined (_WIN32) && !defined (__CYGWIN__)
529 1.1 christos if (! wind_find_codepage_info (cp))
530 1.1 christos return 0;
531 1.1 christos return 1;
532 1.1 christos #else
533 1.1 christos return !! IsValidCodePage ((UINT) cp);
534 1.1 christos #endif
535 1.1 christos }
536 1.1 christos
537 1.1 christos #if defined (_WIN32) || defined (__CYGWIN__)
538 1.1 christos
539 1.1 christos #define max_cp_string_len 6
540 1.1 christos
541 1.1 christos static unsigned int
542 1.1 christos codepage_from_langid (unsigned short langid)
543 1.1 christos {
544 1.1 christos char cp_string [max_cp_string_len];
545 1.1 christos int c;
546 1.1 christos
547 1.1 christos memset (cp_string, 0, max_cp_string_len);
548 1.1 christos /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549 1.1 christos but is unavailable on Win95. */
550 1.1 christos c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
551 1.1 christos LOCALE_IDEFAULTANSICODEPAGE,
552 1.1 christos cp_string, max_cp_string_len);
553 1.1 christos /* If codepage data for an LCID is not installed on users's system,
554 1.1 christos GetLocaleInfo returns an empty string. Fall back to system ANSI
555 1.1 christos default. */
556 1.1 christos if (c == 0)
557 1.1 christos return CP_ACP;
558 1.1 christos return strtoul (cp_string, 0, 10);
559 1.1 christos }
560 1.1 christos
561 1.1 christos static unsigned int
562 1.1 christos wincodepage_from_langid (unsigned short langid)
563 1.1 christos {
564 1.1 christos char cp_string [max_cp_string_len];
565 1.1 christos int c;
566 1.1 christos
567 1.1 christos memset (cp_string, 0, max_cp_string_len);
568 1.1 christos /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569 1.1 christos but is unavailable on Win95. */
570 1.1 christos c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
571 1.1 christos LOCALE_IDEFAULTCODEPAGE,
572 1.1 christos cp_string, max_cp_string_len);
573 1.1 christos /* If codepage data for an LCID is not installed on users's system,
574 1.1 christos GetLocaleInfo returns an empty string. Fall back to system ANSI
575 1.1 christos default. */
576 1.1 christos if (c == 0)
577 1.1 christos return CP_OEM;
578 1.1 christos return strtoul (cp_string, 0, 10);
579 1.1 christos }
580 1.1 christos
581 1.1 christos static char *
582 1.1 christos lang_from_langid (unsigned short langid)
583 1.1 christos {
584 1.1 christos char cp_string[261];
585 1.1 christos int c;
586 1.1 christos
587 1.1 christos memset (cp_string, 0, 261);
588 1.1 christos c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
589 1.1 christos LOCALE_SENGLANGUAGE,
590 1.1 christos cp_string, 260);
591 1.1 christos /* If codepage data for an LCID is not installed on users's system,
592 1.1 christos GetLocaleInfo returns an empty string. Fall back to system ANSI
593 1.1 christos default. */
594 1.1 christos if (c == 0)
595 1.1 christos strcpy (cp_string, "Neutral");
596 1.1 christos return xstrdup (cp_string);
597 1.1 christos }
598 1.1 christos
599 1.1 christos static char *
600 1.1 christos country_from_langid (unsigned short langid)
601 1.1 christos {
602 1.1 christos char cp_string[261];
603 1.1 christos int c;
604 1.1 christos
605 1.1 christos memset (cp_string, 0, 261);
606 1.1 christos c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
607 1.1 christos LOCALE_SENGCOUNTRY,
608 1.1 christos cp_string, 260);
609 1.1 christos /* If codepage data for an LCID is not installed on users's system,
610 1.1 christos GetLocaleInfo returns an empty string. Fall back to system ANSI
611 1.1 christos default. */
612 1.1 christos if (c == 0)
613 1.1 christos strcpy (cp_string, "Neutral");
614 1.1 christos return xstrdup (cp_string);
615 1.1 christos }
616 1.1 christos
617 1.1 christos #endif
618 1.1 christos
619 1.1 christos const wind_language_t *
620 1.1 christos wind_find_language_by_id (unsigned id)
621 1.1 christos {
622 1.1 christos #if !defined (_WIN32) && !defined (__CYGWIN__)
623 1.1 christos int i;
624 1.1 christos
625 1.1 christos if (! id)
626 1.1 christos return NULL;
627 1.1 christos for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
628 1.1 christos ;
629 1.1 christos if (languages[i].id == id)
630 1.1 christos return &languages[i];
631 1.1 christos return NULL;
632 1.1 christos #else
633 1.1 christos static wind_language_t wl;
634 1.1 christos
635 1.1 christos wl.id = id;
636 1.1 christos wl.doscp = codepage_from_langid ((unsigned short) id);
637 1.1 christos wl.wincp = wincodepage_from_langid ((unsigned short) id);
638 1.1 christos wl.name = lang_from_langid ((unsigned short) id);
639 1.1 christos wl.country = country_from_langid ((unsigned short) id);
640 1.1 christos
641 1.1 christos return & wl;
642 1.1 christos #endif
643 1.1 christos }
644 1.1 christos
645 1.1 christos const local_iconv_map *
646 1.1 christos wind_find_codepage_info (unsigned cp)
647 1.1 christos {
648 1.1 christos #if !defined (_WIN32) && !defined (__CYGWIN__)
649 1.1 christos int i;
650 1.1 christos
651 1.1 christos for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
652 1.1 christos ;
653 1.1 christos if (codepages[i].codepage == (rc_uint_type) -1)
654 1.1 christos return NULL;
655 1.1 christos return &codepages[i];
656 1.1 christos #else
657 1.1 christos static local_iconv_map lim;
658 1.1 christos if (!unicode_is_valid_codepage (cp))
659 1.1 christos return NULL;
660 1.1 christos lim.codepage = cp;
661 1.1 christos lim.iconv_name = "";
662 1.1 christos return & lim;
663 1.1 christos #endif
664 1.1 christos }
665 1.1 christos
666 1.1 christos /* Convert an Codepage string to a unicode string. */
667 1.1 christos
668 1.1 christos void
669 1.1 christos unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
670 1.1 christos {
671 1.1 christos rc_uint_type len;
672 1.1 christos
673 1.1 christos len = wind_MultiByteToWideChar (cp, src, NULL, 0);
674 1.1 christos if (len)
675 1.1 christos {
676 1.1 christos *u = ((unichar *) res_alloc (len));
677 1.1 christos wind_MultiByteToWideChar (cp, src, *u, len);
678 1.1 christos }
679 1.1 christos /* Discount the trailing '/0'. If MultiByteToWideChar failed,
680 1.1 christos this will set *length to -1. */
681 1.1 christos len -= sizeof (unichar);
682 1.1 christos
683 1.1 christos if (length != NULL)
684 1.1 christos *length = len / sizeof (unichar);
685 1.1 christos }
686 1.1 christos
687 1.1 christos /* Convert an unicode string to an codepage string. */
688 1.1 christos
689 1.1 christos void
690 1.1 christos codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
691 1.1 christos {
692 1.1 christos rc_uint_type len;
693 1.1 christos
694 1.1 christos len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
695 1.1 christos if (len)
696 1.1 christos {
697 1.1 christos *ascii = (char *) res_alloc (len * sizeof (char));
698 1.1 christos wind_WideCharToMultiByte (cp, unicode, *ascii, len);
699 1.1 christos }
700 1.1 christos /* Discount the trailing '/0'. If MultiByteToWideChar failed,
701 1.1 christos this will set *length to -1. */
702 1.1 christos len--;
703 1.1 christos
704 1.1 christos if (length != NULL)
705 1.1 christos *length = len;
706 1.1 christos }
707 1.1 christos
708 1.1 christos #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
709 1.1 christos static int
710 1.1 christos iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
711 1.1 christos {
712 1.1 christos int i;
713 1.1 christos
714 1.1 christos for (i = 1; i <= 32; i++)
715 1.1 christos {
716 1.1 christos char *tmp_d = d;
717 1.1 christos ICONV_CONST char *tmp_s = s;
718 1.1 christos size_t ret;
719 1.1 christos size_t s_left = (size_t) i;
720 1.1 christos size_t d_left = (size_t) d_len;
721 1.1 christos
722 1.1 christos ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
723 1.1 christos
724 1.1 christos if (ret != (size_t) -1)
725 1.1 christos {
726 1.1 christos *n_s = tmp_s;
727 1.1 christos *n_d = tmp_d;
728 1.1 christos return 0;
729 1.1 christos }
730 1.1 christos }
731 1.1 christos
732 1.1 christos return 1;
733 1.1 christos }
734 1.1 christos
735 1.1 christos static const char *
736 1.1 christos wind_iconv_cp (rc_uint_type cp)
737 1.1 christos {
738 1.1 christos const local_iconv_map *lim = wind_find_codepage_info (cp);
739 1.1 christos
740 1.1 christos if (!lim)
741 1.1 christos return NULL;
742 1.1 christos return lim->iconv_name;
743 1.1 christos }
744 1.1 christos #endif /* HAVE_ICONV */
745 1.1 christos
746 1.1 christos static rc_uint_type
747 1.1 christos wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
748 1.1 christos unichar *u, rc_uint_type u_len)
749 1.1 christos {
750 1.1 christos rc_uint_type ret = 0;
751 1.1 christos
752 1.1 christos #if defined (_WIN32) || defined (__CYGWIN__)
753 1.1 christos rc_uint_type conv_flags = MB_PRECOMPOSED;
754 1.1 christos
755 1.3 christos /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
756 1.1 christos MultiByteToWideChar will set the last error to
757 1.1 christos ERROR_INVALID_FLAGS if we do. */
758 1.1 christos if (cp == CP_UTF8 || cp == CP_UTF7)
759 1.1 christos conv_flags = 0;
760 1.1 christos
761 1.1 christos ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
762 1.1 christos mb, -1, u, u_len);
763 1.1 christos /* Convert to bytes. */
764 1.1 christos ret *= sizeof (unichar);
765 1.1 christos
766 1.1 christos #elif defined (HAVE_ICONV)
767 1.1 christos int first = 1;
768 1.1 christos char tmp[32];
769 1.1 christos char *p_tmp;
770 1.1 christos const char *iconv_name = wind_iconv_cp (cp);
771 1.1 christos
772 1.1 christos if (!mb || !iconv_name)
773 1.1 christos return 0;
774 1.3 christos iconv_t cd = iconv_open ("UTF-16LE", iconv_name);
775 1.1 christos
776 1.1 christos while (1)
777 1.1 christos {
778 1.1 christos int iret;
779 1.1 christos const char *n_mb = "";
780 1.1 christos char *n_tmp = "";
781 1.1 christos
782 1.1 christos p_tmp = tmp;
783 1.1 christos iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
784 1.1 christos if (first)
785 1.1 christos {
786 1.1 christos first = 0;
787 1.1 christos continue;
788 1.1 christos }
789 1.1 christos if (!iret)
790 1.1 christos {
791 1.1 christos size_t l_tmp = (size_t) (n_tmp - p_tmp);
792 1.1 christos
793 1.1 christos if (u)
794 1.1 christos {
795 1.1 christos if ((size_t) u_len < l_tmp)
796 1.1 christos break;
797 1.1 christos memcpy (u, tmp, l_tmp);
798 1.1 christos u += l_tmp/2;
799 1.1 christos u_len -= l_tmp;
800 1.1 christos }
801 1.1 christos ret += l_tmp;
802 1.1 christos }
803 1.1 christos else
804 1.1 christos break;
805 1.1 christos if (tmp[0] == 0 && tmp[1] == 0)
806 1.1 christos break;
807 1.1 christos mb = n_mb;
808 1.1 christos }
809 1.1 christos iconv_close (cd);
810 1.1 christos #else
811 1.1 christos if (cp)
812 1.1 christos ret = 0;
813 1.1 christos ret = strlen (mb) + 1;
814 1.1 christos ret *= sizeof (unichar);
815 1.1 christos if (u != NULL && u_len != 0)
816 1.1 christos {
817 1.1 christos do
818 1.1 christos {
819 1.1 christos *u++ = ((unichar) *mb) & 0xff;
820 1.1 christos --u_len; mb++;
821 1.1 christos }
822 1.1 christos while (u_len != 0 && mb[-1] != 0);
823 1.1 christos }
824 1.1 christos if (u != NULL && u_len != 0)
825 1.1 christos *u = 0;
826 1.1 christos #endif
827 1.1 christos return ret;
828 1.1 christos }
829 1.1 christos
830 1.1 christos static rc_uint_type
831 1.1 christos wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
832 1.1 christos {
833 1.1 christos rc_uint_type ret = 0;
834 1.1 christos #if defined (_WIN32) || defined (__CYGWIN__)
835 1.1 christos WINBOOL used_def = FALSE;
836 1.1 christos
837 1.1 christos ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
838 1.1 christos NULL, & used_def);
839 1.1 christos #elif defined (HAVE_ICONV)
840 1.1 christos int first = 1;
841 1.1 christos char tmp[32];
842 1.1 christos char *p_tmp;
843 1.1 christos const char *iconv_name = wind_iconv_cp (cp);
844 1.1 christos
845 1.1 christos if (!u || !iconv_name)
846 1.1 christos return 0;
847 1.3 christos iconv_t cd = iconv_open (iconv_name, "UTF-16LE");
848 1.1 christos
849 1.1 christos while (1)
850 1.1 christos {
851 1.1 christos int iret;
852 1.1 christos const char *n_u = "";
853 1.1 christos char *n_tmp = "";
854 1.1 christos
855 1.1 christos p_tmp = tmp;
856 1.1 christos iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
857 1.1 christos if (first)
858 1.1 christos {
859 1.1 christos first = 0;
860 1.1 christos continue;
861 1.1 christos }
862 1.1 christos if (!iret)
863 1.1 christos {
864 1.1 christos size_t l_tmp = (size_t) (n_tmp - p_tmp);
865 1.1 christos
866 1.1 christos if (mb)
867 1.1 christos {
868 1.1 christos if ((size_t) mb_len < l_tmp)
869 1.1 christos break;
870 1.1 christos memcpy (mb, tmp, l_tmp);
871 1.1 christos mb += l_tmp;
872 1.1 christos mb_len -= l_tmp;
873 1.1 christos }
874 1.1 christos ret += l_tmp;
875 1.1 christos }
876 1.1 christos else
877 1.1 christos break;
878 1.1 christos if (u[0] == 0)
879 1.1 christos break;
880 1.1 christos u = (const unichar *) n_u;
881 1.1 christos }
882 1.1 christos iconv_close (cd);
883 1.1 christos #else
884 1.1 christos if (cp)
885 1.1 christos ret = 0;
886 1.1 christos
887 1.1 christos while (u[ret] != 0)
888 1.1 christos ++ret;
889 1.1 christos
890 1.1 christos ++ret;
891 1.1 christos
892 1.1 christos if (mb)
893 1.1 christos {
894 1.1 christos while (*u != 0 && mb_len != 0)
895 1.1 christos {
896 1.1 christos if (u[0] == (u[0] & 0x7f))
897 1.1 christos *mb++ = (char) u[0];
898 1.1 christos else
899 1.1 christos *mb++ = '_';
900 1.1 christos ++u; --mb_len;
901 1.1 christos }
902 1.1 christos if (mb_len != 0)
903 1.1 christos *mb = 0;
904 1.1 christos }
905 1.1 christos #endif
906 1.1 christos return ret;
907 1.1 christos }
908