localcharset.c revision 1.1.1.1.2.1 1 1.1 christos /* Determine a canonical name for the current locale's character encoding.
2 1.1 christos
3 1.1.1.1.2.1 perseant Copyright (C) 2000-2006, 2008-2022 Free Software Foundation, Inc.
4 1.1 christos
5 1.1.1.1.2.1 perseant This file is free software: you can redistribute it and/or modify
6 1.1.1.1.2.1 perseant it under the terms of the GNU Lesser General Public License as
7 1.1.1.1.2.1 perseant published by the Free Software Foundation; either version 2.1 of the
8 1.1.1.1.2.1 perseant License, or (at your option) any later version.
9 1.1 christos
10 1.1.1.1.2.1 perseant This file is distributed in the hope that it will be useful,
11 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1.1.1.2.1 perseant GNU Lesser General Public License for more details.
14 1.1 christos
15 1.1.1.1.2.1 perseant You should have received a copy of the GNU Lesser General Public License
16 1.1.1.1.2.1 perseant along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 1.1 christos
18 1.1 christos /* Written by Bruno Haible <bruno (at) clisp.org>. */
19 1.1 christos
20 1.1 christos #include <config.h>
21 1.1 christos
22 1.1 christos /* Specification. */
23 1.1 christos #include "localcharset.h"
24 1.1 christos
25 1.1 christos #include <stddef.h>
26 1.1 christos #include <stdio.h>
27 1.1 christos #include <string.h>
28 1.1 christos #include <stdlib.h>
29 1.1 christos
30 1.1 christos #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31 1.1 christos # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32 1.1 christos #endif
33 1.1 christos
34 1.1 christos #if defined _WIN32 && !defined __CYGWIN__
35 1.1 christos # define WINDOWS_NATIVE
36 1.1 christos # include <locale.h>
37 1.1 christos #endif
38 1.1 christos
39 1.1 christos #if defined __EMX__
40 1.1 christos /* Assume EMX program runs on OS/2, even if compiled under DOS. */
41 1.1 christos # ifndef OS2
42 1.1 christos # define OS2
43 1.1 christos # endif
44 1.1 christos #endif
45 1.1 christos
46 1.1 christos #if !defined WINDOWS_NATIVE
47 1.1 christos # if HAVE_LANGINFO_CODESET
48 1.1 christos # include <langinfo.h>
49 1.1 christos # else
50 1.1 christos # if 0 /* see comment regarding use of setlocale(), below */
51 1.1 christos # include <locale.h>
52 1.1 christos # endif
53 1.1 christos # endif
54 1.1 christos # ifdef __CYGWIN__
55 1.1 christos # define WIN32_LEAN_AND_MEAN
56 1.1 christos # include <windows.h>
57 1.1 christos # endif
58 1.1 christos #elif defined WINDOWS_NATIVE
59 1.1 christos # define WIN32_LEAN_AND_MEAN
60 1.1 christos # include <windows.h>
61 1.1 christos /* For the use of setlocale() below, the Gnulib override in setlocale.c is
62 1.1 christos not needed; see the platform lists in setlocale_null.m4. */
63 1.1 christos # undef setlocale
64 1.1 christos #endif
65 1.1 christos #if defined OS2
66 1.1 christos # define INCL_DOS
67 1.1 christos # include <os2.h>
68 1.1 christos #endif
69 1.1 christos
70 1.1 christos /* For MB_CUR_MAX_L */
71 1.1 christos #if defined DARWIN7
72 1.1 christos # include <xlocale.h>
73 1.1 christos #endif
74 1.1 christos
75 1.1 christos
76 1.1 christos #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
77 1.1 christos
78 1.1 christos /* On these platforms, we use a mapping from non-canonical encoding name
79 1.1 christos to GNU canonical encoding name. */
80 1.1 christos
81 1.1 christos /* With glibc-2.1 or newer, we don't need any canonicalization,
82 1.1 christos because glibc has iconv and both glibc and libiconv support all
83 1.1 christos GNU canonical names directly. */
84 1.1 christos # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
85 1.1 christos
86 1.1 christos struct table_entry /* One row of alias_table: non-canonical encoding name -> GNU canonical encoding name. */
87 1.1 christos {
88 1.1 christos const char alias[11+1]; /* Non-canonical name; at most 11 characters (e.g. "TIS620.2533") plus the terminating NUL. */
89 1.1 christos const char canonical[11+1]; /* GNU canonical name; at most 11 characters (e.g. "ISO-8859-13") plus the terminating NUL. */
90 1.1 christos };
91 1.1 christos
92 1.1 christos /* Table of platform-dependent mappings from non-canonical encoding name (alias) to GNU canonical encoding name. Each platform section is sorted by alias in ascending byte order, e.g. "SJIS" precedes "eucCN". */
93 1.1 christos static const struct table_entry alias_table[] =
94 1.1 christos {
95 1.1 christos # if defined __FreeBSD__ /* FreeBSD */
96 1.1 christos /*{ "ARMSCII-8", "ARMSCII-8" },*/
97 1.1 christos { "Big5", "BIG5" },
98 1.1 christos { "C", "ASCII" },
99 1.1 christos /*{ "CP1131", "CP1131" },*/
100 1.1 christos /*{ "CP1251", "CP1251" },*/
101 1.1 christos /*{ "CP866", "CP866" },*/
102 1.1 christos /*{ "GB18030", "GB18030" },*/
103 1.1 christos /*{ "GB2312", "GB2312" },*/
104 1.1 christos /*{ "GBK", "GBK" },*/
105 1.1 christos /*{ "ISCII-DEV", "?" },*/
106 1.1 christos { "ISO8859-1", "ISO-8859-1" },
107 1.1 christos { "ISO8859-13", "ISO-8859-13" },
108 1.1 christos { "ISO8859-15", "ISO-8859-15" },
109 1.1 christos { "ISO8859-2", "ISO-8859-2" },
110 1.1 christos { "ISO8859-5", "ISO-8859-5" },
111 1.1 christos { "ISO8859-7", "ISO-8859-7" },
112 1.1 christos { "ISO8859-9", "ISO-8859-9" },
113 1.1 christos /*{ "KOI8-R", "KOI8-R" },*/
114 1.1 christos /*{ "KOI8-U", "KOI8-U" },*/
115 1.1 christos { "SJIS", "SHIFT_JIS" },
116 1.1 christos { "US-ASCII", "ASCII" },
117 1.1 christos { "eucCN", "GB2312" },
118 1.1 christos { "eucJP", "EUC-JP" },
119 1.1 christos { "eucKR", "EUC-KR" }
120 1.1 christos # define alias_table_defined /* A platform section supplied the entries; tested by the #ifndef at the end of the table. The section's last entry has no trailing comma, so two sections active at once would not compile. */
121 1.1 christos # endif
122 1.1 christos # if defined __NetBSD__ /* NetBSD */
123 1.1 christos { "646", "ASCII" },
124 1.1 christos /*{ "ARMSCII-8", "ARMSCII-8" },*/
125 1.1 christos /*{ "BIG5", "BIG5" },*/
126 1.1 christos { "Big5-HKSCS", "BIG5-HKSCS" },
127 1.1 christos /*{ "CP1251", "CP1251" },*/
128 1.1 christos /*{ "CP866", "CP866" },*/
129 1.1 christos /*{ "GB18030", "GB18030" },*/
130 1.1 christos /*{ "GB2312", "GB2312" },*/
131 1.1 christos { "ISO8859-1", "ISO-8859-1" },
132 1.1 christos { "ISO8859-13", "ISO-8859-13" },
133 1.1 christos { "ISO8859-15", "ISO-8859-15" },
134 1.1 christos { "ISO8859-2", "ISO-8859-2" },
135 1.1 christos { "ISO8859-4", "ISO-8859-4" },
136 1.1 christos { "ISO8859-5", "ISO-8859-5" },
137 1.1 christos { "ISO8859-7", "ISO-8859-7" },
138 1.1 christos /*{ "KOI8-R", "KOI8-R" },*/
139 1.1 christos /*{ "KOI8-U", "KOI8-U" },*/
140 1.1 christos /*{ "PT154", "PT154" },*/
141 1.1 christos { "SJIS", "SHIFT_JIS" },
142 1.1 christos { "eucCN", "GB2312" },
143 1.1 christos { "eucJP", "EUC-JP" },
144 1.1 christos { "eucKR", "EUC-KR" },
145 1.1 christos { "eucTW", "EUC-TW" }
146 1.1 christos # define alias_table_defined
147 1.1 christos # endif
148 1.1 christos # if defined __OpenBSD__ /* OpenBSD */
149 1.1 christos { "646", "ASCII" },
150 1.1 christos { "ISO8859-1", "ISO-8859-1" },
151 1.1 christos { "ISO8859-13", "ISO-8859-13" },
152 1.1 christos { "ISO8859-15", "ISO-8859-15" },
153 1.1 christos { "ISO8859-2", "ISO-8859-2" },
154 1.1 christos { "ISO8859-4", "ISO-8859-4" },
155 1.1 christos { "ISO8859-5", "ISO-8859-5" },
156 1.1 christos { "ISO8859-7", "ISO-8859-7" },
157 1.1 christos { "US-ASCII", "ASCII" }
158 1.1 christos # define alias_table_defined
159 1.1 christos # endif
160 1.1 christos # if defined __APPLE__ && defined __MACH__ /* Mac OS X */
161 1.1 christos /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
162 1.1 christos useless:
163 1.1 christos - It returns the empty string when LANG is set to a locale of the
164 1.1 christos form ll_CC, although ll_CC/LC_CTYPE is a symlink to a UTF-8
165 1.1 christos LC_CTYPE file.
166 1.1 christos - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
167 1.1 christos the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
168 1.1 christos - The documentation says:
169 1.1 christos "... all code that calls BSD system routines should ensure
170 1.1 christos that the const *char parameters of these routines are in UTF-8
171 1.1 christos encoding. All BSD system functions expect their string
172 1.1 christos parameters to be in UTF-8 encoding and nothing else."
173 1.1 christos It also says
174 1.1 christos "An additional caveat is that string parameters for files,
175 1.1 christos paths, and other file-system entities must be in canonical
176 1.1 christos UTF-8. In a canonical UTF-8 Unicode string, all decomposable
177 1.1 christos characters are decomposed ..."
178 1.1 christos but this is not true: You can pass non-decomposed UTF-8 strings
179 1.1 christos to file system functions, and it is the OS which will convert
180 1.1 christos them to decomposed UTF-8 before accessing the file system.
181 1.1 christos - The Apple Terminal application displays UTF-8 by default.
182 1.1 christos - However, other applications are free to use different encodings:
183 1.1 christos - xterm uses ISO-8859-1 by default.
184 1.1 christos - TextEdit uses MacRoman by default.
185 1.1 christos We prefer UTF-8 over decomposed UTF-8-MAC because one should
186 1.1 christos minimize the use of decomposed Unicode. Unfortunately, through the
187 1.1 christos Darwin file system, decomposed UTF-8 strings are leaked into user
188 1.1 christos space nevertheless.
189 1.1 christos Then there are also the locales with encodings other than US-ASCII
190 1.1 christos and UTF-8. These locales can be occasionally useful to users (e.g.
191 1.1 christos when grepping through ISO-8859-1 encoded text files), when all their
192 1.1 christos file names are in US-ASCII.
193 1.1 christos */
194 1.1 christos { "ARMSCII-8", "ARMSCII-8" },
195 1.1 christos { "Big5", "BIG5" },
196 1.1 christos { "Big5HKSCS", "BIG5-HKSCS" },
197 1.1 christos { "CP1131", "CP1131" },
198 1.1 christos { "CP1251", "CP1251" },
199 1.1 christos { "CP866", "CP866" },
200 1.1 christos { "CP949", "CP949" },
201 1.1 christos { "GB18030", "GB18030" },
202 1.1 christos { "GB2312", "GB2312" },
203 1.1 christos { "GBK", "GBK" },
204 1.1 christos /*{ "ISCII-DEV", "?" },*/
205 1.1 christos { "ISO8859-1", "ISO-8859-1" },
206 1.1 christos { "ISO8859-13", "ISO-8859-13" },
207 1.1 christos { "ISO8859-15", "ISO-8859-15" },
208 1.1 christos { "ISO8859-2", "ISO-8859-2" },
209 1.1 christos { "ISO8859-4", "ISO-8859-4" },
210 1.1 christos { "ISO8859-5", "ISO-8859-5" },
211 1.1 christos { "ISO8859-7", "ISO-8859-7" },
212 1.1 christos { "ISO8859-9", "ISO-8859-9" },
213 1.1 christos { "KOI8-R", "KOI8-R" },
214 1.1 christos { "KOI8-U", "KOI8-U" },
215 1.1 christos { "PT154", "PT154" },
216 1.1 christos { "SJIS", "SHIFT_JIS" },
217 1.1 christos { "eucCN", "GB2312" },
218 1.1 christos { "eucJP", "EUC-JP" },
219 1.1 christos { "eucKR", "EUC-KR" }
220 1.1 christos # define alias_table_defined
221 1.1 christos # endif
222 1.1 christos # if defined _AIX /* AIX */
223 1.1 christos /*{ "GBK", "GBK" },*/
224 1.1 christos { "IBM-1046", "CP1046" },
225 1.1 christos { "IBM-1124", "CP1124" },
226 1.1 christos { "IBM-1129", "CP1129" },
227 1.1 christos { "IBM-1252", "CP1252" },
228 1.1 christos { "IBM-850", "CP850" },
229 1.1 christos { "IBM-856", "CP856" },
230 1.1 christos { "IBM-921", "ISO-8859-13" },
231 1.1 christos { "IBM-922", "CP922" },
232 1.1 christos { "IBM-932", "CP932" },
233 1.1 christos { "IBM-943", "CP943" },
234 1.1 christos { "IBM-eucCN", "GB2312" },
235 1.1 christos { "IBM-eucJP", "EUC-JP" },
236 1.1 christos { "IBM-eucKR", "EUC-KR" },
237 1.1 christos { "IBM-eucTW", "EUC-TW" },
238 1.1 christos { "ISO8859-1", "ISO-8859-1" },
239 1.1 christos { "ISO8859-15", "ISO-8859-15" },
240 1.1 christos { "ISO8859-2", "ISO-8859-2" },
241 1.1 christos { "ISO8859-5", "ISO-8859-5" },
242 1.1 christos { "ISO8859-6", "ISO-8859-6" },
243 1.1 christos { "ISO8859-7", "ISO-8859-7" },
244 1.1 christos { "ISO8859-8", "ISO-8859-8" },
245 1.1 christos { "ISO8859-9", "ISO-8859-9" },
246 1.1 christos { "TIS-620", "TIS-620" },
247 1.1 christos /*{ "UTF-8", "UTF-8" },*/
248 1.1 christos { "big5", "BIG5" }
249 1.1 christos # define alias_table_defined
250 1.1 christos # endif
251 1.1 christos # if defined __hpux /* HP-UX */
252 1.1 christos { "SJIS", "SHIFT_JIS" },
253 1.1 christos { "arabic8", "HP-ARABIC8" },
254 1.1 christos { "big5", "BIG5" },
255 1.1 christos { "cp1251", "CP1251" },
256 1.1 christos { "eucJP", "EUC-JP" },
257 1.1 christos { "eucKR", "EUC-KR" },
258 1.1 christos { "eucTW", "EUC-TW" },
259 1.1 christos { "gb18030", "GB18030" },
260 1.1 christos { "greek8", "HP-GREEK8" },
261 1.1 christos { "hebrew8", "HP-HEBREW8" },
262 1.1 christos { "hkbig5", "BIG5-HKSCS" },
263 1.1 christos { "hp15CN", "GB2312" },
264 1.1 christos { "iso88591", "ISO-8859-1" },
265 1.1 christos { "iso885913", "ISO-8859-13" },
266 1.1 christos { "iso885915", "ISO-8859-15" },
267 1.1 christos { "iso88592", "ISO-8859-2" },
268 1.1 christos { "iso88594", "ISO-8859-4" },
269 1.1 christos { "iso88595", "ISO-8859-5" },
270 1.1 christos { "iso88596", "ISO-8859-6" },
271 1.1 christos { "iso88597", "ISO-8859-7" },
272 1.1 christos { "iso88598", "ISO-8859-8" },
273 1.1 christos { "iso88599", "ISO-8859-9" },
274 1.1 christos { "kana8", "HP-KANA8" },
275 1.1 christos { "koi8r", "KOI8-R" },
276 1.1 christos { "roman8", "HP-ROMAN8" },
277 1.1 christos { "tis620", "TIS-620" },
278 1.1 christos { "turkish8", "HP-TURKISH8" },
279 1.1 christos { "utf8", "UTF-8" }
280 1.1 christos # define alias_table_defined
281 1.1 christos # endif
282 1.1 christos # if defined __sgi /* IRIX */
283 1.1 christos { "ISO8859-1", "ISO-8859-1" },
284 1.1 christos { "ISO8859-15", "ISO-8859-15" },
285 1.1 christos { "ISO8859-2", "ISO-8859-2" },
286 1.1 christos { "ISO8859-5", "ISO-8859-5" },
287 1.1 christos { "ISO8859-7", "ISO-8859-7" },
288 1.1 christos { "ISO8859-9", "ISO-8859-9" },
289 1.1 christos { "eucCN", "GB2312" },
290 1.1 christos { "eucJP", "EUC-JP" },
291 1.1 christos { "eucKR", "EUC-KR" },
292 1.1 christos { "eucTW", "EUC-TW" }
293 1.1 christos # define alias_table_defined
294 1.1 christos # endif
295 1.1 christos # if defined __osf__ /* OSF/1 */
296 1.1 christos /*{ "GBK", "GBK" },*/
297 1.1 christos { "ISO8859-1", "ISO-8859-1" },
298 1.1 christos { "ISO8859-15", "ISO-8859-15" },
299 1.1 christos { "ISO8859-2", "ISO-8859-2" },
300 1.1 christos { "ISO8859-4", "ISO-8859-4" },
301 1.1 christos { "ISO8859-5", "ISO-8859-5" },
302 1.1 christos { "ISO8859-7", "ISO-8859-7" },
303 1.1 christos { "ISO8859-8", "ISO-8859-8" },
304 1.1 christos { "ISO8859-9", "ISO-8859-9" },
305 1.1 christos { "KSC5601", "CP949" },
306 1.1 christos { "SJIS", "SHIFT_JIS" },
307 1.1 christos { "TACTIS", "TIS-620" },
308 1.1 christos /*{ "UTF-8", "UTF-8" },*/
309 1.1 christos { "big5", "BIG5" },
310 1.1 christos { "cp850", "CP850" },
311 1.1 christos { "dechanyu", "DEC-HANYU" },
312 1.1 christos { "dechanzi", "GB2312" },
313 1.1 christos { "deckanji", "DEC-KANJI" },
314 1.1 christos { "deckorean", "EUC-KR" },
315 1.1 christos { "eucJP", "EUC-JP" },
316 1.1 christos { "eucKR", "EUC-KR" },
317 1.1 christos { "eucTW", "EUC-TW" },
318 1.1 christos { "sdeckanji", "EUC-JP" }
319 1.1 christos # define alias_table_defined
320 1.1 christos # endif
321 1.1 christos # if defined __sun /* Solaris */
322 1.1 christos { "5601", "EUC-KR" },
323 1.1 christos { "646", "ASCII" },
324 1.1 christos /*{ "BIG5", "BIG5" },*/
325 1.1 christos { "Big5-HKSCS", "BIG5-HKSCS" },
326 1.1 christos { "GB18030", "GB18030" },
327 1.1 christos /*{ "GBK", "GBK" },*/
328 1.1 christos { "ISO8859-1", "ISO-8859-1" },
329 1.1 christos { "ISO8859-11", "TIS-620" },
330 1.1 christos { "ISO8859-13", "ISO-8859-13" },
331 1.1 christos { "ISO8859-15", "ISO-8859-15" },
332 1.1 christos { "ISO8859-2", "ISO-8859-2" },
333 1.1 christos { "ISO8859-3", "ISO-8859-3" },
334 1.1 christos { "ISO8859-4", "ISO-8859-4" },
335 1.1 christos { "ISO8859-5", "ISO-8859-5" },
336 1.1 christos { "ISO8859-6", "ISO-8859-6" },
337 1.1 christos { "ISO8859-7", "ISO-8859-7" },
338 1.1 christos { "ISO8859-8", "ISO-8859-8" },
339 1.1 christos { "ISO8859-9", "ISO-8859-9" },
340 1.1 christos { "PCK", "SHIFT_JIS" },
341 1.1 christos { "TIS620.2533", "TIS-620" },
342 1.1 christos /*{ "UTF-8", "UTF-8" },*/
343 1.1 christos { "ansi-1251", "CP1251" },
344 1.1 christos { "cns11643", "EUC-TW" },
345 1.1 christos { "eucJP", "EUC-JP" },
346 1.1 christos { "gb2312", "GB2312" },
347 1.1 christos { "koi8-r", "KOI8-R" }
348 1.1 christos # define alias_table_defined
349 1.1 christos # endif
350 1.1 christos # if defined __minix /* Minix */
351 1.1 christos { "646", "ASCII" }
352 1.1 christos # define alias_table_defined
353 1.1 christos # endif
354 1.1 christos # if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
355 1.1 christos { "CP1361", "JOHAB" },
356 1.1 christos { "CP20127", "ASCII" },
357 1.1 christos { "CP20866", "KOI8-R" },
358 1.1 christos { "CP20936", "GB2312" },
359 1.1 christos { "CP21866", "KOI8-RU" },
360 1.1 christos { "CP28591", "ISO-8859-1" },
361 1.1 christos { "CP28592", "ISO-8859-2" },
362 1.1 christos { "CP28593", "ISO-8859-3" },
363 1.1 christos { "CP28594", "ISO-8859-4" },
364 1.1 christos { "CP28595", "ISO-8859-5" },
365 1.1 christos { "CP28596", "ISO-8859-6" },
366 1.1 christos { "CP28597", "ISO-8859-7" },
367 1.1 christos { "CP28598", "ISO-8859-8" },
368 1.1 christos { "CP28599", "ISO-8859-9" },
369 1.1 christos { "CP28605", "ISO-8859-15" },
370 1.1 christos { "CP38598", "ISO-8859-8" },
371 1.1 christos { "CP51932", "EUC-JP" },
372 1.1 christos { "CP51936", "GB2312" },
373 1.1 christos { "CP51949", "EUC-KR" },
374 1.1 christos { "CP51950", "EUC-TW" },
375 1.1 christos { "CP54936", "GB18030" },
376 1.1 christos { "CP65001", "UTF-8" },
377 1.1 christos { "CP936", "GBK" }
378 1.1 christos # define alias_table_defined
379 1.1 christos # endif
380 1.1 christos # if defined OS2 /* OS/2 */
381 1.1 christos /* The list of encodings is taken from "List of OS/2 Codepages"
382 1.1 christos by Alex Taylor:
383 1.1 christos <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
384 1.1 christos See also "__convcp() of kLIBC":
385 1.1 christos <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
386 1.1 christos { "CP1004", "CP1252" },
387 1.1 christos /*{ "CP1041", "CP943" },*/
388 1.1 christos /*{ "CP1088", "CP949" },*/
389 1.1 christos { "CP1089", "ISO-8859-6" },
390 1.1 christos /*{ "CP1114", "CP950" },*/
391 1.1 christos /*{ "CP1115", "GB2312" },*/
392 1.1 christos { "CP1208", "UTF-8" },
393 1.1 christos /*{ "CP1380", "GB2312" },*/
394 1.1 christos { "CP1381", "GB2312" },
395 1.1 christos { "CP1383", "GB2312" },
396 1.1 christos { "CP1386", "GBK" },
397 1.1 christos /*{ "CP301", "CP943" },*/
398 1.1 christos { "CP3372", "EUC-JP" },
399 1.1 christos { "CP4946", "CP850" },
400 1.1 christos /*{ "CP5048", "JIS_X0208-1990" },*/
401 1.1 christos /*{ "CP5049", "JIS_X0212-1990" },*/
402 1.1 christos /*{ "CP5067", "KS_C_5601-1987" },*/
403 1.1 christos { "CP813", "ISO-8859-7" },
404 1.1 christos { "CP819", "ISO-8859-1" },
405 1.1 christos { "CP878", "KOI8-R" },
406 1.1 christos /*{ "CP897", "CP943" },*/
407 1.1 christos { "CP912", "ISO-8859-2" },
408 1.1 christos { "CP913", "ISO-8859-3" },
409 1.1 christos { "CP914", "ISO-8859-4" },
410 1.1 christos { "CP915", "ISO-8859-5" },
411 1.1 christos { "CP916", "ISO-8859-8" },
412 1.1 christos { "CP920", "ISO-8859-9" },
413 1.1 christos { "CP921", "ISO-8859-13" },
414 1.1 christos { "CP923", "ISO-8859-15" },
415 1.1 christos /*{ "CP941", "CP943" },*/
416 1.1 christos /*{ "CP947", "CP950" },*/
417 1.1 christos /*{ "CP951", "CP949" },*/
418 1.1 christos /*{ "CP952", "JIS_X0208-1990" },*/
419 1.1 christos /*{ "CP953", "JIS_X0212-1990" },*/
420 1.1 christos { "CP954", "EUC-JP" },
421 1.1 christos { "CP964", "EUC-TW" },
422 1.1 christos { "CP970", "EUC-KR" },
423 1.1 christos /*{ "CP971", "KS_C_5601-1987" },*/
424 1.1 christos { "IBM-1004", "CP1252" },
425 1.1 christos /*{ "IBM-1006", "?" },*/
426 1.1 christos /*{ "IBM-1008", "?" },*/
427 1.1 christos /*{ "IBM-1041", "CP943" },*/
428 1.1 christos /*{ "IBM-1051", "?" },*/
429 1.1 christos /*{ "IBM-1088", "CP949" },*/
430 1.1 christos { "IBM-1089", "ISO-8859-6" },
431 1.1 christos /*{ "IBM-1098", "?" },*/
432 1.1 christos /*{ "IBM-1114", "CP950" },*/
433 1.1 christos /*{ "IBM-1115", "GB2312" },*/
434 1.1 christos /*{ "IBM-1116", "?" },*/
435 1.1 christos /*{ "IBM-1117", "?" },*/
436 1.1 christos /*{ "IBM-1118", "?" },*/
437 1.1 christos /*{ "IBM-1119", "?" },*/
438 1.1 christos { "IBM-1124", "CP1124" },
439 1.1 christos { "IBM-1125", "CP1125" },
440 1.1 christos { "IBM-1131", "CP1131" },
441 1.1 christos { "IBM-1208", "UTF-8" },
442 1.1 christos { "IBM-1250", "CP1250" },
443 1.1 christos { "IBM-1251", "CP1251" },
444 1.1 christos { "IBM-1252", "CP1252" },
445 1.1 christos { "IBM-1253", "CP1253" },
446 1.1 christos { "IBM-1254", "CP1254" },
447 1.1 christos { "IBM-1255", "CP1255" },
448 1.1 christos { "IBM-1256", "CP1256" },
449 1.1 christos { "IBM-1257", "CP1257" },
450 1.1 christos /*{ "IBM-1275", "?" },*/
451 1.1 christos /*{ "IBM-1276", "?" },*/
452 1.1 christos /*{ "IBM-1277", "?" },*/
453 1.1 christos /*{ "IBM-1280", "?" },*/
454 1.1 christos /*{ "IBM-1281", "?" },*/
455 1.1 christos /*{ "IBM-1282", "?" },*/
456 1.1 christos /*{ "IBM-1283", "?" },*/
457 1.1 christos /*{ "IBM-1380", "GB2312" },*/
458 1.1 christos { "IBM-1381", "GB2312" },
459 1.1 christos { "IBM-1383", "GB2312" },
460 1.1 christos { "IBM-1386", "GBK" },
461 1.1 christos /*{ "IBM-301", "CP943" },*/
462 1.1 christos { "IBM-3372", "EUC-JP" },
463 1.1 christos { "IBM-367", "ASCII" },
464 1.1 christos { "IBM-437", "CP437" },
465 1.1 christos { "IBM-4946", "CP850" },
466 1.1 christos /*{ "IBM-5048", "JIS_X0208-1990" },*/
467 1.1 christos /*{ "IBM-5049", "JIS_X0212-1990" },*/
468 1.1 christos /*{ "IBM-5067", "KS_C_5601-1987" },*/
469 1.1 christos { "IBM-813", "ISO-8859-7" },
470 1.1 christos { "IBM-819", "ISO-8859-1" },
471 1.1 christos { "IBM-850", "CP850" },
472 1.1 christos /*{ "IBM-851", "?" },*/
473 1.1 christos { "IBM-852", "CP852" },
474 1.1 christos { "IBM-855", "CP855" },
475 1.1 christos { "IBM-856", "CP856" },
476 1.1 christos { "IBM-857", "CP857" },
477 1.1 christos /*{ "IBM-859", "?" },*/
478 1.1 christos { "IBM-860", "CP860" },
479 1.1 christos { "IBM-861", "CP861" },
480 1.1 christos { "IBM-862", "CP862" },
481 1.1 christos { "IBM-863", "CP863" },
482 1.1 christos { "IBM-864", "CP864" },
483 1.1 christos { "IBM-865", "CP865" },
484 1.1 christos { "IBM-866", "CP866" },
485 1.1 christos /*{ "IBM-868", "?" },*/
486 1.1 christos { "IBM-869", "CP869" },
487 1.1 christos { "IBM-874", "CP874" },
488 1.1 christos { "IBM-878", "KOI8-R" },
489 1.1 christos /*{ "IBM-895", "?" },*/
490 1.1 christos /*{ "IBM-897", "CP943" },*/
491 1.1 christos /*{ "IBM-907", "?" },*/
492 1.1 christos /*{ "IBM-909", "?" },*/
493 1.1 christos { "IBM-912", "ISO-8859-2" },
494 1.1 christos { "IBM-913", "ISO-8859-3" },
495 1.1 christos { "IBM-914", "ISO-8859-4" },
496 1.1 christos { "IBM-915", "ISO-8859-5" },
497 1.1 christos { "IBM-916", "ISO-8859-8" },
498 1.1 christos { "IBM-920", "ISO-8859-9" },
499 1.1 christos { "IBM-921", "ISO-8859-13" },
500 1.1 christos { "IBM-922", "CP922" },
501 1.1 christos { "IBM-923", "ISO-8859-15" },
502 1.1 christos { "IBM-932", "CP932" },
503 1.1 christos /*{ "IBM-941", "CP943" },*/
504 1.1 christos /*{ "IBM-942", "?" },*/
505 1.1 christos { "IBM-943", "CP943" },
506 1.1 christos /*{ "IBM-947", "CP950" },*/
507 1.1 christos { "IBM-949", "CP949" },
508 1.1 christos { "IBM-950", "CP950" },
509 1.1 christos /*{ "IBM-951", "CP949" },*/
510 1.1 christos /*{ "IBM-952", "JIS_X0208-1990" },*/
511 1.1 christos /*{ "IBM-953", "JIS_X0212-1990" },*/
512 1.1 christos { "IBM-954", "EUC-JP" },
513 1.1 christos /*{ "IBM-955", "?" },*/
514 1.1 christos { "IBM-964", "EUC-TW" },
515 1.1 christos { "IBM-970", "EUC-KR" },
516 1.1 christos /*{ "IBM-971", "KS_C_5601-1987" },*/
517 1.1 christos { "IBM-eucCN", "GB2312" },
518 1.1 christos { "IBM-eucJP", "EUC-JP" },
519 1.1 christos { "IBM-eucKR", "EUC-KR" },
520 1.1 christos { "IBM-eucTW", "EUC-TW" },
521 1.1 christos { "IBM33722", "EUC-JP" },
522 1.1 christos { "ISO8859-1", "ISO-8859-1" },
523 1.1 christos { "ISO8859-2", "ISO-8859-2" },
524 1.1 christos { "ISO8859-3", "ISO-8859-3" },
525 1.1 christos { "ISO8859-4", "ISO-8859-4" },
526 1.1 christos { "ISO8859-5", "ISO-8859-5" },
527 1.1 christos { "ISO8859-6", "ISO-8859-6" },
528 1.1 christos { "ISO8859-7", "ISO-8859-7" },
529 1.1 christos { "ISO8859-8", "ISO-8859-8" },
530 1.1 christos { "ISO8859-9", "ISO-8859-9" },
531 1.1 christos /*{ "JISX0201-1976", "JISX0201-1976" },*/
532 1.1 christos /*{ "JISX0208-1978", "?" },*/
533 1.1 christos /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
534 1.1 christos /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
535 1.1 christos /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
536 1.1 christos /*{ "KSC5601-1987", "KS_C_5601-1987" },*/
537 1.1 christos { "SJIS-1", "CP943" },
538 1.1 christos { "SJIS-2", "CP943" },
539 1.1 christos { "eucJP", "EUC-JP" },
540 1.1 christos { "eucKR", "EUC-KR" },
541 1.1 christos { "eucTW-1993", "EUC-TW" }
542 1.1 christos # define alias_table_defined
543 1.1 christos # endif
544 1.1 christos # if defined VMS /* OpenVMS */
545 1.1 christos /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
546 1.1 christos "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
547 1.1 christos section 10.7 "Handling Different Character Sets". */
548 1.1 christos { "DECHANYU", "DEC-HANYU" },
549 1.1 christos { "DECHANZI", "GB2312" },
550 1.1 christos { "DECKANJI", "DEC-KANJI" },
551 1.1 christos { "DECKOREAN", "EUC-KR" },
552 1.1 christos { "ISO8859-1", "ISO-8859-1" },
553 1.1 christos { "ISO8859-2", "ISO-8859-2" },
554 1.1 christos { "ISO8859-5", "ISO-8859-5" },
555 1.1 christos { "ISO8859-7", "ISO-8859-7" },
556 1.1 christos { "ISO8859-8", "ISO-8859-8" },
557 1.1 christos { "ISO8859-9", "ISO-8859-9" },
558 1.1 christos { "SDECKANJI", "EUC-JP" },
559 1.1 christos { "SJIS", "SHIFT_JIS" },
560 1.1 christos { "eucJP", "EUC-JP" },
561 1.1 christos { "eucTW", "EUC-TW" }
562 1.1 christos # define alias_table_defined
563 1.1 christos # endif
564 1.1 christos # ifndef alias_table_defined
565 1.1 christos /* No platform section above supplied entries: just a dummy entry, to avoid a C syntax error from an empty initializer list. */
566 1.1 christos { "", "" }
567 1.1 christos # endif
568 1.1 christos };
569 1.1 christos
570 1.1 christos # endif
571 1.1 christos
572 1.1 christos #else
573 1.1 christos
574 1.1 christos /* On these platforms, we use a mapping from locale name to GNU canonical
575 1.1 christos encoding name. */
576 1.1 christos
577 1.1 christos struct table_entry /* One row of locale_table: locale name -> GNU canonical encoding name. */
578 1.1 christos {
579 1.1 christos const char locale[17+1]; /* Locale name; at most 17 characters (e.g. "da_DK.DIS_8859-15") plus the terminating NUL. */
580 1.1 christos const char canonical[11+1]; /* GNU canonical encoding name; at most 11 characters (e.g. "ISO-8859-15") plus the terminating NUL. */
581 1.1 christos };
582 1.1 christos
583 1.1 christos /* Table of platform-dependent mappings, sorted in ascending order. */
584 1.1 christos static const struct table_entry locale_table[] =
585 1.1 christos {
586 1.1 christos # if defined __FreeBSD__ /* FreeBSD 4.2 */
587 1.1 christos { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
588 1.1 christos { "da_DK.DIS_8859-15", "ISO-8859-15" },
589 1.1 christos { "da_DK.ISO_8859-1", "ISO-8859-1" },
590 1.1 christos { "de_AT.DIS_8859-15", "ISO-8859-15" },
591 1.1 christos { "de_AT.ISO_8859-1", "ISO-8859-1" },
592 1.1 christos { "de_CH.DIS_8859-15", "ISO-8859-15" },
593 1.1 christos { "de_CH.ISO_8859-1", "ISO-8859-1" },
594 1.1 christos { "de_DE.DIS_8859-15", "ISO-8859-15" },
595 1.1 christos { "de_DE.ISO_8859-1", "ISO-8859-1" },
596 1.1 christos { "en_AU.DIS_8859-15", "ISO-8859-15" },
597 1.1 christos { "en_AU.ISO_8859-1", "ISO-8859-1" },
598 1.1 christos { "en_CA.DIS_8859-15", "ISO-8859-15" },
599 1.1 christos { "en_CA.ISO_8859-1", "ISO-8859-1" },
600 1.1 christos { "en_GB.DIS_8859-15", "ISO-8859-15" },
601 1.1 christos { "en_GB.ISO_8859-1", "ISO-8859-1" },
602 1.1 christos { "en_US.DIS_8859-15", "ISO-8859-15" },
603 1.1 christos { "en_US.ISO_8859-1", "ISO-8859-1" },
604 1.1 christos { "es_ES.DIS_8859-15", "ISO-8859-15" },
605 1.1 christos { "es_ES.ISO_8859-1", "ISO-8859-1" },
606 1.1 christos { "fi_FI.DIS_8859-15", "ISO-8859-15" },
607 1.1 christos { "fi_FI.ISO_8859-1", "ISO-8859-1" },
608 1.1 christos { "fr_BE.DIS_8859-15", "ISO-8859-15" },
609 1.1 christos { "fr_BE.ISO_8859-1", "ISO-8859-1" },
610 1.1 christos { "fr_CA.DIS_8859-15", "ISO-8859-15" },
611 1.1 christos { "fr_CA.ISO_8859-1", "ISO-8859-1" },
612 1.1 christos { "fr_CH.DIS_8859-15", "ISO-8859-15" },
613 1.1 christos { "fr_CH.ISO_8859-1", "ISO-8859-1" },
614 1.1 christos { "fr_FR.DIS_8859-15", "ISO-8859-15" },
615 1.1 christos { "fr_FR.ISO_8859-1", "ISO-8859-1" },
616 1.1 christos { "hr_HR.ISO_8859-2", "ISO-8859-2" },
617 1.1 christos { "hu_HU.ISO_8859-2", "ISO-8859-2" },
618 1.1 christos { "is_IS.DIS_8859-15", "ISO-8859-15" },
619 1.1 christos { "is_IS.ISO_8859-1", "ISO-8859-1" },
620 1.1 christos { "it_CH.DIS_8859-15", "ISO-8859-15" },
621 1.1 christos { "it_CH.ISO_8859-1", "ISO-8859-1" },
622 1.1 christos { "it_IT.DIS_8859-15", "ISO-8859-15" },
623 1.1 christos { "it_IT.ISO_8859-1", "ISO-8859-1" },
624 1.1 christos { "ja_JP.EUC", "EUC-JP" },
625 1.1 christos { "ja_JP.SJIS", "SHIFT_JIS" },
626 1.1 christos { "ja_JP.Shift_JIS", "SHIFT_JIS" },
627 1.1 christos { "ko_KR.EUC", "EUC-KR" },
628 1.1 christos { "la_LN.ASCII", "ASCII" },
629 1.1 christos { "la_LN.DIS_8859-15", "ISO-8859-15" },
630 1.1 christos { "la_LN.ISO_8859-1", "ISO-8859-1" },
631 1.1 christos { "la_LN.ISO_8859-2", "ISO-8859-2" },
632 1.1 christos { "la_LN.ISO_8859-4", "ISO-8859-4" },
633 1.1 christos { "lt_LN.ASCII", "ASCII" },
634 1.1 christos { "lt_LN.DIS_8859-15", "ISO-8859-15" },
635 1.1 christos { "lt_LN.ISO_8859-1", "ISO-8859-1" },
636 1.1 christos { "lt_LN.ISO_8859-2", "ISO-8859-2" },
637 1.1 christos { "lt_LT.ISO_8859-4", "ISO-8859-4" },
638 1.1 christos { "nl_BE.DIS_8859-15", "ISO-8859-15" },
639 1.1 christos { "nl_BE.ISO_8859-1", "ISO-8859-1" },
640 1.1 christos { "nl_NL.DIS_8859-15", "ISO-8859-15" },
641 1.1 christos { "nl_NL.ISO_8859-1", "ISO-8859-1" },
642 1.1 christos { "no_NO.DIS_8859-15", "ISO-8859-15" },
643 1.1 christos { "no_NO.ISO_8859-1", "ISO-8859-1" },
644 1.1 christos { "pl_PL.ISO_8859-2", "ISO-8859-2" },
645 1.1 christos { "pt_PT.DIS_8859-15", "ISO-8859-15" },
646 1.1 christos { "pt_PT.ISO_8859-1", "ISO-8859-1" },
647 1.1 christos { "ru_RU.CP866", "CP866" },
648 1.1 christos { "ru_RU.ISO_8859-5", "ISO-8859-5" },
649 1.1 christos { "ru_RU.KOI8-R", "KOI8-R" },
650 1.1 christos { "ru_SU.CP866", "CP866" },
651 1.1 christos { "ru_SU.ISO_8859-5", "ISO-8859-5" },
652 1.1 christos { "ru_SU.KOI8-R", "KOI8-R" },
653 1.1 christos { "sl_SI.ISO_8859-2", "ISO-8859-2" },
654 1.1 christos { "sv_SE.DIS_8859-15", "ISO-8859-15" },
655 1.1 christos { "sv_SE.ISO_8859-1", "ISO-8859-1" },
656 1.1 christos { "uk_UA.KOI8-U", "KOI8-U" },
657 1.1 christos { "zh_CN.EUC", "GB2312" },
658 1.1 christos { "zh_TW.BIG5", "BIG5" },
659 1.1 christos { "zh_TW.Big5", "BIG5" }
660 1.1 christos # define locale_table_defined
661 1.1 christos # endif
662 1.1 christos # if defined __DJGPP__ /* DOS / DJGPP 2.03 */
663 1.1 christos /* The encodings given here may not all be correct.
664 1.1 christos If you find that the encoding given for your language and
665 1.1 christos country is not the one your DOS machine actually uses, just
666 1.1 christos correct it in this file, and send a mail to
667 1.1 christos Juan Manuel Guerrero <juan.guerrero (at) gmx.de>
668 1.1 christos and <bug-gnulib (at) gnu.org>. */
669 1.1 christos { "C", "ASCII" },
670 1.1 christos { "ar", "CP864" },
671 1.1 christos { "ar_AE", "CP864" },
672 1.1 christos { "ar_DZ", "CP864" },
673 1.1 christos { "ar_EG", "CP864" },
674 1.1 christos { "ar_IQ", "CP864" },
675 1.1 christos { "ar_IR", "CP864" },
676 1.1 christos { "ar_JO", "CP864" },
677 1.1 christos { "ar_KW", "CP864" },
678 1.1 christos { "ar_MA", "CP864" },
679 1.1 christos { "ar_OM", "CP864" },
680 1.1 christos { "ar_QA", "CP864" },
681 1.1 christos { "ar_SA", "CP864" },
682 1.1 christos { "ar_SY", "CP864" },
683 1.1 christos { "be", "CP866" },
684 1.1 christos { "be_BE", "CP866" },
685 1.1 christos { "bg", "CP866" }, /* not CP855 ?? */
686 1.1 christos { "bg_BG", "CP866" }, /* not CP855 ?? */
687 1.1 christos { "ca", "CP850" },
688 1.1 christos { "ca_ES", "CP850" },
689 1.1 christos { "cs", "CP852" },
690 1.1 christos { "cs_CZ", "CP852" },
691 1.1 christos { "da", "CP865" }, /* not CP850 ?? */
692 1.1 christos { "da_DK", "CP865" }, /* not CP850 ?? */
693 1.1 christos { "de", "CP850" },
694 1.1 christos { "de_AT", "CP850" },
695 1.1 christos { "de_CH", "CP850" },
696 1.1 christos { "de_DE", "CP850" },
697 1.1 christos { "el", "CP869" },
698 1.1 christos { "el_GR", "CP869" },
699 1.1 christos { "en", "CP850" },
700 1.1 christos { "en_AU", "CP850" }, /* not CP437 ?? */
701 1.1 christos { "en_CA", "CP850" },
702 1.1 christos { "en_GB", "CP850" },
703 1.1 christos { "en_NZ", "CP437" },
704 1.1 christos { "en_US", "CP437" },
705 1.1 christos { "en_ZA", "CP850" }, /* not CP437 ?? */
706 1.1 christos { "eo", "CP850" },
707 1.1 christos { "eo_EO", "CP850" },
708 1.1 christos { "es", "CP850" },
709 1.1 christos { "es_AR", "CP850" },
710 1.1 christos { "es_BO", "CP850" },
711 1.1 christos { "es_CL", "CP850" },
712 1.1 christos { "es_CO", "CP850" },
713 1.1 christos { "es_CR", "CP850" },
714 1.1 christos { "es_CU", "CP850" },
715 1.1 christos { "es_DO", "CP850" },
716 1.1 christos { "es_EC", "CP850" },
717 1.1 christos { "es_ES", "CP850" },
718 1.1 christos { "es_GT", "CP850" },
719 1.1 christos { "es_HN", "CP850" },
720 1.1 christos { "es_MX", "CP850" },
721 1.1 christos { "es_NI", "CP850" },
722 1.1 christos { "es_PA", "CP850" },
723 1.1 christos { "es_PE", "CP850" },
724 1.1 christos { "es_PY", "CP850" },
725 1.1 christos { "es_SV", "CP850" },
726 1.1 christos { "es_UY", "CP850" },
727 1.1 christos { "es_VE", "CP850" },
728 1.1 christos { "et", "CP850" },
729 1.1 christos { "et_EE", "CP850" },
730 1.1 christos { "eu", "CP850" },
731 1.1 christos { "eu_ES", "CP850" },
732 1.1 christos { "fi", "CP850" },
733 1.1 christos { "fi_FI", "CP850" },
734 1.1 christos { "fr", "CP850" },
735 1.1 christos { "fr_BE", "CP850" },
736 1.1 christos { "fr_CA", "CP850" },
737 1.1 christos { "fr_CH", "CP850" },
738 1.1 christos { "fr_FR", "CP850" },
739 1.1 christos { "ga", "CP850" },
740 1.1 christos { "ga_IE", "CP850" },
741 1.1 christos { "gd", "CP850" },
742 1.1 christos { "gd_GB", "CP850" },
743 1.1 christos { "gl", "CP850" },
744 1.1 christos { "gl_ES", "CP850" },
745 1.1 christos { "he", "CP862" },
746 1.1 christos { "he_IL", "CP862" },
747 1.1 christos { "hr", "CP852" },
748 1.1 christos { "hr_HR", "CP852" },
749 1.1 christos { "hu", "CP852" },
750 1.1 christos { "hu_HU", "CP852" },
751 1.1 christos { "id", "CP850" }, /* not CP437 ?? */
752 1.1 christos { "id_ID", "CP850" }, /* not CP437 ?? */
753 1.1 christos { "is", "CP861" }, /* not CP850 ?? */
754 1.1 christos { "is_IS", "CP861" }, /* not CP850 ?? */
755 1.1 christos { "it", "CP850" },
756 1.1 christos { "it_CH", "CP850" },
757 1.1 christos { "it_IT", "CP850" },
758 1.1 christos { "ja", "CP932" },
759 1.1 christos { "ja_JP", "CP932" },
760 1.1 christos { "kr", "CP949" }, /* not CP934 ?? */
761 1.1 christos { "kr_KR", "CP949" }, /* not CP934 ?? */
762 1.1 christos { "lt", "CP775" },
763 1.1 christos { "lt_LT", "CP775" },
764 1.1 christos { "lv", "CP775" },
765 1.1 christos { "lv_LV", "CP775" },
766 1.1 christos { "mk", "CP866" }, /* not CP855 ?? */
767 1.1 christos { "mk_MK", "CP866" }, /* not CP855 ?? */
768 1.1 christos { "mt", "CP850" },
769 1.1 christos { "mt_MT", "CP850" },
770 1.1 christos { "nb", "CP865" }, /* not CP850 ?? */
771 1.1 christos { "nb_NO", "CP865" }, /* not CP850 ?? */
772 1.1 christos { "nl", "CP850" },
773 1.1 christos { "nl_BE", "CP850" },
774 1.1 christos { "nl_NL", "CP850" },
775 1.1 christos { "nn", "CP865" }, /* not CP850 ?? */
776 1.1 christos { "nn_NO", "CP865" }, /* not CP850 ?? */
777 1.1 christos { "no", "CP865" }, /* not CP850 ?? */
778 1.1 christos { "no_NO", "CP865" }, /* not CP850 ?? */
779 1.1 christos { "pl", "CP852" },
780 1.1 christos { "pl_PL", "CP852" },
781 1.1 christos { "pt", "CP850" },
782 1.1 christos { "pt_BR", "CP850" },
783 1.1 christos { "pt_PT", "CP850" },
784 1.1 christos { "ro", "CP852" },
785 1.1 christos { "ro_RO", "CP852" },
786 1.1 christos { "ru", "CP866" },
787 1.1 christos { "ru_RU", "CP866" },
788 1.1 christos { "sk", "CP852" },
789 1.1 christos { "sk_SK", "CP852" },
790 1.1 christos { "sl", "CP852" },
791 1.1 christos { "sl_SI", "CP852" },
792 1.1 christos { "sq", "CP852" },
793 1.1 christos { "sq_AL", "CP852" },
794 1.1 christos { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
795 1.1 christos { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
796 1.1 christos { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
797 1.1 christos { "sv", "CP850" },
798 1.1 christos { "sv_SE", "CP850" },
799 1.1 christos { "th", "CP874" },
800 1.1 christos { "th_TH", "CP874" },
801 1.1 christos { "tr", "CP857" },
802 1.1 christos { "tr_TR", "CP857" },
803 1.1 christos { "uk", "CP1125" },
804 1.1 christos { "uk_UA", "CP1125" },
805 1.1 christos { "zh_CN", "GBK" },
806 1.1 christos { "zh_TW", "CP950" } /* not CP938 ?? */
807 1.1 christos # define locale_table_defined
808 1.1 christos # endif
809 1.1 christos # ifndef locale_table_defined
810 1.1 christos /* Just a dummy entry, to avoid a C syntax error. */
811 1.1 christos { "", "" }
812 1.1 christos # endif
813 1.1 christos };
814 1.1 christos
815 1.1 christos #endif
816 1.1 christos
817 1.1 christos
818 1.1 christos /* Determine the current locale's character encoding, and canonicalize it
819 1.1 christos into one of the canonical names listed below.
820 1.1 christos The result must not be freed; it is statically allocated. The result
821 1.1 christos becomes invalid when setlocale() is used to change the global locale, or
822 1.1 christos when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
823 1.1 christos is changed; threads in multithreaded programs should not do this.
824 1.1 christos If the canonical name cannot be determined, the result is a non-canonical
825 1.1 christos name. */
/* Implementation overview.  Takes no arguments and returns a pointer to a
   NUL-terminated string.  Exactly one strategy is selected at compile time:
     - HAVE_LANGINFO_CODESET: ask nl_langinfo (CODESET).  On Cygwin, when
       that yields "US-ASCII", use the encoding suffix of LC_ALL / LC_CTYPE /
       LANG instead, or else "CP<n>" built from GetACP().
     - WINDOWS_NATIVE: use the text after the last '.' of
       setlocale (LC_CTYPE, NULL), or else GetACP(), as "CP<...>"; the
       values "65001" and "utf8" are reported as "UTF-8".
     - OS2: use the encoding suffix of LC_ALL / LC_CTYPE / LANG, or else
       "CP<n>" built from DosQueryCp().
     - Otherwise: look up LC_ALL / LC_CTYPE / LANG in locale_table.
   In the first three cases the name is afterwards looked up in alias_table
   (when alias_table_defined is defined), except where an encoding taken
   from an environment variable is returned early.  */
826 1.1 christos
827 1.1 christos #ifdef STATIC
828 1.1 christos STATIC
829 1.1 christos #endif
830 1.1 christos const char *
831 1.1 christos locale_charset (void)
832 1.1 christos {
833 1.1 christos const char *codeset;
834 1.1 christos
835 1.1 christos /* This function must be multithread-safe. To achieve this without using
836 1.1 christos thread-local storage, we use a simple strcpy or memcpy to fill this static
837 1.1 christos buffer. Filling it through, for example, strcpy + strcat would not be
838 1.1 christos guaranteed to leave the buffer's contents intact if another thread is
839 1.1 christos currently accessing it. If necessary, the contents are first assembled in
840 1.1 christos a stack-allocated buffer. */
841 1.1 christos
842 1.1 christos #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
843 1.1 christos
844 1.1 christos # if HAVE_LANGINFO_CODESET
845 1.1 christos
846 1.1 christos /* Most systems support nl_langinfo (CODESET) nowadays. */
847 1.1 christos codeset = nl_langinfo (CODESET);
848 1.1 christos
849 1.1 christos # ifdef __CYGWIN__
850 1.1 christos /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
851 1.1 christos returns "US-ASCII". Return the suffix of the locale name from the
852 1.1 christos environment variables (if present) or the codepage as a number. */
853 1.1 christos if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
854 1.1 christos {
855 1.1 christos const char *locale;
    /* resultbuf is static because a pointer to it is handed back to the
       caller; its size matches the "CP%u" text built further down. */
856 1.1 christos static char resultbuf[2 + 10 + 1];
857 1.1 christos
    /* Precedence of the environment variables: LC_ALL, then LC_CTYPE,
       then LANG; an unset or empty variable is skipped. */
858 1.1 christos locale = getenv ("LC_ALL");
859 1.1 christos if (locale == NULL || locale[0] == '\0')
860 1.1 christos {
861 1.1 christos locale = getenv ("LC_CTYPE");
862 1.1 christos if (locale == NULL || locale[0] == '\0')
863 1.1 christos locale = getenv ("LANG");
864 1.1 christos }
865 1.1 christos if (locale != NULL && locale[0] != '\0')
866 1.1 christos {
867 1.1 christos /* If the locale name contains an encoding after the dot, return
868 1.1 christos it. */
869 1.1 christos const char *dot = strchr (locale, '.');
870 1.1 christos
871 1.1 christos if (dot != NULL)
872 1.1 christos {
873 1.1 christos const char *modifier;
874 1.1 christos
875 1.1 christos dot++;
876 1.1 christos /* Look for the possible @... trailer and remove it, if any. */
877 1.1 christos modifier = strchr (dot, '@');
    /* Without a modifier the encoding is returned straight from the
       environment string.  This early return bypasses the alias
       lookup further down. */
878 1.1 christos if (modifier == NULL)
879 1.1 christos return dot;
    /* Copy only the part before '@'.  If it does not fit into
       resultbuf, fall through to the GetACP() fallback below. */
880 1.1 christos if (modifier - dot < sizeof (resultbuf))
881 1.1 christos {
882 1.1 christos /* This way of filling resultbuf is multithread-safe. */
883 1.1 christos memcpy (resultbuf, dot, modifier - dot);
884 1.1 christos resultbuf [modifier - dot] = '\0';
885 1.1 christos return resultbuf;
886 1.1 christos }
887 1.1 christos }
888 1.1 christos }
889 1.1 christos
890 1.1 christos /* The Windows API has a function returning the locale's codepage as a
891 1.1 christos number: GetACP(). This encoding is used by Cygwin, unless the user
892 1.1 christos has set the environment variable CYGWIN=codepage:oem (which very few
893 1.1 christos people do).
894 1.1 christos Output directed to console windows needs to be converted (to
895 1.1 christos GetOEMCP() if the console is using a raster font, or to
896 1.1 christos GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
897 1.1 christos this conversion transparently (see winsup/cygwin/fhandler_console.cc),
898 1.1 christos converting to GetConsoleOutputCP(). This leads to correct results,
899 1.1 christos except when SetConsoleOutputCP has been called and a raster font is
900 1.1 christos in use. */
901 1.1 christos {
902 1.1 christos char buf[2 + 10 + 1];
903 1.1 christos
    /* Format into the stack buffer first, then publish it with a single
       strcpy, as required by the multithread-safety note at the top of
       this function. */
904 1.1 christos sprintf (buf, "CP%u", GetACP ());
905 1.1 christos strcpy (resultbuf, buf);
906 1.1 christos codeset = resultbuf;
907 1.1 christos }
908 1.1 christos }
909 1.1 christos # endif
910 1.1 christos
911 1.1 christos if (codeset == NULL)
912 1.1 christos /* The canonical name cannot be determined. */
913 1.1 christos codeset = "";
914 1.1 christos
915 1.1 christos # elif defined WINDOWS_NATIVE
916 1.1 christos
917 1.1 christos char buf[2 + 10 + 1];
918 1.1 christos static char resultbuf[2 + 10 + 1];
919 1.1 christos
920 1.1 christos /* The Windows API has a function returning the locale's codepage as
921 1.1 christos a number, but the value doesn't change according to what the
922 1.1 christos 'setlocale' call specified. So we use it as a last resort, in
923 1.1 christos case the string returned by 'setlocale' doesn't specify the
924 1.1 christos codepage. */
    /* NOTE(review): current_locale is passed to strrchr without a NULL
       check -- confirm that setlocale (LC_CTYPE, NULL) cannot return NULL
       on the supported runtimes. */
925 1.1 christos char *current_locale = setlocale (LC_CTYPE, NULL);
926 1.1 christos char *pdot = strrchr (current_locale, '.');
927 1.1 christos
    /* Use the locale's suffix only if "CP" + suffix + NUL fits into buf. */
928 1.1 christos if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
929 1.1 christos sprintf (buf, "CP%s", pdot + 1);
930 1.1 christos else
931 1.1 christos {
932 1.1 christos /* The Windows API has a function returning the locale's codepage as a
933 1.1 christos number: GetACP().
934 1.1 christos When the output goes to a console window, it needs to be provided in
935 1.1 christos GetOEMCP() encoding if the console is using a raster font, or in
936 1.1 christos GetConsoleOutputCP() encoding if it is using a TrueType font.
937 1.1 christos But in GUI programs and for output sent to files and pipes, GetACP()
938 1.1 christos encoding is the best bet. */
939 1.1 christos sprintf (buf, "CP%u", GetACP ());
940 1.1 christos }
941 1.1 christos /* For a locale name such as "French_France.65001", in Windows 10,
942 1.1 christos setlocale now returns "French_France.utf8" instead. */
    /* buf + 2 skips the "CP" prefix written by either sprintf above. */
943 1.1 christos if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
944 1.1 christos codeset = "UTF-8";
945 1.1 christos else
946 1.1 christos {
947 1.1 christos strcpy (resultbuf, buf);
948 1.1 christos codeset = resultbuf;
949 1.1 christos }
950 1.1 christos
951 1.1 christos # elif defined OS2
952 1.1 christos
953 1.1 christos const char *locale;
954 1.1 christos static char resultbuf[2 + 10 + 1];
955 1.1 christos ULONG cp[3];
956 1.1 christos ULONG cplen;
957 1.1 christos
    /* NULL means "not decided yet"; it is tested before DosQueryCp() is
       consulted below. */
958 1.1 christos codeset = NULL;
959 1.1 christos
960 1.1 christos /* Allow user to override the codeset, as set in the operating system,
961 1.1 christos with standard language environment variables. */
962 1.1 christos locale = getenv ("LC_ALL");
963 1.1 christos if (locale == NULL || locale[0] == '\0')
964 1.1 christos {
965 1.1 christos locale = getenv ("LC_CTYPE");
966 1.1 christos if (locale == NULL || locale[0] == '\0')
967 1.1 christos locale = getenv ("LANG");
968 1.1 christos }
969 1.1 christos if (locale != NULL && locale[0] != '\0')
970 1.1 christos {
971 1.1 christos /* If the locale name contains an encoding after the dot, return it. */
972 1.1 christos const char *dot = strchr (locale, '.');
973 1.1 christos
974 1.1 christos if (dot != NULL)
975 1.1 christos {
976 1.1 christos const char *modifier;
977 1.1 christos
978 1.1 christos dot++;
979 1.1 christos /* Look for the possible @... trailer and remove it, if any. */
980 1.1 christos modifier = strchr (dot, '@');
    /* Without a modifier the encoding is returned straight from the
       environment string.  This early return bypasses the alias lookup
       further down. */
981 1.1 christos if (modifier == NULL)
982 1.1 christos return dot;
    /* Copy only the part before '@'.  If it does not fit into
       resultbuf, continue with the checks below. */
983 1.1 christos if (modifier - dot < sizeof (resultbuf))
984 1.1 christos {
985 1.1 christos /* This way of filling resultbuf is multithread-safe. */
986 1.1 christos memcpy (resultbuf, dot, modifier - dot);
987 1.1 christos resultbuf [modifier - dot] = '\0';
988 1.1 christos return resultbuf;
989 1.1 christos }
990 1.1 christos }
991 1.1 christos
992 1.1 christos /* For the POSIX locale, don't use the system's codepage. */
    /* Setting codeset to "" (instead of leaving it NULL) is what skips
       the DosQueryCp() query below. */
993 1.1 christos if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
994 1.1 christos codeset = "";
995 1.1 christos }
996 1.1 christos
997 1.1 christos if (codeset == NULL)
998 1.1 christos {
999 1.1 christos /* OS/2 has a function returning the locale's codepage as a number. */
    /* On a non-zero return, give up and use the empty codeset. */
1000 1.1 christos if (DosQueryCp (sizeof (cp), cp, &cplen))
1001 1.1 christos codeset = "";
1002 1.1 christos else
1003 1.1 christos {
1004 1.1 christos char buf[2 + 10 + 1];
1005 1.1 christos
    /* NOTE(review): cp[0] is declared ULONG but is formatted with %u --
       confirm that ULONG is compatible with unsigned int on this
       platform. */
1006 1.1 christos sprintf (buf, "CP%u", cp[0]);
1007 1.1 christos strcpy (resultbuf, buf);
1008 1.1 christos codeset = resultbuf;
1009 1.1 christos }
1010 1.1 christos }
1011 1.1 christos
1012 1.1 christos # else
1013 1.1 christos
1014 1.1 christos # error "Add code for other platforms here."
1015 1.1 christos
1016 1.1 christos # endif
1017 1.1 christos
1018 1.1 christos /* Resolve alias. */
1019 1.1 christos {
1020 1.1 christos # ifdef alias_table_defined
1021 1.1 christos /* On some platforms, UTF-8 locales are the most frequently used ones.
1022 1.1 christos Speed up the common case and slow down the less common cases by
1023 1.1 christos testing for this case first. */
1024 1.1 christos # if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
1025 1.1 christos if (strcmp (codeset, "UTF-8") == 0)
1026 1.1 christos goto done_table_lookup;
1027 1.1 christos else
1028 1.1 christos # endif
1029 1.1 christos {
1030 1.1 christos const struct table_entry * const table = alias_table;
1031 1.1 christos size_t const table_size =
1032 1.1 christos sizeof (alias_table) / sizeof (struct table_entry);
1033 1.1 christos /* The table is sorted. Perform a binary search. */
1034 1.1 christos size_t hi = table_size;
1035 1.1 christos size_t lo = 0;
1036 1.1 christos while (lo < hi)
1037 1.1 christos {
1038 1.1 christos /* Invariant:
1039 1.1 christos for i < lo, strcmp (table[i].alias, codeset) < 0,
1040 1.1 christos for i >= hi, strcmp (table[i].alias, codeset) > 0. */
1041 1.1 christos size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1042 1.1 christos int cmp = strcmp (table[mid].alias, codeset);
1043 1.1 christos if (cmp < 0)
1044 1.1 christos lo = mid + 1;
1045 1.1 christos else if (cmp > 0)
1046 1.1 christos hi = mid;
1047 1.1 christos else
1048 1.1 christos {
1049 1.1 christos /* Found an i with
1050 1.1 christos strcmp (table[i].alias, codeset) == 0. */
1051 1.1 christos codeset = table[mid].canonical;
1052 1.1 christos goto done_table_lookup;
1053 1.1 christos }
1054 1.1 christos }
1055 1.1 christos }
    /* The label sits inside a never-executed "if (0)" branch.  A goto to
       done_table_lookup lands on its empty statement and thereby skips the
       "else" block; normal fall-through (no match) executes that block. */
1056 1.1 christos if (0)
1057 1.1 christos done_table_lookup: ;
1058 1.1 christos else
1059 1.1 christos # endif
1060 1.1 christos {
    /* Reached when the lookup found no match, and unconditionally when
       alias_table_defined is not defined. */
1061 1.1 christos /* Did not find it in the table. */
1062 1.1 christos /* On Mac OS X, all modern locales use the UTF-8 encoding.
1063 1.1 christos BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1064 1.1 christos # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1065 1.1 christos codeset = "UTF-8";
1066 1.1 christos # else
1067 1.1 christos /* Don't return an empty string. GNU libc and GNU libiconv interpret
1068 1.1 christos the empty string as denoting "the locale's character encoding",
1069 1.1 christos thus GNU libiconv would call this function a second time. */
1070 1.1 christos if (codeset[0] == '\0')
1071 1.1 christos codeset = "ASCII";
1072 1.1 christos # endif
1073 1.1 christos }
1074 1.1 christos }
1075 1.1 christos
1076 1.1 christos #else
1077 1.1 christos
1078 1.1 christos /* On old systems which lack it, use setlocale or getenv. */
1079 1.1 christos const char *locale = NULL;
1080 1.1 christos
1081 1.1 christos /* But most old systems don't have a complete set of locales. Some
1082 1.1 christos (like DJGPP) have only the C locale. Therefore we don't use setlocale
1083 1.1 christos here; it would return "C" when it doesn't support the locale name the
1084 1.1 christos user has set. */
1085 1.1 christos # if 0
1086 1.1 christos locale = setlocale (LC_CTYPE, NULL);
1087 1.1 christos # endif
    /* With the setlocale call above compiled out, locale is still NULL
       here, so the environment variables are always consulted. */
1088 1.1 christos if (locale == NULL || locale[0] == '\0')
1089 1.1 christos {
1090 1.1 christos locale = getenv ("LC_ALL");
1091 1.1 christos if (locale == NULL || locale[0] == '\0')
1092 1.1 christos {
1093 1.1 christos locale = getenv ("LC_CTYPE");
1094 1.1 christos if (locale == NULL || locale[0] == '\0')
1095 1.1 christos locale = getenv ("LANG");
    /* Substitute "" so that the table lookup below never passes NULL
       to strcmp. */
1096 1.1 christos if (locale == NULL)
1097 1.1 christos locale = "";
1098 1.1 christos }
1099 1.1 christos }
1100 1.1 christos
1101 1.1 christos /* Map locale name to canonical encoding name. */
1102 1.1 christos {
1103 1.1 christos # ifdef locale_table_defined
1104 1.1 christos const struct table_entry * const table = locale_table;
1105 1.1 christos size_t const table_size =
1106 1.1 christos sizeof (locale_table) / sizeof (struct table_entry);
1107 1.1 christos /* The table is sorted. Perform a binary search. */
1108 1.1 christos size_t hi = table_size;
1109 1.1 christos size_t lo = 0;
1110 1.1 christos while (lo < hi)
1111 1.1 christos {
1112 1.1 christos /* Invariant:
1113 1.1 christos for i < lo, strcmp (table[i].locale, locale) < 0,
1114 1.1 christos for i >= hi, strcmp (table[i].locale, locale) > 0. */
1115 1.1 christos size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1116 1.1 christos int cmp = strcmp (table[mid].locale, locale);
1117 1.1 christos if (cmp < 0)
1118 1.1 christos lo = mid + 1;
1119 1.1 christos else if (cmp > 0)
1120 1.1 christos hi = mid;
1121 1.1 christos else
1122 1.1 christos {
1123 1.1 christos /* Found an i with
1124 1.1 christos strcmp (table[i].locale, locale) == 0. */
1125 1.1 christos codeset = table[mid].canonical;
1126 1.1 christos goto done_table_lookup;
1127 1.1 christos }
1128 1.1 christos }
    /* Same idiom as in the alias lookup: the goto lands on the empty
       statement inside "if (0)" and skips the "else" block, which only
       runs when the search found no match. */
1129 1.1 christos if (0)
1130 1.1 christos done_table_lookup: ;
1131 1.1 christos else
1132 1.1 christos # endif
1133 1.1 christos {
    /* Reached when the lookup found no match, and unconditionally when
       locale_table_defined is not defined. */
1134 1.1 christos /* Did not find it in the table. */
1135 1.1 christos /* On Mac OS X, all modern locales use the UTF-8 encoding.
1136 1.1 christos BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1137 1.1 christos # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1138 1.1 christos codeset = "UTF-8";
1139 1.1 christos # else
1140 1.1 christos /* The canonical name cannot be determined. */
1141 1.1 christos /* Don't return an empty string. GNU libc and GNU libiconv interpret
1142 1.1 christos the empty string as denoting "the locale's character encoding",
1143 1.1 christos thus GNU libiconv would call this function a second time. */
1144 1.1 christos codeset = "ASCII";
1145 1.1 christos # endif
1146 1.1 christos }
1147 1.1 christos }
1148 1.1 christos
1149 1.1 christos #endif
1150 1.1 christos
1151 1.1 christos #ifdef DARWIN7
1152 1.1 christos /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1153 1.1 christos (the default codeset) does not work when MB_CUR_MAX is 1. */
1154 1.1 christos if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
1155 1.1 christos codeset = "ASCII";
1156 1.1 christos #endif
1157 1.1 christos
1158 1.1 christos return codeset;
1159 1.1 christos }
1160