1 1.1 christos /* Determine a canonical name for the current locale's character encoding. 2 1.1 christos 3 1.1.1.2 christos Copyright (C) 2000-2006, 2008-2022 Free Software Foundation, Inc. 4 1.1 christos 5 1.1.1.2 christos This file is free software: you can redistribute it and/or modify 6 1.1.1.2 christos it under the terms of the GNU Lesser General Public License as 7 1.1.1.2 christos published by the Free Software Foundation; either version 2.1 of the 8 1.1.1.2 christos License, or (at your option) any later version. 9 1.1 christos 10 1.1.1.2 christos This file is distributed in the hope that it will be useful, 11 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 12 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 1.1.1.2 christos GNU Lesser General Public License for more details. 14 1.1 christos 15 1.1.1.2 christos You should have received a copy of the GNU Lesser General Public License 16 1.1.1.2 christos along with this program. If not, see <https://www.gnu.org/licenses/>. */ 17 1.1 christos 18 1.1 christos /* Written by Bruno Haible <bruno (at) clisp.org>. */ 19 1.1 christos 20 1.1 christos #include <config.h> 21 1.1 christos 22 1.1 christos /* Specification. */ 23 1.1 christos #include "localcharset.h" 24 1.1 christos 25 1.1 christos #include <stddef.h> 26 1.1 christos #include <stdio.h> 27 1.1 christos #include <string.h> 28 1.1 christos #include <stdlib.h> 29 1.1 christos 30 1.1 christos #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET 31 1.1 christos # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ 32 1.1 christos #endif 33 1.1 christos 34 1.1 christos #if defined _WIN32 && !defined __CYGWIN__ 35 1.1 christos # define WINDOWS_NATIVE 36 1.1 christos # include <locale.h> 37 1.1 christos #endif 38 1.1 christos 39 1.1 christos #if defined __EMX__ 40 1.1 christos /* Assume EMX program runs on OS/2, even if compiled under DOS. 
*/ 41 1.1 christos # ifndef OS2 42 1.1 christos # define OS2 43 1.1 christos # endif 44 1.1 christos #endif 45 1.1 christos 46 1.1 christos #if !defined WINDOWS_NATIVE 47 1.1 christos # if HAVE_LANGINFO_CODESET 48 1.1 christos # include <langinfo.h> 49 1.1 christos # else 50 1.1 christos # if 0 /* see comment regarding use of setlocale(), below */ 51 1.1 christos # include <locale.h> 52 1.1 christos # endif 53 1.1 christos # endif 54 1.1 christos # ifdef __CYGWIN__ 55 1.1 christos # define WIN32_LEAN_AND_MEAN 56 1.1 christos # include <windows.h> 57 1.1 christos # endif 58 1.1 christos #elif defined WINDOWS_NATIVE 59 1.1 christos # define WIN32_LEAN_AND_MEAN 60 1.1 christos # include <windows.h> 61 1.1 christos /* For the use of setlocale() below, the Gnulib override in setlocale.c is 62 1.1 christos not needed; see the platform lists in setlocale_null.m4. */ 63 1.1 christos # undef setlocale 64 1.1 christos #endif 65 1.1 christos #if defined OS2 66 1.1 christos # define INCL_DOS 67 1.1 christos # include <os2.h> 68 1.1 christos #endif 69 1.1 christos 70 1.1 christos /* For MB_CUR_MAX_L */ 71 1.1 christos #if defined DARWIN7 72 1.1 christos # include <xlocale.h> 73 1.1 christos #endif 74 1.1 christos 75 1.1 christos 76 1.1 christos #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2 77 1.1 christos 78 1.1 christos /* On these platforms, we use a mapping from non-canonical encoding name 79 1.1 christos to GNU canonical encoding name. */ 80 1.1 christos 81 1.1 christos /* With glibc-2.1 or newer, we don't need any canonicalization, 82 1.1 christos because glibc has iconv and both glibc and libiconv support all 83 1.1 christos GNU canonical names directly. 
*/ 84 1.1 christos # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__) 85 1.1 christos 86 1.1 christos struct table_entry 87 1.1 christos { 88 1.1 christos const char alias[11+1]; 89 1.1 christos const char canonical[11+1]; 90 1.1 christos }; 91 1.1 christos 92 1.1 christos /* Table of platform-dependent mappings, sorted in ascending order. */ 93 1.1 christos static const struct table_entry alias_table[] = 94 1.1 christos { 95 1.1 christos # if defined __FreeBSD__ /* FreeBSD */ 96 1.1 christos /*{ "ARMSCII-8", "ARMSCII-8" },*/ 97 1.1 christos { "Big5", "BIG5" }, 98 1.1 christos { "C", "ASCII" }, 99 1.1 christos /*{ "CP1131", "CP1131" },*/ 100 1.1 christos /*{ "CP1251", "CP1251" },*/ 101 1.1 christos /*{ "CP866", "CP866" },*/ 102 1.1 christos /*{ "GB18030", "GB18030" },*/ 103 1.1 christos /*{ "GB2312", "GB2312" },*/ 104 1.1 christos /*{ "GBK", "GBK" },*/ 105 1.1 christos /*{ "ISCII-DEV", "?" },*/ 106 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 107 1.1 christos { "ISO8859-13", "ISO-8859-13" }, 108 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 109 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 110 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 111 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 112 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 113 1.1 christos /*{ "KOI8-R", "KOI8-R" },*/ 114 1.1 christos /*{ "KOI8-U", "KOI8-U" },*/ 115 1.1 christos { "SJIS", "SHIFT_JIS" }, 116 1.1 christos { "US-ASCII", "ASCII" }, 117 1.1 christos { "eucCN", "GB2312" }, 118 1.1 christos { "eucJP", "EUC-JP" }, 119 1.1 christos { "eucKR", "EUC-KR" } 120 1.1 christos # define alias_table_defined 121 1.1 christos # endif 122 1.1 christos # if defined __NetBSD__ /* NetBSD */ 123 1.1 christos { "646", "ASCII" }, 124 1.1 christos /*{ "ARMSCII-8", "ARMSCII-8" },*/ 125 1.1 christos /*{ "BIG5", "BIG5" },*/ 126 1.1 christos { "Big5-HKSCS", "BIG5-HKSCS" }, 127 1.1 christos /*{ "CP1251", "CP1251" },*/ 128 1.1 christos /*{ "CP866", "CP866" },*/ 129 1.1 christos /*{ "GB18030", "GB18030" 
},*/ 130 1.1 christos /*{ "GB2312", "GB2312" },*/ 131 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 132 1.1 christos { "ISO8859-13", "ISO-8859-13" }, 133 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 134 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 135 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 136 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 137 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 138 1.1 christos /*{ "KOI8-R", "KOI8-R" },*/ 139 1.1 christos /*{ "KOI8-U", "KOI8-U" },*/ 140 1.1 christos /*{ "PT154", "PT154" },*/ 141 1.1 christos { "SJIS", "SHIFT_JIS" }, 142 1.1 christos { "eucCN", "GB2312" }, 143 1.1 christos { "eucJP", "EUC-JP" }, 144 1.1 christos { "eucKR", "EUC-KR" }, 145 1.1 christos { "eucTW", "EUC-TW" } 146 1.1 christos # define alias_table_defined 147 1.1 christos # endif 148 1.1 christos # if defined __OpenBSD__ /* OpenBSD */ 149 1.1 christos { "646", "ASCII" }, 150 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 151 1.1 christos { "ISO8859-13", "ISO-8859-13" }, 152 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 153 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 154 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 155 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 156 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 157 1.1 christos { "US-ASCII", "ASCII" } 158 1.1 christos # define alias_table_defined 159 1.1 christos # endif 160 1.1 christos # if defined __APPLE__ && defined __MACH__ /* Mac OS X */ 161 1.1 christos /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is 162 1.1 christos useless: 163 1.1 christos - It returns the empty string when LANG is set to a locale of the 164 1.1 christos form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8 165 1.1 christos LC_CTYPE file. 166 1.1 christos - The environment variables LANG, LC_CTYPE, LC_ALL are not set by 167 1.1 christos the system; nl_langinfo(CODESET) returns "US-ASCII" in this case. 168 1.1 christos - The documentation says: 169 1.1 christos "... 
all code that calls BSD system routines should ensure 170 1.1 christos that the const *char parameters of these routines are in UTF-8 171 1.1 christos encoding. All BSD system functions expect their string 172 1.1 christos parameters to be in UTF-8 encoding and nothing else." 173 1.1 christos It also says 174 1.1 christos "An additional caveat is that string parameters for files, 175 1.1 christos paths, and other file-system entities must be in canonical 176 1.1 christos UTF-8. In a canonical UTF-8 Unicode string, all decomposable 177 1.1 christos characters are decomposed ..." 178 1.1 christos but this is not true: You can pass non-decomposed UTF-8 strings 179 1.1 christos to file system functions, and it is the OS which will convert 180 1.1 christos them to decomposed UTF-8 before accessing the file system. 181 1.1 christos - The Apple Terminal application displays UTF-8 by default. 182 1.1 christos - However, other applications are free to use different encodings: 183 1.1 christos - xterm uses ISO-8859-1 by default. 184 1.1 christos - TextEdit uses MacRoman by default. 185 1.1 christos We prefer UTF-8 over decomposed UTF-8-MAC because one should 186 1.1 christos minimize the use of decomposed Unicode. Unfortunately, through the 187 1.1 christos Darwin file system, decomposed UTF-8 strings are leaked into user 188 1.1 christos space nevertheless. 189 1.1 christos Then there are also the locales with encodings other than US-ASCII 190 1.1 christos and UTF-8. These locales can be occasionally useful to users (e.g. 191 1.1 christos when grepping through ISO-8859-1 encoded text files), when all their 192 1.1 christos file names are in US-ASCII. 
193 1.1 christos */ 194 1.1 christos { "ARMSCII-8", "ARMSCII-8" }, 195 1.1 christos { "Big5", "BIG5" }, 196 1.1 christos { "Big5HKSCS", "BIG5-HKSCS" }, 197 1.1 christos { "CP1131", "CP1131" }, 198 1.1 christos { "CP1251", "CP1251" }, 199 1.1 christos { "CP866", "CP866" }, 200 1.1 christos { "CP949", "CP949" }, 201 1.1 christos { "GB18030", "GB18030" }, 202 1.1 christos { "GB2312", "GB2312" }, 203 1.1 christos { "GBK", "GBK" }, 204 1.1 christos /*{ "ISCII-DEV", "?" },*/ 205 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 206 1.1 christos { "ISO8859-13", "ISO-8859-13" }, 207 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 208 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 209 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 210 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 211 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 212 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 213 1.1 christos { "KOI8-R", "KOI8-R" }, 214 1.1 christos { "KOI8-U", "KOI8-U" }, 215 1.1 christos { "PT154", "PT154" }, 216 1.1 christos { "SJIS", "SHIFT_JIS" }, 217 1.1 christos { "eucCN", "GB2312" }, 218 1.1 christos { "eucJP", "EUC-JP" }, 219 1.1 christos { "eucKR", "EUC-KR" } 220 1.1 christos # define alias_table_defined 221 1.1 christos # endif 222 1.1 christos # if defined _AIX /* AIX */ 223 1.1 christos /*{ "GBK", "GBK" },*/ 224 1.1 christos { "IBM-1046", "CP1046" }, 225 1.1 christos { "IBM-1124", "CP1124" }, 226 1.1 christos { "IBM-1129", "CP1129" }, 227 1.1 christos { "IBM-1252", "CP1252" }, 228 1.1 christos { "IBM-850", "CP850" }, 229 1.1 christos { "IBM-856", "CP856" }, 230 1.1 christos { "IBM-921", "ISO-8859-13" }, 231 1.1 christos { "IBM-922", "CP922" }, 232 1.1 christos { "IBM-932", "CP932" }, 233 1.1 christos { "IBM-943", "CP943" }, 234 1.1 christos { "IBM-eucCN", "GB2312" }, 235 1.1 christos { "IBM-eucJP", "EUC-JP" }, 236 1.1 christos { "IBM-eucKR", "EUC-KR" }, 237 1.1 christos { "IBM-eucTW", "EUC-TW" }, 238 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 239 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 
240 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 241 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 242 1.1 christos { "ISO8859-6", "ISO-8859-6" }, 243 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 244 1.1 christos { "ISO8859-8", "ISO-8859-8" }, 245 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 246 1.1 christos { "TIS-620", "TIS-620" }, 247 1.1 christos /*{ "UTF-8", "UTF-8" },*/ 248 1.1 christos { "big5", "BIG5" } 249 1.1 christos # define alias_table_defined 250 1.1 christos # endif 251 1.1 christos # if defined __hpux /* HP-UX */ 252 1.1 christos { "SJIS", "SHIFT_JIS" }, 253 1.1 christos { "arabic8", "HP-ARABIC8" }, 254 1.1 christos { "big5", "BIG5" }, 255 1.1 christos { "cp1251", "CP1251" }, 256 1.1 christos { "eucJP", "EUC-JP" }, 257 1.1 christos { "eucKR", "EUC-KR" }, 258 1.1 christos { "eucTW", "EUC-TW" }, 259 1.1 christos { "gb18030", "GB18030" }, 260 1.1 christos { "greek8", "HP-GREEK8" }, 261 1.1 christos { "hebrew8", "HP-HEBREW8" }, 262 1.1 christos { "hkbig5", "BIG5-HKSCS" }, 263 1.1 christos { "hp15CN", "GB2312" }, 264 1.1 christos { "iso88591", "ISO-8859-1" }, 265 1.1 christos { "iso885913", "ISO-8859-13" }, 266 1.1 christos { "iso885915", "ISO-8859-15" }, 267 1.1 christos { "iso88592", "ISO-8859-2" }, 268 1.1 christos { "iso88594", "ISO-8859-4" }, 269 1.1 christos { "iso88595", "ISO-8859-5" }, 270 1.1 christos { "iso88596", "ISO-8859-6" }, 271 1.1 christos { "iso88597", "ISO-8859-7" }, 272 1.1 christos { "iso88598", "ISO-8859-8" }, 273 1.1 christos { "iso88599", "ISO-8859-9" }, 274 1.1 christos { "kana8", "HP-KANA8" }, 275 1.1 christos { "koi8r", "KOI8-R" }, 276 1.1 christos { "roman8", "HP-ROMAN8" }, 277 1.1 christos { "tis620", "TIS-620" }, 278 1.1 christos { "turkish8", "HP-TURKISH8" }, 279 1.1 christos { "utf8", "UTF-8" } 280 1.1 christos # define alias_table_defined 281 1.1 christos # endif 282 1.1 christos # if defined __sgi /* IRIX */ 283 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 284 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 285 1.1 christos { 
"ISO8859-2", "ISO-8859-2" }, 286 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 287 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 288 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 289 1.1 christos { "eucCN", "GB2312" }, 290 1.1 christos { "eucJP", "EUC-JP" }, 291 1.1 christos { "eucKR", "EUC-KR" }, 292 1.1 christos { "eucTW", "EUC-TW" } 293 1.1 christos # define alias_table_defined 294 1.1 christos # endif 295 1.1 christos # if defined __osf__ /* OSF/1 */ 296 1.1 christos /*{ "GBK", "GBK" },*/ 297 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 298 1.1 christos { "ISO8859-15", "ISO-8859-15" }, 299 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 300 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 301 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 302 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 303 1.1 christos { "ISO8859-8", "ISO-8859-8" }, 304 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 305 1.1 christos { "KSC5601", "CP949" }, 306 1.1 christos { "SJIS", "SHIFT_JIS" }, 307 1.1 christos { "TACTIS", "TIS-620" }, 308 1.1 christos /*{ "UTF-8", "UTF-8" },*/ 309 1.1 christos { "big5", "BIG5" }, 310 1.1 christos { "cp850", "CP850" }, 311 1.1 christos { "dechanyu", "DEC-HANYU" }, 312 1.1 christos { "dechanzi", "GB2312" }, 313 1.1 christos { "deckanji", "DEC-KANJI" }, 314 1.1 christos { "deckorean", "EUC-KR" }, 315 1.1 christos { "eucJP", "EUC-JP" }, 316 1.1 christos { "eucKR", "EUC-KR" }, 317 1.1 christos { "eucTW", "EUC-TW" }, 318 1.1 christos { "sdeckanji", "EUC-JP" } 319 1.1 christos # define alias_table_defined 320 1.1 christos # endif 321 1.1 christos # if defined __sun /* Solaris */ 322 1.1 christos { "5601", "EUC-KR" }, 323 1.1 christos { "646", "ASCII" }, 324 1.1 christos /*{ "BIG5", "BIG5" },*/ 325 1.1 christos { "Big5-HKSCS", "BIG5-HKSCS" }, 326 1.1 christos { "GB18030", "GB18030" }, 327 1.1 christos /*{ "GBK", "GBK" },*/ 328 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 329 1.1 christos { "ISO8859-11", "TIS-620" }, 330 1.1 christos { "ISO8859-13", "ISO-8859-13" }, 331 1.1 christos { 
"ISO8859-15", "ISO-8859-15" }, 332 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 333 1.1 christos { "ISO8859-3", "ISO-8859-3" }, 334 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 335 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 336 1.1 christos { "ISO8859-6", "ISO-8859-6" }, 337 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 338 1.1 christos { "ISO8859-8", "ISO-8859-8" }, 339 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 340 1.1 christos { "PCK", "SHIFT_JIS" }, 341 1.1 christos { "TIS620.2533", "TIS-620" }, 342 1.1 christos /*{ "UTF-8", "UTF-8" },*/ 343 1.1 christos { "ansi-1251", "CP1251" }, 344 1.1 christos { "cns11643", "EUC-TW" }, 345 1.1 christos { "eucJP", "EUC-JP" }, 346 1.1 christos { "gb2312", "GB2312" }, 347 1.1 christos { "koi8-r", "KOI8-R" } 348 1.1 christos # define alias_table_defined 349 1.1 christos # endif 350 1.1 christos # if defined __minix /* Minix */ 351 1.1 christos { "646", "ASCII" } 352 1.1 christos # define alias_table_defined 353 1.1 christos # endif 354 1.1 christos # if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */ 355 1.1 christos { "CP1361", "JOHAB" }, 356 1.1 christos { "CP20127", "ASCII" }, 357 1.1 christos { "CP20866", "KOI8-R" }, 358 1.1 christos { "CP20936", "GB2312" }, 359 1.1 christos { "CP21866", "KOI8-RU" }, 360 1.1 christos { "CP28591", "ISO-8859-1" }, 361 1.1 christos { "CP28592", "ISO-8859-2" }, 362 1.1 christos { "CP28593", "ISO-8859-3" }, 363 1.1 christos { "CP28594", "ISO-8859-4" }, 364 1.1 christos { "CP28595", "ISO-8859-5" }, 365 1.1 christos { "CP28596", "ISO-8859-6" }, 366 1.1 christos { "CP28597", "ISO-8859-7" }, 367 1.1 christos { "CP28598", "ISO-8859-8" }, 368 1.1 christos { "CP28599", "ISO-8859-9" }, 369 1.1 christos { "CP28605", "ISO-8859-15" }, 370 1.1 christos { "CP38598", "ISO-8859-8" }, 371 1.1 christos { "CP51932", "EUC-JP" }, 372 1.1 christos { "CP51936", "GB2312" }, 373 1.1 christos { "CP51949", "EUC-KR" }, 374 1.1 christos { "CP51950", "EUC-TW" }, 375 1.1 christos { "CP54936", "GB18030" }, 376 1.1 
christos { "CP65001", "UTF-8" }, 377 1.1 christos { "CP936", "GBK" } 378 1.1 christos # define alias_table_defined 379 1.1 christos # endif 380 1.1 christos # if defined OS2 /* OS/2 */ 381 1.1 christos /* The list of encodings is taken from "List of OS/2 Codepages" 382 1.1 christos by Alex Taylor: 383 1.1 christos <http://altsan.org/os2/toolkits/uls/index.html#codepages>. 384 1.1 christos See also "__convcp() of kLIBC": 385 1.1 christos <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */ 386 1.1 christos { "CP1004", "CP1252" }, 387 1.1 christos /*{ "CP1041", "CP943" },*/ 388 1.1 christos /*{ "CP1088", "CP949" },*/ 389 1.1 christos { "CP1089", "ISO-8859-6" }, 390 1.1 christos /*{ "CP1114", "CP950" },*/ 391 1.1 christos /*{ "CP1115", "GB2312" },*/ 392 1.1 christos { "CP1208", "UTF-8" }, 393 1.1 christos /*{ "CP1380", "GB2312" },*/ 394 1.1 christos { "CP1381", "GB2312" }, 395 1.1 christos { "CP1383", "GB2312" }, 396 1.1 christos { "CP1386", "GBK" }, 397 1.1 christos /*{ "CP301", "CP943" },*/ 398 1.1 christos { "CP3372", "EUC-JP" }, 399 1.1 christos { "CP4946", "CP850" }, 400 1.1 christos /*{ "CP5048", "JIS_X0208-1990" },*/ 401 1.1 christos /*{ "CP5049", "JIS_X0212-1990" },*/ 402 1.1 christos /*{ "CP5067", "KS_C_5601-1987" },*/ 403 1.1 christos { "CP813", "ISO-8859-7" }, 404 1.1 christos { "CP819", "ISO-8859-1" }, 405 1.1 christos { "CP878", "KOI8-R" }, 406 1.1 christos /*{ "CP897", "CP943" },*/ 407 1.1 christos { "CP912", "ISO-8859-2" }, 408 1.1 christos { "CP913", "ISO-8859-3" }, 409 1.1 christos { "CP914", "ISO-8859-4" }, 410 1.1 christos { "CP915", "ISO-8859-5" }, 411 1.1 christos { "CP916", "ISO-8859-8" }, 412 1.1 christos { "CP920", "ISO-8859-9" }, 413 1.1 christos { "CP921", "ISO-8859-13" }, 414 1.1 christos { "CP923", "ISO-8859-15" }, 415 1.1 christos /*{ "CP941", "CP943" },*/ 416 1.1 christos /*{ "CP947", "CP950" },*/ 417 1.1 christos /*{ "CP951", "CP949" },*/ 418 1.1 christos /*{ "CP952", "JIS_X0208-1990" },*/ 419 1.1 
christos /*{ "CP953", "JIS_X0212-1990" },*/ 420 1.1 christos { "CP954", "EUC-JP" }, 421 1.1 christos { "CP964", "EUC-TW" }, 422 1.1 christos { "CP970", "EUC-KR" }, 423 1.1 christos /*{ "CP971", "KS_C_5601-1987" },*/ 424 1.1 christos { "IBM-1004", "CP1252" }, 425 1.1 christos /*{ "IBM-1006", "?" },*/ 426 1.1 christos /*{ "IBM-1008", "?" },*/ 427 1.1 christos /*{ "IBM-1041", "CP943" },*/ 428 1.1 christos /*{ "IBM-1051", "?" },*/ 429 1.1 christos /*{ "IBM-1088", "CP949" },*/ 430 1.1 christos { "IBM-1089", "ISO-8859-6" }, 431 1.1 christos /*{ "IBM-1098", "?" },*/ 432 1.1 christos /*{ "IBM-1114", "CP950" },*/ 433 1.1 christos /*{ "IBM-1115", "GB2312" },*/ 434 1.1 christos /*{ "IBM-1116", "?" },*/ 435 1.1 christos /*{ "IBM-1117", "?" },*/ 436 1.1 christos /*{ "IBM-1118", "?" },*/ 437 1.1 christos /*{ "IBM-1119", "?" },*/ 438 1.1 christos { "IBM-1124", "CP1124" }, 439 1.1 christos { "IBM-1125", "CP1125" }, 440 1.1 christos { "IBM-1131", "CP1131" }, 441 1.1 christos { "IBM-1208", "UTF-8" }, 442 1.1 christos { "IBM-1250", "CP1250" }, 443 1.1 christos { "IBM-1251", "CP1251" }, 444 1.1 christos { "IBM-1252", "CP1252" }, 445 1.1 christos { "IBM-1253", "CP1253" }, 446 1.1 christos { "IBM-1254", "CP1254" }, 447 1.1 christos { "IBM-1255", "CP1255" }, 448 1.1 christos { "IBM-1256", "CP1256" }, 449 1.1 christos { "IBM-1257", "CP1257" }, 450 1.1 christos /*{ "IBM-1275", "?" },*/ 451 1.1 christos /*{ "IBM-1276", "?" },*/ 452 1.1 christos /*{ "IBM-1277", "?" },*/ 453 1.1 christos /*{ "IBM-1280", "?" },*/ 454 1.1 christos /*{ "IBM-1281", "?" },*/ 455 1.1 christos /*{ "IBM-1282", "?" },*/ 456 1.1 christos /*{ "IBM-1283", "?" 
},*/ 457 1.1 christos /*{ "IBM-1380", "GB2312" },*/ 458 1.1 christos { "IBM-1381", "GB2312" }, 459 1.1 christos { "IBM-1383", "GB2312" }, 460 1.1 christos { "IBM-1386", "GBK" }, 461 1.1 christos /*{ "IBM-301", "CP943" },*/ 462 1.1 christos { "IBM-3372", "EUC-JP" }, 463 1.1 christos { "IBM-367", "ASCII" }, 464 1.1 christos { "IBM-437", "CP437" }, 465 1.1 christos { "IBM-4946", "CP850" }, 466 1.1 christos /*{ "IBM-5048", "JIS_X0208-1990" },*/ 467 1.1 christos /*{ "IBM-5049", "JIS_X0212-1990" },*/ 468 1.1 christos /*{ "IBM-5067", "KS_C_5601-1987" },*/ 469 1.1 christos { "IBM-813", "ISO-8859-7" }, 470 1.1 christos { "IBM-819", "ISO-8859-1" }, 471 1.1 christos { "IBM-850", "CP850" }, 472 1.1 christos /*{ "IBM-851", "?" },*/ 473 1.1 christos { "IBM-852", "CP852" }, 474 1.1 christos { "IBM-855", "CP855" }, 475 1.1 christos { "IBM-856", "CP856" }, 476 1.1 christos { "IBM-857", "CP857" }, 477 1.1 christos /*{ "IBM-859", "?" },*/ 478 1.1 christos { "IBM-860", "CP860" }, 479 1.1 christos { "IBM-861", "CP861" }, 480 1.1 christos { "IBM-862", "CP862" }, 481 1.1 christos { "IBM-863", "CP863" }, 482 1.1 christos { "IBM-864", "CP864" }, 483 1.1 christos { "IBM-865", "CP865" }, 484 1.1 christos { "IBM-866", "CP866" }, 485 1.1 christos /*{ "IBM-868", "?" },*/ 486 1.1 christos { "IBM-869", "CP869" }, 487 1.1 christos { "IBM-874", "CP874" }, 488 1.1 christos { "IBM-878", "KOI8-R" }, 489 1.1 christos /*{ "IBM-895", "?" },*/ 490 1.1 christos /*{ "IBM-897", "CP943" },*/ 491 1.1 christos /*{ "IBM-907", "?" },*/ 492 1.1 christos /*{ "IBM-909", "?" 
},*/ 493 1.1 christos { "IBM-912", "ISO-8859-2" }, 494 1.1 christos { "IBM-913", "ISO-8859-3" }, 495 1.1 christos { "IBM-914", "ISO-8859-4" }, 496 1.1 christos { "IBM-915", "ISO-8859-5" }, 497 1.1 christos { "IBM-916", "ISO-8859-8" }, 498 1.1 christos { "IBM-920", "ISO-8859-9" }, 499 1.1 christos { "IBM-921", "ISO-8859-13" }, 500 1.1 christos { "IBM-922", "CP922" }, 501 1.1 christos { "IBM-923", "ISO-8859-15" }, 502 1.1 christos { "IBM-932", "CP932" }, 503 1.1 christos /*{ "IBM-941", "CP943" },*/ 504 1.1 christos /*{ "IBM-942", "?" },*/ 505 1.1 christos { "IBM-943", "CP943" }, 506 1.1 christos /*{ "IBM-947", "CP950" },*/ 507 1.1 christos { "IBM-949", "CP949" }, 508 1.1 christos { "IBM-950", "CP950" }, 509 1.1 christos /*{ "IBM-951", "CP949" },*/ 510 1.1 christos /*{ "IBM-952", "JIS_X0208-1990" },*/ 511 1.1 christos /*{ "IBM-953", "JIS_X0212-1990" },*/ 512 1.1 christos { "IBM-954", "EUC-JP" }, 513 1.1 christos /*{ "IBM-955", "?" },*/ 514 1.1 christos { "IBM-964", "EUC-TW" }, 515 1.1 christos { "IBM-970", "EUC-KR" }, 516 1.1 christos /*{ "IBM-971", "KS_C_5601-1987" },*/ 517 1.1 christos { "IBM-eucCN", "GB2312" }, 518 1.1 christos { "IBM-eucJP", "EUC-JP" }, 519 1.1 christos { "IBM-eucKR", "EUC-KR" }, 520 1.1 christos { "IBM-eucTW", "EUC-TW" }, 521 1.1 christos { "IBM33722", "EUC-JP" }, 522 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 523 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 524 1.1 christos { "ISO8859-3", "ISO-8859-3" }, 525 1.1 christos { "ISO8859-4", "ISO-8859-4" }, 526 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 527 1.1 christos { "ISO8859-6", "ISO-8859-6" }, 528 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 529 1.1 christos { "ISO8859-8", "ISO-8859-8" }, 530 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 531 1.1 christos /*{ "JISX0201-1976", "JISX0201-1976" },*/ 532 1.1 christos /*{ "JISX0208-1978", "?" 
},*/ 533 1.1 christos /*{ "JISX0208-1983", "JIS_X0208-1983" },*/ 534 1.1 christos /*{ "JISX0208-1990", "JIS_X0208-1990" },*/ 535 1.1 christos /*{ "JISX0212-1990", "JIS_X0212-1990" },*/ 536 1.1 christos /*{ "KSC5601-1987", "KS_C_5601-1987" },*/ 537 1.1 christos { "SJIS-1", "CP943" }, 538 1.1 christos { "SJIS-2", "CP943" }, 539 1.1 christos { "eucJP", "EUC-JP" }, 540 1.1 christos { "eucKR", "EUC-KR" }, 541 1.1 christos { "eucTW-1993", "EUC-TW" } 542 1.1 christos # define alias_table_defined 543 1.1 christos # endif 544 1.1 christos # if defined VMS /* OpenVMS */ 545 1.1 christos /* The list of encodings is taken from the OpenVMS 7.3-1 documentation 546 1.1 christos "Compaq C Run-Time Library Reference Manual for OpenVMS systems" 547 1.1 christos section 10.7 "Handling Different Character Sets". */ 548 1.1 christos { "DECHANYU", "DEC-HANYU" }, 549 1.1 christos { "DECHANZI", "GB2312" }, 550 1.1 christos { "DECKANJI", "DEC-KANJI" }, 551 1.1 christos { "DECKOREAN", "EUC-KR" }, 552 1.1 christos { "ISO8859-1", "ISO-8859-1" }, 553 1.1 christos { "ISO8859-2", "ISO-8859-2" }, 554 1.1 christos { "ISO8859-5", "ISO-8859-5" }, 555 1.1 christos { "ISO8859-7", "ISO-8859-7" }, 556 1.1 christos { "ISO8859-8", "ISO-8859-8" }, 557 1.1 christos { "ISO8859-9", "ISO-8859-9" }, 558 1.1 christos { "SDECKANJI", "EUC-JP" }, 559 1.1 christos { "SJIS", "SHIFT_JIS" }, 560 1.1 christos { "eucJP", "EUC-JP" }, 561 1.1 christos { "eucTW", "EUC-TW" } 562 1.1 christos # define alias_table_defined 563 1.1 christos # endif 564 1.1 christos # ifndef alias_table_defined 565 1.1 christos /* Just a dummy entry, to avoid a C syntax error. */ 566 1.1 christos { "", "" } 567 1.1 christos # endif 568 1.1 christos }; 569 1.1 christos 570 1.1 christos # endif 571 1.1 christos 572 1.1 christos #else 573 1.1 christos 574 1.1 christos /* On these platforms, we use a mapping from locale name to GNU canonical 575 1.1 christos encoding name. 
*/ 576 1.1 christos 577 1.1 christos struct table_entry 578 1.1 christos { 579 1.1 christos const char locale[17+1]; 580 1.1 christos const char canonical[11+1]; 581 1.1 christos }; 582 1.1 christos 583 1.1 christos /* Table of platform-dependent mappings, sorted in ascending order. */ 584 1.1 christos static const struct table_entry locale_table[] = 585 1.1 christos { 586 1.1 christos # if defined __FreeBSD__ /* FreeBSD 4.2 */ 587 1.1 christos { "cs_CZ.ISO_8859-2", "ISO-8859-2" }, 588 1.1 christos { "da_DK.DIS_8859-15", "ISO-8859-15" }, 589 1.1 christos { "da_DK.ISO_8859-1", "ISO-8859-1" }, 590 1.1 christos { "de_AT.DIS_8859-15", "ISO-8859-15" }, 591 1.1 christos { "de_AT.ISO_8859-1", "ISO-8859-1" }, 592 1.1 christos { "de_CH.DIS_8859-15", "ISO-8859-15" }, 593 1.1 christos { "de_CH.ISO_8859-1", "ISO-8859-1" }, 594 1.1 christos { "de_DE.DIS_8859-15", "ISO-8859-15" }, 595 1.1 christos { "de_DE.ISO_8859-1", "ISO-8859-1" }, 596 1.1 christos { "en_AU.DIS_8859-15", "ISO-8859-15" }, 597 1.1 christos { "en_AU.ISO_8859-1", "ISO-8859-1" }, 598 1.1 christos { "en_CA.DIS_8859-15", "ISO-8859-15" }, 599 1.1 christos { "en_CA.ISO_8859-1", "ISO-8859-1" }, 600 1.1 christos { "en_GB.DIS_8859-15", "ISO-8859-15" }, 601 1.1 christos { "en_GB.ISO_8859-1", "ISO-8859-1" }, 602 1.1 christos { "en_US.DIS_8859-15", "ISO-8859-15" }, 603 1.1 christos { "en_US.ISO_8859-1", "ISO-8859-1" }, 604 1.1 christos { "es_ES.DIS_8859-15", "ISO-8859-15" }, 605 1.1 christos { "es_ES.ISO_8859-1", "ISO-8859-1" }, 606 1.1 christos { "fi_FI.DIS_8859-15", "ISO-8859-15" }, 607 1.1 christos { "fi_FI.ISO_8859-1", "ISO-8859-1" }, 608 1.1 christos { "fr_BE.DIS_8859-15", "ISO-8859-15" }, 609 1.1 christos { "fr_BE.ISO_8859-1", "ISO-8859-1" }, 610 1.1 christos { "fr_CA.DIS_8859-15", "ISO-8859-15" }, 611 1.1 christos { "fr_CA.ISO_8859-1", "ISO-8859-1" }, 612 1.1 christos { "fr_CH.DIS_8859-15", "ISO-8859-15" }, 613 1.1 christos { "fr_CH.ISO_8859-1", "ISO-8859-1" }, 614 1.1 christos { "fr_FR.DIS_8859-15", "ISO-8859-15" }, 
615 1.1 christos { "fr_FR.ISO_8859-1", "ISO-8859-1" }, 616 1.1 christos { "hr_HR.ISO_8859-2", "ISO-8859-2" }, 617 1.1 christos { "hu_HU.ISO_8859-2", "ISO-8859-2" }, 618 1.1 christos { "is_IS.DIS_8859-15", "ISO-8859-15" }, 619 1.1 christos { "is_IS.ISO_8859-1", "ISO-8859-1" }, 620 1.1 christos { "it_CH.DIS_8859-15", "ISO-8859-15" }, 621 1.1 christos { "it_CH.ISO_8859-1", "ISO-8859-1" }, 622 1.1 christos { "it_IT.DIS_8859-15", "ISO-8859-15" }, 623 1.1 christos { "it_IT.ISO_8859-1", "ISO-8859-1" }, 624 1.1 christos { "ja_JP.EUC", "EUC-JP" }, 625 1.1 christos { "ja_JP.SJIS", "SHIFT_JIS" }, 626 1.1 christos { "ja_JP.Shift_JIS", "SHIFT_JIS" }, 627 1.1 christos { "ko_KR.EUC", "EUC-KR" }, 628 1.1 christos { "la_LN.ASCII", "ASCII" }, 629 1.1 christos { "la_LN.DIS_8859-15", "ISO-8859-15" }, 630 1.1 christos { "la_LN.ISO_8859-1", "ISO-8859-1" }, 631 1.1 christos { "la_LN.ISO_8859-2", "ISO-8859-2" }, 632 1.1 christos { "la_LN.ISO_8859-4", "ISO-8859-4" }, 633 1.1 christos { "lt_LN.ASCII", "ASCII" }, 634 1.1 christos { "lt_LN.DIS_8859-15", "ISO-8859-15" }, 635 1.1 christos { "lt_LN.ISO_8859-1", "ISO-8859-1" }, 636 1.1 christos { "lt_LN.ISO_8859-2", "ISO-8859-2" }, 637 1.1 christos { "lt_LT.ISO_8859-4", "ISO-8859-4" }, 638 1.1 christos { "nl_BE.DIS_8859-15", "ISO-8859-15" }, 639 1.1 christos { "nl_BE.ISO_8859-1", "ISO-8859-1" }, 640 1.1 christos { "nl_NL.DIS_8859-15", "ISO-8859-15" }, 641 1.1 christos { "nl_NL.ISO_8859-1", "ISO-8859-1" }, 642 1.1 christos { "no_NO.DIS_8859-15", "ISO-8859-15" }, 643 1.1 christos { "no_NO.ISO_8859-1", "ISO-8859-1" }, 644 1.1 christos { "pl_PL.ISO_8859-2", "ISO-8859-2" }, 645 1.1 christos { "pt_PT.DIS_8859-15", "ISO-8859-15" }, 646 1.1 christos { "pt_PT.ISO_8859-1", "ISO-8859-1" }, 647 1.1 christos { "ru_RU.CP866", "CP866" }, 648 1.1 christos { "ru_RU.ISO_8859-5", "ISO-8859-5" }, 649 1.1 christos { "ru_RU.KOI8-R", "KOI8-R" }, 650 1.1 christos { "ru_SU.CP866", "CP866" }, 651 1.1 christos { "ru_SU.ISO_8859-5", "ISO-8859-5" }, 652 1.1 christos { 
"ru_SU.KOI8-R", "KOI8-R" }, 653 1.1 christos { "sl_SI.ISO_8859-2", "ISO-8859-2" }, 654 1.1 christos { "sv_SE.DIS_8859-15", "ISO-8859-15" }, 655 1.1 christos { "sv_SE.ISO_8859-1", "ISO-8859-1" }, 656 1.1 christos { "uk_UA.KOI8-U", "KOI8-U" }, 657 1.1 christos { "zh_CN.EUC", "GB2312" }, 658 1.1 christos { "zh_TW.BIG5", "BIG5" }, 659 1.1 christos { "zh_TW.Big5", "BIG5" } 660 1.1 christos # define locale_table_defined 661 1.1 christos # endif 662 1.1 christos # if defined __DJGPP__ /* DOS / DJGPP 2.03 */ 663 1.1 christos /* The encodings given here may not all be correct. 664 1.1 christos If you find that the encoding given for your language and 665 1.1 christos country is not the one your DOS machine actually uses, just 666 1.1 christos correct it in this file, and send a mail to 667 1.1 christos Juan Manuel Guerrero <juan.guerrero (at) gmx.de> 668 1.1 christos and <bug-gnulib (at) gnu.org>. */ 669 1.1 christos { "C", "ASCII" }, 670 1.1 christos { "ar", "CP864" }, 671 1.1 christos { "ar_AE", "CP864" }, 672 1.1 christos { "ar_DZ", "CP864" }, 673 1.1 christos { "ar_EG", "CP864" }, 674 1.1 christos { "ar_IQ", "CP864" }, 675 1.1 christos { "ar_IR", "CP864" }, 676 1.1 christos { "ar_JO", "CP864" }, 677 1.1 christos { "ar_KW", "CP864" }, 678 1.1 christos { "ar_MA", "CP864" }, 679 1.1 christos { "ar_OM", "CP864" }, 680 1.1 christos { "ar_QA", "CP864" }, 681 1.1 christos { "ar_SA", "CP864" }, 682 1.1 christos { "ar_SY", "CP864" }, 683 1.1 christos { "be", "CP866" }, 684 1.1 christos { "be_BE", "CP866" }, 685 1.1 christos { "bg", "CP866" }, /* not CP855 ?? */ 686 1.1 christos { "bg_BG", "CP866" }, /* not CP855 ?? */ 687 1.1 christos { "ca", "CP850" }, 688 1.1 christos { "ca_ES", "CP850" }, 689 1.1 christos { "cs", "CP852" }, 690 1.1 christos { "cs_CZ", "CP852" }, 691 1.1 christos { "da", "CP865" }, /* not CP850 ?? */ 692 1.1 christos { "da_DK", "CP865" }, /* not CP850 ?? 
*/ 693 1.1 christos { "de", "CP850" }, 694 1.1 christos { "de_AT", "CP850" }, 695 1.1 christos { "de_CH", "CP850" }, 696 1.1 christos { "de_DE", "CP850" }, 697 1.1 christos { "el", "CP869" }, 698 1.1 christos { "el_GR", "CP869" }, 699 1.1 christos { "en", "CP850" }, 700 1.1 christos { "en_AU", "CP850" }, /* not CP437 ?? */ 701 1.1 christos { "en_CA", "CP850" }, 702 1.1 christos { "en_GB", "CP850" }, 703 1.1 christos { "en_NZ", "CP437" }, 704 1.1 christos { "en_US", "CP437" }, 705 1.1 christos { "en_ZA", "CP850" }, /* not CP437 ?? */ 706 1.1 christos { "eo", "CP850" }, 707 1.1 christos { "eo_EO", "CP850" }, 708 1.1 christos { "es", "CP850" }, 709 1.1 christos { "es_AR", "CP850" }, 710 1.1 christos { "es_BO", "CP850" }, 711 1.1 christos { "es_CL", "CP850" }, 712 1.1 christos { "es_CO", "CP850" }, 713 1.1 christos { "es_CR", "CP850" }, 714 1.1 christos { "es_CU", "CP850" }, 715 1.1 christos { "es_DO", "CP850" }, 716 1.1 christos { "es_EC", "CP850" }, 717 1.1 christos { "es_ES", "CP850" }, 718 1.1 christos { "es_GT", "CP850" }, 719 1.1 christos { "es_HN", "CP850" }, 720 1.1 christos { "es_MX", "CP850" }, 721 1.1 christos { "es_NI", "CP850" }, 722 1.1 christos { "es_PA", "CP850" }, 723 1.1 christos { "es_PE", "CP850" }, 724 1.1 christos { "es_PY", "CP850" }, 725 1.1 christos { "es_SV", "CP850" }, 726 1.1 christos { "es_UY", "CP850" }, 727 1.1 christos { "es_VE", "CP850" }, 728 1.1 christos { "et", "CP850" }, 729 1.1 christos { "et_EE", "CP850" }, 730 1.1 christos { "eu", "CP850" }, 731 1.1 christos { "eu_ES", "CP850" }, 732 1.1 christos { "fi", "CP850" }, 733 1.1 christos { "fi_FI", "CP850" }, 734 1.1 christos { "fr", "CP850" }, 735 1.1 christos { "fr_BE", "CP850" }, 736 1.1 christos { "fr_CA", "CP850" }, 737 1.1 christos { "fr_CH", "CP850" }, 738 1.1 christos { "fr_FR", "CP850" }, 739 1.1 christos { "ga", "CP850" }, 740 1.1 christos { "ga_IE", "CP850" }, 741 1.1 christos { "gd", "CP850" }, 742 1.1 christos { "gd_GB", "CP850" }, 743 1.1 christos { "gl", "CP850" }, 744 
1.1 christos { "gl_ES", "CP850" }, 745 1.1 christos { "he", "CP862" }, 746 1.1 christos { "he_IL", "CP862" }, 747 1.1 christos { "hr", "CP852" }, 748 1.1 christos { "hr_HR", "CP852" }, 749 1.1 christos { "hu", "CP852" }, 750 1.1 christos { "hu_HU", "CP852" }, 751 1.1 christos { "id", "CP850" }, /* not CP437 ?? */ 752 1.1 christos { "id_ID", "CP850" }, /* not CP437 ?? */ 753 1.1 christos { "is", "CP861" }, /* not CP850 ?? */ 754 1.1 christos { "is_IS", "CP861" }, /* not CP850 ?? */ 755 1.1 christos { "it", "CP850" }, 756 1.1 christos { "it_CH", "CP850" }, 757 1.1 christos { "it_IT", "CP850" }, 758 1.1 christos { "ja", "CP932" }, 759 1.1 christos { "ja_JP", "CP932" }, 760 1.1 christos { "kr", "CP949" }, /* not CP934 ?? */ 761 1.1 christos { "kr_KR", "CP949" }, /* not CP934 ?? */ 762 1.1 christos { "lt", "CP775" }, 763 1.1 christos { "lt_LT", "CP775" }, 764 1.1 christos { "lv", "CP775" }, 765 1.1 christos { "lv_LV", "CP775" }, 766 1.1 christos { "mk", "CP866" }, /* not CP855 ?? */ 767 1.1 christos { "mk_MK", "CP866" }, /* not CP855 ?? */ 768 1.1 christos { "mt", "CP850" }, 769 1.1 christos { "mt_MT", "CP850" }, 770 1.1 christos { "nb", "CP865" }, /* not CP850 ?? */ 771 1.1 christos { "nb_NO", "CP865" }, /* not CP850 ?? */ 772 1.1 christos { "nl", "CP850" }, 773 1.1 christos { "nl_BE", "CP850" }, 774 1.1 christos { "nl_NL", "CP850" }, 775 1.1 christos { "nn", "CP865" }, /* not CP850 ?? */ 776 1.1 christos { "nn_NO", "CP865" }, /* not CP850 ?? */ 777 1.1 christos { "no", "CP865" }, /* not CP850 ?? */ 778 1.1 christos { "no_NO", "CP865" }, /* not CP850 ?? 
*/ 779 1.1 christos { "pl", "CP852" }, 780 1.1 christos { "pl_PL", "CP852" }, 781 1.1 christos { "pt", "CP850" }, 782 1.1 christos { "pt_BR", "CP850" }, 783 1.1 christos { "pt_PT", "CP850" }, 784 1.1 christos { "ro", "CP852" }, 785 1.1 christos { "ro_RO", "CP852" }, 786 1.1 christos { "ru", "CP866" }, 787 1.1 christos { "ru_RU", "CP866" }, 788 1.1 christos { "sk", "CP852" }, 789 1.1 christos { "sk_SK", "CP852" }, 790 1.1 christos { "sl", "CP852" }, 791 1.1 christos { "sl_SI", "CP852" }, 792 1.1 christos { "sq", "CP852" }, 793 1.1 christos { "sq_AL", "CP852" }, 794 1.1 christos { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */ 795 1.1 christos { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */ 796 1.1 christos { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */ 797 1.1 christos { "sv", "CP850" }, 798 1.1 christos { "sv_SE", "CP850" }, 799 1.1 christos { "th", "CP874" }, 800 1.1 christos { "th_TH", "CP874" }, 801 1.1 christos { "tr", "CP857" }, 802 1.1 christos { "tr_TR", "CP857" }, 803 1.1 christos { "uk", "CP1125" }, 804 1.1 christos { "uk_UA", "CP1125" }, 805 1.1 christos { "zh_CN", "GBK" }, 806 1.1 christos { "zh_TW", "CP950" } /* not CP938 ?? */ 807 1.1 christos # define locale_table_defined 808 1.1 christos # endif 809 1.1 christos # ifndef locale_table_defined 810 1.1 christos /* Just a dummy entry, to avoid a C syntax error. */ 811 1.1 christos { "", "" } 812 1.1 christos # endif 813 1.1 christos }; 814 1.1 christos 815 1.1 christos #endif 816 1.1 christos 817 1.1 christos 818 1.1 christos /* Determine the current locale's character encoding, and canonicalize it 819 1.1 christos into one of the canonical names listed below. 820 1.1 christos The result must not be freed; it is statically allocated. 
/* Determine the current locale's character encoding, and canonicalize it
   where a mapping table for the platform is available.
   The result must not be freed; it is either a string literal, a static
   buffer, a string owned by the C library, or a pointer into the
   environment.  The result becomes invalid when setlocale() is used to
   change the global locale, or when the value of one of the environment
   variables LC_ALL, LC_CTYPE, LANG is changed; threads in multithreaded
   programs should not do this.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never NULL.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

  /* This function must be multithread-safe.  To achieve this without using
     thread-local storage, we use a simple strcpy or memcpy to fill this static
     buffer.  Filling it through, for example, strcpy + strcat would not be
     guaranteed to leave the buffer's contents intact if another thread is
     currently accessing it.  If necessary, the contents is first assembled in
     a stack-allocated buffer.  */

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char resultbuf[2 + 10 + 1];

      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                /* Note: this returns a pointer into the environment and
                   bypasses the alias resolution below.  */
                return dot;
              /* modifier was found at or after dot, so the difference is
                 non-negative; make the unsigned comparison explicit.  */
              if ((size_t) (modifier - dot) < sizeof (resultbuf))
                {
                  /* This way of filling resultbuf is multithread-safe.  */
                  memcpy (resultbuf, dot, modifier - dot);
                  resultbuf [modifier - dot] = '\0';
                  return resultbuf;
                }
              /* Otherwise the encoding name does not fit; fall through to
                 the codepage number.  */
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      {
        char buf[2 + 10 + 1];

        sprintf (buf, "CP%u", GetACP ());
        strcpy (resultbuf, buf);
        codeset = resultbuf;
      }
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  char buf[2 + 10 + 1];
  static char resultbuf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_CTYPE, NULL);
  /* Guard against a NULL return so that strrchr is never handed a null
     pointer; in that case we fall back to GetACP() below.  */
  char *pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead.  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
    codeset = "UTF-8";
  else
    {
      strcpy (resultbuf, buf);
      codeset = resultbuf;
    }

# elif defined OS2

  const char *locale;
  static char resultbuf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            /* Note: this returns a pointer into the environment and
               bypasses the alias resolution below.  */
            return dot;
          /* modifier was found at or after dot, so the difference is
             non-negative; make the unsigned comparison explicit.  */
          if ((size_t) (modifier - dot) < sizeof (resultbuf))
            {
              /* This way of filling resultbuf is multithread-safe.  */
              memcpy (resultbuf, dot, modifier - dot);
              resultbuf [modifier - dot] = '\0';
              return resultbuf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          char buf[2 + 10 + 1];

          /* cp[0] is a ULONG; convert it explicitly so that the argument
             type matches the %u conversion and the output is bounded to
             the 10 digits that buf has room for.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          strcpy (resultbuf, buf);
          codeset = resultbuf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                   strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The label lives in the never-taken 'if (0)' arm so that jumping to it
       skips the "not found" block in the 'else' arm, while falling out of
       the search runs that block.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* Make sure the lookup below never sees a null pointer.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
               strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* The label lives in the never-taken 'if (0)' arm so that jumping to it
       skips the "not found" block in the 'else' arm, while falling out of
       the search runs that block.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}