striconv.c revision 1.1.1.1 1 1.1 christos /* Charset conversion.
2 1.1 christos Copyright (C) 2001-2006 Free Software Foundation, Inc.
3 1.1 christos Written by Bruno Haible and Simon Josefsson.
4 1.1 christos
5 1.1 christos This program is free software; you can redistribute it and/or modify
6 1.1 christos it under the terms of the GNU General Public License as published by
7 1.1 christos the Free Software Foundation; either version 2, or (at your option)
8 1.1 christos any later version.
9 1.1 christos
10 1.1 christos This program is distributed in the hope that it will be useful,
11 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
12 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 1.1 christos GNU General Public License for more details.
14 1.1 christos
15 1.1 christos You should have received a copy of the GNU General Public License
16 1.1 christos along with this program; if not, write to the Free Software Foundation,
17 1.1 christos Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18 1.1 christos
19 1.1 christos #include <config.h>
20 1.1 christos
21 1.1 christos /* Specification. */
22 1.1 christos #include "striconv.h"
23 1.1 christos
24 1.1 christos #include <errno.h>
25 1.1 christos #include <stdlib.h>
26 1.1 christos #include <string.h>
27 1.1 christos
28 1.1 christos #if HAVE_ICONV
29 1.1 christos # include <iconv.h>
30 1.1 christos /* Get MB_LEN_MAX, CHAR_BIT. */
31 1.1 christos # include <limits.h>
32 1.1 christos #endif
33 1.1 christos
34 1.1 christos #include "strdup.h"
35 1.1 christos #include "c-strcase.h"
36 1.1 christos
37 1.1 christos #ifndef SIZE_MAX
38 1.1 christos # define SIZE_MAX ((size_t) -1)
39 1.1 christos #endif
40 1.1 christos
41 1.1 christos
42 1.1 christos #if HAVE_ICONV
43 1.1 christos
44 1.1 christos int
45 1.1 christos mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
46 1.1 christos char **resultp, size_t *lengthp)
47 1.1 christos {
48 1.1 christos # define tmpbufsize 4096
49 1.1 christos size_t length;
50 1.1 christos char *result;
51 1.1 christos
52 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
53 1.1 christos # if defined _LIBICONV_VERSION \
54 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
55 1.1 christos /* Set to the initial state. */
56 1.1 christos iconv (cd, NULL, NULL, NULL, NULL);
57 1.1 christos # endif
58 1.1 christos
59 1.1 christos /* Determine the length we need. */
60 1.1 christos {
61 1.1 christos size_t count = 0;
62 1.1 christos char tmpbuf[tmpbufsize];
63 1.1 christos const char *inptr = src;
64 1.1 christos size_t insize = srclen;
65 1.1 christos
66 1.1 christos while (insize > 0)
67 1.1 christos {
68 1.1 christos char *outptr = tmpbuf;
69 1.1 christos size_t outsize = tmpbufsize;
70 1.1 christos size_t res = iconv (cd,
71 1.1 christos (ICONV_CONST char **) &inptr, &insize,
72 1.1 christos &outptr, &outsize);
73 1.1 christos
74 1.1 christos if (res == (size_t)(-1))
75 1.1 christos {
76 1.1 christos if (errno == E2BIG)
77 1.1 christos ;
78 1.1 christos else if (errno == EINVAL)
79 1.1 christos break;
80 1.1 christos else
81 1.1 christos return -1;
82 1.1 christos }
83 1.1 christos # if !defined _LIBICONV_VERSION && !defined __GLIBC__
84 1.1 christos /* Irix iconv() inserts a NUL byte if it cannot convert.
85 1.1 christos NetBSD iconv() inserts a question mark if it cannot convert.
86 1.1 christos Only GNU libiconv and GNU libc are known to prefer to fail rather
87 1.1 christos than doing a lossy conversion. */
88 1.1 christos else if (res > 0)
89 1.1 christos {
90 1.1 christos errno = EILSEQ;
91 1.1 christos return -1;
92 1.1 christos }
93 1.1 christos # endif
94 1.1 christos count += outptr - tmpbuf;
95 1.1 christos }
96 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
97 1.1 christos # if defined _LIBICONV_VERSION \
98 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
99 1.1 christos {
100 1.1 christos char *outptr = tmpbuf;
101 1.1 christos size_t outsize = tmpbufsize;
102 1.1 christos size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
103 1.1 christos
104 1.1 christos if (res == (size_t)(-1))
105 1.1 christos return -1;
106 1.1 christos count += outptr - tmpbuf;
107 1.1 christos }
108 1.1 christos # endif
109 1.1 christos length = count;
110 1.1 christos }
111 1.1 christos
112 1.1 christos if (length == 0)
113 1.1 christos {
114 1.1 christos *lengthp = 0;
115 1.1 christos return 0;
116 1.1 christos }
117 1.1 christos result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
118 1.1 christos if (result == NULL)
119 1.1 christos {
120 1.1 christos errno = ENOMEM;
121 1.1 christos return -1;
122 1.1 christos }
123 1.1 christos *resultp = result;
124 1.1 christos *lengthp = length;
125 1.1 christos
126 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
127 1.1 christos # if defined _LIBICONV_VERSION \
128 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
129 1.1 christos /* Return to the initial state. */
130 1.1 christos iconv (cd, NULL, NULL, NULL, NULL);
131 1.1 christos # endif
132 1.1 christos
133 1.1 christos /* Do the conversion for real. */
134 1.1 christos {
135 1.1 christos const char *inptr = src;
136 1.1 christos size_t insize = srclen;
137 1.1 christos char *outptr = result;
138 1.1 christos size_t outsize = length;
139 1.1 christos
140 1.1 christos while (insize > 0)
141 1.1 christos {
142 1.1 christos size_t res = iconv (cd,
143 1.1 christos (ICONV_CONST char **) &inptr, &insize,
144 1.1 christos &outptr, &outsize);
145 1.1 christos
146 1.1 christos if (res == (size_t)(-1))
147 1.1 christos {
148 1.1 christos if (errno == EINVAL)
149 1.1 christos break;
150 1.1 christos else
151 1.1 christos return -1;
152 1.1 christos }
153 1.1 christos # if !defined _LIBICONV_VERSION && !defined __GLIBC__
154 1.1 christos /* Irix iconv() inserts a NUL byte if it cannot convert.
155 1.1 christos NetBSD iconv() inserts a question mark if it cannot convert.
156 1.1 christos Only GNU libiconv and GNU libc are known to prefer to fail rather
157 1.1 christos than doing a lossy conversion. */
158 1.1 christos else if (res > 0)
159 1.1 christos {
160 1.1 christos errno = EILSEQ;
161 1.1 christos return -1;
162 1.1 christos }
163 1.1 christos # endif
164 1.1 christos }
165 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
166 1.1 christos # if defined _LIBICONV_VERSION \
167 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
168 1.1 christos {
169 1.1 christos size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
170 1.1 christos
171 1.1 christos if (res == (size_t)(-1))
172 1.1 christos return -1;
173 1.1 christos }
174 1.1 christos # endif
175 1.1 christos if (outsize != 0)
176 1.1 christos abort ();
177 1.1 christos }
178 1.1 christos
179 1.1 christos return 0;
180 1.1 christos # undef tmpbufsize
181 1.1 christos }
182 1.1 christos
183 1.1 christos char *
184 1.1 christos str_cd_iconv (const char *src, iconv_t cd)
185 1.1 christos {
186 1.1 christos /* For most encodings, a trailing NUL byte in the input will be converted
187 1.1 christos to a trailing NUL byte in the output. But not for UTF-7. So that this
188 1.1 christos function is usable for UTF-7, we have to exclude the NUL byte from the
189 1.1 christos conversion and add it by hand afterwards. */
190 1.1 christos # if PROBABLY_SLOWER
191 1.1 christos
192 1.1 christos char *result = NULL;
193 1.1 christos size_t length;
194 1.1 christos int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
195 1.1 christos char *final_result;
196 1.1 christos
197 1.1 christos if (retval < 0)
198 1.1 christos {
199 1.1 christos if (result != NULL)
200 1.1 christos {
201 1.1 christos int saved_errno = errno;
202 1.1 christos free (result);
203 1.1 christos errno = saved_errno;
204 1.1 christos }
205 1.1 christos return NULL;
206 1.1 christos }
207 1.1 christos
208 1.1 christos /* Add the terminating NUL byte. */
209 1.1 christos final_result =
210 1.1 christos (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
211 1.1 christos if (final_result == NULL)
212 1.1 christos {
213 1.1 christos if (result != NULL)
214 1.1 christos free (result);
215 1.1 christos errno = ENOMEM;
216 1.1 christos return NULL;
217 1.1 christos }
218 1.1 christos final_result[length] = '\0';
219 1.1 christos
220 1.1 christos return final_result;
221 1.1 christos
222 1.1 christos # else
223 1.1 christos
224 1.1 christos char *result;
225 1.1 christos size_t result_size;
226 1.1 christos size_t length;
227 1.1 christos const char *inptr = src;
228 1.1 christos size_t inbytes_remaining = strlen (src);
229 1.1 christos
230 1.1 christos /* Make a guess for the worst-case output size, in order to avoid a
231 1.1 christos realloc. It's OK if the guess is wrong as long as it is not zero and
232 1.1 christos doesn't lead to an integer overflow. */
233 1.1 christos result_size = inbytes_remaining;
234 1.1 christos {
235 1.1 christos size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
236 1.1 christos if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
237 1.1 christos result_size *= MB_LEN_MAX;
238 1.1 christos }
239 1.1 christos result_size += 1; /* for the terminating NUL */
240 1.1 christos
241 1.1 christos result = (char *) malloc (result_size);
242 1.1 christos if (result == NULL)
243 1.1 christos {
244 1.1 christos errno = ENOMEM;
245 1.1 christos return NULL;
246 1.1 christos }
247 1.1 christos
248 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
249 1.1 christos # if defined _LIBICONV_VERSION \
250 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
251 1.1 christos /* Set to the initial state. */
252 1.1 christos iconv (cd, NULL, NULL, NULL, NULL);
253 1.1 christos # endif
254 1.1 christos
255 1.1 christos /* Do the conversion. */
256 1.1 christos {
257 1.1 christos char *outptr = result;
258 1.1 christos size_t outbytes_remaining = result_size - 1;
259 1.1 christos
260 1.1 christos for (;;)
261 1.1 christos {
262 1.1 christos /* Here inptr + inbytes_remaining = src + strlen (src),
263 1.1 christos outptr + outbytes_remaining = result + result_size - 1. */
264 1.1 christos size_t res = iconv (cd,
265 1.1 christos (ICONV_CONST char **) &inptr, &inbytes_remaining,
266 1.1 christos &outptr, &outbytes_remaining);
267 1.1 christos
268 1.1 christos if (res == (size_t)(-1))
269 1.1 christos {
270 1.1 christos if (errno == EINVAL)
271 1.1 christos break;
272 1.1 christos else if (errno == E2BIG)
273 1.1 christos {
274 1.1 christos size_t used = outptr - result;
275 1.1 christos size_t newsize = result_size * 2;
276 1.1 christos char *newresult;
277 1.1 christos
278 1.1 christos if (!(newsize > result_size))
279 1.1 christos {
280 1.1 christos errno = ENOMEM;
281 1.1 christos goto failed;
282 1.1 christos }
283 1.1 christos newresult = (char *) realloc (result, newsize);
284 1.1 christos if (newresult == NULL)
285 1.1 christos {
286 1.1 christos errno = ENOMEM;
287 1.1 christos goto failed;
288 1.1 christos }
289 1.1 christos result = newresult;
290 1.1 christos result_size = newsize;
291 1.1 christos outptr = result + used;
292 1.1 christos outbytes_remaining = result_size - 1 - used;
293 1.1 christos }
294 1.1 christos else
295 1.1 christos goto failed;
296 1.1 christos }
297 1.1 christos # if !defined _LIBICONV_VERSION && !defined __GLIBC__
298 1.1 christos /* Irix iconv() inserts a NUL byte if it cannot convert.
299 1.1 christos NetBSD iconv() inserts a question mark if it cannot convert.
300 1.1 christos Only GNU libiconv and GNU libc are known to prefer to fail rather
301 1.1 christos than doing a lossy conversion. */
302 1.1 christos else if (res > 0)
303 1.1 christos {
304 1.1 christos errno = EILSEQ;
305 1.1 christos goto failed;
306 1.1 christos }
307 1.1 christos # endif
308 1.1 christos else
309 1.1 christos break;
310 1.1 christos }
311 1.1 christos /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
312 1.1 christos # if defined _LIBICONV_VERSION \
313 1.1 christos || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
314 1.1 christos for (;;)
315 1.1 christos {
316 1.1 christos /* Here outptr + outbytes_remaining = result + result_size - 1. */
317 1.1 christos size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
318 1.1 christos
319 1.1 christos if (res == (size_t)(-1))
320 1.1 christos {
321 1.1 christos if (errno == E2BIG)
322 1.1 christos {
323 1.1 christos size_t used = outptr - result;
324 1.1 christos size_t newsize = result_size * 2;
325 1.1 christos char *newresult;
326 1.1 christos
327 1.1 christos if (!(newsize > result_size))
328 1.1 christos {
329 1.1 christos errno = ENOMEM;
330 1.1 christos goto failed;
331 1.1 christos }
332 1.1 christos newresult = (char *) realloc (result, newsize);
333 1.1 christos if (newresult == NULL)
334 1.1 christos {
335 1.1 christos errno = ENOMEM;
336 1.1 christos goto failed;
337 1.1 christos }
338 1.1 christos result = newresult;
339 1.1 christos result_size = newsize;
340 1.1 christos outptr = result + used;
341 1.1 christos outbytes_remaining = result_size - 1 - used;
342 1.1 christos }
343 1.1 christos else
344 1.1 christos goto failed;
345 1.1 christos }
346 1.1 christos else
347 1.1 christos break;
348 1.1 christos }
349 1.1 christos # endif
350 1.1 christos
351 1.1 christos /* Add the terminating NUL byte. */
352 1.1 christos *outptr++ = '\0';
353 1.1 christos
354 1.1 christos length = outptr - result;
355 1.1 christos }
356 1.1 christos
357 1.1 christos /* Give away unused memory. */
358 1.1 christos if (length < result_size)
359 1.1 christos {
360 1.1 christos char *smaller_result = (char *) realloc (result, length);
361 1.1 christos
362 1.1 christos if (smaller_result != NULL)
363 1.1 christos result = smaller_result;
364 1.1 christos }
365 1.1 christos
366 1.1 christos return result;
367 1.1 christos
368 1.1 christos failed:
369 1.1 christos {
370 1.1 christos int saved_errno = errno;
371 1.1 christos free (result);
372 1.1 christos errno = saved_errno;
373 1.1 christos return NULL;
374 1.1 christos }
375 1.1 christos
376 1.1 christos # endif
377 1.1 christos }
378 1.1 christos
379 1.1 christos #endif
380 1.1 christos
/* Convert the NUL terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.

   Return a freshly allocated, NUL terminated string that the caller must
   free, or NULL with errno set on failure.

   If the two codeset names compare equal under c_strcasecmp, no conversion
   is attempted and a plain copy of SRC is returned.  When the package was
   built without iconv support (HAVE_ICONV unset), any real conversion
   fails with errno = ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      /* Set errno explicitly, as every other allocation failure in this
         file does, rather than relying on strdup to do so.  */
      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        /* errno is the one set by iconv_open.  */
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
438