Home | History | Annotate | Line # | Download | only in gnulib-lib
striconv.c revision 1.1.1.1
      1 /* Charset conversion.
      2    Copyright (C) 2001-2006 Free Software Foundation, Inc.
      3    Written by Bruno Haible and Simon Josefsson.
      4 
      5    This program is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 2, or (at your option)
      8    any later version.
      9 
     10    This program is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    You should have received a copy of the GNU General Public License
     16    along with this program; if not, write to the Free Software Foundation,
     17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     18 
     19 #include <config.h>
     20 
     21 /* Specification.  */
     22 #include "striconv.h"
     23 
     24 #include <errno.h>
     25 #include <stdlib.h>
     26 #include <string.h>
     27 
     28 #if HAVE_ICONV
     29 # include <iconv.h>
     30 /* Get MB_LEN_MAX, CHAR_BIT.  */
     31 # include <limits.h>
     32 #endif
     33 
     34 #include "strdup.h"
     35 #include "c-strcase.h"
     36 
     37 #ifndef SIZE_MAX
     38 # define SIZE_MAX ((size_t) -1)
     39 #endif
     40 
     41 
     42 #if HAVE_ICONV
     43 
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD and deliver the converted bytes in a heap buffer.

   On entry *RESULTP must be NULL or a pointer that may be passed to
   realloc.  On success the function returns 0 and stores the number of
   output bytes in *LENGTHP.  If that number is non-zero, *RESULTP is set
   to the (re)allocated buffer holding exactly that many bytes; if it is
   zero, *RESULTP is left untouched.

   On failure the function returns -1 with errno set.  If the failure
   happens after the buffer was (re)allocated, *RESULTP and *LENGTHP have
   already been updated and the caller still owns the buffer.

   An incomplete multibyte sequence at the end of the input (iconv failing
   with EINVAL) is not an error: conversion stops in front of it and
   everything converted up to that point is returned.

   The work is done in two passes.  The first pass converts into a scratch
   buffer only to measure the output; the second pass converts into the
   exactly sized result buffer.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  /* Number of leading input bytes the measuring pass was able to consume.
     The second pass converts exactly these bytes, so that both passes
     produce the same output even when the input ends in an incomplete
     multibyte sequence.  */
  size_t usable_srclen;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);
        int incomplete_tail = 0;

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              /* The scratch buffer is full; just go around again.  */
              ;
            else if (errno == EINVAL)
              /* Incomplete sequence at the end of the input.  iconv may
                 still have produced output in this call, so it must be
                 counted below before the loop is left.  */
              incomplete_tail = 1;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
        if (incomplete_tail)
          break;
      }
    usable_srclen = srclen - insize;

    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    /* Account for the bytes needed to return to the initial shift state.  */
    {
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
  }

  if (length == 0)
    {
      /* Nothing to store; *resultp is deliberately left as it was.  */
      *lengthp = 0;
      return 0;
    }
  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
  if (result == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *resultp = result;
  *lengthp = length;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = usable_srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
    }
# endif
    /* Both passes must have produced the same number of bytes.  */
    if (outsize != 0)
      abort ();
  }

  return 0;
# undef tmpbufsize
}
    182 
# if !PROBABLY_SLOWER
/* Double the capacity of the output buffer used by str_cd_iconv.

   *BUFP is the buffer, *CAPACITYP its allocated size, *OUT_CURSORP the
   current write position inside it and *OUT_LEFTP the writable bytes left
   (one byte of the capacity is always held back for the terminating NUL).
   On success all four are updated for the larger buffer and 0 is returned.
   If the doubled size would overflow, or realloc fails, errno is set to
   ENOMEM, -1 is returned, and the old buffer is left intact.  */
static int
str_cd_iconv_grow (char **bufp, size_t *capacityp,
                   char **out_cursorp, size_t *out_leftp)
{
  size_t written = *out_cursorp - *bufp;
  size_t doubled = *capacityp * 2;
  char *bigger;

  if (!(doubled > *capacityp))
    {
      errno = ENOMEM;
      return -1;
    }
  bigger = (char *) realloc (*bufp, doubled);
  if (bigger == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *bufp = bigger;
  *capacityp = doubled;
  *out_cursorp = bigger + written;
  *out_leftp = doubled - 1 - written;
  return 0;
}
# endif

/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD and return the result as a freshly allocated NUL-terminated string.
   On failure NULL is returned with errno set.

   The terminating NUL is never handed to iconv.  Most encodings map a
   trailing NUL byte to a trailing NUL byte, but UTF-7 does not, so the
   terminator is appended by hand once the conversion is done.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
# if PROBABLY_SLOWER

  /* Variant built on mem_cd_iconv: convert first, then make room for the
     terminator.  */
  char *converted = NULL;
  size_t length;
  int status = mem_cd_iconv (src, strlen (src), cd, &converted, &length);
  char *terminated;

  if (status < 0)
    {
      /* Release whatever was allocated without disturbing errno.  */
      int saved_errno = errno;
      free (converted);
      errno = saved_errno;
      return NULL;
    }

  /* Add the terminating NUL byte.  */
  if (converted != NULL)
    terminated = realloc (converted, length + 1);
  else
    terminated = malloc (length + 1);
  if (terminated == NULL)
    {
      free (converted);
      errno = ENOMEM;
      return NULL;
    }
  terminated[length] = '\0';

  return terminated;

# else

  /* Single-pass variant: convert into a generously sized buffer and grow
     it on demand.  */
  const char *in_cursor = src;
  size_t in_left = strlen (src);
  size_t sqrt_limit = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
  size_t capacity = in_left;
  size_t total;
  char *buf;
  char *out_cursor;
  size_t out_left;

  /* Guess a worst-case output size so that a realloc is usually not
     needed.  A wrong guess is harmless as long as the size is non-zero
     and computing it cannot overflow.  */
  if (capacity <= sqrt_limit / MB_LEN_MAX)
    capacity *= MB_LEN_MAX;
  capacity += 1; /* for the terminating NUL */

  buf = (char *) malloc (capacity);
  if (buf == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Put the descriptor into its initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Invariants from here on:
       in_cursor + in_left   == src + strlen (src)
       out_cursor + out_left == buf + capacity - 1  */
  out_cursor = buf;
  out_left = capacity - 1;

  /* Convert the input.  */
  for (;;)
    {
      size_t res = iconv (cd,
                          (ICONV_CONST char **) &in_cursor, &in_left,
                          &out_cursor, &out_left);

      if (res != (size_t)(-1))
        {
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
          /* Irix iconv() inserts a NUL byte if it cannot convert.
             NetBSD iconv() inserts a question mark if it cannot convert.
             Only GNU libiconv and GNU libc are known to prefer to fail
             rather than doing a lossy conversion.  */
          if (res > 0)
            {
              errno = EILSEQ;
              goto failed;
            }
# endif
          break;
        }

      /* Incomplete sequence at the end of the input: keep what we have.  */
      if (errno == EINVAL)
        break;
      /* Anything other than "output buffer full" is fatal.  */
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &capacity, &out_cursor, &out_left) < 0)
        goto failed;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Emit whatever is needed to return to the initial shift state.  */
  for (;;)
    {
      size_t res = iconv (cd, NULL, NULL, &out_cursor, &out_left);

      if (res != (size_t)(-1))
        break;
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &capacity, &out_cursor, &out_left) < 0)
        goto failed;
    }
# endif

  /* Append the terminating NUL byte; its slot was reserved all along.  */
  *out_cursor++ = '\0';
  total = out_cursor - buf;

  /* Hand back memory that turned out not to be needed.  A failing realloc
     is ignored because the larger buffer is still valid.  */
  if (total < capacity)
    {
      char *trimmed = (char *) realloc (buf, total);

      if (trimmed != NULL)
        buf = trimmed;
    }

  return buf;

 failed:
  {
    /* Free the buffer while keeping the errno that describes the failure.  */
    int saved_errno = errno;
    free (buf);
    errno = saved_errno;
    return NULL;
  }

# endif
}
    378 
    379 #endif
    380 
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated
   NUL-terminated string.

   Returns NULL with errno set on failure.  When the two codeset names are
   equal (compared case-insensitively with c_strcasecmp) the string is
   simply duplicated.  When the file is built without iconv support, any
   real conversion fails with errno == ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      /* Not every strdup implementation sets errno when it runs out of
         memory; set it here so that a NULL return always comes with a
         meaningful errno, like the other failure paths in this file.  */
      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        /* errno is whatever iconv_open left behind.  */
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
    438