/* striconv.c, revision 1.1.1.1 (directory: gnulib-lib).  */
/* Charset conversion.
   Copyright (C) 2001-2006 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     18  1.1  christos 
     19  1.1  christos #include <config.h>
     20  1.1  christos 
     21  1.1  christos /* Specification.  */
     22  1.1  christos #include "striconv.h"
     23  1.1  christos 
     24  1.1  christos #include <errno.h>
     25  1.1  christos #include <stdlib.h>
     26  1.1  christos #include <string.h>
     27  1.1  christos 
     28  1.1  christos #if HAVE_ICONV
     29  1.1  christos # include <iconv.h>
     30  1.1  christos /* Get MB_LEN_MAX, CHAR_BIT.  */
     31  1.1  christos # include <limits.h>
     32  1.1  christos #endif
     33  1.1  christos 
     34  1.1  christos #include "strdup.h"
     35  1.1  christos #include "c-strcase.h"
     36  1.1  christos 
     37  1.1  christos #ifndef SIZE_MAX
     38  1.1  christos # define SIZE_MAX ((size_t) -1)
     39  1.1  christos #endif
     40  1.1  christos 
     41  1.1  christos 
     42  1.1  christos #if HAVE_ICONV
     43  1.1  christos 
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD, in two passes: the first pass converts into a scratch
   buffer only to measure the output, the second pass converts into a
   buffer of exactly that size.

   *RESULTP must be NULL or a block that may be passed to realloc().
   On success with non-empty output, *RESULTP is set to the (re)allocated
   buffer and *LENGTHP to the number of bytes in it.  If the converted
   output is empty, *LENGTHP is set to 0 and *RESULTP is left untouched.

   An incomplete multibyte sequence at the end of the input (iconv()
   failing with EINVAL) ends the conversion without being reported as an
   error; the output produced up to that point is returned.

   Return 0 if successful, otherwise -1 with errno set (ENOMEM if the
   buffer cannot be allocated, EILSEQ if a non-GNU iconv() performed a
   lossy conversion, or whatever iconv() reported).  A failure during
   the second pass leaves *RESULTP pointing to the already (re)allocated
   buffer and *LENGTHP holding its size.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    /* Give the scratch buffer at least the alignment of an unsigned int:
       an iconv implementation may store whole wide units into the output
       buffer for some target encodings, and a bare char array carries no
       alignment guarantee.  */
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
    char *const tmpbuf = tmp.buf;
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              /* Scratch buffer full: account for it and go on.  */
              ;
            else if (errno == EINVAL)
              /* Incomplete input at the end: stop here.  */
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      /* Flush the shift state; this may emit a few more bytes.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
  }

  if (length == 0)
    {
      *lengthp = 0;
      return 0;
    }
  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
  if (result == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *resultp = result;
  *lengthp = length;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && !defined __GLIBC__
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv and GNU libc are known to prefer to fail rather
           than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
    }
# endif
    /* Both passes must have produced exactly the same number of bytes.  */
    if (outsize != 0)
      abort ();
  }

  return 0;
# undef tmpbufsize
}
    182  1.1  christos 
    183  1.1  christos char *
    184  1.1  christos str_cd_iconv (const char *src, iconv_t cd)
    185  1.1  christos {
    186  1.1  christos   /* For most encodings, a trailing NUL byte in the input will be converted
    187  1.1  christos      to a trailing NUL byte in the output.  But not for UTF-7.  So that this
    188  1.1  christos      function is usable for UTF-7, we have to exclude the NUL byte from the
    189  1.1  christos      conversion and add it by hand afterwards.  */
    190  1.1  christos # if PROBABLY_SLOWER
    191  1.1  christos 
    192  1.1  christos   char *result = NULL;
    193  1.1  christos   size_t length;
    194  1.1  christos   int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
    195  1.1  christos   char *final_result;
    196  1.1  christos 
    197  1.1  christos   if (retval < 0)
    198  1.1  christos     {
    199  1.1  christos       if (result != NULL)
    200  1.1  christos 	{
    201  1.1  christos 	  int saved_errno = errno;
    202  1.1  christos 	  free (result);
    203  1.1  christos 	  errno = saved_errno;
    204  1.1  christos 	}
    205  1.1  christos       return NULL;
    206  1.1  christos     }
    207  1.1  christos 
    208  1.1  christos   /* Add the terminating NUL byte.  */
    209  1.1  christos   final_result =
    210  1.1  christos     (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
    211  1.1  christos   if (final_result == NULL)
    212  1.1  christos     {
    213  1.1  christos       if (result != NULL)
    214  1.1  christos 	free (result);
    215  1.1  christos       errno = ENOMEM;
    216  1.1  christos       return NULL;
    217  1.1  christos     }
    218  1.1  christos   final_result[length] = '\0';
    219  1.1  christos 
    220  1.1  christos   return final_result;
    221  1.1  christos 
    222  1.1  christos # else
    223  1.1  christos 
    224  1.1  christos   char *result;
    225  1.1  christos   size_t result_size;
    226  1.1  christos   size_t length;
    227  1.1  christos   const char *inptr = src;
    228  1.1  christos   size_t inbytes_remaining = strlen (src);
    229  1.1  christos 
    230  1.1  christos   /* Make a guess for the worst-case output size, in order to avoid a
    231  1.1  christos      realloc.  It's OK if the guess is wrong as long as it is not zero and
    232  1.1  christos      doesn't lead to an integer overflow.  */
    233  1.1  christos   result_size = inbytes_remaining;
    234  1.1  christos   {
    235  1.1  christos     size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
    236  1.1  christos     if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
    237  1.1  christos       result_size *= MB_LEN_MAX;
    238  1.1  christos   }
    239  1.1  christos   result_size += 1; /* for the terminating NUL */
    240  1.1  christos 
    241  1.1  christos   result = (char *) malloc (result_size);
    242  1.1  christos   if (result == NULL)
    243  1.1  christos     {
    244  1.1  christos       errno = ENOMEM;
    245  1.1  christos       return NULL;
    246  1.1  christos     }
    247  1.1  christos 
    248  1.1  christos   /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
    249  1.1  christos # if defined _LIBICONV_VERSION \
    250  1.1  christos     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    251  1.1  christos   /* Set to the initial state.  */
    252  1.1  christos   iconv (cd, NULL, NULL, NULL, NULL);
    253  1.1  christos # endif
    254  1.1  christos 
    255  1.1  christos   /* Do the conversion.  */
    256  1.1  christos   {
    257  1.1  christos     char *outptr = result;
    258  1.1  christos     size_t outbytes_remaining = result_size - 1;
    259  1.1  christos 
    260  1.1  christos     for (;;)
    261  1.1  christos       {
    262  1.1  christos 	/* Here inptr + inbytes_remaining = src + strlen (src),
    263  1.1  christos 		outptr + outbytes_remaining = result + result_size - 1.  */
    264  1.1  christos 	size_t res = iconv (cd,
    265  1.1  christos 			    (ICONV_CONST char **) &inptr, &inbytes_remaining,
    266  1.1  christos 			    &outptr, &outbytes_remaining);
    267  1.1  christos 
    268  1.1  christos 	if (res == (size_t)(-1))
    269  1.1  christos 	  {
    270  1.1  christos 	    if (errno == EINVAL)
    271  1.1  christos 	      break;
    272  1.1  christos 	    else if (errno == E2BIG)
    273  1.1  christos 	      {
    274  1.1  christos 		size_t used = outptr - result;
    275  1.1  christos 		size_t newsize = result_size * 2;
    276  1.1  christos 		char *newresult;
    277  1.1  christos 
    278  1.1  christos 		if (!(newsize > result_size))
    279  1.1  christos 		  {
    280  1.1  christos 		    errno = ENOMEM;
    281  1.1  christos 		    goto failed;
    282  1.1  christos 		  }
    283  1.1  christos 		newresult = (char *) realloc (result, newsize);
    284  1.1  christos 		if (newresult == NULL)
    285  1.1  christos 		  {
    286  1.1  christos 		    errno = ENOMEM;
    287  1.1  christos 		    goto failed;
    288  1.1  christos 		  }
    289  1.1  christos 		result = newresult;
    290  1.1  christos 		result_size = newsize;
    291  1.1  christos 		outptr = result + used;
    292  1.1  christos 		outbytes_remaining = result_size - 1 - used;
    293  1.1  christos 	      }
    294  1.1  christos 	    else
    295  1.1  christos 	      goto failed;
    296  1.1  christos 	  }
    297  1.1  christos # if !defined _LIBICONV_VERSION && !defined __GLIBC__
    298  1.1  christos 	/* Irix iconv() inserts a NUL byte if it cannot convert.
    299  1.1  christos 	   NetBSD iconv() inserts a question mark if it cannot convert.
    300  1.1  christos 	   Only GNU libiconv and GNU libc are known to prefer to fail rather
    301  1.1  christos 	   than doing a lossy conversion.  */
    302  1.1  christos 	else if (res > 0)
    303  1.1  christos 	  {
    304  1.1  christos 	    errno = EILSEQ;
    305  1.1  christos 	    goto failed;
    306  1.1  christos 	  }
    307  1.1  christos # endif
    308  1.1  christos 	else
    309  1.1  christos 	  break;
    310  1.1  christos       }
    311  1.1  christos     /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
    312  1.1  christos # if defined _LIBICONV_VERSION \
    313  1.1  christos     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    314  1.1  christos     for (;;)
    315  1.1  christos       {
    316  1.1  christos 	/* Here outptr + outbytes_remaining = result + result_size - 1.  */
    317  1.1  christos 	size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
    318  1.1  christos 
    319  1.1  christos 	if (res == (size_t)(-1))
    320  1.1  christos 	  {
    321  1.1  christos 	    if (errno == E2BIG)
    322  1.1  christos 	      {
    323  1.1  christos 		size_t used = outptr - result;
    324  1.1  christos 		size_t newsize = result_size * 2;
    325  1.1  christos 		char *newresult;
    326  1.1  christos 
    327  1.1  christos 		if (!(newsize > result_size))
    328  1.1  christos 		  {
    329  1.1  christos 		    errno = ENOMEM;
    330  1.1  christos 		    goto failed;
    331  1.1  christos 		  }
    332  1.1  christos 		newresult = (char *) realloc (result, newsize);
    333  1.1  christos 		if (newresult == NULL)
    334  1.1  christos 		  {
    335  1.1  christos 		    errno = ENOMEM;
    336  1.1  christos 		    goto failed;
    337  1.1  christos 		  }
    338  1.1  christos 		result = newresult;
    339  1.1  christos 		result_size = newsize;
    340  1.1  christos 		outptr = result + used;
    341  1.1  christos 		outbytes_remaining = result_size - 1 - used;
    342  1.1  christos 	      }
    343  1.1  christos 	    else
    344  1.1  christos 	      goto failed;
    345  1.1  christos 	  }
    346  1.1  christos 	else
    347  1.1  christos 	  break;
    348  1.1  christos       }
    349  1.1  christos # endif
    350  1.1  christos 
    351  1.1  christos     /* Add the terminating NUL byte.  */
    352  1.1  christos     *outptr++ = '\0';
    353  1.1  christos 
    354  1.1  christos     length = outptr - result;
    355  1.1  christos   }
    356  1.1  christos 
    357  1.1  christos   /* Give away unused memory.  */
    358  1.1  christos   if (length < result_size)
    359  1.1  christos     {
    360  1.1  christos       char *smaller_result = (char *) realloc (result, length);
    361  1.1  christos 
    362  1.1  christos       if (smaller_result != NULL)
    363  1.1  christos 	result = smaller_result;
    364  1.1  christos     }
    365  1.1  christos 
    366  1.1  christos   return result;
    367  1.1  christos 
    368  1.1  christos  failed:
    369  1.1  christos   {
    370  1.1  christos     int saved_errno = errno;
    371  1.1  christos     free (result);
    372  1.1  christos     errno = saved_errno;
    373  1.1  christos     return NULL;
    374  1.1  christos   }
    375  1.1  christos 
    376  1.1  christos # endif
    377  1.1  christos }
    378  1.1  christos 
    379  1.1  christos #endif
    380  1.1  christos 
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.
   If the two codeset names compare equal ignoring case, the result is
   simply strdup (SRC).  Otherwise a conversion descriptor is opened with
   iconv_open(), the string is converted with str_cd_iconv() and the
   descriptor is closed again.
   Return a freshly allocated string that the caller must free(), or NULL
   with errno set on failure.  When built without iconv support, any
   conversion between different codesets fails with errno = ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    return strdup (src);
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
    438