/* Convert multibyte character to wide character.
   Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2008.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     17 
     18 #include <config.h>
     19 
     20 /* Specification.  */
     21 #include <wchar.h>
     22 
     23 #if GNULIB_defined_mbstate_t
     24 /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
     25    and directly for the UTF-8 locales.  */
     26 
     27 # include <errno.h>
     28 # include <stdint.h>
     29 # include <stdlib.h>
     30 
     31 # if defined _WIN32 && !defined __CYGWIN__
     32 
     33 #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
     34 #  include <windows.h>
     35 
     36 # elif HAVE_PTHREAD_API
     37 
     38 #  include <pthread.h>
     39 #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
     40 #   include <threads.h>
     41 #   pragma weak thrd_exit
     42 #   define c11_threads_in_use() (thrd_exit != NULL)
     43 #  else
     44 #   define c11_threads_in_use() 0
     45 #  endif
     46 
     47 # elif HAVE_THREADS_H
     48 
     49 #  include <threads.h>
     50 
     51 # endif
     52 
     53 # include "attribute.h"
     54 # include "verify.h"
     55 # include "lc-charset-dispatch.h"
     56 # include "mbtowc-lock.h"
     57 
/* Require that mbstate_t provides at least 4 bytes.
   NOTE(review): verify() comes from "verify.h" and is presumably a
   build-time assertion; the code that depends on this size lives in
   mbrtowc-impl.h, which is not visible here -- confirm there.  */
verify (sizeof (mbstate_t) >= 4);
/* File-local 4-byte buffer.
   NOTE(review): it is not referenced in this file; presumably
   mbrtowc-impl.h uses it as the conversion state when PS is NULL --
   confirm against that header.  */
static char internal_state[4];

/* Convert a multibyte character to a wide character.

   This definition is compiled only when GNULIB_defined_mbstate_t is set.
   The entire function body is supplied by "mbrtowc-impl.h"; this file only
   provides the signature and the FITS_IN_CHAR_TYPE parameter macro.

   PWC, S, N and PS are passed through unchanged to the included body.  */
size_t
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  /* Range predicate made available to the included body: true when WC
     does not exceed WCHAR_MAX.  */
# define FITS_IN_CHAR_TYPE(wc)  ((wc) <= WCHAR_MAX)
  /* The implementation proper, textually included as the function body.  */
# include "mbrtowc-impl.h"
}
     67 
     68 #else
     69 /* Override the system's mbrtowc() function.  */
     70 
     71 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
     72 #  include "hard-locale.h"
     73 #  include <locale.h>
     74 # endif
     75 
     76 # undef mbrtowc
     77 
/* Replacement for the system's mbrtowc().  It forwards to the system
   function and applies whichever workarounds the MBRTOWC_* configuration
   macros enable.

   PWC  receives the converted wide character.  It may be NULL, in which
        case a local scratch variable receives the result instead.
   S    points to the multibyte input.  When one of the NULL-argument
        related workarounds is enabled, a NULL S is handled as the call
        (NULL, "", 1, PS).
   N    is the number of bytes of S that may be examined.
   PS   is the conversion state passed to the system mbrtowc().

   Returns the system mbrtowc() result, possibly corrected by the enabled
   workarounds.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  wchar_t scratch;
  size_t result;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  /* Normalize a NULL input string into an explicit empty string.  */
  if (s == NULL)
    {
      n = 1;
      s = "";
      pwc = NULL;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  /* With no bytes to look at, the character is necessarily incomplete.  */
  if (n == 0)
    return (size_t) -2;
# endif

  /* From here on PWC always designates a writable object.  */
  if (pwc == NULL)
    pwc = &scratch;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t fallback_state;

    /* Substitute our own state object for mbrtowc's hidden one: mbsinit()
       cannot inspect the hidden state, but it can inspect ours.  */
    if (ps == NULL)
      ps = &fallback_state;

    if (!mbsinit (ps))
      {
        /* A character is already partially consumed.  Feed the remaining
           bytes one at a time and count them ourselves.  */
        size_t consumed = 0;

        while (n > 0)
          {
            result = mbrtowc (&scratch, s, 1, ps);
            if (result == (size_t) -1)
              return (size_t) -1;

            consumed++;
            if (result != (size_t) -2)
              {
                /* This byte completed the multibyte character.  */
                *pwc = scratch;
                if (scratch == 0)
                  return 0;
                return consumed;
              }

            s++;
            n--;
          }
        return (size_t) -2;
      }
  }
# endif

# if MBRTOWC_STORES_INCOMPLETE_BUG
  /* Convert into the scratch variable and publish the result only when a
     complete character was produced.  */
  result = mbrtowc (&scratch, s, n, ps);
  if (pwc != NULL && result < (size_t) -2)
    *pwc = scratch;
# else
  result = mbrtowc (pwc, s, n, ps);
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  /* A successfully converted null wide character must yield 0.  */
  if (result < (size_t) -2 && *pwc == 0)
    return 0;
# endif

# if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  /* When hard_locale (LC_CTYPE) is false, turn a failed or incomplete
     conversion into a one-byte result: the first input byte, taken as an
     unsigned char.  */
  if (result >= (size_t) -2 && n != 0 && !hard_locale (LC_CTYPE))
    {
      *pwc = (unsigned char) *s;
      return 1;
    }
# endif

  return result;
}
    157 
    158 #endif
    159