Home | History | Annotate | Line # | Download | only in import
      1      1.1  christos /* Convert multibyte character to wide character.
      2  1.1.1.2  christos    Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc.
      3      1.1  christos    Written by Bruno Haible <bruno (at) clisp.org>, 2008.
      4      1.1  christos 
      5  1.1.1.2  christos    This file is free software: you can redistribute it and/or modify
      6  1.1.1.2  christos    it under the terms of the GNU Lesser General Public License as
      7  1.1.1.2  christos    published by the Free Software Foundation; either version 2.1 of the
      8  1.1.1.2  christos    License, or (at your option) any later version.
      9      1.1  christos 
     10  1.1.1.2  christos    This file is distributed in the hope that it will be useful,
     11      1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12      1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13  1.1.1.2  christos    GNU Lesser General Public License for more details.
     14      1.1  christos 
     15  1.1.1.2  christos    You should have received a copy of the GNU Lesser General Public License
     16      1.1  christos    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     17      1.1  christos 
     18      1.1  christos #include <config.h>
     19      1.1  christos 
     20      1.1  christos /* Specification.  */
     21      1.1  christos #include <wchar.h>
     22      1.1  christos 
     23      1.1  christos #if GNULIB_defined_mbstate_t
     24      1.1  christos /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
     25      1.1  christos    and directly for the UTF-8 locales.  */
     26      1.1  christos 
     27      1.1  christos # include <errno.h>
     28      1.1  christos # include <stdint.h>
     29      1.1  christos # include <stdlib.h>
     30      1.1  christos 
     31      1.1  christos # if defined _WIN32 && !defined __CYGWIN__
     32      1.1  christos 
     33      1.1  christos #  define WIN32_LEAN_AND_MEAN  /* avoid including junk */
     34      1.1  christos #  include <windows.h>
     35      1.1  christos 
     36      1.1  christos # elif HAVE_PTHREAD_API
     37      1.1  christos 
     38      1.1  christos #  include <pthread.h>
     39      1.1  christos #  if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
     40      1.1  christos #   include <threads.h>
     41      1.1  christos #   pragma weak thrd_exit
     42      1.1  christos #   define c11_threads_in_use() (thrd_exit != NULL)
     43      1.1  christos #  else
     44      1.1  christos #   define c11_threads_in_use() 0
     45      1.1  christos #  endif
     46      1.1  christos 
     47      1.1  christos # elif HAVE_THREADS_H
     48      1.1  christos 
     49      1.1  christos #  include <threads.h>
     50      1.1  christos 
     51      1.1  christos # endif
     52      1.1  christos 
     53      1.1  christos # include "attribute.h"
     54      1.1  christos # include "verify.h"
     55      1.1  christos # include "lc-charset-dispatch.h"
     56      1.1  christos # include "mbtowc-lock.h"
     57      1.1  christos 
     58      1.1  christos verify (sizeof (mbstate_t) >= 4);  /* compile-time check: mbstate_t holds >= 4 bytes */
                           /* File-private 4-byte buffer, the size that the check above guarantees
                              fits inside an mbstate_t.  It is not referenced in the visible code;
                              presumably mbrtowc-impl.h uses it as the conversion state when the
                              caller passes ps == NULL -- confirm against that header.  */
     59      1.1  christos static char internal_state[4];
     60      1.1  christos 
     61      1.1  christos size_t
     62      1.1  christos mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
     63      1.1  christos {
                             /* Definition of mbrtowc() that is compiled when
                                GNULIB_defined_mbstate_t is set.  The function body is not
                                written out here: it is the text of mbrtowc-impl.h, included
                                below, which therefore sees the parameters pwc, s, n and ps
                                directly.  FITS_IN_CHAR_TYPE is defined first, presumably so
                                that the header can test whether a decoded value is
                                representable as a wchar_t (i.e. is at most WCHAR_MAX) --
                                confirm against mbrtowc-impl.h.  */
     64      1.1  christos # define FITS_IN_CHAR_TYPE(wc)  ((wc) <= WCHAR_MAX)
     65      1.1  christos # include "mbrtowc-impl.h"
     66      1.1  christos }
     67      1.1  christos 
     68      1.1  christos #else
     69      1.1  christos /* Override the system's mbrtowc() function.  */
     70      1.1  christos 
     71      1.1  christos # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
     72      1.1  christos #  include "hard-locale.h"
     73      1.1  christos #  include <locale.h>
     74      1.1  christos # endif
     75      1.1  christos 
     76      1.1  christos # undef mbrtowc
     77      1.1  christos 
/* Wrapper around the system's mbrtowc() that papers over defects selected
   at configure time.  Arguments and return value are those of mbrtowc().
   Each MBRTOWC_* macro switches on one workaround:

     MBRTOWC_NULL_ARG2_BUG, MBRTOWC_RETVAL_BUG, MBRTOWC_EMPTY_INPUT_BUG
         a null S is rewritten into the call (NULL, "", 1, ps);
     MBRTOWC_EMPTY_INPUT_BUG
         N == 0 yields (size_t) -2 without calling the system function;
     MBRTOWC_RETVAL_BUG
         a character that was started by an earlier call is finished one
         byte at a time, so the bytes consumed by this call can be counted;
     MBRTOWC_STORES_INCOMPLETE_BUG
         *PWC is written only when a complete character was converted;
     MBRTOWC_NUL_RETVAL_BUG
         a converted null wide character yields 0;
     MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
         when hard_locale (LC_CTYPE) is false, a failed or incomplete
         conversion is replaced by the first input byte, taken as-is.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  size_t result;
  wchar_t scratch;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  /* Spell out the arguments that a null S stands for.  */
  if (s == NULL)
    {
      n = 1;
      s = "";
      pwc = NULL;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  /* No input bytes: report an incomplete character ourselves.  */
  if (n == 0)
    return (size_t) -2;
# endif

  /* From here on PWC always points at something writable.  */
  if (pwc == NULL)
    pwc = &scratch;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t private_state;

    /* Substitute our own state object for the system's hidden one:
       mbsinit() can be applied to ours but not to the hidden one.  */
    if (ps == NULL)
      ps = &private_state;

    if (!mbsinit (ps))
      {
        /* A character is pending from an earlier call.  Feed the system
           function a single byte at a time and count how many this call
           consumes.  */
        size_t consumed = 0;

        while (n > 0)
          {
            result = mbrtowc (&scratch, s, 1, ps);
            if (result == (size_t) -1)
              return (size_t) -1;

            consumed++;
            if (result != (size_t) -2)
              {
                /* This byte completed the pending character.  */
                *pwc = scratch;
                if (scratch == 0)
                  return 0;
                return consumed;
              }

            s++;
            n--;
          }

        /* Input exhausted and the character is still incomplete.  */
        return (size_t) -2;
      }
  }
# endif

# if MBRTOWC_STORES_INCOMPLETE_BUG
  /* Convert into a local first; hand the result to the caller only when a
     complete character came out.  */
  result = mbrtowc (&scratch, s, n, ps);
  if (pwc != NULL && result < (size_t) -2)
    *pwc = scratch;
# else
  result = mbrtowc (pwc, s, n, ps);
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  /* A converted null wide character must be reported as 0.  */
  if (result < (size_t) -2 && *pwc == 0)
    return 0;
# endif

# if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  /* RESULT is (size_t) -1 or (size_t) -2 here.  When hard_locale reports
     false for LC_CTYPE, take the first byte as a character of its own.  */
  if ((size_t) -2 <= result && n != 0 && ! hard_locale (LC_CTYPE))
    {
      *pwc = (unsigned char) *s;
      return 1;
    }
# endif

  return result;
}
    157      1.1  christos 
    158      1.1  christos #endif
    159