Home | History | Annotate | Line # | Download | only in import
      1      1.1  christos /* Convert multibyte character to wide character.
      2  1.1.1.2  christos    Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc.
      3      1.1  christos 
      4  1.1.1.2  christos    This file is free software: you can redistribute it and/or modify
      5  1.1.1.2  christos    it under the terms of the GNU Lesser General Public License as
      6  1.1.1.2  christos    published by the Free Software Foundation; either version 2.1 of the
      7  1.1.1.2  christos    License, or (at your option) any later version.
      8      1.1  christos 
      9  1.1.1.2  christos    This file is distributed in the hope that it will be useful,
     10      1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11      1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  1.1.1.2  christos    GNU Lesser General Public License for more details.
     13      1.1  christos 
     14  1.1.1.2  christos    You should have received a copy of the GNU Lesser General Public License
     15      1.1  christos    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     16      1.1  christos 
     17      1.1  christos /* Written by Bruno Haible <bruno (at) clisp.org>, 2008.  */
     18      1.1  christos 
     19      1.1  christos /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
     20      1.1  christos    that handles the special case of the UTF-8 encoding.  */
     21      1.1  christos 
      22      1.1  christos         /* Cf. unistr/u8-mbtouc.c.
                                       Decode one UTF-8 sequence starting at p[0].  The variables p, m,
                                       pwc and res, the macro FITS_IN_CHAR_TYPE and the labels success,
                                       incomplete and invalid are not defined in this file; the code
                                       that includes it must provide them.  m appears to be the number
                                       of bytes available at p (assumption; confirm against the
                                       including code).  On a well-formed sequence the decoded value
                                       is stored through pwc (when pwc is non-NULL), res is set to the
                                       sequence length in bytes (0 for a NUL byte) and control goes to
                                       success.  A so-far-valid sequence cut short by m goes to
                                       incomplete; everything else falls through to invalid.  */
      23      1.1  christos         unsigned char c = (unsigned char) p[0];
      24      1.1  christos 
      25      1.1  christos         if (c < 0x80)  /* One-byte (ASCII) sequence.  */
      26      1.1  christos           {
      27      1.1  christos             if (pwc != NULL)
      28      1.1  christos               *pwc = c;
      29      1.1  christos             res = (c == 0 ? 0 : 1);  /* A NUL byte yields 0, any other byte 1.  */
      30      1.1  christos             goto success;
      31      1.1  christos           }
      32      1.1  christos         if (c >= 0xc2)  /* 0x80..0xc1 can never start a sequence: they end up at invalid.  */
      33      1.1  christos           {
      34      1.1  christos             if (c < 0xe0)  /* Lead byte 0xc2..0xdf: two-byte sequence.  */
      35      1.1  christos               {
      36      1.1  christos                 if (m == 1)
      37      1.1  christos                   goto incomplete;
      38      1.1  christos                 else /* m >= 2 */
      39      1.1  christos                   {
      40      1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      41      1.1  christos 
      42      1.1  christos                     if ((c2 ^ 0x80) < 0x40)  /* True exactly when c2 is in 0x80..0xbf.  */
      43      1.1  christos                       {
      44      1.1  christos                         if (pwc != NULL)
      45      1.1  christos                           *pwc = ((unsigned int) (c & 0x1f) << 6)
      46      1.1  christos                                  | (unsigned int) (c2 ^ 0x80);
      47      1.1  christos                         res = 2;
      48      1.1  christos                         goto success;
      49      1.1  christos                       }
      50      1.1  christos                   }
      51      1.1  christos               }
      52      1.1  christos             else if (c < 0xf0)  /* Lead byte 0xe0..0xef: three-byte sequence.  */
      53      1.1  christos               {
      54      1.1  christos                 if (m == 1)
      55      1.1  christos                   goto incomplete;
      56      1.1  christos                 else
      57      1.1  christos                   {
      58      1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      59      1.1  christos 
      60      1.1  christos                     if ((c2 ^ 0x80) < 0x40
      61      1.1  christos                         && (c >= 0xe1 || c2 >= 0xa0)  /* Reject overlong forms (value < 0x800).  */
      62      1.1  christos                         && (c != 0xed || c2 < 0xa0))  /* Reject values 0xd800..0xdfff (surrogates).  */
      63      1.1  christos                       {
      64      1.1  christos                         if (m == 2)  /* Checked only after c2 passed, so a bad c2 is invalid, not incomplete.  */
      65      1.1  christos                           goto incomplete;
      66      1.1  christos                         else /* m >= 3 */
      67      1.1  christos                           {
      68      1.1  christos                             unsigned char c3 = (unsigned char) p[2];
      69      1.1  christos 
      70      1.1  christos                             if ((c3 ^ 0x80) < 0x40)
      71      1.1  christos                               {
      72      1.1  christos                                 unsigned int wc =
      73      1.1  christos                                   (((unsigned int) (c & 0x0f) << 12)
      74      1.1  christos                                    | ((unsigned int) (c2 ^ 0x80) << 6)
      75      1.1  christos                                    | (unsigned int) (c3 ^ 0x80));
      76      1.1  christos 
      77      1.1  christos                                 if (FITS_IN_CHAR_TYPE (wc))  /* Defined elsewhere; presumably tests that wc is representable in *pwc.  If false: invalid.  */
      78      1.1  christos                                   {
      79      1.1  christos                                     if (pwc != NULL)
      80      1.1  christos                                       *pwc = wc;
      81      1.1  christos                                     res = 3;
      82      1.1  christos                                     goto success;
      83      1.1  christos                                   }
      84      1.1  christos                               }
      85      1.1  christos                           }
      86      1.1  christos                       }
      87      1.1  christos                   }
      88      1.1  christos               }
      89      1.1  christos             else if (c <= 0xf4)  /* Lead byte 0xf0..0xf4: four-byte sequence; 0xf5..0xff end up at invalid.  */
      90      1.1  christos               {
      91      1.1  christos                 if (m == 1)
      92      1.1  christos                   goto incomplete;
      93      1.1  christos                 else
      94      1.1  christos                   {
      95      1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      96      1.1  christos 
      97      1.1  christos                     if ((c2 ^ 0x80) < 0x40
      98      1.1  christos                         && (c >= 0xf1 || c2 >= 0x90)  /* Reject overlong forms (value < 0x10000).  */
      99  1.1.1.2  christos                         && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90)))  /* Reject values above 0x10ffff.  */
     100      1.1  christos                       {
     101      1.1  christos                         if (m == 2)
     102      1.1  christos                           goto incomplete;
     103      1.1  christos                         else
     104      1.1  christos                           {
     105      1.1  christos                             unsigned char c3 = (unsigned char) p[2];
     106      1.1  christos 
     107      1.1  christos                             if ((c3 ^ 0x80) < 0x40)
     108      1.1  christos                               {
     109      1.1  christos                                 if (m == 3)
     110      1.1  christos                                   goto incomplete;
     111      1.1  christos                                 else /* m >= 4 */
     112      1.1  christos                                   {
     113      1.1  christos                                     unsigned char c4 = (unsigned char) p[3];
     114      1.1  christos 
     115      1.1  christos                                     if ((c4 ^ 0x80) < 0x40)
     116      1.1  christos                                       {
     117      1.1  christos                                         unsigned int wc =
     118      1.1  christos                                           (((unsigned int) (c & 0x07) << 18)
     119      1.1  christos                                            | ((unsigned int) (c2 ^ 0x80) << 12)
     120      1.1  christos                                            | ((unsigned int) (c3 ^ 0x80) << 6)
     121      1.1  christos                                            | (unsigned int) (c4 ^ 0x80));
     122      1.1  christos 
     123      1.1  christos                                         if (FITS_IN_CHAR_TYPE (wc))  /* If false, falls through to invalid.  */
     124      1.1  christos                                           {
     125      1.1  christos                                             if (pwc != NULL)
     126      1.1  christos                                               *pwc = wc;
     127      1.1  christos                                             res = 4;
     128      1.1  christos                                             goto success;
     129      1.1  christos                                           }
     130      1.1  christos                                       }
     131      1.1  christos                                   }
     132      1.1  christos                               }
     133      1.1  christos                           }
     134      1.1  christos                       }
     135      1.1  christos                   }
     136      1.1  christos               }
     137      1.1  christos           }
     138      1.1  christos         goto invalid;  /* Reached by every path above that did not jump to success or incomplete.  */
    139