Home | History | Annotate | Line # | Download | only in import
mbrtowc-impl-utf8.h revision 1.1
      1 /* Convert multibyte character to wide character.
      2    Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
      3 
      4    This program is free software: you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation; either version 3 of the License, or
      7    (at your option) any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     16 
     17 /* Written by Bruno Haible <bruno (at) clisp.org>, 2008.  */
     18 
     19 /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
     20    that handles the special case of the UTF-8 encoding.  */
     21 
     22         /* Decode one UTF-8 sequence from at most m bytes at p.  Cf. unistr/u8-mbtouc.c.  */
     23         unsigned char c = (unsigned char) p[0]; /* NOTE(review): p[0] is read without testing m; presumably the includer guarantees m >= 1 -- confirm.  */
     24         /* One-byte sequence: c is the character itself; a NUL byte yields res = 0.  */
     25         if (c < 0x80)
     26           {
     27             if (pwc != NULL)
     28               *pwc = c;
     29             res = (c == 0 ? 0 : 1);
     30             goto success;
     31           }
     32         if (c >= 0xc2) /* 0x80..0xc1 fall through to invalid; 0xc0/0xc1 could only encode values below 0x80.  */
     33           {
     34             if (c < 0xe0) /* Two-byte sequence, lead byte 0xc2..0xdf.  */
     35               {
     36                 if (m == 1)
     37                   goto incomplete;
     38                 else /* m >= 2 */
     39                   {
     40                     unsigned char c2 = (unsigned char) p[1];
     41                     /* (x ^ 0x80) < 0x40 holds exactly when x is in 0x80..0xbf; otherwise control reaches invalid.  */
     42                     if ((c2 ^ 0x80) < 0x40)
     43                       {
     44                         if (pwc != NULL)
     45                           *pwc = ((unsigned int) (c & 0x1f) << 6)
     46                                  | (unsigned int) (c2 ^ 0x80);
     47                         res = 2;
     48                         goto success;
     49                       }
     50                   }
     51               }
     52             else if (c < 0xf0) /* Three-byte sequence, lead byte 0xe0..0xef.  */
     53               {
     54                 if (m == 1)
     55                   goto incomplete;
     56                 else /* m >= 2 */
     57                   {
     58                     unsigned char c2 = (unsigned char) p[1];
     59                     /* c2 is validated before the m == 2 test, so a bad second byte is invalid rather than incomplete.  */
     60                     if ((c2 ^ 0x80) < 0x40 /* c2 in 0x80..0xbf */
     61                         && (c >= 0xe1 || c2 >= 0xa0) /* lead 0xe0: require value >= 0x800 */
     62                         && (c != 0xed || c2 < 0xa0)) /* lead 0xed: exclude 0xd800..0xdfff (UTF-16 surrogates) */
     63                       {
     64                         if (m == 2)
     65                           goto incomplete;
     66                         else /* m >= 3 */
     67                           {
     68                             unsigned char c3 = (unsigned char) p[2];
     69                             /* c3 must be in 0x80..0xbf.  */
     70                             if ((c3 ^ 0x80) < 0x40)
     71                               {
     72                                 unsigned int wc =
     73                                   (((unsigned int) (c & 0x0f) << 12)
     74                                    | ((unsigned int) (c2 ^ 0x80) << 6)
     75                                    | (unsigned int) (c3 ^ 0x80));
     76                                 /* NOTE(review): FITS_IN_CHAR_TYPE is defined outside this file; presumably it tests that wc is representable in *pwc's type -- confirm.  If it fails, control reaches invalid.  */
     77                                 if (FITS_IN_CHAR_TYPE (wc))
     78                                   {
     79                                     if (pwc != NULL)
     80                                       *pwc = wc;
     81                                     res = 3;
     82                                     goto success;
     83                                   }
     84                               }
     85                           }
     86                       }
     87                   }
     88               }
     89             else if (c <= 0xf4) /* Four-byte sequence, lead byte 0xf0..0xf4; 0xf5..0xff fall through to invalid.  */
     90               {
     91                 if (m == 1)
     92                   goto incomplete;
     93                 else /* m >= 2 */
     94                   {
     95                     unsigned char c2 = (unsigned char) p[1];
     96                     /* As in the three-byte case, each byte is validated before the next length test.  */
     97                     if ((c2 ^ 0x80) < 0x40 /* c2 in 0x80..0xbf */
     98                         && (c >= 0xf1 || c2 >= 0x90) /* lead 0xf0: require value >= 0x10000 */
     99                         && (c < 0xf4 || (c == 0xf4 && c2 < 0x90))) /* lead 0xf4: require value < 0x110000 */
    100                       {
    101                         if (m == 2)
    102                           goto incomplete;
    103                         else /* m >= 3 */
    104                           {
    105                             unsigned char c3 = (unsigned char) p[2];
    106                             /* c3 must be in 0x80..0xbf.  */
    107                             if ((c3 ^ 0x80) < 0x40)
    108                               {
    109                                 if (m == 3)
    110                                   goto incomplete;
    111                                 else /* m >= 4 */
    112                                   {
    113                                     unsigned char c4 = (unsigned char) p[3];
    114                                     /* c4 must be in 0x80..0xbf.  */
    115                                     if ((c4 ^ 0x80) < 0x40)
    116                                       {
    117                                         unsigned int wc =
    118                                           (((unsigned int) (c & 0x07) << 18)
    119                                            | ((unsigned int) (c2 ^ 0x80) << 12)
    120                                            | ((unsigned int) (c3 ^ 0x80) << 6)
    121                                            | (unsigned int) (c4 ^ 0x80));
    122                                         /* A value that fails FITS_IN_CHAR_TYPE is not stored; control reaches invalid.  */
    123                                         if (FITS_IN_CHAR_TYPE (wc))
    124                                           {
    125                                             if (pwc != NULL)
    126                                               *pwc = wc;
    127                                             res = 4;
    128                                             goto success;
    129                                           }
    130                                       }
    131                                   }
    132                               }
    133                           }
    134                       }
    135                   }
    136               }
    137           }
    138         goto invalid; /* Reached by every byte pattern that was neither accepted nor reported as incomplete.  */
    139