Home | History | Annotate | Line # | Download | only in import
mbrtowc-impl-utf8.h revision 1.1
      1  1.1  christos /* Convert multibyte character to wide character.
      2  1.1  christos    Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
      3  1.1  christos 
      4  1.1  christos    This program is free software: you can redistribute it and/or modify
      5  1.1  christos    it under the terms of the GNU General Public License as published by
      6  1.1  christos    the Free Software Foundation; either version 3 of the License, or
      7  1.1  christos    (at your option) any later version.
      8  1.1  christos 
      9  1.1  christos    This program is distributed in the hope that it will be useful,
     10  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  1.1  christos    GNU General Public License for more details.
     13  1.1  christos 
     14  1.1  christos    You should have received a copy of the GNU General Public License
     15  1.1  christos    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
     16  1.1  christos 
     17  1.1  christos /* Written by Bruno Haible <bruno (at) clisp.org>, 2008.  */
     18  1.1  christos 
     19  1.1  christos /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
     20  1.1  christos    that handles the special case of the UTF-8 encoding.  */
     21  1.1  christos 
      22  1.1  christos         /* Cf. unistr/u8-mbtouc.c.
                                   Decode the one UTF-8 sequence whose first byte is p[0].  On a
                                   complete, valid sequence: store the code point through pwc when
                                   pwc is non-NULL, set res to the sequence length in bytes (0 for
                                   the NUL byte), and jump to 'success'.  Jump to 'incomplete' when
                                   the bytes examined so far are acceptable but the sequence needs
                                   more of them; otherwise fall through to 'goto invalid' at the end.
                                   p, m, pwc, res, FITS_IN_CHAR_TYPE and the three labels are not
                                   defined here; they come from the code that includes this fragment.
                                   NOTE(review): m appears to be the number of bytes available at p
                                   (at least 1) -- confirm against the including function.  */
      23  1.1  christos         unsigned char c = (unsigned char) p[0];
      24  1.1  christos 
      25  1.1  christos         if (c < 0x80)  /* Single byte: c is the code point itself.  */
      26  1.1  christos           {
      27  1.1  christos             if (pwc != NULL)
      28  1.1  christos               *pwc = c;
      29  1.1  christos             res = (c == 0 ? 0 : 1);  /* The NUL byte yields 0, not 1.  */
      30  1.1  christos             goto success;
      31  1.1  christos           }
      32  1.1  christos         if (c >= 0xc2)  /* 0x80..0xc1 is never accepted as a first byte -> invalid.  */
      33  1.1  christos           {
      34  1.1  christos             if (c < 0xe0)  /* 2-byte sequence, first byte 0xc2..0xdf.  */
      35  1.1  christos               {
      36  1.1  christos                 if (m == 1)
      37  1.1  christos                   goto incomplete;
      38  1.1  christos                 else /* m >= 2 */
      39  1.1  christos                   {
      40  1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      41  1.1  christos 
      42  1.1  christos                     if ((c2 ^ 0x80) < 0x40)  /* True iff c2 is in 0x80..0xbf.  */
      43  1.1  christos                       {
      44  1.1  christos                         if (pwc != NULL)
      45  1.1  christos                           *pwc = ((unsigned int) (c & 0x1f) << 6)
      46  1.1  christos                                  | (unsigned int) (c2 ^ 0x80);
      47  1.1  christos                         res = 2;
      48  1.1  christos                         goto success;
      49  1.1  christos                       }
      50  1.1  christos                   }
      51  1.1  christos               }
      52  1.1  christos             else if (c < 0xf0)  /* 3-byte sequence, first byte 0xe0..0xef.  */
      53  1.1  christos               {
      54  1.1  christos                 if (m == 1)
      55  1.1  christos                   goto incomplete;
      56  1.1  christos                 else
      57  1.1  christos                   {
      58  1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      59  1.1  christos 
      60  1.1  christos                     if ((c2 ^ 0x80) < 0x40
      61  1.1  christos                         && (c >= 0xe1 || c2 >= 0xa0)  /* After 0xe0: reject values below 0x800.  */
      62  1.1  christos                         && (c != 0xed || c2 < 0xa0))  /* After 0xed: reject 0xd800..0xdfff.  */
      63  1.1  christos                       {
      64  1.1  christos                         if (m == 2)  /* First two bytes are acceptable; a third is needed.  */
      65  1.1  christos                           goto incomplete;
      66  1.1  christos                         else /* m >= 3 */
      67  1.1  christos                           {
      68  1.1  christos                             unsigned char c3 = (unsigned char) p[2];
      69  1.1  christos 
      70  1.1  christos                             if ((c3 ^ 0x80) < 0x40)
      71  1.1  christos                               {
      72  1.1  christos                                 unsigned int wc =
      73  1.1  christos                                   (((unsigned int) (c & 0x0f) << 12)
      74  1.1  christos                                    | ((unsigned int) (c2 ^ 0x80) << 6)
      75  1.1  christos                                    | (unsigned int) (c3 ^ 0x80));
      76  1.1  christos 
      77  1.1  christos                                 if (FITS_IN_CHAR_TYPE (wc))  /* NOTE(review): presumably "wc fits the type of *pwc"; if false -> invalid.  */
      78  1.1  christos                                   {
      79  1.1  christos                                     if (pwc != NULL)
      80  1.1  christos                                       *pwc = wc;
      81  1.1  christos                                     res = 3;
      82  1.1  christos                                     goto success;
      83  1.1  christos                                   }
      84  1.1  christos                               }
      85  1.1  christos                           }
      86  1.1  christos                       }
      87  1.1  christos                   }
      88  1.1  christos               }
      89  1.1  christos             else if (c <= 0xf4)  /* 4-byte sequence, first byte 0xf0..0xf4.  */
      90  1.1  christos               {
      91  1.1  christos                 if (m == 1)
      92  1.1  christos                   goto incomplete;
      93  1.1  christos                 else
      94  1.1  christos                   {
      95  1.1  christos                     unsigned char c2 = (unsigned char) p[1];
      96  1.1  christos 
      97  1.1  christos                     if ((c2 ^ 0x80) < 0x40
      98  1.1  christos                         && (c >= 0xf1 || c2 >= 0x90)  /* After 0xf0: reject values below 0x10000.  */
      99  1.1  christos                         && (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))  /* Reject values above 0x10ffff.  */
     100  1.1  christos                       {
     101  1.1  christos                         if (m == 2)
     102  1.1  christos                           goto incomplete;
     103  1.1  christos                         else
     104  1.1  christos                           {
     105  1.1  christos                             unsigned char c3 = (unsigned char) p[2];
     106  1.1  christos 
     107  1.1  christos                             if ((c3 ^ 0x80) < 0x40)
     108  1.1  christos                               {
     109  1.1  christos                                 if (m == 3)  /* First three bytes are acceptable; a fourth is needed.  */
     110  1.1  christos                                   goto incomplete;
     111  1.1  christos                                 else /* m >= 4 */
     112  1.1  christos                                   {
     113  1.1  christos                                     unsigned char c4 = (unsigned char) p[3];
     114  1.1  christos 
     115  1.1  christos                                     if ((c4 ^ 0x80) < 0x40)
     116  1.1  christos                                       {
     117  1.1  christos                                         unsigned int wc =
     118  1.1  christos                                           (((unsigned int) (c & 0x07) << 18)
     119  1.1  christos                                            | ((unsigned int) (c2 ^ 0x80) << 12)
     120  1.1  christos                                            | ((unsigned int) (c3 ^ 0x80) << 6)
     121  1.1  christos                                            | (unsigned int) (c4 ^ 0x80));
     122  1.1  christos 
     123  1.1  christos                                         if (FITS_IN_CHAR_TYPE (wc))  /* Same check as in the 3-byte case; if false -> invalid.  */
     124  1.1  christos                                           {
     125  1.1  christos                                             if (pwc != NULL)
     126  1.1  christos                                               *pwc = wc;
     127  1.1  christos                                             res = 4;
     128  1.1  christos                                             goto success;
     129  1.1  christos                                           }
     130  1.1  christos                                       }
     131  1.1  christos                                   }
     132  1.1  christos                               }
     133  1.1  christos                           }
     134  1.1  christos                       }
     135  1.1  christos                   }
     136  1.1  christos               }
     137  1.1  christos           }
     138  1.1  christos         goto invalid;  /* Reached whenever a byte or range check above fails.  */
    139