1 1.1 christos /* Convert multibyte character to wide character. 2 1.1.1.2 christos Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc. 3 1.1 christos 4 1.1.1.2 christos This file is free software: you can redistribute it and/or modify 5 1.1.1.2 christos it under the terms of the GNU Lesser General Public License as 6 1.1.1.2 christos published by the Free Software Foundation; either version 2.1 of the 7 1.1.1.2 christos License, or (at your option) any later version. 8 1.1 christos 9 1.1.1.2 christos This file is distributed in the hope that it will be useful, 10 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 11 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 1.1.1.2 christos GNU Lesser General Public License for more details. 13 1.1 christos 14 1.1.1.2 christos You should have received a copy of the GNU Lesser General Public License 15 1.1 christos along with this program. If not, see <https://www.gnu.org/licenses/>. */ 16 1.1 christos 17 1.1 christos /* Written by Bruno Haible <bruno (at) clisp.org>, 2008. */ 18 1.1 christos 19 1.1 christos /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions 20 1.1 christos that handles the special case of the UTF-8 encoding. */ 21 1.1 christos 22 1.1 christos /* Cf. unistr/u8-mbtouc.c. */ 23 1.1 christos unsigned char c = (unsigned char) p[0]; 24 1.1 christos 25 1.1 christos if (c < 0x80) 26 1.1 christos { 27 1.1 christos if (pwc != NULL) 28 1.1 christos *pwc = c; 29 1.1 christos res = (c == 0 ? 0 : 1); 30 1.1 christos goto success; 31 1.1 christos } 32 1.1 christos if (c >= 0xc2) 33 1.1 christos { 34 1.1 christos if (c < 0xe0) 35 1.1 christos { 36 1.1 christos if (m == 1) 37 1.1 christos goto incomplete; 38 1.1 christos else /* m >= 2 */ 39 1.1 christos { 40 1.1 christos unsigned char c2 = (unsigned char) p[1]; 41 1.1 christos 42 1.1 christos if ((c2 ^ 0x80) < 0x40) 43 1.1 christos { 44 1.1 christos if (pwc != NULL) 45 1.1 christos *pwc = ((unsigned int) (c & 0x1f) << 6) 46 1.1 christos | (unsigned int) (c2 ^ 0x80); 47 1.1 christos res = 2; 48 1.1 christos goto success; 49 1.1 christos } 50 1.1 christos } 51 1.1 christos } 52 1.1 christos else if (c < 0xf0) 53 1.1 christos { 54 1.1 christos if (m == 1) 55 1.1 christos goto incomplete; 56 1.1 christos else 57 1.1 christos { 58 1.1 christos unsigned char c2 = (unsigned char) p[1]; 59 1.1 christos 60 1.1 christos if ((c2 ^ 0x80) < 0x40 61 1.1 christos && (c >= 0xe1 || c2 >= 0xa0) 62 1.1 christos && (c != 0xed || c2 < 0xa0)) 63 1.1 christos { 64 1.1 christos if (m == 2) 65 1.1 christos goto incomplete; 66 1.1 christos else /* m >= 3 */ 67 1.1 christos { 68 1.1 christos unsigned char c3 = (unsigned char) p[2]; 69 1.1 christos 70 1.1 christos if ((c3 ^ 0x80) < 0x40) 71 1.1 christos { 72 1.1 christos unsigned int wc = 73 1.1 christos (((unsigned int) (c & 0x0f) << 12) 74 1.1 christos | ((unsigned int) (c2 ^ 0x80) << 6) 75 1.1 christos | (unsigned int) (c3 ^ 0x80)); 76 1.1 christos 77 1.1 christos if (FITS_IN_CHAR_TYPE (wc)) 78 1.1 christos { 79 1.1 christos if (pwc != NULL) 80 1.1 christos *pwc = wc; 81 1.1 christos res = 3; 82 1.1 christos goto success; 83 1.1 christos } 84 1.1 christos } 85 1.1 christos } 86 1.1 christos } 87 1.1 christos } 88 1.1 christos } 89 1.1 christos else if (c <= 0xf4) 90 1.1 christos { 91 1.1 christos if (m == 1) 92 1.1 christos goto incomplete; 93 1.1 christos else 94 1.1 christos { 95 1.1 christos unsigned char c2 = (unsigned char) p[1]; 96 1.1 christos 97 1.1 christos if ((c2 ^ 0x80) < 0x40 98 1.1 christos && (c >= 0xf1 || c2 >= 0x90) 99 1.1.1.2 christos && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90))) 100 1.1 christos { 101 1.1 christos if (m == 2) 102 1.1 christos goto incomplete; 103 1.1 christos else 104 1.1 christos { 105 1.1 christos unsigned char c3 = (unsigned char) p[2]; 106 1.1 christos 107 1.1 christos if ((c3 ^ 0x80) < 0x40) 108 1.1 christos { 109 1.1 christos if (m == 3) 110 1.1 christos goto incomplete; 111 1.1 christos else /* m >= 4 */ 112 1.1 christos { 113 1.1 christos unsigned char c4 = (unsigned char) p[3]; 114 1.1 christos 115 1.1 christos if ((c4 ^ 0x80) < 0x40) 116 1.1 christos { 117 1.1 christos unsigned int wc = 118 1.1 christos (((unsigned int) (c & 0x07) << 18) 119 1.1 christos | ((unsigned int) (c2 ^ 0x80) << 12) 120 1.1 christos | ((unsigned int) (c3 ^ 0x80) << 6) 121 1.1 christos | (unsigned int) (c4 ^ 0x80)); 122 1.1 christos 123 1.1 christos if (FITS_IN_CHAR_TYPE (wc)) 124 1.1 christos { 125 1.1 christos if (pwc != NULL) 126 1.1 christos *pwc = wc; 127 1.1 christos res = 4; 128 1.1 christos goto success; 129 1.1 christos } 130 1.1 christos } 131 1.1 christos } 132 1.1 christos } 133 1.1 christos } 134 1.1 christos } 135 1.1 christos } 136 1.1 christos } 137 1.1 christos } 138 1.1 christos goto invalid; 139