1 /* $OpenBSD$ */ 2 3 /* 4 * Copyright (c) 2023 Nicholas Marriott <nicholas.marriott (at) gmail.com> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include <sys/types.h> 20 21 #include <stdlib.h> 22 #include <string.h> 23 #include <wchar.h> 24 25 #include "tmux.h" 26 27 enum hanguljamo_subclass { 28 HANGULJAMO_SUBCLASS_NOT_HANGULJAMO, 29 HANGULJAMO_SUBCLASS_CHOSEONG, // U+1100 - U+1112 30 HANGULJAMO_SUBCLASS_OLD_CHOSEONG, // U+1113 - U+115E 31 HANGULJAMO_SUBCLASS_CHOSEONG_FILLER, // U+115F 32 HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER, // U+1160 33 HANGULJAMO_SUBCLASS_JUNGSEONG, // U+1161 - U+1175 34 HANGULJAMO_SUBCLASS_OLD_JUNGSEONG, // U+1176 - U+11A7 35 HANGULJAMO_SUBCLASS_JONGSEONG, // U+11A8 - U+11C2 36 HANGULJAMO_SUBCLASS_OLD_JONGSEONG, // U+11C3 - U+11FF 37 HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG, // U+A960 - U+A97C 38 HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG, // U+D7B0 - U+D7C6 39 HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG // U+D7CB - U+D7FB 40 }; 41 42 enum hanguljamo_class { 43 HANGULJAMO_CLASS_NOT_HANGULJAMO, 44 HANGULJAMO_CLASS_CHOSEONG, 45 HANGULJAMO_CLASS_JUNGSEONG, 46 HANGULJAMO_CLASS_JONGSEONG 47 }; 48 49 /* Has this got a zero width joiner at the end? */ 50 int 51 utf8_has_zwj(const struct utf8_data *ud) 52 { 53 if (ud->size < 3) 54 return (0); 55 return (memcmp(ud->data + ud->size - 3, "\342\200\215", 3) == 0); 56 } 57 58 /* Is this zero width joiner U+200D? */ 59 int 60 utf8_is_zwj(const struct utf8_data *ud) 61 { 62 if (ud->size != 3) 63 return (0); 64 return (memcmp(ud->data, "\342\200\215", 3) == 0); 65 } 66 67 /* Is this variation selector U+FE0F? */ 68 int 69 utf8_is_vs(const struct utf8_data *ud) 70 { 71 if (ud->size != 3) 72 return (0); 73 return (memcmp(ud->data, "\357\270\217", 3) == 0); 74 } 75 76 /* Is this Hangul filler U+3164? */ 77 int 78 utf8_is_hangul_filler(const struct utf8_data *ud) 79 { 80 if (ud->size != 3) 81 return (0); 82 return (memcmp(ud->data, "\343\205\244", 3) == 0); 83 } 84 85 /* Should these two characters combine? */ 86 int 87 utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add) 88 { 89 wchar_t w, a; 90 91 if (utf8_towc(with, &w) != UTF8_DONE) 92 return (0); 93 if (utf8_towc(add, &a) != UTF8_DONE) 94 return (0); 95 96 /* Regional indicators. */ 97 if ((a >= 0x1F1E6 && a <= 0x1F1FF) && (w >= 0x1F1E6 && w <= 0x1F1FF)) 98 return (1); 99 100 /* Emoji skin tone modifiers. */ 101 switch (a) { 102 case 0x1F44B: 103 case 0x1F44C: 104 case 0x1F44D: 105 case 0x1F44E: 106 case 0x1F44F: 107 case 0x1F450: 108 case 0x1F466: 109 case 0x1F467: 110 case 0x1F468: 111 case 0x1F469: 112 case 0x1F46E: 113 case 0x1F470: 114 case 0x1F471: 115 case 0x1F472: 116 case 0x1F473: 117 case 0x1F474: 118 case 0x1F475: 119 case 0x1F476: 120 case 0x1F477: 121 case 0x1F478: 122 case 0x1F47C: 123 case 0x1F481: 124 case 0x1F482: 125 case 0x1F483: 126 case 0x1F485: 127 case 0x1F486: 128 case 0x1F487: 129 case 0x1F4AA: 130 case 0x1F575: 131 case 0x1F57A: 132 case 0x1F590: 133 case 0x1F595: 134 case 0x1F596: 135 case 0x1F645: 136 case 0x1F646: 137 case 0x1F647: 138 case 0x1F64B: 139 case 0x1F64C: 140 case 0x1F64D: 141 case 0x1F64E: 142 case 0x1F64F: 143 case 0x1F6B4: 144 case 0x1F6B5: 145 case 0x1F6B6: 146 case 0x1F926: 147 case 0x1F937: 148 case 0x1F938: 149 case 0x1F939: 150 case 0x1F93D: 151 case 0x1F93E: 152 case 0x1F9B5: 153 case 0x1F9B6: 154 case 0x1F9B8: 155 case 0x1F9B9: 156 case 0x1F9CD: 157 case 0x1F9CE: 158 case 0x1F9CF: 159 case 0x1F9D1: 160 case 0x1F9D2: 161 case 0x1F9D3: 162 case 0x1F9D4: 163 case 0x1F9D5: 164 case 0x1F9D6: 165 case 0x1F9D7: 166 case 0x1F9D8: 167 case 0x1F9D9: 168 case 0x1F9DA: 169 case 0x1F9DB: 170 case 0x1F9DC: 171 case 0x1F9DD: 172 case 0x1F9DE: 173 case 0x1F9DF: 174 if (w >= 0x1F3FB && w <= 0x1F3FF) 175 return (1); 176 break; 177 } 178 return 0; 179 } 180 181 static enum hanguljamo_subclass 182 hanguljamo_get_subclass(const u_char *s) 183 { 184 switch (s[0]) { 185 case 0xE1: 186 switch (s[1]) { 187 case 0x84: 188 if (s[2] >= 0x80 && s[2] <= 0x92) 189 return (HANGULJAMO_SUBCLASS_CHOSEONG); 190 if (s[2] >= 0x93 && s[2] <= 0xBF) 191 return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG); 192 break; 193 case 0x85: 194 if (s[2] == 0x9F) 195 return (HANGULJAMO_SUBCLASS_CHOSEONG_FILLER); 196 if (s[2] == 0xA0) 197 return (HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER); 198 if (s[2] >= 0x80 && s[2] <= 0x9E) 199 return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG); 200 if (s[2] >= 0xA1 && s[2] <= 0xB5) 201 return (HANGULJAMO_SUBCLASS_JUNGSEONG); 202 if (s[2] >= 0xB6 && s[2] <= 0xBF) 203 return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG); 204 break; 205 case 0x86: 206 if (s[2] >= 0x80 && s[2] <= 0xA7) 207 return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG); 208 if (s[2] >= 0xA8 && s[2] <= 0xBF) 209 return (HANGULJAMO_SUBCLASS_JONGSEONG); 210 break; 211 case 0x87: 212 if (s[2] >= 0x80 && s[2] <= 0x82) 213 return (HANGULJAMO_SUBCLASS_JONGSEONG); 214 if (s[2] >= 0x83 && s[2] <= 0xBF) 215 return (HANGULJAMO_SUBCLASS_OLD_JONGSEONG); 216 break; 217 } 218 break; 219 case 0xEA: 220 if (s[1] == 0xA5 && s[2] >= 0xA0 && s[2] <= 0xBC) 221 return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG); 222 break; 223 case 0xED: 224 if (s[1] == 0x9E && s[2] >= 0xB0 && s[2] <= 0xBF) 225 return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG); 226 if (s[1] != 0x9F) 227 break; 228 if (s[2] >= 0x80 && s[2] <= 0x86) 229 return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG); 230 if (s[2] >= 0x8B && s[2] <= 0xBB) 231 return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG); 232 break; 233 } 234 return (HANGULJAMO_SUBCLASS_NOT_HANGULJAMO); 235 } 236 237 static enum hanguljamo_class 238 hanguljamo_get_class(const u_char *s) 239 { 240 switch (hanguljamo_get_subclass(s)) { 241 case HANGULJAMO_SUBCLASS_CHOSEONG: 242 case HANGULJAMO_SUBCLASS_CHOSEONG_FILLER: 243 case HANGULJAMO_SUBCLASS_OLD_CHOSEONG: 244 case HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG: 245 return (HANGULJAMO_CLASS_CHOSEONG); 246 case HANGULJAMO_SUBCLASS_JUNGSEONG: 247 case HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER: 248 case HANGULJAMO_SUBCLASS_OLD_JUNGSEONG: 249 case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG: 250 return (HANGULJAMO_CLASS_JUNGSEONG); 251 case HANGULJAMO_SUBCLASS_JONGSEONG: 252 case HANGULJAMO_SUBCLASS_OLD_JONGSEONG: 253 case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG: 254 return (HANGULJAMO_CLASS_JONGSEONG); 255 case HANGULJAMO_SUBCLASS_NOT_HANGULJAMO: 256 return (HANGULJAMO_CLASS_NOT_HANGULJAMO); 257 } 258 return (HANGULJAMO_CLASS_NOT_HANGULJAMO); 259 } 260 261 enum hanguljamo_state 262 hanguljamo_check_state(const struct utf8_data *p_ud, const struct utf8_data *ud) 263 { 264 const u_char *s; 265 266 if (ud->size != 3) 267 return (HANGULJAMO_STATE_NOT_HANGULJAMO); 268 269 switch (hanguljamo_get_class(ud->data)) { 270 case HANGULJAMO_CLASS_CHOSEONG: 271 return (HANGULJAMO_STATE_CHOSEONG); 272 case HANGULJAMO_CLASS_JUNGSEONG: 273 if (p_ud->size < 3) 274 return (HANGULJAMO_STATE_NOT_COMPOSABLE); 275 s = p_ud->data + p_ud->size - 3; 276 if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_CHOSEONG) 277 return (HANGULJAMO_STATE_COMPOSABLE); 278 return (HANGULJAMO_STATE_NOT_COMPOSABLE); 279 case HANGULJAMO_CLASS_JONGSEONG: 280 if (p_ud->size < 3) 281 return (HANGULJAMO_STATE_NOT_COMPOSABLE); 282 s = p_ud->data + p_ud->size - 3; 283 if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_JUNGSEONG) 284 return (HANGULJAMO_STATE_COMPOSABLE); 285 return (HANGULJAMO_STATE_NOT_COMPOSABLE); 286 case HANGULJAMO_CLASS_NOT_HANGULJAMO: 287 return (HANGULJAMO_STATE_NOT_HANGULJAMO); 288 } 289 return (HANGULJAMO_STATE_NOT_HANGULJAMO); 290 } 291