Home | History | Annotate | Line # | Download | only in dist
      1 /* $OpenBSD$ */
      2 
      3 /*
      4  * Copyright (c) 2023 Nicholas Marriott <nicholas.marriott (at) gmail.com>
      5  *
      6  * Permission to use, copy, modify, and distribute this software for any
      7  * purpose with or without fee is hereby granted, provided that the above
      8  * copyright notice and this permission notice appear in all copies.
      9  *
     10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
     15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
     16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     17  */
     18 
     19 #include <sys/types.h>
     20 
     21 #include <stdlib.h>
     22 #include <string.h>
     23 #include <wchar.h>
     24 
     25 #include "tmux.h"
     26 
/*
 * Fine-grained category of a three-byte UTF-8 sequence within the Hangul
 * Jamo blocks. Each member is annotated with the code point range it covers.
 */
enum hanguljamo_subclass {
	/* Anything outside the ranges listed below. */
	HANGULJAMO_SUBCLASS_NOT_HANGULJAMO = 0,

	/* U+1100 - U+1112 */
	HANGULJAMO_SUBCLASS_CHOSEONG,
	/* U+1113 - U+115E */
	HANGULJAMO_SUBCLASS_OLD_CHOSEONG,
	/* U+115F */
	HANGULJAMO_SUBCLASS_CHOSEONG_FILLER,

	/* U+1160 */
	HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER,
	/* U+1161 - U+1175 */
	HANGULJAMO_SUBCLASS_JUNGSEONG,
	/* U+1176 - U+11A7 */
	HANGULJAMO_SUBCLASS_OLD_JUNGSEONG,

	/* U+11A8 - U+11C2 */
	HANGULJAMO_SUBCLASS_JONGSEONG,
	/* U+11C3 - U+11FF */
	HANGULJAMO_SUBCLASS_OLD_JONGSEONG,

	/* U+A960 - U+A97C */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG,
	/* U+D7B0 - U+D7C6 */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG,
	/* U+D7CB - U+D7FB */
	HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG
};
     41 
/*
 * Coarse category of a Hangul Jamo: every subclass collapses into one of
 * these (see hanguljamo_get_class()).
 */
enum hanguljamo_class {
	HANGULJAMO_CLASS_NOT_HANGULJAMO = 0,
	HANGULJAMO_CLASS_CHOSEONG,
	HANGULJAMO_CLASS_JUNGSEONG,
	HANGULJAMO_CLASS_JONGSEONG
};
     48 
     49 /* Has this got a zero width joiner at the end? */
     50 int
     51 utf8_has_zwj(const struct utf8_data *ud)
     52 {
     53 	if (ud->size < 3)
     54 		return (0);
     55 	return (memcmp(ud->data + ud->size - 3, "\342\200\215", 3) == 0);
     56 }
     57 
     58 /* Is this zero width joiner U+200D? */
     59 int
     60 utf8_is_zwj(const struct utf8_data *ud)
     61 {
     62 	if (ud->size != 3)
     63 		return (0);
     64 	return (memcmp(ud->data, "\342\200\215", 3) == 0);
     65 }
     66 
     67 /* Is this variation selector U+FE0F? */
     68 int
     69 utf8_is_vs(const struct utf8_data *ud)
     70 {
     71 	if (ud->size != 3)
     72 		return (0);
     73 	return (memcmp(ud->data, "\357\270\217", 3) == 0);
     74 }
     75 
     76 /* Is this Hangul filler U+3164? */
     77 int
     78 utf8_is_hangul_filler(const struct utf8_data *ud)
     79 {
     80 	if (ud->size != 3)
     81 		return (0);
     82 	return (memcmp(ud->data, "\343\205\244", 3) == 0);
     83 }
     84 
     85 /* Should these two characters combine? */
     86 int
     87 utf8_should_combine(const struct utf8_data *with, const struct utf8_data *add)
     88 {
     89 	wchar_t	w, a;
     90 
     91 	if (utf8_towc(with, &w) != UTF8_DONE)
     92 		return (0);
     93 	if (utf8_towc(add, &a) != UTF8_DONE)
     94 		return (0);
     95 
     96 	/* Regional indicators. */
     97 	if ((a >= 0x1F1E6 && a <= 0x1F1FF) && (w >= 0x1F1E6 && w <= 0x1F1FF))
     98 		return (1);
     99 
    100 	/* Emoji skin tone modifiers. */
    101 	switch (a) {
    102 	case 0x1F44B:
    103 	case 0x1F44C:
    104 	case 0x1F44D:
    105 	case 0x1F44E:
    106 	case 0x1F44F:
    107 	case 0x1F450:
    108 	case 0x1F466:
    109 	case 0x1F467:
    110 	case 0x1F468:
    111 	case 0x1F469:
    112 	case 0x1F46E:
    113 	case 0x1F470:
    114 	case 0x1F471:
    115 	case 0x1F472:
    116 	case 0x1F473:
    117 	case 0x1F474:
    118 	case 0x1F475:
    119 	case 0x1F476:
    120 	case 0x1F477:
    121 	case 0x1F478:
    122 	case 0x1F47C:
    123 	case 0x1F481:
    124 	case 0x1F482:
    125 	case 0x1F483:
    126 	case 0x1F485:
    127 	case 0x1F486:
    128 	case 0x1F487:
    129 	case 0x1F4AA:
    130 	case 0x1F575:
    131 	case 0x1F57A:
    132 	case 0x1F590:
    133 	case 0x1F595:
    134 	case 0x1F596:
    135 	case 0x1F645:
    136 	case 0x1F646:
    137 	case 0x1F647:
    138 	case 0x1F64B:
    139 	case 0x1F64C:
    140 	case 0x1F64D:
    141 	case 0x1F64E:
    142 	case 0x1F64F:
    143 	case 0x1F6B4:
    144 	case 0x1F6B5:
    145 	case 0x1F6B6:
    146 	case 0x1F926:
    147 	case 0x1F937:
    148 	case 0x1F938:
    149 	case 0x1F939:
    150 	case 0x1F93D:
    151 	case 0x1F93E:
    152 	case 0x1F9B5:
    153 	case 0x1F9B6:
    154 	case 0x1F9B8:
    155 	case 0x1F9B9:
    156 	case 0x1F9CD:
    157 	case 0x1F9CE:
    158 	case 0x1F9CF:
    159 	case 0x1F9D1:
    160 	case 0x1F9D2:
    161 	case 0x1F9D3:
    162 	case 0x1F9D4:
    163 	case 0x1F9D5:
    164 	case 0x1F9D6:
    165 	case 0x1F9D7:
    166 	case 0x1F9D8:
    167 	case 0x1F9D9:
    168 	case 0x1F9DA:
    169 	case 0x1F9DB:
    170 	case 0x1F9DC:
    171 	case 0x1F9DD:
    172 	case 0x1F9DE:
    173 	case 0x1F9DF:
    174 		if (w >= 0x1F3FB && w <= 0x1F3FF)
    175 			return (1);
    176 		break;
    177 	}
    178 	return 0;
    179 }
    180 
    181 static enum hanguljamo_subclass
    182 hanguljamo_get_subclass(const u_char *s)
    183 {
    184 	switch (s[0]) {
    185 	case 0xE1:
    186 		switch (s[1]) {
    187 		case 0x84:
    188 			if (s[2] >= 0x80 && s[2] <= 0x92)
    189 				return (HANGULJAMO_SUBCLASS_CHOSEONG);
    190 			if (s[2] >= 0x93 && s[2] <= 0xBF)
    191 				return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
    192 			break;
    193 		case 0x85:
    194 			if (s[2] == 0x9F)
    195 				return (HANGULJAMO_SUBCLASS_CHOSEONG_FILLER);
    196 			if (s[2] == 0xA0)
    197 				return (HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER);
    198 			if (s[2] >= 0x80 && s[2] <= 0x9E)
    199 				return (HANGULJAMO_SUBCLASS_OLD_CHOSEONG);
    200 			if (s[2] >= 0xA1 && s[2] <= 0xB5)
    201 				return (HANGULJAMO_SUBCLASS_JUNGSEONG);
    202 			if (s[2] >= 0xB6 && s[2] <= 0xBF)
    203 				return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
    204 			break;
    205 		case 0x86:
    206 			if (s[2] >= 0x80 && s[2] <= 0xA7)
    207 				return (HANGULJAMO_SUBCLASS_OLD_JUNGSEONG);
    208 			if (s[2] >= 0xA8 && s[2] <= 0xBF)
    209 				return (HANGULJAMO_SUBCLASS_JONGSEONG);
    210 			break;
    211 		case 0x87:
    212 			if (s[2] >= 0x80 && s[2] <= 0x82)
    213 				return (HANGULJAMO_SUBCLASS_JONGSEONG);
    214 			if (s[2] >= 0x83 && s[2] <= 0xBF)
    215 				return (HANGULJAMO_SUBCLASS_OLD_JONGSEONG);
    216 			break;
    217 		}
    218 		break;
    219 	case 0xEA:
    220 		if (s[1] == 0xA5 && s[2] >= 0xA0 && s[2] <= 0xBC)
    221 			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG);
    222 		break;
    223 	case 0xED:
    224 		if (s[1] == 0x9E && s[2] >= 0xB0 && s[2] <= 0xBF)
    225 			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
    226 		if (s[1] != 0x9F)
    227 			break;
    228 		if (s[2] >= 0x80 && s[2] <= 0x86)
    229 			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG);
    230 		if (s[2] >= 0x8B && s[2] <= 0xBB)
    231 			return (HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG);
    232 		break;
    233 	}
    234 	return (HANGULJAMO_SUBCLASS_NOT_HANGULJAMO);
    235 }
    236 
    237 static enum hanguljamo_class
    238 hanguljamo_get_class(const u_char *s)
    239 {
    240 	switch (hanguljamo_get_subclass(s)) {
    241 	case HANGULJAMO_SUBCLASS_CHOSEONG:
    242 	case HANGULJAMO_SUBCLASS_CHOSEONG_FILLER:
    243 	case HANGULJAMO_SUBCLASS_OLD_CHOSEONG:
    244 	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_CHOSEONG:
    245 		return (HANGULJAMO_CLASS_CHOSEONG);
    246 	case HANGULJAMO_SUBCLASS_JUNGSEONG:
    247 	case HANGULJAMO_SUBCLASS_JUNGSEONG_FILLER:
    248 	case HANGULJAMO_SUBCLASS_OLD_JUNGSEONG:
    249 	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JUNGSEONG:
    250 		return (HANGULJAMO_CLASS_JUNGSEONG);
    251 	case HANGULJAMO_SUBCLASS_JONGSEONG:
    252 	case HANGULJAMO_SUBCLASS_OLD_JONGSEONG:
    253 	case HANGULJAMO_SUBCLASS_EXTENDED_OLD_JONGSEONG:
    254 		return (HANGULJAMO_CLASS_JONGSEONG);
    255 	case HANGULJAMO_SUBCLASS_NOT_HANGULJAMO:
    256 		return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
    257 	}
    258 	return (HANGULJAMO_CLASS_NOT_HANGULJAMO);
    259 }
    260 
    261 enum hanguljamo_state
    262 hanguljamo_check_state(const struct utf8_data *p_ud, const struct utf8_data *ud)
    263 {
    264 	const u_char	*s;
    265 
    266 	if (ud->size != 3)
    267 		return (HANGULJAMO_STATE_NOT_HANGULJAMO);
    268 
    269 	switch (hanguljamo_get_class(ud->data)) {
    270 	case HANGULJAMO_CLASS_CHOSEONG:
    271 		return (HANGULJAMO_STATE_CHOSEONG);
    272 	case HANGULJAMO_CLASS_JUNGSEONG:
    273 		if (p_ud->size < 3)
    274 			return (HANGULJAMO_STATE_NOT_COMPOSABLE);
    275 		s = p_ud->data + p_ud->size - 3;
    276 		if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_CHOSEONG)
    277 			return (HANGULJAMO_STATE_COMPOSABLE);
    278 		return (HANGULJAMO_STATE_NOT_COMPOSABLE);
    279 	case HANGULJAMO_CLASS_JONGSEONG:
    280 		if (p_ud->size < 3)
    281 			return (HANGULJAMO_STATE_NOT_COMPOSABLE);
    282 		s = p_ud->data + p_ud->size - 3;
    283 		if (hanguljamo_get_class(s) == HANGULJAMO_CLASS_JUNGSEONG)
    284 			return (HANGULJAMO_STATE_COMPOSABLE);
    285 		return (HANGULJAMO_STATE_NOT_COMPOSABLE);
    286 	case HANGULJAMO_CLASS_NOT_HANGULJAMO:
    287 		return (HANGULJAMO_STATE_NOT_HANGULJAMO);
    288 	}
    289 	return (HANGULJAMO_STATE_NOT_HANGULJAMO);
    290 }
    291