1/* $XTermId: xutf8.c,v 1.18 2020/06/23 22:45:51 tom Exp $ */ 2 3/* 4 * Copyright 2002-2019,2020 by Thomas E. Dickey 5 * Copyright (c) 2001 by Juliusz Chroboczek 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27#include <xterm.h> 28 29#include <X11/Xlib.h> 30#include <X11/Xatom.h> 31#include <X11/Xutil.h> 32#include <X11/Xmu/Xmu.h> 33 34#include <xutf8.h> 35 36#ifndef X_HAVE_UTF8_STRING 37 38#undef XA_UTF8_STRING 39#define KEYSYM2UCS_INCLUDED 40 41#include "keysym2ucs.c" 42 43Atom 44_xa_utf8_string(Display *dpy) 45{ 46 static AtomPtr p = NULL; 47 48 if (p == NULL) 49 p = XmuMakeAtom("UTF8_STRING"); 50 51 return XmuInternAtom(dpy, p); 52} 53#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 54 55static int 56utf8countBytes(int c) 57{ 58 if (c < 0) 59 return 0; 60 61 if (c <= 0x7F) { 62 return 1; 63 } else if (c <= 0x7FF) { 64 return 2; 65 } else if (c <= 0xFFFF) { 66 return 3; 67 } else 68 return 4; 69} 70 71static void 72utf8insert(char *dest, int c, size_t *len_return) 73{ 74 if (c < 0) 75 return; 76 77 if (c <= 0x7F) { 78 dest[0] = (char) c; 79 *len_return = 1; 80 } else if (c <= 0x7FF) { 81 dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 82 dest[1] = (char) (0x80 | (c & 0x3F)); 83 *len_return = 2; 84 } else if (c <= 0xFFFF) { 85 dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 86 dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 87 dest[2] = (char) (0x80 | (c & 0x3F)); 88 *len_return = 3; 89 } else { 90 dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 91 dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 92 dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 93 dest[3] = (char) (0x80 | (c & 0x3f)); 94 *len_return = 4; 95 } 96} 97 98static size_t 99l1countUtf8Bytes(char *s, size_t len) 100{ 101 size_t l = 0; 102 while (len != 0) { 103 if ((*s & 0x80) == 0) 104 l++; 105 else 106 l += 2; 107 s++; 108 len--; 109 } 110 return l; 111} 112 113static void 114l1utf8copy(char *d, char *s, size_t len) 115{ 116 size_t l; 117 while (len != 0) { 118 utf8insert(d, (*s) & 0xFF, &l); 119 d += (int) l; 120 s++; 121 len--; 122 } 123} 124 125static void 126utf8l1strcpy(char *d, char *s) 127{ 128#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 129 while (*s) { 130 if ((*s & 0x80) == 0) 131 *d++ = *s++; 132 else if ((*s & 0x7C) == 0x40) { 133 if ((s[1] & 0x80) == 0) { 134 s++; /* incorrect UTF-8 */ 135 continue; 136 } else if ((*s & 0x7C) == 0x40) { 137 *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 138 s += 2; 139 } else { 140 *d++ = BAD_ASCII; 141 SKIP; 142 } 143 } else { 144 *d++ = BAD_ASCII; 145 SKIP; 146 } 147 } 148 *d = 0; 149#undef SKIP 150} 151 152/* Keep this in sync with utf8l1strcpy! */ 153static int 154utf8l1strlen(char *s) 155{ 156#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 157 int len = 0; 158 while (*s) { 159 if ((*s & 0x80) == 0) { 160 s++; 161 len++; 162 } else if ((*s & 0x7C) == 0x40) { 163 if ((s[1] & 0x80) == 0) { 164 s++; 165 continue; 166 } else if ((*s & 0x7C) == 0x40) { 167 len++; 168 s += 2; 169 } else { 170 len++; 171 SKIP; 172 } 173 } else { 174 len++; 175 SKIP; 176 } 177 } 178#undef SKIP 179 return len; 180} 181 182int 183Xutf8TextPropertyToTextList(Display *dpy, 184 const XTextProperty * tp, 185 char ***list_return, 186 int *count_return) 187{ 188 int utf8; 189 char **list; 190 int nelements; 191 char *cp; 192 char *start; 193 size_t i; 194 int j; 195 size_t datalen = tp->nitems; 196 size_t len; 197 198 if (tp->format != 8) 199 return XConverterNotFound; 200 201 if (tp->encoding == XA_STRING) 202 utf8 = 0; 203 else if (tp->encoding == XA_UTF8_STRING(dpy)) 204 utf8 = 1; 205 else 206 return XConverterNotFound; 207 208 if (datalen == 0) { 209 *list_return = NULL; 210 *count_return = 0; 211 return 0; 212 } 213 214 nelements = 1; 215 for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 216 if (*cp == '\0') 217 nelements++; 218 } 219 220 list = TypeMallocN(char *, (unsigned) nelements); 221 if (!list) 222 return XNoMemory; 223 224 if (utf8) 225 len = datalen; 226 else 227 len = l1countUtf8Bytes((char *) tp->value, datalen); 228 229 start = malloc(len + 1); 230 if (!start) { 231 free(list); 232 return XNoMemory; 233 } 234 235 if (utf8) 236 memcpy(start, (char *) tp->value, datalen); 237 else 238 l1utf8copy(start, (char *) tp->value, datalen); 239 start[len] = '\0'; 240 241 for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 242 if (*cp == '\0') { 243 list[j] = start; 244 start = (cp + 1); 245 j++; 246 } 247 } 248 249 list[j] = NULL; 250 *list_return = list; 251 *count_return = nelements; 252 return 0; 253} 254 255int 256Xutf8TextListToTextProperty(Display *dpy, 257 char **list, 258 int count, 259 XICCEncodingStyle style, 260 XTextProperty * text_prop) 261{ 262 XTextProperty proto; 263 unsigned int nbytes; 264 int i; 265 266 if (style != XStringStyle && 267 style != XCompoundTextStyle && 268 style != XStdICCTextStyle && 269 style != XUTF8StringStyle) 270 return XConverterNotFound; 271 272 if (style == XUTF8StringStyle) { 273 for (i = 0, nbytes = 0; i < count; i++) { 274 nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 275 } 276 } else { 277 for (i = 0, nbytes = 0; i < count; i++) { 278 nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 279 } 280 } 281 282 if (style == XCompoundTextStyle) 283 proto.encoding = XA_COMPOUND_TEXT(dpy); 284 else if (style == XUTF8StringStyle) 285 proto.encoding = XA_UTF8_STRING(dpy); 286 else 287 proto.encoding = XA_STRING; 288 proto.format = 8; 289 if (nbytes) 290 proto.nitems = nbytes - 1; 291 else 292 proto.nitems = 0; 293 proto.value = NULL; 294 295 if (nbytes > 0) { 296 char *buf = TypeMallocN(char, nbytes); 297 if (!buf) 298 return XNoMemory; 299 300 proto.value = (unsigned char *) buf; 301 for (i = 0; i < count; i++) { 302 char *arg = list[i]; 303 304 if (arg) { 305 if (style == XUTF8StringStyle) { 306 strcpy(buf, arg); 307 } else { 308 utf8l1strcpy(buf, arg); 309 } 310 buf += (strlen(buf) + 1); 311 } else { 312 *buf++ = '\0'; 313 } 314 } 315 } else { 316 proto.value = CastMalloc(unsigned char); /* easier for client */ 317 if (!proto.value) 318 return XNoMemory; 319 320 proto.value[0] = '\0'; 321 } 322 323 *text_prop = proto; 324 return 0; 325} 326 327int 328Xutf8LookupString(XIC ic GCC_UNUSED, 329 XKeyEvent *ev, 330 char *buffer, 331 int nbytes, 332 KeySym * keysym_return, 333 Status * status_return) 334{ 335 int rc; 336 KeySym keysym; 337 int codepoint; 338 size_t len; 339 340 rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 341 342 if (rc > 0) { 343 codepoint = buffer[0] & 0xFF; 344 } else { 345 codepoint = keysym2ucs(keysym); 346 } 347 348 if (codepoint < 0) { 349 if (keysym == None) { 350 *status_return = XLookupNone; 351 } else { 352 *status_return = XLookupKeySym; 353 *keysym_return = keysym; 354 } 355 return 0; 356 } 357 358 if (nbytes < utf8countBytes(codepoint)) { 359 *status_return = XBufferOverflow; 360 return utf8countBytes(codepoint); 361 } 362 363 utf8insert(buffer, codepoint, &len); 364 365 if (keysym != None) { 366 *keysym_return = keysym; 367 *status_return = XLookupBoth; 368 } else { 369 *status_return = XLookupChars; 370 } 371 return (int) len; 372} 373 374#else /* X_HAVE_UTF8_STRING */ 375/* Silence the compiler */ 376void 377xutf8_dummy(void) 378{ 379 return; 380} 381#endif 382