xutf8.c revision f2e35a3a
1f2e35a3aSmrg/* $XTermId: xutf8.c,v 1.18 2020/06/23 22:45:51 tom Exp $ */ 2d522f475Smrg 3d522f475Smrg/* 4f2e35a3aSmrg * Copyright 2002-2019,2020 by Thomas E. Dickey 50bd37d32Smrg * Copyright (c) 2001 by Juliusz Chroboczek 60bd37d32Smrg * 70bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a 80bd37d32Smrg * copy of this software and associated documentation files (the 90bd37d32Smrg * "Software"), to deal in the Software without restriction, including 100bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish, 110bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to 120bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to 130bd37d32Smrg * the following conditions: 140bd37d32Smrg * 150bd37d32Smrg * The above copyright notice and this permission notice shall be included in 160bd37d32Smrg * all copies or substantial portions of the Software. 170bd37d32Smrg * 180bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 190bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 200bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 210bd37d32Smrg * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 220bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 230bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 240bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 250bd37d32Smrg */ 26d522f475Smrg 27d522f475Smrg#include <xterm.h> 28d522f475Smrg 29d522f475Smrg#include <X11/Xlib.h> 30d522f475Smrg#include <X11/Xatom.h> 31d522f475Smrg#include <X11/Xutil.h> 32d522f475Smrg#include <X11/Xmu/Xmu.h> 33d522f475Smrg 34d522f475Smrg#include <xutf8.h> 35d522f475Smrg 36d522f475Smrg#ifndef X_HAVE_UTF8_STRING 37d522f475Smrg 38d522f475Smrg#undef XA_UTF8_STRING 39d522f475Smrg#define KEYSYM2UCS_INCLUDED 40d522f475Smrg 41d522f475Smrg#include "keysym2ucs.c" 42d522f475Smrg 43d522f475SmrgAtom 44913cc679Smrg_xa_utf8_string(Display *dpy) 45d522f475Smrg{ 46d522f475Smrg static AtomPtr p = NULL; 47d522f475Smrg 48d522f475Smrg if (p == NULL) 49d522f475Smrg p = XmuMakeAtom("UTF8_STRING"); 50d522f475Smrg 51d522f475Smrg return XmuInternAtom(dpy, p); 52d522f475Smrg} 53d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 54d522f475Smrg 55d522f475Smrgstatic int 56d522f475Smrgutf8countBytes(int c) 57d522f475Smrg{ 58d522f475Smrg if (c < 0) 59d522f475Smrg return 0; 60d522f475Smrg 61d522f475Smrg if (c <= 0x7F) { 62d522f475Smrg return 1; 63d522f475Smrg } else if (c <= 0x7FF) { 64d522f475Smrg return 2; 65d522f475Smrg } else if (c <= 0xFFFF) { 66d522f475Smrg return 3; 67d522f475Smrg } else 68d522f475Smrg return 4; 69d522f475Smrg} 70d522f475Smrg 71d522f475Smrgstatic void 720bd37d32Smrgutf8insert(char *dest, int c, size_t *len_return) 73d522f475Smrg{ 74d522f475Smrg if (c < 0) 75d522f475Smrg return; 76d522f475Smrg 77d522f475Smrg if (c <= 0x7F) { 78e39b573cSmrg dest[0] = (char) c; 79d522f475Smrg *len_return = 1; 80d522f475Smrg } else if (c <= 0x7FF) { 81e39b573cSmrg dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 82e39b573cSmrg dest[1] = (char) (0x80 | (c & 0x3F)); 83d522f475Smrg *len_return = 2; 84d522f475Smrg } else if (c <= 0xFFFF) { 85e39b573cSmrg dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 86e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 87e39b573cSmrg dest[2] = (char) (0x80 | (c & 0x3F)); 88d522f475Smrg *len_return = 3; 89d522f475Smrg } else { 90e39b573cSmrg dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 91e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 92e39b573cSmrg dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 93e39b573cSmrg dest[3] = (char) (0x80 | (c & 0x3f)); 94d522f475Smrg *len_return = 4; 95d522f475Smrg } 96d522f475Smrg} 97d522f475Smrg 98913cc679Smrgstatic size_t 990bd37d32Smrgl1countUtf8Bytes(char *s, size_t len) 100d522f475Smrg{ 101913cc679Smrg size_t l = 0; 1020bd37d32Smrg while (len != 0) { 103d522f475Smrg if ((*s & 0x80) == 0) 104d522f475Smrg l++; 105d522f475Smrg else 106d522f475Smrg l += 2; 107d522f475Smrg s++; 108d522f475Smrg len--; 109d522f475Smrg } 110d522f475Smrg return l; 111d522f475Smrg} 112d522f475Smrg 113d522f475Smrgstatic void 1140bd37d32Smrgl1utf8copy(char *d, char *s, size_t len) 115d522f475Smrg{ 1160bd37d32Smrg size_t l; 1170bd37d32Smrg while (len != 0) { 118d522f475Smrg utf8insert(d, (*s) & 0xFF, &l); 1190bd37d32Smrg d += (int) l; 120d522f475Smrg s++; 121d522f475Smrg len--; 122d522f475Smrg } 123d522f475Smrg} 124d522f475Smrg 125d522f475Smrgstatic void 126d522f475Smrgutf8l1strcpy(char *d, char *s) 127d522f475Smrg{ 128d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 129d522f475Smrg while (*s) { 130d522f475Smrg if ((*s & 0x80) == 0) 131d522f475Smrg *d++ = *s++; 132d522f475Smrg else if ((*s & 0x7C) == 0x40) { 133d522f475Smrg if ((s[1] & 0x80) == 0) { 134d522f475Smrg s++; /* incorrect UTF-8 */ 135d522f475Smrg continue; 136d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 137e39b573cSmrg *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 138d522f475Smrg s += 2; 139d522f475Smrg } else { 140f2e35a3aSmrg *d++ = BAD_ASCII; 141d522f475Smrg SKIP; 142d522f475Smrg } 143d522f475Smrg } else { 144f2e35a3aSmrg *d++ = BAD_ASCII; 145d522f475Smrg SKIP; 146d522f475Smrg } 147d522f475Smrg } 148d522f475Smrg *d = 0; 149d522f475Smrg#undef SKIP 150d522f475Smrg} 151d522f475Smrg 152d522f475Smrg/* Keep this in sync with utf8l1strcpy! */ 153d522f475Smrgstatic int 154d522f475Smrgutf8l1strlen(char *s) 155d522f475Smrg{ 156d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 157d522f475Smrg int len = 0; 158d522f475Smrg while (*s) { 159d522f475Smrg if ((*s & 0x80) == 0) { 160d522f475Smrg s++; 161d522f475Smrg len++; 162d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 163d522f475Smrg if ((s[1] & 0x80) == 0) { 164d522f475Smrg s++; 165d522f475Smrg continue; 166d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 167d522f475Smrg len++; 168d522f475Smrg s += 2; 169d522f475Smrg } else { 170d522f475Smrg len++; 171d522f475Smrg SKIP; 172d522f475Smrg } 173d522f475Smrg } else { 174d522f475Smrg len++; 175d522f475Smrg SKIP; 176d522f475Smrg } 177d522f475Smrg } 178d522f475Smrg#undef SKIP 179d522f475Smrg return len; 180d522f475Smrg} 181d522f475Smrg 182d522f475Smrgint 183913cc679SmrgXutf8TextPropertyToTextList(Display *dpy, 184d522f475Smrg const XTextProperty * tp, 185d522f475Smrg char ***list_return, 186d522f475Smrg int *count_return) 187d522f475Smrg{ 188d522f475Smrg int utf8; 189d522f475Smrg char **list; 190d522f475Smrg int nelements; 191d522f475Smrg char *cp; 192d522f475Smrg char *start; 19320d2c4d2Smrg size_t i; 19420d2c4d2Smrg int j; 19520d2c4d2Smrg size_t datalen = tp->nitems; 19620d2c4d2Smrg size_t len; 197d522f475Smrg 198d522f475Smrg if (tp->format != 8) 199d522f475Smrg return XConverterNotFound; 200d522f475Smrg 201d522f475Smrg if (tp->encoding == XA_STRING) 202d522f475Smrg utf8 = 0; 203d522f475Smrg else if (tp->encoding == XA_UTF8_STRING(dpy)) 204d522f475Smrg utf8 = 1; 205d522f475Smrg else 206d522f475Smrg return XConverterNotFound; 207d522f475Smrg 208d522f475Smrg if (datalen == 0) { 209d522f475Smrg *list_return = NULL; 210d522f475Smrg *count_return = 0; 211d522f475Smrg return 0; 212d522f475Smrg } 213d522f475Smrg 214d522f475Smrg nelements = 1; 21520d2c4d2Smrg for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 216d522f475Smrg if (*cp == '\0') 217d522f475Smrg nelements++; 218d522f475Smrg } 219d522f475Smrg 22020d2c4d2Smrg list = TypeMallocN(char *, (unsigned) nelements); 221d522f475Smrg if (!list) 222d522f475Smrg return XNoMemory; 223d522f475Smrg 224d522f475Smrg if (utf8) 225d522f475Smrg len = datalen; 226d522f475Smrg else 227d522f475Smrg len = l1countUtf8Bytes((char *) tp->value, datalen); 228d522f475Smrg 229f2e35a3aSmrg start = malloc(len + 1); 230d522f475Smrg if (!start) { 231d522f475Smrg free(list); 232d522f475Smrg return XNoMemory; 233d522f475Smrg } 234d522f475Smrg 235d522f475Smrg if (utf8) 236d522f475Smrg memcpy(start, (char *) tp->value, datalen); 237d522f475Smrg else 238d522f475Smrg l1utf8copy(start, (char *) tp->value, datalen); 239d522f475Smrg start[len] = '\0'; 240d522f475Smrg 24120d2c4d2Smrg for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 242d522f475Smrg if (*cp == '\0') { 243d522f475Smrg list[j] = start; 244d522f475Smrg start = (cp + 1); 245d522f475Smrg j++; 246d522f475Smrg } 247d522f475Smrg } 248d522f475Smrg 249d522f475Smrg list[j] = NULL; 250d522f475Smrg *list_return = list; 251d522f475Smrg *count_return = nelements; 252d522f475Smrg return 0; 253d522f475Smrg} 254d522f475Smrg 255d522f475Smrgint 256913cc679SmrgXutf8TextListToTextProperty(Display *dpy, 257d522f475Smrg char **list, 258d522f475Smrg int count, 259d522f475Smrg XICCEncodingStyle style, 260d522f475Smrg XTextProperty * text_prop) 261d522f475Smrg{ 262d522f475Smrg XTextProperty proto; 263d522f475Smrg unsigned int nbytes; 264d522f475Smrg int i; 265d522f475Smrg 266d522f475Smrg if (style != XStringStyle && 267d522f475Smrg style != XCompoundTextStyle && 268d522f475Smrg style != XStdICCTextStyle && 269d522f475Smrg style != XUTF8StringStyle) 270d522f475Smrg return XConverterNotFound; 271d522f475Smrg 272d522f475Smrg if (style == XUTF8StringStyle) { 273d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 274d522f475Smrg nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 275d522f475Smrg } 276d522f475Smrg } else { 277d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 278d522f475Smrg nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 279d522f475Smrg } 280d522f475Smrg } 281d522f475Smrg 282d522f475Smrg if (style == XCompoundTextStyle) 283d522f475Smrg proto.encoding = XA_COMPOUND_TEXT(dpy); 284d522f475Smrg else if (style == XUTF8StringStyle) 285d522f475Smrg proto.encoding = XA_UTF8_STRING(dpy); 286d522f475Smrg else 287d522f475Smrg proto.encoding = XA_STRING; 288d522f475Smrg proto.format = 8; 289d522f475Smrg if (nbytes) 290d522f475Smrg proto.nitems = nbytes - 1; 291d522f475Smrg else 292d522f475Smrg proto.nitems = 0; 293d522f475Smrg proto.value = NULL; 294d522f475Smrg 295d522f475Smrg if (nbytes > 0) { 296d522f475Smrg char *buf = TypeMallocN(char, nbytes); 297d522f475Smrg if (!buf) 298d522f475Smrg return XNoMemory; 299d522f475Smrg 300d522f475Smrg proto.value = (unsigned char *) buf; 301d522f475Smrg for (i = 0; i < count; i++) { 302d522f475Smrg char *arg = list[i]; 303d522f475Smrg 304d522f475Smrg if (arg) { 305d522f475Smrg if (style == XUTF8StringStyle) { 306d522f475Smrg strcpy(buf, arg); 307d522f475Smrg } else { 308d522f475Smrg utf8l1strcpy(buf, arg); 309d522f475Smrg } 310d522f475Smrg buf += (strlen(buf) + 1); 311d522f475Smrg } else { 312d522f475Smrg *buf++ = '\0'; 313d522f475Smrg } 314d522f475Smrg } 315d522f475Smrg } else { 316d522f475Smrg proto.value = CastMalloc(unsigned char); /* easier for client */ 317d522f475Smrg if (!proto.value) 318d522f475Smrg return XNoMemory; 319d522f475Smrg 320d522f475Smrg proto.value[0] = '\0'; 321d522f475Smrg } 322d522f475Smrg 323d522f475Smrg *text_prop = proto; 324d522f475Smrg return 0; 325d522f475Smrg} 326d522f475Smrg 327d522f475Smrgint 328d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED, 329913cc679Smrg XKeyEvent *ev, 330d522f475Smrg char *buffer, 331d522f475Smrg int nbytes, 332d522f475Smrg KeySym * keysym_return, 333d522f475Smrg Status * status_return) 334d522f475Smrg{ 335d522f475Smrg int rc; 336d522f475Smrg KeySym keysym; 337d522f475Smrg int codepoint; 3380bd37d32Smrg size_t len; 339d522f475Smrg 340d522f475Smrg rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 341d522f475Smrg 342d522f475Smrg if (rc > 0) { 343d522f475Smrg codepoint = buffer[0] & 0xFF; 344d522f475Smrg } else { 345d522f475Smrg codepoint = keysym2ucs(keysym); 346d522f475Smrg } 347d522f475Smrg 348d522f475Smrg if (codepoint < 0) { 349d522f475Smrg if (keysym == None) { 350d522f475Smrg *status_return = XLookupNone; 351d522f475Smrg } else { 352d522f475Smrg *status_return = XLookupKeySym; 353d522f475Smrg *keysym_return = keysym; 354d522f475Smrg } 355d522f475Smrg return 0; 356d522f475Smrg } 357d522f475Smrg 358d522f475Smrg if (nbytes < utf8countBytes(codepoint)) { 359d522f475Smrg *status_return = XBufferOverflow; 360d522f475Smrg return utf8countBytes(codepoint); 361d522f475Smrg } 362d522f475Smrg 363d522f475Smrg utf8insert(buffer, codepoint, &len); 364d522f475Smrg 365d522f475Smrg if (keysym != None) { 366d522f475Smrg *keysym_return = keysym; 367d522f475Smrg *status_return = XLookupBoth; 368d522f475Smrg } else { 369d522f475Smrg *status_return = XLookupChars; 370d522f475Smrg } 3710bd37d32Smrg return (int) len; 372d522f475Smrg} 373913cc679Smrg 374d522f475Smrg#else /* X_HAVE_UTF8_STRING */ 375d522f475Smrg/* Silence the compiler */ 376d522f475Smrgvoid 377d522f475Smrgxutf8_dummy(void) 378d522f475Smrg{ 379d522f475Smrg return; 380d522f475Smrg} 381d522f475Smrg#endif 382