xutf8.c revision 0bd37d32
10bd37d32Smrg/* $XTermId: xutf8.c,v 1.13 2012/05/09 20:56:09 tom Exp $ */ 2d522f475Smrg 3d522f475Smrg/* 40bd37d32Smrg * Copyright (c) 2001 by Juliusz Chroboczek 50bd37d32Smrg * 60bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a 70bd37d32Smrg * copy of this software and associated documentation files (the 80bd37d32Smrg * "Software"), to deal in the Software without restriction, including 90bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish, 100bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to 110bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to 120bd37d32Smrg * the following conditions: 130bd37d32Smrg * 140bd37d32Smrg * The above copyright notice and this permission notice shall be included in 150bd37d32Smrg * all copies or substantial portions of the Software. 160bd37d32Smrg * 170bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 180bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 190bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 200bd37d32Smrg * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 210bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 220bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 230bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 240bd37d32Smrg */ 25d522f475Smrg 26d522f475Smrg#include <xterm.h> 27d522f475Smrg 28d522f475Smrg#include <X11/Xlib.h> 29d522f475Smrg#include <X11/Xatom.h> 30d522f475Smrg#include <X11/Xutil.h> 31d522f475Smrg#include <X11/Xmu/Xmu.h> 32d522f475Smrg 33d522f475Smrg#include <xutf8.h> 34d522f475Smrg 35d522f475Smrg#ifndef X_HAVE_UTF8_STRING 36d522f475Smrg 37d522f475Smrg#undef XA_UTF8_STRING 38d522f475Smrg#define KEYSYM2UCS_INCLUDED 39d522f475Smrg 40d522f475Smrg#include "keysym2ucs.c" 41d522f475Smrg 42d522f475SmrgAtom 43d522f475Smrg_xa_utf8_string(Display * dpy) 44d522f475Smrg{ 45d522f475Smrg static AtomPtr p = NULL; 46d522f475Smrg 47d522f475Smrg if (p == NULL) 48d522f475Smrg p = XmuMakeAtom("UTF8_STRING"); 49d522f475Smrg 50d522f475Smrg return XmuInternAtom(dpy, p); 51d522f475Smrg} 52d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 53d522f475Smrg 54d522f475Smrgstatic int 55d522f475Smrgutf8countBytes(int c) 56d522f475Smrg{ 57d522f475Smrg if (c < 0) 58d522f475Smrg return 0; 59d522f475Smrg 60d522f475Smrg if (c <= 0x7F) { 61d522f475Smrg return 1; 62d522f475Smrg } else if (c <= 0x7FF) { 63d522f475Smrg return 2; 64d522f475Smrg } else if (c <= 0xFFFF) { 65d522f475Smrg return 3; 66d522f475Smrg } else 67d522f475Smrg return 4; 68d522f475Smrg} 69d522f475Smrg 70d522f475Smrgstatic void 710bd37d32Smrgutf8insert(char *dest, int c, size_t *len_return) 72d522f475Smrg{ 73d522f475Smrg if (c < 0) 74d522f475Smrg return; 75d522f475Smrg 76d522f475Smrg if (c <= 0x7F) { 77e39b573cSmrg dest[0] = (char) c; 78d522f475Smrg *len_return = 1; 79d522f475Smrg } else if (c <= 0x7FF) { 80e39b573cSmrg dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 81e39b573cSmrg dest[1] = (char) (0x80 | (c & 0x3F)); 82d522f475Smrg *len_return = 2; 83d522f475Smrg } else if (c <= 0xFFFF) { 84e39b573cSmrg dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 85e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 86e39b573cSmrg dest[2] = (char) (0x80 | (c & 0x3F)); 87d522f475Smrg *len_return = 3; 88d522f475Smrg } else { 89e39b573cSmrg dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 90e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 91e39b573cSmrg dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 92e39b573cSmrg dest[3] = (char) (0x80 | (c & 0x3f)); 93d522f475Smrg *len_return = 4; 94d522f475Smrg } 95d522f475Smrg} 96d522f475Smrg 97d522f475Smrgstatic int 980bd37d32Smrgl1countUtf8Bytes(char *s, size_t len) 99d522f475Smrg{ 100d522f475Smrg int l = 0; 1010bd37d32Smrg while (len != 0) { 102d522f475Smrg if ((*s & 0x80) == 0) 103d522f475Smrg l++; 104d522f475Smrg else 105d522f475Smrg l += 2; 106d522f475Smrg s++; 107d522f475Smrg len--; 108d522f475Smrg } 109d522f475Smrg return l; 110d522f475Smrg} 111d522f475Smrg 112d522f475Smrgstatic void 1130bd37d32Smrgl1utf8copy(char *d, char *s, size_t len) 114d522f475Smrg{ 1150bd37d32Smrg size_t l; 1160bd37d32Smrg while (len != 0) { 117d522f475Smrg utf8insert(d, (*s) & 0xFF, &l); 1180bd37d32Smrg d += (int) l; 119d522f475Smrg s++; 120d522f475Smrg len--; 121d522f475Smrg } 122d522f475Smrg} 123d522f475Smrg 124d522f475Smrgstatic void 125d522f475Smrgutf8l1strcpy(char *d, char *s) 126d522f475Smrg{ 127d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 128d522f475Smrg while (*s) { 129d522f475Smrg if ((*s & 0x80) == 0) 130d522f475Smrg *d++ = *s++; 131d522f475Smrg else if ((*s & 0x7C) == 0x40) { 132d522f475Smrg if ((s[1] & 0x80) == 0) { 133d522f475Smrg s++; /* incorrect UTF-8 */ 134d522f475Smrg continue; 135d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 136e39b573cSmrg *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 137d522f475Smrg s += 2; 138d522f475Smrg } else { 139d522f475Smrg *d++ = '?'; 140d522f475Smrg SKIP; 141d522f475Smrg } 142d522f475Smrg } else { 143d522f475Smrg *d++ = '?'; 144d522f475Smrg SKIP; 145d522f475Smrg } 146d522f475Smrg } 147d522f475Smrg *d = 0; 148d522f475Smrg#undef SKIP 149d522f475Smrg} 150d522f475Smrg 151d522f475Smrg/* Keep this in sync with utf8l1strcpy! */ 152d522f475Smrgstatic int 153d522f475Smrgutf8l1strlen(char *s) 154d522f475Smrg{ 155d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 156d522f475Smrg int len = 0; 157d522f475Smrg while (*s) { 158d522f475Smrg if ((*s & 0x80) == 0) { 159d522f475Smrg s++; 160d522f475Smrg len++; 161d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 162d522f475Smrg if ((s[1] & 0x80) == 0) { 163d522f475Smrg s++; 164d522f475Smrg continue; 165d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 166d522f475Smrg len++; 167d522f475Smrg s += 2; 168d522f475Smrg } else { 169d522f475Smrg len++; 170d522f475Smrg SKIP; 171d522f475Smrg } 172d522f475Smrg } else { 173d522f475Smrg len++; 174d522f475Smrg SKIP; 175d522f475Smrg } 176d522f475Smrg } 177d522f475Smrg#undef SKIP 178d522f475Smrg return len; 179d522f475Smrg} 180d522f475Smrg 181d522f475Smrgint 182d522f475SmrgXutf8TextPropertyToTextList(Display * dpy, 183d522f475Smrg const XTextProperty * tp, 184d522f475Smrg char ***list_return, 185d522f475Smrg int *count_return) 186d522f475Smrg{ 187d522f475Smrg int utf8; 188d522f475Smrg char **list; 189d522f475Smrg int nelements; 190d522f475Smrg char *cp; 191d522f475Smrg char *start; 19220d2c4d2Smrg size_t i; 19320d2c4d2Smrg int j; 19420d2c4d2Smrg size_t datalen = tp->nitems; 19520d2c4d2Smrg size_t len; 196d522f475Smrg 197d522f475Smrg if (tp->format != 8) 198d522f475Smrg return XConverterNotFound; 199d522f475Smrg 200d522f475Smrg if (tp->encoding == XA_STRING) 201d522f475Smrg utf8 = 0; 202d522f475Smrg else if (tp->encoding == XA_UTF8_STRING(dpy)) 203d522f475Smrg utf8 = 1; 204d522f475Smrg else 205d522f475Smrg return XConverterNotFound; 206d522f475Smrg 207d522f475Smrg if (datalen == 0) { 208d522f475Smrg *list_return = NULL; 209d522f475Smrg *count_return = 0; 210d522f475Smrg return 0; 211d522f475Smrg } 212d522f475Smrg 213d522f475Smrg nelements = 1; 21420d2c4d2Smrg for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 215d522f475Smrg if (*cp == '\0') 216d522f475Smrg nelements++; 217d522f475Smrg } 218d522f475Smrg 21920d2c4d2Smrg list = TypeMallocN(char *, (unsigned) nelements); 220d522f475Smrg if (!list) 221d522f475Smrg return XNoMemory; 222d522f475Smrg 223d522f475Smrg if (utf8) 224d522f475Smrg len = datalen; 225d522f475Smrg else 226d522f475Smrg len = l1countUtf8Bytes((char *) tp->value, datalen); 227d522f475Smrg 228d522f475Smrg start = CastMallocN(char, len); 229d522f475Smrg if (!start) { 230d522f475Smrg free(list); 231d522f475Smrg return XNoMemory; 232d522f475Smrg } 233d522f475Smrg 234d522f475Smrg if (utf8) 235d522f475Smrg memcpy(start, (char *) tp->value, datalen); 236d522f475Smrg else 237d522f475Smrg l1utf8copy(start, (char *) tp->value, datalen); 238d522f475Smrg start[len] = '\0'; 239d522f475Smrg 24020d2c4d2Smrg for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 241d522f475Smrg if (*cp == '\0') { 242d522f475Smrg list[j] = start; 243d522f475Smrg start = (cp + 1); 244d522f475Smrg j++; 245d522f475Smrg } 246d522f475Smrg } 247d522f475Smrg 248d522f475Smrg list[j] = NULL; 249d522f475Smrg *list_return = list; 250d522f475Smrg *count_return = nelements; 251d522f475Smrg return 0; 252d522f475Smrg} 253d522f475Smrg 254d522f475Smrgint 255d522f475SmrgXutf8TextListToTextProperty(Display * dpy, 256d522f475Smrg char **list, 257d522f475Smrg int count, 258d522f475Smrg XICCEncodingStyle style, 259d522f475Smrg XTextProperty * text_prop) 260d522f475Smrg{ 261d522f475Smrg XTextProperty proto; 262d522f475Smrg unsigned int nbytes; 263d522f475Smrg int i; 264d522f475Smrg 265d522f475Smrg if (style != XStringStyle && 266d522f475Smrg style != XCompoundTextStyle && 267d522f475Smrg style != XStdICCTextStyle && 268d522f475Smrg style != XUTF8StringStyle) 269d522f475Smrg return XConverterNotFound; 270d522f475Smrg 271d522f475Smrg if (style == XUTF8StringStyle) { 272d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 273d522f475Smrg nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 274d522f475Smrg } 275d522f475Smrg } else { 276d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 277d522f475Smrg nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 278d522f475Smrg } 279d522f475Smrg } 280d522f475Smrg 281d522f475Smrg if (style == XCompoundTextStyle) 282d522f475Smrg proto.encoding = XA_COMPOUND_TEXT(dpy); 283d522f475Smrg else if (style == XUTF8StringStyle) 284d522f475Smrg proto.encoding = XA_UTF8_STRING(dpy); 285d522f475Smrg else 286d522f475Smrg proto.encoding = XA_STRING; 287d522f475Smrg proto.format = 8; 288d522f475Smrg if (nbytes) 289d522f475Smrg proto.nitems = nbytes - 1; 290d522f475Smrg else 291d522f475Smrg proto.nitems = 0; 292d522f475Smrg proto.value = NULL; 293d522f475Smrg 294d522f475Smrg if (nbytes > 0) { 295d522f475Smrg char *buf = TypeMallocN(char, nbytes); 296d522f475Smrg if (!buf) 297d522f475Smrg return XNoMemory; 298d522f475Smrg 299d522f475Smrg proto.value = (unsigned char *) buf; 300d522f475Smrg for (i = 0; i < count; i++) { 301d522f475Smrg char *arg = list[i]; 302d522f475Smrg 303d522f475Smrg if (arg) { 304d522f475Smrg if (style == XUTF8StringStyle) { 305d522f475Smrg strcpy(buf, arg); 306d522f475Smrg } else { 307d522f475Smrg utf8l1strcpy(buf, arg); 308d522f475Smrg } 309d522f475Smrg buf += (strlen(buf) + 1); 310d522f475Smrg } else { 311d522f475Smrg *buf++ = '\0'; 312d522f475Smrg } 313d522f475Smrg } 314d522f475Smrg } else { 315d522f475Smrg proto.value = CastMalloc(unsigned char); /* easier for client */ 316d522f475Smrg if (!proto.value) 317d522f475Smrg return XNoMemory; 318d522f475Smrg 319d522f475Smrg proto.value[0] = '\0'; 320d522f475Smrg } 321d522f475Smrg 322d522f475Smrg *text_prop = proto; 323d522f475Smrg return 0; 324d522f475Smrg} 325d522f475Smrg 326d522f475Smrgint 327d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED, 328d522f475Smrg XKeyEvent * ev, 329d522f475Smrg char *buffer, 330d522f475Smrg int nbytes, 331d522f475Smrg KeySym * keysym_return, 332d522f475Smrg Status * status_return) 333d522f475Smrg{ 334d522f475Smrg int rc; 335d522f475Smrg KeySym keysym; 336d522f475Smrg int codepoint; 3370bd37d32Smrg size_t len; 338d522f475Smrg 339d522f475Smrg rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 340d522f475Smrg 341d522f475Smrg if (rc > 0) { 342d522f475Smrg codepoint = buffer[0] & 0xFF; 343d522f475Smrg } else { 344d522f475Smrg codepoint = keysym2ucs(keysym); 345d522f475Smrg } 346d522f475Smrg 347d522f475Smrg if (codepoint < 0) { 348d522f475Smrg if (keysym == None) { 349d522f475Smrg *status_return = XLookupNone; 350d522f475Smrg } else { 351d522f475Smrg *status_return = XLookupKeySym; 352d522f475Smrg *keysym_return = keysym; 353d522f475Smrg } 354d522f475Smrg return 0; 355d522f475Smrg } 356d522f475Smrg 357d522f475Smrg if (nbytes < utf8countBytes(codepoint)) { 358d522f475Smrg *status_return = XBufferOverflow; 359d522f475Smrg return utf8countBytes(codepoint); 360d522f475Smrg } 361d522f475Smrg 362d522f475Smrg utf8insert(buffer, codepoint, &len); 363d522f475Smrg 364d522f475Smrg if (keysym != None) { 365d522f475Smrg *keysym_return = keysym; 366d522f475Smrg *status_return = XLookupBoth; 367d522f475Smrg } else { 368d522f475Smrg *status_return = XLookupChars; 369d522f475Smrg } 3700bd37d32Smrg return (int) len; 371d522f475Smrg} 372d522f475Smrg#else /* X_HAVE_UTF8_STRING */ 373d522f475Smrg/* Silence the compiler */ 374d522f475Smrgvoid 375d522f475Smrgxutf8_dummy(void) 376d522f475Smrg{ 377d522f475Smrg return; 378d522f475Smrg} 379d522f475Smrg#endif 380