xutf8.c revision e39b573c
1e39b573cSmrg/* $XTermId: xutf8.c,v 1.10 2011/07/04 13:51:08 tom Exp $ */ 2d522f475Smrg 3d522f475Smrg/* 4d522f475SmrgCopyright (c) 2001 by Juliusz Chroboczek 5d522f475Smrg 6d522f475SmrgPermission is hereby granted, free of charge, to any person obtaining a copy 7d522f475Smrgof this software and associated documentation files (the "Software"), to deal 8d522f475Smrgin the Software without restriction, including without limitation the rights 9d522f475Smrgto use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10d522f475Smrgcopies of the Software, and to permit persons to whom the Software is 11d522f475Smrgfurnished to do so, subject to the following conditions: 12d522f475Smrg 13d522f475SmrgThe above copyright notice and this permission notice shall be included in 14d522f475Smrgall copies or substantial portions of the Software. 15d522f475Smrg 16d522f475SmrgTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17d522f475SmrgIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18d522f475SmrgFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19d522f475SmrgAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20d522f475SmrgLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21d522f475SmrgOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22d522f475SmrgTHE SOFTWARE. 23d522f475Smrg*/ 24d522f475Smrg 25d522f475Smrg#include <xterm.h> 26d522f475Smrg 27d522f475Smrg#include <X11/Xlib.h> 28d522f475Smrg#include <X11/Xatom.h> 29d522f475Smrg#include <X11/Xutil.h> 30d522f475Smrg#include <X11/Xmu/Xmu.h> 31d522f475Smrg 32d522f475Smrg#include <xutf8.h> 33d522f475Smrg 34d522f475Smrg#ifndef X_HAVE_UTF8_STRING 35d522f475Smrg 36d522f475Smrg#undef XA_UTF8_STRING 37d522f475Smrg#define KEYSYM2UCS_INCLUDED 38d522f475Smrg 39d522f475Smrg#include "keysym2ucs.c" 40d522f475Smrg 41d522f475SmrgAtom 42d522f475Smrg_xa_utf8_string(Display * dpy) 43d522f475Smrg{ 44d522f475Smrg static AtomPtr p = NULL; 45d522f475Smrg 46d522f475Smrg if (p == NULL) 47d522f475Smrg p = XmuMakeAtom("UTF8_STRING"); 48d522f475Smrg 49d522f475Smrg return XmuInternAtom(dpy, p); 50d522f475Smrg} 51d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 52d522f475Smrg 53d522f475Smrgstatic int 54d522f475Smrgutf8countBytes(int c) 55d522f475Smrg{ 56d522f475Smrg if (c < 0) 57d522f475Smrg return 0; 58d522f475Smrg 59d522f475Smrg if (c <= 0x7F) { 60d522f475Smrg return 1; 61d522f475Smrg } else if (c <= 0x7FF) { 62d522f475Smrg return 2; 63d522f475Smrg } else if (c <= 0xFFFF) { 64d522f475Smrg return 3; 65d522f475Smrg } else 66d522f475Smrg return 4; 67d522f475Smrg} 68d522f475Smrg 69d522f475Smrgstatic void 70d522f475Smrgutf8insert(char *dest, int c, int *len_return) 71d522f475Smrg{ 72d522f475Smrg if (c < 0) 73d522f475Smrg return; 74d522f475Smrg 75d522f475Smrg if (c <= 0x7F) { 76e39b573cSmrg dest[0] = (char) c; 77d522f475Smrg *len_return = 1; 78d522f475Smrg } else if (c <= 0x7FF) { 79e39b573cSmrg dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 80e39b573cSmrg dest[1] = (char) (0x80 | (c & 0x3F)); 81d522f475Smrg *len_return = 2; 82d522f475Smrg } else if (c <= 0xFFFF) { 83e39b573cSmrg dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 84e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 85e39b573cSmrg dest[2] = (char) (0x80 | (c & 0x3F)); 86d522f475Smrg *len_return = 3; 87d522f475Smrg } else { 88e39b573cSmrg dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 89e39b573cSmrg dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 90e39b573cSmrg dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 91e39b573cSmrg dest[3] = (char) (0x80 | (c & 0x3f)); 92d522f475Smrg *len_return = 4; 93d522f475Smrg } 94d522f475Smrg} 95d522f475Smrg 96d522f475Smrgstatic int 97d522f475Smrgl1countUtf8Bytes(char *s, int len) 98d522f475Smrg{ 99d522f475Smrg int l = 0; 100d522f475Smrg while (len > 0) { 101d522f475Smrg if ((*s & 0x80) == 0) 102d522f475Smrg l++; 103d522f475Smrg else 104d522f475Smrg l += 2; 105d522f475Smrg s++; 106d522f475Smrg len--; 107d522f475Smrg } 108d522f475Smrg return l; 109d522f475Smrg} 110d522f475Smrg 111d522f475Smrgstatic void 112d522f475Smrgl1utf8copy(char *d, char *s, int len) 113d522f475Smrg{ 114d522f475Smrg int l; 115d522f475Smrg while (len > 0) { 116d522f475Smrg utf8insert(d, (*s) & 0xFF, &l); 117d522f475Smrg d += l; 118d522f475Smrg s++; 119d522f475Smrg len--; 120d522f475Smrg } 121d522f475Smrg} 122d522f475Smrg 123d522f475Smrgstatic void 124d522f475Smrgutf8l1strcpy(char *d, char *s) 125d522f475Smrg{ 126d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 127d522f475Smrg while (*s) { 128d522f475Smrg if ((*s & 0x80) == 0) 129d522f475Smrg *d++ = *s++; 130d522f475Smrg else if ((*s & 0x7C) == 0x40) { 131d522f475Smrg if ((s[1] & 0x80) == 0) { 132d522f475Smrg s++; /* incorrect UTF-8 */ 133d522f475Smrg continue; 134d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 135e39b573cSmrg *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 136d522f475Smrg s += 2; 137d522f475Smrg } else { 138d522f475Smrg *d++ = '?'; 139d522f475Smrg SKIP; 140d522f475Smrg } 141d522f475Smrg } else { 142d522f475Smrg *d++ = '?'; 143d522f475Smrg SKIP; 144d522f475Smrg } 145d522f475Smrg } 146d522f475Smrg *d = 0; 147d522f475Smrg#undef SKIP 148d522f475Smrg} 149d522f475Smrg 150d522f475Smrg/* Keep this in sync with utf8l1strcpy! */ 151d522f475Smrgstatic int 152d522f475Smrgutf8l1strlen(char *s) 153d522f475Smrg{ 154d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 155d522f475Smrg int len = 0; 156d522f475Smrg while (*s) { 157d522f475Smrg if ((*s & 0x80) == 0) { 158d522f475Smrg s++; 159d522f475Smrg len++; 160d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 161d522f475Smrg if ((s[1] & 0x80) == 0) { 162d522f475Smrg s++; 163d522f475Smrg continue; 164d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 165d522f475Smrg len++; 166d522f475Smrg s += 2; 167d522f475Smrg } else { 168d522f475Smrg len++; 169d522f475Smrg SKIP; 170d522f475Smrg } 171d522f475Smrg } else { 172d522f475Smrg len++; 173d522f475Smrg SKIP; 174d522f475Smrg } 175d522f475Smrg } 176d522f475Smrg#undef SKIP 177d522f475Smrg return len; 178d522f475Smrg} 179d522f475Smrg 180d522f475Smrgint 181d522f475SmrgXutf8TextPropertyToTextList(Display * dpy, 182d522f475Smrg const XTextProperty * tp, 183d522f475Smrg char ***list_return, 184d522f475Smrg int *count_return) 185d522f475Smrg{ 186d522f475Smrg int utf8; 187d522f475Smrg char **list; 188d522f475Smrg int nelements; 189d522f475Smrg char *cp; 190d522f475Smrg char *start; 19120d2c4d2Smrg size_t i; 19220d2c4d2Smrg int j; 19320d2c4d2Smrg size_t datalen = tp->nitems; 19420d2c4d2Smrg size_t len; 195d522f475Smrg 196d522f475Smrg if (tp->format != 8) 197d522f475Smrg return XConverterNotFound; 198d522f475Smrg 199d522f475Smrg if (tp->encoding == XA_STRING) 200d522f475Smrg utf8 = 0; 201d522f475Smrg else if (tp->encoding == XA_UTF8_STRING(dpy)) 202d522f475Smrg utf8 = 1; 203d522f475Smrg else 204d522f475Smrg return XConverterNotFound; 205d522f475Smrg 206d522f475Smrg if (datalen == 0) { 207d522f475Smrg *list_return = NULL; 208d522f475Smrg *count_return = 0; 209d522f475Smrg return 0; 210d522f475Smrg } 211d522f475Smrg 212d522f475Smrg nelements = 1; 21320d2c4d2Smrg for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 214d522f475Smrg if (*cp == '\0') 215d522f475Smrg nelements++; 216d522f475Smrg } 217d522f475Smrg 21820d2c4d2Smrg list = TypeMallocN(char *, (unsigned) nelements); 219d522f475Smrg if (!list) 220d522f475Smrg return XNoMemory; 221d522f475Smrg 222d522f475Smrg if (utf8) 223d522f475Smrg len = datalen; 224d522f475Smrg else 225d522f475Smrg len = l1countUtf8Bytes((char *) tp->value, datalen); 226d522f475Smrg 227d522f475Smrg start = CastMallocN(char, len); 228d522f475Smrg if (!start) { 229d522f475Smrg free(list); 230d522f475Smrg return XNoMemory; 231d522f475Smrg } 232d522f475Smrg 233d522f475Smrg if (utf8) 234d522f475Smrg memcpy(start, (char *) tp->value, datalen); 235d522f475Smrg else 236d522f475Smrg l1utf8copy(start, (char *) tp->value, datalen); 237d522f475Smrg start[len] = '\0'; 238d522f475Smrg 23920d2c4d2Smrg for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 240d522f475Smrg if (*cp == '\0') { 241d522f475Smrg list[j] = start; 242d522f475Smrg start = (cp + 1); 243d522f475Smrg j++; 244d522f475Smrg } 245d522f475Smrg } 246d522f475Smrg 247d522f475Smrg list[j] = NULL; 248d522f475Smrg *list_return = list; 249d522f475Smrg *count_return = nelements; 250d522f475Smrg return 0; 251d522f475Smrg} 252d522f475Smrg 253d522f475Smrgint 254d522f475SmrgXutf8TextListToTextProperty(Display * dpy, 255d522f475Smrg char **list, 256d522f475Smrg int count, 257d522f475Smrg XICCEncodingStyle style, 258d522f475Smrg XTextProperty * text_prop) 259d522f475Smrg{ 260d522f475Smrg XTextProperty proto; 261d522f475Smrg unsigned int nbytes; 262d522f475Smrg int i; 263d522f475Smrg 264d522f475Smrg if (style != XStringStyle && 265d522f475Smrg style != XCompoundTextStyle && 266d522f475Smrg style != XStdICCTextStyle && 267d522f475Smrg style != XUTF8StringStyle) 268d522f475Smrg return XConverterNotFound; 269d522f475Smrg 270d522f475Smrg if (style == XUTF8StringStyle) { 271d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 272d522f475Smrg nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 273d522f475Smrg } 274d522f475Smrg } else { 275d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 276d522f475Smrg nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 277d522f475Smrg } 278d522f475Smrg } 279d522f475Smrg 280d522f475Smrg if (style == XCompoundTextStyle) 281d522f475Smrg proto.encoding = XA_COMPOUND_TEXT(dpy); 282d522f475Smrg else if (style == XUTF8StringStyle) 283d522f475Smrg proto.encoding = XA_UTF8_STRING(dpy); 284d522f475Smrg else 285d522f475Smrg proto.encoding = XA_STRING; 286d522f475Smrg proto.format = 8; 287d522f475Smrg if (nbytes) 288d522f475Smrg proto.nitems = nbytes - 1; 289d522f475Smrg else 290d522f475Smrg proto.nitems = 0; 291d522f475Smrg proto.value = NULL; 292d522f475Smrg 293d522f475Smrg if (nbytes > 0) { 294d522f475Smrg char *buf = TypeMallocN(char, nbytes); 295d522f475Smrg if (!buf) 296d522f475Smrg return XNoMemory; 297d522f475Smrg 298d522f475Smrg proto.value = (unsigned char *) buf; 299d522f475Smrg for (i = 0; i < count; i++) { 300d522f475Smrg char *arg = list[i]; 301d522f475Smrg 302d522f475Smrg if (arg) { 303d522f475Smrg if (style == XUTF8StringStyle) { 304d522f475Smrg strcpy(buf, arg); 305d522f475Smrg } else { 306d522f475Smrg utf8l1strcpy(buf, arg); 307d522f475Smrg } 308d522f475Smrg buf += (strlen(buf) + 1); 309d522f475Smrg } else { 310d522f475Smrg *buf++ = '\0'; 311d522f475Smrg } 312d522f475Smrg } 313d522f475Smrg } else { 314d522f475Smrg proto.value = CastMalloc(unsigned char); /* easier for client */ 315d522f475Smrg if (!proto.value) 316d522f475Smrg return XNoMemory; 317d522f475Smrg 318d522f475Smrg proto.value[0] = '\0'; 319d522f475Smrg } 320d522f475Smrg 321d522f475Smrg *text_prop = proto; 322d522f475Smrg return 0; 323d522f475Smrg} 324d522f475Smrg 325d522f475Smrgint 326d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED, 327d522f475Smrg XKeyEvent * ev, 328d522f475Smrg char *buffer, 329d522f475Smrg int nbytes, 330d522f475Smrg KeySym * keysym_return, 331d522f475Smrg Status * status_return) 332d522f475Smrg{ 333d522f475Smrg int rc; 334d522f475Smrg KeySym keysym; 335d522f475Smrg int codepoint; 336d522f475Smrg int len; 337d522f475Smrg 338d522f475Smrg rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 339d522f475Smrg 340d522f475Smrg if (rc > 0) { 341d522f475Smrg codepoint = buffer[0] & 0xFF; 342d522f475Smrg } else { 343d522f475Smrg codepoint = keysym2ucs(keysym); 344d522f475Smrg } 345d522f475Smrg 346d522f475Smrg if (codepoint < 0) { 347d522f475Smrg if (keysym == None) { 348d522f475Smrg *status_return = XLookupNone; 349d522f475Smrg } else { 350d522f475Smrg *status_return = XLookupKeySym; 351d522f475Smrg *keysym_return = keysym; 352d522f475Smrg } 353d522f475Smrg return 0; 354d522f475Smrg } 355d522f475Smrg 356d522f475Smrg if (nbytes < utf8countBytes(codepoint)) { 357d522f475Smrg *status_return = XBufferOverflow; 358d522f475Smrg return utf8countBytes(codepoint); 359d522f475Smrg } 360d522f475Smrg 361d522f475Smrg utf8insert(buffer, codepoint, &len); 362d522f475Smrg 363d522f475Smrg if (keysym != None) { 364d522f475Smrg *keysym_return = keysym; 365d522f475Smrg *status_return = XLookupBoth; 366d522f475Smrg } else { 367d522f475Smrg *status_return = XLookupChars; 368d522f475Smrg } 369d522f475Smrg return len; 370d522f475Smrg} 371d522f475Smrg#else /* X_HAVE_UTF8_STRING */ 372d522f475Smrg/* Silence the compiler */ 373d522f475Smrgvoid 374d522f475Smrgxutf8_dummy(void) 375d522f475Smrg{ 376d522f475Smrg return; 377d522f475Smrg} 378d522f475Smrg#endif 379