xutf8.c revision d522f475
1d522f475Smrg/* $XTermId: xutf8.c,v 1.8 2005/01/14 01:50:03 tom Exp $ */ 2d522f475Smrg 3d522f475Smrg/* $XFree86: xc/programs/xterm/xutf8.c,v 1.4 2005/01/14 01:50:03 dickey Exp $ */ 4d522f475Smrg/* 5d522f475SmrgCopyright (c) 2001 by Juliusz Chroboczek 6d522f475Smrg 7d522f475SmrgPermission is hereby granted, free of charge, to any person obtaining a copy 8d522f475Smrgof this software and associated documentation files (the "Software"), to deal 9d522f475Smrgin the Software without restriction, including without limitation the rights 10d522f475Smrgto use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11d522f475Smrgcopies of the Software, and to permit persons to whom the Software is 12d522f475Smrgfurnished to do so, subject to the following conditions: 13d522f475Smrg 14d522f475SmrgThe above copyright notice and this permission notice shall be included in 15d522f475Smrgall copies or substantial portions of the Software. 16d522f475Smrg 17d522f475SmrgTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18d522f475SmrgIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19d522f475SmrgFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20d522f475SmrgAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21d522f475SmrgLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22d522f475SmrgOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23d522f475SmrgTHE SOFTWARE. 24d522f475Smrg*/ 25d522f475Smrg 26d522f475Smrg#include <xterm.h> 27d522f475Smrg 28d522f475Smrg#include <X11/Xlib.h> 29d522f475Smrg#include <X11/Xatom.h> 30d522f475Smrg#include <X11/Xutil.h> 31d522f475Smrg#include <X11/Xmu/Xmu.h> 32d522f475Smrg 33d522f475Smrg#include <xutf8.h> 34d522f475Smrg 35d522f475Smrg#ifndef X_HAVE_UTF8_STRING 36d522f475Smrg 37d522f475Smrg#undef XA_UTF8_STRING 38d522f475Smrg#define KEYSYM2UCS_INCLUDED 39d522f475Smrg 40d522f475Smrg#include "keysym2ucs.c" 41d522f475Smrg 42d522f475SmrgAtom 43d522f475Smrg_xa_utf8_string(Display * dpy) 44d522f475Smrg{ 45d522f475Smrg static AtomPtr p = NULL; 46d522f475Smrg 47d522f475Smrg if (p == NULL) 48d522f475Smrg p = XmuMakeAtom("UTF8_STRING"); 49d522f475Smrg 50d522f475Smrg return XmuInternAtom(dpy, p); 51d522f475Smrg} 52d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 53d522f475Smrg 54d522f475Smrgstatic int 55d522f475Smrgutf8countBytes(int c) 56d522f475Smrg{ 57d522f475Smrg if (c < 0) 58d522f475Smrg return 0; 59d522f475Smrg 60d522f475Smrg if (c <= 0x7F) { 61d522f475Smrg return 1; 62d522f475Smrg } else if (c <= 0x7FF) { 63d522f475Smrg return 2; 64d522f475Smrg } else if (c <= 0xFFFF) { 65d522f475Smrg return 3; 66d522f475Smrg } else 67d522f475Smrg return 4; 68d522f475Smrg} 69d522f475Smrg 70d522f475Smrgstatic void 71d522f475Smrgutf8insert(char *dest, int c, int *len_return) 72d522f475Smrg{ 73d522f475Smrg if (c < 0) 74d522f475Smrg return; 75d522f475Smrg 76d522f475Smrg if (c <= 0x7F) { 77d522f475Smrg dest[0] = c; 78d522f475Smrg *len_return = 1; 79d522f475Smrg } else if (c <= 0x7FF) { 80d522f475Smrg dest[0] = 0xC0 | ((c >> 6) & 0x1F); 81d522f475Smrg dest[1] = 0x80 | (c & 0x3F); 82d522f475Smrg *len_return = 2; 83d522f475Smrg } else if (c <= 0xFFFF) { 84d522f475Smrg dest[0] = 0xE0 | ((c >> 12) & 0x0F); 85d522f475Smrg dest[1] = 0x80 | ((c >> 6) & 0x3F); 86d522f475Smrg dest[2] = 0x80 | (c & 0x3F); 87d522f475Smrg *len_return = 3; 88d522f475Smrg } else { 89d522f475Smrg dest[0] = 0xF0 | ((c >> 18) & 0x07); 90d522f475Smrg dest[1] = 0x80 | ((c >> 12) & 0x3f); 91d522f475Smrg dest[2] = 0x80 | ((c >> 6) & 0x3f); 92d522f475Smrg dest[3] = 0x80 | (c & 0x3f); 93d522f475Smrg *len_return = 4; 94d522f475Smrg } 95d522f475Smrg} 96d522f475Smrg 97d522f475Smrgstatic int 98d522f475Smrgl1countUtf8Bytes(char *s, int len) 99d522f475Smrg{ 100d522f475Smrg int l = 0; 101d522f475Smrg while (len > 0) { 102d522f475Smrg if ((*s & 0x80) == 0) 103d522f475Smrg l++; 104d522f475Smrg else 105d522f475Smrg l += 2; 106d522f475Smrg s++; 107d522f475Smrg len--; 108d522f475Smrg } 109d522f475Smrg return l; 110d522f475Smrg} 111d522f475Smrg 112d522f475Smrgstatic void 113d522f475Smrgl1utf8copy(char *d, char *s, int len) 114d522f475Smrg{ 115d522f475Smrg int l; 116d522f475Smrg while (len > 0) { 117d522f475Smrg utf8insert(d, (*s) & 0xFF, &l); 118d522f475Smrg d += l; 119d522f475Smrg s++; 120d522f475Smrg len--; 121d522f475Smrg } 122d522f475Smrg} 123d522f475Smrg 124d522f475Smrgstatic void 125d522f475Smrgutf8l1strcpy(char *d, char *s) 126d522f475Smrg{ 127d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 128d522f475Smrg while (*s) { 129d522f475Smrg if ((*s & 0x80) == 0) 130d522f475Smrg *d++ = *s++; 131d522f475Smrg else if ((*s & 0x7C) == 0x40) { 132d522f475Smrg if ((s[1] & 0x80) == 0) { 133d522f475Smrg s++; /* incorrect UTF-8 */ 134d522f475Smrg continue; 135d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 136d522f475Smrg *d++ = ((*s & 0x03) << 6) | (s[1] & 0x3F); 137d522f475Smrg s += 2; 138d522f475Smrg } else { 139d522f475Smrg *d++ = '?'; 140d522f475Smrg SKIP; 141d522f475Smrg } 142d522f475Smrg } else { 143d522f475Smrg *d++ = '?'; 144d522f475Smrg SKIP; 145d522f475Smrg } 146d522f475Smrg } 147d522f475Smrg *d = 0; 148d522f475Smrg#undef SKIP 149d522f475Smrg} 150d522f475Smrg 151d522f475Smrg/* Keep this in sync with utf8l1strcpy! */ 152d522f475Smrgstatic int 153d522f475Smrgutf8l1strlen(char *s) 154d522f475Smrg{ 155d522f475Smrg#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 156d522f475Smrg int len = 0; 157d522f475Smrg while (*s) { 158d522f475Smrg if ((*s & 0x80) == 0) { 159d522f475Smrg s++; 160d522f475Smrg len++; 161d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 162d522f475Smrg if ((s[1] & 0x80) == 0) { 163d522f475Smrg s++; 164d522f475Smrg continue; 165d522f475Smrg } else if ((*s & 0x7C) == 0x40) { 166d522f475Smrg len++; 167d522f475Smrg s += 2; 168d522f475Smrg } else { 169d522f475Smrg len++; 170d522f475Smrg SKIP; 171d522f475Smrg } 172d522f475Smrg } else { 173d522f475Smrg len++; 174d522f475Smrg SKIP; 175d522f475Smrg } 176d522f475Smrg } 177d522f475Smrg#undef SKIP 178d522f475Smrg return len; 179d522f475Smrg} 180d522f475Smrg 181d522f475Smrgint 182d522f475SmrgXutf8TextPropertyToTextList(Display * dpy, 183d522f475Smrg const XTextProperty * tp, 184d522f475Smrg char ***list_return, 185d522f475Smrg int *count_return) 186d522f475Smrg{ 187d522f475Smrg int utf8; 188d522f475Smrg char **list; 189d522f475Smrg int nelements; 190d522f475Smrg char *cp; 191d522f475Smrg char *start; 192d522f475Smrg int i, j; 193d522f475Smrg int datalen = (int) tp->nitems; 194d522f475Smrg int len; 195d522f475Smrg 196d522f475Smrg if (tp->format != 8) 197d522f475Smrg return XConverterNotFound; 198d522f475Smrg 199d522f475Smrg if (tp->encoding == XA_STRING) 200d522f475Smrg utf8 = 0; 201d522f475Smrg else if (tp->encoding == XA_UTF8_STRING(dpy)) 202d522f475Smrg utf8 = 1; 203d522f475Smrg else 204d522f475Smrg return XConverterNotFound; 205d522f475Smrg 206d522f475Smrg if (datalen == 0) { 207d522f475Smrg *list_return = NULL; 208d522f475Smrg *count_return = 0; 209d522f475Smrg return 0; 210d522f475Smrg } 211d522f475Smrg 212d522f475Smrg nelements = 1; 213d522f475Smrg for (cp = (char *) tp->value, i = datalen; i > 0; cp++, i--) { 214d522f475Smrg if (*cp == '\0') 215d522f475Smrg nelements++; 216d522f475Smrg } 217d522f475Smrg 218d522f475Smrg list = TypeMallocN(char *, nelements); 219d522f475Smrg if (!list) 220d522f475Smrg return XNoMemory; 221d522f475Smrg 222d522f475Smrg if (utf8) 223d522f475Smrg len = datalen; 224d522f475Smrg else 225d522f475Smrg len = l1countUtf8Bytes((char *) tp->value, datalen); 226d522f475Smrg 227d522f475Smrg start = CastMallocN(char, len); 228d522f475Smrg if (!start) { 229d522f475Smrg free(list); 230d522f475Smrg return XNoMemory; 231d522f475Smrg } 232d522f475Smrg 233d522f475Smrg if (utf8) 234d522f475Smrg memcpy(start, (char *) tp->value, datalen); 235d522f475Smrg else 236d522f475Smrg l1utf8copy(start, (char *) tp->value, datalen); 237d522f475Smrg start[len] = '\0'; 238d522f475Smrg 239d522f475Smrg for (cp = start, i = len + 1, j = 0; i > 0; cp++, i--) { 240d522f475Smrg if (*cp == '\0') { 241d522f475Smrg list[j] = start; 242d522f475Smrg start = (cp + 1); 243d522f475Smrg j++; 244d522f475Smrg } 245d522f475Smrg } 246d522f475Smrg 247d522f475Smrg list[j] = NULL; 248d522f475Smrg *list_return = list; 249d522f475Smrg *count_return = nelements; 250d522f475Smrg return 0; 251d522f475Smrg} 252d522f475Smrg 253d522f475Smrgint 254d522f475SmrgXutf8TextListToTextProperty(Display * dpy, 255d522f475Smrg char **list, 256d522f475Smrg int count, 257d522f475Smrg XICCEncodingStyle style, 258d522f475Smrg XTextProperty * text_prop) 259d522f475Smrg{ 260d522f475Smrg XTextProperty proto; 261d522f475Smrg unsigned int nbytes; 262d522f475Smrg int i; 263d522f475Smrg 264d522f475Smrg if (style != XStringStyle && 265d522f475Smrg style != XCompoundTextStyle && 266d522f475Smrg style != XStdICCTextStyle && 267d522f475Smrg style != XUTF8StringStyle) 268d522f475Smrg return XConverterNotFound; 269d522f475Smrg 270d522f475Smrg if (style == XUTF8StringStyle) { 271d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 272d522f475Smrg nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 273d522f475Smrg } 274d522f475Smrg } else { 275d522f475Smrg for (i = 0, nbytes = 0; i < count; i++) { 276d522f475Smrg nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 277d522f475Smrg } 278d522f475Smrg } 279d522f475Smrg 280d522f475Smrg if (style == XCompoundTextStyle) 281d522f475Smrg proto.encoding = XA_COMPOUND_TEXT(dpy); 282d522f475Smrg else if (style == XUTF8StringStyle) 283d522f475Smrg proto.encoding = XA_UTF8_STRING(dpy); 284d522f475Smrg else 285d522f475Smrg proto.encoding = XA_STRING; 286d522f475Smrg proto.format = 8; 287d522f475Smrg if (nbytes) 288d522f475Smrg proto.nitems = nbytes - 1; 289d522f475Smrg else 290d522f475Smrg proto.nitems = 0; 291d522f475Smrg proto.value = NULL; 292d522f475Smrg 293d522f475Smrg if (nbytes > 0) { 294d522f475Smrg char *buf = TypeMallocN(char, nbytes); 295d522f475Smrg if (!buf) 296d522f475Smrg return XNoMemory; 297d522f475Smrg 298d522f475Smrg proto.value = (unsigned char *) buf; 299d522f475Smrg for (i = 0; i < count; i++) { 300d522f475Smrg char *arg = list[i]; 301d522f475Smrg 302d522f475Smrg if (arg) { 303d522f475Smrg if (style == XUTF8StringStyle) { 304d522f475Smrg strcpy(buf, arg); 305d522f475Smrg } else { 306d522f475Smrg utf8l1strcpy(buf, arg); 307d522f475Smrg } 308d522f475Smrg buf += (strlen(buf) + 1); 309d522f475Smrg } else { 310d522f475Smrg *buf++ = '\0'; 311d522f475Smrg } 312d522f475Smrg } 313d522f475Smrg } else { 314d522f475Smrg proto.value = CastMalloc(unsigned char); /* easier for client */ 315d522f475Smrg if (!proto.value) 316d522f475Smrg return XNoMemory; 317d522f475Smrg 318d522f475Smrg proto.value[0] = '\0'; 319d522f475Smrg } 320d522f475Smrg 321d522f475Smrg *text_prop = proto; 322d522f475Smrg return 0; 323d522f475Smrg} 324d522f475Smrg 325d522f475Smrgint 326d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED, 327d522f475Smrg XKeyEvent * ev, 328d522f475Smrg char *buffer, 329d522f475Smrg int nbytes, 330d522f475Smrg KeySym * keysym_return, 331d522f475Smrg Status * status_return) 332d522f475Smrg{ 333d522f475Smrg int rc; 334d522f475Smrg KeySym keysym; 335d522f475Smrg int codepoint; 336d522f475Smrg int len; 337d522f475Smrg 338d522f475Smrg rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 339d522f475Smrg 340d522f475Smrg if (rc > 0) { 341d522f475Smrg codepoint = buffer[0] & 0xFF; 342d522f475Smrg } else { 343d522f475Smrg codepoint = keysym2ucs(keysym); 344d522f475Smrg } 345d522f475Smrg 346d522f475Smrg if (codepoint < 0) { 347d522f475Smrg if (keysym == None) { 348d522f475Smrg *status_return = XLookupNone; 349d522f475Smrg } else { 350d522f475Smrg *status_return = XLookupKeySym; 351d522f475Smrg *keysym_return = keysym; 352d522f475Smrg } 353d522f475Smrg return 0; 354d522f475Smrg } 355d522f475Smrg 356d522f475Smrg if (nbytes < utf8countBytes(codepoint)) { 357d522f475Smrg *status_return = XBufferOverflow; 358d522f475Smrg return utf8countBytes(codepoint); 359d522f475Smrg } 360d522f475Smrg 361d522f475Smrg utf8insert(buffer, codepoint, &len); 362d522f475Smrg 363d522f475Smrg if (keysym != None) { 364d522f475Smrg *keysym_return = keysym; 365d522f475Smrg *status_return = XLookupBoth; 366d522f475Smrg } else { 367d522f475Smrg *status_return = XLookupChars; 368d522f475Smrg } 369d522f475Smrg return len; 370d522f475Smrg} 371d522f475Smrg#else /* X_HAVE_UTF8_STRING */ 372d522f475Smrg/* Silence the compiler */ 373d522f475Smrgvoid 374d522f475Smrgxutf8_dummy(void) 375d522f475Smrg{ 376d522f475Smrg return; 377d522f475Smrg} 378d522f475Smrg#endif 379