xutf8.c revision 0bd37d32
1/* $XTermId: xutf8.c,v 1.13 2012/05/09 20:56:09 tom Exp $ */ 2 3/* 4 * Copyright (c) 2001 by Juliusz Chroboczek 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 26#include <xterm.h> 27 28#include <X11/Xlib.h> 29#include <X11/Xatom.h> 30#include <X11/Xutil.h> 31#include <X11/Xmu/Xmu.h> 32 33#include <xutf8.h> 34 35#ifndef X_HAVE_UTF8_STRING 36 37#undef XA_UTF8_STRING 38#define KEYSYM2UCS_INCLUDED 39 40#include "keysym2ucs.c" 41 42Atom 43_xa_utf8_string(Display * dpy) 44{ 45 static AtomPtr p = NULL; 46 47 if (p == NULL) 48 p = XmuMakeAtom("UTF8_STRING"); 49 50 return XmuInternAtom(dpy, p); 51} 52#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 53 54static int 55utf8countBytes(int c) 56{ 57 if (c < 0) 58 return 0; 59 60 if (c <= 0x7F) { 61 return 1; 62 } else if (c <= 0x7FF) { 63 return 2; 64 } else if (c <= 0xFFFF) { 65 return 3; 66 } else 67 return 4; 68} 69 70static void 71utf8insert(char *dest, int c, size_t *len_return) 72{ 73 if (c < 0) 74 return; 75 76 if (c <= 0x7F) { 77 dest[0] = (char) c; 78 *len_return = 1; 79 } else if (c <= 0x7FF) { 80 dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 81 dest[1] = (char) (0x80 | (c & 0x3F)); 82 *len_return = 2; 83 } else if (c <= 0xFFFF) { 84 dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 85 dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 86 dest[2] = (char) (0x80 | (c & 0x3F)); 87 *len_return = 3; 88 } else { 89 dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 90 dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 91 dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 92 dest[3] = (char) (0x80 | (c & 0x3f)); 93 *len_return = 4; 94 } 95} 96 97static int 98l1countUtf8Bytes(char *s, size_t len) 99{ 100 int l = 0; 101 while (len != 0) { 102 if ((*s & 0x80) == 0) 103 l++; 104 else 105 l += 2; 106 s++; 107 len--; 108 } 109 return l; 110} 111 112static void 113l1utf8copy(char *d, char *s, size_t len) 114{ 115 size_t l; 116 while (len != 0) { 117 utf8insert(d, (*s) & 0xFF, &l); 118 d += (int) l; 119 s++; 120 len--; 121 } 122} 123 124static void 125utf8l1strcpy(char *d, char *s) 126{ 127#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 128 while (*s) { 129 if ((*s & 0x80) == 0) 130 *d++ = *s++; 131 else if ((*s & 0x7C) == 0x40) { 132 if ((s[1] & 0x80) == 0) { 133 s++; /* incorrect UTF-8 */ 134 continue; 135 } else if ((*s & 0x7C) == 0x40) { 136 *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 137 s += 2; 138 } else { 139 *d++ = '?'; 140 SKIP; 141 } 142 } else { 143 *d++ = '?'; 144 SKIP; 145 } 146 } 147 *d = 0; 148#undef SKIP 149} 150 151/* Keep this in sync with utf8l1strcpy! */ 152static int 153utf8l1strlen(char *s) 154{ 155#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 156 int len = 0; 157 while (*s) { 158 if ((*s & 0x80) == 0) { 159 s++; 160 len++; 161 } else if ((*s & 0x7C) == 0x40) { 162 if ((s[1] & 0x80) == 0) { 163 s++; 164 continue; 165 } else if ((*s & 0x7C) == 0x40) { 166 len++; 167 s += 2; 168 } else { 169 len++; 170 SKIP; 171 } 172 } else { 173 len++; 174 SKIP; 175 } 176 } 177#undef SKIP 178 return len; 179} 180 181int 182Xutf8TextPropertyToTextList(Display * dpy, 183 const XTextProperty * tp, 184 char ***list_return, 185 int *count_return) 186{ 187 int utf8; 188 char **list; 189 int nelements; 190 char *cp; 191 char *start; 192 size_t i; 193 int j; 194 size_t datalen = tp->nitems; 195 size_t len; 196 197 if (tp->format != 8) 198 return XConverterNotFound; 199 200 if (tp->encoding == XA_STRING) 201 utf8 = 0; 202 else if (tp->encoding == XA_UTF8_STRING(dpy)) 203 utf8 = 1; 204 else 205 return XConverterNotFound; 206 207 if (datalen == 0) { 208 *list_return = NULL; 209 *count_return = 0; 210 return 0; 211 } 212 213 nelements = 1; 214 for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 215 if (*cp == '\0') 216 nelements++; 217 } 218 219 list = TypeMallocN(char *, (unsigned) nelements); 220 if (!list) 221 return XNoMemory; 222 223 if (utf8) 224 len = datalen; 225 else 226 len = l1countUtf8Bytes((char *) tp->value, datalen); 227 228 start = CastMallocN(char, len); 229 if (!start) { 230 free(list); 231 return XNoMemory; 232 } 233 234 if (utf8) 235 memcpy(start, (char *) tp->value, datalen); 236 else 237 l1utf8copy(start, (char *) tp->value, datalen); 238 start[len] = '\0'; 239 240 for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 241 if (*cp == '\0') { 242 list[j] = start; 243 start = (cp + 1); 244 j++; 245 } 246 } 247 248 list[j] = NULL; 249 *list_return = list; 250 *count_return = nelements; 251 return 0; 252} 253 254int 255Xutf8TextListToTextProperty(Display * dpy, 256 char **list, 257 int count, 258 XICCEncodingStyle style, 259 XTextProperty * text_prop) 260{ 261 XTextProperty proto; 262 unsigned int nbytes; 263 int i; 264 265 if (style != XStringStyle && 266 style != XCompoundTextStyle && 267 style != XStdICCTextStyle && 268 style != XUTF8StringStyle) 269 return XConverterNotFound; 270 271 if (style == XUTF8StringStyle) { 272 for (i = 0, nbytes = 0; i < count; i++) { 273 nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 274 } 275 } else { 276 for (i = 0, nbytes = 0; i < count; i++) { 277 nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 278 } 279 } 280 281 if (style == XCompoundTextStyle) 282 proto.encoding = XA_COMPOUND_TEXT(dpy); 283 else if (style == XUTF8StringStyle) 284 proto.encoding = XA_UTF8_STRING(dpy); 285 else 286 proto.encoding = XA_STRING; 287 proto.format = 8; 288 if (nbytes) 289 proto.nitems = nbytes - 1; 290 else 291 proto.nitems = 0; 292 proto.value = NULL; 293 294 if (nbytes > 0) { 295 char *buf = TypeMallocN(char, nbytes); 296 if (!buf) 297 return XNoMemory; 298 299 proto.value = (unsigned char *) buf; 300 for (i = 0; i < count; i++) { 301 char *arg = list[i]; 302 303 if (arg) { 304 if (style == XUTF8StringStyle) { 305 strcpy(buf, arg); 306 } else { 307 utf8l1strcpy(buf, arg); 308 } 309 buf += (strlen(buf) + 1); 310 } else { 311 *buf++ = '\0'; 312 } 313 } 314 } else { 315 proto.value = CastMalloc(unsigned char); /* easier for client */ 316 if (!proto.value) 317 return XNoMemory; 318 319 proto.value[0] = '\0'; 320 } 321 322 *text_prop = proto; 323 return 0; 324} 325 326int 327Xutf8LookupString(XIC ic GCC_UNUSED, 328 XKeyEvent * ev, 329 char *buffer, 330 int nbytes, 331 KeySym * keysym_return, 332 Status * status_return) 333{ 334 int rc; 335 KeySym keysym; 336 int codepoint; 337 size_t len; 338 339 rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 340 341 if (rc > 0) { 342 codepoint = buffer[0] & 0xFF; 343 } else { 344 codepoint = keysym2ucs(keysym); 345 } 346 347 if (codepoint < 0) { 348 if (keysym == None) { 349 *status_return = XLookupNone; 350 } else { 351 *status_return = XLookupKeySym; 352 *keysym_return = keysym; 353 } 354 return 0; 355 } 356 357 if (nbytes < utf8countBytes(codepoint)) { 358 *status_return = XBufferOverflow; 359 return utf8countBytes(codepoint); 360 } 361 362 utf8insert(buffer, codepoint, &len); 363 364 if (keysym != None) { 365 *keysym_return = keysym; 366 *status_return = XLookupBoth; 367 } else { 368 *status_return = XLookupChars; 369 } 370 return (int) len; 371} 372#else /* X_HAVE_UTF8_STRING */ 373/* Silence the compiler */ 374void 375xutf8_dummy(void) 376{ 377 return; 378} 379#endif 380