/* xutf8.c revision e39b573c */
1/* $XTermId: xutf8.c,v 1.10 2011/07/04 13:51:08 tom Exp $ */ 2 3/* 4Copyright (c) 2001 by Juliusz Chroboczek 5 6Permission is hereby granted, free of charge, to any person obtaining a copy 7of this software and associated documentation files (the "Software"), to deal 8in the Software without restriction, including without limitation the rights 9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10copies of the Software, and to permit persons to whom the Software is 11furnished to do so, subject to the following conditions: 12 13The above copyright notice and this permission notice shall be included in 14all copies or substantial portions of the Software. 15 16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22THE SOFTWARE. 
23*/ 24 25#include <xterm.h> 26 27#include <X11/Xlib.h> 28#include <X11/Xatom.h> 29#include <X11/Xutil.h> 30#include <X11/Xmu/Xmu.h> 31 32#include <xutf8.h> 33 34#ifndef X_HAVE_UTF8_STRING 35 36#undef XA_UTF8_STRING 37#define KEYSYM2UCS_INCLUDED 38 39#include "keysym2ucs.c" 40 41Atom 42_xa_utf8_string(Display * dpy) 43{ 44 static AtomPtr p = NULL; 45 46 if (p == NULL) 47 p = XmuMakeAtom("UTF8_STRING"); 48 49 return XmuInternAtom(dpy, p); 50} 51#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy) 52 53static int 54utf8countBytes(int c) 55{ 56 if (c < 0) 57 return 0; 58 59 if (c <= 0x7F) { 60 return 1; 61 } else if (c <= 0x7FF) { 62 return 2; 63 } else if (c <= 0xFFFF) { 64 return 3; 65 } else 66 return 4; 67} 68 69static void 70utf8insert(char *dest, int c, int *len_return) 71{ 72 if (c < 0) 73 return; 74 75 if (c <= 0x7F) { 76 dest[0] = (char) c; 77 *len_return = 1; 78 } else if (c <= 0x7FF) { 79 dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F)); 80 dest[1] = (char) (0x80 | (c & 0x3F)); 81 *len_return = 2; 82 } else if (c <= 0xFFFF) { 83 dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F)); 84 dest[1] = (char) (0x80 | ((c >> 6) & 0x3F)); 85 dest[2] = (char) (0x80 | (c & 0x3F)); 86 *len_return = 3; 87 } else { 88 dest[0] = (char) (0xF0 | ((c >> 18) & 0x07)); 89 dest[1] = (char) (0x80 | ((c >> 12) & 0x3f)); 90 dest[2] = (char) (0x80 | ((c >> 6) & 0x3f)); 91 dest[3] = (char) (0x80 | (c & 0x3f)); 92 *len_return = 4; 93 } 94} 95 96static int 97l1countUtf8Bytes(char *s, int len) 98{ 99 int l = 0; 100 while (len > 0) { 101 if ((*s & 0x80) == 0) 102 l++; 103 else 104 l += 2; 105 s++; 106 len--; 107 } 108 return l; 109} 110 111static void 112l1utf8copy(char *d, char *s, int len) 113{ 114 int l; 115 while (len > 0) { 116 utf8insert(d, (*s) & 0xFF, &l); 117 d += l; 118 s++; 119 len--; 120 } 121} 122 123static void 124utf8l1strcpy(char *d, char *s) 125{ 126#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 127 while (*s) { 128 if ((*s & 0x80) == 0) 129 *d++ = *s++; 130 
else if ((*s & 0x7C) == 0x40) { 131 if ((s[1] & 0x80) == 0) { 132 s++; /* incorrect UTF-8 */ 133 continue; 134 } else if ((*s & 0x7C) == 0x40) { 135 *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F)); 136 s += 2; 137 } else { 138 *d++ = '?'; 139 SKIP; 140 } 141 } else { 142 *d++ = '?'; 143 SKIP; 144 } 145 } 146 *d = 0; 147#undef SKIP 148} 149 150/* Keep this in sync with utf8l1strcpy! */ 151static int 152utf8l1strlen(char *s) 153{ 154#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0) 155 int len = 0; 156 while (*s) { 157 if ((*s & 0x80) == 0) { 158 s++; 159 len++; 160 } else if ((*s & 0x7C) == 0x40) { 161 if ((s[1] & 0x80) == 0) { 162 s++; 163 continue; 164 } else if ((*s & 0x7C) == 0x40) { 165 len++; 166 s += 2; 167 } else { 168 len++; 169 SKIP; 170 } 171 } else { 172 len++; 173 SKIP; 174 } 175 } 176#undef SKIP 177 return len; 178} 179 180int 181Xutf8TextPropertyToTextList(Display * dpy, 182 const XTextProperty * tp, 183 char ***list_return, 184 int *count_return) 185{ 186 int utf8; 187 char **list; 188 int nelements; 189 char *cp; 190 char *start; 191 size_t i; 192 int j; 193 size_t datalen = tp->nitems; 194 size_t len; 195 196 if (tp->format != 8) 197 return XConverterNotFound; 198 199 if (tp->encoding == XA_STRING) 200 utf8 = 0; 201 else if (tp->encoding == XA_UTF8_STRING(dpy)) 202 utf8 = 1; 203 else 204 return XConverterNotFound; 205 206 if (datalen == 0) { 207 *list_return = NULL; 208 *count_return = 0; 209 return 0; 210 } 211 212 nelements = 1; 213 for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) { 214 if (*cp == '\0') 215 nelements++; 216 } 217 218 list = TypeMallocN(char *, (unsigned) nelements); 219 if (!list) 220 return XNoMemory; 221 222 if (utf8) 223 len = datalen; 224 else 225 len = l1countUtf8Bytes((char *) tp->value, datalen); 226 227 start = CastMallocN(char, len); 228 if (!start) { 229 free(list); 230 return XNoMemory; 231 } 232 233 if (utf8) 234 memcpy(start, (char *) tp->value, datalen); 235 else 236 
l1utf8copy(start, (char *) tp->value, datalen); 237 start[len] = '\0'; 238 239 for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) { 240 if (*cp == '\0') { 241 list[j] = start; 242 start = (cp + 1); 243 j++; 244 } 245 } 246 247 list[j] = NULL; 248 *list_return = list; 249 *count_return = nelements; 250 return 0; 251} 252 253int 254Xutf8TextListToTextProperty(Display * dpy, 255 char **list, 256 int count, 257 XICCEncodingStyle style, 258 XTextProperty * text_prop) 259{ 260 XTextProperty proto; 261 unsigned int nbytes; 262 int i; 263 264 if (style != XStringStyle && 265 style != XCompoundTextStyle && 266 style != XStdICCTextStyle && 267 style != XUTF8StringStyle) 268 return XConverterNotFound; 269 270 if (style == XUTF8StringStyle) { 271 for (i = 0, nbytes = 0; i < count; i++) { 272 nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1); 273 } 274 } else { 275 for (i = 0, nbytes = 0; i < count; i++) { 276 nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1); 277 } 278 } 279 280 if (style == XCompoundTextStyle) 281 proto.encoding = XA_COMPOUND_TEXT(dpy); 282 else if (style == XUTF8StringStyle) 283 proto.encoding = XA_UTF8_STRING(dpy); 284 else 285 proto.encoding = XA_STRING; 286 proto.format = 8; 287 if (nbytes) 288 proto.nitems = nbytes - 1; 289 else 290 proto.nitems = 0; 291 proto.value = NULL; 292 293 if (nbytes > 0) { 294 char *buf = TypeMallocN(char, nbytes); 295 if (!buf) 296 return XNoMemory; 297 298 proto.value = (unsigned char *) buf; 299 for (i = 0; i < count; i++) { 300 char *arg = list[i]; 301 302 if (arg) { 303 if (style == XUTF8StringStyle) { 304 strcpy(buf, arg); 305 } else { 306 utf8l1strcpy(buf, arg); 307 } 308 buf += (strlen(buf) + 1); 309 } else { 310 *buf++ = '\0'; 311 } 312 } 313 } else { 314 proto.value = CastMalloc(unsigned char); /* easier for client */ 315 if (!proto.value) 316 return XNoMemory; 317 318 proto.value[0] = '\0'; 319 } 320 321 *text_prop = proto; 322 return 0; 323} 324 325int 326Xutf8LookupString(XIC ic 
GCC_UNUSED, 327 XKeyEvent * ev, 328 char *buffer, 329 int nbytes, 330 KeySym * keysym_return, 331 Status * status_return) 332{ 333 int rc; 334 KeySym keysym; 335 int codepoint; 336 int len; 337 338 rc = XLookupString(ev, buffer, nbytes, &keysym, NULL); 339 340 if (rc > 0) { 341 codepoint = buffer[0] & 0xFF; 342 } else { 343 codepoint = keysym2ucs(keysym); 344 } 345 346 if (codepoint < 0) { 347 if (keysym == None) { 348 *status_return = XLookupNone; 349 } else { 350 *status_return = XLookupKeySym; 351 *keysym_return = keysym; 352 } 353 return 0; 354 } 355 356 if (nbytes < utf8countBytes(codepoint)) { 357 *status_return = XBufferOverflow; 358 return utf8countBytes(codepoint); 359 } 360 361 utf8insert(buffer, codepoint, &len); 362 363 if (keysym != None) { 364 *keysym_return = keysym; 365 *status_return = XLookupBoth; 366 } else { 367 *status_return = XLookupChars; 368 } 369 return len; 370} 371#else /* X_HAVE_UTF8_STRING */ 372/* Silence the compiler */ 373void 374xutf8_dummy(void) 375{ 376 return; 377} 378#endif 379