xutf8.c revision 0bd37d32
1/* $XTermId: xutf8.c,v 1.13 2012/05/09 20:56:09 tom Exp $ */
2
3/*
4 * Copyright (c) 2001 by Juliusz Chroboczek
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#include <xterm.h>
27
28#include <X11/Xlib.h>
29#include <X11/Xatom.h>
30#include <X11/Xutil.h>
31#include <X11/Xmu/Xmu.h>
32
33#include <xutf8.h>
34
35#ifndef X_HAVE_UTF8_STRING
36
37#undef XA_UTF8_STRING
38#define KEYSYM2UCS_INCLUDED
39
40#include "keysym2ucs.c"
41
42Atom
43_xa_utf8_string(Display * dpy)
44{
45    static AtomPtr p = NULL;
46
47    if (p == NULL)
48	p = XmuMakeAtom("UTF8_STRING");
49
50    return XmuInternAtom(dpy, p);
51}
52#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
53
/*
 * Number of bytes utf8insert() writes for code point c.
 *
 * Returns 0 for a negative value, otherwise 1..4.  Every value above
 * 0xFFFF is reported as 4; no upper limit is enforced.
 */
static int
utf8countBytes(int c)
{
    int need;

    if (c < 0)
	need = 0;
    else if (c < 0x80)
	need = 1;
    else if (c < 0x800)
	need = 2;
    else if (c < 0x10000)
	need = 3;
    else
	need = 4;

    return need;
}
69
/*
 * Encode code point c as UTF-8 into dest and store the number of bytes
 * written in *len_return.
 *
 * dest must have room for up to 4 bytes (see utf8countBytes()).  No NUL
 * terminator is written.  A negative c writes nothing to dest and reports
 * a length of 0, so that the caller never reads an unset length.  Any
 * value above 0xFFFF is written as four bytes using only its low 21 bits.
 */
static void
utf8insert(char *dest, int c, size_t *len_return)
{
    if (c < 0) {
	/* nothing to encode; still give the caller a defined length */
	*len_return = 0;
	return;
    }

    if (c <= 0x7F) {
	dest[0] = (char) c;
	*len_return = 1;
    } else if (c <= 0x7FF) {
	dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
	dest[1] = (char) (0x80 | (c & 0x3F));
	*len_return = 2;
    } else if (c <= 0xFFFF) {
	dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
	dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[2] = (char) (0x80 | (c & 0x3F));
	*len_return = 3;
    } else {
	dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
	dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
	dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[3] = (char) (0x80 | (c & 0x3F));
	*len_return = 4;
    }
}
96
/*
 * Count the bytes needed to hold the first len bytes of s once each byte
 * is re-encoded as UTF-8: one byte if its high bit is clear, two if set.
 * Embedded NUL bytes are counted like any other byte.
 */
static int
l1countUtf8Bytes(char *s, size_t len)
{
    int total = 0;
    size_t k;

    for (k = 0; k < len; k++) {
	total += (s[k] & 0x80) ? 2 : 1;
    }
    return total;
}
111
/*
 * Re-encode len bytes from s into d, treating every source byte as a code
 * point in the range 0..255 and writing it with utf8insert().
 *
 * No terminator is appended.  d must be large enough for the result; the
 * size is what l1countUtf8Bytes() reports for the same input.
 */
static void
l1utf8copy(char *d, char *s, size_t len)
{
    char *stop = s + len;
    size_t used;

    for (; s != stop; s++) {
	utf8insert(d, (*s) & 0xFF, &used);
	d += (int) used;
    }
}
123
/*
 * Copy the NUL-terminated UTF-8 string s into d as single-byte characters.
 *
 *  - bytes with the high bit clear are copied unchanged;
 *  - a lead byte 0xC0..0xC3 followed by a byte with the high bit set is
 *    combined with that byte into one output byte;
 *  - a lead byte 0xC0..0xC3 followed by a byte with the high bit clear is
 *    dropped (malformed input) and scanning resumes at the next byte;
 *  - anything else produces a single '?', and the bytes of the form
 *    10xxxxxx that follow it are skipped.
 *
 * d is always NUL-terminated.  It must hold utf8l1strlen(s) + 1 bytes;
 * keep the two functions in sync.
 */
static void
utf8l1strcpy(char *d, char *s)
{
    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* plain 7-bit byte */
	    *d++ = *s++;
	    continue;
	}

	if ((*s & 0x7C) == 0x40) {
	    /* lead byte 110000xx: a two-byte sequence below 0x100 */
	    if ((s[1] & 0x80) == 0) {
		s++;		/* incorrect UTF-8 */
		continue;
	    }
	    *d++ = (char) (((s[0] & 0x03) << 6) | (s[1] & 0x3F));
	    s += 2;
	    continue;
	}

	/* not representable: emit a placeholder, skip trailing bytes */
	*d++ = '?';
	do {
	    s++;
	} while ((*s & 0xC0) == 0x80);
    }
    *d = '\0';
}
150
/*
 * Keep this in sync with utf8l1strcpy!
 *
 * Return the number of bytes (not counting the terminator) that
 * utf8l1strcpy() writes for the NUL-terminated UTF-8 string s.  The scan
 * follows the same rules: 7-bit bytes and 0xC0..0xC3 two-byte sequences
 * count as one, a 0xC0..0xC3 lead byte followed by a 7-bit byte counts as
 * nothing, and any other sequence counts as one (the '?' placeholder).
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* plain 7-bit byte */
	    s++;
	    count++;
	    continue;
	}

	if ((*s & 0x7C) == 0x40) {
	    /* lead byte 110000xx */
	    if ((s[1] & 0x80) == 0) {
		s++;		/* malformed: dropped, nothing counted */
		continue;
	    }
	    count++;
	    s += 2;
	    continue;
	}

	/* one placeholder for the whole unrepresentable sequence */
	count++;
	do {
	    s++;
	} while ((*s & 0xC0) == 0x80);
    }
    return count;
}
180
181int
182Xutf8TextPropertyToTextList(Display * dpy,
183			    const XTextProperty * tp,
184			    char ***list_return,
185			    int *count_return)
186{
187    int utf8;
188    char **list;
189    int nelements;
190    char *cp;
191    char *start;
192    size_t i;
193    int j;
194    size_t datalen = tp->nitems;
195    size_t len;
196
197    if (tp->format != 8)
198	return XConverterNotFound;
199
200    if (tp->encoding == XA_STRING)
201	utf8 = 0;
202    else if (tp->encoding == XA_UTF8_STRING(dpy))
203	utf8 = 1;
204    else
205	return XConverterNotFound;
206
207    if (datalen == 0) {
208	*list_return = NULL;
209	*count_return = 0;
210	return 0;
211    }
212
213    nelements = 1;
214    for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
215	if (*cp == '\0')
216	    nelements++;
217    }
218
219    list = TypeMallocN(char *, (unsigned) nelements);
220    if (!list)
221	return XNoMemory;
222
223    if (utf8)
224	len = datalen;
225    else
226	len = l1countUtf8Bytes((char *) tp->value, datalen);
227
228    start = CastMallocN(char, len);
229    if (!start) {
230	free(list);
231	return XNoMemory;
232    }
233
234    if (utf8)
235	memcpy(start, (char *) tp->value, datalen);
236    else
237	l1utf8copy(start, (char *) tp->value, datalen);
238    start[len] = '\0';
239
240    for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
241	if (*cp == '\0') {
242	    list[j] = start;
243	    start = (cp + 1);
244	    j++;
245	}
246    }
247
248    list[j] = NULL;
249    *list_return = list;
250    *count_return = nelements;
251    return 0;
252}
253
254int
255Xutf8TextListToTextProperty(Display * dpy,
256			    char **list,
257			    int count,
258			    XICCEncodingStyle style,
259			    XTextProperty * text_prop)
260{
261    XTextProperty proto;
262    unsigned int nbytes;
263    int i;
264
265    if (style != XStringStyle &&
266	style != XCompoundTextStyle &&
267	style != XStdICCTextStyle &&
268	style != XUTF8StringStyle)
269	return XConverterNotFound;
270
271    if (style == XUTF8StringStyle) {
272	for (i = 0, nbytes = 0; i < count; i++) {
273	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
274	}
275    } else {
276	for (i = 0, nbytes = 0; i < count; i++) {
277	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
278	}
279    }
280
281    if (style == XCompoundTextStyle)
282	proto.encoding = XA_COMPOUND_TEXT(dpy);
283    else if (style == XUTF8StringStyle)
284	proto.encoding = XA_UTF8_STRING(dpy);
285    else
286	proto.encoding = XA_STRING;
287    proto.format = 8;
288    if (nbytes)
289	proto.nitems = nbytes - 1;
290    else
291	proto.nitems = 0;
292    proto.value = NULL;
293
294    if (nbytes > 0) {
295	char *buf = TypeMallocN(char, nbytes);
296	if (!buf)
297	    return XNoMemory;
298
299	proto.value = (unsigned char *) buf;
300	for (i = 0; i < count; i++) {
301	    char *arg = list[i];
302
303	    if (arg) {
304		if (style == XUTF8StringStyle) {
305		    strcpy(buf, arg);
306		} else {
307		    utf8l1strcpy(buf, arg);
308		}
309		buf += (strlen(buf) + 1);
310	    } else {
311		*buf++ = '\0';
312	    }
313	}
314    } else {
315	proto.value = CastMalloc(unsigned char);	/* easier for client */
316	if (!proto.value)
317	    return XNoMemory;
318
319	proto.value[0] = '\0';
320    }
321
322    *text_prop = proto;
323    return 0;
324}
325
326int
327Xutf8LookupString(XIC ic GCC_UNUSED,
328		  XKeyEvent * ev,
329		  char *buffer,
330		  int nbytes,
331		  KeySym * keysym_return,
332		  Status * status_return)
333{
334    int rc;
335    KeySym keysym;
336    int codepoint;
337    size_t len;
338
339    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
340
341    if (rc > 0) {
342	codepoint = buffer[0] & 0xFF;
343    } else {
344	codepoint = keysym2ucs(keysym);
345    }
346
347    if (codepoint < 0) {
348	if (keysym == None) {
349	    *status_return = XLookupNone;
350	} else {
351	    *status_return = XLookupKeySym;
352	    *keysym_return = keysym;
353	}
354	return 0;
355    }
356
357    if (nbytes < utf8countBytes(codepoint)) {
358	*status_return = XBufferOverflow;
359	return utf8countBytes(codepoint);
360    }
361
362    utf8insert(buffer, codepoint, &len);
363
364    if (keysym != None) {
365	*keysym_return = keysym;
366	*status_return = XLookupBoth;
367    } else {
368	*status_return = XLookupChars;
369    }
370    return (int) len;
371}
372#else /* X_HAVE_UTF8_STRING */
/*
 * Silence the compiler: gives this file one definition when
 * X_HAVE_UTF8_STRING is defined and everything above is compiled out.
 * Does nothing.
 */
void
xutf8_dummy(void)
{
}
379#endif
380