xutf8.c revision e39b573c
1/* $XTermId: xutf8.c,v 1.10 2011/07/04 13:51:08 tom Exp $ */
2
3/*
4Copyright (c) 2001 by Juliusz Chroboczek
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in
14all copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22THE SOFTWARE.
23*/
24
25#include <xterm.h>
26
27#include <X11/Xlib.h>
28#include <X11/Xatom.h>
29#include <X11/Xutil.h>
30#include <X11/Xmu/Xmu.h>
31
32#include <xutf8.h>
33
34#ifndef X_HAVE_UTF8_STRING
35
36#undef XA_UTF8_STRING
37#define KEYSYM2UCS_INCLUDED
38
39#include "keysym2ucs.c"
40
41Atom
42_xa_utf8_string(Display * dpy)
43{
44    static AtomPtr p = NULL;
45
46    if (p == NULL)
47	p = XmuMakeAtom("UTF8_STRING");
48
49    return XmuInternAtom(dpy, p);
50}
51#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
52
/*
 * Number of bytes needed to store code point c in UTF-8.
 *
 * Returns 0 for a negative value, otherwise 1, 2, 3 or 4 depending on
 * which range (up to 0x7F, 0x7FF, 0xFFFF, or above) c falls into.
 */
static int
utf8countBytes(int c)
{
    int count;

    if (c < 0)
        count = 0;
    else if (c < 0x80)
        count = 1;
    else if (c < 0x800)
        count = 2;
    else if (c < 0x10000)
        count = 3;
    else
        count = 4;

    return count;
}
68
/*
 * Encode code point c as UTF-8 into dest and store the number of bytes
 * written (1 to 4) in *len_return.
 *
 * dest must have room for the encoded sequence; no terminator is written.
 * A negative c writes nothing and reports a length of 0, so that callers
 * which advance a pointer by *len_return never use an indeterminate value.
 */
static void
utf8insert(char *dest, int c, int *len_return)
{
    if (c < 0) {
        *len_return = 0;
        return;
    }

    if (c <= 0x7F) {
        dest[0] = (char) c;
        *len_return = 1;
    } else if (c <= 0x7FF) {
        dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
        dest[1] = (char) (0x80 | (c & 0x3F));
        *len_return = 2;
    } else if (c <= 0xFFFF) {
        dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
        dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[2] = (char) (0x80 | (c & 0x3F));
        *len_return = 3;
    } else {
        /* Only the low 21 bits of c are representable in four bytes. */
        dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
        dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
        dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[3] = (char) (0x80 | (c & 0x3F));
        *len_return = 4;
    }
}
95
/*
 * Count the bytes needed to hold the first len bytes of s after each byte
 * is re-encoded as UTF-8: one byte when the high bit is clear, two when it
 * is set.  A len of zero or less yields 0.
 */
static int
l1countUtf8Bytes(char *s, int len)
{
    int total = 0;
    int i;

    for (i = 0; i < len; i++) {
        total += (s[i] & 0x80) ? 2 : 1;
    }
    return total;
}
110
/*
 * Re-encode the first len bytes of s as UTF-8 into d, treating each source
 * byte as a code point in the range 0..0xFF.
 *
 * No terminator is appended.  d receives one or two bytes per source byte,
 * so it must be sized accordingly by the caller.
 */
static void
l1utf8copy(char *d, char *s, int len)
{
    for (; len > 0; len--, s++) {
        int written;

        utf8insert(d, (*s) & 0xFF, &written);
        d += written;
    }
}
122
/*
 * Convert the NUL-terminated UTF-8 string s into a NUL-terminated
 * single-byte string in d, one output byte per code point up to 0xFF.
 *
 *  - ASCII bytes are copied unchanged.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit set is
 *    decoded into one output byte.
 *  - A lead byte 0xC0..0xC3 followed by a byte without the high bit is
 *    malformed: the lead byte is dropped and nothing is emitted for it.
 *  - Any other sequence is replaced by a single '?', and its trailing
 *    continuation bytes are skipped.
 *
 * d must have room for utf8l1strlen(s) + 1 bytes; the two functions walk
 * the input identically and must be kept in sync.
 */
static void
utf8l1strcpy(char *d, char *s)
{
#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0)
    while (*s) {
        if ((*s & 0x80) == 0) {
            *d++ = *s++;
        } else if ((*s & 0x7C) == 0x40) {
            /* High bit is known to be set here, so *s is 0xC0..0xC3. */
            if ((s[1] & 0x80) == 0) {
                s++;            /* incorrect UTF-8 */
                continue;
            } else {
                char ch = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));

                /*
                 * The overlong pair C0 80 decodes to 0, which would cut the
                 * output short of the length utf8l1strlen() reports.  Emit
                 * the replacement character instead.
                 */
                *d++ = (char) ((ch != '\0') ? ch : '?');
                s += 2;
            }
        } else {
            *d++ = '?';
            SKIP;
        }
    }
    *d = 0;
#undef SKIP
}
149
/* Keep this in sync with utf8l1strcpy! */
/*
 * Length, excluding the terminator, of the single-byte string that
 * converting the NUL-terminated UTF-8 string s would produce.
 *
 * Each ASCII byte, each decodable two-byte sequence and each replaced
 * sequence counts as one; a 0xC0..0xC3 lead byte followed by a byte without
 * the high bit counts as nothing.
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
        if ((*s & 0x80) == 0) {
            /* plain ASCII */
            count++;
            s++;
        } else if ((*s & 0x7C) != 0x40) {
            /* not a 0xC0..0xC3 lead: one replacement, skip continuations */
            count++;
            do {
                s++;
            } while ((*s & 0xC0) == 0x80);
        } else if ((s[1] & 0x80) != 0) {
            /* two-byte sequence yielding one output byte */
            count++;
            s += 2;
        } else {
            /* lead byte without a continuation: dropped */
            s++;
        }
    }
    return count;
}
179
180int
181Xutf8TextPropertyToTextList(Display * dpy,
182			    const XTextProperty * tp,
183			    char ***list_return,
184			    int *count_return)
185{
186    int utf8;
187    char **list;
188    int nelements;
189    char *cp;
190    char *start;
191    size_t i;
192    int j;
193    size_t datalen = tp->nitems;
194    size_t len;
195
196    if (tp->format != 8)
197	return XConverterNotFound;
198
199    if (tp->encoding == XA_STRING)
200	utf8 = 0;
201    else if (tp->encoding == XA_UTF8_STRING(dpy))
202	utf8 = 1;
203    else
204	return XConverterNotFound;
205
206    if (datalen == 0) {
207	*list_return = NULL;
208	*count_return = 0;
209	return 0;
210    }
211
212    nelements = 1;
213    for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
214	if (*cp == '\0')
215	    nelements++;
216    }
217
218    list = TypeMallocN(char *, (unsigned) nelements);
219    if (!list)
220	return XNoMemory;
221
222    if (utf8)
223	len = datalen;
224    else
225	len = l1countUtf8Bytes((char *) tp->value, datalen);
226
227    start = CastMallocN(char, len);
228    if (!start) {
229	free(list);
230	return XNoMemory;
231    }
232
233    if (utf8)
234	memcpy(start, (char *) tp->value, datalen);
235    else
236	l1utf8copy(start, (char *) tp->value, datalen);
237    start[len] = '\0';
238
239    for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
240	if (*cp == '\0') {
241	    list[j] = start;
242	    start = (cp + 1);
243	    j++;
244	}
245    }
246
247    list[j] = NULL;
248    *list_return = list;
249    *count_return = nelements;
250    return 0;
251}
252
253int
254Xutf8TextListToTextProperty(Display * dpy,
255			    char **list,
256			    int count,
257			    XICCEncodingStyle style,
258			    XTextProperty * text_prop)
259{
260    XTextProperty proto;
261    unsigned int nbytes;
262    int i;
263
264    if (style != XStringStyle &&
265	style != XCompoundTextStyle &&
266	style != XStdICCTextStyle &&
267	style != XUTF8StringStyle)
268	return XConverterNotFound;
269
270    if (style == XUTF8StringStyle) {
271	for (i = 0, nbytes = 0; i < count; i++) {
272	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
273	}
274    } else {
275	for (i = 0, nbytes = 0; i < count; i++) {
276	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
277	}
278    }
279
280    if (style == XCompoundTextStyle)
281	proto.encoding = XA_COMPOUND_TEXT(dpy);
282    else if (style == XUTF8StringStyle)
283	proto.encoding = XA_UTF8_STRING(dpy);
284    else
285	proto.encoding = XA_STRING;
286    proto.format = 8;
287    if (nbytes)
288	proto.nitems = nbytes - 1;
289    else
290	proto.nitems = 0;
291    proto.value = NULL;
292
293    if (nbytes > 0) {
294	char *buf = TypeMallocN(char, nbytes);
295	if (!buf)
296	    return XNoMemory;
297
298	proto.value = (unsigned char *) buf;
299	for (i = 0; i < count; i++) {
300	    char *arg = list[i];
301
302	    if (arg) {
303		if (style == XUTF8StringStyle) {
304		    strcpy(buf, arg);
305		} else {
306		    utf8l1strcpy(buf, arg);
307		}
308		buf += (strlen(buf) + 1);
309	    } else {
310		*buf++ = '\0';
311	    }
312	}
313    } else {
314	proto.value = CastMalloc(unsigned char);	/* easier for client */
315	if (!proto.value)
316	    return XNoMemory;
317
318	proto.value[0] = '\0';
319    }
320
321    *text_prop = proto;
322    return 0;
323}
324
325int
326Xutf8LookupString(XIC ic GCC_UNUSED,
327		  XKeyEvent * ev,
328		  char *buffer,
329		  int nbytes,
330		  KeySym * keysym_return,
331		  Status * status_return)
332{
333    int rc;
334    KeySym keysym;
335    int codepoint;
336    int len;
337
338    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
339
340    if (rc > 0) {
341	codepoint = buffer[0] & 0xFF;
342    } else {
343	codepoint = keysym2ucs(keysym);
344    }
345
346    if (codepoint < 0) {
347	if (keysym == None) {
348	    *status_return = XLookupNone;
349	} else {
350	    *status_return = XLookupKeySym;
351	    *keysym_return = keysym;
352	}
353	return 0;
354    }
355
356    if (nbytes < utf8countBytes(codepoint)) {
357	*status_return = XBufferOverflow;
358	return utf8countBytes(codepoint);
359    }
360
361    utf8insert(buffer, codepoint, &len);
362
363    if (keysym != None) {
364	*keysym_return = keysym;
365	*status_return = XLookupBoth;
366    } else {
367	*status_return = XLookupChars;
368    }
369    return len;
370}
371#else /* X_HAVE_UTF8_STRING */
/*
 * Silence the compiler: when X_HAVE_UTF8_STRING is defined everything else
 * in this file is compiled out, so provide one do-nothing definition.
 */
void
xutf8_dummy(void)
{
}
378#endif
379