xutf8.c revision d522f475
1/* $XTermId: xutf8.c,v 1.8 2005/01/14 01:50:03 tom Exp $ */
2
3/* $XFree86: xc/programs/xterm/xutf8.c,v 1.4 2005/01/14 01:50:03 dickey Exp $ */
4/*
5Copyright (c) 2001 by Juliusz Chroboczek
6
7Permission is hereby granted, free of charge, to any person obtaining a copy
8of this software and associated documentation files (the "Software"), to deal
9in the Software without restriction, including without limitation the rights
10to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11copies of the Software, and to permit persons to whom the Software is
12furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in
15all copies or substantial portions of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
20AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23THE SOFTWARE.
24*/
25
26#include <xterm.h>
27
28#include <X11/Xlib.h>
29#include <X11/Xatom.h>
30#include <X11/Xutil.h>
31#include <X11/Xmu/Xmu.h>
32
33#include <xutf8.h>
34
35#ifndef X_HAVE_UTF8_STRING
36
37#undef XA_UTF8_STRING
38#define KEYSYM2UCS_INCLUDED
39
40#include "keysym2ucs.c"
41
42Atom
43_xa_utf8_string(Display * dpy)
44{
45    static AtomPtr p = NULL;
46
47    if (p == NULL)
48	p = XmuMakeAtom("UTF8_STRING");
49
50    return XmuInternAtom(dpy, p);
51}
52#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
53
/*
 * Number of bytes utf8insert() stores for the code point c:
 * 0 for a negative value, otherwise 1 to 4 depending on the size of c.
 */
static int
utf8countBytes(int c)
{
    int need;

    if (c < 0)
	need = 0;
    else if (c < 0x80)
	need = 1;
    else if (c < 0x800)
	need = 2;
    else if (c < 0x10000)
	need = 3;
    else
	need = 4;

    return need;
}
69
/*
 * Encode the code point c as UTF-8 at dest and store the number of bytes
 * written in *len_return.
 *
 * dest must have room for the encoding, which is at most 4 bytes (the same
 * count that utf8countBytes() reports).  No terminating NUL is written.
 *
 * A negative c is not encodable: nothing is written to dest and
 * *len_return is set to 0, so a caller that advances its output pointer by
 * the returned length never reads an indeterminate value.
 */
static void
utf8insert(char *dest, int c, int *len_return)
{
    if (c < 0) {
	*len_return = 0;
	return;
    }

    if (c <= 0x7F) {
	/* 0xxxxxxx */
	dest[0] = (char) c;
	*len_return = 1;
    } else if (c <= 0x7FF) {
	/* 110xxxxx 10xxxxxx */
	dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
	dest[1] = (char) (0x80 | (c & 0x3F));
	*len_return = 2;
    } else if (c <= 0xFFFF) {
	/* 1110xxxx 10xxxxxx 10xxxxxx */
	dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
	dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[2] = (char) (0x80 | (c & 0x3F));
	*len_return = 3;
    } else {
	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
	dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
	dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[3] = (char) (0x80 | (c & 0x3F));
	*len_return = 4;
    }
}
96
/*
 * Compute how many bytes the first len bytes of s need once each of them is
 * written as UTF-8: one byte when the high bit is clear, two bytes when it
 * is set.  A len of zero or less yields 0.
 */
static int
l1countUtf8Bytes(char *s, int len)
{
    int total = 0;
    int i;

    for (i = 0; i < len; i++) {
	total += (s[i] & 0x80) ? 2 : 1;
    }
    return total;
}
111
/*
 * Write the first len bytes of s to d as UTF-8, treating each source byte
 * as a code point in the range 0..255.  No terminating NUL is appended;
 * d must be large enough for the result (l1countUtf8Bytes() gives the
 * size).
 */
static void
l1utf8copy(char *d, char *s, int len)
{
    char *out = d;
    int i;

    for (i = 0; i < len; i++) {
	int written;

	/* mask to 0..255 so a signed char never yields a negative value */
	utf8insert(out, s[i] & 0xFF, &written);
	out += written;
    }
}
123
/*
 * Convert the NUL-terminated UTF-8 string s to an 8-bit string in d, one
 * output byte per code point in the range U+0000..U+00FF.
 *
 * - A byte with the high bit clear is copied unchanged.
 * - A lead byte 0xC0..0xC3 followed by a byte with the high bit set is
 *   decoded as a two-byte sequence.  Overlong forms (lead byte 0xC0 or
 *   0xC1, which would decode to a value below 0x80, including NUL) are
 *   replaced by '?' so the output can never contain an embedded
 *   terminator or a smuggled ASCII character.
 * - A lead byte 0xC0..0xC3 followed by a byte with the high bit clear is
 *   dropped as incorrect UTF-8.
 * - Anything else produces a single '?' and the bytes of that sequence are
 *   skipped.
 *
 * The number of input bytes consumed and output bytes produced in each case
 * must match utf8l1strlen(), which callers use to size d.
 */
static void
utf8l1strcpy(char *d, char *s)
{
/* step over the current byte and any following 10xxxxxx continuation bytes */
#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0)
    while (*s) {
	if ((*s & 0x80) == 0) {
	    *d++ = *s++;
	} else if ((*s & 0x7C) == 0x40) {
	    /* high bit is known to be set here, so this is 0xC0..0xC3 */
	    if ((s[1] & 0x80) == 0) {
		s++;		/* incorrect UTF-8 */
	    } else {
		int ch = ((*s & 0x03) << 6) | (s[1] & 0x3F);

		/* values below 0x80 can only come from an overlong form */
		*d++ = (char) ((ch < 0x80) ? '?' : ch);
		s += 2;
	    }
	} else {
	    *d++ = '?';
	    SKIP;
	}
    }
    *d = 0;
#undef SKIP
}
150
/*
 * Keep this in sync with utf8l1strcpy!
 *
 * Return the number of bytes, not counting the terminating NUL, that
 * utf8l1strcpy() writes for the UTF-8 string s.
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* byte with the high bit clear: one output byte */
	    count++;
	    s++;
	} else if ((*s & 0x7C) != 0x40) {
	    /*
	     * Not a 0xC0..0xC3 lead byte: counts as one output byte, then
	     * step over this byte and any 10xxxxxx bytes that follow.
	     */
	    count++;
	    do {
		s++;
	    } while ((*s & 0x80) != 0 && (*s & 0xC0) != 0xC0);
	} else if ((s[1] & 0x80) != 0) {
	    /* two-byte sequence: one output byte */
	    count++;
	    s += 2;
	} else {
	    /* lead byte without a follower: dropped, nothing counted */
	    s++;
	}
    }
    return count;
}
180
181int
182Xutf8TextPropertyToTextList(Display * dpy,
183			    const XTextProperty * tp,
184			    char ***list_return,
185			    int *count_return)
186{
187    int utf8;
188    char **list;
189    int nelements;
190    char *cp;
191    char *start;
192    int i, j;
193    int datalen = (int) tp->nitems;
194    int len;
195
196    if (tp->format != 8)
197	return XConverterNotFound;
198
199    if (tp->encoding == XA_STRING)
200	utf8 = 0;
201    else if (tp->encoding == XA_UTF8_STRING(dpy))
202	utf8 = 1;
203    else
204	return XConverterNotFound;
205
206    if (datalen == 0) {
207	*list_return = NULL;
208	*count_return = 0;
209	return 0;
210    }
211
212    nelements = 1;
213    for (cp = (char *) tp->value, i = datalen; i > 0; cp++, i--) {
214	if (*cp == '\0')
215	    nelements++;
216    }
217
218    list = TypeMallocN(char *, nelements);
219    if (!list)
220	return XNoMemory;
221
222    if (utf8)
223	len = datalen;
224    else
225	len = l1countUtf8Bytes((char *) tp->value, datalen);
226
227    start = CastMallocN(char, len);
228    if (!start) {
229	free(list);
230	return XNoMemory;
231    }
232
233    if (utf8)
234	memcpy(start, (char *) tp->value, datalen);
235    else
236	l1utf8copy(start, (char *) tp->value, datalen);
237    start[len] = '\0';
238
239    for (cp = start, i = len + 1, j = 0; i > 0; cp++, i--) {
240	if (*cp == '\0') {
241	    list[j] = start;
242	    start = (cp + 1);
243	    j++;
244	}
245    }
246
247    list[j] = NULL;
248    *list_return = list;
249    *count_return = nelements;
250    return 0;
251}
252
253int
254Xutf8TextListToTextProperty(Display * dpy,
255			    char **list,
256			    int count,
257			    XICCEncodingStyle style,
258			    XTextProperty * text_prop)
259{
260    XTextProperty proto;
261    unsigned int nbytes;
262    int i;
263
264    if (style != XStringStyle &&
265	style != XCompoundTextStyle &&
266	style != XStdICCTextStyle &&
267	style != XUTF8StringStyle)
268	return XConverterNotFound;
269
270    if (style == XUTF8StringStyle) {
271	for (i = 0, nbytes = 0; i < count; i++) {
272	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
273	}
274    } else {
275	for (i = 0, nbytes = 0; i < count; i++) {
276	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
277	}
278    }
279
280    if (style == XCompoundTextStyle)
281	proto.encoding = XA_COMPOUND_TEXT(dpy);
282    else if (style == XUTF8StringStyle)
283	proto.encoding = XA_UTF8_STRING(dpy);
284    else
285	proto.encoding = XA_STRING;
286    proto.format = 8;
287    if (nbytes)
288	proto.nitems = nbytes - 1;
289    else
290	proto.nitems = 0;
291    proto.value = NULL;
292
293    if (nbytes > 0) {
294	char *buf = TypeMallocN(char, nbytes);
295	if (!buf)
296	    return XNoMemory;
297
298	proto.value = (unsigned char *) buf;
299	for (i = 0; i < count; i++) {
300	    char *arg = list[i];
301
302	    if (arg) {
303		if (style == XUTF8StringStyle) {
304		    strcpy(buf, arg);
305		} else {
306		    utf8l1strcpy(buf, arg);
307		}
308		buf += (strlen(buf) + 1);
309	    } else {
310		*buf++ = '\0';
311	    }
312	}
313    } else {
314	proto.value = CastMalloc(unsigned char);	/* easier for client */
315	if (!proto.value)
316	    return XNoMemory;
317
318	proto.value[0] = '\0';
319    }
320
321    *text_prop = proto;
322    return 0;
323}
324
325int
326Xutf8LookupString(XIC ic GCC_UNUSED,
327		  XKeyEvent * ev,
328		  char *buffer,
329		  int nbytes,
330		  KeySym * keysym_return,
331		  Status * status_return)
332{
333    int rc;
334    KeySym keysym;
335    int codepoint;
336    int len;
337
338    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
339
340    if (rc > 0) {
341	codepoint = buffer[0] & 0xFF;
342    } else {
343	codepoint = keysym2ucs(keysym);
344    }
345
346    if (codepoint < 0) {
347	if (keysym == None) {
348	    *status_return = XLookupNone;
349	} else {
350	    *status_return = XLookupKeySym;
351	    *keysym_return = keysym;
352	}
353	return 0;
354    }
355
356    if (nbytes < utf8countBytes(codepoint)) {
357	*status_return = XBufferOverflow;
358	return utf8countBytes(codepoint);
359    }
360
361    utf8insert(buffer, codepoint, &len);
362
363    if (keysym != None) {
364	*keysym_return = keysym;
365	*status_return = XLookupBoth;
366    } else {
367	*status_return = XLookupChars;
368    }
369    return len;
370}
371#else /* X_HAVE_UTF8_STRING */
372/* Silence the compiler */
/*
 * Does nothing.  It is the only definition compiled from this file when
 * X_HAVE_UTF8_STRING is defined, so the translation unit is not empty.
 */
void
xutf8_dummy(void)
{
    /* intentionally empty */
}
378#endif
379