xutf8.c revision e39b573c
1e39b573cSmrg/* $XTermId: xutf8.c,v 1.10 2011/07/04 13:51:08 tom Exp $ */
2d522f475Smrg
3d522f475Smrg/*
4d522f475SmrgCopyright (c) 2001 by Juliusz Chroboczek
5d522f475Smrg
6d522f475SmrgPermission is hereby granted, free of charge, to any person obtaining a copy
7d522f475Smrgof this software and associated documentation files (the "Software"), to deal
8d522f475Smrgin the Software without restriction, including without limitation the rights
9d522f475Smrgto use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10d522f475Smrgcopies of the Software, and to permit persons to whom the Software is
11d522f475Smrgfurnished to do so, subject to the following conditions:
12d522f475Smrg
13d522f475SmrgThe above copyright notice and this permission notice shall be included in
14d522f475Smrgall copies or substantial portions of the Software.
15d522f475Smrg
16d522f475SmrgTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17d522f475SmrgIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18d522f475SmrgFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
19d522f475SmrgAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20d522f475SmrgLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21d522f475SmrgOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22d522f475SmrgTHE SOFTWARE.
23d522f475Smrg*/
24d522f475Smrg
25d522f475Smrg#include <xterm.h>
26d522f475Smrg
27d522f475Smrg#include <X11/Xlib.h>
28d522f475Smrg#include <X11/Xatom.h>
29d522f475Smrg#include <X11/Xutil.h>
30d522f475Smrg#include <X11/Xmu/Xmu.h>
31d522f475Smrg
32d522f475Smrg#include <xutf8.h>
33d522f475Smrg
34d522f475Smrg#ifndef X_HAVE_UTF8_STRING
35d522f475Smrg
36d522f475Smrg#undef XA_UTF8_STRING
37d522f475Smrg#define KEYSYM2UCS_INCLUDED
38d522f475Smrg
39d522f475Smrg#include "keysym2ucs.c"
40d522f475Smrg
41d522f475SmrgAtom
42d522f475Smrg_xa_utf8_string(Display * dpy)
43d522f475Smrg{
44d522f475Smrg    static AtomPtr p = NULL;
45d522f475Smrg
46d522f475Smrg    if (p == NULL)
47d522f475Smrg	p = XmuMakeAtom("UTF8_STRING");
48d522f475Smrg
49d522f475Smrg    return XmuInternAtom(dpy, p);
50d522f475Smrg}
51d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
52d522f475Smrg
/*
 * Return how many bytes utf8insert() would use for code point c:
 * 0 for a negative value, otherwise 1 to 4.  Anything above 0xFFFF is
 * reported as 4 bytes; no upper limit is checked.
 */
static int
utf8countBytes(int c)
{
    int need;

    if (c < 0)
	need = 0;
    else if (c < 0x80)
	need = 1;
    else if (c < 0x800)
	need = 2;
    else if (c < 0x10000)
	need = 3;
    else
	need = 4;

    return need;
}
68d522f475Smrg
/*
 * Encode code point c as UTF-8 at dest and store the number of bytes
 * written (1 to 4) in *len_return.  No terminator is appended; dest
 * must have room for the encoded bytes.
 *
 * A negative c writes nothing and reports a length of 0, so a caller
 * that advances its output pointer by *len_return never reads an
 * unset value.
 */
static void
utf8insert(char *dest, int c, int *len_return)
{
    if (c < 0) {
	*len_return = 0;
	return;
    }

    if (c <= 0x7F) {
	dest[0] = (char) c;
	*len_return = 1;
    } else if (c <= 0x7FF) {
	dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
	dest[1] = (char) (0x80 | (c & 0x3F));
	*len_return = 2;
    } else if (c <= 0xFFFF) {
	dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
	dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[2] = (char) (0x80 | (c & 0x3F));
	*len_return = 3;
    } else {
	dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
	dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
	dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[3] = (char) (0x80 | (c & 0x3F));
	*len_return = 4;
    }
}
95d522f475Smrg
/*
 * Return the number of bytes the first len bytes of s occupy once each
 * one is re-encoded as UTF-8: bytes with the high bit clear count as
 * one, bytes with the high bit set count as two.  Embedded NUL bytes
 * are counted like any other byte.  A len of zero or less yields 0.
 */
static int
l1countUtf8Bytes(char *s, int len)
{
    int total = 0;
    int n;

    for (n = 0; n < len; n++) {
	total += (s[n] & 0x80) ? 2 : 1;
    }
    return total;
}
110d522f475Smrg
/*
 * Copy len bytes from s to d, encoding each source byte (taken as an
 * unsigned value 0..255) as UTF-8 via utf8insert().  No terminator is
 * written; d must be large enough for the encoded result.
 */
static void
l1utf8copy(char *d, char *s, int len)
{
    int n;
    int used;

    for (n = 0; n < len; n++) {
	utf8insert(d, s[n] & 0xFF, &used);
	d += used;
    }
}
122d522f475Smrg
/*
 * Copy the NUL-terminated UTF-8 string s to d as single-byte (Latin-1)
 * text, and terminate d.
 *
 *  - ASCII bytes are copied unchanged.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit set is
 *    combined into one output byte.
 *  - A lead byte 0xC0..0xC3 followed by a byte without the high bit is
 *    dropped and produces no output.
 *  - Any other byte with the high bit set produces '?', after which
 *    the following continuation bytes (10xxxxxx) are skipped.
 *
 * utf8l1strlen() must count exactly what this function writes.
 */
static void
utf8l1strcpy(char *d, char *s)
{
    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* plain ASCII */
	    *d++ = *s++;
	    continue;
	}

	if ((*s & 0x7C) == 0x40) {
	    /* high bit set and bits 6..2 equal 10000: lead byte 0xC0..0xC3 */
	    if ((s[1] & 0x80) == 0) {
		s++;		/* incorrect UTF-8: drop the lead byte */
	    } else {
		*d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));
		s += 2;
	    }
	    continue;
	}

	/* not representable in one byte: substitute and resynchronise */
	*d++ = '?';
	do {
	    s++;
	} while ((*s & 0xC0) == 0x80);
    }
    *d = 0;
}
149d522f475Smrg
/*
 * Keep this in sync with utf8l1strcpy!
 *
 * Return the number of bytes (excluding the terminator) that
 * utf8l1strcpy() writes for the NUL-terminated UTF-8 string s.
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* ASCII: one byte in, one byte out */
	    s++;
	    count++;
	} else if ((*s & 0x7C) != 0x40) {
	    /* becomes a single '?'; skip trailing continuation bytes */
	    count++;
	    do {
		s++;
	    } while ((*s & 0xC0) == 0x80);
	} else if ((s[1] & 0x80) != 0) {
	    /* lead byte 0xC0..0xC3 plus a second byte: one output byte */
	    count++;
	    s += 2;
	} else {
	    /* lead byte followed by ASCII: dropped, nothing counted */
	    s++;
	}
    }
    return count;
}
179d522f475Smrg
180d522f475Smrgint
181d522f475SmrgXutf8TextPropertyToTextList(Display * dpy,
182d522f475Smrg			    const XTextProperty * tp,
183d522f475Smrg			    char ***list_return,
184d522f475Smrg			    int *count_return)
185d522f475Smrg{
186d522f475Smrg    int utf8;
187d522f475Smrg    char **list;
188d522f475Smrg    int nelements;
189d522f475Smrg    char *cp;
190d522f475Smrg    char *start;
19120d2c4d2Smrg    size_t i;
19220d2c4d2Smrg    int j;
19320d2c4d2Smrg    size_t datalen = tp->nitems;
19420d2c4d2Smrg    size_t len;
195d522f475Smrg
196d522f475Smrg    if (tp->format != 8)
197d522f475Smrg	return XConverterNotFound;
198d522f475Smrg
199d522f475Smrg    if (tp->encoding == XA_STRING)
200d522f475Smrg	utf8 = 0;
201d522f475Smrg    else if (tp->encoding == XA_UTF8_STRING(dpy))
202d522f475Smrg	utf8 = 1;
203d522f475Smrg    else
204d522f475Smrg	return XConverterNotFound;
205d522f475Smrg
206d522f475Smrg    if (datalen == 0) {
207d522f475Smrg	*list_return = NULL;
208d522f475Smrg	*count_return = 0;
209d522f475Smrg	return 0;
210d522f475Smrg    }
211d522f475Smrg
212d522f475Smrg    nelements = 1;
21320d2c4d2Smrg    for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
214d522f475Smrg	if (*cp == '\0')
215d522f475Smrg	    nelements++;
216d522f475Smrg    }
217d522f475Smrg
21820d2c4d2Smrg    list = TypeMallocN(char *, (unsigned) nelements);
219d522f475Smrg    if (!list)
220d522f475Smrg	return XNoMemory;
221d522f475Smrg
222d522f475Smrg    if (utf8)
223d522f475Smrg	len = datalen;
224d522f475Smrg    else
225d522f475Smrg	len = l1countUtf8Bytes((char *) tp->value, datalen);
226d522f475Smrg
227d522f475Smrg    start = CastMallocN(char, len);
228d522f475Smrg    if (!start) {
229d522f475Smrg	free(list);
230d522f475Smrg	return XNoMemory;
231d522f475Smrg    }
232d522f475Smrg
233d522f475Smrg    if (utf8)
234d522f475Smrg	memcpy(start, (char *) tp->value, datalen);
235d522f475Smrg    else
236d522f475Smrg	l1utf8copy(start, (char *) tp->value, datalen);
237d522f475Smrg    start[len] = '\0';
238d522f475Smrg
23920d2c4d2Smrg    for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
240d522f475Smrg	if (*cp == '\0') {
241d522f475Smrg	    list[j] = start;
242d522f475Smrg	    start = (cp + 1);
243d522f475Smrg	    j++;
244d522f475Smrg	}
245d522f475Smrg    }
246d522f475Smrg
247d522f475Smrg    list[j] = NULL;
248d522f475Smrg    *list_return = list;
249d522f475Smrg    *count_return = nelements;
250d522f475Smrg    return 0;
251d522f475Smrg}
252d522f475Smrg
253d522f475Smrgint
254d522f475SmrgXutf8TextListToTextProperty(Display * dpy,
255d522f475Smrg			    char **list,
256d522f475Smrg			    int count,
257d522f475Smrg			    XICCEncodingStyle style,
258d522f475Smrg			    XTextProperty * text_prop)
259d522f475Smrg{
260d522f475Smrg    XTextProperty proto;
261d522f475Smrg    unsigned int nbytes;
262d522f475Smrg    int i;
263d522f475Smrg
264d522f475Smrg    if (style != XStringStyle &&
265d522f475Smrg	style != XCompoundTextStyle &&
266d522f475Smrg	style != XStdICCTextStyle &&
267d522f475Smrg	style != XUTF8StringStyle)
268d522f475Smrg	return XConverterNotFound;
269d522f475Smrg
270d522f475Smrg    if (style == XUTF8StringStyle) {
271d522f475Smrg	for (i = 0, nbytes = 0; i < count; i++) {
272d522f475Smrg	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
273d522f475Smrg	}
274d522f475Smrg    } else {
275d522f475Smrg	for (i = 0, nbytes = 0; i < count; i++) {
276d522f475Smrg	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
277d522f475Smrg	}
278d522f475Smrg    }
279d522f475Smrg
280d522f475Smrg    if (style == XCompoundTextStyle)
281d522f475Smrg	proto.encoding = XA_COMPOUND_TEXT(dpy);
282d522f475Smrg    else if (style == XUTF8StringStyle)
283d522f475Smrg	proto.encoding = XA_UTF8_STRING(dpy);
284d522f475Smrg    else
285d522f475Smrg	proto.encoding = XA_STRING;
286d522f475Smrg    proto.format = 8;
287d522f475Smrg    if (nbytes)
288d522f475Smrg	proto.nitems = nbytes - 1;
289d522f475Smrg    else
290d522f475Smrg	proto.nitems = 0;
291d522f475Smrg    proto.value = NULL;
292d522f475Smrg
293d522f475Smrg    if (nbytes > 0) {
294d522f475Smrg	char *buf = TypeMallocN(char, nbytes);
295d522f475Smrg	if (!buf)
296d522f475Smrg	    return XNoMemory;
297d522f475Smrg
298d522f475Smrg	proto.value = (unsigned char *) buf;
299d522f475Smrg	for (i = 0; i < count; i++) {
300d522f475Smrg	    char *arg = list[i];
301d522f475Smrg
302d522f475Smrg	    if (arg) {
303d522f475Smrg		if (style == XUTF8StringStyle) {
304d522f475Smrg		    strcpy(buf, arg);
305d522f475Smrg		} else {
306d522f475Smrg		    utf8l1strcpy(buf, arg);
307d522f475Smrg		}
308d522f475Smrg		buf += (strlen(buf) + 1);
309d522f475Smrg	    } else {
310d522f475Smrg		*buf++ = '\0';
311d522f475Smrg	    }
312d522f475Smrg	}
313d522f475Smrg    } else {
314d522f475Smrg	proto.value = CastMalloc(unsigned char);	/* easier for client */
315d522f475Smrg	if (!proto.value)
316d522f475Smrg	    return XNoMemory;
317d522f475Smrg
318d522f475Smrg	proto.value[0] = '\0';
319d522f475Smrg    }
320d522f475Smrg
321d522f475Smrg    *text_prop = proto;
322d522f475Smrg    return 0;
323d522f475Smrg}
324d522f475Smrg
325d522f475Smrgint
326d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED,
327d522f475Smrg		  XKeyEvent * ev,
328d522f475Smrg		  char *buffer,
329d522f475Smrg		  int nbytes,
330d522f475Smrg		  KeySym * keysym_return,
331d522f475Smrg		  Status * status_return)
332d522f475Smrg{
333d522f475Smrg    int rc;
334d522f475Smrg    KeySym keysym;
335d522f475Smrg    int codepoint;
336d522f475Smrg    int len;
337d522f475Smrg
338d522f475Smrg    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
339d522f475Smrg
340d522f475Smrg    if (rc > 0) {
341d522f475Smrg	codepoint = buffer[0] & 0xFF;
342d522f475Smrg    } else {
343d522f475Smrg	codepoint = keysym2ucs(keysym);
344d522f475Smrg    }
345d522f475Smrg
346d522f475Smrg    if (codepoint < 0) {
347d522f475Smrg	if (keysym == None) {
348d522f475Smrg	    *status_return = XLookupNone;
349d522f475Smrg	} else {
350d522f475Smrg	    *status_return = XLookupKeySym;
351d522f475Smrg	    *keysym_return = keysym;
352d522f475Smrg	}
353d522f475Smrg	return 0;
354d522f475Smrg    }
355d522f475Smrg
356d522f475Smrg    if (nbytes < utf8countBytes(codepoint)) {
357d522f475Smrg	*status_return = XBufferOverflow;
358d522f475Smrg	return utf8countBytes(codepoint);
359d522f475Smrg    }
360d522f475Smrg
361d522f475Smrg    utf8insert(buffer, codepoint, &len);
362d522f475Smrg
363d522f475Smrg    if (keysym != None) {
364d522f475Smrg	*keysym_return = keysym;
365d522f475Smrg	*status_return = XLookupBoth;
366d522f475Smrg    } else {
367d522f475Smrg	*status_return = XLookupChars;
368d522f475Smrg    }
369d522f475Smrg    return len;
370d522f475Smrg}
371d522f475Smrg#else /* X_HAVE_UTF8_STRING */
/*
 * Placeholder so this translation unit is not empty when Xlib already
 * provides the UTF-8 functions (silences the compiler).
 */
void
xutf8_dummy(void)
{
}
378d522f475Smrg#endif
379