xutf8.c revision d522f475
1d522f475Smrg/* $XTermId: xutf8.c,v 1.8 2005/01/14 01:50:03 tom Exp $ */
2d522f475Smrg
3d522f475Smrg/* $XFree86: xc/programs/xterm/xutf8.c,v 1.4 2005/01/14 01:50:03 dickey Exp $ */
4d522f475Smrg/*
5d522f475SmrgCopyright (c) 2001 by Juliusz Chroboczek
6d522f475Smrg
7d522f475SmrgPermission is hereby granted, free of charge, to any person obtaining a copy
8d522f475Smrgof this software and associated documentation files (the "Software"), to deal
9d522f475Smrgin the Software without restriction, including without limitation the rights
10d522f475Smrgto use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11d522f475Smrgcopies of the Software, and to permit persons to whom the Software is
12d522f475Smrgfurnished to do so, subject to the following conditions:
13d522f475Smrg
14d522f475SmrgThe above copyright notice and this permission notice shall be included in
15d522f475Smrgall copies or substantial portions of the Software.
16d522f475Smrg
17d522f475SmrgTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18d522f475SmrgIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19d522f475SmrgFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
20d522f475SmrgAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21d522f475SmrgLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22d522f475SmrgOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23d522f475SmrgTHE SOFTWARE.
24d522f475Smrg*/
25d522f475Smrg
26d522f475Smrg#include <xterm.h>
27d522f475Smrg
28d522f475Smrg#include <X11/Xlib.h>
29d522f475Smrg#include <X11/Xatom.h>
30d522f475Smrg#include <X11/Xutil.h>
31d522f475Smrg#include <X11/Xmu/Xmu.h>
32d522f475Smrg
33d522f475Smrg#include <xutf8.h>
34d522f475Smrg
35d522f475Smrg#ifndef X_HAVE_UTF8_STRING
36d522f475Smrg
37d522f475Smrg#undef XA_UTF8_STRING
38d522f475Smrg#define KEYSYM2UCS_INCLUDED
39d522f475Smrg
40d522f475Smrg#include "keysym2ucs.c"
41d522f475Smrg
42d522f475SmrgAtom
43d522f475Smrg_xa_utf8_string(Display * dpy)
44d522f475Smrg{
45d522f475Smrg    static AtomPtr p = NULL;
46d522f475Smrg
47d522f475Smrg    if (p == NULL)
48d522f475Smrg	p = XmuMakeAtom("UTF8_STRING");
49d522f475Smrg
50d522f475Smrg    return XmuInternAtom(dpy, p);
51d522f475Smrg}
52d522f475Smrg#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
53d522f475Smrg
/*
 * Number of bytes needed to store code point c in UTF-8.
 *
 * Returns 0 for a negative value, otherwise 1 to 4.  Every value above
 * 0xFFFF is reported as 4; no upper limit is checked.
 */
static int
utf8countBytes(int c)
{
    int nbytes;

    if (c < 0)
        nbytes = 0;
    else if (c < 0x80)
        nbytes = 1;
    else if (c < 0x800)
        nbytes = 2;
    else if (c < 0x10000)
        nbytes = 3;
    else
        nbytes = 4;

    return nbytes;
}
69d522f475Smrg
/*
 * Encode code point c as UTF-8 into dest and report the byte count.
 *
 * dest        receives 1 to 4 bytes; no terminator is appended, and the
 *             caller must provide enough room (see utf8countBytes()).
 * c           code point to encode.
 * len_return  receives the number of bytes written.
 *
 * A negative c writes nothing and stores 0 in *len_return, so that the
 * caller never reads an unset length (this mirrors utf8countBytes(),
 * which answers 0 for the same input).
 */
static void
utf8insert(char *dest, int c, int *len_return)
{
    if (c < 0) {
        *len_return = 0;
        return;
    }

    if (c <= 0x7F) {
        dest[0] = (char) c;
        *len_return = 1;
    } else if (c <= 0x7FF) {
        /* 110xxxxx 10xxxxxx */
        dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
        dest[1] = (char) (0x80 | (c & 0x3F));
        *len_return = 2;
    } else if (c <= 0xFFFF) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
        dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[2] = (char) (0x80 | (c & 0x3F));
        *len_return = 3;
    } else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
        dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
        dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
        dest[3] = (char) (0x80 | (c & 0x3F));
        *len_return = 4;
    }
}
96d522f475Smrg
/*
 * Size, in bytes, of the UTF-8 form of the len bytes starting at s.
 *
 * A byte with the high bit clear counts as one byte, any other byte as
 * two.  Embedded NUL bytes are counted like any other byte; a len of
 * zero or less yields 0.
 */
static int
l1countUtf8Bytes(char *s, int len)
{
    int total = 0;
    int i;

    for (i = 0; i < len; i++) {
        total += (s[i] & 0x80) ? 2 : 1;
    }
    return total;
}
111d522f475Smrg
/*
 * Convert len bytes at s to UTF-8, writing the result to d.
 *
 * Each source byte is taken as an unsigned value (0..255) and encoded
 * with utf8insert().  No terminator is written; d must have room for
 * the number of bytes l1countUtf8Bytes() reports for the same input.
 */
static void
l1utf8copy(char *d, char *s, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        int written;

        utf8insert(d, s[i] & 0xFF, &written);
        d += written;
    }
}
123d522f475Smrg
/*
 * Copy the NUL-terminated UTF-8 string s to d, reducing it to one byte
 * per character (code points 0..255).
 *
 *  - A byte with the high bit clear is copied unchanged.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit set is
 *    folded into a single output byte.  NOTE: only the high bit of the
 *    second byte is tested, so a second lead byte is accepted as well;
 *    utf8l1strlen() counts with the same rule, and the two must not
 *    diverge.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit clear is
 *    dropped without output (incorrect UTF-8).
 *  - Any other byte produces '?', and the continuation bytes (10xxxxxx)
 *    after it are skipped.
 *
 * d must have room for utf8l1strlen(s) + 1 bytes.
 *
 * The original repeated the lead-byte test inside its own branch, which
 * left an unreachable '?' branch; that dead code is gone, the output is
 * unchanged.
 */
static void
utf8l1strcpy(char *d, char *s)
{
    while (*s) {
        if ((*s & 0x80) == 0) {
            *d++ = *s++;
        } else if ((*s & 0xFC) == 0xC0) {
            /* two-byte sequence whose value fits in 8 bits */
            if ((s[1] & 0x80) == 0) {
                s++;            /* incorrect UTF-8 */
            } else {
                *d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));
                s += 2;
            }
        } else {
            /* not representable: emit a placeholder, skip the sequence */
            *d++ = '?';
            do {
                s++;
            } while ((*s & 0xC0) == 0x80);
        }
    }
    *d = 0;
}
150d522f475Smrg
/*
 * Keep this in sync with utf8l1strcpy!
 *
 * Number of bytes, not counting the terminator, that utf8l1strcpy()
 * writes for the NUL-terminated UTF-8 string s:
 *
 *  - one for each byte with the high bit clear;
 *  - one for a lead byte 0xC0..0xC3 together with a following byte that
 *    has the high bit set;
 *  - none for a lead byte 0xC0..0xC3 followed by a byte with the high bit
 *    clear (that lead byte is dropped);
 *  - one for any other byte, after which continuation bytes (10xxxxxx)
 *    are skipped.
 *
 * The original repeated the lead-byte test inside its own branch, which
 * left an unreachable branch; that dead code is gone, the count is
 * unchanged.
 */
static int
utf8l1strlen(char *s)
{
    int len = 0;

    while (*s) {
        if ((*s & 0x80) == 0) {
            s++;
            len++;
        } else if ((*s & 0xFC) == 0xC0) {
            if ((s[1] & 0x80) == 0) {
                s++;            /* incorrect UTF-8: contributes nothing */
            } else {
                len++;
                s += 2;
            }
        } else {
            len++;              /* stands for the '?' placeholder */
            do {
                s++;
            } while ((*s & 0xC0) == 0x80);
        }
    }
    return len;
}
180d522f475Smrg
181d522f475Smrgint
182d522f475SmrgXutf8TextPropertyToTextList(Display * dpy,
183d522f475Smrg			    const XTextProperty * tp,
184d522f475Smrg			    char ***list_return,
185d522f475Smrg			    int *count_return)
186d522f475Smrg{
187d522f475Smrg    int utf8;
188d522f475Smrg    char **list;
189d522f475Smrg    int nelements;
190d522f475Smrg    char *cp;
191d522f475Smrg    char *start;
192d522f475Smrg    int i, j;
193d522f475Smrg    int datalen = (int) tp->nitems;
194d522f475Smrg    int len;
195d522f475Smrg
196d522f475Smrg    if (tp->format != 8)
197d522f475Smrg	return XConverterNotFound;
198d522f475Smrg
199d522f475Smrg    if (tp->encoding == XA_STRING)
200d522f475Smrg	utf8 = 0;
201d522f475Smrg    else if (tp->encoding == XA_UTF8_STRING(dpy))
202d522f475Smrg	utf8 = 1;
203d522f475Smrg    else
204d522f475Smrg	return XConverterNotFound;
205d522f475Smrg
206d522f475Smrg    if (datalen == 0) {
207d522f475Smrg	*list_return = NULL;
208d522f475Smrg	*count_return = 0;
209d522f475Smrg	return 0;
210d522f475Smrg    }
211d522f475Smrg
212d522f475Smrg    nelements = 1;
213d522f475Smrg    for (cp = (char *) tp->value, i = datalen; i > 0; cp++, i--) {
214d522f475Smrg	if (*cp == '\0')
215d522f475Smrg	    nelements++;
216d522f475Smrg    }
217d522f475Smrg
218d522f475Smrg    list = TypeMallocN(char *, nelements);
219d522f475Smrg    if (!list)
220d522f475Smrg	return XNoMemory;
221d522f475Smrg
222d522f475Smrg    if (utf8)
223d522f475Smrg	len = datalen;
224d522f475Smrg    else
225d522f475Smrg	len = l1countUtf8Bytes((char *) tp->value, datalen);
226d522f475Smrg
227d522f475Smrg    start = CastMallocN(char, len);
228d522f475Smrg    if (!start) {
229d522f475Smrg	free(list);
230d522f475Smrg	return XNoMemory;
231d522f475Smrg    }
232d522f475Smrg
233d522f475Smrg    if (utf8)
234d522f475Smrg	memcpy(start, (char *) tp->value, datalen);
235d522f475Smrg    else
236d522f475Smrg	l1utf8copy(start, (char *) tp->value, datalen);
237d522f475Smrg    start[len] = '\0';
238d522f475Smrg
239d522f475Smrg    for (cp = start, i = len + 1, j = 0; i > 0; cp++, i--) {
240d522f475Smrg	if (*cp == '\0') {
241d522f475Smrg	    list[j] = start;
242d522f475Smrg	    start = (cp + 1);
243d522f475Smrg	    j++;
244d522f475Smrg	}
245d522f475Smrg    }
246d522f475Smrg
247d522f475Smrg    list[j] = NULL;
248d522f475Smrg    *list_return = list;
249d522f475Smrg    *count_return = nelements;
250d522f475Smrg    return 0;
251d522f475Smrg}
252d522f475Smrg
253d522f475Smrgint
254d522f475SmrgXutf8TextListToTextProperty(Display * dpy,
255d522f475Smrg			    char **list,
256d522f475Smrg			    int count,
257d522f475Smrg			    XICCEncodingStyle style,
258d522f475Smrg			    XTextProperty * text_prop)
259d522f475Smrg{
260d522f475Smrg    XTextProperty proto;
261d522f475Smrg    unsigned int nbytes;
262d522f475Smrg    int i;
263d522f475Smrg
264d522f475Smrg    if (style != XStringStyle &&
265d522f475Smrg	style != XCompoundTextStyle &&
266d522f475Smrg	style != XStdICCTextStyle &&
267d522f475Smrg	style != XUTF8StringStyle)
268d522f475Smrg	return XConverterNotFound;
269d522f475Smrg
270d522f475Smrg    if (style == XUTF8StringStyle) {
271d522f475Smrg	for (i = 0, nbytes = 0; i < count; i++) {
272d522f475Smrg	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
273d522f475Smrg	}
274d522f475Smrg    } else {
275d522f475Smrg	for (i = 0, nbytes = 0; i < count; i++) {
276d522f475Smrg	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
277d522f475Smrg	}
278d522f475Smrg    }
279d522f475Smrg
280d522f475Smrg    if (style == XCompoundTextStyle)
281d522f475Smrg	proto.encoding = XA_COMPOUND_TEXT(dpy);
282d522f475Smrg    else if (style == XUTF8StringStyle)
283d522f475Smrg	proto.encoding = XA_UTF8_STRING(dpy);
284d522f475Smrg    else
285d522f475Smrg	proto.encoding = XA_STRING;
286d522f475Smrg    proto.format = 8;
287d522f475Smrg    if (nbytes)
288d522f475Smrg	proto.nitems = nbytes - 1;
289d522f475Smrg    else
290d522f475Smrg	proto.nitems = 0;
291d522f475Smrg    proto.value = NULL;
292d522f475Smrg
293d522f475Smrg    if (nbytes > 0) {
294d522f475Smrg	char *buf = TypeMallocN(char, nbytes);
295d522f475Smrg	if (!buf)
296d522f475Smrg	    return XNoMemory;
297d522f475Smrg
298d522f475Smrg	proto.value = (unsigned char *) buf;
299d522f475Smrg	for (i = 0; i < count; i++) {
300d522f475Smrg	    char *arg = list[i];
301d522f475Smrg
302d522f475Smrg	    if (arg) {
303d522f475Smrg		if (style == XUTF8StringStyle) {
304d522f475Smrg		    strcpy(buf, arg);
305d522f475Smrg		} else {
306d522f475Smrg		    utf8l1strcpy(buf, arg);
307d522f475Smrg		}
308d522f475Smrg		buf += (strlen(buf) + 1);
309d522f475Smrg	    } else {
310d522f475Smrg		*buf++ = '\0';
311d522f475Smrg	    }
312d522f475Smrg	}
313d522f475Smrg    } else {
314d522f475Smrg	proto.value = CastMalloc(unsigned char);	/* easier for client */
315d522f475Smrg	if (!proto.value)
316d522f475Smrg	    return XNoMemory;
317d522f475Smrg
318d522f475Smrg	proto.value[0] = '\0';
319d522f475Smrg    }
320d522f475Smrg
321d522f475Smrg    *text_prop = proto;
322d522f475Smrg    return 0;
323d522f475Smrg}
324d522f475Smrg
325d522f475Smrgint
326d522f475SmrgXutf8LookupString(XIC ic GCC_UNUSED,
327d522f475Smrg		  XKeyEvent * ev,
328d522f475Smrg		  char *buffer,
329d522f475Smrg		  int nbytes,
330d522f475Smrg		  KeySym * keysym_return,
331d522f475Smrg		  Status * status_return)
332d522f475Smrg{
333d522f475Smrg    int rc;
334d522f475Smrg    KeySym keysym;
335d522f475Smrg    int codepoint;
336d522f475Smrg    int len;
337d522f475Smrg
338d522f475Smrg    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
339d522f475Smrg
340d522f475Smrg    if (rc > 0) {
341d522f475Smrg	codepoint = buffer[0] & 0xFF;
342d522f475Smrg    } else {
343d522f475Smrg	codepoint = keysym2ucs(keysym);
344d522f475Smrg    }
345d522f475Smrg
346d522f475Smrg    if (codepoint < 0) {
347d522f475Smrg	if (keysym == None) {
348d522f475Smrg	    *status_return = XLookupNone;
349d522f475Smrg	} else {
350d522f475Smrg	    *status_return = XLookupKeySym;
351d522f475Smrg	    *keysym_return = keysym;
352d522f475Smrg	}
353d522f475Smrg	return 0;
354d522f475Smrg    }
355d522f475Smrg
356d522f475Smrg    if (nbytes < utf8countBytes(codepoint)) {
357d522f475Smrg	*status_return = XBufferOverflow;
358d522f475Smrg	return utf8countBytes(codepoint);
359d522f475Smrg    }
360d522f475Smrg
361d522f475Smrg    utf8insert(buffer, codepoint, &len);
362d522f475Smrg
363d522f475Smrg    if (keysym != None) {
364d522f475Smrg	*keysym_return = keysym;
365d522f475Smrg	*status_return = XLookupBoth;
366d522f475Smrg    } else {
367d522f475Smrg	*status_return = XLookupChars;
368d522f475Smrg    }
369d522f475Smrg    return len;
370d522f475Smrg}
371d522f475Smrg#else /* X_HAVE_UTF8_STRING */
/*
 * Silence the compiler: when X_HAVE_UTF8_STRING is defined nothing else
 * in this file is compiled, so this empty function is the only
 * definition left.
 */
void
xutf8_dummy(void)
{
}
378d522f475Smrg#endif
379