1/* $XTermId: xutf8.c,v 1.18 2020/06/23 22:45:51 tom Exp $ */
2
3/*
4 * Copyright 2002-2019,2020 by Thomas E. Dickey
5 * Copyright (c) 2001 by Juliusz Chroboczek
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#include <xterm.h>
28
29#include <X11/Xlib.h>
30#include <X11/Xatom.h>
31#include <X11/Xutil.h>
32#include <X11/Xmu/Xmu.h>
33
34#include <xutf8.h>
35
36#ifndef X_HAVE_UTF8_STRING
37
38#undef XA_UTF8_STRING
39#define KEYSYM2UCS_INCLUDED
40
41#include "keysym2ucs.c"
42
43Atom
44_xa_utf8_string(Display *dpy)
45{
46    static AtomPtr p = NULL;
47
48    if (p == NULL)
49	p = XmuMakeAtom("UTF8_STRING");
50
51    return XmuInternAtom(dpy, p);
52}
53#define XA_UTF8_STRING(dpy) _xa_utf8_string(dpy)
54
/*
 * Report how many bytes utf8insert() would emit for the code point 'c'.
 *
 * Returns 0 for a negative value, otherwise 1..4.  Everything above 0xFFFF
 * is reported as a four-byte sequence.
 */
static int
utf8countBytes(int c)
{
    int needed;

    if (c < 0)
	needed = 0;
    else if (c < 0x80)
	needed = 1;
    else if (c < 0x800)
	needed = 2;
    else if (c < 0x10000)
	needed = 3;
    else
	needed = 4;

    return needed;
}
70
/*
 * Encode the code point 'c' as UTF-8 into 'dest'.
 *
 * dest        receives 1..4 bytes; it is not NUL-terminated, and the caller
 *             must provide room for the whole sequence (see
 *             utf8countBytes()).
 * c           code point to encode.
 * len_return  receives the number of bytes written.
 *
 * A negative 'c' cannot be encoded: nothing is written to 'dest' and
 * *len_return is set to 0, so the caller never sees an indeterminate
 * length.
 */
static void
utf8insert(char *dest, int c, size_t *len_return)
{
    if (c < 0) {
	*len_return = 0;
	return;
    }

    if (c <= 0x7F) {
	dest[0] = (char) c;
	*len_return = 1;
    } else if (c <= 0x7FF) {
	dest[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
	dest[1] = (char) (0x80 | (c & 0x3F));
	*len_return = 2;
    } else if (c <= 0xFFFF) {
	dest[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
	dest[1] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[2] = (char) (0x80 | (c & 0x3F));
	*len_return = 3;
    } else {
	dest[0] = (char) (0xF0 | ((c >> 18) & 0x07));
	dest[1] = (char) (0x80 | ((c >> 12) & 0x3F));
	dest[2] = (char) (0x80 | ((c >> 6) & 0x3F));
	dest[3] = (char) (0x80 | (c & 0x3F));
	*len_return = 4;
    }
}
97
/*
 * Compute how many bytes the 'len' bytes at 's' occupy once each byte is
 * re-encoded as UTF-8: a byte with the high bit clear counts as one byte,
 * a byte with the high bit set counts as two.  Embedded NUL bytes are
 * counted like any other byte.
 */
static size_t
l1countUtf8Bytes(char *s, size_t len)
{
    size_t total = 0;
    size_t n;

    for (n = 0; n < len; n++) {
	total += (s[n] & 0x80) ? 2 : 1;
    }

    return total;
}
112
/*
 * Re-encode the 'len' bytes at 's' as UTF-8 into 'd', treating every
 * source byte as a code point in the range 0..0xFF.
 *
 * No terminator is appended.  The destination must be large enough for
 * the converted data; l1countUtf8Bytes() gives the required size.
 */
static void
l1utf8copy(char *d, char *s, size_t len)
{
    char *end = s + len;

    for (; s != end; s++) {
	size_t used;

	utf8insert(d, (*s) & 0xFF, &used);
	d += (int) used;
    }
}
124
125static void
126utf8l1strcpy(char *d, char *s)
127{
128#define SKIP do { s++; } while(((*s & 0x80) != 0) && (*s & 0xC0) != 0xC0)
129    while (*s) {
130	if ((*s & 0x80) == 0)
131	    *d++ = *s++;
132	else if ((*s & 0x7C) == 0x40) {
133	    if ((s[1] & 0x80) == 0) {
134		s++;		/* incorrect UTF-8 */
135		continue;
136	    } else if ((*s & 0x7C) == 0x40) {
137		*d++ = (char) (((*s & 0x03) << 6) | (s[1] & 0x3F));
138		s += 2;
139	    } else {
140		*d++ = BAD_ASCII;
141		SKIP;
142	    }
143	} else {
144	    *d++ = BAD_ASCII;
145	    SKIP;
146	}
147    }
148    *d = 0;
149#undef SKIP
150}
151
152/* Keep this in sync with utf8l1strcpy! */
/*
 * Return the number of bytes (excluding the terminator) that
 * utf8l1strcpy() writes for the NUL-terminated UTF-8 string 's'.
 *
 *  - A byte with the high bit clear counts as one.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit set
 *    counts as one and consumes both bytes.
 *  - A lead byte 0xC0..0xC3 followed by a byte with the high bit clear is
 *    dropped and counts as nothing.
 *  - Any other byte counts as one (the substitute character), and the
 *    continuation bytes (10xxxxxx) after it are skipped.
 */
static int
utf8l1strlen(char *s)
{
    int count = 0;

    while (*s != '\0') {
	if ((*s & 0x80) == 0) {
	    /* plain ASCII */
	    count++;
	    s++;
	} else if ((*s & 0x7C) != 0x40) {
	    /* not representable: one substitute character */
	    count++;
	    do {
		s++;
	    } while ((*s & 0xC0) == 0x80);
	} else if ((s[1] & 0x80) != 0) {
	    /* two-byte sequence collapsing to one byte */
	    count++;
	    s += 2;
	} else {
	    /* truncated sequence: dropped */
	    s++;
	}
    }

    return count;
}
181
182int
183Xutf8TextPropertyToTextList(Display *dpy,
184			    const XTextProperty * tp,
185			    char ***list_return,
186			    int *count_return)
187{
188    int utf8;
189    char **list;
190    int nelements;
191    char *cp;
192    char *start;
193    size_t i;
194    int j;
195    size_t datalen = tp->nitems;
196    size_t len;
197
198    if (tp->format != 8)
199	return XConverterNotFound;
200
201    if (tp->encoding == XA_STRING)
202	utf8 = 0;
203    else if (tp->encoding == XA_UTF8_STRING(dpy))
204	utf8 = 1;
205    else
206	return XConverterNotFound;
207
208    if (datalen == 0) {
209	*list_return = NULL;
210	*count_return = 0;
211	return 0;
212    }
213
214    nelements = 1;
215    for (cp = (char *) tp->value, i = datalen; i != 0; cp++, i--) {
216	if (*cp == '\0')
217	    nelements++;
218    }
219
220    list = TypeMallocN(char *, (unsigned) nelements);
221    if (!list)
222	return XNoMemory;
223
224    if (utf8)
225	len = datalen;
226    else
227	len = l1countUtf8Bytes((char *) tp->value, datalen);
228
229    start = malloc(len + 1);
230    if (!start) {
231	free(list);
232	return XNoMemory;
233    }
234
235    if (utf8)
236	memcpy(start, (char *) tp->value, datalen);
237    else
238	l1utf8copy(start, (char *) tp->value, datalen);
239    start[len] = '\0';
240
241    for (cp = start, i = len + 1, j = 0; i != 0; cp++, i--) {
242	if (*cp == '\0') {
243	    list[j] = start;
244	    start = (cp + 1);
245	    j++;
246	}
247    }
248
249    list[j] = NULL;
250    *list_return = list;
251    *count_return = nelements;
252    return 0;
253}
254
255int
256Xutf8TextListToTextProperty(Display *dpy,
257			    char **list,
258			    int count,
259			    XICCEncodingStyle style,
260			    XTextProperty * text_prop)
261{
262    XTextProperty proto;
263    unsigned int nbytes;
264    int i;
265
266    if (style != XStringStyle &&
267	style != XCompoundTextStyle &&
268	style != XStdICCTextStyle &&
269	style != XUTF8StringStyle)
270	return XConverterNotFound;
271
272    if (style == XUTF8StringStyle) {
273	for (i = 0, nbytes = 0; i < count; i++) {
274	    nbytes += (unsigned) ((list[i] ? strlen(list[i]) : 0) + 1);
275	}
276    } else {
277	for (i = 0, nbytes = 0; i < count; i++) {
278	    nbytes += (unsigned) ((list[i] ? utf8l1strlen(list[i]) : 0) + 1);
279	}
280    }
281
282    if (style == XCompoundTextStyle)
283	proto.encoding = XA_COMPOUND_TEXT(dpy);
284    else if (style == XUTF8StringStyle)
285	proto.encoding = XA_UTF8_STRING(dpy);
286    else
287	proto.encoding = XA_STRING;
288    proto.format = 8;
289    if (nbytes)
290	proto.nitems = nbytes - 1;
291    else
292	proto.nitems = 0;
293    proto.value = NULL;
294
295    if (nbytes > 0) {
296	char *buf = TypeMallocN(char, nbytes);
297	if (!buf)
298	    return XNoMemory;
299
300	proto.value = (unsigned char *) buf;
301	for (i = 0; i < count; i++) {
302	    char *arg = list[i];
303
304	    if (arg) {
305		if (style == XUTF8StringStyle) {
306		    strcpy(buf, arg);
307		} else {
308		    utf8l1strcpy(buf, arg);
309		}
310		buf += (strlen(buf) + 1);
311	    } else {
312		*buf++ = '\0';
313	    }
314	}
315    } else {
316	proto.value = CastMalloc(unsigned char);	/* easier for client */
317	if (!proto.value)
318	    return XNoMemory;
319
320	proto.value[0] = '\0';
321    }
322
323    *text_prop = proto;
324    return 0;
325}
326
327int
328Xutf8LookupString(XIC ic GCC_UNUSED,
329		  XKeyEvent *ev,
330		  char *buffer,
331		  int nbytes,
332		  KeySym * keysym_return,
333		  Status * status_return)
334{
335    int rc;
336    KeySym keysym;
337    int codepoint;
338    size_t len;
339
340    rc = XLookupString(ev, buffer, nbytes, &keysym, NULL);
341
342    if (rc > 0) {
343	codepoint = buffer[0] & 0xFF;
344    } else {
345	codepoint = keysym2ucs(keysym);
346    }
347
348    if (codepoint < 0) {
349	if (keysym == None) {
350	    *status_return = XLookupNone;
351	} else {
352	    *status_return = XLookupKeySym;
353	    *keysym_return = keysym;
354	}
355	return 0;
356    }
357
358    if (nbytes < utf8countBytes(codepoint)) {
359	*status_return = XBufferOverflow;
360	return utf8countBytes(codepoint);
361    }
362
363    utf8insert(buffer, codepoint, &len);
364
365    if (keysym != None) {
366	*keysym_return = keysym;
367	*status_return = XLookupBoth;
368    } else {
369	*status_return = XLookupChars;
370    }
371    return (int) len;
372}
373
374#else /* X_HAVE_UTF8_STRING */
375/* Silence the compiler */
/*
 * Placeholder so this file still defines something when the UTF-8 helpers
 * are compiled out.  Does nothing.
 */
void
xutf8_dummy(void)
{
}
381#endif
382