15104ee6eSmrg/* $XTermId: ptydata.c,v 1.163 2024/12/01 23:48:07 tom Exp $ */
2d522f475Smrg
30bd37d32Smrg/*
404b94745Smrg * Copyright 1999-2023,2024 by Thomas E. Dickey
50bd37d32Smrg *
60bd37d32Smrg *                         All Rights Reserved
70bd37d32Smrg *
80bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a
90bd37d32Smrg * copy of this software and associated documentation files (the
100bd37d32Smrg * "Software"), to deal in the Software without restriction, including
110bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish,
120bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to
130bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to
140bd37d32Smrg * the following conditions:
150bd37d32Smrg *
160bd37d32Smrg * The above copyright notice and this permission notice shall be included
170bd37d32Smrg * in all copies or substantial portions of the Software.
180bd37d32Smrg *
190bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
200bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
210bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
220bd37d32Smrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
230bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
240bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
250bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
260bd37d32Smrg *
270bd37d32Smrg * Except as contained in this notice, the name(s) of the above copyright
280bd37d32Smrg * holders shall not be used in advertising or otherwise to promote the
290bd37d32Smrg * sale, use or other dealings in this Software without prior written
300bd37d32Smrg * authorization.
310bd37d32Smrg */
32d522f475Smrg
33d522f475Smrg#include <data.h>
34d522f475Smrg
35d522f475Smrg#if OPT_WIDE_CHARS
36d522f475Smrg#include <menu.h>
37913cc679Smrg#include <wcwidth.h>
38d522f475Smrg#endif
39d522f475Smrg
40f2e35a3aSmrg#ifdef TEST_DRIVER
41f2e35a3aSmrg#undef TRACE
42f2e35a3aSmrg#define TRACE(p) if (1) printf p
43f2e35a3aSmrg#undef TRACE2
44f2e35a3aSmrg#define TRACE2(p) if (0) printf p
45f2e35a3aSmrg#define visibleChars(buf, len) "buffer"
46f2e35a3aSmrg#endif
47f2e35a3aSmrg
48d522f475Smrg/*
49d522f475Smrg * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX
50d522f475Smrg * systems are broken and return EWOULDBLOCK when they should return EAGAIN.
51d522f475Smrg * Note that this macro may evaluate its argument more than once.
52d522f475Smrg */
53d522f475Smrg#if defined(EAGAIN) && defined(EWOULDBLOCK)
54d522f475Smrg#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
55d522f475Smrg#else
56d522f475Smrg#ifdef EAGAIN
57d522f475Smrg#define E_TEST(err) ((err) == EAGAIN)
58d522f475Smrg#else
59d522f475Smrg#define E_TEST(err) ((err) == EWOULDBLOCK)
60d522f475Smrg#endif
61d522f475Smrg#endif
62d522f475Smrg
63d522f475Smrg#if OPT_WIDE_CHARS
64d522f475Smrg/*
65d522f475Smrg * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
66d522f475Smrg * The number of bytes converted will be nonzero iff there is data.
67d522f475Smrg */
/*
 * Scan the unread bytes in [data->next, data->last) and try to produce one
 * character from them.  On success the character (or UCS_REPL for malformed
 * input) is stored in data->utf_data and the number of bytes it accounts for
 * is stored in data->utf_size.  This function does not advance data->next.
 *
 * Returns true iff data->utf_size is nonzero.  If the available bytes end in
 * the middle of a multibyte sequence, data->utf_size is left at zero and the
 * function returns false.
 */
68d522f475SmrgBool
69894e0ac8SmrgdecodeUtf8(TScreen *screen, PtyData *data)
70d522f475Smrg{
715307cd1aSmrg    size_t i;
725307cd1aSmrg    size_t length = (size_t) (data->last - data->next);
    /* utf_count: continuation bytes still expected for the current sequence */
73d522f475Smrg    int utf_count = 0;
    /* utf_char: payload bits accumulated so far for the current sequence */
74956cc18dSsnj    unsigned utf_char = 0;
75d522f475Smrg
    /* stays zero unless one of the branches below completes a character */
76d522f475Smrg    data->utf_size = 0;
77d522f475Smrg    for (i = 0; i < length; i++) {
78d522f475Smrg	unsigned c = data->next[i];
79d522f475Smrg
80d522f475Smrg	/* Combine UTF-8 into Unicode */
81d522f475Smrg	if (c < 0x80) {
82d522f475Smrg	    /* We received an ASCII character */
83d522f475Smrg	    if (utf_count > 0) {
		/*
		 * Report only the bytes before this one (i of them), so the
		 * ASCII byte itself is left to be decoded on a later call.
		 */
84d522f475Smrg		data->utf_data = UCS_REPL;	/* prev. sequence incomplete */
85a1f3da82Smrg		data->utf_size = i;
86d522f475Smrg	    } else {
87956cc18dSsnj		data->utf_data = (IChar) c;
88d522f475Smrg		data->utf_size = 1;
89d522f475Smrg	    }
90d522f475Smrg	    break;
	/*
	 * At the start of a character, when screen->vt100_graphics is set and
	 * the character set selected by screen->curgr is not nrc_ASCII, the
	 * byte is returned as-is rather than being interpreted as UTF-8.
	 */
91f2e35a3aSmrg	} else if (screen->vt100_graphics
92f2e35a3aSmrg		   && (c < 0x100)
93f2e35a3aSmrg		   && (utf_count == 0)
94f2e35a3aSmrg		   && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
95f2e35a3aSmrg	    data->utf_data = (IChar) c;
96f2e35a3aSmrg	    data->utf_size = 1;
97f2e35a3aSmrg	    break;
98d522f475Smrg	} else if (c < 0xc0) {
99d522f475Smrg	    /* We received a continuation byte */
100d522f475Smrg	    if (utf_count < 1) {
10104b94745Smrg		if (screen->c1_printable) {
10204b94745Smrg		    data->utf_data = (IChar) c;
10304b94745Smrg		} else if ((i + 1) < length
10404b94745Smrg			   && data->next[i + 1] > 0x20
10504b94745Smrg			   && data->next[i + 1] < 0x80) {
10604b94745Smrg		    /*
10704b94745Smrg		     * Allow for C1 control string if the next byte is
10804b94745Smrg		     * available for inspection.
10904b94745Smrg		     */
11004b94745Smrg		    data->utf_data = (IChar) c;
11104b94745Smrg		} else {
11204b94745Smrg		    /*
11304b94745Smrg		     * We received a continuation byte before receiving a
11404b94745Smrg		     * sequence state, or a failed attempt to use a C1 control
11504b94745Smrg		     * string.
11604b94745Smrg		     */
11704b94745Smrg		    data->utf_data = (IChar) UCS_REPL;
11804b94745Smrg		}
		/* in all three cases the stray byte itself is consumed */
119d522f475Smrg		data->utf_size = (i + 1);
120d522f475Smrg		break;
121f2e35a3aSmrg	    } else if (screen->utf8_weblike
122f2e35a3aSmrg		       && (utf_count == 3
123f2e35a3aSmrg			   && utf_char == 0x04
124f2e35a3aSmrg			   && c >= 0x90)) {
125f2e35a3aSmrg		/* The encoding would form a code point beyond U+10FFFF. */
126f2e35a3aSmrg		data->utf_size = i;
127f2e35a3aSmrg		data->utf_data = UCS_REPL;
128f2e35a3aSmrg		break;
129f2e35a3aSmrg	    } else if (screen->utf8_weblike
130f2e35a3aSmrg		       && (utf_count == 2
131f2e35a3aSmrg			   && utf_char == 0x0d
132f2e35a3aSmrg			   && c >= 0xa0)) {
133f2e35a3aSmrg		/* The encoding would form a surrogate code point. */
134f2e35a3aSmrg		data->utf_size = i;
135f2e35a3aSmrg		data->utf_data = UCS_REPL;
136f2e35a3aSmrg		break;
137d522f475Smrg	    } else {
138d522f475Smrg		/* Check for overlong UTF-8 sequences for which a shorter
139d522f475Smrg		 * encoding would exist and replace them with UCS_REPL.
140d522f475Smrg		 * An overlong UTF-8 sequence can have any of the following
141d522f475Smrg		 * forms:
142d522f475Smrg		 *   1100000x 10xxxxxx
143d522f475Smrg		 *   11100000 100xxxxx 10xxxxxx
144d522f475Smrg		 *   11110000 1000xxxx 10xxxxxx 10xxxxxx
145d522f475Smrg		 *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
146d522f475Smrg		 *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
147d522f475Smrg		 */
		/*
		 * utf_char is zero when the start byte carried no payload
		 * bits; the shift keeps only the leading payload bits of this
		 * continuation byte, which are zero in the patterns above.
		 */
148d522f475Smrg		if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
149f2e35a3aSmrg		    if (screen->utf8_weblike) {
150f2e35a3aSmrg			/* overlong sequence continued */
151f2e35a3aSmrg			data->utf_data = UCS_REPL;
152f2e35a3aSmrg			data->utf_size = i;
153f2e35a3aSmrg			break;
154f2e35a3aSmrg		    } else {
155f2e35a3aSmrg			utf_char = UCS_REPL;
156f2e35a3aSmrg		    }
157d522f475Smrg		}
		/* append the six payload bits of this continuation byte */
158d522f475Smrg		utf_char <<= 6;
159d522f475Smrg		utf_char |= (c & 0x3f);
		/* surrogates, 0xfffe and HIDDEN_CHAR are replaced */
160d522f475Smrg		if ((utf_char >= 0xd800 &&
161d522f475Smrg		     utf_char <= 0xdfff) ||
162d522f475Smrg		    (utf_char == 0xfffe) ||
163d522f475Smrg		    (utf_char == HIDDEN_CHAR)) {
164d522f475Smrg		    utf_char = UCS_REPL;
165d522f475Smrg		}
166d522f475Smrg		utf_count--;
167d522f475Smrg		if (utf_count == 0) {
168956cc18dSsnj#if !OPT_WIDER_ICHAR
169d522f475Smrg		    /* characters outside UCS-2 become UCS_REPL */
170f2e35a3aSmrg		    if (utf_char > NARROW_ICHAR) {
171d522f475Smrg			TRACE(("using replacement for %#x\n", utf_char));
172d522f475Smrg			utf_char = UCS_REPL;
173d522f475Smrg		    }
174956cc18dSsnj#endif
		    /* sequence complete: report it with all bytes consumed */
175956cc18dSsnj		    data->utf_data = (IChar) utf_char;
176d522f475Smrg		    data->utf_size = (i + 1);
177d522f475Smrg		    break;
178d522f475Smrg		}
179d522f475Smrg	    }
180d522f475Smrg	} else {
181d522f475Smrg	    /* We received a sequence start byte */
182d522f475Smrg	    if (utf_count > 0) {
183f2e35a3aSmrg		/* previous sequence is incomplete */
184f2e35a3aSmrg		data->utf_data = UCS_REPL;
185f2e35a3aSmrg		data->utf_size = i;
186d522f475Smrg		break;
187d522f475Smrg	    }
	    /*
	     * The two branches differ in what they accept: with utf8_weblike
	     * set, start bytes from 0xf5 up and overlong 2-byte starts are
	     * rejected immediately; otherwise 5- and 6-byte forms are accepted
	     * and only 0xfe/0xff are rejected here.
	     */
188f2e35a3aSmrg	    if (screen->utf8_weblike) {
189f2e35a3aSmrg		if (c < 0xe0) {
190f2e35a3aSmrg		    if (!(c & 0x1e)) {
191f2e35a3aSmrg			/* overlong sequence start */
192f2e35a3aSmrg			data->utf_data = UCS_REPL;
193f2e35a3aSmrg			data->utf_size = (i + 1);
194f2e35a3aSmrg			break;
195f2e35a3aSmrg		    }
196f2e35a3aSmrg		    utf_count = 1;
197f2e35a3aSmrg		    utf_char = (c & 0x1f);
198f2e35a3aSmrg		} else if (c < 0xf0) {
199f2e35a3aSmrg		    utf_count = 2;
200f2e35a3aSmrg		    utf_char = (c & 0x0f);
201f2e35a3aSmrg		} else if (c < 0xf5) {
202f2e35a3aSmrg		    utf_count = 3;
203f2e35a3aSmrg		    utf_char = (c & 0x07);
204f2e35a3aSmrg		} else {
205f2e35a3aSmrg		    data->utf_data = UCS_REPL;
206f2e35a3aSmrg		    data->utf_size = (i + 1);
207f2e35a3aSmrg		    break;
208a1f3da82Smrg		}
209d522f475Smrg	    } else {
210f2e35a3aSmrg		if (c < 0xe0) {
211f2e35a3aSmrg		    utf_count = 1;
212f2e35a3aSmrg		    utf_char = (c & 0x1f);
213f2e35a3aSmrg		    if (!(c & 0x1e)) {
214f2e35a3aSmrg			/* overlong sequence */
215f2e35a3aSmrg			utf_char = UCS_REPL;
216f2e35a3aSmrg		    }
217f2e35a3aSmrg		} else if (c < 0xf0) {
218f2e35a3aSmrg		    utf_count = 2;
219f2e35a3aSmrg		    utf_char = (c & 0x0f);
220f2e35a3aSmrg		} else if (c < 0xf8) {
221f2e35a3aSmrg		    utf_count = 3;
222f2e35a3aSmrg		    utf_char = (c & 0x07);
223f2e35a3aSmrg		} else if (c < 0xfc) {
224f2e35a3aSmrg		    utf_count = 4;
225f2e35a3aSmrg		    utf_char = (c & 0x03);
226f2e35a3aSmrg		} else if (c < 0xfe) {
227f2e35a3aSmrg		    utf_count = 5;
228f2e35a3aSmrg		    utf_char = (c & 0x01);
229f2e35a3aSmrg		} else {
230f2e35a3aSmrg		    data->utf_data = UCS_REPL;
231f2e35a3aSmrg		    data->utf_size = (i + 1);
232f2e35a3aSmrg		    break;
233f2e35a3aSmrg		}
234d522f475Smrg	    }
235d522f475Smrg	}
236d522f475Smrg    }
    /*
     * If the loop ran off the end of the input without a break, a sequence
     * is still pending and data->utf_size is still zero.
     */
237d522f475Smrg#if OPT_TRACE > 1
23804b94745Smrg    TRACE(("UTF-8 char %04X [%lu..%lu]\n",
239d522f475Smrg	   data->utf_data,
24004b94745Smrg	   (unsigned long) (data->next - data->buffer),
24104b94745Smrg	   (unsigned long) (data->next - data->buffer + data->utf_size - 1)));
242d522f475Smrg#endif
243d522f475Smrg
244d522f475Smrg    return (data->utf_size != 0);
245d522f475Smrg}
246d522f475Smrg#endif
247d522f475Smrg
248d522f475Smrgint
249894e0ac8SmrgreadPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
250d522f475Smrg{
25120d2c4d2Smrg    TScreen *screen = TScreenOf(xw);
252d522f475Smrg    int size = 0;
253d522f475Smrg
254d522f475Smrg    if (FD_ISSET(screen->respond, select_mask)) {
255956cc18dSsnj	int save_err;
25620d2c4d2Smrg	trimPtyData(xw, data);
257d522f475Smrg
25820d2c4d2Smrg	size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
259956cc18dSsnj	save_err = errno;
260d522f475Smrg#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
261956cc18dSsnj	/*
262956cc18dSsnj	 * Yes, I know this is a majorly f*ugly hack, however it seems to
263956cc18dSsnj	 * be necessary for Solaris x86.  DWH 11/15/94
264956cc18dSsnj	 * Dunno why though..
265956cc18dSsnj	 * (and now CYGWIN, alanh@xfree86.org 08/15/01
266956cc18dSsnj	 */
267956cc18dSsnj	if (size <= 0) {
268956cc18dSsnj	    if (save_err == EIO || save_err == 0)
2690bd37d32Smrg		NormalExit();
270956cc18dSsnj	    else if (!E_TEST(save_err))
271956cc18dSsnj		Panic("input: read returned unexpected error (%d)\n", save_err);
272956cc18dSsnj	    size = 0;
273956cc18dSsnj	}
274956cc18dSsnj#else /* !f*ugly */
275956cc18dSsnj	if (size < 0) {
276956cc18dSsnj	    if (save_err == EIO)
2770bd37d32Smrg		NormalExit();
278956cc18dSsnj	    else if (!E_TEST(save_err))
279956cc18dSsnj		Panic("input: read returned unexpected error (%d)\n", save_err);
280d522f475Smrg	    size = 0;
281d522f475Smrg	} else if (size == 0) {
2820bd37d32Smrg#if defined(__FreeBSD__)
2830bd37d32Smrg	    NormalExit();
284d522f475Smrg#else
285d522f475Smrg	    Panic("input: read returned zero\n", 0);
286d522f475Smrg#endif
287d522f475Smrg	}
288956cc18dSsnj#endif /* f*ugly */
289d522f475Smrg    }
290d522f475Smrg
291d522f475Smrg    if (size) {
292d522f475Smrg#if OPT_TRACE
293d522f475Smrg	int i;
294d522f475Smrg
295d522f475Smrg	TRACE(("read %d bytes from pty\n", size));
296d522f475Smrg	for (i = 0; i < size; i++) {
297d522f475Smrg	    if (!(i % 16))
298d522f475Smrg		TRACE(("%s", i ? "\n    " : "READ"));
299d522f475Smrg	    TRACE((" %02X", data->last[i]));
300d522f475Smrg	}
301d522f475Smrg	TRACE(("\n"));
302d522f475Smrg#endif
303d522f475Smrg	data->last += size;
304d522f475Smrg#ifdef ALLOWLOGGING
30520d2c4d2Smrg	TScreenOf(term)->logstart = VTbuffer->next;
306d522f475Smrg#endif
307d522f475Smrg    }
308d522f475Smrg
309d522f475Smrg    return (size);
310d522f475Smrg}
311d522f475Smrg
312d522f475Smrg/*
313d522f475Smrg * Return the next value from the input buffer.  Note that morePtyData() is
314d522f475Smrg * always called before this function, so we can do the UTF-8 input conversion
315d522f475Smrg * in that function and simply return the result here.
316d522f475Smrg */
317d522f475Smrg#if OPT_WIDE_CHARS
318d522f475SmrgIChar
319894e0ac8SmrgnextPtyData(TScreen *screen, PtyData *data)
320d522f475Smrg{
321d522f475Smrg    IChar result;
322d522f475Smrg    if (screen->utf8_inparse) {
323f2e35a3aSmrg	skipPtyData(data, result);
324d522f475Smrg    } else {
325d522f475Smrg	result = *((data)->next++);
326956cc18dSsnj	if (!screen->output_eight_bits) {
327956cc18dSsnj	    result = (IChar) (result & 0x7f);
328956cc18dSsnj	}
329d522f475Smrg    }
330d522f475Smrg    TRACE2(("nextPtyData returns %#x\n", result));
331d522f475Smrg    return result;
332d522f475Smrg}
333d522f475Smrg#endif
334d522f475Smrg
335d522f475Smrg#if OPT_WIDE_CHARS
336d522f475Smrg/*
337d522f475Smrg * Called when UTF-8 mode has been turned on/off.
338d522f475Smrg */
339d522f475Smrgvoid
340894e0ac8SmrgswitchPtyData(TScreen *screen, int flag)
341d522f475Smrg{
342d522f475Smrg    if (screen->utf8_mode != flag) {
343d522f475Smrg	screen->utf8_mode = flag;
344956cc18dSsnj	screen->utf8_inparse = (Boolean) (flag != 0);
345913cc679Smrg	mk_wcwidth_init(screen->utf8_mode);
346d522f475Smrg
347d522f475Smrg	TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
348d522f475Smrg	update_font_utf8_mode();
349d522f475Smrg    }
350d522f475Smrg}
351d522f475Smrg#endif
352d522f475Smrg
353d522f475Smrg/*
354d522f475Smrg * Allocate a buffer.
355d522f475Smrg */
356d522f475Smrgvoid
357894e0ac8SmrginitPtyData(PtyData **result)
358d522f475Smrg{
359d522f475Smrg    PtyData *data;
360d522f475Smrg
361f2e35a3aSmrg    TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
362f2e35a3aSmrg	    FRG_SIZE, BUF_SIZE));
363d522f475Smrg
364d522f475Smrg    if (FRG_SIZE < 64)
365d522f475Smrg	FRG_SIZE = 64;
366d522f475Smrg    if (BUF_SIZE < FRG_SIZE)
367d522f475Smrg	BUF_SIZE = FRG_SIZE;
368d522f475Smrg    if (BUF_SIZE % FRG_SIZE)
369d522f475Smrg	BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
370d522f475Smrg
371f2e35a3aSmrg    TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
372f2e35a3aSmrg	    FRG_SIZE, BUF_SIZE));
373d522f475Smrg
374a1f3da82Smrg    data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
375d522f475Smrg
376d522f475Smrg    memset(data, 0, sizeof(*data));
377d522f475Smrg    data->next = data->buffer;
378d522f475Smrg    data->last = data->buffer;
379d522f475Smrg    *result = data;
380d522f475Smrg}
381d522f475Smrg
382d522f475Smrg/*
38320d2c4d2Smrg * Initialize a buffer for the caller, using its data in 'next'.
384d522f475Smrg */
385d522f475Smrg#if OPT_WIDE_CHARS
386d522f475SmrgPtyData *
387894e0ac8SmrgfakePtyData(PtyData *result, Char *next, Char *last)
388d522f475Smrg{
389d522f475Smrg    PtyData *data = result;
390d522f475Smrg
391d522f475Smrg    memset(data, 0, sizeof(*data));
392d522f475Smrg    data->next = next;
393d522f475Smrg    data->last = last;
394d522f475Smrg
395d522f475Smrg    return data;
396d522f475Smrg}
397d522f475Smrg#endif
398d522f475Smrg
399d522f475Smrg/*
400d522f475Smrg * Remove used data by shifting the buffer down, to make room for more data,
401d522f475Smrg * e.g., a continuation-read.
402d522f475Smrg */
403d522f475Smrgvoid
404f2e35a3aSmrgtrimPtyData(XtermWidget xw, PtyData *data)
405d522f475Smrg{
406f2e35a3aSmrg    (void) xw;
40720d2c4d2Smrg    FlushLog(xw);
408d522f475Smrg
409d522f475Smrg    if (data->next != data->buffer) {
4105307cd1aSmrg	size_t i;
4115307cd1aSmrg	size_t n = (size_t) (data->last - data->next);
412d522f475Smrg
4135307cd1aSmrg	TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
414d522f475Smrg	for (i = 0; i < n; ++i) {
415d522f475Smrg	    data->buffer[i] = data->next[i];
416d522f475Smrg	}
417d522f475Smrg	data->next = data->buffer;
418d522f475Smrg	data->last = data->next + n;
419d522f475Smrg    }
420d522f475Smrg
421d522f475Smrg}
422d522f475Smrg
423d522f475Smrg/*
424d522f475Smrg * Insert new data into the input buffer so the next calls to morePtyData()
425d522f475Smrg * and nextPtyData() will return that.
426d522f475Smrg */
427d522f475Smrgvoid
4285307cd1aSmrgfillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
429d522f475Smrg{
4305307cd1aSmrg    size_t size;
4315307cd1aSmrg    size_t n;
432d522f475Smrg
433d522f475Smrg    /* remove the used portion of the buffer */
43420d2c4d2Smrg    trimPtyData(xw, data);
435d522f475Smrg
436d522f475Smrg    VTbuffer->last += length;
4375307cd1aSmrg    size = (size_t) (VTbuffer->last - VTbuffer->next);
438d522f475Smrg
439d522f475Smrg    /* shift the unused portion up to make room */
440d522f475Smrg    for (n = size; n >= length; --n)
441d522f475Smrg	VTbuffer->next[n] = VTbuffer->next[n - length];
442d522f475Smrg
443d522f475Smrg    /* insert the new bytes to interpret */
444d522f475Smrg    for (n = 0; n < length; n++)
445d522f475Smrg	VTbuffer->next[n] = CharOf(value[n]);
446d522f475Smrg}
447d522f475Smrg
448d522f475Smrg#if OPT_WIDE_CHARS
449f2e35a3aSmrg/*
450f2e35a3aSmrg * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
451f2e35a3aSmrg * 'lp', and returning a pointer past the converted character.
452f2e35a3aSmrg */
453d522f475SmrgChar *
454894e0ac8SmrgconvertToUTF8(Char *lp, unsigned c)
455d522f475Smrg{
45620d2c4d2Smrg#define CH(n) (Char)((c) >> ((n) * 8))
45720d2c4d2Smrg    if (c < 0x80) {
45820d2c4d2Smrg	/*  0*******  */
45920d2c4d2Smrg	*lp++ = (Char) CH(0);
46020d2c4d2Smrg    } else if (c < 0x800) {
46120d2c4d2Smrg	/*  110***** 10******  */
46220d2c4d2Smrg	*lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
46320d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
46420d2c4d2Smrg    } else if (c < 0x00010000) {
46520d2c4d2Smrg	/*  1110**** 10****** 10******  */
46620d2c4d2Smrg	*lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
46720d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
46820d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
46920d2c4d2Smrg    } else if (c < 0x00200000) {
47020d2c4d2Smrg	*lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
47120d2c4d2Smrg	*lp++ = (Char) (0x80 |
47220d2c4d2Smrg			((int) (CH(1) & 0xf0) >> 4) |
47320d2c4d2Smrg			((int) (CH(2) & 0x03) << 4));
47420d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
47520d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
47620d2c4d2Smrg    } else if (c < 0x04000000) {
47720d2c4d2Smrg	*lp++ = (Char) (0xf8 | (CH(3) & 0x03));
47820d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(2) >> 2));
47920d2c4d2Smrg	*lp++ = (Char) (0x80 |
48020d2c4d2Smrg			((int) (CH(1) & 0xf0) >> 4) |
48120d2c4d2Smrg			((int) (CH(2) & 0x03) << 4));
48220d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
48320d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
48420d2c4d2Smrg    } else {
48520d2c4d2Smrg	*lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
48620d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(3) & 0x3f));
48720d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(2) >> 2));
48820d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
48920d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
49020d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
491d522f475Smrg    }
492d522f475Smrg    return lp;
49320d2c4d2Smrg#undef CH
494d522f475Smrg}
495d522f475Smrg
496f2e35a3aSmrg/*
497f2e35a3aSmrg * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
498f2e35a3aSmrg * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
499f2e35a3aSmrg * making it possible to use this to convert to Latin-1.
500f2e35a3aSmrg *
501f2e35a3aSmrg * If the conversion fails, return null.
502f2e35a3aSmrg */
503f2e35a3aSmrgChar *
504f2e35a3aSmrgconvertFromUTF8(Char *lp, unsigned *cp)
505f2e35a3aSmrg{
506f2e35a3aSmrg    int want;
507f2e35a3aSmrg
508f2e35a3aSmrg    /*
509f2e35a3aSmrg     * Find the number of bytes we will need from the source.
510f2e35a3aSmrg     */
511f2e35a3aSmrg    if ((*lp & 0x80) == 0) {
512f2e35a3aSmrg	want = 1;
513f2e35a3aSmrg    } else if ((*lp & 0xe0) == 0xc0) {
514f2e35a3aSmrg	want = 2;
515f2e35a3aSmrg    } else if ((*lp & 0xf0) == 0xe0) {
516f2e35a3aSmrg	want = 3;
517f2e35a3aSmrg    } else if ((*lp & 0xf8) == 0xf0) {
518f2e35a3aSmrg	want = 4;
519f2e35a3aSmrg    } else if ((*lp & 0xfc) == 0xf8) {
520f2e35a3aSmrg	want = 5;
521f2e35a3aSmrg    } else if ((*lp & 0xfe) == 0xfc) {
522f2e35a3aSmrg	want = 6;
523f2e35a3aSmrg    } else {
524f2e35a3aSmrg	want = 0;
525f2e35a3aSmrg    }
526f2e35a3aSmrg
527f2e35a3aSmrg    if (want) {
528f2e35a3aSmrg	int have = 1;
529f2e35a3aSmrg
530f2e35a3aSmrg	while (lp[have] != '\0') {
531f2e35a3aSmrg	    if ((lp[have] & 0xc0) != 0x80)
532f2e35a3aSmrg		break;
533f2e35a3aSmrg	    ++have;
534f2e35a3aSmrg	}
535f2e35a3aSmrg	if (want == have) {
536f2e35a3aSmrg	    unsigned mask = 0;
537f2e35a3aSmrg	    int j;
538f2e35a3aSmrg	    int shift = 0;
539f2e35a3aSmrg
540f2e35a3aSmrg	    *cp = 0;
541f2e35a3aSmrg	    switch (want) {
542f2e35a3aSmrg	    case 1:
543f2e35a3aSmrg		mask = (*lp);
544f2e35a3aSmrg		break;
545f2e35a3aSmrg	    case 2:
546f2e35a3aSmrg		mask = (*lp & 0x1f);
547f2e35a3aSmrg		break;
548f2e35a3aSmrg	    case 3:
549f2e35a3aSmrg		mask = (*lp & 0x0f);
550f2e35a3aSmrg		break;
551f2e35a3aSmrg	    case 4:
552f2e35a3aSmrg		mask = (*lp & 0x07);
553f2e35a3aSmrg		break;
554f2e35a3aSmrg	    case 5:
555f2e35a3aSmrg		mask = (*lp & 0x03);
556f2e35a3aSmrg		break;
557f2e35a3aSmrg	    case 6:
558f2e35a3aSmrg		mask = (*lp & 0x01);
559f2e35a3aSmrg		break;
560f2e35a3aSmrg	    default:
561f2e35a3aSmrg		mask = 0;
562f2e35a3aSmrg		break;
563f2e35a3aSmrg	    }
564f2e35a3aSmrg
565f2e35a3aSmrg	    for (j = 1; j < want; j++) {
566f2e35a3aSmrg		*cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
567f2e35a3aSmrg		shift += 6;
568f2e35a3aSmrg	    }
569f2e35a3aSmrg	    *cp |= mask << shift;
570f2e35a3aSmrg	    lp += want;
571f2e35a3aSmrg	} else {
572f2e35a3aSmrg	    *cp = BAD_ASCII;
573f2e35a3aSmrg	    lp = NULL;
574f2e35a3aSmrg	}
575f2e35a3aSmrg    } else {
576f2e35a3aSmrg	*cp = BAD_ASCII;
577f2e35a3aSmrg	lp = NULL;
578f2e35a3aSmrg    }
579f2e35a3aSmrg    return lp;
580f2e35a3aSmrg}
581f2e35a3aSmrg
582f2e35a3aSmrg/*
583f2e35a3aSmrg * Returns true if the entire string is valid UTF-8.
584f2e35a3aSmrg */
585f2e35a3aSmrgBoolean
586f2e35a3aSmrgisValidUTF8(Char *lp)
587f2e35a3aSmrg{
588f2e35a3aSmrg    Boolean result = True;
589f2e35a3aSmrg    while (*lp) {
590f2e35a3aSmrg	unsigned ch;
591f2e35a3aSmrg	Char *next = convertFromUTF8(lp, &ch);
592f2e35a3aSmrg	if (next == NULL || ch == 0) {
593f2e35a3aSmrg	    result = False;
594f2e35a3aSmrg	    break;
595f2e35a3aSmrg	}
596f2e35a3aSmrg	lp = next;
597f2e35a3aSmrg    }
598f2e35a3aSmrg    return result;
599f2e35a3aSmrg}
600f2e35a3aSmrg
601d522f475Smrg/*
602d522f475Smrg * Write data back to the PTY
603d522f475Smrg */
604d522f475Smrgvoid
6055307cd1aSmrgwritePtyData(int f, IChar *d, size_t len)
606d522f475Smrg{
6075307cd1aSmrg    size_t n = (len << 1);
608d522f475Smrg
609d522f475Smrg    if (VTbuffer->write_len <= len) {
610d522f475Smrg	VTbuffer->write_len = n;
6115307cd1aSmrg	VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
612d522f475Smrg    }
613d522f475Smrg
614d522f475Smrg    for (n = 0; n < len; n++)
615956cc18dSsnj	VTbuffer->write_buf[n] = (Char) d[n];
616d522f475Smrg
6175307cd1aSmrg    TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
618956cc18dSsnj	   visibleChars(VTbuffer->write_buf, n)));
619d522f475Smrg    v_write(f, VTbuffer->write_buf, n);
620d522f475Smrg}
621d522f475Smrg#endif /* OPT_WIDE_CHARS */
622d522f475Smrg
623d522f475Smrg#ifdef NO_LEAKS
624d522f475Smrgvoid
625d522f475Smrgnoleaks_ptydata(void)
626d522f475Smrg{
6275104ee6eSmrg    if (VTbuffer != NULL) {
628d522f475Smrg#if OPT_WIDE_CHARS
629f2e35a3aSmrg	free(VTbuffer->write_buf);
630f2e35a3aSmrg#endif
631f2e35a3aSmrg	FreeAndNull(VTbuffer);
632f2e35a3aSmrg    }
633f2e35a3aSmrg}
634f2e35a3aSmrg#endif
635f2e35a3aSmrg
636f2e35a3aSmrg#ifdef TEST_DRIVER
637f2e35a3aSmrg
638f2e35a3aSmrg#include "data.c"
639f2e35a3aSmrg
/* Test-driver version: announce the exit on stderr and leave with success. */
void
NormalExit(void)
{
    fputs("NormalExit!\n", stderr);
    exit(EXIT_SUCCESS);
}
646f2e35a3aSmrg
/*
 * Test-driver version: the message and its argument are ignored; a fixed
 * notice goes to stderr and the program leaves with failure.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;
    fputs("Panic!\n", stderr);
    exit(EXIT_FAILURE);
}
655f2e35a3aSmrg
656f2e35a3aSmrg#if OPT_WIDE_CHARS
657f2e35a3aSmrg
658f2e35a3aSmrg#ifdef ALLOWLOGGING
/*
 * Test-driver stub (compiled only with TEST_DRIVER and ALLOWLOGGING): does
 * nothing.  Presumably stands in for the real logging flush so that
 * trimPtyData() links in the standalone test program.
 */
659f2e35a3aSmrgvoid
660f2e35a3aSmrgFlushLog(XtermWidget xw)
661f2e35a3aSmrg{
662f2e35a3aSmrg    (void) xw;
663f2e35a3aSmrg}
664d522f475Smrg#endif
665f2e35a3aSmrg
/*
 * Test-driver stub: discards the data instead of writing it anywhere.  It
 * exists so that writePtyData() links in the standalone test program.
 */
666f2e35a3aSmrgvoid
6675307cd1aSmrgv_write(int f, const Char *data, size_t len)
668f2e35a3aSmrg{
669f2e35a3aSmrg    (void) f;
670f2e35a3aSmrg    (void) data;
671f2e35a3aSmrg    (void) len;
672f2e35a3aSmrg}
673f2e35a3aSmrg
/*
 * Test-driver stub: ignores the mode.  It exists so that switchPtyData()
 * links in the standalone test program.
 */
674f2e35a3aSmrgvoid
675f2e35a3aSmrgmk_wcwidth_init(int mode)
676f2e35a3aSmrg{
677f2e35a3aSmrg    (void) mode;
678f2e35a3aSmrg}
679f2e35a3aSmrg
/*
 * Test-driver stub: does nothing.  It exists so that switchPtyData() links
 * in the standalone test program.
 */
680f2e35a3aSmrgvoid
681f2e35a3aSmrgupdate_font_utf8_mode(void)
682f2e35a3aSmrg{
683f2e35a3aSmrg}
684f2e35a3aSmrg
/*
 * Test-driver state.  NOTE(review): the option parser is not visible from
 * here; the mapping to the command-line flags listed in usage() is inferred
 * from the names and should be confirmed.
 */
/* verbosity; do_range() prints each test case when this exceeds 1 */
685f2e35a3aSmrgstatic int message_level = 0;
/* presumably set by -a */
686f2e35a3aSmrgstatic int opt_all = 0;
/* when set, do_range() skips byte strings rejected by is_legal_utf8() */
687f2e35a3aSmrgstatic int opt_illegal = 0;
/* presumably set by -c */
688f2e35a3aSmrgstatic int opt_convert = 0;
/* when set, do_range() treats each value as a UTF-8 byte string to decode */
689f2e35a3aSmrgstatic int opt_reverse = 0;
/* counts characters decoded by do_range() */
690f2e35a3aSmrgstatic long total_test = 0;
/* presumably counts failed checks */
691f2e35a3aSmrgstatic long total_errs = 0;
692f2e35a3aSmrg
/*
 * Print the command-line summary for the test driver on stderr and exit with
 * a failure status.  Does not return.
 */
static void
usage(void)
{
    static const char *const msg[] =
    {
	"Usage: test_ptydata [options] [c1[-c1b] [c2[-c2b] [...]]]",
	"",
	"Options:",
	" -a  exercise all legal encode/decode to/from UTF-8",
	" -c  call convertFromUTF8 rather than decodeUtf8",
	" -i  ignore illegal UTF-8 when testing -r option",
	" -q  quieter",
	" -r  reverse/decode from UTF-8 byte-string to/from Unicode",
	" -v  more verbose"
    };
    size_t n;

    for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
	fprintf(stderr, "%s\n", msg[n]);
    }
    exit(EXIT_FAILURE);
}
714f2e35a3aSmrg
715f2e35a3aSmrg/*
716f2e35a3aSmrg * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
717f2e35a3aSmrg */
718f2e35a3aSmrg#define OkRange(n,lo,hi) \
719f2e35a3aSmrg 	if (value[n] < lo || value[n] > hi) { \
720f2e35a3aSmrg	    result = False; \
721f2e35a3aSmrg	    break; \
722f2e35a3aSmrg	}
723f2e35a3aSmrgstatic Bool
724f2e35a3aSmrgis_legal_utf8(const Char *value)
725f2e35a3aSmrg{
726f2e35a3aSmrg    Bool result = True;
727f2e35a3aSmrg    Char ch;
728f2e35a3aSmrg    while ((ch = *value) != '\0') {
729f2e35a3aSmrg	if (ch <= 0x7f) {
730f2e35a3aSmrg	    ++value;
731f2e35a3aSmrg	} else if (ch >= 0xc2 && ch <= 0xdf) {
732f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
733f2e35a3aSmrg	    value += 2;
734f2e35a3aSmrg	} else if (ch == 0xe0) {
735f2e35a3aSmrg	    OkRange(1, 0xa0, 0xbf);
736f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
737f2e35a3aSmrg	    value += 3;
738f2e35a3aSmrg	} else if (ch >= 0xe1 && ch <= 0xef) {
739f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
740f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
741f2e35a3aSmrg	    value += 3;
742f2e35a3aSmrg	} else if (ch == 0xf0) {
743f2e35a3aSmrg	    OkRange(1, 0x90, 0xbf);
744f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
745f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
746f2e35a3aSmrg	    value += 4;
747f2e35a3aSmrg	} else if (ch >= 0xf1 && ch <= 0xf3) {
748f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
749f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
750f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
751f2e35a3aSmrg	    value += 4;
752f2e35a3aSmrg	} else if (ch == 0xf4) {
753f2e35a3aSmrg	    OkRange(1, 0x80, 0x8f);
754f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
755f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
756f2e35a3aSmrg	    value += 4;
757f2e35a3aSmrg	} else {
758f2e35a3aSmrg	    result = False;
759f2e35a3aSmrg	    break;
760f2e35a3aSmrg	}
761f2e35a3aSmrg    }
762f2e35a3aSmrg    return result;
763f2e35a3aSmrg}
764f2e35a3aSmrg
765f2e35a3aSmrgstatic void
766f2e35a3aSmrgtest_utf8_convert(void)
767f2e35a3aSmrg{
768f2e35a3aSmrg    unsigned c_in, c_out;
769f2e35a3aSmrg    Char buffer[10];
770f2e35a3aSmrg    Char *result;
771f2e35a3aSmrg    unsigned limit = 0x110000;
772f2e35a3aSmrg    unsigned success = 0;
773f2e35a3aSmrg    unsigned bucket[256];
774f2e35a3aSmrg
775f2e35a3aSmrg    memset(bucket, 0, sizeof(bucket));
776f2e35a3aSmrg    for (c_in = 0; c_in < limit; ++c_in) {
777f2e35a3aSmrg	memset(buffer, 0, sizeof(buffer));
7785104ee6eSmrg	if ((result = convertToUTF8(buffer, c_in)) == NULL) {
779f2e35a3aSmrg	    TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
780f2e35a3aSmrg	} else {
7815104ee6eSmrg	    if ((result = convertFromUTF8(buffer, &c_out)) == NULL) {
782f2e35a3aSmrg		TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
783f2e35a3aSmrg	    } else if (c_in != c_out) {
784f2e35a3aSmrg		TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
785f2e35a3aSmrg		       c_in, c_out));
786f2e35a3aSmrg	    } else {
787f2e35a3aSmrg		while (result-- != buffer) {
788f2e35a3aSmrg		    bucket[*result]++;
789f2e35a3aSmrg		}
790f2e35a3aSmrg		++success;
791f2e35a3aSmrg	    }
792f2e35a3aSmrg	}
793f2e35a3aSmrg    }
794f2e35a3aSmrg    TRACE(("%u/%u successful\n", success, limit));
795f2e35a3aSmrg    for (c_in = 0; c_in < 256; ++c_in) {
796f2e35a3aSmrg	if ((c_in % 8) == 0) {
797f2e35a3aSmrg	    TRACE((" %02X:", c_in));
798f2e35a3aSmrg	}
799f2e35a3aSmrg	TRACE((" %8X", bucket[c_in]));
800f2e35a3aSmrg	if (((c_in + 1) % 8) == 0) {
801f2e35a3aSmrg	    TRACE(("\n"));
802f2e35a3aSmrg	}
803f2e35a3aSmrg    }
804f2e35a3aSmrg}
805f2e35a3aSmrg
/*
 * Parse one number from 'source'.
 *
 * A leading "U+" or "u+" selects hexadecimal and a leading "0b" selects
 * binary; otherwise strtol() picks the radix from the usual C prefixes.
 * '*target' is set to the first character after the number.
 *
 * Returns the value, or -1 if no digits were found or the value does not fit
 * in an int.
 */
static int
decode_one(const char *source, char **target)
{
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
	source += 2;
	radix = 2;
    }

    errno = 0;
    check = strtol(source, target, radix);

    /* no digits consumed */
    if (*target == NULL || *target == source)
	return -1;

    /* too big for long, or would change value when narrowed to int */
    if (errno == ERANGE || (long) (int) check != check)
	return -1;

    return (int) check;
}
824f2e35a3aSmrg
/*
 * Parse "lo" or "lo<sep>hi" from 'source', where <sep> is any run of the
 * characters ':', '-', '.', tab or space.  If no upper bound can be parsed,
 * '*hi' is set equal to '*lo'.
 *
 * Returns 1 if a lower bound was parsed, 0 otherwise.
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *ignored;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
	return 0;

    rest += strspn(rest, ":-.\t ");
    *hi = decode_one(rest, &ignored);
    if (*hi < 0)
	*hi = *lo;

    return 1;
}
840f2e35a3aSmrg
841f2e35a3aSmrg#define MAX_BYTES 6
842f2e35a3aSmrg
843f2e35a3aSmrgstatic void
844f2e35a3aSmrgdo_range(const char *source)
845f2e35a3aSmrg{
846f2e35a3aSmrg    int lo, hi;
847f2e35a3aSmrg
848f2e35a3aSmrg    TScreen screen;
849f2e35a3aSmrg    memset(&screen, 0, sizeof(screen));
850f2e35a3aSmrg
851f2e35a3aSmrg    if (decode_range(source, &lo, &hi)) {
852f2e35a3aSmrg	while (lo <= hi) {
853f2e35a3aSmrg	    unsigned c_in = (unsigned) lo++;
854f2e35a3aSmrg	    PtyData *data;
855f2e35a3aSmrg	    Char *next;
856f2e35a3aSmrg	    Char buffer[MAX_BYTES + 1];
857f2e35a3aSmrg
858f2e35a3aSmrg	    if (opt_reverse) {
859f2e35a3aSmrg		Bool skip = False;
860f2e35a3aSmrg		Bool first = True;
861f2e35a3aSmrg		int j, k;
862f2e35a3aSmrg		for (j = 0; j < MAX_BYTES; ++j) {
863f2e35a3aSmrg		    unsigned long bits = ((unsigned long) c_in >> (8 * j));
864f2e35a3aSmrg		    if ((buffer[j] = (Char) bits) == 0) {
865f2e35a3aSmrg			skip = (bits != 0);
866f2e35a3aSmrg			break;
867f2e35a3aSmrg		    }
868f2e35a3aSmrg		}
869f2e35a3aSmrg		if (skip)
870f2e35a3aSmrg		    continue;
871f2e35a3aSmrg		initPtyData(&data);
872f2e35a3aSmrg		for (k = 0; k <= j; ++k) {
873f2e35a3aSmrg		    data->buffer[k] = buffer[j - k - 1];
874f2e35a3aSmrg		}
875f2e35a3aSmrg		if (opt_illegal && !is_legal_utf8(data->buffer)) {
876f2e35a3aSmrg		    free(data);
877f2e35a3aSmrg		    continue;
878f2e35a3aSmrg		}
879f2e35a3aSmrg		if (message_level > 1) {
880f2e35a3aSmrg		    printf("TEST ");
881f2e35a3aSmrg		    for (k = 0; k < j; ++k) {
882f2e35a3aSmrg			printf("%02X", data->buffer[k]);
883f2e35a3aSmrg		    }
884f2e35a3aSmrg		}
885f2e35a3aSmrg		data->next = data->buffer;
886f2e35a3aSmrg		data->last = data->buffer + j;
887f2e35a3aSmrg		while (decodeUtf8(&screen, data)) {
888f2e35a3aSmrg		    total_test++;
88904b94745Smrg		    if (is_UCS_SPECIAL(data->utf_data))
890f2e35a3aSmrg			total_errs++;
891f2e35a3aSmrg		    data->next += data->utf_size;
892f2e35a3aSmrg		    if (message_level > 1) {
893f2e35a3aSmrg			printf("%s%04X", first ? " ->" : ", ", data->utf_data);
894f2e35a3aSmrg		    }
895f2e35a3aSmrg		    first = False;
896f2e35a3aSmrg		}
897f2e35a3aSmrg		if (!first)
898f2e35a3aSmrg		    total_test--;
899f2e35a3aSmrg		if (message_level > 1) {
900f2e35a3aSmrg		    printf("\n");
901f2e35a3aSmrg		    fflush(stdout);
902f2e35a3aSmrg		}
903f2e35a3aSmrg		free(data);
904f2e35a3aSmrg	    } else if (opt_convert) {
905f2e35a3aSmrg		unsigned c_out;
906f2e35a3aSmrg		Char *result;
907f2e35a3aSmrg
908f2e35a3aSmrg		memset(buffer, 0, sizeof(buffer));
9095104ee6eSmrg		if ((result = next = convertToUTF8(buffer, c_in)) == NULL) {
910f2e35a3aSmrg		    fprintf(stderr,
911f2e35a3aSmrg			    "conversion of U+%04X to UTF-8 failed\n", c_in);
9125104ee6eSmrg		} else if ((result = convertFromUTF8(buffer, &c_out)) == NULL) {
913f2e35a3aSmrg		    fprintf(stderr,
914f2e35a3aSmrg			    "conversion of U+%04X from UTF-8 failed\n", c_in);
915f2e35a3aSmrg		    total_errs++;
916f2e35a3aSmrg		} else if (c_in != c_out) {
917f2e35a3aSmrg		    fprintf(stderr,
918f2e35a3aSmrg			    "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
919f2e35a3aSmrg			    c_in, c_out);
920f2e35a3aSmrg		} else if (message_level > 1) {
921f2e35a3aSmrg		    *next = '\0';
9225307cd1aSmrg		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
9235307cd1aSmrg			   (unsigned long) (next - buffer),
924f2e35a3aSmrg			   buffer,
925f2e35a3aSmrg			   c_out);
926f2e35a3aSmrg		    fflush(stdout);
927f2e35a3aSmrg		}
928f2e35a3aSmrg	    } else {
929f2e35a3aSmrg		initPtyData(&data);
930f2e35a3aSmrg		next = convertToUTF8(data->buffer, c_in);
931f2e35a3aSmrg		*next = 0;
932f2e35a3aSmrg		data->next = data->buffer;
933f2e35a3aSmrg		data->last = next;
934f2e35a3aSmrg		decodeUtf8(&screen, data);
935f2e35a3aSmrg		if (message_level > 1) {
9365307cd1aSmrg		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
9375307cd1aSmrg			   (unsigned long) (next - data->buffer),
938f2e35a3aSmrg			   data->buffer,
939f2e35a3aSmrg			   data->utf_data);
940f2e35a3aSmrg		    fflush(stdout);
941f2e35a3aSmrg		}
942f2e35a3aSmrg		if (c_in != data->utf_data) {
943f2e35a3aSmrg		    fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
944f2e35a3aSmrg		    total_errs++;
945f2e35a3aSmrg		}
946f2e35a3aSmrg		free(data);
947f2e35a3aSmrg	    }
948f2e35a3aSmrg	    total_test++;
949f2e35a3aSmrg	}
950f2e35a3aSmrg    }
951f2e35a3aSmrg}
952f2e35a3aSmrg
953f2e35a3aSmrgint
954f2e35a3aSmrgmain(int argc, char **argv)
955f2e35a3aSmrg{
956f2e35a3aSmrg    int ch;
957f2e35a3aSmrg
958f2e35a3aSmrg    setlocale(LC_ALL, "");
959f2e35a3aSmrg    while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
960f2e35a3aSmrg	switch (ch) {
961f2e35a3aSmrg	case 'a':
962f2e35a3aSmrg	    opt_all = 1;
963f2e35a3aSmrg	    break;
964f2e35a3aSmrg	case 'c':
965f2e35a3aSmrg	    opt_convert = 1;
966f2e35a3aSmrg	    break;
967f2e35a3aSmrg	case 'i':
968f2e35a3aSmrg	    opt_illegal = 1;
969f2e35a3aSmrg	    break;
970f2e35a3aSmrg	case 'q':
971f2e35a3aSmrg	    message_level--;
972f2e35a3aSmrg	    break;
973f2e35a3aSmrg	case 'r':
974f2e35a3aSmrg	    opt_reverse = 1;
975f2e35a3aSmrg	    break;
976f2e35a3aSmrg	case 'v':
977f2e35a3aSmrg	    message_level++;
978f2e35a3aSmrg	    break;
979f2e35a3aSmrg	default:
980f2e35a3aSmrg	    usage();
981f2e35a3aSmrg	}
982f2e35a3aSmrg    }
983f2e35a3aSmrg    if (opt_all) {
984f2e35a3aSmrg	test_utf8_convert();
985f2e35a3aSmrg    } else {
986f2e35a3aSmrg	if (optind >= argc)
987f2e35a3aSmrg	    usage();
988f2e35a3aSmrg	while (optind < argc) {
989f2e35a3aSmrg	    do_range(argv[optind++]);
990f2e35a3aSmrg	}
991f2e35a3aSmrg	if (total_test) {
992f2e35a3aSmrg	    printf("%ld/%ld mismatches (%.0f%%)\n",
993f2e35a3aSmrg		   total_errs,
994f2e35a3aSmrg		   total_test,
995f2e35a3aSmrg		   (100.0 * (double) total_errs) / (double) total_test);
996f2e35a3aSmrg	}
997f2e35a3aSmrg    }
998f2e35a3aSmrg    return EXIT_SUCCESS;
999f2e35a3aSmrg}
1000f2e35a3aSmrg#else
/*
 * Placeholder entry point used when OPT_WIDE_CHARS is not enabled: there is
 * nothing to test, so just say so and exit successfully.
 */
int
main(int argc, char **argv)
{
    /* the arguments are intentionally unused */
    (void) argv;
    (void) argc;

    fputs("Nothing to be done here...\n", stdout);
    return EXIT_SUCCESS;
}
1009f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */
1010d522f475Smrg#endif
1011