ptydata.c revision 04b94745
104b94745Smrg/* $XTermId: ptydata.c,v 1.160 2024/05/10 22:54:17 tom Exp $ */
2d522f475Smrg
30bd37d32Smrg/*
404b94745Smrg * Copyright 1999-2023,2024 by Thomas E. Dickey
50bd37d32Smrg *
60bd37d32Smrg *                         All Rights Reserved
70bd37d32Smrg *
80bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a
90bd37d32Smrg * copy of this software and associated documentation files (the
100bd37d32Smrg * "Software"), to deal in the Software without restriction, including
110bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish,
120bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to
130bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to
140bd37d32Smrg * the following conditions:
150bd37d32Smrg *
160bd37d32Smrg * The above copyright notice and this permission notice shall be included
170bd37d32Smrg * in all copies or substantial portions of the Software.
180bd37d32Smrg *
190bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
200bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
210bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
220bd37d32Smrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
230bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
240bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
250bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
260bd37d32Smrg *
270bd37d32Smrg * Except as contained in this notice, the name(s) of the above copyright
280bd37d32Smrg * holders shall not be used in advertising or otherwise to promote the
290bd37d32Smrg * sale, use or other dealings in this Software without prior written
300bd37d32Smrg * authorization.
310bd37d32Smrg */
32d522f475Smrg
33d522f475Smrg#include <data.h>
34d522f475Smrg
35d522f475Smrg#if OPT_WIDE_CHARS
36d522f475Smrg#include <menu.h>
37913cc679Smrg#include <wcwidth.h>
38d522f475Smrg#endif
39d522f475Smrg
40f2e35a3aSmrg#ifdef TEST_DRIVER
41f2e35a3aSmrg#undef TRACE
42f2e35a3aSmrg#define TRACE(p) if (1) printf p
43f2e35a3aSmrg#undef TRACE2
44f2e35a3aSmrg#define TRACE2(p) if (0) printf p
45f2e35a3aSmrg#define visibleChars(buf, len) "buffer"
46f2e35a3aSmrg#endif
47f2e35a3aSmrg
48d522f475Smrg/*
49d522f475Smrg * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX
50d522f475Smrg * systems are broken and return EWOULDBLOCK when they should return EAGAIN.
51d522f475Smrg * Note that this macro may evaluate its argument more than once.
52d522f475Smrg */
53d522f475Smrg#if defined(EAGAIN) && defined(EWOULDBLOCK)
54d522f475Smrg#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
55d522f475Smrg#else
56d522f475Smrg#ifdef EAGAIN
57d522f475Smrg#define E_TEST(err) ((err) == EAGAIN)
58d522f475Smrg#else
59d522f475Smrg#define E_TEST(err) ((err) == EWOULDBLOCK)
60d522f475Smrg#endif
61d522f475Smrg#endif
62d522f475Smrg
63d522f475Smrg#if OPT_WIDE_CHARS
64d522f475Smrg/*
65d522f475Smrg * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
66d522f475Smrg * The number of bytes converted will be nonzero iff there is data.
67d522f475Smrg */
68d522f475SmrgBool
69894e0ac8SmrgdecodeUtf8(TScreen *screen, PtyData *data)
70d522f475Smrg{
715307cd1aSmrg    size_t i;
725307cd1aSmrg    size_t length = (size_t) (data->last - data->next);
73d522f475Smrg    int utf_count = 0;
74956cc18dSsnj    unsigned utf_char = 0;
75d522f475Smrg
76d522f475Smrg    data->utf_size = 0;
77d522f475Smrg    for (i = 0; i < length; i++) {
78d522f475Smrg	unsigned c = data->next[i];
79d522f475Smrg
80d522f475Smrg	/* Combine UTF-8 into Unicode */
81d522f475Smrg	if (c < 0x80) {
82d522f475Smrg	    /* We received an ASCII character */
83d522f475Smrg	    if (utf_count > 0) {
84d522f475Smrg		data->utf_data = UCS_REPL;	/* prev. sequence incomplete */
85a1f3da82Smrg		data->utf_size = i;
86d522f475Smrg	    } else {
87956cc18dSsnj		data->utf_data = (IChar) c;
88d522f475Smrg		data->utf_size = 1;
89d522f475Smrg	    }
90d522f475Smrg	    break;
91f2e35a3aSmrg	} else if (screen->vt100_graphics
92f2e35a3aSmrg		   && (c < 0x100)
93f2e35a3aSmrg		   && (utf_count == 0)
94f2e35a3aSmrg		   && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
95f2e35a3aSmrg	    data->utf_data = (IChar) c;
96f2e35a3aSmrg	    data->utf_size = 1;
97f2e35a3aSmrg	    break;
98d522f475Smrg	} else if (c < 0xc0) {
99d522f475Smrg	    /* We received a continuation byte */
100d522f475Smrg	    if (utf_count < 1) {
10104b94745Smrg		if (screen->c1_printable) {
10204b94745Smrg		    data->utf_data = (IChar) c;
10304b94745Smrg		} else if ((i + 1) < length
10404b94745Smrg			   && data->next[i + 1] > 0x20
10504b94745Smrg			   && data->next[i + 1] < 0x80) {
10604b94745Smrg		    /*
10704b94745Smrg		     * Allow for C1 control string if the next byte is
10804b94745Smrg		     * available for inspection.
10904b94745Smrg		     */
11004b94745Smrg		    data->utf_data = (IChar) c;
11104b94745Smrg		} else {
11204b94745Smrg		    /*
11304b94745Smrg		     * We received a continuation byte before receiving a
11404b94745Smrg		     * sequence state, or a failed attempt to use a C1 control
11504b94745Smrg		     * string.
11604b94745Smrg		     */
11704b94745Smrg		    data->utf_data = (IChar) UCS_REPL;
11804b94745Smrg		}
119d522f475Smrg		data->utf_size = (i + 1);
120d522f475Smrg		break;
121f2e35a3aSmrg	    } else if (screen->utf8_weblike
122f2e35a3aSmrg		       && (utf_count == 3
123f2e35a3aSmrg			   && utf_char == 0x04
124f2e35a3aSmrg			   && c >= 0x90)) {
125f2e35a3aSmrg		/* The encoding would form a code point beyond U+10FFFF. */
126f2e35a3aSmrg		data->utf_size = i;
127f2e35a3aSmrg		data->utf_data = UCS_REPL;
128f2e35a3aSmrg		break;
129f2e35a3aSmrg	    } else if (screen->utf8_weblike
130f2e35a3aSmrg		       && (utf_count == 2
131f2e35a3aSmrg			   && utf_char == 0x0d
132f2e35a3aSmrg			   && c >= 0xa0)) {
133f2e35a3aSmrg		/* The encoding would form a surrogate code point. */
134f2e35a3aSmrg		data->utf_size = i;
135f2e35a3aSmrg		data->utf_data = UCS_REPL;
136f2e35a3aSmrg		break;
137d522f475Smrg	    } else {
138d522f475Smrg		/* Check for overlong UTF-8 sequences for which a shorter
139d522f475Smrg		 * encoding would exist and replace them with UCS_REPL.
140d522f475Smrg		 * An overlong UTF-8 sequence can have any of the following
141d522f475Smrg		 * forms:
142d522f475Smrg		 *   1100000x 10xxxxxx
143d522f475Smrg		 *   11100000 100xxxxx 10xxxxxx
144d522f475Smrg		 *   11110000 1000xxxx 10xxxxxx 10xxxxxx
145d522f475Smrg		 *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
146d522f475Smrg		 *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
147d522f475Smrg		 */
148d522f475Smrg		if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
149f2e35a3aSmrg		    if (screen->utf8_weblike) {
150f2e35a3aSmrg			/* overlong sequence continued */
151f2e35a3aSmrg			data->utf_data = UCS_REPL;
152f2e35a3aSmrg			data->utf_size = i;
153f2e35a3aSmrg			break;
154f2e35a3aSmrg		    } else {
155f2e35a3aSmrg			utf_char = UCS_REPL;
156f2e35a3aSmrg		    }
157d522f475Smrg		}
158d522f475Smrg		utf_char <<= 6;
159d522f475Smrg		utf_char |= (c & 0x3f);
160d522f475Smrg		if ((utf_char >= 0xd800 &&
161d522f475Smrg		     utf_char <= 0xdfff) ||
162d522f475Smrg		    (utf_char == 0xfffe) ||
163d522f475Smrg		    (utf_char == HIDDEN_CHAR)) {
164d522f475Smrg		    utf_char = UCS_REPL;
165d522f475Smrg		}
166d522f475Smrg		utf_count--;
167d522f475Smrg		if (utf_count == 0) {
168956cc18dSsnj#if !OPT_WIDER_ICHAR
169d522f475Smrg		    /* characters outside UCS-2 become UCS_REPL */
170f2e35a3aSmrg		    if (utf_char > NARROW_ICHAR) {
171d522f475Smrg			TRACE(("using replacement for %#x\n", utf_char));
172d522f475Smrg			utf_char = UCS_REPL;
173d522f475Smrg		    }
174956cc18dSsnj#endif
175956cc18dSsnj		    data->utf_data = (IChar) utf_char;
176d522f475Smrg		    data->utf_size = (i + 1);
177d522f475Smrg		    break;
178d522f475Smrg		}
179d522f475Smrg	    }
180d522f475Smrg	} else {
181d522f475Smrg	    /* We received a sequence start byte */
182d522f475Smrg	    if (utf_count > 0) {
183f2e35a3aSmrg		/* previous sequence is incomplete */
184f2e35a3aSmrg		data->utf_data = UCS_REPL;
185f2e35a3aSmrg		data->utf_size = i;
186d522f475Smrg		break;
187d522f475Smrg	    }
188f2e35a3aSmrg	    if (screen->utf8_weblike) {
189f2e35a3aSmrg		if (c < 0xe0) {
190f2e35a3aSmrg		    if (!(c & 0x1e)) {
191f2e35a3aSmrg			/* overlong sequence start */
192f2e35a3aSmrg			data->utf_data = UCS_REPL;
193f2e35a3aSmrg			data->utf_size = (i + 1);
194f2e35a3aSmrg			break;
195f2e35a3aSmrg		    }
196f2e35a3aSmrg		    utf_count = 1;
197f2e35a3aSmrg		    utf_char = (c & 0x1f);
198f2e35a3aSmrg		} else if (c < 0xf0) {
199f2e35a3aSmrg		    utf_count = 2;
200f2e35a3aSmrg		    utf_char = (c & 0x0f);
201f2e35a3aSmrg		} else if (c < 0xf5) {
202f2e35a3aSmrg		    utf_count = 3;
203f2e35a3aSmrg		    utf_char = (c & 0x07);
204f2e35a3aSmrg		} else {
205f2e35a3aSmrg		    data->utf_data = UCS_REPL;
206f2e35a3aSmrg		    data->utf_size = (i + 1);
207f2e35a3aSmrg		    break;
208a1f3da82Smrg		}
209d522f475Smrg	    } else {
210f2e35a3aSmrg		if (c < 0xe0) {
211f2e35a3aSmrg		    utf_count = 1;
212f2e35a3aSmrg		    utf_char = (c & 0x1f);
213f2e35a3aSmrg		    if (!(c & 0x1e)) {
214f2e35a3aSmrg			/* overlong sequence */
215f2e35a3aSmrg			utf_char = UCS_REPL;
216f2e35a3aSmrg		    }
217f2e35a3aSmrg		} else if (c < 0xf0) {
218f2e35a3aSmrg		    utf_count = 2;
219f2e35a3aSmrg		    utf_char = (c & 0x0f);
220f2e35a3aSmrg		} else if (c < 0xf8) {
221f2e35a3aSmrg		    utf_count = 3;
222f2e35a3aSmrg		    utf_char = (c & 0x07);
223f2e35a3aSmrg		} else if (c < 0xfc) {
224f2e35a3aSmrg		    utf_count = 4;
225f2e35a3aSmrg		    utf_char = (c & 0x03);
226f2e35a3aSmrg		} else if (c < 0xfe) {
227f2e35a3aSmrg		    utf_count = 5;
228f2e35a3aSmrg		    utf_char = (c & 0x01);
229f2e35a3aSmrg		} else {
230f2e35a3aSmrg		    data->utf_data = UCS_REPL;
231f2e35a3aSmrg		    data->utf_size = (i + 1);
232f2e35a3aSmrg		    break;
233f2e35a3aSmrg		}
234d522f475Smrg	    }
235d522f475Smrg	}
236d522f475Smrg    }
237d522f475Smrg#if OPT_TRACE > 1
23804b94745Smrg    TRACE(("UTF-8 char %04X [%lu..%lu]\n",
239d522f475Smrg	   data->utf_data,
24004b94745Smrg	   (unsigned long) (data->next - data->buffer),
24104b94745Smrg	   (unsigned long) (data->next - data->buffer + data->utf_size - 1)));
242d522f475Smrg#endif
243d522f475Smrg
244d522f475Smrg    return (data->utf_size != 0);
245d522f475Smrg}
246d522f475Smrg#endif
247d522f475Smrg
248d522f475Smrgint
249894e0ac8SmrgreadPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
250d522f475Smrg{
25120d2c4d2Smrg    TScreen *screen = TScreenOf(xw);
252d522f475Smrg    int size = 0;
253d522f475Smrg
254d522f475Smrg#ifdef VMS
255d522f475Smrg    if (*select_mask & pty_mask) {
25620d2c4d2Smrg	trimPtyData(xw, data);
257d522f475Smrg	if (read_queue.flink != 0) {
258d522f475Smrg	    size = tt_read(data->next);
259d522f475Smrg	    if (size == 0) {
260d522f475Smrg		Panic("input: read returned zero\n", 0);
261d522f475Smrg	    }
262d522f475Smrg	} else {
263d522f475Smrg	    sys$hiber();
264d522f475Smrg	}
265d522f475Smrg    }
266d522f475Smrg#else /* !VMS */
267d522f475Smrg    if (FD_ISSET(screen->respond, select_mask)) {
268956cc18dSsnj	int save_err;
26920d2c4d2Smrg	trimPtyData(xw, data);
270d522f475Smrg
27120d2c4d2Smrg	size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
272956cc18dSsnj	save_err = errno;
273d522f475Smrg#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
274956cc18dSsnj	/*
275956cc18dSsnj	 * Yes, I know this is a majorly f*ugly hack, however it seems to
276956cc18dSsnj	 * be necessary for Solaris x86.  DWH 11/15/94
277956cc18dSsnj	 * Dunno why though..
278956cc18dSsnj	 * (and now CYGWIN, alanh@xfree86.org 08/15/01
279956cc18dSsnj	 */
280956cc18dSsnj	if (size <= 0) {
281956cc18dSsnj	    if (save_err == EIO || save_err == 0)
2820bd37d32Smrg		NormalExit();
283956cc18dSsnj	    else if (!E_TEST(save_err))
284956cc18dSsnj		Panic("input: read returned unexpected error (%d)\n", save_err);
285956cc18dSsnj	    size = 0;
286956cc18dSsnj	}
287956cc18dSsnj#else /* !f*ugly */
288956cc18dSsnj	if (size < 0) {
289956cc18dSsnj	    if (save_err == EIO)
2900bd37d32Smrg		NormalExit();
291956cc18dSsnj	    else if (!E_TEST(save_err))
292956cc18dSsnj		Panic("input: read returned unexpected error (%d)\n", save_err);
293d522f475Smrg	    size = 0;
294d522f475Smrg	} else if (size == 0) {
2950bd37d32Smrg#if defined(__FreeBSD__)
2960bd37d32Smrg	    NormalExit();
297d522f475Smrg#else
298d522f475Smrg	    Panic("input: read returned zero\n", 0);
299d522f475Smrg#endif
300d522f475Smrg	}
301956cc18dSsnj#endif /* f*ugly */
302d522f475Smrg    }
303d522f475Smrg#endif /* VMS */
304d522f475Smrg
305d522f475Smrg    if (size) {
306d522f475Smrg#if OPT_TRACE
307d522f475Smrg	int i;
308d522f475Smrg
309d522f475Smrg	TRACE(("read %d bytes from pty\n", size));
310d522f475Smrg	for (i = 0; i < size; i++) {
311d522f475Smrg	    if (!(i % 16))
312d522f475Smrg		TRACE(("%s", i ? "\n    " : "READ"));
313d522f475Smrg	    TRACE((" %02X", data->last[i]));
314d522f475Smrg	}
315d522f475Smrg	TRACE(("\n"));
316d522f475Smrg#endif
317d522f475Smrg	data->last += size;
318d522f475Smrg#ifdef ALLOWLOGGING
31920d2c4d2Smrg	TScreenOf(term)->logstart = VTbuffer->next;
320d522f475Smrg#endif
321d522f475Smrg    }
322d522f475Smrg
323d522f475Smrg    return (size);
324d522f475Smrg}
325d522f475Smrg
326d522f475Smrg/*
327d522f475Smrg * Return the next value from the input buffer.  Note that morePtyData() is
328d522f475Smrg * always called before this function, so we can do the UTF-8 input conversion
329d522f475Smrg * in that function and simply return the result here.
330d522f475Smrg */
331d522f475Smrg#if OPT_WIDE_CHARS
332d522f475SmrgIChar
333894e0ac8SmrgnextPtyData(TScreen *screen, PtyData *data)
334d522f475Smrg{
335d522f475Smrg    IChar result;
336d522f475Smrg    if (screen->utf8_inparse) {
337f2e35a3aSmrg	skipPtyData(data, result);
338d522f475Smrg    } else {
339d522f475Smrg	result = *((data)->next++);
340956cc18dSsnj	if (!screen->output_eight_bits) {
341956cc18dSsnj	    result = (IChar) (result & 0x7f);
342956cc18dSsnj	}
343d522f475Smrg    }
344d522f475Smrg    TRACE2(("nextPtyData returns %#x\n", result));
345d522f475Smrg    return result;
346d522f475Smrg}
347d522f475Smrg#endif
348d522f475Smrg
349d522f475Smrg#if OPT_WIDE_CHARS
350d522f475Smrg/*
351d522f475Smrg * Called when UTF-8 mode has been turned on/off.
352d522f475Smrg */
353d522f475Smrgvoid
354894e0ac8SmrgswitchPtyData(TScreen *screen, int flag)
355d522f475Smrg{
356d522f475Smrg    if (screen->utf8_mode != flag) {
357d522f475Smrg	screen->utf8_mode = flag;
358956cc18dSsnj	screen->utf8_inparse = (Boolean) (flag != 0);
359913cc679Smrg	mk_wcwidth_init(screen->utf8_mode);
360d522f475Smrg
361d522f475Smrg	TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
362d522f475Smrg	update_font_utf8_mode();
363d522f475Smrg    }
364d522f475Smrg}
365d522f475Smrg#endif
366d522f475Smrg
367d522f475Smrg/*
368d522f475Smrg * Allocate a buffer.
369d522f475Smrg */
370d522f475Smrgvoid
371894e0ac8SmrginitPtyData(PtyData **result)
372d522f475Smrg{
373d522f475Smrg    PtyData *data;
374d522f475Smrg
375f2e35a3aSmrg    TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
376f2e35a3aSmrg	    FRG_SIZE, BUF_SIZE));
377d522f475Smrg
378d522f475Smrg    if (FRG_SIZE < 64)
379d522f475Smrg	FRG_SIZE = 64;
380d522f475Smrg    if (BUF_SIZE < FRG_SIZE)
381d522f475Smrg	BUF_SIZE = FRG_SIZE;
382d522f475Smrg    if (BUF_SIZE % FRG_SIZE)
383d522f475Smrg	BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
384d522f475Smrg
385f2e35a3aSmrg    TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
386f2e35a3aSmrg	    FRG_SIZE, BUF_SIZE));
387d522f475Smrg
388a1f3da82Smrg    data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
389d522f475Smrg
390d522f475Smrg    memset(data, 0, sizeof(*data));
391d522f475Smrg    data->next = data->buffer;
392d522f475Smrg    data->last = data->buffer;
393d522f475Smrg    *result = data;
394d522f475Smrg}
395d522f475Smrg
396d522f475Smrg/*
39720d2c4d2Smrg * Initialize a buffer for the caller, using its data in 'next'.
398d522f475Smrg */
399d522f475Smrg#if OPT_WIDE_CHARS
400d522f475SmrgPtyData *
401894e0ac8SmrgfakePtyData(PtyData *result, Char *next, Char *last)
402d522f475Smrg{
403d522f475Smrg    PtyData *data = result;
404d522f475Smrg
405d522f475Smrg    memset(data, 0, sizeof(*data));
406d522f475Smrg    data->next = next;
407d522f475Smrg    data->last = last;
408d522f475Smrg
409d522f475Smrg    return data;
410d522f475Smrg}
411d522f475Smrg#endif
412d522f475Smrg
413d522f475Smrg/*
414d522f475Smrg * Remove used data by shifting the buffer down, to make room for more data,
415d522f475Smrg * e.g., a continuation-read.
416d522f475Smrg */
417d522f475Smrgvoid
418f2e35a3aSmrgtrimPtyData(XtermWidget xw, PtyData *data)
419d522f475Smrg{
420f2e35a3aSmrg    (void) xw;
42120d2c4d2Smrg    FlushLog(xw);
422d522f475Smrg
423d522f475Smrg    if (data->next != data->buffer) {
4245307cd1aSmrg	size_t i;
4255307cd1aSmrg	size_t n = (size_t) (data->last - data->next);
426d522f475Smrg
4275307cd1aSmrg	TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
428d522f475Smrg	for (i = 0; i < n; ++i) {
429d522f475Smrg	    data->buffer[i] = data->next[i];
430d522f475Smrg	}
431d522f475Smrg	data->next = data->buffer;
432d522f475Smrg	data->last = data->next + n;
433d522f475Smrg    }
434d522f475Smrg
435d522f475Smrg}
436d522f475Smrg
437d522f475Smrg/*
438d522f475Smrg * Insert new data into the input buffer so the next calls to morePtyData()
439d522f475Smrg * and nextPtyData() will return that.
440d522f475Smrg */
441d522f475Smrgvoid
4425307cd1aSmrgfillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
443d522f475Smrg{
4445307cd1aSmrg    size_t size;
4455307cd1aSmrg    size_t n;
446d522f475Smrg
447d522f475Smrg    /* remove the used portion of the buffer */
44820d2c4d2Smrg    trimPtyData(xw, data);
449d522f475Smrg
450d522f475Smrg    VTbuffer->last += length;
4515307cd1aSmrg    size = (size_t) (VTbuffer->last - VTbuffer->next);
452d522f475Smrg
453d522f475Smrg    /* shift the unused portion up to make room */
454d522f475Smrg    for (n = size; n >= length; --n)
455d522f475Smrg	VTbuffer->next[n] = VTbuffer->next[n - length];
456d522f475Smrg
457d522f475Smrg    /* insert the new bytes to interpret */
458d522f475Smrg    for (n = 0; n < length; n++)
459d522f475Smrg	VTbuffer->next[n] = CharOf(value[n]);
460d522f475Smrg}
461d522f475Smrg
462d522f475Smrg#if OPT_WIDE_CHARS
/*
 * Convert the code 'c' to UTF-8, storing the result in the target 'lp', and
 * returning a pointer past the converted character.  ISO-8859-1 codes are the
 * values below 0x100; larger values, up to 31 bits, are encoded using
 * sequences of up to six bytes.
 */
467d522f475SmrgChar *
468894e0ac8SmrgconvertToUTF8(Char *lp, unsigned c)
469d522f475Smrg{
47020d2c4d2Smrg#define CH(n) (Char)((c) >> ((n) * 8))
47120d2c4d2Smrg    if (c < 0x80) {
47220d2c4d2Smrg	/*  0*******  */
47320d2c4d2Smrg	*lp++ = (Char) CH(0);
47420d2c4d2Smrg    } else if (c < 0x800) {
47520d2c4d2Smrg	/*  110***** 10******  */
47620d2c4d2Smrg	*lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
47720d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
47820d2c4d2Smrg    } else if (c < 0x00010000) {
47920d2c4d2Smrg	/*  1110**** 10****** 10******  */
48020d2c4d2Smrg	*lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
48120d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
48220d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
48320d2c4d2Smrg    } else if (c < 0x00200000) {
48420d2c4d2Smrg	*lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
48520d2c4d2Smrg	*lp++ = (Char) (0x80 |
48620d2c4d2Smrg			((int) (CH(1) & 0xf0) >> 4) |
48720d2c4d2Smrg			((int) (CH(2) & 0x03) << 4));
48820d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
48920d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
49020d2c4d2Smrg    } else if (c < 0x04000000) {
49120d2c4d2Smrg	*lp++ = (Char) (0xf8 | (CH(3) & 0x03));
49220d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(2) >> 2));
49320d2c4d2Smrg	*lp++ = (Char) (0x80 |
49420d2c4d2Smrg			((int) (CH(1) & 0xf0) >> 4) |
49520d2c4d2Smrg			((int) (CH(2) & 0x03) << 4));
49620d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
49720d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
49820d2c4d2Smrg    } else {
49920d2c4d2Smrg	*lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
50020d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(3) & 0x3f));
50120d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(2) >> 2));
50220d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
50320d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
50420d2c4d2Smrg	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
505d522f475Smrg    }
506d522f475Smrg    return lp;
50720d2c4d2Smrg#undef CH
508d522f475Smrg}
509d522f475Smrg
/*
 * Convert a UTF-8 multibyte character to a Unicode value, returning a pointer
 * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
 * making it possible to use this to convert to Latin-1.
 *
 * If the conversion fails, return null.
 */
517f2e35a3aSmrgChar *
518f2e35a3aSmrgconvertFromUTF8(Char *lp, unsigned *cp)
519f2e35a3aSmrg{
520f2e35a3aSmrg    int want;
521f2e35a3aSmrg
522f2e35a3aSmrg    /*
523f2e35a3aSmrg     * Find the number of bytes we will need from the source.
524f2e35a3aSmrg     */
525f2e35a3aSmrg    if ((*lp & 0x80) == 0) {
526f2e35a3aSmrg	want = 1;
527f2e35a3aSmrg    } else if ((*lp & 0xe0) == 0xc0) {
528f2e35a3aSmrg	want = 2;
529f2e35a3aSmrg    } else if ((*lp & 0xf0) == 0xe0) {
530f2e35a3aSmrg	want = 3;
531f2e35a3aSmrg    } else if ((*lp & 0xf8) == 0xf0) {
532f2e35a3aSmrg	want = 4;
533f2e35a3aSmrg    } else if ((*lp & 0xfc) == 0xf8) {
534f2e35a3aSmrg	want = 5;
535f2e35a3aSmrg    } else if ((*lp & 0xfe) == 0xfc) {
536f2e35a3aSmrg	want = 6;
537f2e35a3aSmrg    } else {
538f2e35a3aSmrg	want = 0;
539f2e35a3aSmrg    }
540f2e35a3aSmrg
541f2e35a3aSmrg    if (want) {
542f2e35a3aSmrg	int have = 1;
543f2e35a3aSmrg
544f2e35a3aSmrg	while (lp[have] != '\0') {
545f2e35a3aSmrg	    if ((lp[have] & 0xc0) != 0x80)
546f2e35a3aSmrg		break;
547f2e35a3aSmrg	    ++have;
548f2e35a3aSmrg	}
549f2e35a3aSmrg	if (want == have) {
550f2e35a3aSmrg	    unsigned mask = 0;
551f2e35a3aSmrg	    int j;
552f2e35a3aSmrg	    int shift = 0;
553f2e35a3aSmrg
554f2e35a3aSmrg	    *cp = 0;
555f2e35a3aSmrg	    switch (want) {
556f2e35a3aSmrg	    case 1:
557f2e35a3aSmrg		mask = (*lp);
558f2e35a3aSmrg		break;
559f2e35a3aSmrg	    case 2:
560f2e35a3aSmrg		mask = (*lp & 0x1f);
561f2e35a3aSmrg		break;
562f2e35a3aSmrg	    case 3:
563f2e35a3aSmrg		mask = (*lp & 0x0f);
564f2e35a3aSmrg		break;
565f2e35a3aSmrg	    case 4:
566f2e35a3aSmrg		mask = (*lp & 0x07);
567f2e35a3aSmrg		break;
568f2e35a3aSmrg	    case 5:
569f2e35a3aSmrg		mask = (*lp & 0x03);
570f2e35a3aSmrg		break;
571f2e35a3aSmrg	    case 6:
572f2e35a3aSmrg		mask = (*lp & 0x01);
573f2e35a3aSmrg		break;
574f2e35a3aSmrg	    default:
575f2e35a3aSmrg		mask = 0;
576f2e35a3aSmrg		break;
577f2e35a3aSmrg	    }
578f2e35a3aSmrg
579f2e35a3aSmrg	    for (j = 1; j < want; j++) {
580f2e35a3aSmrg		*cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
581f2e35a3aSmrg		shift += 6;
582f2e35a3aSmrg	    }
583f2e35a3aSmrg	    *cp |= mask << shift;
584f2e35a3aSmrg	    lp += want;
585f2e35a3aSmrg	} else {
586f2e35a3aSmrg	    *cp = BAD_ASCII;
587f2e35a3aSmrg	    lp = NULL;
588f2e35a3aSmrg	}
589f2e35a3aSmrg    } else {
590f2e35a3aSmrg	*cp = BAD_ASCII;
591f2e35a3aSmrg	lp = NULL;
592f2e35a3aSmrg    }
593f2e35a3aSmrg    return lp;
594f2e35a3aSmrg}
595f2e35a3aSmrg
596f2e35a3aSmrg/*
597f2e35a3aSmrg * Returns true if the entire string is valid UTF-8.
598f2e35a3aSmrg */
599f2e35a3aSmrgBoolean
600f2e35a3aSmrgisValidUTF8(Char *lp)
601f2e35a3aSmrg{
602f2e35a3aSmrg    Boolean result = True;
603f2e35a3aSmrg    while (*lp) {
604f2e35a3aSmrg	unsigned ch;
605f2e35a3aSmrg	Char *next = convertFromUTF8(lp, &ch);
606f2e35a3aSmrg	if (next == NULL || ch == 0) {
607f2e35a3aSmrg	    result = False;
608f2e35a3aSmrg	    break;
609f2e35a3aSmrg	}
610f2e35a3aSmrg	lp = next;
611f2e35a3aSmrg    }
612f2e35a3aSmrg    return result;
613f2e35a3aSmrg}
614f2e35a3aSmrg
615d522f475Smrg/*
616d522f475Smrg * Write data back to the PTY
617d522f475Smrg */
618d522f475Smrgvoid
6195307cd1aSmrgwritePtyData(int f, IChar *d, size_t len)
620d522f475Smrg{
6215307cd1aSmrg    size_t n = (len << 1);
622d522f475Smrg
623d522f475Smrg    if (VTbuffer->write_len <= len) {
624d522f475Smrg	VTbuffer->write_len = n;
6255307cd1aSmrg	VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
626d522f475Smrg    }
627d522f475Smrg
628d522f475Smrg    for (n = 0; n < len; n++)
629956cc18dSsnj	VTbuffer->write_buf[n] = (Char) d[n];
630d522f475Smrg
6315307cd1aSmrg    TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
632956cc18dSsnj	   visibleChars(VTbuffer->write_buf, n)));
633d522f475Smrg    v_write(f, VTbuffer->write_buf, n);
634d522f475Smrg}
635d522f475Smrg#endif /* OPT_WIDE_CHARS */
636d522f475Smrg
637d522f475Smrg#ifdef NO_LEAKS
638d522f475Smrgvoid
639d522f475Smrgnoleaks_ptydata(void)
640d522f475Smrg{
641d522f475Smrg    if (VTbuffer != 0) {
642d522f475Smrg#if OPT_WIDE_CHARS
643f2e35a3aSmrg	free(VTbuffer->write_buf);
644f2e35a3aSmrg#endif
645f2e35a3aSmrg	FreeAndNull(VTbuffer);
646f2e35a3aSmrg    }
647f2e35a3aSmrg}
648f2e35a3aSmrg#endif
649f2e35a3aSmrg
650f2e35a3aSmrg#ifdef TEST_DRIVER
651f2e35a3aSmrg
652f2e35a3aSmrg#include "data.c"
653f2e35a3aSmrg
/* Test-driver stand-in: announce the call and exit successfully. */
void
NormalExit(void)
{
    fprintf(stderr, "NormalExit!\n");
    exit(EXIT_SUCCESS);
}
660f2e35a3aSmrg
/*
 * Test-driver stand-in: the message 's' and its argument 'a' are ignored;
 * a fixed notice is printed and the program exits with failure.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;
    fprintf(stderr, "Panic!\n");
    exit(EXIT_FAILURE);
}
669f2e35a3aSmrg
670f2e35a3aSmrg#if OPT_WIDE_CHARS
671f2e35a3aSmrg
672f2e35a3aSmrg#ifdef ALLOWLOGGING
673f2e35a3aSmrgvoid
674f2e35a3aSmrgFlushLog(XtermWidget xw)
675f2e35a3aSmrg{
676f2e35a3aSmrg    (void) xw;
677f2e35a3aSmrg}
678d522f475Smrg#endif
679f2e35a3aSmrg
680f2e35a3aSmrgvoid
6815307cd1aSmrgv_write(int f, const Char *data, size_t len)
682f2e35a3aSmrg{
683f2e35a3aSmrg    (void) f;
684f2e35a3aSmrg    (void) data;
685f2e35a3aSmrg    (void) len;
686f2e35a3aSmrg}
687f2e35a3aSmrg
/* Test-driver stand-in: the width tables are not used, so 'mode' is ignored. */
void
mk_wcwidth_init(int mode)
{
    (void) mode;
}
693f2e35a3aSmrg
/* Test-driver stand-in: there are no fonts to update. */
void
update_font_utf8_mode(void)
{
}
698f2e35a3aSmrg
699f2e35a3aSmrgstatic int message_level = 0;
700f2e35a3aSmrgstatic int opt_all = 0;
701f2e35a3aSmrgstatic int opt_illegal = 0;
702f2e35a3aSmrgstatic int opt_convert = 0;
703f2e35a3aSmrgstatic int opt_reverse = 0;
704f2e35a3aSmrgstatic long total_test = 0;
705f2e35a3aSmrgstatic long total_errs = 0;
706f2e35a3aSmrg
/* Print the option summary to stderr, then exit with failure. */
static void
usage(void)
{
    static const char *msg[] =
    {
	"Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]",
	"",
	"Options:",
	" -a  exercise all legal encode/decode to/from UTF-8",
	" -c  call convertFromUTF8 rather than decodeUTF8",
	" -i  ignore illegal UTF-8 when testing -r option",
	" -q  quieter",
	" -r  reverse/decode from UTF-8 byte-string to/from Unicode",
	" -v  more verbose"
    };
    const char **line = msg;
    const char **end = msg + (sizeof(msg) / sizeof(msg[0]));

    while (line != end) {
	fprintf(stderr, "%s\n", *line++);
    }
    exit(EXIT_FAILURE);
}
728f2e35a3aSmrg
729f2e35a3aSmrg/*
730f2e35a3aSmrg * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
731f2e35a3aSmrg */
732f2e35a3aSmrg#define OkRange(n,lo,hi) \
733f2e35a3aSmrg 	if (value[n] < lo || value[n] > hi) { \
734f2e35a3aSmrg	    result = False; \
735f2e35a3aSmrg	    break; \
736f2e35a3aSmrg	}
737f2e35a3aSmrgstatic Bool
738f2e35a3aSmrgis_legal_utf8(const Char *value)
739f2e35a3aSmrg{
740f2e35a3aSmrg    Bool result = True;
741f2e35a3aSmrg    Char ch;
742f2e35a3aSmrg    while ((ch = *value) != '\0') {
743f2e35a3aSmrg	if (ch <= 0x7f) {
744f2e35a3aSmrg	    ++value;
745f2e35a3aSmrg	} else if (ch >= 0xc2 && ch <= 0xdf) {
746f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
747f2e35a3aSmrg	    value += 2;
748f2e35a3aSmrg	} else if (ch == 0xe0) {
749f2e35a3aSmrg	    OkRange(1, 0xa0, 0xbf);
750f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
751f2e35a3aSmrg	    value += 3;
752f2e35a3aSmrg	} else if (ch >= 0xe1 && ch <= 0xef) {
753f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
754f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
755f2e35a3aSmrg	    value += 3;
756f2e35a3aSmrg	} else if (ch == 0xf0) {
757f2e35a3aSmrg	    OkRange(1, 0x90, 0xbf);
758f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
759f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
760f2e35a3aSmrg	    value += 4;
761f2e35a3aSmrg	} else if (ch >= 0xf1 && ch <= 0xf3) {
762f2e35a3aSmrg	    OkRange(1, 0x80, 0xbf);
763f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
764f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
765f2e35a3aSmrg	    value += 4;
766f2e35a3aSmrg	} else if (ch == 0xf4) {
767f2e35a3aSmrg	    OkRange(1, 0x80, 0x8f);
768f2e35a3aSmrg	    OkRange(2, 0x80, 0xbf);
769f2e35a3aSmrg	    OkRange(3, 0x80, 0xbf);
770f2e35a3aSmrg	    value += 4;
771f2e35a3aSmrg	} else {
772f2e35a3aSmrg	    result = False;
773f2e35a3aSmrg	    break;
774f2e35a3aSmrg	}
775f2e35a3aSmrg    }
776f2e35a3aSmrg    return result;
777f2e35a3aSmrg}
778f2e35a3aSmrg
779f2e35a3aSmrgstatic void
780f2e35a3aSmrgtest_utf8_convert(void)
781f2e35a3aSmrg{
782f2e35a3aSmrg    unsigned c_in, c_out;
783f2e35a3aSmrg    Char buffer[10];
784f2e35a3aSmrg    Char *result;
785f2e35a3aSmrg    unsigned limit = 0x110000;
786f2e35a3aSmrg    unsigned success = 0;
787f2e35a3aSmrg    unsigned bucket[256];
788f2e35a3aSmrg
789f2e35a3aSmrg    memset(bucket, 0, sizeof(bucket));
790f2e35a3aSmrg    for (c_in = 0; c_in < limit; ++c_in) {
791f2e35a3aSmrg	memset(buffer, 0, sizeof(buffer));
792f2e35a3aSmrg	if ((result = convertToUTF8(buffer, c_in)) == 0) {
793f2e35a3aSmrg	    TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
794f2e35a3aSmrg	} else {
795f2e35a3aSmrg	    if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
796f2e35a3aSmrg		TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
797f2e35a3aSmrg	    } else if (c_in != c_out) {
798f2e35a3aSmrg		TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
799f2e35a3aSmrg		       c_in, c_out));
800f2e35a3aSmrg	    } else {
801f2e35a3aSmrg		while (result-- != buffer) {
802f2e35a3aSmrg		    bucket[*result]++;
803f2e35a3aSmrg		}
804f2e35a3aSmrg		++success;
805f2e35a3aSmrg	    }
806f2e35a3aSmrg	}
807f2e35a3aSmrg    }
808f2e35a3aSmrg    TRACE(("%u/%u successful\n", success, limit));
809f2e35a3aSmrg    for (c_in = 0; c_in < 256; ++c_in) {
810f2e35a3aSmrg	if ((c_in % 8) == 0) {
811f2e35a3aSmrg	    TRACE((" %02X:", c_in));
812f2e35a3aSmrg	}
813f2e35a3aSmrg	TRACE((" %8X", bucket[c_in]));
814f2e35a3aSmrg	if (((c_in + 1) % 8) == 0) {
815f2e35a3aSmrg	    TRACE(("\n"));
816f2e35a3aSmrg	}
817f2e35a3aSmrg    }
818f2e35a3aSmrg}
819f2e35a3aSmrg
/*
 * Parse one number from 'source'.  "U+" or "u+" introduces hexadecimal and
 * "0b" binary; anything else uses strtol()'s radix-0 rules (decimal, "0x"
 * hexadecimal, leading "0" octal).  '*target' is set to the first character
 * not consumed.
 *
 * Returns the value, or -1 if no digits were found, the value is negative,
 * or it does not fit in 31 bits.
 */
static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
	source += 2;
	radix = 2;
    }

    errno = 0;
    check = strtol(source, target, radix);

    /*
     * Reject out-of-range input instead of letting the narrowing to int
     * wrap it to an unrelated value: ERANGE covers overflow of long, the
     * explicit bound covers a long wider than 31 bits.
     */
    if (*target != source
	&& errno != ERANGE
	&& check >= 0
	&& check <= 0x7fffffffL) {
	result = (int) check;
    }
    return result;
}
838f2e35a3aSmrg
/*
 * Parse one or two numbers from "source", giving the inclusive range *lo..*hi.
 *
 * The first number is required.  Any run of the characters ':', '-', '.',
 * tab or space following it is skipped before looking for a second number.
 * If no usable second number is found, the range collapses to the single
 * value *lo.
 *
 * Returns 1 if a range was decoded, 0 if the first number was unusable (in
 * which case *hi is left untouched).
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest = NULL;
    char *ignored = NULL;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
	return 0;

    /* step over whatever separates the two endpoints */
    rest += strspn(rest, ":-.\t ");

    *hi = decode_one(rest, &ignored);
    if (*hi < 0)
	*hi = *lo;

    return 1;
}
854f2e35a3aSmrg
855f2e35a3aSmrg#define MAX_BYTES 6
856f2e35a3aSmrg
857f2e35a3aSmrgstatic void
858f2e35a3aSmrgdo_range(const char *source)
859f2e35a3aSmrg{
860f2e35a3aSmrg    int lo, hi;
861f2e35a3aSmrg
862f2e35a3aSmrg    TScreen screen;
863f2e35a3aSmrg    memset(&screen, 0, sizeof(screen));
864f2e35a3aSmrg
865f2e35a3aSmrg    if (decode_range(source, &lo, &hi)) {
866f2e35a3aSmrg	while (lo <= hi) {
867f2e35a3aSmrg	    unsigned c_in = (unsigned) lo++;
868f2e35a3aSmrg	    PtyData *data;
869f2e35a3aSmrg	    Char *next;
870f2e35a3aSmrg	    Char buffer[MAX_BYTES + 1];
871f2e35a3aSmrg
872f2e35a3aSmrg	    if (opt_reverse) {
873f2e35a3aSmrg		Bool skip = False;
874f2e35a3aSmrg		Bool first = True;
875f2e35a3aSmrg		int j, k;
876f2e35a3aSmrg		for (j = 0; j < MAX_BYTES; ++j) {
877f2e35a3aSmrg		    unsigned long bits = ((unsigned long) c_in >> (8 * j));
878f2e35a3aSmrg		    if ((buffer[j] = (Char) bits) == 0) {
879f2e35a3aSmrg			skip = (bits != 0);
880f2e35a3aSmrg			break;
881f2e35a3aSmrg		    }
882f2e35a3aSmrg		}
883f2e35a3aSmrg		if (skip)
884f2e35a3aSmrg		    continue;
885f2e35a3aSmrg		initPtyData(&data);
886f2e35a3aSmrg		for (k = 0; k <= j; ++k) {
887f2e35a3aSmrg		    data->buffer[k] = buffer[j - k - 1];
888f2e35a3aSmrg		}
889f2e35a3aSmrg		if (opt_illegal && !is_legal_utf8(data->buffer)) {
890f2e35a3aSmrg		    free(data);
891f2e35a3aSmrg		    continue;
892f2e35a3aSmrg		}
893f2e35a3aSmrg		if (message_level > 1) {
894f2e35a3aSmrg		    printf("TEST ");
895f2e35a3aSmrg		    for (k = 0; k < j; ++k) {
896f2e35a3aSmrg			printf("%02X", data->buffer[k]);
897f2e35a3aSmrg		    }
898f2e35a3aSmrg		}
899f2e35a3aSmrg		data->next = data->buffer;
900f2e35a3aSmrg		data->last = data->buffer + j;
901f2e35a3aSmrg		while (decodeUtf8(&screen, data)) {
902f2e35a3aSmrg		    total_test++;
90304b94745Smrg		    if (is_UCS_SPECIAL(data->utf_data))
904f2e35a3aSmrg			total_errs++;
905f2e35a3aSmrg		    data->next += data->utf_size;
906f2e35a3aSmrg		    if (message_level > 1) {
907f2e35a3aSmrg			printf("%s%04X", first ? " ->" : ", ", data->utf_data);
908f2e35a3aSmrg		    }
909f2e35a3aSmrg		    first = False;
910f2e35a3aSmrg		}
911f2e35a3aSmrg		if (!first)
912f2e35a3aSmrg		    total_test--;
913f2e35a3aSmrg		if (message_level > 1) {
914f2e35a3aSmrg		    printf("\n");
915f2e35a3aSmrg		    fflush(stdout);
916f2e35a3aSmrg		}
917f2e35a3aSmrg		free(data);
918f2e35a3aSmrg	    } else if (opt_convert) {
919f2e35a3aSmrg		unsigned c_out;
920f2e35a3aSmrg		Char *result;
921f2e35a3aSmrg
922f2e35a3aSmrg		memset(buffer, 0, sizeof(buffer));
923f2e35a3aSmrg		if ((result = next = convertToUTF8(buffer, c_in)) == 0) {
924f2e35a3aSmrg		    fprintf(stderr,
925f2e35a3aSmrg			    "conversion of U+%04X to UTF-8 failed\n", c_in);
926f2e35a3aSmrg		} else if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
927f2e35a3aSmrg		    fprintf(stderr,
928f2e35a3aSmrg			    "conversion of U+%04X from UTF-8 failed\n", c_in);
929f2e35a3aSmrg		    total_errs++;
930f2e35a3aSmrg		} else if (c_in != c_out) {
931f2e35a3aSmrg		    fprintf(stderr,
932f2e35a3aSmrg			    "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
933f2e35a3aSmrg			    c_in, c_out);
934f2e35a3aSmrg		} else if (message_level > 1) {
935f2e35a3aSmrg		    *next = '\0';
9365307cd1aSmrg		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
9375307cd1aSmrg			   (unsigned long) (next - buffer),
938f2e35a3aSmrg			   buffer,
939f2e35a3aSmrg			   c_out);
940f2e35a3aSmrg		    fflush(stdout);
941f2e35a3aSmrg		}
942f2e35a3aSmrg	    } else {
943f2e35a3aSmrg		initPtyData(&data);
944f2e35a3aSmrg		next = convertToUTF8(data->buffer, c_in);
945f2e35a3aSmrg		*next = 0;
946f2e35a3aSmrg		data->next = data->buffer;
947f2e35a3aSmrg		data->last = next;
948f2e35a3aSmrg		decodeUtf8(&screen, data);
949f2e35a3aSmrg		if (message_level > 1) {
9505307cd1aSmrg		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
9515307cd1aSmrg			   (unsigned long) (next - data->buffer),
952f2e35a3aSmrg			   data->buffer,
953f2e35a3aSmrg			   data->utf_data);
954f2e35a3aSmrg		    fflush(stdout);
955f2e35a3aSmrg		}
956f2e35a3aSmrg		if (c_in != data->utf_data) {
957f2e35a3aSmrg		    fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
958f2e35a3aSmrg		    total_errs++;
959f2e35a3aSmrg		}
960f2e35a3aSmrg		free(data);
961f2e35a3aSmrg	    }
962f2e35a3aSmrg	    total_test++;
963f2e35a3aSmrg	}
964f2e35a3aSmrg    }
965f2e35a3aSmrg}
966f2e35a3aSmrg
967f2e35a3aSmrgint
968f2e35a3aSmrgmain(int argc, char **argv)
969f2e35a3aSmrg{
970f2e35a3aSmrg    int ch;
971f2e35a3aSmrg
972f2e35a3aSmrg    setlocale(LC_ALL, "");
973f2e35a3aSmrg    while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
974f2e35a3aSmrg	switch (ch) {
975f2e35a3aSmrg	case 'a':
976f2e35a3aSmrg	    opt_all = 1;
977f2e35a3aSmrg	    break;
978f2e35a3aSmrg	case 'c':
979f2e35a3aSmrg	    opt_convert = 1;
980f2e35a3aSmrg	    break;
981f2e35a3aSmrg	case 'i':
982f2e35a3aSmrg	    opt_illegal = 1;
983f2e35a3aSmrg	    break;
984f2e35a3aSmrg	case 'q':
985f2e35a3aSmrg	    message_level--;
986f2e35a3aSmrg	    break;
987f2e35a3aSmrg	case 'r':
988f2e35a3aSmrg	    opt_reverse = 1;
989f2e35a3aSmrg	    break;
990f2e35a3aSmrg	case 'v':
991f2e35a3aSmrg	    message_level++;
992f2e35a3aSmrg	    break;
993f2e35a3aSmrg	default:
994f2e35a3aSmrg	    usage();
995f2e35a3aSmrg	}
996f2e35a3aSmrg    }
997f2e35a3aSmrg    if (opt_all) {
998f2e35a3aSmrg	test_utf8_convert();
999f2e35a3aSmrg    } else {
1000f2e35a3aSmrg	if (optind >= argc)
1001f2e35a3aSmrg	    usage();
1002f2e35a3aSmrg	while (optind < argc) {
1003f2e35a3aSmrg	    do_range(argv[optind++]);
1004f2e35a3aSmrg	}
1005f2e35a3aSmrg	if (total_test) {
1006f2e35a3aSmrg	    printf("%ld/%ld mismatches (%.0f%%)\n",
1007f2e35a3aSmrg		   total_errs,
1008f2e35a3aSmrg		   total_test,
1009f2e35a3aSmrg		   (100.0 * (double) total_errs) / (double) total_test);
1010f2e35a3aSmrg	}
1011f2e35a3aSmrg    }
1012f2e35a3aSmrg    return EXIT_SUCCESS;
1013f2e35a3aSmrg}
1014f2e35a3aSmrg#else
/*
 * Placeholder entry point for builds where the OPT_WIDE_CHARS tests are not
 * compiled in: it reports that there is nothing to do and exits successfully.
 */
int
main(int argc, char **argv)
{
    /* the arguments are intentionally ignored */
    (void) argc, (void) argv;

    fputs("Nothing to be done here...\n", stdout);
    return EXIT_SUCCESS;
}
1023f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */
1024d522f475Smrg#endif
1025