ptydata.c revision 5307cd1a
1/* $XTermId: ptydata.c,v 1.157 2022/10/06 21:55:29 tom Exp $ */
2
3/*
4 * Copyright 1999-2020,2022 by Thomas E. Dickey
5 *
6 *                         All Rights Reserved
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 * Except as contained in this notice, the name(s) of the above copyright
28 * holders shall not be used in advertising or otherwise to promote the
29 * sale, use or other dealings in this Software without prior written
30 * authorization.
31 */
32
33#include <data.h>
34
35#if OPT_WIDE_CHARS
36#include <menu.h>
37#include <wcwidth.h>
38#endif
39
40#ifdef TEST_DRIVER
/*
 * In the test driver, tracing goes straight to stdout via printf.  TRACE() is
 * always active, TRACE2() is compiled (so its arguments are still checked)
 * but never executed.  Both expand to a single do/while statement so that
 * they are safe inside an unbraced if/else: a bare "if (1) printf p" would
 * capture the caller's "else".
 *
 * visibleChars() is replaced by a fixed string since the real formatter is
 * not linked into the test driver.
 */
#undef TRACE
#define TRACE(p) do { printf p; } while (0)
#undef TRACE2
#define TRACE2(p) do { if (0) printf p; } while (0)
#define visibleChars(buf, len) "buffer"
46#endif
47
/*
 * E_TEST(err) is true when 'err' means "no data available right now".
 *
 * Some nominally-POSIX systems return EWOULDBLOCK where EAGAIN is expected,
 * so both codes are accepted when the platform defines both; otherwise
 * whichever one exists is used.
 *
 * The argument may be evaluated more than once: do not pass an expression
 * with side effects.
 */
#if defined(EAGAIN) && defined(EWOULDBLOCK)
#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define E_TEST(err) ((err) == EAGAIN)
#else
#define E_TEST(err) ((err) == EWOULDBLOCK)
#endif
62
63#if OPT_WIDE_CHARS
64/*
65 * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data.
66 * The number of bytes converted will be nonzero iff there is data.
67 */
68Bool
69decodeUtf8(TScreen *screen, PtyData *data)
70{
71    size_t i;
72    size_t length = (size_t) (data->last - data->next);
73    int utf_count = 0;
74    unsigned utf_char = 0;
75
76    data->utf_size = 0;
77    for (i = 0; i < length; i++) {
78	unsigned c = data->next[i];
79
80	/* Combine UTF-8 into Unicode */
81	if (c < 0x80) {
82	    /* We received an ASCII character */
83	    if (utf_count > 0) {
84		data->utf_data = UCS_REPL;	/* prev. sequence incomplete */
85		data->utf_size = i;
86	    } else {
87		data->utf_data = (IChar) c;
88		data->utf_size = 1;
89	    }
90	    break;
91	} else if (screen->vt100_graphics
92		   && (c < 0x100)
93		   && (utf_count == 0)
94		   && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
95	    data->utf_data = (IChar) c;
96	    data->utf_size = 1;
97	    break;
98	} else if (c < 0xc0) {
99	    /* We received a continuation byte */
100	    if (utf_count < 1) {
101		/*
102		 * We received a continuation byte before receiving a sequence
103		 * state.  Or an attempt to use a C1 control string.  Either
104		 * way, it is mapped to the replacement character, unless
105		 * allowed by optional feature.
106		 */
107		data->utf_data = (IChar) (screen->c1_printable ? c : UCS_REPL);
108		data->utf_size = (i + 1);
109		break;
110	    } else if (screen->utf8_weblike
111		       && (utf_count == 3
112			   && utf_char == 0x04
113			   && c >= 0x90)) {
114		/* The encoding would form a code point beyond U+10FFFF. */
115		data->utf_size = i;
116		data->utf_data = UCS_REPL;
117		break;
118	    } else if (screen->utf8_weblike
119		       && (utf_count == 2
120			   && utf_char == 0x0d
121			   && c >= 0xa0)) {
122		/* The encoding would form a surrogate code point. */
123		data->utf_size = i;
124		data->utf_data = UCS_REPL;
125		break;
126	    } else {
127		/* Check for overlong UTF-8 sequences for which a shorter
128		 * encoding would exist and replace them with UCS_REPL.
129		 * An overlong UTF-8 sequence can have any of the following
130		 * forms:
131		 *   1100000x 10xxxxxx
132		 *   11100000 100xxxxx 10xxxxxx
133		 *   11110000 1000xxxx 10xxxxxx 10xxxxxx
134		 *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
135		 *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
136		 */
137		if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
138		    if (screen->utf8_weblike) {
139			/* overlong sequence continued */
140			data->utf_data = UCS_REPL;
141			data->utf_size = i;
142			break;
143		    } else {
144			utf_char = UCS_REPL;
145		    }
146		}
147		utf_char <<= 6;
148		utf_char |= (c & 0x3f);
149		if ((utf_char >= 0xd800 &&
150		     utf_char <= 0xdfff) ||
151		    (utf_char == 0xfffe) ||
152		    (utf_char == HIDDEN_CHAR)) {
153		    utf_char = UCS_REPL;
154		}
155		utf_count--;
156		if (utf_count == 0) {
157#if !OPT_WIDER_ICHAR
158		    /* characters outside UCS-2 become UCS_REPL */
159		    if (utf_char > NARROW_ICHAR) {
160			TRACE(("using replacement for %#x\n", utf_char));
161			utf_char = UCS_REPL;
162		    }
163#endif
164		    data->utf_data = (IChar) utf_char;
165		    data->utf_size = (i + 1);
166		    break;
167		}
168	    }
169	} else {
170	    /* We received a sequence start byte */
171	    if (utf_count > 0) {
172		/* previous sequence is incomplete */
173		data->utf_data = UCS_REPL;
174		data->utf_size = i;
175		break;
176	    }
177	    if (screen->utf8_weblike) {
178		if (c < 0xe0) {
179		    if (!(c & 0x1e)) {
180			/* overlong sequence start */
181			data->utf_data = UCS_REPL;
182			data->utf_size = (i + 1);
183			break;
184		    }
185		    utf_count = 1;
186		    utf_char = (c & 0x1f);
187		} else if (c < 0xf0) {
188		    utf_count = 2;
189		    utf_char = (c & 0x0f);
190		} else if (c < 0xf5) {
191		    utf_count = 3;
192		    utf_char = (c & 0x07);
193		} else {
194		    data->utf_data = UCS_REPL;
195		    data->utf_size = (i + 1);
196		    break;
197		}
198	    } else {
199		if (c < 0xe0) {
200		    utf_count = 1;
201		    utf_char = (c & 0x1f);
202		    if (!(c & 0x1e)) {
203			/* overlong sequence */
204			utf_char = UCS_REPL;
205		    }
206		} else if (c < 0xf0) {
207		    utf_count = 2;
208		    utf_char = (c & 0x0f);
209		} else if (c < 0xf8) {
210		    utf_count = 3;
211		    utf_char = (c & 0x07);
212		} else if (c < 0xfc) {
213		    utf_count = 4;
214		    utf_char = (c & 0x03);
215		} else if (c < 0xfe) {
216		    utf_count = 5;
217		    utf_char = (c & 0x01);
218		} else {
219		    data->utf_data = UCS_REPL;
220		    data->utf_size = (i + 1);
221		    break;
222		}
223	    }
224	}
225    }
226#if OPT_TRACE > 1
227    TRACE(("UTF-8 char %04X [%d..%d]\n",
228	   data->utf_data,
229	   (size_t) (data->next - data->buffer),
230	   (size_t) (data->next - data->buffer + data->utf_size - 1)));
231#endif
232
233    return (data->utf_size != 0);
234}
235#endif
236
237int
238readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
239{
240    TScreen *screen = TScreenOf(xw);
241    int size = 0;
242
243#ifdef VMS
244    if (*select_mask & pty_mask) {
245	trimPtyData(xw, data);
246	if (read_queue.flink != 0) {
247	    size = tt_read(data->next);
248	    if (size == 0) {
249		Panic("input: read returned zero\n", 0);
250	    }
251	} else {
252	    sys$hiber();
253	}
254    }
255#else /* !VMS */
256    if (FD_ISSET(screen->respond, select_mask)) {
257	int save_err;
258	trimPtyData(xw, data);
259
260	size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
261	save_err = errno;
262#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
263	/*
264	 * Yes, I know this is a majorly f*ugly hack, however it seems to
265	 * be necessary for Solaris x86.  DWH 11/15/94
266	 * Dunno why though..
267	 * (and now CYGWIN, alanh@xfree86.org 08/15/01
268	 */
269	if (size <= 0) {
270	    if (save_err == EIO || save_err == 0)
271		NormalExit();
272	    else if (!E_TEST(save_err))
273		Panic("input: read returned unexpected error (%d)\n", save_err);
274	    size = 0;
275	}
276#else /* !f*ugly */
277	if (size < 0) {
278	    if (save_err == EIO)
279		NormalExit();
280	    else if (!E_TEST(save_err))
281		Panic("input: read returned unexpected error (%d)\n", save_err);
282	    size = 0;
283	} else if (size == 0) {
284#if defined(__FreeBSD__)
285	    NormalExit();
286#else
287	    Panic("input: read returned zero\n", 0);
288#endif
289	}
290#endif /* f*ugly */
291    }
292#endif /* VMS */
293
294    if (size) {
295#if OPT_TRACE
296	int i;
297
298	TRACE(("read %d bytes from pty\n", size));
299	for (i = 0; i < size; i++) {
300	    if (!(i % 16))
301		TRACE(("%s", i ? "\n    " : "READ"));
302	    TRACE((" %02X", data->last[i]));
303	}
304	TRACE(("\n"));
305#endif
306	data->last += size;
307#ifdef ALLOWLOGGING
308	TScreenOf(term)->logstart = VTbuffer->next;
309#endif
310    }
311
312    return (size);
313}
314
315/*
316 * Return the next value from the input buffer.  Note that morePtyData() is
317 * always called before this function, so we can do the UTF-8 input conversion
318 * in that function and simply return the result here.
319 */
320#if OPT_WIDE_CHARS
321IChar
322nextPtyData(TScreen *screen, PtyData *data)
323{
324    IChar result;
325    if (screen->utf8_inparse) {
326	skipPtyData(data, result);
327    } else {
328	result = *((data)->next++);
329	if (!screen->output_eight_bits) {
330	    result = (IChar) (result & 0x7f);
331	}
332    }
333    TRACE2(("nextPtyData returns %#x\n", result));
334    return result;
335}
336#endif
337
338#if OPT_WIDE_CHARS
339/*
340 * Called when UTF-8 mode has been turned on/off.
341 */
342void
343switchPtyData(TScreen *screen, int flag)
344{
345    if (screen->utf8_mode != flag) {
346	screen->utf8_mode = flag;
347	screen->utf8_inparse = (Boolean) (flag != 0);
348	mk_wcwidth_init(screen->utf8_mode);
349
350	TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
351	update_font_utf8_mode();
352    }
353}
354#endif
355
356/*
357 * Allocate a buffer.
358 */
359void
360initPtyData(PtyData **result)
361{
362    PtyData *data;
363
364    TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
365	    FRG_SIZE, BUF_SIZE));
366
367    if (FRG_SIZE < 64)
368	FRG_SIZE = 64;
369    if (BUF_SIZE < FRG_SIZE)
370	BUF_SIZE = FRG_SIZE;
371    if (BUF_SIZE % FRG_SIZE)
372	BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
373
374    TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
375	    FRG_SIZE, BUF_SIZE));
376
377    data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
378
379    memset(data, 0, sizeof(*data));
380    data->next = data->buffer;
381    data->last = data->buffer;
382    *result = data;
383}
384
385/*
386 * Initialize a buffer for the caller, using its data in 'next'.
387 */
388#if OPT_WIDE_CHARS
389PtyData *
390fakePtyData(PtyData *result, Char *next, Char *last)
391{
392    PtyData *data = result;
393
394    memset(data, 0, sizeof(*data));
395    data->next = next;
396    data->last = last;
397
398    return data;
399}
400#endif
401
402/*
403 * Remove used data by shifting the buffer down, to make room for more data,
404 * e.g., a continuation-read.
405 */
406void
407trimPtyData(XtermWidget xw, PtyData *data)
408{
409    (void) xw;
410    FlushLog(xw);
411
412    if (data->next != data->buffer) {
413	size_t i;
414	size_t n = (size_t) (data->last - data->next);
415
416	TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
417	for (i = 0; i < n; ++i) {
418	    data->buffer[i] = data->next[i];
419	}
420	data->next = data->buffer;
421	data->last = data->next + n;
422    }
423
424}
425
426/*
427 * Insert new data into the input buffer so the next calls to morePtyData()
428 * and nextPtyData() will return that.
429 */
430void
431fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
432{
433    size_t size;
434    size_t n;
435
436    /* remove the used portion of the buffer */
437    trimPtyData(xw, data);
438
439    VTbuffer->last += length;
440    size = (size_t) (VTbuffer->last - VTbuffer->next);
441
442    /* shift the unused portion up to make room */
443    for (n = size; n >= length; --n)
444	VTbuffer->next[n] = VTbuffer->next[n - length];
445
446    /* insert the new bytes to interpret */
447    for (n = 0; n < length; n++)
448	VTbuffer->next[n] = CharOf(value[n]);
449}
450
451#if OPT_WIDE_CHARS
452/*
453 * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
454 * 'lp', and returning a pointer past the converted character.
455 */
456Char *
457convertToUTF8(Char *lp, unsigned c)
458{
459#define CH(n) (Char)((c) >> ((n) * 8))
460    if (c < 0x80) {
461	/*  0*******  */
462	*lp++ = (Char) CH(0);
463    } else if (c < 0x800) {
464	/*  110***** 10******  */
465	*lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
466	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
467    } else if (c < 0x00010000) {
468	/*  1110**** 10****** 10******  */
469	*lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
470	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
471	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
472    } else if (c < 0x00200000) {
473	*lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
474	*lp++ = (Char) (0x80 |
475			((int) (CH(1) & 0xf0) >> 4) |
476			((int) (CH(2) & 0x03) << 4));
477	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
478	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
479    } else if (c < 0x04000000) {
480	*lp++ = (Char) (0xf8 | (CH(3) & 0x03));
481	*lp++ = (Char) (0x80 | (CH(2) >> 2));
482	*lp++ = (Char) (0x80 |
483			((int) (CH(1) & 0xf0) >> 4) |
484			((int) (CH(2) & 0x03) << 4));
485	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
486	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
487    } else {
488	*lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
489	*lp++ = (Char) (0x80 | (CH(3) & 0x3f));
490	*lp++ = (Char) (0x80 | (CH(2) >> 2));
491	*lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
492	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
493	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
494    }
495    return lp;
496#undef CH
497}
498
499/*
500 * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
501 * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
502 * making it possible to use this to convert to Latin-1.
503 *
504 * If the conversion fails, return null.
505 */
506Char *
507convertFromUTF8(Char *lp, unsigned *cp)
508{
509    int want;
510
511    /*
512     * Find the number of bytes we will need from the source.
513     */
514    if ((*lp & 0x80) == 0) {
515	want = 1;
516    } else if ((*lp & 0xe0) == 0xc0) {
517	want = 2;
518    } else if ((*lp & 0xf0) == 0xe0) {
519	want = 3;
520    } else if ((*lp & 0xf8) == 0xf0) {
521	want = 4;
522    } else if ((*lp & 0xfc) == 0xf8) {
523	want = 5;
524    } else if ((*lp & 0xfe) == 0xfc) {
525	want = 6;
526    } else {
527	want = 0;
528    }
529
530    if (want) {
531	int have = 1;
532
533	while (lp[have] != '\0') {
534	    if ((lp[have] & 0xc0) != 0x80)
535		break;
536	    ++have;
537	}
538	if (want == have) {
539	    unsigned mask = 0;
540	    int j;
541	    int shift = 0;
542
543	    *cp = 0;
544	    switch (want) {
545	    case 1:
546		mask = (*lp);
547		break;
548	    case 2:
549		mask = (*lp & 0x1f);
550		break;
551	    case 3:
552		mask = (*lp & 0x0f);
553		break;
554	    case 4:
555		mask = (*lp & 0x07);
556		break;
557	    case 5:
558		mask = (*lp & 0x03);
559		break;
560	    case 6:
561		mask = (*lp & 0x01);
562		break;
563	    default:
564		mask = 0;
565		break;
566	    }
567
568	    for (j = 1; j < want; j++) {
569		*cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
570		shift += 6;
571	    }
572	    *cp |= mask << shift;
573	    lp += want;
574	} else {
575	    *cp = BAD_ASCII;
576	    lp = NULL;
577	}
578    } else {
579	*cp = BAD_ASCII;
580	lp = NULL;
581    }
582    return lp;
583}
584
585/*
586 * Returns true if the entire string is valid UTF-8.
587 */
588Boolean
589isValidUTF8(Char *lp)
590{
591    Boolean result = True;
592    while (*lp) {
593	unsigned ch;
594	Char *next = convertFromUTF8(lp, &ch);
595	if (next == NULL || ch == 0) {
596	    result = False;
597	    break;
598	}
599	lp = next;
600    }
601    return result;
602}
603
604/*
605 * Write data back to the PTY
606 */
607void
608writePtyData(int f, IChar *d, size_t len)
609{
610    size_t n = (len << 1);
611
612    if (VTbuffer->write_len <= len) {
613	VTbuffer->write_len = n;
614	VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
615    }
616
617    for (n = 0; n < len; n++)
618	VTbuffer->write_buf[n] = (Char) d[n];
619
620    TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
621	   visibleChars(VTbuffer->write_buf, n)));
622    v_write(f, VTbuffer->write_buf, n);
623}
624#endif /* OPT_WIDE_CHARS */
625
626#ifdef NO_LEAKS
627void
628noleaks_ptydata(void)
629{
630    if (VTbuffer != 0) {
631#if OPT_WIDE_CHARS
632	free(VTbuffer->write_buf);
633#endif
634	FreeAndNull(VTbuffer);
635    }
636}
637#endif
638
639#ifdef TEST_DRIVER
640
641#include "data.c"
642
/*
 * Test-driver stand-in for the program's normal exit: report on stderr and
 * exit successfully.
 */
void
NormalExit(void)
{
    fputs("NormalExit!\n", stderr);
    exit(EXIT_SUCCESS);
}
649
/*
 * Test-driver stand-in for the fatal-error handler: the message and its
 * argument are ignored; report on stderr and exit with a failure status.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;
    fputs("Panic!\n", stderr);
    exit(EXIT_FAILURE);
}
658
659#if OPT_WIDE_CHARS
660
661#ifdef ALLOWLOGGING
662void
663FlushLog(XtermWidget xw)
664{
665    (void) xw;
666}
667#endif
668
669void
670v_write(int f, const Char *data, size_t len)
671{
672    (void) f;
673    (void) data;
674    (void) len;
675}
676
677void
678mk_wcwidth_init(int mode)
679{
680    (void) mode;
681}
682
683void
684update_font_utf8_mode(void)
685{
686}
687
/* Command-line settings of the test driver; all start out as zero. */
static int message_level;	/* raised by -v, lowered by -q */
static int opt_all;		/* -a */
static int opt_illegal;		/* -i */
static int opt_convert;		/* -c */
static int opt_reverse;		/* -r */

/* Running totals which main() reports at the end. */
static long total_test;
static long total_errs;
695
/*
 * Print the command-line summary on stderr and exit with a failure status.
 */
static void
usage(void)
{
    static const char *const msg[] =
    {
	"Usage: test_ptydata [options] [c1[-c1b] [c2[-c2b] [...]]]",
	"",
	"Options:",
	" -a  exercise all legal encode/decode to/from UTF-8",
	" -c  call convertFromUTF8 rather than decodeUtf8",
	" -i  ignore illegal UTF-8 when testing -r option",
	" -q  quieter",
	" -r  reverse/decode from UTF-8 byte-string to/from Unicode",
	" -v  more verbose"
    };
    size_t n;

    for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
	fprintf(stderr, "%s\n", msg[n]);
    }
    exit(EXIT_FAILURE);
}
717
718/*
719 * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
720 */
721#define OkRange(n,lo,hi) \
722 	if (value[n] < lo || value[n] > hi) { \
723	    result = False; \
724	    break; \
725	}
726static Bool
727is_legal_utf8(const Char *value)
728{
729    Bool result = True;
730    Char ch;
731    while ((ch = *value) != '\0') {
732	if (ch <= 0x7f) {
733	    ++value;
734	} else if (ch >= 0xc2 && ch <= 0xdf) {
735	    OkRange(1, 0x80, 0xbf);
736	    value += 2;
737	} else if (ch == 0xe0) {
738	    OkRange(1, 0xa0, 0xbf);
739	    OkRange(2, 0x80, 0xbf);
740	    value += 3;
741	} else if (ch >= 0xe1 && ch <= 0xef) {
742	    OkRange(1, 0x80, 0xbf);
743	    OkRange(2, 0x80, 0xbf);
744	    value += 3;
745	} else if (ch == 0xf0) {
746	    OkRange(1, 0x90, 0xbf);
747	    OkRange(2, 0x80, 0xbf);
748	    OkRange(3, 0x80, 0xbf);
749	    value += 4;
750	} else if (ch >= 0xf1 && ch <= 0xf3) {
751	    OkRange(1, 0x80, 0xbf);
752	    OkRange(2, 0x80, 0xbf);
753	    OkRange(3, 0x80, 0xbf);
754	    value += 4;
755	} else if (ch == 0xf4) {
756	    OkRange(1, 0x80, 0x8f);
757	    OkRange(2, 0x80, 0xbf);
758	    OkRange(3, 0x80, 0xbf);
759	    value += 4;
760	} else {
761	    result = False;
762	    break;
763	}
764    }
765    return result;
766}
767
768static void
769test_utf8_convert(void)
770{
771    unsigned c_in, c_out;
772    Char buffer[10];
773    Char *result;
774    unsigned limit = 0x110000;
775    unsigned success = 0;
776    unsigned bucket[256];
777
778    memset(bucket, 0, sizeof(bucket));
779    for (c_in = 0; c_in < limit; ++c_in) {
780	memset(buffer, 0, sizeof(buffer));
781	if ((result = convertToUTF8(buffer, c_in)) == 0) {
782	    TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
783	} else {
784	    if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
785		TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
786	    } else if (c_in != c_out) {
787		TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
788		       c_in, c_out));
789	    } else {
790		while (result-- != buffer) {
791		    bucket[*result]++;
792		}
793		++success;
794	    }
795	}
796    }
797    TRACE(("%u/%u successful\n", success, limit));
798    for (c_in = 0; c_in < 256; ++c_in) {
799	if ((c_in % 8) == 0) {
800	    TRACE((" %02X:", c_in));
801	}
802	TRACE((" %8X", bucket[c_in]));
803	if (((c_in + 1) % 8) == 0) {
804	    TRACE(("\n"));
805	}
806    }
807}
808
/*
 * Parse one non-negative number from 'source'.
 *
 * A leading "U+" (or "u+") selects hexadecimal and "0b" selects binary;
 * otherwise strtol() chooses the radix from the usual prefixes (0x, 0).
 * '*target' is set to the first character which was not parsed.
 *
 * Returns the value, or -1 if nothing could be parsed, or if the value is
 * negative or does not fit in the result.
 */
static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
	source += 2;
	radix = 2;
    }

    errno = 0;			/* strtol reports overflow only via errno */
    check = strtol(source, target, radix);
    if (*target != NULL
	&& *target != source
	&& errno != ERANGE
	&& check >= 0
	&& check <= 0x7fffffffL) {
	/* in range, so the cast cannot truncate */
	result = (int) check;
    }
    return result;
}
827
/*
 * Parse "lo" or "lo<sep>hi" from 'source', where <sep> is any run of the
 * characters ':', '-', '.', tab or space.  If no upper bound can be parsed,
 * it is set equal to the lower bound.
 *
 * Returns 1 on success, or 0 if the lower bound could not be parsed (in
 * which case '*hi' is left unchanged).
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *ignored;

    *lo = decode_one(source, &rest);
    if (*lo < 0) {
	return 0;
    }

    rest += strspn(rest, ":-.\t ");
    *hi = decode_one(rest, &ignored);
    if (*hi < 0) {
	*hi = *lo;
    }
    return 1;
}
843
844#define MAX_BYTES 6
845
846static void
847do_range(const char *source)
848{
849    int lo, hi;
850
851    TScreen screen;
852    memset(&screen, 0, sizeof(screen));
853
854    if (decode_range(source, &lo, &hi)) {
855	while (lo <= hi) {
856	    unsigned c_in = (unsigned) lo++;
857	    PtyData *data;
858	    Char *next;
859	    Char buffer[MAX_BYTES + 1];
860
861	    if (opt_reverse) {
862		Bool skip = False;
863		Bool first = True;
864		int j, k;
865		for (j = 0; j < MAX_BYTES; ++j) {
866		    unsigned long bits = ((unsigned long) c_in >> (8 * j));
867		    if ((buffer[j] = (Char) bits) == 0) {
868			skip = (bits != 0);
869			break;
870		    }
871		}
872		if (skip)
873		    continue;
874		initPtyData(&data);
875		for (k = 0; k <= j; ++k) {
876		    data->buffer[k] = buffer[j - k - 1];
877		}
878		if (opt_illegal && !is_legal_utf8(data->buffer)) {
879		    free(data);
880		    continue;
881		}
882		if (message_level > 1) {
883		    printf("TEST ");
884		    for (k = 0; k < j; ++k) {
885			printf("%02X", data->buffer[k]);
886		    }
887		}
888		data->next = data->buffer;
889		data->last = data->buffer + j;
890		while (decodeUtf8(&screen, data)) {
891		    total_test++;
892		    if (data->utf_data == UCS_REPL)
893			total_errs++;
894		    data->next += data->utf_size;
895		    if (message_level > 1) {
896			printf("%s%04X", first ? " ->" : ", ", data->utf_data);
897		    }
898		    first = False;
899		}
900		if (!first)
901		    total_test--;
902		if (message_level > 1) {
903		    printf("\n");
904		    fflush(stdout);
905		}
906		free(data);
907	    } else if (opt_convert) {
908		unsigned c_out;
909		Char *result;
910
911		memset(buffer, 0, sizeof(buffer));
912		if ((result = next = convertToUTF8(buffer, c_in)) == 0) {
913		    fprintf(stderr,
914			    "conversion of U+%04X to UTF-8 failed\n", c_in);
915		} else if ((result = convertFromUTF8(buffer, &c_out)) == 0) {
916		    fprintf(stderr,
917			    "conversion of U+%04X from UTF-8 failed\n", c_in);
918		    total_errs++;
919		} else if (c_in != c_out) {
920		    fprintf(stderr,
921			    "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
922			    c_in, c_out);
923		} else if (message_level > 1) {
924		    *next = '\0';
925		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
926			   (unsigned long) (next - buffer),
927			   buffer,
928			   c_out);
929		    fflush(stdout);
930		}
931	    } else {
932		initPtyData(&data);
933		next = convertToUTF8(data->buffer, c_in);
934		*next = 0;
935		data->next = data->buffer;
936		data->last = next;
937		decodeUtf8(&screen, data);
938		if (message_level > 1) {
939		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
940			   (unsigned long) (next - data->buffer),
941			   data->buffer,
942			   data->utf_data);
943		    fflush(stdout);
944		}
945		if (c_in != data->utf_data) {
946		    fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
947		    total_errs++;
948		}
949		free(data);
950	    }
951	    total_test++;
952	}
953    }
954}
955
956int
957main(int argc, char **argv)
958{
959    int ch;
960
961    setlocale(LC_ALL, "");
962    while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
963	switch (ch) {
964	case 'a':
965	    opt_all = 1;
966	    break;
967	case 'c':
968	    opt_convert = 1;
969	    break;
970	case 'i':
971	    opt_illegal = 1;
972	    break;
973	case 'q':
974	    message_level--;
975	    break;
976	case 'r':
977	    opt_reverse = 1;
978	    break;
979	case 'v':
980	    message_level++;
981	    break;
982	default:
983	    usage();
984	}
985    }
986    if (opt_all) {
987	test_utf8_convert();
988    } else {
989	if (optind >= argc)
990	    usage();
991	while (optind < argc) {
992	    do_range(argv[optind++]);
993	}
994	if (total_test) {
995	    printf("%ld/%ld mismatches (%.0f%%)\n",
996		   total_errs,
997		   total_test,
998		   (100.0 * (double) total_errs) / (double) total_test);
999	}
1000    }
1001    return EXIT_SUCCESS;
1002}
1003#else
/*
 * Without wide-character support there is nothing to test; say so and
 * exit successfully.
 */
int
main(int argc, char **argv)
{
    (void) argv;
    (void) argc;
    puts("Nothing to be done here...");
    return EXIT_SUCCESS;
}
1012#endif /* OPT_WIDE_CHARS */
1013#endif
1014