Home | History | Annotate | Line # | Download | only in dist
      1 /* $XTermId: ptydata.c,v 1.163 2024/12/01 23:48:07 tom Exp $ */
      2 
      3 /*
      4  * Copyright 1999-2023,2024 by Thomas E. Dickey
      5  *
      6  *                         All Rights Reserved
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the
     10  * "Software"), to deal in the Software without restriction, including
     11  * without limitation the rights to use, copy, modify, merge, publish,
     12  * distribute, sublicense, and/or sell copies of the Software, and to
     13  * permit persons to whom the Software is furnished to do so, subject to
     14  * the following conditions:
     15  *
     16  * The above copyright notice and this permission notice shall be included
     17  * in all copies or substantial portions of the Software.
     18  *
     19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     22  * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
     23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     26  *
     27  * Except as contained in this notice, the name(s) of the above copyright
     28  * holders shall not be used in advertising or otherwise to promote the
     29  * sale, use or other dealings in this Software without prior written
     30  * authorization.
     31  */
     32 
     33 #include <data.h>
     34 
     35 #if OPT_WIDE_CHARS
     36 #include <menu.h>
     37 #include <wcwidth.h>
     38 #endif
     39 
     40 #ifdef TEST_DRIVER
     41 #undef TRACE
     42 #define TRACE(p) if (1) printf p
     43 #undef TRACE2
     44 #define TRACE2(p) if (0) printf p
     45 #define visibleChars(buf, len) "buffer"
     46 #endif
     47 
     48 /*
     49  * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX
     50  * systems are broken and return EWOULDBLOCK when they should return EAGAIN.
     51  * Note that this macro may evaluate its argument more than once.
     52  */
     53 #if defined(EAGAIN) && defined(EWOULDBLOCK)
     54 #define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK)
     55 #else
     56 #ifdef EAGAIN
     57 #define E_TEST(err) ((err) == EAGAIN)
     58 #else
     59 #define E_TEST(err) ((err) == EWOULDBLOCK)
     60 #endif
     61 #endif
     62 
     63 #if OPT_WIDE_CHARS
/*
 * Decode one character from the front of the unread input: convert the 8-bit
 * codes starting at data->next (and ending before data->last) into a single
 * Unicode value in data->utf_data, and store in data->utf_size the number of
 * input bytes which that value accounts for.
 *
 * Several kinds of malformed input (stray continuation bytes, truncated or
 * overlong sequences, surrogates, 0xfffe, HIDDEN_CHAR) are replaced by
 * UCS_REPL.  When screen->utf8_weblike is set, the replacement is produced
 * at the first offending byte rather than at the end of the sequence.
 *
 * If the available input ends in the middle of a multibyte sequence, no
 * value is produced and data->utf_size is left at zero.
 *
 * Returns true iff data->utf_size is nonzero.
 */
Bool
decodeUtf8(TScreen *screen, PtyData *data)
{
    size_t i;
    size_t length = (size_t) (data->last - data->next);
    int utf_count = 0;		/* continuation bytes still expected */
    unsigned utf_char = 0;	/* value accumulated from the sequence so far */

    /* zero means "nothing decoded"; every successful exit below overrides it */
    data->utf_size = 0;
    for (i = 0; i < length; i++) {
	unsigned c = data->next[i];

	/* Combine UTF-8 into Unicode */
	if (c < 0x80) {
	    /* We received an ASCII character */
	    if (utf_count > 0) {
		/*
		 * Report only the bytes before this one, so that the ASCII
		 * character itself is still unread.
		 */
		data->utf_data = UCS_REPL;	/* prev. sequence incomplete */
		data->utf_size = i;
	    } else {
		data->utf_data = (IChar) c;
		data->utf_size = 1;
	    }
	    break;
	} else if (screen->vt100_graphics
		   && (c < 0x100)
		   && (utf_count == 0)
		   && screen->gsets[(int) screen->curgr] != nrc_ASCII) {
	    /*
	     * Outside of a multibyte sequence, with a non-ASCII character set
	     * selected, the byte is passed through unchanged.
	     */
	    data->utf_data = (IChar) c;
	    data->utf_size = 1;
	    break;
	} else if (c < 0xc0) {
	    /* We received a continuation byte */
	    if (utf_count < 1) {
		if (screen->c1_printable) {
		    data->utf_data = (IChar) c;
		} else if ((i + 1) < length
			   && data->next[i + 1] > 0x20
			   && data->next[i + 1] < 0x80) {
		    /*
		     * Allow for C1 control string if the next byte is
		     * available for inspection.
		     */
		    data->utf_data = (IChar) c;
		} else {
		    /*
		     * We received a continuation byte before receiving a
		     * sequence state, or a failed attempt to use a C1 control
		     * string.
		     */
		    data->utf_data = (IChar) UCS_REPL;
		}
		data->utf_size = (i + 1);
		break;
	    } else if (screen->utf8_weblike
		       && (utf_count == 3
			   && utf_char == 0x04
			   && c >= 0x90)) {
		/* The encoding would form a code point beyond U+10FFFF. */
		data->utf_size = i;
		data->utf_data = UCS_REPL;
		break;
	    } else if (screen->utf8_weblike
		       && (utf_count == 2
			   && utf_char == 0x0d
			   && c >= 0xa0)) {
		/* The encoding would form a surrogate code point. */
		data->utf_size = i;
		data->utf_data = UCS_REPL;
		break;
	    } else {
		/* Check for overlong UTF-8 sequences for which a shorter
		 * encoding would exist and replace them with UCS_REPL.
		 * An overlong UTF-8 sequence can have any of the following
		 * forms:
		 *   1100000x 10xxxxxx
		 *   11100000 100xxxxx 10xxxxxx
		 *   11110000 1000xxxx 10xxxxxx 10xxxxxx
		 *   11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx
		 *   11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
		 */
		if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) {
		    if (screen->utf8_weblike) {
			/* overlong sequence continued */
			data->utf_data = UCS_REPL;
			data->utf_size = i;
			break;
		    } else {
			utf_char = UCS_REPL;
		    }
		}
		/* append the six payload bits of this continuation byte */
		utf_char <<= 6;
		utf_char |= (c & 0x3f);
		if ((utf_char >= 0xd800 &&
		     utf_char <= 0xdfff) ||
		    (utf_char == 0xfffe) ||
		    (utf_char == HIDDEN_CHAR)) {
		    utf_char = UCS_REPL;
		}
		utf_count--;
		if (utf_count == 0) {
		    /* that was the last byte expected for this sequence */
#if !OPT_WIDER_ICHAR
		    /* characters outside UCS-2 become UCS_REPL */
		    if (utf_char > NARROW_ICHAR) {
			TRACE(("using replacement for %#x\n", utf_char));
			utf_char = UCS_REPL;
		    }
#endif
		    data->utf_data = (IChar) utf_char;
		    data->utf_size = (i + 1);
		    break;
		}
	    }
	} else {
	    /* We received a sequence start byte */
	    if (utf_count > 0) {
		/* previous sequence is incomplete */
		data->utf_data = UCS_REPL;
		data->utf_size = i;
		break;
	    }
	    if (screen->utf8_weblike) {
		/* only 2..4 byte sequences are accepted in this mode */
		if (c < 0xe0) {
		    if (!(c & 0x1e)) {
			/* overlong sequence start */
			data->utf_data = UCS_REPL;
			data->utf_size = (i + 1);
			break;
		    }
		    utf_count = 1;
		    utf_char = (c & 0x1f);
		} else if (c < 0xf0) {
		    utf_count = 2;
		    utf_char = (c & 0x0f);
		} else if (c < 0xf5) {
		    utf_count = 3;
		    utf_char = (c & 0x07);
		} else {
		    data->utf_data = UCS_REPL;
		    data->utf_size = (i + 1);
		    break;
		}
	    } else {
		/* this mode also accepts the 5- and 6-byte start bytes */
		if (c < 0xe0) {
		    utf_count = 1;
		    utf_char = (c & 0x1f);
		    if (!(c & 0x1e)) {
			/* overlong sequence */
			utf_char = UCS_REPL;
		    }
		} else if (c < 0xf0) {
		    utf_count = 2;
		    utf_char = (c & 0x0f);
		} else if (c < 0xf8) {
		    utf_count = 3;
		    utf_char = (c & 0x07);
		} else if (c < 0xfc) {
		    utf_count = 4;
		    utf_char = (c & 0x03);
		} else if (c < 0xfe) {
		    utf_count = 5;
		    utf_char = (c & 0x01);
		} else {
		    data->utf_data = UCS_REPL;
		    data->utf_size = (i + 1);
		    break;
		}
	    }
	}
    }
#if OPT_TRACE > 1
    TRACE(("UTF-8 char %04X [%lu..%lu]\n",
	   data->utf_data,
	   (unsigned long) (data->next - data->buffer),
	   (unsigned long) (data->next - data->buffer + data->utf_size - 1)));
#endif

    return (data->utf_size != 0);
}
    246 #endif
    247 
    248 int
    249 readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data)
    250 {
    251     TScreen *screen = TScreenOf(xw);
    252     int size = 0;
    253 
    254     if (FD_ISSET(screen->respond, select_mask)) {
    255 	int save_err;
    256 	trimPtyData(xw, data);
    257 
    258 	size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE);
    259 	save_err = errno;
    260 #if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__)
    261 	/*
    262 	 * Yes, I know this is a majorly f*ugly hack, however it seems to
    263 	 * be necessary for Solaris x86.  DWH 11/15/94
    264 	 * Dunno why though..
    265 	 * (and now CYGWIN, alanh (at) xfree86.org 08/15/01
    266 	 */
    267 	if (size <= 0) {
    268 	    if (save_err == EIO || save_err == 0)
    269 		NormalExit();
    270 	    else if (!E_TEST(save_err))
    271 		Panic("input: read returned unexpected error (%d)\n", save_err);
    272 	    size = 0;
    273 	}
    274 #else /* !f*ugly */
    275 	if (size < 0) {
    276 	    if (save_err == EIO)
    277 		NormalExit();
    278 	    else if (!E_TEST(save_err))
    279 		Panic("input: read returned unexpected error (%d)\n", save_err);
    280 	    size = 0;
    281 	} else if (size == 0) {
    282 #if defined(__FreeBSD__)
    283 	    NormalExit();
    284 #else
    285 	    Panic("input: read returned zero\n", 0);
    286 #endif
    287 	}
    288 #endif /* f*ugly */
    289     }
    290 
    291     if (size) {
    292 #if OPT_TRACE
    293 	int i;
    294 
    295 	TRACE(("read %d bytes from pty\n", size));
    296 	for (i = 0; i < size; i++) {
    297 	    if (!(i % 16))
    298 		TRACE(("%s", i ? "\n    " : "READ"));
    299 	    TRACE((" %02X", data->last[i]));
    300 	}
    301 	TRACE(("\n"));
    302 #endif
    303 	data->last += size;
    304 #ifdef ALLOWLOGGING
    305 	TScreenOf(term)->logstart = VTbuffer->next;
    306 #endif
    307     }
    308 
    309     return (size);
    310 }
    311 
    312 /*
    313  * Return the next value from the input buffer.  Note that morePtyData() is
    314  * always called before this function, so we can do the UTF-8 input conversion
    315  * in that function and simply return the result here.
    316  */
    317 #if OPT_WIDE_CHARS
    318 IChar
    319 nextPtyData(TScreen *screen, PtyData *data)
    320 {
    321     IChar result;
    322     if (screen->utf8_inparse) {
    323 	skipPtyData(data, result);
    324     } else {
    325 	result = *((data)->next++);
    326 	if (!screen->output_eight_bits) {
    327 	    result = (IChar) (result & 0x7f);
    328 	}
    329     }
    330     TRACE2(("nextPtyData returns %#x\n", result));
    331     return result;
    332 }
    333 #endif
    334 
    335 #if OPT_WIDE_CHARS
    336 /*
    337  * Called when UTF-8 mode has been turned on/off.
    338  */
    339 void
    340 switchPtyData(TScreen *screen, int flag)
    341 {
    342     if (screen->utf8_mode != flag) {
    343 	screen->utf8_mode = flag;
    344 	screen->utf8_inparse = (Boolean) (flag != 0);
    345 	mk_wcwidth_init(screen->utf8_mode);
    346 
    347 	TRACE(("turning UTF-8 mode %s\n", BtoS(flag)));
    348 	update_font_utf8_mode();
    349     }
    350 }
    351 #endif
    352 
    353 /*
    354  * Allocate a buffer.
    355  */
    356 void
    357 initPtyData(PtyData **result)
    358 {
    359     PtyData *data;
    360 
    361     TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n",
    362 	    FRG_SIZE, BUF_SIZE));
    363 
    364     if (FRG_SIZE < 64)
    365 	FRG_SIZE = 64;
    366     if (BUF_SIZE < FRG_SIZE)
    367 	BUF_SIZE = FRG_SIZE;
    368     if (BUF_SIZE % FRG_SIZE)
    369 	BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE);
    370 
    371     TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n",
    372 	    FRG_SIZE, BUF_SIZE));
    373 
    374     data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE));
    375 
    376     memset(data, 0, sizeof(*data));
    377     data->next = data->buffer;
    378     data->last = data->buffer;
    379     *result = data;
    380 }
    381 
    382 /*
    383  * Initialize a buffer for the caller, using its data in 'next'.
    384  */
    385 #if OPT_WIDE_CHARS
    386 PtyData *
    387 fakePtyData(PtyData *result, Char *next, Char *last)
    388 {
    389     PtyData *data = result;
    390 
    391     memset(data, 0, sizeof(*data));
    392     data->next = next;
    393     data->last = last;
    394 
    395     return data;
    396 }
    397 #endif
    398 
    399 /*
    400  * Remove used data by shifting the buffer down, to make room for more data,
    401  * e.g., a continuation-read.
    402  */
    403 void
    404 trimPtyData(XtermWidget xw, PtyData *data)
    405 {
    406     (void) xw;
    407     FlushLog(xw);
    408 
    409     if (data->next != data->buffer) {
    410 	size_t i;
    411 	size_t n = (size_t) (data->last - data->next);
    412 
    413 	TRACE(("shifting buffer down by %lu\n", (unsigned long) n));
    414 	for (i = 0; i < n; ++i) {
    415 	    data->buffer[i] = data->next[i];
    416 	}
    417 	data->next = data->buffer;
    418 	data->last = data->next + n;
    419     }
    420 
    421 }
    422 
    423 /*
    424  * Insert new data into the input buffer so the next calls to morePtyData()
    425  * and nextPtyData() will return that.
    426  */
    427 void
    428 fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length)
    429 {
    430     size_t size;
    431     size_t n;
    432 
    433     /* remove the used portion of the buffer */
    434     trimPtyData(xw, data);
    435 
    436     VTbuffer->last += length;
    437     size = (size_t) (VTbuffer->last - VTbuffer->next);
    438 
    439     /* shift the unused portion up to make room */
    440     for (n = size; n >= length; --n)
    441 	VTbuffer->next[n] = VTbuffer->next[n - length];
    442 
    443     /* insert the new bytes to interpret */
    444     for (n = 0; n < length; n++)
    445 	VTbuffer->next[n] = CharOf(value[n]);
    446 }
    447 
    448 #if OPT_WIDE_CHARS
    449 /*
    450  * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target
    451  * 'lp', and returning a pointer past the converted character.
    452  */
    453 Char *
    454 convertToUTF8(Char *lp, unsigned c)
    455 {
    456 #define CH(n) (Char)((c) >> ((n) * 8))
    457     if (c < 0x80) {
    458 	/*  0*******  */
    459 	*lp++ = (Char) CH(0);
    460     } else if (c < 0x800) {
    461 	/*  110***** 10******  */
    462 	*lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2));
    463 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
    464     } else if (c < 0x00010000) {
    465 	/*  1110**** 10****** 10******  */
    466 	*lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4));
    467 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
    468 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
    469     } else if (c < 0x00200000) {
    470 	*lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2));
    471 	*lp++ = (Char) (0x80 |
    472 			((int) (CH(1) & 0xf0) >> 4) |
    473 			((int) (CH(2) & 0x03) << 4));
    474 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
    475 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
    476     } else if (c < 0x04000000) {
    477 	*lp++ = (Char) (0xf8 | (CH(3) & 0x03));
    478 	*lp++ = (Char) (0x80 | (CH(2) >> 2));
    479 	*lp++ = (Char) (0x80 |
    480 			((int) (CH(1) & 0xf0) >> 4) |
    481 			((int) (CH(2) & 0x03) << 4));
    482 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
    483 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
    484     } else {
    485 	*lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6));
    486 	*lp++ = (Char) (0x80 | (CH(3) & 0x3f));
    487 	*lp++ = (Char) (0x80 | (CH(2) >> 2));
    488 	*lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4));
    489 	*lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2));
    490 	*lp++ = (Char) (0x80 | (CH(0) & 0x3f));
    491     }
    492     return lp;
    493 #undef CH
    494 }
    495 
    496 /*
    497  * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer
    498  * past the converted UTF-8 input.  The first 256 values align with ISO-8859-1,
    499  * making it possible to use this to convert to Latin-1.
    500  *
    501  * If the conversion fails, return null.
    502  */
    503 Char *
    504 convertFromUTF8(Char *lp, unsigned *cp)
    505 {
    506     int want;
    507 
    508     /*
    509      * Find the number of bytes we will need from the source.
    510      */
    511     if ((*lp & 0x80) == 0) {
    512 	want = 1;
    513     } else if ((*lp & 0xe0) == 0xc0) {
    514 	want = 2;
    515     } else if ((*lp & 0xf0) == 0xe0) {
    516 	want = 3;
    517     } else if ((*lp & 0xf8) == 0xf0) {
    518 	want = 4;
    519     } else if ((*lp & 0xfc) == 0xf8) {
    520 	want = 5;
    521     } else if ((*lp & 0xfe) == 0xfc) {
    522 	want = 6;
    523     } else {
    524 	want = 0;
    525     }
    526 
    527     if (want) {
    528 	int have = 1;
    529 
    530 	while (lp[have] != '\0') {
    531 	    if ((lp[have] & 0xc0) != 0x80)
    532 		break;
    533 	    ++have;
    534 	}
    535 	if (want == have) {
    536 	    unsigned mask = 0;
    537 	    int j;
    538 	    int shift = 0;
    539 
    540 	    *cp = 0;
    541 	    switch (want) {
    542 	    case 1:
    543 		mask = (*lp);
    544 		break;
    545 	    case 2:
    546 		mask = (*lp & 0x1f);
    547 		break;
    548 	    case 3:
    549 		mask = (*lp & 0x0f);
    550 		break;
    551 	    case 4:
    552 		mask = (*lp & 0x07);
    553 		break;
    554 	    case 5:
    555 		mask = (*lp & 0x03);
    556 		break;
    557 	    case 6:
    558 		mask = (*lp & 0x01);
    559 		break;
    560 	    default:
    561 		mask = 0;
    562 		break;
    563 	    }
    564 
    565 	    for (j = 1; j < want; j++) {
    566 		*cp |= (unsigned) ((lp[want - j] & 0x3f) << shift);
    567 		shift += 6;
    568 	    }
    569 	    *cp |= mask << shift;
    570 	    lp += want;
    571 	} else {
    572 	    *cp = BAD_ASCII;
    573 	    lp = NULL;
    574 	}
    575     } else {
    576 	*cp = BAD_ASCII;
    577 	lp = NULL;
    578     }
    579     return lp;
    580 }
    581 
    582 /*
    583  * Returns true if the entire string is valid UTF-8.
    584  */
    585 Boolean
    586 isValidUTF8(Char *lp)
    587 {
    588     Boolean result = True;
    589     while (*lp) {
    590 	unsigned ch;
    591 	Char *next = convertFromUTF8(lp, &ch);
    592 	if (next == NULL || ch == 0) {
    593 	    result = False;
    594 	    break;
    595 	}
    596 	lp = next;
    597     }
    598     return result;
    599 }
    600 
    601 /*
    602  * Write data back to the PTY
    603  */
    604 void
    605 writePtyData(int f, IChar *d, size_t len)
    606 {
    607     size_t n = (len << 1);
    608 
    609     if (VTbuffer->write_len <= len) {
    610 	VTbuffer->write_len = n;
    611 	VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len);
    612     }
    613 
    614     for (n = 0; n < len; n++)
    615 	VTbuffer->write_buf[n] = (Char) d[n];
    616 
    617     TRACE(("writePtyData %lu:%s\n", (unsigned long) n,
    618 	   visibleChars(VTbuffer->write_buf, n)));
    619     v_write(f, VTbuffer->write_buf, n);
    620 }
    621 #endif /* OPT_WIDE_CHARS */
    622 
    623 #ifdef NO_LEAKS
    624 void
    625 noleaks_ptydata(void)
    626 {
    627     if (VTbuffer != NULL) {
    628 #if OPT_WIDE_CHARS
    629 	free(VTbuffer->write_buf);
    630 #endif
    631 	FreeAndNull(VTbuffer);
    632     }
    633 }
    634 #endif
    635 
    636 #ifdef TEST_DRIVER
    637 
    638 #include "data.c"
    639 
/*
 * Test-driver replacement: report the call on stderr and exit successfully.
 */
void
NormalExit(void)
{
    fputs("NormalExit!\n", stderr);
    exit(EXIT_SUCCESS);
}
    646 
/*
 * Test-driver replacement: the message and its argument are ignored; the
 * call is reported on stderr and the program exits with a failure status.
 */
void
Panic(const char *s, int a)
{
    (void) a;
    (void) s;

    fputs("Panic!\n", stderr);
    exit(EXIT_FAILURE);
}
    655 
    656 #if OPT_WIDE_CHARS
    657 
    658 #ifdef ALLOWLOGGING
/*
 * Test-driver stub: does nothing, so that trimPtyData() can call it.
 */
void
FlushLog(XtermWidget xw)
{
    (void) xw;
}
    664 #endif
    665 
/*
 * Test-driver stub: discards the data, so that writePtyData() can call it.
 */
void
v_write(int f, const Char *data, size_t len)
{
    (void) f;
    (void) data;
    (void) len;
}
    673 
/*
 * Test-driver stub: does nothing, so that switchPtyData() can call it.
 */
void
mk_wcwidth_init(int mode)
{
    (void) mode;
}
    679 
/*
 * Test-driver stub: does nothing, so that switchPtyData() can call it.
 */
void
update_font_utf8_mode(void)
{
}
    684 
    685 static int message_level = 0;
    686 static int opt_all = 0;
    687 static int opt_illegal = 0;
    688 static int opt_convert = 0;
    689 static int opt_reverse = 0;
    690 static long total_test = 0;
    691 static long total_errs = 0;
    692 
/*
 * Print the usage message on stderr, one line at a time, and exit with a
 * failure status.  This function does not return.
 */
static void
usage(void)
{
    static const char *msg[] =
    {
	"Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]",
	"",
	"Options:",
	" -a  exercise all legal encode/decode to/from UTF-8",
	" -c  call convertFromUTF8 rather than decodeUTF8",
	" -i  ignore illegal UTF-8 when testing -r option",
	" -q  quieter",
	" -r  reverse/decode from UTF-8 byte-string to/from Unicode",
	" -v  more verbose"
    };
    const char **line = msg;
    const char **limit = msg + (sizeof(msg) / sizeof(msg[0]));

    while (line != limit) {
	fputs(*line++, stderr);
	fputc('\n', stderr);
    }
    exit(EXIT_FAILURE);
}
    714 
    715 /*
    716  * http://www.unicode.org/versions/corrigendum1.html, table 3.1B
    717  */
    718 #define OkRange(n,lo,hi) \
    719  	if (value[n] < lo || value[n] > hi) { \
    720 	    result = False; \
    721 	    break; \
    722 	}
    723 static Bool
    724 is_legal_utf8(const Char *value)
    725 {
    726     Bool result = True;
    727     Char ch;
    728     while ((ch = *value) != '\0') {
    729 	if (ch <= 0x7f) {
    730 	    ++value;
    731 	} else if (ch >= 0xc2 && ch <= 0xdf) {
    732 	    OkRange(1, 0x80, 0xbf);
    733 	    value += 2;
    734 	} else if (ch == 0xe0) {
    735 	    OkRange(1, 0xa0, 0xbf);
    736 	    OkRange(2, 0x80, 0xbf);
    737 	    value += 3;
    738 	} else if (ch >= 0xe1 && ch <= 0xef) {
    739 	    OkRange(1, 0x80, 0xbf);
    740 	    OkRange(2, 0x80, 0xbf);
    741 	    value += 3;
    742 	} else if (ch == 0xf0) {
    743 	    OkRange(1, 0x90, 0xbf);
    744 	    OkRange(2, 0x80, 0xbf);
    745 	    OkRange(3, 0x80, 0xbf);
    746 	    value += 4;
    747 	} else if (ch >= 0xf1 && ch <= 0xf3) {
    748 	    OkRange(1, 0x80, 0xbf);
    749 	    OkRange(2, 0x80, 0xbf);
    750 	    OkRange(3, 0x80, 0xbf);
    751 	    value += 4;
    752 	} else if (ch == 0xf4) {
    753 	    OkRange(1, 0x80, 0x8f);
    754 	    OkRange(2, 0x80, 0xbf);
    755 	    OkRange(3, 0x80, 0xbf);
    756 	    value += 4;
    757 	} else {
    758 	    result = False;
    759 	    break;
    760 	}
    761     }
    762     return result;
    763 }
    764 
    765 static void
    766 test_utf8_convert(void)
    767 {
    768     unsigned c_in, c_out;
    769     Char buffer[10];
    770     Char *result;
    771     unsigned limit = 0x110000;
    772     unsigned success = 0;
    773     unsigned bucket[256];
    774 
    775     memset(bucket, 0, sizeof(bucket));
    776     for (c_in = 0; c_in < limit; ++c_in) {
    777 	memset(buffer, 0, sizeof(buffer));
    778 	if ((result = convertToUTF8(buffer, c_in)) == NULL) {
    779 	    TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in));
    780 	} else {
    781 	    if ((result = convertFromUTF8(buffer, &c_out)) == NULL) {
    782 		TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in));
    783 	    } else if (c_in != c_out) {
    784 		TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n",
    785 		       c_in, c_out));
    786 	    } else {
    787 		while (result-- != buffer) {
    788 		    bucket[*result]++;
    789 		}
    790 		++success;
    791 	    }
    792 	}
    793     }
    794     TRACE(("%u/%u successful\n", success, limit));
    795     for (c_in = 0; c_in < 256; ++c_in) {
    796 	if ((c_in % 8) == 0) {
    797 	    TRACE((" %02X:", c_in));
    798 	}
    799 	TRACE((" %8X", bucket[c_in]));
    800 	if (((c_in + 1) % 8) == 0) {
    801 	    TRACE(("\n"));
    802 	}
    803     }
    804 }
    805 
/*
 * Parse one non-negative number from the start of 'source'.
 *
 * A "U+" or "u+" prefix selects hexadecimal and a "0b" prefix selects
 * binary; otherwise strtol() chooses the radix from the text (decimal,
 * octal with a leading "0", or hexadecimal with "0x").
 *
 * source - text to parse
 * target - set by strtol() to the first character after the number
 *
 * Returns the value, or -1 if no digits were found, the value is negative,
 * or it does not fit in an int.  (Previously an oversized value was silently
 * truncated by the cast, e.g., 0x100000000 was returned as 0.)
 */
static int
decode_one(const char *source, char **target)
{
    int result = -1;
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    } else if (source[0] == '0' && source[1] == 'b') {
	source += 2;
	radix = 2;
    }

    errno = 0;			/* so that ERANGE is from this call */
    check = strtol(source, target, radix);
    if (*target != NULL
	&& *target != source
	&& errno != ERANGE
	&& check >= 0
	&& (long) (int) check == check) {
	result = (int) check;
    }
    return result;
}
    824 
/*
 * Parse "lo" or "lo<separator>hi" from 'source', where the separator is any
 * run of the characters ':', '-', '.', tab and space.  If no second number
 * can be decoded, '*hi' is set equal to '*lo'.
 *
 * Returns 1 on success, 0 if the first number cannot be decoded.
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *ignored;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
	return 0;

    /* skip whatever separates the two ends of the range */
    rest += strspn(rest, ":-.\t ");

    *hi = decode_one(rest, &ignored);
    if (*hi < 0)
	*hi = *lo;

    return 1;
}
    840 
    841 #define MAX_BYTES 6
    842 
    843 static void
    844 do_range(const char *source)
    845 {
    846     int lo, hi;
    847 
    848     TScreen screen;
    849     memset(&screen, 0, sizeof(screen));
    850 
    851     if (decode_range(source, &lo, &hi)) {
    852 	while (lo <= hi) {
    853 	    unsigned c_in = (unsigned) lo++;
    854 	    PtyData *data;
    855 	    Char *next;
    856 	    Char buffer[MAX_BYTES + 1];
    857 
    858 	    if (opt_reverse) {
    859 		Bool skip = False;
    860 		Bool first = True;
    861 		int j, k;
    862 		for (j = 0; j < MAX_BYTES; ++j) {
    863 		    unsigned long bits = ((unsigned long) c_in >> (8 * j));
    864 		    if ((buffer[j] = (Char) bits) == 0) {
    865 			skip = (bits != 0);
    866 			break;
    867 		    }
    868 		}
    869 		if (skip)
    870 		    continue;
    871 		initPtyData(&data);
    872 		for (k = 0; k <= j; ++k) {
    873 		    data->buffer[k] = buffer[j - k - 1];
    874 		}
    875 		if (opt_illegal && !is_legal_utf8(data->buffer)) {
    876 		    free(data);
    877 		    continue;
    878 		}
    879 		if (message_level > 1) {
    880 		    printf("TEST ");
    881 		    for (k = 0; k < j; ++k) {
    882 			printf("%02X", data->buffer[k]);
    883 		    }
    884 		}
    885 		data->next = data->buffer;
    886 		data->last = data->buffer + j;
    887 		while (decodeUtf8(&screen, data)) {
    888 		    total_test++;
    889 		    if (is_UCS_SPECIAL(data->utf_data))
    890 			total_errs++;
    891 		    data->next += data->utf_size;
    892 		    if (message_level > 1) {
    893 			printf("%s%04X", first ? " ->" : ", ", data->utf_data);
    894 		    }
    895 		    first = False;
    896 		}
    897 		if (!first)
    898 		    total_test--;
    899 		if (message_level > 1) {
    900 		    printf("\n");
    901 		    fflush(stdout);
    902 		}
    903 		free(data);
    904 	    } else if (opt_convert) {
    905 		unsigned c_out;
    906 		Char *result;
    907 
    908 		memset(buffer, 0, sizeof(buffer));
    909 		if ((result = next = convertToUTF8(buffer, c_in)) == NULL) {
    910 		    fprintf(stderr,
    911 			    "conversion of U+%04X to UTF-8 failed\n", c_in);
    912 		} else if ((result = convertFromUTF8(buffer, &c_out)) == NULL) {
    913 		    fprintf(stderr,
    914 			    "conversion of U+%04X from UTF-8 failed\n", c_in);
    915 		    total_errs++;
    916 		} else if (c_in != c_out) {
    917 		    fprintf(stderr,
    918 			    "conversion of U+%04X to/from UTF-8 gave U+%04X\n",
    919 			    c_in, c_out);
    920 		} else if (message_level > 1) {
    921 		    *next = '\0';
    922 		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
    923 			   (unsigned long) (next - buffer),
    924 			   buffer,
    925 			   c_out);
    926 		    fflush(stdout);
    927 		}
    928 	    } else {
    929 		initPtyData(&data);
    930 		next = convertToUTF8(data->buffer, c_in);
    931 		*next = 0;
    932 		data->next = data->buffer;
    933 		data->last = next;
    934 		decodeUtf8(&screen, data);
    935 		if (message_level > 1) {
    936 		    printf("TEST %04X (%lu:%s) ->%04X\n", c_in,
    937 			   (unsigned long) (next - data->buffer),
    938 			   data->buffer,
    939 			   data->utf_data);
    940 		    fflush(stdout);
    941 		}
    942 		if (c_in != data->utf_data) {
    943 		    fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data);
    944 		    total_errs++;
    945 		}
    946 		free(data);
    947 	    }
    948 	    total_test++;
    949 	}
    950     }
    951 }
    952 
    953 int
    954 main(int argc, char **argv)
    955 {
    956     int ch;
    957 
    958     setlocale(LC_ALL, "");
    959     while ((ch = getopt(argc, argv, "aciqrv")) != -1) {
    960 	switch (ch) {
    961 	case 'a':
    962 	    opt_all = 1;
    963 	    break;
    964 	case 'c':
    965 	    opt_convert = 1;
    966 	    break;
    967 	case 'i':
    968 	    opt_illegal = 1;
    969 	    break;
    970 	case 'q':
    971 	    message_level--;
    972 	    break;
    973 	case 'r':
    974 	    opt_reverse = 1;
    975 	    break;
    976 	case 'v':
    977 	    message_level++;
    978 	    break;
    979 	default:
    980 	    usage();
    981 	}
    982     }
    983     if (opt_all) {
    984 	test_utf8_convert();
    985     } else {
    986 	if (optind >= argc)
    987 	    usage();
    988 	while (optind < argc) {
    989 	    do_range(argv[optind++]);
    990 	}
    991 	if (total_test) {
    992 	    printf("%ld/%ld mismatches (%.0f%%)\n",
    993 		   total_errs,
    994 		   total_test,
    995 		   (100.0 * (double) total_errs) / (double) total_test);
    996 	}
    997     }
    998     return EXIT_SUCCESS;
    999 }
   1000 #else
/*
 * Test-driver entry point for builds without wide-character support, where
 * there is nothing to test: print a notice and exit successfully.
 */
int
main(int argc, char **argv)
{
    (void) argv;
    (void) argc;

    fputs("Nothing to be done here...\n", stdout);
    return EXIT_SUCCESS;
}
   1009 #endif /* OPT_WIDE_CHARS */
   1010 #endif
   1011