/* ptydata.c revision 5307cd1a */
1/* $XTermId: ptydata.c,v 1.157 2022/10/06 21:55:29 tom Exp $ */ 2 3/* 4 * Copyright 1999-2020,2022 by Thomas E. Dickey 5 * 6 * All Rights Reserved 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sublicense, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * The above copyright notice and this permission notice shall be included 17 * in all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 * Except as contained in this notice, the name(s) of the above copyright 28 * holders shall not be used in advertising or otherwise to promote the 29 * sale, use or other dealings in this Software without prior written 30 * authorization. 31 */ 32 33#include <data.h> 34 35#if OPT_WIDE_CHARS 36#include <menu.h> 37#include <wcwidth.h> 38#endif 39 40#ifdef TEST_DRIVER 41#undef TRACE 42#define TRACE(p) if (1) printf p 43#undef TRACE2 44#define TRACE2(p) if (0) printf p 45#define visibleChars(buf, len) "buffer" 46#endif 47 48/* 49 * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX 50 * systems are broken and return EWOULDBLOCK when they should return EAGAIN. 
51 * Note that this macro may evaluate its argument more than once. 52 */ 53#if defined(EAGAIN) && defined(EWOULDBLOCK) 54#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK) 55#else 56#ifdef EAGAIN 57#define E_TEST(err) ((err) == EAGAIN) 58#else 59#define E_TEST(err) ((err) == EWOULDBLOCK) 60#endif 61#endif 62 63#if OPT_WIDE_CHARS 64/* 65 * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data. 66 * The number of bytes converted will be nonzero iff there is data. 67 */ 68Bool 69decodeUtf8(TScreen *screen, PtyData *data) 70{ 71 size_t i; 72 size_t length = (size_t) (data->last - data->next); 73 int utf_count = 0; 74 unsigned utf_char = 0; 75 76 data->utf_size = 0; 77 for (i = 0; i < length; i++) { 78 unsigned c = data->next[i]; 79 80 /* Combine UTF-8 into Unicode */ 81 if (c < 0x80) { 82 /* We received an ASCII character */ 83 if (utf_count > 0) { 84 data->utf_data = UCS_REPL; /* prev. sequence incomplete */ 85 data->utf_size = i; 86 } else { 87 data->utf_data = (IChar) c; 88 data->utf_size = 1; 89 } 90 break; 91 } else if (screen->vt100_graphics 92 && (c < 0x100) 93 && (utf_count == 0) 94 && screen->gsets[(int) screen->curgr] != nrc_ASCII) { 95 data->utf_data = (IChar) c; 96 data->utf_size = 1; 97 break; 98 } else if (c < 0xc0) { 99 /* We received a continuation byte */ 100 if (utf_count < 1) { 101 /* 102 * We received a continuation byte before receiving a sequence 103 * state. Or an attempt to use a C1 control string. Either 104 * way, it is mapped to the replacement character, unless 105 * allowed by optional feature. 106 */ 107 data->utf_data = (IChar) (screen->c1_printable ? c : UCS_REPL); 108 data->utf_size = (i + 1); 109 break; 110 } else if (screen->utf8_weblike 111 && (utf_count == 3 112 && utf_char == 0x04 113 && c >= 0x90)) { 114 /* The encoding would form a code point beyond U+10FFFF. 
*/ 115 data->utf_size = i; 116 data->utf_data = UCS_REPL; 117 break; 118 } else if (screen->utf8_weblike 119 && (utf_count == 2 120 && utf_char == 0x0d 121 && c >= 0xa0)) { 122 /* The encoding would form a surrogate code point. */ 123 data->utf_size = i; 124 data->utf_data = UCS_REPL; 125 break; 126 } else { 127 /* Check for overlong UTF-8 sequences for which a shorter 128 * encoding would exist and replace them with UCS_REPL. 129 * An overlong UTF-8 sequence can have any of the following 130 * forms: 131 * 1100000x 10xxxxxx 132 * 11100000 100xxxxx 10xxxxxx 133 * 11110000 1000xxxx 10xxxxxx 10xxxxxx 134 * 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx 135 * 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 136 */ 137 if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) { 138 if (screen->utf8_weblike) { 139 /* overlong sequence continued */ 140 data->utf_data = UCS_REPL; 141 data->utf_size = i; 142 break; 143 } else { 144 utf_char = UCS_REPL; 145 } 146 } 147 utf_char <<= 6; 148 utf_char |= (c & 0x3f); 149 if ((utf_char >= 0xd800 && 150 utf_char <= 0xdfff) || 151 (utf_char == 0xfffe) || 152 (utf_char == HIDDEN_CHAR)) { 153 utf_char = UCS_REPL; 154 } 155 utf_count--; 156 if (utf_count == 0) { 157#if !OPT_WIDER_ICHAR 158 /* characters outside UCS-2 become UCS_REPL */ 159 if (utf_char > NARROW_ICHAR) { 160 TRACE(("using replacement for %#x\n", utf_char)); 161 utf_char = UCS_REPL; 162 } 163#endif 164 data->utf_data = (IChar) utf_char; 165 data->utf_size = (i + 1); 166 break; 167 } 168 } 169 } else { 170 /* We received a sequence start byte */ 171 if (utf_count > 0) { 172 /* previous sequence is incomplete */ 173 data->utf_data = UCS_REPL; 174 data->utf_size = i; 175 break; 176 } 177 if (screen->utf8_weblike) { 178 if (c < 0xe0) { 179 if (!(c & 0x1e)) { 180 /* overlong sequence start */ 181 data->utf_data = UCS_REPL; 182 data->utf_size = (i + 1); 183 break; 184 } 185 utf_count = 1; 186 utf_char = (c & 0x1f); 187 } else if (c < 0xf0) { 188 utf_count = 2; 189 utf_char = (c & 
0x0f); 190 } else if (c < 0xf5) { 191 utf_count = 3; 192 utf_char = (c & 0x07); 193 } else { 194 data->utf_data = UCS_REPL; 195 data->utf_size = (i + 1); 196 break; 197 } 198 } else { 199 if (c < 0xe0) { 200 utf_count = 1; 201 utf_char = (c & 0x1f); 202 if (!(c & 0x1e)) { 203 /* overlong sequence */ 204 utf_char = UCS_REPL; 205 } 206 } else if (c < 0xf0) { 207 utf_count = 2; 208 utf_char = (c & 0x0f); 209 } else if (c < 0xf8) { 210 utf_count = 3; 211 utf_char = (c & 0x07); 212 } else if (c < 0xfc) { 213 utf_count = 4; 214 utf_char = (c & 0x03); 215 } else if (c < 0xfe) { 216 utf_count = 5; 217 utf_char = (c & 0x01); 218 } else { 219 data->utf_data = UCS_REPL; 220 data->utf_size = (i + 1); 221 break; 222 } 223 } 224 } 225 } 226#if OPT_TRACE > 1 227 TRACE(("UTF-8 char %04X [%d..%d]\n", 228 data->utf_data, 229 (size_t) (data->next - data->buffer), 230 (size_t) (data->next - data->buffer + data->utf_size - 1))); 231#endif 232 233 return (data->utf_size != 0); 234} 235#endif 236 237int 238readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data) 239{ 240 TScreen *screen = TScreenOf(xw); 241 int size = 0; 242 243#ifdef VMS 244 if (*select_mask & pty_mask) { 245 trimPtyData(xw, data); 246 if (read_queue.flink != 0) { 247 size = tt_read(data->next); 248 if (size == 0) { 249 Panic("input: read returned zero\n", 0); 250 } 251 } else { 252 sys$hiber(); 253 } 254 } 255#else /* !VMS */ 256 if (FD_ISSET(screen->respond, select_mask)) { 257 int save_err; 258 trimPtyData(xw, data); 259 260 size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE); 261 save_err = errno; 262#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__) 263 /* 264 * Yes, I know this is a majorly f*ugly hack, however it seems to 265 * be necessary for Solaris x86. DWH 11/15/94 266 * Dunno why though.. 
267 * (and now CYGWIN, alanh@xfree86.org 08/15/01 268 */ 269 if (size <= 0) { 270 if (save_err == EIO || save_err == 0) 271 NormalExit(); 272 else if (!E_TEST(save_err)) 273 Panic("input: read returned unexpected error (%d)\n", save_err); 274 size = 0; 275 } 276#else /* !f*ugly */ 277 if (size < 0) { 278 if (save_err == EIO) 279 NormalExit(); 280 else if (!E_TEST(save_err)) 281 Panic("input: read returned unexpected error (%d)\n", save_err); 282 size = 0; 283 } else if (size == 0) { 284#if defined(__FreeBSD__) 285 NormalExit(); 286#else 287 Panic("input: read returned zero\n", 0); 288#endif 289 } 290#endif /* f*ugly */ 291 } 292#endif /* VMS */ 293 294 if (size) { 295#if OPT_TRACE 296 int i; 297 298 TRACE(("read %d bytes from pty\n", size)); 299 for (i = 0; i < size; i++) { 300 if (!(i % 16)) 301 TRACE(("%s", i ? "\n " : "READ")); 302 TRACE((" %02X", data->last[i])); 303 } 304 TRACE(("\n")); 305#endif 306 data->last += size; 307#ifdef ALLOWLOGGING 308 TScreenOf(term)->logstart = VTbuffer->next; 309#endif 310 } 311 312 return (size); 313} 314 315/* 316 * Return the next value from the input buffer. Note that morePtyData() is 317 * always called before this function, so we can do the UTF-8 input conversion 318 * in that function and simply return the result here. 319 */ 320#if OPT_WIDE_CHARS 321IChar 322nextPtyData(TScreen *screen, PtyData *data) 323{ 324 IChar result; 325 if (screen->utf8_inparse) { 326 skipPtyData(data, result); 327 } else { 328 result = *((data)->next++); 329 if (!screen->output_eight_bits) { 330 result = (IChar) (result & 0x7f); 331 } 332 } 333 TRACE2(("nextPtyData returns %#x\n", result)); 334 return result; 335} 336#endif 337 338#if OPT_WIDE_CHARS 339/* 340 * Called when UTF-8 mode has been turned on/off. 
341 */ 342void 343switchPtyData(TScreen *screen, int flag) 344{ 345 if (screen->utf8_mode != flag) { 346 screen->utf8_mode = flag; 347 screen->utf8_inparse = (Boolean) (flag != 0); 348 mk_wcwidth_init(screen->utf8_mode); 349 350 TRACE(("turning UTF-8 mode %s\n", BtoS(flag))); 351 update_font_utf8_mode(); 352 } 353} 354#endif 355 356/* 357 * Allocate a buffer. 358 */ 359void 360initPtyData(PtyData **result) 361{ 362 PtyData *data; 363 364 TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n", 365 FRG_SIZE, BUF_SIZE)); 366 367 if (FRG_SIZE < 64) 368 FRG_SIZE = 64; 369 if (BUF_SIZE < FRG_SIZE) 370 BUF_SIZE = FRG_SIZE; 371 if (BUF_SIZE % FRG_SIZE) 372 BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE); 373 374 TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n", 375 FRG_SIZE, BUF_SIZE)); 376 377 data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE)); 378 379 memset(data, 0, sizeof(*data)); 380 data->next = data->buffer; 381 data->last = data->buffer; 382 *result = data; 383} 384 385/* 386 * Initialize a buffer for the caller, using its data in 'next'. 387 */ 388#if OPT_WIDE_CHARS 389PtyData * 390fakePtyData(PtyData *result, Char *next, Char *last) 391{ 392 PtyData *data = result; 393 394 memset(data, 0, sizeof(*data)); 395 data->next = next; 396 data->last = last; 397 398 return data; 399} 400#endif 401 402/* 403 * Remove used data by shifting the buffer down, to make room for more data, 404 * e.g., a continuation-read. 
405 */ 406void 407trimPtyData(XtermWidget xw, PtyData *data) 408{ 409 (void) xw; 410 FlushLog(xw); 411 412 if (data->next != data->buffer) { 413 size_t i; 414 size_t n = (size_t) (data->last - data->next); 415 416 TRACE(("shifting buffer down by %lu\n", (unsigned long) n)); 417 for (i = 0; i < n; ++i) { 418 data->buffer[i] = data->next[i]; 419 } 420 data->next = data->buffer; 421 data->last = data->next + n; 422 } 423 424} 425 426/* 427 * Insert new data into the input buffer so the next calls to morePtyData() 428 * and nextPtyData() will return that. 429 */ 430void 431fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length) 432{ 433 size_t size; 434 size_t n; 435 436 /* remove the used portion of the buffer */ 437 trimPtyData(xw, data); 438 439 VTbuffer->last += length; 440 size = (size_t) (VTbuffer->last - VTbuffer->next); 441 442 /* shift the unused portion up to make room */ 443 for (n = size; n >= length; --n) 444 VTbuffer->next[n] = VTbuffer->next[n - length]; 445 446 /* insert the new bytes to interpret */ 447 for (n = 0; n < length; n++) 448 VTbuffer->next[n] = CharOf(value[n]); 449} 450 451#if OPT_WIDE_CHARS 452/* 453 * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target 454 * 'lp', and returning a pointer past the converted character. 
455 */ 456Char * 457convertToUTF8(Char *lp, unsigned c) 458{ 459#define CH(n) (Char)((c) >> ((n) * 8)) 460 if (c < 0x80) { 461 /* 0******* */ 462 *lp++ = (Char) CH(0); 463 } else if (c < 0x800) { 464 /* 110***** 10****** */ 465 *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2)); 466 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 467 } else if (c < 0x00010000) { 468 /* 1110**** 10****** 10****** */ 469 *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4)); 470 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 471 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 472 } else if (c < 0x00200000) { 473 *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2)); 474 *lp++ = (Char) (0x80 | 475 ((int) (CH(1) & 0xf0) >> 4) | 476 ((int) (CH(2) & 0x03) << 4)); 477 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 478 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 479 } else if (c < 0x04000000) { 480 *lp++ = (Char) (0xf8 | (CH(3) & 0x03)); 481 *lp++ = (Char) (0x80 | (CH(2) >> 2)); 482 *lp++ = (Char) (0x80 | 483 ((int) (CH(1) & 0xf0) >> 4) | 484 ((int) (CH(2) & 0x03) << 4)); 485 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 486 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 487 } else { 488 *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6)); 489 *lp++ = (Char) (0x80 | (CH(3) & 0x3f)); 490 *lp++ = (Char) (0x80 | (CH(2) >> 2)); 491 *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4)); 492 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 493 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 494 } 495 return lp; 496#undef CH 497} 498 499/* 500 * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer 501 * past the converted UTF-8 input. The first 256 values align with ISO-8859-1, 502 * making it possible to use this to convert to Latin-1. 503 * 504 * If the conversion fails, return null. 505 */ 506Char * 507convertFromUTF8(Char *lp, unsigned *cp) 508{ 509 int want; 510 511 /* 512 * Find the number of bytes we will need from the source. 
513 */ 514 if ((*lp & 0x80) == 0) { 515 want = 1; 516 } else if ((*lp & 0xe0) == 0xc0) { 517 want = 2; 518 } else if ((*lp & 0xf0) == 0xe0) { 519 want = 3; 520 } else if ((*lp & 0xf8) == 0xf0) { 521 want = 4; 522 } else if ((*lp & 0xfc) == 0xf8) { 523 want = 5; 524 } else if ((*lp & 0xfe) == 0xfc) { 525 want = 6; 526 } else { 527 want = 0; 528 } 529 530 if (want) { 531 int have = 1; 532 533 while (lp[have] != '\0') { 534 if ((lp[have] & 0xc0) != 0x80) 535 break; 536 ++have; 537 } 538 if (want == have) { 539 unsigned mask = 0; 540 int j; 541 int shift = 0; 542 543 *cp = 0; 544 switch (want) { 545 case 1: 546 mask = (*lp); 547 break; 548 case 2: 549 mask = (*lp & 0x1f); 550 break; 551 case 3: 552 mask = (*lp & 0x0f); 553 break; 554 case 4: 555 mask = (*lp & 0x07); 556 break; 557 case 5: 558 mask = (*lp & 0x03); 559 break; 560 case 6: 561 mask = (*lp & 0x01); 562 break; 563 default: 564 mask = 0; 565 break; 566 } 567 568 for (j = 1; j < want; j++) { 569 *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift); 570 shift += 6; 571 } 572 *cp |= mask << shift; 573 lp += want; 574 } else { 575 *cp = BAD_ASCII; 576 lp = NULL; 577 } 578 } else { 579 *cp = BAD_ASCII; 580 lp = NULL; 581 } 582 return lp; 583} 584 585/* 586 * Returns true if the entire string is valid UTF-8. 
587 */ 588Boolean 589isValidUTF8(Char *lp) 590{ 591 Boolean result = True; 592 while (*lp) { 593 unsigned ch; 594 Char *next = convertFromUTF8(lp, &ch); 595 if (next == NULL || ch == 0) { 596 result = False; 597 break; 598 } 599 lp = next; 600 } 601 return result; 602} 603 604/* 605 * Write data back to the PTY 606 */ 607void 608writePtyData(int f, IChar *d, size_t len) 609{ 610 size_t n = (len << 1); 611 612 if (VTbuffer->write_len <= len) { 613 VTbuffer->write_len = n; 614 VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len); 615 } 616 617 for (n = 0; n < len; n++) 618 VTbuffer->write_buf[n] = (Char) d[n]; 619 620 TRACE(("writePtyData %lu:%s\n", (unsigned long) n, 621 visibleChars(VTbuffer->write_buf, n))); 622 v_write(f, VTbuffer->write_buf, n); 623} 624#endif /* OPT_WIDE_CHARS */ 625 626#ifdef NO_LEAKS 627void 628noleaks_ptydata(void) 629{ 630 if (VTbuffer != 0) { 631#if OPT_WIDE_CHARS 632 free(VTbuffer->write_buf); 633#endif 634 FreeAndNull(VTbuffer); 635 } 636} 637#endif 638 639#ifdef TEST_DRIVER 640 641#include "data.c" 642 643void 644NormalExit(void) 645{ 646 fprintf(stderr, "NormalExit!\n"); 647 exit(EXIT_SUCCESS); 648} 649 650void 651Panic(const char *s, int a) 652{ 653 (void) s; 654 (void) a; 655 fprintf(stderr, "Panic!\n"); 656 exit(EXIT_FAILURE); 657} 658 659#if OPT_WIDE_CHARS 660 661#ifdef ALLOWLOGGING 662void 663FlushLog(XtermWidget xw) 664{ 665 (void) xw; 666} 667#endif 668 669void 670v_write(int f, const Char *data, size_t len) 671{ 672 (void) f; 673 (void) data; 674 (void) len; 675} 676 677void 678mk_wcwidth_init(int mode) 679{ 680 (void) mode; 681} 682 683void 684update_font_utf8_mode(void) 685{ 686} 687 688static int message_level = 0; 689static int opt_all = 0; 690static int opt_illegal = 0; 691static int opt_convert = 0; 692static int opt_reverse = 0; 693static long total_test = 0; 694static long total_errs = 0; 695 696static void 697usage(void) 698{ 699 static const char *msg[] = 700 { 701 "Usage: test_ptydata [options] 
[c1[-c1b] [c2-[c2b] [...]]]", 702 "", 703 "Options:", 704 " -a exercise all legal encode/decode to/from UTF-8", 705 " -c call convertFromUTF8 rather than decodeUTF8", 706 " -i ignore illegal UTF-8 when testing -r option", 707 " -q quieter", 708 " -r reverse/decode from UTF-8 byte-string to/from Unicode", 709 " -v more verbose" 710 }; 711 size_t n; 712 for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 713 fprintf(stderr, "%s\n", msg[n]); 714 } 715 exit(EXIT_FAILURE); 716} 717 718/* 719 * http://www.unicode.org/versions/corrigendum1.html, table 3.1B 720 */ 721#define OkRange(n,lo,hi) \ 722 if (value[n] < lo || value[n] > hi) { \ 723 result = False; \ 724 break; \ 725 } 726static Bool 727is_legal_utf8(const Char *value) 728{ 729 Bool result = True; 730 Char ch; 731 while ((ch = *value) != '\0') { 732 if (ch <= 0x7f) { 733 ++value; 734 } else if (ch >= 0xc2 && ch <= 0xdf) { 735 OkRange(1, 0x80, 0xbf); 736 value += 2; 737 } else if (ch == 0xe0) { 738 OkRange(1, 0xa0, 0xbf); 739 OkRange(2, 0x80, 0xbf); 740 value += 3; 741 } else if (ch >= 0xe1 && ch <= 0xef) { 742 OkRange(1, 0x80, 0xbf); 743 OkRange(2, 0x80, 0xbf); 744 value += 3; 745 } else if (ch == 0xf0) { 746 OkRange(1, 0x90, 0xbf); 747 OkRange(2, 0x80, 0xbf); 748 OkRange(3, 0x80, 0xbf); 749 value += 4; 750 } else if (ch >= 0xf1 && ch <= 0xf3) { 751 OkRange(1, 0x80, 0xbf); 752 OkRange(2, 0x80, 0xbf); 753 OkRange(3, 0x80, 0xbf); 754 value += 4; 755 } else if (ch == 0xf4) { 756 OkRange(1, 0x80, 0x8f); 757 OkRange(2, 0x80, 0xbf); 758 OkRange(3, 0x80, 0xbf); 759 value += 4; 760 } else { 761 result = False; 762 break; 763 } 764 } 765 return result; 766} 767 768static void 769test_utf8_convert(void) 770{ 771 unsigned c_in, c_out; 772 Char buffer[10]; 773 Char *result; 774 unsigned limit = 0x110000; 775 unsigned success = 0; 776 unsigned bucket[256]; 777 778 memset(bucket, 0, sizeof(bucket)); 779 for (c_in = 0; c_in < limit; ++c_in) { 780 memset(buffer, 0, sizeof(buffer)); 781 if ((result = convertToUTF8(buffer, c_in)) 
== 0) { 782 TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in)); 783 } else { 784 if ((result = convertFromUTF8(buffer, &c_out)) == 0) { 785 TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in)); 786 } else if (c_in != c_out) { 787 TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n", 788 c_in, c_out)); 789 } else { 790 while (result-- != buffer) { 791 bucket[*result]++; 792 } 793 ++success; 794 } 795 } 796 } 797 TRACE(("%u/%u successful\n", success, limit)); 798 for (c_in = 0; c_in < 256; ++c_in) { 799 if ((c_in % 8) == 0) { 800 TRACE((" %02X:", c_in)); 801 } 802 TRACE((" %8X", bucket[c_in])); 803 if (((c_in + 1) % 8) == 0) { 804 TRACE(("\n")); 805 } 806 } 807} 808 809static int 810decode_one(const char *source, char **target) 811{ 812 int result = -1; 813 long check; 814 int radix = 0; 815 if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 816 source += 2; 817 radix = 16; 818 } else if (source[0] == '0' && source[1] == 'b') { 819 source += 2; 820 radix = 2; 821 } 822 check = strtol(source, target, radix); 823 if (*target != NULL && *target != source) 824 result = (int) check; 825 return result; 826} 827 828static int 829decode_range(const char *source, int *lo, int *hi) 830{ 831 int result = 0; 832 char *after1; 833 char *after2; 834 if ((*lo = decode_one(source, &after1)) >= 0) { 835 after1 += strspn(after1, ":-.\t "); 836 if ((*hi = decode_one(after1, &after2)) < 0) { 837 *hi = *lo; 838 } 839 result = 1; 840 } 841 return result; 842} 843 844#define MAX_BYTES 6 845 846static void 847do_range(const char *source) 848{ 849 int lo, hi; 850 851 TScreen screen; 852 memset(&screen, 0, sizeof(screen)); 853 854 if (decode_range(source, &lo, &hi)) { 855 while (lo <= hi) { 856 unsigned c_in = (unsigned) lo++; 857 PtyData *data; 858 Char *next; 859 Char buffer[MAX_BYTES + 1]; 860 861 if (opt_reverse) { 862 Bool skip = False; 863 Bool first = True; 864 int j, k; 865 for (j = 0; j < MAX_BYTES; ++j) { 866 unsigned long bits = ((unsigned long) c_in 
>> (8 * j)); 867 if ((buffer[j] = (Char) bits) == 0) { 868 skip = (bits != 0); 869 break; 870 } 871 } 872 if (skip) 873 continue; 874 initPtyData(&data); 875 for (k = 0; k <= j; ++k) { 876 data->buffer[k] = buffer[j - k - 1]; 877 } 878 if (opt_illegal && !is_legal_utf8(data->buffer)) { 879 free(data); 880 continue; 881 } 882 if (message_level > 1) { 883 printf("TEST "); 884 for (k = 0; k < j; ++k) { 885 printf("%02X", data->buffer[k]); 886 } 887 } 888 data->next = data->buffer; 889 data->last = data->buffer + j; 890 while (decodeUtf8(&screen, data)) { 891 total_test++; 892 if (data->utf_data == UCS_REPL) 893 total_errs++; 894 data->next += data->utf_size; 895 if (message_level > 1) { 896 printf("%s%04X", first ? " ->" : ", ", data->utf_data); 897 } 898 first = False; 899 } 900 if (!first) 901 total_test--; 902 if (message_level > 1) { 903 printf("\n"); 904 fflush(stdout); 905 } 906 free(data); 907 } else if (opt_convert) { 908 unsigned c_out; 909 Char *result; 910 911 memset(buffer, 0, sizeof(buffer)); 912 if ((result = next = convertToUTF8(buffer, c_in)) == 0) { 913 fprintf(stderr, 914 "conversion of U+%04X to UTF-8 failed\n", c_in); 915 } else if ((result = convertFromUTF8(buffer, &c_out)) == 0) { 916 fprintf(stderr, 917 "conversion of U+%04X from UTF-8 failed\n", c_in); 918 total_errs++; 919 } else if (c_in != c_out) { 920 fprintf(stderr, 921 "conversion of U+%04X to/from UTF-8 gave U+%04X\n", 922 c_in, c_out); 923 } else if (message_level > 1) { 924 *next = '\0'; 925 printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 926 (unsigned long) (next - buffer), 927 buffer, 928 c_out); 929 fflush(stdout); 930 } 931 } else { 932 initPtyData(&data); 933 next = convertToUTF8(data->buffer, c_in); 934 *next = 0; 935 data->next = data->buffer; 936 data->last = next; 937 decodeUtf8(&screen, data); 938 if (message_level > 1) { 939 printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 940 (unsigned long) (next - data->buffer), 941 data->buffer, 942 data->utf_data); 943 fflush(stdout); 944 } 945 
if (c_in != data->utf_data) { 946 fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data); 947 total_errs++; 948 } 949 free(data); 950 } 951 total_test++; 952 } 953 } 954} 955 956int 957main(int argc, char **argv) 958{ 959 int ch; 960 961 setlocale(LC_ALL, ""); 962 while ((ch = getopt(argc, argv, "aciqrv")) != -1) { 963 switch (ch) { 964 case 'a': 965 opt_all = 1; 966 break; 967 case 'c': 968 opt_convert = 1; 969 break; 970 case 'i': 971 opt_illegal = 1; 972 break; 973 case 'q': 974 message_level--; 975 break; 976 case 'r': 977 opt_reverse = 1; 978 break; 979 case 'v': 980 message_level++; 981 break; 982 default: 983 usage(); 984 } 985 } 986 if (opt_all) { 987 test_utf8_convert(); 988 } else { 989 if (optind >= argc) 990 usage(); 991 while (optind < argc) { 992 do_range(argv[optind++]); 993 } 994 if (total_test) { 995 printf("%ld/%ld mismatches (%.0f%%)\n", 996 total_errs, 997 total_test, 998 (100.0 * (double) total_errs) / (double) total_test); 999 } 1000 } 1001 return EXIT_SUCCESS; 1002} 1003#else 1004int 1005main(int argc, char **argv) 1006{ 1007 (void) argc; 1008 (void) argv; 1009 printf("Nothing to be done here...\n"); 1010 return EXIT_SUCCESS; 1011} 1012#endif /* OPT_WIDE_CHARS */ 1013#endif 1014