ptydata.c revision 5104ee6e
1/* $XTermId: ptydata.c,v 1.163 2024/12/01 23:48:07 tom Exp $ */ 2 3/* 4 * Copyright 1999-2023,2024 by Thomas E. Dickey 5 * 6 * All Rights Reserved 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sublicense, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * The above copyright notice and this permission notice shall be included 17 * in all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 * Except as contained in this notice, the name(s) of the above copyright 28 * holders shall not be used in advertising or otherwise to promote the 29 * sale, use or other dealings in this Software without prior written 30 * authorization. 31 */ 32 33#include <data.h> 34 35#if OPT_WIDE_CHARS 36#include <menu.h> 37#include <wcwidth.h> 38#endif 39 40#ifdef TEST_DRIVER 41#undef TRACE 42#define TRACE(p) if (1) printf p 43#undef TRACE2 44#define TRACE2(p) if (0) printf p 45#define visibleChars(buf, len) "buffer" 46#endif 47 48/* 49 * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX 50 * systems are broken and return EWOULDBLOCK when they should return EAGAIN. 51 * Note that this macro may evaluate its argument more than once. 52 */ 53#if defined(EAGAIN) && defined(EWOULDBLOCK) 54#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK) 55#else 56#ifdef EAGAIN 57#define E_TEST(err) ((err) == EAGAIN) 58#else 59#define E_TEST(err) ((err) == EWOULDBLOCK) 60#endif 61#endif 62 63#if OPT_WIDE_CHARS 64/* 65 * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data. 66 * The number of bytes converted will be nonzero iff there is data. 67 */ 68Bool 69decodeUtf8(TScreen *screen, PtyData *data) 70{ 71 size_t i; 72 size_t length = (size_t) (data->last - data->next); 73 int utf_count = 0; 74 unsigned utf_char = 0; 75 76 data->utf_size = 0; 77 for (i = 0; i < length; i++) { 78 unsigned c = data->next[i]; 79 80 /* Combine UTF-8 into Unicode */ 81 if (c < 0x80) { 82 /* We received an ASCII character */ 83 if (utf_count > 0) { 84 data->utf_data = UCS_REPL; /* prev. sequence incomplete */ 85 data->utf_size = i; 86 } else { 87 data->utf_data = (IChar) c; 88 data->utf_size = 1; 89 } 90 break; 91 } else if (screen->vt100_graphics 92 && (c < 0x100) 93 && (utf_count == 0) 94 && screen->gsets[(int) screen->curgr] != nrc_ASCII) { 95 data->utf_data = (IChar) c; 96 data->utf_size = 1; 97 break; 98 } else if (c < 0xc0) { 99 /* We received a continuation byte */ 100 if (utf_count < 1) { 101 if (screen->c1_printable) { 102 data->utf_data = (IChar) c; 103 } else if ((i + 1) < length 104 && data->next[i + 1] > 0x20 105 && data->next[i + 1] < 0x80) { 106 /* 107 * Allow for C1 control string if the next byte is 108 * available for inspection. 109 */ 110 data->utf_data = (IChar) c; 111 } else { 112 /* 113 * We received a continuation byte before receiving a 114 * sequence state, or a failed attempt to use a C1 control 115 * string. 116 */ 117 data->utf_data = (IChar) UCS_REPL; 118 } 119 data->utf_size = (i + 1); 120 break; 121 } else if (screen->utf8_weblike 122 && (utf_count == 3 123 && utf_char == 0x04 124 && c >= 0x90)) { 125 /* The encoding would form a code point beyond U+10FFFF. */ 126 data->utf_size = i; 127 data->utf_data = UCS_REPL; 128 break; 129 } else if (screen->utf8_weblike 130 && (utf_count == 2 131 && utf_char == 0x0d 132 && c >= 0xa0)) { 133 /* The encoding would form a surrogate code point. */ 134 data->utf_size = i; 135 data->utf_data = UCS_REPL; 136 break; 137 } else { 138 /* Check for overlong UTF-8 sequences for which a shorter 139 * encoding would exist and replace them with UCS_REPL. 140 * An overlong UTF-8 sequence can have any of the following 141 * forms: 142 * 1100000x 10xxxxxx 143 * 11100000 100xxxxx 10xxxxxx 144 * 11110000 1000xxxx 10xxxxxx 10xxxxxx 145 * 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx 146 * 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 147 */ 148 if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) { 149 if (screen->utf8_weblike) { 150 /* overlong sequence continued */ 151 data->utf_data = UCS_REPL; 152 data->utf_size = i; 153 break; 154 } else { 155 utf_char = UCS_REPL; 156 } 157 } 158 utf_char <<= 6; 159 utf_char |= (c & 0x3f); 160 if ((utf_char >= 0xd800 && 161 utf_char <= 0xdfff) || 162 (utf_char == 0xfffe) || 163 (utf_char == HIDDEN_CHAR)) { 164 utf_char = UCS_REPL; 165 } 166 utf_count--; 167 if (utf_count == 0) { 168#if !OPT_WIDER_ICHAR 169 /* characters outside UCS-2 become UCS_REPL */ 170 if (utf_char > NARROW_ICHAR) { 171 TRACE(("using replacement for %#x\n", utf_char)); 172 utf_char = UCS_REPL; 173 } 174#endif 175 data->utf_data = (IChar) utf_char; 176 data->utf_size = (i + 1); 177 break; 178 } 179 } 180 } else { 181 /* We received a sequence start byte */ 182 if (utf_count > 0) { 183 /* previous sequence is incomplete */ 184 data->utf_data = UCS_REPL; 185 data->utf_size = i; 186 break; 187 } 188 if (screen->utf8_weblike) { 189 if (c < 0xe0) { 190 if (!(c & 0x1e)) { 191 /* overlong sequence start */ 192 data->utf_data = UCS_REPL; 193 data->utf_size = (i + 1); 194 break; 195 } 196 utf_count = 1; 197 utf_char = (c & 0x1f); 198 } else if (c < 0xf0) { 199 utf_count = 2; 200 utf_char = (c & 0x0f); 201 } else if (c < 0xf5) { 202 utf_count = 3; 203 utf_char = (c & 0x07); 204 } else { 205 data->utf_data = UCS_REPL; 206 data->utf_size = (i + 1); 207 break; 208 } 209 } else { 210 if (c < 0xe0) { 211 utf_count = 1; 212 utf_char = (c & 0x1f); 213 if (!(c & 0x1e)) { 214 /* overlong sequence */ 215 utf_char = UCS_REPL; 216 } 217 } else if (c < 0xf0) { 218 utf_count = 2; 219 utf_char = (c & 0x0f); 220 } else if (c < 0xf8) { 221 utf_count = 3; 222 utf_char = (c & 0x07); 223 } else if (c < 0xfc) { 224 utf_count = 4; 225 utf_char = (c & 0x03); 226 } else if (c < 0xfe) { 227 utf_count = 5; 228 utf_char = (c & 0x01); 229 } else { 230 data->utf_data = UCS_REPL; 231 data->utf_size = (i + 1); 232 break; 233 } 234 } 235 } 236 } 237#if OPT_TRACE > 1 238 TRACE(("UTF-8 char %04X [%lu..%lu]\n", 239 data->utf_data, 240 (unsigned long) (data->next - data->buffer), 241 (unsigned long) (data->next - data->buffer + data->utf_size - 1))); 242#endif 243 244 return (data->utf_size != 0); 245} 246#endif 247 248int 249readPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data) 250{ 251 TScreen *screen = TScreenOf(xw); 252 int size = 0; 253 254 if (FD_ISSET(screen->respond, select_mask)) { 255 int save_err; 256 trimPtyData(xw, data); 257 258 size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE); 259 save_err = errno; 260#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__) 261 /* 262 * Yes, I know this is a majorly f*ugly hack, however it seems to 263 * be necessary for Solaris x86. DWH 11/15/94 264 * Dunno why though.. 265 * (and now CYGWIN, alanh@xfree86.org 08/15/01 266 */ 267 if (size <= 0) { 268 if (save_err == EIO || save_err == 0) 269 NormalExit(); 270 else if (!E_TEST(save_err)) 271 Panic("input: read returned unexpected error (%d)\n", save_err); 272 size = 0; 273 } 274#else /* !f*ugly */ 275 if (size < 0) { 276 if (save_err == EIO) 277 NormalExit(); 278 else if (!E_TEST(save_err)) 279 Panic("input: read returned unexpected error (%d)\n", save_err); 280 size = 0; 281 } else if (size == 0) { 282#if defined(__FreeBSD__) 283 NormalExit(); 284#else 285 Panic("input: read returned zero\n", 0); 286#endif 287 } 288#endif /* f*ugly */ 289 } 290 291 if (size) { 292#if OPT_TRACE 293 int i; 294 295 TRACE(("read %d bytes from pty\n", size)); 296 for (i = 0; i < size; i++) { 297 if (!(i % 16)) 298 TRACE(("%s", i ? "\n " : "READ")); 299 TRACE((" %02X", data->last[i])); 300 } 301 TRACE(("\n")); 302#endif 303 data->last += size; 304#ifdef ALLOWLOGGING 305 TScreenOf(term)->logstart = VTbuffer->next; 306#endif 307 } 308 309 return (size); 310} 311 312/* 313 * Return the next value from the input buffer. Note that morePtyData() is 314 * always called before this function, so we can do the UTF-8 input conversion 315 * in that function and simply return the result here. 316 */ 317#if OPT_WIDE_CHARS 318IChar 319nextPtyData(TScreen *screen, PtyData *data) 320{ 321 IChar result; 322 if (screen->utf8_inparse) { 323 skipPtyData(data, result); 324 } else { 325 result = *((data)->next++); 326 if (!screen->output_eight_bits) { 327 result = (IChar) (result & 0x7f); 328 } 329 } 330 TRACE2(("nextPtyData returns %#x\n", result)); 331 return result; 332} 333#endif 334 335#if OPT_WIDE_CHARS 336/* 337 * Called when UTF-8 mode has been turned on/off. 338 */ 339void 340switchPtyData(TScreen *screen, int flag) 341{ 342 if (screen->utf8_mode != flag) { 343 screen->utf8_mode = flag; 344 screen->utf8_inparse = (Boolean) (flag != 0); 345 mk_wcwidth_init(screen->utf8_mode); 346 347 TRACE(("turning UTF-8 mode %s\n", BtoS(flag))); 348 update_font_utf8_mode(); 349 } 350} 351#endif 352 353/* 354 * Allocate a buffer. 355 */ 356void 357initPtyData(PtyData **result) 358{ 359 PtyData *data; 360 361 TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n", 362 FRG_SIZE, BUF_SIZE)); 363 364 if (FRG_SIZE < 64) 365 FRG_SIZE = 64; 366 if (BUF_SIZE < FRG_SIZE) 367 BUF_SIZE = FRG_SIZE; 368 if (BUF_SIZE % FRG_SIZE) 369 BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE); 370 371 TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n", 372 FRG_SIZE, BUF_SIZE)); 373 374 data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE)); 375 376 memset(data, 0, sizeof(*data)); 377 data->next = data->buffer; 378 data->last = data->buffer; 379 *result = data; 380} 381 382/* 383 * Initialize a buffer for the caller, using its data in 'next'. 384 */ 385#if OPT_WIDE_CHARS 386PtyData * 387fakePtyData(PtyData *result, Char *next, Char *last) 388{ 389 PtyData *data = result; 390 391 memset(data, 0, sizeof(*data)); 392 data->next = next; 393 data->last = last; 394 395 return data; 396} 397#endif 398 399/* 400 * Remove used data by shifting the buffer down, to make room for more data, 401 * e.g., a continuation-read. 402 */ 403void 404trimPtyData(XtermWidget xw, PtyData *data) 405{ 406 (void) xw; 407 FlushLog(xw); 408 409 if (data->next != data->buffer) { 410 size_t i; 411 size_t n = (size_t) (data->last - data->next); 412 413 TRACE(("shifting buffer down by %lu\n", (unsigned long) n)); 414 for (i = 0; i < n; ++i) { 415 data->buffer[i] = data->next[i]; 416 } 417 data->next = data->buffer; 418 data->last = data->next + n; 419 } 420 421} 422 423/* 424 * Insert new data into the input buffer so the next calls to morePtyData() 425 * and nextPtyData() will return that. 426 */ 427void 428fillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length) 429{ 430 size_t size; 431 size_t n; 432 433 /* remove the used portion of the buffer */ 434 trimPtyData(xw, data); 435 436 VTbuffer->last += length; 437 size = (size_t) (VTbuffer->last - VTbuffer->next); 438 439 /* shift the unused portion up to make room */ 440 for (n = size; n >= length; --n) 441 VTbuffer->next[n] = VTbuffer->next[n - length]; 442 443 /* insert the new bytes to interpret */ 444 for (n = 0; n < length; n++) 445 VTbuffer->next[n] = CharOf(value[n]); 446} 447 448#if OPT_WIDE_CHARS 449/* 450 * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target 451 * 'lp', and returning a pointer past the converted character. 452 */ 453Char * 454convertToUTF8(Char *lp, unsigned c) 455{ 456#define CH(n) (Char)((c) >> ((n) * 8)) 457 if (c < 0x80) { 458 /* 0******* */ 459 *lp++ = (Char) CH(0); 460 } else if (c < 0x800) { 461 /* 110***** 10****** */ 462 *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2)); 463 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 464 } else if (c < 0x00010000) { 465 /* 1110**** 10****** 10****** */ 466 *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4)); 467 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 468 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 469 } else if (c < 0x00200000) { 470 *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2)); 471 *lp++ = (Char) (0x80 | 472 ((int) (CH(1) & 0xf0) >> 4) | 473 ((int) (CH(2) & 0x03) << 4)); 474 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 475 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 476 } else if (c < 0x04000000) { 477 *lp++ = (Char) (0xf8 | (CH(3) & 0x03)); 478 *lp++ = (Char) (0x80 | (CH(2) >> 2)); 479 *lp++ = (Char) (0x80 | 480 ((int) (CH(1) & 0xf0) >> 4) | 481 ((int) (CH(2) & 0x03) << 4)); 482 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 483 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 484 } else { 485 *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6)); 486 *lp++ = (Char) (0x80 | (CH(3) & 0x3f)); 487 *lp++ = (Char) (0x80 | (CH(2) >> 2)); 488 *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4)); 489 *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 490 *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 491 } 492 return lp; 493#undef CH 494} 495 496/* 497 * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer 498 * past the converted UTF-8 input. The first 256 values align with ISO-8859-1, 499 * making it possible to use this to convert to Latin-1. 500 * 501 * If the conversion fails, return null. 502 */ 503Char * 504convertFromUTF8(Char *lp, unsigned *cp) 505{ 506 int want; 507 508 /* 509 * Find the number of bytes we will need from the source. 510 */ 511 if ((*lp & 0x80) == 0) { 512 want = 1; 513 } else if ((*lp & 0xe0) == 0xc0) { 514 want = 2; 515 } else if ((*lp & 0xf0) == 0xe0) { 516 want = 3; 517 } else if ((*lp & 0xf8) == 0xf0) { 518 want = 4; 519 } else if ((*lp & 0xfc) == 0xf8) { 520 want = 5; 521 } else if ((*lp & 0xfe) == 0xfc) { 522 want = 6; 523 } else { 524 want = 0; 525 } 526 527 if (want) { 528 int have = 1; 529 530 while (lp[have] != '\0') { 531 if ((lp[have] & 0xc0) != 0x80) 532 break; 533 ++have; 534 } 535 if (want == have) { 536 unsigned mask = 0; 537 int j; 538 int shift = 0; 539 540 *cp = 0; 541 switch (want) { 542 case 1: 543 mask = (*lp); 544 break; 545 case 2: 546 mask = (*lp & 0x1f); 547 break; 548 case 3: 549 mask = (*lp & 0x0f); 550 break; 551 case 4: 552 mask = (*lp & 0x07); 553 break; 554 case 5: 555 mask = (*lp & 0x03); 556 break; 557 case 6: 558 mask = (*lp & 0x01); 559 break; 560 default: 561 mask = 0; 562 break; 563 } 564 565 for (j = 1; j < want; j++) { 566 *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift); 567 shift += 6; 568 } 569 *cp |= mask << shift; 570 lp += want; 571 } else { 572 *cp = BAD_ASCII; 573 lp = NULL; 574 } 575 } else { 576 *cp = BAD_ASCII; 577 lp = NULL; 578 } 579 return lp; 580} 581 582/* 583 * Returns true if the entire string is valid UTF-8. 584 */ 585Boolean 586isValidUTF8(Char *lp) 587{ 588 Boolean result = True; 589 while (*lp) { 590 unsigned ch; 591 Char *next = convertFromUTF8(lp, &ch); 592 if (next == NULL || ch == 0) { 593 result = False; 594 break; 595 } 596 lp = next; 597 } 598 return result; 599} 600 601/* 602 * Write data back to the PTY 603 */ 604void 605writePtyData(int f, IChar *d, size_t len) 606{ 607 size_t n = (len << 1); 608 609 if (VTbuffer->write_len <= len) { 610 VTbuffer->write_len = n; 611 VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len); 612 } 613 614 for (n = 0; n < len; n++) 615 VTbuffer->write_buf[n] = (Char) d[n]; 616 617 TRACE(("writePtyData %lu:%s\n", (unsigned long) n, 618 visibleChars(VTbuffer->write_buf, n))); 619 v_write(f, VTbuffer->write_buf, n); 620} 621#endif /* OPT_WIDE_CHARS */ 622 623#ifdef NO_LEAKS 624void 625noleaks_ptydata(void) 626{ 627 if (VTbuffer != NULL) { 628#if OPT_WIDE_CHARS 629 free(VTbuffer->write_buf); 630#endif 631 FreeAndNull(VTbuffer); 632 } 633} 634#endif 635 636#ifdef TEST_DRIVER 637 638#include "data.c" 639 640void 641NormalExit(void) 642{ 643 fprintf(stderr, "NormalExit!\n"); 644 exit(EXIT_SUCCESS); 645} 646 647void 648Panic(const char *s, int a) 649{ 650 (void) s; 651 (void) a; 652 fprintf(stderr, "Panic!\n"); 653 exit(EXIT_FAILURE); 654} 655 656#if OPT_WIDE_CHARS 657 658#ifdef ALLOWLOGGING 659void 660FlushLog(XtermWidget xw) 661{ 662 (void) xw; 663} 664#endif 665 666void 667v_write(int f, const Char *data, size_t len) 668{ 669 (void) f; 670 (void) data; 671 (void) len; 672} 673 674void 675mk_wcwidth_init(int mode) 676{ 677 (void) mode; 678} 679 680void 681update_font_utf8_mode(void) 682{ 683} 684 685static int message_level = 0; 686static int opt_all = 0; 687static int opt_illegal = 0; 688static int opt_convert = 0; 689static int opt_reverse = 0; 690static long total_test = 0; 691static long total_errs = 0; 692 693static void 694usage(void) 695{ 696 static const char *msg[] = 697 { 698 "Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]", 699 "", 700 "Options:", 701 " -a exercise all legal encode/decode to/from UTF-8", 702 " -c call convertFromUTF8 rather than decodeUTF8", 703 " -i ignore illegal UTF-8 when testing -r option", 704 " -q quieter", 705 " -r reverse/decode from UTF-8 byte-string to/from Unicode", 706 " -v more verbose" 707 }; 708 size_t n; 709 for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 710 fprintf(stderr, "%s\n", msg[n]); 711 } 712 exit(EXIT_FAILURE); 713} 714 715/* 716 * http://www.unicode.org/versions/corrigendum1.html, table 3.1B 717 */ 718#define OkRange(n,lo,hi) \ 719 if (value[n] < lo || value[n] > hi) { \ 720 result = False; \ 721 break; \ 722 } 723static Bool 724is_legal_utf8(const Char *value) 725{ 726 Bool result = True; 727 Char ch; 728 while ((ch = *value) != '\0') { 729 if (ch <= 0x7f) { 730 ++value; 731 } else if (ch >= 0xc2 && ch <= 0xdf) { 732 OkRange(1, 0x80, 0xbf); 733 value += 2; 734 } else if (ch == 0xe0) { 735 OkRange(1, 0xa0, 0xbf); 736 OkRange(2, 0x80, 0xbf); 737 value += 3; 738 } else if (ch >= 0xe1 && ch <= 0xef) { 739 OkRange(1, 0x80, 0xbf); 740 OkRange(2, 0x80, 0xbf); 741 value += 3; 742 } else if (ch == 0xf0) { 743 OkRange(1, 0x90, 0xbf); 744 OkRange(2, 0x80, 0xbf); 745 OkRange(3, 0x80, 0xbf); 746 value += 4; 747 } else if (ch >= 0xf1 && ch <= 0xf3) { 748 OkRange(1, 0x80, 0xbf); 749 OkRange(2, 0x80, 0xbf); 750 OkRange(3, 0x80, 0xbf); 751 value += 4; 752 } else if (ch == 0xf4) { 753 OkRange(1, 0x80, 0x8f); 754 OkRange(2, 0x80, 0xbf); 755 OkRange(3, 0x80, 0xbf); 756 value += 4; 757 } else { 758 result = False; 759 break; 760 } 761 } 762 return result; 763} 764 765static void 766test_utf8_convert(void) 767{ 768 unsigned c_in, c_out; 769 Char buffer[10]; 770 Char *result; 771 unsigned limit = 0x110000; 772 unsigned success = 0; 773 unsigned bucket[256]; 774 775 memset(bucket, 0, sizeof(bucket)); 776 for (c_in = 0; c_in < limit; ++c_in) { 777 memset(buffer, 0, sizeof(buffer)); 778 if ((result = convertToUTF8(buffer, c_in)) == NULL) { 779 TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in)); 780 } else { 781 if ((result = convertFromUTF8(buffer, &c_out)) == NULL) { 782 TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in)); 783 } else if (c_in != c_out) { 784 TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n", 785 c_in, c_out)); 786 } else { 787 while (result-- != buffer) { 788 bucket[*result]++; 789 } 790 ++success; 791 } 792 } 793 } 794 TRACE(("%u/%u successful\n", success, limit)); 795 for (c_in = 0; c_in < 256; ++c_in) { 796 if ((c_in % 8) == 0) { 797 TRACE((" %02X:", c_in)); 798 } 799 TRACE((" %8X", bucket[c_in])); 800 if (((c_in + 1) % 8) == 0) { 801 TRACE(("\n")); 802 } 803 } 804} 805 806static int 807decode_one(const char *source, char **target) 808{ 809 int result = -1; 810 long check; 811 int radix = 0; 812 if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 813 source += 2; 814 radix = 16; 815 } else if (source[0] == '0' && source[1] == 'b') { 816 source += 2; 817 radix = 2; 818 } 819 check = strtol(source, target, radix); 820 if (*target != NULL && *target != source) 821 result = (int) check; 822 return result; 823} 824 825static int 826decode_range(const char *source, int *lo, int *hi) 827{ 828 int result = 0; 829 char *after1; 830 char *after2; 831 if ((*lo = decode_one(source, &after1)) >= 0) { 832 after1 += strspn(after1, ":-.\t "); 833 if ((*hi = decode_one(after1, &after2)) < 0) { 834 *hi = *lo; 835 } 836 result = 1; 837 } 838 return result; 839} 840 841#define MAX_BYTES 6 842 843static void 844do_range(const char *source) 845{ 846 int lo, hi; 847 848 TScreen screen; 849 memset(&screen, 0, sizeof(screen)); 850 851 if (decode_range(source, &lo, &hi)) { 852 while (lo <= hi) { 853 unsigned c_in = (unsigned) lo++; 854 PtyData *data; 855 Char *next; 856 Char buffer[MAX_BYTES + 1]; 857 858 if (opt_reverse) { 859 Bool skip = False; 860 Bool first = True; 861 int j, k; 862 for (j = 0; j < MAX_BYTES; ++j) { 863 unsigned long bits = ((unsigned long) c_in >> (8 * j)); 864 if ((buffer[j] = (Char) bits) == 0) { 865 skip = (bits != 0); 866 break; 867 } 868 } 869 if (skip) 870 continue; 871 initPtyData(&data); 872 for (k = 0; k <= j; ++k) { 873 data->buffer[k] = buffer[j - k - 1]; 874 } 875 if (opt_illegal && !is_legal_utf8(data->buffer)) { 876 free(data); 877 continue; 878 } 879 if (message_level > 1) { 880 printf("TEST "); 881 for (k = 0; k < j; ++k) { 882 printf("%02X", data->buffer[k]); 883 } 884 } 885 data->next = data->buffer; 886 data->last = data->buffer + j; 887 while (decodeUtf8(&screen, data)) { 888 total_test++; 889 if (is_UCS_SPECIAL(data->utf_data)) 890 total_errs++; 891 data->next += data->utf_size; 892 if (message_level > 1) { 893 printf("%s%04X", first ? " ->" : ", ", data->utf_data); 894 } 895 first = False; 896 } 897 if (!first) 898 total_test--; 899 if (message_level > 1) { 900 printf("\n"); 901 fflush(stdout); 902 } 903 free(data); 904 } else if (opt_convert) { 905 unsigned c_out; 906 Char *result; 907 908 memset(buffer, 0, sizeof(buffer)); 909 if ((result = next = convertToUTF8(buffer, c_in)) == NULL) { 910 fprintf(stderr, 911 "conversion of U+%04X to UTF-8 failed\n", c_in); 912 } else if ((result = convertFromUTF8(buffer, &c_out)) == NULL) { 913 fprintf(stderr, 914 "conversion of U+%04X from UTF-8 failed\n", c_in); 915 total_errs++; 916 } else if (c_in != c_out) { 917 fprintf(stderr, 918 "conversion of U+%04X to/from UTF-8 gave U+%04X\n", 919 c_in, c_out); 920 } else if (message_level > 1) { 921 *next = '\0'; 922 printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 923 (unsigned long) (next - buffer), 924 buffer, 925 c_out); 926 fflush(stdout); 927 } 928 } else { 929 initPtyData(&data); 930 next = convertToUTF8(data->buffer, c_in); 931 *next = 0; 932 data->next = data->buffer; 933 data->last = next; 934 decodeUtf8(&screen, data); 935 if (message_level > 1) { 936 printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 937 (unsigned long) (next - data->buffer), 938 data->buffer, 939 data->utf_data); 940 fflush(stdout); 941 } 942 if (c_in != data->utf_data) { 943 fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data); 944 total_errs++; 945 } 946 free(data); 947 } 948 total_test++; 949 } 950 } 951} 952 953int 954main(int argc, char **argv) 955{ 956 int ch; 957 958 setlocale(LC_ALL, ""); 959 while ((ch = getopt(argc, argv, "aciqrv")) != -1) { 960 switch (ch) { 961 case 'a': 962 opt_all = 1; 963 break; 964 case 'c': 965 opt_convert = 1; 966 break; 967 case 'i': 968 opt_illegal = 1; 969 break; 970 case 'q': 971 message_level--; 972 break; 973 case 'r': 974 opt_reverse = 1; 975 break; 976 case 'v': 977 message_level++; 978 break; 979 default: 980 usage(); 981 } 982 } 983 if (opt_all) { 984 test_utf8_convert(); 985 } else { 986 if (optind >= argc) 987 usage(); 988 while (optind < argc) { 989 do_range(argv[optind++]); 990 } 991 if (total_test) { 992 printf("%ld/%ld mismatches (%.0f%%)\n", 993 total_errs, 994 total_test, 995 (100.0 * (double) total_errs) / (double) total_test); 996 } 997 } 998 return EXIT_SUCCESS; 999} 1000#else 1001int 1002main(int argc, char **argv) 1003{ 1004 (void) argc; 1005 (void) argv; 1006 printf("Nothing to be done here...\n"); 1007 return EXIT_SUCCESS; 1008} 1009#endif /* OPT_WIDE_CHARS */ 1010#endif 1011