ptydata.c revision 04b94745
104b94745Smrg/* $XTermId: ptydata.c,v 1.160 2024/05/10 22:54:17 tom Exp $ */ 2d522f475Smrg 30bd37d32Smrg/* 404b94745Smrg * Copyright 1999-2023,2024 by Thomas E. Dickey 50bd37d32Smrg * 60bd37d32Smrg * All Rights Reserved 70bd37d32Smrg * 80bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a 90bd37d32Smrg * copy of this software and associated documentation files (the 100bd37d32Smrg * "Software"), to deal in the Software without restriction, including 110bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish, 120bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to 130bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to 140bd37d32Smrg * the following conditions: 150bd37d32Smrg * 160bd37d32Smrg * The above copyright notice and this permission notice shall be included 170bd37d32Smrg * in all copies or substantial portions of the Software. 180bd37d32Smrg * 190bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 200bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 210bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 220bd37d32Smrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 230bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 240bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 250bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 260bd37d32Smrg * 270bd37d32Smrg * Except as contained in this notice, the name(s) of the above copyright 280bd37d32Smrg * holders shall not be used in advertising or otherwise to promote the 290bd37d32Smrg * sale, use or other dealings in this Software without prior written 300bd37d32Smrg * authorization. 310bd37d32Smrg */ 32d522f475Smrg 33d522f475Smrg#include <data.h> 34d522f475Smrg 35d522f475Smrg#if OPT_WIDE_CHARS 36d522f475Smrg#include <menu.h> 37913cc679Smrg#include <wcwidth.h> 38d522f475Smrg#endif 39d522f475Smrg 40f2e35a3aSmrg#ifdef TEST_DRIVER 41f2e35a3aSmrg#undef TRACE 42f2e35a3aSmrg#define TRACE(p) if (1) printf p 43f2e35a3aSmrg#undef TRACE2 44f2e35a3aSmrg#define TRACE2(p) if (0) printf p 45f2e35a3aSmrg#define visibleChars(buf, len) "buffer" 46f2e35a3aSmrg#endif 47f2e35a3aSmrg 48d522f475Smrg/* 49d522f475Smrg * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX 50d522f475Smrg * systems are broken and return EWOULDBLOCK when they should return EAGAIN. 51d522f475Smrg * Note that this macro may evaluate its argument more than once. 52d522f475Smrg */ 53d522f475Smrg#if defined(EAGAIN) && defined(EWOULDBLOCK) 54d522f475Smrg#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK) 55d522f475Smrg#else 56d522f475Smrg#ifdef EAGAIN 57d522f475Smrg#define E_TEST(err) ((err) == EAGAIN) 58d522f475Smrg#else 59d522f475Smrg#define E_TEST(err) ((err) == EWOULDBLOCK) 60d522f475Smrg#endif 61d522f475Smrg#endif 62d522f475Smrg 63d522f475Smrg#if OPT_WIDE_CHARS 64d522f475Smrg/* 65d522f475Smrg * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data. 66d522f475Smrg * The number of bytes converted will be nonzero iff there is data. 67d522f475Smrg */ 68d522f475SmrgBool 69894e0ac8SmrgdecodeUtf8(TScreen *screen, PtyData *data) 70d522f475Smrg{ 715307cd1aSmrg size_t i; 725307cd1aSmrg size_t length = (size_t) (data->last - data->next); 73d522f475Smrg int utf_count = 0; 74956cc18dSsnj unsigned utf_char = 0; 75d522f475Smrg 76d522f475Smrg data->utf_size = 0; 77d522f475Smrg for (i = 0; i < length; i++) { 78d522f475Smrg unsigned c = data->next[i]; 79d522f475Smrg 80d522f475Smrg /* Combine UTF-8 into Unicode */ 81d522f475Smrg if (c < 0x80) { 82d522f475Smrg /* We received an ASCII character */ 83d522f475Smrg if (utf_count > 0) { 84d522f475Smrg data->utf_data = UCS_REPL; /* prev. sequence incomplete */ 85a1f3da82Smrg data->utf_size = i; 86d522f475Smrg } else { 87956cc18dSsnj data->utf_data = (IChar) c; 88d522f475Smrg data->utf_size = 1; 89d522f475Smrg } 90d522f475Smrg break; 91f2e35a3aSmrg } else if (screen->vt100_graphics 92f2e35a3aSmrg && (c < 0x100) 93f2e35a3aSmrg && (utf_count == 0) 94f2e35a3aSmrg && screen->gsets[(int) screen->curgr] != nrc_ASCII) { 95f2e35a3aSmrg data->utf_data = (IChar) c; 96f2e35a3aSmrg data->utf_size = 1; 97f2e35a3aSmrg break; 98d522f475Smrg } else if (c < 0xc0) { 99d522f475Smrg /* We received a continuation byte */ 100d522f475Smrg if (utf_count < 1) { 10104b94745Smrg if (screen->c1_printable) { 10204b94745Smrg data->utf_data = (IChar) c; 10304b94745Smrg } else if ((i + 1) < length 10404b94745Smrg && data->next[i + 1] > 0x20 10504b94745Smrg && data->next[i + 1] < 0x80) { 10604b94745Smrg /* 10704b94745Smrg * Allow for C1 control string if the next byte is 10804b94745Smrg * available for inspection. 10904b94745Smrg */ 11004b94745Smrg data->utf_data = (IChar) c; 11104b94745Smrg } else { 11204b94745Smrg /* 11304b94745Smrg * We received a continuation byte before receiving a 11404b94745Smrg * sequence state, or a failed attempt to use a C1 control 11504b94745Smrg * string. 11604b94745Smrg */ 11704b94745Smrg data->utf_data = (IChar) UCS_REPL; 11804b94745Smrg } 119d522f475Smrg data->utf_size = (i + 1); 120d522f475Smrg break; 121f2e35a3aSmrg } else if (screen->utf8_weblike 122f2e35a3aSmrg && (utf_count == 3 123f2e35a3aSmrg && utf_char == 0x04 124f2e35a3aSmrg && c >= 0x90)) { 125f2e35a3aSmrg /* The encoding would form a code point beyond U+10FFFF. */ 126f2e35a3aSmrg data->utf_size = i; 127f2e35a3aSmrg data->utf_data = UCS_REPL; 128f2e35a3aSmrg break; 129f2e35a3aSmrg } else if (screen->utf8_weblike 130f2e35a3aSmrg && (utf_count == 2 131f2e35a3aSmrg && utf_char == 0x0d 132f2e35a3aSmrg && c >= 0xa0)) { 133f2e35a3aSmrg /* The encoding would form a surrogate code point. */ 134f2e35a3aSmrg data->utf_size = i; 135f2e35a3aSmrg data->utf_data = UCS_REPL; 136f2e35a3aSmrg break; 137d522f475Smrg } else { 138d522f475Smrg /* Check for overlong UTF-8 sequences for which a shorter 139d522f475Smrg * encoding would exist and replace them with UCS_REPL. 140d522f475Smrg * An overlong UTF-8 sequence can have any of the following 141d522f475Smrg * forms: 142d522f475Smrg * 1100000x 10xxxxxx 143d522f475Smrg * 11100000 100xxxxx 10xxxxxx 144d522f475Smrg * 11110000 1000xxxx 10xxxxxx 10xxxxxx 145d522f475Smrg * 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx 146d522f475Smrg * 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 147d522f475Smrg */ 148d522f475Smrg if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) { 149f2e35a3aSmrg if (screen->utf8_weblike) { 150f2e35a3aSmrg /* overlong sequence continued */ 151f2e35a3aSmrg data->utf_data = UCS_REPL; 152f2e35a3aSmrg data->utf_size = i; 153f2e35a3aSmrg break; 154f2e35a3aSmrg } else { 155f2e35a3aSmrg utf_char = UCS_REPL; 156f2e35a3aSmrg } 157d522f475Smrg } 158d522f475Smrg utf_char <<= 6; 159d522f475Smrg utf_char |= (c & 0x3f); 160d522f475Smrg if ((utf_char >= 0xd800 && 161d522f475Smrg utf_char <= 0xdfff) || 162d522f475Smrg (utf_char == 0xfffe) || 163d522f475Smrg (utf_char == HIDDEN_CHAR)) { 164d522f475Smrg utf_char = UCS_REPL; 165d522f475Smrg } 166d522f475Smrg utf_count--; 167d522f475Smrg if (utf_count == 0) { 168956cc18dSsnj#if !OPT_WIDER_ICHAR 169d522f475Smrg /* characters outside UCS-2 become UCS_REPL */ 170f2e35a3aSmrg if (utf_char > NARROW_ICHAR) { 171d522f475Smrg TRACE(("using replacement for %#x\n", utf_char)); 172d522f475Smrg utf_char = UCS_REPL; 173d522f475Smrg } 174956cc18dSsnj#endif 175956cc18dSsnj data->utf_data = (IChar) utf_char; 176d522f475Smrg data->utf_size = (i + 1); 177d522f475Smrg break; 178d522f475Smrg } 179d522f475Smrg } 180d522f475Smrg } else { 181d522f475Smrg /* We received a sequence start byte */ 182d522f475Smrg if (utf_count > 0) { 183f2e35a3aSmrg /* previous sequence is incomplete */ 184f2e35a3aSmrg data->utf_data = UCS_REPL; 185f2e35a3aSmrg data->utf_size = i; 186d522f475Smrg break; 187d522f475Smrg } 188f2e35a3aSmrg if (screen->utf8_weblike) { 189f2e35a3aSmrg if (c < 0xe0) { 190f2e35a3aSmrg if (!(c & 0x1e)) { 191f2e35a3aSmrg /* overlong sequence start */ 192f2e35a3aSmrg data->utf_data = UCS_REPL; 193f2e35a3aSmrg data->utf_size = (i + 1); 194f2e35a3aSmrg break; 195f2e35a3aSmrg } 196f2e35a3aSmrg utf_count = 1; 197f2e35a3aSmrg utf_char = (c & 0x1f); 198f2e35a3aSmrg } else if (c < 0xf0) { 199f2e35a3aSmrg utf_count = 2; 200f2e35a3aSmrg utf_char = (c & 0x0f); 201f2e35a3aSmrg } else if (c < 0xf5) { 202f2e35a3aSmrg utf_count = 3; 203f2e35a3aSmrg utf_char = (c & 0x07); 204f2e35a3aSmrg } else { 205f2e35a3aSmrg data->utf_data = UCS_REPL; 206f2e35a3aSmrg data->utf_size = (i + 1); 207f2e35a3aSmrg break; 208a1f3da82Smrg } 209d522f475Smrg } else { 210f2e35a3aSmrg if (c < 0xe0) { 211f2e35a3aSmrg utf_count = 1; 212f2e35a3aSmrg utf_char = (c & 0x1f); 213f2e35a3aSmrg if (!(c & 0x1e)) { 214f2e35a3aSmrg /* overlong sequence */ 215f2e35a3aSmrg utf_char = UCS_REPL; 216f2e35a3aSmrg } 217f2e35a3aSmrg } else if (c < 0xf0) { 218f2e35a3aSmrg utf_count = 2; 219f2e35a3aSmrg utf_char = (c & 0x0f); 220f2e35a3aSmrg } else if (c < 0xf8) { 221f2e35a3aSmrg utf_count = 3; 222f2e35a3aSmrg utf_char = (c & 0x07); 223f2e35a3aSmrg } else if (c < 0xfc) { 224f2e35a3aSmrg utf_count = 4; 225f2e35a3aSmrg utf_char = (c & 0x03); 226f2e35a3aSmrg } else if (c < 0xfe) { 227f2e35a3aSmrg utf_count = 5; 228f2e35a3aSmrg utf_char = (c & 0x01); 229f2e35a3aSmrg } else { 230f2e35a3aSmrg data->utf_data = UCS_REPL; 231f2e35a3aSmrg data->utf_size = (i + 1); 232f2e35a3aSmrg break; 233f2e35a3aSmrg } 234d522f475Smrg } 235d522f475Smrg } 236d522f475Smrg } 237d522f475Smrg#if OPT_TRACE > 1 23804b94745Smrg TRACE(("UTF-8 char %04X [%lu..%lu]\n", 239d522f475Smrg data->utf_data, 24004b94745Smrg (unsigned long) (data->next - data->buffer), 24104b94745Smrg (unsigned long) (data->next - data->buffer + data->utf_size - 1))); 242d522f475Smrg#endif 243d522f475Smrg 244d522f475Smrg return (data->utf_size != 0); 245d522f475Smrg} 246d522f475Smrg#endif 247d522f475Smrg 248d522f475Smrgint 249894e0ac8SmrgreadPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data) 250d522f475Smrg{ 25120d2c4d2Smrg TScreen *screen = TScreenOf(xw); 252d522f475Smrg int size = 0; 253d522f475Smrg 254d522f475Smrg#ifdef VMS 255d522f475Smrg if (*select_mask & pty_mask) { 25620d2c4d2Smrg trimPtyData(xw, data); 257d522f475Smrg if (read_queue.flink != 0) { 258d522f475Smrg size = tt_read(data->next); 259d522f475Smrg if (size == 0) { 260d522f475Smrg Panic("input: read returned zero\n", 0); 261d522f475Smrg } 262d522f475Smrg } else { 263d522f475Smrg sys$hiber(); 264d522f475Smrg } 265d522f475Smrg } 266d522f475Smrg#else /* !VMS */ 267d522f475Smrg if (FD_ISSET(screen->respond, select_mask)) { 268956cc18dSsnj int save_err; 26920d2c4d2Smrg trimPtyData(xw, data); 270d522f475Smrg 27120d2c4d2Smrg size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE); 272956cc18dSsnj save_err = errno; 273d522f475Smrg#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__) 274956cc18dSsnj /* 275956cc18dSsnj * Yes, I know this is a majorly f*ugly hack, however it seems to 276956cc18dSsnj * be necessary for Solaris x86. DWH 11/15/94 277956cc18dSsnj * Dunno why though.. 278956cc18dSsnj * (and now CYGWIN, alanh@xfree86.org 08/15/01 279956cc18dSsnj */ 280956cc18dSsnj if (size <= 0) { 281956cc18dSsnj if (save_err == EIO || save_err == 0) 2820bd37d32Smrg NormalExit(); 283956cc18dSsnj else if (!E_TEST(save_err)) 284956cc18dSsnj Panic("input: read returned unexpected error (%d)\n", save_err); 285956cc18dSsnj size = 0; 286956cc18dSsnj } 287956cc18dSsnj#else /* !f*ugly */ 288956cc18dSsnj if (size < 0) { 289956cc18dSsnj if (save_err == EIO) 2900bd37d32Smrg NormalExit(); 291956cc18dSsnj else if (!E_TEST(save_err)) 292956cc18dSsnj Panic("input: read returned unexpected error (%d)\n", save_err); 293d522f475Smrg size = 0; 294d522f475Smrg } else if (size == 0) { 2950bd37d32Smrg#if defined(__FreeBSD__) 2960bd37d32Smrg NormalExit(); 297d522f475Smrg#else 298d522f475Smrg Panic("input: read returned zero\n", 0); 299d522f475Smrg#endif 300d522f475Smrg } 301956cc18dSsnj#endif /* f*ugly */ 302d522f475Smrg } 303d522f475Smrg#endif /* VMS */ 304d522f475Smrg 305d522f475Smrg if (size) { 306d522f475Smrg#if OPT_TRACE 307d522f475Smrg int i; 308d522f475Smrg 309d522f475Smrg TRACE(("read %d bytes from pty\n", size)); 310d522f475Smrg for (i = 0; i < size; i++) { 311d522f475Smrg if (!(i % 16)) 312d522f475Smrg TRACE(("%s", i ? "\n " : "READ")); 313d522f475Smrg TRACE((" %02X", data->last[i])); 314d522f475Smrg } 315d522f475Smrg TRACE(("\n")); 316d522f475Smrg#endif 317d522f475Smrg data->last += size; 318d522f475Smrg#ifdef ALLOWLOGGING 31920d2c4d2Smrg TScreenOf(term)->logstart = VTbuffer->next; 320d522f475Smrg#endif 321d522f475Smrg } 322d522f475Smrg 323d522f475Smrg return (size); 324d522f475Smrg} 325d522f475Smrg 326d522f475Smrg/* 327d522f475Smrg * Return the next value from the input buffer. Note that morePtyData() is 328d522f475Smrg * always called before this function, so we can do the UTF-8 input conversion 329d522f475Smrg * in that function and simply return the result here. 330d522f475Smrg */ 331d522f475Smrg#if OPT_WIDE_CHARS 332d522f475SmrgIChar 333894e0ac8SmrgnextPtyData(TScreen *screen, PtyData *data) 334d522f475Smrg{ 335d522f475Smrg IChar result; 336d522f475Smrg if (screen->utf8_inparse) { 337f2e35a3aSmrg skipPtyData(data, result); 338d522f475Smrg } else { 339d522f475Smrg result = *((data)->next++); 340956cc18dSsnj if (!screen->output_eight_bits) { 341956cc18dSsnj result = (IChar) (result & 0x7f); 342956cc18dSsnj } 343d522f475Smrg } 344d522f475Smrg TRACE2(("nextPtyData returns %#x\n", result)); 345d522f475Smrg return result; 346d522f475Smrg} 347d522f475Smrg#endif 348d522f475Smrg 349d522f475Smrg#if OPT_WIDE_CHARS 350d522f475Smrg/* 351d522f475Smrg * Called when UTF-8 mode has been turned on/off. 352d522f475Smrg */ 353d522f475Smrgvoid 354894e0ac8SmrgswitchPtyData(TScreen *screen, int flag) 355d522f475Smrg{ 356d522f475Smrg if (screen->utf8_mode != flag) { 357d522f475Smrg screen->utf8_mode = flag; 358956cc18dSsnj screen->utf8_inparse = (Boolean) (flag != 0); 359913cc679Smrg mk_wcwidth_init(screen->utf8_mode); 360d522f475Smrg 361d522f475Smrg TRACE(("turning UTF-8 mode %s\n", BtoS(flag))); 362d522f475Smrg update_font_utf8_mode(); 363d522f475Smrg } 364d522f475Smrg} 365d522f475Smrg#endif 366d522f475Smrg 367d522f475Smrg/* 368d522f475Smrg * Allocate a buffer. 369d522f475Smrg */ 370d522f475Smrgvoid 371894e0ac8SmrginitPtyData(PtyData **result) 372d522f475Smrg{ 373d522f475Smrg PtyData *data; 374d522f475Smrg 375f2e35a3aSmrg TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n", 376f2e35a3aSmrg FRG_SIZE, BUF_SIZE)); 377d522f475Smrg 378d522f475Smrg if (FRG_SIZE < 64) 379d522f475Smrg FRG_SIZE = 64; 380d522f475Smrg if (BUF_SIZE < FRG_SIZE) 381d522f475Smrg BUF_SIZE = FRG_SIZE; 382d522f475Smrg if (BUF_SIZE % FRG_SIZE) 383d522f475Smrg BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE); 384d522f475Smrg 385f2e35a3aSmrg TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n", 386f2e35a3aSmrg FRG_SIZE, BUF_SIZE)); 387d522f475Smrg 388a1f3da82Smrg data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE)); 389d522f475Smrg 390d522f475Smrg memset(data, 0, sizeof(*data)); 391d522f475Smrg data->next = data->buffer; 392d522f475Smrg data->last = data->buffer; 393d522f475Smrg *result = data; 394d522f475Smrg} 395d522f475Smrg 396d522f475Smrg/* 39720d2c4d2Smrg * Initialize a buffer for the caller, using its data in 'next'. 398d522f475Smrg */ 399d522f475Smrg#if OPT_WIDE_CHARS 400d522f475SmrgPtyData * 401894e0ac8SmrgfakePtyData(PtyData *result, Char *next, Char *last) 402d522f475Smrg{ 403d522f475Smrg PtyData *data = result; 404d522f475Smrg 405d522f475Smrg memset(data, 0, sizeof(*data)); 406d522f475Smrg data->next = next; 407d522f475Smrg data->last = last; 408d522f475Smrg 409d522f475Smrg return data; 410d522f475Smrg} 411d522f475Smrg#endif 412d522f475Smrg 413d522f475Smrg/* 414d522f475Smrg * Remove used data by shifting the buffer down, to make room for more data, 415d522f475Smrg * e.g., a continuation-read. 416d522f475Smrg */ 417d522f475Smrgvoid 418f2e35a3aSmrgtrimPtyData(XtermWidget xw, PtyData *data) 419d522f475Smrg{ 420f2e35a3aSmrg (void) xw; 42120d2c4d2Smrg FlushLog(xw); 422d522f475Smrg 423d522f475Smrg if (data->next != data->buffer) { 4245307cd1aSmrg size_t i; 4255307cd1aSmrg size_t n = (size_t) (data->last - data->next); 426d522f475Smrg 4275307cd1aSmrg TRACE(("shifting buffer down by %lu\n", (unsigned long) n)); 428d522f475Smrg for (i = 0; i < n; ++i) { 429d522f475Smrg data->buffer[i] = data->next[i]; 430d522f475Smrg } 431d522f475Smrg data->next = data->buffer; 432d522f475Smrg data->last = data->next + n; 433d522f475Smrg } 434d522f475Smrg 435d522f475Smrg} 436d522f475Smrg 437d522f475Smrg/* 438d522f475Smrg * Insert new data into the input buffer so the next calls to morePtyData() 439d522f475Smrg * and nextPtyData() will return that. 440d522f475Smrg */ 441d522f475Smrgvoid 4425307cd1aSmrgfillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length) 443d522f475Smrg{ 4445307cd1aSmrg size_t size; 4455307cd1aSmrg size_t n; 446d522f475Smrg 447d522f475Smrg /* remove the used portion of the buffer */ 44820d2c4d2Smrg trimPtyData(xw, data); 449d522f475Smrg 450d522f475Smrg VTbuffer->last += length; 4515307cd1aSmrg size = (size_t) (VTbuffer->last - VTbuffer->next); 452d522f475Smrg 453d522f475Smrg /* shift the unused portion up to make room */ 454d522f475Smrg for (n = size; n >= length; --n) 455d522f475Smrg VTbuffer->next[n] = VTbuffer->next[n - length]; 456d522f475Smrg 457d522f475Smrg /* insert the new bytes to interpret */ 458d522f475Smrg for (n = 0; n < length; n++) 459d522f475Smrg VTbuffer->next[n] = CharOf(value[n]); 460d522f475Smrg} 461d522f475Smrg 462d522f475Smrg#if OPT_WIDE_CHARS 463f2e35a3aSmrg/* 464f2e35a3aSmrg * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target 465f2e35a3aSmrg * 'lp', and returning a pointer past the converted character. 466f2e35a3aSmrg */ 467d522f475SmrgChar * 468894e0ac8SmrgconvertToUTF8(Char *lp, unsigned c) 469d522f475Smrg{ 47020d2c4d2Smrg#define CH(n) (Char)((c) >> ((n) * 8)) 47120d2c4d2Smrg if (c < 0x80) { 47220d2c4d2Smrg /* 0******* */ 47320d2c4d2Smrg *lp++ = (Char) CH(0); 47420d2c4d2Smrg } else if (c < 0x800) { 47520d2c4d2Smrg /* 110***** 10****** */ 47620d2c4d2Smrg *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2)); 47720d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 47820d2c4d2Smrg } else if (c < 0x00010000) { 47920d2c4d2Smrg /* 1110**** 10****** 10****** */ 48020d2c4d2Smrg *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4)); 48120d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 48220d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 48320d2c4d2Smrg } else if (c < 0x00200000) { 48420d2c4d2Smrg *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2)); 48520d2c4d2Smrg *lp++ = (Char) (0x80 | 48620d2c4d2Smrg ((int) (CH(1) & 0xf0) >> 4) | 48720d2c4d2Smrg ((int) (CH(2) & 0x03) << 4)); 48820d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 48920d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 49020d2c4d2Smrg } else if (c < 0x04000000) { 49120d2c4d2Smrg *lp++ = (Char) (0xf8 | (CH(3) & 0x03)); 49220d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(2) >> 2)); 49320d2c4d2Smrg *lp++ = (Char) (0x80 | 49420d2c4d2Smrg ((int) (CH(1) & 0xf0) >> 4) | 49520d2c4d2Smrg ((int) (CH(2) & 0x03) << 4)); 49620d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 49720d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 49820d2c4d2Smrg } else { 49920d2c4d2Smrg *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6)); 50020d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(3) & 0x3f)); 50120d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(2) >> 2)); 50220d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4)); 50320d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 50420d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 505d522f475Smrg } 506d522f475Smrg return lp; 50720d2c4d2Smrg#undef CH 508d522f475Smrg} 509d522f475Smrg 510f2e35a3aSmrg/* 511f2e35a3aSmrg * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer 512f2e35a3aSmrg * past the converted UTF-8 input. The first 256 values align with ISO-8859-1, 513f2e35a3aSmrg * making it possible to use this to convert to Latin-1. 514f2e35a3aSmrg * 515f2e35a3aSmrg * If the conversion fails, return null. 516f2e35a3aSmrg */ 517f2e35a3aSmrgChar * 518f2e35a3aSmrgconvertFromUTF8(Char *lp, unsigned *cp) 519f2e35a3aSmrg{ 520f2e35a3aSmrg int want; 521f2e35a3aSmrg 522f2e35a3aSmrg /* 523f2e35a3aSmrg * Find the number of bytes we will need from the source. 524f2e35a3aSmrg */ 525f2e35a3aSmrg if ((*lp & 0x80) == 0) { 526f2e35a3aSmrg want = 1; 527f2e35a3aSmrg } else if ((*lp & 0xe0) == 0xc0) { 528f2e35a3aSmrg want = 2; 529f2e35a3aSmrg } else if ((*lp & 0xf0) == 0xe0) { 530f2e35a3aSmrg want = 3; 531f2e35a3aSmrg } else if ((*lp & 0xf8) == 0xf0) { 532f2e35a3aSmrg want = 4; 533f2e35a3aSmrg } else if ((*lp & 0xfc) == 0xf8) { 534f2e35a3aSmrg want = 5; 535f2e35a3aSmrg } else if ((*lp & 0xfe) == 0xfc) { 536f2e35a3aSmrg want = 6; 537f2e35a3aSmrg } else { 538f2e35a3aSmrg want = 0; 539f2e35a3aSmrg } 540f2e35a3aSmrg 541f2e35a3aSmrg if (want) { 542f2e35a3aSmrg int have = 1; 543f2e35a3aSmrg 544f2e35a3aSmrg while (lp[have] != '\0') { 545f2e35a3aSmrg if ((lp[have] & 0xc0) != 0x80) 546f2e35a3aSmrg break; 547f2e35a3aSmrg ++have; 548f2e35a3aSmrg } 549f2e35a3aSmrg if (want == have) { 550f2e35a3aSmrg unsigned mask = 0; 551f2e35a3aSmrg int j; 552f2e35a3aSmrg int shift = 0; 553f2e35a3aSmrg 554f2e35a3aSmrg *cp = 0; 555f2e35a3aSmrg switch (want) { 556f2e35a3aSmrg case 1: 557f2e35a3aSmrg mask = (*lp); 558f2e35a3aSmrg break; 559f2e35a3aSmrg case 2: 560f2e35a3aSmrg mask = (*lp & 0x1f); 561f2e35a3aSmrg break; 562f2e35a3aSmrg case 3: 563f2e35a3aSmrg mask = (*lp & 0x0f); 564f2e35a3aSmrg break; 565f2e35a3aSmrg case 4: 566f2e35a3aSmrg mask = (*lp & 0x07); 567f2e35a3aSmrg break; 568f2e35a3aSmrg case 5: 569f2e35a3aSmrg mask = (*lp & 0x03); 570f2e35a3aSmrg break; 571f2e35a3aSmrg case 6: 572f2e35a3aSmrg mask = (*lp & 0x01); 573f2e35a3aSmrg break; 574f2e35a3aSmrg default: 575f2e35a3aSmrg mask = 0; 576f2e35a3aSmrg break; 577f2e35a3aSmrg } 578f2e35a3aSmrg 579f2e35a3aSmrg for (j = 1; j < want; j++) { 580f2e35a3aSmrg *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift); 581f2e35a3aSmrg shift += 6; 582f2e35a3aSmrg } 583f2e35a3aSmrg *cp |= mask << shift; 584f2e35a3aSmrg lp += want; 585f2e35a3aSmrg } else { 586f2e35a3aSmrg *cp = BAD_ASCII; 587f2e35a3aSmrg lp = NULL; 588f2e35a3aSmrg } 589f2e35a3aSmrg } else { 590f2e35a3aSmrg *cp = BAD_ASCII; 591f2e35a3aSmrg lp = NULL; 592f2e35a3aSmrg } 593f2e35a3aSmrg return lp; 594f2e35a3aSmrg} 595f2e35a3aSmrg 596f2e35a3aSmrg/* 597f2e35a3aSmrg * Returns true if the entire string is valid UTF-8. 598f2e35a3aSmrg */ 599f2e35a3aSmrgBoolean 600f2e35a3aSmrgisValidUTF8(Char *lp) 601f2e35a3aSmrg{ 602f2e35a3aSmrg Boolean result = True; 603f2e35a3aSmrg while (*lp) { 604f2e35a3aSmrg unsigned ch; 605f2e35a3aSmrg Char *next = convertFromUTF8(lp, &ch); 606f2e35a3aSmrg if (next == NULL || ch == 0) { 607f2e35a3aSmrg result = False; 608f2e35a3aSmrg break; 609f2e35a3aSmrg } 610f2e35a3aSmrg lp = next; 611f2e35a3aSmrg } 612f2e35a3aSmrg return result; 613f2e35a3aSmrg} 614f2e35a3aSmrg 615d522f475Smrg/* 616d522f475Smrg * Write data back to the PTY 617d522f475Smrg */ 618d522f475Smrgvoid 6195307cd1aSmrgwritePtyData(int f, IChar *d, size_t len) 620d522f475Smrg{ 6215307cd1aSmrg size_t n = (len << 1); 622d522f475Smrg 623d522f475Smrg if (VTbuffer->write_len <= len) { 624d522f475Smrg VTbuffer->write_len = n; 6255307cd1aSmrg VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len); 626d522f475Smrg } 627d522f475Smrg 628d522f475Smrg for (n = 0; n < len; n++) 629956cc18dSsnj VTbuffer->write_buf[n] = (Char) d[n]; 630d522f475Smrg 6315307cd1aSmrg TRACE(("writePtyData %lu:%s\n", (unsigned long) n, 632956cc18dSsnj visibleChars(VTbuffer->write_buf, n))); 633d522f475Smrg v_write(f, VTbuffer->write_buf, n); 634d522f475Smrg} 635d522f475Smrg#endif /* OPT_WIDE_CHARS */ 636d522f475Smrg 637d522f475Smrg#ifdef NO_LEAKS 638d522f475Smrgvoid 639d522f475Smrgnoleaks_ptydata(void) 640d522f475Smrg{ 641d522f475Smrg if (VTbuffer != 0) { 642d522f475Smrg#if OPT_WIDE_CHARS 643f2e35a3aSmrg free(VTbuffer->write_buf); 644f2e35a3aSmrg#endif 645f2e35a3aSmrg FreeAndNull(VTbuffer); 646f2e35a3aSmrg } 647f2e35a3aSmrg} 648f2e35a3aSmrg#endif 649f2e35a3aSmrg 650f2e35a3aSmrg#ifdef TEST_DRIVER 651f2e35a3aSmrg 652f2e35a3aSmrg#include "data.c" 653f2e35a3aSmrg 654f2e35a3aSmrgvoid 655f2e35a3aSmrgNormalExit(void) 656f2e35a3aSmrg{ 657f2e35a3aSmrg fprintf(stderr, "NormalExit!\n"); 658f2e35a3aSmrg exit(EXIT_SUCCESS); 659f2e35a3aSmrg} 660f2e35a3aSmrg 661f2e35a3aSmrgvoid 662f2e35a3aSmrgPanic(const char *s, int a) 663f2e35a3aSmrg{ 664f2e35a3aSmrg (void) s; 665f2e35a3aSmrg (void) a; 666f2e35a3aSmrg fprintf(stderr, "Panic!\n"); 667f2e35a3aSmrg exit(EXIT_FAILURE); 668f2e35a3aSmrg} 669f2e35a3aSmrg 670f2e35a3aSmrg#if OPT_WIDE_CHARS 671f2e35a3aSmrg 672f2e35a3aSmrg#ifdef ALLOWLOGGING 673f2e35a3aSmrgvoid 674f2e35a3aSmrgFlushLog(XtermWidget xw) 675f2e35a3aSmrg{ 676f2e35a3aSmrg (void) xw; 677f2e35a3aSmrg} 678d522f475Smrg#endif 679f2e35a3aSmrg 680f2e35a3aSmrgvoid 6815307cd1aSmrgv_write(int f, const Char *data, size_t len) 682f2e35a3aSmrg{ 683f2e35a3aSmrg (void) f; 684f2e35a3aSmrg (void) data; 685f2e35a3aSmrg (void) len; 686f2e35a3aSmrg} 687f2e35a3aSmrg 688f2e35a3aSmrgvoid 689f2e35a3aSmrgmk_wcwidth_init(int mode) 690f2e35a3aSmrg{ 691f2e35a3aSmrg (void) mode; 692f2e35a3aSmrg} 693f2e35a3aSmrg 694f2e35a3aSmrgvoid 695f2e35a3aSmrgupdate_font_utf8_mode(void) 696f2e35a3aSmrg{ 697f2e35a3aSmrg} 698f2e35a3aSmrg 699f2e35a3aSmrgstatic int message_level = 0; 700f2e35a3aSmrgstatic int opt_all = 0; 701f2e35a3aSmrgstatic int opt_illegal = 0; 702f2e35a3aSmrgstatic int opt_convert = 0; 703f2e35a3aSmrgstatic int opt_reverse = 0; 704f2e35a3aSmrgstatic long total_test = 0; 705f2e35a3aSmrgstatic long total_errs = 0; 706f2e35a3aSmrg 707f2e35a3aSmrgstatic void 708f2e35a3aSmrgusage(void) 709f2e35a3aSmrg{ 710f2e35a3aSmrg static const char *msg[] = 711f2e35a3aSmrg { 712f2e35a3aSmrg "Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]", 713f2e35a3aSmrg "", 714f2e35a3aSmrg "Options:", 715f2e35a3aSmrg " -a exercise all legal encode/decode to/from UTF-8", 716f2e35a3aSmrg " -c call convertFromUTF8 rather than decodeUTF8", 717f2e35a3aSmrg " -i ignore illegal UTF-8 when testing -r option", 718f2e35a3aSmrg " -q quieter", 719f2e35a3aSmrg " -r reverse/decode from UTF-8 byte-string to/from Unicode", 720f2e35a3aSmrg " -v more verbose" 721f2e35a3aSmrg }; 722f2e35a3aSmrg size_t n; 723f2e35a3aSmrg for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 724f2e35a3aSmrg fprintf(stderr, "%s\n", msg[n]); 725f2e35a3aSmrg } 726f2e35a3aSmrg exit(EXIT_FAILURE); 727f2e35a3aSmrg} 728f2e35a3aSmrg 729f2e35a3aSmrg/* 730f2e35a3aSmrg * http://www.unicode.org/versions/corrigendum1.html, table 3.1B 731f2e35a3aSmrg */ 732f2e35a3aSmrg#define OkRange(n,lo,hi) \ 733f2e35a3aSmrg if (value[n] < lo || value[n] > hi) { \ 734f2e35a3aSmrg result = False; \ 735f2e35a3aSmrg break; \ 736f2e35a3aSmrg } 737f2e35a3aSmrgstatic Bool 738f2e35a3aSmrgis_legal_utf8(const Char *value) 739f2e35a3aSmrg{ 740f2e35a3aSmrg Bool result = True; 741f2e35a3aSmrg Char ch; 742f2e35a3aSmrg while ((ch = *value) != '\0') { 743f2e35a3aSmrg if (ch <= 0x7f) { 744f2e35a3aSmrg ++value; 745f2e35a3aSmrg } else if (ch >= 0xc2 && ch <= 0xdf) { 746f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 747f2e35a3aSmrg value += 2; 748f2e35a3aSmrg } else if (ch == 0xe0) { 749f2e35a3aSmrg OkRange(1, 0xa0, 0xbf); 750f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 751f2e35a3aSmrg value += 3; 752f2e35a3aSmrg } else if (ch >= 0xe1 && ch <= 0xef) { 753f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 754f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 755f2e35a3aSmrg value += 3; 756f2e35a3aSmrg } else if (ch == 0xf0) { 757f2e35a3aSmrg OkRange(1, 0x90, 0xbf); 758f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 759f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 760f2e35a3aSmrg value += 4; 761f2e35a3aSmrg } else if (ch >= 0xf1 && ch <= 0xf3) { 762f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 763f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 764f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 765f2e35a3aSmrg value += 4; 766f2e35a3aSmrg } else if (ch == 0xf4) { 767f2e35a3aSmrg OkRange(1, 0x80, 0x8f); 768f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 769f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 770f2e35a3aSmrg value += 4; 771f2e35a3aSmrg } else { 772f2e35a3aSmrg result = False; 773f2e35a3aSmrg break; 774f2e35a3aSmrg } 775f2e35a3aSmrg } 776f2e35a3aSmrg return result; 777f2e35a3aSmrg} 778f2e35a3aSmrg 779f2e35a3aSmrgstatic void 780f2e35a3aSmrgtest_utf8_convert(void) 781f2e35a3aSmrg{ 782f2e35a3aSmrg unsigned c_in, c_out; 783f2e35a3aSmrg Char buffer[10]; 784f2e35a3aSmrg Char *result; 785f2e35a3aSmrg unsigned limit = 0x110000; 786f2e35a3aSmrg unsigned success = 0; 787f2e35a3aSmrg unsigned bucket[256]; 788f2e35a3aSmrg 789f2e35a3aSmrg memset(bucket, 0, sizeof(bucket)); 790f2e35a3aSmrg for (c_in = 0; c_in < limit; ++c_in) { 791f2e35a3aSmrg memset(buffer, 0, sizeof(buffer)); 792f2e35a3aSmrg if ((result = convertToUTF8(buffer, c_in)) == 0) { 793f2e35a3aSmrg TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in)); 794f2e35a3aSmrg } else { 795f2e35a3aSmrg if ((result = convertFromUTF8(buffer, &c_out)) == 0) { 796f2e35a3aSmrg TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in)); 797f2e35a3aSmrg } else if (c_in != c_out) { 798f2e35a3aSmrg TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n", 799f2e35a3aSmrg c_in, c_out)); 800f2e35a3aSmrg } else { 801f2e35a3aSmrg while (result-- != buffer) { 802f2e35a3aSmrg bucket[*result]++; 803f2e35a3aSmrg } 804f2e35a3aSmrg ++success; 805f2e35a3aSmrg } 806f2e35a3aSmrg } 807f2e35a3aSmrg } 808f2e35a3aSmrg TRACE(("%u/%u successful\n", success, limit)); 809f2e35a3aSmrg for (c_in = 0; c_in < 256; ++c_in) { 810f2e35a3aSmrg if ((c_in % 8) == 0) { 811f2e35a3aSmrg TRACE((" %02X:", c_in)); 812f2e35a3aSmrg } 813f2e35a3aSmrg TRACE((" %8X", bucket[c_in])); 814f2e35a3aSmrg if (((c_in + 1) % 8) == 0) { 815f2e35a3aSmrg TRACE(("\n")); 816f2e35a3aSmrg } 817f2e35a3aSmrg } 818f2e35a3aSmrg} 819f2e35a3aSmrg 820f2e35a3aSmrgstatic int 821f2e35a3aSmrgdecode_one(const char *source, char **target) 822f2e35a3aSmrg{ 823f2e35a3aSmrg int result = -1; 824f2e35a3aSmrg long check; 825f2e35a3aSmrg int radix = 0; 826f2e35a3aSmrg if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 827f2e35a3aSmrg source += 2; 828f2e35a3aSmrg radix = 16; 829f2e35a3aSmrg } else if (source[0] == '0' && source[1] == 'b') { 830f2e35a3aSmrg source += 2; 831f2e35a3aSmrg radix = 2; 832f2e35a3aSmrg } 833f2e35a3aSmrg check = strtol(source, target, radix); 834f2e35a3aSmrg if (*target != NULL && *target != source) 835f2e35a3aSmrg result = (int) check; 836f2e35a3aSmrg return result; 837f2e35a3aSmrg} 838f2e35a3aSmrg 839f2e35a3aSmrgstatic int 840f2e35a3aSmrgdecode_range(const char *source, int *lo, int *hi) 841f2e35a3aSmrg{ 842f2e35a3aSmrg int result = 0; 843f2e35a3aSmrg char *after1; 844f2e35a3aSmrg char *after2; 845f2e35a3aSmrg if ((*lo = decode_one(source, &after1)) >= 0) { 846f2e35a3aSmrg after1 += strspn(after1, ":-.\t "); 847f2e35a3aSmrg if ((*hi = decode_one(after1, &after2)) < 0) { 848f2e35a3aSmrg *hi = *lo; 849f2e35a3aSmrg } 850f2e35a3aSmrg result = 1; 851d522f475Smrg } 852f2e35a3aSmrg return result; 853d522f475Smrg} 854f2e35a3aSmrg 855f2e35a3aSmrg#define MAX_BYTES 6 856f2e35a3aSmrg 857f2e35a3aSmrgstatic void 858f2e35a3aSmrgdo_range(const char *source) 859f2e35a3aSmrg{ 860f2e35a3aSmrg int lo, hi; 861f2e35a3aSmrg 862f2e35a3aSmrg TScreen screen; 863f2e35a3aSmrg memset(&screen, 0, sizeof(screen)); 864f2e35a3aSmrg 865f2e35a3aSmrg if (decode_range(source, &lo, &hi)) { 866f2e35a3aSmrg while (lo <= hi) { 867f2e35a3aSmrg unsigned c_in = (unsigned) lo++; 868f2e35a3aSmrg PtyData *data; 869f2e35a3aSmrg Char *next; 870f2e35a3aSmrg Char buffer[MAX_BYTES + 1]; 871f2e35a3aSmrg 872f2e35a3aSmrg if (opt_reverse) { 873f2e35a3aSmrg Bool skip = False; 874f2e35a3aSmrg Bool first = True; 875f2e35a3aSmrg int j, k; 876f2e35a3aSmrg for (j = 0; j < MAX_BYTES; ++j) { 877f2e35a3aSmrg unsigned long bits = ((unsigned long) c_in >> (8 * j)); 878f2e35a3aSmrg if ((buffer[j] = (Char) bits) == 0) { 879f2e35a3aSmrg skip = (bits != 0); 880f2e35a3aSmrg break; 881f2e35a3aSmrg } 882f2e35a3aSmrg } 883f2e35a3aSmrg if (skip) 884f2e35a3aSmrg continue; 885f2e35a3aSmrg initPtyData(&data); 886f2e35a3aSmrg for (k = 0; k <= j; ++k) { 887f2e35a3aSmrg data->buffer[k] = buffer[j - k - 1]; 888f2e35a3aSmrg } 889f2e35a3aSmrg if (opt_illegal && !is_legal_utf8(data->buffer)) { 890f2e35a3aSmrg free(data); 891f2e35a3aSmrg continue; 892f2e35a3aSmrg } 893f2e35a3aSmrg if (message_level > 1) { 894f2e35a3aSmrg printf("TEST "); 895f2e35a3aSmrg for (k = 0; k < j; ++k) { 896f2e35a3aSmrg printf("%02X", data->buffer[k]); 897f2e35a3aSmrg } 898f2e35a3aSmrg } 899f2e35a3aSmrg data->next = data->buffer; 900f2e35a3aSmrg data->last = data->buffer + j; 901f2e35a3aSmrg while (decodeUtf8(&screen, data)) { 902f2e35a3aSmrg total_test++; 90304b94745Smrg if (is_UCS_SPECIAL(data->utf_data)) 904f2e35a3aSmrg total_errs++; 905f2e35a3aSmrg data->next += data->utf_size; 906f2e35a3aSmrg if (message_level > 1) { 907f2e35a3aSmrg printf("%s%04X", first ? " ->" : ", ", data->utf_data); 908f2e35a3aSmrg } 909f2e35a3aSmrg first = False; 910f2e35a3aSmrg } 911f2e35a3aSmrg if (!first) 912f2e35a3aSmrg total_test--; 913f2e35a3aSmrg if (message_level > 1) { 914f2e35a3aSmrg printf("\n"); 915f2e35a3aSmrg fflush(stdout); 916f2e35a3aSmrg } 917f2e35a3aSmrg free(data); 918f2e35a3aSmrg } else if (opt_convert) { 919f2e35a3aSmrg unsigned c_out; 920f2e35a3aSmrg Char *result; 921f2e35a3aSmrg 922f2e35a3aSmrg memset(buffer, 0, sizeof(buffer)); 923f2e35a3aSmrg if ((result = next = convertToUTF8(buffer, c_in)) == 0) { 924f2e35a3aSmrg fprintf(stderr, 925f2e35a3aSmrg "conversion of U+%04X to UTF-8 failed\n", c_in); 926f2e35a3aSmrg } else if ((result = convertFromUTF8(buffer, &c_out)) == 0) { 927f2e35a3aSmrg fprintf(stderr, 928f2e35a3aSmrg "conversion of U+%04X from UTF-8 failed\n", c_in); 929f2e35a3aSmrg total_errs++; 930f2e35a3aSmrg } else if (c_in != c_out) { 931f2e35a3aSmrg fprintf(stderr, 932f2e35a3aSmrg "conversion of U+%04X to/from UTF-8 gave U+%04X\n", 933f2e35a3aSmrg c_in, c_out); 934f2e35a3aSmrg } else if (message_level > 1) { 935f2e35a3aSmrg *next = '\0'; 9365307cd1aSmrg printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 9375307cd1aSmrg (unsigned long) (next - buffer), 938f2e35a3aSmrg buffer, 939f2e35a3aSmrg c_out); 940f2e35a3aSmrg fflush(stdout); 941f2e35a3aSmrg } 942f2e35a3aSmrg } else { 943f2e35a3aSmrg initPtyData(&data); 944f2e35a3aSmrg next = convertToUTF8(data->buffer, c_in); 945f2e35a3aSmrg *next = 0; 946f2e35a3aSmrg data->next = data->buffer; 947f2e35a3aSmrg data->last = next; 948f2e35a3aSmrg decodeUtf8(&screen, data); 949f2e35a3aSmrg if (message_level > 1) { 9505307cd1aSmrg printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 9515307cd1aSmrg (unsigned long) (next - data->buffer), 952f2e35a3aSmrg data->buffer, 953f2e35a3aSmrg data->utf_data); 954f2e35a3aSmrg fflush(stdout); 955f2e35a3aSmrg } 956f2e35a3aSmrg if (c_in != data->utf_data) { 957f2e35a3aSmrg fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data); 958f2e35a3aSmrg total_errs++; 959f2e35a3aSmrg } 960f2e35a3aSmrg free(data); 961f2e35a3aSmrg } 962f2e35a3aSmrg total_test++; 963f2e35a3aSmrg } 964f2e35a3aSmrg } 965f2e35a3aSmrg} 966f2e35a3aSmrg 967f2e35a3aSmrgint 968f2e35a3aSmrgmain(int argc, char **argv) 969f2e35a3aSmrg{ 970f2e35a3aSmrg int ch; 971f2e35a3aSmrg 972f2e35a3aSmrg setlocale(LC_ALL, ""); 973f2e35a3aSmrg while ((ch = getopt(argc, argv, "aciqrv")) != -1) { 974f2e35a3aSmrg switch (ch) { 975f2e35a3aSmrg case 'a': 976f2e35a3aSmrg opt_all = 1; 977f2e35a3aSmrg break; 978f2e35a3aSmrg case 'c': 979f2e35a3aSmrg opt_convert = 1; 980f2e35a3aSmrg break; 981f2e35a3aSmrg case 'i': 982f2e35a3aSmrg opt_illegal = 1; 983f2e35a3aSmrg break; 984f2e35a3aSmrg case 'q': 985f2e35a3aSmrg message_level--; 986f2e35a3aSmrg break; 987f2e35a3aSmrg case 'r': 988f2e35a3aSmrg opt_reverse = 1; 989f2e35a3aSmrg break; 990f2e35a3aSmrg case 'v': 991f2e35a3aSmrg message_level++; 992f2e35a3aSmrg break; 993f2e35a3aSmrg default: 994f2e35a3aSmrg usage(); 995f2e35a3aSmrg } 996f2e35a3aSmrg } 997f2e35a3aSmrg if (opt_all) { 998f2e35a3aSmrg test_utf8_convert(); 999f2e35a3aSmrg } else { 1000f2e35a3aSmrg if (optind >= argc) 1001f2e35a3aSmrg usage(); 1002f2e35a3aSmrg while (optind < argc) { 1003f2e35a3aSmrg do_range(argv[optind++]); 1004f2e35a3aSmrg } 1005f2e35a3aSmrg if (total_test) { 1006f2e35a3aSmrg printf("%ld/%ld mismatches (%.0f%%)\n", 1007f2e35a3aSmrg total_errs, 1008f2e35a3aSmrg total_test, 1009f2e35a3aSmrg (100.0 * (double) total_errs) / (double) total_test); 1010f2e35a3aSmrg } 1011f2e35a3aSmrg } 1012f2e35a3aSmrg return EXIT_SUCCESS; 1013f2e35a3aSmrg} 1014f2e35a3aSmrg#else 1015f2e35a3aSmrgint 1016f2e35a3aSmrgmain(int argc, char **argv) 1017f2e35a3aSmrg{ 1018f2e35a3aSmrg (void) argc; 1019f2e35a3aSmrg (void) argv; 1020f2e35a3aSmrg printf("Nothing to be done here...\n"); 1021f2e35a3aSmrg return EXIT_SUCCESS; 1022f2e35a3aSmrg} 1023f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 1024d522f475Smrg#endif 1025