15104ee6eSmrg/* $XTermId: ptydata.c,v 1.163 2024/12/01 23:48:07 tom Exp $ */ 2d522f475Smrg 30bd37d32Smrg/* 404b94745Smrg * Copyright 1999-2023,2024 by Thomas E. Dickey 50bd37d32Smrg * 60bd37d32Smrg * All Rights Reserved 70bd37d32Smrg * 80bd37d32Smrg * Permission is hereby granted, free of charge, to any person obtaining a 90bd37d32Smrg * copy of this software and associated documentation files (the 100bd37d32Smrg * "Software"), to deal in the Software without restriction, including 110bd37d32Smrg * without limitation the rights to use, copy, modify, merge, publish, 120bd37d32Smrg * distribute, sublicense, and/or sell copies of the Software, and to 130bd37d32Smrg * permit persons to whom the Software is furnished to do so, subject to 140bd37d32Smrg * the following conditions: 150bd37d32Smrg * 160bd37d32Smrg * The above copyright notice and this permission notice shall be included 170bd37d32Smrg * in all copies or substantial portions of the Software. 180bd37d32Smrg * 190bd37d32Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 200bd37d32Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 210bd37d32Smrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 220bd37d32Smrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 230bd37d32Smrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 240bd37d32Smrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 250bd37d32Smrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 260bd37d32Smrg * 270bd37d32Smrg * Except as contained in this notice, the name(s) of the above copyright 280bd37d32Smrg * holders shall not be used in advertising or otherwise to promote the 290bd37d32Smrg * sale, use or other dealings in this Software without prior written 300bd37d32Smrg * authorization. 310bd37d32Smrg */ 32d522f475Smrg 33d522f475Smrg#include <data.h> 34d522f475Smrg 35d522f475Smrg#if OPT_WIDE_CHARS 36d522f475Smrg#include <menu.h> 37913cc679Smrg#include <wcwidth.h> 38d522f475Smrg#endif 39d522f475Smrg 40f2e35a3aSmrg#ifdef TEST_DRIVER 41f2e35a3aSmrg#undef TRACE 42f2e35a3aSmrg#define TRACE(p) if (1) printf p 43f2e35a3aSmrg#undef TRACE2 44f2e35a3aSmrg#define TRACE2(p) if (0) printf p 45f2e35a3aSmrg#define visibleChars(buf, len) "buffer" 46f2e35a3aSmrg#endif 47f2e35a3aSmrg 48d522f475Smrg/* 49d522f475Smrg * Check for both EAGAIN and EWOULDBLOCK, because some supposedly POSIX 50d522f475Smrg * systems are broken and return EWOULDBLOCK when they should return EAGAIN. 51d522f475Smrg * Note that this macro may evaluate its argument more than once. 52d522f475Smrg */ 53d522f475Smrg#if defined(EAGAIN) && defined(EWOULDBLOCK) 54d522f475Smrg#define E_TEST(err) ((err) == EAGAIN || (err) == EWOULDBLOCK) 55d522f475Smrg#else 56d522f475Smrg#ifdef EAGAIN 57d522f475Smrg#define E_TEST(err) ((err) == EAGAIN) 58d522f475Smrg#else 59d522f475Smrg#define E_TEST(err) ((err) == EWOULDBLOCK) 60d522f475Smrg#endif 61d522f475Smrg#endif 62d522f475Smrg 63d522f475Smrg#if OPT_WIDE_CHARS 64d522f475Smrg/* 65d522f475Smrg * Convert the 8-bit codes in data->buffer[] into Unicode in data->utf_data. 66d522f475Smrg * The number of bytes converted will be nonzero iff there is data. 67d522f475Smrg */ 68d522f475SmrgBool 69894e0ac8SmrgdecodeUtf8(TScreen *screen, PtyData *data) 70d522f475Smrg{ 715307cd1aSmrg size_t i; 725307cd1aSmrg size_t length = (size_t) (data->last - data->next); 73d522f475Smrg int utf_count = 0; 74956cc18dSsnj unsigned utf_char = 0; 75d522f475Smrg 76d522f475Smrg data->utf_size = 0; 77d522f475Smrg for (i = 0; i < length; i++) { 78d522f475Smrg unsigned c = data->next[i]; 79d522f475Smrg 80d522f475Smrg /* Combine UTF-8 into Unicode */ 81d522f475Smrg if (c < 0x80) { 82d522f475Smrg /* We received an ASCII character */ 83d522f475Smrg if (utf_count > 0) { 84d522f475Smrg data->utf_data = UCS_REPL; /* prev. sequence incomplete */ 85a1f3da82Smrg data->utf_size = i; 86d522f475Smrg } else { 87956cc18dSsnj data->utf_data = (IChar) c; 88d522f475Smrg data->utf_size = 1; 89d522f475Smrg } 90d522f475Smrg break; 91f2e35a3aSmrg } else if (screen->vt100_graphics 92f2e35a3aSmrg && (c < 0x100) 93f2e35a3aSmrg && (utf_count == 0) 94f2e35a3aSmrg && screen->gsets[(int) screen->curgr] != nrc_ASCII) { 95f2e35a3aSmrg data->utf_data = (IChar) c; 96f2e35a3aSmrg data->utf_size = 1; 97f2e35a3aSmrg break; 98d522f475Smrg } else if (c < 0xc0) { 99d522f475Smrg /* We received a continuation byte */ 100d522f475Smrg if (utf_count < 1) { 10104b94745Smrg if (screen->c1_printable) { 10204b94745Smrg data->utf_data = (IChar) c; 10304b94745Smrg } else if ((i + 1) < length 10404b94745Smrg && data->next[i + 1] > 0x20 10504b94745Smrg && data->next[i + 1] < 0x80) { 10604b94745Smrg /* 10704b94745Smrg * Allow for C1 control string if the next byte is 10804b94745Smrg * available for inspection. 10904b94745Smrg */ 11004b94745Smrg data->utf_data = (IChar) c; 11104b94745Smrg } else { 11204b94745Smrg /* 11304b94745Smrg * We received a continuation byte before receiving a 11404b94745Smrg * sequence state, or a failed attempt to use a C1 control 11504b94745Smrg * string. 11604b94745Smrg */ 11704b94745Smrg data->utf_data = (IChar) UCS_REPL; 11804b94745Smrg } 119d522f475Smrg data->utf_size = (i + 1); 120d522f475Smrg break; 121f2e35a3aSmrg } else if (screen->utf8_weblike 122f2e35a3aSmrg && (utf_count == 3 123f2e35a3aSmrg && utf_char == 0x04 124f2e35a3aSmrg && c >= 0x90)) { 125f2e35a3aSmrg /* The encoding would form a code point beyond U+10FFFF. */ 126f2e35a3aSmrg data->utf_size = i; 127f2e35a3aSmrg data->utf_data = UCS_REPL; 128f2e35a3aSmrg break; 129f2e35a3aSmrg } else if (screen->utf8_weblike 130f2e35a3aSmrg && (utf_count == 2 131f2e35a3aSmrg && utf_char == 0x0d 132f2e35a3aSmrg && c >= 0xa0)) { 133f2e35a3aSmrg /* The encoding would form a surrogate code point. */ 134f2e35a3aSmrg data->utf_size = i; 135f2e35a3aSmrg data->utf_data = UCS_REPL; 136f2e35a3aSmrg break; 137d522f475Smrg } else { 138d522f475Smrg /* Check for overlong UTF-8 sequences for which a shorter 139d522f475Smrg * encoding would exist and replace them with UCS_REPL. 140d522f475Smrg * An overlong UTF-8 sequence can have any of the following 141d522f475Smrg * forms: 142d522f475Smrg * 1100000x 10xxxxxx 143d522f475Smrg * 11100000 100xxxxx 10xxxxxx 144d522f475Smrg * 11110000 1000xxxx 10xxxxxx 10xxxxxx 145d522f475Smrg * 11111000 10000xxx 10xxxxxx 10xxxxxx 10xxxxxx 146d522f475Smrg * 11111100 100000xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 147d522f475Smrg */ 148d522f475Smrg if (!utf_char && !((c & 0x7f) >> (7 - utf_count))) { 149f2e35a3aSmrg if (screen->utf8_weblike) { 150f2e35a3aSmrg /* overlong sequence continued */ 151f2e35a3aSmrg data->utf_data = UCS_REPL; 152f2e35a3aSmrg data->utf_size = i; 153f2e35a3aSmrg break; 154f2e35a3aSmrg } else { 155f2e35a3aSmrg utf_char = UCS_REPL; 156f2e35a3aSmrg } 157d522f475Smrg } 158d522f475Smrg utf_char <<= 6; 159d522f475Smrg utf_char |= (c & 0x3f); 160d522f475Smrg if ((utf_char >= 0xd800 && 161d522f475Smrg utf_char <= 0xdfff) || 162d522f475Smrg (utf_char == 0xfffe) || 163d522f475Smrg (utf_char == HIDDEN_CHAR)) { 164d522f475Smrg utf_char = UCS_REPL; 165d522f475Smrg } 166d522f475Smrg utf_count--; 167d522f475Smrg if (utf_count == 0) { 168956cc18dSsnj#if !OPT_WIDER_ICHAR 169d522f475Smrg /* characters outside UCS-2 become UCS_REPL */ 170f2e35a3aSmrg if (utf_char > NARROW_ICHAR) { 171d522f475Smrg TRACE(("using replacement for %#x\n", utf_char)); 172d522f475Smrg utf_char = UCS_REPL; 173d522f475Smrg } 174956cc18dSsnj#endif 175956cc18dSsnj data->utf_data = (IChar) utf_char; 176d522f475Smrg data->utf_size = (i + 1); 177d522f475Smrg break; 178d522f475Smrg } 179d522f475Smrg } 180d522f475Smrg } else { 181d522f475Smrg /* We received a sequence start byte */ 182d522f475Smrg if (utf_count > 0) { 183f2e35a3aSmrg /* previous sequence is incomplete */ 184f2e35a3aSmrg data->utf_data = UCS_REPL; 185f2e35a3aSmrg data->utf_size = i; 186d522f475Smrg break; 187d522f475Smrg } 188f2e35a3aSmrg if (screen->utf8_weblike) { 189f2e35a3aSmrg if (c < 0xe0) { 190f2e35a3aSmrg if (!(c & 0x1e)) { 191f2e35a3aSmrg /* overlong sequence start */ 192f2e35a3aSmrg data->utf_data = UCS_REPL; 193f2e35a3aSmrg data->utf_size = (i + 1); 194f2e35a3aSmrg break; 195f2e35a3aSmrg } 196f2e35a3aSmrg utf_count = 1; 197f2e35a3aSmrg utf_char = (c & 0x1f); 198f2e35a3aSmrg } else if (c < 0xf0) { 199f2e35a3aSmrg utf_count = 2; 200f2e35a3aSmrg utf_char = (c & 0x0f); 201f2e35a3aSmrg } else if (c < 0xf5) { 202f2e35a3aSmrg utf_count = 3; 203f2e35a3aSmrg utf_char = (c & 0x07); 204f2e35a3aSmrg } else { 205f2e35a3aSmrg data->utf_data = UCS_REPL; 206f2e35a3aSmrg data->utf_size = (i + 1); 207f2e35a3aSmrg break; 208a1f3da82Smrg } 209d522f475Smrg } else { 210f2e35a3aSmrg if (c < 0xe0) { 211f2e35a3aSmrg utf_count = 1; 212f2e35a3aSmrg utf_char = (c & 0x1f); 213f2e35a3aSmrg if (!(c & 0x1e)) { 214f2e35a3aSmrg /* overlong sequence */ 215f2e35a3aSmrg utf_char = UCS_REPL; 216f2e35a3aSmrg } 217f2e35a3aSmrg } else if (c < 0xf0) { 218f2e35a3aSmrg utf_count = 2; 219f2e35a3aSmrg utf_char = (c & 0x0f); 220f2e35a3aSmrg } else if (c < 0xf8) { 221f2e35a3aSmrg utf_count = 3; 222f2e35a3aSmrg utf_char = (c & 0x07); 223f2e35a3aSmrg } else if (c < 0xfc) { 224f2e35a3aSmrg utf_count = 4; 225f2e35a3aSmrg utf_char = (c & 0x03); 226f2e35a3aSmrg } else if (c < 0xfe) { 227f2e35a3aSmrg utf_count = 5; 228f2e35a3aSmrg utf_char = (c & 0x01); 229f2e35a3aSmrg } else { 230f2e35a3aSmrg data->utf_data = UCS_REPL; 231f2e35a3aSmrg data->utf_size = (i + 1); 232f2e35a3aSmrg break; 233f2e35a3aSmrg } 234d522f475Smrg } 235d522f475Smrg } 236d522f475Smrg } 237d522f475Smrg#if OPT_TRACE > 1 23804b94745Smrg TRACE(("UTF-8 char %04X [%lu..%lu]\n", 239d522f475Smrg data->utf_data, 24004b94745Smrg (unsigned long) (data->next - data->buffer), 24104b94745Smrg (unsigned long) (data->next - data->buffer + data->utf_size - 1))); 242d522f475Smrg#endif 243d522f475Smrg 244d522f475Smrg return (data->utf_size != 0); 245d522f475Smrg} 246d522f475Smrg#endif 247d522f475Smrg 248d522f475Smrgint 249894e0ac8SmrgreadPtyData(XtermWidget xw, PtySelect * select_mask, PtyData *data) 250d522f475Smrg{ 25120d2c4d2Smrg TScreen *screen = TScreenOf(xw); 252d522f475Smrg int size = 0; 253d522f475Smrg 254d522f475Smrg if (FD_ISSET(screen->respond, select_mask)) { 255956cc18dSsnj int save_err; 25620d2c4d2Smrg trimPtyData(xw, data); 257d522f475Smrg 25820d2c4d2Smrg size = (int) read(screen->respond, (char *) data->last, (size_t) FRG_SIZE); 259956cc18dSsnj save_err = errno; 260d522f475Smrg#if (defined(i386) && defined(SVR4) && defined(sun)) || defined(__CYGWIN__) 261956cc18dSsnj /* 262956cc18dSsnj * Yes, I know this is a majorly f*ugly hack, however it seems to 263956cc18dSsnj * be necessary for Solaris x86. DWH 11/15/94 264956cc18dSsnj * Dunno why though.. 265956cc18dSsnj * (and now CYGWIN, alanh@xfree86.org 08/15/01 266956cc18dSsnj */ 267956cc18dSsnj if (size <= 0) { 268956cc18dSsnj if (save_err == EIO || save_err == 0) 2690bd37d32Smrg NormalExit(); 270956cc18dSsnj else if (!E_TEST(save_err)) 271956cc18dSsnj Panic("input: read returned unexpected error (%d)\n", save_err); 272956cc18dSsnj size = 0; 273956cc18dSsnj } 274956cc18dSsnj#else /* !f*ugly */ 275956cc18dSsnj if (size < 0) { 276956cc18dSsnj if (save_err == EIO) 2770bd37d32Smrg NormalExit(); 278956cc18dSsnj else if (!E_TEST(save_err)) 279956cc18dSsnj Panic("input: read returned unexpected error (%d)\n", save_err); 280d522f475Smrg size = 0; 281d522f475Smrg } else if (size == 0) { 2820bd37d32Smrg#if defined(__FreeBSD__) 2830bd37d32Smrg NormalExit(); 284d522f475Smrg#else 285d522f475Smrg Panic("input: read returned zero\n", 0); 286d522f475Smrg#endif 287d522f475Smrg } 288956cc18dSsnj#endif /* f*ugly */ 289d522f475Smrg } 290d522f475Smrg 291d522f475Smrg if (size) { 292d522f475Smrg#if OPT_TRACE 293d522f475Smrg int i; 294d522f475Smrg 295d522f475Smrg TRACE(("read %d bytes from pty\n", size)); 296d522f475Smrg for (i = 0; i < size; i++) { 297d522f475Smrg if (!(i % 16)) 298d522f475Smrg TRACE(("%s", i ? "\n " : "READ")); 299d522f475Smrg TRACE((" %02X", data->last[i])); 300d522f475Smrg } 301d522f475Smrg TRACE(("\n")); 302d522f475Smrg#endif 303d522f475Smrg data->last += size; 304d522f475Smrg#ifdef ALLOWLOGGING 30520d2c4d2Smrg TScreenOf(term)->logstart = VTbuffer->next; 306d522f475Smrg#endif 307d522f475Smrg } 308d522f475Smrg 309d522f475Smrg return (size); 310d522f475Smrg} 311d522f475Smrg 312d522f475Smrg/* 313d522f475Smrg * Return the next value from the input buffer. Note that morePtyData() is 314d522f475Smrg * always called before this function, so we can do the UTF-8 input conversion 315d522f475Smrg * in that function and simply return the result here. 316d522f475Smrg */ 317d522f475Smrg#if OPT_WIDE_CHARS 318d522f475SmrgIChar 319894e0ac8SmrgnextPtyData(TScreen *screen, PtyData *data) 320d522f475Smrg{ 321d522f475Smrg IChar result; 322d522f475Smrg if (screen->utf8_inparse) { 323f2e35a3aSmrg skipPtyData(data, result); 324d522f475Smrg } else { 325d522f475Smrg result = *((data)->next++); 326956cc18dSsnj if (!screen->output_eight_bits) { 327956cc18dSsnj result = (IChar) (result & 0x7f); 328956cc18dSsnj } 329d522f475Smrg } 330d522f475Smrg TRACE2(("nextPtyData returns %#x\n", result)); 331d522f475Smrg return result; 332d522f475Smrg} 333d522f475Smrg#endif 334d522f475Smrg 335d522f475Smrg#if OPT_WIDE_CHARS 336d522f475Smrg/* 337d522f475Smrg * Called when UTF-8 mode has been turned on/off. 338d522f475Smrg */ 339d522f475Smrgvoid 340894e0ac8SmrgswitchPtyData(TScreen *screen, int flag) 341d522f475Smrg{ 342d522f475Smrg if (screen->utf8_mode != flag) { 343d522f475Smrg screen->utf8_mode = flag; 344956cc18dSsnj screen->utf8_inparse = (Boolean) (flag != 0); 345913cc679Smrg mk_wcwidth_init(screen->utf8_mode); 346d522f475Smrg 347d522f475Smrg TRACE(("turning UTF-8 mode %s\n", BtoS(flag))); 348d522f475Smrg update_font_utf8_mode(); 349d522f475Smrg } 350d522f475Smrg} 351d522f475Smrg#endif 352d522f475Smrg 353d522f475Smrg/* 354d522f475Smrg * Allocate a buffer. 355d522f475Smrg */ 356d522f475Smrgvoid 357894e0ac8SmrginitPtyData(PtyData **result) 358d522f475Smrg{ 359d522f475Smrg PtyData *data; 360d522f475Smrg 361f2e35a3aSmrg TRACE2(("initPtyData given minBufSize %d, maxBufSize %d\n", 362f2e35a3aSmrg FRG_SIZE, BUF_SIZE)); 363d522f475Smrg 364d522f475Smrg if (FRG_SIZE < 64) 365d522f475Smrg FRG_SIZE = 64; 366d522f475Smrg if (BUF_SIZE < FRG_SIZE) 367d522f475Smrg BUF_SIZE = FRG_SIZE; 368d522f475Smrg if (BUF_SIZE % FRG_SIZE) 369d522f475Smrg BUF_SIZE = BUF_SIZE + FRG_SIZE - (BUF_SIZE % FRG_SIZE); 370d522f475Smrg 371f2e35a3aSmrg TRACE2(("initPtyData using minBufSize %d, maxBufSize %d\n", 372f2e35a3aSmrg FRG_SIZE, BUF_SIZE)); 373d522f475Smrg 374a1f3da82Smrg data = TypeXtMallocX(PtyData, (BUF_SIZE + FRG_SIZE)); 375d522f475Smrg 376d522f475Smrg memset(data, 0, sizeof(*data)); 377d522f475Smrg data->next = data->buffer; 378d522f475Smrg data->last = data->buffer; 379d522f475Smrg *result = data; 380d522f475Smrg} 381d522f475Smrg 382d522f475Smrg/* 38320d2c4d2Smrg * Initialize a buffer for the caller, using its data in 'next'. 384d522f475Smrg */ 385d522f475Smrg#if OPT_WIDE_CHARS 386d522f475SmrgPtyData * 387894e0ac8SmrgfakePtyData(PtyData *result, Char *next, Char *last) 388d522f475Smrg{ 389d522f475Smrg PtyData *data = result; 390d522f475Smrg 391d522f475Smrg memset(data, 0, sizeof(*data)); 392d522f475Smrg data->next = next; 393d522f475Smrg data->last = last; 394d522f475Smrg 395d522f475Smrg return data; 396d522f475Smrg} 397d522f475Smrg#endif 398d522f475Smrg 399d522f475Smrg/* 400d522f475Smrg * Remove used data by shifting the buffer down, to make room for more data, 401d522f475Smrg * e.g., a continuation-read. 402d522f475Smrg */ 403d522f475Smrgvoid 404f2e35a3aSmrgtrimPtyData(XtermWidget xw, PtyData *data) 405d522f475Smrg{ 406f2e35a3aSmrg (void) xw; 40720d2c4d2Smrg FlushLog(xw); 408d522f475Smrg 409d522f475Smrg if (data->next != data->buffer) { 4105307cd1aSmrg size_t i; 4115307cd1aSmrg size_t n = (size_t) (data->last - data->next); 412d522f475Smrg 4135307cd1aSmrg TRACE(("shifting buffer down by %lu\n", (unsigned long) n)); 414d522f475Smrg for (i = 0; i < n; ++i) { 415d522f475Smrg data->buffer[i] = data->next[i]; 416d522f475Smrg } 417d522f475Smrg data->next = data->buffer; 418d522f475Smrg data->last = data->next + n; 419d522f475Smrg } 420d522f475Smrg 421d522f475Smrg} 422d522f475Smrg 423d522f475Smrg/* 424d522f475Smrg * Insert new data into the input buffer so the next calls to morePtyData() 425d522f475Smrg * and nextPtyData() will return that. 426d522f475Smrg */ 427d522f475Smrgvoid 4285307cd1aSmrgfillPtyData(XtermWidget xw, PtyData *data, const char *value, size_t length) 429d522f475Smrg{ 4305307cd1aSmrg size_t size; 4315307cd1aSmrg size_t n; 432d522f475Smrg 433d522f475Smrg /* remove the used portion of the buffer */ 43420d2c4d2Smrg trimPtyData(xw, data); 435d522f475Smrg 436d522f475Smrg VTbuffer->last += length; 4375307cd1aSmrg size = (size_t) (VTbuffer->last - VTbuffer->next); 438d522f475Smrg 439d522f475Smrg /* shift the unused portion up to make room */ 440d522f475Smrg for (n = size; n >= length; --n) 441d522f475Smrg VTbuffer->next[n] = VTbuffer->next[n - length]; 442d522f475Smrg 443d522f475Smrg /* insert the new bytes to interpret */ 444d522f475Smrg for (n = 0; n < length; n++) 445d522f475Smrg VTbuffer->next[n] = CharOf(value[n]); 446d522f475Smrg} 447d522f475Smrg 448d522f475Smrg#if OPT_WIDE_CHARS 449f2e35a3aSmrg/* 450f2e35a3aSmrg * Convert an ISO-8859-1 code 'c' to UTF-8, storing the result in the target 451f2e35a3aSmrg * 'lp', and returning a pointer past the converted character. 452f2e35a3aSmrg */ 453d522f475SmrgChar * 454894e0ac8SmrgconvertToUTF8(Char *lp, unsigned c) 455d522f475Smrg{ 45620d2c4d2Smrg#define CH(n) (Char)((c) >> ((n) * 8)) 45720d2c4d2Smrg if (c < 0x80) { 45820d2c4d2Smrg /* 0******* */ 45920d2c4d2Smrg *lp++ = (Char) CH(0); 46020d2c4d2Smrg } else if (c < 0x800) { 46120d2c4d2Smrg /* 110***** 10****** */ 46220d2c4d2Smrg *lp++ = (Char) (0xc0 | (CH(0) >> 6) | ((CH(1) & 0x07) << 2)); 46320d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 46420d2c4d2Smrg } else if (c < 0x00010000) { 46520d2c4d2Smrg /* 1110**** 10****** 10****** */ 46620d2c4d2Smrg *lp++ = (Char) (0xe0 | ((int) (CH(1) & 0xf0) >> 4)); 46720d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 46820d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 46920d2c4d2Smrg } else if (c < 0x00200000) { 47020d2c4d2Smrg *lp++ = (Char) (0xf0 | ((int) (CH(2) & 0x1f) >> 2)); 47120d2c4d2Smrg *lp++ = (Char) (0x80 | 47220d2c4d2Smrg ((int) (CH(1) & 0xf0) >> 4) | 47320d2c4d2Smrg ((int) (CH(2) & 0x03) << 4)); 47420d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 47520d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 47620d2c4d2Smrg } else if (c < 0x04000000) { 47720d2c4d2Smrg *lp++ = (Char) (0xf8 | (CH(3) & 0x03)); 47820d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(2) >> 2)); 47920d2c4d2Smrg *lp++ = (Char) (0x80 | 48020d2c4d2Smrg ((int) (CH(1) & 0xf0) >> 4) | 48120d2c4d2Smrg ((int) (CH(2) & 0x03) << 4)); 48220d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 48320d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 48420d2c4d2Smrg } else { 48520d2c4d2Smrg *lp++ = (Char) (0xfc | ((int) (CH(3) & 0x40) >> 6)); 48620d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(3) & 0x3f)); 48720d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(2) >> 2)); 48820d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(1) >> 4) | ((CH(2) & 0x03) << 4)); 48920d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) >> 6) | ((CH(1) & 0x0f) << 2)); 49020d2c4d2Smrg *lp++ = (Char) (0x80 | (CH(0) & 0x3f)); 491d522f475Smrg } 492d522f475Smrg return lp; 49320d2c4d2Smrg#undef CH 494d522f475Smrg} 495d522f475Smrg 496f2e35a3aSmrg/* 497f2e35a3aSmrg * Convert a UTF-8 multibyte character to an Unicode value, returning a pointer 498f2e35a3aSmrg * past the converted UTF-8 input. The first 256 values align with ISO-8859-1, 499f2e35a3aSmrg * making it possible to use this to convert to Latin-1. 500f2e35a3aSmrg * 501f2e35a3aSmrg * If the conversion fails, return null. 502f2e35a3aSmrg */ 503f2e35a3aSmrgChar * 504f2e35a3aSmrgconvertFromUTF8(Char *lp, unsigned *cp) 505f2e35a3aSmrg{ 506f2e35a3aSmrg int want; 507f2e35a3aSmrg 508f2e35a3aSmrg /* 509f2e35a3aSmrg * Find the number of bytes we will need from the source. 510f2e35a3aSmrg */ 511f2e35a3aSmrg if ((*lp & 0x80) == 0) { 512f2e35a3aSmrg want = 1; 513f2e35a3aSmrg } else if ((*lp & 0xe0) == 0xc0) { 514f2e35a3aSmrg want = 2; 515f2e35a3aSmrg } else if ((*lp & 0xf0) == 0xe0) { 516f2e35a3aSmrg want = 3; 517f2e35a3aSmrg } else if ((*lp & 0xf8) == 0xf0) { 518f2e35a3aSmrg want = 4; 519f2e35a3aSmrg } else if ((*lp & 0xfc) == 0xf8) { 520f2e35a3aSmrg want = 5; 521f2e35a3aSmrg } else if ((*lp & 0xfe) == 0xfc) { 522f2e35a3aSmrg want = 6; 523f2e35a3aSmrg } else { 524f2e35a3aSmrg want = 0; 525f2e35a3aSmrg } 526f2e35a3aSmrg 527f2e35a3aSmrg if (want) { 528f2e35a3aSmrg int have = 1; 529f2e35a3aSmrg 530f2e35a3aSmrg while (lp[have] != '\0') { 531f2e35a3aSmrg if ((lp[have] & 0xc0) != 0x80) 532f2e35a3aSmrg break; 533f2e35a3aSmrg ++have; 534f2e35a3aSmrg } 535f2e35a3aSmrg if (want == have) { 536f2e35a3aSmrg unsigned mask = 0; 537f2e35a3aSmrg int j; 538f2e35a3aSmrg int shift = 0; 539f2e35a3aSmrg 540f2e35a3aSmrg *cp = 0; 541f2e35a3aSmrg switch (want) { 542f2e35a3aSmrg case 1: 543f2e35a3aSmrg mask = (*lp); 544f2e35a3aSmrg break; 545f2e35a3aSmrg case 2: 546f2e35a3aSmrg mask = (*lp & 0x1f); 547f2e35a3aSmrg break; 548f2e35a3aSmrg case 3: 549f2e35a3aSmrg mask = (*lp & 0x0f); 550f2e35a3aSmrg break; 551f2e35a3aSmrg case 4: 552f2e35a3aSmrg mask = (*lp & 0x07); 553f2e35a3aSmrg break; 554f2e35a3aSmrg case 5: 555f2e35a3aSmrg mask = (*lp & 0x03); 556f2e35a3aSmrg break; 557f2e35a3aSmrg case 6: 558f2e35a3aSmrg mask = (*lp & 0x01); 559f2e35a3aSmrg break; 560f2e35a3aSmrg default: 561f2e35a3aSmrg mask = 0; 562f2e35a3aSmrg break; 563f2e35a3aSmrg } 564f2e35a3aSmrg 565f2e35a3aSmrg for (j = 1; j < want; j++) { 566f2e35a3aSmrg *cp |= (unsigned) ((lp[want - j] & 0x3f) << shift); 567f2e35a3aSmrg shift += 6; 568f2e35a3aSmrg } 569f2e35a3aSmrg *cp |= mask << shift; 570f2e35a3aSmrg lp += want; 571f2e35a3aSmrg } else { 572f2e35a3aSmrg *cp = BAD_ASCII; 573f2e35a3aSmrg lp = NULL; 574f2e35a3aSmrg } 575f2e35a3aSmrg } else { 576f2e35a3aSmrg *cp = BAD_ASCII; 577f2e35a3aSmrg lp = NULL; 578f2e35a3aSmrg } 579f2e35a3aSmrg return lp; 580f2e35a3aSmrg} 581f2e35a3aSmrg 582f2e35a3aSmrg/* 583f2e35a3aSmrg * Returns true if the entire string is valid UTF-8. 584f2e35a3aSmrg */ 585f2e35a3aSmrgBoolean 586f2e35a3aSmrgisValidUTF8(Char *lp) 587f2e35a3aSmrg{ 588f2e35a3aSmrg Boolean result = True; 589f2e35a3aSmrg while (*lp) { 590f2e35a3aSmrg unsigned ch; 591f2e35a3aSmrg Char *next = convertFromUTF8(lp, &ch); 592f2e35a3aSmrg if (next == NULL || ch == 0) { 593f2e35a3aSmrg result = False; 594f2e35a3aSmrg break; 595f2e35a3aSmrg } 596f2e35a3aSmrg lp = next; 597f2e35a3aSmrg } 598f2e35a3aSmrg return result; 599f2e35a3aSmrg} 600f2e35a3aSmrg 601d522f475Smrg/* 602d522f475Smrg * Write data back to the PTY 603d522f475Smrg */ 604d522f475Smrgvoid 6055307cd1aSmrgwritePtyData(int f, IChar *d, size_t len) 606d522f475Smrg{ 6075307cd1aSmrg size_t n = (len << 1); 608d522f475Smrg 609d522f475Smrg if (VTbuffer->write_len <= len) { 610d522f475Smrg VTbuffer->write_len = n; 6115307cd1aSmrg VTbuffer->write_buf = realloc(VTbuffer->write_buf, VTbuffer->write_len); 612d522f475Smrg } 613d522f475Smrg 614d522f475Smrg for (n = 0; n < len; n++) 615956cc18dSsnj VTbuffer->write_buf[n] = (Char) d[n]; 616d522f475Smrg 6175307cd1aSmrg TRACE(("writePtyData %lu:%s\n", (unsigned long) n, 618956cc18dSsnj visibleChars(VTbuffer->write_buf, n))); 619d522f475Smrg v_write(f, VTbuffer->write_buf, n); 620d522f475Smrg} 621d522f475Smrg#endif /* OPT_WIDE_CHARS */ 622d522f475Smrg 623d522f475Smrg#ifdef NO_LEAKS 624d522f475Smrgvoid 625d522f475Smrgnoleaks_ptydata(void) 626d522f475Smrg{ 6275104ee6eSmrg if (VTbuffer != NULL) { 628d522f475Smrg#if OPT_WIDE_CHARS 629f2e35a3aSmrg free(VTbuffer->write_buf); 630f2e35a3aSmrg#endif 631f2e35a3aSmrg FreeAndNull(VTbuffer); 632f2e35a3aSmrg } 633f2e35a3aSmrg} 634f2e35a3aSmrg#endif 635f2e35a3aSmrg 636f2e35a3aSmrg#ifdef TEST_DRIVER 637f2e35a3aSmrg 638f2e35a3aSmrg#include "data.c" 639f2e35a3aSmrg 640f2e35a3aSmrgvoid 641f2e35a3aSmrgNormalExit(void) 642f2e35a3aSmrg{ 643f2e35a3aSmrg fprintf(stderr, "NormalExit!\n"); 644f2e35a3aSmrg exit(EXIT_SUCCESS); 645f2e35a3aSmrg} 646f2e35a3aSmrg 647f2e35a3aSmrgvoid 648f2e35a3aSmrgPanic(const char *s, int a) 649f2e35a3aSmrg{ 650f2e35a3aSmrg (void) s; 651f2e35a3aSmrg (void) a; 652f2e35a3aSmrg fprintf(stderr, "Panic!\n"); 653f2e35a3aSmrg exit(EXIT_FAILURE); 654f2e35a3aSmrg} 655f2e35a3aSmrg 656f2e35a3aSmrg#if OPT_WIDE_CHARS 657f2e35a3aSmrg 658f2e35a3aSmrg#ifdef ALLOWLOGGING 659f2e35a3aSmrgvoid 660f2e35a3aSmrgFlushLog(XtermWidget xw) 661f2e35a3aSmrg{ 662f2e35a3aSmrg (void) xw; 663f2e35a3aSmrg} 664d522f475Smrg#endif 665f2e35a3aSmrg 666f2e35a3aSmrgvoid 6675307cd1aSmrgv_write(int f, const Char *data, size_t len) 668f2e35a3aSmrg{ 669f2e35a3aSmrg (void) f; 670f2e35a3aSmrg (void) data; 671f2e35a3aSmrg (void) len; 672f2e35a3aSmrg} 673f2e35a3aSmrg 674f2e35a3aSmrgvoid 675f2e35a3aSmrgmk_wcwidth_init(int mode) 676f2e35a3aSmrg{ 677f2e35a3aSmrg (void) mode; 678f2e35a3aSmrg} 679f2e35a3aSmrg 680f2e35a3aSmrgvoid 681f2e35a3aSmrgupdate_font_utf8_mode(void) 682f2e35a3aSmrg{ 683f2e35a3aSmrg} 684f2e35a3aSmrg 685f2e35a3aSmrgstatic int message_level = 0; 686f2e35a3aSmrgstatic int opt_all = 0; 687f2e35a3aSmrgstatic int opt_illegal = 0; 688f2e35a3aSmrgstatic int opt_convert = 0; 689f2e35a3aSmrgstatic int opt_reverse = 0; 690f2e35a3aSmrgstatic long total_test = 0; 691f2e35a3aSmrgstatic long total_errs = 0; 692f2e35a3aSmrg 693f2e35a3aSmrgstatic void 694f2e35a3aSmrgusage(void) 695f2e35a3aSmrg{ 696f2e35a3aSmrg static const char *msg[] = 697f2e35a3aSmrg { 698f2e35a3aSmrg "Usage: test_ptydata [options] [c1[-c1b] [c2-[c2b] [...]]]", 699f2e35a3aSmrg "", 700f2e35a3aSmrg "Options:", 701f2e35a3aSmrg " -a exercise all legal encode/decode to/from UTF-8", 702f2e35a3aSmrg " -c call convertFromUTF8 rather than decodeUTF8", 703f2e35a3aSmrg " -i ignore illegal UTF-8 when testing -r option", 704f2e35a3aSmrg " -q quieter", 705f2e35a3aSmrg " -r reverse/decode from UTF-8 byte-string to/from Unicode", 706f2e35a3aSmrg " -v more verbose" 707f2e35a3aSmrg }; 708f2e35a3aSmrg size_t n; 709f2e35a3aSmrg for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 710f2e35a3aSmrg fprintf(stderr, "%s\n", msg[n]); 711f2e35a3aSmrg } 712f2e35a3aSmrg exit(EXIT_FAILURE); 713f2e35a3aSmrg} 714f2e35a3aSmrg 715f2e35a3aSmrg/* 716f2e35a3aSmrg * http://www.unicode.org/versions/corrigendum1.html, table 3.1B 717f2e35a3aSmrg */ 718f2e35a3aSmrg#define OkRange(n,lo,hi) \ 719f2e35a3aSmrg if (value[n] < lo || value[n] > hi) { \ 720f2e35a3aSmrg result = False; \ 721f2e35a3aSmrg break; \ 722f2e35a3aSmrg } 723f2e35a3aSmrgstatic Bool 724f2e35a3aSmrgis_legal_utf8(const Char *value) 725f2e35a3aSmrg{ 726f2e35a3aSmrg Bool result = True; 727f2e35a3aSmrg Char ch; 728f2e35a3aSmrg while ((ch = *value) != '\0') { 729f2e35a3aSmrg if (ch <= 0x7f) { 730f2e35a3aSmrg ++value; 731f2e35a3aSmrg } else if (ch >= 0xc2 && ch <= 0xdf) { 732f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 733f2e35a3aSmrg value += 2; 734f2e35a3aSmrg } else if (ch == 0xe0) { 735f2e35a3aSmrg OkRange(1, 0xa0, 0xbf); 736f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 737f2e35a3aSmrg value += 3; 738f2e35a3aSmrg } else if (ch >= 0xe1 && ch <= 0xef) { 739f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 740f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 741f2e35a3aSmrg value += 3; 742f2e35a3aSmrg } else if (ch == 0xf0) { 743f2e35a3aSmrg OkRange(1, 0x90, 0xbf); 744f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 745f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 746f2e35a3aSmrg value += 4; 747f2e35a3aSmrg } else if (ch >= 0xf1 && ch <= 0xf3) { 748f2e35a3aSmrg OkRange(1, 0x80, 0xbf); 749f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 750f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 751f2e35a3aSmrg value += 4; 752f2e35a3aSmrg } else if (ch == 0xf4) { 753f2e35a3aSmrg OkRange(1, 0x80, 0x8f); 754f2e35a3aSmrg OkRange(2, 0x80, 0xbf); 755f2e35a3aSmrg OkRange(3, 0x80, 0xbf); 756f2e35a3aSmrg value += 4; 757f2e35a3aSmrg } else { 758f2e35a3aSmrg result = False; 759f2e35a3aSmrg break; 760f2e35a3aSmrg } 761f2e35a3aSmrg } 762f2e35a3aSmrg return result; 763f2e35a3aSmrg} 764f2e35a3aSmrg 765f2e35a3aSmrgstatic void 766f2e35a3aSmrgtest_utf8_convert(void) 767f2e35a3aSmrg{ 768f2e35a3aSmrg unsigned c_in, c_out; 769f2e35a3aSmrg Char buffer[10]; 770f2e35a3aSmrg Char *result; 771f2e35a3aSmrg unsigned limit = 0x110000; 772f2e35a3aSmrg unsigned success = 0; 773f2e35a3aSmrg unsigned bucket[256]; 774f2e35a3aSmrg 775f2e35a3aSmrg memset(bucket, 0, sizeof(bucket)); 776f2e35a3aSmrg for (c_in = 0; c_in < limit; ++c_in) { 777f2e35a3aSmrg memset(buffer, 0, sizeof(buffer)); 7785104ee6eSmrg if ((result = convertToUTF8(buffer, c_in)) == NULL) { 779f2e35a3aSmrg TRACE(("conversion of U+%04X to UTF-8 failed\n", c_in)); 780f2e35a3aSmrg } else { 7815104ee6eSmrg if ((result = convertFromUTF8(buffer, &c_out)) == NULL) { 782f2e35a3aSmrg TRACE(("conversion of U+%04X from UTF-8 failed\n", c_in)); 783f2e35a3aSmrg } else if (c_in != c_out) { 784f2e35a3aSmrg TRACE(("conversion of U+%04X to/from UTF-8 gave U+%04X\n", 785f2e35a3aSmrg c_in, c_out)); 786f2e35a3aSmrg } else { 787f2e35a3aSmrg while (result-- != buffer) { 788f2e35a3aSmrg bucket[*result]++; 789f2e35a3aSmrg } 790f2e35a3aSmrg ++success; 791f2e35a3aSmrg } 792f2e35a3aSmrg } 793f2e35a3aSmrg } 794f2e35a3aSmrg TRACE(("%u/%u successful\n", success, limit)); 795f2e35a3aSmrg for (c_in = 0; c_in < 256; ++c_in) { 796f2e35a3aSmrg if ((c_in % 8) == 0) { 797f2e35a3aSmrg TRACE((" %02X:", c_in)); 798f2e35a3aSmrg } 799f2e35a3aSmrg TRACE((" %8X", bucket[c_in])); 800f2e35a3aSmrg if (((c_in + 1) % 8) == 0) { 801f2e35a3aSmrg TRACE(("\n")); 802f2e35a3aSmrg } 803f2e35a3aSmrg } 804f2e35a3aSmrg} 805f2e35a3aSmrg 806f2e35a3aSmrgstatic int 807f2e35a3aSmrgdecode_one(const char *source, char **target) 808f2e35a3aSmrg{ 809f2e35a3aSmrg int result = -1; 810f2e35a3aSmrg long check; 811f2e35a3aSmrg int radix = 0; 812f2e35a3aSmrg if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 813f2e35a3aSmrg source += 2; 814f2e35a3aSmrg radix = 16; 815f2e35a3aSmrg } else if (source[0] == '0' && source[1] == 'b') { 816f2e35a3aSmrg source += 2; 817f2e35a3aSmrg radix = 2; 818f2e35a3aSmrg } 819f2e35a3aSmrg check = strtol(source, target, radix); 820f2e35a3aSmrg if (*target != NULL && *target != source) 821f2e35a3aSmrg result = (int) check; 822f2e35a3aSmrg return result; 823f2e35a3aSmrg} 824f2e35a3aSmrg 825f2e35a3aSmrgstatic int 826f2e35a3aSmrgdecode_range(const char *source, int *lo, int *hi) 827f2e35a3aSmrg{ 828f2e35a3aSmrg int result = 0; 829f2e35a3aSmrg char *after1; 830f2e35a3aSmrg char *after2; 831f2e35a3aSmrg if ((*lo = decode_one(source, &after1)) >= 0) { 832f2e35a3aSmrg after1 += strspn(after1, ":-.\t "); 833f2e35a3aSmrg if ((*hi = decode_one(after1, &after2)) < 0) { 834f2e35a3aSmrg *hi = *lo; 835f2e35a3aSmrg } 836f2e35a3aSmrg result = 1; 837d522f475Smrg } 838f2e35a3aSmrg return result; 839d522f475Smrg} 840f2e35a3aSmrg 841f2e35a3aSmrg#define MAX_BYTES 6 842f2e35a3aSmrg 843f2e35a3aSmrgstatic void 844f2e35a3aSmrgdo_range(const char *source) 845f2e35a3aSmrg{ 846f2e35a3aSmrg int lo, hi; 847f2e35a3aSmrg 848f2e35a3aSmrg TScreen screen; 849f2e35a3aSmrg memset(&screen, 0, sizeof(screen)); 850f2e35a3aSmrg 851f2e35a3aSmrg if (decode_range(source, &lo, &hi)) { 852f2e35a3aSmrg while (lo <= hi) { 853f2e35a3aSmrg unsigned c_in = (unsigned) lo++; 854f2e35a3aSmrg PtyData *data; 855f2e35a3aSmrg Char *next; 856f2e35a3aSmrg Char buffer[MAX_BYTES + 1]; 857f2e35a3aSmrg 858f2e35a3aSmrg if (opt_reverse) { 859f2e35a3aSmrg Bool skip = False; 860f2e35a3aSmrg Bool first = True; 861f2e35a3aSmrg int j, k; 862f2e35a3aSmrg for (j = 0; j < MAX_BYTES; ++j) { 863f2e35a3aSmrg unsigned long bits = ((unsigned long) c_in >> (8 * j)); 864f2e35a3aSmrg if ((buffer[j] = (Char) bits) == 0) { 865f2e35a3aSmrg skip = (bits != 0); 866f2e35a3aSmrg break; 867f2e35a3aSmrg } 868f2e35a3aSmrg } 869f2e35a3aSmrg if (skip) 870f2e35a3aSmrg continue; 871f2e35a3aSmrg initPtyData(&data); 872f2e35a3aSmrg for (k = 0; k <= j; ++k) { 873f2e35a3aSmrg data->buffer[k] = buffer[j - k - 1]; 874f2e35a3aSmrg } 875f2e35a3aSmrg if (opt_illegal && !is_legal_utf8(data->buffer)) { 876f2e35a3aSmrg free(data); 877f2e35a3aSmrg continue; 878f2e35a3aSmrg } 879f2e35a3aSmrg if (message_level > 1) { 880f2e35a3aSmrg printf("TEST "); 881f2e35a3aSmrg for (k = 0; k < j; ++k) { 882f2e35a3aSmrg printf("%02X", data->buffer[k]); 883f2e35a3aSmrg } 884f2e35a3aSmrg } 885f2e35a3aSmrg data->next = data->buffer; 886f2e35a3aSmrg data->last = data->buffer + j; 887f2e35a3aSmrg while (decodeUtf8(&screen, data)) { 888f2e35a3aSmrg total_test++; 88904b94745Smrg if (is_UCS_SPECIAL(data->utf_data)) 890f2e35a3aSmrg total_errs++; 891f2e35a3aSmrg data->next += data->utf_size; 892f2e35a3aSmrg if (message_level > 1) { 893f2e35a3aSmrg printf("%s%04X", first ? " ->" : ", ", data->utf_data); 894f2e35a3aSmrg } 895f2e35a3aSmrg first = False; 896f2e35a3aSmrg } 897f2e35a3aSmrg if (!first) 898f2e35a3aSmrg total_test--; 899f2e35a3aSmrg if (message_level > 1) { 900f2e35a3aSmrg printf("\n"); 901f2e35a3aSmrg fflush(stdout); 902f2e35a3aSmrg } 903f2e35a3aSmrg free(data); 904f2e35a3aSmrg } else if (opt_convert) { 905f2e35a3aSmrg unsigned c_out; 906f2e35a3aSmrg Char *result; 907f2e35a3aSmrg 908f2e35a3aSmrg memset(buffer, 0, sizeof(buffer)); 9095104ee6eSmrg if ((result = next = convertToUTF8(buffer, c_in)) == NULL) { 910f2e35a3aSmrg fprintf(stderr, 911f2e35a3aSmrg "conversion of U+%04X to UTF-8 failed\n", c_in); 9125104ee6eSmrg } else if ((result = convertFromUTF8(buffer, &c_out)) == NULL) { 913f2e35a3aSmrg fprintf(stderr, 914f2e35a3aSmrg "conversion of U+%04X from UTF-8 failed\n", c_in); 915f2e35a3aSmrg total_errs++; 916f2e35a3aSmrg } else if (c_in != c_out) { 917f2e35a3aSmrg fprintf(stderr, 918f2e35a3aSmrg "conversion of U+%04X to/from UTF-8 gave U+%04X\n", 919f2e35a3aSmrg c_in, c_out); 920f2e35a3aSmrg } else if (message_level > 1) { 921f2e35a3aSmrg *next = '\0'; 9225307cd1aSmrg printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 9235307cd1aSmrg (unsigned long) (next - buffer), 924f2e35a3aSmrg buffer, 925f2e35a3aSmrg c_out); 926f2e35a3aSmrg fflush(stdout); 927f2e35a3aSmrg } 928f2e35a3aSmrg } else { 929f2e35a3aSmrg initPtyData(&data); 930f2e35a3aSmrg next = convertToUTF8(data->buffer, c_in); 931f2e35a3aSmrg *next = 0; 932f2e35a3aSmrg data->next = data->buffer; 933f2e35a3aSmrg data->last = next; 934f2e35a3aSmrg decodeUtf8(&screen, data); 935f2e35a3aSmrg if (message_level > 1) { 9365307cd1aSmrg printf("TEST %04X (%lu:%s) ->%04X\n", c_in, 9375307cd1aSmrg (unsigned long) (next - data->buffer), 938f2e35a3aSmrg data->buffer, 939f2e35a3aSmrg data->utf_data); 940f2e35a3aSmrg fflush(stdout); 941f2e35a3aSmrg } 942f2e35a3aSmrg if (c_in != data->utf_data) { 943f2e35a3aSmrg fprintf(stderr, "Mismatch: %04X vs %04X\n", c_in, data->utf_data); 944f2e35a3aSmrg total_errs++; 945f2e35a3aSmrg } 946f2e35a3aSmrg free(data); 947f2e35a3aSmrg } 948f2e35a3aSmrg total_test++; 949f2e35a3aSmrg } 950f2e35a3aSmrg } 951f2e35a3aSmrg} 952f2e35a3aSmrg 953f2e35a3aSmrgint 954f2e35a3aSmrgmain(int argc, char **argv) 955f2e35a3aSmrg{ 956f2e35a3aSmrg int ch; 957f2e35a3aSmrg 958f2e35a3aSmrg setlocale(LC_ALL, ""); 959f2e35a3aSmrg while ((ch = getopt(argc, argv, "aciqrv")) != -1) { 960f2e35a3aSmrg switch (ch) { 961f2e35a3aSmrg case 'a': 962f2e35a3aSmrg opt_all = 1; 963f2e35a3aSmrg break; 964f2e35a3aSmrg case 'c': 965f2e35a3aSmrg opt_convert = 1; 966f2e35a3aSmrg break; 967f2e35a3aSmrg case 'i': 968f2e35a3aSmrg opt_illegal = 1; 969f2e35a3aSmrg break; 970f2e35a3aSmrg case 'q': 971f2e35a3aSmrg message_level--; 972f2e35a3aSmrg break; 973f2e35a3aSmrg case 'r': 974f2e35a3aSmrg opt_reverse = 1; 975f2e35a3aSmrg break; 976f2e35a3aSmrg case 'v': 977f2e35a3aSmrg message_level++; 978f2e35a3aSmrg break; 979f2e35a3aSmrg default: 980f2e35a3aSmrg usage(); 981f2e35a3aSmrg } 982f2e35a3aSmrg } 983f2e35a3aSmrg if (opt_all) { 984f2e35a3aSmrg test_utf8_convert(); 985f2e35a3aSmrg } else { 986f2e35a3aSmrg if (optind >= argc) 987f2e35a3aSmrg usage(); 988f2e35a3aSmrg while (optind < argc) { 989f2e35a3aSmrg do_range(argv[optind++]); 990f2e35a3aSmrg } 991f2e35a3aSmrg if (total_test) { 992f2e35a3aSmrg printf("%ld/%ld mismatches (%.0f%%)\n", 993f2e35a3aSmrg total_errs, 994f2e35a3aSmrg total_test, 995f2e35a3aSmrg (100.0 * (double) total_errs) / (double) total_test); 996f2e35a3aSmrg } 997f2e35a3aSmrg } 998f2e35a3aSmrg return EXIT_SUCCESS; 999f2e35a3aSmrg} 1000f2e35a3aSmrg#else 1001f2e35a3aSmrgint 1002f2e35a3aSmrgmain(int argc, char **argv) 1003f2e35a3aSmrg{ 1004f2e35a3aSmrg (void) argc; 1005f2e35a3aSmrg (void) argv; 1006f2e35a3aSmrg printf("Nothing to be done here...\n"); 1007f2e35a3aSmrg return EXIT_SUCCESS; 1008f2e35a3aSmrg} 1009f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 1010d522f475Smrg#endif 1011