charclass.c revision f2e35a3a
/* $XTermId: charclass.c,v 1.44 2021/02/02 00:19:32 tom Exp $ */

/*
 * Copyright 2002-2020,2021 by Thomas E. Dickey
 *
 * All Rights Reserved
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Except as contained in this notice, the name(s) of the above copyright
 * holders shall not be used in advertising or otherwise to promote the
 * sale, use or other dealings in this Software without prior written
 * authorization.
 *
 *----------------------------------------------------------------------------
 * Compact and efficient reimplementation of the
 * xterm character class mechanism for large character sets
 *
 * Markus Kuhn -- mkuhn@acm.org -- 2000-07-03
 *
 * xterm allows users to select entire words with a double-click on the left
 * mouse button.  Opinions might differ on what type of characters are part of
 * separate words, therefore xterm allows users to configure a class code for
 * each 8-bit character.  Words are maximum length sequences of neighboring
 * characters with identical class code.  Extending this mechanism to Unicode
 * naively would require a class code table at least 2^16 entries (128 kB)
 * long.
 *
 * Instead, we transform the character class table into a list of intervals,
 * that will be accessed via a linear search.  Changes made to the table by the
 * user will be appended.  A special class code IDENT (default) marks
 * characters that have their code number as the class code.
 *
 * We could alternatively use a sorted table of non-overlapping intervals that
 * can be accessed via binary search, but merging in new intervals is
 * significantly more hassle and not worth the effort here.
54d522f475Smrg */ 55d522f475Smrg 56d522f475Smrg#include <xterm.h> 57d522f475Smrg#include <charclass.h> 58d522f475Smrg 59d522f475Smrg#if OPT_WIDE_CHARS 60d522f475Smrg 61f2e35a3aSmrg#ifdef TEST_DRIVER 62f2e35a3aSmrg 63f2e35a3aSmrg#include <ctype.h> 64f2e35a3aSmrg#include <wchar.h> 65f2e35a3aSmrg#include <wctype.h> 66f2e35a3aSmrg 67f2e35a3aSmrg#if OPT_TRACE 68f2e35a3aSmrg#define Trace if (opt_v) printf 69f2e35a3aSmrg#endif 70f2e35a3aSmrg 71f2e35a3aSmrg#undef OPT_REPORT_CCLASS 72f2e35a3aSmrg#define OPT_REPORT_CCLASS 1 73f2e35a3aSmrg#endif /* TEST_DRIVER */ 74f2e35a3aSmrg 75d522f475Smrgstatic struct classentry { 76d522f475Smrg int cclass; 77d522f475Smrg int first; 78d522f475Smrg int last; 79d522f475Smrg} *classtab; 80d522f475Smrg 81d522f475Smrgtypedef enum { 82d522f475Smrg IDENT = -1, 83f2e35a3aSmrg OTHER = 0, 84d522f475Smrg CNTRL = 1, 85f2e35a3aSmrg ALNUM = 48, 86913cc679Smrg BLANK = 32, 87913cc679Smrg U_CJK = 0x4e00, 88913cc679Smrg U_SUP = 0x2070, 89913cc679Smrg U_SUB = 0x2080, 90913cc679Smrg U_HIR = 0x3040, 91913cc679Smrg U_KAT = 0x30a0, 92913cc679Smrg U_HAN = 0xac00 93d522f475Smrg} Classes; 94d522f475Smrg 95f2e35a3aSmrg#ifdef TEST_DRIVER 96f2e35a3aSmrgstatic int opt_all; 97f2e35a3aSmrgstatic int opt_check; 98f2e35a3aSmrgstatic int opt_quiet; 99f2e35a3aSmrgstatic int opt_v; 100f2e35a3aSmrg#endif 101f2e35a3aSmrg 102d522f475Smrgvoid 103d522f475Smrginit_classtab(void) 104d522f475Smrg{ 105d522f475Smrg const int size = 50; 106d522f475Smrg 107f2e35a3aSmrg TRACE(("init_classtab " TRACE_L "\n")); 108913cc679Smrg 10920d2c4d2Smrg classtab = TypeMallocN(struct classentry, (unsigned) size); 110d522f475Smrg if (!classtab) 111d522f475Smrg abort(); 112d522f475Smrg classtab[0].cclass = size; 113d522f475Smrg classtab[0].first = 1; 114d522f475Smrg classtab[0].last = 0; 115d522f475Smrg 116d522f475Smrg /* old xterm default classes */ 117d522f475Smrg SetCharacterClassRange(0, 0, BLANK); 118d522f475Smrg SetCharacterClassRange(1, 31, CNTRL); 119d522f475Smrg SetCharacterClassRange('\t', 
'\t', BLANK); 120d522f475Smrg SetCharacterClassRange('0', '9', ALNUM); 121d522f475Smrg SetCharacterClassRange('A', 'Z', ALNUM); 122d522f475Smrg SetCharacterClassRange('_', '_', ALNUM); 123d522f475Smrg SetCharacterClassRange('a', 'z', ALNUM); 124d522f475Smrg SetCharacterClassRange(127, 159, CNTRL); 125d522f475Smrg SetCharacterClassRange(160, 191, IDENT); 126d522f475Smrg SetCharacterClassRange(192, 255, ALNUM); 127d522f475Smrg SetCharacterClassRange(215, 215, IDENT); 128d522f475Smrg SetCharacterClassRange(247, 247, IDENT); 129d522f475Smrg 130d522f475Smrg /* added Unicode classes */ 131d522f475Smrg SetCharacterClassRange(0x0100, 0xffdf, ALNUM); /* mostly characters */ 132d522f475Smrg SetCharacterClassRange(0x037e, 0x037e, IDENT); /* Greek question mark */ 133d522f475Smrg SetCharacterClassRange(0x0387, 0x0387, IDENT); /* Greek ano teleia */ 134d522f475Smrg SetCharacterClassRange(0x055a, 0x055f, IDENT); /* Armenian punctuation */ 135d522f475Smrg SetCharacterClassRange(0x0589, 0x0589, IDENT); /* Armenian full stop */ 136d522f475Smrg SetCharacterClassRange(0x0700, 0x070d, IDENT); /* Syriac punctuation */ 137d522f475Smrg SetCharacterClassRange(0x104a, 0x104f, IDENT); /* Myanmar punctuation */ 138d522f475Smrg SetCharacterClassRange(0x10fb, 0x10fb, IDENT); /* Georgian punctuation */ 139d522f475Smrg SetCharacterClassRange(0x1361, 0x1368, IDENT); /* Ethiopic punctuation */ 140d522f475Smrg SetCharacterClassRange(0x166d, 0x166e, IDENT); /* Canadian Syl. 
punctuation */ 141d522f475Smrg SetCharacterClassRange(0x17d4, 0x17dc, IDENT); /* Khmer punctuation */ 142d522f475Smrg SetCharacterClassRange(0x1800, 0x180a, IDENT); /* Mongolian punctuation */ 143d522f475Smrg SetCharacterClassRange(0x2000, 0x200a, BLANK); /* spaces */ 144d522f475Smrg SetCharacterClassRange(0x200b, 0x27ff, IDENT); /* punctuation and symbols */ 145913cc679Smrg SetCharacterClassRange(0x2070, 0x207f, U_SUP); /* superscript */ 146913cc679Smrg SetCharacterClassRange(0x2080, 0x208f, U_SUB); /* subscript */ 147d522f475Smrg SetCharacterClassRange(0x3000, 0x3000, BLANK); /* ideographic space */ 148d522f475Smrg SetCharacterClassRange(0x3001, 0x3020, IDENT); /* ideographic punctuation */ 149913cc679Smrg SetCharacterClassRange(0x3040, 0x309f, U_HIR); /* Hiragana */ 150913cc679Smrg SetCharacterClassRange(0x30a0, 0x30ff, U_KAT); /* Katakana */ 151913cc679Smrg SetCharacterClassRange(0x3300, 0x9fff, U_CJK); /* CJK Ideographs */ 152913cc679Smrg SetCharacterClassRange(0xac00, 0xd7a3, U_HAN); /* Hangul Syllables */ 153913cc679Smrg SetCharacterClassRange(0xf900, 0xfaff, U_CJK); /* CJK Ideographs */ 154d522f475Smrg SetCharacterClassRange(0xfe30, 0xfe6b, IDENT); /* punctuation forms */ 155d522f475Smrg SetCharacterClassRange(0xff00, 0xff0f, IDENT); /* half/fullwidth ASCII */ 156d522f475Smrg SetCharacterClassRange(0xff1a, 0xff20, IDENT); /* half/fullwidth ASCII */ 157d522f475Smrg SetCharacterClassRange(0xff3b, 0xff40, IDENT); /* half/fullwidth ASCII */ 158d522f475Smrg SetCharacterClassRange(0xff5b, 0xff64, IDENT); /* half/fullwidth ASCII */ 159d522f475Smrg 160f2e35a3aSmrg TRACE((TRACE_R " init_classtab\n")); 161d522f475Smrg return; 162d522f475Smrg} 163d522f475Smrg 164d522f475Smrgint 165d522f475SmrgCharacterClass(int c) 166d522f475Smrg{ 167d522f475Smrg int i, cclass = IDENT; 168d522f475Smrg 169d522f475Smrg for (i = classtab[0].first; i <= classtab[0].last; i++) 170d522f475Smrg if (classtab[i].first <= c && classtab[i].last >= c) 171d522f475Smrg cclass = classtab[i].cclass; 
172d522f475Smrg 173d522f475Smrg if (cclass < 0) 174d522f475Smrg cclass = c; 175d522f475Smrg 176d522f475Smrg return cclass; 177d522f475Smrg} 178d522f475Smrg 179913cc679Smrg#if OPT_REPORT_CCLASS 180913cc679Smrg#define charFormat(code) ((code) > 255 ? "0x%04X" : "%d") 181913cc679Smrgstatic const char * 182913cc679Smrgclass_name(Classes code) 183913cc679Smrg{ 184913cc679Smrg static char buffer[80]; 185913cc679Smrg const char *result = "?"; 186913cc679Smrg switch (code) { 187913cc679Smrg case ALNUM: 188913cc679Smrg result = "ALNUM"; 189913cc679Smrg break; 190f2e35a3aSmrg case BLANK: 191f2e35a3aSmrg result = "BLANK"; 192f2e35a3aSmrg break; 193913cc679Smrg case CNTRL: 194913cc679Smrg result = "CNTRL"; 195913cc679Smrg break; 196f2e35a3aSmrg case OTHER: 197f2e35a3aSmrg result = "OTHER"; 198f2e35a3aSmrg break; 199f2e35a3aSmrg case IDENT: 200f2e35a3aSmrg result = "IDENT"; 201913cc679Smrg break; 202913cc679Smrg case U_SUP: 203913cc679Smrg result = "superscript"; 204913cc679Smrg break; 205913cc679Smrg case U_SUB: 206913cc679Smrg result = "subscript"; 207913cc679Smrg break; 208913cc679Smrg case U_CJK: 209913cc679Smrg result = "CJK Ideographs"; 210913cc679Smrg break; 211913cc679Smrg case U_HIR: 212913cc679Smrg result = "Hiragana"; 213913cc679Smrg break; 214913cc679Smrg case U_KAT: 215913cc679Smrg result = "Katakana"; 216913cc679Smrg break; 217913cc679Smrg case U_HAN: 218913cc679Smrg result = "Hangul Syllables"; 219913cc679Smrg break; 220913cc679Smrg default: 221913cc679Smrg sprintf(buffer, charFormat(code), code); 222913cc679Smrg result = buffer; 223913cc679Smrg break; 224913cc679Smrg } 225913cc679Smrg return result; 226913cc679Smrg} 227913cc679Smrg 228f2e35a3aSmrg/* 229f2e35a3aSmrg * Special convention for classtab[0]: 230f2e35a3aSmrg * - classtab[0].cclass is the allocated number of entries in classtab 231f2e35a3aSmrg * - classtab[0].first = 1 (first used entry in classtab) 232f2e35a3aSmrg * - classtab[0].last is the last used entry in classtab 233f2e35a3aSmrg */ 
234f2e35a3aSmrg 235f2e35a3aSmrgint 236f2e35a3aSmrgSetCharacterClassRange(int low, int high, int value) 237f2e35a3aSmrg{ 238f2e35a3aSmrg TRACE(("...SetCharacterClassRange (U+%04X .. U+%04X) = %s\n", 239f2e35a3aSmrg low, high, class_name(value))); 240f2e35a3aSmrg 241f2e35a3aSmrg if (high < low) 242f2e35a3aSmrg return -1; /* nothing to do */ 243f2e35a3aSmrg 244f2e35a3aSmrg /* make sure we have at least one free entry left at table end */ 245f2e35a3aSmrg if (classtab[0].last > classtab[0].cclass - 2) { 246f2e35a3aSmrg classtab[0].cclass += 5 + classtab[0].cclass / 4; 247f2e35a3aSmrg classtab = TypeRealloc(struct classentry, 248f2e35a3aSmrg (unsigned) classtab[0].cclass, classtab); 249f2e35a3aSmrg if (!classtab) 250f2e35a3aSmrg abort(); 251f2e35a3aSmrg } 252f2e35a3aSmrg 253f2e35a3aSmrg /* simply append new interval to end of interval array */ 254f2e35a3aSmrg classtab[0].last++; 255f2e35a3aSmrg classtab[classtab[0].last].first = low; 256f2e35a3aSmrg classtab[classtab[0].last].last = high; 257f2e35a3aSmrg classtab[classtab[0].last].cclass = value; 258f2e35a3aSmrg 259f2e35a3aSmrg return 0; 260f2e35a3aSmrg} 261f2e35a3aSmrg 262913cc679Smrgvoid 263913cc679Smrgreport_wide_char_class(void) 264913cc679Smrg{ 265913cc679Smrg static const Classes known_classes[] = 266913cc679Smrg {IDENT, ALNUM, CNTRL, BLANK, U_SUP, U_SUB, U_HIR, U_KAT, U_CJK, U_HAN}; 267913cc679Smrg int i; 268913cc679Smrg 269913cc679Smrg printf("\n"); 270913cc679Smrg printf("Unicode charClass data uses the last match\n"); 271913cc679Smrg printf("from these overlapping intervals of character codes:\n"); 272913cc679Smrg for (i = classtab[0].first; i <= classtab[0].last; i++) { 273913cc679Smrg printf("\tU+%04X .. 
U+%04X %s\n", 274913cc679Smrg classtab[i].first, 275913cc679Smrg classtab[i].last, 276f2e35a3aSmrg class_name((Classes) classtab[i].cclass)); 277913cc679Smrg } 278913cc679Smrg printf("\n"); 279913cc679Smrg printf("These class-names are used internally (the first character code in a class):\n"); 280913cc679Smrg for (i = 0; i < (int) XtNumber(known_classes); ++i) { 281913cc679Smrg printf("\t"); 282913cc679Smrg printf(charFormat(known_classes[i]), known_classes[i]); 283913cc679Smrg printf(" = %s\n", class_name(known_classes[i])); 284913cc679Smrg } 285913cc679Smrg} 286913cc679Smrg#endif /* OPT_REPORT_CCLASS */ 287913cc679Smrg 288d522f475Smrg#ifdef NO_LEAKS 289d522f475Smrgvoid 290d522f475Smrgnoleaks_CharacterClass(void) 291d522f475Smrg{ 292f2e35a3aSmrg FreeAndNull(classtab); 293f2e35a3aSmrg} 294f2e35a3aSmrg#endif 295f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 296f2e35a3aSmrg 297f2e35a3aSmrg#ifdef TEST_DRIVER 298f2e35a3aSmrg#if OPT_WIDE_CHARS 299f2e35a3aSmrgstatic void 300f2e35a3aSmrgusage(void) 301f2e35a3aSmrg{ 302f2e35a3aSmrg static const char *msg[] = 303f2e35a3aSmrg { 304f2e35a3aSmrg "Usage: test_charclass [options] [c1[-c1b] [c2-[c2b] [...]]]", 305f2e35a3aSmrg "", 306f2e35a3aSmrg "Options:", 307f2e35a3aSmrg " -a show all data", 308f2e35a3aSmrg " -s show only summary", 309f2e35a3aSmrg " -v verbose" 310f2e35a3aSmrg }; 311f2e35a3aSmrg size_t n; 312f2e35a3aSmrg for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 313f2e35a3aSmrg fprintf(stderr, "%s\n", msg[n]); 314f2e35a3aSmrg } 315f2e35a3aSmrg exit(EXIT_FAILURE); 316f2e35a3aSmrg} 317f2e35a3aSmrg 318f2e35a3aSmrgstatic int 319f2e35a3aSmrgexpected_class(int wch) 320f2e35a3aSmrg{ 321f2e35a3aSmrg int result = wch; 322f2e35a3aSmrg wint_t ch = (wint_t) wch; 323f2e35a3aSmrg if (ch == '\0' || ch == '\t') { 324f2e35a3aSmrg result = BLANK; 325f2e35a3aSmrg } else if (iswcntrl(ch)) { 326f2e35a3aSmrg result = CNTRL; 327f2e35a3aSmrg } else if (iswspace(ch)) { 328f2e35a3aSmrg result = BLANK; 329f2e35a3aSmrg } else if (ch < 127) { 
330f2e35a3aSmrg if (isalnum(ch) || ch == '_') { 331f2e35a3aSmrg result = ALNUM; 332f2e35a3aSmrg } 333f2e35a3aSmrg } else if (ch == 170 || ch == 181 || ch == 186) { 334f2e35a3aSmrg ; 335f2e35a3aSmrg } else if (iswalnum(ch)) { 336f2e35a3aSmrg result = ALNUM; 337f2e35a3aSmrg } 338f2e35a3aSmrg return result; 339f2e35a3aSmrg} 340f2e35a3aSmrg 341f2e35a3aSmrgstatic int 342f2e35a3aSmrgshow_cclass_range(int lo, int hi) 343f2e35a3aSmrg{ 344f2e35a3aSmrg int cclass = CharacterClass(lo); 345f2e35a3aSmrg int ident = (cclass == lo); 346f2e35a3aSmrg int more = 0; 347f2e35a3aSmrg if (ident) { 348f2e35a3aSmrg int ch; 349f2e35a3aSmrg for (ch = lo + 1; ch <= hi; ch++) { 350f2e35a3aSmrg if (CharacterClass(ch) != ch) { 351f2e35a3aSmrg ident = 0; 352f2e35a3aSmrg break; 353f2e35a3aSmrg } 354f2e35a3aSmrg } 355f2e35a3aSmrg if (ident && (hi < 255)) { 356f2e35a3aSmrg ch = hi + 1; 357f2e35a3aSmrg if (CharacterClass(ch) == ch) { 358f2e35a3aSmrg if (ch >= 255 || CharacterClass(ch + 1) != ch) { 359f2e35a3aSmrg more = 1; 360f2e35a3aSmrg } 361f2e35a3aSmrg } 362f2e35a3aSmrg } 363d522f475Smrg } 364f2e35a3aSmrg if (!more) { 365f2e35a3aSmrg if (lo == hi) { 366f2e35a3aSmrg printf("\t%d", lo); 367f2e35a3aSmrg } else { 368f2e35a3aSmrg printf("\t%d-%d", lo, hi); 369f2e35a3aSmrg } 370f2e35a3aSmrg if (!ident) 371f2e35a3aSmrg printf(":%d", cclass); 372f2e35a3aSmrg if (hi < 255) 373f2e35a3aSmrg printf(", \\"); 374f2e35a3aSmrg printf("\n"); 375f2e35a3aSmrg } 376f2e35a3aSmrg return !more; 377f2e35a3aSmrg} 378f2e35a3aSmrg 379f2e35a3aSmrgstatic void 380f2e35a3aSmrgreport_resource(int first, int last) 381f2e35a3aSmrg{ 382f2e35a3aSmrg int class_p; 383f2e35a3aSmrg int ch; 384f2e35a3aSmrg int dh; 385f2e35a3aSmrg 386f2e35a3aSmrg class_p = CharacterClass(dh = first); 387f2e35a3aSmrg for (ch = first; ch < last; ++ch) { 388f2e35a3aSmrg int class_c = CharacterClass(ch); 389f2e35a3aSmrg if (class_c != class_p) { 390f2e35a3aSmrg if (show_cclass_range(dh, ch - 1)) { 391f2e35a3aSmrg dh = ch; 392f2e35a3aSmrg class_p = class_c; 
393f2e35a3aSmrg } 394f2e35a3aSmrg } 395f2e35a3aSmrg } 396f2e35a3aSmrg if (dh < last - 1) { 397f2e35a3aSmrg show_cclass_range(dh, last - 1); 398f2e35a3aSmrg } 399f2e35a3aSmrg} 400f2e35a3aSmrg 401f2e35a3aSmrgstatic int 402f2e35a3aSmrgdecode_one(const char *source, char **target) 403f2e35a3aSmrg{ 404f2e35a3aSmrg int result = -1; 405f2e35a3aSmrg long check; 406f2e35a3aSmrg int radix = 0; 407f2e35a3aSmrg if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 408f2e35a3aSmrg source += 2; 409f2e35a3aSmrg radix = 16; 410f2e35a3aSmrg } 411f2e35a3aSmrg check = strtol(source, target, radix); 412f2e35a3aSmrg if (*target != NULL && *target != source) 413f2e35a3aSmrg result = (int) check; 414f2e35a3aSmrg return result; 415d522f475Smrg} 416f2e35a3aSmrg 417f2e35a3aSmrgstatic int 418f2e35a3aSmrgdecode_range(const char *source, int *lo, int *hi) 419f2e35a3aSmrg{ 420f2e35a3aSmrg int result = 0; 421f2e35a3aSmrg char *after1; 422f2e35a3aSmrg char *after2; 423f2e35a3aSmrg if ((*lo = decode_one(source, &after1)) >= 0) { 424f2e35a3aSmrg after1 += strspn(after1, ":-.\t "); 425f2e35a3aSmrg if ((*hi = decode_one(after1, &after2)) < 0) { 426f2e35a3aSmrg *hi = *lo; 427f2e35a3aSmrg } 428f2e35a3aSmrg result = 1; 429f2e35a3aSmrg } 430f2e35a3aSmrg return result; 431f2e35a3aSmrg} 432f2e35a3aSmrg 433f2e35a3aSmrgstatic void 434f2e35a3aSmrgdo_range(const char *source) 435f2e35a3aSmrg{ 436f2e35a3aSmrg int lo, hi; 437f2e35a3aSmrg if (decode_range(source, &lo, &hi)) { 438f2e35a3aSmrg if (opt_all) { 439f2e35a3aSmrg while (lo <= hi) { 440f2e35a3aSmrg int other_rc = CharacterClass(lo); 441f2e35a3aSmrg if (!opt_quiet) 442f2e35a3aSmrg printf("U+%04X\t%s\n", lo, class_name(other_rc)); 443f2e35a3aSmrg ++lo; 444f2e35a3aSmrg } 445f2e35a3aSmrg } else if (opt_check) { 446f2e35a3aSmrg while (lo <= hi) { 447f2e35a3aSmrg int expect = expected_class(lo); 448f2e35a3aSmrg int actual = CharacterClass(lo); 449f2e35a3aSmrg if (actual != expect) 450f2e35a3aSmrg printf("U+%04X\t%s ->%s\n", lo, 451f2e35a3aSmrg 
class_name(expect), 452f2e35a3aSmrg class_name(actual)); 453f2e35a3aSmrg ++lo; 454f2e35a3aSmrg } 455f2e35a3aSmrg } else { 456f2e35a3aSmrg printf("\"charClass\" resource for [%d..%d]:\n", lo, hi); 457f2e35a3aSmrg report_resource(lo, hi + 1); 458f2e35a3aSmrg } 459f2e35a3aSmrg } 460f2e35a3aSmrg} 461f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 462f2e35a3aSmrg 463f2e35a3aSmrg/* 464f2e35a3aSmrg * TODO: add option to show do_range in hex 465f2e35a3aSmrg */ 466f2e35a3aSmrgint 467f2e35a3aSmrgmain(int argc, char **argv ENVP_ARG) 468f2e35a3aSmrg{ 469f2e35a3aSmrg#if OPT_WIDE_CHARS 470f2e35a3aSmrg int ch; 471d522f475Smrg#endif 472d522f475Smrg 473f2e35a3aSmrg (void) argc; 474f2e35a3aSmrg (void) argv; 475f2e35a3aSmrg 476f2e35a3aSmrg#if OPT_WIDE_CHARS 477f2e35a3aSmrg setlocale(LC_ALL, ""); 478f2e35a3aSmrg while ((ch = getopt(argc, argv, "acsv")) != -1) { 479f2e35a3aSmrg switch (ch) { 480f2e35a3aSmrg case 'a': 481f2e35a3aSmrg opt_all = 1; 482f2e35a3aSmrg break; 483f2e35a3aSmrg case 'c': 484f2e35a3aSmrg opt_check = 1; 485f2e35a3aSmrg break; 486f2e35a3aSmrg case 's': 487f2e35a3aSmrg opt_quiet = 1; 488f2e35a3aSmrg break; 489f2e35a3aSmrg case 'v': 490f2e35a3aSmrg opt_v = 1; 491f2e35a3aSmrg break; 492f2e35a3aSmrg default: 493f2e35a3aSmrg usage(); 494f2e35a3aSmrg } 495f2e35a3aSmrg } 496f2e35a3aSmrg init_classtab(); 497f2e35a3aSmrg 498f2e35a3aSmrg if (optind >= argc) { 499f2e35a3aSmrg do_range("0-255"); 500f2e35a3aSmrg } else { 501f2e35a3aSmrg while (optind < argc) { 502f2e35a3aSmrg do_range(argv[optind++]); 503f2e35a3aSmrg } 504f2e35a3aSmrg } 505f2e35a3aSmrg report_wide_char_class(); 506f2e35a3aSmrg#else 507f2e35a3aSmrg printf("wide-character support is not configured\n"); 508d522f475Smrg#endif /* OPT_WIDE_CHARS */ 509f2e35a3aSmrg return 0; 510f2e35a3aSmrg} 511f2e35a3aSmrg#endif /* TEST_DRIVER */ 512