104b94745Smrg/* $XTermId: charclass.c,v 1.50 2023/04/01 00:11:47 tom Exp $ */ 2d522f475Smrg 3d522f475Smrg/* 45307cd1aSmrg * Copyright 2002-2022,2023 by Thomas E. Dickey 5f2e35a3aSmrg * 6f2e35a3aSmrg * All Rights Reserved 7f2e35a3aSmrg * 8f2e35a3aSmrg * Permission is hereby granted, free of charge, to any person obtaining a 9f2e35a3aSmrg * copy of this software and associated documentation files (the 10f2e35a3aSmrg * "Software"), to deal in the Software without restriction, including 11f2e35a3aSmrg * without limitation the rights to use, copy, modify, merge, publish, 12f2e35a3aSmrg * distribute, sublicense, and/or sell copies of the Software, and to 13f2e35a3aSmrg * permit persons to whom the Software is furnished to do so, subject to 14f2e35a3aSmrg * the following conditions: 15f2e35a3aSmrg * 16f2e35a3aSmrg * The above copyright notice and this permission notice shall be included 17f2e35a3aSmrg * in all copies or substantial portions of the Software. 18f2e35a3aSmrg * 19f2e35a3aSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20f2e35a3aSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21f2e35a3aSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22f2e35a3aSmrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 23f2e35a3aSmrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24f2e35a3aSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25f2e35a3aSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26f2e35a3aSmrg * 27f2e35a3aSmrg * Except as contained in this notice, the name(s) of the above copyright 28f2e35a3aSmrg * holders shall not be used in advertising or otherwise to promote the 29f2e35a3aSmrg * sale, use or other dealings in this Software without prior written 30f2e35a3aSmrg * authorization. 31f2e35a3aSmrg * 32f2e35a3aSmrg *---------------------------------------------------------------------------- 33d522f475Smrg * Compact and efficient reimplementation of the 34d522f475Smrg * xterm character class mechanism for large character sets 35d522f475Smrg * 36d522f475Smrg * Markus Kuhn -- mkuhn@acm.org -- 2000-07-03 37d522f475Smrg * 38894e0ac8Smrg * xterm allows users to select entire words with a double-click on the left 39d522f475Smrg * mouse button. Opinions might differ on what type of characters are part of 40d522f475Smrg * separate words, therefore xterm allows users to configure a class code for 41d522f475Smrg * each 8-bit character. Words are maximum length sequences of neighboring 42d522f475Smrg * characters with identical class code. Extending this mechanism to Unicode 43d522f475Smrg * naively would create an at least 2^16 entries (128 kB) long class code 44d522f475Smrg * table. 45d522f475Smrg * 46d522f475Smrg * Instead, we transform the character class table into a list of intervals, 47d522f475Smrg * that will be accessed via a linear search. Changes made to the table by the 48d522f475Smrg * user will be appended. A special class code IDENT (default) marks 49d522f475Smrg * characters who have their code number as the class code. 50d522f475Smrg * 51d522f475Smrg * We could alternatively use a sorted table of non-overlapping intervals that 52d522f475Smrg * can be accessed via binary search, but merging in new intervals is 53d522f475Smrg * significantly more hassle and not worth the effort here. 54d522f475Smrg */ 55d522f475Smrg 56d522f475Smrg#include <xterm.h> 57d522f475Smrg#include <charclass.h> 58d522f475Smrg 59d522f475Smrg#if OPT_WIDE_CHARS 60d522f475Smrg 61f2e35a3aSmrg#ifdef TEST_DRIVER 62f2e35a3aSmrg 63f2e35a3aSmrg#include <ctype.h> 64f2e35a3aSmrg#include <wchar.h> 65f2e35a3aSmrg#include <wctype.h> 66f2e35a3aSmrg 67f2e35a3aSmrg#if OPT_TRACE 68f2e35a3aSmrg#define Trace if (opt_v) printf 69f2e35a3aSmrg#endif 70f2e35a3aSmrg 71f2e35a3aSmrg#undef OPT_REPORT_CCLASS 72f2e35a3aSmrg#define OPT_REPORT_CCLASS 1 73f2e35a3aSmrg#endif /* TEST_DRIVER */ 74f2e35a3aSmrg 75d522f475Smrgstatic struct classentry { 76d522f475Smrg int cclass; 77d522f475Smrg int first; 78d522f475Smrg int last; 79d522f475Smrg} *classtab; 80d522f475Smrg 81f2e35a3aSmrg#ifdef TEST_DRIVER 82f2e35a3aSmrgstatic int opt_all; 83f2e35a3aSmrgstatic int opt_check; 84f2e35a3aSmrgstatic int opt_quiet; 85f2e35a3aSmrgstatic int opt_v; 86f2e35a3aSmrg#endif 87f2e35a3aSmrg 88d522f475Smrgvoid 89d522f475Smrginit_classtab(void) 90d522f475Smrg{ 91d522f475Smrg const int size = 50; 92d522f475Smrg 93f2e35a3aSmrg TRACE(("init_classtab " TRACE_L "\n")); 94913cc679Smrg 9520d2c4d2Smrg classtab = TypeMallocN(struct classentry, (unsigned) size); 96d522f475Smrg if (!classtab) 97d522f475Smrg abort(); 98d522f475Smrg classtab[0].cclass = size; 99d522f475Smrg classtab[0].first = 1; 100d522f475Smrg classtab[0].last = 0; 101d522f475Smrg 102d522f475Smrg /* old xterm default classes */ 103d522f475Smrg SetCharacterClassRange(0, 0, BLANK); 104d522f475Smrg SetCharacterClassRange(1, 31, CNTRL); 105d522f475Smrg SetCharacterClassRange('\t', '\t', BLANK); 106d522f475Smrg SetCharacterClassRange('0', '9', ALNUM); 107d522f475Smrg SetCharacterClassRange('A', 'Z', ALNUM); 108d522f475Smrg SetCharacterClassRange('_', '_', ALNUM); 109d522f475Smrg SetCharacterClassRange('a', 'z', ALNUM); 110d522f475Smrg SetCharacterClassRange(127, 159, CNTRL); 111d522f475Smrg SetCharacterClassRange(160, 191, IDENT); 112d522f475Smrg SetCharacterClassRange(192, 255, ALNUM); 113d522f475Smrg SetCharacterClassRange(215, 215, IDENT); 114d522f475Smrg SetCharacterClassRange(247, 247, IDENT); 115d522f475Smrg 116d522f475Smrg /* added Unicode classes */ 117d522f475Smrg SetCharacterClassRange(0x0100, 0xffdf, ALNUM); /* mostly characters */ 118d522f475Smrg SetCharacterClassRange(0x037e, 0x037e, IDENT); /* Greek question mark */ 119d522f475Smrg SetCharacterClassRange(0x0387, 0x0387, IDENT); /* Greek ano teleia */ 120d522f475Smrg SetCharacterClassRange(0x055a, 0x055f, IDENT); /* Armenian punctuation */ 121d522f475Smrg SetCharacterClassRange(0x0589, 0x0589, IDENT); /* Armenian full stop */ 122d522f475Smrg SetCharacterClassRange(0x0700, 0x070d, IDENT); /* Syriac punctuation */ 123d522f475Smrg SetCharacterClassRange(0x104a, 0x104f, IDENT); /* Myanmar punctuation */ 124d522f475Smrg SetCharacterClassRange(0x10fb, 0x10fb, IDENT); /* Georgian punctuation */ 125d522f475Smrg SetCharacterClassRange(0x1361, 0x1368, IDENT); /* Ethiopic punctuation */ 126d522f475Smrg SetCharacterClassRange(0x166d, 0x166e, IDENT); /* Canadian Syl. punctuation */ 127d522f475Smrg SetCharacterClassRange(0x17d4, 0x17dc, IDENT); /* Khmer punctuation */ 128d522f475Smrg SetCharacterClassRange(0x1800, 0x180a, IDENT); /* Mongolian punctuation */ 129d522f475Smrg SetCharacterClassRange(0x2000, 0x200a, BLANK); /* spaces */ 1305307cd1aSmrg SetCharacterClassRange(0x200b, 0x200f, CNTRL); /* formatting */ 1315307cd1aSmrg SetCharacterClassRange(0x2010, 0x27ff, IDENT); /* punctuation and symbols */ 1325307cd1aSmrg SetCharacterClassRange(0x202a, 0x202e, CNTRL); /* formatting */ 1335307cd1aSmrg SetCharacterClassRange(0x2060, 0x206f, CNTRL); /* formatting */ 134913cc679Smrg SetCharacterClassRange(0x2070, 0x207f, U_SUP); /* superscript */ 135913cc679Smrg SetCharacterClassRange(0x2080, 0x208f, U_SUB); /* subscript */ 136d522f475Smrg SetCharacterClassRange(0x3000, 0x3000, BLANK); /* ideographic space */ 137d522f475Smrg SetCharacterClassRange(0x3001, 0x3020, IDENT); /* ideographic punctuation */ 138913cc679Smrg SetCharacterClassRange(0x3040, 0x309f, U_HIR); /* Hiragana */ 139913cc679Smrg SetCharacterClassRange(0x30a0, 0x30ff, U_KAT); /* Katakana */ 140913cc679Smrg SetCharacterClassRange(0x3300, 0x9fff, U_CJK); /* CJK Ideographs */ 141913cc679Smrg SetCharacterClassRange(0xac00, 0xd7a3, U_HAN); /* Hangul Syllables */ 142913cc679Smrg SetCharacterClassRange(0xf900, 0xfaff, U_CJK); /* CJK Ideographs */ 143d522f475Smrg SetCharacterClassRange(0xfe30, 0xfe6b, IDENT); /* punctuation forms */ 1445307cd1aSmrg SetCharacterClassRange(0xfeff, 0xfeff, CNTRL); /* formatting */ 145d522f475Smrg SetCharacterClassRange(0xff00, 0xff0f, IDENT); /* half/fullwidth ASCII */ 146d522f475Smrg SetCharacterClassRange(0xff1a, 0xff20, IDENT); /* half/fullwidth ASCII */ 147d522f475Smrg SetCharacterClassRange(0xff3b, 0xff40, IDENT); /* half/fullwidth ASCII */ 148d522f475Smrg SetCharacterClassRange(0xff5b, 0xff64, IDENT); /* half/fullwidth ASCII */ 1495307cd1aSmrg SetCharacterClassRange(0xfff9, 0xfffb, CNTRL); /* formatting */ 150d522f475Smrg 151f2e35a3aSmrg TRACE((TRACE_R " init_classtab\n")); 152d522f475Smrg return; 153d522f475Smrg} 154d522f475Smrg 155d522f475Smrgint 156d522f475SmrgCharacterClass(int c) 157d522f475Smrg{ 158d522f475Smrg int i, cclass = IDENT; 159d522f475Smrg 160d522f475Smrg for (i = classtab[0].first; i <= classtab[0].last; i++) 161d522f475Smrg if (classtab[i].first <= c && classtab[i].last >= c) 162d522f475Smrg cclass = classtab[i].cclass; 163d522f475Smrg 164d522f475Smrg if (cclass < 0) 165d522f475Smrg cclass = c; 166d522f475Smrg 167d522f475Smrg return cclass; 168d522f475Smrg} 169d522f475Smrg 170913cc679Smrg#if OPT_REPORT_CCLASS 171913cc679Smrg#define charFormat(code) ((code) > 255 ? "0x%04X" : "%d") 172913cc679Smrgstatic const char * 173913cc679Smrgclass_name(Classes code) 174913cc679Smrg{ 175913cc679Smrg static char buffer[80]; 176913cc679Smrg const char *result = "?"; 177913cc679Smrg switch (code) { 178913cc679Smrg case ALNUM: 179913cc679Smrg result = "ALNUM"; 180913cc679Smrg break; 181f2e35a3aSmrg case BLANK: 182f2e35a3aSmrg result = "BLANK"; 183f2e35a3aSmrg break; 184913cc679Smrg case CNTRL: 185913cc679Smrg result = "CNTRL"; 186913cc679Smrg break; 187f2e35a3aSmrg case OTHER: 188f2e35a3aSmrg result = "OTHER"; 189f2e35a3aSmrg break; 190f2e35a3aSmrg case IDENT: 191f2e35a3aSmrg result = "IDENT"; 192913cc679Smrg break; 193913cc679Smrg case U_SUP: 194913cc679Smrg result = "superscript"; 195913cc679Smrg break; 196913cc679Smrg case U_SUB: 197913cc679Smrg result = "subscript"; 198913cc679Smrg break; 199913cc679Smrg case U_CJK: 200913cc679Smrg result = "CJK Ideographs"; 201913cc679Smrg break; 202913cc679Smrg case U_HIR: 203913cc679Smrg result = "Hiragana"; 204913cc679Smrg break; 205913cc679Smrg case U_KAT: 206913cc679Smrg result = "Katakana"; 207913cc679Smrg break; 208913cc679Smrg case U_HAN: 209913cc679Smrg result = "Hangul Syllables"; 210913cc679Smrg break; 211913cc679Smrg default: 212913cc679Smrg sprintf(buffer, charFormat(code), code); 213913cc679Smrg result = buffer; 214913cc679Smrg break; 215913cc679Smrg } 216913cc679Smrg return result; 217913cc679Smrg} 218913cc679Smrg 219f2e35a3aSmrg/* 220f2e35a3aSmrg * Special convention for classtab[0]: 221f2e35a3aSmrg * - classtab[0].cclass is the allocated number of entries in classtab 222f2e35a3aSmrg * - classtab[0].first = 1 (first used entry in classtab) 223f2e35a3aSmrg * - classtab[0].last is the last used entry in classtab 224f2e35a3aSmrg */ 225f2e35a3aSmrg 226f2e35a3aSmrgint 227f2e35a3aSmrgSetCharacterClassRange(int low, int high, int value) 228f2e35a3aSmrg{ 229f2e35a3aSmrg TRACE(("...SetCharacterClassRange (U+%04X .. U+%04X) = %s\n", 230f2e35a3aSmrg low, high, class_name(value))); 231f2e35a3aSmrg 232f2e35a3aSmrg if (high < low) 233f2e35a3aSmrg return -1; /* nothing to do */ 234f2e35a3aSmrg 235f2e35a3aSmrg /* make sure we have at least one free entry left at table end */ 236f2e35a3aSmrg if (classtab[0].last > classtab[0].cclass - 2) { 237f2e35a3aSmrg classtab[0].cclass += 5 + classtab[0].cclass / 4; 238f2e35a3aSmrg classtab = TypeRealloc(struct classentry, 239f2e35a3aSmrg (unsigned) classtab[0].cclass, classtab); 240f2e35a3aSmrg if (!classtab) 241f2e35a3aSmrg abort(); 242f2e35a3aSmrg } 243f2e35a3aSmrg 244f2e35a3aSmrg /* simply append new interval to end of interval array */ 245f2e35a3aSmrg classtab[0].last++; 246f2e35a3aSmrg classtab[classtab[0].last].first = low; 247f2e35a3aSmrg classtab[classtab[0].last].last = high; 248f2e35a3aSmrg classtab[classtab[0].last].cclass = value; 249f2e35a3aSmrg 250f2e35a3aSmrg return 0; 251f2e35a3aSmrg} 252f2e35a3aSmrg 253913cc679Smrgvoid 254913cc679Smrgreport_wide_char_class(void) 255913cc679Smrg{ 256913cc679Smrg static const Classes known_classes[] = 257913cc679Smrg {IDENT, ALNUM, CNTRL, BLANK, U_SUP, U_SUB, U_HIR, U_KAT, U_CJK, U_HAN}; 258913cc679Smrg int i; 259913cc679Smrg 260913cc679Smrg printf("\n"); 261913cc679Smrg printf("Unicode charClass data uses the last match\n"); 262913cc679Smrg printf("from these overlapping intervals of character codes:\n"); 263913cc679Smrg for (i = classtab[0].first; i <= classtab[0].last; i++) { 264913cc679Smrg printf("\tU+%04X .. U+%04X %s\n", 2655307cd1aSmrg (unsigned) classtab[i].first, 2665307cd1aSmrg (unsigned) classtab[i].last, 267f2e35a3aSmrg class_name((Classes) classtab[i].cclass)); 268913cc679Smrg } 269913cc679Smrg printf("\n"); 270913cc679Smrg printf("These class-names are used internally (the first character code in a class):\n"); 271913cc679Smrg for (i = 0; i < (int) XtNumber(known_classes); ++i) { 272913cc679Smrg printf("\t"); 273913cc679Smrg printf(charFormat(known_classes[i]), known_classes[i]); 274913cc679Smrg printf(" = %s\n", class_name(known_classes[i])); 275913cc679Smrg } 276913cc679Smrg} 277913cc679Smrg#endif /* OPT_REPORT_CCLASS */ 278913cc679Smrg 279d522f475Smrg#ifdef NO_LEAKS 280d522f475Smrgvoid 281d522f475Smrgnoleaks_CharacterClass(void) 282d522f475Smrg{ 283f2e35a3aSmrg FreeAndNull(classtab); 284f2e35a3aSmrg} 285f2e35a3aSmrg#endif 286f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 287f2e35a3aSmrg 288f2e35a3aSmrg#ifdef TEST_DRIVER 289f2e35a3aSmrg#if OPT_WIDE_CHARS 290f2e35a3aSmrgstatic void 291f2e35a3aSmrgusage(void) 292f2e35a3aSmrg{ 293f2e35a3aSmrg static const char *msg[] = 294f2e35a3aSmrg { 295f2e35a3aSmrg "Usage: test_charclass [options] [c1[-c1b] [c2-[c2b] [...]]]", 296f2e35a3aSmrg "", 297f2e35a3aSmrg "Options:", 298f2e35a3aSmrg " -a show all data", 299f2e35a3aSmrg " -s show only summary", 300f2e35a3aSmrg " -v verbose" 301f2e35a3aSmrg }; 302f2e35a3aSmrg size_t n; 303f2e35a3aSmrg for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) { 304f2e35a3aSmrg fprintf(stderr, "%s\n", msg[n]); 305f2e35a3aSmrg } 306f2e35a3aSmrg exit(EXIT_FAILURE); 307f2e35a3aSmrg} 308f2e35a3aSmrg 309f2e35a3aSmrgstatic int 310f2e35a3aSmrgexpected_class(int wch) 311f2e35a3aSmrg{ 312f2e35a3aSmrg int result = wch; 313f2e35a3aSmrg wint_t ch = (wint_t) wch; 3145307cd1aSmrg if (wch < 0 || ch == '\0' || ch == '\t') { 315f2e35a3aSmrg result = BLANK; 316f2e35a3aSmrg } else if (iswcntrl(ch)) { 317f2e35a3aSmrg result = CNTRL; 318f2e35a3aSmrg } else if (iswspace(ch)) { 319f2e35a3aSmrg result = BLANK; 320f2e35a3aSmrg } else if (ch < 127) { 321f2e35a3aSmrg if (isalnum(ch) || ch == '_') { 322f2e35a3aSmrg result = ALNUM; 323f2e35a3aSmrg } 324f2e35a3aSmrg } else if (ch == 170 || ch == 181 || ch == 186) { 325f2e35a3aSmrg ; 326f2e35a3aSmrg } else if (iswalnum(ch)) { 327f2e35a3aSmrg result = ALNUM; 328f2e35a3aSmrg } 329f2e35a3aSmrg return result; 330f2e35a3aSmrg} 331f2e35a3aSmrg 332f2e35a3aSmrgstatic int 333f2e35a3aSmrgshow_cclass_range(int lo, int hi) 334f2e35a3aSmrg{ 335f2e35a3aSmrg int cclass = CharacterClass(lo); 336f2e35a3aSmrg int ident = (cclass == lo); 337f2e35a3aSmrg int more = 0; 338f2e35a3aSmrg if (ident) { 339f2e35a3aSmrg int ch; 340f2e35a3aSmrg for (ch = lo + 1; ch <= hi; ch++) { 341f2e35a3aSmrg if (CharacterClass(ch) != ch) { 342f2e35a3aSmrg ident = 0; 343f2e35a3aSmrg break; 344f2e35a3aSmrg } 345f2e35a3aSmrg } 346f2e35a3aSmrg if (ident && (hi < 255)) { 347f2e35a3aSmrg ch = hi + 1; 348f2e35a3aSmrg if (CharacterClass(ch) == ch) { 349f2e35a3aSmrg if (ch >= 255 || CharacterClass(ch + 1) != ch) { 350f2e35a3aSmrg more = 1; 351f2e35a3aSmrg } 352f2e35a3aSmrg } 353f2e35a3aSmrg } 354d522f475Smrg } 355f2e35a3aSmrg if (!more) { 356f2e35a3aSmrg if (lo == hi) { 357f2e35a3aSmrg printf("\t%d", lo); 358f2e35a3aSmrg } else { 359f2e35a3aSmrg printf("\t%d-%d", lo, hi); 360f2e35a3aSmrg } 361f2e35a3aSmrg if (!ident) 362f2e35a3aSmrg printf(":%d", cclass); 363f2e35a3aSmrg if (hi < 255) 364f2e35a3aSmrg printf(", \\"); 365f2e35a3aSmrg printf("\n"); 366f2e35a3aSmrg } 367f2e35a3aSmrg return !more; 368f2e35a3aSmrg} 369f2e35a3aSmrg 370f2e35a3aSmrgstatic void 371f2e35a3aSmrgreport_resource(int first, int last) 372f2e35a3aSmrg{ 373f2e35a3aSmrg int class_p; 374f2e35a3aSmrg int ch; 375f2e35a3aSmrg int dh; 376f2e35a3aSmrg 377f2e35a3aSmrg class_p = CharacterClass(dh = first); 378f2e35a3aSmrg for (ch = first; ch < last; ++ch) { 379f2e35a3aSmrg int class_c = CharacterClass(ch); 380f2e35a3aSmrg if (class_c != class_p) { 381f2e35a3aSmrg if (show_cclass_range(dh, ch - 1)) { 382f2e35a3aSmrg dh = ch; 383f2e35a3aSmrg class_p = class_c; 384f2e35a3aSmrg } 385f2e35a3aSmrg } 386f2e35a3aSmrg } 387f2e35a3aSmrg if (dh < last - 1) { 388f2e35a3aSmrg show_cclass_range(dh, last - 1); 389f2e35a3aSmrg } 390f2e35a3aSmrg} 391f2e35a3aSmrg 392f2e35a3aSmrgstatic int 393f2e35a3aSmrgdecode_one(const char *source, char **target) 394f2e35a3aSmrg{ 395f2e35a3aSmrg int result = -1; 396f2e35a3aSmrg long check; 397f2e35a3aSmrg int radix = 0; 398f2e35a3aSmrg if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') { 399f2e35a3aSmrg source += 2; 400f2e35a3aSmrg radix = 16; 401f2e35a3aSmrg } 402f2e35a3aSmrg check = strtol(source, target, radix); 403f2e35a3aSmrg if (*target != NULL && *target != source) 404f2e35a3aSmrg result = (int) check; 405f2e35a3aSmrg return result; 406d522f475Smrg} 407f2e35a3aSmrg 408f2e35a3aSmrgstatic int 409f2e35a3aSmrgdecode_range(const char *source, int *lo, int *hi) 410f2e35a3aSmrg{ 411f2e35a3aSmrg int result = 0; 412f2e35a3aSmrg char *after1; 413f2e35a3aSmrg char *after2; 414f2e35a3aSmrg if ((*lo = decode_one(source, &after1)) >= 0) { 415f2e35a3aSmrg after1 += strspn(after1, ":-.\t "); 416f2e35a3aSmrg if ((*hi = decode_one(after1, &after2)) < 0) { 417f2e35a3aSmrg *hi = *lo; 418f2e35a3aSmrg } 419f2e35a3aSmrg result = 1; 420f2e35a3aSmrg } 421f2e35a3aSmrg return result; 422f2e35a3aSmrg} 423f2e35a3aSmrg 424f2e35a3aSmrgstatic void 425f2e35a3aSmrgdo_range(const char *source) 426f2e35a3aSmrg{ 427f2e35a3aSmrg int lo, hi; 428f2e35a3aSmrg if (decode_range(source, &lo, &hi)) { 429f2e35a3aSmrg if (opt_all) { 430f2e35a3aSmrg while (lo <= hi) { 431f2e35a3aSmrg int other_rc = CharacterClass(lo); 432f2e35a3aSmrg if (!opt_quiet) 433f2e35a3aSmrg printf("U+%04X\t%s\n", lo, class_name(other_rc)); 434f2e35a3aSmrg ++lo; 435f2e35a3aSmrg } 436f2e35a3aSmrg } else if (opt_check) { 437f2e35a3aSmrg while (lo <= hi) { 438f2e35a3aSmrg int expect = expected_class(lo); 439f2e35a3aSmrg int actual = CharacterClass(lo); 440f2e35a3aSmrg if (actual != expect) 441f2e35a3aSmrg printf("U+%04X\t%s ->%s\n", lo, 442f2e35a3aSmrg class_name(expect), 443f2e35a3aSmrg class_name(actual)); 444f2e35a3aSmrg ++lo; 445f2e35a3aSmrg } 446f2e35a3aSmrg } else { 447f2e35a3aSmrg printf("\"charClass\" resource for [%d..%d]:\n", lo, hi); 448f2e35a3aSmrg report_resource(lo, hi + 1); 449f2e35a3aSmrg } 450f2e35a3aSmrg } 451f2e35a3aSmrg} 452f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */ 453f2e35a3aSmrg 454f2e35a3aSmrg/* 455f2e35a3aSmrg * TODO: add option to show do_range in hex 456f2e35a3aSmrg */ 457f2e35a3aSmrgint 458f2e35a3aSmrgmain(int argc, char **argv ENVP_ARG) 459f2e35a3aSmrg{ 460f2e35a3aSmrg#if OPT_WIDE_CHARS 461f2e35a3aSmrg int ch; 462d522f475Smrg#endif 463d522f475Smrg 464f2e35a3aSmrg (void) argc; 465f2e35a3aSmrg (void) argv; 466f2e35a3aSmrg 467f2e35a3aSmrg#if OPT_WIDE_CHARS 468f2e35a3aSmrg setlocale(LC_ALL, ""); 469f2e35a3aSmrg while ((ch = getopt(argc, argv, "acsv")) != -1) { 470f2e35a3aSmrg switch (ch) { 471f2e35a3aSmrg case 'a': 472f2e35a3aSmrg opt_all = 1; 473f2e35a3aSmrg break; 474f2e35a3aSmrg case 'c': 475f2e35a3aSmrg opt_check = 1; 476f2e35a3aSmrg break; 477f2e35a3aSmrg case 's': 478f2e35a3aSmrg opt_quiet = 1; 479f2e35a3aSmrg break; 480f2e35a3aSmrg case 'v': 481f2e35a3aSmrg opt_v = 1; 482f2e35a3aSmrg break; 483f2e35a3aSmrg default: 484f2e35a3aSmrg usage(); 485f2e35a3aSmrg } 486f2e35a3aSmrg } 487f2e35a3aSmrg init_classtab(); 488f2e35a3aSmrg 489f2e35a3aSmrg if (optind >= argc) { 490f2e35a3aSmrg do_range("0-255"); 491f2e35a3aSmrg } else { 492f2e35a3aSmrg while (optind < argc) { 493f2e35a3aSmrg do_range(argv[optind++]); 494f2e35a3aSmrg } 495f2e35a3aSmrg } 496f2e35a3aSmrg report_wide_char_class(); 497f2e35a3aSmrg#else 498f2e35a3aSmrg printf("wide-character support is not configured\n"); 499d522f475Smrg#endif /* OPT_WIDE_CHARS */ 500f2e35a3aSmrg return 0; 501f2e35a3aSmrg} 502f2e35a3aSmrg#endif /* TEST_DRIVER */ 503