charclass.c revision f2e35a3a
1f2e35a3aSmrg/* $XTermId: charclass.c,v 1.44 2021/02/02 00:19:32 tom Exp $ */
2d522f475Smrg
3d522f475Smrg/*
4f2e35a3aSmrg * Copyright 2002-2020,2021 by Thomas E. Dickey
5f2e35a3aSmrg *
6f2e35a3aSmrg *                         All Rights Reserved
7f2e35a3aSmrg *
8f2e35a3aSmrg * Permission is hereby granted, free of charge, to any person obtaining a
9f2e35a3aSmrg * copy of this software and associated documentation files (the
10f2e35a3aSmrg * "Software"), to deal in the Software without restriction, including
11f2e35a3aSmrg * without limitation the rights to use, copy, modify, merge, publish,
12f2e35a3aSmrg * distribute, sublicense, and/or sell copies of the Software, and to
13f2e35a3aSmrg * permit persons to whom the Software is furnished to do so, subject to
14f2e35a3aSmrg * the following conditions:
15f2e35a3aSmrg *
16f2e35a3aSmrg * The above copyright notice and this permission notice shall be included
17f2e35a3aSmrg * in all copies or substantial portions of the Software.
18f2e35a3aSmrg *
19f2e35a3aSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20f2e35a3aSmrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21f2e35a3aSmrg * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22f2e35a3aSmrg * IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23f2e35a3aSmrg * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24f2e35a3aSmrg * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25f2e35a3aSmrg * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26f2e35a3aSmrg *
27f2e35a3aSmrg * Except as contained in this notice, the name(s) of the above copyright
28f2e35a3aSmrg * holders shall not be used in advertising or otherwise to promote the
29f2e35a3aSmrg * sale, use or other dealings in this Software without prior written
30f2e35a3aSmrg * authorization.
31f2e35a3aSmrg *
32f2e35a3aSmrg *----------------------------------------------------------------------------
33d522f475Smrg * Compact and efficient reimplementation of the
34d522f475Smrg * xterm character class mechanism for large character sets
35d522f475Smrg *
36d522f475Smrg * Markus Kuhn -- mkuhn@acm.org -- 2000-07-03
37d522f475Smrg *
38894e0ac8Smrg * xterm allows users to select entire words with a double-click on the left
39d522f475Smrg * mouse button.  Opinions might differ on what type of characters are part of
40d522f475Smrg * separate words, therefore xterm allows users to configure a class code for
41d522f475Smrg * each 8-bit character.  Words are maximum length sequences of neighboring
 * characters with identical class code.  Naively extending this mechanism to
 * Unicode would require a class code table with at least 2^16 entries
 * (128 kB).
45d522f475Smrg *
46d522f475Smrg * Instead, we transform the character class table into a list of intervals,
47d522f475Smrg * that will be accessed via a linear search.  Changes made to the table by the
 * user will be appended.  A special class code IDENT (default) marks
 * characters that have their own code number as the class code.
50d522f475Smrg *
51d522f475Smrg * We could alternatively use a sorted table of non-overlapping intervals that
52d522f475Smrg * can be accessed via binary search, but merging in new intervals is
53d522f475Smrg * significantly more hassle and not worth the effort here.
54d522f475Smrg */
55d522f475Smrg
#include <xterm.h>
#include <charclass.h>

#include <errno.h>
#include <limits.h>
58d522f475Smrg
59d522f475Smrg#if OPT_WIDE_CHARS
60d522f475Smrg
61f2e35a3aSmrg#ifdef TEST_DRIVER
62f2e35a3aSmrg
63f2e35a3aSmrg#include <ctype.h>
64f2e35a3aSmrg#include <wchar.h>
65f2e35a3aSmrg#include <wctype.h>
66f2e35a3aSmrg
67f2e35a3aSmrg#if OPT_TRACE
68f2e35a3aSmrg#define Trace if (opt_v) printf
69f2e35a3aSmrg#endif
70f2e35a3aSmrg
71f2e35a3aSmrg#undef OPT_REPORT_CCLASS
72f2e35a3aSmrg#define OPT_REPORT_CCLASS 1
73f2e35a3aSmrg#endif /* TEST_DRIVER */
74f2e35a3aSmrg
/*
 * One interval of the class table: the character codes first..last
 * (inclusive) are assigned the class code cclass.
 *
 * Entry 0 of the table is not an interval; init_classtab() stores the
 * table's bookkeeping data there (allocated size, first and last used index).
 */
struct classentry {
    int cclass;			/* class code of the interval */
    int first;			/* lowest character code covered */
    int last;			/* highest character code covered */
};

/* The interval table itself; allocated by init_classtab(). */
static struct classentry *classtab;
80d522f475Smrg
/*
 * Class codes used by the built-in table, listed in ascending order.
 * Apart from IDENT, each code is a plain integer; the report printed by
 * report_wide_char_class() describes the named ones as "the first character
 * code in a class".
 */
typedef enum {
    IDENT = -1,			/* the character's own code is its class */
    OTHER = 0,
    CNTRL = 1,
    BLANK = 32,
    ALNUM = 48,
    U_SUP = 0x2070,		/* superscript */
    U_SUB = 0x2080,		/* subscript */
    U_HIR = 0x3040,		/* Hiragana */
    U_KAT = 0x30a0,		/* Katakana */
    U_CJK = 0x4e00,		/* CJK Ideographs */
    U_HAN = 0xac00		/* Hangul Syllables */
} Classes;
94d522f475Smrg
95f2e35a3aSmrg#ifdef TEST_DRIVER
/* Command-line switches of the test driver; each is set to 1 by main(). */
static int opt_all = 0;		/* -a: print the class of every character */
static int opt_check = 0;	/* -c: compare against expected_class() */
static int opt_quiet = 0;	/* -s: suppress the per-character lines of -a */
static int opt_v = 0;		/* -v: enables the Trace macro's output */
100f2e35a3aSmrg#endif
101f2e35a3aSmrg
102d522f475Smrgvoid
103d522f475Smrginit_classtab(void)
104d522f475Smrg{
105d522f475Smrg    const int size = 50;
106d522f475Smrg
107f2e35a3aSmrg    TRACE(("init_classtab " TRACE_L "\n"));
108913cc679Smrg
10920d2c4d2Smrg    classtab = TypeMallocN(struct classentry, (unsigned) size);
110d522f475Smrg    if (!classtab)
111d522f475Smrg	abort();
112d522f475Smrg    classtab[0].cclass = size;
113d522f475Smrg    classtab[0].first = 1;
114d522f475Smrg    classtab[0].last = 0;
115d522f475Smrg
116d522f475Smrg    /* old xterm default classes */
117d522f475Smrg    SetCharacterClassRange(0, 0, BLANK);
118d522f475Smrg    SetCharacterClassRange(1, 31, CNTRL);
119d522f475Smrg    SetCharacterClassRange('\t', '\t', BLANK);
120d522f475Smrg    SetCharacterClassRange('0', '9', ALNUM);
121d522f475Smrg    SetCharacterClassRange('A', 'Z', ALNUM);
122d522f475Smrg    SetCharacterClassRange('_', '_', ALNUM);
123d522f475Smrg    SetCharacterClassRange('a', 'z', ALNUM);
124d522f475Smrg    SetCharacterClassRange(127, 159, CNTRL);
125d522f475Smrg    SetCharacterClassRange(160, 191, IDENT);
126d522f475Smrg    SetCharacterClassRange(192, 255, ALNUM);
127d522f475Smrg    SetCharacterClassRange(215, 215, IDENT);
128d522f475Smrg    SetCharacterClassRange(247, 247, IDENT);
129d522f475Smrg
130d522f475Smrg    /* added Unicode classes */
131d522f475Smrg    SetCharacterClassRange(0x0100, 0xffdf, ALNUM);	/* mostly characters */
132d522f475Smrg    SetCharacterClassRange(0x037e, 0x037e, IDENT);	/* Greek question mark */
133d522f475Smrg    SetCharacterClassRange(0x0387, 0x0387, IDENT);	/* Greek ano teleia */
134d522f475Smrg    SetCharacterClassRange(0x055a, 0x055f, IDENT);	/* Armenian punctuation */
135d522f475Smrg    SetCharacterClassRange(0x0589, 0x0589, IDENT);	/* Armenian full stop */
136d522f475Smrg    SetCharacterClassRange(0x0700, 0x070d, IDENT);	/* Syriac punctuation */
137d522f475Smrg    SetCharacterClassRange(0x104a, 0x104f, IDENT);	/* Myanmar punctuation */
138d522f475Smrg    SetCharacterClassRange(0x10fb, 0x10fb, IDENT);	/* Georgian punctuation */
139d522f475Smrg    SetCharacterClassRange(0x1361, 0x1368, IDENT);	/* Ethiopic punctuation */
140d522f475Smrg    SetCharacterClassRange(0x166d, 0x166e, IDENT);	/* Canadian Syl. punctuation */
141d522f475Smrg    SetCharacterClassRange(0x17d4, 0x17dc, IDENT);	/* Khmer punctuation */
142d522f475Smrg    SetCharacterClassRange(0x1800, 0x180a, IDENT);	/* Mongolian punctuation */
143d522f475Smrg    SetCharacterClassRange(0x2000, 0x200a, BLANK);	/* spaces */
144d522f475Smrg    SetCharacterClassRange(0x200b, 0x27ff, IDENT);	/* punctuation and symbols */
145913cc679Smrg    SetCharacterClassRange(0x2070, 0x207f, U_SUP);	/* superscript */
146913cc679Smrg    SetCharacterClassRange(0x2080, 0x208f, U_SUB);	/* subscript */
147d522f475Smrg    SetCharacterClassRange(0x3000, 0x3000, BLANK);	/* ideographic space */
148d522f475Smrg    SetCharacterClassRange(0x3001, 0x3020, IDENT);	/* ideographic punctuation */
149913cc679Smrg    SetCharacterClassRange(0x3040, 0x309f, U_HIR);	/* Hiragana */
150913cc679Smrg    SetCharacterClassRange(0x30a0, 0x30ff, U_KAT);	/* Katakana */
151913cc679Smrg    SetCharacterClassRange(0x3300, 0x9fff, U_CJK);	/* CJK Ideographs */
152913cc679Smrg    SetCharacterClassRange(0xac00, 0xd7a3, U_HAN);	/* Hangul Syllables */
153913cc679Smrg    SetCharacterClassRange(0xf900, 0xfaff, U_CJK);	/* CJK Ideographs */
154d522f475Smrg    SetCharacterClassRange(0xfe30, 0xfe6b, IDENT);	/* punctuation forms */
155d522f475Smrg    SetCharacterClassRange(0xff00, 0xff0f, IDENT);	/* half/fullwidth ASCII */
156d522f475Smrg    SetCharacterClassRange(0xff1a, 0xff20, IDENT);	/* half/fullwidth ASCII */
157d522f475Smrg    SetCharacterClassRange(0xff3b, 0xff40, IDENT);	/* half/fullwidth ASCII */
158d522f475Smrg    SetCharacterClassRange(0xff5b, 0xff64, IDENT);	/* half/fullwidth ASCII */
159d522f475Smrg
160f2e35a3aSmrg    TRACE((TRACE_R " init_classtab\n"));
161d522f475Smrg    return;
162d522f475Smrg}
163d522f475Smrg
164d522f475Smrgint
165d522f475SmrgCharacterClass(int c)
166d522f475Smrg{
167d522f475Smrg    int i, cclass = IDENT;
168d522f475Smrg
169d522f475Smrg    for (i = classtab[0].first; i <= classtab[0].last; i++)
170d522f475Smrg	if (classtab[i].first <= c && classtab[i].last >= c)
171d522f475Smrg	    cclass = classtab[i].cclass;
172d522f475Smrg
173d522f475Smrg    if (cclass < 0)
174d522f475Smrg	cclass = c;
175d522f475Smrg
176d522f475Smrg    return cclass;
177d522f475Smrg}
178d522f475Smrg
179913cc679Smrg#if OPT_REPORT_CCLASS
180913cc679Smrg#define charFormat(code) ((code) > 255 ? "0x%04X" : "%d")
181913cc679Smrgstatic const char *
182913cc679Smrgclass_name(Classes code)
183913cc679Smrg{
184913cc679Smrg    static char buffer[80];
185913cc679Smrg    const char *result = "?";
186913cc679Smrg    switch (code) {
187913cc679Smrg    case ALNUM:
188913cc679Smrg	result = "ALNUM";
189913cc679Smrg	break;
190f2e35a3aSmrg    case BLANK:
191f2e35a3aSmrg	result = "BLANK";
192f2e35a3aSmrg	break;
193913cc679Smrg    case CNTRL:
194913cc679Smrg	result = "CNTRL";
195913cc679Smrg	break;
196f2e35a3aSmrg    case OTHER:
197f2e35a3aSmrg	result = "OTHER";
198f2e35a3aSmrg	break;
199f2e35a3aSmrg    case IDENT:
200f2e35a3aSmrg	result = "IDENT";
201913cc679Smrg	break;
202913cc679Smrg    case U_SUP:
203913cc679Smrg	result = "superscript";
204913cc679Smrg	break;
205913cc679Smrg    case U_SUB:
206913cc679Smrg	result = "subscript";
207913cc679Smrg	break;
208913cc679Smrg    case U_CJK:
209913cc679Smrg	result = "CJK Ideographs";
210913cc679Smrg	break;
211913cc679Smrg    case U_HIR:
212913cc679Smrg	result = "Hiragana";
213913cc679Smrg	break;
214913cc679Smrg    case U_KAT:
215913cc679Smrg	result = "Katakana";
216913cc679Smrg	break;
217913cc679Smrg    case U_HAN:
218913cc679Smrg	result = "Hangul Syllables";
219913cc679Smrg	break;
220913cc679Smrg    default:
221913cc679Smrg	sprintf(buffer, charFormat(code), code);
222913cc679Smrg	result = buffer;
223913cc679Smrg	break;
224913cc679Smrg    }
225913cc679Smrg    return result;
226913cc679Smrg}
227913cc679Smrg
228f2e35a3aSmrg/*
229f2e35a3aSmrg * Special convention for classtab[0]:
230f2e35a3aSmrg * - classtab[0].cclass is the allocated number of entries in classtab
231f2e35a3aSmrg * - classtab[0].first = 1 (first used entry in classtab)
232f2e35a3aSmrg * - classtab[0].last is the last used entry in classtab
233f2e35a3aSmrg */
234f2e35a3aSmrg
235f2e35a3aSmrgint
236f2e35a3aSmrgSetCharacterClassRange(int low, int high, int value)
237f2e35a3aSmrg{
238f2e35a3aSmrg    TRACE(("...SetCharacterClassRange (U+%04X .. U+%04X) = %s\n",
239f2e35a3aSmrg	   low, high, class_name(value)));
240f2e35a3aSmrg
241f2e35a3aSmrg    if (high < low)
242f2e35a3aSmrg	return -1;		/* nothing to do */
243f2e35a3aSmrg
244f2e35a3aSmrg    /* make sure we have at least one free entry left at table end */
245f2e35a3aSmrg    if (classtab[0].last > classtab[0].cclass - 2) {
246f2e35a3aSmrg	classtab[0].cclass += 5 + classtab[0].cclass / 4;
247f2e35a3aSmrg	classtab = TypeRealloc(struct classentry,
248f2e35a3aSmrg			         (unsigned) classtab[0].cclass, classtab);
249f2e35a3aSmrg	if (!classtab)
250f2e35a3aSmrg	    abort();
251f2e35a3aSmrg    }
252f2e35a3aSmrg
253f2e35a3aSmrg    /* simply append new interval to end of interval array */
254f2e35a3aSmrg    classtab[0].last++;
255f2e35a3aSmrg    classtab[classtab[0].last].first = low;
256f2e35a3aSmrg    classtab[classtab[0].last].last = high;
257f2e35a3aSmrg    classtab[classtab[0].last].cclass = value;
258f2e35a3aSmrg
259f2e35a3aSmrg    return 0;
260f2e35a3aSmrg}
261f2e35a3aSmrg
262913cc679Smrgvoid
263913cc679Smrgreport_wide_char_class(void)
264913cc679Smrg{
265913cc679Smrg    static const Classes known_classes[] =
266913cc679Smrg    {IDENT, ALNUM, CNTRL, BLANK, U_SUP, U_SUB, U_HIR, U_KAT, U_CJK, U_HAN};
267913cc679Smrg    int i;
268913cc679Smrg
269913cc679Smrg    printf("\n");
270913cc679Smrg    printf("Unicode charClass data uses the last match\n");
271913cc679Smrg    printf("from these overlapping intervals of character codes:\n");
272913cc679Smrg    for (i = classtab[0].first; i <= classtab[0].last; i++) {
273913cc679Smrg	printf("\tU+%04X .. U+%04X %s\n",
274913cc679Smrg	       classtab[i].first,
275913cc679Smrg	       classtab[i].last,
276f2e35a3aSmrg	       class_name((Classes) classtab[i].cclass));
277913cc679Smrg    }
278913cc679Smrg    printf("\n");
279913cc679Smrg    printf("These class-names are used internally (the first character code in a class):\n");
280913cc679Smrg    for (i = 0; i < (int) XtNumber(known_classes); ++i) {
281913cc679Smrg	printf("\t");
282913cc679Smrg	printf(charFormat(known_classes[i]), known_classes[i]);
283913cc679Smrg	printf(" = %s\n", class_name(known_classes[i]));
284913cc679Smrg    }
285913cc679Smrg}
286913cc679Smrg#endif /* OPT_REPORT_CCLASS */
287913cc679Smrg
288d522f475Smrg#ifdef NO_LEAKS
289d522f475Smrgvoid
290d522f475Smrgnoleaks_CharacterClass(void)
291d522f475Smrg{
292f2e35a3aSmrg    FreeAndNull(classtab);
293f2e35a3aSmrg}
294f2e35a3aSmrg#endif
295f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */
296f2e35a3aSmrg
297f2e35a3aSmrg#ifdef TEST_DRIVER
298f2e35a3aSmrg#if OPT_WIDE_CHARS
/*
 * Print the test driver's command-line help to standard error and exit with
 * a failure status.  This function does not return.
 *
 * The option list mirrors the switches accepted by main() ("acsv").
 */
static void
usage(void)
{
    static const char *const msg[] =
    {
	"Usage: test_charclass [options] [c1[-c1b] [c2[-c2b] [...]]]",
	"",
	"Options:",
	" -a  show all data",
	" -c  check each class against the expected class, show differences",
	" -s  show only summary",
	" -v  verbose"
    };
    size_t n;

    for (n = 0; n < sizeof(msg) / sizeof(msg[0]); ++n) {
	fprintf(stderr, "%s\n", msg[n]);
    }
    exit(EXIT_FAILURE);
}
317f2e35a3aSmrg
318f2e35a3aSmrgstatic int
319f2e35a3aSmrgexpected_class(int wch)
320f2e35a3aSmrg{
321f2e35a3aSmrg    int result = wch;
322f2e35a3aSmrg    wint_t ch = (wint_t) wch;
323f2e35a3aSmrg    if (ch == '\0' || ch == '\t') {
324f2e35a3aSmrg	result = BLANK;
325f2e35a3aSmrg    } else if (iswcntrl(ch)) {
326f2e35a3aSmrg	result = CNTRL;
327f2e35a3aSmrg    } else if (iswspace(ch)) {
328f2e35a3aSmrg	result = BLANK;
329f2e35a3aSmrg    } else if (ch < 127) {
330f2e35a3aSmrg	if (isalnum(ch) || ch == '_') {
331f2e35a3aSmrg	    result = ALNUM;
332f2e35a3aSmrg	}
333f2e35a3aSmrg    } else if (ch == 170 || ch == 181 || ch == 186) {
334f2e35a3aSmrg	;
335f2e35a3aSmrg    } else if (iswalnum(ch)) {
336f2e35a3aSmrg	result = ALNUM;
337f2e35a3aSmrg    }
338f2e35a3aSmrg    return result;
339f2e35a3aSmrg}
340f2e35a3aSmrg
/*
 * Print one item of a "charClass" resource for the codes lo..hi, which the
 * caller found to share the class of lo.
 *
 * When every code in the range is its own class (an "identity" range) and
 * the code after hi would extend that run, nothing is printed and 0 is
 * returned so the caller can keep accumulating.  Otherwise the item is
 * printed -- "lo" or "lo-hi", followed by ":class" unless it is an identity
 * range, and by a continuation ", \" when hi is below 255 -- and 1 is
 * returned.
 */
static int
show_cclass_range(int lo, int hi)
{
    const int cclass = CharacterClass(lo);
    int identity = (lo == cclass);
    int code;

    /* an identity range needs every member to be its own class */
    for (code = lo + 1; identity && code <= hi; ++code)
	identity = (CharacterClass(code) == code);

    if (identity && hi < 255) {
	const int next = hi + 1;

	/*
	 * The next code also maps to itself.  It extends the run unless the
	 * code after it has that same class, in which case "next" begins an
	 * ordinary class whose code merely equals its first member.
	 */
	if (CharacterClass(next) == next
	    && (next >= 255 || CharacterClass(next + 1) != next)) {
	    return 0;
	}
    }

    if (lo != hi) {
	printf("\t%d-%d", lo, hi);
    } else {
	printf("\t%d", lo);
    }
    if (!identity)
	printf(":%d", cclass);
    if (hi < 255)
	printf(", \\");
    printf("\n");

    return 1;
}
378f2e35a3aSmrg
/*
 * Print a "charClass" resource for the character codes first..last-1.
 *
 * The codes are scanned in order.  Whenever the class changes, the pending
 * range is handed to show_cclass_range(); if that function printed it, a new
 * pending range starts at the current code.  After the scan, whatever is
 * still pending is flushed.
 *
 * The final flush uses "start < last" so that a pending range consisting of
 * a single code (start == last - 1) is shown as well; comparing against
 * last - 1 silently dropped it.
 *
 * NOTE(review): show_cclass_range() may still decline to print an identity
 * range when the code following it is also its own class, so the final
 * flush can print nothing in that case.
 */
static void
report_resource(int first, int last)
{
    int start = first;
    int start_class = CharacterClass(first);
    int ch;

    for (ch = first; ch < last; ++ch) {
	const int current = CharacterClass(ch);

	if (current != start_class && show_cclass_range(start, ch - 1)) {
	    start = ch;
	    start_class = current;
	}
    }

    /* flush the pending range, which is non-empty iff start < last */
    if (start < last) {
	show_cclass_range(start, last - 1);
    }
}
400f2e35a3aSmrg
/*
 * Decode one character code from the start of `source`.
 *
 * A leading "U+" or "u+" selects hexadecimal; otherwise strtol()'s base 0
 * rules apply (decimal, 0x... hexadecimal, 0... octal).
 *
 * On return *target points just past the text consumed by strtol().
 * Returns the code, or -1 if no number was found or the value does not fit
 * into the range 0..INT_MAX.  Out-of-range input is rejected rather than
 * silently truncated to int.
 */
static int
decode_one(const char *source, char **target)
{
    long check;
    int radix = 0;

    if ((source[0] == 'u' || source[0] == 'U') && source[1] == '+') {
	source += 2;
	radix = 16;
    }

    errno = 0;
    check = strtol(source, target, radix);

    if (*target == NULL || *target == source)
	return -1;		/* no digits were consumed */
    if (errno == ERANGE || check < 0 || check > INT_MAX)
	return -1;		/* not representable as a character code */

    return (int) check;
}
416f2e35a3aSmrg
/*
 * Decode a range such as "lo-hi" from `source` into *lo and *hi.
 *
 * The two codes may be separated by any run of ':', '-', '.', tab or space.
 * If no second code can be decoded, the range is the single code *lo.
 * Returns 1 on success, or 0 if not even the first code could be decoded.
 */
static int
decode_range(const char *source, int *lo, int *hi)
{
    char *rest;
    char *ignored;

    *lo = decode_one(source, &rest);
    if (*lo < 0)
	return 0;

    /* skip the separator(s) between the two codes */
    rest += strspn(rest, ":-.\t ");

    *hi = decode_one(rest, &ignored);
    if (*hi < 0)
	*hi = *lo;

    return 1;
}
432f2e35a3aSmrg
433f2e35a3aSmrgstatic void
434f2e35a3aSmrgdo_range(const char *source)
435f2e35a3aSmrg{
436f2e35a3aSmrg    int lo, hi;
437f2e35a3aSmrg    if (decode_range(source, &lo, &hi)) {
438f2e35a3aSmrg	if (opt_all) {
439f2e35a3aSmrg	    while (lo <= hi) {
440f2e35a3aSmrg		int other_rc = CharacterClass(lo);
441f2e35a3aSmrg		if (!opt_quiet)
442f2e35a3aSmrg		    printf("U+%04X\t%s\n", lo, class_name(other_rc));
443f2e35a3aSmrg		++lo;
444f2e35a3aSmrg	    }
445f2e35a3aSmrg	} else if (opt_check) {
446f2e35a3aSmrg	    while (lo <= hi) {
447f2e35a3aSmrg		int expect = expected_class(lo);
448f2e35a3aSmrg		int actual = CharacterClass(lo);
449f2e35a3aSmrg		if (actual != expect)
450f2e35a3aSmrg		    printf("U+%04X\t%s ->%s\n", lo,
451f2e35a3aSmrg			   class_name(expect),
452f2e35a3aSmrg			   class_name(actual));
453f2e35a3aSmrg		++lo;
454f2e35a3aSmrg	    }
455f2e35a3aSmrg	} else {
456f2e35a3aSmrg	    printf("\"charClass\" resource for [%d..%d]:\n", lo, hi);
457f2e35a3aSmrg	    report_resource(lo, hi + 1);
458f2e35a3aSmrg	}
459f2e35a3aSmrg    }
460f2e35a3aSmrg}
461f2e35a3aSmrg#endif /* OPT_WIDE_CHARS */
462f2e35a3aSmrg
463f2e35a3aSmrg/*
464f2e35a3aSmrg * TODO: add option to show do_range in hex
465f2e35a3aSmrg */
466f2e35a3aSmrgint
467f2e35a3aSmrgmain(int argc, char **argv ENVP_ARG)
468f2e35a3aSmrg{
469f2e35a3aSmrg#if OPT_WIDE_CHARS
470f2e35a3aSmrg    int ch;
471d522f475Smrg#endif
472d522f475Smrg
473f2e35a3aSmrg    (void) argc;
474f2e35a3aSmrg    (void) argv;
475f2e35a3aSmrg
476f2e35a3aSmrg#if OPT_WIDE_CHARS
477f2e35a3aSmrg    setlocale(LC_ALL, "");
478f2e35a3aSmrg    while ((ch = getopt(argc, argv, "acsv")) != -1) {
479f2e35a3aSmrg	switch (ch) {
480f2e35a3aSmrg	case 'a':
481f2e35a3aSmrg	    opt_all = 1;
482f2e35a3aSmrg	    break;
483f2e35a3aSmrg	case 'c':
484f2e35a3aSmrg	    opt_check = 1;
485f2e35a3aSmrg	    break;
486f2e35a3aSmrg	case 's':
487f2e35a3aSmrg	    opt_quiet = 1;
488f2e35a3aSmrg	    break;
489f2e35a3aSmrg	case 'v':
490f2e35a3aSmrg	    opt_v = 1;
491f2e35a3aSmrg	    break;
492f2e35a3aSmrg	default:
493f2e35a3aSmrg	    usage();
494f2e35a3aSmrg	}
495f2e35a3aSmrg    }
496f2e35a3aSmrg    init_classtab();
497f2e35a3aSmrg
498f2e35a3aSmrg    if (optind >= argc) {
499f2e35a3aSmrg	do_range("0-255");
500f2e35a3aSmrg    } else {
501f2e35a3aSmrg	while (optind < argc) {
502f2e35a3aSmrg	    do_range(argv[optind++]);
503f2e35a3aSmrg	}
504f2e35a3aSmrg    }
505f2e35a3aSmrg    report_wide_char_class();
506f2e35a3aSmrg#else
507f2e35a3aSmrg    printf("wide-character support is not configured\n");
508d522f475Smrg#endif /* OPT_WIDE_CHARS */
509f2e35a3aSmrg    return 0;
510f2e35a3aSmrg}
511f2e35a3aSmrg#endif /* TEST_DRIVER */
512