citrus_iso2022.c revision 1.5 1 /* $NetBSD: citrus_iso2022.c,v 1.5 2002/03/28 10:29:11 yamt Exp $ */
2
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.5 2002/03/28 10:29:11 yamt Exp $");
34 #endif /* LIBC_SCCS and not lint */
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46 #include "citrus_module.h"
47 #include "citrus_ctype.h"
48 #include "citrus_iso2022.h"
49
50
51 /* ----------------------------------------------------------------------
52 * private stuffs used by templates
53 */
54
55
56 /*
57 * wchar_t mappings:
58 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
59 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
60 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
61 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
62 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
63 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
64 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
65 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
66 * 94x94 charset (ESC & V ESC $ ( F)
67 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
68 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
69 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
70 */
71
72 typedef struct {
73 u_char type;
74 #define CS94 (0U)
75 #define CS96 (1U)
76 #define CS94MULTI (2U)
77 #define CS96MULTI (3U)
78
79 u_char final;
80 u_char interm;
81 u_char vers;
82 } _ISO2022Charset;
83
84 typedef struct {
85 _ISO2022Charset g[4];
86 /* need 3 bits to hold -1, 0, ..., 3 */
87 int gl:3,
88 gr:3,
89 singlegl:3,
90 singlegr:3;
91 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
92 int chlen;
93 int flags;
94 #define _ISO2022STATE_FLAG_INITIALIZED 1
95 } _ISO2022State;
96
97 typedef struct {
98 _ISO2022Charset *recommend[4];
99 size_t recommendsize[4];
100 _ISO2022Charset initg[4];
101 int maxcharset;
102 int flags;
103 #define F_8BIT 0x0001
104 #define F_NOOLD 0x0002
105 #define F_SI 0x0010 /*0F*/
106 #define F_SO 0x0020 /*0E*/
107 #define F_LS0 0x0010 /*0F*/
108 #define F_LS1 0x0020 /*0E*/
109 #define F_LS2 0x0040 /*ESC n*/
110 #define F_LS3 0x0080 /*ESC o*/
111 #define F_LS1R 0x0100 /*ESC ~*/
112 #define F_LS2R 0x0200 /*ESC }*/
113 #define F_LS3R 0x0400 /*ESC |*/
114 #define F_SS2 0x0800 /*ESC N*/
115 #define F_SS3 0x1000 /*ESC O*/
116 #define F_SS2R 0x2000 /*8E*/
117 #define F_SS3R 0x4000 /*8F*/
118 } _ISO2022EncodingInfo;
119 typedef struct {
120 _ISO2022EncodingInfo ei;
121 struct {
122 /* for future multi-locale facility */
123 _ISO2022State s_mblen;
124 _ISO2022State s_mbrlen;
125 _ISO2022State s_mbrtowc;
126 _ISO2022State s_mbtowc;
127 _ISO2022State s_mbsrtowcs;
128 _ISO2022State s_wcrtomb;
129 _ISO2022State s_wcsrtombs;
130 _ISO2022State s_wctomb;
131 } states;
132 } _ISO2022CTypeInfo;
133
/* Pointer conversions between the opaque handles and the private types. */
#define _TO_EI(_p_)			((_ISO2022EncodingInfo *)(_p_))
#define _TO_CEI(_p_)			((_ISO2022CTypeInfo *)(_p_))
#define _TO_STATE(_p_)			((_ISO2022State *)(_p_))
#define _CEI_TO_EI(_c_)			(&(_c_)->ei)
#define _CEI_TO_STATE(_c_, _f_)		(_c_)->states.s_##_f_

/* Names and properties expected by the ctype template. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_e_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1

/* True while the state has not yet been marked as initialized. */
#define _STATE_NEEDS_EXPLICIT_INIT(_s_)	\
	(!((_s_)->flags & _ISO2022STATE_FLAG_INITIALIZED))
148
149
/*
 * Value returned by _ISO2022_sgetwchar() when no character could be
 * produced.  Fully parenthesized so that the macro behaves as a single
 * operand wherever it is used.
 */
#define _ISO2022INVALID	((wchar_t)-1)
151
/*
 * Byte classification helpers.  Each returns 1 when the byte belongs
 * to the class and 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x < 0x20);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	return (x == 0x7f || isc0(x) || isc1(x));
}

/* position in a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position in a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x < 0x80);
}

/* byte accepted as the final of a designation: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte of a three-byte multibyte set: 0x60..0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
160
161 static __inline int
162 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
163 {
164
165 _DIAGASSERT(p != NULL);
166 _DIAGASSERT(cs != NULL);
167
168 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
169 cs->final = (u_char)(p[3] & 0xff);
170 cs->interm = '\0';
171 cs->vers = '\0';
172 cs->type = CS94MULTI;
173 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
174 cs->final = (u_char)(p[3] & 0xff);
175 cs->interm = '\0';
176 cs->vers = '\0';
177 cs->type = CS96MULTI;
178 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
179 cs->final = (u_char)(p[2] & 0xff);
180 cs->interm = '\0';
181 cs->vers = '\0';
182 cs->type = CS94;
183 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
184 cs->final = (u_char )(p[2] & 0xff);
185 cs->interm = '\0';
186 cs->vers = '\0';
187 cs->type = CS96;
188 } else {
189 return 1;
190 }
191
192 return 0;
193 }
194
195
/* Result codes shared by the VARIABLE token parsers (get_*). */
#define _NOTMATCH	0	/* token is not of this parser's kind */
#define _MATCH		1	/* token recognized and applied */
#define _PARSEFAIL	2	/* token is of this kind but is malformed */
199
200 static __inline int
201 get_recommend(_ISO2022EncodingInfo * __restrict ei,
202 const char * __restrict token)
203 {
204 int i;
205 _ISO2022Charset cs;
206
207 if (!strchr("0123", token[0]) || token[1] != '=')
208 return (_NOTMATCH);
209
210 if (getcs(&token[2], &cs) == 0)
211 ;
212 else if (!strcmp(&token[2], "94")) {
213 cs.final = (u_char)(token[4]);
214 cs.interm = '\0';
215 cs.vers = '\0';
216 cs.type = CS94;
217 } else if (!strcmp(&token[2], "96")) {
218 cs.final = (u_char)(token[4]);
219 cs.interm = '\0';
220 cs.vers = '\0';
221 cs.type = CS96;
222 } else if (!strcmp(&token[2], "94$")) {
223 cs.final = (u_char)(token[5]);
224 cs.interm = '\0';
225 cs.vers = '\0';
226 cs.type = CS94MULTI;
227 } else if (!strcmp(&token[2], "96$")) {
228 cs.final = (u_char)(token[5]);
229 cs.interm = '\0';
230 cs.vers = '\0';
231 cs.type = CS96MULTI;
232 } else {
233 return (_PARSEFAIL);
234 }
235
236 i = token[0] - '0';
237 ei->recommendsize[i] += 1;
238 if (!ei->recommend[i]) {
239 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
240 } else {
241 ei->recommend[i] =
242 realloc(ei->recommend[i],
243 sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
244 }
245 if (!ei->recommend[i])
246 return (_PARSEFAIL);
247
248 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
249 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
250 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
251 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
252
253 return (_MATCH);
254 }
255
256 static __inline int
257 get_initg(_ISO2022EncodingInfo * __restrict ei,
258 const char * __restrict token)
259 {
260 _ISO2022Charset cs;
261
262 if (strncmp("INIT", &token[0], 4) ||
263 !strchr("0123", token[4]) ||
264 token[5] != '=')
265 return (_NOTMATCH);
266
267 if (getcs(&token[6], &cs) != 0)
268 return (_PARSEFAIL);
269
270 ei->initg[token[4] - '0'].type = cs.type;
271 ei->initg[token[4] - '0'].final = cs.final;
272 ei->initg[token[4] - '0'].interm = cs.interm;
273 ei->initg[token[4] - '0'].vers = cs.vers;
274
275 return (_MATCH);
276 }
277
278 static __inline int
279 get_max(_ISO2022EncodingInfo * __restrict ei,
280 const char * __restrict token)
281 {
282 if (!strcmp(token, "MAX1")) {
283 ei->maxcharset = 1;
284 } else if (!strcmp(token, "MAX2")) {
285 ei->maxcharset = 2;
286 } else if (!strcmp(token, "MAX3")) {
287 ei->maxcharset = 3;
288 } else
289 return (_NOTMATCH);
290
291 return (_MATCH);
292 }
293
294
295 static __inline int
296 get_flags(_ISO2022EncodingInfo * __restrict ei,
297 const char * __restrict token)
298 {
299 int i;
300 static struct {
301 const char *tag;
302 int flag;
303 } const tags[] = {
304 { "DUMMY", 0 },
305 { "8BIT", F_8BIT },
306 { "NOOLD", F_NOOLD },
307 { "SI", F_SI },
308 { "SO", F_SO },
309 { "LS0", F_LS0 },
310 { "LS1", F_LS1 },
311 { "LS2", F_LS2 },
312 { "LS3", F_LS3 },
313 { "LS1R", F_LS1R },
314 { "LS2R", F_LS2R },
315 { "LS3R", F_LS3R },
316 { "SS2", F_SS2 },
317 { "SS3", F_SS3 },
318 { "SS2R", F_SS2R },
319 { "SS3R", F_SS3R },
320 { NULL, 0 }
321 };
322
323 for (i = 0; tags[i].tag; i++) {
324 if (!strcmp(token, tags[i].tag)) {
325 ei->flags |= tags[i].flag;
326 return (_MATCH);
327 }
328 }
329
330 return (_NOTMATCH);
331 }
332
333
334 static __inline int
335 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
336 const void * __restrict var, size_t lenvar)
337 {
338 char const *v, *e;
339 char buf[20];
340 int i, len, ret;
341
342 _DIAGASSERT(ei != NULL);
343
344
345 /*
346 * parse VARIABLE section.
347 */
348
349 if (!var)
350 return (EFTYPE);
351
352 v = (const char *) var;
353
354 /* initialize structure */
355 ei->maxcharset = 0;
356 for (i = 0; i < 4; i++) {
357 ei->recommend[i] = NULL;
358 ei->recommendsize[i] = 0;
359 }
360 ei->flags = 0;
361
362 while (*v) {
363 while (*v == ' ' || *v == '\t')
364 ++v;
365
366 /* find the token */
367 e = v;
368 while (*e && *e != ' ' && *e != '\t')
369 ++e;
370 if (*e) {
371 len = e-v;
372 if (len>=sizeof(buf))
373 goto parsefail;
374 sprintf(buf, "%.*s", len, v);
375 ++e;
376 }
377
378 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
379 ;
380 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
381 ;
382 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
383 ;
384 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
385 ;
386 else
387 ret = _PARSEFAIL;
388 if (ret==_PARSEFAIL)
389 goto parsefail;
390 v = e;
391
392 }
393
394 return (0);
395
396 parsefail:
397 free(ei->recommend[0]);
398 free(ei->recommend[1]);
399 free(ei->recommend[2]);
400 free(ei->recommend[3]);
401
402 return (EFTYPE);
403 }
404
405 static __inline void
406 /*ARGSUSED*/
407 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
408 _ISO2022State * __restrict s)
409 {
410 int i;
411
412 memset(s, 0, sizeof(*s));
413 s->gl = 0;
414 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
415
416 for (i = 0; i < 4; i++) {
417 if (ei->initg[i].final) {
418 s->g[i].type = ei->initg[i].type;
419 s->g[i].final = ei->initg[i].final;
420 s->g[i].interm = ei->initg[i].interm;
421 }
422 }
423 s->singlegl = s->singlegr = -1;
424 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
425 }
426
427 static __inline void
428 /*ARGSUSED*/
429 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
430 void * __restrict pspriv,
431 const _ISO2022State * __restrict s)
432 {
433 memcpy(pspriv, (const void *)s, sizeof(*s));
434 }
435
436 static __inline void
437 /*ARGSUSED*/
438 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
439 _ISO2022State * __restrict s,
440 const void * __restrict pspriv)
441 {
442 memcpy((void *)s, pspriv, sizeof(*s));
443 }
444
445 static int
446 /*ARGSUSED*/
447 _citrus_ISO2022_stdencoding_init(_ISO2022EncodingInfo * __restrict ei,
448 const void * __restrict var, size_t lenvar)
449 {
450
451 _DIAGASSERT(ei != NULL);
452
453 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
454 }
455
456 static void
457 /*ARGSUSED*/
458 _citrus_ISO2022_stdencoding_uninit(_ISO2022EncodingInfo *ei)
459 {
460 }
461
462 #define ESC '\033'
463 #define ECMA -1
464 #define INTERM -2
465 #define OECMA -3
466 static struct seqtable {
467 int type;
468 int csoff;
469 int finaloff;
470 int intermoff;
471 int versoff;
472 int len;
473 int chars[10];
474 } seqtable[] = {
475 /* G0 94MULTI special */
476 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
477 /* G0 94MULTI special with version identification */
478 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
479 /* G? 94 */
480 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
481 /* G? 94 with 2nd intermediate char */
482 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
483 /* G? 96 */
484 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
485 /* G? 96 with 2nd intermediate char */
486 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
487 /* G? 94MULTI */
488 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
489 /* G? 96MULTI */
490 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
491 /* G? 94MULTI with version specification */
492 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
493 /* LS2/3 */
494 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
495 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
496 /* LS1/2/3R */
497 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
498 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
499 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
500 /* SS2/3 */
501 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
502 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
503 /* end of records */
504 { 0, }
505 };
506
507 static int
508 seqmatch(const char * __restrict s, size_t n,
509 const struct seqtable * __restrict sp)
510 {
511 const int *p;
512
513 _DIAGASSERT(s != NULL);
514 _DIAGASSERT(sp != NULL);
515
516 p = sp->chars;
517 while (p - sp->chars < n && p - sp->chars < sp->len) {
518 switch (*p) {
519 case ECMA:
520 if (!isecma(*s))
521 goto terminate;
522 break;
523 case OECMA:
524 if (*s && strchr("@AB", *s))
525 break;
526 else
527 goto terminate;
528 case INTERM:
529 if (!isinterm(*s))
530 goto terminate;
531 break;
532 case CS94:
533 if (*s && strchr("()*+", *s))
534 break;
535 else
536 goto terminate;
537 case CS96:
538 if (*s && strchr(",-./", *s))
539 break;
540 else
541 goto terminate;
542 default:
543 if (*s != *p)
544 goto terminate;
545 break;
546 }
547
548 p++;
549 s++;
550 }
551
552 terminate:
553 return p - sp->chars;
554 }
555
556 static wchar_t
557 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
558 const char * __restrict string, size_t n,
559 const char ** __restrict result,
560 _ISO2022State * __restrict psenc)
561 {
562 wchar_t wchar = 0;
563 int cur;
564 struct seqtable *sp;
565 int nmatch;
566 int i;
567
568 _DIAGASSERT(ei != NULL);
569 _DIAGASSERT(state != NULL);
570 _DIAGASSERT(string != NULL);
571 /* result may be NULL */
572
573 while (1) {
574 /* SI/SO */
575 if (1 <= n && string[0] == '\017') {
576 psenc->gl = 0;
577 string++;
578 n--;
579 continue;
580 }
581 if (1 <= n && string[0] == '\016') {
582 psenc->gl = 1;
583 string++;
584 n--;
585 continue;
586 }
587
588 /* SS2/3R */
589 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
590 psenc->singlegl = psenc->singlegr =
591 (string[0] - '\216') + 2;
592 string++;
593 n--;
594 continue;
595 }
596
597 /* eat the letter if this is not ESC */
598 if (1 <= n && string[0] != '\033')
599 break;
600
601 /* look for a perfect match from escape sequences */
602 for (sp = &seqtable[0]; sp->len; sp++) {
603 nmatch = seqmatch(string, n, sp);
604 if (sp->len == nmatch && n >= sp->len)
605 break;
606 }
607
608 if (!sp->len)
609 goto notseq;
610
611 if (sp->type != -1) {
612 if (sp->csoff == -1)
613 i = 0;
614 else {
615 switch (sp->type) {
616 case CS94:
617 case CS94MULTI:
618 i = string[sp->csoff] - '(';
619 break;
620 case CS96:
621 case CS96MULTI:
622 i = string[sp->csoff] - ',';
623 break;
624 }
625 }
626 psenc->g[i].type = sp->type;
627 psenc->g[i].final = '\0';
628 psenc->g[i].interm = '\0';
629 psenc->g[i].vers = '\0';
630 /* sp->finaloff must not be -1 */
631 if (sp->finaloff != -1)
632 psenc->g[i].final = string[sp->finaloff];
633 if (sp->intermoff != -1)
634 psenc->g[i].interm = string[sp->intermoff];
635 if (sp->versoff != -1)
636 psenc->g[i].vers = string[sp->versoff];
637
638 string += sp->len;
639 n -= sp->len;
640 continue;
641 }
642
643 /* LS2/3 */
644 if (2 <= n && string[0] == '\033'
645 && string[1] && strchr("no", string[1])) {
646 psenc->gl = string[1] - 'n' + 2;
647 string += 2;
648 n -= 2;
649 continue;
650 }
651
652 /* LS1/2/3R */
653 /* XXX: { for vi showmatch */
654 if (2 <= n && string[0] == '\033'
655 && string[1] && strchr("~}|", string[1])) {
656 psenc->gr = 3 - (string[1] - '|');
657 string += 2;
658 n -= 2;
659 continue;
660 }
661
662 /* SS2/3 */
663 if (2 <= n && string[0] == '\033'
664 && string[1] && strchr("NO", string[1])) {
665 psenc->singlegl = (string[1] - 'N') + 2;
666 string += 2;
667 n -= 2;
668 continue;
669 }
670
671 notseq:
672 /*
673 * if we've got an unknown escape sequence, eat the ESC at the
674 * head. otherwise, wait till full escape sequence comes.
675 */
676 for (sp = &seqtable[0]; sp->len; sp++) {
677 nmatch = seqmatch(string, n, sp);
678 if (!nmatch)
679 continue;
680
681 /*
682 * if we are in the middle of escape sequence,
683 * we still need to wait for more characters to come
684 */
685 if (n < sp->len) {
686 if (nmatch == n) {
687 if (result)
688 *result = string;
689 return (_ISO2022INVALID);
690 }
691 } else {
692 if (nmatch == sp->len) {
693 /* this case should not happen */
694 goto eat;
695 }
696 }
697 }
698
699 break;
700 }
701
702 eat:
703 /* no letter to eat */
704 if (n < 1) {
705 if (result)
706 *result = string;
707 return (_ISO2022INVALID);
708 }
709
710 /* normal chars. always eat C0/C1 as is. */
711 if (iscntl(*string & 0xff))
712 cur = -1;
713 else if (*string & 0x80) {
714 cur = (psenc->singlegr == -1)
715 ? psenc->gr : psenc->singlegr;
716 } else {
717 cur = (psenc->singlegl == -1)
718 ? psenc->gl : psenc->singlegl;
719 }
720
721 if (cur == -1) {
722 asis:
723 wchar = *string++ & 0xff;
724 if (result)
725 *result = string;
726 /* reset single shift state */
727 psenc->singlegr = psenc->singlegl = -1;
728 return wchar;
729 }
730
731 /* length error check */
732 switch (psenc->g[cur].type) {
733 case CS94MULTI:
734 case CS96MULTI:
735 if (!isthree(psenc->g[cur].final)) {
736 if (2 <= n
737 && (string[0] & 0x80) == (string[1] & 0x80))
738 break;
739 } else {
740 if (3 <= n
741 && (string[0] & 0x80) == (string[1] & 0x80)
742 && (string[0] & 0x80) == (string[2] & 0x80))
743 break;
744 }
745
746 /* we still need to wait for more characters to come */
747 if (result)
748 *result = string;
749 return (_ISO2022INVALID);
750
751 case CS94:
752 case CS96:
753 if (1 <= n)
754 break;
755
756 /* we still need to wait for more characters to come */
757 if (result)
758 *result = string;
759 return (_ISO2022INVALID);
760 }
761
762 /* range check */
763 switch (psenc->g[cur].type) {
764 case CS94:
765 if (!(is94(string[0] & 0x7f)))
766 goto asis;
767 case CS96:
768 if (!(is96(string[0] & 0x7f)))
769 goto asis;
770 break;
771 case CS94MULTI:
772 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
773 goto asis;
774 break;
775 case CS96MULTI:
776 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
777 goto asis;
778 break;
779 }
780
781 /* extract the character. */
782 switch (psenc->g[cur].type) {
783 case CS94:
784 /* special case for ASCII. */
785 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
786 wchar = *string++;
787 wchar &= 0x7f;
788 break;
789 }
790 wchar = psenc->g[cur].final;
791 wchar = (wchar << 8);
792 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
793 wchar = (wchar << 8);
794 wchar = (wchar << 8) | (*string++ & 0x7f);
795 break;
796 case CS96:
797 /* special case for ISO-8859-1. */
798 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
799 wchar = *string++;
800 wchar &= 0x7f;
801 wchar |= 0x80;
802 break;
803 }
804 wchar = psenc->g[cur].final;
805 wchar = (wchar << 8);
806 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
807 wchar = (wchar << 8);
808 wchar = (wchar << 8) | (*string++ & 0x7f);
809 wchar |= 0x80;
810 break;
811 case CS94MULTI:
812 case CS96MULTI:
813 wchar = psenc->g[cur].final;
814 wchar = (wchar << 8);
815 if (isthree(psenc->g[cur].final))
816 wchar |= (*string++ & 0x7f);
817 wchar = (wchar << 8) | (*string++ & 0x7f);
818 wchar = (wchar << 8) | (*string++ & 0x7f);
819 if (psenc->g[cur].type == CS96MULTI)
820 wchar |= 0x80;
821 break;
822 }
823
824 if (result)
825 *result = string;
826 /* reset single shift state */
827 psenc->singlegr = psenc->singlegl = -1;
828 return wchar;
829 }
830
831
832
833 static int
834 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
835 wchar_t * __restrict pwc,
836 const char ** __restrict s,
837 size_t n, _ISO2022State * __restrict psenc,
838 size_t * __restrict nresult)
839 {
840 wchar_t wchar;
841 const char *s0, *p, *result;
842 int c;
843 int chlenbak;
844
845 _DIAGASSERT(nresult != 0);
846 _DIAGASSERT(ei != NULL);
847 _DIAGASSERT(psenc != NULL);
848 _DIAGASSERT(s != NULL);
849
850 s0 = *s;
851 c = 0;
852 chlenbak = psenc->chlen;
853
854 /*
855 * if we have something in buffer, use that.
856 * otherwise, skip here
857 */
858 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
859 /* illgeal state */
860 _citrus_ISO2022_init_state(ei, psenc);
861 goto encoding_error;
862 }
863 if (psenc->chlen == 0)
864 goto emptybuf;
865
866 /* buffer is not empty */
867 p = psenc->ch;
868 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
869 if (n > 0) {
870 psenc->ch[psenc->chlen++] = *s0++;
871 n--;
872 }
873
874 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
875 &result, psenc);
876 if (wchar != _ISO2022INVALID) {
877 c += result - p;
878 if (psenc->chlen > c)
879 memmove(psenc->ch, result, psenc->chlen - c);
880 if (psenc->chlen < c)
881 psenc->chlen = 0;
882 else
883 psenc->chlen -= c;
884 goto output;
885 }
886
887 c += result - p;
888 p = result;
889
890 if (n == 0)
891 goto restart;
892 }
893
894 /* escape sequence too long? */
895 goto encoding_error;
896
897 emptybuf:
898 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
899 if (wchar != _ISO2022INVALID) {
900 c += result - s0;
901 psenc->chlen = 0;
902 s0 = result;
903 goto output;
904 }
905 if (result > s0 && n > result - s0) {
906 c += (result - s0);
907 n -= (result - s0);
908 s0 = result;
909 goto emptybuf;
910 }
911 n += c;
912 if (n < sizeof(psenc->ch)) {
913 memcpy(psenc->ch, s0 - c, n);
914 psenc->chlen = n;
915 s0 = result;
916 goto restart;
917 }
918
919 /* escape sequence too long? */
920
921 encoding_error:
922 psenc->chlen = 0;
923 *nresult = (size_t)-1;
924 return (EILSEQ);
925
926 output:
927 *s = s0;
928 if (pwc)
929 *pwc = wchar;
930
931 if (!wchar)
932 *nresult = 0;
933 else
934 *nresult = c - chlenbak;
935
936 return (0);
937
938 restart:
939 *s = s0;
940 *nresult = (size_t)-2;
941
942 return (0);
943 }
944
945 static int
946 recommendation(_ISO2022EncodingInfo * __restrict ei,
947 _ISO2022Charset * __restrict cs)
948 {
949 int i, j;
950 _ISO2022Charset *recommend;
951
952 _DIAGASSERT(ei != NULL);
953 _DIAGASSERT(cs != NULL);
954
955 /* first, try a exact match. */
956 for (i = 0; i < 4; i++) {
957 recommend = ei->recommend[i];
958 for (j = 0; j < ei->recommendsize[i]; j++) {
959 if (cs->type != recommend[j].type)
960 continue;
961 if (cs->final != recommend[j].final)
962 continue;
963 if (cs->interm != recommend[j].interm)
964 continue;
965
966 return i;
967 }
968 }
969
970 /* then, try a wildcard match over final char. */
971 for (i = 0; i < 4; i++) {
972 recommend = ei->recommend[i];
973 for (j = 0; j < ei->recommendsize[i]; j++) {
974 if (cs->type != recommend[j].type)
975 continue;
976 if (cs->final && (cs->final != recommend[j].final))
977 continue;
978 if (cs->interm && (cs->interm != recommend[j].interm))
979 continue;
980
981 return i;
982 }
983 }
984
985 /* there's no recommendation. make a guess. */
986 if (ei->maxcharset == 0) {
987 return 0;
988 } else {
989 switch (cs->type) {
990 case CS94:
991 case CS94MULTI:
992 return 0;
993 case CS96:
994 case CS96MULTI:
995 return 1;
996 }
997 }
998 return 0;
999 }
1000
1001 static int
1002 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t c,
1003 char * __restrict string, size_t n,
1004 char ** __restrict result,
1005 _ISO2022State * __restrict psenc)
1006 {
1007 int i = 0, len;
1008 _ISO2022Charset cs;
1009 char *p;
1010 char tmp[MB_LEN_MAX];
1011 int target;
1012 u_char mask;
1013 int bit8;
1014
1015 _DIAGASSERT(ei != NULL);
1016 _DIAGASSERT(string != NULL);
1017 /* result may be NULL */
1018 /* state appears to be unused */
1019
1020 if (iscntl(c & 0xff)) {
1021 /* go back to ASCII on control chars */
1022 cs.type = CS94;
1023 cs.final = 'B';
1024 cs.interm = '\0';
1025 } else if (!(c & ~0xff)) {
1026 if (c & 0x80) {
1027 /* special treatment for ISO-8859-1 */
1028 cs.type = CS96;
1029 cs.final = 'A';
1030 cs.interm = '\0';
1031 } else {
1032 /* special treatment for ASCII */
1033 cs.type = CS94;
1034 cs.final = 'B';
1035 cs.interm = '\0';
1036 }
1037 } else {
1038 cs.final = (c >> 24) & 0x7f;
1039 if ((c >> 16) & 0x80)
1040 cs.interm = (c >> 16) & 0x7f;
1041 else
1042 cs.interm = '\0';
1043 if (c & 0x80)
1044 cs.type = (c & 0x00007f00) ? CS96MULTI : CS96;
1045 else
1046 cs.type = (c & 0x00007f00) ? CS94MULTI : CS94;
1047 }
1048 target = recommendation(ei, &cs);
1049 p = tmp;
1050 bit8 = ei->flags & F_8BIT;
1051
1052 /* designate the charset onto the target plane(G0/1/2/3). */
1053 if (psenc->g[target].type == cs.type
1054 && psenc->g[target].final == cs.final
1055 && psenc->g[target].interm == cs.interm)
1056 goto planeok;
1057
1058 *p++ = '\033';
1059 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1060 *p++ = '$';
1061 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1062 && !cs.interm && !(ei->flags & F_NOOLD))
1063 ;
1064 else if (cs.type == CS94 || cs.type == CS94MULTI)
1065 *p++ = "()*+"[target];
1066 else
1067 *p++ = ",-./"[target];
1068 if (cs.interm)
1069 *p++ = cs.interm;
1070 *p++ = cs.final;
1071
1072 psenc->g[target].type = cs.type;
1073 psenc->g[target].final = cs.final;
1074 psenc->g[target].interm = cs.interm;
1075
1076 planeok:
1077
1078 /* invoke the plane onto GL or GR. */
1079 if (psenc->gl == target)
1080 goto sideok;
1081 if (bit8 && psenc->gr == target)
1082 goto sideok;
1083
1084 if (target == 0 && (ei->flags & F_LS0)) {
1085 *p++ = '\017';
1086 psenc->gl = 0;
1087 } else if (target == 1 && (ei->flags & F_LS1)) {
1088 *p++ = '\016';
1089 psenc->gl = 1;
1090 } else if (target == 2 && (ei->flags & F_LS2)) {
1091 *p++ = '\033';
1092 *p++ = 'n';
1093 psenc->gl = 2;
1094 } else if (target == 3 && (ei->flags & F_LS3)) {
1095 *p++ = '\033';
1096 *p++ = 'o';
1097 psenc->gl = 3;
1098 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1099 *p++ = '\033';
1100 *p++ = '~';
1101 psenc->gr = 1;
1102 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1103 *p++ = '\033';
1104 /*{*/
1105 *p++ = '}';
1106 psenc->gr = 2;
1107 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1108 *p++ = '\033';
1109 *p++ = '|';
1110 psenc->gr = 3;
1111 } else if (target == 2 && (ei->flags & F_SS2)) {
1112 *p++ = '\033';
1113 *p++ = 'N';
1114 psenc->singlegl = 2;
1115 } else if (target == 3 && (ei->flags & F_SS3)) {
1116 *p++ = '\033';
1117 *p++ = 'O';
1118 psenc->singlegl = 3;
1119 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1120 *p++ = '\216';
1121 *p++ = 'N';
1122 psenc->singlegl = psenc->singlegr = 2;
1123 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1124 *p++ = '\217';
1125 *p++ = 'O';
1126 psenc->singlegl = psenc->singlegr = 3;
1127 } else
1128 abort();
1129
1130 sideok:
1131 if (psenc->singlegl == target)
1132 mask = 0x00;
1133 else if (psenc->singlegr == target)
1134 mask = 0x80;
1135 else if (psenc->gl == target)
1136 mask = 0x00;
1137 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1138 mask = 0x80;
1139 else
1140 abort();
1141
1142 switch (cs.type) {
1143 case CS94:
1144 case CS96:
1145 i = 1;
1146 break;
1147 case CS94MULTI:
1148 case CS96MULTI:
1149 i = isthree(cs.final) ? 3 : 2;
1150 break;
1151 }
1152 while (i-- > 0)
1153 *p++ = ((c >> (i << 3)) & 0x7f) | mask;
1154
1155 /* reset single shift state */
1156 psenc->singlegl = psenc->singlegr = -1;
1157
1158 len = p - tmp;
1159 if (n < len) {
1160 if (result)
1161 *result = (char *)0;
1162 } else {
1163 if (result)
1164 *result = string + len;
1165 memcpy(string, tmp, len);
1166 }
1167 return len;
1168 }
1169
1170 static int
1171 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1172 char * __restrict s, size_t n, wchar_t wc,
1173 _ISO2022State * __restrict psenc,
1174 size_t * __restrict nresult)
1175 {
1176 char buf[MB_LEN_MAX];
1177 char *result;
1178 int len;
1179
1180 _DIAGASSERT(ei != NULL);
1181 _DIAGASSERT(nresult != 0);
1182 _DIAGASSERT(s != NULL);
1183
1184 /* XXX state will be modified after this operation... */
1185 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1186 if (sizeof(buf) < len || n < len) {
1187 /* XXX should recover state? */
1188 goto ilseq;
1189 }
1190
1191 memcpy(s, buf, len);
1192 *nresult = (size_t)len;
1193 return (0);
1194
1195 ilseq:
1196 /* bound check failure */
1197 *nresult = (size_t)-1;
1198 return (EILSEQ);
1199 }
1200
1201 /* ----------------------------------------------------------------------
1202 * public interface for ctype
1203 */
1204
/*
 * Instantiate the public ctype entry points for this encoding.
 * NOTE(review): the two macros and the template are defined outside
 * this file; presumably the template builds the exported functions from
 * the _FUNCNAME()/_ENCODING_* macros and the *_priv helpers defined
 * above -- confirm against citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(ISO2022);
_CITRUS_CTYPE_DEF_OPS(ISO2022);

#include "citrus_ctype_template.h"
1209