citrus_iso2022.c revision 1.4 1 /* $NetBSD: citrus_iso2022.c,v 1.4 2002/03/28 01:59:50 yamt Exp $ */
2
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.4 2002/03/28 01:59:50 yamt Exp $");
34 #endif /* LIBC_SCCS and not lint */
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46 #include "citrus_module.h"
47 #include "citrus_ctype.h"
48 #include "citrus_iso2022.h"
49
50
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
54
55
56 /*
57 * wchar_t mappings:
58 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
59 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
60 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
61 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
62 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
63 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
64 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
65 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
66 * 94x94 charset (ESC & V ESC $ ( F)
67 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
68 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
69 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
70 */
71
72 typedef struct {
73 u_char type;
74 #define CS94 (0U)
75 #define CS96 (1U)
76 #define CS94MULTI (2U)
77 #define CS96MULTI (3U)
78
79 u_char final;
80 u_char interm;
81 u_char vers;
82 } _ISO2022Charset;
83
84 typedef struct {
85 _ISO2022Charset g[4];
86 /* need 3 bits to hold -1, 0, ..., 3 */
87 int gl:3,
88 gr:3,
89 singlegl:3,
90 singlegr:3;
91 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
92 int chlen;
93 } _ISO2022State;
94
95 typedef struct {
96 _ISO2022Charset *recommend[4];
97 size_t recommendsize[4];
98 _ISO2022Charset initg[4];
99 int maxcharset;
100 int flags;
101 #define F_8BIT 0x0001
102 #define F_NOOLD 0x0002
103 #define F_SI 0x0010 /*0F*/
104 #define F_SO 0x0020 /*0E*/
105 #define F_LS0 0x0010 /*0F*/
106 #define F_LS1 0x0020 /*0E*/
107 #define F_LS2 0x0040 /*ESC n*/
108 #define F_LS3 0x0080 /*ESC o*/
109 #define F_LS1R 0x0100 /*ESC ~*/
110 #define F_LS2R 0x0200 /*ESC }*/
111 #define F_LS3R 0x0400 /*ESC |*/
112 #define F_SS2 0x0800 /*ESC N*/
113 #define F_SS3 0x1000 /*ESC O*/
114 #define F_SS2R 0x2000 /*8E*/
115 #define F_SS3R 0x4000 /*8F*/
116 } _ISO2022EncodingInfo;
117 typedef struct {
118 _ISO2022EncodingInfo ei;
119 struct {
120 /* for future multi-locale facility */
121 _ISO2022State s_mblen;
122 _ISO2022State s_mbrlen;
123 _ISO2022State s_mbrtowc;
124 _ISO2022State s_mbtowc;
125 _ISO2022State s_mbsrtowcs;
126 _ISO2022State s_wcrtomb;
127 _ISO2022State s_wcsrtombs;
128 _ISO2022State s_wctomb;
129 } states;
130 } _ISO2022CTypeInfo;
131
/*
 * Glue between this file and the generic template: pointer conversions
 * from the opaque closure/state pointers to the ISO 2022 structures ...
 */
#define _TO_EI(_cl_)			((_ISO2022EncodingInfo *)(_cl_))
#define _TO_CEI(_cl_)			((_ISO2022CTypeInfo *)(_cl_))
#define _TO_STATE(_ps_)			((_ISO2022State *)(_ps_))
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* ... and the names and properties the template is parameterized with. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1


/*
 * Value returned by the decoder when no complete character is available.
 * The expansion is parenthesized so that it stays a single operand in any
 * context (e.g. "sizeof _ISO2022INVALID").
 */
#define _ISO2022INVALID			((wchar_t)-1)
147
/*
 * Byte classification helpers.  Each returns 1 when x belongs to the class
 * and 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x < 0x20);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (isc0(x))
		return (1);
	if (isc1(x))
		return (1);
	return (x == 0x7f);
}

/* position of a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position of a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}

/* byte accepted as the final byte of an escape sequence: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (!(x < 0x30 || x > 0x7f));
}

/* intermediate byte of an escape sequence: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte in 0x60..0x6f; such multibyte sets are read as three bytes */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
156
157 static __inline int
158 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
159 {
160
161 _DIAGASSERT(p != NULL);
162 _DIAGASSERT(cs != NULL);
163
164 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
165 cs->final = (u_char)(p[3] & 0xff);
166 cs->interm = '\0';
167 cs->vers = '\0';
168 cs->type = CS94MULTI;
169 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
170 cs->final = (u_char)(p[3] & 0xff);
171 cs->interm = '\0';
172 cs->vers = '\0';
173 cs->type = CS96MULTI;
174 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
175 cs->final = (u_char)(p[2] & 0xff);
176 cs->interm = '\0';
177 cs->vers = '\0';
178 cs->type = CS94;
179 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
180 cs->final = (u_char )(p[2] & 0xff);
181 cs->interm = '\0';
182 cs->vers = '\0';
183 cs->type = CS96;
184 } else {
185 return 1;
186 }
187
188 return 0;
189 }
190
191
/*
 * Result of offering a VARIABLE token to one of the get_*() parsers.
 */
enum {
	_NOTMATCH = 0,	/* token is not of the kind this parser handles */
	_MATCH = 1,	/* token was recognized and applied */
	_PARSEFAIL = 2	/* token was recognized but could not be processed */
};
195
196 static __inline int
197 get_recommend(_ISO2022EncodingInfo * __restrict ei,
198 const char * __restrict token)
199 {
200 int i;
201 _ISO2022Charset cs;
202
203 if (!strchr("0123", token[0]) || token[1] != '=')
204 return (_NOTMATCH);
205
206 if (getcs(&token[2], &cs) == 0)
207 ;
208 else if (!strcmp(&token[2], "94")) {
209 cs.final = (u_char)(token[4]);
210 cs.interm = '\0';
211 cs.vers = '\0';
212 cs.type = CS94;
213 } else if (!strcmp(&token[2], "96")) {
214 cs.final = (u_char)(token[4]);
215 cs.interm = '\0';
216 cs.vers = '\0';
217 cs.type = CS96;
218 } else if (!strcmp(&token[2], "94$")) {
219 cs.final = (u_char)(token[5]);
220 cs.interm = '\0';
221 cs.vers = '\0';
222 cs.type = CS94MULTI;
223 } else if (!strcmp(&token[2], "96$")) {
224 cs.final = (u_char)(token[5]);
225 cs.interm = '\0';
226 cs.vers = '\0';
227 cs.type = CS96MULTI;
228 } else {
229 return (_PARSEFAIL);
230 }
231
232 i = token[0] - '0';
233 ei->recommendsize[i] += 1;
234 if (!ei->recommend[i]) {
235 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
236 } else {
237 ei->recommend[i] =
238 realloc(ei->recommend[i],
239 sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
240 }
241 if (!ei->recommend[i])
242 return (_PARSEFAIL);
243
244 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
245 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
246 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
247 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
248
249 return (_MATCH);
250 }
251
252 static __inline int
253 get_initg(_ISO2022EncodingInfo * __restrict ei,
254 const char * __restrict token)
255 {
256 _ISO2022Charset cs;
257
258 if (strncmp("INIT", &token[0], 4) ||
259 !strchr("0123", token[4]) ||
260 token[5] != '=')
261 return (_NOTMATCH);
262
263 if (getcs(&token[6], &cs) != 0)
264 return (_PARSEFAIL);
265
266 ei->initg[token[4] - '0'].type = cs.type;
267 ei->initg[token[4] - '0'].final = cs.final;
268 ei->initg[token[4] - '0'].interm = cs.interm;
269 ei->initg[token[4] - '0'].vers = cs.vers;
270
271 return (_MATCH);
272 }
273
274 static __inline int
275 get_max(_ISO2022EncodingInfo * __restrict ei,
276 const char * __restrict token)
277 {
278 if (!strcmp(token, "MAX1")) {
279 ei->maxcharset = 1;
280 } else if (!strcmp(token, "MAX2")) {
281 ei->maxcharset = 2;
282 } else if (!strcmp(token, "MAX3")) {
283 ei->maxcharset = 3;
284 } else
285 return (_NOTMATCH);
286
287 return (_MATCH);
288 }
289
290
291 static __inline int
292 get_flags(_ISO2022EncodingInfo * __restrict ei,
293 const char * __restrict token)
294 {
295 int i;
296 static struct {
297 const char *tag;
298 int flag;
299 } const tags[] = {
300 { "DUMMY", 0 },
301 { "8BIT", F_8BIT },
302 { "NOOLD", F_NOOLD },
303 { "SI", F_SI },
304 { "SO", F_SO },
305 { "LS0", F_LS0 },
306 { "LS1", F_LS1 },
307 { "LS2", F_LS2 },
308 { "LS3", F_LS3 },
309 { "LS1R", F_LS1R },
310 { "LS2R", F_LS2R },
311 { "LS3R", F_LS3R },
312 { "SS2", F_SS2 },
313 { "SS3", F_SS3 },
314 { "SS2R", F_SS2R },
315 { "SS3R", F_SS3R },
316 { NULL, 0 }
317 };
318
319 for (i = 0; tags[i].tag; i++) {
320 if (!strcmp(token, tags[i].tag)) {
321 ei->flags |= tags[i].flag;
322 return (_MATCH);
323 }
324 }
325
326 return (_NOTMATCH);
327 }
328
329
330 static __inline int
331 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
332 const void * __restrict var, size_t lenvar)
333 {
334 char const *v, *e;
335 char buf[20];
336 int i, len, ret;
337
338 _DIAGASSERT(ei != NULL);
339
340
341 /*
342 * parse VARIABLE section.
343 */
344
345 if (!var)
346 return (EFTYPE);
347
348 v = (const char *) var;
349
350 /* initialize structure */
351 ei->maxcharset = 0;
352 for (i = 0; i < 4; i++) {
353 ei->recommend[i] = NULL;
354 ei->recommendsize[i] = 0;
355 }
356 ei->flags = 0;
357
358 while (*v) {
359 while (*v == ' ' || *v == '\t')
360 ++v;
361
362 /* find the token */
363 e = v;
364 while (*e && *e != ' ' && *e != '\t')
365 ++e;
366 if (*e) {
367 len = e-v;
368 if (len>=sizeof(buf))
369 goto parsefail;
370 sprintf(buf, "%.*s", len, v);
371 ++e;
372 }
373
374 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
375 ;
376 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
377 ;
378 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
379 ;
380 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
381 ;
382 else
383 ret = _PARSEFAIL;
384 if (ret==_PARSEFAIL)
385 goto parsefail;
386 v = e;
387
388 }
389
390 return (0);
391
392 parsefail:
393 free(ei->recommend[0]);
394 free(ei->recommend[1]);
395 free(ei->recommend[2]);
396 free(ei->recommend[3]);
397
398 return (EFTYPE);
399 }
400
401 static __inline void
402 /*ARGSUSED*/
403 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
404 _ISO2022State * __restrict s)
405 {
406 int i;
407
408 memset(s, 0, sizeof(*s));
409 s->gl = 0;
410 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
411
412 for (i = 0; i < 4; i++) {
413 if (ei->initg[i].final) {
414 s->g[i].type = ei->initg[i].type;
415 s->g[i].final = ei->initg[i].final;
416 s->g[i].interm = ei->initg[i].interm;
417 }
418 }
419 s->singlegl = s->singlegr = -1;
420 }
421
422 static __inline void
423 /*ARGSUSED*/
424 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
425 void * __restrict pspriv,
426 const _ISO2022State * __restrict s)
427 {
428 memcpy(pspriv, (const void *)s, sizeof(*s));
429 }
430
431 static __inline void
432 /*ARGSUSED*/
433 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
434 _ISO2022State * __restrict s,
435 const void * __restrict pspriv)
436 {
437 memcpy((void *)s, pspriv, sizeof(*s));
438 }
439
440 static int
441 /*ARGSUSED*/
442 _citrus_ISO2022_stdencoding_init(_ISO2022EncodingInfo * __restrict ei,
443 const void * __restrict var, size_t lenvar)
444 {
445
446 _DIAGASSERT(ei != NULL);
447
448 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
449 }
450
451 static void
452 /*ARGSUSED*/
453 _citrus_ISO2022_stdencoding_uninit(_ISO2022EncodingInfo *ei)
454 {
455 }
456
457 #define ESC '\033'
458 #define ECMA -1
459 #define INTERM -2
460 #define OECMA -3
461 static struct seqtable {
462 int type;
463 int csoff;
464 int finaloff;
465 int intermoff;
466 int versoff;
467 int len;
468 int chars[10];
469 } seqtable[] = {
470 /* G0 94MULTI special */
471 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
472 /* G0 94MULTI special with version identification */
473 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
474 /* G? 94 */
475 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
476 /* G? 94 with 2nd intermediate char */
477 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
478 /* G? 96 */
479 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
480 /* G? 96 with 2nd intermediate char */
481 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
482 /* G? 94MULTI */
483 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
484 /* G? 96MULTI */
485 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
486 /* G? 94MULTI with version specification */
487 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
488 /* LS2/3 */
489 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
490 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
491 /* LS1/2/3R */
492 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
493 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
494 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
495 /* SS2/3 */
496 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
497 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
498 /* end of records */
499 { 0, }
500 };
501
502 static int
503 seqmatch(const char * __restrict s, size_t n,
504 const struct seqtable * __restrict sp)
505 {
506 const int *p;
507
508 _DIAGASSERT(s != NULL);
509 _DIAGASSERT(sp != NULL);
510
511 p = sp->chars;
512 while (p - sp->chars < n && p - sp->chars < sp->len) {
513 switch (*p) {
514 case ECMA:
515 if (!isecma(*s))
516 goto terminate;
517 break;
518 case OECMA:
519 if (*s && strchr("@AB", *s))
520 break;
521 else
522 goto terminate;
523 case INTERM:
524 if (!isinterm(*s))
525 goto terminate;
526 break;
527 case CS94:
528 if (*s && strchr("()*+", *s))
529 break;
530 else
531 goto terminate;
532 case CS96:
533 if (*s && strchr(",-./", *s))
534 break;
535 else
536 goto terminate;
537 default:
538 if (*s != *p)
539 goto terminate;
540 break;
541 }
542
543 p++;
544 s++;
545 }
546
547 terminate:
548 return p - sp->chars;
549 }
550
551 static wchar_t
552 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
553 const char * __restrict string, size_t n,
554 const char ** __restrict result,
555 _ISO2022State * __restrict psenc)
556 {
557 wchar_t wchar = 0;
558 int cur;
559 struct seqtable *sp;
560 int nmatch;
561 int i;
562
563 _DIAGASSERT(ei != NULL);
564 _DIAGASSERT(state != NULL);
565 _DIAGASSERT(string != NULL);
566 /* result may be NULL */
567
568 while (1) {
569 /* SI/SO */
570 if (1 <= n && string[0] == '\017') {
571 psenc->gl = 0;
572 string++;
573 n--;
574 continue;
575 }
576 if (1 <= n && string[0] == '\016') {
577 psenc->gl = 1;
578 string++;
579 n--;
580 continue;
581 }
582
583 /* SS2/3R */
584 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
585 psenc->singlegl = psenc->singlegr =
586 (string[0] - '\216') + 2;
587 string++;
588 n--;
589 continue;
590 }
591
592 /* eat the letter if this is not ESC */
593 if (1 <= n && string[0] != '\033')
594 break;
595
596 /* look for a perfect match from escape sequences */
597 for (sp = &seqtable[0]; sp->len; sp++) {
598 nmatch = seqmatch(string, n, sp);
599 if (sp->len == nmatch && n >= sp->len)
600 break;
601 }
602
603 if (!sp->len)
604 goto notseq;
605
606 if (sp->type != -1) {
607 if (sp->csoff == -1)
608 i = 0;
609 else {
610 switch (sp->type) {
611 case CS94:
612 case CS94MULTI:
613 i = string[sp->csoff] - '(';
614 break;
615 case CS96:
616 case CS96MULTI:
617 i = string[sp->csoff] - ',';
618 break;
619 }
620 }
621 psenc->g[i].type = sp->type;
622 psenc->g[i].final = '\0';
623 psenc->g[i].interm = '\0';
624 psenc->g[i].vers = '\0';
625 /* sp->finaloff must not be -1 */
626 if (sp->finaloff != -1)
627 psenc->g[i].final = string[sp->finaloff];
628 if (sp->intermoff != -1)
629 psenc->g[i].interm = string[sp->intermoff];
630 if (sp->versoff != -1)
631 psenc->g[i].vers = string[sp->versoff];
632
633 string += sp->len;
634 n -= sp->len;
635 continue;
636 }
637
638 /* LS2/3 */
639 if (2 <= n && string[0] == '\033'
640 && string[1] && strchr("no", string[1])) {
641 psenc->gl = string[1] - 'n' + 2;
642 string += 2;
643 n -= 2;
644 continue;
645 }
646
647 /* LS1/2/3R */
648 /* XXX: { for vi showmatch */
649 if (2 <= n && string[0] == '\033'
650 && string[1] && strchr("~}|", string[1])) {
651 psenc->gr = 3 - (string[1] - '|');
652 string += 2;
653 n -= 2;
654 continue;
655 }
656
657 /* SS2/3 */
658 if (2 <= n && string[0] == '\033'
659 && string[1] && strchr("NO", string[1])) {
660 psenc->singlegl = (string[1] - 'N') + 2;
661 string += 2;
662 n -= 2;
663 continue;
664 }
665
666 notseq:
667 /*
668 * if we've got an unknown escape sequence, eat the ESC at the
669 * head. otherwise, wait till full escape sequence comes.
670 */
671 for (sp = &seqtable[0]; sp->len; sp++) {
672 nmatch = seqmatch(string, n, sp);
673 if (!nmatch)
674 continue;
675
676 /*
677 * if we are in the middle of escape sequence,
678 * we still need to wait for more characters to come
679 */
680 if (n < sp->len) {
681 if (nmatch == n) {
682 if (result)
683 *result = string;
684 return (_ISO2022INVALID);
685 }
686 } else {
687 if (nmatch == sp->len) {
688 /* this case should not happen */
689 goto eat;
690 }
691 }
692 }
693
694 break;
695 }
696
697 eat:
698 /* no letter to eat */
699 if (n < 1) {
700 if (result)
701 *result = string;
702 return (_ISO2022INVALID);
703 }
704
705 /* normal chars. always eat C0/C1 as is. */
706 if (iscntl(*string & 0xff))
707 cur = -1;
708 else if (*string & 0x80) {
709 cur = (psenc->singlegr == -1)
710 ? psenc->gr : psenc->singlegr;
711 } else {
712 cur = (psenc->singlegl == -1)
713 ? psenc->gl : psenc->singlegl;
714 }
715
716 if (cur == -1) {
717 asis:
718 wchar = *string++ & 0xff;
719 if (result)
720 *result = string;
721 /* reset single shift state */
722 psenc->singlegr = psenc->singlegl = -1;
723 return wchar;
724 }
725
726 /* length error check */
727 switch (psenc->g[cur].type) {
728 case CS94MULTI:
729 case CS96MULTI:
730 if (!isthree(psenc->g[cur].final)) {
731 if (2 <= n
732 && (string[0] & 0x80) == (string[1] & 0x80))
733 break;
734 } else {
735 if (3 <= n
736 && (string[0] & 0x80) == (string[1] & 0x80)
737 && (string[0] & 0x80) == (string[2] & 0x80))
738 break;
739 }
740
741 /* we still need to wait for more characters to come */
742 if (result)
743 *result = string;
744 return (_ISO2022INVALID);
745
746 case CS94:
747 case CS96:
748 if (1 <= n)
749 break;
750
751 /* we still need to wait for more characters to come */
752 if (result)
753 *result = string;
754 return (_ISO2022INVALID);
755 }
756
757 /* range check */
758 switch (psenc->g[cur].type) {
759 case CS94:
760 if (!(is94(string[0] & 0x7f)))
761 goto asis;
762 case CS96:
763 if (!(is96(string[0] & 0x7f)))
764 goto asis;
765 break;
766 case CS94MULTI:
767 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
768 goto asis;
769 break;
770 case CS96MULTI:
771 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
772 goto asis;
773 break;
774 }
775
776 /* extract the character. */
777 switch (psenc->g[cur].type) {
778 case CS94:
779 /* special case for ASCII. */
780 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
781 wchar = *string++;
782 wchar &= 0x7f;
783 break;
784 }
785 wchar = psenc->g[cur].final;
786 wchar = (wchar << 8);
787 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
788 wchar = (wchar << 8);
789 wchar = (wchar << 8) | (*string++ & 0x7f);
790 break;
791 case CS96:
792 /* special case for ISO-8859-1. */
793 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
794 wchar = *string++;
795 wchar &= 0x7f;
796 wchar |= 0x80;
797 break;
798 }
799 wchar = psenc->g[cur].final;
800 wchar = (wchar << 8);
801 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
802 wchar = (wchar << 8);
803 wchar = (wchar << 8) | (*string++ & 0x7f);
804 wchar |= 0x80;
805 break;
806 case CS94MULTI:
807 case CS96MULTI:
808 wchar = psenc->g[cur].final;
809 wchar = (wchar << 8);
810 if (isthree(psenc->g[cur].final))
811 wchar |= (*string++ & 0x7f);
812 wchar = (wchar << 8) | (*string++ & 0x7f);
813 wchar = (wchar << 8) | (*string++ & 0x7f);
814 if (psenc->g[cur].type == CS96MULTI)
815 wchar |= 0x80;
816 break;
817 }
818
819 if (result)
820 *result = string;
821 /* reset single shift state */
822 psenc->singlegr = psenc->singlegl = -1;
823 return wchar;
824 }
825
826
827
828 static int
829 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
830 wchar_t * __restrict pwc,
831 const char ** __restrict s,
832 size_t n, _ISO2022State * __restrict psenc,
833 size_t * __restrict nresult)
834 {
835 wchar_t wchar;
836 const char *s0, *p, *result;
837 int c;
838 int chlenbak;
839
840 _DIAGASSERT(nresult != 0);
841 _DIAGASSERT(ei != NULL);
842 _DIAGASSERT(psenc != NULL);
843 _DIAGASSERT(s != NULL);
844
845 s0 = *s;
846 c = 0;
847 chlenbak = psenc->chlen;
848
849 /*
850 * if we have something in buffer, use that.
851 * otherwise, skip here
852 */
853 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
854 /* illgeal state */
855 _citrus_ISO2022_init_state(ei, psenc);
856 goto encoding_error;
857 }
858 if (psenc->chlen == 0)
859 goto emptybuf;
860
861 /* buffer is not empty */
862 p = psenc->ch;
863 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
864 if (n > 0) {
865 psenc->ch[psenc->chlen++] = *s0++;
866 n--;
867 }
868
869 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
870 &result, psenc);
871 if (wchar != _ISO2022INVALID) {
872 c += result - p;
873 if (psenc->chlen > c)
874 memmove(psenc->ch, result, psenc->chlen - c);
875 if (psenc->chlen < c)
876 psenc->chlen = 0;
877 else
878 psenc->chlen -= c;
879 goto output;
880 }
881
882 c += result - p;
883 p = result;
884
885 if (n == 0)
886 goto restart;
887 }
888
889 /* escape sequence too long? */
890 goto encoding_error;
891
892 emptybuf:
893 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
894 if (wchar != _ISO2022INVALID) {
895 c += result - s0;
896 psenc->chlen = 0;
897 s0 = result;
898 goto output;
899 }
900 if (result > s0 && n > result - s0) {
901 c += (result - s0);
902 n -= (result - s0);
903 s0 = result;
904 goto emptybuf;
905 }
906 n += c;
907 if (n < sizeof(psenc->ch)) {
908 memcpy(psenc->ch, s0 - c, n);
909 psenc->chlen = n;
910 s0 = result;
911 goto restart;
912 }
913
914 /* escape sequence too long? */
915
916 encoding_error:
917 psenc->chlen = 0;
918 *nresult = (size_t)-1;
919 return (EILSEQ);
920
921 output:
922 *s = s0;
923 if (pwc)
924 *pwc = wchar;
925
926 if (!wchar)
927 *nresult = 0;
928 else
929 *nresult = c - chlenbak;
930
931 return (0);
932
933 restart:
934 *s = s0;
935 *nresult = (size_t)-2;
936
937 return (0);
938 }
939
940 static int
941 recommendation(_ISO2022EncodingInfo * __restrict ei,
942 _ISO2022Charset * __restrict cs)
943 {
944 int i, j;
945 _ISO2022Charset *recommend;
946
947 _DIAGASSERT(ei != NULL);
948 _DIAGASSERT(cs != NULL);
949
950 /* first, try a exact match. */
951 for (i = 0; i < 4; i++) {
952 recommend = ei->recommend[i];
953 for (j = 0; j < ei->recommendsize[i]; j++) {
954 if (cs->type != recommend[j].type)
955 continue;
956 if (cs->final != recommend[j].final)
957 continue;
958 if (cs->interm != recommend[j].interm)
959 continue;
960
961 return i;
962 }
963 }
964
965 /* then, try a wildcard match over final char. */
966 for (i = 0; i < 4; i++) {
967 recommend = ei->recommend[i];
968 for (j = 0; j < ei->recommendsize[i]; j++) {
969 if (cs->type != recommend[j].type)
970 continue;
971 if (cs->final && (cs->final != recommend[j].final))
972 continue;
973 if (cs->interm && (cs->interm != recommend[j].interm))
974 continue;
975
976 return i;
977 }
978 }
979
980 /* there's no recommendation. make a guess. */
981 if (ei->maxcharset == 0) {
982 return 0;
983 } else {
984 switch (cs->type) {
985 case CS94:
986 case CS94MULTI:
987 return 0;
988 case CS96:
989 case CS96MULTI:
990 return 1;
991 }
992 }
993 return 0;
994 }
995
996 static int
997 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t c,
998 char * __restrict string, size_t n,
999 char ** __restrict result,
1000 _ISO2022State * __restrict psenc)
1001 {
1002 int i = 0, len;
1003 _ISO2022Charset cs;
1004 char *p;
1005 char tmp[MB_LEN_MAX];
1006 int target;
1007 u_char mask;
1008 int bit8;
1009
1010 _DIAGASSERT(ei != NULL);
1011 _DIAGASSERT(string != NULL);
1012 /* result may be NULL */
1013 /* state appears to be unused */
1014
1015 if (iscntl(c & 0xff)) {
1016 /* go back to ASCII on control chars */
1017 cs.type = CS94;
1018 cs.final = 'B';
1019 cs.interm = '\0';
1020 } else if (!(c & ~0xff)) {
1021 if (c & 0x80) {
1022 /* special treatment for ISO-8859-1 */
1023 cs.type = CS96;
1024 cs.final = 'A';
1025 cs.interm = '\0';
1026 } else {
1027 /* special treatment for ASCII */
1028 cs.type = CS94;
1029 cs.final = 'B';
1030 cs.interm = '\0';
1031 }
1032 } else {
1033 cs.final = (c >> 24) & 0x7f;
1034 if ((c >> 16) & 0x80)
1035 cs.interm = (c >> 16) & 0x7f;
1036 else
1037 cs.interm = '\0';
1038 if (c & 0x80)
1039 cs.type = (c & 0x00007f00) ? CS96MULTI : CS96;
1040 else
1041 cs.type = (c & 0x00007f00) ? CS94MULTI : CS94;
1042 }
1043 target = recommendation(ei, &cs);
1044 p = tmp;
1045 bit8 = ei->flags & F_8BIT;
1046
1047 /* designate the charset onto the target plane(G0/1/2/3). */
1048 if (psenc->g[target].type == cs.type
1049 && psenc->g[target].final == cs.final
1050 && psenc->g[target].interm == cs.interm)
1051 goto planeok;
1052
1053 *p++ = '\033';
1054 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1055 *p++ = '$';
1056 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1057 && !cs.interm && !(ei->flags & F_NOOLD))
1058 ;
1059 else if (cs.type == CS94 || cs.type == CS94MULTI)
1060 *p++ = "()*+"[target];
1061 else
1062 *p++ = ",-./"[target];
1063 if (cs.interm)
1064 *p++ = cs.interm;
1065 *p++ = cs.final;
1066
1067 psenc->g[target].type = cs.type;
1068 psenc->g[target].final = cs.final;
1069 psenc->g[target].interm = cs.interm;
1070
1071 planeok:
1072
1073 /* invoke the plane onto GL or GR. */
1074 if (psenc->gl == target)
1075 goto sideok;
1076 if (bit8 && psenc->gr == target)
1077 goto sideok;
1078
1079 if (target == 0 && (ei->flags & F_LS0)) {
1080 *p++ = '\017';
1081 psenc->gl = 0;
1082 } else if (target == 1 && (ei->flags & F_LS1)) {
1083 *p++ = '\016';
1084 psenc->gl = 1;
1085 } else if (target == 2 && (ei->flags & F_LS2)) {
1086 *p++ = '\033';
1087 *p++ = 'n';
1088 psenc->gl = 2;
1089 } else if (target == 3 && (ei->flags & F_LS3)) {
1090 *p++ = '\033';
1091 *p++ = 'o';
1092 psenc->gl = 3;
1093 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1094 *p++ = '\033';
1095 *p++ = '~';
1096 psenc->gr = 1;
1097 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1098 *p++ = '\033';
1099 /*{*/
1100 *p++ = '}';
1101 psenc->gr = 2;
1102 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1103 *p++ = '\033';
1104 *p++ = '|';
1105 psenc->gr = 3;
1106 } else if (target == 2 && (ei->flags & F_SS2)) {
1107 *p++ = '\033';
1108 *p++ = 'N';
1109 psenc->singlegl = 2;
1110 } else if (target == 3 && (ei->flags & F_SS3)) {
1111 *p++ = '\033';
1112 *p++ = 'O';
1113 psenc->singlegl = 3;
1114 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1115 *p++ = '\216';
1116 *p++ = 'N';
1117 psenc->singlegl = psenc->singlegr = 2;
1118 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1119 *p++ = '\217';
1120 *p++ = 'O';
1121 psenc->singlegl = psenc->singlegr = 3;
1122 } else
1123 abort();
1124
1125 sideok:
1126 if (psenc->singlegl == target)
1127 mask = 0x00;
1128 else if (psenc->singlegr == target)
1129 mask = 0x80;
1130 else if (psenc->gl == target)
1131 mask = 0x00;
1132 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1133 mask = 0x80;
1134 else
1135 abort();
1136
1137 switch (cs.type) {
1138 case CS94:
1139 case CS96:
1140 i = 1;
1141 break;
1142 case CS94MULTI:
1143 case CS96MULTI:
1144 i = isthree(cs.final) ? 3 : 2;
1145 break;
1146 }
1147 while (i-- > 0)
1148 *p++ = ((c >> (i << 3)) & 0x7f) | mask;
1149
1150 /* reset single shift state */
1151 psenc->singlegl = psenc->singlegr = -1;
1152
1153 len = p - tmp;
1154 if (n < len) {
1155 if (result)
1156 *result = (char *)0;
1157 } else {
1158 if (result)
1159 *result = string + len;
1160 memcpy(string, tmp, len);
1161 }
1162 return len;
1163 }
1164
1165 static int
1166 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1167 char * __restrict s, size_t n, wchar_t wc,
1168 _ISO2022State * __restrict psenc,
1169 size_t * __restrict nresult)
1170 {
1171 char buf[MB_LEN_MAX];
1172 char *result;
1173 int len;
1174
1175 _DIAGASSERT(ei != NULL);
1176 _DIAGASSERT(nresult != 0);
1177 _DIAGASSERT(s != NULL);
1178
1179 /* XXX state will be modified after this operation... */
1180 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1181 if (sizeof(buf) < len || n < len) {
1182 /* XXX should recover state? */
1183 goto ilseq;
1184 }
1185
1186 memcpy(s, buf, len);
1187 *nresult = (size_t)len;
1188 return (0);
1189
1190 ilseq:
1191 /* bound check failure */
1192 *nresult = (size_t)-1;
1193 return (EILSEQ);
1194 }
1195
1196 /* ----------------------------------------------------------------------
1197 * public interface for ctype
1198 */
1199
/*
 * NOTE(review): both macros come from the citrus headers included at the
 * top of this file.  Presumably they declare the ctype entry points for
 * the ISO2022 module and define its operations table -- confirm against
 * citrus_ctype.h.
 */
1200 _CITRUS_CTYPE_DECLS(ISO2022);
1201 _CITRUS_CTYPE_DEF_OPS(ISO2022);
1202
/*
 * The generic template is included last, after the _FUNCNAME/_ENCODING_*
 * macros and the *_priv/state helpers above have been defined; presumably
 * it builds the public ctype functions on top of them -- confirm against
 * citrus_ctype_template.h.
 */
1203 #include "citrus_ctype_template.h"
1204