citrus_iso2022.c revision 1.7 1 /* $NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $ */
2
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $");
34 #endif /* LIBC_SCCS and not lint */
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_iso2022.h"
53
54
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
58
59
60 /*
61 * wchar_t mappings:
62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
70 * 94x94 charset (ESC & V ESC $ ( F)
71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74 */
75
/*
 * Identity of one coded character set as designated by an ISO-2022
 * escape sequence.  The fields are filled from the bytes of the
 * designation sequence (see seqtable and _ISO2022_sgetwchar) or from
 * the VARIABLE tokens parsed by getcs().
 */
typedef struct {
	u_char	type;		/* one of the CS* class codes below */
#define	CS94		(0U)	/* single-byte 94-character set */
#define	CS96		(1U)	/* single-byte 96-character set */
#define	CS94MULTI	(2U)	/* multi-byte 94^n-character set */
#define	CS96MULTI	(3U)	/* multi-byte 96^n-character set */

	u_char	final;		/* final byte of the designation sequence */
	u_char	interm;		/* 2nd intermediate byte, '\0' if none */
	u_char	vers;		/* version byte (ESC & V), '\0' if none */
} _ISO2022Charset;
87
/*
 * Conversion (shift) state for one multibyte stream.
 *
 * NOTE(review): -1 is stored in the bit-fields below; whether a plain
 * `int' bit-field is signed is implementation-defined in C, so this
 * relies on the compiler treating them as signed.
 */
typedef struct {
	_ISO2022Charset	g[4];	/* charsets currently designated to G0..G3 */
	/* need 3 bits to hold -1, 0, ..., 3 */
	int	gl:3,		/* index into g[] invoked into GL */
		gr:3,		/* index into g[] invoked into GR, -1 if none */
		singlegl:3,	/* pending single shift for GL, -1 if none */
		singlegr:3;	/* pending single shift for GR, -1 if none */
	char ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
	int chlen;		/* number of bytes currently buffered in ch[] */
	int flags;		/* _ISO2022STATE_FLAG_* bits */
#define _ISO2022STATE_FLAG_INITIALIZED	1	/* set by init_state */
} _ISO2022State;
100
/*
 * Per-encoding configuration, filled in by
 * _citrus_ISO2022_parse_variable() from the VARIABLE string.
 */
typedef struct {
	_ISO2022Charset	*recommend[4];	/* heap arrays: charsets recommended
					 * for G0..G3 ("N=..." tokens) */
	size_t	recommendsize[4];	/* element count of each array */
	_ISO2022Charset	initg[4];	/* initial designation of G0..G3
					 * ("INITN=..." tokens) */
	int	maxcharset;		/* 0, or 1..3 from "MAX1".."MAX3" */
	int	flags;			/* F_* bits below */
#define	F_8BIT	0x0001
#define	F_NOOLD	0x0002
	/* F_SI/F_LS0 and F_SO/F_LS1 deliberately share the same bit values */
#define	F_SI	0x0010	/*0F*/
#define	F_SO	0x0020	/*0E*/
#define	F_LS0	0x0010	/*0F*/
#define	F_LS1	0x0020	/*0E*/
#define	F_LS2	0x0040	/*ESC n*/
#define	F_LS3	0x0080	/*ESC o*/
#define	F_LS1R	0x0100	/*ESC ~*/
#define	F_LS2R	0x0200	/*ESC }*/
#define	F_LS3R	0x0400	/*ESC |*/
#define	F_SS2	0x0800	/*ESC N*/
#define	F_SS3	0x1000	/*ESC O*/
#define	F_SS2R	0x2000	/*8E*/
#define	F_SS3R	0x4000	/*8F*/
} _ISO2022EncodingInfo;
/*
 * Encoding configuration bundled with one private conversion state per
 * stateful C library entry point; individual states are selected with
 * _CEI_TO_STATE().
 */
typedef struct {
	_ISO2022EncodingInfo ei;
	struct {
		/* for future multi-locale facility */
		_ISO2022State	s_mblen;
		_ISO2022State	s_mbrlen;
		_ISO2022State	s_mbrtowc;
		_ISO2022State	s_mbtowc;
		_ISO2022State	s_mbsrtowcs;
		_ISO2022State	s_wcrtomb;
		_ISO2022State	s_wcsrtombs;
		_ISO2022State	s_wctomb;
	} states;
} _ISO2022CTypeInfo;
137
/*
 * Configuration macros; presumably consumed by the template headers
 * included at the end of this file (verify against the templates).
 */

/* pointer to the encoding info embedded in a ctype info */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* the private state that belongs to function _func_ (e.g. mbrtowc) */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* every function name in this module is prefixed _citrus_ISO2022_ */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
/* the argument is ignored; the maximum is always MB_LEN_MAX */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* true while _ISO2022STATE_FLAG_INITIALIZED has not been set in the state */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))
149
150
/*
 * Sentinel returned by _ISO2022_sgetwchar() when no character could be
 * produced.  The expansion is fully parenthesized so that it behaves as
 * a single operand in any expression (e.g. under sizeof or next to a
 * binary operator).
 */
#define _ISO2022INVALID ((wchar_t)-1)
152
/*
 * Byte classification helpers.  Each returns 1 when the byte falls in
 * the named range and 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return (1);
	return (isc0(x) || isc1(x));
}

/* position in a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position in a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x < 0x80);
}

/* byte accepted where the sequence table says ECMA: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte in 0x60..0x6f (selects the three-byte form of a multi set) */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
161
162 static __inline int
163 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
164 {
165
166 _DIAGASSERT(p != NULL);
167 _DIAGASSERT(cs != NULL);
168
169 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
170 cs->final = (u_char)(p[3] & 0xff);
171 cs->interm = '\0';
172 cs->vers = '\0';
173 cs->type = CS94MULTI;
174 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
175 cs->final = (u_char)(p[3] & 0xff);
176 cs->interm = '\0';
177 cs->vers = '\0';
178 cs->type = CS96MULTI;
179 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
180 cs->final = (u_char)(p[2] & 0xff);
181 cs->interm = '\0';
182 cs->vers = '\0';
183 cs->type = CS94;
184 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
185 cs->final = (u_char )(p[2] & 0xff);
186 cs->interm = '\0';
187 cs->vers = '\0';
188 cs->type = CS96;
189 } else {
190 return 1;
191 }
192
193 return 0;
194 }
195
196
/* result codes of the get_*() VARIABLE token parsers below */
#define _NOTMATCH	0	/* token is not of the kind this parser handles */
#define _MATCH		1	/* token recognized and applied */
#define _PARSEFAIL	2	/* token recognized but could not be applied */
200
201 static __inline int
202 get_recommend(_ISO2022EncodingInfo * __restrict ei,
203 const char * __restrict token)
204 {
205 int i;
206 _ISO2022Charset cs;
207
208 if (!strchr("0123", token[0]) || token[1] != '=')
209 return (_NOTMATCH);
210
211 if (getcs(&token[2], &cs) == 0)
212 ;
213 else if (!strcmp(&token[2], "94")) {
214 cs.final = (u_char)(token[4]);
215 cs.interm = '\0';
216 cs.vers = '\0';
217 cs.type = CS94;
218 } else if (!strcmp(&token[2], "96")) {
219 cs.final = (u_char)(token[4]);
220 cs.interm = '\0';
221 cs.vers = '\0';
222 cs.type = CS96;
223 } else if (!strcmp(&token[2], "94$")) {
224 cs.final = (u_char)(token[5]);
225 cs.interm = '\0';
226 cs.vers = '\0';
227 cs.type = CS94MULTI;
228 } else if (!strcmp(&token[2], "96$")) {
229 cs.final = (u_char)(token[5]);
230 cs.interm = '\0';
231 cs.vers = '\0';
232 cs.type = CS96MULTI;
233 } else {
234 return (_PARSEFAIL);
235 }
236
237 i = token[0] - '0';
238 ei->recommendsize[i] += 1;
239 if (!ei->recommend[i]) {
240 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
241 } else {
242 ei->recommend[i] =
243 realloc(ei->recommend[i],
244 sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
245 }
246 if (!ei->recommend[i])
247 return (_PARSEFAIL);
248
249 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
250 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
251 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
252 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
253
254 return (_MATCH);
255 }
256
257 static __inline int
258 get_initg(_ISO2022EncodingInfo * __restrict ei,
259 const char * __restrict token)
260 {
261 _ISO2022Charset cs;
262
263 if (strncmp("INIT", &token[0], 4) ||
264 !strchr("0123", token[4]) ||
265 token[5] != '=')
266 return (_NOTMATCH);
267
268 if (getcs(&token[6], &cs) != 0)
269 return (_PARSEFAIL);
270
271 ei->initg[token[4] - '0'].type = cs.type;
272 ei->initg[token[4] - '0'].final = cs.final;
273 ei->initg[token[4] - '0'].interm = cs.interm;
274 ei->initg[token[4] - '0'].vers = cs.vers;
275
276 return (_MATCH);
277 }
278
279 static __inline int
280 get_max(_ISO2022EncodingInfo * __restrict ei,
281 const char * __restrict token)
282 {
283 if (!strcmp(token, "MAX1")) {
284 ei->maxcharset = 1;
285 } else if (!strcmp(token, "MAX2")) {
286 ei->maxcharset = 2;
287 } else if (!strcmp(token, "MAX3")) {
288 ei->maxcharset = 3;
289 } else
290 return (_NOTMATCH);
291
292 return (_MATCH);
293 }
294
295
296 static __inline int
297 get_flags(_ISO2022EncodingInfo * __restrict ei,
298 const char * __restrict token)
299 {
300 int i;
301 static struct {
302 const char *tag;
303 int flag;
304 } const tags[] = {
305 { "DUMMY", 0 },
306 { "8BIT", F_8BIT },
307 { "NOOLD", F_NOOLD },
308 { "SI", F_SI },
309 { "SO", F_SO },
310 { "LS0", F_LS0 },
311 { "LS1", F_LS1 },
312 { "LS2", F_LS2 },
313 { "LS3", F_LS3 },
314 { "LS1R", F_LS1R },
315 { "LS2R", F_LS2R },
316 { "LS3R", F_LS3R },
317 { "SS2", F_SS2 },
318 { "SS3", F_SS3 },
319 { "SS2R", F_SS2R },
320 { "SS3R", F_SS3R },
321 { NULL, 0 }
322 };
323
324 for (i = 0; tags[i].tag; i++) {
325 if (!strcmp(token, tags[i].tag)) {
326 ei->flags |= tags[i].flag;
327 return (_MATCH);
328 }
329 }
330
331 return (_NOTMATCH);
332 }
333
334
335 static __inline int
336 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
337 const void * __restrict var, size_t lenvar)
338 {
339 char const *v, *e;
340 char buf[20];
341 int i, len, ret;
342
343 _DIAGASSERT(ei != NULL);
344
345
346 /*
347 * parse VARIABLE section.
348 */
349
350 if (!var)
351 return (EFTYPE);
352
353 v = (const char *) var;
354
355 /* initialize structure */
356 ei->maxcharset = 0;
357 for (i = 0; i < 4; i++) {
358 ei->recommend[i] = NULL;
359 ei->recommendsize[i] = 0;
360 }
361 ei->flags = 0;
362
363 while (*v) {
364 while (*v == ' ' || *v == '\t')
365 ++v;
366
367 /* find the token */
368 e = v;
369 while (*e && *e != ' ' && *e != '\t')
370 ++e;
371
372 len = e-v;
373 if (len == 0)
374 break;
375 if (len>=sizeof(buf))
376 goto parsefail;
377 sprintf(buf, "%.*s", len, v);
378
379 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
380 ;
381 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
382 ;
383 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
384 ;
385 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
386 ;
387 else
388 ret = _PARSEFAIL;
389 if (ret==_PARSEFAIL)
390 goto parsefail;
391 v = e;
392
393 }
394
395 return (0);
396
397 parsefail:
398 free(ei->recommend[0]);
399 free(ei->recommend[1]);
400 free(ei->recommend[2]);
401 free(ei->recommend[3]);
402
403 return (EFTYPE);
404 }
405
406 static __inline void
407 /*ARGSUSED*/
408 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
409 _ISO2022State * __restrict s)
410 {
411 int i;
412
413 memset(s, 0, sizeof(*s));
414 s->gl = 0;
415 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
416
417 for (i = 0; i < 4; i++) {
418 if (ei->initg[i].final) {
419 s->g[i].type = ei->initg[i].type;
420 s->g[i].final = ei->initg[i].final;
421 s->g[i].interm = ei->initg[i].interm;
422 }
423 }
424 s->singlegl = s->singlegr = -1;
425 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
426 }
427
428 static __inline void
429 /*ARGSUSED*/
430 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
431 void * __restrict pspriv,
432 const _ISO2022State * __restrict s)
433 {
434 memcpy(pspriv, (const void *)s, sizeof(*s));
435 }
436
437 static __inline void
438 /*ARGSUSED*/
439 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
440 _ISO2022State * __restrict s,
441 const void * __restrict pspriv)
442 {
443 memcpy((void *)s, pspriv, sizeof(*s));
444 }
445
446 static int
447 /*ARGSUSED*/
448 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
449 const void * __restrict var,
450 size_t lenvar)
451 {
452
453 _DIAGASSERT(ei != NULL);
454
455 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
456 }
457
/*
 * Module teardown hook.  Currently does nothing.
 *
 * NOTE(review): the recommend[] arrays allocated by get_recommend()
 * are not released here; confirm whether the owner of *ei frees them
 * elsewhere, otherwise they leak when the module is closed.
 */
static void
/*ARGSUSED*/
_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
{
}
463
/*
 * Table of the escape sequences recognized by the decoder.
 *
 * Pattern elements in chars[] are either literal bytes or one of the
 * class codes interpreted by seqmatch(): ECMA, INTERM, OECMA
 * (one of "@AB"), CS94 (one of "()*+") and CS96 (one of ",-./").
 */
#define ESC	'\033'
#define ECMA	-1
#define	INTERM	-2
#define	OECMA	-3
static struct seqtable {
	int type;	/* charset class being designated, -1 if the
			 * sequence is a shift rather than a designation */
	int csoff;	/* offset of the byte that selects G0..G3,
			 * -1 means the sequence always targets G0 */
	int finaloff;	/* offset of the final byte, -1 if none */
	int intermoff;	/* offset of the 2nd intermediate byte, -1 if none */
	int versoff;	/* offset of the version byte, -1 if none */
	int len;	/* length of the sequence; 0 terminates the table */
	int chars[10];	/* the pattern itself */
} seqtable[] = {
	/* G0 94MULTI special */
	{ CS94MULTI, -1, 2, -1, -1,	3, { ESC, '$', OECMA }, },
	/* G0 94MULTI special with version identification */
	{ CS94MULTI, -1, 5, -1, 2,	6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
	/* G? 94 */
	{ CS94, 1, 2, -1, -1,		3, { ESC, CS94, ECMA, }, },
	/* G? 94 with 2nd intermediate char */
	{ CS94, 1, 3, 2, -1,		4, { ESC, CS94, INTERM, ECMA, }, },
	/* G? 96 */
	{ CS96, 1, 2, -1, -1,		3, { ESC, CS96, ECMA, }, },
	/* G? 96 with 2nd intermediate char */
	{ CS96, 1, 3, 2, -1,		4, { ESC, CS96, INTERM, ECMA, }, },
	/* G? 94MULTI */
	{ CS94MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS94, ECMA, }, },
	/* G? 96MULTI */
	{ CS96MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS96, ECMA, }, },
	/* G? 94MULTI with version specification */
	{ CS94MULTI, 5, 6, -1, 2,	7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
	/* LS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'n', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'o', }, },
	/* LS1/2/3R */
	{ -1, -1, -1, -1, -1,		2, { ESC, '~', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, /*{*/ '}', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, '|', }, },
	/* SS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'N', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'O', }, },
	/* end of records */
	{ 0, }
};
508
509 static int
510 seqmatch(const char * __restrict s, size_t n,
511 const struct seqtable * __restrict sp)
512 {
513 const int *p;
514
515 _DIAGASSERT(s != NULL);
516 _DIAGASSERT(sp != NULL);
517
518 p = sp->chars;
519 while (p - sp->chars < n && p - sp->chars < sp->len) {
520 switch (*p) {
521 case ECMA:
522 if (!isecma(*s))
523 goto terminate;
524 break;
525 case OECMA:
526 if (*s && strchr("@AB", *s))
527 break;
528 else
529 goto terminate;
530 case INTERM:
531 if (!isinterm(*s))
532 goto terminate;
533 break;
534 case CS94:
535 if (*s && strchr("()*+", *s))
536 break;
537 else
538 goto terminate;
539 case CS96:
540 if (*s && strchr(",-./", *s))
541 break;
542 else
543 goto terminate;
544 default:
545 if (*s != *p)
546 goto terminate;
547 break;
548 }
549
550 p++;
551 s++;
552 }
553
554 terminate:
555 return p - sp->chars;
556 }
557
558 static wchar_t
559 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
560 const char * __restrict string, size_t n,
561 const char ** __restrict result,
562 _ISO2022State * __restrict psenc)
563 {
564 wchar_t wchar = 0;
565 int cur;
566 struct seqtable *sp;
567 int nmatch;
568 int i;
569
570 _DIAGASSERT(ei != NULL);
571 _DIAGASSERT(state != NULL);
572 _DIAGASSERT(string != NULL);
573 /* result may be NULL */
574
575 while (1) {
576 /* SI/SO */
577 if (1 <= n && string[0] == '\017') {
578 psenc->gl = 0;
579 string++;
580 n--;
581 continue;
582 }
583 if (1 <= n && string[0] == '\016') {
584 psenc->gl = 1;
585 string++;
586 n--;
587 continue;
588 }
589
590 /* SS2/3R */
591 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
592 psenc->singlegl = psenc->singlegr =
593 (string[0] - '\216') + 2;
594 string++;
595 n--;
596 continue;
597 }
598
599 /* eat the letter if this is not ESC */
600 if (1 <= n && string[0] != '\033')
601 break;
602
603 /* look for a perfect match from escape sequences */
604 for (sp = &seqtable[0]; sp->len; sp++) {
605 nmatch = seqmatch(string, n, sp);
606 if (sp->len == nmatch && n >= sp->len)
607 break;
608 }
609
610 if (!sp->len)
611 goto notseq;
612
613 if (sp->type != -1) {
614 if (sp->csoff == -1)
615 i = 0;
616 else {
617 switch (sp->type) {
618 case CS94:
619 case CS94MULTI:
620 i = string[sp->csoff] - '(';
621 break;
622 case CS96:
623 case CS96MULTI:
624 i = string[sp->csoff] - ',';
625 break;
626 }
627 }
628 psenc->g[i].type = sp->type;
629 psenc->g[i].final = '\0';
630 psenc->g[i].interm = '\0';
631 psenc->g[i].vers = '\0';
632 /* sp->finaloff must not be -1 */
633 if (sp->finaloff != -1)
634 psenc->g[i].final = string[sp->finaloff];
635 if (sp->intermoff != -1)
636 psenc->g[i].interm = string[sp->intermoff];
637 if (sp->versoff != -1)
638 psenc->g[i].vers = string[sp->versoff];
639
640 string += sp->len;
641 n -= sp->len;
642 continue;
643 }
644
645 /* LS2/3 */
646 if (2 <= n && string[0] == '\033'
647 && string[1] && strchr("no", string[1])) {
648 psenc->gl = string[1] - 'n' + 2;
649 string += 2;
650 n -= 2;
651 continue;
652 }
653
654 /* LS1/2/3R */
655 /* XXX: { for vi showmatch */
656 if (2 <= n && string[0] == '\033'
657 && string[1] && strchr("~}|", string[1])) {
658 psenc->gr = 3 - (string[1] - '|');
659 string += 2;
660 n -= 2;
661 continue;
662 }
663
664 /* SS2/3 */
665 if (2 <= n && string[0] == '\033'
666 && string[1] && strchr("NO", string[1])) {
667 psenc->singlegl = (string[1] - 'N') + 2;
668 string += 2;
669 n -= 2;
670 continue;
671 }
672
673 notseq:
674 /*
675 * if we've got an unknown escape sequence, eat the ESC at the
676 * head. otherwise, wait till full escape sequence comes.
677 */
678 for (sp = &seqtable[0]; sp->len; sp++) {
679 nmatch = seqmatch(string, n, sp);
680 if (!nmatch)
681 continue;
682
683 /*
684 * if we are in the middle of escape sequence,
685 * we still need to wait for more characters to come
686 */
687 if (n < sp->len) {
688 if (nmatch == n) {
689 if (result)
690 *result = string;
691 return (_ISO2022INVALID);
692 }
693 } else {
694 if (nmatch == sp->len) {
695 /* this case should not happen */
696 goto eat;
697 }
698 }
699 }
700
701 break;
702 }
703
704 eat:
705 /* no letter to eat */
706 if (n < 1) {
707 if (result)
708 *result = string;
709 return (_ISO2022INVALID);
710 }
711
712 /* normal chars. always eat C0/C1 as is. */
713 if (iscntl(*string & 0xff))
714 cur = -1;
715 else if (*string & 0x80) {
716 cur = (psenc->singlegr == -1)
717 ? psenc->gr : psenc->singlegr;
718 } else {
719 cur = (psenc->singlegl == -1)
720 ? psenc->gl : psenc->singlegl;
721 }
722
723 if (cur == -1) {
724 asis:
725 wchar = *string++ & 0xff;
726 if (result)
727 *result = string;
728 /* reset single shift state */
729 psenc->singlegr = psenc->singlegl = -1;
730 return wchar;
731 }
732
733 /* length error check */
734 switch (psenc->g[cur].type) {
735 case CS94MULTI:
736 case CS96MULTI:
737 if (!isthree(psenc->g[cur].final)) {
738 if (2 <= n
739 && (string[0] & 0x80) == (string[1] & 0x80))
740 break;
741 } else {
742 if (3 <= n
743 && (string[0] & 0x80) == (string[1] & 0x80)
744 && (string[0] & 0x80) == (string[2] & 0x80))
745 break;
746 }
747
748 /* we still need to wait for more characters to come */
749 if (result)
750 *result = string;
751 return (_ISO2022INVALID);
752
753 case CS94:
754 case CS96:
755 if (1 <= n)
756 break;
757
758 /* we still need to wait for more characters to come */
759 if (result)
760 *result = string;
761 return (_ISO2022INVALID);
762 }
763
764 /* range check */
765 switch (psenc->g[cur].type) {
766 case CS94:
767 if (!(is94(string[0] & 0x7f)))
768 goto asis;
769 case CS96:
770 if (!(is96(string[0] & 0x7f)))
771 goto asis;
772 break;
773 case CS94MULTI:
774 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
775 goto asis;
776 break;
777 case CS96MULTI:
778 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
779 goto asis;
780 break;
781 }
782
783 /* extract the character. */
784 switch (psenc->g[cur].type) {
785 case CS94:
786 /* special case for ASCII. */
787 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
788 wchar = *string++;
789 wchar &= 0x7f;
790 break;
791 }
792 wchar = psenc->g[cur].final;
793 wchar = (wchar << 8);
794 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
795 wchar = (wchar << 8);
796 wchar = (wchar << 8) | (*string++ & 0x7f);
797 break;
798 case CS96:
799 /* special case for ISO-8859-1. */
800 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
801 wchar = *string++;
802 wchar &= 0x7f;
803 wchar |= 0x80;
804 break;
805 }
806 wchar = psenc->g[cur].final;
807 wchar = (wchar << 8);
808 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
809 wchar = (wchar << 8);
810 wchar = (wchar << 8) | (*string++ & 0x7f);
811 wchar |= 0x80;
812 break;
813 case CS94MULTI:
814 case CS96MULTI:
815 wchar = psenc->g[cur].final;
816 wchar = (wchar << 8);
817 if (isthree(psenc->g[cur].final))
818 wchar |= (*string++ & 0x7f);
819 wchar = (wchar << 8) | (*string++ & 0x7f);
820 wchar = (wchar << 8) | (*string++ & 0x7f);
821 if (psenc->g[cur].type == CS96MULTI)
822 wchar |= 0x80;
823 break;
824 }
825
826 if (result)
827 *result = string;
828 /* reset single shift state */
829 psenc->singlegr = psenc->singlegl = -1;
830 return wchar;
831 }
832
833
834
/*
 * Restartable multibyte-to-wide conversion of one character.
 *
 * Bytes left over from a previous incomplete call are kept in
 * psenc->ch[]; new input from *s is appended to them one byte at a
 * time until _ISO2022_sgetwchar() yields a character.  With an empty
 * buffer the input is decoded in place.
 *
 * Outcomes:
 *  - character decoded: returns 0, stores it in *pwc (if pwc is not
 *    NULL), advances *s, and sets *nresult to 0 for a null character
 *    or to the consumed byte count minus the bytes that were already
 *    buffered on entry;
 *  - input exhausted first: returns 0 with *nresult == (size_t)-2 and
 *    *s advanced;
 *  - corrupt buffered state, or a sequence that does not fit in
 *    psenc->ch[]: returns EILSEQ with *nresult == (size_t)-1 and the
 *    buffer emptied.
 */
static int
_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
			     wchar_t * __restrict pwc,
			     const char ** __restrict s,
			     size_t n, _ISO2022State * __restrict psenc,
			     size_t * __restrict nresult)
{
	wchar_t wchar;
	const char *s0, *p, *result;
	int c;		/* bytes consumed so far */
	int chlenbak;	/* bytes that were buffered on entry */

	_DIAGASSERT(nresult != 0);
	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(s != NULL);

	s0 = *s;
	c = 0;
	chlenbak = psenc->chlen;

	/*
	 * if we have something in buffer, use that.
	 * otherwise, skip here
	 */
	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
		/* illegal state */
		_citrus_ISO2022_init_state(ei, psenc);
		goto encoding_error;
	}
	if (psenc->chlen == 0)
		goto emptybuf;

	/* buffer is not empty */
	p = psenc->ch;
	/* n is a size_t, so `n >= 0' below is always true */
	while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
		if (n > 0) {
			psenc->ch[psenc->chlen++] = *s0++;
			n--;
		}

		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
					   &result, psenc);
		if (wchar != _ISO2022INVALID) {
			c += result - p;
			/* keep whatever follows the character in the buffer */
			if (psenc->chlen > c)
				memmove(psenc->ch, result, psenc->chlen - c);
			if (psenc->chlen < c)
				psenc->chlen = 0;
			else
				psenc->chlen -= c;
			goto output;
		}

		/* skip past shift/designation sequences already applied */
		c += result - p;
		p = result;

		if (n == 0)
			goto restart;
	}

	/* escape sequence too long? */
	goto encoding_error;

emptybuf:
	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
	if (wchar != _ISO2022INVALID) {
		c += result - s0;
		psenc->chlen = 0;
		s0 = result;
		goto output;
	}
	/* some sequences were consumed and input remains: try again */
	if (result > s0 && n > result - s0) {
		c += (result - s0);
		n -= (result - s0);
		s0 = result;
		goto emptybuf;
	}
	/* stash the unprocessed input for the next call, if it fits */
	n += c;
	if (n < sizeof(psenc->ch)) {
		memcpy(psenc->ch, s0 - c, n);
		psenc->chlen = n;
		s0 = result;
		goto restart;
	}

	/* escape sequence too long? */

encoding_error:
	psenc->chlen = 0;
	*nresult = (size_t)-1;
	return (EILSEQ);

output:
	*s = s0;
	if (pwc)
		*pwc = wchar;

	if (!wchar)
		*nresult = 0;
	else
		*nresult = c - chlenbak;

	return (0);

restart:
	*s = s0;
	*nresult = (size_t)-2;

	return (0);
}
946
947 static int
948 recommendation(_ISO2022EncodingInfo * __restrict ei,
949 _ISO2022Charset * __restrict cs)
950 {
951 int i, j;
952 _ISO2022Charset *recommend;
953
954 _DIAGASSERT(ei != NULL);
955 _DIAGASSERT(cs != NULL);
956
957 /* first, try a exact match. */
958 for (i = 0; i < 4; i++) {
959 recommend = ei->recommend[i];
960 for (j = 0; j < ei->recommendsize[i]; j++) {
961 if (cs->type != recommend[j].type)
962 continue;
963 if (cs->final != recommend[j].final)
964 continue;
965 if (cs->interm != recommend[j].interm)
966 continue;
967
968 return i;
969 }
970 }
971
972 /* then, try a wildcard match over final char. */
973 for (i = 0; i < 4; i++) {
974 recommend = ei->recommend[i];
975 for (j = 0; j < ei->recommendsize[i]; j++) {
976 if (cs->type != recommend[j].type)
977 continue;
978 if (cs->final && (cs->final != recommend[j].final))
979 continue;
980 if (cs->interm && (cs->interm != recommend[j].interm))
981 continue;
982
983 return i;
984 }
985 }
986
987 /* there's no recommendation. make a guess. */
988 if (ei->maxcharset == 0) {
989 return 0;
990 } else {
991 switch (cs->type) {
992 case CS94:
993 case CS94MULTI:
994 return 0;
995 case CS96:
996 case CS96MULTI:
997 return 1;
998 }
999 }
1000 return 0;
1001 }
1002
1003 static int
1004 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1005 char * __restrict string, size_t n,
1006 char ** __restrict result,
1007 _ISO2022State * __restrict psenc)
1008 {
1009 int i = 0, len;
1010 _ISO2022Charset cs;
1011 char *p;
1012 char tmp[MB_LEN_MAX];
1013 int target;
1014 u_char mask;
1015 int bit8;
1016
1017 _DIAGASSERT(ei != NULL);
1018 _DIAGASSERT(string != NULL);
1019 /* result may be NULL */
1020 /* state appears to be unused */
1021
1022 if (iscntl(wc & 0xff)) {
1023 /* go back to ASCII on control chars */
1024 cs.type = CS94;
1025 cs.final = 'B';
1026 cs.interm = '\0';
1027 } else if (!(wc & ~0xff)) {
1028 if (wc & 0x80) {
1029 /* special treatment for ISO-8859-1 */
1030 cs.type = CS96;
1031 cs.final = 'A';
1032 cs.interm = '\0';
1033 } else {
1034 /* special treatment for ASCII */
1035 cs.type = CS94;
1036 cs.final = 'B';
1037 cs.interm = '\0';
1038 }
1039 } else {
1040 cs.final = (wc >> 24) & 0x7f;
1041 if ((wc >> 16) & 0x80)
1042 cs.interm = (wc >> 16) & 0x7f;
1043 else
1044 cs.interm = '\0';
1045 if (wc & 0x80)
1046 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1047 else
1048 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1049 }
1050 target = recommendation(ei, &cs);
1051 p = tmp;
1052 bit8 = ei->flags & F_8BIT;
1053
1054 /* designate the charset onto the target plane(G0/1/2/3). */
1055 if (psenc->g[target].type == cs.type
1056 && psenc->g[target].final == cs.final
1057 && psenc->g[target].interm == cs.interm)
1058 goto planeok;
1059
1060 *p++ = '\033';
1061 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1062 *p++ = '$';
1063 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1064 && !cs.interm && !(ei->flags & F_NOOLD))
1065 ;
1066 else if (cs.type == CS94 || cs.type == CS94MULTI)
1067 *p++ = "()*+"[target];
1068 else
1069 *p++ = ",-./"[target];
1070 if (cs.interm)
1071 *p++ = cs.interm;
1072 *p++ = cs.final;
1073
1074 psenc->g[target].type = cs.type;
1075 psenc->g[target].final = cs.final;
1076 psenc->g[target].interm = cs.interm;
1077
1078 planeok:
1079 /* invoke the plane onto GL or GR. */
1080 if (psenc->gl == target)
1081 goto sideok;
1082 if (bit8 && psenc->gr == target)
1083 goto sideok;
1084
1085 if (target == 0 && (ei->flags & F_LS0)) {
1086 *p++ = '\017';
1087 psenc->gl = 0;
1088 } else if (target == 1 && (ei->flags & F_LS1)) {
1089 *p++ = '\016';
1090 psenc->gl = 1;
1091 } else if (target == 2 && (ei->flags & F_LS2)) {
1092 *p++ = '\033';
1093 *p++ = 'n';
1094 psenc->gl = 2;
1095 } else if (target == 3 && (ei->flags & F_LS3)) {
1096 *p++ = '\033';
1097 *p++ = 'o';
1098 psenc->gl = 3;
1099 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1100 *p++ = '\033';
1101 *p++ = '~';
1102 psenc->gr = 1;
1103 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1104 *p++ = '\033';
1105 /*{*/
1106 *p++ = '}';
1107 psenc->gr = 2;
1108 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1109 *p++ = '\033';
1110 *p++ = '|';
1111 psenc->gr = 3;
1112 } else if (target == 2 && (ei->flags & F_SS2)) {
1113 *p++ = '\033';
1114 *p++ = 'N';
1115 psenc->singlegl = 2;
1116 } else if (target == 3 && (ei->flags & F_SS3)) {
1117 *p++ = '\033';
1118 *p++ = 'O';
1119 psenc->singlegl = 3;
1120 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1121 *p++ = '\216';
1122 *p++ = 'N';
1123 psenc->singlegl = psenc->singlegr = 2;
1124 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1125 *p++ = '\217';
1126 *p++ = 'O';
1127 psenc->singlegl = psenc->singlegr = 3;
1128 } else
1129 abort();
1130
1131 sideok:
1132 if (psenc->singlegl == target)
1133 mask = 0x00;
1134 else if (psenc->singlegr == target)
1135 mask = 0x80;
1136 else if (psenc->gl == target)
1137 mask = 0x00;
1138 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1139 mask = 0x80;
1140 else
1141 abort();
1142
1143 switch (cs.type) {
1144 case CS94:
1145 case CS96:
1146 i = 1;
1147 break;
1148 case CS94MULTI:
1149 case CS96MULTI:
1150 i = isthree(cs.final) ? 3 : 2;
1151 break;
1152 }
1153 if (wc != 0)
1154 while (i-- > 0)
1155 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1156
1157 /* reset single shift state */
1158 psenc->singlegl = psenc->singlegr = -1;
1159
1160 len = p - tmp;
1161 if (n < len) {
1162 if (result)
1163 *result = (char *)0;
1164 } else {
1165 if (result)
1166 *result = string + len;
1167 memcpy(string, tmp, len);
1168 }
1169 return len;
1170 }
1171
1172 static int
1173 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1174 char * __restrict s, size_t n, wchar_t wc,
1175 _ISO2022State * __restrict psenc,
1176 size_t * __restrict nresult)
1177 {
1178 char buf[MB_LEN_MAX];
1179 char *result;
1180 int len, ret;
1181
1182 _DIAGASSERT(ei != NULL);
1183 _DIAGASSERT(nresult != 0);
1184 _DIAGASSERT(s != NULL);
1185
1186 /* XXX state will be modified after this operation... */
1187 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1188 if (sizeof(buf) < len || n < len) {
1189 /* XXX should recover state? */
1190 ret = E2BIG;
1191 goto err;
1192 }
1193
1194 memcpy(s, buf, len);
1195 *nresult = (size_t)len;
1196 return (0);
1197
1198 err:
1199 /* bound check failure */
1200 *nresult = (size_t)-1;
1201 return ret;
1202 }
1203
1204 static __inline int
1205 /*ARGSUSED*/
1206 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1207 _csid_t * __restrict csid,
1208 _index_t * __restrict idx, wchar_t wc)
1209 {
1210 wchar_t m, nm;
1211
1212 _DIAGASSERT(csid != NULL && idx != NULL);
1213
1214 m = wc & 0x7FFF8080;
1215 nm = wc & 0x007F7F7F;
1216 if (m & 0x00800000) {
1217 nm &= 0x00007F7F;
1218 } else {
1219 m &= 0x7F008080;
1220 }
1221 if (nm & 0x007F0000) {
1222 /* ^3 mark */
1223 m |= 0x007F0000;
1224 } else if (nm & 0x00007F00) {
1225 /* ^2 mark */
1226 m |= 0x00007F00;
1227 }
1228 *csid = (_csid_t)m;
1229 *idx = (_index_t)nm;
1230
1231 return (0);
1232 }
1233
1234 static __inline int
1235 /*ARGSUSED*/
1236 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1237 wchar_t * __restrict wc,
1238 _csid_t csid, _index_t idx)
1239 {
1240
1241 _DIAGASSERT(ei != NULL && wc != NULL);
1242
1243 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1244
1245 return (0);
1246 }
1247
1248 /* ----------------------------------------------------------------------
1249 * public interface for ctype
1250 */
1251
/*
 * The template header is included after the declaration macros;
 * presumably it builds the ctype entry points from the private
 * functions and configuration macros defined above (verify against
 * citrus_ctype_template.h).
 */
_CITRUS_CTYPE_DECLS(ISO2022);
_CITRUS_CTYPE_DEF_OPS(ISO2022);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

/* same arrangement for the stdenc interface */
_CITRUS_STDENC_DECLS(ISO2022);
_CITRUS_STDENC_DEF_OPS(ISO2022);

#include "citrus_stdenc_template.h"
1265