citrus_iso2022.c revision 1.10 1 /* $NetBSD: citrus_iso2022.c,v 1.10 2004/01/02 12:25:46 itojun Exp $ */
2
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 */
30
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.10 2004/01/02 12:25:46 itojun Exp $");
34 #endif /* LIBC_SCCS and not lint */
35
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_iso2022.h"
53
54
55 /* ----------------------------------------------------------------------
56 * private stuffs used by templates
57 */
58
59
60 /*
61 * wchar_t mappings:
62 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
63 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
64 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
65 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
66 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
67 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
68 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
69 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
70 * 94x94 charset (ESC & V ESC $ ( F)
71 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74 */
75
76 typedef struct {
77 u_char type;
78 #define CS94 (0U)
79 #define CS96 (1U)
80 #define CS94MULTI (2U)
81 #define CS96MULTI (3U)
82
83 u_char final;
84 u_char interm;
85 u_char vers;
86 } _ISO2022Charset;
87
88 typedef struct {
89 _ISO2022Charset g[4];
90 /* need 3 bits to hold -1, 0, ..., 3 */
91 int gl:3,
92 gr:3,
93 singlegl:3,
94 singlegr:3;
95 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
96 int chlen;
97 int flags;
98 #define _ISO2022STATE_FLAG_INITIALIZED 1
99 } _ISO2022State;
100
101 typedef struct {
102 _ISO2022Charset *recommend[4];
103 size_t recommendsize[4];
104 _ISO2022Charset initg[4];
105 int maxcharset;
106 int flags;
107 #define F_8BIT 0x0001
108 #define F_NOOLD 0x0002
109 #define F_SI 0x0010 /*0F*/
110 #define F_SO 0x0020 /*0E*/
111 #define F_LS0 0x0010 /*0F*/
112 #define F_LS1 0x0020 /*0E*/
113 #define F_LS2 0x0040 /*ESC n*/
114 #define F_LS3 0x0080 /*ESC o*/
115 #define F_LS1R 0x0100 /*ESC ~*/
116 #define F_LS2R 0x0200 /*ESC }*/
117 #define F_LS3R 0x0400 /*ESC |*/
118 #define F_SS2 0x0800 /*ESC N*/
119 #define F_SS3 0x1000 /*ESC O*/
120 #define F_SS2R 0x2000 /*8E*/
121 #define F_SS3R 0x4000 /*8F*/
122 } _ISO2022EncodingInfo;
123 typedef struct {
124 _ISO2022EncodingInfo ei;
125 struct {
126 /* for future multi-locale facility */
127 _ISO2022State s_mblen;
128 _ISO2022State s_mbrlen;
129 _ISO2022State s_mbrtowc;
130 _ISO2022State s_mbtowc;
131 _ISO2022State s_mbsrtowcs;
132 _ISO2022State s_wcrtomb;
133 _ISO2022State s_wcsrtombs;
134 _ISO2022State s_wctomb;
135 } states;
136 } _ISO2022CTypeInfo;
137
/*
 * Glue macros.  Every expansion is fully parenthesized so that it
 * behaves as a single operand wherever it is used (e.g. under sizeof
 * or next to a postfix/unary operator).
 */

/* access the encoding info / a named per-function state of a ctype info */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* true while the state has not gone through _citrus_ISO2022_init_state() */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/* returned by _ISO2022_sgetwchar() when no character could be decoded */
#define _ISO2022INVALID			((wchar_t)-1)
152
/*
 * Byte classification helpers.  Each returns non-zero when the byte
 * lies in the named range.
 */

/* C0 control: 0x00-0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80-0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	return (x == 0x7f || isc0(x) || isc1(x));
}

/* position in a 94-character set: 0x21-0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position in a 96-character set: 0x20-0x7f */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}

/* 0x30-0x7f, the range accepted where the sequence table says ECMA */
static __inline int
isecma(__uint8_t x)
{
	return (!(x < 0x30 || x > 0x7f));
}

/* intermediate byte of an escape sequence: 0x20-0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte 0x60-0x6f; such multi-byte sets use three bytes per character */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
161
162 static __inline int
163 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
164 {
165
166 _DIAGASSERT(p != NULL);
167 _DIAGASSERT(cs != NULL);
168
169 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
170 cs->final = (u_char)(p[3] & 0xff);
171 cs->interm = '\0';
172 cs->vers = '\0';
173 cs->type = CS94MULTI;
174 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
175 cs->final = (u_char)(p[3] & 0xff);
176 cs->interm = '\0';
177 cs->vers = '\0';
178 cs->type = CS96MULTI;
179 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
180 cs->final = (u_char)(p[2] & 0xff);
181 cs->interm = '\0';
182 cs->vers = '\0';
183 cs->type = CS94;
184 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
185 cs->final = (u_char )(p[2] & 0xff);
186 cs->interm = '\0';
187 cs->vers = '\0';
188 cs->type = CS96;
189 } else {
190 return 1;
191 }
192
193 return 0;
194 }
195
196
/* results of the token parsers called from _citrus_ISO2022_parse_variable() */
#define _NOTMATCH	(0)	/* token is not of the kind this parser handles */
#define _MATCH		(1)	/* token was recognized and applied */
#define _PARSEFAIL	(2)	/* token has the right shape but is invalid */
200
201 static __inline int
202 get_recommend(_ISO2022EncodingInfo * __restrict ei,
203 const char * __restrict token)
204 {
205 int i;
206 _ISO2022Charset cs, *p;
207
208 if (!strchr("0123", token[0]) || token[1] != '=')
209 return (_NOTMATCH);
210
211 if (getcs(&token[2], &cs) == 0)
212 ;
213 else if (!strcmp(&token[2], "94")) {
214 cs.final = (u_char)(token[4]);
215 cs.interm = '\0';
216 cs.vers = '\0';
217 cs.type = CS94;
218 } else if (!strcmp(&token[2], "96")) {
219 cs.final = (u_char)(token[4]);
220 cs.interm = '\0';
221 cs.vers = '\0';
222 cs.type = CS96;
223 } else if (!strcmp(&token[2], "94$")) {
224 cs.final = (u_char)(token[5]);
225 cs.interm = '\0';
226 cs.vers = '\0';
227 cs.type = CS94MULTI;
228 } else if (!strcmp(&token[2], "96$")) {
229 cs.final = (u_char)(token[5]);
230 cs.interm = '\0';
231 cs.vers = '\0';
232 cs.type = CS96MULTI;
233 } else {
234 return (_PARSEFAIL);
235 }
236
237 i = token[0] - '0';
238 if (!ei->recommend[i]) {
239 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
240 } else {
241 p = realloc(ei->recommend[i],
242 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
243 if (!p)
244 return (_PARSEFAIL);
245 ei->recommend[i] = p;
246 }
247 if (!ei->recommend[i])
248 return (_PARSEFAIL);
249 ei->recommendsize[i]++;
250
251 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
252 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
253 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
254 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
255
256 return (_MATCH);
257 }
258
259 static __inline int
260 get_initg(_ISO2022EncodingInfo * __restrict ei,
261 const char * __restrict token)
262 {
263 _ISO2022Charset cs;
264
265 if (strncmp("INIT", &token[0], 4) ||
266 !strchr("0123", token[4]) ||
267 token[5] != '=')
268 return (_NOTMATCH);
269
270 if (getcs(&token[6], &cs) != 0)
271 return (_PARSEFAIL);
272
273 ei->initg[token[4] - '0'].type = cs.type;
274 ei->initg[token[4] - '0'].final = cs.final;
275 ei->initg[token[4] - '0'].interm = cs.interm;
276 ei->initg[token[4] - '0'].vers = cs.vers;
277
278 return (_MATCH);
279 }
280
281 static __inline int
282 get_max(_ISO2022EncodingInfo * __restrict ei,
283 const char * __restrict token)
284 {
285 if (!strcmp(token, "MAX1")) {
286 ei->maxcharset = 1;
287 } else if (!strcmp(token, "MAX2")) {
288 ei->maxcharset = 2;
289 } else if (!strcmp(token, "MAX3")) {
290 ei->maxcharset = 3;
291 } else
292 return (_NOTMATCH);
293
294 return (_MATCH);
295 }
296
297
298 static __inline int
299 get_flags(_ISO2022EncodingInfo * __restrict ei,
300 const char * __restrict token)
301 {
302 int i;
303 static struct {
304 const char *tag;
305 int flag;
306 } const tags[] = {
307 { "DUMMY", 0 },
308 { "8BIT", F_8BIT },
309 { "NOOLD", F_NOOLD },
310 { "SI", F_SI },
311 { "SO", F_SO },
312 { "LS0", F_LS0 },
313 { "LS1", F_LS1 },
314 { "LS2", F_LS2 },
315 { "LS3", F_LS3 },
316 { "LS1R", F_LS1R },
317 { "LS2R", F_LS2R },
318 { "LS3R", F_LS3R },
319 { "SS2", F_SS2 },
320 { "SS3", F_SS3 },
321 { "SS2R", F_SS2R },
322 { "SS3R", F_SS3R },
323 { NULL, 0 }
324 };
325
326 for (i = 0; tags[i].tag; i++) {
327 if (!strcmp(token, tags[i].tag)) {
328 ei->flags |= tags[i].flag;
329 return (_MATCH);
330 }
331 }
332
333 return (_NOTMATCH);
334 }
335
336
337 static __inline int
338 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
339 const void * __restrict var, size_t lenvar)
340 {
341 char const *v, *e;
342 char buf[20];
343 int i, len, ret;
344
345 _DIAGASSERT(ei != NULL);
346
347
348 /*
349 * parse VARIABLE section.
350 */
351
352 if (!var)
353 return (EFTYPE);
354
355 v = (const char *) var;
356
357 /* initialize structure */
358 ei->maxcharset = 0;
359 for (i = 0; i < 4; i++) {
360 ei->recommend[i] = NULL;
361 ei->recommendsize[i] = 0;
362 }
363 ei->flags = 0;
364
365 while (*v) {
366 while (*v == ' ' || *v == '\t')
367 ++v;
368
369 /* find the token */
370 e = v;
371 while (*e && *e != ' ' && *e != '\t')
372 ++e;
373
374 len = e-v;
375 if (len == 0)
376 break;
377 if (len>=sizeof(buf))
378 goto parsefail;
379 snprintf(buf, sizeof(buf), "%.*s", len, v);
380
381 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
382 ;
383 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
384 ;
385 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
386 ;
387 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
388 ;
389 else
390 ret = _PARSEFAIL;
391 if (ret==_PARSEFAIL)
392 goto parsefail;
393 v = e;
394
395 }
396
397 return (0);
398
399 parsefail:
400 free(ei->recommend[0]);
401 free(ei->recommend[1]);
402 free(ei->recommend[2]);
403 free(ei->recommend[3]);
404
405 return (EFTYPE);
406 }
407
408 static __inline void
409 /*ARGSUSED*/
410 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
411 _ISO2022State * __restrict s)
412 {
413 int i;
414
415 memset(s, 0, sizeof(*s));
416 s->gl = 0;
417 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
418
419 for (i = 0; i < 4; i++) {
420 if (ei->initg[i].final) {
421 s->g[i].type = ei->initg[i].type;
422 s->g[i].final = ei->initg[i].final;
423 s->g[i].interm = ei->initg[i].interm;
424 }
425 }
426 s->singlegl = s->singlegr = -1;
427 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
428 }
429
430 static __inline void
431 /*ARGSUSED*/
432 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
433 void * __restrict pspriv,
434 const _ISO2022State * __restrict s)
435 {
436 memcpy(pspriv, (const void *)s, sizeof(*s));
437 }
438
439 static __inline void
440 /*ARGSUSED*/
441 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
442 _ISO2022State * __restrict s,
443 const void * __restrict pspriv)
444 {
445 memcpy((void *)s, pspriv, sizeof(*s));
446 }
447
448 static int
449 /*ARGSUSED*/
450 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
451 const void * __restrict var,
452 size_t lenvar)
453 {
454
455 _DIAGASSERT(ei != NULL);
456
457 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
458 }
459
460 static void
461 /*ARGSUSED*/
462 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
463 {
464 }
465
466 #define ESC '\033'
467 #define ECMA -1
468 #define INTERM -2
469 #define OECMA -3
470 static struct seqtable {
471 int type;
472 int csoff;
473 int finaloff;
474 int intermoff;
475 int versoff;
476 int len;
477 int chars[10];
478 } seqtable[] = {
479 /* G0 94MULTI special */
480 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
481 /* G0 94MULTI special with version identification */
482 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
483 /* G? 94 */
484 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
485 /* G? 94 with 2nd intermediate char */
486 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
487 /* G? 96 */
488 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
489 /* G? 96 with 2nd intermediate char */
490 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
491 /* G? 94MULTI */
492 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
493 /* G? 96MULTI */
494 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
495 /* G? 94MULTI with version specification */
496 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
497 /* LS2/3 */
498 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
499 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
500 /* LS1/2/3R */
501 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
502 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
503 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
504 /* SS2/3 */
505 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
506 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
507 /* end of records */
508 { 0, }
509 };
510
511 static int
512 seqmatch(const char * __restrict s, size_t n,
513 const struct seqtable * __restrict sp)
514 {
515 const int *p;
516
517 _DIAGASSERT(s != NULL);
518 _DIAGASSERT(sp != NULL);
519
520 p = sp->chars;
521 while (p - sp->chars < n && p - sp->chars < sp->len) {
522 switch (*p) {
523 case ECMA:
524 if (!isecma(*s))
525 goto terminate;
526 break;
527 case OECMA:
528 if (*s && strchr("@AB", *s))
529 break;
530 else
531 goto terminate;
532 case INTERM:
533 if (!isinterm(*s))
534 goto terminate;
535 break;
536 case CS94:
537 if (*s && strchr("()*+", *s))
538 break;
539 else
540 goto terminate;
541 case CS96:
542 if (*s && strchr(",-./", *s))
543 break;
544 else
545 goto terminate;
546 default:
547 if (*s != *p)
548 goto terminate;
549 break;
550 }
551
552 p++;
553 s++;
554 }
555
556 terminate:
557 return p - sp->chars;
558 }
559
560 static wchar_t
561 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
562 const char * __restrict string, size_t n,
563 const char ** __restrict result,
564 _ISO2022State * __restrict psenc)
565 {
566 wchar_t wchar = 0;
567 int cur;
568 struct seqtable *sp;
569 int nmatch;
570 int i;
571
572 _DIAGASSERT(ei != NULL);
573 _DIAGASSERT(state != NULL);
574 _DIAGASSERT(string != NULL);
575 /* result may be NULL */
576
577 while (1) {
578 /* SI/SO */
579 if (1 <= n && string[0] == '\017') {
580 psenc->gl = 0;
581 string++;
582 n--;
583 continue;
584 }
585 if (1 <= n && string[0] == '\016') {
586 psenc->gl = 1;
587 string++;
588 n--;
589 continue;
590 }
591
592 /* SS2/3R */
593 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
594 psenc->singlegl = psenc->singlegr =
595 (string[0] - '\216') + 2;
596 string++;
597 n--;
598 continue;
599 }
600
601 /* eat the letter if this is not ESC */
602 if (1 <= n && string[0] != '\033')
603 break;
604
605 /* look for a perfect match from escape sequences */
606 for (sp = &seqtable[0]; sp->len; sp++) {
607 nmatch = seqmatch(string, n, sp);
608 if (sp->len == nmatch && n >= sp->len)
609 break;
610 }
611
612 if (!sp->len)
613 goto notseq;
614
615 if (sp->type != -1) {
616 if (sp->csoff == -1)
617 i = 0;
618 else {
619 switch (sp->type) {
620 case CS94:
621 case CS94MULTI:
622 i = string[sp->csoff] - '(';
623 break;
624 case CS96:
625 case CS96MULTI:
626 i = string[sp->csoff] - ',';
627 break;
628 }
629 }
630 psenc->g[i].type = sp->type;
631 psenc->g[i].final = '\0';
632 psenc->g[i].interm = '\0';
633 psenc->g[i].vers = '\0';
634 /* sp->finaloff must not be -1 */
635 if (sp->finaloff != -1)
636 psenc->g[i].final = string[sp->finaloff];
637 if (sp->intermoff != -1)
638 psenc->g[i].interm = string[sp->intermoff];
639 if (sp->versoff != -1)
640 psenc->g[i].vers = string[sp->versoff];
641
642 string += sp->len;
643 n -= sp->len;
644 continue;
645 }
646
647 /* LS2/3 */
648 if (2 <= n && string[0] == '\033'
649 && string[1] && strchr("no", string[1])) {
650 psenc->gl = string[1] - 'n' + 2;
651 string += 2;
652 n -= 2;
653 continue;
654 }
655
656 /* LS1/2/3R */
657 /* XXX: { for vi showmatch */
658 if (2 <= n && string[0] == '\033'
659 && string[1] && strchr("~}|", string[1])) {
660 psenc->gr = 3 - (string[1] - '|');
661 string += 2;
662 n -= 2;
663 continue;
664 }
665
666 /* SS2/3 */
667 if (2 <= n && string[0] == '\033'
668 && string[1] && strchr("NO", string[1])) {
669 psenc->singlegl = (string[1] - 'N') + 2;
670 string += 2;
671 n -= 2;
672 continue;
673 }
674
675 notseq:
676 /*
677 * if we've got an unknown escape sequence, eat the ESC at the
678 * head. otherwise, wait till full escape sequence comes.
679 */
680 for (sp = &seqtable[0]; sp->len; sp++) {
681 nmatch = seqmatch(string, n, sp);
682 if (!nmatch)
683 continue;
684
685 /*
686 * if we are in the middle of escape sequence,
687 * we still need to wait for more characters to come
688 */
689 if (n < sp->len) {
690 if (nmatch == n) {
691 if (result)
692 *result = string;
693 return (_ISO2022INVALID);
694 }
695 } else {
696 if (nmatch == sp->len) {
697 /* this case should not happen */
698 goto eat;
699 }
700 }
701 }
702
703 break;
704 }
705
706 eat:
707 /* no letter to eat */
708 if (n < 1) {
709 if (result)
710 *result = string;
711 return (_ISO2022INVALID);
712 }
713
714 /* normal chars. always eat C0/C1 as is. */
715 if (iscntl(*string & 0xff))
716 cur = -1;
717 else if (*string & 0x80) {
718 cur = (psenc->singlegr == -1)
719 ? psenc->gr : psenc->singlegr;
720 } else {
721 cur = (psenc->singlegl == -1)
722 ? psenc->gl : psenc->singlegl;
723 }
724
725 if (cur == -1) {
726 asis:
727 wchar = *string++ & 0xff;
728 if (result)
729 *result = string;
730 /* reset single shift state */
731 psenc->singlegr = psenc->singlegl = -1;
732 return wchar;
733 }
734
735 /* length error check */
736 switch (psenc->g[cur].type) {
737 case CS94MULTI:
738 case CS96MULTI:
739 if (!isthree(psenc->g[cur].final)) {
740 if (2 <= n
741 && (string[0] & 0x80) == (string[1] & 0x80))
742 break;
743 } else {
744 if (3 <= n
745 && (string[0] & 0x80) == (string[1] & 0x80)
746 && (string[0] & 0x80) == (string[2] & 0x80))
747 break;
748 }
749
750 /* we still need to wait for more characters to come */
751 if (result)
752 *result = string;
753 return (_ISO2022INVALID);
754
755 case CS94:
756 case CS96:
757 if (1 <= n)
758 break;
759
760 /* we still need to wait for more characters to come */
761 if (result)
762 *result = string;
763 return (_ISO2022INVALID);
764 }
765
766 /* range check */
767 switch (psenc->g[cur].type) {
768 case CS94:
769 if (!(is94(string[0] & 0x7f)))
770 goto asis;
771 case CS96:
772 if (!(is96(string[0] & 0x7f)))
773 goto asis;
774 break;
775 case CS94MULTI:
776 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
777 goto asis;
778 break;
779 case CS96MULTI:
780 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
781 goto asis;
782 break;
783 }
784
785 /* extract the character. */
786 switch (psenc->g[cur].type) {
787 case CS94:
788 /* special case for ASCII. */
789 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
790 wchar = *string++;
791 wchar &= 0x7f;
792 break;
793 }
794 wchar = psenc->g[cur].final;
795 wchar = (wchar << 8);
796 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
797 wchar = (wchar << 8);
798 wchar = (wchar << 8) | (*string++ & 0x7f);
799 break;
800 case CS96:
801 /* special case for ISO-8859-1. */
802 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
803 wchar = *string++;
804 wchar &= 0x7f;
805 wchar |= 0x80;
806 break;
807 }
808 wchar = psenc->g[cur].final;
809 wchar = (wchar << 8);
810 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
811 wchar = (wchar << 8);
812 wchar = (wchar << 8) | (*string++ & 0x7f);
813 wchar |= 0x80;
814 break;
815 case CS94MULTI:
816 case CS96MULTI:
817 wchar = psenc->g[cur].final;
818 wchar = (wchar << 8);
819 if (isthree(psenc->g[cur].final))
820 wchar |= (*string++ & 0x7f);
821 wchar = (wchar << 8) | (*string++ & 0x7f);
822 wchar = (wchar << 8) | (*string++ & 0x7f);
823 if (psenc->g[cur].type == CS96MULTI)
824 wchar |= 0x80;
825 break;
826 }
827
828 if (result)
829 *result = string;
830 /* reset single shift state */
831 psenc->singlegr = psenc->singlegl = -1;
832 return wchar;
833 }
834
835
836
837 static int
838 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
839 wchar_t * __restrict pwc,
840 const char ** __restrict s,
841 size_t n, _ISO2022State * __restrict psenc,
842 size_t * __restrict nresult)
843 {
844 wchar_t wchar;
845 const char *s0, *p, *result;
846 int c;
847 int chlenbak;
848
849 _DIAGASSERT(nresult != 0);
850 _DIAGASSERT(ei != NULL);
851 _DIAGASSERT(psenc != NULL);
852 _DIAGASSERT(s != NULL);
853
854 s0 = *s;
855 c = 0;
856 chlenbak = psenc->chlen;
857
858 /*
859 * if we have something in buffer, use that.
860 * otherwise, skip here
861 */
862 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
863 /* illgeal state */
864 _citrus_ISO2022_init_state(ei, psenc);
865 goto encoding_error;
866 }
867 if (psenc->chlen == 0)
868 goto emptybuf;
869
870 /* buffer is not empty */
871 p = psenc->ch;
872 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
873 if (n > 0) {
874 psenc->ch[psenc->chlen++] = *s0++;
875 n--;
876 }
877
878 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
879 &result, psenc);
880 if (wchar != _ISO2022INVALID) {
881 c += result - p;
882 if (psenc->chlen > c)
883 memmove(psenc->ch, result, psenc->chlen - c);
884 if (psenc->chlen < c)
885 psenc->chlen = 0;
886 else
887 psenc->chlen -= c;
888 goto output;
889 }
890
891 c += result - p;
892 p = result;
893
894 if (n == 0)
895 goto restart;
896 }
897
898 /* escape sequence too long? */
899 goto encoding_error;
900
901 emptybuf:
902 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
903 if (wchar != _ISO2022INVALID) {
904 c += result - s0;
905 psenc->chlen = 0;
906 s0 = result;
907 goto output;
908 }
909 if (result > s0 && n > result - s0) {
910 c += (result - s0);
911 n -= (result - s0);
912 s0 = result;
913 goto emptybuf;
914 }
915 n += c;
916 if (n < sizeof(psenc->ch)) {
917 memcpy(psenc->ch, s0 - c, n);
918 psenc->chlen = n;
919 s0 = result;
920 goto restart;
921 }
922
923 /* escape sequence too long? */
924
925 encoding_error:
926 psenc->chlen = 0;
927 *nresult = (size_t)-1;
928 return (EILSEQ);
929
930 output:
931 *s = s0;
932 if (pwc)
933 *pwc = wchar;
934
935 if (!wchar)
936 *nresult = 0;
937 else
938 *nresult = c - chlenbak;
939
940 return (0);
941
942 restart:
943 *s = s0;
944 *nresult = (size_t)-2;
945
946 return (0);
947 }
948
949 static int
950 recommendation(_ISO2022EncodingInfo * __restrict ei,
951 _ISO2022Charset * __restrict cs)
952 {
953 int i, j;
954 _ISO2022Charset *recommend;
955
956 _DIAGASSERT(ei != NULL);
957 _DIAGASSERT(cs != NULL);
958
959 /* first, try a exact match. */
960 for (i = 0; i < 4; i++) {
961 recommend = ei->recommend[i];
962 for (j = 0; j < ei->recommendsize[i]; j++) {
963 if (cs->type != recommend[j].type)
964 continue;
965 if (cs->final != recommend[j].final)
966 continue;
967 if (cs->interm != recommend[j].interm)
968 continue;
969
970 return i;
971 }
972 }
973
974 /* then, try a wildcard match over final char. */
975 for (i = 0; i < 4; i++) {
976 recommend = ei->recommend[i];
977 for (j = 0; j < ei->recommendsize[i]; j++) {
978 if (cs->type != recommend[j].type)
979 continue;
980 if (cs->final && (cs->final != recommend[j].final))
981 continue;
982 if (cs->interm && (cs->interm != recommend[j].interm))
983 continue;
984
985 return i;
986 }
987 }
988
989 /* there's no recommendation. make a guess. */
990 if (ei->maxcharset == 0) {
991 return 0;
992 } else {
993 switch (cs->type) {
994 case CS94:
995 case CS94MULTI:
996 return 0;
997 case CS96:
998 case CS96MULTI:
999 return 1;
1000 }
1001 }
1002 return 0;
1003 }
1004
1005 static int
1006 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1007 char * __restrict string, size_t n,
1008 char ** __restrict result,
1009 _ISO2022State * __restrict psenc)
1010 {
1011 int i = 0, len;
1012 _ISO2022Charset cs;
1013 char *p;
1014 char tmp[MB_LEN_MAX];
1015 int target;
1016 u_char mask;
1017 int bit8;
1018
1019 _DIAGASSERT(ei != NULL);
1020 _DIAGASSERT(string != NULL);
1021 /* result may be NULL */
1022 /* state appears to be unused */
1023
1024 if (iscntl(wc & 0xff)) {
1025 /* go back to ASCII on control chars */
1026 cs.type = CS94;
1027 cs.final = 'B';
1028 cs.interm = '\0';
1029 } else if (!(wc & ~0xff)) {
1030 if (wc & 0x80) {
1031 /* special treatment for ISO-8859-1 */
1032 cs.type = CS96;
1033 cs.final = 'A';
1034 cs.interm = '\0';
1035 } else {
1036 /* special treatment for ASCII */
1037 cs.type = CS94;
1038 cs.final = 'B';
1039 cs.interm = '\0';
1040 }
1041 } else {
1042 cs.final = (wc >> 24) & 0x7f;
1043 if ((wc >> 16) & 0x80)
1044 cs.interm = (wc >> 16) & 0x7f;
1045 else
1046 cs.interm = '\0';
1047 if (wc & 0x80)
1048 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1049 else
1050 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1051 }
1052 target = recommendation(ei, &cs);
1053 p = tmp;
1054 bit8 = ei->flags & F_8BIT;
1055
1056 /* designate the charset onto the target plane(G0/1/2/3). */
1057 if (psenc->g[target].type == cs.type
1058 && psenc->g[target].final == cs.final
1059 && psenc->g[target].interm == cs.interm)
1060 goto planeok;
1061
1062 *p++ = '\033';
1063 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1064 *p++ = '$';
1065 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1066 && !cs.interm && !(ei->flags & F_NOOLD))
1067 ;
1068 else if (cs.type == CS94 || cs.type == CS94MULTI)
1069 *p++ = "()*+"[target];
1070 else
1071 *p++ = ",-./"[target];
1072 if (cs.interm)
1073 *p++ = cs.interm;
1074 *p++ = cs.final;
1075
1076 psenc->g[target].type = cs.type;
1077 psenc->g[target].final = cs.final;
1078 psenc->g[target].interm = cs.interm;
1079
1080 planeok:
1081 /* invoke the plane onto GL or GR. */
1082 if (psenc->gl == target)
1083 goto sideok;
1084 if (bit8 && psenc->gr == target)
1085 goto sideok;
1086
1087 if (target == 0 && (ei->flags & F_LS0)) {
1088 *p++ = '\017';
1089 psenc->gl = 0;
1090 } else if (target == 1 && (ei->flags & F_LS1)) {
1091 *p++ = '\016';
1092 psenc->gl = 1;
1093 } else if (target == 2 && (ei->flags & F_LS2)) {
1094 *p++ = '\033';
1095 *p++ = 'n';
1096 psenc->gl = 2;
1097 } else if (target == 3 && (ei->flags & F_LS3)) {
1098 *p++ = '\033';
1099 *p++ = 'o';
1100 psenc->gl = 3;
1101 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1102 *p++ = '\033';
1103 *p++ = '~';
1104 psenc->gr = 1;
1105 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1106 *p++ = '\033';
1107 /*{*/
1108 *p++ = '}';
1109 psenc->gr = 2;
1110 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1111 *p++ = '\033';
1112 *p++ = '|';
1113 psenc->gr = 3;
1114 } else if (target == 2 && (ei->flags & F_SS2)) {
1115 *p++ = '\033';
1116 *p++ = 'N';
1117 psenc->singlegl = 2;
1118 } else if (target == 3 && (ei->flags & F_SS3)) {
1119 *p++ = '\033';
1120 *p++ = 'O';
1121 psenc->singlegl = 3;
1122 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1123 *p++ = '\216';
1124 *p++ = 'N';
1125 psenc->singlegl = psenc->singlegr = 2;
1126 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1127 *p++ = '\217';
1128 *p++ = 'O';
1129 psenc->singlegl = psenc->singlegr = 3;
1130 } else
1131 abort();
1132
1133 sideok:
1134 if (psenc->singlegl == target)
1135 mask = 0x00;
1136 else if (psenc->singlegr == target)
1137 mask = 0x80;
1138 else if (psenc->gl == target)
1139 mask = 0x00;
1140 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1141 mask = 0x80;
1142 else
1143 abort();
1144
1145 switch (cs.type) {
1146 case CS94:
1147 case CS96:
1148 i = 1;
1149 break;
1150 case CS94MULTI:
1151 case CS96MULTI:
1152 i = isthree(cs.final) ? 3 : 2;
1153 break;
1154 }
1155 while (i-- > 0)
1156 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1157
1158 /* reset single shift state */
1159 psenc->singlegl = psenc->singlegr = -1;
1160
1161 len = p - tmp;
1162 if (n < len) {
1163 if (result)
1164 *result = (char *)0;
1165 } else {
1166 if (result)
1167 *result = string + len;
1168 memcpy(string, tmp, len);
1169 }
1170 return len;
1171 }
1172
1173 static int
1174 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1175 char * __restrict s, size_t n,
1176 _ISO2022State * __restrict psenc,
1177 size_t * __restrict nresult)
1178 {
1179 char buf[MB_LEN_MAX];
1180 char *result;
1181 int len, ret;
1182
1183 _DIAGASSERT(ei != NULL);
1184 _DIAGASSERT(nresult != 0);
1185 _DIAGASSERT(s != NULL);
1186
1187 /* XXX state will be modified after this operation... */
1188 len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc);
1189 if (len==0) {
1190 ret = EINVAL;
1191 goto err;
1192 }
1193 if (sizeof(buf) < len || n < len-1) {
1194 /* XXX should recover state? */
1195 ret = E2BIG;
1196 goto err;
1197 }
1198
1199 memcpy(s, buf, len-1);
1200 *nresult = (size_t)(len-1);
1201 return (0);
1202
1203 err:
1204 /* bound check failure */
1205 *nresult = (size_t)-1;
1206 return ret;
1207 }
1208
1209 static int
1210 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1211 char * __restrict s, size_t n, wchar_t wc,
1212 _ISO2022State * __restrict psenc,
1213 size_t * __restrict nresult)
1214 {
1215 char buf[MB_LEN_MAX];
1216 char *result;
1217 int len, ret;
1218
1219 _DIAGASSERT(ei != NULL);
1220 _DIAGASSERT(nresult != 0);
1221 _DIAGASSERT(s != NULL);
1222
1223 /* XXX state will be modified after this operation... */
1224 len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1225 if (sizeof(buf) < len || n < len) {
1226 /* XXX should recover state? */
1227 ret = E2BIG;
1228 goto err;
1229 }
1230
1231 memcpy(s, buf, len);
1232 *nresult = (size_t)len;
1233 return (0);
1234
1235 err:
1236 /* bound check failure */
1237 *nresult = (size_t)-1;
1238 return ret;
1239 }
1240
1241 static __inline int
1242 /*ARGSUSED*/
1243 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1244 _csid_t * __restrict csid,
1245 _index_t * __restrict idx, wchar_t wc)
1246 {
1247 wchar_t m, nm;
1248
1249 _DIAGASSERT(csid != NULL && idx != NULL);
1250
1251 m = wc & 0x7FFF8080;
1252 nm = wc & 0x007F7F7F;
1253 if (m & 0x00800000) {
1254 nm &= 0x00007F7F;
1255 } else {
1256 m &= 0x7F008080;
1257 }
1258 if (nm & 0x007F0000) {
1259 /* ^3 mark */
1260 m |= 0x007F0000;
1261 } else if (nm & 0x00007F00) {
1262 /* ^2 mark */
1263 m |= 0x00007F00;
1264 }
1265 *csid = (_csid_t)m;
1266 *idx = (_index_t)nm;
1267
1268 return (0);
1269 }
1270
1271 static __inline int
1272 /*ARGSUSED*/
1273 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1274 wchar_t * __restrict wc,
1275 _csid_t csid, _index_t idx)
1276 {
1277
1278 _DIAGASSERT(ei != NULL && wc != NULL);
1279
1280 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1281
1282 return (0);
1283 }
1284
1285 /* ----------------------------------------------------------------------
1286 * public interface for ctype
1287 */
1288
1289 _CITRUS_CTYPE_DECLS(ISO2022);
1290 _CITRUS_CTYPE_DEF_OPS(ISO2022);
1291
1292 #include "citrus_ctype_template.h"
1293
1294 /* ----------------------------------------------------------------------
1295 * public interface for stdenc
1296 */
1297
1298 _CITRUS_STDENC_DECLS(ISO2022);
1299 _CITRUS_STDENC_DEF_OPS(ISO2022);
1300
1301 #include "citrus_stdenc_template.h"
1302