citrus_iso2022.c revision 1.8 1 1.8 tshiozak /* $NetBSD: citrus_iso2022.c,v 1.8 2003/06/26 12:09:58 tshiozak Exp $ */
2 1.1 tshiozak
3 1.1 tshiozak /*-
4 1.1 tshiozak * Copyright (c)1999, 2002 Citrus Project,
5 1.1 tshiozak * All rights reserved.
6 1.1 tshiozak *
7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without
8 1.1 tshiozak * modification, are permitted provided that the following conditions
9 1.1 tshiozak * are met:
10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright
11 1.1 tshiozak * notice, this list of conditions and the following disclaimer.
12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the
14 1.1 tshiozak * documentation and/or other materials provided with the distribution.
15 1.1 tshiozak *
16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1 tshiozak * SUCH DAMAGE.
27 1.1 tshiozak *
28 1.1 tshiozak * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 1.1 tshiozak */
30 1.1 tshiozak
31 1.1 tshiozak #include <sys/cdefs.h>
32 1.1 tshiozak #if defined(LIBC_SCCS) && !defined(lint)
33 1.8 tshiozak __RCSID("$NetBSD: citrus_iso2022.c,v 1.8 2003/06/26 12:09:58 tshiozak Exp $");
34 1.1 tshiozak #endif /* LIBC_SCCS and not lint */
35 1.1 tshiozak
36 1.1 tshiozak #include <assert.h>
37 1.1 tshiozak #include <errno.h>
38 1.1 tshiozak #include <string.h>
39 1.1 tshiozak #include <stdio.h>
40 1.1 tshiozak #include <stdlib.h>
41 1.1 tshiozak #include <stddef.h>
42 1.1 tshiozak #include <locale.h>
43 1.1 tshiozak #include <wchar.h>
44 1.1 tshiozak #include <sys/types.h>
45 1.1 tshiozak #include <limits.h>
46 1.7 tshiozak
47 1.7 tshiozak #include "citrus_namespace.h"
48 1.7 tshiozak #include "citrus_types.h"
49 1.1 tshiozak #include "citrus_module.h"
50 1.1 tshiozak #include "citrus_ctype.h"
51 1.7 tshiozak #include "citrus_stdenc.h"
52 1.1 tshiozak #include "citrus_iso2022.h"
53 1.1 tshiozak
54 1.1 tshiozak
55 1.1 tshiozak /* ----------------------------------------------------------------------
56 1.1 tshiozak * private stuffs used by templates
57 1.1 tshiozak */
58 1.1 tshiozak
59 1.1 tshiozak
60 1.1 tshiozak /*
61 1.1 tshiozak * wchar_t mappings:
62 1.1 tshiozak * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
63 1.1 tshiozak * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
64 1.1 tshiozak * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
65 1.1 tshiozak * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
66 1.1 tshiozak * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
67 1.1 tshiozak * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
68 1.1 tshiozak * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
69 1.1 tshiozak * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
70 1.1 tshiozak * 94x94 charset (ESC & V ESC $ ( F)
71 1.1 tshiozak * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72 1.1 tshiozak * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73 1.1 tshiozak * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74 1.1 tshiozak */
75 1.1 tshiozak
76 1.1 tshiozak typedef struct {
77 1.1 tshiozak u_char type;
78 1.1 tshiozak #define CS94 (0U)
79 1.1 tshiozak #define CS96 (1U)
80 1.1 tshiozak #define CS94MULTI (2U)
81 1.1 tshiozak #define CS96MULTI (3U)
82 1.1 tshiozak
83 1.1 tshiozak u_char final;
84 1.1 tshiozak u_char interm;
85 1.1 tshiozak u_char vers;
86 1.1 tshiozak } _ISO2022Charset;
87 1.1 tshiozak
88 1.1 tshiozak typedef struct {
89 1.1 tshiozak _ISO2022Charset g[4];
90 1.1 tshiozak /* need 3 bits to hold -1, 0, ..., 3 */
91 1.1 tshiozak int gl:3,
92 1.1 tshiozak gr:3,
93 1.1 tshiozak singlegl:3,
94 1.1 tshiozak singlegr:3;
95 1.1 tshiozak char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
96 1.1 tshiozak int chlen;
97 1.5 yamt int flags;
98 1.5 yamt #define _ISO2022STATE_FLAG_INITIALIZED 1
99 1.4 yamt } _ISO2022State;
100 1.1 tshiozak
101 1.1 tshiozak typedef struct {
102 1.1 tshiozak _ISO2022Charset *recommend[4];
103 1.1 tshiozak size_t recommendsize[4];
104 1.1 tshiozak _ISO2022Charset initg[4];
105 1.1 tshiozak int maxcharset;
106 1.1 tshiozak int flags;
107 1.1 tshiozak #define F_8BIT 0x0001
108 1.1 tshiozak #define F_NOOLD 0x0002
109 1.1 tshiozak #define F_SI 0x0010 /*0F*/
110 1.1 tshiozak #define F_SO 0x0020 /*0E*/
111 1.1 tshiozak #define F_LS0 0x0010 /*0F*/
112 1.1 tshiozak #define F_LS1 0x0020 /*0E*/
113 1.1 tshiozak #define F_LS2 0x0040 /*ESC n*/
114 1.1 tshiozak #define F_LS3 0x0080 /*ESC o*/
115 1.1 tshiozak #define F_LS1R 0x0100 /*ESC ~*/
116 1.1 tshiozak #define F_LS2R 0x0200 /*ESC }*/
117 1.1 tshiozak #define F_LS3R 0x0400 /*ESC |*/
118 1.1 tshiozak #define F_SS2 0x0800 /*ESC N*/
119 1.1 tshiozak #define F_SS3 0x1000 /*ESC O*/
120 1.1 tshiozak #define F_SS2R 0x2000 /*8E*/
121 1.1 tshiozak #define F_SS3R 0x4000 /*8F*/
122 1.1 tshiozak } _ISO2022EncodingInfo;
123 1.1 tshiozak typedef struct {
124 1.1 tshiozak _ISO2022EncodingInfo ei;
125 1.1 tshiozak struct {
126 1.1 tshiozak /* for future multi-locale facility */
127 1.1 tshiozak _ISO2022State s_mblen;
128 1.1 tshiozak _ISO2022State s_mbrlen;
129 1.1 tshiozak _ISO2022State s_mbrtowc;
130 1.1 tshiozak _ISO2022State s_mbtowc;
131 1.1 tshiozak _ISO2022State s_mbsrtowcs;
132 1.1 tshiozak _ISO2022State s_wcrtomb;
133 1.1 tshiozak _ISO2022State s_wcsrtombs;
134 1.1 tshiozak _ISO2022State s_wctomb;
135 1.1 tshiozak } states;
136 1.1 tshiozak } _ISO2022CTypeInfo;
137 1.1 tshiozak
/* Accessors into _ISO2022CTypeInfo: the encoding info and a named state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

/* Names the shared templates expect this encoding to provide. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* true until _ISO2022STATE_FLAG_INITIALIZED has been set in the state */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * "No character" marker returned by the decoder.  Fully parenthesized so
 * the cast cannot bind to a neighbouring operator at the point of use.
 */
#define _ISO2022INVALID			((wchar_t)-1)
152 1.1 tshiozak
/* Byte classification helpers; each returns 1 for a member, 0 otherwise. */

/* C0 control: 0x00-0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80-0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return (1);
	return (isc0(x) || isc1(x));
}

/* position in a 94-character set: 0x21-0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position in a 96-character set: 0x20-0x7f */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}

/* byte accepted where the sequence table says ECMA: 0x30-0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte: 0x20-0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte of a three-byte multibyte set: 0x60-0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
161 1.1 tshiozak
162 1.1 tshiozak static __inline int
163 1.1 tshiozak getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
164 1.1 tshiozak {
165 1.1 tshiozak
166 1.1 tshiozak _DIAGASSERT(p != NULL);
167 1.1 tshiozak _DIAGASSERT(cs != NULL);
168 1.1 tshiozak
169 1.1 tshiozak if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
170 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
171 1.1 tshiozak cs->interm = '\0';
172 1.1 tshiozak cs->vers = '\0';
173 1.1 tshiozak cs->type = CS94MULTI;
174 1.1 tshiozak } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
175 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
176 1.1 tshiozak cs->interm = '\0';
177 1.1 tshiozak cs->vers = '\0';
178 1.1 tshiozak cs->type = CS96MULTI;
179 1.1 tshiozak } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
180 1.1 tshiozak cs->final = (u_char)(p[2] & 0xff);
181 1.1 tshiozak cs->interm = '\0';
182 1.1 tshiozak cs->vers = '\0';
183 1.1 tshiozak cs->type = CS94;
184 1.1 tshiozak } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
185 1.1 tshiozak cs->final = (u_char )(p[2] & 0xff);
186 1.1 tshiozak cs->interm = '\0';
187 1.1 tshiozak cs->vers = '\0';
188 1.1 tshiozak cs->type = CS96;
189 1.1 tshiozak } else {
190 1.1 tshiozak return 1;
191 1.1 tshiozak }
192 1.1 tshiozak
193 1.1 tshiozak return 0;
194 1.1 tshiozak }
195 1.1 tshiozak
196 1.1 tshiozak
197 1.1 tshiozak #define _NOTMATCH 0
198 1.1 tshiozak #define _MATCH 1
199 1.1 tshiozak #define _PARSEFAIL 2
200 1.1 tshiozak
201 1.1 tshiozak static __inline int
202 1.1 tshiozak get_recommend(_ISO2022EncodingInfo * __restrict ei,
203 1.1 tshiozak const char * __restrict token)
204 1.1 tshiozak {
205 1.1 tshiozak int i;
206 1.1 tshiozak _ISO2022Charset cs;
207 1.1 tshiozak
208 1.1 tshiozak if (!strchr("0123", token[0]) || token[1] != '=')
209 1.1 tshiozak return (_NOTMATCH);
210 1.1 tshiozak
211 1.1 tshiozak if (getcs(&token[2], &cs) == 0)
212 1.1 tshiozak ;
213 1.1 tshiozak else if (!strcmp(&token[2], "94")) {
214 1.1 tshiozak cs.final = (u_char)(token[4]);
215 1.1 tshiozak cs.interm = '\0';
216 1.1 tshiozak cs.vers = '\0';
217 1.1 tshiozak cs.type = CS94;
218 1.1 tshiozak } else if (!strcmp(&token[2], "96")) {
219 1.1 tshiozak cs.final = (u_char)(token[4]);
220 1.1 tshiozak cs.interm = '\0';
221 1.1 tshiozak cs.vers = '\0';
222 1.1 tshiozak cs.type = CS96;
223 1.1 tshiozak } else if (!strcmp(&token[2], "94$")) {
224 1.1 tshiozak cs.final = (u_char)(token[5]);
225 1.1 tshiozak cs.interm = '\0';
226 1.1 tshiozak cs.vers = '\0';
227 1.1 tshiozak cs.type = CS94MULTI;
228 1.1 tshiozak } else if (!strcmp(&token[2], "96$")) {
229 1.1 tshiozak cs.final = (u_char)(token[5]);
230 1.1 tshiozak cs.interm = '\0';
231 1.1 tshiozak cs.vers = '\0';
232 1.1 tshiozak cs.type = CS96MULTI;
233 1.1 tshiozak } else {
234 1.1 tshiozak return (_PARSEFAIL);
235 1.1 tshiozak }
236 1.1 tshiozak
237 1.1 tshiozak i = token[0] - '0';
238 1.1 tshiozak ei->recommendsize[i] += 1;
239 1.1 tshiozak if (!ei->recommend[i]) {
240 1.1 tshiozak ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
241 1.1 tshiozak } else {
242 1.1 tshiozak ei->recommend[i] =
243 1.1 tshiozak realloc(ei->recommend[i],
244 1.1 tshiozak sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
245 1.1 tshiozak }
246 1.1 tshiozak if (!ei->recommend[i])
247 1.1 tshiozak return (_PARSEFAIL);
248 1.1 tshiozak
249 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
250 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
251 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
252 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
253 1.1 tshiozak
254 1.1 tshiozak return (_MATCH);
255 1.1 tshiozak }
256 1.1 tshiozak
257 1.1 tshiozak static __inline int
258 1.1 tshiozak get_initg(_ISO2022EncodingInfo * __restrict ei,
259 1.1 tshiozak const char * __restrict token)
260 1.1 tshiozak {
261 1.1 tshiozak _ISO2022Charset cs;
262 1.1 tshiozak
263 1.1 tshiozak if (strncmp("INIT", &token[0], 4) ||
264 1.1 tshiozak !strchr("0123", token[4]) ||
265 1.1 tshiozak token[5] != '=')
266 1.1 tshiozak return (_NOTMATCH);
267 1.1 tshiozak
268 1.1 tshiozak if (getcs(&token[6], &cs) != 0)
269 1.1 tshiozak return (_PARSEFAIL);
270 1.1 tshiozak
271 1.1 tshiozak ei->initg[token[4] - '0'].type = cs.type;
272 1.1 tshiozak ei->initg[token[4] - '0'].final = cs.final;
273 1.1 tshiozak ei->initg[token[4] - '0'].interm = cs.interm;
274 1.1 tshiozak ei->initg[token[4] - '0'].vers = cs.vers;
275 1.1 tshiozak
276 1.1 tshiozak return (_MATCH);
277 1.1 tshiozak }
278 1.1 tshiozak
279 1.1 tshiozak static __inline int
280 1.1 tshiozak get_max(_ISO2022EncodingInfo * __restrict ei,
281 1.1 tshiozak const char * __restrict token)
282 1.1 tshiozak {
283 1.1 tshiozak if (!strcmp(token, "MAX1")) {
284 1.1 tshiozak ei->maxcharset = 1;
285 1.1 tshiozak } else if (!strcmp(token, "MAX2")) {
286 1.1 tshiozak ei->maxcharset = 2;
287 1.1 tshiozak } else if (!strcmp(token, "MAX3")) {
288 1.1 tshiozak ei->maxcharset = 3;
289 1.1 tshiozak } else
290 1.1 tshiozak return (_NOTMATCH);
291 1.1 tshiozak
292 1.1 tshiozak return (_MATCH);
293 1.1 tshiozak }
294 1.1 tshiozak
295 1.1 tshiozak
296 1.1 tshiozak static __inline int
297 1.1 tshiozak get_flags(_ISO2022EncodingInfo * __restrict ei,
298 1.1 tshiozak const char * __restrict token)
299 1.1 tshiozak {
300 1.1 tshiozak int i;
301 1.1 tshiozak static struct {
302 1.1 tshiozak const char *tag;
303 1.1 tshiozak int flag;
304 1.1 tshiozak } const tags[] = {
305 1.1 tshiozak { "DUMMY", 0 },
306 1.1 tshiozak { "8BIT", F_8BIT },
307 1.1 tshiozak { "NOOLD", F_NOOLD },
308 1.1 tshiozak { "SI", F_SI },
309 1.1 tshiozak { "SO", F_SO },
310 1.1 tshiozak { "LS0", F_LS0 },
311 1.1 tshiozak { "LS1", F_LS1 },
312 1.1 tshiozak { "LS2", F_LS2 },
313 1.1 tshiozak { "LS3", F_LS3 },
314 1.1 tshiozak { "LS1R", F_LS1R },
315 1.1 tshiozak { "LS2R", F_LS2R },
316 1.1 tshiozak { "LS3R", F_LS3R },
317 1.1 tshiozak { "SS2", F_SS2 },
318 1.1 tshiozak { "SS3", F_SS3 },
319 1.1 tshiozak { "SS2R", F_SS2R },
320 1.1 tshiozak { "SS3R", F_SS3R },
321 1.1 tshiozak { NULL, 0 }
322 1.1 tshiozak };
323 1.1 tshiozak
324 1.1 tshiozak for (i = 0; tags[i].tag; i++) {
325 1.1 tshiozak if (!strcmp(token, tags[i].tag)) {
326 1.1 tshiozak ei->flags |= tags[i].flag;
327 1.1 tshiozak return (_MATCH);
328 1.1 tshiozak }
329 1.1 tshiozak }
330 1.1 tshiozak
331 1.1 tshiozak return (_NOTMATCH);
332 1.1 tshiozak }
333 1.1 tshiozak
334 1.1 tshiozak
335 1.1 tshiozak static __inline int
336 1.1 tshiozak _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
337 1.1 tshiozak const void * __restrict var, size_t lenvar)
338 1.1 tshiozak {
339 1.1 tshiozak char const *v, *e;
340 1.1 tshiozak char buf[20];
341 1.1 tshiozak int i, len, ret;
342 1.1 tshiozak
343 1.1 tshiozak _DIAGASSERT(ei != NULL);
344 1.1 tshiozak
345 1.1 tshiozak
346 1.1 tshiozak /*
347 1.1 tshiozak * parse VARIABLE section.
348 1.1 tshiozak */
349 1.1 tshiozak
350 1.1 tshiozak if (!var)
351 1.1 tshiozak return (EFTYPE);
352 1.1 tshiozak
353 1.1 tshiozak v = (const char *) var;
354 1.1 tshiozak
355 1.1 tshiozak /* initialize structure */
356 1.1 tshiozak ei->maxcharset = 0;
357 1.1 tshiozak for (i = 0; i < 4; i++) {
358 1.1 tshiozak ei->recommend[i] = NULL;
359 1.1 tshiozak ei->recommendsize[i] = 0;
360 1.1 tshiozak }
361 1.1 tshiozak ei->flags = 0;
362 1.1 tshiozak
363 1.1 tshiozak while (*v) {
364 1.1 tshiozak while (*v == ' ' || *v == '\t')
365 1.1 tshiozak ++v;
366 1.1 tshiozak
367 1.1 tshiozak /* find the token */
368 1.1 tshiozak e = v;
369 1.1 tshiozak while (*e && *e != ' ' && *e != '\t')
370 1.1 tshiozak ++e;
371 1.7 tshiozak
372 1.7 tshiozak len = e-v;
373 1.7 tshiozak if (len == 0)
374 1.7 tshiozak break;
375 1.7 tshiozak if (len>=sizeof(buf))
376 1.7 tshiozak goto parsefail;
377 1.7 tshiozak sprintf(buf, "%.*s", len, v);
378 1.1 tshiozak
379 1.1 tshiozak if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
380 1.1 tshiozak ;
381 1.1 tshiozak else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
382 1.1 tshiozak ;
383 1.1 tshiozak else if ((ret = get_max(ei, buf)) != _NOTMATCH)
384 1.1 tshiozak ;
385 1.1 tshiozak else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
386 1.1 tshiozak ;
387 1.1 tshiozak else
388 1.1 tshiozak ret = _PARSEFAIL;
389 1.1 tshiozak if (ret==_PARSEFAIL)
390 1.1 tshiozak goto parsefail;
391 1.1 tshiozak v = e;
392 1.1 tshiozak
393 1.1 tshiozak }
394 1.1 tshiozak
395 1.1 tshiozak return (0);
396 1.1 tshiozak
397 1.1 tshiozak parsefail:
398 1.1 tshiozak free(ei->recommend[0]);
399 1.1 tshiozak free(ei->recommend[1]);
400 1.1 tshiozak free(ei->recommend[2]);
401 1.1 tshiozak free(ei->recommend[3]);
402 1.1 tshiozak
403 1.1 tshiozak return (EFTYPE);
404 1.1 tshiozak }
405 1.1 tshiozak
406 1.1 tshiozak static __inline void
407 1.1 tshiozak /*ARGSUSED*/
408 1.1 tshiozak _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
409 1.1 tshiozak _ISO2022State * __restrict s)
410 1.1 tshiozak {
411 1.1 tshiozak int i;
412 1.1 tshiozak
413 1.1 tshiozak memset(s, 0, sizeof(*s));
414 1.1 tshiozak s->gl = 0;
415 1.1 tshiozak s->gr = (ei->flags & F_8BIT) ? 1 : -1;
416 1.1 tshiozak
417 1.1 tshiozak for (i = 0; i < 4; i++) {
418 1.1 tshiozak if (ei->initg[i].final) {
419 1.1 tshiozak s->g[i].type = ei->initg[i].type;
420 1.1 tshiozak s->g[i].final = ei->initg[i].final;
421 1.1 tshiozak s->g[i].interm = ei->initg[i].interm;
422 1.1 tshiozak }
423 1.1 tshiozak }
424 1.1 tshiozak s->singlegl = s->singlegr = -1;
425 1.5 yamt s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
426 1.1 tshiozak }
427 1.1 tshiozak
428 1.1 tshiozak static __inline void
429 1.1 tshiozak /*ARGSUSED*/
430 1.1 tshiozak _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
431 1.1 tshiozak void * __restrict pspriv,
432 1.1 tshiozak const _ISO2022State * __restrict s)
433 1.1 tshiozak {
434 1.1 tshiozak memcpy(pspriv, (const void *)s, sizeof(*s));
435 1.1 tshiozak }
436 1.1 tshiozak
437 1.1 tshiozak static __inline void
438 1.1 tshiozak /*ARGSUSED*/
439 1.1 tshiozak _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
440 1.1 tshiozak _ISO2022State * __restrict s,
441 1.1 tshiozak const void * __restrict pspriv)
442 1.1 tshiozak {
443 1.1 tshiozak memcpy((void *)s, pspriv, sizeof(*s));
444 1.1 tshiozak }
445 1.1 tshiozak
446 1.1 tshiozak static int
447 1.1 tshiozak /*ARGSUSED*/
448 1.7 tshiozak _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
449 1.7 tshiozak const void * __restrict var,
450 1.7 tshiozak size_t lenvar)
451 1.1 tshiozak {
452 1.1 tshiozak
453 1.1 tshiozak _DIAGASSERT(ei != NULL);
454 1.1 tshiozak
455 1.1 tshiozak return _citrus_ISO2022_parse_variable(ei, var, lenvar);
456 1.1 tshiozak }
457 1.1 tshiozak
458 1.1 tshiozak static void
459 1.1 tshiozak /*ARGSUSED*/
460 1.7 tshiozak _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
461 1.1 tshiozak {
462 1.1 tshiozak }
463 1.1 tshiozak
464 1.1 tshiozak #define ESC '\033'
465 1.1 tshiozak #define ECMA -1
466 1.1 tshiozak #define INTERM -2
467 1.1 tshiozak #define OECMA -3
468 1.1 tshiozak static struct seqtable {
469 1.1 tshiozak int type;
470 1.1 tshiozak int csoff;
471 1.1 tshiozak int finaloff;
472 1.1 tshiozak int intermoff;
473 1.1 tshiozak int versoff;
474 1.1 tshiozak int len;
475 1.1 tshiozak int chars[10];
476 1.1 tshiozak } seqtable[] = {
477 1.1 tshiozak /* G0 94MULTI special */
478 1.1 tshiozak { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
479 1.1 tshiozak /* G0 94MULTI special with version identification */
480 1.1 tshiozak { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
481 1.1 tshiozak /* G? 94 */
482 1.1 tshiozak { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
483 1.1 tshiozak /* G? 94 with 2nd intermediate char */
484 1.1 tshiozak { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
485 1.1 tshiozak /* G? 96 */
486 1.1 tshiozak { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
487 1.1 tshiozak /* G? 96 with 2nd intermediate char */
488 1.1 tshiozak { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
489 1.1 tshiozak /* G? 94MULTI */
490 1.1 tshiozak { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
491 1.1 tshiozak /* G? 96MULTI */
492 1.1 tshiozak { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
493 1.1 tshiozak /* G? 94MULTI with version specification */
494 1.1 tshiozak { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
495 1.1 tshiozak /* LS2/3 */
496 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
497 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
498 1.1 tshiozak /* LS1/2/3R */
499 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
500 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
501 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
502 1.1 tshiozak /* SS2/3 */
503 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
504 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
505 1.1 tshiozak /* end of records */
506 1.1 tshiozak { 0, }
507 1.1 tshiozak };
508 1.1 tshiozak
509 1.1 tshiozak static int
510 1.1 tshiozak seqmatch(const char * __restrict s, size_t n,
511 1.1 tshiozak const struct seqtable * __restrict sp)
512 1.1 tshiozak {
513 1.1 tshiozak const int *p;
514 1.1 tshiozak
515 1.1 tshiozak _DIAGASSERT(s != NULL);
516 1.1 tshiozak _DIAGASSERT(sp != NULL);
517 1.1 tshiozak
518 1.1 tshiozak p = sp->chars;
519 1.1 tshiozak while (p - sp->chars < n && p - sp->chars < sp->len) {
520 1.1 tshiozak switch (*p) {
521 1.1 tshiozak case ECMA:
522 1.1 tshiozak if (!isecma(*s))
523 1.1 tshiozak goto terminate;
524 1.1 tshiozak break;
525 1.1 tshiozak case OECMA:
526 1.1 tshiozak if (*s && strchr("@AB", *s))
527 1.1 tshiozak break;
528 1.1 tshiozak else
529 1.1 tshiozak goto terminate;
530 1.1 tshiozak case INTERM:
531 1.1 tshiozak if (!isinterm(*s))
532 1.1 tshiozak goto terminate;
533 1.1 tshiozak break;
534 1.1 tshiozak case CS94:
535 1.1 tshiozak if (*s && strchr("()*+", *s))
536 1.1 tshiozak break;
537 1.1 tshiozak else
538 1.1 tshiozak goto terminate;
539 1.1 tshiozak case CS96:
540 1.1 tshiozak if (*s && strchr(",-./", *s))
541 1.1 tshiozak break;
542 1.1 tshiozak else
543 1.1 tshiozak goto terminate;
544 1.1 tshiozak default:
545 1.1 tshiozak if (*s != *p)
546 1.1 tshiozak goto terminate;
547 1.1 tshiozak break;
548 1.1 tshiozak }
549 1.1 tshiozak
550 1.1 tshiozak p++;
551 1.1 tshiozak s++;
552 1.1 tshiozak }
553 1.1 tshiozak
554 1.1 tshiozak terminate:
555 1.1 tshiozak return p - sp->chars;
556 1.1 tshiozak }
557 1.1 tshiozak
558 1.1 tshiozak static wchar_t
559 1.1 tshiozak _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
560 1.1 tshiozak const char * __restrict string, size_t n,
561 1.1 tshiozak const char ** __restrict result,
562 1.1 tshiozak _ISO2022State * __restrict psenc)
563 1.1 tshiozak {
564 1.1 tshiozak wchar_t wchar = 0;
565 1.1 tshiozak int cur;
566 1.1 tshiozak struct seqtable *sp;
567 1.1 tshiozak int nmatch;
568 1.1 tshiozak int i;
569 1.1 tshiozak
570 1.1 tshiozak _DIAGASSERT(ei != NULL);
571 1.1 tshiozak _DIAGASSERT(state != NULL);
572 1.1 tshiozak _DIAGASSERT(string != NULL);
573 1.1 tshiozak /* result may be NULL */
574 1.1 tshiozak
575 1.1 tshiozak while (1) {
576 1.1 tshiozak /* SI/SO */
577 1.1 tshiozak if (1 <= n && string[0] == '\017') {
578 1.1 tshiozak psenc->gl = 0;
579 1.1 tshiozak string++;
580 1.1 tshiozak n--;
581 1.1 tshiozak continue;
582 1.1 tshiozak }
583 1.1 tshiozak if (1 <= n && string[0] == '\016') {
584 1.1 tshiozak psenc->gl = 1;
585 1.1 tshiozak string++;
586 1.1 tshiozak n--;
587 1.1 tshiozak continue;
588 1.1 tshiozak }
589 1.1 tshiozak
590 1.1 tshiozak /* SS2/3R */
591 1.1 tshiozak if (1 <= n && string[0] && strchr("\217\216", string[0])) {
592 1.1 tshiozak psenc->singlegl = psenc->singlegr =
593 1.1 tshiozak (string[0] - '\216') + 2;
594 1.1 tshiozak string++;
595 1.1 tshiozak n--;
596 1.1 tshiozak continue;
597 1.1 tshiozak }
598 1.1 tshiozak
599 1.1 tshiozak /* eat the letter if this is not ESC */
600 1.1 tshiozak if (1 <= n && string[0] != '\033')
601 1.1 tshiozak break;
602 1.1 tshiozak
603 1.1 tshiozak /* look for a perfect match from escape sequences */
604 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
605 1.1 tshiozak nmatch = seqmatch(string, n, sp);
606 1.1 tshiozak if (sp->len == nmatch && n >= sp->len)
607 1.1 tshiozak break;
608 1.1 tshiozak }
609 1.1 tshiozak
610 1.1 tshiozak if (!sp->len)
611 1.1 tshiozak goto notseq;
612 1.1 tshiozak
613 1.1 tshiozak if (sp->type != -1) {
614 1.1 tshiozak if (sp->csoff == -1)
615 1.1 tshiozak i = 0;
616 1.1 tshiozak else {
617 1.1 tshiozak switch (sp->type) {
618 1.1 tshiozak case CS94:
619 1.1 tshiozak case CS94MULTI:
620 1.1 tshiozak i = string[sp->csoff] - '(';
621 1.1 tshiozak break;
622 1.1 tshiozak case CS96:
623 1.1 tshiozak case CS96MULTI:
624 1.1 tshiozak i = string[sp->csoff] - ',';
625 1.1 tshiozak break;
626 1.1 tshiozak }
627 1.1 tshiozak }
628 1.1 tshiozak psenc->g[i].type = sp->type;
629 1.1 tshiozak psenc->g[i].final = '\0';
630 1.1 tshiozak psenc->g[i].interm = '\0';
631 1.1 tshiozak psenc->g[i].vers = '\0';
632 1.1 tshiozak /* sp->finaloff must not be -1 */
633 1.1 tshiozak if (sp->finaloff != -1)
634 1.1 tshiozak psenc->g[i].final = string[sp->finaloff];
635 1.1 tshiozak if (sp->intermoff != -1)
636 1.1 tshiozak psenc->g[i].interm = string[sp->intermoff];
637 1.1 tshiozak if (sp->versoff != -1)
638 1.1 tshiozak psenc->g[i].vers = string[sp->versoff];
639 1.1 tshiozak
640 1.1 tshiozak string += sp->len;
641 1.1 tshiozak n -= sp->len;
642 1.1 tshiozak continue;
643 1.1 tshiozak }
644 1.1 tshiozak
645 1.1 tshiozak /* LS2/3 */
646 1.1 tshiozak if (2 <= n && string[0] == '\033'
647 1.1 tshiozak && string[1] && strchr("no", string[1])) {
648 1.1 tshiozak psenc->gl = string[1] - 'n' + 2;
649 1.1 tshiozak string += 2;
650 1.1 tshiozak n -= 2;
651 1.1 tshiozak continue;
652 1.1 tshiozak }
653 1.1 tshiozak
654 1.1 tshiozak /* LS1/2/3R */
655 1.1 tshiozak /* XXX: { for vi showmatch */
656 1.1 tshiozak if (2 <= n && string[0] == '\033'
657 1.1 tshiozak && string[1] && strchr("~}|", string[1])) {
658 1.1 tshiozak psenc->gr = 3 - (string[1] - '|');
659 1.1 tshiozak string += 2;
660 1.1 tshiozak n -= 2;
661 1.1 tshiozak continue;
662 1.1 tshiozak }
663 1.1 tshiozak
664 1.1 tshiozak /* SS2/3 */
665 1.1 tshiozak if (2 <= n && string[0] == '\033'
666 1.1 tshiozak && string[1] && strchr("NO", string[1])) {
667 1.1 tshiozak psenc->singlegl = (string[1] - 'N') + 2;
668 1.1 tshiozak string += 2;
669 1.1 tshiozak n -= 2;
670 1.1 tshiozak continue;
671 1.1 tshiozak }
672 1.1 tshiozak
673 1.1 tshiozak notseq:
674 1.1 tshiozak /*
675 1.1 tshiozak * if we've got an unknown escape sequence, eat the ESC at the
676 1.1 tshiozak * head. otherwise, wait till full escape sequence comes.
677 1.1 tshiozak */
678 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
679 1.1 tshiozak nmatch = seqmatch(string, n, sp);
680 1.1 tshiozak if (!nmatch)
681 1.1 tshiozak continue;
682 1.1 tshiozak
683 1.1 tshiozak /*
684 1.1 tshiozak * if we are in the middle of escape sequence,
685 1.1 tshiozak * we still need to wait for more characters to come
686 1.1 tshiozak */
687 1.1 tshiozak if (n < sp->len) {
688 1.1 tshiozak if (nmatch == n) {
689 1.1 tshiozak if (result)
690 1.1 tshiozak *result = string;
691 1.1 tshiozak return (_ISO2022INVALID);
692 1.1 tshiozak }
693 1.1 tshiozak } else {
694 1.1 tshiozak if (nmatch == sp->len) {
695 1.1 tshiozak /* this case should not happen */
696 1.1 tshiozak goto eat;
697 1.1 tshiozak }
698 1.1 tshiozak }
699 1.1 tshiozak }
700 1.1 tshiozak
701 1.1 tshiozak break;
702 1.1 tshiozak }
703 1.1 tshiozak
704 1.1 tshiozak eat:
705 1.1 tshiozak /* no letter to eat */
706 1.1 tshiozak if (n < 1) {
707 1.1 tshiozak if (result)
708 1.1 tshiozak *result = string;
709 1.1 tshiozak return (_ISO2022INVALID);
710 1.1 tshiozak }
711 1.1 tshiozak
712 1.1 tshiozak /* normal chars. always eat C0/C1 as is. */
713 1.1 tshiozak if (iscntl(*string & 0xff))
714 1.1 tshiozak cur = -1;
715 1.1 tshiozak else if (*string & 0x80) {
716 1.1 tshiozak cur = (psenc->singlegr == -1)
717 1.1 tshiozak ? psenc->gr : psenc->singlegr;
718 1.1 tshiozak } else {
719 1.1 tshiozak cur = (psenc->singlegl == -1)
720 1.1 tshiozak ? psenc->gl : psenc->singlegl;
721 1.1 tshiozak }
722 1.1 tshiozak
723 1.1 tshiozak if (cur == -1) {
724 1.1 tshiozak asis:
725 1.1 tshiozak wchar = *string++ & 0xff;
726 1.1 tshiozak if (result)
727 1.1 tshiozak *result = string;
728 1.1 tshiozak /* reset single shift state */
729 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
730 1.1 tshiozak return wchar;
731 1.1 tshiozak }
732 1.1 tshiozak
733 1.1 tshiozak /* length error check */
734 1.1 tshiozak switch (psenc->g[cur].type) {
735 1.1 tshiozak case CS94MULTI:
736 1.1 tshiozak case CS96MULTI:
737 1.1 tshiozak if (!isthree(psenc->g[cur].final)) {
738 1.1 tshiozak if (2 <= n
739 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80))
740 1.1 tshiozak break;
741 1.1 tshiozak } else {
742 1.1 tshiozak if (3 <= n
743 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80)
744 1.1 tshiozak && (string[0] & 0x80) == (string[2] & 0x80))
745 1.1 tshiozak break;
746 1.1 tshiozak }
747 1.1 tshiozak
748 1.1 tshiozak /* we still need to wait for more characters to come */
749 1.1 tshiozak if (result)
750 1.1 tshiozak *result = string;
751 1.1 tshiozak return (_ISO2022INVALID);
752 1.1 tshiozak
753 1.1 tshiozak case CS94:
754 1.1 tshiozak case CS96:
755 1.1 tshiozak if (1 <= n)
756 1.1 tshiozak break;
757 1.1 tshiozak
758 1.1 tshiozak /* we still need to wait for more characters to come */
759 1.1 tshiozak if (result)
760 1.1 tshiozak *result = string;
761 1.1 tshiozak return (_ISO2022INVALID);
762 1.1 tshiozak }
763 1.1 tshiozak
764 1.1 tshiozak /* range check */
765 1.1 tshiozak switch (psenc->g[cur].type) {
766 1.1 tshiozak case CS94:
767 1.1 tshiozak if (!(is94(string[0] & 0x7f)))
768 1.1 tshiozak goto asis;
769 1.1 tshiozak case CS96:
770 1.1 tshiozak if (!(is96(string[0] & 0x7f)))
771 1.1 tshiozak goto asis;
772 1.1 tshiozak break;
773 1.1 tshiozak case CS94MULTI:
774 1.1 tshiozak if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
775 1.1 tshiozak goto asis;
776 1.1 tshiozak break;
777 1.1 tshiozak case CS96MULTI:
778 1.1 tshiozak if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
779 1.1 tshiozak goto asis;
780 1.1 tshiozak break;
781 1.1 tshiozak }
782 1.1 tshiozak
783 1.1 tshiozak /* extract the character. */
784 1.1 tshiozak switch (psenc->g[cur].type) {
785 1.1 tshiozak case CS94:
786 1.1 tshiozak /* special case for ASCII. */
787 1.1 tshiozak if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
788 1.1 tshiozak wchar = *string++;
789 1.1 tshiozak wchar &= 0x7f;
790 1.1 tshiozak break;
791 1.1 tshiozak }
792 1.1 tshiozak wchar = psenc->g[cur].final;
793 1.1 tshiozak wchar = (wchar << 8);
794 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
795 1.1 tshiozak wchar = (wchar << 8);
796 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
797 1.1 tshiozak break;
798 1.1 tshiozak case CS96:
799 1.1 tshiozak /* special case for ISO-8859-1. */
800 1.1 tshiozak if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
801 1.1 tshiozak wchar = *string++;
802 1.1 tshiozak wchar &= 0x7f;
803 1.1 tshiozak wchar |= 0x80;
804 1.1 tshiozak break;
805 1.1 tshiozak }
806 1.1 tshiozak wchar = psenc->g[cur].final;
807 1.1 tshiozak wchar = (wchar << 8);
808 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
809 1.1 tshiozak wchar = (wchar << 8);
810 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
811 1.1 tshiozak wchar |= 0x80;
812 1.1 tshiozak break;
813 1.1 tshiozak case CS94MULTI:
814 1.1 tshiozak case CS96MULTI:
815 1.1 tshiozak wchar = psenc->g[cur].final;
816 1.1 tshiozak wchar = (wchar << 8);
817 1.1 tshiozak if (isthree(psenc->g[cur].final))
818 1.1 tshiozak wchar |= (*string++ & 0x7f);
819 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
820 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
821 1.1 tshiozak if (psenc->g[cur].type == CS96MULTI)
822 1.1 tshiozak wchar |= 0x80;
823 1.1 tshiozak break;
824 1.1 tshiozak }
825 1.1 tshiozak
826 1.1 tshiozak if (result)
827 1.1 tshiozak *result = string;
828 1.1 tshiozak /* reset single shift state */
829 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
830 1.1 tshiozak return wchar;
831 1.1 tshiozak }
832 1.1 tshiozak
833 1.1 tshiozak
834 1.1 tshiozak
/*
 * _citrus_ISO2022_mbrtowc_priv:
 *	Try to decode one wide character from at most n bytes at *s, using
 *	psenc both as the conversion state and as a holding buffer
 *	(psenc->ch / psenc->chlen) for bytes left over from earlier calls.
 *
 *	ei	encoding information, passed through to _ISO2022_sgetwchar()
 *	pwc	if non-NULL, receives the decoded character
 *	s	in: start of the input; out: advanced past the bytes taken
 *		(not updated on the EILSEQ path)
 *	n	number of input bytes available at *s
 *	psenc	conversion state; modified
 *	nresult	out:	0		the decoded character is 0
 *			(size_t)-2	no character could be produced yet
 *			(size_t)-1	encoding error
 *			otherwise	c - chlenbak, i.e. the bytes counted
 *					for this character minus those that
 *					were already buffered on entry
 *
 *	Returns 0, or EILSEQ together with *nresult == (size_t)-1.
 */
static int
_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
			     wchar_t * __restrict pwc,
			     const char ** __restrict s,
			     size_t n, _ISO2022State * __restrict psenc,
			     size_t * __restrict nresult)
{
	wchar_t wchar;
	const char *s0, *p, *result;
	int c;		/* running count of bytes the parser stepped over */
	int chlenbak;	/* psenc->chlen as it was on entry */

	_DIAGASSERT(nresult != 0);
	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(s != NULL);

	s0 = *s;
	c = 0;
	chlenbak = psenc->chlen;

	/*
	 * If bytes are pending in the state buffer, continue from them;
	 * otherwise jump to the direct-from-input path (emptybuf).
	 */
	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
		/* illegal state: reinitialize it and report an error */
		_citrus_ISO2022_init_state(ei, psenc);
		goto encoding_error;
	}
	if (psenc->chlen == 0)
		goto emptybuf;

	/*
	 * The buffer is not empty: append input one byte at a time and
	 * retry the parse after each byte.
	 * Note: n is a size_t, so "n >= 0" is always true; the loop is
	 * bounded by the buffer-size test and by the n == 0 exit below.
	 */
	p = psenc->ch;
	while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
		if (n > 0) {
			psenc->ch[psenc->chlen++] = *s0++;
			n--;
		}

		/* parse the not-yet-consumed part of the buffer */
		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
					   &result, psenc);
		if (wchar != _ISO2022INVALID) {
			c += result - p;
			/* keep whatever follows the character for next time */
			if (psenc->chlen > c)
				memmove(psenc->ch, result, psenc->chlen - c);
			if (psenc->chlen < c)
				psenc->chlen = 0;
			else
				psenc->chlen -= c;
			goto output;
		}

		/*
		 * No character yet; continue from wherever the parser left
		 * result (presumably just past any escape/shift sequence it
		 * accepted -- confirm against _ISO2022_sgetwchar()).
		 */
		c += result - p;
		p = result;

		/* input exhausted: ask the caller for more bytes */
		if (n == 0)
			goto restart;
	}

	/* buffer filled up without yielding a character: sequence too long? */
	goto encoding_error;

emptybuf:
	/* nothing buffered: parse straight from the caller's input */
	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
	if (wchar != _ISO2022INVALID) {
		c += result - s0;
		psenc->chlen = 0;
		s0 = result;
		goto output;
	}
	/* the parser advanced and input remains: step over it and retry */
	if (result > s0 && n > result - s0) {
		c += (result - s0);
		n -= (result - s0);
		s0 = result;
		goto emptybuf;
	}
	/*
	 * Out of input.  n + c is the byte count measured from the original
	 * *s and s0 - c is that original position; stash those bytes in the
	 * state buffer if they fit, then ask for more input.
	 */
	n += c;
	if (n < sizeof(psenc->ch)) {
		memcpy(psenc->ch, s0 - c, n);
		psenc->chlen = n;
		s0 = result;
		goto restart;
	}

	/* does not fit in the buffer: escape sequence too long? (falls through) */

encoding_error:
	psenc->chlen = 0;
	*nresult = (size_t)-1;
	return (EILSEQ);

output:
	*s = s0;
	if (pwc)
		*pwc = wchar;

	if (!wchar)
		*nresult = 0;
	else
		*nresult = c - chlenbak;	/* exclude bytes buffered on entry */

	return (0);

restart:
	/* incomplete input: report -2 so the caller can supply more bytes */
	*s = s0;
	*nresult = (size_t)-2;

	return (0);
}
946 1.1 tshiozak
947 1.1 tshiozak static int
948 1.1 tshiozak recommendation(_ISO2022EncodingInfo * __restrict ei,
949 1.1 tshiozak _ISO2022Charset * __restrict cs)
950 1.1 tshiozak {
951 1.1 tshiozak int i, j;
952 1.1 tshiozak _ISO2022Charset *recommend;
953 1.1 tshiozak
954 1.1 tshiozak _DIAGASSERT(ei != NULL);
955 1.1 tshiozak _DIAGASSERT(cs != NULL);
956 1.1 tshiozak
957 1.1 tshiozak /* first, try a exact match. */
958 1.1 tshiozak for (i = 0; i < 4; i++) {
959 1.1 tshiozak recommend = ei->recommend[i];
960 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
961 1.1 tshiozak if (cs->type != recommend[j].type)
962 1.1 tshiozak continue;
963 1.1 tshiozak if (cs->final != recommend[j].final)
964 1.1 tshiozak continue;
965 1.1 tshiozak if (cs->interm != recommend[j].interm)
966 1.1 tshiozak continue;
967 1.1 tshiozak
968 1.1 tshiozak return i;
969 1.1 tshiozak }
970 1.1 tshiozak }
971 1.1 tshiozak
972 1.1 tshiozak /* then, try a wildcard match over final char. */
973 1.1 tshiozak for (i = 0; i < 4; i++) {
974 1.1 tshiozak recommend = ei->recommend[i];
975 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
976 1.1 tshiozak if (cs->type != recommend[j].type)
977 1.1 tshiozak continue;
978 1.1 tshiozak if (cs->final && (cs->final != recommend[j].final))
979 1.1 tshiozak continue;
980 1.1 tshiozak if (cs->interm && (cs->interm != recommend[j].interm))
981 1.1 tshiozak continue;
982 1.1 tshiozak
983 1.1 tshiozak return i;
984 1.1 tshiozak }
985 1.1 tshiozak }
986 1.1 tshiozak
987 1.1 tshiozak /* there's no recommendation. make a guess. */
988 1.1 tshiozak if (ei->maxcharset == 0) {
989 1.1 tshiozak return 0;
990 1.1 tshiozak } else {
991 1.1 tshiozak switch (cs->type) {
992 1.1 tshiozak case CS94:
993 1.1 tshiozak case CS94MULTI:
994 1.1 tshiozak return 0;
995 1.1 tshiozak case CS96:
996 1.1 tshiozak case CS96MULTI:
997 1.1 tshiozak return 1;
998 1.1 tshiozak }
999 1.1 tshiozak }
1000 1.1 tshiozak return 0;
1001 1.1 tshiozak }
1002 1.1 tshiozak
/*
 * _ISO2022_sputwchar:
 *	Encode the wide character wc as an ISO 2022 byte sequence: an
 *	optional designation escape, an optional shift (invocation) sequence,
 *	then the character bytes.  The sequence is assembled in a local
 *	buffer and copied to string only when it fits in n bytes.
 *
 *	Wide character layout as decoded below:
 *	  bits 24-30	final byte of the charset
 *	  bit  23	set when an intermediate byte is present
 *	  bits 16-22	the intermediate byte
 *	  bits  8-14	non-zero for a multibyte charset
 *	  bit   7	set for a 96-character set, clear for a 94-set
 *	Values that fit in 8 bits are treated as ASCII (bit 7 clear) or
 *	ISO-8859-1 (bit 7 set).
 *
 *	ei	encoding information (flags and plane recommendations)
 *	wc	character to encode
 *	string	destination buffer
 *	n	size of string in bytes
 *	result	if non-NULL, set to string + len on success, or to NULL when
 *		the sequence does not fit
 *	psenc	conversion state; updated even when the sequence does not fit
 *
 *	Returns the length of the generated sequence whether or not it was
 *	copied.  Calls abort() when the flags in ei allow no way of invoking
 *	the chosen plane.
 */
static int
_ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
		   char * __restrict string, size_t n,
		   char ** __restrict result,
		   _ISO2022State * __restrict psenc)
{
	int i = 0, len;
	_ISO2022Charset cs;
	char *p;
	char tmp[MB_LEN_MAX];
	int target;
	u_char mask;
	int bit8;

	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(string != NULL);
	/* result may be NULL */
	/* psenc is dereferenced and updated below */

	/* work out which charset wc belongs to */
	if (iscntl(wc & 0xff)) {
		/* go back to ASCII on control chars */
		cs.type = CS94;
		cs.final = 'B';
		cs.interm = '\0';
	} else if (!(wc & ~0xff)) {
		if (wc & 0x80) {
			/* special treatment for ISO-8859-1 */
			cs.type = CS96;
			cs.final = 'A';
			cs.interm = '\0';
		} else {
			/* special treatment for ASCII */
			cs.type = CS94;
			cs.final = 'B';
			cs.interm = '\0';
		}
	} else {
		/* general case: unpack final, intermediate and type from wc */
		cs.final = (wc >> 24) & 0x7f;
		if ((wc >> 16) & 0x80)
			cs.interm = (wc >> 16) & 0x7f;
		else
			cs.interm = '\0';
		if (wc & 0x80)
			cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
		else
			cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
	}
	target = recommendation(ei, &cs);
	p = tmp;
	bit8 = ei->flags & F_8BIT;

	/* designate the charset onto the target plane(G0/1/2/3). */
	if (psenc->g[target].type == cs.type
	    && psenc->g[target].final == cs.final
	    && psenc->g[target].interm == cs.interm)
		goto planeok;	/* already designated there */

	*p++ = '\033';
	if (cs.type == CS94MULTI || cs.type == CS96MULTI)
		*p++ = '$';
	/*
	 * Short form for G0: a multibyte 94-set with final '@', 'A' or 'B'
	 * and no intermediate is written without the plane designator byte,
	 * unless F_NOOLD is set.
	 * NOTE(review): strchr() also matches the string terminator, so a
	 * zero cs.final takes this branch as well -- confirm that is harmless.
	 */
	if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
	    && !cs.interm && !(ei->flags & F_NOOLD))
		;
	else if (cs.type == CS94 || cs.type == CS94MULTI)
		*p++ = "()*+"[target];	/* 94-set designator for G0..G3 */
	else
		*p++ = ",-./"[target];	/* 96-set designator for G0..G3 */
	if (cs.interm)
		*p++ = cs.interm;
	*p++ = cs.final;

	/* remember the new designation */
	psenc->g[target].type = cs.type;
	psenc->g[target].final = cs.final;
	psenc->g[target].interm = cs.interm;

planeok:
	/* invoke the plane onto GL or GR. */
	if (psenc->gl == target)
		goto sideok;
	if (bit8 && psenc->gr == target)
		goto sideok;

	/*
	 * Use the first shift function permitted by ei->flags, tried in this
	 * order: locking shifts into GL, locking shifts into GR (8-bit
	 * only), then single shifts.
	 */
	if (target == 0 && (ei->flags & F_LS0)) {
		*p++ = '\017';
		psenc->gl = 0;
	} else if (target == 1 && (ei->flags & F_LS1)) {
		*p++ = '\016';
		psenc->gl = 1;
	} else if (target == 2 && (ei->flags & F_LS2)) {
		*p++ = '\033';
		*p++ = 'n';
		psenc->gl = 2;
	} else if (target == 3 && (ei->flags & F_LS3)) {
		*p++ = '\033';
		*p++ = 'o';
		psenc->gl = 3;
	} else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
		*p++ = '\033';
		*p++ = '~';
		psenc->gr = 1;
	} else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
		*p++ = '\033';
		/*{*/
		*p++ = '}';
		psenc->gr = 2;
	} else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
		*p++ = '\033';
		*p++ = '|';
		psenc->gr = 3;
	} else if (target == 2 && (ei->flags & F_SS2)) {
		*p++ = '\033';
		*p++ = 'N';
		psenc->singlegl = 2;
	} else if (target == 3 && (ei->flags & F_SS3)) {
		*p++ = '\033';
		*p++ = 'O';
		psenc->singlegl = 3;
	} else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
		/*
		 * NOTE(review): this emits 'N' after the 0x8e byte; the
		 * 8-bit single shift is normally one byte on its own --
		 * confirm against the decoder before relying on this.
		 */
		*p++ = '\216';
		*p++ = 'N';
		psenc->singlegl = psenc->singlegr = 2;
	} else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
		/* NOTE(review): same question for the 'O' after 0x8f */
		*p++ = '\217';
		*p++ = 'O';
		psenc->singlegl = psenc->singlegr = 3;
	} else
		abort();	/* no permitted way to invoke the plane */

sideok:
	/*
	 * Pick the high-bit mask for the character bytes: 0x00 when the
	 * plane is reached through GL, 0x80 when through GR.  A pending
	 * single shift is checked before the locking-shift state.
	 */
	if (psenc->singlegl == target)
		mask = 0x00;
	else if (psenc->singlegr == target)
		mask = 0x80;
	else if (psenc->gl == target)
		mask = 0x00;
	else if ((ei->flags & F_8BIT) && psenc->gr == target)
		mask = 0x80;
	else
		abort();

	/* number of character bytes to emit */
	switch (cs.type) {
	case CS94:
	case CS96:
		i = 1;
		break;
	case CS94MULTI:
	case CS96MULTI:
		i = isthree(cs.final) ? 3 : 2;
		break;
	}
	/* emit them most significant first, 7 bits each, with mask applied */
	while (i-- > 0)
		*p++ = ((wc >> (i << 3)) & 0x7f) | mask;

	/* reset single shift state */
	psenc->singlegl = psenc->singlegr = -1;

	len = p - tmp;
	if (n < len) {
		/* does not fit: nothing is copied, the length is still returned */
		if (result)
			*result = (char *)0;
	} else {
		if (result)
			*result = string + len;
		memcpy(string, tmp, len);
	}
	return len;
}
1170 1.8 tshiozak
1171 1.8 tshiozak static int
1172 1.8 tshiozak _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1173 1.8 tshiozak char * __restrict s, size_t n,
1174 1.8 tshiozak _ISO2022State * __restrict psenc,
1175 1.8 tshiozak size_t * __restrict nresult)
1176 1.8 tshiozak {
1177 1.8 tshiozak char buf[MB_LEN_MAX];
1178 1.8 tshiozak char *result;
1179 1.8 tshiozak int len, ret;
1180 1.8 tshiozak
1181 1.8 tshiozak _DIAGASSERT(ei != NULL);
1182 1.8 tshiozak _DIAGASSERT(nresult != 0);
1183 1.8 tshiozak _DIAGASSERT(s != NULL);
1184 1.8 tshiozak
1185 1.8 tshiozak /* XXX state will be modified after this operation... */
1186 1.8 tshiozak len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc);
1187 1.8 tshiozak if (len==0) {
1188 1.8 tshiozak ret = EINVAL;
1189 1.8 tshiozak goto err;
1190 1.8 tshiozak }
1191 1.8 tshiozak if (sizeof(buf) < len || n < len-1) {
1192 1.8 tshiozak /* XXX should recover state? */
1193 1.8 tshiozak ret = E2BIG;
1194 1.8 tshiozak goto err;
1195 1.8 tshiozak }
1196 1.8 tshiozak
1197 1.8 tshiozak memcpy(s, buf, len-1);
1198 1.8 tshiozak *nresult = (size_t)(len-1);
1199 1.8 tshiozak return (0);
1200 1.8 tshiozak
1201 1.8 tshiozak err:
1202 1.8 tshiozak /* bound check failure */
1203 1.8 tshiozak *nresult = (size_t)-1;
1204 1.8 tshiozak return ret;
1205 1.1 tshiozak }
1206 1.1 tshiozak
1207 1.1 tshiozak static int
1208 1.1 tshiozak _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1209 1.1 tshiozak char * __restrict s, size_t n, wchar_t wc,
1210 1.1 tshiozak _ISO2022State * __restrict psenc,
1211 1.1 tshiozak size_t * __restrict nresult)
1212 1.1 tshiozak {
1213 1.1 tshiozak char buf[MB_LEN_MAX];
1214 1.1 tshiozak char *result;
1215 1.7 tshiozak int len, ret;
1216 1.1 tshiozak
1217 1.1 tshiozak _DIAGASSERT(ei != NULL);
1218 1.1 tshiozak _DIAGASSERT(nresult != 0);
1219 1.1 tshiozak _DIAGASSERT(s != NULL);
1220 1.1 tshiozak
1221 1.1 tshiozak /* XXX state will be modified after this operation... */
1222 1.1 tshiozak len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1223 1.1 tshiozak if (sizeof(buf) < len || n < len) {
1224 1.1 tshiozak /* XXX should recover state? */
1225 1.7 tshiozak ret = E2BIG;
1226 1.7 tshiozak goto err;
1227 1.1 tshiozak }
1228 1.1 tshiozak
1229 1.1 tshiozak memcpy(s, buf, len);
1230 1.1 tshiozak *nresult = (size_t)len;
1231 1.1 tshiozak return (0);
1232 1.1 tshiozak
1233 1.7 tshiozak err:
1234 1.1 tshiozak /* bound check failure */
1235 1.1 tshiozak *nresult = (size_t)-1;
1236 1.7 tshiozak return ret;
1237 1.7 tshiozak }
1238 1.7 tshiozak
1239 1.7 tshiozak static __inline int
1240 1.7 tshiozak /*ARGSUSED*/
1241 1.7 tshiozak _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1242 1.7 tshiozak _csid_t * __restrict csid,
1243 1.7 tshiozak _index_t * __restrict idx, wchar_t wc)
1244 1.7 tshiozak {
1245 1.7 tshiozak wchar_t m, nm;
1246 1.7 tshiozak
1247 1.7 tshiozak _DIAGASSERT(csid != NULL && idx != NULL);
1248 1.7 tshiozak
1249 1.7 tshiozak m = wc & 0x7FFF8080;
1250 1.7 tshiozak nm = wc & 0x007F7F7F;
1251 1.7 tshiozak if (m & 0x00800000) {
1252 1.7 tshiozak nm &= 0x00007F7F;
1253 1.7 tshiozak } else {
1254 1.7 tshiozak m &= 0x7F008080;
1255 1.7 tshiozak }
1256 1.7 tshiozak if (nm & 0x007F0000) {
1257 1.7 tshiozak /* ^3 mark */
1258 1.7 tshiozak m |= 0x007F0000;
1259 1.7 tshiozak } else if (nm & 0x00007F00) {
1260 1.7 tshiozak /* ^2 mark */
1261 1.7 tshiozak m |= 0x00007F00;
1262 1.7 tshiozak }
1263 1.7 tshiozak *csid = (_csid_t)m;
1264 1.7 tshiozak *idx = (_index_t)nm;
1265 1.7 tshiozak
1266 1.7 tshiozak return (0);
1267 1.7 tshiozak }
1268 1.7 tshiozak
1269 1.7 tshiozak static __inline int
1270 1.7 tshiozak /*ARGSUSED*/
1271 1.7 tshiozak _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1272 1.7 tshiozak wchar_t * __restrict wc,
1273 1.7 tshiozak _csid_t csid, _index_t idx)
1274 1.7 tshiozak {
1275 1.7 tshiozak
1276 1.7 tshiozak _DIAGASSERT(ei != NULL && wc != NULL);
1277 1.7 tshiozak
1278 1.7 tshiozak *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1279 1.7 tshiozak
1280 1.7 tshiozak return (0);
1281 1.1 tshiozak }
1282 1.1 tshiozak
1283 1.1 tshiozak /* ----------------------------------------------------------------------
1284 1.1 tshiozak * public interface for ctype
1285 1.1 tshiozak */
1286 1.1 tshiozak
1287 1.1 tshiozak _CITRUS_CTYPE_DECLS(ISO2022);
1288 1.1 tshiozak _CITRUS_CTYPE_DEF_OPS(ISO2022);
1289 1.1 tshiozak
1290 1.1 tshiozak #include "citrus_ctype_template.h"
1291 1.7 tshiozak
1292 1.7 tshiozak /* ----------------------------------------------------------------------
1293 1.7 tshiozak * public interface for stdenc
1294 1.7 tshiozak */
1295 1.7 tshiozak
1296 1.7 tshiozak _CITRUS_STDENC_DECLS(ISO2022);
1297 1.7 tshiozak _CITRUS_STDENC_DEF_OPS(ISO2022);
1298 1.7 tshiozak
1299 1.7 tshiozak #include "citrus_stdenc_template.h"
1300