citrus_iso2022.c revision 1.5 1 1.5 yamt /* $NetBSD: citrus_iso2022.c,v 1.5 2002/03/28 10:29:11 yamt Exp $ */
2 1.1 tshiozak
3 1.1 tshiozak /*-
4 1.1 tshiozak * Copyright (c)1999, 2002 Citrus Project,
5 1.1 tshiozak * All rights reserved.
6 1.1 tshiozak *
7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without
8 1.1 tshiozak * modification, are permitted provided that the following conditions
9 1.1 tshiozak * are met:
10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright
11 1.1 tshiozak * notice, this list of conditions and the following disclaimer.
12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the
14 1.1 tshiozak * documentation and/or other materials provided with the distribution.
15 1.1 tshiozak *
16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1 tshiozak * SUCH DAMAGE.
27 1.1 tshiozak *
28 1.1 tshiozak * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 1.1 tshiozak */
30 1.1 tshiozak
31 1.1 tshiozak #include <sys/cdefs.h>
32 1.1 tshiozak #if defined(LIBC_SCCS) && !defined(lint)
33 1.5 yamt __RCSID("$NetBSD: citrus_iso2022.c,v 1.5 2002/03/28 10:29:11 yamt Exp $");
34 1.1 tshiozak #endif /* LIBC_SCCS and not lint */
35 1.1 tshiozak
36 1.1 tshiozak #include <assert.h>
37 1.1 tshiozak #include <errno.h>
38 1.1 tshiozak #include <string.h>
39 1.1 tshiozak #include <stdio.h>
40 1.1 tshiozak #include <stdlib.h>
41 1.1 tshiozak #include <stddef.h>
42 1.1 tshiozak #include <locale.h>
43 1.1 tshiozak #include <wchar.h>
44 1.1 tshiozak #include <sys/types.h>
45 1.1 tshiozak #include <limits.h>
46 1.1 tshiozak #include "citrus_module.h"
47 1.1 tshiozak #include "citrus_ctype.h"
48 1.1 tshiozak #include "citrus_iso2022.h"
49 1.1 tshiozak
50 1.1 tshiozak
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
54 1.1 tshiozak
55 1.1 tshiozak
56 1.1 tshiozak /*
57 1.1 tshiozak * wchar_t mappings:
58 1.1 tshiozak * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
59 1.1 tshiozak * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
60 1.1 tshiozak * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
61 1.1 tshiozak * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
62 1.1 tshiozak * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
63 1.1 tshiozak * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
64 1.1 tshiozak * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
65 1.1 tshiozak * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
66 1.1 tshiozak * 94x94 charset (ESC & V ESC $ ( F)
67 1.1 tshiozak * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
68 1.1 tshiozak * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
69 1.1 tshiozak * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
70 1.1 tshiozak */
71 1.1 tshiozak
72 1.1 tshiozak typedef struct {
73 1.1 tshiozak u_char type;
74 1.1 tshiozak #define CS94 (0U)
75 1.1 tshiozak #define CS96 (1U)
76 1.1 tshiozak #define CS94MULTI (2U)
77 1.1 tshiozak #define CS96MULTI (3U)
78 1.1 tshiozak
79 1.1 tshiozak u_char final;
80 1.1 tshiozak u_char interm;
81 1.1 tshiozak u_char vers;
82 1.1 tshiozak } _ISO2022Charset;
83 1.1 tshiozak
84 1.1 tshiozak typedef struct {
85 1.1 tshiozak _ISO2022Charset g[4];
86 1.1 tshiozak /* need 3 bits to hold -1, 0, ..., 3 */
87 1.1 tshiozak int gl:3,
88 1.1 tshiozak gr:3,
89 1.1 tshiozak singlegl:3,
90 1.1 tshiozak singlegr:3;
91 1.1 tshiozak char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
92 1.1 tshiozak int chlen;
93 1.5 yamt int flags;
94 1.5 yamt #define _ISO2022STATE_FLAG_INITIALIZED 1
95 1.4 yamt } _ISO2022State;
96 1.1 tshiozak
97 1.1 tshiozak typedef struct {
98 1.1 tshiozak _ISO2022Charset *recommend[4];
99 1.1 tshiozak size_t recommendsize[4];
100 1.1 tshiozak _ISO2022Charset initg[4];
101 1.1 tshiozak int maxcharset;
102 1.1 tshiozak int flags;
103 1.1 tshiozak #define F_8BIT 0x0001
104 1.1 tshiozak #define F_NOOLD 0x0002
105 1.1 tshiozak #define F_SI 0x0010 /*0F*/
106 1.1 tshiozak #define F_SO 0x0020 /*0E*/
107 1.1 tshiozak #define F_LS0 0x0010 /*0F*/
108 1.1 tshiozak #define F_LS1 0x0020 /*0E*/
109 1.1 tshiozak #define F_LS2 0x0040 /*ESC n*/
110 1.1 tshiozak #define F_LS3 0x0080 /*ESC o*/
111 1.1 tshiozak #define F_LS1R 0x0100 /*ESC ~*/
112 1.1 tshiozak #define F_LS2R 0x0200 /*ESC }*/
113 1.1 tshiozak #define F_LS3R 0x0400 /*ESC |*/
114 1.1 tshiozak #define F_SS2 0x0800 /*ESC N*/
115 1.1 tshiozak #define F_SS3 0x1000 /*ESC O*/
116 1.1 tshiozak #define F_SS2R 0x2000 /*8E*/
117 1.1 tshiozak #define F_SS3R 0x4000 /*8F*/
118 1.1 tshiozak } _ISO2022EncodingInfo;
119 1.1 tshiozak typedef struct {
120 1.1 tshiozak _ISO2022EncodingInfo ei;
121 1.1 tshiozak struct {
122 1.1 tshiozak /* for future multi-locale facility */
123 1.1 tshiozak _ISO2022State s_mblen;
124 1.1 tshiozak _ISO2022State s_mbrlen;
125 1.1 tshiozak _ISO2022State s_mbrtowc;
126 1.1 tshiozak _ISO2022State s_mbtowc;
127 1.1 tshiozak _ISO2022State s_mbsrtowcs;
128 1.1 tshiozak _ISO2022State s_wcrtomb;
129 1.1 tshiozak _ISO2022State s_wcsrtombs;
130 1.1 tshiozak _ISO2022State s_wctomb;
131 1.1 tshiozak } states;
132 1.1 tshiozak } _ISO2022CTypeInfo;
133 1.1 tshiozak
/* Pointer conversions between the opaque handles and the ISO2022 types. */
#define _TO_EI(_cl_)			((_ISO2022EncodingInfo *)(_cl_))
#define _TO_CEI(_cl_)			((_ISO2022CTypeInfo *)(_cl_))
#define _TO_STATE(_ps_)			((_ISO2022State *)(_ps_))
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* Expansion is parenthesized so it stays one operand in any context. */
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

/* Names and properties of this encoding, expressed as macros. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* True until _citrus_ISO2022_init_state() has marked the state. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * Value returned by the decoder when no character could be produced.
 * Parenthesized: the bare `(wchar_t)-1' mis-parses after an operator
 * such as sizeof.
 */
#define _ISO2022INVALID			((wchar_t)-1)
151 1.1 tshiozak
/*
 * Byte classification helpers.  Each returns 1 when x lies in the
 * named range and 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	return (x == 0x7f || isc0(x) || isc1(x));
}

/* 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x <= 0x7f);
}

/* 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x <= 0x7f);
}

/* 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* 0x60..0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
160 1.1 tshiozak
161 1.1 tshiozak static __inline int
162 1.1 tshiozak getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
163 1.1 tshiozak {
164 1.1 tshiozak
165 1.1 tshiozak _DIAGASSERT(p != NULL);
166 1.1 tshiozak _DIAGASSERT(cs != NULL);
167 1.1 tshiozak
168 1.1 tshiozak if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
169 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
170 1.1 tshiozak cs->interm = '\0';
171 1.1 tshiozak cs->vers = '\0';
172 1.1 tshiozak cs->type = CS94MULTI;
173 1.1 tshiozak } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
174 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
175 1.1 tshiozak cs->interm = '\0';
176 1.1 tshiozak cs->vers = '\0';
177 1.1 tshiozak cs->type = CS96MULTI;
178 1.1 tshiozak } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
179 1.1 tshiozak cs->final = (u_char)(p[2] & 0xff);
180 1.1 tshiozak cs->interm = '\0';
181 1.1 tshiozak cs->vers = '\0';
182 1.1 tshiozak cs->type = CS94;
183 1.1 tshiozak } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
184 1.1 tshiozak cs->final = (u_char )(p[2] & 0xff);
185 1.1 tshiozak cs->interm = '\0';
186 1.1 tshiozak cs->vers = '\0';
187 1.1 tshiozak cs->type = CS96;
188 1.1 tshiozak } else {
189 1.1 tshiozak return 1;
190 1.1 tshiozak }
191 1.1 tshiozak
192 1.1 tshiozak return 0;
193 1.1 tshiozak }
194 1.1 tshiozak
195 1.1 tshiozak
196 1.1 tshiozak #define _NOTMATCH 0
197 1.1 tshiozak #define _MATCH 1
198 1.1 tshiozak #define _PARSEFAIL 2
199 1.1 tshiozak
200 1.1 tshiozak static __inline int
201 1.1 tshiozak get_recommend(_ISO2022EncodingInfo * __restrict ei,
202 1.1 tshiozak const char * __restrict token)
203 1.1 tshiozak {
204 1.1 tshiozak int i;
205 1.1 tshiozak _ISO2022Charset cs;
206 1.1 tshiozak
207 1.1 tshiozak if (!strchr("0123", token[0]) || token[1] != '=')
208 1.1 tshiozak return (_NOTMATCH);
209 1.1 tshiozak
210 1.1 tshiozak if (getcs(&token[2], &cs) == 0)
211 1.1 tshiozak ;
212 1.1 tshiozak else if (!strcmp(&token[2], "94")) {
213 1.1 tshiozak cs.final = (u_char)(token[4]);
214 1.1 tshiozak cs.interm = '\0';
215 1.1 tshiozak cs.vers = '\0';
216 1.1 tshiozak cs.type = CS94;
217 1.1 tshiozak } else if (!strcmp(&token[2], "96")) {
218 1.1 tshiozak cs.final = (u_char)(token[4]);
219 1.1 tshiozak cs.interm = '\0';
220 1.1 tshiozak cs.vers = '\0';
221 1.1 tshiozak cs.type = CS96;
222 1.1 tshiozak } else if (!strcmp(&token[2], "94$")) {
223 1.1 tshiozak cs.final = (u_char)(token[5]);
224 1.1 tshiozak cs.interm = '\0';
225 1.1 tshiozak cs.vers = '\0';
226 1.1 tshiozak cs.type = CS94MULTI;
227 1.1 tshiozak } else if (!strcmp(&token[2], "96$")) {
228 1.1 tshiozak cs.final = (u_char)(token[5]);
229 1.1 tshiozak cs.interm = '\0';
230 1.1 tshiozak cs.vers = '\0';
231 1.1 tshiozak cs.type = CS96MULTI;
232 1.1 tshiozak } else {
233 1.1 tshiozak return (_PARSEFAIL);
234 1.1 tshiozak }
235 1.1 tshiozak
236 1.1 tshiozak i = token[0] - '0';
237 1.1 tshiozak ei->recommendsize[i] += 1;
238 1.1 tshiozak if (!ei->recommend[i]) {
239 1.1 tshiozak ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
240 1.1 tshiozak } else {
241 1.1 tshiozak ei->recommend[i] =
242 1.1 tshiozak realloc(ei->recommend[i],
243 1.1 tshiozak sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
244 1.1 tshiozak }
245 1.1 tshiozak if (!ei->recommend[i])
246 1.1 tshiozak return (_PARSEFAIL);
247 1.1 tshiozak
248 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
249 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
250 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
251 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
252 1.1 tshiozak
253 1.1 tshiozak return (_MATCH);
254 1.1 tshiozak }
255 1.1 tshiozak
256 1.1 tshiozak static __inline int
257 1.1 tshiozak get_initg(_ISO2022EncodingInfo * __restrict ei,
258 1.1 tshiozak const char * __restrict token)
259 1.1 tshiozak {
260 1.1 tshiozak _ISO2022Charset cs;
261 1.1 tshiozak
262 1.1 tshiozak if (strncmp("INIT", &token[0], 4) ||
263 1.1 tshiozak !strchr("0123", token[4]) ||
264 1.1 tshiozak token[5] != '=')
265 1.1 tshiozak return (_NOTMATCH);
266 1.1 tshiozak
267 1.1 tshiozak if (getcs(&token[6], &cs) != 0)
268 1.1 tshiozak return (_PARSEFAIL);
269 1.1 tshiozak
270 1.1 tshiozak ei->initg[token[4] - '0'].type = cs.type;
271 1.1 tshiozak ei->initg[token[4] - '0'].final = cs.final;
272 1.1 tshiozak ei->initg[token[4] - '0'].interm = cs.interm;
273 1.1 tshiozak ei->initg[token[4] - '0'].vers = cs.vers;
274 1.1 tshiozak
275 1.1 tshiozak return (_MATCH);
276 1.1 tshiozak }
277 1.1 tshiozak
278 1.1 tshiozak static __inline int
279 1.1 tshiozak get_max(_ISO2022EncodingInfo * __restrict ei,
280 1.1 tshiozak const char * __restrict token)
281 1.1 tshiozak {
282 1.1 tshiozak if (!strcmp(token, "MAX1")) {
283 1.1 tshiozak ei->maxcharset = 1;
284 1.1 tshiozak } else if (!strcmp(token, "MAX2")) {
285 1.1 tshiozak ei->maxcharset = 2;
286 1.1 tshiozak } else if (!strcmp(token, "MAX3")) {
287 1.1 tshiozak ei->maxcharset = 3;
288 1.1 tshiozak } else
289 1.1 tshiozak return (_NOTMATCH);
290 1.1 tshiozak
291 1.1 tshiozak return (_MATCH);
292 1.1 tshiozak }
293 1.1 tshiozak
294 1.1 tshiozak
295 1.1 tshiozak static __inline int
296 1.1 tshiozak get_flags(_ISO2022EncodingInfo * __restrict ei,
297 1.1 tshiozak const char * __restrict token)
298 1.1 tshiozak {
299 1.1 tshiozak int i;
300 1.1 tshiozak static struct {
301 1.1 tshiozak const char *tag;
302 1.1 tshiozak int flag;
303 1.1 tshiozak } const tags[] = {
304 1.1 tshiozak { "DUMMY", 0 },
305 1.1 tshiozak { "8BIT", F_8BIT },
306 1.1 tshiozak { "NOOLD", F_NOOLD },
307 1.1 tshiozak { "SI", F_SI },
308 1.1 tshiozak { "SO", F_SO },
309 1.1 tshiozak { "LS0", F_LS0 },
310 1.1 tshiozak { "LS1", F_LS1 },
311 1.1 tshiozak { "LS2", F_LS2 },
312 1.1 tshiozak { "LS3", F_LS3 },
313 1.1 tshiozak { "LS1R", F_LS1R },
314 1.1 tshiozak { "LS2R", F_LS2R },
315 1.1 tshiozak { "LS3R", F_LS3R },
316 1.1 tshiozak { "SS2", F_SS2 },
317 1.1 tshiozak { "SS3", F_SS3 },
318 1.1 tshiozak { "SS2R", F_SS2R },
319 1.1 tshiozak { "SS3R", F_SS3R },
320 1.1 tshiozak { NULL, 0 }
321 1.1 tshiozak };
322 1.1 tshiozak
323 1.1 tshiozak for (i = 0; tags[i].tag; i++) {
324 1.1 tshiozak if (!strcmp(token, tags[i].tag)) {
325 1.1 tshiozak ei->flags |= tags[i].flag;
326 1.1 tshiozak return (_MATCH);
327 1.1 tshiozak }
328 1.1 tshiozak }
329 1.1 tshiozak
330 1.1 tshiozak return (_NOTMATCH);
331 1.1 tshiozak }
332 1.1 tshiozak
333 1.1 tshiozak
334 1.1 tshiozak static __inline int
335 1.1 tshiozak _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
336 1.1 tshiozak const void * __restrict var, size_t lenvar)
337 1.1 tshiozak {
338 1.1 tshiozak char const *v, *e;
339 1.1 tshiozak char buf[20];
340 1.1 tshiozak int i, len, ret;
341 1.1 tshiozak
342 1.1 tshiozak _DIAGASSERT(ei != NULL);
343 1.1 tshiozak
344 1.1 tshiozak
345 1.1 tshiozak /*
346 1.1 tshiozak * parse VARIABLE section.
347 1.1 tshiozak */
348 1.1 tshiozak
349 1.1 tshiozak if (!var)
350 1.1 tshiozak return (EFTYPE);
351 1.1 tshiozak
352 1.1 tshiozak v = (const char *) var;
353 1.1 tshiozak
354 1.1 tshiozak /* initialize structure */
355 1.1 tshiozak ei->maxcharset = 0;
356 1.1 tshiozak for (i = 0; i < 4; i++) {
357 1.1 tshiozak ei->recommend[i] = NULL;
358 1.1 tshiozak ei->recommendsize[i] = 0;
359 1.1 tshiozak }
360 1.1 tshiozak ei->flags = 0;
361 1.1 tshiozak
362 1.1 tshiozak while (*v) {
363 1.1 tshiozak while (*v == ' ' || *v == '\t')
364 1.1 tshiozak ++v;
365 1.1 tshiozak
366 1.1 tshiozak /* find the token */
367 1.1 tshiozak e = v;
368 1.1 tshiozak while (*e && *e != ' ' && *e != '\t')
369 1.1 tshiozak ++e;
370 1.1 tshiozak if (*e) {
371 1.1 tshiozak len = e-v;
372 1.1 tshiozak if (len>=sizeof(buf))
373 1.1 tshiozak goto parsefail;
374 1.1 tshiozak sprintf(buf, "%.*s", len, v);
375 1.1 tshiozak ++e;
376 1.1 tshiozak }
377 1.1 tshiozak
378 1.1 tshiozak if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
379 1.1 tshiozak ;
380 1.1 tshiozak else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
381 1.1 tshiozak ;
382 1.1 tshiozak else if ((ret = get_max(ei, buf)) != _NOTMATCH)
383 1.1 tshiozak ;
384 1.1 tshiozak else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
385 1.1 tshiozak ;
386 1.1 tshiozak else
387 1.1 tshiozak ret = _PARSEFAIL;
388 1.1 tshiozak if (ret==_PARSEFAIL)
389 1.1 tshiozak goto parsefail;
390 1.1 tshiozak v = e;
391 1.1 tshiozak
392 1.1 tshiozak }
393 1.1 tshiozak
394 1.1 tshiozak return (0);
395 1.1 tshiozak
396 1.1 tshiozak parsefail:
397 1.1 tshiozak free(ei->recommend[0]);
398 1.1 tshiozak free(ei->recommend[1]);
399 1.1 tshiozak free(ei->recommend[2]);
400 1.1 tshiozak free(ei->recommend[3]);
401 1.1 tshiozak
402 1.1 tshiozak return (EFTYPE);
403 1.1 tshiozak }
404 1.1 tshiozak
405 1.1 tshiozak static __inline void
406 1.1 tshiozak /*ARGSUSED*/
407 1.1 tshiozak _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
408 1.1 tshiozak _ISO2022State * __restrict s)
409 1.1 tshiozak {
410 1.1 tshiozak int i;
411 1.1 tshiozak
412 1.1 tshiozak memset(s, 0, sizeof(*s));
413 1.1 tshiozak s->gl = 0;
414 1.1 tshiozak s->gr = (ei->flags & F_8BIT) ? 1 : -1;
415 1.1 tshiozak
416 1.1 tshiozak for (i = 0; i < 4; i++) {
417 1.1 tshiozak if (ei->initg[i].final) {
418 1.1 tshiozak s->g[i].type = ei->initg[i].type;
419 1.1 tshiozak s->g[i].final = ei->initg[i].final;
420 1.1 tshiozak s->g[i].interm = ei->initg[i].interm;
421 1.1 tshiozak }
422 1.1 tshiozak }
423 1.1 tshiozak s->singlegl = s->singlegr = -1;
424 1.5 yamt s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
425 1.1 tshiozak }
426 1.1 tshiozak
427 1.1 tshiozak static __inline void
428 1.1 tshiozak /*ARGSUSED*/
429 1.1 tshiozak _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
430 1.1 tshiozak void * __restrict pspriv,
431 1.1 tshiozak const _ISO2022State * __restrict s)
432 1.1 tshiozak {
433 1.1 tshiozak memcpy(pspriv, (const void *)s, sizeof(*s));
434 1.1 tshiozak }
435 1.1 tshiozak
436 1.1 tshiozak static __inline void
437 1.1 tshiozak /*ARGSUSED*/
438 1.1 tshiozak _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
439 1.1 tshiozak _ISO2022State * __restrict s,
440 1.1 tshiozak const void * __restrict pspriv)
441 1.1 tshiozak {
442 1.1 tshiozak memcpy((void *)s, pspriv, sizeof(*s));
443 1.1 tshiozak }
444 1.1 tshiozak
445 1.1 tshiozak static int
446 1.1 tshiozak /*ARGSUSED*/
447 1.1 tshiozak _citrus_ISO2022_stdencoding_init(_ISO2022EncodingInfo * __restrict ei,
448 1.1 tshiozak const void * __restrict var, size_t lenvar)
449 1.1 tshiozak {
450 1.1 tshiozak
451 1.1 tshiozak _DIAGASSERT(ei != NULL);
452 1.1 tshiozak
453 1.1 tshiozak return _citrus_ISO2022_parse_variable(ei, var, lenvar);
454 1.1 tshiozak }
455 1.1 tshiozak
456 1.1 tshiozak static void
457 1.1 tshiozak /*ARGSUSED*/
458 1.1 tshiozak _citrus_ISO2022_stdencoding_uninit(_ISO2022EncodingInfo *ei)
459 1.1 tshiozak {
460 1.1 tshiozak }
461 1.1 tshiozak
462 1.1 tshiozak #define ESC '\033'
463 1.1 tshiozak #define ECMA -1
464 1.1 tshiozak #define INTERM -2
465 1.1 tshiozak #define OECMA -3
466 1.1 tshiozak static struct seqtable {
467 1.1 tshiozak int type;
468 1.1 tshiozak int csoff;
469 1.1 tshiozak int finaloff;
470 1.1 tshiozak int intermoff;
471 1.1 tshiozak int versoff;
472 1.1 tshiozak int len;
473 1.1 tshiozak int chars[10];
474 1.1 tshiozak } seqtable[] = {
475 1.1 tshiozak /* G0 94MULTI special */
476 1.1 tshiozak { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
477 1.1 tshiozak /* G0 94MULTI special with version identification */
478 1.1 tshiozak { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
479 1.1 tshiozak /* G? 94 */
480 1.1 tshiozak { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
481 1.1 tshiozak /* G? 94 with 2nd intermediate char */
482 1.1 tshiozak { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
483 1.1 tshiozak /* G? 96 */
484 1.1 tshiozak { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
485 1.1 tshiozak /* G? 96 with 2nd intermediate char */
486 1.1 tshiozak { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
487 1.1 tshiozak /* G? 94MULTI */
488 1.1 tshiozak { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
489 1.1 tshiozak /* G? 96MULTI */
490 1.1 tshiozak { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
491 1.1 tshiozak /* G? 94MULTI with version specification */
492 1.1 tshiozak { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
493 1.1 tshiozak /* LS2/3 */
494 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
495 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
496 1.1 tshiozak /* LS1/2/3R */
497 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
498 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
499 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
500 1.1 tshiozak /* SS2/3 */
501 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
502 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
503 1.1 tshiozak /* end of records */
504 1.1 tshiozak { 0, }
505 1.1 tshiozak };
506 1.1 tshiozak
507 1.1 tshiozak static int
508 1.1 tshiozak seqmatch(const char * __restrict s, size_t n,
509 1.1 tshiozak const struct seqtable * __restrict sp)
510 1.1 tshiozak {
511 1.1 tshiozak const int *p;
512 1.1 tshiozak
513 1.1 tshiozak _DIAGASSERT(s != NULL);
514 1.1 tshiozak _DIAGASSERT(sp != NULL);
515 1.1 tshiozak
516 1.1 tshiozak p = sp->chars;
517 1.1 tshiozak while (p - sp->chars < n && p - sp->chars < sp->len) {
518 1.1 tshiozak switch (*p) {
519 1.1 tshiozak case ECMA:
520 1.1 tshiozak if (!isecma(*s))
521 1.1 tshiozak goto terminate;
522 1.1 tshiozak break;
523 1.1 tshiozak case OECMA:
524 1.1 tshiozak if (*s && strchr("@AB", *s))
525 1.1 tshiozak break;
526 1.1 tshiozak else
527 1.1 tshiozak goto terminate;
528 1.1 tshiozak case INTERM:
529 1.1 tshiozak if (!isinterm(*s))
530 1.1 tshiozak goto terminate;
531 1.1 tshiozak break;
532 1.1 tshiozak case CS94:
533 1.1 tshiozak if (*s && strchr("()*+", *s))
534 1.1 tshiozak break;
535 1.1 tshiozak else
536 1.1 tshiozak goto terminate;
537 1.1 tshiozak case CS96:
538 1.1 tshiozak if (*s && strchr(",-./", *s))
539 1.1 tshiozak break;
540 1.1 tshiozak else
541 1.1 tshiozak goto terminate;
542 1.1 tshiozak default:
543 1.1 tshiozak if (*s != *p)
544 1.1 tshiozak goto terminate;
545 1.1 tshiozak break;
546 1.1 tshiozak }
547 1.1 tshiozak
548 1.1 tshiozak p++;
549 1.1 tshiozak s++;
550 1.1 tshiozak }
551 1.1 tshiozak
552 1.1 tshiozak terminate:
553 1.1 tshiozak return p - sp->chars;
554 1.1 tshiozak }
555 1.1 tshiozak
556 1.1 tshiozak static wchar_t
557 1.1 tshiozak _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
558 1.1 tshiozak const char * __restrict string, size_t n,
559 1.1 tshiozak const char ** __restrict result,
560 1.1 tshiozak _ISO2022State * __restrict psenc)
561 1.1 tshiozak {
562 1.1 tshiozak wchar_t wchar = 0;
563 1.1 tshiozak int cur;
564 1.1 tshiozak struct seqtable *sp;
565 1.1 tshiozak int nmatch;
566 1.1 tshiozak int i;
567 1.1 tshiozak
568 1.1 tshiozak _DIAGASSERT(ei != NULL);
569 1.1 tshiozak _DIAGASSERT(state != NULL);
570 1.1 tshiozak _DIAGASSERT(string != NULL);
571 1.1 tshiozak /* result may be NULL */
572 1.1 tshiozak
573 1.1 tshiozak while (1) {
574 1.1 tshiozak /* SI/SO */
575 1.1 tshiozak if (1 <= n && string[0] == '\017') {
576 1.1 tshiozak psenc->gl = 0;
577 1.1 tshiozak string++;
578 1.1 tshiozak n--;
579 1.1 tshiozak continue;
580 1.1 tshiozak }
581 1.1 tshiozak if (1 <= n && string[0] == '\016') {
582 1.1 tshiozak psenc->gl = 1;
583 1.1 tshiozak string++;
584 1.1 tshiozak n--;
585 1.1 tshiozak continue;
586 1.1 tshiozak }
587 1.1 tshiozak
588 1.1 tshiozak /* SS2/3R */
589 1.1 tshiozak if (1 <= n && string[0] && strchr("\217\216", string[0])) {
590 1.1 tshiozak psenc->singlegl = psenc->singlegr =
591 1.1 tshiozak (string[0] - '\216') + 2;
592 1.1 tshiozak string++;
593 1.1 tshiozak n--;
594 1.1 tshiozak continue;
595 1.1 tshiozak }
596 1.1 tshiozak
597 1.1 tshiozak /* eat the letter if this is not ESC */
598 1.1 tshiozak if (1 <= n && string[0] != '\033')
599 1.1 tshiozak break;
600 1.1 tshiozak
601 1.1 tshiozak /* look for a perfect match from escape sequences */
602 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
603 1.1 tshiozak nmatch = seqmatch(string, n, sp);
604 1.1 tshiozak if (sp->len == nmatch && n >= sp->len)
605 1.1 tshiozak break;
606 1.1 tshiozak }
607 1.1 tshiozak
608 1.1 tshiozak if (!sp->len)
609 1.1 tshiozak goto notseq;
610 1.1 tshiozak
611 1.1 tshiozak if (sp->type != -1) {
612 1.1 tshiozak if (sp->csoff == -1)
613 1.1 tshiozak i = 0;
614 1.1 tshiozak else {
615 1.1 tshiozak switch (sp->type) {
616 1.1 tshiozak case CS94:
617 1.1 tshiozak case CS94MULTI:
618 1.1 tshiozak i = string[sp->csoff] - '(';
619 1.1 tshiozak break;
620 1.1 tshiozak case CS96:
621 1.1 tshiozak case CS96MULTI:
622 1.1 tshiozak i = string[sp->csoff] - ',';
623 1.1 tshiozak break;
624 1.1 tshiozak }
625 1.1 tshiozak }
626 1.1 tshiozak psenc->g[i].type = sp->type;
627 1.1 tshiozak psenc->g[i].final = '\0';
628 1.1 tshiozak psenc->g[i].interm = '\0';
629 1.1 tshiozak psenc->g[i].vers = '\0';
630 1.1 tshiozak /* sp->finaloff must not be -1 */
631 1.1 tshiozak if (sp->finaloff != -1)
632 1.1 tshiozak psenc->g[i].final = string[sp->finaloff];
633 1.1 tshiozak if (sp->intermoff != -1)
634 1.1 tshiozak psenc->g[i].interm = string[sp->intermoff];
635 1.1 tshiozak if (sp->versoff != -1)
636 1.1 tshiozak psenc->g[i].vers = string[sp->versoff];
637 1.1 tshiozak
638 1.1 tshiozak string += sp->len;
639 1.1 tshiozak n -= sp->len;
640 1.1 tshiozak continue;
641 1.1 tshiozak }
642 1.1 tshiozak
643 1.1 tshiozak /* LS2/3 */
644 1.1 tshiozak if (2 <= n && string[0] == '\033'
645 1.1 tshiozak && string[1] && strchr("no", string[1])) {
646 1.1 tshiozak psenc->gl = string[1] - 'n' + 2;
647 1.1 tshiozak string += 2;
648 1.1 tshiozak n -= 2;
649 1.1 tshiozak continue;
650 1.1 tshiozak }
651 1.1 tshiozak
652 1.1 tshiozak /* LS1/2/3R */
653 1.1 tshiozak /* XXX: { for vi showmatch */
654 1.1 tshiozak if (2 <= n && string[0] == '\033'
655 1.1 tshiozak && string[1] && strchr("~}|", string[1])) {
656 1.1 tshiozak psenc->gr = 3 - (string[1] - '|');
657 1.1 tshiozak string += 2;
658 1.1 tshiozak n -= 2;
659 1.1 tshiozak continue;
660 1.1 tshiozak }
661 1.1 tshiozak
662 1.1 tshiozak /* SS2/3 */
663 1.1 tshiozak if (2 <= n && string[0] == '\033'
664 1.1 tshiozak && string[1] && strchr("NO", string[1])) {
665 1.1 tshiozak psenc->singlegl = (string[1] - 'N') + 2;
666 1.1 tshiozak string += 2;
667 1.1 tshiozak n -= 2;
668 1.1 tshiozak continue;
669 1.1 tshiozak }
670 1.1 tshiozak
671 1.1 tshiozak notseq:
672 1.1 tshiozak /*
673 1.1 tshiozak * if we've got an unknown escape sequence, eat the ESC at the
674 1.1 tshiozak * head. otherwise, wait till full escape sequence comes.
675 1.1 tshiozak */
676 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
677 1.1 tshiozak nmatch = seqmatch(string, n, sp);
678 1.1 tshiozak if (!nmatch)
679 1.1 tshiozak continue;
680 1.1 tshiozak
681 1.1 tshiozak /*
682 1.1 tshiozak * if we are in the middle of escape sequence,
683 1.1 tshiozak * we still need to wait for more characters to come
684 1.1 tshiozak */
685 1.1 tshiozak if (n < sp->len) {
686 1.1 tshiozak if (nmatch == n) {
687 1.1 tshiozak if (result)
688 1.1 tshiozak *result = string;
689 1.1 tshiozak return (_ISO2022INVALID);
690 1.1 tshiozak }
691 1.1 tshiozak } else {
692 1.1 tshiozak if (nmatch == sp->len) {
693 1.1 tshiozak /* this case should not happen */
694 1.1 tshiozak goto eat;
695 1.1 tshiozak }
696 1.1 tshiozak }
697 1.1 tshiozak }
698 1.1 tshiozak
699 1.1 tshiozak break;
700 1.1 tshiozak }
701 1.1 tshiozak
702 1.1 tshiozak eat:
703 1.1 tshiozak /* no letter to eat */
704 1.1 tshiozak if (n < 1) {
705 1.1 tshiozak if (result)
706 1.1 tshiozak *result = string;
707 1.1 tshiozak return (_ISO2022INVALID);
708 1.1 tshiozak }
709 1.1 tshiozak
710 1.1 tshiozak /* normal chars. always eat C0/C1 as is. */
711 1.1 tshiozak if (iscntl(*string & 0xff))
712 1.1 tshiozak cur = -1;
713 1.1 tshiozak else if (*string & 0x80) {
714 1.1 tshiozak cur = (psenc->singlegr == -1)
715 1.1 tshiozak ? psenc->gr : psenc->singlegr;
716 1.1 tshiozak } else {
717 1.1 tshiozak cur = (psenc->singlegl == -1)
718 1.1 tshiozak ? psenc->gl : psenc->singlegl;
719 1.1 tshiozak }
720 1.1 tshiozak
721 1.1 tshiozak if (cur == -1) {
722 1.1 tshiozak asis:
723 1.1 tshiozak wchar = *string++ & 0xff;
724 1.1 tshiozak if (result)
725 1.1 tshiozak *result = string;
726 1.1 tshiozak /* reset single shift state */
727 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
728 1.1 tshiozak return wchar;
729 1.1 tshiozak }
730 1.1 tshiozak
731 1.1 tshiozak /* length error check */
732 1.1 tshiozak switch (psenc->g[cur].type) {
733 1.1 tshiozak case CS94MULTI:
734 1.1 tshiozak case CS96MULTI:
735 1.1 tshiozak if (!isthree(psenc->g[cur].final)) {
736 1.1 tshiozak if (2 <= n
737 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80))
738 1.1 tshiozak break;
739 1.1 tshiozak } else {
740 1.1 tshiozak if (3 <= n
741 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80)
742 1.1 tshiozak && (string[0] & 0x80) == (string[2] & 0x80))
743 1.1 tshiozak break;
744 1.1 tshiozak }
745 1.1 tshiozak
746 1.1 tshiozak /* we still need to wait for more characters to come */
747 1.1 tshiozak if (result)
748 1.1 tshiozak *result = string;
749 1.1 tshiozak return (_ISO2022INVALID);
750 1.1 tshiozak
751 1.1 tshiozak case CS94:
752 1.1 tshiozak case CS96:
753 1.1 tshiozak if (1 <= n)
754 1.1 tshiozak break;
755 1.1 tshiozak
756 1.1 tshiozak /* we still need to wait for more characters to come */
757 1.1 tshiozak if (result)
758 1.1 tshiozak *result = string;
759 1.1 tshiozak return (_ISO2022INVALID);
760 1.1 tshiozak }
761 1.1 tshiozak
762 1.1 tshiozak /* range check */
763 1.1 tshiozak switch (psenc->g[cur].type) {
764 1.1 tshiozak case CS94:
765 1.1 tshiozak if (!(is94(string[0] & 0x7f)))
766 1.1 tshiozak goto asis;
767 1.1 tshiozak case CS96:
768 1.1 tshiozak if (!(is96(string[0] & 0x7f)))
769 1.1 tshiozak goto asis;
770 1.1 tshiozak break;
771 1.1 tshiozak case CS94MULTI:
772 1.1 tshiozak if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
773 1.1 tshiozak goto asis;
774 1.1 tshiozak break;
775 1.1 tshiozak case CS96MULTI:
776 1.1 tshiozak if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
777 1.1 tshiozak goto asis;
778 1.1 tshiozak break;
779 1.1 tshiozak }
780 1.1 tshiozak
781 1.1 tshiozak /* extract the character. */
782 1.1 tshiozak switch (psenc->g[cur].type) {
783 1.1 tshiozak case CS94:
784 1.1 tshiozak /* special case for ASCII. */
785 1.1 tshiozak if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
786 1.1 tshiozak wchar = *string++;
787 1.1 tshiozak wchar &= 0x7f;
788 1.1 tshiozak break;
789 1.1 tshiozak }
790 1.1 tshiozak wchar = psenc->g[cur].final;
791 1.1 tshiozak wchar = (wchar << 8);
792 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
793 1.1 tshiozak wchar = (wchar << 8);
794 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
795 1.1 tshiozak break;
796 1.1 tshiozak case CS96:
797 1.1 tshiozak /* special case for ISO-8859-1. */
798 1.1 tshiozak if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
799 1.1 tshiozak wchar = *string++;
800 1.1 tshiozak wchar &= 0x7f;
801 1.1 tshiozak wchar |= 0x80;
802 1.1 tshiozak break;
803 1.1 tshiozak }
804 1.1 tshiozak wchar = psenc->g[cur].final;
805 1.1 tshiozak wchar = (wchar << 8);
806 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
807 1.1 tshiozak wchar = (wchar << 8);
808 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
809 1.1 tshiozak wchar |= 0x80;
810 1.1 tshiozak break;
811 1.1 tshiozak case CS94MULTI:
812 1.1 tshiozak case CS96MULTI:
813 1.1 tshiozak wchar = psenc->g[cur].final;
814 1.1 tshiozak wchar = (wchar << 8);
815 1.1 tshiozak if (isthree(psenc->g[cur].final))
816 1.1 tshiozak wchar |= (*string++ & 0x7f);
817 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
818 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
819 1.1 tshiozak if (psenc->g[cur].type == CS96MULTI)
820 1.1 tshiozak wchar |= 0x80;
821 1.1 tshiozak break;
822 1.1 tshiozak }
823 1.1 tshiozak
824 1.1 tshiozak if (result)
825 1.1 tshiozak *result = string;
826 1.1 tshiozak /* reset single shift state */
827 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
828 1.1 tshiozak return wchar;
829 1.1 tshiozak }
830 1.1 tshiozak
831 1.1 tshiozak
832 1.1 tshiozak
833 1.1 tshiozak static int
834 1.1 tshiozak _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
835 1.1 tshiozak wchar_t * __restrict pwc,
836 1.1 tshiozak const char ** __restrict s,
837 1.1 tshiozak size_t n, _ISO2022State * __restrict psenc,
838 1.1 tshiozak size_t * __restrict nresult)
839 1.1 tshiozak {
840 1.1 tshiozak wchar_t wchar;
841 1.1 tshiozak const char *s0, *p, *result;
842 1.1 tshiozak int c;
843 1.1 tshiozak int chlenbak;
844 1.1 tshiozak
845 1.1 tshiozak _DIAGASSERT(nresult != 0);
846 1.1 tshiozak _DIAGASSERT(ei != NULL);
847 1.1 tshiozak _DIAGASSERT(psenc != NULL);
848 1.1 tshiozak _DIAGASSERT(s != NULL);
849 1.1 tshiozak
850 1.1 tshiozak s0 = *s;
851 1.1 tshiozak c = 0;
852 1.1 tshiozak chlenbak = psenc->chlen;
853 1.1 tshiozak
854 1.1 tshiozak /*
855 1.1 tshiozak * if we have something in buffer, use that.
856 1.1 tshiozak * otherwise, skip here
857 1.1 tshiozak */
858 1.1 tshiozak if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
859 1.1 tshiozak /* illgeal state */
860 1.1 tshiozak _citrus_ISO2022_init_state(ei, psenc);
861 1.1 tshiozak goto encoding_error;
862 1.1 tshiozak }
863 1.1 tshiozak if (psenc->chlen == 0)
864 1.1 tshiozak goto emptybuf;
865 1.1 tshiozak
866 1.1 tshiozak /* buffer is not empty */
867 1.1 tshiozak p = psenc->ch;
868 1.1 tshiozak while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
869 1.1 tshiozak if (n > 0) {
870 1.1 tshiozak psenc->ch[psenc->chlen++] = *s0++;
871 1.1 tshiozak n--;
872 1.1 tshiozak }
873 1.1 tshiozak
874 1.1 tshiozak wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
875 1.1 tshiozak &result, psenc);
876 1.1 tshiozak if (wchar != _ISO2022INVALID) {
877 1.1 tshiozak c += result - p;
878 1.1 tshiozak if (psenc->chlen > c)
879 1.1 tshiozak memmove(psenc->ch, result, psenc->chlen - c);
880 1.1 tshiozak if (psenc->chlen < c)
881 1.1 tshiozak psenc->chlen = 0;
882 1.1 tshiozak else
883 1.1 tshiozak psenc->chlen -= c;
884 1.1 tshiozak goto output;
885 1.1 tshiozak }
886 1.1 tshiozak
887 1.1 tshiozak c += result - p;
888 1.1 tshiozak p = result;
889 1.1 tshiozak
890 1.1 tshiozak if (n == 0)
891 1.1 tshiozak goto restart;
892 1.1 tshiozak }
893 1.1 tshiozak
894 1.1 tshiozak /* escape sequence too long? */
895 1.1 tshiozak goto encoding_error;
896 1.1 tshiozak
897 1.1 tshiozak emptybuf:
898 1.1 tshiozak wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
899 1.1 tshiozak if (wchar != _ISO2022INVALID) {
900 1.1 tshiozak c += result - s0;
901 1.1 tshiozak psenc->chlen = 0;
902 1.1 tshiozak s0 = result;
903 1.1 tshiozak goto output;
904 1.1 tshiozak }
905 1.1 tshiozak if (result > s0 && n > result - s0) {
906 1.1 tshiozak c += (result - s0);
907 1.1 tshiozak n -= (result - s0);
908 1.1 tshiozak s0 = result;
909 1.1 tshiozak goto emptybuf;
910 1.1 tshiozak }
911 1.1 tshiozak n += c;
912 1.1 tshiozak if (n < sizeof(psenc->ch)) {
913 1.1 tshiozak memcpy(psenc->ch, s0 - c, n);
914 1.1 tshiozak psenc->chlen = n;
915 1.1 tshiozak s0 = result;
916 1.1 tshiozak goto restart;
917 1.1 tshiozak }
918 1.1 tshiozak
919 1.1 tshiozak /* escape sequence too long? */
920 1.1 tshiozak
921 1.1 tshiozak encoding_error:
922 1.1 tshiozak psenc->chlen = 0;
923 1.1 tshiozak *nresult = (size_t)-1;
924 1.1 tshiozak return (EILSEQ);
925 1.1 tshiozak
926 1.1 tshiozak output:
927 1.1 tshiozak *s = s0;
928 1.1 tshiozak if (pwc)
929 1.1 tshiozak *pwc = wchar;
930 1.1 tshiozak
931 1.1 tshiozak if (!wchar)
932 1.1 tshiozak *nresult = 0;
933 1.1 tshiozak else
934 1.1 tshiozak *nresult = c - chlenbak;
935 1.1 tshiozak
936 1.1 tshiozak return (0);
937 1.1 tshiozak
938 1.1 tshiozak restart:
939 1.1 tshiozak *s = s0;
940 1.1 tshiozak *nresult = (size_t)-2;
941 1.1 tshiozak
942 1.1 tshiozak return (0);
943 1.1 tshiozak }
944 1.1 tshiozak
945 1.1 tshiozak static int
946 1.1 tshiozak recommendation(_ISO2022EncodingInfo * __restrict ei,
947 1.1 tshiozak _ISO2022Charset * __restrict cs)
948 1.1 tshiozak {
949 1.1 tshiozak int i, j;
950 1.1 tshiozak _ISO2022Charset *recommend;
951 1.1 tshiozak
952 1.1 tshiozak _DIAGASSERT(ei != NULL);
953 1.1 tshiozak _DIAGASSERT(cs != NULL);
954 1.1 tshiozak
955 1.1 tshiozak /* first, try a exact match. */
956 1.1 tshiozak for (i = 0; i < 4; i++) {
957 1.1 tshiozak recommend = ei->recommend[i];
958 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
959 1.1 tshiozak if (cs->type != recommend[j].type)
960 1.1 tshiozak continue;
961 1.1 tshiozak if (cs->final != recommend[j].final)
962 1.1 tshiozak continue;
963 1.1 tshiozak if (cs->interm != recommend[j].interm)
964 1.1 tshiozak continue;
965 1.1 tshiozak
966 1.1 tshiozak return i;
967 1.1 tshiozak }
968 1.1 tshiozak }
969 1.1 tshiozak
970 1.1 tshiozak /* then, try a wildcard match over final char. */
971 1.1 tshiozak for (i = 0; i < 4; i++) {
972 1.1 tshiozak recommend = ei->recommend[i];
973 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
974 1.1 tshiozak if (cs->type != recommend[j].type)
975 1.1 tshiozak continue;
976 1.1 tshiozak if (cs->final && (cs->final != recommend[j].final))
977 1.1 tshiozak continue;
978 1.1 tshiozak if (cs->interm && (cs->interm != recommend[j].interm))
979 1.1 tshiozak continue;
980 1.1 tshiozak
981 1.1 tshiozak return i;
982 1.1 tshiozak }
983 1.1 tshiozak }
984 1.1 tshiozak
985 1.1 tshiozak /* there's no recommendation. make a guess. */
986 1.1 tshiozak if (ei->maxcharset == 0) {
987 1.1 tshiozak return 0;
988 1.1 tshiozak } else {
989 1.1 tshiozak switch (cs->type) {
990 1.1 tshiozak case CS94:
991 1.1 tshiozak case CS94MULTI:
992 1.1 tshiozak return 0;
993 1.1 tshiozak case CS96:
994 1.1 tshiozak case CS96MULTI:
995 1.1 tshiozak return 1;
996 1.1 tshiozak }
997 1.1 tshiozak }
998 1.1 tshiozak return 0;
999 1.1 tshiozak }
1000 1.1 tshiozak
1001 1.1 tshiozak static int
1002 1.1 tshiozak _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t c,
1003 1.1 tshiozak char * __restrict string, size_t n,
1004 1.1 tshiozak char ** __restrict result,
1005 1.1 tshiozak _ISO2022State * __restrict psenc)
1006 1.1 tshiozak {
1007 1.1 tshiozak int i = 0, len;
1008 1.1 tshiozak _ISO2022Charset cs;
1009 1.1 tshiozak char *p;
1010 1.1 tshiozak char tmp[MB_LEN_MAX];
1011 1.1 tshiozak int target;
1012 1.1 tshiozak u_char mask;
1013 1.1 tshiozak int bit8;
1014 1.1 tshiozak
1015 1.1 tshiozak _DIAGASSERT(ei != NULL);
1016 1.1 tshiozak _DIAGASSERT(string != NULL);
1017 1.1 tshiozak /* result may be NULL */
1018 1.1 tshiozak /* state appears to be unused */
1019 1.1 tshiozak
1020 1.1 tshiozak if (iscntl(c & 0xff)) {
1021 1.1 tshiozak /* go back to ASCII on control chars */
1022 1.1 tshiozak cs.type = CS94;
1023 1.1 tshiozak cs.final = 'B';
1024 1.1 tshiozak cs.interm = '\0';
1025 1.1 tshiozak } else if (!(c & ~0xff)) {
1026 1.1 tshiozak if (c & 0x80) {
1027 1.1 tshiozak /* special treatment for ISO-8859-1 */
1028 1.1 tshiozak cs.type = CS96;
1029 1.1 tshiozak cs.final = 'A';
1030 1.1 tshiozak cs.interm = '\0';
1031 1.1 tshiozak } else {
1032 1.1 tshiozak /* special treatment for ASCII */
1033 1.1 tshiozak cs.type = CS94;
1034 1.1 tshiozak cs.final = 'B';
1035 1.1 tshiozak cs.interm = '\0';
1036 1.1 tshiozak }
1037 1.1 tshiozak } else {
1038 1.1 tshiozak cs.final = (c >> 24) & 0x7f;
1039 1.1 tshiozak if ((c >> 16) & 0x80)
1040 1.1 tshiozak cs.interm = (c >> 16) & 0x7f;
1041 1.1 tshiozak else
1042 1.1 tshiozak cs.interm = '\0';
1043 1.1 tshiozak if (c & 0x80)
1044 1.1 tshiozak cs.type = (c & 0x00007f00) ? CS96MULTI : CS96;
1045 1.1 tshiozak else
1046 1.1 tshiozak cs.type = (c & 0x00007f00) ? CS94MULTI : CS94;
1047 1.1 tshiozak }
1048 1.1 tshiozak target = recommendation(ei, &cs);
1049 1.1 tshiozak p = tmp;
1050 1.1 tshiozak bit8 = ei->flags & F_8BIT;
1051 1.1 tshiozak
1052 1.1 tshiozak /* designate the charset onto the target plane(G0/1/2/3). */
1053 1.1 tshiozak if (psenc->g[target].type == cs.type
1054 1.1 tshiozak && psenc->g[target].final == cs.final
1055 1.1 tshiozak && psenc->g[target].interm == cs.interm)
1056 1.1 tshiozak goto planeok;
1057 1.1 tshiozak
1058 1.1 tshiozak *p++ = '\033';
1059 1.1 tshiozak if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1060 1.1 tshiozak *p++ = '$';
1061 1.1 tshiozak if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1062 1.1 tshiozak && !cs.interm && !(ei->flags & F_NOOLD))
1063 1.1 tshiozak ;
1064 1.1 tshiozak else if (cs.type == CS94 || cs.type == CS94MULTI)
1065 1.1 tshiozak *p++ = "()*+"[target];
1066 1.1 tshiozak else
1067 1.1 tshiozak *p++ = ",-./"[target];
1068 1.1 tshiozak if (cs.interm)
1069 1.1 tshiozak *p++ = cs.interm;
1070 1.1 tshiozak *p++ = cs.final;
1071 1.1 tshiozak
1072 1.1 tshiozak psenc->g[target].type = cs.type;
1073 1.1 tshiozak psenc->g[target].final = cs.final;
1074 1.1 tshiozak psenc->g[target].interm = cs.interm;
1075 1.1 tshiozak
1076 1.1 tshiozak planeok:
1077 1.1 tshiozak
1078 1.1 tshiozak /* invoke the plane onto GL or GR. */
1079 1.1 tshiozak if (psenc->gl == target)
1080 1.1 tshiozak goto sideok;
1081 1.1 tshiozak if (bit8 && psenc->gr == target)
1082 1.1 tshiozak goto sideok;
1083 1.1 tshiozak
1084 1.1 tshiozak if (target == 0 && (ei->flags & F_LS0)) {
1085 1.1 tshiozak *p++ = '\017';
1086 1.1 tshiozak psenc->gl = 0;
1087 1.1 tshiozak } else if (target == 1 && (ei->flags & F_LS1)) {
1088 1.1 tshiozak *p++ = '\016';
1089 1.1 tshiozak psenc->gl = 1;
1090 1.1 tshiozak } else if (target == 2 && (ei->flags & F_LS2)) {
1091 1.1 tshiozak *p++ = '\033';
1092 1.1 tshiozak *p++ = 'n';
1093 1.1 tshiozak psenc->gl = 2;
1094 1.1 tshiozak } else if (target == 3 && (ei->flags & F_LS3)) {
1095 1.1 tshiozak *p++ = '\033';
1096 1.1 tshiozak *p++ = 'o';
1097 1.1 tshiozak psenc->gl = 3;
1098 1.1 tshiozak } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1099 1.1 tshiozak *p++ = '\033';
1100 1.1 tshiozak *p++ = '~';
1101 1.1 tshiozak psenc->gr = 1;
1102 1.1 tshiozak } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1103 1.1 tshiozak *p++ = '\033';
1104 1.1 tshiozak /*{*/
1105 1.1 tshiozak *p++ = '}';
1106 1.1 tshiozak psenc->gr = 2;
1107 1.1 tshiozak } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1108 1.1 tshiozak *p++ = '\033';
1109 1.1 tshiozak *p++ = '|';
1110 1.1 tshiozak psenc->gr = 3;
1111 1.1 tshiozak } else if (target == 2 && (ei->flags & F_SS2)) {
1112 1.1 tshiozak *p++ = '\033';
1113 1.1 tshiozak *p++ = 'N';
1114 1.1 tshiozak psenc->singlegl = 2;
1115 1.1 tshiozak } else if (target == 3 && (ei->flags & F_SS3)) {
1116 1.1 tshiozak *p++ = '\033';
1117 1.1 tshiozak *p++ = 'O';
1118 1.1 tshiozak psenc->singlegl = 3;
1119 1.1 tshiozak } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1120 1.1 tshiozak *p++ = '\216';
1121 1.1 tshiozak *p++ = 'N';
1122 1.1 tshiozak psenc->singlegl = psenc->singlegr = 2;
1123 1.1 tshiozak } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1124 1.1 tshiozak *p++ = '\217';
1125 1.1 tshiozak *p++ = 'O';
1126 1.1 tshiozak psenc->singlegl = psenc->singlegr = 3;
1127 1.1 tshiozak } else
1128 1.1 tshiozak abort();
1129 1.1 tshiozak
1130 1.1 tshiozak sideok:
1131 1.1 tshiozak if (psenc->singlegl == target)
1132 1.1 tshiozak mask = 0x00;
1133 1.1 tshiozak else if (psenc->singlegr == target)
1134 1.1 tshiozak mask = 0x80;
1135 1.1 tshiozak else if (psenc->gl == target)
1136 1.1 tshiozak mask = 0x00;
1137 1.1 tshiozak else if ((ei->flags & F_8BIT) && psenc->gr == target)
1138 1.1 tshiozak mask = 0x80;
1139 1.1 tshiozak else
1140 1.1 tshiozak abort();
1141 1.1 tshiozak
1142 1.1 tshiozak switch (cs.type) {
1143 1.1 tshiozak case CS94:
1144 1.1 tshiozak case CS96:
1145 1.1 tshiozak i = 1;
1146 1.1 tshiozak break;
1147 1.1 tshiozak case CS94MULTI:
1148 1.1 tshiozak case CS96MULTI:
1149 1.1 tshiozak i = isthree(cs.final) ? 3 : 2;
1150 1.1 tshiozak break;
1151 1.1 tshiozak }
1152 1.1 tshiozak while (i-- > 0)
1153 1.1 tshiozak *p++ = ((c >> (i << 3)) & 0x7f) | mask;
1154 1.1 tshiozak
1155 1.1 tshiozak /* reset single shift state */
1156 1.1 tshiozak psenc->singlegl = psenc->singlegr = -1;
1157 1.1 tshiozak
1158 1.1 tshiozak len = p - tmp;
1159 1.1 tshiozak if (n < len) {
1160 1.1 tshiozak if (result)
1161 1.1 tshiozak *result = (char *)0;
1162 1.1 tshiozak } else {
1163 1.1 tshiozak if (result)
1164 1.1 tshiozak *result = string + len;
1165 1.1 tshiozak memcpy(string, tmp, len);
1166 1.1 tshiozak }
1167 1.1 tshiozak return len;
1168 1.1 tshiozak }
1169 1.1 tshiozak
1170 1.1 tshiozak static int
1171 1.1 tshiozak _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1172 1.1 tshiozak char * __restrict s, size_t n, wchar_t wc,
1173 1.1 tshiozak _ISO2022State * __restrict psenc,
1174 1.1 tshiozak size_t * __restrict nresult)
1175 1.1 tshiozak {
1176 1.1 tshiozak char buf[MB_LEN_MAX];
1177 1.1 tshiozak char *result;
1178 1.1 tshiozak int len;
1179 1.1 tshiozak
1180 1.1 tshiozak _DIAGASSERT(ei != NULL);
1181 1.1 tshiozak _DIAGASSERT(nresult != 0);
1182 1.1 tshiozak _DIAGASSERT(s != NULL);
1183 1.1 tshiozak
1184 1.1 tshiozak /* XXX state will be modified after this operation... */
1185 1.1 tshiozak len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1186 1.1 tshiozak if (sizeof(buf) < len || n < len) {
1187 1.1 tshiozak /* XXX should recover state? */
1188 1.1 tshiozak goto ilseq;
1189 1.1 tshiozak }
1190 1.1 tshiozak
1191 1.1 tshiozak memcpy(s, buf, len);
1192 1.1 tshiozak *nresult = (size_t)len;
1193 1.1 tshiozak return (0);
1194 1.1 tshiozak
1195 1.1 tshiozak ilseq:
1196 1.1 tshiozak /* bound check failure */
1197 1.1 tshiozak *nresult = (size_t)-1;
1198 1.1 tshiozak return (EILSEQ);
1199 1.1 tshiozak }
1200 1.1 tshiozak
1201 1.1 tshiozak /* ----------------------------------------------------------------------
1202 1.1 tshiozak * public interface for ctype
1203 1.1 tshiozak */
1204 1.1 tshiozak
1205 1.1 tshiozak _CITRUS_CTYPE_DECLS(ISO2022);
1206 1.1 tshiozak _CITRUS_CTYPE_DEF_OPS(ISO2022);
1207 1.1 tshiozak
1208 1.1 tshiozak #include "citrus_ctype_template.h"
1209