citrus_iso2022.c revision 1.4 1 1.4 yamt /* $NetBSD: citrus_iso2022.c,v 1.4 2002/03/28 01:59:50 yamt Exp $ */
2 1.1 tshiozak
3 1.1 tshiozak /*-
4 1.1 tshiozak * Copyright (c)1999, 2002 Citrus Project,
5 1.1 tshiozak * All rights reserved.
6 1.1 tshiozak *
7 1.1 tshiozak * Redistribution and use in source and binary forms, with or without
8 1.1 tshiozak * modification, are permitted provided that the following conditions
9 1.1 tshiozak * are met:
10 1.1 tshiozak * 1. Redistributions of source code must retain the above copyright
11 1.1 tshiozak * notice, this list of conditions and the following disclaimer.
12 1.1 tshiozak * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 tshiozak * notice, this list of conditions and the following disclaimer in the
14 1.1 tshiozak * documentation and/or other materials provided with the distribution.
15 1.1 tshiozak *
16 1.1 tshiozak * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 1.1 tshiozak * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1 tshiozak * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1 tshiozak * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 1.1 tshiozak * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1 tshiozak * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1 tshiozak * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1 tshiozak * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1 tshiozak * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1 tshiozak * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1 tshiozak * SUCH DAMAGE.
27 1.1 tshiozak *
28 1.1 tshiozak * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29 1.1 tshiozak */
30 1.1 tshiozak
31 1.1 tshiozak #include <sys/cdefs.h>
32 1.1 tshiozak #if defined(LIBC_SCCS) && !defined(lint)
33 1.4 yamt __RCSID("$NetBSD: citrus_iso2022.c,v 1.4 2002/03/28 01:59:50 yamt Exp $");
34 1.1 tshiozak #endif /* LIBC_SCCS and not lint */
35 1.1 tshiozak
36 1.1 tshiozak #include <assert.h>
37 1.1 tshiozak #include <errno.h>
38 1.1 tshiozak #include <string.h>
39 1.1 tshiozak #include <stdio.h>
40 1.1 tshiozak #include <stdlib.h>
41 1.1 tshiozak #include <stddef.h>
42 1.1 tshiozak #include <locale.h>
43 1.1 tshiozak #include <wchar.h>
44 1.1 tshiozak #include <sys/types.h>
45 1.1 tshiozak #include <limits.h>
46 1.1 tshiozak #include "citrus_module.h"
47 1.1 tshiozak #include "citrus_ctype.h"
48 1.1 tshiozak #include "citrus_iso2022.h"
49 1.1 tshiozak
50 1.1 tshiozak
51 1.1 tshiozak /* ----------------------------------------------------------------------
52 1.1 tshiozak * private definitions used by the templates
53 1.1 tshiozak */
54 1.1 tshiozak
55 1.1 tshiozak
56 1.1 tshiozak /*
57 1.1 tshiozak * wchar_t mappings:
58 1.1 tshiozak * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
59 1.1 tshiozak * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
60 1.1 tshiozak * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
61 1.1 tshiozak * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
62 1.1 tshiozak * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
63 1.1 tshiozak * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
64 1.1 tshiozak * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
65 1.1 tshiozak * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
66 1.1 tshiozak * 94x94 charset (ESC & V ESC $ ( F)
67 1.1 tshiozak * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
68 1.1 tshiozak * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
69 1.1 tshiozak * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
70 1.1 tshiozak */
71 1.1 tshiozak
72 1.1 tshiozak typedef struct {
73 1.1 tshiozak u_char type;
74 1.1 tshiozak #define CS94 (0U)
75 1.1 tshiozak #define CS96 (1U)
76 1.1 tshiozak #define CS94MULTI (2U)
77 1.1 tshiozak #define CS96MULTI (3U)
78 1.1 tshiozak
79 1.1 tshiozak u_char final;
80 1.1 tshiozak u_char interm;
81 1.1 tshiozak u_char vers;
82 1.1 tshiozak } _ISO2022Charset;
83 1.1 tshiozak
84 1.1 tshiozak typedef struct {
85 1.1 tshiozak _ISO2022Charset g[4];
86 1.1 tshiozak /* need 3 bits to hold -1, 0, ..., 3 */
87 1.1 tshiozak int gl:3,
88 1.1 tshiozak gr:3,
89 1.1 tshiozak singlegl:3,
90 1.1 tshiozak singlegr:3;
91 1.1 tshiozak char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
92 1.1 tshiozak int chlen;
93 1.4 yamt } _ISO2022State;
94 1.1 tshiozak
95 1.1 tshiozak typedef struct {
96 1.1 tshiozak _ISO2022Charset *recommend[4];
97 1.1 tshiozak size_t recommendsize[4];
98 1.1 tshiozak _ISO2022Charset initg[4];
99 1.1 tshiozak int maxcharset;
100 1.1 tshiozak int flags;
101 1.1 tshiozak #define F_8BIT 0x0001
102 1.1 tshiozak #define F_NOOLD 0x0002
103 1.1 tshiozak #define F_SI 0x0010 /*0F*/
104 1.1 tshiozak #define F_SO 0x0020 /*0E*/
105 1.1 tshiozak #define F_LS0 0x0010 /*0F*/
106 1.1 tshiozak #define F_LS1 0x0020 /*0E*/
107 1.1 tshiozak #define F_LS2 0x0040 /*ESC n*/
108 1.1 tshiozak #define F_LS3 0x0080 /*ESC o*/
109 1.1 tshiozak #define F_LS1R 0x0100 /*ESC ~*/
110 1.1 tshiozak #define F_LS2R 0x0200 /*ESC }*/
111 1.1 tshiozak #define F_LS3R 0x0400 /*ESC |*/
112 1.1 tshiozak #define F_SS2 0x0800 /*ESC N*/
113 1.1 tshiozak #define F_SS3 0x1000 /*ESC O*/
114 1.1 tshiozak #define F_SS2R 0x2000 /*8E*/
115 1.1 tshiozak #define F_SS3R 0x4000 /*8F*/
116 1.1 tshiozak } _ISO2022EncodingInfo;
117 1.1 tshiozak typedef struct {
118 1.1 tshiozak _ISO2022EncodingInfo ei;
119 1.1 tshiozak struct {
120 1.1 tshiozak /* for future multi-locale facility */
121 1.1 tshiozak _ISO2022State s_mblen;
122 1.1 tshiozak _ISO2022State s_mbrlen;
123 1.1 tshiozak _ISO2022State s_mbrtowc;
124 1.1 tshiozak _ISO2022State s_mbtowc;
125 1.1 tshiozak _ISO2022State s_mbsrtowcs;
126 1.1 tshiozak _ISO2022State s_wcrtomb;
127 1.1 tshiozak _ISO2022State s_wcsrtombs;
128 1.1 tshiozak _ISO2022State s_wctomb;
129 1.1 tshiozak } states;
130 1.1 tshiozak } _ISO2022CTypeInfo;
131 1.1 tshiozak
/*
 * Pointer conversions and accessors for the private structures.
 * Every expansion is fully parenthesized so that it behaves as a single
 * operand wherever it is used.
 */
#define _TO_EI(_cl_)			((_ISO2022EncodingInfo *)(_cl_))
#define _TO_CEI(_cl_)			((_ISO2022CTypeInfo *)(_cl_))
#define _TO_STATE(_ps_)			((_ISO2022State *)(_ps_))
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	((_cei_)->states.s_##_func_)

/*
 * Names this encoding supplies to the templates.
 */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1


/* value returned when no wide character could be produced */
#define _ISO2022INVALID			((wchar_t)-1)
147 1.1 tshiozak
/*
 * Byte classification helpers.  Each returns non-zero when x lies in
 * the range given in the comment.
 */

/* 0x00 - 0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* 0x80 - 0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* isc0, isc1, or 0x7f */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return (1);
	return (isc0(x) || isc1(x));
}

/* 0x21 - 0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* 0x20 - 0x7f */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}

/* 0x30 - 0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (!(x < 0x30 || x > 0x7f));
}

/* 0x20 - 0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* 0x60 - 0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
156 1.1 tshiozak
157 1.1 tshiozak static __inline int
158 1.1 tshiozak getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
159 1.1 tshiozak {
160 1.1 tshiozak
161 1.1 tshiozak _DIAGASSERT(p != NULL);
162 1.1 tshiozak _DIAGASSERT(cs != NULL);
163 1.1 tshiozak
164 1.1 tshiozak if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
165 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
166 1.1 tshiozak cs->interm = '\0';
167 1.1 tshiozak cs->vers = '\0';
168 1.1 tshiozak cs->type = CS94MULTI;
169 1.1 tshiozak } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
170 1.1 tshiozak cs->final = (u_char)(p[3] & 0xff);
171 1.1 tshiozak cs->interm = '\0';
172 1.1 tshiozak cs->vers = '\0';
173 1.1 tshiozak cs->type = CS96MULTI;
174 1.1 tshiozak } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
175 1.1 tshiozak cs->final = (u_char)(p[2] & 0xff);
176 1.1 tshiozak cs->interm = '\0';
177 1.1 tshiozak cs->vers = '\0';
178 1.1 tshiozak cs->type = CS94;
179 1.1 tshiozak } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
180 1.1 tshiozak cs->final = (u_char )(p[2] & 0xff);
181 1.1 tshiozak cs->interm = '\0';
182 1.1 tshiozak cs->vers = '\0';
183 1.1 tshiozak cs->type = CS96;
184 1.1 tshiozak } else {
185 1.1 tshiozak return 1;
186 1.1 tshiozak }
187 1.1 tshiozak
188 1.1 tshiozak return 0;
189 1.1 tshiozak }
190 1.1 tshiozak
191 1.1 tshiozak
192 1.1 tshiozak #define _NOTMATCH 0
193 1.1 tshiozak #define _MATCH 1
194 1.1 tshiozak #define _PARSEFAIL 2
195 1.1 tshiozak
196 1.1 tshiozak static __inline int
197 1.1 tshiozak get_recommend(_ISO2022EncodingInfo * __restrict ei,
198 1.1 tshiozak const char * __restrict token)
199 1.1 tshiozak {
200 1.1 tshiozak int i;
201 1.1 tshiozak _ISO2022Charset cs;
202 1.1 tshiozak
203 1.1 tshiozak if (!strchr("0123", token[0]) || token[1] != '=')
204 1.1 tshiozak return (_NOTMATCH);
205 1.1 tshiozak
206 1.1 tshiozak if (getcs(&token[2], &cs) == 0)
207 1.1 tshiozak ;
208 1.1 tshiozak else if (!strcmp(&token[2], "94")) {
209 1.1 tshiozak cs.final = (u_char)(token[4]);
210 1.1 tshiozak cs.interm = '\0';
211 1.1 tshiozak cs.vers = '\0';
212 1.1 tshiozak cs.type = CS94;
213 1.1 tshiozak } else if (!strcmp(&token[2], "96")) {
214 1.1 tshiozak cs.final = (u_char)(token[4]);
215 1.1 tshiozak cs.interm = '\0';
216 1.1 tshiozak cs.vers = '\0';
217 1.1 tshiozak cs.type = CS96;
218 1.1 tshiozak } else if (!strcmp(&token[2], "94$")) {
219 1.1 tshiozak cs.final = (u_char)(token[5]);
220 1.1 tshiozak cs.interm = '\0';
221 1.1 tshiozak cs.vers = '\0';
222 1.1 tshiozak cs.type = CS94MULTI;
223 1.1 tshiozak } else if (!strcmp(&token[2], "96$")) {
224 1.1 tshiozak cs.final = (u_char)(token[5]);
225 1.1 tshiozak cs.interm = '\0';
226 1.1 tshiozak cs.vers = '\0';
227 1.1 tshiozak cs.type = CS96MULTI;
228 1.1 tshiozak } else {
229 1.1 tshiozak return (_PARSEFAIL);
230 1.1 tshiozak }
231 1.1 tshiozak
232 1.1 tshiozak i = token[0] - '0';
233 1.1 tshiozak ei->recommendsize[i] += 1;
234 1.1 tshiozak if (!ei->recommend[i]) {
235 1.1 tshiozak ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
236 1.1 tshiozak } else {
237 1.1 tshiozak ei->recommend[i] =
238 1.1 tshiozak realloc(ei->recommend[i],
239 1.1 tshiozak sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
240 1.1 tshiozak }
241 1.1 tshiozak if (!ei->recommend[i])
242 1.1 tshiozak return (_PARSEFAIL);
243 1.1 tshiozak
244 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
245 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
246 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
247 1.1 tshiozak (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
248 1.1 tshiozak
249 1.1 tshiozak return (_MATCH);
250 1.1 tshiozak }
251 1.1 tshiozak
252 1.1 tshiozak static __inline int
253 1.1 tshiozak get_initg(_ISO2022EncodingInfo * __restrict ei,
254 1.1 tshiozak const char * __restrict token)
255 1.1 tshiozak {
256 1.1 tshiozak _ISO2022Charset cs;
257 1.1 tshiozak
258 1.1 tshiozak if (strncmp("INIT", &token[0], 4) ||
259 1.1 tshiozak !strchr("0123", token[4]) ||
260 1.1 tshiozak token[5] != '=')
261 1.1 tshiozak return (_NOTMATCH);
262 1.1 tshiozak
263 1.1 tshiozak if (getcs(&token[6], &cs) != 0)
264 1.1 tshiozak return (_PARSEFAIL);
265 1.1 tshiozak
266 1.1 tshiozak ei->initg[token[4] - '0'].type = cs.type;
267 1.1 tshiozak ei->initg[token[4] - '0'].final = cs.final;
268 1.1 tshiozak ei->initg[token[4] - '0'].interm = cs.interm;
269 1.1 tshiozak ei->initg[token[4] - '0'].vers = cs.vers;
270 1.1 tshiozak
271 1.1 tshiozak return (_MATCH);
272 1.1 tshiozak }
273 1.1 tshiozak
274 1.1 tshiozak static __inline int
275 1.1 tshiozak get_max(_ISO2022EncodingInfo * __restrict ei,
276 1.1 tshiozak const char * __restrict token)
277 1.1 tshiozak {
278 1.1 tshiozak if (!strcmp(token, "MAX1")) {
279 1.1 tshiozak ei->maxcharset = 1;
280 1.1 tshiozak } else if (!strcmp(token, "MAX2")) {
281 1.1 tshiozak ei->maxcharset = 2;
282 1.1 tshiozak } else if (!strcmp(token, "MAX3")) {
283 1.1 tshiozak ei->maxcharset = 3;
284 1.1 tshiozak } else
285 1.1 tshiozak return (_NOTMATCH);
286 1.1 tshiozak
287 1.1 tshiozak return (_MATCH);
288 1.1 tshiozak }
289 1.1 tshiozak
290 1.1 tshiozak
291 1.1 tshiozak static __inline int
292 1.1 tshiozak get_flags(_ISO2022EncodingInfo * __restrict ei,
293 1.1 tshiozak const char * __restrict token)
294 1.1 tshiozak {
295 1.1 tshiozak int i;
296 1.1 tshiozak static struct {
297 1.1 tshiozak const char *tag;
298 1.1 tshiozak int flag;
299 1.1 tshiozak } const tags[] = {
300 1.1 tshiozak { "DUMMY", 0 },
301 1.1 tshiozak { "8BIT", F_8BIT },
302 1.1 tshiozak { "NOOLD", F_NOOLD },
303 1.1 tshiozak { "SI", F_SI },
304 1.1 tshiozak { "SO", F_SO },
305 1.1 tshiozak { "LS0", F_LS0 },
306 1.1 tshiozak { "LS1", F_LS1 },
307 1.1 tshiozak { "LS2", F_LS2 },
308 1.1 tshiozak { "LS3", F_LS3 },
309 1.1 tshiozak { "LS1R", F_LS1R },
310 1.1 tshiozak { "LS2R", F_LS2R },
311 1.1 tshiozak { "LS3R", F_LS3R },
312 1.1 tshiozak { "SS2", F_SS2 },
313 1.1 tshiozak { "SS3", F_SS3 },
314 1.1 tshiozak { "SS2R", F_SS2R },
315 1.1 tshiozak { "SS3R", F_SS3R },
316 1.1 tshiozak { NULL, 0 }
317 1.1 tshiozak };
318 1.1 tshiozak
319 1.1 tshiozak for (i = 0; tags[i].tag; i++) {
320 1.1 tshiozak if (!strcmp(token, tags[i].tag)) {
321 1.1 tshiozak ei->flags |= tags[i].flag;
322 1.1 tshiozak return (_MATCH);
323 1.1 tshiozak }
324 1.1 tshiozak }
325 1.1 tshiozak
326 1.1 tshiozak return (_NOTMATCH);
327 1.1 tshiozak }
328 1.1 tshiozak
329 1.1 tshiozak
330 1.1 tshiozak static __inline int
331 1.1 tshiozak _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
332 1.1 tshiozak const void * __restrict var, size_t lenvar)
333 1.1 tshiozak {
334 1.1 tshiozak char const *v, *e;
335 1.1 tshiozak char buf[20];
336 1.1 tshiozak int i, len, ret;
337 1.1 tshiozak
338 1.1 tshiozak _DIAGASSERT(ei != NULL);
339 1.1 tshiozak
340 1.1 tshiozak
341 1.1 tshiozak /*
342 1.1 tshiozak * parse VARIABLE section.
343 1.1 tshiozak */
344 1.1 tshiozak
345 1.1 tshiozak if (!var)
346 1.1 tshiozak return (EFTYPE);
347 1.1 tshiozak
348 1.1 tshiozak v = (const char *) var;
349 1.1 tshiozak
350 1.1 tshiozak /* initialize structure */
351 1.1 tshiozak ei->maxcharset = 0;
352 1.1 tshiozak for (i = 0; i < 4; i++) {
353 1.1 tshiozak ei->recommend[i] = NULL;
354 1.1 tshiozak ei->recommendsize[i] = 0;
355 1.1 tshiozak }
356 1.1 tshiozak ei->flags = 0;
357 1.1 tshiozak
358 1.1 tshiozak while (*v) {
359 1.1 tshiozak while (*v == ' ' || *v == '\t')
360 1.1 tshiozak ++v;
361 1.1 tshiozak
362 1.1 tshiozak /* find the token */
363 1.1 tshiozak e = v;
364 1.1 tshiozak while (*e && *e != ' ' && *e != '\t')
365 1.1 tshiozak ++e;
366 1.1 tshiozak if (*e) {
367 1.1 tshiozak len = e-v;
368 1.1 tshiozak if (len>=sizeof(buf))
369 1.1 tshiozak goto parsefail;
370 1.1 tshiozak sprintf(buf, "%.*s", len, v);
371 1.1 tshiozak ++e;
372 1.1 tshiozak }
373 1.1 tshiozak
374 1.1 tshiozak if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
375 1.1 tshiozak ;
376 1.1 tshiozak else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
377 1.1 tshiozak ;
378 1.1 tshiozak else if ((ret = get_max(ei, buf)) != _NOTMATCH)
379 1.1 tshiozak ;
380 1.1 tshiozak else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
381 1.1 tshiozak ;
382 1.1 tshiozak else
383 1.1 tshiozak ret = _PARSEFAIL;
384 1.1 tshiozak if (ret==_PARSEFAIL)
385 1.1 tshiozak goto parsefail;
386 1.1 tshiozak v = e;
387 1.1 tshiozak
388 1.1 tshiozak }
389 1.1 tshiozak
390 1.1 tshiozak return (0);
391 1.1 tshiozak
392 1.1 tshiozak parsefail:
393 1.1 tshiozak free(ei->recommend[0]);
394 1.1 tshiozak free(ei->recommend[1]);
395 1.1 tshiozak free(ei->recommend[2]);
396 1.1 tshiozak free(ei->recommend[3]);
397 1.1 tshiozak
398 1.1 tshiozak return (EFTYPE);
399 1.1 tshiozak }
400 1.1 tshiozak
401 1.1 tshiozak static __inline void
402 1.1 tshiozak /*ARGSUSED*/
403 1.1 tshiozak _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
404 1.1 tshiozak _ISO2022State * __restrict s)
405 1.1 tshiozak {
406 1.1 tshiozak int i;
407 1.1 tshiozak
408 1.1 tshiozak memset(s, 0, sizeof(*s));
409 1.1 tshiozak s->gl = 0;
410 1.1 tshiozak s->gr = (ei->flags & F_8BIT) ? 1 : -1;
411 1.1 tshiozak
412 1.1 tshiozak for (i = 0; i < 4; i++) {
413 1.1 tshiozak if (ei->initg[i].final) {
414 1.1 tshiozak s->g[i].type = ei->initg[i].type;
415 1.1 tshiozak s->g[i].final = ei->initg[i].final;
416 1.1 tshiozak s->g[i].interm = ei->initg[i].interm;
417 1.1 tshiozak }
418 1.1 tshiozak }
419 1.1 tshiozak s->singlegl = s->singlegr = -1;
420 1.1 tshiozak }
421 1.1 tshiozak
422 1.1 tshiozak static __inline void
423 1.1 tshiozak /*ARGSUSED*/
424 1.1 tshiozak _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
425 1.1 tshiozak void * __restrict pspriv,
426 1.1 tshiozak const _ISO2022State * __restrict s)
427 1.1 tshiozak {
428 1.1 tshiozak memcpy(pspriv, (const void *)s, sizeof(*s));
429 1.1 tshiozak }
430 1.1 tshiozak
431 1.1 tshiozak static __inline void
432 1.1 tshiozak /*ARGSUSED*/
433 1.1 tshiozak _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
434 1.1 tshiozak _ISO2022State * __restrict s,
435 1.1 tshiozak const void * __restrict pspriv)
436 1.1 tshiozak {
437 1.1 tshiozak memcpy((void *)s, pspriv, sizeof(*s));
438 1.1 tshiozak }
439 1.1 tshiozak
440 1.1 tshiozak static int
441 1.1 tshiozak /*ARGSUSED*/
442 1.1 tshiozak _citrus_ISO2022_stdencoding_init(_ISO2022EncodingInfo * __restrict ei,
443 1.1 tshiozak const void * __restrict var, size_t lenvar)
444 1.1 tshiozak {
445 1.1 tshiozak
446 1.1 tshiozak _DIAGASSERT(ei != NULL);
447 1.1 tshiozak
448 1.1 tshiozak return _citrus_ISO2022_parse_variable(ei, var, lenvar);
449 1.1 tshiozak }
450 1.1 tshiozak
451 1.1 tshiozak static void
452 1.1 tshiozak /*ARGSUSED*/
453 1.1 tshiozak _citrus_ISO2022_stdencoding_uninit(_ISO2022EncodingInfo *ei)
454 1.1 tshiozak {
455 1.1 tshiozak }
456 1.1 tshiozak
457 1.1 tshiozak #define ESC '\033'
458 1.1 tshiozak #define ECMA -1
459 1.1 tshiozak #define INTERM -2
460 1.1 tshiozak #define OECMA -3
461 1.1 tshiozak static struct seqtable {
462 1.1 tshiozak int type;
463 1.1 tshiozak int csoff;
464 1.1 tshiozak int finaloff;
465 1.1 tshiozak int intermoff;
466 1.1 tshiozak int versoff;
467 1.1 tshiozak int len;
468 1.1 tshiozak int chars[10];
469 1.1 tshiozak } seqtable[] = {
470 1.1 tshiozak /* G0 94MULTI special */
471 1.1 tshiozak { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
472 1.1 tshiozak /* G0 94MULTI special with version identification */
473 1.1 tshiozak { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
474 1.1 tshiozak /* G? 94 */
475 1.1 tshiozak { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
476 1.1 tshiozak /* G? 94 with 2nd intermediate char */
477 1.1 tshiozak { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
478 1.1 tshiozak /* G? 96 */
479 1.1 tshiozak { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
480 1.1 tshiozak /* G? 96 with 2nd intermediate char */
481 1.1 tshiozak { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
482 1.1 tshiozak /* G? 94MULTI */
483 1.1 tshiozak { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
484 1.1 tshiozak /* G? 96MULTI */
485 1.1 tshiozak { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
486 1.1 tshiozak /* G? 94MULTI with version specification */
487 1.1 tshiozak { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
488 1.1 tshiozak /* LS2/3 */
489 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
490 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
491 1.1 tshiozak /* LS1/2/3R */
492 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
493 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
494 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
495 1.1 tshiozak /* SS2/3 */
496 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
497 1.1 tshiozak { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
498 1.1 tshiozak /* end of records */
499 1.1 tshiozak { 0, }
500 1.1 tshiozak };
501 1.1 tshiozak
502 1.1 tshiozak static int
503 1.1 tshiozak seqmatch(const char * __restrict s, size_t n,
504 1.1 tshiozak const struct seqtable * __restrict sp)
505 1.1 tshiozak {
506 1.1 tshiozak const int *p;
507 1.1 tshiozak
508 1.1 tshiozak _DIAGASSERT(s != NULL);
509 1.1 tshiozak _DIAGASSERT(sp != NULL);
510 1.1 tshiozak
511 1.1 tshiozak p = sp->chars;
512 1.1 tshiozak while (p - sp->chars < n && p - sp->chars < sp->len) {
513 1.1 tshiozak switch (*p) {
514 1.1 tshiozak case ECMA:
515 1.1 tshiozak if (!isecma(*s))
516 1.1 tshiozak goto terminate;
517 1.1 tshiozak break;
518 1.1 tshiozak case OECMA:
519 1.1 tshiozak if (*s && strchr("@AB", *s))
520 1.1 tshiozak break;
521 1.1 tshiozak else
522 1.1 tshiozak goto terminate;
523 1.1 tshiozak case INTERM:
524 1.1 tshiozak if (!isinterm(*s))
525 1.1 tshiozak goto terminate;
526 1.1 tshiozak break;
527 1.1 tshiozak case CS94:
528 1.1 tshiozak if (*s && strchr("()*+", *s))
529 1.1 tshiozak break;
530 1.1 tshiozak else
531 1.1 tshiozak goto terminate;
532 1.1 tshiozak case CS96:
533 1.1 tshiozak if (*s && strchr(",-./", *s))
534 1.1 tshiozak break;
535 1.1 tshiozak else
536 1.1 tshiozak goto terminate;
537 1.1 tshiozak default:
538 1.1 tshiozak if (*s != *p)
539 1.1 tshiozak goto terminate;
540 1.1 tshiozak break;
541 1.1 tshiozak }
542 1.1 tshiozak
543 1.1 tshiozak p++;
544 1.1 tshiozak s++;
545 1.1 tshiozak }
546 1.1 tshiozak
547 1.1 tshiozak terminate:
548 1.1 tshiozak return p - sp->chars;
549 1.1 tshiozak }
550 1.1 tshiozak
551 1.1 tshiozak static wchar_t
552 1.1 tshiozak _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
553 1.1 tshiozak const char * __restrict string, size_t n,
554 1.1 tshiozak const char ** __restrict result,
555 1.1 tshiozak _ISO2022State * __restrict psenc)
556 1.1 tshiozak {
557 1.1 tshiozak wchar_t wchar = 0;
558 1.1 tshiozak int cur;
559 1.1 tshiozak struct seqtable *sp;
560 1.1 tshiozak int nmatch;
561 1.1 tshiozak int i;
562 1.1 tshiozak
563 1.1 tshiozak _DIAGASSERT(ei != NULL);
564 1.1 tshiozak _DIAGASSERT(state != NULL);
565 1.1 tshiozak _DIAGASSERT(string != NULL);
566 1.1 tshiozak /* result may be NULL */
567 1.1 tshiozak
568 1.1 tshiozak while (1) {
569 1.1 tshiozak /* SI/SO */
570 1.1 tshiozak if (1 <= n && string[0] == '\017') {
571 1.1 tshiozak psenc->gl = 0;
572 1.1 tshiozak string++;
573 1.1 tshiozak n--;
574 1.1 tshiozak continue;
575 1.1 tshiozak }
576 1.1 tshiozak if (1 <= n && string[0] == '\016') {
577 1.1 tshiozak psenc->gl = 1;
578 1.1 tshiozak string++;
579 1.1 tshiozak n--;
580 1.1 tshiozak continue;
581 1.1 tshiozak }
582 1.1 tshiozak
583 1.1 tshiozak /* SS2/3R */
584 1.1 tshiozak if (1 <= n && string[0] && strchr("\217\216", string[0])) {
585 1.1 tshiozak psenc->singlegl = psenc->singlegr =
586 1.1 tshiozak (string[0] - '\216') + 2;
587 1.1 tshiozak string++;
588 1.1 tshiozak n--;
589 1.1 tshiozak continue;
590 1.1 tshiozak }
591 1.1 tshiozak
592 1.1 tshiozak /* eat the letter if this is not ESC */
593 1.1 tshiozak if (1 <= n && string[0] != '\033')
594 1.1 tshiozak break;
595 1.1 tshiozak
596 1.1 tshiozak /* look for a perfect match from escape sequences */
597 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
598 1.1 tshiozak nmatch = seqmatch(string, n, sp);
599 1.1 tshiozak if (sp->len == nmatch && n >= sp->len)
600 1.1 tshiozak break;
601 1.1 tshiozak }
602 1.1 tshiozak
603 1.1 tshiozak if (!sp->len)
604 1.1 tshiozak goto notseq;
605 1.1 tshiozak
606 1.1 tshiozak if (sp->type != -1) {
607 1.1 tshiozak if (sp->csoff == -1)
608 1.1 tshiozak i = 0;
609 1.1 tshiozak else {
610 1.1 tshiozak switch (sp->type) {
611 1.1 tshiozak case CS94:
612 1.1 tshiozak case CS94MULTI:
613 1.1 tshiozak i = string[sp->csoff] - '(';
614 1.1 tshiozak break;
615 1.1 tshiozak case CS96:
616 1.1 tshiozak case CS96MULTI:
617 1.1 tshiozak i = string[sp->csoff] - ',';
618 1.1 tshiozak break;
619 1.1 tshiozak }
620 1.1 tshiozak }
621 1.1 tshiozak psenc->g[i].type = sp->type;
622 1.1 tshiozak psenc->g[i].final = '\0';
623 1.1 tshiozak psenc->g[i].interm = '\0';
624 1.1 tshiozak psenc->g[i].vers = '\0';
625 1.1 tshiozak /* sp->finaloff must not be -1 */
626 1.1 tshiozak if (sp->finaloff != -1)
627 1.1 tshiozak psenc->g[i].final = string[sp->finaloff];
628 1.1 tshiozak if (sp->intermoff != -1)
629 1.1 tshiozak psenc->g[i].interm = string[sp->intermoff];
630 1.1 tshiozak if (sp->versoff != -1)
631 1.1 tshiozak psenc->g[i].vers = string[sp->versoff];
632 1.1 tshiozak
633 1.1 tshiozak string += sp->len;
634 1.1 tshiozak n -= sp->len;
635 1.1 tshiozak continue;
636 1.1 tshiozak }
637 1.1 tshiozak
638 1.1 tshiozak /* LS2/3 */
639 1.1 tshiozak if (2 <= n && string[0] == '\033'
640 1.1 tshiozak && string[1] && strchr("no", string[1])) {
641 1.1 tshiozak psenc->gl = string[1] - 'n' + 2;
642 1.1 tshiozak string += 2;
643 1.1 tshiozak n -= 2;
644 1.1 tshiozak continue;
645 1.1 tshiozak }
646 1.1 tshiozak
647 1.1 tshiozak /* LS1/2/3R */
648 1.1 tshiozak /* XXX: { for vi showmatch */
649 1.1 tshiozak if (2 <= n && string[0] == '\033'
650 1.1 tshiozak && string[1] && strchr("~}|", string[1])) {
651 1.1 tshiozak psenc->gr = 3 - (string[1] - '|');
652 1.1 tshiozak string += 2;
653 1.1 tshiozak n -= 2;
654 1.1 tshiozak continue;
655 1.1 tshiozak }
656 1.1 tshiozak
657 1.1 tshiozak /* SS2/3 */
658 1.1 tshiozak if (2 <= n && string[0] == '\033'
659 1.1 tshiozak && string[1] && strchr("NO", string[1])) {
660 1.1 tshiozak psenc->singlegl = (string[1] - 'N') + 2;
661 1.1 tshiozak string += 2;
662 1.1 tshiozak n -= 2;
663 1.1 tshiozak continue;
664 1.1 tshiozak }
665 1.1 tshiozak
666 1.1 tshiozak notseq:
667 1.1 tshiozak /*
668 1.1 tshiozak * if we've got an unknown escape sequence, eat the ESC at the
669 1.1 tshiozak * head. otherwise, wait till full escape sequence comes.
670 1.1 tshiozak */
671 1.1 tshiozak for (sp = &seqtable[0]; sp->len; sp++) {
672 1.1 tshiozak nmatch = seqmatch(string, n, sp);
673 1.1 tshiozak if (!nmatch)
674 1.1 tshiozak continue;
675 1.1 tshiozak
676 1.1 tshiozak /*
677 1.1 tshiozak * if we are in the middle of escape sequence,
678 1.1 tshiozak * we still need to wait for more characters to come
679 1.1 tshiozak */
680 1.1 tshiozak if (n < sp->len) {
681 1.1 tshiozak if (nmatch == n) {
682 1.1 tshiozak if (result)
683 1.1 tshiozak *result = string;
684 1.1 tshiozak return (_ISO2022INVALID);
685 1.1 tshiozak }
686 1.1 tshiozak } else {
687 1.1 tshiozak if (nmatch == sp->len) {
688 1.1 tshiozak /* this case should not happen */
689 1.1 tshiozak goto eat;
690 1.1 tshiozak }
691 1.1 tshiozak }
692 1.1 tshiozak }
693 1.1 tshiozak
694 1.1 tshiozak break;
695 1.1 tshiozak }
696 1.1 tshiozak
697 1.1 tshiozak eat:
698 1.1 tshiozak /* no letter to eat */
699 1.1 tshiozak if (n < 1) {
700 1.1 tshiozak if (result)
701 1.1 tshiozak *result = string;
702 1.1 tshiozak return (_ISO2022INVALID);
703 1.1 tshiozak }
704 1.1 tshiozak
705 1.1 tshiozak /* normal chars. always eat C0/C1 as is. */
706 1.1 tshiozak if (iscntl(*string & 0xff))
707 1.1 tshiozak cur = -1;
708 1.1 tshiozak else if (*string & 0x80) {
709 1.1 tshiozak cur = (psenc->singlegr == -1)
710 1.1 tshiozak ? psenc->gr : psenc->singlegr;
711 1.1 tshiozak } else {
712 1.1 tshiozak cur = (psenc->singlegl == -1)
713 1.1 tshiozak ? psenc->gl : psenc->singlegl;
714 1.1 tshiozak }
715 1.1 tshiozak
716 1.1 tshiozak if (cur == -1) {
717 1.1 tshiozak asis:
718 1.1 tshiozak wchar = *string++ & 0xff;
719 1.1 tshiozak if (result)
720 1.1 tshiozak *result = string;
721 1.1 tshiozak /* reset single shift state */
722 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
723 1.1 tshiozak return wchar;
724 1.1 tshiozak }
725 1.1 tshiozak
726 1.1 tshiozak /* length error check */
727 1.1 tshiozak switch (psenc->g[cur].type) {
728 1.1 tshiozak case CS94MULTI:
729 1.1 tshiozak case CS96MULTI:
730 1.1 tshiozak if (!isthree(psenc->g[cur].final)) {
731 1.1 tshiozak if (2 <= n
732 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80))
733 1.1 tshiozak break;
734 1.1 tshiozak } else {
735 1.1 tshiozak if (3 <= n
736 1.1 tshiozak && (string[0] & 0x80) == (string[1] & 0x80)
737 1.1 tshiozak && (string[0] & 0x80) == (string[2] & 0x80))
738 1.1 tshiozak break;
739 1.1 tshiozak }
740 1.1 tshiozak
741 1.1 tshiozak /* we still need to wait for more characters to come */
742 1.1 tshiozak if (result)
743 1.1 tshiozak *result = string;
744 1.1 tshiozak return (_ISO2022INVALID);
745 1.1 tshiozak
746 1.1 tshiozak case CS94:
747 1.1 tshiozak case CS96:
748 1.1 tshiozak if (1 <= n)
749 1.1 tshiozak break;
750 1.1 tshiozak
751 1.1 tshiozak /* we still need to wait for more characters to come */
752 1.1 tshiozak if (result)
753 1.1 tshiozak *result = string;
754 1.1 tshiozak return (_ISO2022INVALID);
755 1.1 tshiozak }
756 1.1 tshiozak
757 1.1 tshiozak /* range check */
758 1.1 tshiozak switch (psenc->g[cur].type) {
759 1.1 tshiozak case CS94:
760 1.1 tshiozak if (!(is94(string[0] & 0x7f)))
761 1.1 tshiozak goto asis;
762 1.1 tshiozak case CS96:
763 1.1 tshiozak if (!(is96(string[0] & 0x7f)))
764 1.1 tshiozak goto asis;
765 1.1 tshiozak break;
766 1.1 tshiozak case CS94MULTI:
767 1.1 tshiozak if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
768 1.1 tshiozak goto asis;
769 1.1 tshiozak break;
770 1.1 tshiozak case CS96MULTI:
771 1.1 tshiozak if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
772 1.1 tshiozak goto asis;
773 1.1 tshiozak break;
774 1.1 tshiozak }
775 1.1 tshiozak
776 1.1 tshiozak /* extract the character. */
777 1.1 tshiozak switch (psenc->g[cur].type) {
778 1.1 tshiozak case CS94:
779 1.1 tshiozak /* special case for ASCII. */
780 1.1 tshiozak if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
781 1.1 tshiozak wchar = *string++;
782 1.1 tshiozak wchar &= 0x7f;
783 1.1 tshiozak break;
784 1.1 tshiozak }
785 1.1 tshiozak wchar = psenc->g[cur].final;
786 1.1 tshiozak wchar = (wchar << 8);
787 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
788 1.1 tshiozak wchar = (wchar << 8);
789 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
790 1.1 tshiozak break;
791 1.1 tshiozak case CS96:
792 1.1 tshiozak /* special case for ISO-8859-1. */
793 1.1 tshiozak if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
794 1.1 tshiozak wchar = *string++;
795 1.1 tshiozak wchar &= 0x7f;
796 1.1 tshiozak wchar |= 0x80;
797 1.1 tshiozak break;
798 1.1 tshiozak }
799 1.1 tshiozak wchar = psenc->g[cur].final;
800 1.1 tshiozak wchar = (wchar << 8);
801 1.1 tshiozak wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
802 1.1 tshiozak wchar = (wchar << 8);
803 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
804 1.1 tshiozak wchar |= 0x80;
805 1.1 tshiozak break;
806 1.1 tshiozak case CS94MULTI:
807 1.1 tshiozak case CS96MULTI:
808 1.1 tshiozak wchar = psenc->g[cur].final;
809 1.1 tshiozak wchar = (wchar << 8);
810 1.1 tshiozak if (isthree(psenc->g[cur].final))
811 1.1 tshiozak wchar |= (*string++ & 0x7f);
812 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
813 1.1 tshiozak wchar = (wchar << 8) | (*string++ & 0x7f);
814 1.1 tshiozak if (psenc->g[cur].type == CS96MULTI)
815 1.1 tshiozak wchar |= 0x80;
816 1.1 tshiozak break;
817 1.1 tshiozak }
818 1.1 tshiozak
819 1.1 tshiozak if (result)
820 1.1 tshiozak *result = string;
821 1.1 tshiozak /* reset single shift state */
822 1.1 tshiozak psenc->singlegr = psenc->singlegl = -1;
823 1.1 tshiozak return wchar;
824 1.1 tshiozak }
825 1.1 tshiozak
826 1.1 tshiozak
827 1.1 tshiozak
828 1.1 tshiozak static int
829 1.1 tshiozak _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
830 1.1 tshiozak wchar_t * __restrict pwc,
831 1.1 tshiozak const char ** __restrict s,
832 1.1 tshiozak size_t n, _ISO2022State * __restrict psenc,
833 1.1 tshiozak size_t * __restrict nresult)
834 1.1 tshiozak {
835 1.1 tshiozak wchar_t wchar;
836 1.1 tshiozak const char *s0, *p, *result;
837 1.1 tshiozak int c;
838 1.1 tshiozak int chlenbak;
839 1.1 tshiozak
840 1.1 tshiozak _DIAGASSERT(nresult != 0);
841 1.1 tshiozak _DIAGASSERT(ei != NULL);
842 1.1 tshiozak _DIAGASSERT(psenc != NULL);
843 1.1 tshiozak _DIAGASSERT(s != NULL);
844 1.1 tshiozak
845 1.1 tshiozak s0 = *s;
846 1.1 tshiozak c = 0;
847 1.1 tshiozak chlenbak = psenc->chlen;
848 1.1 tshiozak
849 1.1 tshiozak /*
850 1.1 tshiozak * if we have something in buffer, use that.
851 1.1 tshiozak * otherwise, skip here
852 1.1 tshiozak */
853 1.1 tshiozak if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
854 1.1 tshiozak /* illgeal state */
855 1.1 tshiozak _citrus_ISO2022_init_state(ei, psenc);
856 1.1 tshiozak goto encoding_error;
857 1.1 tshiozak }
858 1.1 tshiozak if (psenc->chlen == 0)
859 1.1 tshiozak goto emptybuf;
860 1.1 tshiozak
861 1.1 tshiozak /* buffer is not empty */
862 1.1 tshiozak p = psenc->ch;
863 1.1 tshiozak while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
864 1.1 tshiozak if (n > 0) {
865 1.1 tshiozak psenc->ch[psenc->chlen++] = *s0++;
866 1.1 tshiozak n--;
867 1.1 tshiozak }
868 1.1 tshiozak
869 1.1 tshiozak wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
870 1.1 tshiozak &result, psenc);
871 1.1 tshiozak if (wchar != _ISO2022INVALID) {
872 1.1 tshiozak c += result - p;
873 1.1 tshiozak if (psenc->chlen > c)
874 1.1 tshiozak memmove(psenc->ch, result, psenc->chlen - c);
875 1.1 tshiozak if (psenc->chlen < c)
876 1.1 tshiozak psenc->chlen = 0;
877 1.1 tshiozak else
878 1.1 tshiozak psenc->chlen -= c;
879 1.1 tshiozak goto output;
880 1.1 tshiozak }
881 1.1 tshiozak
882 1.1 tshiozak c += result - p;
883 1.1 tshiozak p = result;
884 1.1 tshiozak
885 1.1 tshiozak if (n == 0)
886 1.1 tshiozak goto restart;
887 1.1 tshiozak }
888 1.1 tshiozak
889 1.1 tshiozak /* escape sequence too long? */
890 1.1 tshiozak goto encoding_error;
891 1.1 tshiozak
892 1.1 tshiozak emptybuf:
893 1.1 tshiozak wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
894 1.1 tshiozak if (wchar != _ISO2022INVALID) {
895 1.1 tshiozak c += result - s0;
896 1.1 tshiozak psenc->chlen = 0;
897 1.1 tshiozak s0 = result;
898 1.1 tshiozak goto output;
899 1.1 tshiozak }
900 1.1 tshiozak if (result > s0 && n > result - s0) {
901 1.1 tshiozak c += (result - s0);
902 1.1 tshiozak n -= (result - s0);
903 1.1 tshiozak s0 = result;
904 1.1 tshiozak goto emptybuf;
905 1.1 tshiozak }
906 1.1 tshiozak n += c;
907 1.1 tshiozak if (n < sizeof(psenc->ch)) {
908 1.1 tshiozak memcpy(psenc->ch, s0 - c, n);
909 1.1 tshiozak psenc->chlen = n;
910 1.1 tshiozak s0 = result;
911 1.1 tshiozak goto restart;
912 1.1 tshiozak }
913 1.1 tshiozak
914 1.1 tshiozak /* escape sequence too long? */
915 1.1 tshiozak
916 1.1 tshiozak encoding_error:
917 1.1 tshiozak psenc->chlen = 0;
918 1.1 tshiozak *nresult = (size_t)-1;
919 1.1 tshiozak return (EILSEQ);
920 1.1 tshiozak
921 1.1 tshiozak output:
922 1.1 tshiozak *s = s0;
923 1.1 tshiozak if (pwc)
924 1.1 tshiozak *pwc = wchar;
925 1.1 tshiozak
926 1.1 tshiozak if (!wchar)
927 1.1 tshiozak *nresult = 0;
928 1.1 tshiozak else
929 1.1 tshiozak *nresult = c - chlenbak;
930 1.1 tshiozak
931 1.1 tshiozak return (0);
932 1.1 tshiozak
933 1.1 tshiozak restart:
934 1.1 tshiozak *s = s0;
935 1.1 tshiozak *nresult = (size_t)-2;
936 1.1 tshiozak
937 1.1 tshiozak return (0);
938 1.1 tshiozak }
939 1.1 tshiozak
940 1.1 tshiozak static int
941 1.1 tshiozak recommendation(_ISO2022EncodingInfo * __restrict ei,
942 1.1 tshiozak _ISO2022Charset * __restrict cs)
943 1.1 tshiozak {
944 1.1 tshiozak int i, j;
945 1.1 tshiozak _ISO2022Charset *recommend;
946 1.1 tshiozak
947 1.1 tshiozak _DIAGASSERT(ei != NULL);
948 1.1 tshiozak _DIAGASSERT(cs != NULL);
949 1.1 tshiozak
950 1.1 tshiozak /* first, try a exact match. */
951 1.1 tshiozak for (i = 0; i < 4; i++) {
952 1.1 tshiozak recommend = ei->recommend[i];
953 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
954 1.1 tshiozak if (cs->type != recommend[j].type)
955 1.1 tshiozak continue;
956 1.1 tshiozak if (cs->final != recommend[j].final)
957 1.1 tshiozak continue;
958 1.1 tshiozak if (cs->interm != recommend[j].interm)
959 1.1 tshiozak continue;
960 1.1 tshiozak
961 1.1 tshiozak return i;
962 1.1 tshiozak }
963 1.1 tshiozak }
964 1.1 tshiozak
965 1.1 tshiozak /* then, try a wildcard match over final char. */
966 1.1 tshiozak for (i = 0; i < 4; i++) {
967 1.1 tshiozak recommend = ei->recommend[i];
968 1.1 tshiozak for (j = 0; j < ei->recommendsize[i]; j++) {
969 1.1 tshiozak if (cs->type != recommend[j].type)
970 1.1 tshiozak continue;
971 1.1 tshiozak if (cs->final && (cs->final != recommend[j].final))
972 1.1 tshiozak continue;
973 1.1 tshiozak if (cs->interm && (cs->interm != recommend[j].interm))
974 1.1 tshiozak continue;
975 1.1 tshiozak
976 1.1 tshiozak return i;
977 1.1 tshiozak }
978 1.1 tshiozak }
979 1.1 tshiozak
980 1.1 tshiozak /* there's no recommendation. make a guess. */
981 1.1 tshiozak if (ei->maxcharset == 0) {
982 1.1 tshiozak return 0;
983 1.1 tshiozak } else {
984 1.1 tshiozak switch (cs->type) {
985 1.1 tshiozak case CS94:
986 1.1 tshiozak case CS94MULTI:
987 1.1 tshiozak return 0;
988 1.1 tshiozak case CS96:
989 1.1 tshiozak case CS96MULTI:
990 1.1 tshiozak return 1;
991 1.1 tshiozak }
992 1.1 tshiozak }
993 1.1 tshiozak return 0;
994 1.1 tshiozak }
995 1.1 tshiozak
996 1.1 tshiozak static int
997 1.1 tshiozak _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t c,
998 1.1 tshiozak char * __restrict string, size_t n,
999 1.1 tshiozak char ** __restrict result,
1000 1.1 tshiozak _ISO2022State * __restrict psenc)
1001 1.1 tshiozak {
1002 1.1 tshiozak int i = 0, len;
1003 1.1 tshiozak _ISO2022Charset cs;
1004 1.1 tshiozak char *p;
1005 1.1 tshiozak char tmp[MB_LEN_MAX];
1006 1.1 tshiozak int target;
1007 1.1 tshiozak u_char mask;
1008 1.1 tshiozak int bit8;
1009 1.1 tshiozak
1010 1.1 tshiozak _DIAGASSERT(ei != NULL);
1011 1.1 tshiozak _DIAGASSERT(string != NULL);
1012 1.1 tshiozak /* result may be NULL */
1013 1.1 tshiozak /* state appears to be unused */
1014 1.1 tshiozak
1015 1.1 tshiozak if (iscntl(c & 0xff)) {
1016 1.1 tshiozak /* go back to ASCII on control chars */
1017 1.1 tshiozak cs.type = CS94;
1018 1.1 tshiozak cs.final = 'B';
1019 1.1 tshiozak cs.interm = '\0';
1020 1.1 tshiozak } else if (!(c & ~0xff)) {
1021 1.1 tshiozak if (c & 0x80) {
1022 1.1 tshiozak /* special treatment for ISO-8859-1 */
1023 1.1 tshiozak cs.type = CS96;
1024 1.1 tshiozak cs.final = 'A';
1025 1.1 tshiozak cs.interm = '\0';
1026 1.1 tshiozak } else {
1027 1.1 tshiozak /* special treatment for ASCII */
1028 1.1 tshiozak cs.type = CS94;
1029 1.1 tshiozak cs.final = 'B';
1030 1.1 tshiozak cs.interm = '\0';
1031 1.1 tshiozak }
1032 1.1 tshiozak } else {
1033 1.1 tshiozak cs.final = (c >> 24) & 0x7f;
1034 1.1 tshiozak if ((c >> 16) & 0x80)
1035 1.1 tshiozak cs.interm = (c >> 16) & 0x7f;
1036 1.1 tshiozak else
1037 1.1 tshiozak cs.interm = '\0';
1038 1.1 tshiozak if (c & 0x80)
1039 1.1 tshiozak cs.type = (c & 0x00007f00) ? CS96MULTI : CS96;
1040 1.1 tshiozak else
1041 1.1 tshiozak cs.type = (c & 0x00007f00) ? CS94MULTI : CS94;
1042 1.1 tshiozak }
1043 1.1 tshiozak target = recommendation(ei, &cs);
1044 1.1 tshiozak p = tmp;
1045 1.1 tshiozak bit8 = ei->flags & F_8BIT;
1046 1.1 tshiozak
1047 1.1 tshiozak /* designate the charset onto the target plane(G0/1/2/3). */
1048 1.1 tshiozak if (psenc->g[target].type == cs.type
1049 1.1 tshiozak && psenc->g[target].final == cs.final
1050 1.1 tshiozak && psenc->g[target].interm == cs.interm)
1051 1.1 tshiozak goto planeok;
1052 1.1 tshiozak
1053 1.1 tshiozak *p++ = '\033';
1054 1.1 tshiozak if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1055 1.1 tshiozak *p++ = '$';
1056 1.1 tshiozak if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1057 1.1 tshiozak && !cs.interm && !(ei->flags & F_NOOLD))
1058 1.1 tshiozak ;
1059 1.1 tshiozak else if (cs.type == CS94 || cs.type == CS94MULTI)
1060 1.1 tshiozak *p++ = "()*+"[target];
1061 1.1 tshiozak else
1062 1.1 tshiozak *p++ = ",-./"[target];
1063 1.1 tshiozak if (cs.interm)
1064 1.1 tshiozak *p++ = cs.interm;
1065 1.1 tshiozak *p++ = cs.final;
1066 1.1 tshiozak
1067 1.1 tshiozak psenc->g[target].type = cs.type;
1068 1.1 tshiozak psenc->g[target].final = cs.final;
1069 1.1 tshiozak psenc->g[target].interm = cs.interm;
1070 1.1 tshiozak
1071 1.1 tshiozak planeok:
1072 1.1 tshiozak
1073 1.1 tshiozak /* invoke the plane onto GL or GR. */
1074 1.1 tshiozak if (psenc->gl == target)
1075 1.1 tshiozak goto sideok;
1076 1.1 tshiozak if (bit8 && psenc->gr == target)
1077 1.1 tshiozak goto sideok;
1078 1.1 tshiozak
1079 1.1 tshiozak if (target == 0 && (ei->flags & F_LS0)) {
1080 1.1 tshiozak *p++ = '\017';
1081 1.1 tshiozak psenc->gl = 0;
1082 1.1 tshiozak } else if (target == 1 && (ei->flags & F_LS1)) {
1083 1.1 tshiozak *p++ = '\016';
1084 1.1 tshiozak psenc->gl = 1;
1085 1.1 tshiozak } else if (target == 2 && (ei->flags & F_LS2)) {
1086 1.1 tshiozak *p++ = '\033';
1087 1.1 tshiozak *p++ = 'n';
1088 1.1 tshiozak psenc->gl = 2;
1089 1.1 tshiozak } else if (target == 3 && (ei->flags & F_LS3)) {
1090 1.1 tshiozak *p++ = '\033';
1091 1.1 tshiozak *p++ = 'o';
1092 1.1 tshiozak psenc->gl = 3;
1093 1.1 tshiozak } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1094 1.1 tshiozak *p++ = '\033';
1095 1.1 tshiozak *p++ = '~';
1096 1.1 tshiozak psenc->gr = 1;
1097 1.1 tshiozak } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1098 1.1 tshiozak *p++ = '\033';
1099 1.1 tshiozak /*{*/
1100 1.1 tshiozak *p++ = '}';
1101 1.1 tshiozak psenc->gr = 2;
1102 1.1 tshiozak } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1103 1.1 tshiozak *p++ = '\033';
1104 1.1 tshiozak *p++ = '|';
1105 1.1 tshiozak psenc->gr = 3;
1106 1.1 tshiozak } else if (target == 2 && (ei->flags & F_SS2)) {
1107 1.1 tshiozak *p++ = '\033';
1108 1.1 tshiozak *p++ = 'N';
1109 1.1 tshiozak psenc->singlegl = 2;
1110 1.1 tshiozak } else if (target == 3 && (ei->flags & F_SS3)) {
1111 1.1 tshiozak *p++ = '\033';
1112 1.1 tshiozak *p++ = 'O';
1113 1.1 tshiozak psenc->singlegl = 3;
1114 1.1 tshiozak } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1115 1.1 tshiozak *p++ = '\216';
1116 1.1 tshiozak *p++ = 'N';
1117 1.1 tshiozak psenc->singlegl = psenc->singlegr = 2;
1118 1.1 tshiozak } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1119 1.1 tshiozak *p++ = '\217';
1120 1.1 tshiozak *p++ = 'O';
1121 1.1 tshiozak psenc->singlegl = psenc->singlegr = 3;
1122 1.1 tshiozak } else
1123 1.1 tshiozak abort();
1124 1.1 tshiozak
1125 1.1 tshiozak sideok:
1126 1.1 tshiozak if (psenc->singlegl == target)
1127 1.1 tshiozak mask = 0x00;
1128 1.1 tshiozak else if (psenc->singlegr == target)
1129 1.1 tshiozak mask = 0x80;
1130 1.1 tshiozak else if (psenc->gl == target)
1131 1.1 tshiozak mask = 0x00;
1132 1.1 tshiozak else if ((ei->flags & F_8BIT) && psenc->gr == target)
1133 1.1 tshiozak mask = 0x80;
1134 1.1 tshiozak else
1135 1.1 tshiozak abort();
1136 1.1 tshiozak
1137 1.1 tshiozak switch (cs.type) {
1138 1.1 tshiozak case CS94:
1139 1.1 tshiozak case CS96:
1140 1.1 tshiozak i = 1;
1141 1.1 tshiozak break;
1142 1.1 tshiozak case CS94MULTI:
1143 1.1 tshiozak case CS96MULTI:
1144 1.1 tshiozak i = isthree(cs.final) ? 3 : 2;
1145 1.1 tshiozak break;
1146 1.1 tshiozak }
1147 1.1 tshiozak while (i-- > 0)
1148 1.1 tshiozak *p++ = ((c >> (i << 3)) & 0x7f) | mask;
1149 1.1 tshiozak
1150 1.1 tshiozak /* reset single shift state */
1151 1.1 tshiozak psenc->singlegl = psenc->singlegr = -1;
1152 1.1 tshiozak
1153 1.1 tshiozak len = p - tmp;
1154 1.1 tshiozak if (n < len) {
1155 1.1 tshiozak if (result)
1156 1.1 tshiozak *result = (char *)0;
1157 1.1 tshiozak } else {
1158 1.1 tshiozak if (result)
1159 1.1 tshiozak *result = string + len;
1160 1.1 tshiozak memcpy(string, tmp, len);
1161 1.1 tshiozak }
1162 1.1 tshiozak return len;
1163 1.1 tshiozak }
1164 1.1 tshiozak
1165 1.1 tshiozak static int
1166 1.1 tshiozak _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1167 1.1 tshiozak char * __restrict s, size_t n, wchar_t wc,
1168 1.1 tshiozak _ISO2022State * __restrict psenc,
1169 1.1 tshiozak size_t * __restrict nresult)
1170 1.1 tshiozak {
1171 1.1 tshiozak char buf[MB_LEN_MAX];
1172 1.1 tshiozak char *result;
1173 1.1 tshiozak int len;
1174 1.1 tshiozak
1175 1.1 tshiozak _DIAGASSERT(ei != NULL);
1176 1.1 tshiozak _DIAGASSERT(nresult != 0);
1177 1.1 tshiozak _DIAGASSERT(s != NULL);
1178 1.1 tshiozak
1179 1.1 tshiozak /* XXX state will be modified after this operation... */
1180 1.1 tshiozak len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1181 1.1 tshiozak if (sizeof(buf) < len || n < len) {
1182 1.1 tshiozak /* XXX should recover state? */
1183 1.1 tshiozak goto ilseq;
1184 1.1 tshiozak }
1185 1.1 tshiozak
1186 1.1 tshiozak memcpy(s, buf, len);
1187 1.1 tshiozak *nresult = (size_t)len;
1188 1.1 tshiozak return (0);
1189 1.1 tshiozak
1190 1.1 tshiozak ilseq:
1191 1.1 tshiozak /* bound check failure */
1192 1.1 tshiozak *nresult = (size_t)-1;
1193 1.1 tshiozak return (EILSEQ);
1194 1.1 tshiozak }
1195 1.1 tshiozak
1196 1.1 tshiozak /* ----------------------------------------------------------------------
1197 1.1 tshiozak * public interface for ctype
1198 1.1 tshiozak */
1199 1.1 tshiozak
1200 1.1 tshiozak _CITRUS_CTYPE_DECLS(ISO2022);
1201 1.1 tshiozak _CITRUS_CTYPE_DEF_OPS(ISO2022);
1202 1.1 tshiozak
1203 1.1 tshiozak #include "citrus_ctype_template.h"
1204