citrus_euc.c revision 1.9 1 /* $NetBSD: citrus_euc.c,v 1.9 2005/10/18 06:42:12 tshiozak Exp $ */
2
3 /*-
4 * Copyright (c)2002 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*-
30 * Copyright (c) 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * Paul Borman at Krystal Technologies.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 #include <sys/cdefs.h>
62 #if defined(LIBC_SCCS) && !defined(lint)
63 __RCSID("$NetBSD: citrus_euc.c,v 1.9 2005/10/18 06:42:12 tshiozak Exp $");
64 #endif /* LIBC_SCCS and not lint */
65
66 #include <assert.h>
67 #include <errno.h>
68 #include <string.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <stddef.h>
72 #include <locale.h>
73 #include <wchar.h>
74 #include <sys/types.h>
75 #include <limits.h>
76
77 #include "citrus_namespace.h"
78 #include "citrus_types.h"
79 #include "citrus_module.h"
80 #include "citrus_ctype.h"
81 #include "citrus_stdenc.h"
82 #include "citrus_euc.h"
83
84
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
88
/*
 * Conversion state: the bytes of a multibyte character that has only been
 * partially read so far.
 *
 * ch must be able to hold a complete character, because
 * _citrus_EUC_mbrtowc_priv() collects every byte of the character in it
 * before decoding.  _citrus_EUC_parse_variable() accepts per-codeset
 * lengths of up to 4 bytes, so the buffer is 4 bytes long; with only
 * 3 bytes the last byte of a 4-byte character would be stored past the
 * end of the array.
 */
typedef struct {
	char	ch[4];		/* bytes collected so far */
	int	chlen;		/* number of valid bytes in ch */
} _EUCState;
93
/*
 * Parameters of one EUC variant, filled in by _citrus_EUC_parse_variable().
 * The arrays are indexed by codeset number (0-3, see _citrus_EUC_cs()).
 */
typedef struct {
	/* bytes per character of each codeset (SS2/SS3 byte included) */
	unsigned int	count[4];
	/* tag bits or'ed into the wide characters of each codeset */
	wchar_t		bits[4];
	/* wide-character bits that carry the codeset tag */
	wchar_t		mask;
	/* largest value in count[] (never less than 1) */
	unsigned int	mb_cur_max;
} _EUCEncodingInfo;
100
/*
 * Per-locale ctype data: the encoding parameters plus one _EUCState for
 * each of the function names listed below.  The member names are
 * significant: _CEI_TO_STATE() forms them by pasting "s_" in front of the
 * function name it is given.
 */
typedef struct {
	_EUCEncodingInfo ei;
	struct {
		/* for future multi-locale facility */
		_EUCState s_mblen;
		_EUCState s_mbrlen;
		_EUCState s_mbrtowc;
		_EUCState s_mbtowc;
		_EUCState s_mbsrtowcs;
		_EUCState s_wcrtomb;
		_EUCState s_wcsrtombs;
		_EUCState s_wctomb;
	} states;
} _EUCCTypeInfo;
115
/*
 * Single-shift bytes: the first byte of a codeset 2 (_SS2) or codeset 3
 * (_SS3) character; see _citrus_EUC_cs().
 */
#define _SS2 0x008e
#define _SS3 0x008f

/* Accessors for the members of an _EUCCTypeInfo. */
#define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding.  Presumably these are consumed
 * by citrus_ctype_template.h and citrus_stdenc_template.h, which are
 * included at the end of this file -- the templates are not part of this
 * file, so confirm the exact contract there.
 */
#define _FUNCNAME(m) _citrus_EUC_##m
#define _ENCODING_INFO _EUCEncodingInfo
#define _CTYPE_INFO _EUCCTypeInfo
#define _ENCODING_STATE _EUCState
#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max
#define _ENCODING_IS_STATE_DEPENDENT 0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0
129
130
131 static __inline int
132 _citrus_EUC_cs(unsigned int c)
133 {
134 c &= 0xff;
135
136 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
137 }
138
139 static __inline int
140 _citrus_EUC_parse_variable(_EUCEncodingInfo *ei,
141 const void *var, size_t lenvar)
142 {
143 const char *v, *e;
144 int x;
145
146 /* parse variable string */
147 if (!var)
148 return (EFTYPE);
149
150 v = (const char *) var;
151
152 while (*v == ' ' || *v == '\t')
153 ++v;
154
155 ei->mb_cur_max = 1;
156 for (x = 0; x < 4; ++x) {
157 ei->count[x] = (int) strtol(v, (char **)&e, 0);
158 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) {
159 return (EFTYPE);
160 }
161 if (ei->mb_cur_max < ei->count[x])
162 ei->mb_cur_max = ei->count[x];
163 while (*v == ' ' || *v == '\t')
164 ++v;
165 ei->bits[x] = (int) strtol(v, (char **)&e, 0);
166 if (v == e || !(v = e)) {
167 return (EFTYPE);
168 }
169 while (*v == ' ' || *v == '\t')
170 ++v;
171 }
172 ei->mask = (int)strtol(v, (char **)&e, 0);
173 if (v == e || !(v = e)) {
174 return (EFTYPE);
175 }
176
177 return 0;
178 }
179
180
181 static __inline void
182 /*ARGSUSED*/
183 _citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s)
184 {
185 memset(s, 0, sizeof(*s));
186 }
187
188 static __inline void
189 /*ARGSUSED*/
190 _citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s)
191 {
192 memcpy(pspriv, (const void *)s, sizeof(*s));
193 }
194
195 static __inline void
196 /*ARGSUSED*/
197 _citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s,
198 const void *pspriv)
199 {
200 memcpy((void *)s, pspriv, sizeof(*s));
201 }
202
203 static int
204 _citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s,
205 size_t n, _EUCState *psenc, size_t *nresult)
206 {
207 wchar_t wchar;
208 int c, cs, len;
209 int chlenbak;
210 const char *s0, *s1 = NULL;
211
212 _DIAGASSERT(nresult != 0);
213 _DIAGASSERT(ei != NULL);
214 _DIAGASSERT(psenc != NULL);
215 _DIAGASSERT(s != NULL);
216
217 s0 = *s;
218
219 if (s0 == NULL) {
220 _citrus_EUC_init_state(ei, psenc);
221 *nresult = 0; /* state independent */
222 return (0);
223 }
224
225 chlenbak = psenc->chlen;
226
227 /* make sure we have the first byte in the buffer */
228 switch (psenc->chlen) {
229 case 0:
230 if (n < 1)
231 goto restart;
232 psenc->ch[0] = *s0++;
233 psenc->chlen = 1;
234 n--;
235 break;
236 case 1:
237 case 2:
238 break;
239 default:
240 /* illgeal state */
241 goto encoding_error;
242 }
243
244 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)];
245 if (c == 0)
246 goto encoding_error;
247 while (psenc->chlen < c) {
248 if (n < 1)
249 goto restart;
250 psenc->ch[psenc->chlen] = *s0++;
251 psenc->chlen++;
252 n--;
253 }
254 *s = s0;
255
256 switch (cs) {
257 case 3:
258 case 2:
259 /* skip SS2/SS3 */
260 len = c - 1;
261 s1 = &psenc->ch[1];
262 break;
263 case 1:
264 case 0:
265 len = c;
266 s1 = &psenc->ch[0];
267 break;
268 }
269 wchar = 0;
270 while (len-- > 0)
271 wchar = (wchar << 8) | (*s1++ & 0xff);
272 wchar = (wchar & ~ei->mask) | ei->bits[cs];
273
274 psenc->chlen = 0;
275 if (pwc)
276 *pwc = wchar;
277
278 if (!wchar) {
279 *nresult = 0;
280 } else {
281 *nresult = (size_t)(c - chlenbak);
282 }
283
284 return 0;
285
286 encoding_error:
287 psenc->chlen = 0;
288 *nresult = (size_t)-1;
289 return (EILSEQ);
290
291 restart:
292 *nresult = (size_t)-2;
293 *s = s0;
294 return (0);
295 }
296
297 static int
298 _citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc,
299 _EUCState *psenc, size_t *nresult)
300 {
301 wchar_t m, nm;
302 int cs, i, ret;
303
304 _DIAGASSERT(ei != NULL);
305 _DIAGASSERT(nresult != 0);
306 _DIAGASSERT(s != NULL);
307
308 m = wc & ei->mask;
309 nm = wc & ~m;
310
311 for (cs = 0;
312 cs < sizeof(ei->count)/sizeof(ei->count[0]);
313 cs++) {
314 if (m == ei->bits[cs])
315 break;
316 }
317 /* fallback case - not sure if it is necessary */
318 if (cs == sizeof(ei->count)/sizeof(ei->count[0]))
319 cs = 1;
320
321 i = ei->count[cs];
322 if (n < i) {
323 ret = E2BIG;
324 goto err;
325 }
326 m = (cs) ? 0x80 : 0x00;
327 switch (cs) {
328 case 2:
329 *s++ = _SS2;
330 i--;
331 break;
332 case 3:
333 *s++ = _SS3;
334 i--;
335 break;
336 }
337
338 while (i-- > 0)
339 *s++ = ((nm >> (i << 3)) & 0xff) | m;
340
341 *nresult = (size_t)ei->count[cs];
342 return 0;
343
344 err:
345 *nresult = (size_t)-1;
346 return ret;
347 }
348
349 static __inline int
350 /*ARGSUSED*/
351 _citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei,
352 _csid_t * __restrict csid,
353 _index_t * __restrict idx, wchar_t wc)
354 {
355 wchar_t m, nm;
356
357 _DIAGASSERT(ei != NULL && csid != NULL && idx != NULL);
358
359 m = wc & ei->mask;
360 nm = wc & ~m;
361
362 *csid = (_citrus_csid_t)m;
363 *idx = (_citrus_index_t)nm;
364
365 return (0);
366 }
367
368 static __inline int
369 /*ARGSUSED*/
370 _citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei,
371 wchar_t * __restrict wc,
372 _csid_t csid, _index_t idx)
373 {
374
375 _DIAGASSERT(ei != NULL && wc != NULL);
376
377 if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0)
378 return (EINVAL);
379
380 *wc = (wchar_t)csid | (wchar_t)idx;
381
382 return (0);
383 }
384
385 static int
386 /*ARGSUSED*/
387 _citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei,
388 const void * __restrict var, size_t lenvar)
389 {
390
391 _DIAGASSERT(ei != NULL);
392
393 return (_citrus_EUC_parse_variable(ei, var, lenvar));
394 }
395
396 static void
397 /*ARGSUSED*/
398 _citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei)
399 {
400 }
401
402 /* ----------------------------------------------------------------------
403 * public interface for ctype
404 */
405
/*
 * Instantiate the ctype interface for EUC.  The two macros and the
 * template come from headers that are not part of this file; presumably
 * they declare the entry points and the operations table and implement
 * them on top of the _citrus_EUC_* helpers and the _ENCODING_* / _CEI_*
 * macros defined above -- confirm against citrus_ctype.h and
 * citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(EUC);
_CITRUS_CTYPE_DEF_OPS(EUC);

#include "citrus_ctype_template.h"
410
411 /* ----------------------------------------------------------------------
412 * public interface for stdenc
413 */
414
/*
 * Instantiate the stdenc interface for EUC.  The two macros and the
 * template come from headers that are not part of this file; presumably
 * they declare the entry points and the operations table and implement
 * them on top of the _citrus_EUC_* helpers defined above -- confirm
 * against citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(EUC);
_CITRUS_STDENC_DEF_OPS(EUC);

#include "citrus_stdenc_template.h"
419