/* $NetBSD: citrus_dechanyu.c,v 1.2 2007/04/24 15:42:08 tnozaki Exp $ */
2
3 /*-
4 * Copyright (c)2007 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 #include <sys/cdefs.h>
29 #if defined(LIBC_SCCS) && !defined(lint)
30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.2 2007/04/24 15:42:08 tnozaki Exp $");
31 #endif /* LIBC_SCCS and not lint */
32
33 #include <sys/types.h>
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <locale.h>
42 #include <wchar.h>
43 #include <limits.h>
44
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_bcs.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_dechanyu.h"
52
53 /* ----------------------------------------------------------------------
54 * private stuffs used by templates
55 */
56
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed so far but do not yet form a complete character.
 */
typedef struct {
	int	chlen;		/* number of valid bytes in ch[]; 0 = initial */
	char	ch[4];		/* pending bytes, at most one 4-byte character */
} _DECHanyuState;
61
/*
 * Encoding parameters.  Nothing in this file reads the member; it only
 * keeps the structure non-empty.
 */
typedef struct {
	int	dummy;		/* placeholder, never read in this file */
} _DECHanyuEncodingInfo;
65
66 typedef struct {
67 _DECHanyuEncodingInfo ei;
68 struct {
69 /* for future multi-locale facility */
70 _DECHanyuState s_mblen;
71 _DECHanyuState s_mbrlen;
72 _DECHanyuState s_mbrtowc;
73 _DECHanyuState s_mbtowc;
74 _DECHanyuState s_mbsrtowcs;
75 _DECHanyuState s_wcrtomb;
76 _DECHanyuState s_wcsrtombs;
77 _DECHanyuState s_wctomb;
78 } states;
79 } _DECHanyuCTypeInfo;
80
/*
 * Glue for the templates included at the end of this file.
 *
 * _CEI_TO_EI		pointer to the encoding info inside a ctype info.
 * _CEI_TO_STATE	the states.s_<func> member of a ctype info.
 * _FUNCNAME		prefixes a method name with _citrus_DECHanyu_.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* The longest character handled by this file is four bytes. */
#define _ENCODING_MB_CUR_MAX(_ei_)	4
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
91
92 static __inline void
93 /*ARGSUSED*/
94 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
95 _DECHanyuState * __restrict psenc)
96 {
97 /* ei may be null */
98 _DIAGASSERT(psenc != NULL);
99
100 psenc->chlen = 0;
101 }
102
103 static __inline void
104 /*ARGSUSED*/
105 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
106 void * __restrict pspriv,
107 const _DECHanyuState * __restrict psenc)
108 {
109 /* ei may be null */
110 _DIAGASSERT(pspriv != NULL);
111 _DIAGASSERT(psenc != NULL);
112
113 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
114 }
115
116 static __inline void
117 /*ARGSUSED*/
118 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
119 _DECHanyuState * __restrict psenc,
120 const void * __restrict pspriv)
121 {
122 /* ei may be null */
123 _DIAGASSERT(psenc != NULL);
124 _DIAGASSERT(pspriv != NULL);
125
126 memcpy((void *)psenc, pspriv, sizeof(*psenc));
127 }
128
129 static void
130 /*ARGSUSED*/
131 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
132 {
133 /* ei may be null */
134 }
135
136 static int
137 /*ARGSUSED*/
138 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
139 const void * __restrict var, size_t lenvar)
140 {
141 /* ei may be null */
142 return 0;
143 }
144
/*
 * is_singlebyte --
 *	Nonzero when c does not exceed 0x7F, i.e. the byte is a complete
 *	one-byte character.  Callers in this file pass a byte masked to
 *	0..0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return !(c > 0x7F);
}
150
/*
 * is_leadbyte --
 *	Nonzero when c lies in 0xA1..0xFE, the range accepted for the first
 *	byte of a two-byte character.
 */
static __inline int
is_leadbyte(int c)
{
	if (c < 0xA1 || c > 0xFE)
		return 0;
	return 1;
}
156
/*
 * is_trailbyte --
 *	Nonzero when c, with bit 7 cleared, lies in 0x21..0x7E.  For a byte
 *	value that means 0x21..0x7E or 0xA1..0xFE: a trail byte may come
 *	with or without its high bit set.
 */
static __inline int
is_trailbyte(int c)
{
	const int low7 = c & ~0x80;

	return 0x21 <= low7 && low7 <= 0x7E;
}
163
/*
 * is_hanyu1 --
 *	Nonzero when c is 0xC2, the first byte of the two-byte prefix that
 *	introduces a four-byte character.
 */
static __inline int
is_hanyu1(int c)
{
	return (c == 0xC2) ? 1 : 0;
}
169
/*
 * is_hanyu2 --
 *	Nonzero when c is 0xCB, the second byte of the two-byte prefix that
 *	introduces a four-byte character.
 */
static __inline int
is_hanyu2(int c)
{
	return (c == 0xCB) ? 1 : 0;
}
175
/*
 * Tag or'ed into the upper 16 bits of the wide character for a four-byte
 * character: the prefix bytes 0xC2 0xCB placed above the 16-bit code.
 */
#define HANYUBIT	0xC2CB0000
177
/*
 * is_94charset --
 *	Nonzero when c lies in 0x21..0x7E, the 94 positions of one row or
 *	column of a 94-character set.
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
183
184 static int
185 /*ARGSUSED*/
186 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
187 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
188 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
189 {
190 const char *s0;
191 int ch, i;
192 wchar_t wc;
193
194 /* ei may be unused */
195 _DIAGASSERT(s != NULL);
196 _DIAGASSERT(psenc != NULL);
197 _DIAGASSERT(nresult != NULL);
198
199 if (*s == NULL) {
200 _citrus_DECHanyu_init_state(ei, psenc);
201 *nresult = _ENCODING_IS_STATE_DEPENDENT;
202 return 0;
203 }
204 s0 = *s;
205
206 wc = (wchar_t)0;
207 switch (psenc->chlen) {
208 case 0:
209 if (n-- < 1)
210 goto restart;
211 ch = *s0++ & 0xFF;
212 if (is_singlebyte(ch) != 0) {
213 if (pwc != NULL)
214 *pwc = (wchar_t)ch;
215 *nresult = (size_t)((ch == 0) ? 0 : 1);
216 *s = s0;
217 return 0;
218 }
219 if (is_leadbyte(ch) == 0)
220 goto ilseq;
221 psenc->ch[psenc->chlen++] = ch;
222 break;
223 case 1:
224 ch = psenc->ch[0] & 0xFF;
225 if (is_leadbyte(ch) == 0)
226 return EINVAL;
227 break;
228 case 2: case 3:
229 ch = psenc->ch[0] & 0xFF;
230 if (is_hanyu1(ch) != 0) {
231 ch = psenc->ch[1] & 0xFF;
232 if (is_hanyu2(ch) != 0) {
233 wc |= (wchar_t)HANYUBIT;
234 break;
235 }
236 }
237 /*FALLTHROUGH*/
238 default:
239 return EINVAL;
240 }
241
242 switch (psenc->chlen) {
243 case 1:
244 if (is_hanyu1(ch) != 0) {
245 if (n-- < 1)
246 goto restart;
247 ch = *s0++ & 0xFF;
248 if (is_hanyu2(ch) == 0)
249 goto ilseq;
250 psenc->ch[psenc->chlen++] = ch;
251 wc |= (wchar_t)HANYUBIT;
252 if (n-- < 1)
253 goto restart;
254 ch = *s0++ & 0xFF;
255 if (is_leadbyte(ch) == 0)
256 goto ilseq;
257 psenc->ch[psenc->chlen++] = ch;
258 }
259 break;
260 case 2:
261 if (n-- < 1)
262 goto restart;
263 ch = *s0++ & 0xFF;
264 if (is_leadbyte(ch) == 0)
265 goto ilseq;
266 psenc->ch[psenc->chlen++] = ch;
267 break;
268 case 3:
269 ch = psenc->ch[2] & 0xFF;
270 if (is_leadbyte(ch) == 0)
271 return EINVAL;
272 }
273 if (n-- < 1)
274 goto restart;
275 wc |= (wchar_t)(ch << 8);
276 ch = *s0++ & 0xFF;
277 if (is_trailbyte(ch) == 0)
278 goto ilseq;
279 wc |= (wchar_t)ch;
280 if (pwc != NULL)
281 *pwc = wc;
282 *nresult = (size_t)(s0 - *s);
283 *s = s0;
284 psenc->chlen = 0;
285
286 return 0;
287
288 restart:
289 *nresult = (size_t)-2;
290 *s = s0;
291 return 0;
292
293 ilseq:
294 *nresult = (size_t)-1;
295 return EILSEQ;
296 }
297
298 static int
299 /*ARGSUSED*/
300 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
301 char * __restrict s, size_t n, wchar_t wc,
302 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
303 {
304 int ch;
305
306 /* ei may be unused */
307 _DIAGASSERT(s != NULL);
308 _DIAGASSERT(psenc != NULL);
309 _DIAGASSERT(nresult != NULL);
310
311 if (psenc->chlen != 0)
312 return EINVAL;
313
314 /* XXX: assume wchar_t as int */
315 if ((uint32_t)wc <= 0x7F) {
316 ch = wc & 0xFF;
317 } else {
318 if ((uint32_t)wc > 0xFFFF) {
319 if ((wc & ~0xFFFF) != HANYUBIT)
320 goto ilseq;
321 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
322 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
323 wc &= 0xFFFF;
324 }
325 ch = (wc >> 8) & 0xFF;
326 if (!is_leadbyte(ch))
327 goto ilseq;
328 psenc->ch[psenc->chlen++] = ch;
329 ch = wc & 0xFF;
330 if (is_trailbyte(ch) == 0)
331 goto ilseq;
332 }
333 psenc->ch[psenc->chlen++] = ch;
334 if (n < psenc->chlen) {
335 *nresult = (size_t)-1;
336 return E2BIG;
337 }
338 memcpy(s, psenc->ch, psenc->chlen);
339 *nresult = psenc->chlen;
340 psenc->chlen = 0;
341
342 return 0;
343
344 ilseq:
345 *nresult = (size_t)-1;
346 return EILSEQ;
347 }
348
349 static __inline int
350 /*ARGSUSED*/
351 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
352 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
353 {
354 int plane;
355 wchar_t mask;
356
357 /* ei may be unused */
358 _DIAGASSERT(csid != NULL);
359 _DIAGASSERT(idx != NULL);
360
361 plane = 0;
362 mask = 0x7F;
363 /* XXX: assume wchar_t as int */
364 if ((uint32_t)wc > 0x7F) {
365 if ((uint32_t)wc > 0xFFFF) {
366 if ((wc & ~0xFFFF) != HANYUBIT)
367 return EILSEQ;
368 plane += 2;
369 }
370 if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
371 is_trailbyte(wc & 0xFF) == 0)
372 return EILSEQ;
373 plane += (wc & 0x80) ? 1 : 2;
374 mask |= 0x7F00;
375 }
376 *csid = plane;
377 *idx = (_index_t)(wc & mask);
378
379 return 0;
380 }
381
382 static __inline int
383 /*ARGSUSED*/
384 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
385 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
386 {
387 /* ei may be unused */
388 _DIAGASSERT(wc != NULL);
389
390 if (csid == 0) {
391 if (idx > 0x7F)
392 return EILSEQ;
393 } else if (csid <= 4) {
394 if (is_94charset(idx >> 8) == 0)
395 return EILSEQ;
396 if (is_94charset(idx & 0xFF) == 0)
397 return EILSEQ;
398 if (csid % 2)
399 idx |= 0x80;
400 idx |= 0x8000;
401 if (csid > 2)
402 idx |= HANYUBIT;
403 } else
404 return EILSEQ;
405 *wc = (wchar_t)idx;
406 return 0;
407 }
408
409 static __inline int
410 /*ARGSUSED*/
411 _citrus_DECHanyu_stdenc_get_state_desc_generic(
412 _DECHanyuEncodingInfo * __restrict ei,
413 _DECHanyuState * __restrict psenc, int * __restrict rstate)
414 {
415 /* ei may be unused */
416 _DIAGASSERT(psenc != NULL);
417 _DIAGASSERT(rstate != NULL);
418
419 *rstate = (psenc->chlen == 0)
420 ? _STDENC_SDGEN_INITIAL
421 : _STDENC_SDGEN_INCOMPLETE_CHAR;
422 return 0;
423 }
424
425 /* ----------------------------------------------------------------------
426 * public interface for ctype
427 */
428
429 _CITRUS_CTYPE_DECLS(DECHanyu);
430 _CITRUS_CTYPE_DEF_OPS(DECHanyu);
431
432 #include "citrus_ctype_template.h"
433
434
435 /* ----------------------------------------------------------------------
436 * public interface for stdenc
437 */
438
439 _CITRUS_STDENC_DECLS(DECHanyu);
440 _CITRUS_STDENC_DEF_OPS(DECHanyu);
441
442 #include "citrus_stdenc_template.h"
443