citrus_dechanyu.c revision 1.3.2.2 1 /* $NetBSD: citrus_dechanyu.c,v 1.3.2.2 2014/05/22 11:36:50 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2007 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28 #include <sys/cdefs.h>
29 #if defined(LIBC_SCCS) && !defined(lint)
30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.3.2.2 2014/05/22 11:36:50 yamt Exp $");
31 #endif /* LIBC_SCCS and not lint */
32
33 #include <sys/types.h>
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <wchar.h>
42 #include <limits.h>
43
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_dechanyu.h"
51
52 /* ----------------------------------------------------------------------
53 * private definitions used by templates
54 */
55
/*
 * Conversion state: the bytes of a multibyte character that have been
 * collected so far.  chlen == 0 is the initial (nothing pending) state.
 */
typedef struct {
	int	chlen;		/* number of bytes currently held in ch[] */
	char	ch[4];		/* pending bytes, in input order */
} _DECHanyuState;
60
/*
 * Per-encoding information.  None of the functions in this file read or
 * write the single member; it only keeps the struct non-empty.
 */
typedef struct {
	int	dummy;
} _DECHanyuEncodingInfo;
64
65 typedef struct {
66 _DECHanyuEncodingInfo ei;
67 struct {
68 /* for future multi-locale facility */
69 _DECHanyuState s_mblen;
70 _DECHanyuState s_mbrlen;
71 _DECHanyuState s_mbrtowc;
72 _DECHanyuState s_mbtowc;
73 _DECHanyuState s_mbsrtowcs;
74 _DECHanyuState s_mbsnrtowcs;
75 _DECHanyuState s_wcrtomb;
76 _DECHanyuState s_wcsrtombs;
77 _DECHanyuState s_wcsnrtombs;
78 _DECHanyuState s_wctomb;
79 } states;
80 } _DECHanyuCTypeInfo;
81
/* Accessors into a _DECHanyuCTypeInfo: encoding info and per-function state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/*
 * Names and constants describing this encoding.
 * NOTE(review): presumably consumed by the citrus_*_template.h headers
 * included at the end of this file -- confirm against those headers.
 */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest sequence handled here: 2-byte prefix + lead byte + trail byte. */
#define _ENCODING_MB_CUR_MAX(_ei_)	4
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
92
93 static __inline void
94 /*ARGSUSED*/
95 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
96 _DECHanyuState * __restrict psenc)
97 {
98 /* ei may be null */
99 _DIAGASSERT(psenc != NULL);
100
101 psenc->chlen = 0;
102 }
103
104 static __inline void
105 /*ARGSUSED*/
106 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
107 void * __restrict pspriv,
108 const _DECHanyuState * __restrict psenc)
109 {
110 /* ei may be null */
111 _DIAGASSERT(pspriv != NULL);
112 _DIAGASSERT(psenc != NULL);
113
114 memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
115 }
116
117 static __inline void
118 /*ARGSUSED*/
119 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
120 _DECHanyuState * __restrict psenc,
121 const void * __restrict pspriv)
122 {
123 /* ei may be null */
124 _DIAGASSERT(psenc != NULL);
125 _DIAGASSERT(pspriv != NULL);
126
127 memcpy((void *)psenc, pspriv, sizeof(*psenc));
128 }
129
130 static void
131 /*ARGSUSED*/
132 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
133 {
134 /* ei may be null */
135 }
136
137 static int
138 /*ARGSUSED*/
139 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
140 const void * __restrict var, size_t lenvar)
141 {
142 /* ei may be null */
143 return 0;
144 }
145
/*
 * Non-zero when c is at most 0x7F, i.e. a byte that stands for itself.
 * Callers in this file pass values already masked with 0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return c < 0x80;
}
151
/*
 * Non-zero when c can start a two-byte character: 0xA1 through 0xFE
 * inclusive.
 */
static __inline int
is_leadbyte(int c)
{
	return !(c < 0xA1 || c > 0xFE);
}
157
/*
 * Non-zero when c can be the second byte of a two-byte character.
 * Bit 7 is ignored, so both 0x21..0x7E and 0xA1..0xFE qualify.
 * Only bit 7 is cleared; any higher bits still take part in the range
 * check.
 */
static __inline int
is_trailbyte(int c)
{
	const int stripped = c & ~0x80;

	return 0x21 <= stripped && stripped <= 0x7E;
}
164
/*
 * Non-zero when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline int
is_hanyu1(int c)
{
	return 0xC2 == c;
}
170
/*
 * Non-zero when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline int
is_hanyu2(int c)
{
	return 0xCB == c;
}
176
/*
 * Upper 16 bits carried by the wide character of every four-byte
 * sequence: 0xC2 and 0xCB, the two prefix bytes recognised by is_hanyu1()
 * and is_hanyu2(), placed in bits 31..16.
 */
#define HANYUBIT 0xC2CB0000
178
/*
 * Non-zero when c lies in 0x21..0x7E inclusive, the 94 positions used
 * for each byte of a charset index in _citrus_DECHanyu_stdenc_cstowc().
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
184
185 static int
186 /*ARGSUSED*/
187 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
188 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
189 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
190 {
191 const char *s0;
192 int ch;
193 wchar_t wc;
194
195 /* ei may be unused */
196 _DIAGASSERT(s != NULL);
197 _DIAGASSERT(psenc != NULL);
198 _DIAGASSERT(nresult != NULL);
199
200 if (*s == NULL) {
201 _citrus_DECHanyu_init_state(ei, psenc);
202 *nresult = _ENCODING_IS_STATE_DEPENDENT;
203 return 0;
204 }
205 s0 = *s;
206
207 wc = (wchar_t)0;
208 switch (psenc->chlen) {
209 case 0:
210 if (n-- < 1)
211 goto restart;
212 ch = *s0++ & 0xFF;
213 if (is_singlebyte(ch) != 0) {
214 if (pwc != NULL)
215 *pwc = (wchar_t)ch;
216 *nresult = (size_t)((ch == 0) ? 0 : 1);
217 *s = s0;
218 return 0;
219 }
220 if (is_leadbyte(ch) == 0)
221 goto ilseq;
222 psenc->ch[psenc->chlen++] = ch;
223 break;
224 case 1:
225 ch = psenc->ch[0] & 0xFF;
226 if (is_leadbyte(ch) == 0)
227 return EINVAL;
228 break;
229 case 2: case 3:
230 ch = psenc->ch[0] & 0xFF;
231 if (is_hanyu1(ch) != 0) {
232 ch = psenc->ch[1] & 0xFF;
233 if (is_hanyu2(ch) != 0) {
234 wc |= (wchar_t)HANYUBIT;
235 break;
236 }
237 }
238 /*FALLTHROUGH*/
239 default:
240 return EINVAL;
241 }
242
243 switch (psenc->chlen) {
244 case 1:
245 if (is_hanyu1(ch) != 0) {
246 if (n-- < 1)
247 goto restart;
248 ch = *s0++ & 0xFF;
249 if (is_hanyu2(ch) == 0)
250 goto ilseq;
251 psenc->ch[psenc->chlen++] = ch;
252 wc |= (wchar_t)HANYUBIT;
253 if (n-- < 1)
254 goto restart;
255 ch = *s0++ & 0xFF;
256 if (is_leadbyte(ch) == 0)
257 goto ilseq;
258 psenc->ch[psenc->chlen++] = ch;
259 }
260 break;
261 case 2:
262 if (n-- < 1)
263 goto restart;
264 ch = *s0++ & 0xFF;
265 if (is_leadbyte(ch) == 0)
266 goto ilseq;
267 psenc->ch[psenc->chlen++] = ch;
268 break;
269 case 3:
270 ch = psenc->ch[2] & 0xFF;
271 if (is_leadbyte(ch) == 0)
272 return EINVAL;
273 }
274 if (n-- < 1)
275 goto restart;
276 wc |= (wchar_t)(ch << 8);
277 ch = *s0++ & 0xFF;
278 if (is_trailbyte(ch) == 0)
279 goto ilseq;
280 wc |= (wchar_t)ch;
281 if (pwc != NULL)
282 *pwc = wc;
283 *nresult = (size_t)(s0 - *s);
284 *s = s0;
285 psenc->chlen = 0;
286
287 return 0;
288
289 restart:
290 *nresult = (size_t)-2;
291 *s = s0;
292 return 0;
293
294 ilseq:
295 *nresult = (size_t)-1;
296 return EILSEQ;
297 }
298
299 static int
300 /*ARGSUSED*/
301 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
302 char * __restrict s, size_t n, wchar_t wc,
303 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
304 {
305 int ch;
306
307 /* ei may be unused */
308 _DIAGASSERT(s != NULL);
309 _DIAGASSERT(psenc != NULL);
310 _DIAGASSERT(nresult != NULL);
311
312 if (psenc->chlen != 0)
313 return EINVAL;
314
315 /* XXX: assume wchar_t as int */
316 if ((uint32_t)wc <= 0x7F) {
317 ch = wc & 0xFF;
318 } else {
319 if ((uint32_t)wc > 0xFFFF) {
320 if ((wc & ~0xFFFF) != HANYUBIT)
321 goto ilseq;
322 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
323 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
324 wc &= 0xFFFF;
325 }
326 ch = (wc >> 8) & 0xFF;
327 if (!is_leadbyte(ch))
328 goto ilseq;
329 psenc->ch[psenc->chlen++] = ch;
330 ch = wc & 0xFF;
331 if (is_trailbyte(ch) == 0)
332 goto ilseq;
333 }
334 psenc->ch[psenc->chlen++] = ch;
335 if (n < psenc->chlen) {
336 *nresult = (size_t)-1;
337 return E2BIG;
338 }
339 memcpy(s, psenc->ch, psenc->chlen);
340 *nresult = psenc->chlen;
341 psenc->chlen = 0;
342
343 return 0;
344
345 ilseq:
346 *nresult = (size_t)-1;
347 return EILSEQ;
348 }
349
350 static __inline int
351 /*ARGSUSED*/
352 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
353 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
354 {
355 int plane;
356 wchar_t mask;
357
358 /* ei may be unused */
359 _DIAGASSERT(csid != NULL);
360 _DIAGASSERT(idx != NULL);
361
362 plane = 0;
363 mask = 0x7F;
364 /* XXX: assume wchar_t as int */
365 if ((uint32_t)wc > 0x7F) {
366 if ((uint32_t)wc > 0xFFFF) {
367 if ((wc & ~0xFFFF) != HANYUBIT)
368 return EILSEQ;
369 plane += 2;
370 }
371 if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
372 is_trailbyte(wc & 0xFF) == 0)
373 return EILSEQ;
374 plane += (wc & 0x80) ? 1 : 2;
375 mask |= 0x7F00;
376 }
377 *csid = plane;
378 *idx = (_index_t)(wc & mask);
379
380 return 0;
381 }
382
383 static __inline int
384 /*ARGSUSED*/
385 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
386 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
387 {
388 /* ei may be unused */
389 _DIAGASSERT(wc != NULL);
390
391 if (csid == 0) {
392 if (idx > 0x7F)
393 return EILSEQ;
394 } else if (csid <= 4) {
395 if (is_94charset(idx >> 8) == 0)
396 return EILSEQ;
397 if (is_94charset(idx & 0xFF) == 0)
398 return EILSEQ;
399 if (csid % 2)
400 idx |= 0x80;
401 idx |= 0x8000;
402 if (csid > 2)
403 idx |= HANYUBIT;
404 } else
405 return EILSEQ;
406 *wc = (wchar_t)idx;
407 return 0;
408 }
409
410 static __inline int
411 /*ARGSUSED*/
412 _citrus_DECHanyu_stdenc_get_state_desc_generic(
413 _DECHanyuEncodingInfo * __restrict ei,
414 _DECHanyuState * __restrict psenc, int * __restrict rstate)
415 {
416 /* ei may be unused */
417 _DIAGASSERT(psenc != NULL);
418 _DIAGASSERT(rstate != NULL);
419
420 *rstate = (psenc->chlen == 0)
421 ? _STDENC_SDGEN_INITIAL
422 : _STDENC_SDGEN_INCOMPLETE_CHAR;
423 return 0;
424 }
425
426 /* ----------------------------------------------------------------------
427 * public interface for ctype
428 */
429
/*
 * NOTE(review): the two macros below are not defined in this file;
 * presumably they come from citrus_ctype.h and declare/define the ctype
 * operations for the DECHanyu module.  The template included afterwards
 * presumably builds those operations from _FUNCNAME(), _CTYPE_INFO,
 * _ENCODING_STATE and the *_priv functions defined above -- confirm
 * against the headers.
 */
430 _CITRUS_CTYPE_DECLS(DECHanyu);
431 _CITRUS_CTYPE_DEF_OPS(DECHanyu);
432
433 #include "citrus_ctype_template.h"
434
435
436 /* ----------------------------------------------------------------------
437 * public interface for stdenc
438 */
439
/*
 * NOTE(review): the two macros below are not defined in this file;
 * presumably they come from citrus_stdenc.h and declare/define the stdenc
 * operations for the DECHanyu module.  The template included afterwards
 * presumably wraps the *_priv and stdenc_* functions defined above --
 * confirm against the headers.
 */
440 _CITRUS_STDENC_DECLS(DECHanyu);
441 _CITRUS_STDENC_DEF_OPS(DECHanyu);
442
443 #include "citrus_stdenc_template.h"
444