Home | History | Annotate | Line # | Download | only in modules
citrus_dechanyu.c revision 1.2
      1 /* $NetBSD: citrus_dechanyu.c,v 1.2 2007/04/24 15:42:08 tnozaki Exp $ */
      2 
      3 /*-
      4  * Copyright (c)2007 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 #include <sys/cdefs.h>
     29 #if defined(LIBC_SCCS) && !defined(lint)
     30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.2 2007/04/24 15:42:08 tnozaki Exp $");
     31 #endif /* LIBC_SCCS and not lint */
     32 
     33 #include <sys/types.h>
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <string.h>
     37 #include <stdint.h>
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include <stddef.h>
     41 #include <locale.h>
     42 #include <wchar.h>
     43 #include <limits.h>
     44 
     45 #include "citrus_namespace.h"
     46 #include "citrus_types.h"
     47 #include "citrus_bcs.h"
     48 #include "citrus_module.h"
     49 #include "citrus_ctype.h"
     50 #include "citrus_stdenc.h"
     51 #include "citrus_dechanyu.h"
     52 
     53 /* ----------------------------------------------------------------------
     54  * private stuffs used by templates
     55  */
     56 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * collected so far but do not yet form a complete character.
 */
typedef struct {
	int chlen;	/* number of valid bytes in ch[]; 0 is the initial state */
	char ch[4];	/* pending bytes; a character is at most 4 bytes long */
} _DECHanyuState;
     61 
/*
 * Per-encoding parameters.  No function in this file reads or writes any
 * member; presumably the single member only keeps the struct non-empty.
 */
typedef struct {
	int dummy;	/* placeholder, never accessed in this file */
} _DECHanyuEncodingInfo;
     65 
/*
 * ctype-level module data: the encoding info plus one private conversion
 * state per stateful libc entry point.  The members are reached through
 * the _CEI_TO_EI() and _CEI_TO_STATE() macros.
 */
typedef struct {
	_DECHanyuEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_DECHanyuState	s_mblen;
		_DECHanyuState	s_mbrlen;
		_DECHanyuState	s_mbrtowc;
		_DECHanyuState	s_mbtowc;
		_DECHanyuState	s_mbsrtowcs;
		_DECHanyuState	s_wcrtomb;
		_DECHanyuState	s_wcsrtombs;
		_DECHanyuState	s_wctomb;
	} states;
} _DECHanyuCTypeInfo;
     80 
/* Accessors into _DECHanyuCTypeInfo: the encoding info and the s_<func> state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/*
 * Names and constants presumably consumed by the template headers included
 * at the end of this file -- confirm against citrus_ctype_template.h and
 * citrus_stdenc_template.h.
 */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* longest sequence handled here: 0xC2 0xCB prefix + lead byte + trail byte */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
/* also the value mbrtowc_priv reports in *nresult when *s is NULL */
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
     91 
     92 static __inline void
     93 /*ARGSUSED*/
     94 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
     95 	_DECHanyuState * __restrict psenc)
     96 {
     97 	/* ei may be null */
     98 	_DIAGASSERT(psenc != NULL);
     99 
    100 	psenc->chlen = 0;
    101 }
    102 
    103 static __inline void
    104 /*ARGSUSED*/
    105 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
    106 	void * __restrict pspriv,
    107 	const _DECHanyuState * __restrict psenc)
    108 {
    109 	/* ei may be null */
    110 	_DIAGASSERT(pspriv != NULL);
    111 	_DIAGASSERT(psenc != NULL);
    112 
    113 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
    114 }
    115 
    116 static __inline void
    117 /*ARGSUSED*/
    118 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
    119 	_DECHanyuState * __restrict psenc,
    120 	const void * __restrict pspriv)
    121 {
    122 	/* ei may be null */
    123 	_DIAGASSERT(psenc != NULL);
    124 	_DIAGASSERT(pspriv != NULL);
    125 
    126 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
    127 }
    128 
    129 static void
    130 /*ARGSUSED*/
    131 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
    132 {
    133 	/* ei may be null */
    134 }
    135 
    136 static int
    137 /*ARGSUSED*/
    138 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
    139 	const void * __restrict var, size_t lenvar)
    140 {
    141 	/* ei may be null */
    142 	return 0;
    143 }
    144 
/*
 * Return 1 if c is a complete single-byte character (any value up to and
 * including 0x7F), 0 otherwise.  Callers pass a byte already masked to
 * 0..0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return !(c > 0x7F);
}
    150 
/*
 * Return 1 if c can start a two-byte unit, i.e. lies in 0xA1..0xFE
 * inclusive, and 0 otherwise.
 */
static __inline int
is_leadbyte(int c)
{
	if (c < 0xA1 || c > 0xFE)
		return 0;
	return 1;
}
    156 
/*
 * Return 1 if c is acceptable as the second byte of a two-byte unit.
 * Bit 7 is ignored (only that bit is cleared, higher bits are kept), so
 * both 0x21..0x7E and 0xA1..0xFE qualify.
 */
static __inline int
is_trailbyte(int c)
{
	const int without_msb = c & ~0x80;

	if (without_msb < 0x21)
		return 0;
	return without_msb <= 0x7E;
}
    163 
/*
 * Return 1 if c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character, and 0 otherwise.
 */
static __inline int
is_hanyu1(int c)
{
	return (c != 0xC2) ? 0 : 1;
}
    169 
/*
 * Return 1 if c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character, and 0 otherwise.
 */
static __inline int
is_hanyu2(int c)
{
	return (c != 0xCB) ? 0 : 1;
}
    175 
/*
 * Tag carried in the upper 16 bits of the wide character of a four-byte
 * sequence: the prefix bytes 0xC2 0xCB (see is_hanyu1/is_hanyu2).
 */
#define HANYUBIT	0xC2CB0000
    177 
/*
 * Return 1 if c is a valid byte of a 94-character set, i.e. lies in
 * 0x21..0x7E inclusive, and 0 otherwise.
 */
static __inline int
is_94charset(int c)
{
	if (c < 0x21 || c > 0x7E)
		return 0;
	return 1;
}
    183 
    184 static int
    185 /*ARGSUSED*/
    186 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
    187 	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
    188 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
    189 {
    190 	const char *s0;
    191 	int ch, i;
    192 	wchar_t wc;
    193 
    194 	/* ei may be unused */
    195 	_DIAGASSERT(s != NULL);
    196 	_DIAGASSERT(psenc != NULL);
    197 	_DIAGASSERT(nresult != NULL);
    198 
    199 	if (*s == NULL) {
    200 		_citrus_DECHanyu_init_state(ei, psenc);
    201 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
    202 		return 0;
    203 	}
    204 	s0 = *s;
    205 
    206 	wc = (wchar_t)0;
    207 	switch (psenc->chlen) {
    208 	case 0:
    209 		if (n-- < 1)
    210 			goto restart;
    211 		ch = *s0++ & 0xFF;
    212 		if (is_singlebyte(ch) != 0) {
    213 			if (pwc != NULL)
    214 				*pwc = (wchar_t)ch;
    215 			*nresult = (size_t)((ch == 0) ? 0 : 1);
    216 			*s = s0;
    217 			return 0;
    218 		}
    219 		if (is_leadbyte(ch) == 0)
    220 			goto ilseq;
    221 		psenc->ch[psenc->chlen++] = ch;
    222 		break;
    223 	case 1:
    224 		ch = psenc->ch[0] & 0xFF;
    225 		if (is_leadbyte(ch) == 0)
    226 			return EINVAL;
    227 		break;
    228 	case 2: case 3:
    229 		ch = psenc->ch[0] & 0xFF;
    230 		if (is_hanyu1(ch) != 0) {
    231 			ch = psenc->ch[1] & 0xFF;
    232 			if (is_hanyu2(ch) != 0) {
    233 				wc |= (wchar_t)HANYUBIT;
    234 				break;
    235 			}
    236 		}
    237 	/*FALLTHROUGH*/
    238 	default:
    239 		return EINVAL;
    240 	}
    241 
    242 	switch (psenc->chlen) {
    243 	case 1:
    244 		if (is_hanyu1(ch) != 0) {
    245 			if (n-- < 1)
    246 				goto restart;
    247 			ch = *s0++ & 0xFF;
    248 			if (is_hanyu2(ch) == 0)
    249 				goto ilseq;
    250 			psenc->ch[psenc->chlen++] = ch;
    251 			wc |= (wchar_t)HANYUBIT;
    252 			if (n-- < 1)
    253 				goto restart;
    254 			ch = *s0++ & 0xFF;
    255 			if (is_leadbyte(ch) == 0)
    256 				goto ilseq;
    257 			psenc->ch[psenc->chlen++] = ch;
    258 		}
    259 		break;
    260 	case 2:
    261 		if (n-- < 1)
    262 			goto restart;
    263 		ch = *s0++ & 0xFF;
    264 		if (is_leadbyte(ch) == 0)
    265 			goto ilseq;
    266 		psenc->ch[psenc->chlen++] = ch;
    267 		break;
    268 	case 3:
    269 		ch = psenc->ch[2] & 0xFF;
    270 		if (is_leadbyte(ch) == 0)
    271 			return EINVAL;
    272 	}
    273 	if (n-- < 1)
    274 		goto restart;
    275 	wc |= (wchar_t)(ch << 8);
    276 	ch = *s0++ & 0xFF;
    277 	if (is_trailbyte(ch) == 0)
    278 		goto ilseq;
    279 	wc |= (wchar_t)ch;
    280 	if (pwc != NULL)
    281 		*pwc = wc;
    282 	*nresult = (size_t)(s0 - *s);
    283 	*s = s0;
    284 	psenc->chlen = 0;
    285 
    286 	return 0;
    287 
    288 restart:
    289 	*nresult = (size_t)-2;
    290 	*s = s0;
    291 	return 0;
    292 
    293 ilseq:
    294 	*nresult = (size_t)-1;
    295 	return EILSEQ;
    296 }
    297 
    298 static int
    299 /*ARGSUSED*/
    300 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
    301 	char * __restrict s, size_t n, wchar_t wc,
    302 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
    303 {
    304 	int ch;
    305 
    306 	/* ei may be unused */
    307 	_DIAGASSERT(s != NULL);
    308 	_DIAGASSERT(psenc != NULL);
    309 	_DIAGASSERT(nresult != NULL);
    310 
    311 	if (psenc->chlen != 0)
    312 		return EINVAL;
    313 
    314 	/* XXX: assume wchar_t as int */
    315 	if ((uint32_t)wc <= 0x7F) {
    316 		ch = wc & 0xFF;
    317 	} else {
    318 		if ((uint32_t)wc > 0xFFFF) {
    319 			if ((wc & ~0xFFFF) != HANYUBIT)
    320 				goto ilseq;
    321 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
    322 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
    323 			wc &= 0xFFFF;
    324 		}
    325 		ch = (wc >> 8) & 0xFF;
    326 		if (!is_leadbyte(ch))
    327 			goto ilseq;
    328 		psenc->ch[psenc->chlen++] = ch;
    329 		ch = wc & 0xFF;
    330 		if (is_trailbyte(ch) == 0)
    331 			goto ilseq;
    332 	}
    333 	psenc->ch[psenc->chlen++] = ch;
    334 	if (n < psenc->chlen) {
    335 		*nresult = (size_t)-1;
    336 		return E2BIG;
    337 	}
    338 	memcpy(s, psenc->ch, psenc->chlen);
    339 	*nresult = psenc->chlen;
    340 	psenc->chlen = 0;
    341 
    342 	return 0;
    343 
    344 ilseq:
    345 	*nresult = (size_t)-1;
    346 	return EILSEQ;
    347 }
    348 
    349 static __inline int
    350 /*ARGSUSED*/
    351 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
    352 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
    353 {
    354 	int plane;
    355 	wchar_t mask;
    356 
    357 	/* ei may be unused */
    358 	_DIAGASSERT(csid != NULL);
    359 	_DIAGASSERT(idx != NULL);
    360 
    361 	plane = 0;
    362 	mask = 0x7F;
    363 	/* XXX: assume wchar_t as int */
    364 	if ((uint32_t)wc > 0x7F) {
    365 		if ((uint32_t)wc > 0xFFFF) {
    366 			if ((wc & ~0xFFFF) != HANYUBIT)
    367 				return EILSEQ;
    368 			plane += 2;
    369 		}
    370 		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
    371 		    is_trailbyte(wc & 0xFF) == 0)
    372 			return EILSEQ;
    373 		plane += (wc & 0x80) ? 1 : 2;
    374 		mask |= 0x7F00;
    375 	}
    376 	*csid = plane;
    377 	*idx = (_index_t)(wc & mask);
    378 
    379 	return 0;
    380 }
    381 
    382 static __inline int
    383 /*ARGSUSED*/
    384 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
    385 	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
    386 {
    387 	/* ei may be unused */
    388 	_DIAGASSERT(wc != NULL);
    389 
    390 	if (csid == 0) {
    391 		if (idx > 0x7F)
    392 			return EILSEQ;
    393 	} else if (csid <= 4) {
    394 		if (is_94charset(idx >> 8) == 0)
    395 			return EILSEQ;
    396 		if (is_94charset(idx & 0xFF) == 0)
    397 			return EILSEQ;
    398 		if (csid % 2)
    399 			idx |= 0x80;
    400 		idx |= 0x8000;
    401 		if (csid > 2)
    402 			idx |= HANYUBIT;
    403 	} else
    404 		return EILSEQ;
    405 	*wc = (wchar_t)idx;
    406 	return 0;
    407 }
    408 
    409 static __inline int
    410 /*ARGSUSED*/
    411 _citrus_DECHanyu_stdenc_get_state_desc_generic(
    412 	_DECHanyuEncodingInfo * __restrict ei,
    413 	_DECHanyuState * __restrict psenc, int * __restrict rstate)
    414 {
    415 	/* ei may be unused */
    416 	_DIAGASSERT(psenc != NULL);
    417 	_DIAGASSERT(rstate != NULL);
    418 
    419 	*rstate = (psenc->chlen == 0)
    420 	    ? _STDENC_SDGEN_INITIAL
    421 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
    422 	return 0;
    423 }
    424 
    425 /* ----------------------------------------------------------------------
    426  * public interface for ctype
    427  */
    428 
/*
 * NOTE(review): these macros presumably declare the ctype entry points
 * and define the ops table for the DECHanyu module -- confirm against
 * citrus_ctype.h.
 */
_CITRUS_CTYPE_DECLS(DECHanyu);
_CITRUS_CTYPE_DEF_OPS(DECHanyu);

/*
 * Included after the private definitions above; presumably the template
 * builds the public functions from the _FUNCNAME()/_ENCODING_* macros and
 * the *_priv helpers defined in this file -- confirm against the header.
 */
#include "citrus_ctype_template.h"
    433 
    434 
    435 /* ----------------------------------------------------------------------
    436  * public interface for stdenc
    437  */
    438 
/*
 * NOTE(review): these macros presumably declare the stdenc entry points
 * and define the ops table for the DECHanyu module -- confirm against
 * citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

/*
 * Included last; presumably the template builds the stdenc functions from
 * the _FUNCNAME()/_ENCODING_* macros and the helpers defined in this
 * file -- confirm against the header.
 */
#include "citrus_stdenc_template.h"
    443