Home | History | Annotate | Line # | Download | only in modules
citrus_dechanyu.c revision 1.2.12.1
      1 /* $NetBSD: citrus_dechanyu.c,v 1.2.12.1 2008/06/23 04:29:31 wrstuden Exp $ */
      2 
      3 /*-
      4  * Copyright (c)2007 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 #include <sys/cdefs.h>
     29 #if defined(LIBC_SCCS) && !defined(lint)
     30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.2.12.1 2008/06/23 04:29:31 wrstuden Exp $");
     31 #endif /* LIBC_SCCS and not lint */
     32 
     33 #include <sys/types.h>
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <string.h>
     37 #include <stdint.h>
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include <stddef.h>
     41 #include <wchar.h>
     42 #include <limits.h>
     43 
     44 #include "citrus_namespace.h"
     45 #include "citrus_types.h"
     46 #include "citrus_bcs.h"
     47 #include "citrus_module.h"
     48 #include "citrus_ctype.h"
     49 #include "citrus_stdenc.h"
     50 #include "citrus_dechanyu.h"
     51 
     52 /* ----------------------------------------------------------------------
     53  * private stuffs used by templates
     54  */
     55 
/*
 * Conversion state: the bytes of a multibyte character that has been
 * started but not yet completed.
 */
typedef struct {
	int chlen;	/* number of valid bytes in ch[]; 0 means nothing pending */
	char ch[4];	/* pending bytes, in input order */
} _DECHanyuState;
     60 
/*
 * Per-encoding configuration.  Nothing in this file reads or writes the
 * single placeholder member.
 */
typedef struct {
	int dummy;
} _DECHanyuEncodingInfo;
     64 
/*
 * Encoding info plus one conversion state per function name listed
 * below; a member is selected by name through _CEI_TO_STATE().
 */
typedef struct {
	_DECHanyuEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_DECHanyuState	s_mblen;
		_DECHanyuState	s_mbrlen;
		_DECHanyuState	s_mbrtowc;
		_DECHanyuState	s_mbtowc;
		_DECHanyuState	s_mbsrtowcs;
		_DECHanyuState	s_wcrtomb;
		_DECHanyuState	s_wcsrtombs;
		_DECHanyuState	s_wctomb;
	} states;
} _DECHanyuCTypeInfo;
     79 
/*
 * Accessors on a _DECHanyuCTypeInfo pointer: the embedded encoding info,
 * and the state member named s_<func>.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/*
 * Names and parameters of this encoding.
 * NOTE(review): presumably consumed by the citrus_*_template.h files
 * included at the end of this file -- confirm against those headers.
 */
/* Builds the _citrus_DECHanyu_<m> function names used in this file. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest sequence handled here: two prefix bytes, a lead and a trail byte. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
/* Also the value reported by mbrtowc_priv when it is given a NULL input. */
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
     90 
     91 static __inline void
     92 /*ARGSUSED*/
     93 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
     94 	_DECHanyuState * __restrict psenc)
     95 {
     96 	/* ei may be null */
     97 	_DIAGASSERT(psenc != NULL);
     98 
     99 	psenc->chlen = 0;
    100 }
    101 
    102 static __inline void
    103 /*ARGSUSED*/
    104 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
    105 	void * __restrict pspriv,
    106 	const _DECHanyuState * __restrict psenc)
    107 {
    108 	/* ei may be null */
    109 	_DIAGASSERT(pspriv != NULL);
    110 	_DIAGASSERT(psenc != NULL);
    111 
    112 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
    113 }
    114 
    115 static __inline void
    116 /*ARGSUSED*/
    117 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
    118 	_DECHanyuState * __restrict psenc,
    119 	const void * __restrict pspriv)
    120 {
    121 	/* ei may be null */
    122 	_DIAGASSERT(psenc != NULL);
    123 	_DIAGASSERT(pspriv != NULL);
    124 
    125 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
    126 }
    127 
    128 static void
    129 /*ARGSUSED*/
    130 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
    131 {
    132 	/* ei may be null */
    133 }
    134 
    135 static int
    136 /*ARGSUSED*/
    137 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
    138 	const void * __restrict var, size_t lenvar)
    139 {
    140 	/* ei may be null */
    141 	return 0;
    142 }
    143 
/*
 * Nonzero when c is a single-byte character, i.e. below 0x80.
 * No lower bound is checked; the decoder in this file passes values
 * already masked to 0..0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return (c < 0x80);
}
    149 
/*
 * Nonzero when c can start a two-byte character: 0xA1 through 0xFE
 * inclusive.
 */
static __inline int
is_leadbyte(int c)
{
	if (c < 0xA1)
		return 0;
	return (c <= 0xFE);
}
    155 
/*
 * Nonzero when c can end a two-byte character.  Bit 7 is ignored, so
 * both 0x21..0x7E and 0xA1..0xFE are accepted.
 */
static __inline int
is_trailbyte(int c)
{
	const int low = c & ~0x80;	/* same value with bit 7 cleared */

	return !(low < 0x21 || low > 0x7E);
}
    162 
/*
 * Nonzero when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline int
is_hanyu1(int c)
{
	const int prefix1 = 0xC2;

	return (c == prefix1);
}
    168 
/*
 * Nonzero when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline int
is_hanyu2(int c)
{
	const int prefix2 = 0xCB;

	return (c == prefix2);
}
    174 
/*
 * Upper 16 bits of the wide character for a four-byte sequence: the
 * prefix bytes 0xC2 and 0xCB placed in bits 31..24 and 23..16.
 */
#define HANYUBIT	0xC2CB0000
    176 
/*
 * Nonzero when c is a valid position in a 94-character set:
 * 0x21 through 0x7E inclusive.
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
    182 
    183 static int
    184 /*ARGSUSED*/
    185 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
    186 	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
    187 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
    188 {
    189 	const char *s0;
    190 	int ch, i;
    191 	wchar_t wc;
    192 
    193 	/* ei may be unused */
    194 	_DIAGASSERT(s != NULL);
    195 	_DIAGASSERT(psenc != NULL);
    196 	_DIAGASSERT(nresult != NULL);
    197 
    198 	if (*s == NULL) {
    199 		_citrus_DECHanyu_init_state(ei, psenc);
    200 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
    201 		return 0;
    202 	}
    203 	s0 = *s;
    204 
    205 	wc = (wchar_t)0;
    206 	switch (psenc->chlen) {
    207 	case 0:
    208 		if (n-- < 1)
    209 			goto restart;
    210 		ch = *s0++ & 0xFF;
    211 		if (is_singlebyte(ch) != 0) {
    212 			if (pwc != NULL)
    213 				*pwc = (wchar_t)ch;
    214 			*nresult = (size_t)((ch == 0) ? 0 : 1);
    215 			*s = s0;
    216 			return 0;
    217 		}
    218 		if (is_leadbyte(ch) == 0)
    219 			goto ilseq;
    220 		psenc->ch[psenc->chlen++] = ch;
    221 		break;
    222 	case 1:
    223 		ch = psenc->ch[0] & 0xFF;
    224 		if (is_leadbyte(ch) == 0)
    225 			return EINVAL;
    226 		break;
    227 	case 2: case 3:
    228 		ch = psenc->ch[0] & 0xFF;
    229 		if (is_hanyu1(ch) != 0) {
    230 			ch = psenc->ch[1] & 0xFF;
    231 			if (is_hanyu2(ch) != 0) {
    232 				wc |= (wchar_t)HANYUBIT;
    233 				break;
    234 			}
    235 		}
    236 	/*FALLTHROUGH*/
    237 	default:
    238 		return EINVAL;
    239 	}
    240 
    241 	switch (psenc->chlen) {
    242 	case 1:
    243 		if (is_hanyu1(ch) != 0) {
    244 			if (n-- < 1)
    245 				goto restart;
    246 			ch = *s0++ & 0xFF;
    247 			if (is_hanyu2(ch) == 0)
    248 				goto ilseq;
    249 			psenc->ch[psenc->chlen++] = ch;
    250 			wc |= (wchar_t)HANYUBIT;
    251 			if (n-- < 1)
    252 				goto restart;
    253 			ch = *s0++ & 0xFF;
    254 			if (is_leadbyte(ch) == 0)
    255 				goto ilseq;
    256 			psenc->ch[psenc->chlen++] = ch;
    257 		}
    258 		break;
    259 	case 2:
    260 		if (n-- < 1)
    261 			goto restart;
    262 		ch = *s0++ & 0xFF;
    263 		if (is_leadbyte(ch) == 0)
    264 			goto ilseq;
    265 		psenc->ch[psenc->chlen++] = ch;
    266 		break;
    267 	case 3:
    268 		ch = psenc->ch[2] & 0xFF;
    269 		if (is_leadbyte(ch) == 0)
    270 			return EINVAL;
    271 	}
    272 	if (n-- < 1)
    273 		goto restart;
    274 	wc |= (wchar_t)(ch << 8);
    275 	ch = *s0++ & 0xFF;
    276 	if (is_trailbyte(ch) == 0)
    277 		goto ilseq;
    278 	wc |= (wchar_t)ch;
    279 	if (pwc != NULL)
    280 		*pwc = wc;
    281 	*nresult = (size_t)(s0 - *s);
    282 	*s = s0;
    283 	psenc->chlen = 0;
    284 
    285 	return 0;
    286 
    287 restart:
    288 	*nresult = (size_t)-2;
    289 	*s = s0;
    290 	return 0;
    291 
    292 ilseq:
    293 	*nresult = (size_t)-1;
    294 	return EILSEQ;
    295 }
    296 
    297 static int
    298 /*ARGSUSED*/
    299 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
    300 	char * __restrict s, size_t n, wchar_t wc,
    301 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
    302 {
    303 	int ch;
    304 
    305 	/* ei may be unused */
    306 	_DIAGASSERT(s != NULL);
    307 	_DIAGASSERT(psenc != NULL);
    308 	_DIAGASSERT(nresult != NULL);
    309 
    310 	if (psenc->chlen != 0)
    311 		return EINVAL;
    312 
    313 	/* XXX: assume wchar_t as int */
    314 	if ((uint32_t)wc <= 0x7F) {
    315 		ch = wc & 0xFF;
    316 	} else {
    317 		if ((uint32_t)wc > 0xFFFF) {
    318 			if ((wc & ~0xFFFF) != HANYUBIT)
    319 				goto ilseq;
    320 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
    321 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
    322 			wc &= 0xFFFF;
    323 		}
    324 		ch = (wc >> 8) & 0xFF;
    325 		if (!is_leadbyte(ch))
    326 			goto ilseq;
    327 		psenc->ch[psenc->chlen++] = ch;
    328 		ch = wc & 0xFF;
    329 		if (is_trailbyte(ch) == 0)
    330 			goto ilseq;
    331 	}
    332 	psenc->ch[psenc->chlen++] = ch;
    333 	if (n < psenc->chlen) {
    334 		*nresult = (size_t)-1;
    335 		return E2BIG;
    336 	}
    337 	memcpy(s, psenc->ch, psenc->chlen);
    338 	*nresult = psenc->chlen;
    339 	psenc->chlen = 0;
    340 
    341 	return 0;
    342 
    343 ilseq:
    344 	*nresult = (size_t)-1;
    345 	return EILSEQ;
    346 }
    347 
    348 static __inline int
    349 /*ARGSUSED*/
    350 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
    351 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
    352 {
    353 	int plane;
    354 	wchar_t mask;
    355 
    356 	/* ei may be unused */
    357 	_DIAGASSERT(csid != NULL);
    358 	_DIAGASSERT(idx != NULL);
    359 
    360 	plane = 0;
    361 	mask = 0x7F;
    362 	/* XXX: assume wchar_t as int */
    363 	if ((uint32_t)wc > 0x7F) {
    364 		if ((uint32_t)wc > 0xFFFF) {
    365 			if ((wc & ~0xFFFF) != HANYUBIT)
    366 				return EILSEQ;
    367 			plane += 2;
    368 		}
    369 		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
    370 		    is_trailbyte(wc & 0xFF) == 0)
    371 			return EILSEQ;
    372 		plane += (wc & 0x80) ? 1 : 2;
    373 		mask |= 0x7F00;
    374 	}
    375 	*csid = plane;
    376 	*idx = (_index_t)(wc & mask);
    377 
    378 	return 0;
    379 }
    380 
    381 static __inline int
    382 /*ARGSUSED*/
    383 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
    384 	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
    385 {
    386 	/* ei may be unused */
    387 	_DIAGASSERT(wc != NULL);
    388 
    389 	if (csid == 0) {
    390 		if (idx > 0x7F)
    391 			return EILSEQ;
    392 	} else if (csid <= 4) {
    393 		if (is_94charset(idx >> 8) == 0)
    394 			return EILSEQ;
    395 		if (is_94charset(idx & 0xFF) == 0)
    396 			return EILSEQ;
    397 		if (csid % 2)
    398 			idx |= 0x80;
    399 		idx |= 0x8000;
    400 		if (csid > 2)
    401 			idx |= HANYUBIT;
    402 	} else
    403 		return EILSEQ;
    404 	*wc = (wchar_t)idx;
    405 	return 0;
    406 }
    407 
    408 static __inline int
    409 /*ARGSUSED*/
    410 _citrus_DECHanyu_stdenc_get_state_desc_generic(
    411 	_DECHanyuEncodingInfo * __restrict ei,
    412 	_DECHanyuState * __restrict psenc, int * __restrict rstate)
    413 {
    414 	/* ei may be unused */
    415 	_DIAGASSERT(psenc != NULL);
    416 	_DIAGASSERT(rstate != NULL);
    417 
    418 	*rstate = (psenc->chlen == 0)
    419 	    ? _STDENC_SDGEN_INITIAL
    420 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
    421 	return 0;
    422 }
    423 
    424 /* ----------------------------------------------------------------------
    425  * public interface for ctype
    426  */
    427 
/*
 * NOTE(review): these macros and the template are defined in the citrus
 * ctype headers, which are not visible from this file.  Presumably they
 * declare and define the ctype operations for this encoding in terms of
 * the _FUNCNAME()/_ENCODING_* macros and the *_priv functions above --
 * confirm against citrus_ctype.h and citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(DECHanyu);
_CITRUS_CTYPE_DEF_OPS(DECHanyu);

#include "citrus_ctype_template.h"
    432 
    433 
    434 /* ----------------------------------------------------------------------
    435  * public interface for stdenc
    436  */
    437 
/*
 * NOTE(review): these macros and the template are defined in the citrus
 * stdenc headers, which are not visible from this file.  Presumably they
 * declare and define the stdenc operations for this encoding in terms of
 * the _FUNCNAME()/_ENCODING_* macros and the functions above -- confirm
 * against citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

#include "citrus_stdenc_template.h"
    442