Home | History | Annotate | Line # | Download | only in modules
      1 /* $NetBSD: citrus_dechanyu.c,v 1.5 2013/05/28 16:57:56 joerg Exp $ */
      2 
      3 /*-
      4  * Copyright (c)2007 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 #include <sys/cdefs.h>
     29 #if defined(LIBC_SCCS) && !defined(lint)
     30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.5 2013/05/28 16:57:56 joerg Exp $");
     31 #endif /* LIBC_SCCS and not lint */
     32 
     33 #include <sys/types.h>
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <string.h>
     37 #include <stdint.h>
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include <stddef.h>
     41 #include <wchar.h>
     42 #include <limits.h>
     43 
     44 #include "citrus_namespace.h"
     45 #include "citrus_types.h"
     46 #include "citrus_bcs.h"
     47 #include "citrus_module.h"
     48 #include "citrus_ctype.h"
     49 #include "citrus_stdenc.h"
     50 #include "citrus_dechanyu.h"
     51 
     52 /* ----------------------------------------------------------------------
     53  * private stuffs used by templates
     54  */
     55 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed so far but do not yet make up a complete character.
 */
typedef struct {
	int chlen;	/* number of bytes currently held in ch[]; 0 = none */
	char ch[4];	/* the pending bytes, in the order they were read */
} _DECHanyuState;
     60 
/*
 * Per-encoding parameters.  Nothing visible in this file reads or writes
 * the single member; presumably it only keeps the struct non-empty.
 */
typedef struct {
	int dummy;
} _DECHanyuEncodingInfo;
     64 
/*
 * ctype-level information: the encoding info plus one private conversion
 * state per function name.  The individual states are selected by name
 * through _CEI_TO_STATE(cei, <func>), which expands to states.s_<func>.
 */
typedef struct {
	_DECHanyuEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_DECHanyuState	s_mblen;
		_DECHanyuState	s_mbrlen;
		_DECHanyuState	s_mbrtowc;
		_DECHanyuState	s_mbtowc;
		_DECHanyuState	s_mbsrtowcs;
		_DECHanyuState	s_mbsnrtowcs;
		_DECHanyuState	s_wcrtomb;
		_DECHanyuState	s_wcsrtombs;
		_DECHanyuState	s_wcsnrtombs;
		_DECHanyuState	s_wctomb;
	} states;
} _DECHanyuCTypeInfo;
     81 
/* Accessors from a ctype info pointer to its encoding info / named state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/*
 * Names and parameters describing this encoding.
 * NOTE(review): presumably consumed by the citrus_*_template.h headers
 * included at the end of this file -- confirm there.
 */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* longest sequence: 0xC2 0xCB prefix + lead byte + trail byte */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
/* also the value mbrtowc_priv stores in *nresult when called with *s == NULL */
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
     92 
     93 static __inline void
     94 /*ARGSUSED*/
     95 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
     96 	_DECHanyuState * __restrict psenc)
     97 {
     98 	/* ei may be null */
     99 	_DIAGASSERT(psenc != NULL);
    100 
    101 	psenc->chlen = 0;
    102 }
    103 
    104 static __inline void
    105 /*ARGSUSED*/
    106 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
    107 	void * __restrict pspriv,
    108 	const _DECHanyuState * __restrict psenc)
    109 {
    110 	/* ei may be null */
    111 	_DIAGASSERT(pspriv != NULL);
    112 	_DIAGASSERT(psenc != NULL);
    113 
    114 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
    115 }
    116 
    117 static __inline void
    118 /*ARGSUSED*/
    119 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
    120 	_DECHanyuState * __restrict psenc,
    121 	const void * __restrict pspriv)
    122 {
    123 	/* ei may be null */
    124 	_DIAGASSERT(psenc != NULL);
    125 	_DIAGASSERT(pspriv != NULL);
    126 
    127 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
    128 }
    129 
    130 static void
    131 /*ARGSUSED*/
    132 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
    133 {
    134 	/* ei may be null */
    135 }
    136 
    137 static int
    138 /*ARGSUSED*/
    139 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
    140 	const void * __restrict var, size_t lenvar)
    141 {
    142 	/* ei may be null */
    143 	return 0;
    144 }
    145 
/*
 * Return non-zero if c is at most 0x7F, i.e. a byte that stands for a
 * character on its own.  There is no lower bound: callers pass values
 * already masked to 0..0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return !(c > 0x7F);
}
    151 
/*
 * Return non-zero if c lies in 0xA1..0xFE, the range accepted for the
 * first byte of a two-byte character.
 */
static __inline int
is_leadbyte(int c)
{
	return !(c < 0xA1 || c > 0xFE);
}
    157 
/*
 * Return non-zero if c is acceptable as the second byte of a two-byte
 * character: after clearing bit 7 the value must lie in 0x21..0x7E, so
 * both 0x21..0x7E and 0xA1..0xFE qualify.
 */
static __inline int
is_trailbyte(int c)
{
	/* Only bit 7 is dropped; any higher bits still disqualify the value. */
	const int stripped = c & ~0x80;

	return !(stripped < 0x21 || stripped > 0x7E);
}
    164 
/*
 * Return non-zero if c is 0xC2, the first byte of the two-byte prefix
 * that introduces a four-byte character.
 */
static __inline int
is_hanyu1(int c)
{
	enum { hanyu_prefix_first = 0xC2 };

	return c == hanyu_prefix_first;
}
    170 
/*
 * Return non-zero if c is 0xCB, the second byte of the two-byte prefix
 * that introduces a four-byte character.
 */
static __inline int
is_hanyu2(int c)
{
	enum { hanyu_prefix_second = 0xCB };

	return c == hanyu_prefix_second;
}
    176 
/*
 * Marker carried in the upper 16 bits of the wide character for a
 * four-byte sequence: the prefix bytes 0xC2 (is_hanyu1) and 0xCB
 * (is_hanyu2) placed in bits 31..24 and 23..16 respectively.
 */
#define HANYUBIT	0xC2CB0000
    178 
/*
 * Return non-zero if c lies in 0x21..0x7E, i.e. is a valid 7-bit
 * position of a 94-character set.
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
    184 
/*
 * Convert the next multibyte character at *s into a wide character,
 * resuming from any bytes already saved in psenc.
 *
 * Accepted sequences:
 *   - one byte <= 0x7F:                wc = byte
 *   - lead byte, trail byte:           wc = lead << 8 | trail
 *   - 0xC2, 0xCB, lead byte, trail:    wc = HANYUBIT | lead << 8 | trail
 * A first byte of 0xC2 always requires 0xCB as the next byte.
 *
 * ei      - not used.
 * pwc     - receives the wide character; may be NULL to discard it.
 * s       - in/out pointer to the input; if *s is NULL the state is reset
 *           and *nresult is set to _ENCODING_IS_STATE_DEPENDENT.
 * n       - number of bytes available at *s.
 * psenc   - conversion state holding already-consumed bytes.
 * nresult - receives the number of bytes consumed by this call for a
 *           complete character (0 if that character is NUL), (size_t)-2
 *           if the input ended in the middle of a character, or
 *           (size_t)-1 on an invalid byte.
 *
 * Returns 0 on success or incomplete input, EILSEQ on an invalid byte
 * (*s is left unchanged), or EINVAL if the saved state is inconsistent
 * (*nresult is not written in that case).
 *
 * NOTE(review): the ilseq path does not clear psenc->chlen, so bytes
 * stored before the offending byte stay in the state -- confirm that
 * callers reset the state after EILSEQ.
 */
static int
/*ARGSUSED*/
_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
{
	const char *s0;
	int ch;
	wchar_t wc;

	/* ei may be unused */
	_DIAGASSERT(s != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(nresult != NULL);

	/* A NULL input string is a request to reset the state. */
	if (*s == NULL) {
		_citrus_DECHanyu_init_state(ei, psenc);
		*nresult = _ENCODING_IS_STATE_DEPENDENT;
		return 0;
	}
	s0 = *s;

	/*
	 * Stage 1: establish "ch", the most recent significant byte, either
	 * by reading the first byte of a new character or by validating and
	 * reloading what an earlier call left in the state.
	 */
	wc = (wchar_t)0;
	switch (psenc->chlen) {
	case 0:
		/* Fresh character: read its first byte. */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (is_singlebyte(ch) != 0) {
			/* Complete on its own; the state is not touched. */
			if (pwc != NULL)
				*pwc = (wchar_t)ch;
			*nresult = (size_t)((ch == 0) ? 0 : 1);
			*s = s0;
			return 0;
		}
		if (is_leadbyte(ch) == 0)
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 1:
		/* One byte saved: it must be a lead byte (possibly 0xC2). */
		ch = psenc->ch[0] & 0xFF;
		if (is_leadbyte(ch) == 0)
			return EINVAL;
		break;
	case 2: case 3:
		/* Two or three bytes saved: they must start with 0xC2 0xCB. */
		ch = psenc->ch[0] & 0xFF;
		if (is_hanyu1(ch) != 0) {
			ch = psenc->ch[1] & 0xFF;
			if (is_hanyu2(ch) != 0) {
				wc |= (wchar_t)HANYUBIT;
				break;
			}
		}
	/*FALLTHROUGH*/
	default:
		return EINVAL;
	}

	/*
	 * Stage 2: make sure the lead byte of the two-byte part is in "ch"
	 * (and in the state), consuming the 0xC2 0xCB prefix if present.
	 * chlen is re-read here, so a byte stored by stage 1 is seen as case 1.
	 */
	switch (psenc->chlen) {
	case 1:
		if (is_hanyu1(ch) != 0) {
			/* 0xC2 seen: the next byte has to be 0xCB ... */
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (is_hanyu2(ch) == 0)
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
			wc |= (wchar_t)HANYUBIT;
			/* ... followed by the real lead byte. */
			if (n-- < 1)
				goto restart;
			ch = *s0++ & 0xFF;
			if (is_leadbyte(ch) == 0)
				goto ilseq;
			psenc->ch[psenc->chlen++] = ch;
		}
		break;
	case 2:
		/* Prefix already saved: read the lead byte. */
		if (n-- < 1)
			goto restart;
		ch = *s0++ & 0xFF;
		if (is_leadbyte(ch) == 0)
			goto ilseq;
		psenc->ch[psenc->chlen++] = ch;
		break;
	case 3:
		/* Prefix and lead byte saved: reload the lead byte. */
		ch = psenc->ch[2] & 0xFF;
		if (is_leadbyte(ch) == 0)
			return EINVAL;
	}

	/* Stage 3: read the trail byte and assemble the wide character. */
	if (n-- < 1)
		goto restart;
	wc |= (wchar_t)(ch << 8);
	ch = *s0++ & 0xFF;
	if (is_trailbyte(ch) == 0)
		goto ilseq;
	wc |= (wchar_t)ch;
	if (pwc != NULL)
		*pwc = wc;
	/* Only the bytes consumed by this call are counted. */
	*nresult = (size_t)(s0 - *s);
	*s = s0;
	psenc->chlen = 0;

	return 0;

restart:
	/* Input exhausted: consumed bytes stay in the state, *s is advanced. */
	*nresult = (size_t)-2;
	*s = s0;
	return 0;

ilseq:
	/* Invalid byte: *s is not advanced and the state is left as it is. */
	*nresult = (size_t)-1;
	return EILSEQ;
}
    298 
    299 static int
    300 /*ARGSUSED*/
    301 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
    302 	char * __restrict s, size_t n, wchar_t wc,
    303 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
    304 {
    305 	int ch;
    306 
    307 	/* ei may be unused */
    308 	_DIAGASSERT(s != NULL);
    309 	_DIAGASSERT(psenc != NULL);
    310 	_DIAGASSERT(nresult != NULL);
    311 
    312 	if (psenc->chlen != 0)
    313 		return EINVAL;
    314 
    315 	/* XXX: assume wchar_t as int */
    316 	if ((uint32_t)wc <= 0x7F) {
    317 		ch = wc & 0xFF;
    318 	} else {
    319 		if ((uint32_t)wc > 0xFFFF) {
    320 			if ((wc & ~0xFFFF) != HANYUBIT)
    321 				goto ilseq;
    322 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
    323 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
    324 			wc &= 0xFFFF;
    325 		}
    326 		ch = (wc >> 8) & 0xFF;
    327 		if (!is_leadbyte(ch))
    328 			goto ilseq;
    329 		psenc->ch[psenc->chlen++] = ch;
    330 		ch = wc & 0xFF;
    331 		if (is_trailbyte(ch) == 0)
    332 			goto ilseq;
    333 	}
    334 	psenc->ch[psenc->chlen++] = ch;
    335 	if (n < psenc->chlen) {
    336 		*nresult = (size_t)-1;
    337 		return E2BIG;
    338 	}
    339 	memcpy(s, psenc->ch, psenc->chlen);
    340 	*nresult = psenc->chlen;
    341 	psenc->chlen = 0;
    342 
    343 	return 0;
    344 
    345 ilseq:
    346 	*nresult = (size_t)-1;
    347 	return EILSEQ;
    348 }
    349 
    350 static __inline int
    351 /*ARGSUSED*/
    352 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
    353 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
    354 {
    355 	int plane;
    356 	wchar_t mask;
    357 
    358 	/* ei may be unused */
    359 	_DIAGASSERT(csid != NULL);
    360 	_DIAGASSERT(idx != NULL);
    361 
    362 	plane = 0;
    363 	mask = 0x7F;
    364 	/* XXX: assume wchar_t as int */
    365 	if ((uint32_t)wc > 0x7F) {
    366 		if ((uint32_t)wc > 0xFFFF) {
    367 			if ((wc & ~0xFFFF) != HANYUBIT)
    368 				return EILSEQ;
    369 			plane += 2;
    370 		}
    371 		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
    372 		    is_trailbyte(wc & 0xFF) == 0)
    373 			return EILSEQ;
    374 		plane += (wc & 0x80) ? 1 : 2;
    375 		mask |= 0x7F00;
    376 	}
    377 	*csid = plane;
    378 	*idx = (_index_t)(wc & mask);
    379 
    380 	return 0;
    381 }
    382 
    383 static __inline int
    384 /*ARGSUSED*/
    385 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
    386 	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
    387 {
    388 	/* ei may be unused */
    389 	_DIAGASSERT(wc != NULL);
    390 
    391 	if (csid == 0) {
    392 		if (idx > 0x7F)
    393 			return EILSEQ;
    394 	} else if (csid <= 4) {
    395 		if (is_94charset(idx >> 8) == 0)
    396 			return EILSEQ;
    397 		if (is_94charset(idx & 0xFF) == 0)
    398 			return EILSEQ;
    399 		if (csid % 2)
    400 			idx |= 0x80;
    401 		idx |= 0x8000;
    402 		if (csid > 2)
    403 			idx |= HANYUBIT;
    404 	} else
    405 		return EILSEQ;
    406 	*wc = (wchar_t)idx;
    407 	return 0;
    408 }
    409 
    410 static __inline int
    411 /*ARGSUSED*/
    412 _citrus_DECHanyu_stdenc_get_state_desc_generic(
    413 	_DECHanyuEncodingInfo * __restrict ei,
    414 	_DECHanyuState * __restrict psenc, int * __restrict rstate)
    415 {
    416 	/* ei may be unused */
    417 	_DIAGASSERT(psenc != NULL);
    418 	_DIAGASSERT(rstate != NULL);
    419 
    420 	*rstate = (psenc->chlen == 0)
    421 	    ? _STDENC_SDGEN_INITIAL
    422 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
    423 	return 0;
    424 }
    425 
    426 /* ----------------------------------------------------------------------
    427  * public interface for ctype
    428  */
    429 
/*
 * NOTE(review): presumably these macros declare the ctype entry points
 * and the operations table for this module, and the template header
 * implements them in terms of the _citrus_DECHanyu_* helpers and the
 * _ENCODING_* / _CEI_* macros defined above -- confirm against
 * citrus_ctype.h and citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(DECHanyu);
_CITRUS_CTYPE_DEF_OPS(DECHanyu);

#include "citrus_ctype_template.h"
    434 
    435 
    436 /* ----------------------------------------------------------------------
    437  * public interface for stdenc
    438  */
    439 
/*
 * NOTE(review): presumably these macros declare the stdenc entry points
 * and the operations table for this module, and the template header
 * implements them in terms of the _citrus_DECHanyu_* helpers defined
 * above -- confirm against citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

#include "citrus_stdenc_template.h"
    444