Home | History | Annotate | Line # | Download | only in modules
citrus_big5.c revision 1.15.18.2
      1 /*	$NetBSD: citrus_big5.c,v 1.15.18.2 2017/07/21 20:22:29 perseant Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2002, 2006 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*-
     30  * Copyright (c) 1993
     31  *	The Regents of the University of California.  All rights reserved.
     32  *
     33  * This code is derived from software contributed to Berkeley by
     34  * Paul Borman at Krystal Technologies.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  * 3. Neither the name of the University nor the names of its contributors
     45  *    may be used to endorse or promote products derived from this software
     46  *    without specific prior written permission.
     47  *
     48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     58  * SUCH DAMAGE.
     59  */
     60 
     61 #include <sys/cdefs.h>
     62 #if defined(LIBC_SCCS) && !defined(lint)
     63 __RCSID("$NetBSD: citrus_big5.c,v 1.15.18.2 2017/07/21 20:22:29 perseant Exp $");
     64 #endif /* LIBC_SCCS and not lint */
     65 
     66 #include <sys/queue.h>
     67 #include <sys/types.h>
     68 #include <assert.h>
     69 #include <errno.h>
     70 #include <string.h>
     71 #include <stdint.h>
     72 #include <stdio.h>
     73 #include <stdlib.h>
     74 #include <stddef.h>
     75 #include <wchar.h>
     76 #include <limits.h>
     77 
     78 #include "citrus_namespace.h"
     79 #include "citrus_types.h"
     80 #include "citrus_bcs.h"
     81 #include "citrus_module.h"
     82 #include "citrus_ctype.h"
     83 #include "citrus_stdenc.h"
     84 #include "citrus_big5.h"
     85 
     86 #include "citrus_prop.h"
     87 
     88 /* ----------------------------------------------------------------------
 * private definitions used by the templates
     90  */
     91 
/*
 * Conversion state: bytes of a multibyte character that have been read
 * so far but do not yet form a complete character.
 */
typedef struct {
	char ch[2];	/* buffered bytes of the pending character */
	int chlen;	/* number of valid bytes in ch[]; 0 means no pending input */
} _BIG5State;
     96 
/*
 * One excluded code range.  Both bounds are inclusive: a code c is
 * rejected when start <= c <= end.
 */
typedef struct _BIG5Exclude {
	TAILQ_ENTRY(_BIG5Exclude) entry;	/* linkage in the exclude list */
	wint_kuten_t start, end;		/* inclusive range bounds */
} _BIG5Exclude;

/* Tail queue of excluded ranges; entries are appended in ascending order. */
typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList;
    103 
/*
 * Per-encoding configuration.
 *
 * cell[] is indexed by byte value and holds two flag bits:
 *   0x1 - the byte is a valid first byte of a two-byte character ("row")
 *   0x2 - the byte is a valid second byte of a two-byte character ("col")
 */
typedef struct {
	int cell[0x100];		/* per-byte row/col flags */
	_BIG5ExcludeList excludes;	/* code ranges that are treated as invalid */
} _BIG5EncodingInfo;
    108 
/*
 * Ctype-level information: the encoding configuration plus one
 * conversion state slot per function name.  The slots are selected with
 * _CEI_TO_STATE(cei, <func>).
 */
typedef struct {
	_BIG5EncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_BIG5State	s_mblen;
		_BIG5State	s_mbrlen;
		_BIG5State	s_mbrtowc;
		_BIG5State	s_mbtowc;
		_BIG5State	s_mbsrtowcs;
		_BIG5State	s_mbsnrtowcs;
		_BIG5State	s_wcrtomb;
		_BIG5State	s_wcsrtombs;
		_BIG5State	s_wcsnrtombs;
		_BIG5State	s_wctomb;
	} states;
} _BIG5CTypeInfo;
    125 
/* Pointer to the encoding info embedded in a ctype info. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* State slot for the function named _func_ (e.g. mbrtowc -> states.s_mbrtowc). */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Builds a module-prefixed identifier: _FUNCNAME(x) -> _citrus_BIG5_x. */
#define _FUNCNAME(m)			_citrus_BIG5_##m
/* Type names this module supplies under generic names. */
#define _ENCODING_INFO			_BIG5EncodingInfo
#define _CTYPE_INFO			_BIG5CTypeInfo
#define _ENCODING_STATE			_BIG5State
/* A character occupies at most two bytes. */
#define _ENCODING_MB_CUR_MAX(_ei_)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
    136 
    137 #include "citrus_big5_data.h"
    138 
    139 static __inline int
    140 /*ARGSUSED*/
    141 _FUNCNAME(ucs2kt)(_ENCODING_INFO * __restrict ei,
    142 		  wchar_kuten_t * __restrict ktp, wchar_ucs4_t wc)
    143 {
    144 	struct unicode2kuten_lookup *uk;
    145 
    146 	_DIAGASSERT(ktp != NULL);
    147 
    148 	/* US-ASCII are not in the list */
    149 	if (wc < 0x80) {
    150 		*ktp = wc;
    151 		return 0;
    152 	}
    153 
    154 	uk = _citrus_uk_bsearch(wc, __big5_table__unicode2kuten_lookup, _BIG5_TABLE__U2K_LIST_LENGTH);
    155 
    156 	if (uk == NULL)
    157 		*ktp = WEOF;
    158 	else
    159 		*ktp = uk->value;
    160 
    161 	return 0;
    162 }
    163 
    164 static __inline int
    165 /*ARGSUSED*/
    166 _FUNCNAME(kt2ucs)(_ENCODING_INFO * __restrict ei,
    167 		  wchar_ucs4_t * __restrict up, wchar_kuten_t kt)
    168 {
    169 	_csid_t csid;
    170 	_index_t idx;
    171 	struct unicode2kuten_lookup *uk, *table;
    172 
    173 	_DIAGASSERT(up != NULL);
    174 
    175 	table = NULL;
    176 
    177 	if (kt < 0x80) {
    178 		*up = kt;
    179 		return 0;
    180 	}
    181 
    182 	uk = _citrus_uk_bsearch(kt, __big5_table__kuten2unicode_lookup, _BIG5_TABLE__K2U_LIST_LENGTH);
    183 
    184 	if (uk == NULL)
    185 		*up = WEOF;
    186 	else
    187 		*up = uk->value;
    188 
    189 	return 0;
    190 }
    191 
    192 static __inline void
    193 /*ARGSUSED*/
    194 _citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei,
    195 			_BIG5State * __restrict s)
    196 {
    197 	memset(s, 0, sizeof(*s));
    198 }
    199 
    200 static __inline void
    201 /*ARGSUSED*/
    202 _citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei,
    203 			void * __restrict pspriv,
    204 			const _BIG5State * __restrict s)
    205 {
    206 	memcpy(pspriv, (const void *)s, sizeof(*s));
    207 }
    208 
    209 static __inline void
    210 /*ARGSUSED*/
    211 _citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei,
    212 			  _BIG5State * __restrict s,
    213 			  const void * __restrict pspriv)
    214 {
    215 	memcpy((void *)s, pspriv, sizeof(*s));
    216 }
    217 
    218 static __inline int
    219 _citrus_BIG5_check(_BIG5EncodingInfo *ei, u_int c)
    220 {
    221 	_DIAGASSERT(ei != NULL);
    222 
    223 	return (ei->cell[c & 0xFF] & 0x1) ? 2 : 1;
    224 }
    225 
    226 static __inline int
    227 _citrus_BIG5_check2(_BIG5EncodingInfo *ei, u_int c)
    228 {
    229 	_DIAGASSERT(ei != NULL);
    230 
    231 	return (ei->cell[c & 0xFF] & 0x2) ? 1 : 0;
    232 }
    233 
    234 static __inline int
    235 _citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_kuten_t c)
    236 {
    237 	_BIG5Exclude *exclude;
    238 
    239 	_DIAGASSERT(ei != NULL);
    240 
    241 	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
    242 		if (c >= exclude->start && c <= exclude->end)
    243 			return EILSEQ;
    244 	}
    245 	return 0;
    246 }
    247 
    248 static int
    249 _citrus_BIG5_fill_rowcol(void * __restrict ctx, const char * __restrict s,
    250 	uint64_t start, uint64_t end)
    251 {
    252 	_BIG5EncodingInfo *ei;
    253 	int i;
    254 	uint64_t n;
    255 
    256 	_DIAGASSERT(ctx != NULL);
    257 
    258 	if (start > 0xFF || end > 0xFF)
    259 		return EINVAL;
    260 	ei = (_BIG5EncodingInfo *)ctx;
    261 	i = strcmp("row", s) ? 1 : 0;
    262 	i = 1 << i;
    263 	for (n = start; n <= end; ++n)
    264 		ei->cell[n & 0xFF] |= i;
    265 	return 0;
    266 }
    267 
    268 static int
    269 /*ARGSUSED*/
    270 _citrus_BIG5_fill_excludes(void * __restrict ctx, const char * __restrict s,
    271 	uint64_t start, uint64_t end)
    272 {
    273 	_BIG5EncodingInfo *ei;
    274 	_BIG5Exclude *exclude;
    275 
    276 	_DIAGASSERT(ctx != NULL);
    277 
    278 	if (start > 0xFFFF || end > 0xFFFF)
    279 		return EINVAL;
    280 	ei = (_BIG5EncodingInfo *)ctx;
    281 	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
    282 	if (exclude != NULL && (wint_kuten_t)start <= exclude->end)
    283 		return EINVAL;
    284 	exclude = (void *)malloc(sizeof(*exclude));
    285 	if (exclude == NULL)
    286 		return ENOMEM;
    287 	exclude->start = (wint_kuten_t)start;
    288 	exclude->end = (wint_kuten_t)end;
    289 	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
    290 
    291 	return 0;
    292 }
    293 
/*
 * Property table handed to _citrus_prop_parse_variable() by
 * _citrus_BIG5_encoding_module_init(): maps the property names "row",
 * "col" and "excludes" to the callbacks that record their ranges.
 */
static const _citrus_prop_hint_t root_hints[] = {
    _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
    _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
    _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
    _CITRUS_PROP_HINT_END
};
    300 
    301 static void
    302 /*ARGSUSED*/
    303 _citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
    304 {
    305 	_BIG5Exclude *exclude;
    306 
    307 	_DIAGASSERT(ei != NULL);
    308 
    309 	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
    310 		TAILQ_REMOVE(&ei->excludes, exclude, entry);
    311 		free(exclude);
    312 	}
    313 }
    314 
    315 static int
    316 /*ARGSUSED*/
    317 _citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
    318 				  const void * __restrict var, size_t lenvar)
    319 {
    320 	int err;
    321 	const char *s;
    322 
    323 	_DIAGASSERT(ei != NULL);
    324 
    325 	memset((void *)ei, 0, sizeof(*ei));
    326 	TAILQ_INIT(&ei->excludes);
    327 
    328 	if (lenvar > 0 && var != NULL) {
    329 		s = _bcs_skip_ws_len((const char *)var, &lenvar);
    330 		if (lenvar > 0 && *s != '\0') {
    331 			err = _citrus_prop_parse_variable(
    332 			    root_hints, (void *)ei, s, lenvar);
    333 			if (err == 0)
    334 				return 0;
    335 
    336 			_citrus_BIG5_encoding_module_uninit(ei);
    337 			memset((void *)ei, 0, sizeof(*ei));
    338 			TAILQ_INIT(&ei->excludes);
    339 		}
    340 	}
    341 
    342 	/* fallback Big5-1984, for backward compatibility. */
    343 	_citrus_BIG5_fill_rowcol(ei, "row", 0xA1, 0xFE);
    344 	_citrus_BIG5_fill_rowcol(ei, "col", 0x40, 0x7E);
    345 	_citrus_BIG5_fill_rowcol(ei, "col", 0xA1, 0xFE);
    346 
    347 	return 0;
    348 }
    349 
    350 static int
    351 /*ARGSUSED*/
    352 _citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
    353 			  wchar_ucs4_t * __restrict pwc,
    354 			  const char ** __restrict s, size_t n,
    355 			  _BIG5State * __restrict psenc,
    356 			  size_t * __restrict nresult)
    357 {
    358 	wchar_kuten_t wchar;
    359 	int c;
    360 	int chlenbak;
    361 	const char *s0;
    362 
    363 	_DIAGASSERT(nresult != 0);
    364 	_DIAGASSERT(ei != NULL);
    365 	_DIAGASSERT(psenc != NULL);
    366 	_DIAGASSERT(s != NULL && *s != NULL);
    367 
    368 	s0 = *s;
    369 
    370 	if (s0 == NULL) {
    371 		_citrus_BIG5_init_state(ei, psenc);
    372 		*nresult = 0;
    373 		return (0);
    374 	}
    375 
    376 	chlenbak = psenc->chlen;
    377 
    378 	/* make sure we have the first byte in the buffer */
    379 	switch (psenc->chlen) {
    380 	case 0:
    381 		if (n < 1)
    382 			goto restart;
    383 		psenc->ch[0] = *s0++;
    384 		psenc->chlen = 1;
    385 		n--;
    386 		break;
    387 	case 1:
    388 		break;
    389 	default:
    390 		/* illegal state */
    391 		goto ilseq;
    392 	}
    393 
    394 	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
    395 	if (c == 0)
    396 		goto ilseq;
    397 	while (psenc->chlen < c) {
    398 		if (n < 1) {
    399 			goto restart;
    400 		}
    401 		psenc->ch[psenc->chlen] = *s0++;
    402 		psenc->chlen++;
    403 		n--;
    404 	}
    405 
    406 	switch (c) {
    407 	case 1:
    408 		wchar = psenc->ch[0] & 0xff;
    409 		break;
    410 	case 2:
    411 		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
    412 			goto ilseq;
    413 		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
    414 		break;
    415 	default:
    416 		/* illegal state */
    417 		goto ilseq;
    418 	}
    419 
    420 	if (_citrus_BIG5_check_excludes(ei, (wint_kuten_t)wchar) != 0)
    421 		goto ilseq;
    422 
    423 	*s = s0;
    424 	psenc->chlen = 0;
    425 	if (pwc)
    426 		_citrus_BIG5_kt2ucs(ei, pwc, wchar);
    427 	if (!wchar)
    428 		*nresult = 0;
    429 	else
    430 		*nresult = c - chlenbak;
    431 
    432 	return (0);
    433 
    434 ilseq:
    435 	psenc->chlen = 0;
    436 	*nresult = (size_t)-1;
    437 	return (EILSEQ);
    438 
    439 restart:
    440 	*s = s0;
    441 	*nresult = (size_t)-2;
    442 	return (0);
    443 }
    444 
    445 static int
    446 /*ARGSUSED*/
    447 _citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
    448 			  char * __restrict s,
    449 			  size_t n, wchar_ucs4_t wc, _BIG5State * __restrict psenc,
    450 			  size_t * __restrict nresult)
    451 {
    452 	size_t l, ret;
    453 
    454 	_DIAGASSERT(ei != NULL);
    455 	_DIAGASSERT(nresult != 0);
    456 	_DIAGASSERT(s != NULL);
    457 
    458 	_citrus_BIG5_ucs2kt(ei, &wc, wc);
    459 
    460 	/* check invalid sequence */
    461 	if (wc & ~0xffff ||
    462 	    _citrus_BIG5_check_excludes(ei, (wint_kuten_t)wc) != 0) {
    463 		ret = EILSEQ;
    464 		goto err;
    465 	}
    466 
    467 	if (wc & 0x8000) {
    468 		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
    469 		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
    470 			ret = EILSEQ;
    471 			goto err;
    472 		}
    473 		l = 2;
    474 	} else {
    475 		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
    476 			ret = EILSEQ;
    477 			goto err;
    478 		}
    479 		l = 1;
    480 	}
    481 
    482 	if (n < l) {
    483 		/* bound check failure */
    484 		ret = E2BIG;
    485 		goto err;
    486 	}
    487 
    488 	if (l == 2) {
    489 		s[0] = (wc >> 8) & 0xff;
    490 		s[1] = wc & 0xff;
    491 	} else
    492 		s[0] = wc & 0xff;
    493 
    494 	*nresult = l;
    495 
    496 	return 0;
    497 
    498 err:
    499 	*nresult = (size_t)-1;
    500 	return ret;
    501 }
    502 
    503 static int
    504 /*ARGSUSED*/
    505 _citrus_BIG5_stdenc_wctocs(struct _citrus_stdenc *ce,
    506 			   _csid_t * __restrict csid,
    507 			   _index_t * __restrict idx, wchar_kuten_t wc)
    508 {
    509 
    510 	_DIAGASSERT(csid != NULL && idx != NULL);
    511 
    512 	*csid = (wc < 0x100) ? 0 : 1;
    513 	*idx = (_index_t)wc;
    514 
    515 	return 0;
    516 }
    517 
    518 static int
    519 /*ARGSUSED*/
    520 _citrus_BIG5_stdenc_cstowc(struct _citrus_stdenc *ce,
    521 			   wchar_kuten_t * __restrict wc,
    522 			   _csid_t csid, _index_t idx)
    523 {
    524 	_DIAGASSERT(wc != NULL);
    525 
    526 	switch (csid) {
    527 	case 0:
    528 	case 1:
    529 		*wc = (wchar_kuten_t)idx;
    530 		break;
    531 	default:
    532 		return EILSEQ;
    533 	}
    534 
    535 	return 0;
    536 }
    537 
    538 static __inline int
    539 /*ARGSUSED*/
    540 _citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei,
    541 					   _BIG5State * __restrict psenc,
    542 					   int * __restrict rstate)
    543 {
    544 
    545 	if (psenc->chlen == 0)
    546 		*rstate = _STDENC_SDGEN_INITIAL;
    547 	else
    548 		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
    549 
    550 	return 0;
    551 }
    552 
    553 /* ----------------------------------------------------------------------
    554  * public interface for ctype
    555  */
    556 
/*
 * NOTE(review): these macros are defined outside this file (presumably
 * in citrus_ctype.h); they appear to declare the ctype entry points and
 * the operations table for the BIG5 module -- confirm against the header.
 */
_CITRUS_CTYPE_DECLS(BIG5);
_CITRUS_CTYPE_DEF_OPS(BIG5);
    559 
    560 #include "citrus_ctype_template.h"
    561 
    562 
    563 /* ----------------------------------------------------------------------
    564  * public interface for stdenc
    565  */
    566 
/*
 * NOTE(review): these macros are defined outside this file (presumably
 * in citrus_stdenc.h); they appear to declare the stdenc entry points
 * and the operations table for the BIG5 module -- confirm against the
 * header.
 */
_CITRUS_STDENC_DECLS(BIG5);
_CITRUS_STDENC_DEF_OPS(BIG5);
    569 
    570 #include "citrus_stdenc_template.h"
    571