Home | History | Annotate | Line # | Download | only in modules
citrus_gbk2k.c revision 1.1
      1 /* $NetBSD: citrus_gbk2k.c,v 1.1 2003/03/25 18:26:54 tshiozak Exp $ */
      2 
      3 /*-
      4  * Copyright (c)2003 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.1 2003/03/25 18:26:54 tshiozak Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <string.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <stddef.h>
     40 #include <locale.h>
     41 #include <wchar.h>
     42 #include <sys/types.h>
     43 #include <limits.h>
     44 #include "citrus_module.h"
     45 #include "citrus_ctype.h"
     46 #include "citrus_gbk2k.h"
     47 
     48 
     49 /* ----------------------------------------------------------------------
     50  * private stuffs used by templates
     51  */
     52 
/*
 * Conversion state for the GBK2K codec: the bytes of a multibyte
 * character that has been only partially read so far.
 */
typedef struct _GBK2KState {
	char ch[4];	/* bytes accumulated for the character in progress */
	int chlen;	/* number of valid bytes in ch[]; 0 is the initial state */
} _GBK2KState;
     57 
/*
 * Per-encoding information.  Nothing in this file reads a member of it;
 * it is only zero-filled by _citrus_GBK2K_stdencoding_init().
 */
typedef struct {
	int dummy;	/* placeholder member; never read in this file */
} _GBK2KEncodingInfo;
     61 
/*
 * Per-locale ctype information: the encoding info plus one private
 * conversion state per entry point.  The member names carry the
 * "s_" prefix that _CEI_TO_STATE() pastes onto the function name.
 */
typedef struct {
	_GBK2KEncodingInfo	ei;	/* reached through _CEI_TO_EI() */
	struct {
		/* for future multi-locale facility */
		_GBK2KState	s_mblen;
		_GBK2KState	s_mbrlen;
		_GBK2KState	s_mbrtowc;
		_GBK2KState	s_mbtowc;
		_GBK2KState	s_mbsrtowcs;
		_GBK2KState	s_wcrtomb;
		_GBK2KState	s_wcsrtombs;
		_GBK2KState	s_wctomb;
	} states;
} _GBK2KCTypeInfo;
     76 
/* Address of the encoding info embedded in a _GBK2KCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The private state object for entry point _func_ (e.g. mbrtowc). */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names and parameters for this encoding.
 * NOTE(review): presumably consumed by citrus_ctype_template.h, which is
 * included at the end of this file -- confirm against that header.
 */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _CTYPE_INFO			_GBK2KCTypeInfo
#define _ENCODING_STATE			_GBK2KState
/* The longest sequence accepted by the decoder below is four bytes. */
#define _ENCODING_MB_CUR_MAX(_ei_)	4
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
     87 
     88 static __inline void
     89 /*ARGSUSED*/
     90 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
     91 			 _GBK2KState * __restrict s)
     92 {
     93 	memset(s, 0, sizeof(*s));
     94 }
     95 
     96 static __inline void
     97 /*ARGSUSED*/
     98 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
     99 			 void * __restrict pspriv,
    100 			 const _GBK2KState * __restrict s)
    101 {
    102 	memcpy(pspriv, (const void *)s, sizeof(*s));
    103 }
    104 
    105 static __inline void
    106 /*ARGSUSED*/
    107 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
    108 			   _GBK2KState * __restrict s,
    109 			   const void * __restrict pspriv)
    110 {
    111 	memcpy((void *)s, pspriv, sizeof(*s));
    112 }
    113 
/*
 * _mb_singlebyte:
 *	Nonzero when the low eight bits of c lie in 0x00..0x7f, i.e. the
 *	byte is a complete single-byte character.  Higher bits of c are
 *	ignored, so sign-extended chars are handled.
 */
static __inline int
_mb_singlebyte(int c)
{

	/* 0x00..0x7f is exactly "bit 7 clear" */
	return ((c & 0x80) == 0);
}
    120 
/*
 * _mb_leadbyte:
 *	Nonzero when the low eight bits of c lie in 0x81..0xfe, the range
 *	accepted for the first (and third) byte of a multibyte sequence.
 *	Higher bits of c are ignored.
 */
static __inline int
_mb_leadbyte(int c)
{
	unsigned int byte = (unsigned int)c & 0xffU;

	/* single unsigned range test: 0x81 <= byte <= 0xfe */
	return (byte - 0x81U <= 0xfeU - 0x81U);
}
    127 
/*
 * _mb_trailbyte:
 *	Nonzero when the low eight bits of c lie in 0x40..0x7e or
 *	0x80..0xfe, the range accepted for the second byte of a two-byte
 *	sequence.  Higher bits of c are ignored.
 */
static __inline int
_mb_trailbyte(int c)
{
	unsigned int byte = (unsigned int)c & 0xffU;

	/* 0x40..0xfe with the single hole at 0x7f */
	return (byte >= 0x40U && byte <= 0xfeU && byte != 0x7fU);
}
    134 
/*
 * _mb_surrogate:
 *	Nonzero when the low eight bits of c lie in 0x30..0x39, the range
 *	accepted for the second and fourth bytes of a four-byte sequence.
 *	Higher bits of c are ignored.
 */
static __inline int
_mb_surrogate(int c)
{
	unsigned int byte = (unsigned int)c & 0xffU;

	/* single unsigned range test: 0x30 <= byte <= 0x39 */
	return (byte - 0x30U <= 0x39U - 0x30U);
}
    141 
/*
 * _mb_count:
 *	Number of bytes needed to store the wide character v, judged
 *	only by its magnitude as an unsigned 32-bit value:
 *	  up to 0xff	-> 1
 *	  up to 0xffff	-> 2
 *	  anything else	-> 4
 *	There is no three-byte form; such values are reported as 4.
 */
static __inline int
_mb_count(wchar_t v)
{
	const u_int32_t code = (u_int32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	return ((code <= 0xffffU) ? 2 : 4);
}
    154 
/*
 * Helpers for the conversion routines below.  Both expand to an
 * expression on a variable named `psenc', which must be in scope at the
 * point of use.
 *
 * _PSENC		the most recently stored byte, ch[chlen - 1];
 *			only meaningful while chlen > 0.
 * _PUSH_PSENC(c)	append c to ch[] and advance chlen; the value of
 *			the expression is the stored (char) byte.  No
 *			bounds check is made against the size of ch[].
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
    157 
    158 static int
    159 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
    160 			   wchar_t * __restrict pwc,
    161 			   const char ** __restrict s, size_t n,
    162 			   _GBK2KState * __restrict psenc,
    163 			   size_t * __restrict nresult)
    164 {
    165 	int chlenbak, len;
    166 	const char *s0, *s1;
    167 	wchar_t wc;
    168 
    169 	_DIAGASSERT(ei != NULL);
    170 	/* pwc may be NULL */
    171 	_DIAGASSERT(s != NULL);
    172 	_DIAGASSERT(psenc != NULL);
    173 
    174 	s0 = *s;
    175 
    176 	if (s0 == NULL) {
    177 		/* _citrus_GBK2K_init_state(ei, psenc); */
    178 		psenc->chlen = 0;
    179 		*nresult = 0;
    180 		return (0);
    181 	}
    182 
    183 	chlenbak = psenc->chlen;
    184 
    185 	switch (psenc->chlen) {
    186 	case 3:
    187 		if (!_mb_leadbyte (_PSENC))
    188 			goto invalid;
    189 	/* FALLTHROUGH */
    190 	case 2:
    191 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
    192 			goto invalid;
    193 	/* FALLTHROUGH */
    194 	case 1:
    195 		if (!_mb_leadbyte (_PSENC))
    196 			goto invalid;
    197 	/* FALLTHOROUGH */
    198 	case 0:
    199 		break;
    200 	default:
    201 		goto invalid;
    202 	}
    203 
    204 	for (;;) {
    205 		if (n-- < 1)
    206 			goto restart;
    207 
    208 		_PUSH_PSENC(*s0++);
    209 
    210 		switch (psenc->chlen) {
    211 		case 1:
    212 			if (_mb_singlebyte(_PSENC))
    213 				goto convert;
    214 			if (_mb_leadbyte  (_PSENC))
    215 				continue;
    216 			goto ilseq;
    217 		case 2:
    218 			if (_mb_trailbyte (_PSENC))
    219 				goto convert;
    220 			if (_mb_surrogate (_PSENC))
    221 				continue;
    222 			goto ilseq;
    223 		case 3:
    224 			if (_mb_leadbyte  (_PSENC))
    225 				continue;
    226 			goto ilseq;
    227 		case 4:
    228 			if (_mb_surrogate (_PSENC))
    229 				goto convert;
    230 			goto ilseq;
    231 		default:
    232 		/* NOT REACHED */
    233 		}
    234 	}
    235 
    236 convert:
    237 	len = psenc->chlen;
    238 	s1  = &psenc->ch[0];
    239 	wc  = 0;
    240 	while (len-- > 0)
    241 		wc = (wc << 8) | (*s1++ & 0xff);
    242 
    243 	if (pwc != NULL)
    244 		*pwc = wc;
    245 	*s = s0;
    246 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
    247 	/* _citrus_GBK2K_init_state(ei, psenc); */
    248 	psenc->chlen = 0;
    249 
    250 	return (0);
    251 
    252 restart:
    253 	*s = s0;
    254 	*nresult = (size_t)-2;
    255 
    256 	return (0);
    257 
    258 invalid:
    259 	return (EINVAL);
    260 
    261 ilseq:
    262 	*nresult = (size_t)-1;
    263 	return (EILSEQ);
    264 }
    265 
    266 static int
    267 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
    268 			   char * __restrict s, size_t n, wchar_t wc,
    269 			   _GBK2KState * __restrict psenc,
    270 			   size_t * __restrict nresult)
    271 {
    272 	int len;
    273 
    274 	_DIAGASSERT(ei != NULL);
    275 	_DIAGASSERT(s != NULL);
    276 	_DIAGASSERT(psenc != NULL);
    277 
    278 	if (psenc->chlen != 0)
    279 		goto invalid;
    280 
    281 	len = _mb_count(wc);
    282 	if (n < len)
    283 		goto ilseq;
    284 
    285 	switch (len) {
    286 	case 1:
    287 		if (!_mb_singlebyte(_PUSH_PSENC(wc     )))
    288 			goto ilseq;
    289 		break;
    290 	case 2:
    291 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
    292 		    !_mb_trailbyte (_PUSH_PSENC(wc     )))
    293 			goto ilseq;
    294 		break;
    295 	case 4:
    296 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
    297 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
    298 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
    299 		    !_mb_surrogate (_PUSH_PSENC(wc      )))
    300 			goto ilseq;
    301 		break;
    302 	default:
    303 	/* NOT REACHED */
    304 	}
    305 
    306 	_DIAGASSERT(len == psenc->chlen);
    307 
    308 	memcpy(s, psenc->ch, psenc->chlen);
    309 	*nresult = psenc->chlen;
    310 	/* _citrus_GBK2K_init_state(ei, psenc); */
    311 	psenc->chlen = 0;
    312 
    313 	return (0);
    314 
    315 invalid:
    316 	return (EINVAL);
    317 
    318 ilseq:
    319 	*nresult = (size_t)-1;
    320 	return (EILSEQ);
    321 }
    322 
    323 static int
    324 /*ARGSUSED*/
    325 _citrus_GBK2K_stdencoding_init(_GBK2KEncodingInfo * __restrict ei,
    326 			       const void * __restrict var, size_t lenvar)
    327 {
    328 	_DIAGASSERT(ei != NULL);
    329 
    330 	memset((void *)ei, 0, sizeof(*ei));
    331 	return (0);
    332 }
    333 
    334 static void
    335 /*ARGSUSED*/
    336 _citrus_GBK2K_stdencoding_uninit(_GBK2KEncodingInfo *ei)
    337 {
    338 }
    339 
    340 
    341 /* ----------------------------------------------------------------------
    342  * public interface for ctype
    343  */
    344 
/*
 * NOTE(review): _CITRUS_CTYPE_DECLS / _CITRUS_CTYPE_DEF_OPS are defined
 * outside this file (presumably in citrus_ctype.h) and appear to declare
 * the GBK2K ctype entry points and their operations table -- confirm
 * against the header.
 */
_CITRUS_CTYPE_DECLS(GBK2K);
_CITRUS_CTYPE_DEF_OPS(GBK2K);

/*
 * Must come after the private definitions and _citrus_GBK2K_*_priv
 * functions above: the template is presumably written in terms of
 * _FUNCNAME(), _ENCODING_INFO, _ENCODING_STATE and the related macros.
 */
#include "citrus_ctype_template.h"
    349