Home | History | Annotate | Line # | Download | only in modules
      1 /*	$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2003 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <limits.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <machine/endian.h>
     41 #include <sys/queue.h>
     42 
     43 #include "citrus_namespace.h"
     44 #include "citrus_types.h"
     45 #include "citrus_module.h"
     46 #include "citrus_region.h"
     47 #include "citrus_mmap.h"
     48 #include "citrus_hash.h"
     49 #include "citrus_iconv.h"
     50 #include "citrus_stdenc.h"
     51 #include "citrus_mapper.h"
     52 #include "citrus_csmapper.h"
     53 #include "citrus_memstream.h"
     54 #include "citrus_iconv_std.h"
     55 #include "citrus_esdb.h"
     56 
     57 /* ---------------------------------------------------------------------- */
     58 
     59 _CITRUS_ICONV_DECLS(iconv_std);
     60 _CITRUS_ICONV_DEF_OPS(iconv_std);
     61 
     62 
     63 /* ---------------------------------------------------------------------- */
     64 
     65 int
     66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
     67 			       u_int32_t expected_version)
     68 {
     69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
     70 		return (EINVAL);
     71 
     72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
     73 	       sizeof(_citrus_iconv_std_iconv_ops));
     74 
     75 	return (0);
     76 }
     77 
     78 /* ---------------------------------------------------------------------- */
     79 
     80 /*
     81  * convenience routines for stdenc.
     82  */
     83 static __inline void
     84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
     85 {
     86 	if (se->se_ps)
     87 		memcpy(se->se_pssaved, se->se_ps,
     88 		       _stdenc_get_state_size(se->se_handle));
     89 }
     90 
     91 static __inline void
     92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
     93 {
     94 	if (se->se_ps)
     95 		memcpy(se->se_ps, se->se_pssaved,
     96 		       _stdenc_get_state_size(se->se_handle));
     97 }
     98 
     99 static __inline void
    100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
    101 {
    102 	if (se->se_ps)
    103 		_stdenc_init_state(se->se_handle, se->se_ps);
    104 }
    105 
    106 static __inline int
    107 mbtocsx(struct _citrus_iconv_std_encoding *se,
    108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
    109 	size_t *nresult)
    110 {
    111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
    112 			      nresult);
    113 }
    114 
    115 static __inline int
    116 cstombx(struct _citrus_iconv_std_encoding *se,
    117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
    118 {
    119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
    120 			      nresult);
    121 }
    122 
    123 static __inline int
    124 wctombx(struct _citrus_iconv_std_encoding *se,
    125 	char *s, size_t n, _wc_t wc, size_t *nresult)
    126 {
    127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
    128 }
    129 
    130 static __inline int
    131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
    132 		 char *s, size_t n, size_t *nresult)
    133 {
    134 	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
    135 }
    136 
    137 static __inline int
    138 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate)
    139 {
    140 	int ret;
    141 	struct _stdenc_state_desc ssd;
    142 
    143 	ret = _stdenc_get_state_desc(se->se_handle, se->se_ps,
    144 				     _STDENC_SDID_GENERIC, &ssd);
    145 	if (!ret)
    146 		*rstate = ssd.u.generic.state;
    147 
    148 	return ret;
    149 }
    150 
    151 /*
    152  * init encoding context
    153  */
    154 static int
    155 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
    156 	      void *ps1, void *ps2)
    157 {
    158 	int ret = -1;
    159 
    160 	se->se_handle = cs;
    161 	se->se_ps = ps1;
    162 	se->se_pssaved = ps2;
    163 
    164 	if (se->se_ps)
    165 		ret = _stdenc_init_state(cs, se->se_ps);
    166 	if (!ret && se->se_pssaved)
    167 		ret = _stdenc_init_state(cs, se->se_pssaved);
    168 
    169 	return ret;
    170 }
    171 
/*
 * Open a charset mapper from src to dst and accept it only if it is a
 * plain one-to-one, stateless mapper (source max 1, destination max 1,
 * state size 0).
 *
 * On success stores the mapper in *rcm and returns 0.  Returns the error
 * from _csmapper_open() if opening fails, or EINVAL (after closing the
 * mapper) if the mapper does not have the required shape.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
    192 
    193 static void
    194 close_dsts(struct _citrus_iconv_std_dst_list *dl)
    195 {
    196 	struct _citrus_iconv_std_dst *sd;
    197 
    198 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
    199 		TAILQ_REMOVE(dl, sd, sd_entry);
    200 		_csmapper_close(sd->sd_mapper);
    201 		free(sd);
    202 	}
    203 }
    204 
    205 static int
    206 open_dsts(struct _citrus_iconv_std_dst_list *dl,
    207 	  const struct _esdb_charset *ec, const struct _esdb *dbdst)
    208 {
    209 	int i, ret;
    210 	struct _citrus_iconv_std_dst *sd, *sdtmp;
    211 	unsigned long norm;
    212 
    213 	sd = malloc(sizeof(*sd));
    214 	if (sd == NULL)
    215 		return errno;
    216 
    217 	for (i=0; i<dbdst->db_num_charsets; i++) {
    218 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
    219 				    dbdst->db_charsets[i].ec_csname, &norm);
    220 		if (ret == 0) {
    221 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
    222 			sd->sd_norm = norm;
    223 			/* insert this mapper by sorted order. */
    224 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
    225 				if (sdtmp->sd_norm > norm) {
    226 					TAILQ_INSERT_BEFORE(sdtmp, sd,
    227 							    sd_entry);
    228 					sd = NULL;
    229 					break;
    230 				}
    231 			}
    232 			if (sd)
    233 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
    234 			sd = malloc(sizeof(*sd));
    235 			if (sd == NULL) {
    236 				ret = errno;
    237 				close_dsts(dl);
    238 				return ret;
    239 			}
    240 		} else if (ret != ENOENT) {
    241 			close_dsts(dl);
    242 			free(sd);
    243 			return ret;
    244 		}
    245 	}
    246 	free(sd);
    247 	return 0;
    248 }
    249 
    250 static void
    251 close_srcs(struct _citrus_iconv_std_src_list *sl)
    252 {
    253 	struct _citrus_iconv_std_src *ss;
    254 
    255 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
    256 		TAILQ_REMOVE(sl, ss, ss_entry);
    257 		close_dsts(&ss->ss_dsts);
    258 		free(ss);
    259 	}
    260 }
    261 
    262 static int
    263 open_srcs(struct _citrus_iconv_std_src_list *sl,
    264 	  const struct _esdb *dbsrc, const struct _esdb *dbdst)
    265 {
    266 	int i, ret, count = 0;
    267 	struct _citrus_iconv_std_src *ss;
    268 
    269 	ss = malloc(sizeof(*ss));
    270 	if (ss == NULL)
    271 		return errno;
    272 
    273 	TAILQ_INIT(&ss->ss_dsts);
    274 
    275 	for (i=0; i<dbsrc->db_num_charsets; i++) {
    276 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
    277 		if (ret)
    278 			goto err;
    279 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
    280 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
    281 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
    282 			ss = malloc(sizeof(*ss));
    283 			if (ss == NULL) {
    284 				ret = errno;
    285 				goto err;
    286 			}
    287 			count++;
    288 			TAILQ_INIT(&ss->ss_dsts);
    289 		}
    290 	}
    291 	free(ss);
    292 
    293 	return count ? 0 : ENOENT;
    294 
    295 err:
    296 	free(ss);
    297 	close_srcs(sl);
    298 	return ret;
    299 }
    300 
/* convert a single character */
    302 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
    303 static int
    304 /*ARGSUSED*/
    305 do_conv(const struct _citrus_iconv_std_shared *is,
    306 	struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
    307 {
    308 	_index_t tmpidx;
    309 	int ret;
    310 	struct _citrus_iconv_std_src *ss;
    311 	struct _citrus_iconv_std_dst *sd;
    312 
    313 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
    314 		if (ss->ss_csid == *csid) {
    315 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
    316 				ret = _csmapper_convert(sd->sd_mapper,
    317 							&tmpidx, *idx, NULL);
    318 				switch (ret) {
    319 				case _MAPPER_CONVERT_SUCCESS:
    320 					*csid = sd->sd_csid;
    321 					*idx = tmpidx;
    322 					return 0;
    323 				case _MAPPER_CONVERT_NONIDENTICAL:
    324 					break;
    325 				case _MAPPER_CONVERT_SRC_MORE:
    326 					/*FALLTHROUGH*/
    327 				case _MAPPER_CONVERT_DST_MORE:
    328 					/*FALLTHROUGH*/
    329 				case _MAPPER_CONVERT_FATAL:
    330 					return EINVAL;
    331 				case _MAPPER_CONVERT_ILSEQ:
    332 					return EILSEQ;
    333 				}
    334 			}
    335 			break;
    336 		}
    337 	}
    338 
    339 	return E_NO_CORRESPONDING_CHAR;
    340 }
    341 /* ---------------------------------------------------------------------- */
    342 
    343 static int
    344 /*ARGSUSED*/
    345 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
    346 				    const char * __restrict curdir,
    347 				    const char * __restrict src,
    348 				    const char * __restrict dst,
    349 				    const void * __restrict var, size_t lenvar)
    350 {
    351 	int ret;
    352 	struct _citrus_iconv_std_shared *is;
    353 	struct _citrus_esdb esdbsrc, esdbdst;
    354 
    355 	is = malloc(sizeof(*is));
    356 	if (is==NULL) {
    357 		ret = errno;
    358 		goto err0;
    359 	}
    360 	ret = _citrus_esdb_open(&esdbsrc, src);
    361 	if (ret)
    362 		goto err1;
    363 	ret = _citrus_esdb_open(&esdbdst, dst);
    364 	if (ret)
    365 		goto err2;
    366 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
    367 			   esdbsrc.db_variable, esdbsrc.db_len_variable);
    368 	if (ret)
    369 		goto err3;
    370 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
    371 			   esdbdst.db_variable, esdbdst.db_len_variable);
    372 	if (ret)
    373 		goto err4;
    374 	is->is_use_invalid = esdbdst.db_use_invalid;
    375 	is->is_invalid = esdbdst.db_invalid;
    376 
    377 	TAILQ_INIT(&is->is_srcs);
    378 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
    379 	if (ret)
    380 		goto err5;
    381 
    382 	_esdb_close(&esdbsrc);
    383 	_esdb_close(&esdbdst);
    384 	ci->ci_closure = is;
    385 
    386 	return 0;
    387 
    388 err5:
    389 	_stdenc_close(is->is_dst_encoding);
    390 err4:
    391 	_stdenc_close(is->is_src_encoding);
    392 err3:
    393 	_esdb_close(&esdbdst);
    394 err2:
    395 	_esdb_close(&esdbsrc);
    396 err1:
    397 	free(is);
    398 err0:
    399 	return ret;
    400 }
    401 
    402 static void
    403 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
    404 {
    405 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
    406 
    407 	if (is == NULL)
    408 		return;
    409 
    410 	_stdenc_close(is->is_src_encoding);
    411 	_stdenc_close(is->is_dst_encoding);
    412 	close_srcs(&is->is_srcs);
    413 	free(is);
    414 }
    415 
    416 static int
    417 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
    418 {
    419 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
    420 	struct _citrus_iconv_std_context *sc;
    421 	size_t szpssrc, szpsdst, sz;
    422 	char *ptr;
    423 
    424 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
    425 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
    426 
    427 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
    428 	sc = malloc(sz);
    429 	if (sc == NULL)
    430 		return errno;
    431 
    432 	ptr = (char *)&sc[1];
    433 	if (szpssrc)
    434 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
    435 			      ptr, ptr+szpssrc);
    436 	else
    437 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
    438 			      NULL, NULL);
    439 	ptr += szpssrc*2;
    440 	if (szpsdst)
    441 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
    442 			      ptr, ptr+szpsdst);
    443 	else
    444 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
    445 			      NULL, NULL);
    446 
    447 	cv->cv_closure = (void *)sc;
    448 
    449 	return 0;
    450 }
    451 
    452 static void
    453 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
    454 {
    455 	free(cv->cv_closure);
    456 }
    457 
    458 static int
    459 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
    460 				const char * __restrict * __restrict in,
    461 				size_t * __restrict inbytes,
    462 				char * __restrict * __restrict out,
    463 				size_t * __restrict outbytes, u_int32_t flags,
    464 				size_t * __restrict invalids)
    465 {
    466 	const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
    467 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
    468 	_index_t idx;
    469 	_csid_t csid;
    470 	int ret, state;
    471 	size_t szrin, szrout;
    472 	size_t inval;
    473 	const char *tmpin;
    474 
    475 	inval = 0;
    476 	if (in==NULL || *in==NULL) {
    477 		/* special cases */
    478 		if (out!=NULL && *out!=NULL) {
    479 			/* init output state and store the shift sequence */
    480 			save_encoding_state(&sc->sc_src_encoding);
    481 			save_encoding_state(&sc->sc_dst_encoding);
    482 			szrout = 0;
    483 
    484 			ret = put_state_resetx(&sc->sc_dst_encoding,
    485 					       *out, *outbytes,
    486 					       &szrout);
    487 			if (ret)
    488 				goto err;
    489 
    490 			if (szrout == (size_t)-2) {
    491 				/* too small to store the character */
    492 				ret = EINVAL;
    493 				goto err;
    494 			}
    495 			*out += szrout;
    496 			*outbytes -= szrout;
    497 		} else
    498 			/* otherwise, discard the shift sequence */
    499 			init_encoding_state(&sc->sc_dst_encoding);
    500 		init_encoding_state(&sc->sc_src_encoding);
    501 		*invalids = 0;
    502 		return 0;
    503 	}
    504 
    505 	/* normal case */
    506 	for (;;) {
    507 		if (*inbytes==0) {
    508 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
    509 			if (state == _STDENC_SDGEN_INITIAL ||
    510 			    state == _STDENC_SDGEN_STABLE)
    511 				break;
    512 		}
    513 
    514 		/* save the encoding states for the error recovery */
    515 		save_encoding_state(&sc->sc_src_encoding);
    516 		save_encoding_state(&sc->sc_dst_encoding);
    517 
    518 		/* mb -> csid/index */
    519 		tmpin = *in;
    520 		szrin = szrout = 0;
    521 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
    522 			      &tmpin, *inbytes, &szrin);
    523 		if (ret)
    524 			goto err;
    525 
    526 		if (szrin == (size_t)-2) {
    527 			/* incompleted character */
    528 			ret = get_state_desc_gen(&sc->sc_src_encoding, &state);
    529 			if (ret) {
    530 				ret = EINVAL;
    531 				goto err;
    532 			}
    533 			switch (state) {
    534 			case _STDENC_SDGEN_INITIAL:
    535 			case _STDENC_SDGEN_STABLE:
    536 				/* fetch shift sequences only. */
    537 				goto next;
    538 			}
    539 			ret = EINVAL;
    540 			goto err;
    541 		}
    542 		/* convert the character */
    543 		ret = do_conv(is, sc, &csid, &idx);
    544 		if (ret) {
    545 			if (ret == E_NO_CORRESPONDING_CHAR) {
    546 				inval++;
    547 				szrout = 0;
    548 				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
    549 				    is->is_use_invalid) {
    550 					ret = wctombx(&sc->sc_dst_encoding,
    551 						      *out, *outbytes,
    552 						      is->is_invalid,
    553 						      &szrout);
    554 					if (ret)
    555 						goto err;
    556 				}
    557 				goto next;
    558 			} else {
    559 				goto err;
    560 			}
    561 		}
    562 		/* csid/index -> mb */
    563 		ret = cstombx(&sc->sc_dst_encoding,
    564 			      *out, *outbytes, csid, idx, &szrout);
    565 		if (ret)
    566 			goto err;
    567 next:
    568 		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
    569 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
    570 		*in = tmpin;
    571 		*outbytes -= szrout;
    572 		*out += szrout;
    573 	}
    574 	*invalids = inval;
    575 
    576 	return 0;
    577 
    578 err:
    579 	restore_encoding_state(&sc->sc_src_encoding);
    580 	restore_encoding_state(&sc->sc_dst_encoding);
    581 err_norestore:
    582 	*invalids = inval;
    583 
    584 	return ret;
    585 }
    586