Home | History | Annotate | Line # | Download | only in modules
citrus_iconv_std.c revision 1.3
      1 /*	$NetBSD: citrus_iconv_std.c,v 1.3 2003/07/01 08:34:04 tshiozak Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2003 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.3 2003/07/01 08:34:04 tshiozak Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <limits.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <sys/endian.h>
     41 #include <sys/queue.h>
     42 
     43 #include "citrus_namespace.h"
     44 #include "citrus_types.h"
     45 #include "citrus_module.h"
     46 #include "citrus_region.h"
     47 #include "citrus_mmap.h"
     48 #include "citrus_iconv.h"
     49 #include "citrus_stdenc.h"
     50 #include "citrus_hash.h"
     51 #include "citrus_mapper.h"
     52 #include "citrus_csmapper.h"
     53 #include "citrus_memstream.h"
     54 #include "citrus_iconv_std.h"
     55 #include "citrus_esdb.h"
     56 
     57 /* ---------------------------------------------------------------------- */
     58 
     59 _CITRUS_ICONV_DECLS(iconv_std);
     60 _CITRUS_ICONV_DEF_OPS(iconv_std);
     61 
     62 
     63 /* ---------------------------------------------------------------------- */
     64 
     65 int
     66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
     67 			       u_int32_t expected_version)
     68 {
     69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
     70 		return (EINVAL);
     71 
     72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
     73 	       sizeof(_citrus_iconv_std_iconv_ops));
     74 
     75 	return (0);
     76 }
     77 
     78 /* ---------------------------------------------------------------------- */
     79 
     80 /*
     81  * convenience routines for stdenc.
     82  */
     83 static __inline void
     84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
     85 {
     86 	if (se->se_ps)
     87 		memcpy(se->se_pssaved, se->se_ps,
     88 		       _stdenc_get_state_size(se->se_handle));
     89 }
     90 
     91 static __inline void
     92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
     93 {
     94 	if (se->se_ps)
     95 		memcpy(se->se_ps, se->se_pssaved,
     96 		       _stdenc_get_state_size(se->se_handle));
     97 }
     98 
     99 static __inline void
    100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
    101 {
    102 	if (se->se_ps)
    103 		_stdenc_init_state(se->se_handle, se->se_ps);
    104 }
    105 
    106 static __inline int
    107 mbtocsx(struct _citrus_iconv_std_encoding *se,
    108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
    109 	size_t *nresult)
    110 {
    111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
    112 			      nresult);
    113 }
    114 
    115 static __inline int
    116 cstombx(struct _citrus_iconv_std_encoding *se,
    117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
    118 {
    119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
    120 			      nresult);
    121 }
    122 
    123 static __inline int
    124 wctombx(struct _citrus_iconv_std_encoding *se,
    125 	char *s, size_t n, _wc_t wc, size_t *nresult)
    126 {
    127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
    128 }
    129 
    130 static __inline int
    131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
    132 		 char *s, size_t n, size_t *nresult)
    133 {
    134 	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
    135 }
    136 
    137 
    138 /*
    139  * open/close an encoding.
    140  */
    141 static __inline void
    142 close_encoding(struct _citrus_iconv_std_encoding *se)
    143 {
    144 	free(se->se_ps); se->se_ps = NULL;
    145 	free(se->se_pssaved); se->se_pssaved = NULL;
    146 }
    147 
    148 static __inline int
    149 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db)
    150 {
    151 	int ret;
    152 
    153 	se->se_ps = se->se_pssaved = NULL;
    154 	ret = _stdenc_open(&se->se_handle, db->db_encname,
    155 			   db->db_variable, db->db_len_variable);
    156 	if (ret)
    157 		return ret;
    158 
    159 	if (_stdenc_get_state_size(se->se_handle) == 0)
    160 		return 0;
    161 
    162 	se->se_ps = malloc(_stdenc_get_state_size(se->se_handle));
    163 	if (se->se_ps == NULL) {
    164 		ret = errno;
    165 		goto err;
    166 	}
    167 	ret = _stdenc_init_state(se->se_handle, se->se_ps);
    168 	if (ret)
    169 		goto err;
    170 	se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle));
    171 	if (se->se_pssaved == NULL) {
    172 		ret = errno;
    173 		goto err;
    174 	}
    175 	ret = _stdenc_init_state(se->se_handle, se->se_pssaved);
    176 	if (ret)
    177 		goto err;
    178 	return 0;
    179 
    180 err:
    181 	close_encoding(se);
    182 	return ret;
    183 }
    184 
/*
 * Open a charset mapper from `src' to `dst' and accept it only if it is
 * a simple one-to-one, stateless mapper: source maximum 1, destination
 * maximum 1 and state size 0.
 *
 * On success stores the mapper in *rcm and returns 0; `rnorm' is passed
 * straight through to _csmapper_open().  Returns the error from
 * _csmapper_open(), or EINVAL (after closing the mapper) when the
 * mapper does not meet the constraints above.  *rcm is not written on
 * failure.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
    205 
    206 static void
    207 close_dsts(struct _citrus_iconv_std_dst_list *dl)
    208 {
    209 	struct _citrus_iconv_std_dst *sd;
    210 
    211 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
    212 		TAILQ_REMOVE(dl, sd, sd_entry);
    213 		_csmapper_close(sd->sd_mapper);
    214 		free(sd);
    215 	}
    216 }
    217 
    218 static int
    219 open_dsts(struct _citrus_iconv_std_dst_list *dl,
    220 	  struct _esdb_charset *ec, struct _esdb *dbdst)
    221 {
    222 	int i, ret;
    223 	struct _citrus_iconv_std_dst *sd, *sdtmp;
    224 	unsigned long norm;
    225 
    226 	sd = malloc(sizeof(*sd));
    227 	if (sd == NULL)
    228 		return errno;
    229 
    230 	for (i=0; i<dbdst->db_num_charsets; i++) {
    231 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
    232 				    dbdst->db_charsets[i].ec_csname, &norm);
    233 		if (ret == 0) {
    234 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
    235 			sd->sd_norm = norm;
    236 			/* insert this mapper by sorted order. */
    237 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
    238 				if (sdtmp->sd_norm > norm) {
    239 					TAILQ_INSERT_BEFORE(sdtmp, sd,
    240 							    sd_entry);
    241 					sd = NULL;
    242 					break;
    243 				}
    244 			}
    245 			if (sd)
    246 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
    247 			sd = malloc(sizeof(*sd));
    248 			if (sd == NULL) {
    249 				ret = errno;
    250 				close_dsts(dl);
    251 				return ret;
    252 			}
    253 		} else if (ret != ENOENT) {
    254 			close_dsts(dl);
    255 			free(sd);
    256 			return ret;
    257 		}
    258 	}
    259 	free(sd);
    260 	return 0;
    261 }
    262 
    263 static void
    264 close_srcs(struct _citrus_iconv_std_src_list *sl)
    265 {
    266 	struct _citrus_iconv_std_src *ss;
    267 
    268 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
    269 		TAILQ_REMOVE(sl, ss, ss_entry);
    270 		close_dsts(&ss->ss_dsts);
    271 		free(ss);
    272 	}
    273 }
    274 
    275 static int
    276 open_srcs(struct _citrus_iconv_std_src_list *sl,
    277 	  struct _esdb *dbsrc, struct _esdb *dbdst)
    278 {
    279 	int i, ret, count = 0;
    280 	struct _citrus_iconv_std_src *ss;
    281 
    282 	ss = malloc(sizeof(*ss));
    283 	if (ss == NULL)
    284 		return errno;
    285 
    286 	TAILQ_INIT(&ss->ss_dsts);
    287 
    288 	for (i=0; i<dbsrc->db_num_charsets; i++) {
    289 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
    290 		if (ret)
    291 			goto err;
    292 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
    293 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
    294 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
    295 			ss = malloc(sizeof(*ss));
    296 			if (ss == NULL) {
    297 				ret = errno;
    298 				goto err;
    299 			}
    300 			count++;
    301 			TAILQ_INIT(&ss->ss_dsts);
    302 		}
    303 	}
    304 	free(ss);
    305 
    306 	return count ? 0 : ENOENT;
    307 
    308 err:
    309 	free(ss);
    310 	close_srcs(sl);
    311 	return ret;
    312 }
    313 
    314 /* do convert a character */
    315 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
    316 static int
    317 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx)
    318 {
    319 	_index_t tmpidx;
    320 	int ret;
    321 	struct _citrus_iconv_std_src *ss;
    322 	struct _citrus_iconv_std_dst *sd;
    323 
    324 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
    325 		if (ss->ss_csid == *csid) {
    326 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
    327 				ret = _csmapper_convert(sd->sd_mapper,
    328 							&tmpidx, *idx, NULL);
    329 				switch (ret) {
    330 				case _CITRUS_MAPPER_CONVERT_SUCCESS:
    331 					*csid = sd->sd_csid;
    332 					*idx = tmpidx;
    333 					return 0;
    334 				case _CITRUS_MAPPER_CONVERT_INVAL:
    335 					break;
    336 				case _CITRUS_MAPPER_CONVERT_SRC_MORE:
    337 					/*FALLTHROUGH*/
    338 				case _CITRUS_MAPPER_CONVERT_DST_MORE:
    339 					/*FALLTHROUGH*/
    340 				case _CITRUS_MAPPER_CONVERT_FATAL:
    341 					return EINVAL;
    342 				case _CITRUS_MAPPER_CONVERT_ILSEQ:
    343 					return EILSEQ;
    344 				}
    345 			}
    346 			break;
    347 		}
    348 	}
    349 
    350 	return E_NO_CORRESPONDING_CHAR;
    351 }
    352 /* ---------------------------------------------------------------------- */
    353 
    354 static int
    355 /*ARGSUSED*/
    356 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci,
    357 			     const char * __restrict curdir,
    358 			     const char * __restrict src,
    359 			     const char * __restrict dst,
    360 			     const void * __restrict var, size_t lenvar)
    361 {
    362 	int ret;
    363 	struct _citrus_iconv_std *is;
    364 	struct _citrus_esdb esdbsrc, esdbdst;
    365 
    366 	is = malloc(sizeof(*is));
    367 	if (is==NULL) {
    368 		ret = errno;
    369 		goto err0;
    370 	}
    371 	ret = _citrus_esdb_open(&esdbsrc, src);
    372 	if (ret)
    373 		goto err1;
    374 	ret = _citrus_esdb_open(&esdbdst, dst);
    375 	if (ret)
    376 		goto err2;
    377 	ret = open_encoding(&is->is_src_encoding, &esdbsrc);
    378 	if (ret)
    379 		goto err3;
    380 	ret = open_encoding(&is->is_dst_encoding, &esdbdst);
    381 	if (ret)
    382 		goto err4;
    383 	is->is_use_invalid = esdbdst.db_use_invalid;
    384 	is->is_invalid = esdbdst.db_invalid;
    385 
    386 	TAILQ_INIT(&is->is_srcs);
    387 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
    388 	if (ret)
    389 		goto err5;
    390 
    391 	_esdb_close(&esdbsrc);
    392 	_esdb_close(&esdbdst);
    393 	ci->ci_closure = is;
    394 
    395 	return 0;
    396 
    397 err5:
    398 	close_encoding(&is->is_dst_encoding);
    399 err4:
    400 	close_encoding(&is->is_src_encoding);
    401 err3:
    402 	_esdb_close(&esdbdst);
    403 err2:
    404 	_esdb_close(&esdbsrc);
    405 err1:
    406 	free(is);
    407 err0:
    408 	return ret;
    409 }
    410 
    411 static void
    412 /*ARGSUSED*/
    413 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci)
    414 {
    415 	struct _citrus_iconv_std *is;
    416 
    417 	if (ci->ci_closure == NULL)
    418 		return;
    419 
    420 	is = ci->ci_closure;
    421 	close_encoding(&is->is_src_encoding);
    422 	close_encoding(&is->is_dst_encoding);
    423 	close_srcs(&is->is_srcs);
    424 	free(is);
    425 }
    426 
    427 static int
    428 /*ARGSUSED*/
    429 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci,
    430 				const char * __restrict * __restrict in,
    431 				size_t * __restrict inbytes,
    432 				char * __restrict * __restrict out,
    433 				size_t * __restrict outbytes, u_int32_t flags,
    434 				size_t * __restrict invalids)
    435 {
    436 	struct _citrus_iconv_std *is = ci->ci_closure;
    437 	_index_t idx;
    438 	_csid_t csid;
    439 	int ret;
    440 	size_t szrin, szrout;
    441 	size_t inval;
    442 	const char *tmpin;
    443 
    444 	inval = 0;
    445 	if (in==NULL || *in==NULL) {
    446 		/* special cases */
    447 		if (out!=NULL && *out!=NULL) {
    448 			/* init output state */
    449 			save_encoding_state(&is->is_src_encoding);
    450 			save_encoding_state(&is->is_dst_encoding);
    451 			szrout = 0;
    452 
    453 			ret = put_state_resetx(&is->is_dst_encoding,
    454 					       *out, *outbytes,
    455 					       &szrout);
    456 			if (ret)
    457 				goto err;
    458 
    459 			if (szrout == (size_t)-2) {
    460 				/* too small to store the character */
    461 				ret = EINVAL;
    462 				goto err;
    463 			}
    464 			*out += szrout;
    465 			*outbytes -= szrout;
    466 		}
    467 		*invalids = 0;
    468 		init_encoding_state(&is->is_src_encoding);
    469 		return 0;
    470 	}
    471 
    472 	/* normal case */
    473 	for (;;) {
    474 		/* save the encoding states for the error recovery */
    475 		save_encoding_state(&is->is_src_encoding);
    476 		save_encoding_state(&is->is_dst_encoding);
    477 
    478 		/* mb -> csid/index */
    479 		tmpin = *in;
    480 		szrin = szrout = 0;
    481 		ret = mbtocsx(&is->is_src_encoding, &csid, &idx,
    482 			     &tmpin, *inbytes, &szrin);
    483 		if (ret)
    484 			goto err;
    485 
    486 		if (szrin == (size_t)-2) {
    487 			/* incompleted character */
    488 			ret = EINVAL;
    489 			goto err;
    490 		}
    491 		/* convert the character */
    492 		ret = do_conv(is, &csid, &idx);
    493 		if (ret) {
    494 			if (ret == E_NO_CORRESPONDING_CHAR) {
    495 				inval ++;
    496 				szrout = 0;
    497 				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
    498 				    is->is_use_invalid) {
    499 					ret = wctombx(&is->is_dst_encoding,
    500 						      *out, *outbytes,
    501 						      is->is_invalid,
    502 						      &szrout);
    503 					if (ret)
    504 						goto err;
    505 				}
    506 				goto next;
    507 			} else {
    508 				goto err;
    509 			}
    510 		}
    511 		/* csid/index -> mb */
    512 		ret = cstombx(&is->is_dst_encoding,
    513 			      *out, *outbytes, csid, idx, &szrout);
    514 		if (ret)
    515 			goto err;
    516 next:
    517 		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
    518 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
    519 		*in = tmpin;
    520 		*outbytes -= szrout;
    521 		*out += szrout;
    522 		if (*inbytes==0)
    523 			break;
    524 		if (*outbytes == 0) {
    525 			ret = E2BIG;
    526 			goto err_norestore;
    527 		}
    528 	}
    529 	*invalids = inval;
    530 
    531 	return 0;
    532 
    533 err:
    534 	restore_encoding_state(&is->is_src_encoding);
    535 	restore_encoding_state(&is->is_dst_encoding);
    536 err_norestore:
    537 	*invalids = inval;
    538 
    539 	return ret;
    540 }
    541