Home | History | Annotate | Line # | Download | only in modules
citrus_iconv_std.c revision 1.1
      1 /*	$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2003 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 #if defined(LIBC_SCCS) && !defined(lint)
     31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $");
     32 #endif /* LIBC_SCCS and not lint */
     33 
     34 #include <assert.h>
     35 #include <errno.h>
     36 #include <limits.h>
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 #include <sys/endian.h>
     41 #include <sys/queue.h>
     42 
     43 #include "citrus_namespace.h"
     44 #include "citrus_types.h"
     45 #include "citrus_module.h"
     46 #include "citrus_region.h"
     47 #include "citrus_mmap.h"
     48 #include "citrus_iconv.h"
     49 #include "citrus_stdenc.h"
     50 #include "citrus_hash.h"
     51 #include "citrus_mapper.h"
     52 #include "citrus_csmapper.h"
     53 #include "citrus_memstream.h"
     54 #include "citrus_iconv_std.h"
     55 #include "citrus_esdb.h"
     56 
     57 /* ---------------------------------------------------------------------- */
     58 
     59 _CITRUS_ICONV_DECLS(iconv_std);
     60 _CITRUS_ICONV_DEF_OPS(iconv_std);
     61 
     62 
     63 /* ---------------------------------------------------------------------- */
     64 
     65 int
     66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
     67 			       u_int32_t expected_version)
     68 {
     69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
     70 		return (EINVAL);
     71 
     72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
     73 	       sizeof(_citrus_iconv_std_iconv_ops));
     74 
     75 	return (0);
     76 }
     77 
     78 /* ---------------------------------------------------------------------- */
     79 
     80 /*
     81  * convenience routines for stdenc.
     82  */
     83 static __inline void
     84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
     85 {
     86 	if (se->se_ps)
     87 		memcpy(se->se_pssaved, se->se_ps,
     88 		       _stdenc_get_state_size(se->se_handle));
     89 }
     90 
     91 static __inline void
     92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
     93 {
     94 	if (se->se_ps)
     95 		memcpy(se->se_ps, se->se_pssaved,
     96 		       _stdenc_get_state_size(se->se_handle));
     97 }
     98 
     99 static __inline void
    100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
    101 {
    102 	if (se->se_ps)
    103 		_stdenc_init_state(se->se_handle, se->se_ps);
    104 }
    105 
    106 static __inline int
    107 mbtocsx(struct _citrus_iconv_std_encoding *se,
    108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
    109 	size_t *nresult)
    110 {
    111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
    112 			      nresult);
    113 }
    114 
    115 static __inline int
    116 cstombx(struct _citrus_iconv_std_encoding *se,
    117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
    118 {
    119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
    120 			      nresult);
    121 }
    122 
    123 static __inline int
    124 wctombx(struct _citrus_iconv_std_encoding *se,
    125 	char *s, size_t n, _wc_t wc, size_t *nresult)
    126 {
    127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
    128 }
    129 
    130 
    131 /*
    132  * open/close an encoding.
    133  */
    134 static __inline void
    135 close_encoding(struct _citrus_iconv_std_encoding *se)
    136 {
    137 	free(se->se_ps); se->se_ps = NULL;
    138 	free(se->se_pssaved); se->se_pssaved = NULL;
    139 }
    140 
    141 static __inline int
    142 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db)
    143 {
    144 	int ret;
    145 
    146 	se->se_ps = se->se_pssaved = NULL;
    147 	ret = _stdenc_open(&se->se_handle, db->db_encname,
    148 			   db->db_variable, db->db_len_variable);
    149 	if (ret)
    150 		return ret;
    151 
    152 	if (_stdenc_get_state_size(se->se_handle) == 0)
    153 		return 0;
    154 
    155 	se->se_ps = malloc(_stdenc_get_state_size(se->se_handle));
    156 	if (se->se_ps == NULL) {
    157 		ret = errno;
    158 		goto err;
    159 	}
    160 	ret = _stdenc_init_state(se->se_handle, se->se_ps);
    161 	if (ret)
    162 		goto err;
    163 	se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle));
    164 	if (se->se_pssaved == NULL) {
    165 		ret = errno;
    166 		goto err;
    167 	}
    168 	ret = _stdenc_init_state(se->se_handle, se->se_pssaved);
    169 	if (ret)
    170 		goto err;
    171 	return 0;
    172 
    173 err:
    174 	close_encoding(se);
    175 	return ret;
    176 }
    177 
/*
 * Open a charset mapper from src to dst and accept it only when it
 * reports src_max == 1, dst_max == 1 and a state size of 0.
 *
 * On success the mapper is stored in *rcm and 0 is returned.  A mapper
 * that fails the check is closed again and EINVAL is returned; an error
 * from _csmapper_open() is returned as is.  rnorm is handed straight to
 * _csmapper_open().
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
    198 
    199 static void
    200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
    201 {
    202 	struct _citrus_iconv_std_dst *sd;
    203 
    204 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
    205 		TAILQ_REMOVE(dl, sd, sd_entry);
    206 		_csmapper_close(sd->sd_mapper);
    207 		free(sd);
    208 	}
    209 }
    210 
    211 static int
    212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
    213 	  struct _esdb_charset *ec, struct _esdb *dbdst)
    214 {
    215 	int i, ret;
    216 	struct _citrus_iconv_std_dst *sd, *sdtmp;
    217 	unsigned long norm;
    218 
    219 	sd = malloc(sizeof(*sd));
    220 	if (sd == NULL)
    221 		return errno;
    222 
    223 	for (i=0; i<dbdst->db_num_charsets; i++) {
    224 		ret = open_csmapper(&sd->sd_mapper,ec->ec_csname,
    225 				    dbdst->db_charsets[i].ec_csname, &norm);
    226 		if (ret == 0) {
    227 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
    228 			sd->sd_norm = norm;
    229 			/* insert this mapper by sorted order. */
    230 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
    231 				if (sdtmp->sd_norm > norm) {
    232 					TAILQ_INSERT_BEFORE(sdtmp, sd,
    233 							    sd_entry);
    234 					sd = NULL;
    235 					break;
    236 				}
    237 			}
    238 			if (sd)
    239 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
    240 			sd = malloc(sizeof(*sd));
    241 			if (sd == NULL) {
    242 				ret = errno;
    243 				close_dsts(dl);
    244 				return ret;
    245 			}
    246 		} else if (ret != ENOENT) {
    247 			close_dsts(dl);
    248 			free(sd);
    249 			return ret;
    250 		}
    251 	}
    252 	free(sd);
    253 	return 0;
    254 }
    255 
    256 static void
    257 close_srcs(struct _citrus_iconv_std_src_list *sl)
    258 {
    259 	struct _citrus_iconv_std_src *ss;
    260 
    261 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
    262 		TAILQ_REMOVE(sl, ss, ss_entry);
    263 		close_dsts(&ss->ss_dsts);
    264 		free(ss);
    265 	}
    266 }
    267 
    268 static int
    269 open_srcs(struct _citrus_iconv_std_src_list *sl,
    270 	  struct _esdb *dbsrc, struct _esdb *dbdst)
    271 {
    272 	int i, ret, count = 0;
    273 	struct _citrus_iconv_std_src *ss;
    274 
    275 	ss = malloc(sizeof(*ss));
    276 	if (ss == NULL)
    277 		return errno;
    278 
    279 	TAILQ_INIT(&ss->ss_dsts);
    280 
    281 	for (i=0; i<dbsrc->db_num_charsets; i++) {
    282 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
    283 		if (ret)
    284 			goto err;
    285 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
    286 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
    287 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
    288 			ss = malloc(sizeof(*ss));
    289 			if (ss == NULL) {
    290 				ret = errno;
    291 				goto err;
    292 			}
    293 			count++;
    294 			TAILQ_INIT(&ss->ss_dsts);
    295 		}
    296 	}
    297 	free(ss);
    298 
    299 	return count ? 0 : ENOENT;
    300 
    301 err:
    302 	free(ss);
    303 	close_srcs(sl);
    304 	return ret;
    305 }
    306 
    307 /* do convert a character */
    308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
    309 static int
    310 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx)
    311 {
    312 	_index_t tmpidx;
    313 	int ret;
    314 	struct _citrus_iconv_std_src *ss;
    315 	struct _citrus_iconv_std_dst *sd;
    316 
    317 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
    318 		if (ss->ss_csid == *csid) {
    319 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
    320 				ret = _csmapper_convert(sd->sd_mapper,
    321 							&tmpidx, *idx, NULL);
    322 				switch (ret) {
    323 				case _CITRUS_MAPPER_CONVERT_SUCCESS:
    324 					*csid = sd->sd_csid;
    325 					*idx = tmpidx;
    326 					return 0;
    327 				case _CITRUS_MAPPER_CONVERT_INVAL:
    328 					break;
    329 				case _CITRUS_MAPPER_CONVERT_SRC_MORE:
    330 					/*FALLTHROUGH*/
    331 				case _CITRUS_MAPPER_CONVERT_DST_MORE:
    332 					/*FALLTHROUGH*/
    333 				case _CITRUS_MAPPER_CONVERT_FATAL:
    334 					return EINVAL;
    335 				case _CITRUS_MAPPER_CONVERT_ILSEQ:
    336 					return EILSEQ;
    337 				}
    338 			}
    339 			break;
    340 		}
    341 	}
    342 
    343 	return E_NO_CORRESPONDING_CHAR;
    344 }
    345 /* ---------------------------------------------------------------------- */
    346 
    347 static int
    348 /*ARGSUSED*/
    349 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci,
    350 			     const char * __restrict curdir,
    351 			     const char * __restrict src,
    352 			     const char * __restrict dst,
    353 			     const void * __restrict var, size_t lenvar)
    354 {
    355 	int ret;
    356 	struct _citrus_iconv_std *is;
    357 	struct _citrus_esdb esdbsrc, esdbdst;
    358 
    359 	is = malloc(sizeof(*is));
    360 	if (is==NULL) {
    361 		ret = errno;
    362 		goto err0;
    363 	}
    364 	ret = _citrus_esdb_open(&esdbsrc, src);
    365 	if (ret)
    366 		goto err1;
    367 	ret = _citrus_esdb_open(&esdbdst, dst);
    368 	if (ret)
    369 		goto err2;
    370 	ret = open_encoding(&is->is_src_encoding, &esdbsrc);
    371 	if (ret)
    372 		goto err3;
    373 	ret = open_encoding(&is->is_dst_encoding, &esdbdst);
    374 	if (ret)
    375 		goto err4;
    376 	is->is_use_invalid = esdbdst.db_use_invalid;
    377 	is->is_invalid = esdbdst.db_invalid;
    378 
    379 	TAILQ_INIT(&is->is_srcs);
    380 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
    381 	if (ret)
    382 		goto err5;
    383 
    384 	_esdb_close(&esdbsrc);
    385 	_esdb_close(&esdbdst);
    386 	ci->ci_closure = is;
    387 
    388 	return 0;
    389 
    390 err5:
    391 	close_encoding(&is->is_dst_encoding);
    392 err4:
    393 	close_encoding(&is->is_src_encoding);
    394 err3:
    395 	_esdb_close(&esdbdst);
    396 err2:
    397 	_esdb_close(&esdbsrc);
    398 err1:
    399 	free(is);
    400 err0:
    401 	return ret;
    402 }
    403 
    404 static void
    405 /*ARGSUSED*/
    406 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci)
    407 {
    408 	struct _citrus_iconv_std *is;
    409 
    410 	if (ci->ci_closure == NULL)
    411 		return;
    412 
    413 	is = ci->ci_closure;
    414 	close_encoding(&is->is_src_encoding);
    415 	close_encoding(&is->is_dst_encoding);
    416 	close_srcs(&is->is_srcs);
    417 	free(is);
    418 }
    419 
/*
 * Convert characters from *in to *out, one character per loop iteration.
 *
 * Special case (in == NULL or *in == NULL):
 *   When an output buffer is given, cstombx() is called with
 *   _CITRUS_CSID_INVALID / index 0 and whatever it produces is appended
 *   to *out (presumably the sequence that returns the destination to its
 *   initial state -- confirm against the stdenc modules).  A result size
 *   of (size_t)-2 is reported as EINVAL.  Afterwards *invalids is set to
 *   0, the source state is re-initialized and 0 is returned.
 *
 * Normal case:
 *   Each iteration saves both encoding states, decodes one character
 *   with mbtocsx(), maps it with do_conv() and encodes it with
 *   cstombx(), then advances *in / *inbytes / *out / *outbytes.
 *   A character for which do_conv() returns E_NO_CORRESPONDING_CHAR is
 *   counted and consumed; the configured invalid character is written
 *   in its place only when _CITRUS_ICONV_F_HIDE_INVALID is not set in
 *   flags and is_use_invalid is set.
 *   The loop ends with 0 once *inbytes reaches 0, or with E2BIG when
 *   input remains but *outbytes has reached 0.
 *
 * On every error except that E2BIG case both encoding states are put
 * back to what they were at the start of the failing step, and
 * *in / *out are not advanced for it.  *invalids is written on every
 * return path.
 *
 * NOTE(review): a call with a non-NULL *in and *inbytes == 0 still
 * reaches mbtocsx() with n == 0; what that returns depends on the
 * stdenc module -- confirm the intended behavior for empty input.
 */
static int
/*ARGSUSED*/
_citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci,
				const char * __restrict * __restrict in,
				size_t * __restrict inbytes,
				char * __restrict * __restrict out,
				size_t * __restrict outbytes, u_int32_t flags,
				size_t * __restrict invalids)
{
	struct _citrus_iconv_std *is = ci->ci_closure;
	_index_t idx;
	_csid_t csid;
	int ret;
	size_t szrin, szrout;
	size_t inval;
	const char *tmpin;

	inval = 0;
	if (in==NULL || *in==NULL) {
		/* special cases */
		if (out!=NULL && *out!=NULL) {
			/* init output state */
			save_encoding_state(&is->is_src_encoding);
			save_encoding_state(&is->is_dst_encoding);
			szrout = 0;

			ret = cstombx(&is->is_dst_encoding,
				      *out, *outbytes,
				      _CITRUS_CSID_INVALID,
				      0, &szrout);
			if (ret)
				goto err;

			if (szrout == (size_t)-2) {
				/* too small to store the character */
				ret = EINVAL;
				goto err;
			}
			*out += szrout;
			*outbytes -= szrout;
		}
		*invalids = 0;
		/* the source side restarts from its initial state */
		init_encoding_state(&is->is_src_encoding);
		return 0;
	}

	/* normal case */
	for (;;) {
		/* save the encoding states for the error recovery */
		save_encoding_state(&is->is_src_encoding);
		save_encoding_state(&is->is_dst_encoding);

		/* mb -> csid/index */
		/* decode through a copy so *in only moves on success */
		tmpin = *in;
		szrin = szrout = 0;
		ret = mbtocsx(&is->is_src_encoding, &csid, &idx,
			     &tmpin, *inbytes, &szrin);
		if (ret)
			goto err;

		if (szrin == (size_t)-2) {
			/* incomplete character */
			ret = EINVAL;
			goto err;
		}
		/* convert the character */
		ret = do_conv(is, &csid, &idx);
		if (ret) {
			if (ret == E_NO_CORRESPONDING_CHAR) {
				/* unmappable: count it, optionally emit a substitute */
				inval ++;
				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
				    is->is_use_invalid) {
					ret = wctombx(&is->is_dst_encoding,
						      *out, *outbytes,
						      is->is_invalid,
						      &szrout);
					if (ret)
						goto err;
				}
				/* the input character is consumed either way */
				goto next;
			} else {
				goto err;
			}
		}
		/* csid/index -> mb */
		ret = cstombx(&is->is_dst_encoding,
			      *out, *outbytes, csid, idx, &szrout);
		if (ret)
			goto err;
next:
		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
		*in = tmpin;
		*outbytes -= szrout;
		*out += szrout;
		if (*inbytes==0)
			break;
		if (*outbytes == 0) {
			/* input remains but the output buffer is full */
			ret = E2BIG;
			goto err_norestore;
		}
	}
	*invalids = inval;

	return 0;

err:
	/* undo any state change made by the failing step */
	restore_encoding_state(&is->is_src_encoding);
	restore_encoding_state(&is->is_dst_encoding);
err_norestore:
	*invalids = inval;

	return ret;
}
    534