citrus_iconv_std.c revision 1.1 1 /* $NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $ */
2
3 /*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_hash.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56
57 /* ---------------------------------------------------------------------- */
58
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63 /* ---------------------------------------------------------------------- */
64
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 u_int32_t expected_version)
68 {
69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 return (EINVAL);
71
72 memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 sizeof(_citrus_iconv_std_iconv_ops));
74
75 return (0);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 /*
81 * convenience routines for stdenc.
82 */
83 static __inline void
84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 if (se->se_ps)
87 memcpy(se->se_pssaved, se->se_ps,
88 _stdenc_get_state_size(se->se_handle));
89 }
90
91 static __inline void
92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 if (se->se_ps)
95 memcpy(se->se_ps, se->se_pssaved,
96 _stdenc_get_state_size(se->se_handle));
97 }
98
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 if (se->se_ps)
103 _stdenc_init_state(se->se_handle, se->se_ps);
104 }
105
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 _csid_t *csid, _index_t *idx, const char **s, size_t n,
109 size_t *nresult)
110 {
111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 nresult);
113 }
114
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 nresult);
121 }
122
123 static __inline int
124 wctombx(struct _citrus_iconv_std_encoding *se,
125 char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129
130
131 /*
132 * open/close an encoding.
133 */
134 static __inline void
135 close_encoding(struct _citrus_iconv_std_encoding *se)
136 {
137 free(se->se_ps); se->se_ps = NULL;
138 free(se->se_pssaved); se->se_pssaved = NULL;
139 }
140
141 static __inline int
142 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db)
143 {
144 int ret;
145
146 se->se_ps = se->se_pssaved = NULL;
147 ret = _stdenc_open(&se->se_handle, db->db_encname,
148 db->db_variable, db->db_len_variable);
149 if (ret)
150 return ret;
151
152 if (_stdenc_get_state_size(se->se_handle) == 0)
153 return 0;
154
155 se->se_ps = malloc(_stdenc_get_state_size(se->se_handle));
156 if (se->se_ps == NULL) {
157 ret = errno;
158 goto err;
159 }
160 ret = _stdenc_init_state(se->se_handle, se->se_ps);
161 if (ret)
162 goto err;
163 se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle));
164 if (se->se_pssaved == NULL) {
165 ret = errno;
166 goto err;
167 }
168 ret = _stdenc_init_state(se->se_handle, se->se_pssaved);
169 if (ret)
170 goto err;
171 return 0;
172
173 err:
174 close_encoding(se);
175 return ret;
176 }
177
/*
 * Open the charset mapper from src to dst and accept it only if it maps
 * exactly one source unit to one destination unit and keeps no state.
 *
 * On success stores the mapper in *rcm and returns 0 (rnorm is filled in by
 * _csmapper_open()).  Returns the error from _csmapper_open(), or EINVAL
 * when the mapper does not have the required shape; in that case the
 * mapper is closed again and *rcm is left untouched.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
198
199 static void
200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
201 {
202 struct _citrus_iconv_std_dst *sd;
203
204 while ((sd=TAILQ_FIRST(dl)) != NULL) {
205 TAILQ_REMOVE(dl, sd, sd_entry);
206 _csmapper_close(sd->sd_mapper);
207 free(sd);
208 }
209 }
210
211 static int
212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
213 struct _esdb_charset *ec, struct _esdb *dbdst)
214 {
215 int i, ret;
216 struct _citrus_iconv_std_dst *sd, *sdtmp;
217 unsigned long norm;
218
219 sd = malloc(sizeof(*sd));
220 if (sd == NULL)
221 return errno;
222
223 for (i=0; i<dbdst->db_num_charsets; i++) {
224 ret = open_csmapper(&sd->sd_mapper,ec->ec_csname,
225 dbdst->db_charsets[i].ec_csname, &norm);
226 if (ret == 0) {
227 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228 sd->sd_norm = norm;
229 /* insert this mapper by sorted order. */
230 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231 if (sdtmp->sd_norm > norm) {
232 TAILQ_INSERT_BEFORE(sdtmp, sd,
233 sd_entry);
234 sd = NULL;
235 break;
236 }
237 }
238 if (sd)
239 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240 sd = malloc(sizeof(*sd));
241 if (sd == NULL) {
242 ret = errno;
243 close_dsts(dl);
244 return ret;
245 }
246 } else if (ret != ENOENT) {
247 close_dsts(dl);
248 free(sd);
249 return ret;
250 }
251 }
252 free(sd);
253 return 0;
254 }
255
256 static void
257 close_srcs(struct _citrus_iconv_std_src_list *sl)
258 {
259 struct _citrus_iconv_std_src *ss;
260
261 while ((ss=TAILQ_FIRST(sl)) != NULL) {
262 TAILQ_REMOVE(sl, ss, ss_entry);
263 close_dsts(&ss->ss_dsts);
264 free(ss);
265 }
266 }
267
268 static int
269 open_srcs(struct _citrus_iconv_std_src_list *sl,
270 struct _esdb *dbsrc, struct _esdb *dbdst)
271 {
272 int i, ret, count = 0;
273 struct _citrus_iconv_std_src *ss;
274
275 ss = malloc(sizeof(*ss));
276 if (ss == NULL)
277 return errno;
278
279 TAILQ_INIT(&ss->ss_dsts);
280
281 for (i=0; i<dbsrc->db_num_charsets; i++) {
282 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283 if (ret)
284 goto err;
285 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288 ss = malloc(sizeof(*ss));
289 if (ss == NULL) {
290 ret = errno;
291 goto err;
292 }
293 count++;
294 TAILQ_INIT(&ss->ss_dsts);
295 }
296 }
297 free(ss);
298
299 return count ? 0 : ENOENT;
300
301 err:
302 free(ss);
303 close_srcs(sl);
304 return ret;
305 }
306
307 /* do convert a character */
308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309 static int
310 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx)
311 {
312 _index_t tmpidx;
313 int ret;
314 struct _citrus_iconv_std_src *ss;
315 struct _citrus_iconv_std_dst *sd;
316
317 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
318 if (ss->ss_csid == *csid) {
319 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
320 ret = _csmapper_convert(sd->sd_mapper,
321 &tmpidx, *idx, NULL);
322 switch (ret) {
323 case _CITRUS_MAPPER_CONVERT_SUCCESS:
324 *csid = sd->sd_csid;
325 *idx = tmpidx;
326 return 0;
327 case _CITRUS_MAPPER_CONVERT_INVAL:
328 break;
329 case _CITRUS_MAPPER_CONVERT_SRC_MORE:
330 /*FALLTHROUGH*/
331 case _CITRUS_MAPPER_CONVERT_DST_MORE:
332 /*FALLTHROUGH*/
333 case _CITRUS_MAPPER_CONVERT_FATAL:
334 return EINVAL;
335 case _CITRUS_MAPPER_CONVERT_ILSEQ:
336 return EILSEQ;
337 }
338 }
339 break;
340 }
341 }
342
343 return E_NO_CORRESPONDING_CHAR;
344 }
345 /* ---------------------------------------------------------------------- */
346
347 static int
348 /*ARGSUSED*/
349 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci,
350 const char * __restrict curdir,
351 const char * __restrict src,
352 const char * __restrict dst,
353 const void * __restrict var, size_t lenvar)
354 {
355 int ret;
356 struct _citrus_iconv_std *is;
357 struct _citrus_esdb esdbsrc, esdbdst;
358
359 is = malloc(sizeof(*is));
360 if (is==NULL) {
361 ret = errno;
362 goto err0;
363 }
364 ret = _citrus_esdb_open(&esdbsrc, src);
365 if (ret)
366 goto err1;
367 ret = _citrus_esdb_open(&esdbdst, dst);
368 if (ret)
369 goto err2;
370 ret = open_encoding(&is->is_src_encoding, &esdbsrc);
371 if (ret)
372 goto err3;
373 ret = open_encoding(&is->is_dst_encoding, &esdbdst);
374 if (ret)
375 goto err4;
376 is->is_use_invalid = esdbdst.db_use_invalid;
377 is->is_invalid = esdbdst.db_invalid;
378
379 TAILQ_INIT(&is->is_srcs);
380 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
381 if (ret)
382 goto err5;
383
384 _esdb_close(&esdbsrc);
385 _esdb_close(&esdbdst);
386 ci->ci_closure = is;
387
388 return 0;
389
390 err5:
391 close_encoding(&is->is_dst_encoding);
392 err4:
393 close_encoding(&is->is_src_encoding);
394 err3:
395 _esdb_close(&esdbdst);
396 err2:
397 _esdb_close(&esdbsrc);
398 err1:
399 free(is);
400 err0:
401 return ret;
402 }
403
404 static void
405 /*ARGSUSED*/
406 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci)
407 {
408 struct _citrus_iconv_std *is;
409
410 if (ci->ci_closure == NULL)
411 return;
412
413 is = ci->ci_closure;
414 close_encoding(&is->is_src_encoding);
415 close_encoding(&is->is_dst_encoding);
416 close_srcs(&is->is_srcs);
417 free(is);
418 }
419
420 static int
421 /*ARGSUSED*/
422 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci,
423 const char * __restrict * __restrict in,
424 size_t * __restrict inbytes,
425 char * __restrict * __restrict out,
426 size_t * __restrict outbytes, u_int32_t flags,
427 size_t * __restrict invalids)
428 {
429 struct _citrus_iconv_std *is = ci->ci_closure;
430 _index_t idx;
431 _csid_t csid;
432 int ret;
433 size_t szrin, szrout;
434 size_t inval;
435 const char *tmpin;
436
437 inval = 0;
438 if (in==NULL || *in==NULL) {
439 /* special cases */
440 if (out!=NULL && *out!=NULL) {
441 /* init output state */
442 save_encoding_state(&is->is_src_encoding);
443 save_encoding_state(&is->is_dst_encoding);
444 szrout = 0;
445
446 ret = cstombx(&is->is_dst_encoding,
447 *out, *outbytes,
448 _CITRUS_CSID_INVALID,
449 0, &szrout);
450 if (ret)
451 goto err;
452
453 if (szrout == (size_t)-2) {
454 /* too small to store the character */
455 ret = EINVAL;
456 goto err;
457 }
458 *out += szrout;
459 *outbytes -= szrout;
460 }
461 *invalids = 0;
462 init_encoding_state(&is->is_src_encoding);
463 return 0;
464 }
465
466 /* normal case */
467 for (;;) {
468 /* save the encoding states for the error recovery */
469 save_encoding_state(&is->is_src_encoding);
470 save_encoding_state(&is->is_dst_encoding);
471
472 /* mb -> csid/index */
473 tmpin = *in;
474 szrin = szrout = 0;
475 ret = mbtocsx(&is->is_src_encoding, &csid, &idx,
476 &tmpin, *inbytes, &szrin);
477 if (ret)
478 goto err;
479
480 if (szrin == (size_t)-2) {
481 /* incompleted character */
482 ret = EINVAL;
483 goto err;
484 }
485 /* convert the character */
486 ret = do_conv(is, &csid, &idx);
487 if (ret) {
488 if (ret == E_NO_CORRESPONDING_CHAR) {
489 inval ++;
490 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
491 is->is_use_invalid) {
492 ret = wctombx(&is->is_dst_encoding,
493 *out, *outbytes,
494 is->is_invalid,
495 &szrout);
496 if (ret)
497 goto err;
498 }
499 goto next;
500 } else {
501 goto err;
502 }
503 }
504 /* csid/index -> mb */
505 ret = cstombx(&is->is_dst_encoding,
506 *out, *outbytes, csid, idx, &szrout);
507 if (ret)
508 goto err;
509 next:
510 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
511 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
512 *in = tmpin;
513 *outbytes -= szrout;
514 *out += szrout;
515 if (*inbytes==0)
516 break;
517 if (*outbytes == 0) {
518 ret = E2BIG;
519 goto err_norestore;
520 }
521 }
522 *invalids = inval;
523
524 return 0;
525
526 err:
527 restore_encoding_state(&is->is_src_encoding);
528 restore_encoding_state(&is->is_dst_encoding);
529 err_norestore:
530 *invalids = inval;
531
532 return ret;
533 }
534