/*	$NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $	*/
2
3 /*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56
57 /* ---------------------------------------------------------------------- */
58
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63 /* ---------------------------------------------------------------------- */
64
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 u_int32_t expected_version)
68 {
69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 return (EINVAL);
71
72 memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 sizeof(_citrus_iconv_std_iconv_ops));
74
75 return (0);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 /*
81 * convenience routines for stdenc.
82 */
83 static __inline void
84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 if (se->se_ps)
87 memcpy(se->se_pssaved, se->se_ps,
88 _stdenc_get_state_size(se->se_handle));
89 }
90
91 static __inline void
92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 if (se->se_ps)
95 memcpy(se->se_ps, se->se_pssaved,
96 _stdenc_get_state_size(se->se_handle));
97 }
98
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 if (se->se_ps)
103 _stdenc_init_state(se->se_handle, se->se_ps);
104 }
105
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 _csid_t *csid, _index_t *idx, const char **s, size_t n,
109 size_t *nresult)
110 {
111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 nresult);
113 }
114
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 nresult);
121 }
122
123 static __inline int
124 wctombx(struct _citrus_iconv_std_encoding *se,
125 char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129
130 static __inline int
131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
132 char *s, size_t n, size_t *nresult)
133 {
134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135 }
136
137 /*
138 * init encoding context
139 */
140 static int
141 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
142 void *ps1, void *ps2)
143 {
144 int ret;
145
146 se->se_handle = cs;
147 se->se_ps = ps1;
148 se->se_pssaved = ps2;
149
150 if (se->se_ps)
151 ret = _stdenc_init_state(cs, se->se_ps);
152 if (!ret && se->se_pssaved)
153 ret = _stdenc_init_state(cs, se->se_pssaved);
154
155 return ret;
156 }
157
/*
 * Open the charset mapper from `src' to `dst' via _csmapper_open().
 *
 * Only mappers whose source and destination maxima are both 1 and whose
 * state size is 0 are accepted; any other mapper is closed again and
 * EINVAL is returned.  On success the mapper is stored in *rcm and 0 is
 * returned.  A failure of _csmapper_open() is passed through as is.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return (error);

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return (0);
	}

	_csmapper_close(mapper);
	return (EINVAL);
}
178
179 static void
180 close_dsts(struct _citrus_iconv_std_dst_list *dl)
181 {
182 struct _citrus_iconv_std_dst *sd;
183
184 while ((sd=TAILQ_FIRST(dl)) != NULL) {
185 TAILQ_REMOVE(dl, sd, sd_entry);
186 _csmapper_close(sd->sd_mapper);
187 free(sd);
188 }
189 }
190
191 static int
192 open_dsts(struct _citrus_iconv_std_dst_list *dl,
193 struct _esdb_charset *ec, struct _esdb *dbdst)
194 {
195 int i, ret;
196 struct _citrus_iconv_std_dst *sd, *sdtmp;
197 unsigned long norm;
198
199 sd = malloc(sizeof(*sd));
200 if (sd == NULL)
201 return errno;
202
203 for (i=0; i<dbdst->db_num_charsets; i++) {
204 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
205 dbdst->db_charsets[i].ec_csname, &norm);
206 if (ret == 0) {
207 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
208 sd->sd_norm = norm;
209 /* insert this mapper by sorted order. */
210 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
211 if (sdtmp->sd_norm > norm) {
212 TAILQ_INSERT_BEFORE(sdtmp, sd,
213 sd_entry);
214 sd = NULL;
215 break;
216 }
217 }
218 if (sd)
219 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
220 sd = malloc(sizeof(*sd));
221 if (sd == NULL) {
222 ret = errno;
223 close_dsts(dl);
224 return ret;
225 }
226 } else if (ret != ENOENT) {
227 close_dsts(dl);
228 free(sd);
229 return ret;
230 }
231 }
232 free(sd);
233 return 0;
234 }
235
236 static void
237 close_srcs(struct _citrus_iconv_std_src_list *sl)
238 {
239 struct _citrus_iconv_std_src *ss;
240
241 while ((ss=TAILQ_FIRST(sl)) != NULL) {
242 TAILQ_REMOVE(sl, ss, ss_entry);
243 close_dsts(&ss->ss_dsts);
244 free(ss);
245 }
246 }
247
248 static int
249 open_srcs(struct _citrus_iconv_std_src_list *sl,
250 struct _esdb *dbsrc, struct _esdb *dbdst)
251 {
252 int i, ret, count = 0;
253 struct _citrus_iconv_std_src *ss;
254
255 ss = malloc(sizeof(*ss));
256 if (ss == NULL)
257 return errno;
258
259 TAILQ_INIT(&ss->ss_dsts);
260
261 for (i=0; i<dbsrc->db_num_charsets; i++) {
262 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
263 if (ret)
264 goto err;
265 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
266 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
267 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
268 ss = malloc(sizeof(*ss));
269 if (ss == NULL) {
270 ret = errno;
271 goto err;
272 }
273 count++;
274 TAILQ_INIT(&ss->ss_dsts);
275 }
276 }
277 free(ss);
278
279 return count ? 0 : ENOENT;
280
281 err:
282 free(ss);
283 close_srcs(sl);
284 return ret;
285 }
286
/* convert a single character */
288 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
289 static int
290 /*ARGSUSED*/
291 do_conv(struct _citrus_iconv_std_shared *is,
292 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
293 {
294 _index_t tmpidx;
295 int ret;
296 struct _citrus_iconv_std_src *ss;
297 struct _citrus_iconv_std_dst *sd;
298
299 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
300 if (ss->ss_csid == *csid) {
301 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
302 ret = _csmapper_convert(sd->sd_mapper,
303 &tmpidx, *idx, NULL);
304 switch (ret) {
305 case _MAPPER_CONVERT_SUCCESS:
306 *csid = sd->sd_csid;
307 *idx = tmpidx;
308 return 0;
309 case _MAPPER_CONVERT_NONIDENTICAL:
310 break;
311 case _MAPPER_CONVERT_SRC_MORE:
312 /*FALLTHROUGH*/
313 case _MAPPER_CONVERT_DST_MORE:
314 /*FALLTHROUGH*/
315 case _MAPPER_CONVERT_FATAL:
316 return EINVAL;
317 case _MAPPER_CONVERT_ILSEQ:
318 return EILSEQ;
319 }
320 }
321 break;
322 }
323 }
324
325 return E_NO_CORRESPONDING_CHAR;
326 }
327 /* ---------------------------------------------------------------------- */
328
329 static int
330 /*ARGSUSED*/
331 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
332 const char * __restrict curdir,
333 const char * __restrict src,
334 const char * __restrict dst,
335 const void * __restrict var, size_t lenvar)
336 {
337 int ret;
338 struct _citrus_iconv_std_shared *is;
339 struct _citrus_esdb esdbsrc, esdbdst;
340
341 is = malloc(sizeof(*is));
342 if (is==NULL) {
343 ret = errno;
344 goto err0;
345 }
346 ret = _citrus_esdb_open(&esdbsrc, src);
347 if (ret)
348 goto err1;
349 ret = _citrus_esdb_open(&esdbdst, dst);
350 if (ret)
351 goto err2;
352 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
353 esdbsrc.db_variable, esdbsrc.db_len_variable);
354 if (ret)
355 goto err3;
356 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
357 esdbdst.db_variable, esdbdst.db_len_variable);
358 if (ret)
359 goto err4;
360 is->is_use_invalid = esdbdst.db_use_invalid;
361 is->is_invalid = esdbdst.db_invalid;
362
363 TAILQ_INIT(&is->is_srcs);
364 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
365 if (ret)
366 goto err5;
367
368 _esdb_close(&esdbsrc);
369 _esdb_close(&esdbdst);
370 ci->ci_closure = is;
371
372 return 0;
373
374 err5:
375 _stdenc_close(is->is_dst_encoding);
376 err4:
377 _stdenc_close(is->is_src_encoding);
378 err3:
379 _esdb_close(&esdbdst);
380 err2:
381 _esdb_close(&esdbsrc);
382 err1:
383 free(is);
384 err0:
385 return ret;
386 }
387
388 static void
389 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
390 {
391 struct _citrus_iconv_std_shared *is = ci->ci_closure;
392
393 if (is == NULL)
394 return;
395
396 _stdenc_close(is->is_src_encoding);
397 _stdenc_close(is->is_dst_encoding);
398 close_srcs(&is->is_srcs);
399 free(is);
400 }
401
402 static int
403 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
404 {
405 struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
406 struct _citrus_iconv_std_context *sc;
407 int ret;
408 size_t szpssrc, szpsdst, sz;
409 char *ptr;
410
411 szpssrc = _stdenc_get_state_size(is->is_src_encoding);
412 szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
413
414 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
415 sc = malloc(sz);
416 if (sc == NULL)
417 return errno;
418
419 ptr = (char *)&sc[1];
420 if (szpssrc)
421 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
422 ptr, ptr+szpssrc);
423 else
424 init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
425 NULL, NULL);
426 ptr += szpssrc*2;
427 if (szpsdst)
428 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
429 ptr, ptr+szpsdst);
430 else
431 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
432 NULL, NULL);
433
434 cv->cv_closure = (void *)sc;
435
436 return 0;
437 }
438
439 static void
440 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
441 {
442 free(cv->cv_closure);
443 }
444
445 static int
446 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
447 const char * __restrict * __restrict in,
448 size_t * __restrict inbytes,
449 char * __restrict * __restrict out,
450 size_t * __restrict outbytes, u_int32_t flags,
451 size_t * __restrict invalids)
452 {
453 struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
454 struct _citrus_iconv_std_context *sc = cv->cv_closure;
455 _index_t idx;
456 _csid_t csid;
457 int ret;
458 size_t szrin, szrout;
459 size_t inval;
460 const char *tmpin;
461
462 inval = 0;
463 if (in==NULL || *in==NULL) {
464 /* special cases */
465 if (out!=NULL && *out!=NULL) {
466 /* init output state */
467 save_encoding_state(&sc->sc_src_encoding);
468 save_encoding_state(&sc->sc_dst_encoding);
469 szrout = 0;
470
471 ret = put_state_resetx(&sc->sc_dst_encoding,
472 *out, *outbytes,
473 &szrout);
474 if (ret)
475 goto err;
476
477 if (szrout == (size_t)-2) {
478 /* too small to store the character */
479 ret = EINVAL;
480 goto err;
481 }
482 *out += szrout;
483 *outbytes -= szrout;
484 }
485 *invalids = 0;
486 init_encoding_state(&sc->sc_src_encoding);
487 return 0;
488 }
489
490 /* normal case */
491 for (;;) {
492 /* save the encoding states for the error recovery */
493 save_encoding_state(&sc->sc_src_encoding);
494 save_encoding_state(&sc->sc_dst_encoding);
495
496 /* mb -> csid/index */
497 tmpin = *in;
498 szrin = szrout = 0;
499 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
500 &tmpin, *inbytes, &szrin);
501 if (ret)
502 goto err;
503
504 if (szrin == (size_t)-2) {
505 /* incompleted character */
506 ret = EINVAL;
507 goto err;
508 }
509 /* convert the character */
510 ret = do_conv(is, sc, &csid, &idx);
511 if (ret) {
512 if (ret == E_NO_CORRESPONDING_CHAR) {
513 inval ++;
514 szrout = 0;
515 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
516 is->is_use_invalid) {
517 ret = wctombx(&sc->sc_dst_encoding,
518 *out, *outbytes,
519 is->is_invalid,
520 &szrout);
521 if (ret)
522 goto err;
523 }
524 goto next;
525 } else {
526 goto err;
527 }
528 }
529 /* csid/index -> mb */
530 ret = cstombx(&sc->sc_dst_encoding,
531 *out, *outbytes, csid, idx, &szrout);
532 if (ret)
533 goto err;
534 next:
535 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
536 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
537 *in = tmpin;
538 *outbytes -= szrout;
539 *out += szrout;
540 if (*inbytes==0)
541 break;
542 if (*outbytes == 0) {
543 ret = E2BIG;
544 goto err_norestore;
545 }
546 }
547 *invalids = inval;
548
549 return 0;
550
551 err:
552 restore_encoding_state(&sc->sc_src_encoding);
553 restore_encoding_state(&sc->sc_dst_encoding);
554 err_norestore:
555 *invalids = inval;
556
557 return ret;
558 }
559