citrus_iconv_std.c revision 1.3 1 /* $NetBSD: citrus_iconv_std.c,v 1.3 2003/07/01 08:34:04 tshiozak Exp $ */
2
3 /*-
4 * Copyright (c)2003 Citrus Project,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.3 2003/07/01 08:34:04 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_hash.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56
57 /* ---------------------------------------------------------------------- */
58
/*
 * Module boilerplate for the "iconv_std" converter.  These macros are
 * defined outside this file; presumably they declare the static
 * init/uninit/convert entry points implemented below and define the
 * _citrus_iconv_std_iconv_ops table that getops copies out -- confirm
 * against citrus_iconv.h.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
61
62
63 /* ---------------------------------------------------------------------- */
64
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 u_int32_t expected_version)
68 {
69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 return (EINVAL);
71
72 memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 sizeof(_citrus_iconv_std_iconv_ops));
74
75 return (0);
76 }
77
78 /* ---------------------------------------------------------------------- */
79
80 /*
81 * convenience routines for stdenc.
82 */
83 static __inline void
84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 if (se->se_ps)
87 memcpy(se->se_pssaved, se->se_ps,
88 _stdenc_get_state_size(se->se_handle));
89 }
90
91 static __inline void
92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 if (se->se_ps)
95 memcpy(se->se_ps, se->se_pssaved,
96 _stdenc_get_state_size(se->se_handle));
97 }
98
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 if (se->se_ps)
103 _stdenc_init_state(se->se_handle, se->se_ps);
104 }
105
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 _csid_t *csid, _index_t *idx, const char **s, size_t n,
109 size_t *nresult)
110 {
111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 nresult);
113 }
114
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 nresult);
121 }
122
123 static __inline int
124 wctombx(struct _citrus_iconv_std_encoding *se,
125 char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129
130 static __inline int
131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
132 char *s, size_t n, size_t *nresult)
133 {
134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135 }
136
137
138 /*
139 * open/close an encoding.
140 */
141 static __inline void
142 close_encoding(struct _citrus_iconv_std_encoding *se)
143 {
144 free(se->se_ps); se->se_ps = NULL;
145 free(se->se_pssaved); se->se_pssaved = NULL;
146 }
147
148 static __inline int
149 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db)
150 {
151 int ret;
152
153 se->se_ps = se->se_pssaved = NULL;
154 ret = _stdenc_open(&se->se_handle, db->db_encname,
155 db->db_variable, db->db_len_variable);
156 if (ret)
157 return ret;
158
159 if (_stdenc_get_state_size(se->se_handle) == 0)
160 return 0;
161
162 se->se_ps = malloc(_stdenc_get_state_size(se->se_handle));
163 if (se->se_ps == NULL) {
164 ret = errno;
165 goto err;
166 }
167 ret = _stdenc_init_state(se->se_handle, se->se_ps);
168 if (ret)
169 goto err;
170 se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle));
171 if (se->se_pssaved == NULL) {
172 ret = errno;
173 goto err;
174 }
175 ret = _stdenc_init_state(se->se_handle, se->se_pssaved);
176 if (ret)
177 goto err;
178 return 0;
179
180 err:
181 close_encoding(se);
182 return ret;
183 }
184
/*
 * Open a charset mapper from `src' to `dst' and accept it only if it is a
 * simple one: source max 1, destination max 1 and a state size of 0.
 *
 * On success stores the mapper in *rcm and returns 0; *rnorm is filled in
 * by _csmapper_open().  Returns the error from _csmapper_open(), or
 * EINVAL (after closing the mapper) when the mapper is not of the
 * accepted shape.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
    unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
205
206 static void
207 close_dsts(struct _citrus_iconv_std_dst_list *dl)
208 {
209 struct _citrus_iconv_std_dst *sd;
210
211 while ((sd=TAILQ_FIRST(dl)) != NULL) {
212 TAILQ_REMOVE(dl, sd, sd_entry);
213 _csmapper_close(sd->sd_mapper);
214 free(sd);
215 }
216 }
217
218 static int
219 open_dsts(struct _citrus_iconv_std_dst_list *dl,
220 struct _esdb_charset *ec, struct _esdb *dbdst)
221 {
222 int i, ret;
223 struct _citrus_iconv_std_dst *sd, *sdtmp;
224 unsigned long norm;
225
226 sd = malloc(sizeof(*sd));
227 if (sd == NULL)
228 return errno;
229
230 for (i=0; i<dbdst->db_num_charsets; i++) {
231 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
232 dbdst->db_charsets[i].ec_csname, &norm);
233 if (ret == 0) {
234 sd->sd_csid = dbdst->db_charsets[i].ec_csid;
235 sd->sd_norm = norm;
236 /* insert this mapper by sorted order. */
237 TAILQ_FOREACH(sdtmp, dl, sd_entry) {
238 if (sdtmp->sd_norm > norm) {
239 TAILQ_INSERT_BEFORE(sdtmp, sd,
240 sd_entry);
241 sd = NULL;
242 break;
243 }
244 }
245 if (sd)
246 TAILQ_INSERT_TAIL(dl, sd, sd_entry);
247 sd = malloc(sizeof(*sd));
248 if (sd == NULL) {
249 ret = errno;
250 close_dsts(dl);
251 return ret;
252 }
253 } else if (ret != ENOENT) {
254 close_dsts(dl);
255 free(sd);
256 return ret;
257 }
258 }
259 free(sd);
260 return 0;
261 }
262
263 static void
264 close_srcs(struct _citrus_iconv_std_src_list *sl)
265 {
266 struct _citrus_iconv_std_src *ss;
267
268 while ((ss=TAILQ_FIRST(sl)) != NULL) {
269 TAILQ_REMOVE(sl, ss, ss_entry);
270 close_dsts(&ss->ss_dsts);
271 free(ss);
272 }
273 }
274
275 static int
276 open_srcs(struct _citrus_iconv_std_src_list *sl,
277 struct _esdb *dbsrc, struct _esdb *dbdst)
278 {
279 int i, ret, count = 0;
280 struct _citrus_iconv_std_src *ss;
281
282 ss = malloc(sizeof(*ss));
283 if (ss == NULL)
284 return errno;
285
286 TAILQ_INIT(&ss->ss_dsts);
287
288 for (i=0; i<dbsrc->db_num_charsets; i++) {
289 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
290 if (ret)
291 goto err;
292 if (!TAILQ_EMPTY(&ss->ss_dsts)) {
293 ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
294 TAILQ_INSERT_TAIL(sl, ss, ss_entry);
295 ss = malloc(sizeof(*ss));
296 if (ss == NULL) {
297 ret = errno;
298 goto err;
299 }
300 count++;
301 TAILQ_INIT(&ss->ss_dsts);
302 }
303 }
304 free(ss);
305
306 return count ? 0 : ENOENT;
307
308 err:
309 free(ss);
310 close_srcs(sl);
311 return ret;
312 }
313
314 /* do convert a character */
315 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
316 static int
317 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx)
318 {
319 _index_t tmpidx;
320 int ret;
321 struct _citrus_iconv_std_src *ss;
322 struct _citrus_iconv_std_dst *sd;
323
324 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
325 if (ss->ss_csid == *csid) {
326 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
327 ret = _csmapper_convert(sd->sd_mapper,
328 &tmpidx, *idx, NULL);
329 switch (ret) {
330 case _CITRUS_MAPPER_CONVERT_SUCCESS:
331 *csid = sd->sd_csid;
332 *idx = tmpidx;
333 return 0;
334 case _CITRUS_MAPPER_CONVERT_INVAL:
335 break;
336 case _CITRUS_MAPPER_CONVERT_SRC_MORE:
337 /*FALLTHROUGH*/
338 case _CITRUS_MAPPER_CONVERT_DST_MORE:
339 /*FALLTHROUGH*/
340 case _CITRUS_MAPPER_CONVERT_FATAL:
341 return EINVAL;
342 case _CITRUS_MAPPER_CONVERT_ILSEQ:
343 return EILSEQ;
344 }
345 }
346 break;
347 }
348 }
349
350 return E_NO_CORRESPONDING_CHAR;
351 }
352 /* ---------------------------------------------------------------------- */
353
354 static int
355 /*ARGSUSED*/
356 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci,
357 const char * __restrict curdir,
358 const char * __restrict src,
359 const char * __restrict dst,
360 const void * __restrict var, size_t lenvar)
361 {
362 int ret;
363 struct _citrus_iconv_std *is;
364 struct _citrus_esdb esdbsrc, esdbdst;
365
366 is = malloc(sizeof(*is));
367 if (is==NULL) {
368 ret = errno;
369 goto err0;
370 }
371 ret = _citrus_esdb_open(&esdbsrc, src);
372 if (ret)
373 goto err1;
374 ret = _citrus_esdb_open(&esdbdst, dst);
375 if (ret)
376 goto err2;
377 ret = open_encoding(&is->is_src_encoding, &esdbsrc);
378 if (ret)
379 goto err3;
380 ret = open_encoding(&is->is_dst_encoding, &esdbdst);
381 if (ret)
382 goto err4;
383 is->is_use_invalid = esdbdst.db_use_invalid;
384 is->is_invalid = esdbdst.db_invalid;
385
386 TAILQ_INIT(&is->is_srcs);
387 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
388 if (ret)
389 goto err5;
390
391 _esdb_close(&esdbsrc);
392 _esdb_close(&esdbdst);
393 ci->ci_closure = is;
394
395 return 0;
396
397 err5:
398 close_encoding(&is->is_dst_encoding);
399 err4:
400 close_encoding(&is->is_src_encoding);
401 err3:
402 _esdb_close(&esdbdst);
403 err2:
404 _esdb_close(&esdbsrc);
405 err1:
406 free(is);
407 err0:
408 return ret;
409 }
410
411 static void
412 /*ARGSUSED*/
413 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci)
414 {
415 struct _citrus_iconv_std *is;
416
417 if (ci->ci_closure == NULL)
418 return;
419
420 is = ci->ci_closure;
421 close_encoding(&is->is_src_encoding);
422 close_encoding(&is->is_dst_encoding);
423 close_srcs(&is->is_srcs);
424 free(is);
425 }
426
427 static int
428 /*ARGSUSED*/
429 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci,
430 const char * __restrict * __restrict in,
431 size_t * __restrict inbytes,
432 char * __restrict * __restrict out,
433 size_t * __restrict outbytes, u_int32_t flags,
434 size_t * __restrict invalids)
435 {
436 struct _citrus_iconv_std *is = ci->ci_closure;
437 _index_t idx;
438 _csid_t csid;
439 int ret;
440 size_t szrin, szrout;
441 size_t inval;
442 const char *tmpin;
443
444 inval = 0;
445 if (in==NULL || *in==NULL) {
446 /* special cases */
447 if (out!=NULL && *out!=NULL) {
448 /* init output state */
449 save_encoding_state(&is->is_src_encoding);
450 save_encoding_state(&is->is_dst_encoding);
451 szrout = 0;
452
453 ret = put_state_resetx(&is->is_dst_encoding,
454 *out, *outbytes,
455 &szrout);
456 if (ret)
457 goto err;
458
459 if (szrout == (size_t)-2) {
460 /* too small to store the character */
461 ret = EINVAL;
462 goto err;
463 }
464 *out += szrout;
465 *outbytes -= szrout;
466 }
467 *invalids = 0;
468 init_encoding_state(&is->is_src_encoding);
469 return 0;
470 }
471
472 /* normal case */
473 for (;;) {
474 /* save the encoding states for the error recovery */
475 save_encoding_state(&is->is_src_encoding);
476 save_encoding_state(&is->is_dst_encoding);
477
478 /* mb -> csid/index */
479 tmpin = *in;
480 szrin = szrout = 0;
481 ret = mbtocsx(&is->is_src_encoding, &csid, &idx,
482 &tmpin, *inbytes, &szrin);
483 if (ret)
484 goto err;
485
486 if (szrin == (size_t)-2) {
487 /* incompleted character */
488 ret = EINVAL;
489 goto err;
490 }
491 /* convert the character */
492 ret = do_conv(is, &csid, &idx);
493 if (ret) {
494 if (ret == E_NO_CORRESPONDING_CHAR) {
495 inval ++;
496 szrout = 0;
497 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
498 is->is_use_invalid) {
499 ret = wctombx(&is->is_dst_encoding,
500 *out, *outbytes,
501 is->is_invalid,
502 &szrout);
503 if (ret)
504 goto err;
505 }
506 goto next;
507 } else {
508 goto err;
509 }
510 }
511 /* csid/index -> mb */
512 ret = cstombx(&is->is_dst_encoding,
513 *out, *outbytes, csid, idx, &szrout);
514 if (ret)
515 goto err;
516 next:
517 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
518 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
519 *in = tmpin;
520 *outbytes -= szrout;
521 *out += szrout;
522 if (*inbytes==0)
523 break;
524 if (*outbytes == 0) {
525 ret = E2BIG;
526 goto err_norestore;
527 }
528 }
529 *invalids = inval;
530
531 return 0;
532
533 err:
534 restore_encoding_state(&is->is_src_encoding);
535 restore_encoding_state(&is->is_dst_encoding);
536 err_norestore:
537 *invalids = inval;
538
539 return ret;
540 }
541