Home | History | Annotate | Line # | Download | only in modules
citrus_utf7.c revision 1.1
      1 /*	$NetBSD: citrus_utf7.c,v 1.1 2005/03/05 18:05:15 tnozaki Exp $	*/
      2 
      3 /*-
      4  * Copyright (c)2004, 2005 Citrus Project,
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  *
     28  */
     29 
     30 #include <sys/cdefs.h>
     31 #if defined(LIB_SCCS) && !defined(lint)
     32 __RCSID("$NetBSD: citrus_utf7.c,v 1.1 2005/03/05 18:05:15 tnozaki Exp $");
     33 #endif /* LIB_SCCS and not lint */
     34 
     35 #include <assert.h>
     36 #include <errno.h>
     37 #include <string.h>
     38 #include <stdio.h>
     39 #include <stdint.h>
     40 #include <stdlib.h>
     41 #include <limits.h>
     42 #include <wchar.h>
     43 
     44 #include "citrus_namespace.h"
     45 #include "citrus_types.h"
     46 #include "citrus_module.h"
     47 #include "citrus_ctype.h"
     48 #include "citrus_stdenc.h"
     49 #include "citrus_utf7.h"
     50 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
     54 
/*
 * Layout of one cell[] entry:
 *   EI_MASK    low byte; module init stores (base64 value + 1) here, so 0
 *              means "not a base64 character"
 *   EI_DIRECT  character is listed in the direct[] table
 *   EI_OPTION  character is listed in the option[] table
 *   EI_SPACE   character is listed in the spaces[] table
 */
#define	EI_MASK		UINT16_C(0xff)
#define EI_DIRECT	UINT16_C(0x100)
#define EI_OPTION	UINT16_C(0x200)
#define EI_SPACE	UINT16_C(0x400)

/* Classification table indexed by a 7-bit character code. */
typedef struct {
	uint16_t	cell[0x80];
} _UTF7EncodingInfo;
     62 
/*
 * Shift state packed into bit-fields.  Field order and widths are kept
 * exactly as before because the state is copied as raw bytes.
 */
typedef struct {
	unsigned int	chlen: 3;	/* bytes buffered in ch[], 0 - 4 */
	unsigned int	mode: 1;	/* non-zero while in base64 mode */
	unsigned int	bits: 4;	/* pending bit count, 0 - 15 */
	unsigned int	cache: 22;	/* 22 = BASE64_BIT + UTF16_BIT */
	unsigned int	surrogate: 1;	/* high surrogate seen, low one due */
	unsigned int	unused: 1;
} _UTF7StatePriv;
     72 
/*
 * Conversion state carried between calls.
 *
 * The anonymous union overlays a plain uint32_t on the bit-field state; the
 * functions in this file reach the fields through `state` (state.chlen,
 * state.mode, ...).
 * NOTE(review): the union's own `chlen` member shares storage with the whole
 * of `state`, not only with state.chlen, and nothing visible in this file
 * references it -- confirm whether it is still needed.
 *
 * ch[] holds the raw bytes of the unit currently being decoded or encoded;
 * state.chlen counts how many of them are in use.
 */
typedef struct {
	union {
		uint32_t	chlen;
		_UTF7StatePriv	state;
	};
	/*
	 * Room for BASE64_IN plus three base64 characters: 3 * 6 = 18 bits
	 * is the smallest whole number of characters covering UTF16_BIT.
	 */
	char ch[4];
} _UTF7State;
     80 
/*
 * Per-encoding ctype data: the character classification table plus one
 * conversion state for each function named by the s_* members.
 * _CEI_TO_STATE() selects a member by pasting the function name after "s_".
 */
typedef struct {
	_UTF7EncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_UTF7State	s_mblen;
		_UTF7State	s_mbrlen;
		_UTF7State	s_mbrtowc;
		_UTF7State	s_mbtowc;
		_UTF7State	s_mbsrtowcs;
		_UTF7State	s_wcrtomb;
		_UTF7State	s_wcsrtombs;
		_UTF7State	s_wctomb;
	} states;
} _UTF7CTypeInfo;
     95 
/*
 * Accessors into a _UTF7CTypeInfo: _CEI_TO_EI yields a pointer to the
 * classification table, _CEI_TO_STATE the state member named s_<func>.
 */
#define	_CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define	_CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Encoding description.
 * NOTE(review): presumably consumed by the citrus_*_template.h headers
 * included at the end of this file -- confirm against those headers.
 *
 * _FUNCNAME(m) builds the _citrus_UTF7_<m> identifiers used below; the
 * maximum character length of 4 matches the size of _UTF7State.ch.
 */
#define	_FUNCNAME(m)			_citrus_UTF7_##m
#define	_ENCODING_INFO			_UTF7EncodingInfo
#define	_CTYPE_INFO			_UTF7CTypeInfo
#define	_ENCODING_STATE			_UTF7State
#define	_ENCODING_MB_CUR_MAX(_ei_)		4
#define	_ENCODING_IS_STATE_DEPENDENT		1
#define	_STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
    106 
    107 static __inline void
    108 /*ARGSUSED*/
    109 _citrus_UTF7_init_state(_UTF7EncodingInfo * __restrict ei,
    110 	_UTF7State * __restrict s)
    111 {
    112 	/* ei appears to be unused */
    113 	_DIAGASSERT(s != NULL);
    114 
    115 	memset((void *)s, 0, sizeof(*s));
    116 }
    117 
    118 static __inline void
    119 /*ARGSUSED*/
    120 _citrus_UTF7_pack_state(_UTF7EncodingInfo * __restrict ei,
    121 	void *__restrict pspriv, const _UTF7State * __restrict s)
    122 {
    123 	/* ei seem to be unused */
    124 	_DIAGASSERT(pspriv != NULL);
    125 	_DIAGASSERT(s != NULL);
    126 
    127 	memcpy(pspriv, (const void *)s, sizeof(*s));
    128 }
    129 
    130 static __inline void
    131 /*ARGSUSED*/
    132 _citrus_UTF7_unpack_state(_UTF7EncodingInfo * __restrict ei,
    133 	_UTF7State * __restrict s, const void * __restrict pspriv)
    134 {
    135 	/* ei seem to be unused */
    136 	_DIAGASSERT(s != NULL);
    137 	_DIAGASSERT(pspriv != NULL);
    138 
    139 	memcpy((void *)s, pspriv, sizeof(*s));
    140 }
    141 
/* Modified base64 alphabet; the index of a character is its 6-bit value. */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/*
 * RFC 2152 Set D (directly encoded characters): letters, digits and the
 * nine specials '(),-./:?  The apostrophe belongs here, not in Set O.
 */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789'(),-./:?";

/*
 * RFC 2152 Set O (optional direct characters).  '*' is a member; without
 * it the decoder would reject a literal '*' as an illegal sequence.
 */
static const char option[] = "!\"#$%&*;<=>@[]^_`{|}";

/* Space, tab, carriage return and line feed may also appear literally. */
static const char spaces[] = " \t\r\n";
    154 
/* Width in bits of one base64 digit and of one UTF-16 code unit. */
#define	BASE64_BIT	6
#define	UTF16_BIT	16

/* Largest base64 digit value, UTF-16 code unit and Unicode code point. */
#define	BASE64_MAX	0x3f
#define	UTF16_MAX	UINT16_C(0xffff)
#define	UTF32_MAX	UINT32_C(0x10ffff)

/* Shift characters: '+' enters base64 mode, '-' leaves it. */
#define	BASE64_IN	'+'
#define	BASE64_OUT	'-'

/* Non-zero when c has bits set above the low seven. */
#define	SHIFT7BIT(c)	((c) >> 7)
/* NUL and '+' are handled specially by the direct/safe tests below. */
#define	ISSPECIAL(c)	((c) == '\0' || (c) == BASE64_IN)

/* 6-bit value of base64 character c, or -1 if c is not a base64 character. */
#define	FINDLEN(ei, c) \
	(SHIFT7BIT((c)) ? -1 : (((ei)->cell[(c)] & EI_MASK) - 1))

/* True when c may appear literally in the input (decoder side). */
#define	ISDIRECT(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_OPTION | EI_SPACE))))

/* True when c is written literally on output (encoder side). */
#define	ISSAFE(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_SPACE))))

/* surrogate pair */
#define	SRG_BASE	UINT32_C(0x10000)
#define	HISRG_MIN	UINT16_C(0xd800)
#define	HISRG_MAX	UINT16_C(0xdbff)
#define	LOSRG_MIN	UINT16_C(0xdc00)
#define	LOSRG_MAX	UINT16_C(0xdfff)
/*
 * Track surrogate pairing for code unit c.  A high surrogate sets
 * st->surrogate; while it is set the next unit must be a low surrogate,
 * otherwise `act' is executed.  The flag is cleared after that second unit
 * unless `act' transfers control away first.
 */
#define	CHECK_SRG(st, c, act)					\
do {								\
	if (!(st)->surrogate) {					\
		if ((c) >= HISRG_MIN && (c) <= HISRG_MAX)	\
			(st)->surrogate = 1;			\
	} else {						\
		if ((c) < LOSRG_MIN || (c) > LOSRG_MAX)		\
			act;					\
		(st)->surrogate = 0;				\
	}							\
} while (/*CONSTCOND*/0)
    194 
    195 static int
    196 _mbtoutf16(_UTF7EncodingInfo * __restrict ei,
    197 	uint16_t * __restrict u16, const char ** __restrict s, size_t n,
    198 	_UTF7State * __restrict psenc, size_t * __restrict nresult)
    199 {
    200 	_UTF7StatePriv *st, sv;
    201 	const char *s0;
    202 	int i, done, len;
    203 
    204 	_DIAGASSERT(ei != NULL);
    205 	_DIAGASSERT(s != NULL && *s != NULL);
    206 	_DIAGASSERT(psenc != NULL);
    207 
    208 	s0 = *s;
    209 	sv = *(st = &psenc->state);
    210 
    211 	for (i = 0, done = 0; done == 0; i++) {
    212 		_DIAGASSERT(i <= st->chlen);
    213 		if (i == st->chlen) {
    214 			if (n-- < 1) {
    215 				*nresult = (size_t)-2;
    216 				*s = s0;
    217 				sv.chlen = st->chlen;
    218 				*st = sv;
    219 				return (0);
    220 			}
    221 			psenc->ch[st->chlen++] = *s0++;
    222 		}
    223 		if (SHIFT7BIT((int)psenc->ch[i]))
    224 			goto ilseq;
    225 		if (!st->mode) {
    226 			if (st->bits > 0 || st->cache > 0)
    227 				return (EINVAL);
    228 			if (psenc->ch[i] == BASE64_IN) {
    229 				st->mode = 1;
    230 			} else {
    231 				if (!ISDIRECT(ei, (int)psenc->ch[i]))
    232 					goto ilseq;
    233 				*u16 = (uint16_t)psenc->ch[i];
    234 				done = 1;
    235 				continue;
    236 			}
    237 		} else {
    238 			if (psenc->ch[i] == BASE64_OUT && st->cache == 0) {
    239 				st->mode = 0;
    240 				*u16 = (uint16_t)BASE64_IN;
    241 				done = 1;
    242 				continue;
    243 			}
    244 			len = FINDLEN(ei, (int)psenc->ch[i]);
    245 			if (len < 0) {
    246 				if (st->bits >= BASE64_BIT)
    247 					return (EINVAL);
    248 				st->mode = 0;
    249 				st->bits = st->cache = 0;
    250 				if (psenc->ch[i] != BASE64_OUT) {
    251 					if (!ISDIRECT(ei, (int)psenc->ch[i]))
    252 						goto ilseq;
    253 					*u16 = (uint16_t)psenc->ch[i];
    254 					done = 1;
    255 				}
    256 			} else {
    257 				st->cache = (st->cache << BASE64_BIT) | len;
    258 				switch (st->bits) {
    259 				case 0: case 2: case 4: case 6: case 8:
    260 					st->bits += BASE64_BIT;
    261 					break;
    262 				case 10: case 12: case 14:
    263 					st->bits -= (UTF16_BIT - BASE64_BIT);
    264 					*u16 = (st->cache >> st->bits)
    265 					    & UTF16_MAX;
    266 					CHECK_SRG(st, *u16, goto ilseq);
    267 					done = 1;
    268 					break;
    269 				default:
    270 					return (EINVAL);
    271 				}
    272 			}
    273 		}
    274 	}
    275 
    276 	if (st->chlen > i)
    277 		return (EINVAL);
    278 	st->chlen = 0;
    279 	*nresult = (size_t)((*u16 == 0) ? 0 : s0 - *s);
    280 	*s = s0;
    281 
    282 	return (0);
    283 
    284 ilseq:
    285 	*nresult = (size_t)-1;
    286 	return (EILSEQ);
    287 }
    288 
    289 static int
    290 _citrus_UTF7_mbrtowc_priv(_UTF7EncodingInfo * __restrict ei,
    291 	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
    292 	_UTF7State * __restrict psenc, size_t * __restrict nresult)
    293 {
    294 	_UTF7StatePriv *st;
    295 	uint32_t u32;
    296 	uint16_t hi, lo;
    297 	size_t siz;
    298 	int err;
    299 
    300 	_DIAGASSERT(ei != NULL);
    301 	/* pwc may be null */
    302 	_DIAGASSERT(s != NULL);
    303 	_DIAGASSERT(psenc != NULL);
    304 
    305 	if (*s == NULL) {
    306 		_citrus_UTF7_init_state(ei, psenc);
    307 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
    308 		return (0);
    309 	}
    310 
    311 	st = &psenc->state;
    312 	if (st->surrogate) {
    313 		hi = (st->cache >> 2) & UTF16_MAX;
    314 		if (hi >= HISRG_MIN && hi <= HISRG_MAX)
    315 			return (EINVAL);
    316 		siz = 0;
    317 	} else {
    318 		err = _mbtoutf16(ei, &hi, s, n, psenc, nresult);
    319 		if (err || *nresult == (size_t)-2)
    320 			return (err);
    321 		n -= *nresult;
    322 		siz = *nresult;
    323 	}
    324 	if (!st->surrogate) {
    325 		u32 = (uint32_t)hi;
    326 	} else {
    327 		err = _mbtoutf16(ei, &lo, s, n, psenc, nresult);
    328 		if (err || *nresult == (size_t)-2)
    329 			return (err);
    330 		_DIAGASSERT(!st->surrogate &&
    331 			lo >= LOSRG_MIN && lo <= LOSRG_MAX);
    332 		hi -= HISRG_MIN;
    333 		lo -= LOSRG_MIN;
    334 		u32 = (hi << 10 | lo) + SRG_BASE;
    335 		*nresult += siz;
    336 	}
    337 	if (pwc != NULL)
    338 		*pwc = (wchar_t)u32;
    339 
    340 	return (0);
    341 }
    342 
/*
 * _utf16tomb --
 *	Encode one UTF-16 code unit as UTF-7 and update the shift state.
 *	The bytes produced are left in psenc->ch[0 .. chlen-1] for the
 *	caller to copy out; chlen must be 0 on entry.
 *
 *	Returns 0 on success, EINVAL if the state is inconsistent, or
 *	EILSEQ if a high surrogate is followed by something other than a
 *	low surrogate.
 */
static __inline int
_utf16tomb(_UTF7EncodingInfo * __restrict ei,
	uint16_t u16, _UTF7State * __restrict psenc)
{
	_UTF7StatePriv *st;
	int bits, i;

	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);

	st = &psenc->state;
	if (st->chlen != 0 || st->bits > BASE64_BIT)
		return (EINVAL);
	CHECK_SRG(st, u16, return (EILSEQ));

	if (ISSAFE(ei, u16)) {
		/* the character is written as itself */
		if (st->mode) {
			/* leaving base64 mode first */
			if (st->bits > 0) {
				/* flush pending bits, zero-padded to 6 bits */
				bits = BASE64_BIT - st->bits;
				i = (st->cache << bits) & BASE64_MAX;
				psenc->ch[st->chlen++] = base64[i];
				st->bits = st->cache = 0;
			}
			/*
			 * An explicit '-' is emitted only when the character
			 * is '-' itself or a base64 character.
			 */
			if (u16 == BASE64_OUT || FINDLEN(ei, u16) >= 0)
				psenc->ch[st->chlen++] = BASE64_OUT;
			st->mode = 0;
		}
		if (st->bits != 0)
			return (EINVAL);
		psenc->ch[st->chlen++] = (char)u16;
		/* a literal '+' is written as "+-" */
		if (u16 == BASE64_IN)
			psenc->ch[st->chlen++] = BASE64_OUT;
	} else {
		/* the character has to be base64 encoded */
		if (!st->mode) {
			if (st->bits > 0)
				return (EINVAL);
			psenc->ch[st->chlen++] = BASE64_IN;
			st->mode = 1;
		}
		/* append 16 bits to the cache and emit each whole 6-bit group */
		st->cache = (st->cache << UTF16_BIT) | u16;
		bits = UTF16_BIT + st->bits;
		/* what does not fill a whole group stays pending in the cache */
		st->bits = bits % BASE64_BIT;
		while ((bits -= BASE64_BIT) >= 0) {
			i = (st->cache >> bits) & BASE64_MAX;
			psenc->ch[st->chlen++] = base64[i];
		}
	}

	return (0);
}
    393 
    394 static int
    395 _citrus_UTF7_wcrtomb_priv(_UTF7EncodingInfo * __restrict ei,
    396 	char * __restrict s, size_t n, wchar_t wchar,
    397 	_UTF7State * __restrict psenc, size_t * __restrict nresult)
    398 {
    399 	_UTF7StatePriv sv, *st;
    400 	uint32_t u32;
    401 	uint16_t u16[2];
    402 	int err, len, i;
    403 	size_t nr;
    404 
    405 	_DIAGASSERT(ei != NULL);
    406 	_DIAGASSERT(s != NULL);
    407 	_DIAGASSERT(psenc != NULL);
    408 	_DIAGASSERT(*nresult != NULL);
    409 
    410 	u32 = (uint32_t)wchar;
    411 	if (u32 <= UTF16_MAX) {
    412 		u16[0] = (uint16_t)u32;
    413 		len = 1;
    414 	} else if (u32 <= UTF32_MAX) {
    415 		u32 -= SRG_BASE;
    416 		u16[0] = (u32 >> 10) + HISRG_MIN;
    417 		u16[1] = ((uint16_t)(u32 & UINT32_C(0x3ff))) + LOSRG_MIN;
    418 		len = 2;
    419 	} else {
    420 		*nresult = (size_t)-1;
    421 		return (EILSEQ);
    422 	}
    423 
    424 	sv = *(st = &psenc->state);
    425 	nr = 0;
    426 	for (i = 0; i < len; i++) {
    427 		err = _utf16tomb(ei, u16[i], psenc);
    428 		switch (err) {
    429 		case 0:
    430 			if (st->chlen <= n)
    431 				break;
    432 			*st = sv;
    433 			err = (E2BIG);
    434 		case EILSEQ:
    435 			*nresult = (size_t)-1;
    436 		/*FALLTHROUGH*/
    437 		default:
    438 			return (err);
    439 		}
    440 		n -= st->chlen;
    441 		memcpy(s, psenc->ch, st->chlen);
    442 		s += st->chlen;
    443 		nr += st->chlen;
    444 		st->chlen = 0;
    445 	}
    446 	*nresult = nr;
    447 
    448 	return (0);
    449 }
    450 
    451 static int
    452 /* ARGSUSED */
    453 _citrus_UTF7_put_state_reset(_UTF7EncodingInfo * __restrict ei,
    454 	char * __restrict s, size_t n, _UTF7State * __restrict psenc,
    455 	size_t * __restrict nresult)
    456 {
    457 	_UTF7StatePriv *st;
    458 	int bits, pos;
    459 
    460 	_DIAGASSERT(ei != NULL);
    461 	_DIAGASSERT(s != NULL);
    462 	_DIAGASSERT(psenc != NULL);
    463 	_DIAGASSERT(nresult != NULL);
    464 
    465 	st = &psenc->state;
    466 	if (st->chlen != 0 || st->bits > BASE64_BIT || st->surrogate)
    467 		return (EINVAL);
    468 
    469 	if (st->mode) {
    470 		if (st->bits > 0) {
    471 			if (n-- < 1)
    472 				return (E2BIG);
    473 			bits = BASE64_BIT - st->bits;
    474 			pos = (st->cache << bits) & BASE64_MAX;
    475 			psenc->ch[st->chlen++] = base64[pos];
    476 			psenc->ch[st->chlen++] = BASE64_OUT;
    477 			st->bits = st->cache = 0;
    478 		}
    479 		st->mode = 0;
    480 	}
    481 	if (st->bits != 0)
    482 		return (EINVAL);
    483 	if (n-- < 1)
    484 		return (E2BIG);
    485 
    486 	_DIAGASSERT(n >= st->chlen);
    487 	*nresult = (size_t)st->chlen;
    488 	if (st->chlen > 0) {
    489 		memcpy(s, psenc->ch, st->chlen);
    490 		st->chlen = 0;
    491 	}
    492 
    493 	return (0);
    494 }
    495 
    496 static __inline int
    497 /*ARGSUSED*/
    498 _citrus_UTF7_stdenc_wctocs(_UTF7EncodingInfo * __restrict ei,
    499 			   _csid_t * __restrict csid,
    500 			   _index_t * __restrict idx, wchar_t wc)
    501 {
    502 	/* ei seem to be unused */
    503 	_DIAGASSERT(csid != NULL);
    504 	_DIAGASSERT(idx != NULL);
    505 
    506 	*csid = 0;
    507 	*idx = (_index_t)wc;
    508 
    509 	return (0);
    510 }
    511 
    512 static __inline int
    513 /*ARGSUSED*/
    514 _citrus_UTF7_stdenc_cstowc(_UTF7EncodingInfo * __restrict ei,
    515 			   wchar_t * __restrict wc,
    516 			   _csid_t csid, _index_t idx)
    517 {
    518 	/* ei seem to be unused */
    519 	_DIAGASSERT(wc != NULL);
    520 
    521 	if (csid != 0)
    522 		return (EILSEQ);
    523 	*wc = (wchar_t)idx;
    524 
    525 	return (0);
    526 }
    527 
/*
 * Tear-down counterpart of _citrus_UTF7_encoding_module_init().  The body
 * is empty: this function performs no work on ei.
 */
static void
/*ARGSUSED*/
_citrus_UTF7_encoding_module_uninit(_UTF7EncodingInfo *ei)
{
	/* ei is unused */
}
    534 
    535 
    536 static int
    537 /*ARGSUSED*/
    538 _citrus_UTF7_encoding_module_init(_UTF7EncodingInfo * __restrict ei,
    539 				  const void * __restrict var, size_t lenvar)
    540 {
    541 	const char *s;
    542 
    543 	_DIAGASSERT(ei != NULL);
    544 	/* var may be null */
    545 
    546 	memset(ei, 0, sizeof(*ei));
    547 
    548 #define FILL(str, flag)				\
    549 do {						\
    550 	for (s = str; *s != '\0'; s++)		\
    551 		ei->cell[*s & 0x7f] |= flag;	\
    552 } while (/*CONSTCOND*/0)
    553 
    554 	FILL(base64, (s - base64) + 1);
    555 	FILL(direct, EI_DIRECT);
    556 	FILL(option, EI_OPTION);
    557 	FILL(spaces, EI_SPACE);
    558 
    559 	return (0);
    560 }
    561 
/* ----------------------------------------------------------------------
 * public interface for ctype
 *
 * NOTE(review): the template header below presumably builds the public
 * entry points from the _FUNCNAME()/_ENCODING_* macros and the private
 * functions defined above -- confirm against citrus_ctype_template.h.
 */

_CITRUS_CTYPE_DECLS(UTF7);
_CITRUS_CTYPE_DEF_OPS(UTF7);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 *
 * NOTE(review): same arrangement as for ctype, presumably -- confirm
 * against citrus_stdenc_template.h.
 */

_CITRUS_STDENC_DECLS(UTF7);
_CITRUS_STDENC_DEF_OPS(UTF7);

#include "citrus_stdenc_template.h"

    579