Home | History | Annotate | Line # | Download | only in locale
      1 /*	$NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002 Tim J. Robbins
      5  * All rights reserved.
      6  *
      7  * Copyright (c) 2013 Ed Schouten <ed (at) FreeBSD.org>
      8  * All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 /*
     32  * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 __RCSID("$NetBSD: t_c16rtomb.c,v 1.6 2024/08/19 16:22:10 riastradh Exp $");
     37 
     38 #include <errno.h>
     39 #include <limits.h>
     40 #include <locale.h>
     41 #include <stdio.h>
     42 #include <string.h>
     43 #include <uchar.h>
     44 
     45 #include <atf-c.h>
     46 
/*
 * Switch LC_CTYPE to locale_name for the calling test case.
 *
 * The test case is failed via atf_tc_fail() when setlocale() returns
 * NULL, and via ATF_REQUIRE_EQ_MSG() when the name setlocale() reports
 * back is not exactly the one that was requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The reported locale name must match the request verbatim. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
     60 
/*
 * Scratch objects shared by all test cases; each case re-initialises
 * them with memset() before use.
 */

/* Conversion state handed to c16rtomb(). */
static mbstate_t s;

/*
 * Output buffer for c16rtomb().  Test cases pre-fill it with 0xcc so
 * they can tell which bytes were written.
 * NOTE(review): the factor 7 presumably covers the seven-step
 * ISO-2022-JP sequence, with one extra byte for the trailing 0xcc
 * sentinel -- confirm.
 */
static char buf[(7 * MB_LEN_MAX) + 1];
     63 
     64 ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
     65 ATF_TC_BODY(c16rtomb_c_locale_test, tc)
     66 {
     67 	size_t n;
     68 
     69 	require_lc_ctype("C");
     70 
     71 	/*
     72 	 * If the buffer argument is NULL, c16 is implicitly 0,
     73 	 * c16rtomb() resets its internal state.
     74 	 */
     75 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
     76 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
     77 
     78 	/* Null wide character. */
     79 	memset(&s, 0, sizeof(s));
     80 	memset(buf, 0xcc, sizeof(buf));
     81 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
     82 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
     83 		(unsigned char)buf[1] == 0xcc),
     84 	    "buf=[%02x %02x]", buf[0], buf[1]);
     85 
     86 	/* Latin letter A, internal state. */
     87 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
     88 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
     89 
     90 	/* Latin letter A. */
     91 	memset(&s, 0, sizeof(s));
     92 	memset(buf, 0xcc, sizeof(buf));
     93 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n);
     94 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
     95 		(unsigned char)buf[1] == 0xcc),
     96 	    "buf=[%02x %02x]", buf[0], buf[1]);
     97 
     98 	/* Unicode character 'Pile of poo'. */
     99 	memset(&s, 0, sizeof(s));
    100 	memset(buf, 0xcc, sizeof(buf));
    101 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
    102 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
    103 	    "n=%zu", n);
    104 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
    105 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
    106 
    107 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
    108 	memset(&s, 0, sizeof(s));
    109 	memset(buf, 0xcc, sizeof(buf));
    110 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
    111 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1, "n=%zu", n);
    112 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
    113 		(unsigned char)buf[1] == 0xcc),
    114 	    "buf=[%02x %02x]", buf[0], buf[1]);
    115 }
    116 
    117 ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test);
    118 ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc)
    119 {
    120 	char *p;
    121 	size_t n;
    122 
    123 	require_lc_ctype("ja_JP.ISO-2022-JP");
    124 
    125 	/*
    126 	 * If the buffer argument is NULL, c16 is implicitly 0,
    127 	 * c16rtomb() resets its internal state.
    128 	 */
    129 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
    130 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
    131 
    132 	/* Null wide character. */
    133 	memset(&s, 0, sizeof(s));
    134 	memset(buf, 0xcc, sizeof(buf));
    135 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
    136 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
    137 		(unsigned char)buf[1] == 0xcc),
    138 	    "buf=[%02x %02x]", buf[0], buf[1]);
    139 
    140 	/* Latin letter A, internal state. */
    141 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
    142 	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
    143 
    144 	/*
    145 	 * 1. U+0042 LATIN CAPITAL LETTER A
    146 	 * 2. U+00A5 YEN SIGN
    147 	 * 3. U+00A5 YEN SIGN (again, no shift needed)
    148 	 * 4. U+30A2 KATAKANA LETTER A
    149 	 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
    150 	 * 6. incomplete UTF-16 surrogate pair -- no output
    151 	 * 7. U+0000 NUL (plus shift sequence to initial state)
    152 	 */
    153 	memset(&s, 0, sizeof(s));
    154 	memset(buf, 0xcc, sizeof(buf));
    155 	p = buf;
    156 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */
    157 	p += 1;
    158 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */
    159 	p += 4;
    160 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */
    161 	p += 1;
    162 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */
    163 	p += 5;
    164 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */
    165 	p += 2;
    166 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */
    167 	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */
    168 	p += 4;
    169 	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
    170 		(unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
    171 		(unsigned char)buf[2] == '(' &&
    172 		(unsigned char)buf[3] == 'J' &&
    173 		(unsigned char)buf[4] == 0x5c && /* YEN SIGN */
    174 		(unsigned char)buf[5] == 0x5c && /* YEN SIGN */
    175 		(unsigned char)buf[6] == 0x1b && /* shift JIS X 0208 */
    176 		(unsigned char)buf[7] == '$' &&
    177 		(unsigned char)buf[8] == 'B' &&
    178 		(unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
    179 		(unsigned char)buf[10] == 0x22 &&
    180 		(unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
    181 		(unsigned char)buf[12] == 0x22 &&
    182 		(unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
    183 		(unsigned char)buf[14] == '(' &&
    184 		(unsigned char)buf[15] == 'B' &&
    185 		(unsigned char)buf[16] == '\0' &&
    186 		(unsigned char)buf[17] == 0xcc),
    187 	    "buf=[%02x %02x %02x %02x  %02x %02x %02x %02x "
    188 	    " %02x %02x %02x %02x  %02x %02x %02x %02x "
    189 	    " %02x %02x]",
    190 	    buf[0], buf[1], buf[2], buf[3],
    191 	    buf[4], buf[5], buf[6], buf[7],
    192 	    buf[8], buf[9], buf[10], buf[11],
    193 	    buf[12], buf[13], buf[14], buf[15],
    194 	    buf[16], buf[17]);
    195 }
    196 
    197 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
    198 ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
    199 {
    200 	size_t n;
    201 
    202 	require_lc_ctype("en_US.ISO8859-1");
    203 
    204 	/* Unicode character 'Euro sign'. */
    205 	memset(&s, 0, sizeof(s));
    206 	memset(buf, 0xcc, sizeof(buf));
    207 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1,
    208 	    "n=%zu", n);
    209 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
    210 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
    211 }
    212 
    213 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test);
    214 ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc)
    215 {
    216 	size_t n;
    217 
    218 	require_lc_ctype("en_US.ISO8859-15");
    219 
    220 	/* Unicode character 'Euro sign'. */
    221 	memset(&s, 0, sizeof(s));
    222 	memset(buf, 0xcc, sizeof(buf));
    223 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n);
    224 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
    225 		(unsigned char)buf[1] == 0xcc),
    226 	    "buf=[%02x %02x]", buf[0], buf[1]);
    227 }
    228 
    229 ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test);
    230 ATF_TC_BODY(c16rtomb_utf_8_test, tc)
    231 {
    232 	size_t n;
    233 
    234 	require_lc_ctype("en_US.UTF-8");
    235 
    236 	/* Unicode character 'Pile of poo'. */
    237 	memset(&s, 0, sizeof(s));
    238 	memset(buf, 0xcc, sizeof(buf));
    239 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
    240 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n);
    241 	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
    242 		(unsigned char)buf[1] == 0x9f &&
    243 		(unsigned char)buf[2] == 0x92 &&
    244 		(unsigned char)buf[3] == 0xa9 &&
    245 		(unsigned char)buf[4] == 0xcc),
    246 	    "buf=[%02x %02x %02x %02x %02x]",
    247 	    buf[0], buf[1], buf[2], buf[3], buf[4]);
    248 
    249 	/* Invalid code; 'Pile of poo' without the trail surrogate. */
    250 	memset(&s, 0, sizeof(s));
    251 	memset(buf, 0xcc, sizeof(buf));
    252 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
    253 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1,
    254 	    "n=%zu", n);
    255 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
    256 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
    257 
    258 	/* Invalid code; 'Pile of poo' without the lead surrogate. */
    259 	memset(&s, 0, sizeof(s));
    260 	memset(buf, 0xcc, sizeof(buf));
    261 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
    262 	    "n=%zu", n);
    263 	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
    264 	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
    265 
    266 	/* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
    267 	memset(&s, 0, sizeof(s));
    268 	memset(buf, 0xcc, sizeof(buf));
    269 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
    270 	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'\0', &s)), 1,
    271 	    "n=%zu", n);
    272 	ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
    273 		(unsigned char)buf[1] == 0xcc),
    274 	    "buf=[%02x %02x]", buf[0], buf[1]);
    275 }
    276 
    277 ATF_TP_ADD_TCS(tp)
    278 {
    279 
    280 	ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
    281 	ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test);
    282 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
    283 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
    284 	ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
    285 
    286 	return (atf_no_error());
    287 }
    288