t_mbrtoc16.c revision 1.1 1 /* $NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Tim J. Robbins
5 * All rights reserved.
6 *
7 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
33 */
34
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
37
38 #include <errno.h>
39 #include <inttypes.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <uchar.h>
44
45 #include <atf-c.h>
46
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name for the LC_CTYPE category and abort the
 *	current test case unless setlocale() both succeeds and returns
 *	a string equal to the requested name.
 *
 *	locale_name: locale to install, e.g. "C" or "en_US.UTF-8".
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set;

	/*
	 * setlocale() is not specified to set errno.  Clear it first so
	 * that the failure message below cannot report a stale value
	 * left behind by some earlier, unrelated call.
	 */
	errno = 0;
	lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	/*
	 * NOTE: the strcmp() below relies on atf_tc_fail() not returning
	 * when setlocale() has handed back NULL.
	 */
	if (lc_ctype_set == NULL)
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);

	/* The locale actually installed must be the one asked for. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60
61 static mbstate_t s;	/* explicit conversion state; test cases zero it with memset() before passing it */
62 static char16_t c16;	/* output slot handed to mbrtoc16() and inspected by the checks */
63
64 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
65 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
66 {
67 size_t n;
68
69 require_lc_ctype("C");
70
71 /* Null wide character, internal state. */
72 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
73 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
74
75 /* Null wide character. */
76 memset(&s, 0, sizeof(s));
77 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
78 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
79
80 /* Latin letter A, internal state. */
81 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
82 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
83 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
84 (uint16_t)c16, (uint16_t)L'A');
85
86 /* Latin letter A. */
87 memset(&s, 0, sizeof(s));
88 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
89 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
90 (uint16_t)c16, (uint16_t)L'A');
91
92 /* Incomplete character sequence. */
93 c16 = L'z';
94 memset(&s, 0, sizeof(s));
95 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
96 "n=%zu", n);
97 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
98 (uint16_t)c16, (uint16_t)L'z');
99
100 /* Check that mbrtoc16() doesn't access the buffer when n == 0. */
101 c16 = L'z';
102 memset(&s, 0, sizeof(s));
103 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
104 "n=%zu", n);
105 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
106 (uint16_t)c16, (uint16_t)L'z');
107
108 /* Check that mbrtoc16() doesn't read ahead too aggressively. */
109 memset(&s, 0, sizeof(s));
110 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
111 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
112 (uint16_t)c16, (uint16_t)L'A');
113 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
114 ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
115 (uint16_t)c16, (uint16_t)L'C');
116
117 }
118
119 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
120 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
121 {
122 size_t n;
123
124 require_lc_ctype("en_US.ISO8859-1");
125
126 /* Currency sign. */
127 memset(&s, 0, sizeof(s));
128 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
129 ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
130 }
131
132 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
133 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
134 {
135 size_t n;
136
137 require_lc_ctype("en_US.ISO8859-15");
138
139 /* Euro sign. */
140 memset(&s, 0, sizeof(s));
141 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
142 ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
143 }
144
145 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
146 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
147 {
148 size_t n;
149
150 require_lc_ctype("en_US.UTF-8");
151
152 /* Null wide character, internal state. */
153 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
154 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
155 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
156
157 /* Null wide character. */
158 memset(&s, 0, sizeof(s));
159 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
160 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
161
162 /* Latin letter A, internal state. */
163 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
164 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
165 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
166 (uint16_t)c16, (uint16_t)L'A');
167
168 /* Latin letter A. */
169 memset(&s, 0, sizeof(s));
170 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
171 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
172 (uint16_t)c16, (uint16_t)L'A');
173
174 /* Incomplete character sequence (zero length). */
175 c16 = L'z';
176 memset(&s, 0, sizeof(s));
177 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
178 "n=%zu", n);
179 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
180 (uint16_t)c16, (uint16_t)L'z');
181
182 /* Incomplete character sequence (truncated double-byte). */
183 memset(&s, 0, sizeof(s));
184 c16 = 0;
185 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
186 "n=%zu", n);
187
188 /* Same as above, but complete. */
189 memset(&s, 0, sizeof(s));
190 c16 = 0;
191 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
192 "n=%zu", n);
193 ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
194
195 /* Test restarting behaviour. */
196 memset(&s, 0, sizeof(s));
197 c16 = 0;
198 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
199 "n=%zu", n);
200 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
201 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
202 ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
203
204 /* Surrogate pair. */
205 memset(&s, 0, sizeof(s));
206 c16 = 0;
207 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
208 "n=%zu", n);
209 ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
210 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
211 "n=%zu", n);
212 ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
213
214 /* Letter e with acute, precomposed. */
215 memset(&s, 0, sizeof(s));
216 c16 = 0;
217 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
218 "n=%zu", n);
219 ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
220
221 /* Letter e with acute, combined. */
222 memset(&s, 0, sizeof(s));
223 c16 = 0;
224 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
225 "n=%zu", n);
226 ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
227 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
228 "n=%zu", n);
229 ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
230 }
231
232 ATF_TP_ADD_TCS(tp)
233 {
234
235 ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
236 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
237 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
238 ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
239
240 return (atf_no_error());
241 }
242