t_mbrtoc16.c revision 1.3 1 /* $NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Tim J. Robbins
5 * All rights reserved.
6 *
7 * Copyright (c) 2013 Ed Schouten <ed (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
33 */
34
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_mbrtoc16.c,v 1.3 2024/08/20 17:43:09 riastradh Exp $");
37
38 #include <errno.h>
39 #include <inttypes.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <uchar.h>
44
45 #include <atf-c.h>
46
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name for LC_CTYPE.  The calling test case is
 *	failed if setlocale() returns NULL, or if the name it returns
 *	is not string-equal to locale_name.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The locale was accepted; insist it is exactly the one requested. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60
/*
 * Scratch objects shared by every test case below: the UTF-16 code
 * unit written by mbrtoc16(), and the explicit conversion state that
 * each subtest clears with memset() before use.
 */
static char16_t c16;
static mbstate_t s;
63
/*
 * mbrtoc16_c_locale_test
 *
 *	Exercise mbrtoc16() with LC_CTYPE set to "C": NUL and 'A' using
 *	both a NULL state pointer and the file-scope state `s', a
 *	zero-length input, and a two-byte buffer from which only one
 *	byte is expected to be consumed per call.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
{
	size_t n;

	require_lc_ctype("C");

	/* Null wide character, internal state. */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);

	/* Null wide character. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);

	/*
	 * Latin letter A, internal state.  The all-null call comes
	 * first and is itself expected to return 0.
	 */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');

	/* Latin letter A. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');

	/*
	 * Incomplete character sequence: zero bytes of input must
	 * yield (size_t)-2 and leave the sentinel in c16 untouched.
	 */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'z');

	/*
	 * Check that mbrtoc16() doesn't access the buffer when n == 0.
	 * The buffer's first byte is NUL, so reading it would show up
	 * as a return of 0 and c16 == 0 instead of the values below.
	 */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'z');

	/*
	 * Check that mbrtoc16() doesn't read ahead too aggressively:
	 * with "AB" offered, only 'A' may be consumed, so the next
	 * call on "C" must produce 'C' with the same state object.
	 */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
	    (uint16_t)c16, (uint16_t)L'C');
}
117
/*
 * mbrtoc16_iso2022jp_locale_test
 *
 *	Exercise mbrtoc16() with LC_CTYPE set to ja_JP.ISO-2022-JP.
 *	Repeats the basic NUL / 'A' / zero-length / read-ahead checks,
 *	then feeds escape (shift) sequences whole, split across calls,
 *	and interleaved with characters, using the state object `s'.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_iso2022jp_locale_test);
ATF_TC_BODY(mbrtoc16_iso2022jp_locale_test, tc)
{
	size_t n;

	require_lc_ctype("ja_JP.ISO-2022-JP");

	/* Null wide character, internal state. */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);

	/* Null wide character. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);

	/*
	 * Latin letter A, internal state.  The all-null call comes
	 * first and is itself expected to return 0.
	 */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');

	/* Latin letter A. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');

	/*
	 * Incomplete character sequence: zero bytes of input must
	 * yield (size_t)-2 and leave the sentinel in c16 untouched.
	 */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'z');

	/*
	 * Check that mbrtoc16() doesn't access the buffer when n == 0.
	 * The buffer's first byte is NUL, so reading it would show up
	 * as a return of 0 and c16 == 0 instead of the values below.
	 */
	c16 = L'z';
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'z');

	/*
	 * Check that mbrtoc16() doesn't read ahead too aggressively:
	 * with "AB" offered, only 'A' may be consumed, so the next
	 * call on "C" must produce 'C' with the same state object.
	 */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'A');
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%04"PRIx16" L'C'=U+%04"PRIx16,
	    (uint16_t)c16, (uint16_t)L'C');

	/* Incomplete character sequence (shift sequence only). */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);

	/* Same as above, but complete (U+00A5 YEN SIGN). */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J\x5c", 4, &s)), 4,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);

	/*
	 * Test restarting behaviour: the shift sequence is split after
	 * its second byte and finished, together with the character,
	 * by the next call.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "J\x5c", 2, &s)), 2, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);

	/*
	 * Test shift sequence state in various increments:
	 * 1. U+0041 LATIN CAPITAL LETTER A
	 * 2. (shift ISO/IEC 646:JP) U+00A5 YEN SIGN
	 * 3. U+00A5 YEN SIGN
	 * 4. (shift JIS X 0208) U+30A2 KATAKANA LETTER A
	 * 5. U+30A2 KATAKANA LETTER A
	 * 6. (shift to initial state) U+0000 NUL
	 *
	 * The state object is cleared once here and then carried
	 * through all of the calls below.
	 */
	memset(&s, 0, sizeof(s));
	c16 = 0;
	/* Step 1: only 'A' is consumed; the trailing shift is not. */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A\x1b(J", 4, &s)), 1,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16, (uint16_t)c16);
	/* Step 2: the shift alone stores nothing, then 0x5c is YEN SIGN. */
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x5c", 2, &s)), 1,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
	/* Step 3: a second YEN SIGN; the partial escape after it is left. */
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x1b$", 3, &s)), 1,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
	/* Step 4: shift fed one byte, then the rest plus the character. */
	c16 = 0x1234;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b", 1, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x1234, "c16=U+%04"PRIx16, (uint16_t)c16);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "$B\x25\x22", 4, &s)), 4,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
	/* Step 5: the two-byte character split across two calls. */
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x25", 1, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x22\x1b(B\x00", 5, &s)), 1,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
	/* Step 6: split shift back to the initial state, then NUL. */
	c16 = 0;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
	    "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
	c16 = 42;
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "B\x00", 2, &s)), 0, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
}
240
/*
 * mbrtoc16_iso_8859_1_test
 *
 *	With LC_CTYPE set to en_US.ISO8859-1, check that the single
 *	byte 0xa4 is consumed and converted to U+00A4.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
{
	size_t n;

	require_lc_ctype("en_US.ISO8859-1");

	/* Currency sign. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
}
253
/*
 * mbrtoc16_iso_8859_15_test
 *
 *	With LC_CTYPE set to en_US.ISO8859-15, check that the single
 *	byte 0xa4 is consumed and converted to U+20AC, the same input
 *	byte that the ISO8859-1 test expects to yield U+00A4.
 */
ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
{
	size_t n;

	require_lc_ctype("en_US.ISO8859-15");

	/* Euro sign. */
	memset(&s, 0, sizeof(s));
	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
	ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
}
266
267 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
268 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
269 {
270 size_t n;
271
272 require_lc_ctype("en_US.UTF-8");
273
274 /* Null wide character, internal state. */
275 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
276 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
277 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
278
279 /* Null wide character. */
280 memset(&s, 0, sizeof(s));
281 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
282 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
283
284 /* Latin letter A, internal state. */
285 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
286 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
287 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
288 (uint16_t)c16, (uint16_t)L'A');
289
290 /* Latin letter A. */
291 memset(&s, 0, sizeof(s));
292 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
293 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
294 (uint16_t)c16, (uint16_t)L'A');
295
296 /* Incomplete character sequence (zero length). */
297 c16 = L'z';
298 memset(&s, 0, sizeof(s));
299 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
300 "n=%zu", n);
301 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
302 (uint16_t)c16, (uint16_t)L'z');
303
304 /* Incomplete character sequence (truncated double-byte). */
305 memset(&s, 0, sizeof(s));
306 c16 = 0;
307 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
308 "n=%zu", n);
309
310 /* Same as above, but complete. */
311 memset(&s, 0, sizeof(s));
312 c16 = 0;
313 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
314 "n=%zu", n);
315 ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
316
317 /* Test restarting behaviour. */
318 memset(&s, 0, sizeof(s));
319 c16 = 0;
320 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
321 "n=%zu", n);
322 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
323 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
324 ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
325
326 /* Surrogate pair. */
327 memset(&s, 0, sizeof(s));
328 c16 = 0;
329 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
330 "n=%zu", n);
331 ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
332 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
333 "n=%zu", n);
334 ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
335
336 /* Letter e with acute, precomposed. */
337 memset(&s, 0, sizeof(s));
338 c16 = 0;
339 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
340 "n=%zu", n);
341 ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
342
343 /* Letter e with acute, combined. */
344 memset(&s, 0, sizeof(s));
345 c16 = 0;
346 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
347 "n=%zu", n);
348 ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
349 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
350 "n=%zu", n);
351 ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
352 }
353
/*
 * Test program entry point: add each mbrtoc16() test case defined in
 * this file to the test program `tp'.
 */
ATF_TP_ADD_TCS(tp)
{

	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_iso2022jp_locale_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);

	return (atf_no_error());
}
365