/* $NetBSD: t_mbrtoc16.c,v 1.2 2024/08/19 16:24:05 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Tim J. Robbins
5 * All rights reserved.
6 *
7 * Copyright (c) 2013 Ed Schouten <ed (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
33 */
34
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_mbrtoc16.c,v 1.2 2024/08/19 16:24:05 riastradh Exp $");
37
38 #include <errno.h>
39 #include <inttypes.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <uchar.h>
44
45 #include <atf-c.h>
46
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name for LC_CTYPE.  The test case is failed if
 *	setlocale(3) returns NULL, or if the locale name it reports back
 *	is not exactly the one that was requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The name handed back by setlocale(3) must match the request. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60
/*
 * Scratch objects shared by the test cases in this file:
 *
 *	c16	receives the UTF-16 code unit stored by mbrtoc16()
 *	s	explicit conversion state; the tests clear it with
 *		memset() at the start of each scenario that uses it
 */
static char16_t c16;
static mbstate_t s;
63
64 ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
65 ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
66 {
67 size_t n;
68
69 require_lc_ctype("C");
70
71 /* Null wide character, internal state. */
72 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
73 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
74
75 /* Null wide character. */
76 memset(&s, 0, sizeof(s));
77 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
78 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
79
80 /* Latin letter A, internal state. */
81 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
82 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
83 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
84 (uint16_t)c16, (uint16_t)L'A');
85
86 /* Latin letter A. */
87 memset(&s, 0, sizeof(s));
88 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
89 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
90 (uint16_t)c16, (uint16_t)L'A');
91
92 /* Incomplete character sequence. */
93 c16 = L'z';
94 memset(&s, 0, sizeof(s));
95 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
96 "n=%zu", n);
97 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
98 (uint16_t)c16, (uint16_t)L'z');
99
100 /* Check that mbrtoc16() doesn't access the buffer when n == 0. */
101 c16 = L'z';
102 memset(&s, 0, sizeof(s));
103 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
104 "n=%zu", n);
105 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
106 (uint16_t)c16, (uint16_t)L'z');
107
108 /* Check that mbrtoc16() doesn't read ahead too aggressively. */
109 memset(&s, 0, sizeof(s));
110 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
111 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
112 (uint16_t)c16, (uint16_t)L'A');
113 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
114 ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
115 (uint16_t)c16, (uint16_t)L'C');
116 }
117
118 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso2022jp_locale_test);
119 ATF_TC_BODY(mbrtoc16_iso2022jp_locale_test, tc)
120 {
121 size_t n;
122
123 require_lc_ctype("ja_JP.ISO-2022-JP");
124
125 /* Null wide character, internal state. */
126 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
127 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
128
129 /* Null wide character. */
130 memset(&s, 0, sizeof(s));
131 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
132 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
133
134 /* Latin letter A, internal state. */
135 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
136 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
137 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
138 (uint16_t)c16, (uint16_t)L'A');
139
140 /* Latin letter A. */
141 memset(&s, 0, sizeof(s));
142 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
143 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
144 (uint16_t)c16, (uint16_t)L'A');
145
146 /* Incomplete character sequence. */
147 c16 = L'z';
148 memset(&s, 0, sizeof(s));
149 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
150 "n=%zu", n);
151 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
152 (uint16_t)c16, (uint16_t)L'z');
153
154 /* Check that mbrtoc16() doesn't access the buffer when n == 0. */
155 c16 = L'z';
156 memset(&s, 0, sizeof(s));
157 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
158 "n=%zu", n);
159 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%04"PRIx16" L'z'=U+%04"PRIx16,
160 (uint16_t)c16, (uint16_t)L'z');
161
162 /* Check that mbrtoc16() doesn't read ahead too aggressively. */
163 memset(&s, 0, sizeof(s));
164 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
165 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16" L'A'=U+%04"PRIx16,
166 (uint16_t)c16, (uint16_t)L'A');
167 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
168 ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%04"PRIx16" L'C'=U+%04"PRIx16,
169 (uint16_t)c16, (uint16_t)L'C');
170
171 /* Incomplete character sequence (shift sequence only). */
172 memset(&s, 0, sizeof(s));
173 c16 = 0;
174 atf_tc_expect_fail("PR lib/58618:"
175 " mbrtocN(3) fails to keep shift state");
176 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
177 "n=%zu", n);
178 atf_tc_expect_pass();
179 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
180
181 /* Same as above, but complete (U+00A5 YEN SIGN). */
182 memset(&s, 0, sizeof(s));
183 c16 = 0;
184 atf_tc_expect_fail("PR lib/58618:"
185 " mbrtocN(3) fails to keep shift state");
186 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J\x5c", 4, &s)), 4,
187 "n=%zu", n);
188 ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);
189 atf_tc_expect_pass();
190
191 /* Test restarting behaviour. */
192 memset(&s, 0, sizeof(s));
193 c16 = 0;
194 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
195 "n=%zu", n);
196 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
197 atf_tc_expect_fail("PR lib/58618:"
198 " mbrtocN(3) fails to keep shift state");
199 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "J\x5c", 2, &s)), 2, "n=%zu", n);
200 ATF_CHECK_EQ_MSG(c16, 0xa5, "c16=U+%04"PRIx16, (uint16_t)c16);
201 atf_tc_expect_pass();
202
203 /*
204 * Test shift sequence state in various increments:
205 * 1. U+0042 LATIN CAPITAL LETTER A
206 * 2. (shift ISO/IEC 646:JP) U+00A5 YEN SIGN
207 * 3. U+00A5 YEN SIGN
208 * 4. (shift JIS X 0208) U+30A2 KATAKANA LETTER A
209 * 5. U+30A2 KATAKANA LETTER A
210 * 6. (shift to initial state) U+0000 NUL
211 */
212 memset(&s, 0, sizeof(s));
213 c16 = 0;
214 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A\x1b(J", 4, &s)), 1,
215 "n=%zu", n);
216 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%04"PRIx16, (uint16_t)c16);
217 c16 = 0;
218 atf_tc_expect_fail("PR lib/58618:"
219 " mbrtocN(3) fails to keep shift state");
220 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(J", 3, &s)), (size_t)-2,
221 "n=%zu", n);
222 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
223 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x5c", 2, &s)), 1,
224 "n=%zu", n);
225 ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
226 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x5c\x1b$", 3, &s)), 1,
227 "n=%zu", n);
228 ATF_CHECK_EQ_MSG(c16, 0x00a5, "c16=U+%04"PRIx16, (uint16_t)c16);
229 c16 = 0x1234;
230 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b", 1, &s)), (size_t)-2,
231 "n=%zu", n);
232 ATF_CHECK_EQ_MSG(c16, 0x1234, "c16=U+%04"PRIx16, (uint16_t)c16);
233 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "$B\x25\x22", 4, &s)), 4,
234 "n=%zu", n);
235 ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
236 c16 = 0;
237 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x25", 1, &s)), (size_t)-2,
238 "n=%zu", n);
239 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
240 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x22\x1b(B\x00", 5, &s)), 1,
241 "n=%zu", n);
242 ATF_CHECK_EQ_MSG(c16, 0x30a2, "c16=U+%04"PRIx16, (uint16_t)c16);
243 atf_tc_expect_pass();
244 c16 = 0;
245 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x1b(", 2, &s)), (size_t)-2,
246 "n=%zu", n);
247 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
248 c16 = 42;
249 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "B\x00", 2, &s)), 0, "n=%zu", n);
250 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%04"PRIx16, (uint16_t)c16);
251 }
252
253 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
254 ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
255 {
256 size_t n;
257
258 require_lc_ctype("en_US.ISO8859-1");
259
260 /* Currency sign. */
261 memset(&s, 0, sizeof(s));
262 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
263 ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
264 }
265
266 ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
267 ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
268 {
269 size_t n;
270
271 require_lc_ctype("en_US.ISO8859-15");
272
273 /* Euro sign. */
274 memset(&s, 0, sizeof(s));
275 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
276 ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
277 }
278
279 ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
280 ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
281 {
282 size_t n;
283
284 require_lc_ctype("en_US.UTF-8");
285
286 /* Null wide character, internal state. */
287 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
288 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
289 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
290
291 /* Null wide character. */
292 memset(&s, 0, sizeof(s));
293 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
294 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
295
296 /* Latin letter A, internal state. */
297 ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
298 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
299 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
300 (uint16_t)c16, (uint16_t)L'A');
301
302 /* Latin letter A. */
303 memset(&s, 0, sizeof(s));
304 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
305 ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
306 (uint16_t)c16, (uint16_t)L'A');
307
308 /* Incomplete character sequence (zero length). */
309 c16 = L'z';
310 memset(&s, 0, sizeof(s));
311 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
312 "n=%zu", n);
313 ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
314 (uint16_t)c16, (uint16_t)L'z');
315
316 /* Incomplete character sequence (truncated double-byte). */
317 memset(&s, 0, sizeof(s));
318 c16 = 0;
319 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
320 "n=%zu", n);
321
322 /* Same as above, but complete. */
323 memset(&s, 0, sizeof(s));
324 c16 = 0;
325 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
326 "n=%zu", n);
327 ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
328
329 /* Test restarting behaviour. */
330 memset(&s, 0, sizeof(s));
331 c16 = 0;
332 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
333 "n=%zu", n);
334 ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
335 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
336 ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
337
338 /* Surrogate pair. */
339 memset(&s, 0, sizeof(s));
340 c16 = 0;
341 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
342 "n=%zu", n);
343 ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
344 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
345 "n=%zu", n);
346 ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
347
348 /* Letter e with acute, precomposed. */
349 memset(&s, 0, sizeof(s));
350 c16 = 0;
351 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
352 "n=%zu", n);
353 ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
354
355 /* Letter e with acute, combined. */
356 memset(&s, 0, sizeof(s));
357 c16 = 0;
358 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
359 "n=%zu", n);
360 ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
361 ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
362 "n=%zu", n);
363 ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
364 }
365
366 ATF_TP_ADD_TCS(tp)
367 {
368
369 ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
370 ATF_TP_ADD_TC(tp, mbrtoc16_iso2022jp_locale_test);
371 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
372 ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
373 ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
374
375 return (atf_no_error());
376 }
377