t_c8rtomb.c revision 1.4 1 /* $NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2002 Tim J. Robbins
5 * All rights reserved.
6 *
7 * Copyright (c) 2013 Ed Schouten <ed (at) FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Test program for c8rtomb() as specified by C23.
33 */
34
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $");
37
38 #include <errno.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <uchar.h>
44
45 #include <atf-c.h>
46
/*
 * Select locale_name for the LC_CTYPE category and abort the calling
 * test case unless setlocale() both succeeds and reports exactly the
 * requested name back.
 */
static void
require_lc_ctype(const char *locale_name)
{
	char *lc_ctype_set = setlocale(LC_CTYPE, locale_name);

	/*
	 * NOTE(review): the strcmp() below would dereference NULL if
	 * atf_tc_fail() returned; this presumably relies on it not
	 * returning -- confirm against atf-c(3).
	 */
	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	/* The name handed back must match the one that was asked for. */
	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60
/*
 * Output buffer and conversion state shared by the test cases below.
 * Each test refills buf with 0xcc and zeroes s via memset() before
 * passing them to c8rtomb().
 */
static char buf[7*MB_LEN_MAX + 1];
static mbstate_t s;
63
64 ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test);
65 ATF_TC_BODY(c8rtomb_c_locale_test, tc)
66 {
67 size_t n;
68
69 require_lc_ctype("C");
70
71 /*
72 * If the buffer argument is NULL, c8 is implicitly 0,
73 * c8rtomb() resets its internal state.
74 */
75 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
76 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
77 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
78 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
79 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
80 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
81 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
82 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
83 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
84
85 /* Null wide character. */
86 memset(&s, 0, sizeof(s));
87 memset(buf, 0xcc, sizeof(buf));
88 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
89 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
90 (unsigned char)buf[1] == 0xcc),
91 "buf=[%02x %02x]", buf[0], buf[1]);
92
93 /* Latin letter A, internal state. */
94 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
95 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
96
97 /* Latin letter A. */
98 memset(&s, 0, sizeof(s));
99 memset(buf, 0xcc, sizeof(buf));
100 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), 1, "n=%zu", n);
101 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
102 (unsigned char)buf[1] == 0xcc),
103 "buf=[%02x %02x]", buf[0], buf[1]);
104
105 /* Unicode character 'Pile of poo'. */
106 memset(&s, 0, sizeof(s));
107 memset(buf, 0xcc, sizeof(buf));
108 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
109 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
110 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
111 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), (size_t)-1,
112 "n=%zu", n);
113 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
114 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
115
116 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
117 memset(&s, 0, sizeof(s));
118 memset(buf, 0xcc, sizeof(buf));
119 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
120 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
121 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
122 (unsigned char)buf[1] == 0xcc),
123 "buf=[%02x %02x]", buf[0], buf[1]);
124
125 memset(&s, 0, sizeof(s));
126 memset(buf, 0xcc, sizeof(buf));
127 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
128 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
129 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
130 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
131 (unsigned char)buf[1] == 0xcc),
132 "buf=[%02x %02x]", buf[0], buf[1]);
133
134 memset(&s, 0, sizeof(s));
135 memset(buf, 0xcc, sizeof(buf));
136 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
137 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
138 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
139 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
140 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
141 (unsigned char)buf[1] == 0xcc),
142 "buf=[%02x %02x]", buf[0], buf[1]);
143 }
144
145 ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test);
146 ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc)
147 {
148 char *p;
149 size_t n;
150
151 require_lc_ctype("ja_JP.ISO-2022-JP");
152
153 /*
154 * If the buffer argument is NULL, c8 is implicitly 0,
155 * c8rtomb() resets its internal state.
156 */
157 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
158 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
159 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
160 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
161 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
162 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
163 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
164 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
165 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
166
167 /* Null wide character. */
168 memset(&s, 0, sizeof(s));
169 memset(buf, 0xcc, sizeof(buf));
170 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
171 ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
172 (unsigned char)buf[1] == 0xcc),
173 "buf=[%02x %02x]", buf[0], buf[1]);
174
175 /* Latin letter A, internal state. */
176 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
177 ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
178
179 /*
180 * 1. U+0042 LATIN CAPITAL LETTER A
181 * 2. U+00A5 YEN SIGN
182 * 3. U+00A5 YEN SIGN (again, no shift needed)
183 * 4. U+30A2 KATAKANA LETTER A
184 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
185 * 6. incomplete UTF-8 multibyte sequence -- no output
186 * 7. U+0000 NUL (plus shift sequence to initial state)
187 */
188 memset(&s, 0, sizeof(s));
189 memset(buf, 0xcc, sizeof(buf));
190 p = buf;
191 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */
192 p += 1;
193 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */
194 atf_tc_expect_fail("PR lib/58612:"
195 " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences");
196 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n);
197 p += 4;
198 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */
199 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n);
200 p += 1;
201 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */
202 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
203 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 4, "n=%zu", n);
204 p += 5;
205 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */
206 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
207 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n);
208 p += 2;
209 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */
210 ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
211 ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */
212 p += 4;
213 ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
214 (unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
215 (unsigned char)buf[2] == '(' &&
216 (unsigned char)buf[3] == 'J' &&
217 (unsigned char)buf[4] == 0x5c && /* YEN SIGN */
218 (unsigned char)buf[5] == 0x5c && /* YEN SIGN */
219 (unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1978 */
220 (unsigned char)buf[7] == '$' &&
221 (unsigned char)buf[8] == 'B' &&
222 (unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
223 (unsigned char)buf[10] == 0x22 &&
224 (unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
225 (unsigned char)buf[12] == 0x22 &&
226 (unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
227 (unsigned char)buf[14] == '(' &&
228 (unsigned char)buf[15] == 'B' &&
229 (unsigned char)buf[16] == '\0' &&
230 (unsigned char)buf[17] == 0xcc),
231 "buf=[%02x %02x %02x %02x %02x %02x %02x %02x "
232 " %02x %02x %02x %02x %02x %02x %02x %02x "
233 " %02x %02x]",
234 buf[0], buf[1], buf[2], buf[3],
235 buf[4], buf[5], buf[6], buf[7],
236 buf[8], buf[9], buf[10], buf[11],
237 buf[12], buf[13], buf[14], buf[15],
238 buf[16], buf[17]);
239 }
240
241 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test);
242 ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc)
243 {
244 size_t n;
245
246 require_lc_ctype("en_US.ISO8859-1");
247
248 /* Unicode character 'Euro sign'. */
249 memset(&s, 0, sizeof(s));
250 memset(buf, 0xcc, sizeof(buf));
251 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
252 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
253 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), (size_t)-1,
254 "n=%zu", n);
255 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
256 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
257 }
258
259 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_15_test);
260 ATF_TC_BODY(c8rtomb_iso_8859_15_test, tc)
261 {
262 size_t n;
263
264 require_lc_ctype("en_US.ISO8859-15");
265
266 /* Unicode character 'Euro sign'. */
267 memset(&s, 0, sizeof(s));
268 memset(buf, 0xcc, sizeof(buf));
269 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
270 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
271 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), 1, "n=%zu", n);
272 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
273 (unsigned char)buf[1] == 0xcc),
274 "buf=[%02x %02x]", buf[0], buf[1]);
275 }
276
277 ATF_TC_WITHOUT_HEAD(c8rtomb_utf_8_test);
278 ATF_TC_BODY(c8rtomb_utf_8_test, tc)
279 {
280 size_t n;
281
282 require_lc_ctype("en_US.UTF-8");
283
284 /* Unicode character 'Pile of poo'. */
285 memset(&s, 0, sizeof(s));
286 memset(buf, 0xcc, sizeof(buf));
287 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
288 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
289 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
290 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), 4, "n=%zu", n);
291 ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
292 (unsigned char)buf[1] == 0x9f &&
293 (unsigned char)buf[2] == 0x92 &&
294 (unsigned char)buf[3] == 0xa9 &&
295 (unsigned char)buf[4] == 0xcc),
296 "buf=[%02x %02x %02x %02x %02x]",
297 buf[0], buf[1], buf[2], buf[3], buf[4]);
298
299 /* Invalid code; 'Pile of poo' without the last byte. */
300 memset(&s, 0, sizeof(s));
301 memset(buf, 0xcc, sizeof(buf));
302 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
303 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
304 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
305 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), (size_t)-1,
306 "n=%zu", n);
307 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
308 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
309
310 /* Invalid code; 'Pile of poo' without the first byte. */
311 memset(&s, 0, sizeof(s));
312 memset(buf, 0xcc, sizeof(buf));
313 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), (size_t)-1,
314 "n=%zu", n);
315 ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
316 ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
317
318 /* Incomplete Unicode character 'Pile of poo', interrupted by NUL. */
319 memset(&s, 0, sizeof(s));
320 memset(buf, 0xcc, sizeof(buf));
321 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
322 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
323 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
324 (unsigned char)buf[1] == 0xcc),
325 "buf=[%02x %02x]", buf[0], buf[1]);
326
327 memset(&s, 0, sizeof(s));
328 memset(buf, 0xcc, sizeof(buf));
329 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
330 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
331 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
332 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
333 (unsigned char)buf[1] == 0xcc),
334 "buf=[%02x %02x]", buf[0], buf[1]);
335
336 memset(&s, 0, sizeof(s));
337 memset(buf, 0xcc, sizeof(buf));
338 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
339 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
340 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
341 ATF_CHECK_EQ_MSG((n = c8rtomb(buf, '\0', &s)), 1, "n=%zu", n);
342 ATF_CHECK_MSG(((unsigned char)buf[0] == '\0' &&
343 (unsigned char)buf[1] == 0xcc),
344 "buf=[%02x %02x]", buf[0], buf[1]);
345 }
346
347 ATF_TP_ADD_TCS(tp)
348 {
349
350 ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test);
351 ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test);
352 ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test);
353 ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test);
354 ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);
355
356 return (atf_no_error());
357 }
358