/*	$NetBSD: t_mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $	*/
2
3 /*-
4 * Copyright (c) 2002 Tim J. Robbins
5 * All rights reserved.
6 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31 /*
32 * Test program for mbrtoc8() as specified by C23.
33 */
34
35 #include <sys/cdefs.h>
36 __RCSID("$NetBSD: t_mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $");
37
38 #include <errno.h>
39 #include <inttypes.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <uchar.h>
44
45 #include <atf-c.h>
46
/*
 * require_lc_ctype(locale_name)
 *
 *	Select locale_name for the LC_CTYPE category.  The current test
 *	case is failed if setlocale() returns NULL, and the requirement
 *	check below fails if the locale string it returns differs from
 *	the one that was requested.
 */
static void
require_lc_ctype(const char *locale_name)
{
	const char *lc_ctype_set;

	/*
	 * Clear errno first so that the value printed on failure was
	 * stored by this setlocale() call, if at all, rather than being
	 * left over from some unrelated earlier error.
	 */
	errno = 0;
	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
	if (lc_ctype_set == NULL) {
		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
		    locale_name, errno);
	}

	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
}
60
61 static mbstate_t s;
62 static char8_t c8;
63
64 ATF_TC_WITHOUT_HEAD(mbrtoc8_c_locale_test);
65 ATF_TC_BODY(mbrtoc8_c_locale_test, tc)
66 {
67 size_t n;
68
69 require_lc_ctype("C");
70
71 /* Null wide character, internal state. */
72 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, NULL)), 0, "n=%zu", n);
73 ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
74
75 /* Null wide character. */
76 memset(&s, 0, sizeof(s));
77 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, &s)), 0, "n=%zu", n);
78 ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
79
80 /* Latin letter A, internal state. */
81 ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
82 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, NULL)), 1, "n=%zu", n);
83 ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
84 (uint8_t)c8, (uint8_t)'A');
85
86 /* Latin letter A. */
87 memset(&s, 0, sizeof(s));
88 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, &s)), 1, "n=%zu", n);
89 ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
90 (uint8_t)c8, (uint8_t)'A');
91
92 /* Incomplete character sequence. */
93 c8 = 'z';
94 memset(&s, 0, sizeof(s));
95 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
96 "n=%zu", n);
97 ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
98 (uint8_t)c8, (uint8_t)'z');
99
100 /* Check that mbrtoc8() doesn't access the buffer when n == 0. */
101 c8 = 'z';
102 memset(&s, 0, sizeof(s));
103 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
104 "n=%zu", n);
105 ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
106 (uint8_t)c8, (uint8_t)'z');
107
108 /* Check that mbrtoc8() doesn't read ahead too aggressively. */
109 memset(&s, 0, sizeof(s));
110 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "AB", 2, &s)), 1, "n=%zu", n);
111 ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
112 (uint8_t)c8, (uint8_t)'A');
113 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "C", 1, &s)), 1, "n=%zu", n);
114 ATF_CHECK_EQ_MSG(c8, 'C', "c8=0x%"PRIx8" 'C'=0x%"PRIx8,
115 (uint8_t)c8, (uint8_t)'C');
116
117 }
118
119 ATF_TC_WITHOUT_HEAD(mbrtoc8_iso_8859_1_test);
120 ATF_TC_BODY(mbrtoc8_iso_8859_1_test, tc)
121 {
122 size_t n;
123
124 require_lc_ctype("en_US.ISO8859-1");
125
126 /* Currency sign. */
127 memset(&s, 0, sizeof(s));
128 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xa4", 1, &s)), 1, "n=%zu", n);
129 ATF_CHECK_EQ_MSG(c8, 0xc2, "c8=0x%"PRIx8, (uint8_t)c8);
130 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
131 "n=%zu", n);
132 ATF_CHECK_EQ_MSG(c8, 0xa4, "c8=0x%"PRIx8, (uint8_t)c8);
133 }
134
135 ATF_TC_WITHOUT_HEAD(mbrtoc8_iso_8859_15_test);
136 ATF_TC_BODY(mbrtoc8_iso_8859_15_test, tc)
137 {
138 size_t n;
139
140 require_lc_ctype("en_US.ISO8859-15");
141
142 /* Euro sign. */
143 memset(&s, 0, sizeof(s));
144 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xa4", 1, &s)), 1, "n=%zu", n);
145 ATF_CHECK_EQ_MSG(c8, 0xe2, "c8=0x%"PRIx8, (uint8_t)c8);
146 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
147 "n=%zu", n);
148 ATF_CHECK_EQ_MSG(c8, 0x82, "c8=0x%"PRIx8, (uint8_t)c8);
149 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
150 "n=%zu", n);
151 ATF_CHECK_EQ_MSG(c8, 0xac, "c8=0x%"PRIx8, (uint8_t)c8);
152 }
153
154 ATF_TC_WITHOUT_HEAD(mbrtoc8_utf_8_test);
155 ATF_TC_BODY(mbrtoc8_utf_8_test, tc)
156 {
157 size_t n;
158
159 require_lc_ctype("en_US.UTF-8");
160
161 /* Null wide character, internal state. */
162 ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
163 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, NULL)), 0, "n=%zu", n);
164 ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
165
166 /* Null wide character. */
167 memset(&s, 0, sizeof(s));
168 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, &s)), 0, "n=%zu", n);
169 ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
170
171 /* Latin letter A, internal state. */
172 ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
173 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, NULL)), 1, "n=%zu", n);
174 ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
175 (uint8_t)c8, (uint8_t)'A');
176
177 /* Latin letter A. */
178 memset(&s, 0, sizeof(s));
179 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, &s)), 1, "n=%zu", n);
180 ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
181 (uint8_t)c8, (uint8_t)'A');
182
183 /* Incomplete character sequence (zero length). */
184 c8 = 'z';
185 memset(&s, 0, sizeof(s));
186 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
187 "n=%zu", n);
188 ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
189 (uint8_t)c8, (uint8_t)'z');
190
191 /* Incomplete character sequence (truncated double-byte). */
192 memset(&s, 0, sizeof(s));
193 c8 = 0;
194 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3", 1, &s)), (size_t)-2,
195 "n=%zu", n);
196
197 /* Same as above, but complete. */
198 memset(&s, 0, sizeof(s));
199 c8 = 0;
200 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3\x84", 2, &s)), 2,
201 "n=%zu", n);
202 ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
203 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
204 "n=%zu", n);
205 ATF_CHECK_EQ_MSG(c8, 0x84, "c8=0x%"PRIx8, (uint8_t)c8);
206
207 /* Test restarting behaviour. */
208 memset(&s, 0, sizeof(s));
209 c8 = 0;
210 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3", 1, &s)), (size_t)-2,
211 "n=%zu", n);
212 ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
213 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xb7", 1, &s)), 1, "n=%zu", n);
214 ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
215 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
216 "n=%zu", n);
217 ATF_CHECK_EQ_MSG(c8, 0xb7, "c8=0x%"PRIx8, (uint8_t)c8);
218
219 /* Four-byte sequence. */
220 memset(&s, 0, sizeof(s));
221 c8 = 0;
222 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xf0\x9f\x92\xa9", 4, &s)), 4,
223 "n=%zu", n);
224 ATF_CHECK_EQ_MSG(c8, 0xf0, "c8=0x%"PRIx8, (uint8_t)c8);
225 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
226 "n=%zu", n);
227 ATF_CHECK_EQ_MSG(c8, 0x9f, "c8=0x%"PRIx8, (uint8_t)c8);
228 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
229 "n=%zu", n);
230 ATF_CHECK_EQ_MSG(c8, 0x92, "c8=0x%"PRIx8, (uint8_t)c8);
231 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
232 "n=%zu", n);
233 ATF_CHECK_EQ_MSG(c8, 0xa9, "c8=0x%"PRIx8, (uint8_t)c8);
234
235 /* Letter e with acute, precomposed. */
236 memset(&s, 0, sizeof(s));
237 c8 = 0;
238 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3\xa9", 2, &s)), 2,
239 "n=%zu", n);
240 ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
241 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
242 "n=%zu", n);
243 ATF_CHECK_EQ_MSG(c8, 0xa9, "c8=0x%"PRIx8, (uint8_t)c8);
244
245 /* Letter e with acute, combined. */
246 memset(&s, 0, sizeof(s));
247 c8 = 0;
248 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\x65\xcc\x81", 3, &s)), 1,
249 "n=%zu", n);
250 ATF_CHECK_EQ_MSG(c8, 0x65, "c8=0x%"PRIx8, (uint8_t)c8);
251 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xcc\x81", 2, &s)), 2,
252 "n=%zu", n);
253 ATF_CHECK_EQ_MSG(c8, 0xcc, "c8=0x%"PRIx8, (uint8_t)c8);
254 ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
255 "n=%zu", n);
256 ATF_CHECK_EQ_MSG(c8, 0x81, "c8=0x%"PRIx8, (uint8_t)c8);
257 }
258
259 ATF_TP_ADD_TCS(tp)
260 {
261
262 ATF_TP_ADD_TC(tp, mbrtoc8_c_locale_test);
263 ATF_TP_ADD_TC(tp, mbrtoc8_iso_8859_1_test);
264 ATF_TP_ADD_TC(tp, mbrtoc8_iso_8859_15_test);
265 ATF_TP_ADD_TC(tp, mbrtoc8_utf_8_test);
266
267 return (atf_no_error());
268 }
269