aApplyR.d revision 1.1 1 1.1 mrg /**
2 1.1 mrg * This code handles decoding UTF strings for foreach_reverse loops. There are
3 1.1 mrg * 6 combinations of conversions between char, wchar, and dchar, and 2 of each
4 1.1 mrg * of those.
5 1.1 mrg *
6 1.1 mrg * Copyright: Copyright Digital Mars 2004 - 2010.
7 1.1 mrg * License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 1.1 mrg * Authors: Walter Bright, Sean Kelly
9 1.1 mrg */
10 1.1 mrg
11 1.1 mrg /* Copyright Digital Mars 2004 - 2010.
12 1.1 mrg * Distributed under the Boost Software License, Version 1.0.
13 1.1 mrg * (See accompanying file LICENSE or copy at
14 1.1 mrg * http://www.boost.org/LICENSE_1_0.txt)
15 1.1 mrg */
16 1.1 mrg module rt.aApplyR;
17 1.1 mrg
18 1.1 mrg /* This code handles decoding UTF strings for foreach_reverse loops.
19 1.1 mrg * There are 6 combinations of conversions between char, wchar,
20 1.1 mrg * and dchar, and 2 of each of those.
21 1.1 mrg */
22 1.1 mrg
23 1.1 mrg private import rt.util.utf;
24 1.1 mrg
25 1.1 mrg /**********************************************/
26 1.1 mrg /* 1 argument versions */
27 1.1 mrg
28 1.1 mrg // dg_t (the one-argument loop-body delegate) has D linkage, while the _aApplyR*1() functions that accept it are extern (C)
29 1.1 mrg extern (D) alias int delegate(void *) dg_t;
30 1.1 mrg
/**
 * Reverse iteration over a UTF-8 array yielding dchar values
 * (the unittest exercises it through `foreach_reverse (dchar d; char[])`).
 *
 * The array is walked from its last code unit towards the first; every
 * multi-byte sequence is decoded backwards and the address of the resulting
 * code point is passed to `dg`.
 *
 * Params:
 *  aa = UTF-8 code units to iterate over
 *  dg = loop body; called with a pointer to the decoded dchar
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * Malformed input (no lead byte before index 0, a non-continuation byte in
 * trail position, or more than three trail bytes) is reported through
 * onUnicodeError with the index at which decoding stopped.
 */
extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;

        i--;
        d = aa[i];
        if (d & 0x80)
        {
            char c = cast(char)d;
            uint j;             // bit offset of the next 6-bit payload group
            uint m = 0x3F;      // lead-byte payload mask, narrowed once per trail byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be trail bytes and a sequence has
                // at most three of them; without this bound the shift count
                // below could reach or exceed the width of the operand.
                if (i == 0 || (c & 0xC0) != 0x80 || j == 18)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;  // c is now the lead byte
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
62 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    // Pure ASCII: code points must arrive last to first.
    auto s = "hello"c[];
    static immutable dchar[] ascii = "olleh"d;
    size_t seen;

    foreach_reverse (dchar d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Mix of 1-, 3- and 4-byte UTF-8 sequences.
    s = "a\u1234\U000A0456b";
    static immutable dchar[] mixed = "b\U000A0456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
102 1.1 mrg
103 1.1 mrg /*****************************/
104 1.1 mrg
/**
 * Reverse iteration over a UTF-16 array yielding dchar values
 * (the unittest exercises it through `foreach_reverse (dchar d; wchar[])`).
 *
 * Params:
 *  aa = UTF-16 code units to iterate over, visited from last to first
 *  dg = loop body; called with a pointer to the decoded dchar
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * A low surrogate that is not preceded by a high surrogate is reported
 * through onUnicodeError.
 */
extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Combining with anything but a high surrogate would silently
            // produce a bogus code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
126 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    // Pure ASCII: code points must arrive last to first.
    auto s = "hello"w[];
    static immutable dchar[] ascii = "olleh"d;
    size_t seen;

    foreach_reverse (dchar d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // BMP characters plus one surrogate pair.
    s = "a\u1234\U000A0456b";
    static immutable dchar[] mixed = "b\U000A0456\u1234a"d;
    seen = 0;
    foreach_reverse (dchar d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
166 1.1 mrg
167 1.1 mrg /*****************************/
168 1.1 mrg
/**
 * Reverse iteration over a UTF-8 array yielding wchar values
 * (the unittest exercises it through `foreach_reverse (wchar w; char[])`).
 *
 * Each code point is decoded backwards from the UTF-8 input. Code points
 * above 0xFFFF are delivered as two calls to `dg`: first the high
 * surrogate, then the low surrogate.
 *
 * Params:
 *  aa = UTF-8 code units to iterate over, visited from last to first
 *  dg = loop body; called with a pointer to each wchar
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * Malformed input (no lead byte before index 0, a non-continuation byte in
 * trail position, or more than three trail bytes) is reported through
 * onUnicodeError with the index at which decoding stopped.
 */
extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {
            char c = cast(char)w;
            uint j;             // bit offset of the next 6-bit payload group
            uint m = 0x3F;      // lead-byte payload mask, narrowed once per trail byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be trail bytes and a sequence has
                // at most three of them; without this bound the shift count
                // below could reach or exceed the width of the operand.
                if (i == 0 || (c & 0xC0) != 0x80 || j == 18)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;  // c is now the lead byte

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: emit the high surrogate here, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
212 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    // Pure ASCII: code units must arrive last to first.
    auto s = "hello"c[];
    static immutable wchar[] ascii = "olleh"w;
    size_t seen;

    foreach_reverse (wchar d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // The 4-byte sequence shows up as a surrogate pair, high half first.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed = ['b', 0xDA41, 0xDC56, 0x1234, 'a'];
    seen = 0;
    foreach_reverse (wchar d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
253 1.1 mrg
254 1.1 mrg /*****************************/
255 1.1 mrg
/**
 * Reverse iteration over a UTF-16 array yielding char values
 * (the unittest exercises it through `foreach_reverse (char c; wchar[])`).
 *
 * Each code point is decoded backwards from the UTF-16 input. Values that
 * do not fit in 7 bits are encoded with toUTF8 and the resulting code units
 * are passed to `dg` one by one, in the order toUTF8 produced them.
 *
 * Params:
 *  aa = UTF-16 code units to iterate over, visited from last to first
 *  dg = loop body; called with a pointer to each char
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * A low surrogate that is not preceded by a high surrogate is reported
 * through onUnicodeError.
 */
extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        char c;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Combining with anything but a high surrogate would silently
            // produce a bogus code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
293 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    // Pure ASCII: code units must arrive last to first.
    auto s = "hello"w[];
    static immutable char[] ascii = "olleh";
    size_t seen;

    foreach_reverse (char d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Code points are visited in reverse, but the bytes of each one keep
    // their forward order.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed =
        ['b', 0xF2, 0xA0, 0x91, 0x96, 0xE1, 0x88, 0xB4, 'a'];
    seen = 0;
    foreach_reverse (char d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
338 1.1 mrg
339 1.1 mrg /*****************************/
340 1.1 mrg
/**
 * Reverse iteration over a UTF-32 array yielding char values
 * (the unittest exercises it through `foreach_reverse (char c; dchar[])`).
 *
 * Elements are visited from last to first. A value that fits in 7 bits is
 * passed on as a single char; anything else is encoded with toUTF8 and its
 * code units are handed to `dg` in the order toUTF8 produced them.
 *
 * Params:
 *  aa = code points to iterate over
 *  dg = loop body; called with a pointer to each char
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 */
extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos > 0)
    {
        --pos;
        dchar cp = aa[pos];

        if ((cp & ~0x7F) == 0)
        {
            // Plain 7-bit value: a single code unit.
            char unit = cast(char)cp;
            if (auto rc = dg(cast(void *)&unit))
                return rc;
            continue;
        }

        char[4] storage = void;
        foreach (char unit; toUTF8(storage, cp))
        {
            if (auto rc = dg(cast(void *)&unit))
                return rc;
        }
    }
    return 0;
}
372 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    // Pure ASCII: code units must arrive last to first.
    auto s = "hello"d[];
    static immutable char[] ascii = "olleh";
    size_t seen;

    foreach_reverse (char d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Code points are visited in reverse, but the bytes of each one keep
    // their forward order.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed =
        ['b', 0xF2, 0xA0, 0x91, 0x96, 0xE1, 0x88, 0xB4, 'a'];
    seen = 0;
    foreach_reverse (char d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
417 1.1 mrg
418 1.1 mrg /*****************************/
419 1.1 mrg
/**
 * Reverse iteration over a UTF-32 array yielding wchar values
 * (the unittest exercises it through `foreach_reverse (wchar w; dchar[])`).
 *
 * Elements are visited from last to first. Values up to 0xFFFF are passed
 * on as one wchar; larger values are split into a surrogate pair that is
 * delivered high half first, low half second.
 *
 * Params:
 *  aa = code points to iterate over
 *  dg = loop body; called with a pointer to each wchar
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 */
extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos > 0)
    {
        dchar cp = aa[--pos];
        wchar unit;

        if (cp > 0xFFFF)
        {
            immutable uint offset = cp - 0x10000;

            unit = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
            if (auto rc = dg(cast(void *)&unit))
                return rc;
            unit = cast(wchar) ((offset & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) cp;
        }

        if (auto rc = dg(cast(void *)&unit))
            return rc;
    }
    return 0;
}
444 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    // Pure ASCII: code units must arrive last to first.
    auto s = "hello"d[];
    static immutable wchar[] ascii = "olleh"w;
    size_t seen;

    foreach_reverse (wchar d; s)
    {
        assert(seen < ascii.length);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // The supplementary character shows up as a surrogate pair, high half first.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed = ['b', 0xDA41, 0xDC56, 0x1234, 'a'];
    seen = 0;
    foreach_reverse (wchar d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
485 1.1 mrg
486 1.1 mrg
487 1.1 mrg /****************************************************************************/
488 1.1 mrg /* 2 argument versions */
489 1.1 mrg
490 1.1 mrg // dg2_t (the two-argument loop-body delegate) has D linkage, while the _aApplyR*2() functions that accept it are extern (C)
491 1.1 mrg extern (D) alias int delegate(void *, void *) dg2_t;
492 1.1 mrg
/**
 * Reverse iteration over a UTF-8 array yielding (index, dchar) pairs
 * (the unittest exercises it through `foreach_reverse (k, dchar d; char[])`).
 *
 * The array is walked from its last code unit towards the first. For each
 * decoded code point `dg` receives a pointer to the index of the sequence's
 * first code unit and a pointer to the code point.
 *
 * Params:
 *  aa = UTF-8 code units to iterate over
 *  dg = loop body; called with (pointer to index, pointer to dchar)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * Malformed input (no lead byte before index 0, a non-continuation byte in
 * trail position, or more than three trail bytes) is reported through
 * onUnicodeError with the index at which decoding stopped.
 */
extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
{
    int result;
    size_t i;
    size_t len = aa.length;

    debug(apply) printf("_aApplyRcd2(), len = %d\n", len);
    for (i = len; i != 0; )
    {
        dchar d;

        i--;
        d = aa[i];
        if (d & 0x80)
        {
            char c = cast(char)d;
            uint j;             // bit offset of the next 6-bit payload group
            uint m = 0x3F;      // lead-byte payload mask, narrowed once per trail byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be trail bytes and a sequence has
                // at most three of them; without this bound the shift count
                // below could reach or exceed the width of the operand.
                if (i == 0 || (c & 0xC0) != 0x80 || j == 18)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;  // c is now the lead byte
        }
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
526 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    // Pure ASCII: index and code point both count down.
    auto s = "hello"c[];
    static immutable dchar[] ascii = "olleh"d;
    size_t seen;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // The index is that of the first byte of each UTF-8 sequence.
    s = "a\u1234\U000A0456b";
    static immutable dchar[] mixed = "b\U000A0456\u1234a"d;
    static immutable size_t[] keys = [8, 4, 1, 0];
    seen = 0;
    foreach_reverse (k, dchar d; s)
    {
        assert(seen < mixed.length);
        assert(d == mixed[seen]);
        assert(k == keys[seen]);
        ++seen;
    }
    assert(seen == 4);
}
567 1.1 mrg
568 1.1 mrg /*****************************/
569 1.1 mrg
/**
 * Reverse iteration over a UTF-16 array yielding (index, dchar) pairs
 * (the unittest exercises it through `foreach_reverse (k, dchar d; wchar[])`).
 *
 * For each decoded code point `dg` receives a pointer to the index of its
 * first code unit and a pointer to the code point.
 *
 * Params:
 *  aa = UTF-16 code units to iterate over, visited from last to first
 *  dg = loop body; called with (pointer to index, pointer to dchar)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * A low surrogate that is not preceded by a high surrogate is reported
 * through onUnicodeError.
 */
extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Combining with anything but a high surrogate would silently
            // produce a bogus code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
591 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    // Pure ASCII: index and code point both count down.
    auto s = "hello"w[];
    static immutable dchar[] ascii = "olleh"d;
    size_t seen;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // The index is that of the first UTF-16 unit of each code point.
    s = "a\u1234\U000A0456b";
    static immutable dchar[] mixed = "b\U000A0456\u1234a"d;
    static immutable size_t[] keys = [4, 2, 1, 0];
    seen = 0;
    foreach_reverse (k, dchar d; s)
    {
        assert(seen < mixed.length);
        assert(k == keys[seen]);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
633 1.1 mrg
634 1.1 mrg /*****************************/
635 1.1 mrg
/**
 * Reverse iteration over a UTF-8 array yielding (index, wchar) pairs
 * (the unittest exercises it through `foreach_reverse (k, wchar w; char[])`).
 *
 * Each code point is decoded backwards from the UTF-8 input. Code points
 * above 0xFFFF are delivered as two calls to `dg` (high surrogate, then low
 * surrogate), both with the same index.
 *
 * Params:
 *  aa = UTF-8 code units to iterate over, visited from last to first
 *  dg = loop body; called with (pointer to index, pointer to wchar)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * Malformed input (no lead byte before index 0, a non-continuation byte in
 * trail position, or more than three trail bytes) is reported through
 * onUnicodeError with the index at which decoding stopped.
 */
extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        wchar w;

        i--;
        w = aa[i];
        if (w & 0x80)
        {
            char c = cast(char)w;
            uint j;             // bit offset of the next 6-bit payload group
            uint m = 0x3F;      // lead-byte payload mask, narrowed once per trail byte
            d = 0;
            while ((c & 0xC0) != 0xC0)
            {
                // Only 10xxxxxx bytes may be trail bytes and a sequence has
                // at most three of them; without this bound the shift count
                // below could reach or exceed the width of the operand.
                if (i == 0 || (c & 0xC0) != 0x80 || j == 18)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }
            d |= (c & m) << j;  // c is now the lead byte

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: emit the high surrogate here, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
679 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    // Pure ASCII: index and code unit both count down.
    auto s = "hello"c[];
    static immutable wchar[] ascii = "olleh"w;
    size_t seen;

    foreach_reverse (k, wchar d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Both halves of the surrogate pair report the same source index.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed = ['b', 0xDA41, 0xDC56, 0x1234, 'a'];
    static immutable size_t[] keys = [8, 4, 4, 1, 0];
    seen = 0;
    foreach_reverse (k, wchar d; s)
    {
        assert(seen < mixed.length);
        assert(k == keys[seen]);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
722 1.1 mrg
723 1.1 mrg /*****************************/
724 1.1 mrg
/**
 * Reverse iteration over a UTF-16 array yielding (index, char) pairs
 * (the unittest exercises it through `foreach_reverse (k, char c; wchar[])`).
 *
 * Each code point is decoded backwards from the UTF-16 input. Values that
 * do not fit in 7 bits are encoded with toUTF8 and the resulting code units
 * are passed to `dg` one by one, all with the same index.
 *
 * Params:
 *  aa = UTF-16 code units to iterate over, visited from last to first
 *  dg = loop body; called with (pointer to index, pointer to char)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 *
 * A low surrogate that is not preceded by a high surrogate is reported
 * through onUnicodeError.
 */
extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d;
        char c;

        i--;
        d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit before it must be the high half.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            wchar hi = aa[i];
            // Combining with anything but a high surrogate would silently
            // produce a bogus code point.
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
762 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRwc2.unittest\n");

    // Pure ASCII: index and code unit both count down.
    auto s = "hello"w[];
    static immutable char[] ascii = "olleh";
    size_t seen;

    foreach_reverse (k, char d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Every byte of one code point reports that code point's source index.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed =
        ['b', 0xF2, 0xA0, 0x91, 0x96, 0xE1, 0x88, 0xB4, 'a'];
    static immutable size_t[] keys = [4, 2, 2, 2, 2, 1, 1, 1, 0];
    seen = 0;
    foreach_reverse (k, char d; s)
    {
        assert(seen < mixed.length);
        assert(k == keys[seen]);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
809 1.1 mrg
810 1.1 mrg /*****************************/
811 1.1 mrg
/**
 * Reverse iteration over a UTF-32 array yielding (index, char) pairs
 * (the unittest exercises it through `foreach_reverse (k, char c; dchar[])`).
 *
 * Elements are visited from last to first. A value that fits in 7 bits is
 * passed on as a single char; anything else is encoded with toUTF8 and its
 * code units are handed to `dg` one by one, all with the element's index.
 *
 * Params:
 *  aa = code points to iterate over
 *  dg = loop body; called with (pointer to index, pointer to char)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 */
extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos > 0)
    {
        --pos;
        dchar cp = aa[pos];

        if ((cp & ~0x7F) == 0)
        {
            // Plain 7-bit value: a single code unit.
            char unit = cast(char)cp;
            if (auto rc = dg(&pos, cast(void *)&unit))
                return rc;
            continue;
        }

        char[4] storage = void;
        foreach (char unit; toUTF8(storage, cp))
        {
            if (auto rc = dg(&pos, cast(void *)&unit))
                return rc;
        }
    }
    return 0;
}
842 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRdc2.unittest\n");

    // Pure ASCII: index and code unit both count down.
    auto s = "hello"d[];
    static immutable char[] ascii = "olleh";
    size_t seen;

    foreach_reverse (k, char d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Every byte of one code point reports that code point's source index.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed =
        ['b', 0xF2, 0xA0, 0x91, 0x96, 0xE1, 0x88, 0xB4, 'a'];
    static immutable size_t[] keys = [3, 2, 2, 2, 2, 1, 1, 1, 0];
    seen = 0;
    foreach_reverse (k, char d; s)
    {
        assert(seen < mixed.length);
        assert(k == keys[seen]);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
889 1.1 mrg
890 1.1 mrg /*****************************/
891 1.1 mrg
/**
 * Reverse iteration over a UTF-32 array yielding (index, wchar) pairs
 * (the unittest exercises it through `foreach_reverse (k, wchar w; dchar[])`).
 *
 * Elements are visited from last to first. Values up to 0xFFFF are passed
 * on as one wchar; larger values are split into a surrogate pair that is
 * delivered high half first, low half second, both with the element's index.
 *
 * Params:
 *  aa = code points to iterate over
 *  dg = loop body; called with (pointer to index, pointer to wchar)
 *
 * Returns: 0 if the whole array was visited, otherwise the first non-zero
 *          value returned by `dg` (iteration stops at that point).
 */
extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
{
    debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos > 0)
    {
        dchar cp = aa[--pos];
        wchar unit;

        if (cp > 0xFFFF)
        {
            immutable uint offset = cp - 0x10000;

            unit = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
            if (auto rc = dg(&pos, cast(void *)&unit))
                return rc;
            unit = cast(wchar) ((offset & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) cp;
        }

        if (auto rc = dg(&pos, cast(void *)&unit))
            return rc;
    }
    return 0;
}
916 1.1 mrg
unittest
{
    debug(apply) printf("_aApplyRdw2.unittest\n");

    // Pure ASCII: index and code unit both count down.
    auto s = "hello"d[];
    static immutable wchar[] ascii = "olleh"w;
    size_t seen;

    foreach_reverse (k, wchar d; s)
    {
        assert(seen < ascii.length);
        assert(k == 4 - seen);
        assert(d == ascii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Both halves of the surrogate pair report the same source index.
    s = "a\u1234\U000A0456b";
    static immutable uint[] mixed = ['b', 0xDA41, 0xDC56, 0x1234, 'a'];
    static immutable size_t[] keys = [3, 2, 2, 1, 0];
    seen = 0;
    foreach_reverse (k, wchar d; s)
    {
        assert(seen < mixed.length);
        assert(k == keys[seen]);
        assert(d == mixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
959