/**
 * This code handles decoding UTF strings for foreach_reverse loops.  There are
 * 6 combinations of conversions between char, wchar, and dchar, and 2 of each
 * of those.
 *
 * Copyright: Copyright Digital Mars 2004 - 2010.
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Walter Bright, Sean Kelly
 * Source: $(DRUNTIMESRC rt/_aApplyR.d)
 */
module rt.aApplyR;

import core.internal.utf;

// printf is referenced only by the debug(apply) trace statements.
debug(apply) import core.stdc.stdio : printf;

/**********************************************/
/* Shared decoding / encoding helpers         */

/**
 * Decodes, walking backwards, the multi-byte UTF-8 character whose last code
 * unit is aa[i].
 *
 * Params:
 *      aa = UTF-8 text being traversed in reverse
 *      i  = on entry, index of the last byte of the character (that byte has
 *           its high bit set); on return, index of the character's lead byte
 *
 * Returns: the decoded code point.
 *
 * Calls onUnicodeError when no lead byte is found, when more than three
 * continuation bytes precede the lead byte, when the lead byte does not
 * announce exactly the number of continuation bytes that were consumed, or
 * when the decoded value exceeds 0x10FFFF.
 */
private dchar decodeBackUTF8(in char[] aa, ref size_t i)
{
    char c = aa[i];
    uint shift;         // bit position of the next 6-bit group
    uint count;         // continuation bytes consumed so far
    uint mask = 0x3F;   // payload mask the lead byte will need
    dchar d = 0;

    // Consume continuation bytes (10xxxxxx), least significant group first.
    while ((c & 0xC0) == 0x80)
    {
        // A continuation byte needs something in front of it, and a
        // character never has more than three of them.
        if (i == 0 || count == 3)
            onUnicodeError("Invalid UTF-8 sequence", i);
        d |= (c & 0x3F) << shift;
        shift += 6;
        mask >>= 1;
        count++;
        i--;
        c = aa[i];
    }

    // c must now be a lead byte whose length prefix matches `count`.
    immutable ok = (count == 1 && (c & 0xE0) == 0xC0)
                || (count == 2 && (c & 0xF0) == 0xE0)
                || (count == 3 && (c & 0xF8) == 0xF0);
    if (!ok)
        onUnicodeError("Invalid UTF-8 sequence", i);

    d |= (c & mask) << shift;
    if (d > 0x10FFFF)
        onUnicodeError("Invalid UTF-8 sequence", i);
    return d;
}

unittest
{
    import core.exception : UnicodeException;

    // True when decoding the character that ends at the last byte is rejected.
    static bool rejects(in char[] s)
    {
        size_t i = s.length - 1;
        try
        {
            auto d = decodeBackUTF8(s, i);
        }
        catch (UnicodeException)
        {
            return true;
        }
        return false;
    }

    size_t i = 1;
    assert(decodeBackUTF8("\u00E9", i) == 0xE9 && i == 0);
    i = 3;
    assert(decodeBackUTF8("a\u1234", i) == 0x1234 && i == 1);
    i = 3;
    assert(decodeBackUTF8("\U000A0456", i) == 0xA0456 && i == 0);

    assert(rejects("\x80"));                    // continuation byte, no lead byte
    assert(rejects("a\x80"));                   // ASCII where a lead byte is required
    assert(rejects("\xC3"));                    // lead byte with nothing after it
    assert(rejects("\xC3\x80\x80"));            // lead byte announces fewer bytes
    assert(rejects("\xF8\x80\x80\x80\x80"));    // five-byte form
}

/**
 * Decodes, walking backwards, the UTF-16 character whose last code unit is
 * aa[i].
 *
 * Params:
 *      aa = UTF-16 text being traversed in reverse
 *      i  = on entry, index of the last code unit of the character; on return,
 *           index of its first code unit
 *
 * Returns: the decoded code point.  A code unit that is not a low surrogate is
 * returned unchanged (this includes an unpaired high surrogate).
 *
 * Calls onUnicodeError when a low surrogate is the first code unit of the
 * text or is not preceded by a high surrogate.
 */
private dchar decodeBackUTF16(in wchar[] aa, ref size_t i)
{
    dchar d = aa[i];

    if (d >= 0xDC00 && d <= 0xDFFF)
    {
        if (i == 0)
            onUnicodeError("Invalid UTF-16 sequence", i);
        i--;
        if (aa[i] < 0xD800 || aa[i] > 0xDBFF)
            onUnicodeError("Invalid UTF-16 sequence", i);
        // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds in the 0x10000 offset.
        d = ((aa[i] - 0xD7C0) << 10) + (d - 0xDC00);
    }
    return d;
}

unittest
{
    import core.exception : UnicodeException;

    // True when decoding the character that ends at the last unit is rejected.
    static bool rejects(in wchar[] s)
    {
        size_t i = s.length - 1;
        try
        {
            auto d = decodeBackUTF16(s, i);
        }
        catch (UnicodeException)
        {
            return true;
        }
        return false;
    }

    wchar[2] pair = [cast(wchar) 0xDA41, cast(wchar) 0xDC56];
    wchar[2] unpaired = [cast(wchar) 'a', cast(wchar) 0xDC56];
    wchar[1] lone = [cast(wchar) 0xDC56];

    size_t i = 1;
    assert(decodeBackUTF16(pair, i) == 0xA0456 && i == 0);
    assert(rejects(unpaired));
    assert(rejects(lone));
}

/// First (high) UTF-16 surrogate for a code point above 0xFFFF.
private wchar leadSurrogate(dchar d)
{
    return cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
}

/// Second (low) UTF-16 surrogate for a code point above 0xFFFF.
private wchar trailSurrogate(dchar d)
{
    return cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
}

// Runs foreach_reverse over src with element type T and checks that the
// delivered code units are exactly `expected`, in order.
version (unittest)
private void checkReverse(T, S)(S src, immutable(T)[] expected)
{
    size_t n;

    foreach_reverse (T u; src)
    {
        assert(n < expected.length);
        assert(u == expected[n]);
        n++;
    }
    assert(n == expected.length);
}

// Same as checkReverse, additionally checking the index delivered with each
// code unit against `keys`.
version (unittest)
private void checkReverseKeyed(T, S)(S src, immutable(T)[] expected,
                                     immutable(size_t)[] keys)
{
    size_t n;

    assert(keys.length == expected.length);
    foreach_reverse (k, T u; src)
    {
        assert(n < expected.length);
        assert(k == keys[n]);
        assert(u == expected[n]);
        n++;
    }
    assert(n == expected.length);
}

/**********************************************/
/* 1 argument versions */

// dg is D, but _aApplyRcd() is C
extern (D) alias int delegate(void *) dg_t;

/**
 * foreach_reverse over a char[] yielding dchar.
 *
 * Params:
 *      aa = UTF-8 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current dchar
 *
 * Returns: 0 when every character was visited, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d & 0x80)
            d = decodeBackUTF8(aa, i);
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    checkReverse!dchar("hello"c, "olleh"d);
    checkReverse!dchar("a\u1234\U000A0456b"c, "b\U000A0456\u1234a"d);
}

/*****************************/

/**
 * foreach_reverse over a wchar[] yielding dchar.
 *
 * Params:
 *      aa = UTF-16 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current dchar
 *
 * Returns: 0 when every character was visited, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = decodeBackUTF16(aa, i);
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    checkReverse!dchar("hello"w, "olleh"d);
    checkReverse!dchar("a\u1234\U000A0456b"w, "b\U000A0456\u1234a"d);
}

/*****************************/

/**
 * foreach_reverse over a char[] yielding wchar.
 *
 * A character above 0xFFFF is delivered as two calls: high surrogate first,
 * then low surrogate.
 *
 * Params:
 *      aa = UTF-8 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current wchar
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        wchar w = aa[i];
        if (w & 0x80)
        {
            immutable dchar d = decodeBackUTF8(aa, i);

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                w = leadSurrogate(d);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = trailSurrogate(d);
            }
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    checkReverse!wchar("hello"c, "olleh"w);
    // 0xDA41 0xDC56 is the surrogate pair for U+A0456.
    checkReverse!wchar("a\u1234\U000A0456b"c, "b\U000A0456\u1234a"w);
}

/*****************************/

/**
 * foreach_reverse over a wchar[] yielding char.
 *
 * The bytes of a non-ASCII character are delivered lead byte first.
 *
 * Params:
 *      aa = UTF-16 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current char
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        immutable dchar d = decodeBackUTF16(aa, i);

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        char c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    checkReverse!char("hello"w, "olleh"c);
    // Bytes: 'b', F2 A0 91 96 (U+A0456), E1 88 B4 (U+1234), 'a'.
    checkReverse!char("a\u1234\U000A0456b"w, "b\U000A0456\u1234a"c);
}

/*****************************/

/**
 * foreach_reverse over a dchar[] yielding char.
 *
 * The bytes of a non-ASCII character are delivered lead byte first.
 *
 * Params:
 *      aa = UTF-32 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current char
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdc1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        immutable dchar d = aa[--i];

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        char c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    checkReverse!char("hello"d, "olleh"c);
    checkReverse!char("a\u1234\U000A0456b"d, "b\U000A0456\u1234a"c);
}

/*****************************/

/**
 * foreach_reverse over a dchar[] yielding wchar.
 *
 * A character above 0xFFFF is delivered as two calls: high surrogate first,
 * then low surrogate.
 *
 * Params:
 *      aa = UTF-32 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the current wchar
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdw1(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        immutable dchar d = aa[--i];
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = leadSurrogate(d);
            result = dg(cast(void *)&w);
            if (result)
                break;
            w = trailSurrogate(d);
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    checkReverse!wchar("hello"d, "olleh"w);
    checkReverse!wchar("a\u1234\U000A0456b"d, "b\U000A0456\u1234a"w);
}


/****************************************************************************/
/* 2 argument versions */

// dg is D, but _aApplyRcd2() is C
extern (D) alias int delegate(void *, void *) dg2_t;

/**
 * foreach_reverse over a char[] yielding (index, dchar).
 *
 * Params:
 *      aa = UTF-8 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character's
 *           first byte in aa and a pointer to the current dchar
 *
 * Returns: 0 when every character was visited, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d & 0x80)
            d = decodeBackUTF8(aa, i);
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    checkReverseKeyed!dchar("hello"c, "olleh"d, [4, 3, 2, 1, 0]);
    checkReverseKeyed!dchar("a\u1234\U000A0456b"c, "b\U000A0456\u1234a"d,
                            [8, 4, 1, 0]);
}

/*****************************/

/**
 * foreach_reverse over a wchar[] yielding (index, dchar).
 *
 * Params:
 *      aa = UTF-16 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character's
 *           first code unit in aa and a pointer to the current dchar
 *
 * Returns: 0 when every character was visited, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = decodeBackUTF16(aa, i);
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    checkReverseKeyed!dchar("hello"w, "olleh"d, [4, 3, 2, 1, 0]);
    checkReverseKeyed!dchar("a\u1234\U000A0456b"w, "b\U000A0456\u1234a"d,
                            [4, 2, 1, 0]);
}

/*****************************/

/**
 * foreach_reverse over a char[] yielding (index, wchar).
 *
 * A character above 0xFFFF is delivered as two calls (high surrogate, then
 * low surrogate), both with the same index.
 *
 * Params:
 *      aa = UTF-8 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character's
 *           first byte in aa and a pointer to the current wchar
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        wchar w = aa[i];
        if (w & 0x80)
        {
            immutable dchar d = decodeBackUTF8(aa, i);

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                w = leadSurrogate(d);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = trailSurrogate(d);
            }
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    checkReverseKeyed!wchar("hello"c, "olleh"w, [4, 3, 2, 1, 0]);
    checkReverseKeyed!wchar("a\u1234\U000A0456b"c, "b\U000A0456\u1234a"w,
                            [8, 4, 4, 1, 0]);
}

/*****************************/

/**
 * foreach_reverse over a wchar[] yielding (index, char).
 *
 * The bytes of a non-ASCII character are delivered lead byte first, all with
 * the same index.
 *
 * Params:
 *      aa = UTF-16 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character's
 *           first code unit in aa and a pointer to the current char
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        immutable dchar d = decodeBackUTF16(aa, i);

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        char c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwc2.unittest\n");

    checkReverseKeyed!char("hello"w, "olleh"c, [4, 3, 2, 1, 0]);
    checkReverseKeyed!char("a\u1234\U000A0456b"w, "b\U000A0456\u1234a"c,
                           [4, 2, 2, 2, 2, 1, 1, 1, 0]);
}

/*****************************/

/**
 * foreach_reverse over a dchar[] yielding (index, char).
 *
 * The bytes of a non-ASCII character are delivered lead byte first, all with
 * the same index.
 *
 * Params:
 *      aa = UTF-32 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character in aa
 *           and a pointer to the current char
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdc2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        immutable dchar d = aa[--i];

        if (d & ~0x7F)
        {
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }
        char c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdc2.unittest\n");

    checkReverseKeyed!char("hello"d, "olleh"c, [4, 3, 2, 1, 0]);
    checkReverseKeyed!char("a\u1234\U000A0456b"d, "b\U000A0456\u1234a"c,
                           [3, 2, 2, 2, 2, 1, 1, 1, 0]);
}

/*****************************/

/**
 * foreach_reverse over a dchar[] yielding (index, wchar).
 *
 * A character above 0xFFFF is delivered as two calls (high surrogate, then
 * low surrogate), both with the same index.
 *
 * Params:
 *      aa = UTF-32 text, visited from its last character to its first
 *      dg = loop body; receives a pointer to the index of the character in aa
 *           and a pointer to the current wchar
 *
 * Returns: 0 when every code unit was delivered, otherwise the first non-zero
 * value returned by dg (iteration stops there).
 */
extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdw2(), len = %zu\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        immutable dchar d = aa[--i];
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = leadSurrogate(d);
            result = dg(&i, cast(void *)&w);
            if (result)
                break;
            w = trailSurrogate(d);
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdw2.unittest\n");

    checkReverseKeyed!wchar("hello"d, "olleh"w, [4, 3, 2, 1, 0]);
    checkReverseKeyed!wchar("a\u1234\U000A0456b"d, "b\U000A0456\u1234a"w,
                            [3, 2, 2, 1, 0]);
}