Home | History | Annotate | Line # | Download | only in rt
aApplyR.d revision 1.1
      1  1.1  mrg /**
      2  1.1  mrg  * This code handles decoding UTF strings for foreach_reverse loops.  There are
      3  1.1  mrg  * 6 combinations of conversions between char, wchar, and dchar, and 2 of each
      4  1.1  mrg  * of those.
      5  1.1  mrg  *
      6  1.1  mrg  * Copyright: Copyright Digital Mars 2004 - 2010.
      7  1.1  mrg  * License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
      8  1.1  mrg  * Authors:   Walter Bright, Sean Kelly
      9  1.1  mrg  */
     10  1.1  mrg 
     11  1.1  mrg /*          Copyright Digital Mars 2004 - 2010.
     12  1.1  mrg  * Distributed under the Boost Software License, Version 1.0.
     13  1.1  mrg  *    (See accompanying file LICENSE or copy at
     14  1.1  mrg  *          http://www.boost.org/LICENSE_1_0.txt)
     15  1.1  mrg  */
     16  1.1  mrg module rt.aApplyR;
     17  1.1  mrg 
     18  1.1  mrg /* This code handles decoding UTF strings for foreach_reverse loops.
     19  1.1  mrg  * There are 6 combinations of conversions between char, wchar,
     20  1.1  mrg  * and dchar, and 2 of each of those.
     21  1.1  mrg  */
     22  1.1  mrg 
     23  1.1  mrg private import rt.util.utf;
     24  1.1  mrg 
     25  1.1  mrg /**********************************************/
     26  1.1  mrg /* 1 argument versions */
     27  1.1  mrg 
// The loop body arrives as a delegate with D linkage, whereas the
// _aApplyR*() entry points themselves are declared extern (C).
extern (D) alias int delegate(void *) dg_t;

/**
 * Walks the UTF-8 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes a pointer to the decoded dchar to `dg`.
 *
 * Params:
 *  aa = UTF-8 code units to traverse back to front
 *  dg = called once per code point with a pointer to a local dchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code point.
 *
 * Malformed sequences are reported through onUnicodeError(): a continuation
 * byte with nothing in front of it, more than three continuation bytes, or a
 * lead byte whose length marker does not match the number of continuation
 * bytes that followed it.  Overlong encodings, surrogates and values above
 * U+10FFFF are not rejected here.
 */
extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
{
    // A code point has at most 3 continuation bytes of 6 payload bits each.
    enum uint maxShift = 18;

    int result;

    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d & 0x80)
        {
            char c = cast(char)d;
            uint j = 0;         // bit position of the next payload group
            uint m = 0x3F;      // payload mask the lead byte must fit
            d = 0;

            // Collect continuation bytes (10xxxxxx), least significant first.
            while ((c & 0xC0) == 0x80)
            {
                // onUnicodeError() is assumed not to return.
                if (i == 0 || j == maxShift)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The lead byte must be all ones above the payload mask except
            // for a single zero bit directly above it (110xxxxx for one
            // continuation byte, 1110xxxx for two, 11110xxx for three).
            if ((c | m) != (0xFF ^ (m + 1)))
                onUnicodeError("Invalid UTF-8 sequence", i);
            d |= (c & m) << j;
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
     62  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    // Pure ASCII: the code points come back last to first.
    auto s = "hello"c[];
    immutable dstring reversedAscii = "olleh"d;
    size_t seen;

    foreach_reverse (dchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // A mix of 1-, 3- and 4-byte sequences.
    s = "a\u1234\U000A0456b";
    immutable dstring reversedMixed = "b\U000A0456\u1234a"d;
    seen = 0;

    foreach_reverse (dchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
    102  1.1  mrg 
    103  1.1  mrg /*****************************/
    104  1.1  mrg 
/**
 * Walks the UTF-16 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes a pointer to the decoded dchar to `dg`.
 *
 * Params:
 *  aa = UTF-16 code units to traverse back to front
 *  dg = called once per code point with a pointer to a local dchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code point.
 *
 * A low surrogate that is the first unit of the array, or that is not
 * preceded by a high surrogate, is reported through onUnicodeError().
 * A high surrogate that is not followed by a low surrogate is passed to
 * `dg` unchanged.
 */
extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit in front of it must be its partner.
            // onUnicodeError() is assumed not to return.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            const wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the +0x10000 offset
            // into the high-surrogate term.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
    126  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    // Pure ASCII: the code points come back last to first.
    auto s = "hello"w[];
    immutable dstring reversedAscii = "olleh"d;
    size_t seen;

    foreach_reverse (dchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // BMP characters plus one surrogate pair.
    s = "a\u1234\U000A0456b";
    immutable dstring reversedMixed = "b\U000A0456\u1234a"d;
    seen = 0;

    foreach_reverse (dchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
    166  1.1  mrg 
    167  1.1  mrg /*****************************/
    168  1.1  mrg 
/**
 * Walks the UTF-8 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes it to `dg` as UTF-16: one call with a
 * pointer to a wchar for code points up to U+FFFF, otherwise two calls, the
 * high surrogate first and the low surrogate second.
 *
 * Params:
 *  aa = UTF-8 code units to traverse back to front
 *  dg = called once per UTF-16 code unit with a pointer to a local wchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 *
 * Malformed sequences are reported through onUnicodeError(): a continuation
 * byte with nothing in front of it, more than three continuation bytes, or a
 * lead byte whose length marker does not match the number of continuation
 * bytes that followed it.  Overlong encodings, surrogates and values above
 * U+10FFFF are not rejected here.
 */
extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
{
    // A code point has at most 3 continuation bytes of 6 payload bits each.
    enum uint maxShift = 18;

    int result;

    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        wchar w = aa[i];
        if (w & 0x80)
        {
            char c = cast(char)w;
            uint j = 0;         // bit position of the next payload group
            uint m = 0x3F;      // payload mask the lead byte must fit
            dchar d = 0;

            // Collect continuation bytes (10xxxxxx), least significant first.
            while ((c & 0xC0) == 0x80)
            {
                // onUnicodeError() is assumed not to return.
                if (i == 0 || j == maxShift)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The lead byte must be all ones above the payload mask except
            // for a single zero bit directly above it.
            if ((c | m) != (0xFF ^ (m + 1)))
                onUnicodeError("Invalid UTF-8 sequence", i);
            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: deliver the high surrogate now, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
    212  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    // Pure ASCII: one wchar per char, last to first.
    auto s = "hello"c[];
    immutable wstring reversedAscii = "olleh"w;
    size_t seen;

    foreach_reverse (wchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // U+A0456 must arrive as a surrogate pair, high half first.
    s = "a\u1234\U000A0456b";
    immutable wchar[] reversedMixed =
        [0x0062 /* 'b' */, 0xDA41, 0xDC56, 0x1234, 0x0061 /* 'a' */];
    seen = 0;

    foreach_reverse (wchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
    253  1.1  mrg 
    254  1.1  mrg /*****************************/
    255  1.1  mrg 
/**
 * Walks the UTF-16 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes it to `dg` as UTF-8.  ASCII code points
 * produce one call; any other code point is encoded with toUTF8() and its
 * code units are delivered in encoding order (lead byte first).
 *
 * Params:
 *  aa = UTF-16 code units to traverse back to front
 *  dg = called once per UTF-8 code unit with a pointer to a local char
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 *
 * A low surrogate that is the first unit of the array, or that is not
 * preceded by a high surrogate, is reported through onUnicodeError().
 */
extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit in front of it must be its partner.
            // onUnicodeError() is assumed not to return.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            const wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the +0x10000 offset
            // into the high-surrogate term.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Multi-byte result: encode into a scratch buffer and hand the
            // units out one at a time.
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }

        char c = cast(char)d;
        result = dg(cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
    293  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    // Pure ASCII: one char per wchar, last to first.
    auto s = "hello"w[];
    immutable string reversedAscii = "olleh";
    size_t seen;

    foreach_reverse (char d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Code points are visited in reverse, but the UTF-8 units of each
    // code point arrive lead byte first.
    s = "a\u1234\U000A0456b";
    immutable string reversedMixed = "b\xF2\xA0\x91\x96\xE1\x88\xB4a";
    seen = 0;

    foreach_reverse (char d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
    338  1.1  mrg 
    339  1.1  mrg /*****************************/
    340  1.1  mrg 
/**
 * Walks the UTF-32 array `aa` from its last element to its first and passes
 * each code point to `dg` as UTF-8.  ASCII code points produce one call; any
 * other code point is encoded with toUTF8() and its code units are delivered
 * in encoding order.
 *
 * Params:
 *  aa = code points to traverse back to front
 *  dg = called once per UTF-8 code unit with a pointer to a local char
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 */
extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        immutable dchar cp = aa[--pos];

        if ((cp & ~0x7F) == 0)
        {
            // Single-unit case: the code point is its own encoding.
            char unit = cast(char)cp;
            if (auto rc = dg(cast(void *)&unit))
                return rc;
            continue;
        }

        char[4] scratch = void;
        foreach (char unit; toUTF8(scratch, cp))
        {
            if (auto rc = dg(cast(void *)&unit))
                return rc;
        }
    }
    return 0;
}
    372  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    // Pure ASCII: one char per dchar, last to first.
    auto s = "hello"d[];
    immutable string reversedAscii = "olleh";
    size_t seen;

    foreach_reverse (char d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Code points are visited in reverse, but the UTF-8 units of each
    // code point arrive lead byte first.
    s = "a\u1234\U000A0456b";
    immutable string reversedMixed = "b\xF2\xA0\x91\x96\xE1\x88\xB4a";
    seen = 0;

    foreach_reverse (char d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 9);
}
    417  1.1  mrg 
    418  1.1  mrg /*****************************/
    419  1.1  mrg 
/**
 * Walks the UTF-32 array `aa` from its last element to its first and passes
 * each code point to `dg` as UTF-16: one call for values up to 0xFFFF,
 * otherwise two calls, the high surrogate first and the low surrogate second.
 *
 * Params:
 *  aa = code points to traverse back to front
 *  dg = called once per UTF-16 code unit with a pointer to a local wchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 */
extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
{
    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);

    size_t pos = aa.length;
    while (pos != 0)
    {
        immutable dchar cp = aa[--pos];
        wchar unit;

        if (cp > 0xFFFF)
        {
            // Split into a surrogate pair; the high half goes out here, the
            // low half through the shared call below.
            immutable uint offset = cp - 0x10000;

            unit = cast(wchar) (((offset >> 10) & 0x3FF) + 0xD800);
            if (auto rc = dg(cast(void *)&unit))
                return rc;
            unit = cast(wchar) ((offset & 0x3FF) + 0xDC00);
        }
        else
        {
            unit = cast(wchar) cp;
        }

        if (auto rc = dg(cast(void *)&unit))
            return rc;
    }
    return 0;
}
    444  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    // Pure ASCII: one wchar per dchar, last to first.
    auto s = "hello"d[];
    immutable wstring reversedAscii = "olleh"w;
    size_t seen;

    foreach_reverse (wchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // U+A0456 must arrive as a surrogate pair, high half first.
    s = "a\u1234\U000A0456b";
    immutable wchar[] reversedMixed =
        [0x0062 /* 'b' */, 0xDA41, 0xDC56, 0x1234, 0x0061 /* 'a' */];
    seen = 0;

    foreach_reverse (wchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
    485  1.1  mrg 
    486  1.1  mrg 
    487  1.1  mrg /****************************************************************************/
    488  1.1  mrg /* 2 argument versions */
    489  1.1  mrg 
// The loop body arrives as a delegate with D linkage, whereas the
// _aApplyR*2() entry points themselves are declared extern (C).
extern (D) alias int delegate(void *, void *) dg2_t;

/**
 * Walks the UTF-8 array `aa` from its last code unit to its first, decodes
 * one code point per step and calls `dg` with a pointer to the index of the
 * sequence's first code unit and a pointer to the decoded dchar.
 *
 * Params:
 *  aa = UTF-8 code units to traverse back to front
 *  dg = called once per code point; first argument points at the size_t
 *       loop index, second at a local dchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code point.
 *
 * Malformed sequences are reported through onUnicodeError(): a continuation
 * byte with nothing in front of it, more than three continuation bytes, or a
 * lead byte whose length marker does not match the number of continuation
 * bytes that followed it.  Overlong encodings, surrogates and values above
 * U+10FFFF are not rejected here.
 */
extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
{
    // A code point has at most 3 continuation bytes of 6 payload bits each.
    enum uint maxShift = 18;

    int result;

    debug(apply) printf("_aApplyRcd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d & 0x80)
        {
            char c = cast(char)d;
            uint j = 0;         // bit position of the next payload group
            uint m = 0x3F;      // payload mask the lead byte must fit
            d = 0;

            // Collect continuation bytes (10xxxxxx), least significant first.
            while ((c & 0xC0) == 0x80)
            {
                // onUnicodeError() is assumed not to return.
                if (i == 0 || j == maxShift)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The lead byte must be all ones above the payload mask except
            // for a single zero bit directly above it.
            if ((c | m) != (0xFF ^ (m + 1)))
                onUnicodeError("Invalid UTF-8 sequence", i);
            d |= (c & m) << j;
        }
        // i now indexes the first code unit of the decoded sequence.
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
    526  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    // Pure ASCII: index and code point count down together.
    auto s = "hello"c[];
    immutable dstring reversedAscii = "olleh"d;
    size_t seen;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(k == 4 - seen);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Multi-byte sequences: the index is that of each sequence's lead byte.
    s = "a\u1234\U000A0456b";
    immutable dstring reversedMixed = "b\U000A0456\u1234a"d;
    immutable size_t[] leadIndex = [8, 4, 1, 0];
    seen = 0;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(d == reversedMixed[seen]);
        assert(k == leadIndex[seen]);
        ++seen;
    }
    assert(seen == 4);
}
    567  1.1  mrg 
    568  1.1  mrg /*****************************/
    569  1.1  mrg 
/**
 * Walks the UTF-16 array `aa` from its last code unit to its first, decodes
 * one code point per step and calls `dg` with a pointer to the index of the
 * sequence's first code unit and a pointer to the decoded dchar.
 *
 * Params:
 *  aa = UTF-16 code units to traverse back to front
 *  dg = called once per code point; first argument points at the size_t
 *       loop index, second at a local dchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code point.
 *
 * A low surrogate that is the first unit of the array, or that is not
 * preceded by a high surrogate, is reported through onUnicodeError().
 * A high surrogate that is not followed by a low surrogate is passed to
 * `dg` unchanged.
 */
extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit in front of it must be its partner.
            // onUnicodeError() is assumed not to return.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            const wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the +0x10000 offset
            // into the high-surrogate term.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }
        // i now indexes the first code unit of the decoded sequence.
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}
    591  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    // Pure ASCII: index and code point count down together.
    auto s = "hello"w[];
    immutable dstring reversedAscii = "olleh"d;
    size_t seen;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(k == 4 - seen);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // The surrogate pair is reported at the index of its high half.
    s = "a\u1234\U000A0456b";
    immutable dstring reversedMixed = "b\U000A0456\u1234a"d;
    immutable size_t[] firstUnit = [4, 2, 1, 0];
    seen = 0;

    foreach_reverse (k, dchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(k == firstUnit[seen]);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 4);
}
    633  1.1  mrg 
    634  1.1  mrg /*****************************/
    635  1.1  mrg 
/**
 * Walks the UTF-8 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes it to `dg` as UTF-16 together with a
 * pointer to the index of the sequence's first code unit.  Code points up to
 * U+FFFF produce one call; larger ones produce two calls with the same
 * index, the high surrogate first and the low surrogate second.
 *
 * Params:
 *  aa = UTF-8 code units to traverse back to front
 *  dg = called once per UTF-16 code unit; first argument points at the
 *       size_t loop index, second at a local wchar
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 *
 * Malformed sequences are reported through onUnicodeError(): a continuation
 * byte with nothing in front of it, more than three continuation bytes, or a
 * lead byte whose length marker does not match the number of continuation
 * bytes that followed it.  Overlong encodings, surrogates and values above
 * U+10FFFF are not rejected here.
 */
extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
{
    // A code point has at most 3 continuation bytes of 6 payload bits each.
    enum uint maxShift = 18;

    int result;

    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        wchar w = aa[i];
        if (w & 0x80)
        {
            char c = cast(char)w;
            uint j = 0;         // bit position of the next payload group
            uint m = 0x3F;      // payload mask the lead byte must fit
            dchar d = 0;

            // Collect continuation bytes (10xxxxxx), least significant first.
            while ((c & 0xC0) == 0x80)
            {
                // onUnicodeError() is assumed not to return.
                if (i == 0 || j == maxShift)
                    onUnicodeError("Invalid UTF-8 sequence", i);
                i--;
                d |= (c & 0x3F) << j;
                j += 6;
                m >>= 1;
                c = aa[i];
            }

            // The lead byte must be all ones above the payload mask except
            // for a single zero bit directly above it.
            if ((c | m) != (0xFF ^ (m + 1)))
                onUnicodeError("Invalid UTF-8 sequence", i);
            d |= (c & m) << j;

            if (d <= 0xFFFF)
                w = cast(wchar) d;
            else
            {
                // Outside the BMP: deliver the high surrogate now, the low
                // surrogate through the common call below.
                w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
                result = dg(&i, cast(void *)&w);
                if (result)
                    break;
                w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
            }
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}
    679  1.1  mrg 
unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    // Pure ASCII: index and code unit count down together.
    auto s = "hello"c[];
    immutable wstring reversedAscii = "olleh"w;
    size_t seen;

    foreach_reverse (k, wchar d; s)
    {
        assert(seen < reversedAscii.length);
        assert(k == 4 - seen);
        assert(d == reversedAscii[seen]);
        ++seen;
    }
    assert(seen == 5);

    // Both halves of the surrogate pair carry the index of the 4-byte
    // sequence's lead byte.
    s = "a\u1234\U000A0456b";
    immutable wchar[] reversedMixed =
        [0x0062 /* 'b' */, 0xDA41, 0xDC56, 0x1234, 0x0061 /* 'a' */];
    immutable size_t[] leadIndex = [8, 4, 4, 1, 0];
    seen = 0;

    foreach_reverse (k, wchar d; s)
    {
        assert(seen < reversedMixed.length);
        assert(k == leadIndex[seen]);
        assert(d == reversedMixed[seen]);
        ++seen;
    }
    assert(seen == 5);
}
    722  1.1  mrg 
    723  1.1  mrg /*****************************/
    724  1.1  mrg 
/**
 * Walks the UTF-16 array `aa` from its last code unit to its first, decodes
 * one code point per step and passes it to `dg` as UTF-8 together with a
 * pointer to the index of the sequence's first code unit.  ASCII code points
 * produce one call; any other code point is encoded with toUTF8() and its
 * code units are delivered in encoding order (lead byte first), each with
 * the same index.
 *
 * Params:
 *  aa = UTF-16 code units to traverse back to front
 *  dg = called once per UTF-8 code unit; first argument points at the
 *       size_t loop index, second at a local char
 *
 * Returns:
 *  The first non-zero value returned by `dg` (the traversal stops there),
 *  or 0 when `dg` returned 0 for every code unit.
 *
 * A low surrogate that is the first unit of the array, or that is not
 * preceded by a high surrogate, is reported through onUnicodeError().
 */
extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = aa[i];
        if (d >= 0xDC00 && d <= 0xDFFF)
        {
            // Low surrogate: the unit in front of it must be its partner.
            // onUnicodeError() is assumed not to return.
            if (i == 0)
                onUnicodeError("Invalid UTF-16 sequence", 0);
            i--;
            const wchar hi = aa[i];
            if (hi < 0xD800 || hi > 0xDBFF)
                onUnicodeError("Invalid UTF-16 sequence", i);
            // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the +0x10000 offset
            // into the high-surrogate term.
            d = ((hi - 0xD7C0) << 10) + (d - 0xDC00);
        }

        if (d & ~0x7F)
        {
            // Multi-byte result: encode into a scratch buffer and hand the
            // units out one at a time.
            char[4] buf = void;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
            continue;
        }

        char c = cast(char)d;
        result = dg(&i, cast(void *)&c);
        if (result)
            break;
    }
    return result;
}
    762  1.1  mrg 
    763  1.1  mrg unittest  // foreach_reverse over a wchar[] with a char loop variable (presumably lowered to _aApplyRwc2 - confirm)
    764  1.1  mrg {
    765  1.1  mrg     debug(apply) printf("_aApplyRwc2.unittest\n");
    766  1.1  mrg 
    767  1.1  mrg     auto s = "hello"w[];
    768  1.1  mrg     int i;  // counts loop-body invocations
    769  1.1  mrg     // ASCII only: one callback per code unit, with index k running from 4 down to 0.
    770  1.1  mrg     foreach_reverse (k, char d; s)
    771  1.1  mrg     {
    772  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    773  1.1  mrg         assert(k == 4 - i);
    774  1.1  mrg         switch (i)
    775  1.1  mrg         {
    776  1.1  mrg             case 0:     assert(d == 'o'); break;
    777  1.1  mrg             case 1:     assert(d == 'l'); break;
    778  1.1  mrg             case 2:     assert(d == 'l'); break;
    779  1.1  mrg             case 3:     assert(d == 'e'); break;
    780  1.1  mrg             case 4:     assert(d == 'h'); break;
    781  1.1  mrg             default:    assert(0);
    782  1.1  mrg         }
    783  1.1  mrg         i++;
    784  1.1  mrg     }
    785  1.1  mrg     assert(i == 5);
    786  1.1  mrg     // Mixed widths; as wchars: 'a' at 0, U+1234 at 1, the surrogate pair of U+A0456 at 2..3, 'b' at 4.
    787  1.1  mrg     s = "a\u1234\U000A0456b";
    788  1.1  mrg     i = 0;
    789  1.1  mrg     foreach_reverse (k, char d; s)
    790  1.1  mrg     {
    791  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    792  1.1  mrg         switch (i)
    793  1.1  mrg         {
    794  1.1  mrg             case 0:     assert(k == 4); assert(d == 'b'); break;
    795  1.1  mrg             case 1:     assert(k == 2); assert(d == 0xF2); break;  // F2 A0 91 96 is U+A0456 in UTF-8, lead byte first, all at k == 2
    796  1.1  mrg             case 2:     assert(k == 2); assert(d == 0xA0); break;
    797  1.1  mrg             case 3:     assert(k == 2); assert(d == 0x91); break;
    798  1.1  mrg             case 4:     assert(k == 2); assert(d == 0x96); break;
    799  1.1  mrg             case 5:     assert(k == 1); assert(d == 0xE1); break;  // E1 88 B4 is U+1234 in UTF-8
    800  1.1  mrg             case 6:     assert(k == 1); assert(d == 0x88); break;
    801  1.1  mrg             case 7:     assert(k == 1); assert(d == 0xB4); break;
    802  1.1  mrg             case 8:     assert(k == 0); assert(d == 'a'); break;
    803  1.1  mrg             default:    assert(0);
    804  1.1  mrg         }
    805  1.1  mrg         i++;
    806  1.1  mrg     }
    807  1.1  mrg     assert(i == 9);  // 1 + 4 + 3 + 1 UTF-8 bytes
    808  1.1  mrg }
    809  1.1  mrg 
    810  1.1  mrg /*****************************/
    811  1.1  mrg 
    812  1.1  mrg extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)  // walks dchar array `aa` from the end, handing dg (index, char) for each UTF-8 byte
    813  1.1  mrg {   int result;  // last value returned by dg; a non-zero value stops the walk and is returned
    814  1.1  mrg 
    815  1.1  mrg     debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);
    816  1.1  mrg     for (size_t i = aa.length; i != 0; )
    817  1.1  mrg     {   dchar d = aa[--i];  // step back one element; i is now that element's index
    818  1.1  mrg         char c;
    819  1.1  mrg 
    820  1.1  mrg         if (d & ~0x7F)  // anything outside 0..0x7F is converted with toUTF8
    821  1.1  mrg         {
    822  1.1  mrg             char[4] buf = void;
    823  1.1  mrg 
    824  1.1  mrg             auto b = toUTF8(buf, d);
    825  1.1  mrg             foreach (char c2; b)  // bytes are delivered in the order toUTF8 returned them, every one with the same i
    826  1.1  mrg             {
    827  1.1  mrg                 result = dg(&i, cast(void *)&c2);
    828  1.1  mrg                 if (result)
    829  1.1  mrg                     return result;
    830  1.1  mrg             }
    831  1.1  mrg             continue;  // all bytes delivered; skip the single-byte call below
    832  1.1  mrg         }
    833  1.1  mrg         else
    834  1.1  mrg         {   c = cast(char)d;  // single-byte case: the value is passed on as one char
    835  1.1  mrg         }
    836  1.1  mrg         result = dg(&i, cast(void *)&c);
    837  1.1  mrg         if (result)
    838  1.1  mrg             break;
    839  1.1  mrg     }
    840  1.1  mrg     return result;
    841  1.1  mrg }
    842  1.1  mrg 
    843  1.1  mrg unittest  // foreach_reverse over a dchar[] with a char loop variable (presumably lowered to _aApplyRdc2 - confirm)
    844  1.1  mrg {
    845  1.1  mrg     debug(apply) printf("_aApplyRdc2.unittest\n");
    846  1.1  mrg 
    847  1.1  mrg     auto s = "hello"d[];
    848  1.1  mrg     int i;  // counts loop-body invocations
    849  1.1  mrg     // ASCII only: one callback per element, with index k running from 4 down to 0.
    850  1.1  mrg     foreach_reverse (k, char d; s)
    851  1.1  mrg     {
    852  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    853  1.1  mrg         assert(k == 4 - i);
    854  1.1  mrg         switch (i)
    855  1.1  mrg         {
    856  1.1  mrg             case 0:     assert(d == 'o'); break;
    857  1.1  mrg             case 1:     assert(d == 'l'); break;
    858  1.1  mrg             case 2:     assert(d == 'l'); break;
    859  1.1  mrg             case 3:     assert(d == 'e'); break;
    860  1.1  mrg             case 4:     assert(d == 'h'); break;
    861  1.1  mrg             default:    assert(0);
    862  1.1  mrg         }
    863  1.1  mrg         i++;
    864  1.1  mrg     }
    865  1.1  mrg     assert(i == 5);
    866  1.1  mrg     // Mixed widths; as dchars: 'a' at 0, U+1234 at 1, U+A0456 at 2, 'b' at 3.
    867  1.1  mrg     s = "a\u1234\U000A0456b";
    868  1.1  mrg     i = 0;
    869  1.1  mrg     foreach_reverse (k, char d; s)
    870  1.1  mrg     {
    871  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    872  1.1  mrg         switch (i)
    873  1.1  mrg         {
    874  1.1  mrg             case 0:     assert(k == 3); assert(d == 'b'); break;
    875  1.1  mrg             case 1:     assert(k == 2); assert(d == 0xF2); break;  // F2 A0 91 96 is U+A0456 in UTF-8, lead byte first, all at k == 2
    876  1.1  mrg             case 2:     assert(k == 2); assert(d == 0xA0); break;
    877  1.1  mrg             case 3:     assert(k == 2); assert(d == 0x91); break;
    878  1.1  mrg             case 4:     assert(k == 2); assert(d == 0x96); break;
    879  1.1  mrg             case 5:     assert(k == 1); assert(d == 0xE1); break;  // E1 88 B4 is U+1234 in UTF-8
    880  1.1  mrg             case 6:     assert(k == 1); assert(d == 0x88); break;
    881  1.1  mrg             case 7:     assert(k == 1); assert(d == 0xB4); break;
    882  1.1  mrg             case 8:     assert(k == 0); assert(d == 'a'); break;
    883  1.1  mrg             default:    assert(0);
    884  1.1  mrg         }
    885  1.1  mrg         i++;
    886  1.1  mrg     }
    887  1.1  mrg     assert(i == 9);  // 1 + 4 + 3 + 1 UTF-8 bytes
    888  1.1  mrg }
    889  1.1  mrg 
    890  1.1  mrg /*****************************/
    891  1.1  mrg 
    892  1.1  mrg extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)  // walks dchar array `aa` from the end, handing dg (index, wchar) for each UTF-16 unit
    893  1.1  mrg {   int result;  // last value returned by dg; a non-zero value stops the walk and is returned
    894  1.1  mrg 
    895  1.1  mrg     debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);
    896  1.1  mrg     for (size_t i = aa.length; i != 0; )
    897  1.1  mrg     {   dchar d = aa[--i];  // step back one element; i is now that element's index
    898  1.1  mrg         wchar w;
    899  1.1  mrg         // NOTE(review): values in 0xD800..0xDFFF or above 0x10FFFF are not filtered here; the 0x3FF mask below truncates the latter - confirm.
    900  1.1  mrg         if (d <= 0xFFFF)
    901  1.1  mrg             w = cast(wchar) d;  // fits in 16 bits: passed on as a single wchar
    902  1.1  mrg         else
    903  1.1  mrg         {
    904  1.1  mrg             w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);  // high surrogate: upper 10 bits of (d - 0x10000)
    905  1.1  mrg             result = dg(&i, cast(void *)&w);  // the high surrogate is delivered first
    906  1.1  mrg             if (result)
    907  1.1  mrg                 break;
    908  1.1  mrg             w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);  // low surrogate: lower 10 bits; delivered by the shared call below
    909  1.1  mrg         }
    910  1.1  mrg         result = dg(&i, cast(void *)&w);
    911  1.1  mrg         if (result)
    912  1.1  mrg             break;
    913  1.1  mrg     }
    914  1.1  mrg     return result;
    915  1.1  mrg }
    916  1.1  mrg 
    917  1.1  mrg unittest  // foreach_reverse over a dchar[] with a wchar loop variable (presumably lowered to _aApplyRdw2 - confirm)
    918  1.1  mrg {
    919  1.1  mrg     debug(apply) printf("_aApplyRdw2.unittest\n");
    920  1.1  mrg 
    921  1.1  mrg     auto s = "hello"d[];
    922  1.1  mrg     int i;  // counts loop-body invocations
    923  1.1  mrg     // ASCII only: one callback per element, with index k running from 4 down to 0.
    924  1.1  mrg     foreach_reverse (k, wchar d; s)
    925  1.1  mrg     {
    926  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    927  1.1  mrg         assert(k == 4 - i);
    928  1.1  mrg         switch (i)
    929  1.1  mrg         {
    930  1.1  mrg             case 0:     assert(d == 'o'); break;
    931  1.1  mrg             case 1:     assert(d == 'l'); break;
    932  1.1  mrg             case 2:     assert(d == 'l'); break;
    933  1.1  mrg             case 3:     assert(d == 'e'); break;
    934  1.1  mrg             case 4:     assert(d == 'h'); break;
    935  1.1  mrg             default:    assert(0);
    936  1.1  mrg         }
    937  1.1  mrg         i++;
    938  1.1  mrg     }
    939  1.1  mrg     assert(i == 5);
    940  1.1  mrg     // Mixed widths; as dchars: 'a' at 0, U+1234 at 1, U+A0456 at 2, 'b' at 3.
    941  1.1  mrg     s = "a\u1234\U000A0456b";
    942  1.1  mrg     i = 0;
    943  1.1  mrg     foreach_reverse (k, wchar d; s)
    944  1.1  mrg     {
    945  1.1  mrg         //printf("i = %d, k = %d, d = %x\n", i, k, d);
    946  1.1  mrg         switch (i)
    947  1.1  mrg         {
    948  1.1  mrg             case 0:     assert(k == 3); assert(d == 'b'); break;
    949  1.1  mrg             case 1:     assert(k == 2); assert(d == 0xDA41); break;  // high surrogate of U+A0456 arrives first
    950  1.1  mrg             case 2:     assert(k == 2); assert(d == 0xDC56); break;  // then its low surrogate, at the same k
    951  1.1  mrg             case 3:     assert(k == 1); assert(d == 0x1234); break;
    952  1.1  mrg             case 4:     assert(k == 0); assert(d == 'a'); break;
    953  1.1  mrg             default:    assert(0);
    954  1.1  mrg         }
    955  1.1  mrg         i++;
    956  1.1  mrg     }
    957  1.1  mrg     assert(i == 5);  // 1 + 2 + 1 + 1 UTF-16 units
    958  1.1  mrg }
    959