Home | History | Annotate | Line # | Download | only in root
      1 /**
      2  * Spell checker
      3  *
      4  * Does not have any dependencies on the rest of DMD.
      5  *
      6  * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
      7  * Authors:   Walter Bright, https://www.digitalmars.com
      8  * License:   $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
      9  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d)
     10  * Documentation:  https://dlang.org/phobos/dmd_root_speller.html
     11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d
     12  */
     13 
     14 module dmd.root.speller;
     15 
     16 /**************************************************
     17  * Looks for correct spelling.
     18  * Looks a distance of up to two.
     19  * This does an exhaustive search, so can potentially be very slow.
     20  * Params:
     21  *      seed = wrongly spelled word
     22  *      dg = search delegate of the form `T delegate(const(char)[] p, out int cost)`
     23  * Returns:
     24  *      T.init = no correct spellings found,
     25  *      otherwise the value returned by dg() for first possible correct spelling
     26  */
     27 auto speller(alias dg)(const(char)[] seed)
     28 if (isSearchFunction!dg)
     29 {
     30     const size_t maxdist = seed.length < 4 ? seed.length / 2 : 2;
     31     foreach (distance; 0 .. maxdist)
     32     {
     33         if (auto p = spellerX!dg(seed, distance != 0))
     34             return p;
     35         //      if (seedlen > 10)
     36         //          break;
     37     }
     38     return null; // didn't find it
     39 }
     40 
     41 private:
     42 
     43 import core.stdc.stdlib;
     44 import core.stdc.string;
     45 import dmd.common.string : SmallBuffer;
     46 
     47 enum isSearchFunction(alias fun) = is(searchFunctionType!fun);
     48 alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}());
     49 
     50 /*************************************
     51  * Spell check level 1.
     52  * Params:
     53  *      dg = delegate that looks up string in dictionary AA and returns value found
     54  *      seed = starting string
     55  *      flag = if true, do 2 level lookup otherwise 1 level
     56  * Returns:
     57  *      whatever dg returns, null if no match
     58  */
     59 auto spellerX(alias dg)(const(char)[] seed, bool flag)
     60 {
     61     if (!seed.length)
     62         return null;
     63 
     64     /* Need buffer to store trial strings in
     65      */
     66     char[30] tmp = void;
     67     auto sb = SmallBuffer!char(seed.length + 1, tmp[]);
     68     char[] buf = sb[];
     69 
     70     int cost = int.max;
     71     searchFunctionType!dg p = null;
     72 
     73     /* Deletions */
     74     buf[0 .. seed.length - 1] = seed[1 .. $];
     75     foreach (i; 0 .. seed.length)
     76     {
     77         //printf("del buf = '%s'\n", buf);
     78         int ncost;
     79         auto np = flag ? spellerY!dg(buf[0 .. seed.length - 1], i, ncost)
     80                        : dg(buf[0 .. seed.length - 1], ncost);
     81         if (combineSpellerResult(p, cost, np, ncost))
     82             return p;
     83         buf[i] = seed[i];
     84     }
     85 
     86     /* Transpositions */
     87     if (!flag)
     88     {
     89         buf[0 .. seed.length] = seed;
     90         for (size_t i = 0; i + 1 < seed.length; i++)
     91         {
     92             // swap [i] and [i + 1]
     93             buf[i] = seed[i + 1];
     94             buf[i + 1] = seed[i];
     95             //printf("tra buf = '%s'\n", buf);
     96             int ncost;
     97             auto np = dg(buf[0 .. seed.length], ncost);
     98             if (combineSpellerResult(p, cost, np, ncost))
     99                 return p;
    100             buf[i] = seed[i];
    101         }
    102     }
    103 
    104     /* Substitutions */
    105     buf[0 .. seed.length] = seed;
    106     foreach (i; 0 .. seed.length)
    107     {
    108         foreach (s; idchars)
    109         {
    110             buf[i] = s;
    111             //printf("sub buf = '%s'\n", buf);
    112             int ncost;
    113             auto np = flag ? spellerY!dg(buf[0 .. seed.length], i + 1, ncost)
    114                            : dg(buf[0 .. seed.length], ncost);
    115             if (combineSpellerResult(p, cost, np, ncost))
    116                 return p;
    117         }
    118         buf[i] = seed[i];
    119     }
    120 
    121     /* Insertions */
    122     buf[1 .. seed.length + 1] = seed;
    123     foreach (i; 0 .. seed.length + 1) // yes, do seed.length+1 iterations
    124     {
    125         foreach (s; idchars)
    126         {
    127             buf[i] = s;
    128             //printf("ins buf = '%s'\n", buf);
    129             int ncost;
    130             auto np = flag ? spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost)
    131                            : dg(buf[0 .. seed.length + 1], ncost);
    132             if (combineSpellerResult(p, cost, np, ncost))
    133                 return p;
    134         }
    135         if (i < seed.length)
    136             buf[i] = seed[i];
    137     }
    138 
    139     return p; // return "best" result
    140 }
    141 
    142 /**********************************************
    143  * Do second level of spell matching.
    144  * Params:
    145  *      dg = delegate that looks up string in dictionary AA and returns value found
    146  *      seed = starting string
    147  *      index = index into seed[] that is where we will mutate it
    148  *      cost = set to cost of match
    149  * Returns:
    150  *      whatever dg returns, null if no match
    151  */
    152 auto spellerY(alias dg)(const(char)[] seed, size_t index, out int cost)
    153 {
    154     if (!seed.length)
    155         return null;
    156 
    157     /* Allocate a buf to store the new string to play with, needs
    158      * space for an extra char for insertions
    159      */
    160     char[30] tmp = void;        // stack allocations are fastest
    161     auto sb = SmallBuffer!char(seed.length + 1, tmp[]);
    162     char[] buf = sb[];
    163     buf[0 .. index] = seed[0 .. index];
    164 
    165     cost = int.max;             // start with worst possible match
    166     searchFunctionType!dg p = null;
    167 
    168     /* Delete character at seed[index] */
    169     if (index < seed.length)
    170     {
    171         buf[index .. seed.length - 1] = seed[index + 1 .. $]; // seed[] with deleted character
    172         int ncost;
    173         auto np = dg(buf[0 .. seed.length - 1], ncost); // look it up
    174         if (combineSpellerResult(p, cost, np, ncost))   // compare with prev match
    175             return p;                                   // cannot get any better
    176     }
    177 
    178     /* Substitute character at seed[index] */
    179     if (index < seed.length)
    180     {
    181         buf[0 .. seed.length] = seed;
    182         foreach (s; idchars)
    183         {
    184             buf[index] = s;     // seed[] with substituted character
    185             //printf("sub buf = '%s'\n", buf);
    186             int ncost;
    187             auto np = dg(buf[0 .. seed.length], ncost);
    188             if (combineSpellerResult(p, cost, np, ncost))
    189                 return p;
    190         }
    191     }
    192 
    193     /* Insert character at seed[index] */
    194     buf[index + 1 .. seed.length + 1] = seed[index .. $];
    195     foreach (s; idchars)
    196     {
    197         buf[index] = s;
    198         //printf("ins buf = '%s'\n", buf);
    199         int ncost;
    200         auto np = dg(buf[0 .. seed.length + 1], ncost);
    201         if (combineSpellerResult(p, cost, np, ncost))
    202             return p;
    203     }
    204     return p; // return "best" result
    205 }
    206 
    207 
    208 /* Characters used to substitute ones in the string we're checking
    209  * the spelling on.
    210  */
    211 immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
    212 
    213 /**************************************************
    214  * Combine a new result from the spell checker to
    215  * find the one with the closest symbol with
    216  * respect to the cost defined by the search function
    217  * Params:
    218  *      p = best found spelling so far, T.init if none found yet.
    219  *          If np is better, p is replaced with np
    220  *      cost = cost of p (int.max if none found yet).
    221  *          If np is better, cost is replaced with ncost
    222  *      np = current spelling to check against p, T.init if none
    223  *      ncost = cost of np if np is not T.init
    224  * Returns:
    225  *      true    if the cost is less or equal 0, meaning we can stop looking
    226  *      false   otherwise
    227  */
    228 bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost)
    229 {
    230     if (np && ncost < cost) // if np is better
    231     {
    232         p = np;             // np is new best match
    233         cost = ncost;
    234         if (cost <= 0)
    235             return true;    // meaning we can stop looking
    236     }
    237     return false;
    238 }
    239 
    240 /************************************* Tests ***********************/
    241 
    242 unittest
    243 {
    244     static immutable string[][] cases =
    245     [
    246         ["hello", "hell", "y"],
    247         ["hello", "hel", "y"],
    248         ["hello", "ello", "y"],
    249         ["hello", "llo", "y"],
    250         ["hello", "hellox", "y"],
    251         ["hello", "helloxy", "y"],
    252         ["hello", "xhello", "y"],
    253         ["hello", "xyhello", "y"],
    254         ["hello", "ehllo", "y"],
    255         ["hello", "helol", "y"],
    256         ["hello", "abcd", "n"],
    257         ["hello", "helxxlo", "y"],
    258         ["hello", "ehlxxlo", "n"],
    259         ["hello", "heaao", "y"],
    260         ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"],
    261     ];
    262     //printf("unittest_speller()\n");
    263 
    264     string dgarg;
    265 
    266     string speller_test(const(char)[] s, ref int cost)
    267     {
    268         assert(s[$-1] != '\0');
    269         //printf("speller_test(%s, %s)\n", dgarg, s);
    270         cost = 0;
    271         if (dgarg == s)
    272             return dgarg;
    273         return null;
    274     }
    275 
    276     dgarg = "hell";
    277     auto p = speller!speller_test("hello");
    278     assert(p !is null);
    279     foreach (testCase; cases)
    280     {
    281         //printf("case [%d]\n", i);
    282         dgarg = testCase[1];
    283         auto p2 = speller!speller_test(testCase[0]);
    284         if (p2)
    285             assert(testCase[2][0] == 'y');
    286         else
    287             assert(testCase[2][0] == 'n');
    288     }
    289     //printf("unittest_speller() success\n");
    290 }
    291