Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* Find near-matches for strings.
      2  1.1  mrg    Copyright (C) 2015-2022 Free Software Foundation, Inc.
      3  1.1  mrg 
      4  1.1  mrg This file is part of GCC.
      5  1.1  mrg 
      6  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      7  1.1  mrg the terms of the GNU General Public License as published by the Free
      8  1.1  mrg Software Foundation; either version 3, or (at your option) any later
      9  1.1  mrg version.
     10  1.1  mrg 
     11  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     12  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  1.1  mrg for more details.
     15  1.1  mrg 
     16  1.1  mrg You should have received a copy of the GNU General Public License
     17  1.1  mrg along with GCC; see the file COPYING3.  If not see
     18  1.1  mrg <http://www.gnu.org/licenses/>.  */
     19  1.1  mrg 
     20  1.1  mrg #include "config.h"
     21  1.1  mrg #include "system.h"
     22  1.1  mrg #include "coretypes.h"
     23  1.1  mrg #include "tm.h"
     24  1.1  mrg #include "tree.h"
     25  1.1  mrg #include "spellcheck.h"
     26  1.1  mrg #include "selftest.h"
     27  1.1  mrg 
     28  1.1  mrg /* Cost of a case transformation.  */
     29  1.1  mrg #define CASE_COST 1
     30  1.1  mrg 
     31  1.1  mrg /* Cost of another kind of edit.  */
     32  1.1  mrg #define BASE_COST 2
     33  1.1  mrg 
     34  1.1  mrg /* Get the edit distance between the two strings: the minimal
     35  1.1  mrg    number of edits that are needed to change one string into another,
     36  1.1  mrg    where edits can be one-character insertions, removals, or substitutions,
     37  1.1  mrg    or transpositions of two adjacent characters (counting as one "edit").
     38  1.1  mrg 
     39  1.1  mrg    This implementation uses a modified variant of the Wagner-Fischer
     40  1.1  mrg    algorithm for the Damerau-Levenshtein distance; specifically, the
     41  1.1  mrg    "optimal string alignment distance" or "restricted edit distance"
     42  1.1  mrg    variant.  This implementation has been further modified to take
     43  1.1  mrg    case into account.  */
     44  1.1  mrg 
     45  1.1  mrg edit_distance_t
     46  1.1  mrg get_edit_distance (const char *s, int len_s,
     47  1.1  mrg 		   const char *t, int len_t)
     48  1.1  mrg {
     49  1.1  mrg   const bool debug = false;
     50  1.1  mrg 
     51  1.1  mrg   if (debug)
     52  1.1  mrg     {
     53  1.1  mrg       printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
     54  1.1  mrg       printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
     55  1.1  mrg     }
     56  1.1  mrg 
     57  1.1  mrg   if (len_s == 0)
     58  1.1  mrg     return BASE_COST * len_t;
     59  1.1  mrg   if (len_t == 0)
     60  1.1  mrg     return BASE_COST * len_s;
     61  1.1  mrg 
     62  1.1  mrg   /* We effectively build a matrix where each (i, j) contains the
     63  1.1  mrg      distance between the prefix strings s[0:j] and t[0:i].
     64  1.1  mrg      Rather than actually build an (len_t + 1) * (len_s + 1) matrix,
     65  1.1  mrg      we simply keep track of the last two rows, v_one_ago and v_two_ago,
     66  1.1  mrg      and a new row, v_next, which avoids an (len_t + 1) * (len_s + 1)
     67  1.1  mrg      allocation and memory accesses in favor of three (len_s + 1)
     68  1.1  mrg      allocations.  These could potentially be
     69  1.1  mrg      statically-allocated if we impose a maximum length on the
     70  1.1  mrg      strings of interest.  */
     71  1.1  mrg   edit_distance_t *v_two_ago = new edit_distance_t[len_s + 1];
     72  1.1  mrg   edit_distance_t *v_one_ago = new edit_distance_t[len_s + 1];
     73  1.1  mrg   edit_distance_t *v_next = new edit_distance_t[len_s + 1];
     74  1.1  mrg 
     75  1.1  mrg   /* The first row is for the case of an empty target string, which
     76  1.1  mrg      we can reach by deleting every character in the source string.  */
     77  1.1  mrg   for (int i = 0; i < len_s + 1; i++)
     78  1.1  mrg     v_one_ago[i] = i * BASE_COST;
     79  1.1  mrg 
     80  1.1  mrg   /* Build successive rows.  */
     81  1.1  mrg   for (int i = 0; i < len_t; i++)
     82  1.1  mrg     {
     83  1.1  mrg       if (debug)
     84  1.1  mrg 	{
     85  1.1  mrg 	  printf ("i:%i v_one_ago = ", i);
     86  1.1  mrg 	  for (int j = 0; j < len_s + 1; j++)
     87  1.1  mrg 	    printf ("%i ", v_one_ago[j]);
     88  1.1  mrg 	  printf ("\n");
     89  1.1  mrg 	}
     90  1.1  mrg 
     91  1.1  mrg       /* The initial column is for the case of an empty source string; we
     92  1.1  mrg 	 can reach prefixes of the target string of length i
     93  1.1  mrg 	 by inserting i characters.  */
     94  1.1  mrg       v_next[0] = (i + 1) * BASE_COST;
     95  1.1  mrg 
     96  1.1  mrg       /* Build the rest of the row by considering neighbors to
     97  1.1  mrg 	 the north, west and northwest.  */
     98  1.1  mrg       for (int j = 0; j < len_s; j++)
     99  1.1  mrg 	{
    100  1.1  mrg 	  edit_distance_t cost;
    101  1.1  mrg 
    102  1.1  mrg 	  if (s[j] == t[i])
    103  1.1  mrg 	    cost = 0;
    104  1.1  mrg 	  else if (TOLOWER (s[j]) == TOLOWER (t[i]))
    105  1.1  mrg 	    cost = CASE_COST;
    106  1.1  mrg 	  else
    107  1.1  mrg 	    cost = BASE_COST;
    108  1.1  mrg 	  edit_distance_t deletion     = v_next[j] + BASE_COST;
    109  1.1  mrg 	  edit_distance_t insertion    = v_one_ago[j + 1] + BASE_COST;
    110  1.1  mrg 	  edit_distance_t substitution = v_one_ago[j] + cost;
    111  1.1  mrg 	  edit_distance_t cheapest = MIN (deletion, insertion);
    112  1.1  mrg 	  cheapest = MIN (cheapest, substitution);
    113  1.1  mrg 	  if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i])
    114  1.1  mrg 	    {
    115  1.1  mrg 	      edit_distance_t transposition = v_two_ago[j - 1] + BASE_COST;
    116  1.1  mrg 	      cheapest = MIN (cheapest, transposition);
    117  1.1  mrg 	    }
    118  1.1  mrg 	  v_next[j + 1] = cheapest;
    119  1.1  mrg 	}
    120  1.1  mrg 
    121  1.1  mrg       /* Prepare to move on to next row.  */
    122  1.1  mrg       for (int j = 0; j < len_s + 1; j++)
    123  1.1  mrg 	{
    124  1.1  mrg 	  v_two_ago[j] = v_one_ago[j];
    125  1.1  mrg 	  v_one_ago[j] = v_next[j];
    126  1.1  mrg 	}
    127  1.1  mrg     }
    128  1.1  mrg 
    129  1.1  mrg   if (debug)
    130  1.1  mrg     {
    131  1.1  mrg       printf ("final v_next = ");
    132  1.1  mrg       for (int j = 0; j < len_s + 1; j++)
    133  1.1  mrg 	printf ("%i ", v_next[j]);
    134  1.1  mrg       printf ("\n");
    135  1.1  mrg     }
    136  1.1  mrg 
    137  1.1  mrg   edit_distance_t result = v_next[len_s];
    138  1.1  mrg   delete[] v_two_ago;
    139  1.1  mrg   delete[] v_one_ago;
    140  1.1  mrg   delete[] v_next;
    141  1.1  mrg   return result;
    142  1.1  mrg }
    143  1.1  mrg 
    144  1.1  mrg /* Get the edit distance between two nil-terminated strings.  */
    145  1.1  mrg 
    146  1.1  mrg edit_distance_t
    147  1.1  mrg get_edit_distance (const char *s, const char *t)
    148  1.1  mrg {
    149  1.1  mrg   return get_edit_distance (s, strlen (s), t, strlen (t));
    150  1.1  mrg }
    151  1.1  mrg 
    152  1.1  mrg /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
    153  1.1  mrg    an autovec of non-NULL strings, determine which element within
    154  1.1  mrg    CANDIDATES has the lowest edit distance to TARGET.  If there are
    155  1.1  mrg    multiple elements with the same minimal distance, the first in the
    156  1.1  mrg    vector wins.
    157  1.1  mrg 
    158  1.1  mrg    If more than half of the letters were misspelled, the suggestion is
    159  1.1  mrg    likely to be meaningless, so return NULL for this case.  */
    160  1.1  mrg 
    161  1.1  mrg const char *
    162  1.1  mrg find_closest_string (const char *target,
    163  1.1  mrg 		     const auto_vec<const char *> *candidates)
    164  1.1  mrg {
    165  1.1  mrg   gcc_assert (target);
    166  1.1  mrg   gcc_assert (candidates);
    167  1.1  mrg 
    168  1.1  mrg   int i;
    169  1.1  mrg   const char *candidate;
    170  1.1  mrg   best_match<const char *, const char *> bm (target);
    171  1.1  mrg   FOR_EACH_VEC_ELT (*candidates, i, candidate)
    172  1.1  mrg     {
    173  1.1  mrg       gcc_assert (candidate);
    174  1.1  mrg       bm.consider (candidate);
    175  1.1  mrg     }
    176  1.1  mrg 
    177  1.1  mrg   return bm.get_best_meaningful_candidate ();
    178  1.1  mrg }
    179  1.1  mrg 
    180  1.1  mrg /* Generate the maximum edit distance for which we consider a suggestion
    181  1.1  mrg    to be meaningful, given a goal of length GOAL_LEN and a candidate of
    182  1.1  mrg    length CANDIDATE_LEN.
    183  1.1  mrg 
    184  1.1  mrg    This is a third of the length of the candidate or of the goal,
    185  1.1  mrg    whichever is bigger.  */
    186  1.1  mrg 
    187  1.1  mrg edit_distance_t
    188  1.1  mrg get_edit_distance_cutoff (size_t goal_len, size_t candidate_len)
    189  1.1  mrg {
    190  1.1  mrg   size_t max_length = MAX (goal_len, candidate_len);
    191  1.1  mrg   size_t min_length = MIN (goal_len, candidate_len);
    192  1.1  mrg 
    193  1.1  mrg   gcc_assert (max_length >= min_length);
    194  1.1  mrg 
    195  1.1  mrg   /* Special case: don't offer suggestions for a pair of
    196  1.1  mrg      length == 1 strings (or empty strings).  */
    197  1.1  mrg   if (max_length <= 1)
    198  1.1  mrg     return 0;
    199  1.1  mrg 
    200  1.1  mrg   /* If the lengths are close, then round down.  */
    201  1.1  mrg   if (max_length - min_length <= 1)
    202  1.1  mrg     /* ...but allow an edit distance of at least 1.  */
    203  1.1  mrg     return BASE_COST * MAX (max_length / 3, 1);
    204  1.1  mrg 
    205  1.1  mrg   /* Otherwise, round up (thus giving a little extra leeway to some cases
    206  1.1  mrg      involving insertions/deletions).  */
    207  1.1  mrg   return BASE_COST * (max_length + 2) / 3;
    208  1.1  mrg }
    209  1.1  mrg 
    210  1.1  mrg #if CHECKING_P
    211  1.1  mrg 
    212  1.1  mrg namespace selftest {
    213  1.1  mrg 
    214  1.1  mrg /* Selftests.  */
    215  1.1  mrg 
    216  1.1  mrg /* Verify that get_edit_distance (A, B) equals the expected value.  */
    217  1.1  mrg 
    218  1.1  mrg static void
    219  1.1  mrg test_get_edit_distance_one_way (const char *a, const char *b,
    220  1.1  mrg 				edit_distance_t expected)
    221  1.1  mrg {
    222  1.1  mrg   edit_distance_t actual = get_edit_distance (a, b);
    223  1.1  mrg   ASSERT_EQ (actual, expected);
    224  1.1  mrg }
    225  1.1  mrg 
    226  1.1  mrg /* Verify that both
    227  1.1  mrg      get_edit_distance (A, B)
    228  1.1  mrg    and
    229  1.1  mrg      get_edit_distance (B, A)
    230  1.1  mrg    equal the expected value, to ensure that the function is symmetric.  */
    231  1.1  mrg 
    232  1.1  mrg static void
    233  1.1  mrg test_get_edit_distance_both_ways (const char *a, const char *b,
    234  1.1  mrg 			     edit_distance_t expected)
    235  1.1  mrg {
    236  1.1  mrg   test_get_edit_distance_one_way (a, b, expected);
    237  1.1  mrg   test_get_edit_distance_one_way (b, a, expected);
    238  1.1  mrg }
    239  1.1  mrg 
    240  1.1  mrg /* Verify get_edit_distance for a variety of pairs of pre-canned
    241  1.1  mrg    inputs, comparing against known-good values.  */
    242  1.1  mrg 
    243  1.1  mrg static void
    244  1.1  mrg test_edit_distances ()
    245  1.1  mrg {
    246  1.1  mrg   test_get_edit_distance_both_ways ("", "nonempty",
    247  1.1  mrg 				    BASE_COST * strlen ("nonempty"));
    248  1.1  mrg   test_get_edit_distance_both_ways ("saturday", "sunday",
    249  1.1  mrg 				    BASE_COST * 3);
    250  1.1  mrg   test_get_edit_distance_both_ways ("foo", "m_foo", BASE_COST * 2);
    251  1.1  mrg   test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 4);
    252  1.1  mrg   test_get_edit_distance_both_ways
    253  1.1  mrg     ("the quick brown fox jumps over the lazy dog", "dog", BASE_COST * 40);
    254  1.1  mrg   test_get_edit_distance_both_ways
    255  1.1  mrg     ("the quick brown fox jumps over the lazy dog",
    256  1.1  mrg      "the quick brown dog jumps over the lazy fox",
    257  1.1  mrg      BASE_COST * 4);
    258  1.1  mrg   test_get_edit_distance_both_ways
    259  1.1  mrg     ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
    260  1.1  mrg      "All your base are belong to us",
    261  1.1  mrg      BASE_COST * 44);
    262  1.1  mrg   test_get_edit_distance_both_ways ("foo", "FOO", 3);
    263  1.1  mrg   test_get_edit_distance_both_ways ("fee", "deed", BASE_COST * 2);
    264  1.1  mrg   test_get_edit_distance_both_ways ("coorzd1", "coordx1", BASE_COST * 2);
    265  1.1  mrg   test_get_edit_distance_both_ways ("assert", "sqrt", BASE_COST * 3);
    266  1.1  mrg   test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", BASE_COST * 3);
    267  1.1  mrg   test_get_edit_distance_both_ways ("time", "nice", BASE_COST * 2);
    268  1.1  mrg   test_get_edit_distance_both_ways ("bar", "carg", BASE_COST * 2);
    269  1.1  mrg   test_get_edit_distance_both_ways ("gtk_widget_show_all",
    270  1.1  mrg 				    "GtkWidgetShowAll", 10);
    271  1.1  mrg   test_get_edit_distance_both_ways ("m_bar", "bar", BASE_COST * 2);
    272  1.1  mrg   test_get_edit_distance_both_ways ("MACRO", "MACRAME", BASE_COST * 3);
    273  1.1  mrg   test_get_edit_distance_both_ways ("ab", "ac", BASE_COST * 1);
    274  1.1  mrg   test_get_edit_distance_both_ways ("ab", "a", BASE_COST * 1);
    275  1.1  mrg   test_get_edit_distance_both_ways ("a", "b", BASE_COST * 1);
    276  1.1  mrg   test_get_edit_distance_both_ways ("nanl", "name", BASE_COST * 2);
    277  1.1  mrg   test_get_edit_distance_both_ways ("char", "bar", BASE_COST * 2);
    278  1.1  mrg   test_get_edit_distance_both_ways ("-optimize", "fsanitize", BASE_COST * 5);
    279  1.1  mrg   test_get_edit_distance_both_ways ("__DATE__", "__i386__", BASE_COST * 4);
    280  1.1  mrg 
    281  1.1  mrg   /* Examples where transposition helps.  */
    282  1.1  mrg   test_get_edit_distance_both_ways ("ab", "ba", BASE_COST * 1);
    283  1.1  mrg   test_get_edit_distance_both_ways ("ba", "abc", BASE_COST * 2);
    284  1.1  mrg   test_get_edit_distance_both_ways ("coorzd1", "coordz1", BASE_COST * 1);
    285  1.1  mrg   test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz",
    286  1.1  mrg 				    "bacdefghijklmnopqrstuvwxzy",
    287  1.1  mrg 				    BASE_COST * 2);
    288  1.1  mrg   test_get_edit_distance_both_ways ("saturday", "sundya", BASE_COST * 4);
    289  1.1  mrg   test_get_edit_distance_both_ways ("signed", "singed", BASE_COST * 1);
    290  1.1  mrg }
    291  1.1  mrg 
    292  1.1  mrg /* Subroutine of test_get_edit_distance_cutoff, for emulating the
    293  1.1  mrg    spellchecking cutoff in up to GCC 8.  */
    294  1.1  mrg 
    295  1.1  mrg static edit_distance_t
    296  1.1  mrg get_old_cutoff (size_t goal_len, size_t candidate_len)
    297  1.1  mrg {
    298  1.1  mrg   return BASE_COST * MAX (goal_len, candidate_len) / 2;
    299  1.1  mrg }
    300  1.1  mrg 
    301  1.1  mrg /* Verify that the cutoff for "meaningfulness" of suggestions is at least as
    302  1.1  mrg    conservative as in older GCC releases.
    303  1.1  mrg 
    304  1.1  mrg    This should ensure that we don't offer additional meaningless
    305  1.1  mrg    suggestions (apart from those for which transposition has helped).  */
    306  1.1  mrg 
    307  1.1  mrg static void
    308  1.1  mrg test_get_edit_distance_cutoff ()
    309  1.1  mrg {
    310  1.1  mrg   for (size_t goal_len = 0; goal_len < 30; goal_len++)
    311  1.1  mrg     for (size_t candidate_len = 0; candidate_len < 30; candidate_len++)
    312  1.1  mrg       ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len)
    313  1.1  mrg 		   <= get_old_cutoff (goal_len, candidate_len));
    314  1.1  mrg }
    315  1.1  mrg 
    316  1.1  mrg /* Assert that CANDIDATE is offered as a suggestion for TARGET.  */
    317  1.1  mrg 
    318  1.1  mrg static void
    319  1.1  mrg assert_suggested_for (const location &loc, const char *candidate,
    320  1.1  mrg 		      const char *target)
    321  1.1  mrg {
    322  1.1  mrg   auto_vec<const char *> candidates;
    323  1.1  mrg   candidates.safe_push (candidate);
    324  1.1  mrg   ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates));
    325  1.1  mrg }
    326  1.1  mrg 
    327  1.1  mrg /* Assert that CANDIDATE is offered as a suggestion for TARGET.  */
    328  1.1  mrg 
    329  1.1  mrg #define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET)			\
    330  1.1  mrg   SELFTEST_BEGIN_STMT							\
    331  1.1  mrg     assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET);	\
    332  1.1  mrg   SELFTEST_END_STMT
    333  1.1  mrg 
    334  1.1  mrg /* Assert that CANDIDATE is not offered as a suggestion for TARGET.  */
    335  1.1  mrg 
    336  1.1  mrg static void
    337  1.1  mrg assert_not_suggested_for (const location &loc, const char *candidate,
    338  1.1  mrg 			  const char *target)
    339  1.1  mrg {
    340  1.1  mrg   auto_vec<const char *> candidates;
    341  1.1  mrg   candidates.safe_push (candidate);
    342  1.1  mrg   ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates));
    343  1.1  mrg }
    344  1.1  mrg 
    345  1.1  mrg /* Assert that CANDIDATE is not offered as a suggestion for TARGET.  */
    346  1.1  mrg 
    347  1.1  mrg #define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET)			\
    348  1.1  mrg   SELFTEST_BEGIN_STMT							\
    349  1.1  mrg     assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET);	\
    350  1.1  mrg   SELFTEST_END_STMT
    351  1.1  mrg 
    352  1.1  mrg /* Verify that we offer varous suggestions that are meaningful,
    353  1.1  mrg    and that we don't offer various other ones that aren't (PR c/82967).  */
    354  1.1  mrg 
    355  1.1  mrg static void
    356  1.1  mrg test_suggestions ()
    357  1.1  mrg {
    358  1.1  mrg   /* Good suggestions.  */
    359  1.1  mrg 
    360  1.1  mrg   ASSERT_SUGGESTED_FOR ("m_bar", "bar");
    361  1.1  mrg   // dist == 2, max_length == 5, min_length == 3
    362  1.1  mrg 
    363  1.1  mrg   ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME");
    364  1.1  mrg   // dist == 3, max_length == 7, min_length == 5
    365  1.1  mrg 
    366  1.1  mrg   ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll");
    367  1.1  mrg   // dist == 7, max_length == 16, min_length = 19
    368  1.1  mrg 
    369  1.1  mrg   ASSERT_SUGGESTED_FOR ("ab", "ac");
    370  1.1  mrg   // dist == 1, max_length == min_length = 2
    371  1.1  mrg 
    372  1.1  mrg   ASSERT_SUGGESTED_FOR ("ab", "a");
    373  1.1  mrg   // dist == 1, max_length == 2, min_length = 1
    374  1.1  mrg 
    375  1.1  mrg   /* Bad suggestions.  */
    376  1.1  mrg 
    377  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("a", "b");
    378  1.1  mrg   // dist == 1, max_length == min_length = 1
    379  1.1  mrg 
    380  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert");
    381  1.1  mrg   // dist == 3, max_length 6, min_length == 4
    382  1.1  mrg 
    383  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX");
    384  1.1  mrg   // dist == 3, max_length == min_length == 8
    385  1.1  mrg 
    386  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("nice", "time");
    387  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("nanl", "name");
    388  1.1  mrg   // dist == 2, max_length == min_length == 4
    389  1.1  mrg 
    390  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("carg", "bar");
    391  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("char", "bar");
    392  1.1  mrg   // dist == 2, max_length == 4, min_length == 3
    393  1.1  mrg 
    394  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize");
    395  1.1  mrg   // dist == 5, max_length == min_length == 9
    396  1.1  mrg 
    397  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__");
    398  1.1  mrg   // dist == 4, max_length == min_length == 8
    399  1.1  mrg 
    400  1.1  mrg   ASSERT_NOT_SUGGESTED_FOR ("start_input_device", "InputDevice");
    401  1.1  mrg   // dist == 9, max_length == 18, min_length == 11
    402  1.1  mrg }
    403  1.1  mrg 
    404  1.1  mrg /* Verify that find_closest_string is sane.  */
    405  1.1  mrg 
    406  1.1  mrg static void
    407  1.1  mrg test_find_closest_string ()
    408  1.1  mrg {
    409  1.1  mrg   auto_vec<const char *> candidates;
    410  1.1  mrg 
    411  1.1  mrg   /* Verify that it can handle an empty vec.  */
    412  1.1  mrg   ASSERT_EQ (NULL, find_closest_string ("", &candidates));
    413  1.1  mrg 
    414  1.1  mrg   /* Verify that it works sanely for non-empty vecs.  */
    415  1.1  mrg   candidates.safe_push ("apple");
    416  1.1  mrg   candidates.safe_push ("banana");
    417  1.1  mrg   candidates.safe_push ("cherry");
    418  1.1  mrg 
    419  1.1  mrg   ASSERT_STREQ ("apple", find_closest_string ("app", &candidates));
    420  1.1  mrg   ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates));
    421  1.1  mrg   ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates));
    422  1.1  mrg   ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates));
    423  1.1  mrg 
    424  1.1  mrg   /* The order of the vec can matter, but it should not matter for these
    425  1.1  mrg      inputs.  */
    426  1.1  mrg   candidates.truncate (0);
    427  1.1  mrg   candidates.safe_push ("cherry");
    428  1.1  mrg   candidates.safe_push ("banana");
    429  1.1  mrg   candidates.safe_push ("apple");
    430  1.1  mrg   ASSERT_STREQ ("apple", find_closest_string ("app", &candidates));
    431  1.1  mrg   ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates));
    432  1.1  mrg   ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates));
    433  1.1  mrg   ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates));
    434  1.1  mrg 
    435  1.1  mrg   /* If the goal string somehow makes it into the candidate list, offering
    436  1.1  mrg      it as a suggestion will be nonsensical.  Verify that we don't offer such
    437  1.1  mrg      suggestions.  */
    438  1.1  mrg   ASSERT_EQ (NULL, find_closest_string ("banana", &candidates));
    439  1.1  mrg 
    440  1.1  mrg   /* Example from PR 69968 where transposition helps.  */
    441  1.1  mrg   candidates.truncate (0);
    442  1.1  mrg   candidates.safe_push("coordx");
    443  1.1  mrg   candidates.safe_push("coordy");
    444  1.1  mrg   candidates.safe_push("coordz");
    445  1.1  mrg   candidates.safe_push("coordx1");
    446  1.1  mrg   candidates.safe_push("coordy1");
    447  1.1  mrg   candidates.safe_push("coordz1");
    448  1.1  mrg   ASSERT_STREQ ("coordz1", find_closest_string ("coorzd1", &candidates));
    449  1.1  mrg 
    450  1.1  mrg   candidates.truncate (0);
    451  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_GDB");
    452  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_ALL");
    453  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_MINIMAL");
    454  1.1  mrg   ASSERT_STREQ ("DWARF_GNAT_ENCODINGS_ALL",
    455  1.1  mrg 		find_closest_string ("DWARF_GNAT_ENCODINGS_all",
    456  1.1  mrg 				     &candidates));
    457  1.1  mrg 
    458  1.1  mrg   /* The same as the previous test, but with a different order of
    459  1.1  mrg      candidates.  */
    460  1.1  mrg   candidates.truncate (0);
    461  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_ALL");
    462  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_GDB");
    463  1.1  mrg   candidates.safe_push ("DWARF_GNAT_ENCODINGS_MINIMAL");
    464  1.1  mrg   ASSERT_STREQ ("DWARF_GNAT_ENCODINGS_ALL",
    465  1.1  mrg 		find_closest_string ("DWARF_GNAT_ENCODINGS_all",
    466  1.1  mrg 				     &candidates));
    467  1.1  mrg }
    468  1.1  mrg 
    469  1.1  mrg /* Test data for test_metric_conditions.  */
    470  1.1  mrg 
    471  1.1  mrg static const char * const test_data[] = {
    472  1.1  mrg   "",
    473  1.1  mrg   "foo",
    474  1.1  mrg   "food",
    475  1.1  mrg   "boo",
    476  1.1  mrg   "1234567890123456789012345678901234567890123456789012345678901234567890",
    477  1.1  mrg   "abc",
    478  1.1  mrg   "ac",
    479  1.1  mrg   "ca",
    480  1.1  mrg };
    481  1.1  mrg 
    482  1.1  mrg /* Verify that get_edit_distance appears to be a sane distance function,
    483  1.1  mrg    even though it doesn't satisfy the conditions for being a metric.  (This
    484  1.1  mrg    is because the triangle inequality fails to hold: the distance between
    485  1.1  mrg    "ca" and "ac" is 1, and so is the distance between "abc" and "ac", but
    486  1.1  mrg    the distance between "abc" and "ca" is 3. Algorithms that calculate the
    487  1.1  mrg    true Levenshtein-Damerau metric are much more expensive.)  */
    488  1.1  mrg 
    489  1.1  mrg static void
    490  1.1  mrg test_metric_conditions ()
    491  1.1  mrg {
    492  1.1  mrg   const int num_test_cases = sizeof (test_data) / sizeof (test_data[0]);
    493  1.1  mrg 
    494  1.1  mrg   for (int i = 0; i < num_test_cases; i++)
    495  1.1  mrg     {
    496  1.1  mrg       for (int j = 0; j < num_test_cases; j++)
    497  1.1  mrg 	{
    498  1.1  mrg 	  edit_distance_t dist_ij
    499  1.1  mrg 	    = get_edit_distance (test_data[i], test_data[j]);
    500  1.1  mrg 
    501  1.1  mrg 	  /* Identity of indiscernibles: d(i, j) > 0 iff i == j.  */
    502  1.1  mrg 	  if (i == j)
    503  1.1  mrg 	    ASSERT_EQ (dist_ij, 0);
    504  1.1  mrg 	  else
    505  1.1  mrg 	    ASSERT_TRUE (dist_ij > 0);
    506  1.1  mrg 
    507  1.1  mrg 	  /* Symmetry: d(i, j) == d(j, i).  */
    508  1.1  mrg 	  edit_distance_t dist_ji
    509  1.1  mrg 	    = get_edit_distance (test_data[j], test_data[i]);
    510  1.1  mrg 	  ASSERT_EQ (dist_ij, dist_ji);
    511  1.1  mrg 	}
    512  1.1  mrg     }
    513  1.1  mrg }
    514  1.1  mrg 
    515  1.1  mrg /* Run all of the selftests within this file.  */
    516  1.1  mrg 
    517  1.1  mrg void
    518  1.1  mrg spellcheck_cc_tests ()
    519  1.1  mrg {
    520  1.1  mrg   test_edit_distances ();
    521  1.1  mrg   test_get_edit_distance_cutoff ();
    522  1.1  mrg   test_suggestions ();
    523  1.1  mrg   test_find_closest_string ();
    524  1.1  mrg   test_metric_conditions ();
    525  1.1  mrg }
    526  1.1  mrg 
    527  1.1  mrg } // namespace selftest
    528  1.1  mrg 
    529  1.1  mrg #endif /* #if CHECKING_P */
    530